From ef851949d78426068d3798d9ef385f1ab1f73454 Mon Sep 17 00:00:00 2001 From: missinglink Date: Wed, 12 Jul 2017 18:39:18 +0200 Subject: [PATCH 01/15] relations: load relation members in to leveldb --- command/bitmask_custom.go | 28 +++++++- command/json_flat.go | 6 +- command/store_noderefs.go | 6 +- handler/bitmask_custom.go | 71 ++++++++++++++++++-- leveldb/{writer.go => coord_writer.go} | 0 leveldb/path_writer.go | 91 ++++++++++++++++++++++++++ lib/bitmaskmap.go | 4 ++ proxy/store_noderefs.go | 14 ++-- 8 files changed, 202 insertions(+), 18 deletions(-) rename leveldb/{writer.go => coord_writer.go} (100%) create mode 100644 leveldb/path_writer.go diff --git a/command/bitmask_custom.go b/command/bitmask_custom.go index d71ba24..c72407b 100644 --- a/command/bitmask_custom.go +++ b/command/bitmask_custom.go @@ -22,7 +22,7 @@ func BitmaskCustom(c *cli.Context) error { } // create parser - parser := parser.NewParser(c.Args()[0]) + p := parser.NewParser(c.Args()[0]) // don't clobber existing bitmask file if _, err := os.Stat(c.Args()[1]); err == nil { @@ -60,7 +60,31 @@ func BitmaskCustom(c *cli.Context) error { defer handle.Masks.WriteToFile(c.Args()[1]) // Parse will block until it is done or an error occurs. - parser.Parse(handle) + p.Parse(handle) + + // --- second pass --- + // run parser a second time, skipping the nodes + + // if we are not interested in relations, exit now + if 0 == len(config.RelationPatterns) { + return nil + } + + // disable indexing + os.Unsetenv("INDEXING") + + // create a new parser + p2 := parser.NewParser(c.Args()[0]) + + // find first way offset + offset, err := p2.GetDecoder().Index.FirstOffsetOfType("way") + if nil != err { + log.Printf("target type: %s not found in file\n", "way") + os.Exit(1) + } + + // Parse will block until it is done or an error occurs. 
+ p2.ParseFrom(handle, offset) return nil } diff --git a/command/json_flat.go b/command/json_flat.go index 802d960..a1816a6 100644 --- a/command/json_flat.go +++ b/command/json_flat.go @@ -75,9 +75,9 @@ func JSONFlat(c *cli.Context) error { // create store proxy var store = &proxy.StoreRefs{ - Handler: handle, - Writer: writer, - Masks: masks, + Handler: handle, + CoordWriter: writer, + Masks: masks, } p.Parse(store) diff --git a/command/store_noderefs.go b/command/store_noderefs.go index 2508013..3daaf64 100644 --- a/command/store_noderefs.go +++ b/command/store_noderefs.go @@ -62,9 +62,9 @@ func StoreNodeRefs(c *cli.Context) error { // create store proxy var store = &proxy.StoreRefs{ - Handler: &handler.Null{}, - Writer: writer, - Masks: masks, + Handler: &handler.Null{}, + CoordWriter: writer, + Masks: masks, } // Parse will block until it is done or an error occurs. diff --git a/handler/bitmask_custom.go b/handler/bitmask_custom.go index 900b728..9b521a2 100644 --- a/handler/bitmask_custom.go +++ b/handler/bitmask_custom.go @@ -14,6 +14,8 @@ type BitmaskCustom struct { // ReadNode - called once per node func (b *BitmaskCustom) ReadNode(item gosmparse.Node) { + + // nodes in feature list if b.Features.MatchNode(item) { b.Masks.Nodes.Insert(item.ID) } @@ -21,7 +23,10 @@ func (b *BitmaskCustom) ReadNode(item gosmparse.Node) { // ReadWay - called once per way func (b *BitmaskCustom) ReadWay(item gosmparse.Way) { + + // ways in feature list if b.Features.MatchWay(item) { + b.Masks.Ways.Insert(item.ID) // insert dependents in mask @@ -29,15 +34,69 @@ func (b *BitmaskCustom) ReadWay(item gosmparse.Way) { b.Masks.WayRefs.Insert(ref) } } + + // ways belonging to a relation + if b.Masks.RelWays.Has(item.ID) { + + // insert dependents in mask + for _, ref := range item.NodeIDs { + b.Masks.RelNodes.Insert(ref) + } + } } // ReadRelation - called once per relation func (b *BitmaskCustom) ReadRelation(item gosmparse.Relation) { - // @todo: relations currently not supported - 
// due to requiring a 'second-pass' to gather the node ids for - // each member way + if b.Features.MatchRelation(item) { + + // we currently only support the 'multipolygon' type + // see: http://wiki.openstreetmap.org/wiki/Types_of_relation + if val, ok := item.Tags["type"]; ok && val == "multipolygon" { - // if b.Features.MatchRelation(item) { - // b.Masks.Relations.Insert(item.ID) - // } + // detect relation class + var isSuperRelation = false + var hasAdminCentre = false + + // iterate members once to try to classify the relation + for _, member := range item.Members { + switch member.Type { + case gosmparse.RelationType: + isSuperRelation = true + case gosmparse.NodeType: + if member.Role == "admin_centre" { + hasAdminCentre = true + } + } + } + + // super relations are relations containing other relations + // we currently do not support these due to their complexity + if isSuperRelation { + return + } + + // iterate over relation members + for _, member := range item.Members { + + switch member.Type { + case gosmparse.NodeType: + + // only store nodes if they are for 'admin_centre' + if member.Role == "admin_centre" { + b.Masks.RelNodes.Insert(member.ID) + } + + case gosmparse.WayType: + + // only store ways if we don't have the admin_centre + if !hasAdminCentre { + b.Masks.RelWays.Insert(member.ID) + } + } + } + + // insert relation in mask + b.Masks.Relations.Insert(item.ID) + } + } } diff --git a/leveldb/writer.go b/leveldb/coord_writer.go similarity index 100% rename from leveldb/writer.go rename to leveldb/coord_writer.go diff --git a/leveldb/path_writer.go b/leveldb/path_writer.go new file mode 100644 index 0000000..dd66921 --- /dev/null +++ b/leveldb/path_writer.go @@ -0,0 +1,91 @@ +package leveldb + +import ( + "encoding/binary" + "log" + "sync" + + "github.com/missinglink/gosmparse" + "github.com/syndtr/goleveldb/leveldb" +) + +// PathWriter - buffered stdout writer with sync channel +type PathWriter struct { + Conn *Connection + WaitGroup 
*sync.WaitGroup + Queue chan kv +} + +// NewPathWriter - constructor +func NewPathWriter(conn *Connection) *PathWriter { + w := &PathWriter{ + Conn: conn, + WaitGroup: &sync.WaitGroup{}, + Queue: make(chan kv, batchSize*10), + } + + // start writer routine + w.WaitGroup.Add(1) + go func() { + batch := new(leveldb.Batch) + for row := range w.Queue { + + // put + batch.Put(row.Key, row.Val) + + // flush when full + if batch.Len() >= batchSize { + + // write batch + err := w.Conn.DB.Write(batch, nil) + if err != nil { + log.Println(err) + } + + // reset batch + batch.Reset() + } + } + + // write final batch + err := w.Conn.DB.Write(batch, nil) + if err != nil { + log.Println(err) + } + + w.WaitGroup.Done() + }() + + return w +} + +// Enqueue - close the channel and block until done +func (w *PathWriter) Enqueue(item *gosmparse.Way) { + + // encode id + key := make([]byte, 8) + binary.BigEndian.PutUint64(key, uint64(item.ID)) + + // encoded path + var value []byte + + // iterate over node refs, appending each int64 id to the value + for _, ref := range item.NodeIDs { + + // encode id + // @todo: use varint encoding to save bytes + idBytes := make([]byte, 8) + binary.BigEndian.PutUint64(idBytes, uint64(ref)) + + // append to slice + value = append(value, idBytes...) 
+ } + + w.Queue <- kv{Key: key, Val: value} +} + +// Close - close the channel and block until done +func (w *PathWriter) Close() { + close(w.Queue) + w.WaitGroup.Wait() +} diff --git a/lib/bitmaskmap.go b/lib/bitmaskmap.go index e2ac185..af4f1d3 100644 --- a/lib/bitmaskmap.go +++ b/lib/bitmaskmap.go @@ -15,6 +15,8 @@ type BitmaskMap struct { Ways *Bitmask Relations *Bitmask WayRefs *Bitmask + RelNodes *Bitmask + RelWays *Bitmask } // NewBitmaskMap - constructor @@ -24,6 +26,8 @@ func NewBitmaskMap() *BitmaskMap { Ways: NewBitMask(), Relations: NewBitMask(), WayRefs: NewBitMask(), + RelNodes: NewBitMask(), + RelWays: NewBitMask(), } } diff --git a/proxy/store_noderefs.go b/proxy/store_noderefs.go index 3479e47..4e9ec92 100644 --- a/proxy/store_noderefs.go +++ b/proxy/store_noderefs.go @@ -8,15 +8,18 @@ import ( // StoreRefs - filter only elements that appear in masks type StoreRefs struct { - Handler gosmparse.OSMReader - Writer *leveldb.CoordWriter - Masks *lib.BitmaskMap + Handler gosmparse.OSMReader + CoordWriter *leveldb.CoordWriter + PathWriter *leveldb.PathWriter + Masks *lib.BitmaskMap } // ReadNode - called once per node func (s *StoreRefs) ReadNode(item gosmparse.Node) { if nil != s.Masks.WayRefs && s.Masks.WayRefs.Has(item.ID) { - s.Writer.Enqueue(&item) // write to db + s.CoordWriter.Enqueue(&item) // write to db + } else if nil != s.Masks.RelNodes && s.Masks.RelNodes.Has(item.ID) { + s.CoordWriter.Enqueue(&item) // write to db } if nil != s.Masks.Nodes && s.Masks.Nodes.Has(item.ID) { s.Handler.ReadNode(item) @@ -25,6 +28,9 @@ func (s *StoreRefs) ReadNode(item gosmparse.Node) { // ReadWay - called once per way func (s *StoreRefs) ReadWay(item gosmparse.Way) { + if nil != s.Masks.RelWays && s.Masks.RelWays.Has(item.ID) { + s.PathWriter.Enqueue(&item) // write to db + } if nil != s.Masks.Ways && s.Masks.Ways.Has(item.ID) { s.Handler.ReadWay(item) } From 841a374ea9fa985ce595ec5321b15517a4fb0372 Mon Sep 17 00:00:00 2001 From: missinglink Date: Wed, 12 Jul 
2017 18:45:12 +0200 Subject: [PATCH 02/15] relations: prefix way keys with 'W' to avoid id collisions --- leveldb/path_writer.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/leveldb/path_writer.go b/leveldb/path_writer.go index dd66921..f5386f2 100644 --- a/leveldb/path_writer.go +++ b/leveldb/path_writer.go @@ -63,8 +63,8 @@ func NewPathWriter(conn *Connection) *PathWriter { func (w *PathWriter) Enqueue(item *gosmparse.Way) { // encode id - key := make([]byte, 8) - binary.BigEndian.PutUint64(key, uint64(item.ID)) + idBytes := make([]byte, 8) + binary.BigEndian.PutUint64(idBytes, uint64(item.ID)) // encoded path var value []byte @@ -81,6 +81,9 @@ func (w *PathWriter) Enqueue(item *gosmparse.Way) { value = append(value, idBytes...) } + // prefix way keys with 'W' to avoid id collisions + key := append([]byte{'W'}, idBytes...) + w.Queue <- kv{Key: key, Val: value} } From a29f04ea56b3ee6a95f9f9ecd191c058bd37dd0b Mon Sep 17 00:00:00 2001 From: missinglink Date: Thu, 13 Jul 2017 14:15:31 +0200 Subject: [PATCH 03/15] relations: store + load relation members from leveldb --- command/bitmask_custom.go | 1 + command/json_flat.go | 19 +++-- command/store_noderefs.go | 19 +++-- handler/denormalized_json.go | 104 ++++++++++++++++++----- json/denormalized_relation.go | 44 ++++++++++ json/denormalized_way.go | 5 +- leveldb/coord_writer.go | 95 --------------------- leveldb/path.go | 76 +++++++++++++++++ leveldb/path_writer.go | 94 --------------------- leveldb/writer.go | 155 ++++++++++++++++++++++++++++++++++ proxy/store_noderefs.go | 13 ++- 11 files changed, 388 insertions(+), 237 deletions(-) create mode 100644 json/denormalized_relation.go delete mode 100644 leveldb/coord_writer.go create mode 100644 leveldb/path.go delete mode 100644 leveldb/path_writer.go create mode 100644 leveldb/writer.go diff --git a/command/bitmask_custom.go b/command/bitmask_custom.go index c72407b..44c85d0 100644 --- a/command/bitmask_custom.go +++ 
b/command/bitmask_custom.go @@ -64,6 +64,7 @@ func BitmaskCustom(c *cli.Context) error { // --- second pass --- // run parser a second time, skipping the nodes + // @todo: skip relations on the second pass too // if we are not interested in relations, exit now if 0 == len(config.RelationPatterns) { diff --git a/command/json_flat.go b/command/json_flat.go index a1816a6..155cc69 100644 --- a/command/json_flat.go +++ b/command/json_flat.go @@ -59,25 +59,28 @@ func JSONFlat(c *cli.Context) error { defer handle.Writer.Close() // create db writer routine - writer := leveldb.NewCoordWriter(conn) + writer := leveldb.NewWriter(conn) // ensure all node refs are written to disk before starting on the ways dec := p.GetDecoder() dec.Triggers = []func(int, uint64){ func(i int, offset uint64) { - if 0 == i { - log.Println("writer close") - writer.Close() - log.Println("writer closed") + switch i { + case 0: + writer.NodeQueue.Close() + log.Println("nodes written") + case 1: + writer.WayQueue.Close() + log.Println("ways written") } }, } // create store proxy var store = &proxy.StoreRefs{ - Handler: handle, - CoordWriter: writer, - Masks: masks, + Handler: handle, + Writer: writer, + Masks: masks, } p.Parse(store) diff --git a/command/store_noderefs.go b/command/store_noderefs.go index 3daaf64..3df799d 100644 --- a/command/store_noderefs.go +++ b/command/store_noderefs.go @@ -46,25 +46,28 @@ func StoreNodeRefs(c *cli.Context) error { defer conn.Close() // create db writer routine - writer := leveldb.NewCoordWriter(conn) + writer := leveldb.NewWriter(conn) // ensure all node refs are written to disk before starting on the ways dec := parser.GetDecoder() dec.Triggers = []func(int, uint64){ func(i int, offset uint64) { - if 0 == i { - log.Println("writer close") - writer.Close() - log.Println("writer closed") + switch i { + case 0: + writer.NodeQueue.Close() + log.Println("nodes written") + case 1: + writer.WayQueue.Close() + log.Println("ways written") } }, } // create store proxy 
var store = &proxy.StoreRefs{ - Handler: &handler.Null{}, - CoordWriter: writer, - Masks: masks, + Handler: &handler.Null{}, + Writer: writer, + Masks: masks, } // Parse will block until it is done or an error occurs. diff --git a/handler/denormalized_json.go b/handler/denormalized_json.go index 3915c2a..6739ce2 100644 --- a/handler/denormalized_json.go +++ b/handler/denormalized_json.go @@ -66,27 +66,6 @@ func (d *DenormalizedJSON) ReadWay(item gosmparse.Way) { Tags: item.Tags, } - // compute line/street centroid - if d.ComputeCentroid { - var lon, lat = lib.WayCentroid(refs) - obj.Centroid = json.NewLatLon(lat, lon) - } - - // compute geohash - if d.ComputeGeohash { - obj.Hash = geohash.Encode(obj.Centroid.Lat, obj.Centroid.Lon) - } - - // convert refs to latlons - if d.ExportLatLons { - for _, node := range refs { - obj.LatLons = append(obj.LatLons, &json.LatLon{ - Lat: node.Lat, - Lon: node.Lon, - }) - } - } - // write d.Writer.Queue <- obj.Bytes() } @@ -100,6 +79,87 @@ func (d *DenormalizedJSON) ReadRelation(item gosmparse.Relation) { DeleteTags(item.Tags, uninterestingTags) // relation - obj := json.RelationFromParser(item) + obj := json.DenormalizedRelation{ + ID: item.ID, + Type: "relation", + Tags: item.Tags, + } + + // iterate members once to try to classify the relation + var adminCentreID int64 + var wayIDs []int64 + + for _, member := range item.Members { + switch member.Type { + case gosmparse.NodeType: + // only target the 'admin_centre' node + if member.Role == "admin_centre" { + + // store the ID of the admin centre node + adminCentreID = member.ID + } + case gosmparse.WayType: + // skip cyclic references to parent + if member.Role != "subarea" { + + // append way ID to list of member ways + wayIDs = append(wayIDs, member.ID) + } + } + } + + // this is the simplest relation to build, we simply need to load the + // admin centre coord and use that as the centroid + if 0 != adminCentreID { + + var node, readError = d.Conn.ReadCoord(adminCentreID) + 
if nil != readError { + // skip relation if the point is not found in the db + log.Printf("skipping relation %d. failed to load admin centre %d\n", item.ID, adminCentreID) + return + } + + // set the centroid + obj.Centroid = json.NewLatLon(node.Lat, node.Lon) + + } else { + // this is more complex, we need to load all the multipolygon rings + // from the DB and assemble the geometry before calculating the centroid + + // load ring data from database + var ways []*json.DenormalizedWay + for _, wayID := range wayIDs { + + // load way from DB + var way, readError = d.Conn.ReadPath(wayID) + if nil != readError { + // skip ways which fail to denormalize + log.Printf("skipping relation %d. failed to load way %d\n", item.ID, wayID) + return + } + + // use a struct which allows us to store the refs within + var denormalizedWay = json.DenormalizedWayFromParser(*way) + + // load way refs from DB + for _, ref := range way.NodeIDs { + var node, readError = d.Conn.ReadCoord(ref) + if nil != readError { + // skip ways which fail to denormalize + log.Printf("skipping relation way %d. 
failed to load ref %d\n", item.ID, ref) + return + } + + // append way vertex + denormalizedWay.LatLons = append(denormalizedWay.LatLons, json.NewLatLon(node.Lat, node.Lon)) + } + + // store way + ways = append(ways, denormalizedWay) + } + + log.Println("write relation", item.ID) + } + d.Writer.Queue <- obj.Bytes() } diff --git a/json/denormalized_relation.go b/json/denormalized_relation.go new file mode 100644 index 0000000..07d1b99 --- /dev/null +++ b/json/denormalized_relation.go @@ -0,0 +1,44 @@ +package json + +import ( + "encoding/json" + "fmt" + + "github.com/missinglink/gosmparse" +) + +// DenormalizedRelation struct +type DenormalizedRelation struct { + ID int64 `json:"id"` + Type string `json:"type"` + Hash string `json:"hash,omitempty"` + Tags map[string]string `json:"tags,omitempty"` + Centroid *LatLon `json:"centroid,omitempty"` +} + +// Print json +func (rel DenormalizedRelation) Print() { + json, _ := json.Marshal(rel) + fmt.Println(string(json)) +} + +// PrintIndent json indented +func (rel DenormalizedRelation) PrintIndent() { + json, _ := json.MarshalIndent(rel, "", " ") + fmt.Println(string(json)) +} + +// Bytes - return json +func (rel DenormalizedRelation) Bytes() []byte { + json, _ := json.Marshal(rel) + return json +} + +// DenormalizedRelationFromParser - generate a new JSON struct based off a parse struct +func DenormalizedRelationFromParser(item gosmparse.Relation) *Relation { + return &Relation{ + ID: item.ID, + Type: "relation", + Tags: item.Tags, + } +} diff --git a/json/denormalized_way.go b/json/denormalized_way.go index ea35e7a..3ab62c9 100644 --- a/json/denormalized_way.go +++ b/json/denormalized_way.go @@ -36,11 +36,10 @@ func (way DenormalizedWay) Bytes() []byte { } // DenormalizedWayFromParser - generate a new JSON struct based off a parse struct -func DenormalizedWayFromParser(item gosmparse.Way) *Way { - return &Way{ +func DenormalizedWayFromParser(item gosmparse.Way) *DenormalizedWay { + return &DenormalizedWay{ ID: item.ID, 
Type: "way", Tags: item.Tags, - Refs: item.NodeIDs, } } diff --git a/leveldb/coord_writer.go b/leveldb/coord_writer.go deleted file mode 100644 index 78624b3..0000000 --- a/leveldb/coord_writer.go +++ /dev/null @@ -1,95 +0,0 @@ -package leveldb - -import ( - "encoding/binary" - "log" - "math" - "sync" - - "github.com/missinglink/gosmparse" - "github.com/syndtr/goleveldb/leveldb" -) - -var batchSize = 20000 - -// CoordWriter - buffered stdout writer with sync channel -type CoordWriter struct { - Conn *Connection - WaitGroup *sync.WaitGroup - Queue chan kv -} - -type kv struct { - Key []byte - Val []byte -} - -// NewCoordWriter - constructor -func NewCoordWriter(conn *Connection) *CoordWriter { - w := &CoordWriter{ - Conn: conn, - WaitGroup: &sync.WaitGroup{}, - Queue: make(chan kv, batchSize*10), - } - - // start writer routine - w.WaitGroup.Add(1) - go func() { - batch := new(leveldb.Batch) - for row := range w.Queue { - - // put - batch.Put(row.Key, row.Val) - - // flush when full - if batch.Len() >= batchSize { - - // write batch - err := w.Conn.DB.Write(batch, nil) - if err != nil { - log.Println(err) - } - - // reset batch - batch.Reset() - } - } - - // write final batch - err := w.Conn.DB.Write(batch, nil) - if err != nil { - log.Println(err) - } - - w.WaitGroup.Done() - }() - - return w -} - -// Enqueue - close the channel and block until done -func (w *CoordWriter) Enqueue(item *gosmparse.Node) { - - // encode id - key := make([]byte, 8) - binary.BigEndian.PutUint64(key, uint64(item.ID)) - - // encode lat - lat := make([]byte, 8) - binary.BigEndian.PutUint64(lat, math.Float64bits(item.Lat)) - - // encode lon - lon := make([]byte, 8) - binary.BigEndian.PutUint64(lon, math.Float64bits(item.Lon)) - - // value - value := append(lat, lon...) 
- - w.Queue <- kv{Key: key, Val: value} -} - -// Close - close the channel and block until done -func (w *CoordWriter) Close() { - close(w.Queue) - w.WaitGroup.Wait() -} diff --git a/leveldb/path.go b/leveldb/path.go new file mode 100644 index 0000000..d1d6933 --- /dev/null +++ b/leveldb/path.go @@ -0,0 +1,76 @@ +package leveldb + +import ( + "encoding/binary" + "log" + + "github.com/missinglink/gosmparse" +) + +// WritePath - encode and write an array of IDs to db +func (c *Connection) WritePath(item gosmparse.Way) error { + + // encode id + idBytes := make([]byte, 8) + binary.BigEndian.PutUint64(idBytes, uint64(item.ID)) + + // prefix way keys with 'W' to avoid id collisions + key := append([]byte{'W'}, idBytes...) + + // encoded path + var value []byte + + // iterate over node refs, appending each int64 id to the value + for _, ref := range item.NodeIDs { + + // encode id + // @todo: use varint encoding to save bytes + refBytes := make([]byte, 8) + binary.BigEndian.PutUint64(refBytes, uint64(ref)) + + // append to slice + value = append(value, refBytes...) + } + + // write to db + err := c.DB.Put(key, value, nil) + if err != nil { + return err + } + + return nil +} + +// ReadPath - read array of IDS from db +func (c *Connection) ReadPath(id int64) (*gosmparse.Way, error) { + + // encode id + idBytes := make([]byte, 8) + binary.BigEndian.PutUint64(idBytes, uint64(id)) + + // prefix way keys with 'W' to avoid id collisions + key := append([]byte{'W'}, idBytes...) 
+ + if id == 341691675 { + log.Println("341691675", "load") + } + + // read from db + data, err := c.DB.Get(key, nil) + + if err != nil { + return nil, err + } + + // decode node refs + var refs = make([]int64, 0, len(data)/8) + for i := 0; i < len(data); i += 8 { + refs = append(refs, int64(binary.BigEndian.Uint64(data[i:i+8]))) + } + + // decode item + return &gosmparse.Way{ + ID: id, + NodeIDs: refs, + }, nil +} diff --git a/leveldb/path_writer.go b/leveldb/path_writer.go deleted file mode 100644 index f5386f2..0000000 --- a/leveldb/path_writer.go +++ /dev/null @@ -1,94 +0,0 @@ -package leveldb - -import ( - "encoding/binary" - "log" - "sync" - - "github.com/missinglink/gosmparse" - "github.com/syndtr/goleveldb/leveldb" -) - -// PathWriter - buffered stdout writer with sync channel -type PathWriter struct { - Conn *Connection - WaitGroup *sync.WaitGroup - Queue chan kv -} - -// NewPathWriter - constructor -func NewPathWriter(conn *Connection) *PathWriter { - w := &PathWriter{ - Conn: conn, - WaitGroup: &sync.WaitGroup{}, - Queue: make(chan kv, batchSize*10), - } - - // start writer routine - w.WaitGroup.Add(1) - go func() { - batch := new(leveldb.Batch) - for row := range w.Queue { - - // put - batch.Put(row.Key, row.Val) - - // flush when full - if batch.Len() >= batchSize { - - // write batch - err := w.Conn.DB.Write(batch, nil) - if err != nil { - log.Println(err) - } - - // reset batch - batch.Reset() - } - } - - // write final batch - err := w.Conn.DB.Write(batch, nil) - if err != nil { - log.Println(err) - } - - w.WaitGroup.Done() - }() - - return w -} - -// Enqueue - close the channel and block until done -func (w *PathWriter) Enqueue(item *gosmparse.Way) { - - // encode id - idBytes := make([]byte, 8) - binary.BigEndian.PutUint64(idBytes, uint64(item.ID)) - - // encoded path - var value []byte - - // iterate over node refs, appending each int64 id to the value - for _, ref := range item.NodeIDs { - - // encode id - // @todo: use varint encoding to save 
bytes - idBytes := make([]byte, 8) - binary.BigEndian.PutUint64(idBytes, uint64(ref)) - - // append to slice - value = append(value, idBytes...) - } - - // prefix way keys with 'W' to avoid id collisions - key := append([]byte{'W'}, idBytes...) - - w.Queue <- kv{Key: key, Val: value} -} - -// Close - close the channel and block until done -func (w *PathWriter) Close() { - close(w.Queue) - w.WaitGroup.Wait() -} diff --git a/leveldb/writer.go b/leveldb/writer.go new file mode 100644 index 0000000..f4c8878 --- /dev/null +++ b/leveldb/writer.go @@ -0,0 +1,155 @@ +package leveldb + +import ( + "encoding/binary" + "log" + "math" + "sync" + + "github.com/missinglink/gosmparse" + "github.com/syndtr/goleveldb/leveldb" +) + +var batchSize = 20000 + +// WriteQueue - a channel + waitgroup for when it's done +type WriteQueue struct { + Conn *Connection + WaitGroup *sync.WaitGroup + Chan chan kv +} + +// Start the queue +func (q *WriteQueue) Start() { + + // start writer routine + q.WaitGroup.Add(1) + go func() { + batch := new(leveldb.Batch) + for row := range q.Chan { + + // put + batch.Put(row.Key, row.Val) + + // flush when full + if batch.Len() >= batchSize { + + // write batch + err := q.Conn.DB.Write(batch, nil) + if err != nil { + log.Println(err) + } + + // reset batch + batch.Reset() + } + } + + // write final batch + err := q.Conn.DB.Write(batch, nil) + if err != nil { + log.Println(err) + } + + q.WaitGroup.Done() + }() +} + +// Close - close the channel and block until done +func (q *WriteQueue) Close() { + close(q.Chan) + q.WaitGroup.Wait() +} + +// Writer - buffered stdout writer with sync channel +type Writer struct { + Conn *Connection + NodeQueue *WriteQueue + WayQueue *WriteQueue +} + +type kv struct { + Key []byte + Val []byte +} + +// NewWriter - constructor +func NewWriter(conn *Connection) *Writer { + var w = &Writer{ + Conn: conn, + NodeQueue: &WriteQueue{ + Conn: conn, + WaitGroup: &sync.WaitGroup{}, + Chan: make(chan kv, batchSize*10), + }, + WayQueue: 
&WriteQueue{ + Conn: conn, + WaitGroup: &sync.WaitGroup{}, + Chan: make(chan kv, batchSize*10), + }, + } + + w.NodeQueue.Start() + w.WayQueue.Start() + + return w +} + +// EnqueueNode - enqueue node bytes to be saved to db +func (w *Writer) EnqueueNode(item *gosmparse.Node) { + + // encode id + key := make([]byte, 8) + binary.BigEndian.PutUint64(key, uint64(item.ID)) + + // encode lat + lat := make([]byte, 8) + binary.BigEndian.PutUint64(lat, math.Float64bits(item.Lat)) + + // encode lon + lon := make([]byte, 8) + binary.BigEndian.PutUint64(lon, math.Float64bits(item.Lon)) + + // value + value := append(lat, lon...) + + w.NodeQueue.Chan <- kv{Key: key, Val: value} +} + +// EnqueueWay - enqueue way bytes to be saved to db +func (w *Writer) EnqueueWay(item *gosmparse.Way) { + + // encode id + idBytes := make([]byte, 8) + binary.BigEndian.PutUint64(idBytes, uint64(item.ID)) + + // prefix way keys with 'W' to avoid id collisions + key := append([]byte{'W'}, idBytes...) + + // encoded path + var value []byte + + // iterate over node refs, appending each int64 id to the value + for _, ref := range item.NodeIDs { + + // encode id + // @todo: use varint encoding to save bytes + refBytes := make([]byte, 8) + binary.BigEndian.PutUint64(refBytes, uint64(ref)) + + // append to slice + value = append(value, refBytes...) 
+ } + + if item.ID == 341691675 { + log.Println("341691675", "queue") + } + + w.WayQueue.Chan <- kv{Key: key, Val: value} +} + +// Close - close the channel and block until done +func (w *Writer) Close() { + w.NodeQueue.Close() + w.WayQueue.Close() +} diff --git a/proxy/store_noderefs.go b/proxy/store_noderefs.go index 4e9ec92..58342b3 100644 --- a/proxy/store_noderefs.go +++ b/proxy/store_noderefs.go @@ -8,18 +8,17 @@ import ( // StoreRefs - filter only elements that appear in masks type StoreRefs struct { - Handler gosmparse.OSMReader - CoordWriter *leveldb.CoordWriter - PathWriter *leveldb.PathWriter - Masks *lib.BitmaskMap + Handler gosmparse.OSMReader + Writer *leveldb.Writer + Masks *lib.BitmaskMap } // ReadNode - called once per node func (s *StoreRefs) ReadNode(item gosmparse.Node) { if nil != s.Masks.WayRefs && s.Masks.WayRefs.Has(item.ID) { - s.CoordWriter.Enqueue(&item) // write to db + s.Writer.EnqueueNode(&item) // write to db } else if nil != s.Masks.RelNodes && s.Masks.RelNodes.Has(item.ID) { - s.CoordWriter.Enqueue(&item) // write to db + s.Writer.EnqueueNode(&item) // write to db } if nil != s.Masks.Nodes && s.Masks.Nodes.Has(item.ID) { s.Handler.ReadNode(item) @@ -29,7 +28,7 @@ func (s *StoreRefs) ReadNode(item gosmparse.Node) { // ReadWay - called once per way func (s *StoreRefs) ReadWay(item gosmparse.Way) { if nil != s.Masks.RelWays && s.Masks.RelWays.Has(item.ID) { - s.PathWriter.Enqueue(&item) // write to db + s.Writer.EnqueueWay(&item) // write to db } if nil != s.Masks.Ways && s.Masks.Ways.Has(item.ID) { s.Handler.ReadWay(item) From 6500797cde1fe250c44feb5a4bcf970101b05592 Mon Sep 17 00:00:00 2001 From: missinglink Date: Thu, 13 Jul 2017 14:53:35 +0200 Subject: [PATCH 04/15] formatting: remove debug statements --- leveldb/path.go | 5 ----- leveldb/writer.go | 4 ---- 2 files changed, 9 deletions(-) diff --git a/leveldb/path.go b/leveldb/path.go index d1d6933..631fa8b 100644 --- a/leveldb/path.go +++ b/leveldb/path.go @@ -2,7 +2,6 @@ package 
leveldb import ( "encoding/binary" - "log" "github.com/missinglink/gosmparse" ) @@ -51,10 +50,6 @@ func (c *Connection) ReadPath(id int64) (*gosmparse.Way, error) { // prefix way keys with 'W' to avoid id collisions key := append([]byte{'W'}, idBytes...) - if id == 341691675 { - log.Println("341691675", "load") - } - // read from db data, err := c.DB.Get(key, nil) diff --git a/leveldb/writer.go b/leveldb/writer.go index f4c8878..5a299c4 100644 --- a/leveldb/writer.go +++ b/leveldb/writer.go @@ -141,10 +141,6 @@ func (w *Writer) EnqueueWay(item *gosmparse.Way) { value = append(value, refBytes...) } - if item.ID == 341691675 { - log.Println("341691675", "queue") - } - w.WayQueue.Chan <- kv{Key: key, Val: value} } From cb28914ee85d689da8e743f93e6371cb2e70d54e Mon Sep 17 00:00:00 2001 From: missinglink Date: Thu, 13 Jul 2017 14:56:03 +0200 Subject: [PATCH 05/15] oops: restore deleted code --- handler/denormalized_json.go | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/handler/denormalized_json.go b/handler/denormalized_json.go index 6739ce2..ec1d7c4 100644 --- a/handler/denormalized_json.go +++ b/handler/denormalized_json.go @@ -66,6 +66,27 @@ func (d *DenormalizedJSON) ReadWay(item gosmparse.Way) { Tags: item.Tags, } + // compute line/street centroid + if d.ComputeCentroid { + var lon, lat = lib.WayCentroid(refs) + obj.Centroid = json.NewLatLon(lat, lon) + } + + // compute geohash + if d.ComputeGeohash { + obj.Hash = geohash.Encode(obj.Centroid.Lat, obj.Centroid.Lon) + } + + // convert refs to latlons + if d.ExportLatLons { + for _, node := range refs { + obj.LatLons = append(obj.LatLons, &json.LatLon{ + Lat: node.Lat, + Lon: node.Lon, + }) + } + } + // write d.Writer.Queue <- obj.Bytes() } From 088deb1f3fd6f2e8a7105b89ead237446c7f09c9 Mon Sep 17 00:00:00 2001 From: missinglink Date: Thu, 13 Jul 2017 14:57:37 +0200 Subject: [PATCH 06/15] relations: support ComputeCentroid and ComputeGeohash flags --- handler/denormalized_json.go | 117
+++++++++++++++++++---------------- 1 file changed, 63 insertions(+), 54 deletions(-) diff --git a/handler/denormalized_json.go b/handler/denormalized_json.go index ec1d7c4..3ec731f 100644 --- a/handler/denormalized_json.go +++ b/handler/denormalized_json.go @@ -106,80 +106,89 @@ func (d *DenormalizedJSON) ReadRelation(item gosmparse.Relation) { Tags: item.Tags, } - // iterate members once to try to classify the relation - var adminCentreID int64 - var wayIDs []int64 - - for _, member := range item.Members { - switch member.Type { - case gosmparse.NodeType: - // only target the 'admin_centre' node - if member.Role == "admin_centre" { - - // store the ID of the admin centre node - adminCentreID = member.ID - } - case gosmparse.WayType: - // skip cyclic references to parent - if member.Role != "subarea" { - - // append way ID to list of member ways - wayIDs = append(wayIDs, member.ID) - } - } - } + // compute polygon centroid + if d.ComputeCentroid { - // this is the simplest relation to build, we simply need to load the - // admin centre coord and use that as the centroid - if 0 != adminCentreID { + // iterate members once to try to classify the relation + var adminCentreID int64 + var wayIDs []int64 - var node, readError = d.Conn.ReadCoord(adminCentreID) - if nil != readError { - // skip relation if the point is not found in the db - log.Printf("skipping relation %d. 
failed to load admin centre %d\n", item.ID, adminCentreID) - return - } + for _, member := range item.Members { + switch member.Type { + case gosmparse.NodeType: + // only target the 'admin_centre' node + if member.Role == "admin_centre" { - // set the centroid - obj.Centroid = json.NewLatLon(node.Lat, node.Lon) + // store the ID of the admin centre node + adminCentreID = member.ID + } + case gosmparse.WayType: + // skip cyclic references to parent + if member.Role != "subarea" { - } else { - // this is more complex, we need to load all the multipolygon rings - // from the DB and assemble the geometry before calculating the centroid + // append way ID to list of member ways + wayIDs = append(wayIDs, member.ID) + } + } + } - // load ring data from database - var ways []*json.DenormalizedWay - for _, wayID := range wayIDs { + // this is the simplest relation to build, we simply need to load the + // admin centre coord and use that as the centroid + if 0 != adminCentreID { - // load way from DB - var way, readError = d.Conn.ReadPath(wayID) + var node, readError = d.Conn.ReadCoord(adminCentreID) if nil != readError { - // skip ways which fail to denormalize - log.Printf("skipping relation %d. failed to load way %d\n", item.ID, wayID) + // skip relation if the point is not found in the db + log.Printf("skipping relation %d. 
failed to load admin centre %d\n", item.ID, adminCentreID) return } - // use a struct which allows us to store the refs within - var denormalizedWay = json.DenormalizedWayFromParser(*way) + // set the centroid + obj.Centroid = json.NewLatLon(node.Lat, node.Lon) + + } else { + // this is more complex, we need to load all the multipolygon rings + // from the DB and assemble the geometry before calculating the centroid + + // load ring data from database + var ways []*json.DenormalizedWay + for _, wayID := range wayIDs { - // load way refs from DB - for _, ref := range way.NodeIDs { - var node, readError = d.Conn.ReadCoord(ref) + // load way from DB + var way, readError = d.Conn.ReadPath(wayID) if nil != readError { // skip ways which fail to denormalize - log.Printf("skipping relation way %d. failed to load ref %d\n", item.ID, ref) + log.Printf("skipping relation %d. failed to load way %d\n", item.ID, wayID) return } - // append way vertex - denormalizedWay.LatLons = append(denormalizedWay.LatLons, json.NewLatLon(node.Lat, node.Lon)) + // use a struct which allows us to store the refs within + var denormalizedWay = json.DenormalizedWayFromParser(*way) + + // load way refs from DB + for _, ref := range way.NodeIDs { + var node, readError = d.Conn.ReadCoord(ref) + if nil != readError { + // skip ways which fail to denormalize + log.Printf("skipping relation way %d. 
failed to load ref %d\n", item.ID, ref) + return + } + + // append way vertex + denormalizedWay.LatLons = append(denormalizedWay.LatLons, json.NewLatLon(node.Lat, node.Lon)) + } + + // store way + ways = append(ways, denormalizedWay) } - // store way - ways = append(ways, denormalizedWay) + log.Println("write relation", item.ID) } + } - log.Println("write relation", item.ID) + // compute geohash + if d.ComputeGeohash { + obj.Hash = geohash.Encode(obj.Centroid.Lat, obj.Centroid.Lon) } d.Writer.Queue <- obj.Bytes() From 4c5b3b190793332e248f25755d90942cc4c4b29f Mon Sep 17 00:00:00 2001 From: missinglink Date: Thu, 13 Jul 2017 15:02:36 +0200 Subject: [PATCH 07/15] relations: use map instead of slice to store members --- handler/denormalized_json.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/handler/denormalized_json.go b/handler/denormalized_json.go index 3ec731f..c1e1002 100644 --- a/handler/denormalized_json.go +++ b/handler/denormalized_json.go @@ -151,7 +151,7 @@ func (d *DenormalizedJSON) ReadRelation(item gosmparse.Relation) { // from the DB and assemble the geometry before calculating the centroid // load ring data from database - var ways []*json.DenormalizedWay + var ways = make(map[int64]*json.DenormalizedWay) for _, wayID := range wayIDs { // load way from DB @@ -179,7 +179,7 @@ func (d *DenormalizedJSON) ReadRelation(item gosmparse.Relation) { } // store way - ways = append(ways, denormalizedWay) + ways[item.ID] = denormalizedWay } log.Println("write relation", item.ID) From dfbc88404befc700bf6b4ac630839814160a63c3 Mon Sep 17 00:00:00 2001 From: missinglink Date: Fri, 14 Jul 2017 06:29:24 +0200 Subject: [PATCH 08/15] relations: assemble geometries and compute centroids --- command/json_flat.go | 8 +++++++ handler/denormalized_json.go | 46 +++++++++++++++++++++++++++--------- 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/command/json_flat.go b/command/json_flat.go index 155cc69..b3c0daf 100644 --- 
a/command/json_flat.go +++ b/command/json_flat.go @@ -9,6 +9,7 @@ import ( "github.com/missinglink/pbf/lib" "github.com/missinglink/pbf/parser" "github.com/missinglink/pbf/proxy" + "github.com/missinglink/pbf/spatialite" "github.com/codegangsta/cli" ) @@ -50,6 +51,7 @@ func JSONFlat(c *cli.Context) error { var handle = &handler.DenormalizedJSON{ Conn: conn, Writer: lib.NewBufferedWriter(), + Spatialite: &spatialite.Connection{}, ComputeCentroid: c.BoolT("centroid"), ComputeGeohash: c.Bool("geohash"), ExportLatLons: c.Bool("vertices"), @@ -58,6 +60,12 @@ func JSONFlat(c *cli.Context) error { // close the writer routine and flush defer handle.Writer.Close() + // open the spatialite connection + handle.Spatialite.Open(":memory:") + + // close the spatialite connection + defer handle.Spatialite.Close() + // create db writer routine writer := leveldb.NewWriter(conn) diff --git a/handler/denormalized_json.go b/handler/denormalized_json.go index c1e1002..8ab59be 100644 --- a/handler/denormalized_json.go +++ b/handler/denormalized_json.go @@ -1,12 +1,15 @@ package handler import ( + "fmt" "log" + "strings" "github.com/missinglink/gosmparse" "github.com/missinglink/pbf/json" "github.com/missinglink/pbf/leveldb" "github.com/missinglink/pbf/lib" + "github.com/missinglink/pbf/spatialite" "github.com/missinglink/pbf/tags" "github.com/mmcloughlin/geohash" ) @@ -15,6 +18,7 @@ import ( type DenormalizedJSON struct { Writer *lib.BufferedWriter Conn *leveldb.Connection + Spatialite *spatialite.Connection ComputeCentroid bool ComputeGeohash bool ExportLatLons bool @@ -150,8 +154,8 @@ func (d *DenormalizedJSON) ReadRelation(item gosmparse.Relation) { // this is more complex, we need to load all the multipolygon rings // from the DB and assemble the geometry before calculating the centroid - // load ring data from database - var ways = make(map[int64]*json.DenormalizedWay) + // generate WKT strings as input for 'GeomFromText' + var lineStrings []string for _, wayID := range wayIDs { // 
load way from DB @@ -162,10 +166,8 @@ func (d *DenormalizedJSON) ReadRelation(item gosmparse.Relation) { return } - // use a struct which allows us to store the refs within - var denormalizedWay = json.DenormalizedWayFromParser(*way) - - // load way refs from DB + // load vertices from DB + var vertices []string for _, ref := range way.NodeIDs { var node, readError = d.Conn.ReadCoord(ref) if nil != readError { @@ -174,15 +176,37 @@ func (d *DenormalizedJSON) ReadRelation(item gosmparse.Relation) { return } - // append way vertex - denormalizedWay.LatLons = append(denormalizedWay.LatLons, json.NewLatLon(node.Lat, node.Lon)) + vertices = append(vertices, fmt.Sprintf("%f %f", node.Lon, node.Lat)) } - // store way - ways[item.ID] = denormalizedWay + lineStrings = append(lineStrings, fmt.Sprintf("(%s)", strings.Join(vertices, ","))) + } + + // build SQL query + var query = `SELECT COALESCE( AsText( PointOnSurface( BuildArea( GeomFromText('MULTILINESTRING(` + query += strings.Join(lineStrings, ",") + query += `)')))),'');` + + // query database for result + var res string + var err = d.Spatialite.DB.QueryRow(query).Scan(&res) + if err != nil { + log.Printf("spatialite: failed to assemble relation: %d", item.ID) + return + } + + // extract lat/lon values from WKT + var lon, lat float64 + n, _ := fmt.Sscanf(res, "POINT(%f %f)", &lon, &lat) + + // ensure we got 2 floats + if 2 != n { + log.Printf("spatialite: failed to compute centroid for relation: %d", item.ID) + return } - log.Println("write relation", item.ID) + // set the centroid + obj.Centroid = json.NewLatLon(lat, lon) } } From 95c3513557385cb88c6381cfd685322420d7b58e Mon Sep 17 00:00:00 2001 From: missinglink Date: Fri, 14 Jul 2017 06:36:54 +0200 Subject: [PATCH 09/15] relations: support two types of node centroid role: label & admin_centre --- handler/bitmask_custom.go | 15 +++++++++------ handler/denormalized_json.go | 16 ++++++++-------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git 
a/handler/bitmask_custom.go b/handler/bitmask_custom.go index 9b521a2..68f9953 100644 --- a/handler/bitmask_custom.go +++ b/handler/bitmask_custom.go @@ -55,7 +55,7 @@ func (b *BitmaskCustom) ReadRelation(item gosmparse.Relation) { // detect relation class var isSuperRelation = false - var hasAdminCentre = false + var hasNodeCentroid = false // iterate members once to try to classify the relation for _, member := range item.Members { @@ -63,8 +63,11 @@ func (b *BitmaskCustom) ReadRelation(item gosmparse.Relation) { case gosmparse.RelationType: isSuperRelation = true case gosmparse.NodeType: - if member.Role == "admin_centre" { - hasAdminCentre = true + switch member.Role { + case "label": + hasNodeCentroid = true + case "admin_centre": + hasNodeCentroid = true } } } @@ -81,15 +84,15 @@ func (b *BitmaskCustom) ReadRelation(item gosmparse.Relation) { switch member.Type { case gosmparse.NodeType: - // only store nodes if they are for 'admin_centre' - if member.Role == "admin_centre" { + // only store nodes if they are for 'label' or 'admin_centre' + if member.Role == "label" || member.Role == "admin_centre" { b.Masks.RelNodes.Insert(member.ID) } case gosmparse.WayType: // only store ways if we don't have the admin_centre - if !hasAdminCentre { + if !hasNodeCentroid { b.Masks.RelWays.Insert(member.ID) } } diff --git a/handler/denormalized_json.go b/handler/denormalized_json.go index 8ab59be..83e4e7f 100644 --- a/handler/denormalized_json.go +++ b/handler/denormalized_json.go @@ -114,17 +114,17 @@ func (d *DenormalizedJSON) ReadRelation(item gosmparse.Relation) { if d.ComputeCentroid { // iterate members once to try to classify the relation - var adminCentreID int64 + var nodeCentroidID int64 var wayIDs []int64 for _, member := range item.Members { switch member.Type { case gosmparse.NodeType: - // only target the 'admin_centre' node - if member.Role == "admin_centre" { + // only target the 'label' or 'admin_centre' nodes + if member.Role == "label" || member.Role == 
"admin_centre" { - // store the ID of the admin centre node - adminCentreID = member.ID + // store the ID of the node which contains the centroid info + nodeCentroidID = member.ID } case gosmparse.WayType: // skip cyclic references to parent @@ -138,12 +138,12 @@ func (d *DenormalizedJSON) ReadRelation(item gosmparse.Relation) { // this is the simplest relation to build, we simply need to load the // admin centre coord and use that as the centroid - if 0 != adminCentreID { + if 0 != nodeCentroidID { - var node, readError = d.Conn.ReadCoord(adminCentreID) + var node, readError = d.Conn.ReadCoord(nodeCentroidID) if nil != readError { // skip relation if the point is not found in the db - log.Printf("skipping relation %d. failed to load admin centre %d\n", item.ID, adminCentreID) + log.Printf("skipping relation %d. failed to load admin centre %d\n", item.ID, nodeCentroidID) return } From 1ccfd157bcf28a295204220374df72c320ca0270 Mon Sep 17 00:00:00 2001 From: missinglink Date: Fri, 14 Jul 2017 06:38:19 +0200 Subject: [PATCH 10/15] relations: support two types of relations: multipolygon & boundary --- handler/bitmask_custom.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/handler/bitmask_custom.go b/handler/bitmask_custom.go index 68f9953..937fe66 100644 --- a/handler/bitmask_custom.go +++ b/handler/bitmask_custom.go @@ -51,7 +51,7 @@ func (b *BitmaskCustom) ReadRelation(item gosmparse.Relation) { // we currently only support the 'multipolygon' type // see: http://wiki.openstreetmap.org/wiki/Types_of_relation - if val, ok := item.Tags["type"]; ok && val == "multipolygon" { + if val, ok := item.Tags["type"]; ok && (val == "multipolygon" || val == "boundary") { // detect relation class var isSuperRelation = false From 46a5bfc8872af887c99bf33e0c956514df77932f Mon Sep 17 00:00:00 2001 From: missinglink Date: Fri, 14 Jul 2017 08:53:18 +0200 Subject: [PATCH 11/15] formatting: code comment --- command/bitmask_custom.go | 5 +++++ 1 file changed, 5 
insertions(+) diff --git a/command/bitmask_custom.go b/command/bitmask_custom.go index 44c85d0..cc61f5f 100644 --- a/command/bitmask_custom.go +++ b/command/bitmask_custom.go @@ -11,6 +11,11 @@ import ( "github.com/codegangsta/cli" ) +// @todo: depending on which elements are defined in the config, we can +// skip over large chunks of the file. eg: if the config only contains +// relation patterns then it will be much faster if we skip over the +// nodes and ways on the first pass + // BitmaskCustom cli command func BitmaskCustom(c *cli.Context) error { From 1d92a6a65b62bed7190590d0ec7e548a0d318ad3 Mon Sep 17 00:00:00 2001 From: missinglink Date: Fri, 14 Jul 2017 09:27:22 +0200 Subject: [PATCH 12/15] formatting: code comments --- handler/bitmask_custom.go | 4 ++-- handler/denormalized_json.go | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/handler/bitmask_custom.go b/handler/bitmask_custom.go index 937fe66..3f3e768 100644 --- a/handler/bitmask_custom.go +++ b/handler/bitmask_custom.go @@ -49,7 +49,7 @@ func (b *BitmaskCustom) ReadWay(item gosmparse.Way) { func (b *BitmaskCustom) ReadRelation(item gosmparse.Relation) { if b.Features.MatchRelation(item) { - // we currently only support the 'multipolygon' type + // we currently only support the 'multipolygon' and 'boundary' types // see: http://wiki.openstreetmap.org/wiki/Types_of_relation if val, ok := item.Tags["type"]; ok && (val == "multipolygon" || val == "boundary") { @@ -91,7 +91,7 @@ func (b *BitmaskCustom) ReadRelation(item gosmparse.Relation) { case gosmparse.WayType: - // only store ways if we don't have the admin_centre + // only store ways if we don't have a node centroid if !hasNodeCentroid { b.Masks.RelWays.Insert(member.ID) } diff --git a/handler/denormalized_json.go b/handler/denormalized_json.go index 83e4e7f..900a754 100644 --- a/handler/denormalized_json.go +++ b/handler/denormalized_json.go @@ -137,7 +137,7 @@ func (d *DenormalizedJSON) ReadRelation(item gosmparse.Relation) 
{ } // this is the simplest relation to build, we simply need to load the - // admin centre coord and use that as the centroid + // 'label' or 'admin_centre' node and use its lat/lon as the relation centroid if 0 != nodeCentroidID { var node, readError = d.Conn.ReadCoord(nodeCentroidID) @@ -151,7 +151,7 @@ func (d *DenormalizedJSON) ReadRelation(item gosmparse.Relation) { obj.Centroid = json.NewLatLon(node.Lat, node.Lon) } else { - // this is more complex, we need to load all the multipolygon rings + // this is more complex, we need to load all the multipolygon linestrings // from the DB and assemble the geometry before calculating the centroid // generate WKT strings as input for 'GeomFromText' From a2cc2284e295abf4d7760f726b6608e604d03f2c Mon Sep 17 00:00:00 2001 From: missinglink Date: Fri, 14 Jul 2017 09:37:31 +0200 Subject: [PATCH 13/15] relations: be more selective about which roles to target as linestrings --- handler/bitmask_custom.go | 7 ++++++- handler/denormalized_json.go | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/handler/bitmask_custom.go b/handler/bitmask_custom.go index 3f3e768..7841ef3 100644 --- a/handler/bitmask_custom.go +++ b/handler/bitmask_custom.go @@ -93,7 +93,12 @@ func (b *BitmaskCustom) ReadRelation(item gosmparse.Relation) { // only store ways if we don't have a node centroid if !hasNodeCentroid { - b.Masks.RelWays.Insert(member.ID) + + // skip cyclic references to parent (subarea) and other junk roles + if member.Role == "outer" || member.Role == "inner" || member.Role == "" { + + b.Masks.RelWays.Insert(member.ID) + } } } } diff --git a/handler/denormalized_json.go b/handler/denormalized_json.go index 900a754..9f08ff3 100644 --- a/handler/denormalized_json.go +++ b/handler/denormalized_json.go @@ -127,8 +127,8 @@ func (d *DenormalizedJSON) ReadRelation(item gosmparse.Relation) { nodeCentroidID = member.ID } case gosmparse.WayType: - // skip cyclic
references to parent (subarea) and other junk roles + if member.Role == "outer" || member.Role == "inner" || member.Role == "" { // append way ID to list of member ways wayIDs = append(wayIDs, member.ID) From c837d9cecc6d1bc976eb750a23ec1243ef4891f8 Mon Sep 17 00:00:00 2001 From: missinglink Date: Mon, 17 Jul 2017 12:54:10 +0200 Subject: [PATCH 14/15] relations: query debugging --- handler/denormalized_json.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/handler/denormalized_json.go b/handler/denormalized_json.go index 9f08ff3..40e514a 100644 --- a/handler/denormalized_json.go +++ b/handler/denormalized_json.go @@ -192,6 +192,19 @@ func (d *DenormalizedJSON) ReadRelation(item gosmparse.Relation) { var err = d.Spatialite.DB.QueryRow(query).Scan(&res) if err != nil { log.Printf("spatialite: failed to assemble relation: %d", item.ID) + log.Print(err) + + // // spatialite / GEOS debugging + // log.Printf("query: %s", query) + // + // var errGeos, errAus, errGeom string + // d.Spatialite.DB.QueryRow("SELECT COALESCE(GEOS_GetLastErrorMsg(),'')").Scan(&errGeos) + // d.Spatialite.DB.QueryRow("SELECT COALESCE(GEOS_GetLastAuxErrorMsg(),'')").Scan(&errAus) + // d.Spatialite.DB.QueryRow("SELECT COALESCE(LWGEOM_GetLastErrorMsg(),'')").Scan(&errGeom) + // log.Printf("GEOS_GetLastErrorMsg: %s", errGeos) + // log.Printf("GEOS_GetLastAuxErrorMsg: %s", errAus) + // log.Printf("LWGEOM_GetLastErrorMsg: %s", errGeom) + return } From e7d2ccd833c867d3a40f1a9e1a9f4cf5183f3cd1 Mon Sep 17 00:00:00 2001 From: missinglink Date: Mon, 17 Jul 2017 13:11:04 +0200 Subject: [PATCH 15/15] docker: dockerize image generation and execution via container --- .dockerignore | 8 ++++++++ Dockerfile | 36 ++++++++++++++++++++++++++++++++++++ readme.md | 18 ++++++++++++++++++ 3 files changed, 62 insertions(+) create mode 100644 .dockerignore create mode 100644 Dockerfile diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..baed050 --- /dev/null +++ b/.dockerignore 
@@ -0,0 +1,8 @@ +.git +example +npm +LICENSE +*.md +*.pbf +*.mask +*.js diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..695bf98 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,36 @@ +# base image +FROM ubuntu:latest + +# configure env +ENV DEBIAN_FRONTEND 'noninteractive' + +# update apt, install core apt dependencies and delete the apt-cache +# note: this is done in one command in order to keep down the size of intermediate containers +RUN apt update && \ + apt install -y locales git-core sqlite3 libsqlite3-mod-spatialite golang && \ + rm -rf /var/lib/apt/lists/* + +# configure locale +RUN locale-gen 'en_US.UTF-8' +ENV LANG 'en_US.UTF-8' +ENV LANGUAGE 'en_US:en' +ENV LC_ALL 'en_US.UTF-8' + +# configure git +RUN git config --global 'user.email' 'null@null.com' +RUN git config --global 'user.name' 'Missinglink PBF' + +# set GOPATH +ENV GOPATH='/tmp/go' + +# change working dir +WORKDIR "$GOPATH/src/github.com/missinglink/pbf" + +# copy files +COPY . "$GOPATH/src/github.com/missinglink/pbf" + +# fetch dependencies +RUN go get + +# build binary +RUN go build diff --git a/readme.md b/readme.md index 5e6984a..7475d6e 100644 --- a/readme.md +++ b/readme.md @@ -79,6 +79,24 @@ OPTIONS: $ go test $(go list ./... | grep -v /vendor/) ``` +### docker + +#### build the docker image + +> this image is currently ~700MB, I would be happy to receive a PR which reduces the on-disk image size. + +```bash +$ docker build -t missinglink/pbf . +``` + +#### run a container + +there are many options for `docker run` which are out of scope for this document. if you are new to docker, have a read over the docs for the options specified below and also the `-v` flag which specifies how you can share directories/files between the container and your host OS. + +```bash +$ docker run --rm -it missinglink/pbf ./pbf +``` + ### issues / bugs please open a github issue / open a pull request.