diff --git a/CHANGELOG.md b/CHANGELOG.md
index fcf8ab03..c26353d8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -35,6 +35,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Adds `riverbank`, `stream_end`, `dam`, `weir`, `waterfall`, and `pressurised`
   to the list of waterway features
 - Populates `nds` and `members` for deleted elements from the previous version
+- Attempts to fix invalid way geometries by buffering them (this will result in
+  more way geometries for certain datasets)
 
 ### Fixed
 
diff --git a/src/main/scala/vectorpipe/internal/package.scala b/src/main/scala/vectorpipe/internal/package.scala
index a10335e9..f06c9172 100644
--- a/src/main/scala/vectorpipe/internal/package.scala
+++ b/src/main/scala/vectorpipe/internal/package.scala
@@ -4,6 +4,7 @@ import java.sql.Timestamp
 
 import geotrellis.vector._
 import org.apache.log4j.Logger
+import org.apache.spark.internal.Logging
 import org.apache.spark.sql._
 import org.apache.spark.sql.expressions.Window
 import org.apache.spark.sql.functions._
@@ -14,14 +15,12 @@ import vectorpipe.functions.asDouble
 import vectorpipe.functions.osm._
 import vectorpipe.relations.{MultiPolygons, Routes}
 
-package object internal {
+package object internal extends Logging {
   val NodeType: Byte = 1
   val WayType: Byte = 2
   val RelationType: Byte = 3
   val MultiPolygonRoles: Seq[String] = Set("", "outer", "inner").toSeq
 
-  @transient lazy val logger: Logger = Logger.getLogger(getClass)
-
   /**
    * Pre-process nodes.
    *
@@ -293,6 +292,16 @@ package object internal {
           }
           val geometry = geom match {
             case Some(g) if g.isValid => g
+            case Some(g) if !g.isEmpty =>
+              val buffered = g.buffer(0)
+
+              if (buffered.isValid) {
+                logWarning(s"Invalid way geometry, fixed by buffering: $id@$version ($updated): $g")
+                buffered
+              } else {
+                logWarning(s"Invalid way geometry: $id@$version ($updated): $g")
+                null
+              }
             case _ => null
           }
           (changeset, id, version, updated, geometry)