Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Break up into subprojects, including examples #93

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Better handling of falsy boolean values in tag UDFs
- Adds `riverbank`, `stream_end`, `dam`, `weir`, `waterfall`, and `pressurised`
to the list of waterway features
- Reorganized repo to put project code into `core` subproject; introduced `examples` subproject

### Fixed

Expand All @@ -35,4 +36,3 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Mark all logger vals and some UDF vals as @transient lazy to avoid Spark serialization issues
- Properly strip leading and trailing slashes from S3 URIs when exporting vector tiles

15 changes: 13 additions & 2 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -147,11 +147,22 @@ val vpExtraSettings = Seq(
// micrositeBaseUrl := "/vectorpipe"
// micrositeDocumentationUrl := "/vectorpipe/latest/api/#vectorpipe.package" /* Location of Scaladocs */

// Aggregate root at the repo top level: running tasks here fans out to the
// `core` (vectorpipe) and `examples` subprojects.
lazy val root = project
.in(file("."))
.aggregate(vectorpipe, examples)
.settings(commonSettings, vpExtraSettings)

/* Main project */
lazy val vectorpipe = project
.in(file("."))
.in(file("core"))
.settings(commonSettings, vpExtraSettings, release)

/* Example projects */
// Example code lives in its own subproject so it is excluded from the
// published `vectorpipe` artifact; it compiles against `core` via dependsOn.
lazy val examples = project
.in(file("examples"))
.settings(commonSettings, vpExtraSettings)
.dependsOn(vectorpipe)

/* Benchmarking suite.
* Benchmarks can be executed by first switching to the `bench` project and then by running:
jmh:run -t 1 -f 1 -wi 5 -i 5 .*Bench.*
Expand All @@ -162,7 +173,7 @@ lazy val bench = project
.dependsOn(vectorpipe)
.enablePlugins(JmhPlugin)


onLoad in Global ~= (_ andThen ("project vectorpipe" :: _))


// assemblyShadeRules in assembly := {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.StringType
import org.locationtech.jts.{geom => jts}

import scala.reflect.ClassTag

object VectorPipe {

/** Vectortile conversion options.
Expand Down Expand Up @@ -46,7 +48,10 @@ object VectorPipe {
def forAllZoomsWithSrcProjection(zoom: Int, crs: CRS) = Options(zoom, Some(0), crs, None)
}

def apply(input: DataFrame, pipeline: vectortile.Pipeline, options: Options): Unit = {
def apply[T: ClassTag](input: DataFrame, pipeline: vectortile.Pipeline, options: Options): Unit = {
import input.sparkSession.implicits._
import vectorpipe.encoders._

val geomColumn = pipeline.geometryColumn
assert(input.columns.contains(geomColumn) &&
input.schema(geomColumn).dataType.isInstanceOf[org.apache.spark.sql.jts.AbstractGeometryUDT[jts.Geometry]],
Expand Down Expand Up @@ -74,46 +79,49 @@ object VectorPipe {
SpatialKey(k.col / 2, k.row / 2) }.toSeq
}

def generateVectorTiles[G <: Geometry](df: DataFrame, level: LayoutLevel): RDD[(SpatialKey, VectorTile)] = {
def generateVectorTiles[G <: Geometry](df: DataFrame, level: LayoutLevel): Dataset[(SpatialKey, Array[Byte])] = {
val zoom = level.zoom
val clip = udf { (g: jts.Geometry, key: GenericRowWithSchema) =>
val k = getSpatialKey(key)
pipeline.clip(g, k, level)
}

val selectedGeometry = pipeline
.select(df, zoom, keyColumn)
val selectedGeometry = pipeline.select match {
case None => df
case Some(select) => select(df, zoom, keyColumn)
}

val clipped = selectedGeometry
val keyed = selectedGeometry
.withColumn(keyColumn, explode(col(keyColumn)))
.repartition(col(keyColumn)) // spread copies of possibly ill-tempered geometries around cluster prior to clipping
.withColumn(geomColumn, clip(col(geomColumn), col(keyColumn)))

val clipped = pipeline.clip match {
case None => keyed
case Some(clipper) =>
val clip = udf { (g: jts.Geometry, key: GenericRowWithSchema) =>
val k = getSpatialKey(key)
clipper(g, k, level)
}
val toClip = keyed.repartition(col(keyColumn)) // spread copies of possibly ill-tempered geometries around cluster prior to clipping
toClip.withColumn(geomColumn, clip(col(geomColumn), col(keyColumn)))
}

pipeline.layerMultiplicity match {
case SingleLayer(layerName) =>
clipped
.rdd
.map { r => (getSpatialKey(r, keyColumn), pipeline.pack(r, zoom)) }
.groupByKey
.map { case (key, feats) =>
.map { r => SingleLayerEntry(getSpatialKey(r, keyColumn), pipeline.pack(r, zoom)) }
.groupByKey(_.key)
.mapGroups { (key: SpatialKey, sleIter: Iterator[SingleLayerEntry]) =>
val ex = level.layout.mapTransform.keyToExtent(key)
key -> buildVectorTile(feats, layerName, ex, options.tileResolution, options.orderAreas)
key -> buildVectorTile(sleIter.map(_.feature).toIterable, layerName, ex, options.tileResolution, options.orderAreas).toBytes
}
case LayerNamesInColumn(layerNameCol) =>
assert(selectedGeometry.schema(layerNameCol).dataType == StringType,
s"layerMultiplicity=${pipeline.layerMultiplicity} requires String-type column of name ${layerNameCol}")

clipped
.rdd
.map { r => (getSpatialKey(r, keyColumn), r.getAs[String](layerNameCol) -> pipeline.pack(r, zoom)) }
.groupByKey
.mapPartitions{ iter: Iterator[(SpatialKey, Iterable[(String, VectorTileFeature[Geometry])])] =>
iter.map{ case (key, groupedFeatures) => {
val layerFeatures: Map[String, Iterable[VectorTileFeature[Geometry]]] =
groupedFeatures.groupBy(_._1).mapValues(_.map(_._2))
val ex = level.layout.mapTransform.keyToExtent(key)
key -> buildVectorTile(layerFeatures, ex, options.tileResolution, options.orderAreas)
}}
}
.map { r => MultipleLayerEntry(getSpatialKey(r, keyColumn), r.getAs[String](layerNameCol), pipeline.pack(r, zoom)) }
.groupByKey(_.key)
.mapGroups{ (key: SpatialKey, iter: Iterator[MultipleLayerEntry]) =>
val ex = level.layout.mapTransform.keyToExtent(key)
val layerFeatures = iter.toSeq.groupBy(_.layer).mapValues(_.map(_.feature))
key -> buildVectorTile(layerFeatures, ex, options.tileResolution, options.orderAreas).toBytes
}
}
}

Expand All @@ -134,16 +142,30 @@ object VectorPipe {
} else {
df
}
val simplify = udf { g: jts.Geometry => pipeline.simplify(g, level.layout) }
val reduced = pipeline
.reduce(working, level, keyColumn)
val prepared = reduced
.withColumn(geomColumn, simplify(col(geomColumn)))
val vts = generateVectorTiles(prepared, level)

val reduced = pipeline.reduce match {
case None => working
case Some(reduce) => reduce(working, level, keyColumn)
}

val simplified = pipeline.simplify match {
case None => reduced
case Some(simplifier) =>
val simplify = udf { g: jts.Geometry => simplifier(g, level.layout) }
reduced.withColumn(geomColumn, simplify(col(geomColumn)))
}

val vts = generateVectorTiles(simplified, level)
saveVectorTiles(vts, zoom, pipeline.baseOutputURI)
prepared.withColumn(keyColumn, reduceKeys(col(keyColumn)))

simplified.withColumn(keyColumn, reduceKeys(col(keyColumn)))
}

}

// Keyed intermediate records used when grouping packed features by SpatialKey
// prior to building vector tiles (single-layer vs. per-row layer-name cases).
private case class SingleLayerEntry(key: SpatialKey, feature: VectorTileFeature[Geometry])
private case class MultipleLayerEntry(key: SpatialKey, layer: String, feature: VectorTileFeature[Geometry])

// VectorTileFeature is not a Spark product type, so structural (Expression)
// encoding is unavailable; fall back to kryo binary encoders for Dataset ops.
private implicit def sleEncoder: Encoder[SingleLayerEntry] = Encoders.kryo[SingleLayerEntry]
private implicit def mleEncoder: Encoder[MultipleLayerEntry] = Encoders.kryo[MultipleLayerEntry]
}
22 changes: 22 additions & 0 deletions core/src/main/scala/vectorpipe/encoders/GTEncoders.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package vectorpipe.encoders

import geotrellis.vector._
import geotrellis.vectortile._
import org.apache.spark.sql.{Encoder, Encoders}
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder

/** Spark SQL [[Encoder]] instances for GeoTrellis vector and vectortile types.
 *
 *  Concrete geometry types (Point, Line, Polygon, ...) get
 *  [[ExpressionEncoder]]s; abstract or non-product types (Geometry, Feature,
 *  VectorTile) fall back to opaque kryo binary encoding.
 */
object GTEncoders {
// Geometry is abstract, so a structural encoder cannot be derived — use kryo.
implicit def gtGeometryEncoder: Encoder[Geometry] = Encoders.kryo[Geometry]
implicit def gtPointEncoder: Encoder[Point] = ExpressionEncoder()
implicit def gtMultiPointEncoder: Encoder[MultiPoint] = ExpressionEncoder()
implicit def gtLineEncoder: Encoder[Line] = ExpressionEncoder()
implicit def gtMultiLineEncoder: Encoder[MultiLine] = ExpressionEncoder()
implicit def gtPolygonEncoder: Encoder[Polygon] = ExpressionEncoder()
implicit def gtMultiPolygonEncoder: Encoder[MultiPolygon] = ExpressionEncoder()

// Feature pairs a geometry with arbitrary data D; kryo avoids needing a
// structural encoder for every (G, D) combination.
implicit def gtFeatureEncoder[G <: Geometry, D](implicit ev1: Encoder[G], ev2: Encoder[D]): Encoder[Feature[G, D]] = Encoders.kryo[Feature[G, D]]

implicit def gtVectorTileEncoder: Encoder[VectorTile] = Encoders.kryo[VectorTile]
//implicit def gtLayerEncoder: Encoder[Layer] = Encoders.javaSerialization[Layer]
//implicit def gtStrictLayerEncoder: Encoder[StrictLayer] = Encoders.kryo[StrictLayer]
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ import org.apache.spark.sql.expressions.UserDefinedFunction
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Column, DataFrame, Row}
import vectorpipe.internal.{NodeType, WayType, RelationType}
import vectorpipe.model.Member
import vectorpipe.util._

import scala.util.matching.Regex
import scala.util.{Failure, Success, Try}
Expand Down Expand Up @@ -217,6 +219,19 @@ package object osm {

@transient lazy val compressMemberTypes: UserDefinedFunction = udf(_compressMemberTypes, MemberSchema)

// Inverse of `_compressMemberTypes`: expands byte-encoded relation-member
// "type" values back into their string names ("node"/"way"/"relation"),
// preserving the `ref` and `role` fields.
// NOTE(review): the match has no default case, so a byte outside
// NodeType/WayType/RelationType throws a MatchError — assumes input rows were
// produced by `_compressMemberTypes`; confirm at call sites.
private val _uncompressMemberTypes = (members: Seq[Row]) =>
members.map { row =>
val t = row.getAs[Byte]("type") match {
case NodeType => "node"
case WayType => "way"
case RelationType => "relation"
}
val ref = row.getAs[Long]("ref")
val role = row.getAs[String]("role")

Row(t, ref, role)
}

/**
* Checks if members have byte-encoded types
*/
Expand Down Expand Up @@ -278,6 +293,7 @@ package object osm {

def isBuilding(tags: Column): Column =
!lower(coalesce(tags.getItem("building"), lit("no"))).isin(FalsyValues: _*) as 'isBuilding
//!array_contains(splitDelimitedValues(tags.getItem("building")), "no") as 'isBuilding

@transient lazy val isPOI: UserDefinedFunction = udf {
tags: Map[String, String] => POITags.intersect(tags.keySet).nonEmpty
Expand All @@ -292,8 +308,12 @@ package object osm {
def isWaterway(tags: Column): Column =
array_intersects(splitDelimitedValues(tags.getItem("waterway")), lit(WaterwayValues.toArray)) as 'isWaterway

def mergeTags: UserDefinedFunction = udf {
(_: Map[String, String]) ++ (_: Map[String, String])
def mergeTags: UserDefinedFunction = udf { (a: Map[String, String], b: Map[String, String]) =>
mergeMaps(a.mapValues(Set(_)), b.mapValues(Set(_)))(_ ++ _).mapValues(_.mkString(";"))
}

val reduceTags: UserDefinedFunction = udf { tags: Iterable[Map[String, String]] =>
tags.map(x => x.mapValues(Set(_))).reduce((a, b) => mergeMaps(a, b)(_ ++ _)).mapValues(_.mkString(";"))
}

val array_intersects: UserDefinedFunction = udf { (a: Seq[_], b: Seq[_]) =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,7 @@ package object functions {
private val _mergeCounts = (a: Map[String, Int], b: Map[String, Int]) =>
mergeMaps(Option(a).getOrElse(Map.empty[String, Int]),
Option(b).getOrElse(Map.empty[String, Int]))(_ + _)

// UDF: true when the two input sequences share at least one element.
val array_intersects: UserDefinedFunction = udf { (a: Seq[_], b: Seq[_]) =>
a.intersect(b).nonEmpty}
}
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,8 @@ package object internal {
} else {
@transient val idByVersion = Window.partitionBy('id).orderBy('version)

// when a node has been deleted, it doesn't include any tags; use a window function to retrieve the last tags
// present and use those
// when a node has been deleted, it doesn't include any tags or nds; use a window function to retrieve the last
// tags and nds present and use those
history
.where('type === "way")
.repartition('id)
Expand All @@ -142,7 +142,8 @@ package object internal {
when(!'visible and (lag('tags, 1) over idByVersion).isNotNull,
lag('tags, 1) over idByVersion)
.otherwise('tags) as 'tags,
$"nds.ref" as 'nds,
when(!'visible, lag($"nds.ref", 1) over idByVersion)
.otherwise($"nds.ref") as 'nds,
'changeset,
'timestamp,
(lead('timestamp, 1) over idByVersion) as 'validUntil,
Expand Down Expand Up @@ -180,17 +181,18 @@ package object internal {
history.withColumn("members", compressMemberTypes('members))
}

// when an element has been deleted, it doesn't include any tags; use a window function to retrieve the last tags
// present and use those
history
// when an element has been deleted, it doesn't include any tags or members; use a window function to retrieve
// the last tags and members present and use those
frame
.where('type === "relation")
.repartition('id)
.select(
'id,
when(!'visible and (lag('tags, 1) over idByUpdated).isNotNull,
lag('tags, 1) over idByUpdated)
.otherwise('tags) as 'tags,
'members,
when(!'visible, lag('members, 1) over idByUpdated)
.otherwise('members) as 'members,
'changeset,
'timestamp,
(lead('timestamp, 1) over idByUpdated) as 'validUntil,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ trait Pipeline {
* Array[SpatialKey] giving the list of keys that the
* geometry interacts with
*/
def reduce(input: DataFrame, layoutLevel: LayoutLevel, keyColumn: String): DataFrame = input
val reduce: Option[(DataFrame, LayoutLevel, String) => DataFrame] = None

/*
* Lower complexity of geometry while moving to less resolute zoom levels.
Expand All @@ -111,7 +111,7 @@ trait Pipeline {
* is a simplifier using JTS's topology-preserving simplifier available in
* [[vectorpipe.vectortile.Simplify]].
*/
def simplify(g: jts.Geometry, layout: LayoutDefinition): jts.Geometry = g
val simplify: Option[(jts.Geometry, LayoutDefinition) => jts.Geometry] = None

/**
* Select geometries for display at a given zoom level.
Expand All @@ -129,7 +129,7 @@ trait Pipeline {
* String-typed column of the name contained in [[layerName]] to indicate
* which layer a geometry belongs in.
*/
def select(input: DataFrame, targetZoom: Int, keyColumn: String): DataFrame = input
val select: Option[(DataFrame, Int, String) => DataFrame] = None

/**
* Clip geometries prior to writing to vector tiles.
Expand All @@ -140,7 +140,7 @@ trait Pipeline {
*
* Basic (non-no-op) clipping functions can be found in [[Clipping]].
*/
def clip(geom: jts.Geometry, key: SpatialKey, layoutLevel: LayoutLevel): jts.Geometry = geom
val clip: Option[(jts.Geometry, SpatialKey, LayoutLevel) => jts.Geometry] = None

/**
* Convert table rows to output features.
Expand Down
Loading