diff --git a/README.md b/README.md index f581a74f..ef094e8c 100644 --- a/README.md +++ b/README.md @@ -60,3 +60,5 @@ openaddr-process-one --skip-preview --layer addresses --layersource state exampl ``` Review https://github.com/openaddresses/openaddresses/blob/master/CONTRIBUTING.md for input json syntax. + +Supported conform formats include `shapefile`, `geojson`, `csv`, `xml`, `gdb`, and `gpkg`. diff --git a/openaddr/conform.py b/openaddr/conform.py index a25739e9..25e123a0 100644 --- a/openaddr/conform.py +++ b/openaddr/conform.py @@ -404,6 +404,28 @@ def find_source_path(data_source, source_paths): return c _L.warning("Source names file %s but could not find it", source_file_name) return None + elif format_string == "gpkg": + candidates = [] + for fn in source_paths: + basename, ext = os.path.splitext(fn) + if ext.lower() == ".gpkg": + candidates.append(fn) + if len(candidates) == 0: + _L.warning("No GPKG found in %s", source_paths) + return None + elif len(candidates) == 1: + _L.debug("Selected %s for source", candidates[0]) + return candidates[0] + else: + if "file" not in conform: + _L.warning("Multiple GPKGs found, but source has no file attribute.") + return None + source_file_name = conform["file"] + for c in candidates: + if source_file_name == os.path.basename(c): + return c + _L.warning("Source names file %s but could not find it", source_file_name) + return None elif format_string == "xml": # Return file if it's specified, else return the first .gml file we find if "file" in conform: @@ -425,7 +447,7 @@ def find_source_path(data_source, source_paths): return None class ConvertToGeojsonTask(object): - known_types = ('.shp', '.json', '.csv', '.kml', '.gdb') + known_types = ('.shp', '.json', '.csv', '.kml', '.gdb', '.gpkg') def convert(self, source_config, source_paths, workdir): "Convert a list of source_paths and write results in workdir" @@ -1151,7 +1173,7 @@ def extract_to_source_csv(source_config, source_path, extract_path): format_string = source_config.data_source["conform"]['format'] protocol_string = source_config.data_source['protocol'] - if format_string in ("shapefile", "xml", "gdb"): + if format_string in ("shapefile", "xml", "gdb", "gpkg"): ogr_source_path = normalize_ogr_filename_case(source_path) ogr_source_to_csv(source_config, ogr_source_path, extract_path) elif format_string == "csv": @@ -1194,7 +1216,7 @@ def conform_cli(source_config, source_path, dest_path): format_string = source_config.data_source["conform"].get('format') - if not format_string in ["shapefile", "geojson", "csv", "xml", "gdb"]: + if not format_string in ["shapefile", "geojson", "csv", "xml", "gdb", "gpkg"]: _L.warning("Skipping file with unknown conform: %s", source_path) return 1 diff --git a/openaddr/tests/conform.py b/openaddr/tests/conform.py index 74275248..f35ef831 100644 --- a/openaddr/tests/conform.py +++ b/openaddr/tests/conform.py @@ -1776,6 +1776,36 @@ def test_lake_man_gdb(self): self.assertEqual(rows[5]['properties']['number'], '5115') self.assertEqual(rows[5]['properties']['street'], 'OLD MILL RD') + def test_lake_man_gpkg(self): + with open(os.path.join(self.conforms_dir, "lake-man-gpkg.json")) as file: + source_config = SourceConfig(json.load(file), "addresses", "default") + source_path = os.path.join(self.conforms_dir, "lake-man.gpkg") + dest_path = os.path.join(self.testdir, 'lake-man-gpkg-conformed.csv') + + rc = conform_cli(source_config, source_path, dest_path) + self.assertEqual(0, rc) + + with open(dest_path) as fp: + rows = list(map(json.loads, list(fp))) + + self.assertEqual('Point', rows[0]['geometry']['type']) + self.assertAlmostEqual(-122.2592497, rows[0]['geometry']['coordinates'][0], places=4) + self.assertAlmostEqual(37.8026126, rows[0]['geometry']['coordinates'][1], places=4) + + self.assertEqual(6, len(rows)) + self.assertEqual(rows[0]['properties']['number'], '5115') + self.assertEqual(rows[0]['properties']['street'], 'FRUITED PLAINS LN') + self.assertEqual(rows[1]['properties']['number'], '5121') + self.assertEqual(rows[1]['properties']['street'], 'FRUITED PLAINS LN') + self.assertEqual(rows[2]['properties']['number'], '5133') + self.assertEqual(rows[2]['properties']['street'], 'FRUITED PLAINS LN') + self.assertEqual(rows[3]['properties']['number'], '5126') + self.assertEqual(rows[3]['properties']['street'], 'FRUITED PLAINS LN') + self.assertEqual(rows[4]['properties']['number'], '5120') + self.assertEqual(rows[4]['properties']['street'], 'FRUITED PLAINS LN') + self.assertEqual(rows[5]['properties']['number'], '5115') + self.assertEqual(rows[5]['properties']['street'], 'OLD MILL RD') + def test_lake_man_split(self): rc, dest_path = self._run_conform_on_source('lake-man-split', 'shp') self.assertEqual(0, rc) @@ -2309,8 +2339,8 @@ def test_srs(self): self.assertEqual(r[0], u'n,s,{GEOM_FIELDNAME}'.format(**globals())) r = r[1].split(',') - self.assertEquals(r[0], '3203') - self.assertEquals(r[1], 'SE WOODSTOCK BLVD') + self.assertEqual(r[0], '3203') + self.assertEqual(r[1], 'SE WOODSTOCK BLVD') x,y = wkt_pt(r[2]) self.assertAlmostEqual(-122.630842186651, x, places=4) diff --git a/openaddr/tests/conforms/lake-man-gml.gfs b/openaddr/tests/conforms/lake-man-gml.gfs new file mode 100644 index 00000000..485d1ae9 --- /dev/null +++ b/openaddr/tests/conforms/lake-man-gml.gfs @@ -0,0 +1,101 @@ + + + lake_man + lake_man + geometryProperty + geometryProperty + + 1 + + 6 + -122.25925 + -122.25672 + 37.80071 + 37.80436 + + + MSTRID + MSTRID + Integer + + + CATEGORY + CATEGORY + String + 7 + + + ADDRESSID + ADDRESSID + Integer + + + BASEID + BASEID + Integer + + + REVDATE + REVDATE + String + 10 + + + FLOOR + FLOOR + Integer + + + NUMBER + NUMBER + Integer + + + ZIP + ZIP + Integer + + + ACTIVE + ACTIVE + String + 3 + + + MAILING + MAILING + String + 3 + + + FENAME + FENAME + String + 14 + + + FNAME + FNAME + String + 14 + + + FTYPE + FTYPE + String + 2 + + + STRNAME + STRNAME + String + 17 + + + id + id + String + 9 + + + diff --git a/openaddr/tests/conforms/lake-man-gpkg.json b/openaddr/tests/conforms/lake-man-gpkg.json new file mode 100644 index 00000000..0c6b91e0 --- /dev/null +++ b/openaddr/tests/conforms/lake-man-gpkg.json @@ -0,0 +1,18 @@ +{ + "schema": 2, + "layers": { + "addresses": [{ + "name": "default", + "data": "http://fake-web/lake-man.gpkg", + "cache": "http://fake-cache/lake-man.gpkg", + "protocol": "http", + "conform": { + "lon": "X", + "lat": "Y", + "number": "NUMBER", + "street": "STRNAME", + "format": "gpkg" + } + }] + } +} diff --git a/openaddr/tests/conforms/lake-man.gpkg b/openaddr/tests/conforms/lake-man.gpkg new file mode 100644 index 00000000..0a3d22eb Binary files /dev/null and b/openaddr/tests/conforms/lake-man.gpkg differ