From 1d7013bcdb0011ac26e5ed3172b841885ae2e64e Mon Sep 17 00:00:00 2001 From: jeffdefacto Date: Thu, 29 Jan 2026 09:49:52 -0800 Subject: [PATCH] Add support for geopackage This adds geopackage support to batch-machine. I've tested it locally and its working as expected. --- README.md | 2 + openaddr/conform.py | 28 +++++- openaddr/tests/conform.py | 34 ++++++- openaddr/tests/conforms/lake-man-gml.gfs | 101 +++++++++++++++++++++ openaddr/tests/conforms/lake-man-gpkg.json | 18 ++++ openaddr/tests/conforms/lake-man.gpkg | Bin 0 -> 106496 bytes 6 files changed, 178 insertions(+), 5 deletions(-) create mode 100644 openaddr/tests/conforms/lake-man-gml.gfs create mode 100644 openaddr/tests/conforms/lake-man-gpkg.json create mode 100644 openaddr/tests/conforms/lake-man.gpkg diff --git a/README.md b/README.md index f581a74f..ef094e8c 100644 --- a/README.md +++ b/README.md @@ -60,3 +60,5 @@ openaddr-process-one --skip-preview --layer addresses --layersource state exampl ``` Review https://github.com/openaddresses/openaddresses/blob/master/CONTRIBUTING.md for input json syntax. + +Supported conform formats include `shapefile`, `geojson`, `csv`, `xml`, `gdb`, and `gpkg`. diff --git a/openaddr/conform.py b/openaddr/conform.py index a25739e9..25e123a0 100644 --- a/openaddr/conform.py +++ b/openaddr/conform.py @@ -404,6 +404,28 @@ def find_source_path(data_source, source_paths): return c _L.warning("Source names file %s but could not find it", source_file_name) return None + elif format_string == "gpkg": + candidates = [] + for fn in source_paths: + basename, ext = os.path.splitext(fn) + if ext.lower() == ".gpkg": + candidates.append(fn) + if len(candidates) == 0: + _L.warning("No GPKG found in %s", source_paths) + return None + elif len(candidates) == 1: + _L.debug("Selected %s for source", candidates[0]) + return candidates[0] + else: + if "file" not in conform: + _L.warning("Multiple GPKGs found, but source has no file attribute.") + return None + source_file_name = conform["file"] + for c in candidates: + if source_file_name == os.path.basename(c): + return c + _L.warning("Source names file %s but could not find it", source_file_name) + return None elif format_string == "xml": # Return file if it's specified, else return the first .gml file we find if "file" in conform: @@ -425,7 +447,7 @@ def find_source_path(data_source, source_paths): return None class ConvertToGeojsonTask(object): - known_types = ('.shp', '.json', '.csv', '.kml', '.gdb') + known_types = ('.shp', '.json', '.csv', '.kml', '.gdb', '.gpkg') def convert(self, source_config, source_paths, workdir): "Convert a list of source_paths and write results in workdir" @@ -1151,7 +1173,7 @@ def extract_to_source_csv(source_config, source_path, extract_path): format_string = source_config.data_source["conform"]['format'] protocol_string = source_config.data_source['protocol'] - if format_string in ("shapefile", "xml", "gdb"): + if format_string in ("shapefile", "xml", "gdb", "gpkg"): ogr_source_path = normalize_ogr_filename_case(source_path) ogr_source_to_csv(source_config, ogr_source_path, extract_path) elif format_string == "csv": @@ -1194,7 +1216,7 @@ def conform_cli(source_config, source_path, dest_path): format_string = source_config.data_source["conform"].get('format') - if not format_string in ["shapefile", "geojson", "csv", "xml", "gdb"]: + if not format_string in ["shapefile", "geojson", "csv", "xml", "gdb", "gpkg"]: _L.warning("Skipping file with unknown conform: %s", source_path) return 1 diff --git a/openaddr/tests/conform.py b/openaddr/tests/conform.py index 74275248..f35ef831 100644 --- a/openaddr/tests/conform.py +++ b/openaddr/tests/conform.py @@ -1776,6 +1776,36 @@ def test_lake_man_gdb(self): self.assertEqual(rows[5]['properties']['number'], '5115') self.assertEqual(rows[5]['properties']['street'], 'OLD MILL RD') + def test_lake_man_gpkg(self): + with open(os.path.join(self.conforms_dir, "lake-man-gpkg.json")) as file: + source_config = SourceConfig(json.load(file), "addresses", "default") + source_path = os.path.join(self.conforms_dir, "lake-man.gpkg") + dest_path = os.path.join(self.testdir, 'lake-man-gpkg-conformed.csv') + + rc = conform_cli(source_config, source_path, dest_path) + self.assertEqual(0, rc) + + with open(dest_path) as fp: + rows = list(map(json.loads, list(fp))) + + self.assertEqual('Point', rows[0]['geometry']['type']) + self.assertAlmostEqual(-122.2592497, rows[0]['geometry']['coordinates'][0], places=4) + self.assertAlmostEqual(37.8026126, rows[0]['geometry']['coordinates'][1], places=4) + + self.assertEqual(6, len(rows)) + self.assertEqual(rows[0]['properties']['number'], '5115') + self.assertEqual(rows[0]['properties']['street'], 'FRUITED PLAINS LN') + self.assertEqual(rows[1]['properties']['number'], '5121') + self.assertEqual(rows[1]['properties']['street'], 'FRUITED PLAINS LN') + self.assertEqual(rows[2]['properties']['number'], '5133') + self.assertEqual(rows[2]['properties']['street'], 'FRUITED PLAINS LN') + self.assertEqual(rows[3]['properties']['number'], '5126') + self.assertEqual(rows[3]['properties']['street'], 'FRUITED PLAINS LN') + self.assertEqual(rows[4]['properties']['number'], '5120') + self.assertEqual(rows[4]['properties']['street'], 'FRUITED PLAINS LN') + self.assertEqual(rows[5]['properties']['number'], '5115') + self.assertEqual(rows[5]['properties']['street'], 'OLD MILL RD') + def test_lake_man_split(self): rc, dest_path = self._run_conform_on_source('lake-man-split', 'shp') self.assertEqual(0, rc) @@ -2309,8 +2339,8 @@ def test_srs(self): self.assertEqual(r[0], u'n,s,{GEOM_FIELDNAME}'.format(**globals())) r = r[1].split(',') - self.assertEquals(r[0], '3203') - self.assertEquals(r[1], 'SE WOODSTOCK BLVD') + self.assertEqual(r[0], '3203') + self.assertEqual(r[1], 'SE WOODSTOCK BLVD') x,y = wkt_pt(r[2]) self.assertAlmostEqual(-122.630842186651, x, places=4) diff --git a/openaddr/tests/conforms/lake-man-gml.gfs b/openaddr/tests/conforms/lake-man-gml.gfs new file mode 100644 index 00000000..485d1ae9 --- /dev/null +++ b/openaddr/tests/conforms/lake-man-gml.gfs @@ -0,0 +1,101 @@ + + + lake_man + lake_man + geometryProperty + geometryProperty + + 1 + + 6 + -122.25925 + -122.25672 + 37.80071 + 37.80436 + + + MSTRID + MSTRID + Integer + + + CATEGORY + CATEGORY + String + 7 + + + ADDRESSID + ADDRESSID + Integer + + + BASEID + BASEID + Integer + + + REVDATE + REVDATE + String + 10 + + + FLOOR + FLOOR + Integer + + + NUMBER + NUMBER + Integer + + + ZIP + ZIP + Integer + + + ACTIVE + ACTIVE + String + 3 + + + MAILING + MAILING + String + 3 + + + FENAME + FENAME + String + 14 + + + FNAME + FNAME + String + 14 + + + FTYPE + FTYPE + String + 2 + + + STRNAME + STRNAME + String + 17 + + + id + id + String + 9 + + + diff --git a/openaddr/tests/conforms/lake-man-gpkg.json b/openaddr/tests/conforms/lake-man-gpkg.json new file mode 100644 index 00000000..0c6b91e0 --- /dev/null +++ b/openaddr/tests/conforms/lake-man-gpkg.json @@ -0,0 +1,18 @@ +{ + "schema": 2, + "layers": { + "addresses": [{ + "name": "default", + "data": "http://fake-web/lake-man.gpkg", + "cache": "http://fake-cache/lake-man.gpkg", + "protocol": "http", + "conform": { + "lon": "X", + "lat": "Y", + "number": "NUMBER", + "street": "STRNAME", + "format": "gpkg" + } + }] + } +} diff --git a/openaddr/tests/conforms/lake-man.gpkg b/openaddr/tests/conforms/lake-man.gpkg new file mode 100644 index 0000000000000000000000000000000000000000..0a3d22eb657ee606b4c025f4ff7517162276e5e2 GIT binary patch literal 106496 zcmeI5eQYDgb-Nt8|}QD^(iILzhd=qMy}D~TWaJV|F$;)=(bB4vK~EUOn{O)ja; zk-PNn%94yas7|(npbg^GK+_Z`?7(f27WKbL+XQJ46eyB^Tr?<<0xg=hh}$AptJN$dp2)vDK-RVLwKO}$zkN^@u z0!RP}AOR$R1dsp{xFrI=OgWdRGXv*pd7;FWd4VgD3MW=1w!Xy|Nj8&BrBlqd8Q1t= z6)K9NRN@5|{?{vUWlrLRB1hysS>bA%H_XGiK%H@%yVfjag_RUe=2^ikFoMtCX-IYnj$li*||_;mJZL)O@yas!{NyEOoWc+bC=VZL~di#tLE&by|ckc z(0lc2hnSg2ct(v`&m`6uC@n5=oUp?ex4d+)gZSKhFr6 zhwD8G{_sEoNB{{S0VIF~kN^@u0!RP}AOR$R1nwsSCwo1E#>xTC|L-TmVqK5`585l7B>kk_*)Yj99RL*?< z-;XHQkM51Cpq5Ag2_OL^fCP{L5N@K`~TtmUxk1N549R5@@i>zq zzSj6odHBdOvzpE@B$3K8nH)){+SB<6NDwBva+ze(S0F(=ks?_p$*kl^CYs1H{^)W# zlcR}G*Ncb(QP^jz9Pzd3;Pa8^d9lhWoJ@+MAS)8f3(7@e)KaS+DYAkfD&$#?R5@8D z$`&h-kS{>4TxLL1e~P&>VUu4Zp}+!RQnBm5BVSYWP@)ygbDRY}Zbsdib)S^NsoIyKIi}WH18ZZ|}beMfzz&xf$Hp?}9 zx&iYp`(snFj^5Y>zQ>09-Pv&m#S0~FSAMR_D_nuyRz&T((CT)DP;1D@!OAJ8dwksS z%DmQ?TH{-IIRr?CS%v;6wZdd|+|ZvX zoS?`S)Pcakhfg}))6p6F7*o{n-y*k%P;4t5Kf#edY4Yd(9T3xL@$YN<|YieO?EoH0sS=FXtyjK}$ zirAXmHl!|Ey(FJXJeh}qzS#(E^3X(8dBJXYi*Q#hv(=l5h1>U~#9+Vs>g{`F{HJG!TpE(obJg<$Npy1F~y1m*3$bcO#y2_f4f6kqb+thqT?xZ ztgT$Y#KF7;D&AQp@WnT2Q z*U5~sI@%Exg1bewpf)AD49%CI)91@PC+QKo_tGU!E=qh|;o)vr8*t64`+Rhjm6bvf zZfh$rzQH|XF0s}e;fN{dhr8pl!q+&zZ){_HtTsMY%8gyVIJS0ito)3R_6g#SFA$)u zJvmLRJvd#nblebS>9AYIh4#iEsm(zhIgZo2Hage&6aDTr-;owJtza|+^RjQgxBH~J zKT?XJ=yp^qLN?xhatQLTL>Xerj;JEkCv8 z(O`R**D3BK%Z&$}o-43;Hh)-~H&z4mY5Q>t;2r*S*Lqx$+54`yCp^9xPq|_ zvOX=9L|BD7Ec}e^ZqIv|nAfl6(pX5btSixi2vYztu`!X9o9#KZhZto) zmB{4s(Im-fYj+)V@REEsk%|*72R~oZ?(=uGTYB}K7xuKzdx0fSzx&AxN8WIAyRb?n z!_B8`OsJXx5AE-K(CNN#!ErFwykl-gYd6cy%Li@bS|ZyfRc#9ubR(xuYZI0mT>EmR zTol`;V3wtb3Rh5y9BE%du&qDP#9nwHu>MHDTJa67-o|pR%sscw2}Q2u@&Q}DTO$4H ztbm4;f$O=``q&h@Nt@UIr*q7IM;Oh@n#bY2slFI{x5j;|s<+Sq=t?)bQfT|%w6*_x z?qa{^!%p|Yf@9xvNIy|fJCyd$LJNM_=6oHx3QfxE*ZZW7-IcnK5jd!P!0GmQ9G}u# zy3t=6r_=UAj8HG}6K|O>c{}AwqWK)`HxCKd80*3z@v7@lUZQPAPZRGNtj{N6jVvH! z1r|W!>CA>#o5=d-f&mDO#$p*Jo3%?WN3)DQB*Q$VuAkVFt|rszjJ1GNer=htg*=m3 zw+cj8a*3zxnXE+6Y_} zWUN&&=k`P=Ej5f9m%sS8e2PMiRqc9`?co9!LNQAOR$R1dsp{Kmter z2_S*HM&N~hH?`X1InjH5u&4LI2Px|8+0)dc6m=%PPC4Fz?aLevUAKuBU;X$uz8ZgR z;k|d3AMgJcV;yd6v{0vB_lJYQ$YgM4G8BIB=Z=f7ZZKK+ODAJwEs;!`=S44JSILz9nj};3?O*sRU6rfskDlnP)7e&?e)b!nljF&U-ua|mr$3{p*S~7f zssFe-Rk^aR(r|BQmE1O!Zrr%>$aPTZRCMR9zp$zFz*LZW!)H-x__!)5q9lv^r+Yf< z(`L3&0rdIAXWv@*6PrG#6Msp){yP?Z9LLp1HybO}_D3C^b!s!)uYVqN`lBCxyXR{* zokrh!m3sZrrcM?=wmXacs)H+OW&@Sf`M<;UE(L#hAOR$R1dsp{Kmter2_OL^fCP{L z5^ug770YolMC|M!mkHRbxE>v1^Yfdr5M5+OLk_5i6|rbOw013n!_+*s%p~DN6m~aLd#zN1&H>% zs9UOnmQGWBTokH%>hu45U9VBDue)Az{r$aTV$>7~AOR$R1dsp{Kmter2_OL^fCP}h zJx<_beXw}pu&p7e52o#~cF{}vVDZ5&YD?Ec`e5;3@QAeo?f3t#f23SLcm34$pRRv% z{WDzPfdr5M5 zFv5Gj`I7?)UUdlj7QDV~g!h8%X?VGo-7`z@ddvtv0b#!duispDNB#?5|80c#K^~uj z*JmN@hw%CjBfKBNegLn3U3Sm@0lfau2*(o=KmtghYXa*0pMnQy;(-K^01`j~NB{{S z0VIF~kN^@u0!RP}+`j~dPyG{h%Hg9|KvxVp2s``*As^II&8qIyTTzxKUChVX(vmAky1pgmL`mCQd8+1N#&DC!v%~a ztC{o~>9E(U*Dx|)0;|HNHWS+RteQEmpB{61qTtR?9$p=N%i*c+>tiG^D`f80ZgpL) zLJd{O`6ZHKuIPG^Xevhh+NHiHY$B^wHJu^ZTp=MdwYsuri4O#BnF6#Lf1_}^S+wrr z2cC~;OkU=0#$@_dnM~g)Cg1=X6qS|g40no*%|FaGmmwJjHYQqO)E#}(ojvTSXfn@a z4ZEqk|I(n+8ZSI;wG^%DxyJ6=19icTPJ*^T-8$D#J*sP-Ot=3sJwy?|~n7vgTvv(Xvw_dCD09pe*q6cWkH+lfWVc!Q{dN6Zd zGs`QUo0(1VHD1_-@u_3CQDb+_xYphSlI=B;+4>8b zKr%Cq-L=J8N5sa+vjFYs`emYx?=M9UH=KUI(!JeudDCoWui7>o`UP&MZGi8tgE#Yh zU-i`cBj=p%#~yR+UpQ<7`#KAcVN*KX5$O?b^M*OPoMbu;I|W_t$ZhO(5p|#Y08CiS zuUQ;t?sBm9F{gWB!SPB+tB>vX)rb5pyBYjJTU`!GOZ<(WcNgTMSm(6u*|h;)9qsAn z{kaxF7scv!P0()G^#DoSDe$Gj^K5mSQwM7X<{7lY*Em^WYjqNf=9pY!jcMcv(~8w- zKA9taS&_=Bpx-yPF+NrsA1md?E?*p5yEs;U#z*@EamN=3(6v&b%vU)m!_2*rl^UnB zw#3pa=@jfw4|^KsNKvXug<6>}l&_K1bcRX9Q{)L|gZP`72H-|x6{aJp6}XkKG4k`J zKznBOj+r&II6Aw)^`U^%?ejSf^6mDt75>QKwj-ce1BIoswL<+GG4_4bY8lP0*idya zZ^IZq^(loir}$K+U^$`0N~OZK#J9LZ4eV7=_FCPfnU4YwES%h}RUsqe*@yNVnv%Z2 zuFIG&o~5&2m^N;xjOQcQq+)TRDAp#Q ztMKwfNhz84NsH%W>6P5ZIz!YLvYuZ~CRT{=uw*{s%WhYt_rIUm#@+Fw-@MWHpE-g-4F4Q>vmemrtIN27Y%2ipRvJH==h8!9n z)o}GD@RCu<#Yyc-3+9!oDywK`HH3W@ZuHrr0)dT>TE@I^O`g>BglstDW0~V*kkn49 z6g)FqEb&4G!Wtjd@S-HEN#T=RXBD2U!Zb$SlZgNq&vUDyRAZGT@0DwcMnV(Zxu#r- zGwJwBCcEjq63>#kDKE{WvdkLH^&mu)swEQVVA`heMUveEjx`dRpPM3_Aak{6tDKi! z17Rj}q_96Hv4YGi0q@nW#N)G(VAry9vyrZ4L!s&JGY-wnPIaI2>`b_8;czH2-@R~n zenu5$lF7t+Hl2tyEj1IFoeM=~>G1sAM0k2O9F9!SMCc@wisvrpQ;FQB7iJt1*eG-r z(i>)tuB_xU(UpzO@I(-T)#(wlw&{&a94GAX#Vs!#q@$^LlF@T0aTSPxz=>dJc4|5j zo)1rl=I5s(5lFm}-Q=oOzAlS=iLKJ%t910~1h^6`80s!l1>aM)OC0ehSuj5>h?25J zr!z6|m=H*JSx#%NqEIoj6$QwagBEV&s_M{XuK8><&TM8^((B;smqm$xK@=3WN}yA! zivsi$L@bj?j6fp-zn=gbL^Dj(@O1TZc{%;`=J1h)~N?BjBSM&TcDPaH;BYQI9r4Oo0+77bU)~KyP_TZ~lvu)-Y2} zRfH?5Vr)?og}vG(Lvvk6dz6`|=@~{EO^?}NBnTZ>!)uHog~gE^txXB0zzjbWs~!E~$^z|I)oMd=9(*igLK#q+D;p8y-ji z2_OL^fCP{L5y{Y($2InlkU{|D2$O``w+ literal 0 HcmV?d00001