From a8779b4e160bd7948ab0caa038eb274259931cdf Mon Sep 17 00:00:00 2001 From: Esteban Zimanyi Date: Fri, 12 Jun 2026 21:02:09 +0200 Subject: [PATCH 1/2] Attach the @sqlfn SQL-name map to the catalog Follow each function's @csqlfn -> wrapper @sqlfn -> SQL name chain and attach the resulting SQL name to the catalog. The vendored-source root is overridable via MDB_SRC_ROOT so the @sqlfn (and @ingroup) extraction can be pointed at the same pinned MobilityDB checkout as the headers, keeping the generated catalog reproducibly equivalent to that pin. --- parser/sqlfn.py | 86 +++++++++++++++++++++++++++++++++++++++++++++++++ run.py | 13 ++++++++ 2 files changed, 99 insertions(+) create mode 100644 parser/sqlfn.py diff --git a/parser/sqlfn.py b/parser/sqlfn.py new file mode 100644 index 0000000..4d8b2a9 --- /dev/null +++ b/parser/sqlfn.py @@ -0,0 +1,86 @@ +"""Attach the SQL-name map (@sqlfn / @sqlop) to the MEOS-API catalog. + +The catalog carries MEOS-C function names + C signatures, but bindings that +emit a SQL/UDF surface (MobilityDB SQL, MobilitySpark UDFs, MobilityDuck, …) +need the user-facing SQL name and operator. Both are machine-extractable from +the doxygen tag chain that already pervades the source: + + MEOS-C fn --@csqlfn #MobilityDB_C()--> MobilityDB-C wrapper + MobilityDB-C wrapper --@sqlfn sqlName() / @sqlop @p <op>--> SQL name + op + +So: in meos/src `@csqlfn #Wrapper()` sits above the MEOS-C function (→ MEOS-C → +Wrapper); in mobilitydb/src `@sqlfn name()` + `@sqlop @p <op>` sit above +`Datum Wrapper(PG_FUNCTION_ARGS)` (→ Wrapper → name, op). Join on Wrapper. + +Adds per function (when the chain resolves): `sqlfn`, `sqlop`, `mdbC`. +""" +import re +from pathlib import Path + +_CSQLFN = re.compile(r"@csqlfn\s+#(\w+)\s*\(\)") +# After the doxygen close, the MEOS-C definition: an optional return-type line +# (no parens/braces/;/=), then `name(`.
+_FNDEF = re.compile(r"\*/\s*\n(?:[^\n(){};=]+\n)?(\w+)\s*\(")
+_SQLFN = re.compile(r"@sqlfn\s+(\w+)\s*\(\)")
+_SQLOP = re.compile(r"@sqlop\s+@p\s+(\S+)")
+_DATUM = re.compile(r"Datum\s+(\w+)\s*\(\s*PG_FUNCTION_ARGS")
+
+
+def _meos_to_mdb(meos_src):
+    """MEOS-C function name -> MobilityDB-C wrapper name (from @csqlfn)."""
+    out = {}
+    for cf in sorted(Path(meos_src).rglob("*.c")):  # sorted: rglob order is arbitrary
+        text = cf.read_text(encoding="utf-8", errors="ignore")
+        for m in _CSQLFN.finditer(text):
+            mdb_c = m.group(1)
+            fm = _FNDEF.search(text, m.end())
+            if fm:
+                out.setdefault(fm.group(1), mdb_c)  # first file in sorted order wins
+    return out
+
+
+def _mdb_to_sql(mdb_src):
+    """MobilityDB-C wrapper name -> ordered list of (sqlfn, sqlop).
+
+    A shared PG wrapper can carry more than one @sqlfn (e.g. Temporal_derivative
+    is exposed as both derivative() and speed()), so ALL of them are collected.
+    Files are visited in sorted order so the list order is reproducible.
+    """
+    out = {}
+    for cf in sorted(Path(mdb_src).rglob("*.c")):  # sorted: keeps lst[0] stable
+        text = cf.read_text(encoding="utf-8", errors="ignore")
+        for m in _SQLFN.finditer(text):
+            sqlfn = m.group(1)
+            # @sqlop lives in the SAME doxygen block (before the closing */).
+            close = text.find("*/", m.end())
+            block = text[m.start():close] if close != -1 else text[m.start():m.start() + 800]
+            op = _SQLOP.search(block)
+            dm = _DATUM.search(text, close if close != -1 else m.end())
+            if dm:
+                entry = (sqlfn, op.group(1) if op else None)
+                lst = out.setdefault(dm.group(1), [])
+                if entry not in lst:
+                    lst.append(entry)
+    return out
+
+
+def attach_sqlfn_map(idl, meos_src, mdb_src):
+    """Attach mdbC/sqlfn/sqlop to catalog functions in place; return (idl, count)."""
+    m2d = _meos_to_mdb(meos_src)
+    d2s = _mdb_to_sql(mdb_src)
+    n = 0
+    for f in idl["functions"]:
+        # Join MEOS-C name -> wrapper -> SQL names; skip when the chain breaks.
+        mdb_c = m2d.get(f["name"])
+        lst = d2s.get(mdb_c) if mdb_c else None
+        if not lst:
+            continue
+        f["mdbC"] = mdb_c
+        f["sqlfn"] = lst[0][0]
+        if lst[0][1]:
+            f["sqlop"] = lst[0][1]
+        # Shared wrapper exposing >1 SQL name: record them all (binding picks).
+        if len(lst) > 1:
+            f["sqlfnAll"] = [s for s, _ in lst]
+        n += 1
+    return idl, n
diff --git a/run.py b/run.py
index 640dd9e..7e0da7f 100644
--- a/run.py
+++ b/run.py
@@ -1,3 +1,4 @@
+import os
 import sys
 import json
 from pathlib import Path
@@ -5,6 +6,7 @@
 from parser.parser import parse_all_headers, merge_meta
 from parser.portable import attach_portable_aliases
 from parser.typerecover import recover_collapsed_types
+from parser.sqlfn import attach_sqlfn_map
 
 HEADERS_DIR = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("./meos/include")
 
@@ -40,6 +42,17 @@ def main():
           file=sys.stderr)
     idl = attach_portable_aliases(idl, PORTABLE_PATH)
 
+    # 4. Attach the SQL-name map (@sqlfn/@sqlop) from the vendored source.
+    # The source root is overridable (MDB_SRC_ROOT) so a binding can point the
+    # @sqlfn/@ingroup extraction at the SAME pinned checkout as the headers,
+    # keeping the catalog reproducibly equivalent to that pin.
+    SRC_ROOT = Path(os.environ.get("MDB_SRC_ROOT", "./_mobilitydb"))
+    MEOS_SRC = SRC_ROOT / "meos" / "src"
+    MDB_SRC = SRC_ROOT / "mobilitydb" / "src"
+    if MEOS_SRC.exists() and MDB_SRC.exists():
+        idl, nsql = attach_sqlfn_map(idl, MEOS_SRC, MDB_SRC)
+        print(f"[4/4] Attached {nsql} @sqlfn SQL names", file=sys.stderr)
+
     idl_path = OUTPUT_DIR / "meos-idl.json"
     with open(idl_path, "w") as f:
         json.dump(idl, f, indent=2)
From a9ee3ca32aceed13ef1df2364a94dd1f0c7a7bfb Mon Sep 17 00:00:00 2001
From: Esteban Zimanyi
Date: Sat, 13 Jun 2026 11:59:33 +0200
Subject: [PATCH 2/2] Attach the doxygen module group (@ingroup) to the catalog
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Parse the @ingroup meos_<group> tag from each MEOS-C function's source comment
(meos/src) and attach it as a 'group' field on every catalog function.
Those groups are the structure of the MEOS reference manual / doxygen XML (meos_setspan_accessor, meos_temporal_comp_ever, meos_geo_constructor, ...), so every catalog-driven binding can organize its generated surface the same way the manual does — a function is found in the same place across all tools. The meos_internal_* groups are carried through too, so a binding can filter the non-user-facing surface. 2155 user-facing functions across 175 groups. --- parser/doxygroup.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ run.py | 7 +++++++ 2 files changed, 51 insertions(+) create mode 100644 parser/doxygroup.py diff --git a/parser/doxygroup.py b/parser/doxygroup.py new file mode 100644 index 0000000..b809d13 --- /dev/null +++ b/parser/doxygroup.py @@ -0,0 +1,44 @@ +"""Attach the doxygen module group (@ingroup) to the MEOS-API catalog. + +Every MEOS-C function carries an `@ingroup meos_<group>` doxygen tag in its +source comment block (meos/src). Those groups ARE the structure of the MEOS +reference manual / doxygen XML — e.g. `meos_setspan_accessor`, +`meos_temporal_comp_ever`, `meos_geo_constructor`. Carrying the group into the +catalog lets every binding organize its generated surface the SAME way the +manual does, so a function is found in the same place across all tools. + +Adds per function (when found): `group`. The `meos_internal_*` groups are +MEOS-internal, not user-facing — they are tagged like any other so a binding +can filter them out, but they are NOT a separate concept here. +""" +import re +from pathlib import Path + +_INGROUP = re.compile(r"@ingroup\s+(meos_\w+)") +# Same shape as sqlfn._FNDEF: after the doxygen close, an optional return-type +# line (no parens/braces/;/=), then `name(`.
+_FNDEF = re.compile(r"\*/\s*\n(?:[^\n(){};=]+\n)?(\w+)\s*\(")
+
+
+def _name_to_group(meos_src):
+    """MEOS-C function name -> doxygen @ingroup group (first occurrence wins)."""
+    out = {}
+    for cf in sorted(Path(meos_src).rglob("*.c")):  # sorted: makes "first" reproducible
+        text = cf.read_text(encoding="utf-8", errors="ignore")
+        for m in _INGROUP.finditer(text):
+            grp = m.group(1)
+            fm = _FNDEF.search(text, m.end())
+            if fm:
+                out.setdefault(fm.group(1), grp)
+    return out
+
+
+def attach_groups(idl, meos_src):  # annotates idl in place; returns (idl, count)
+    n2g = _name_to_group(meos_src)
+    n = 0
+    for f in idl["functions"]:
+        g = n2g.get(f["name"])
+        if g:  # functions without a resolved @ingroup are left untouched
+            f["group"] = g
+            n += 1
+    return idl, n
diff --git a/run.py b/run.py
index 7e0da7f..7fb1d19 100644
--- a/run.py
+++ b/run.py
@@ -7,6 +7,7 @@
 from parser.portable import attach_portable_aliases
 from parser.typerecover import recover_collapsed_types
 from parser.sqlfn import attach_sqlfn_map
+from parser.doxygroup import attach_groups
 
 HEADERS_DIR = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("./meos/include")
 
@@ -53,6 +54,12 @@ def main():
         idl, nsql = attach_sqlfn_map(idl, MEOS_SRC, MDB_SRC)
         print(f"[4/4] Attached {nsql} @sqlfn SQL names", file=sys.stderr)
 
+    # 5. Attach the doxygen module group (@ingroup) from the vendored source, so
+    # bindings organize their generated surface like the reference manual.
+    if MEOS_SRC.exists():
+        idl, ngrp = attach_groups(idl, MEOS_SRC)
+        print(f"[5/5] Attached {ngrp} doxygen @ingroup groups", file=sys.stderr)
+
     idl_path = OUTPUT_DIR / "meos-idl.json"
     with open(idl_path, "w") as f:
         json.dump(idl, f, indent=2)