Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions parser/doxygroup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""Attach the doxygen module group (@ingroup) to the MEOS-API catalog.

Every MEOS-C function carries an `@ingroup meos_<group>` doxygen tag in its
source comment block (meos/src). Those groups ARE the structure of the MEOS
reference manual / doxygen XML — e.g. `meos_setspan_accessor`,
`meos_temporal_comp_ever`, `meos_geo_constructor`. Carrying the group into the
catalog lets every binding organize its generated surface the SAME way the
manual does, so a function is found in the same place across all tools.

Adds per function (when found): `group`. The `meos_internal_*` groups are
MEOS-internal, not user-facing — they are tagged like any other so a binding
can filter them out, but they are NOT a separate concept here.
"""
import re
from pathlib import Path

_INGROUP = re.compile(r"@ingroup\s+(meos_\w+)")
# Same shape as sqlfn._FNDEF: after the doxygen close, an optional return-type
# line (no parens/braces/;/=), then `name(`.
_FNDEF = re.compile(r"\*/\s*\n(?:[^\n(){};=]+\n)?(\w+)\s*\(")


def _name_to_group(meos_src):
"""MEOS-C function name -> doxygen @ingroup group (first occurrence wins)."""
out = {}
for cf in Path(meos_src).rglob("*.c"):
text = cf.read_text(errors="ignore")
for m in _INGROUP.finditer(text):
grp = m.group(1)
fm = _FNDEF.search(text, m.end())
if fm:
out.setdefault(fm.group(1), grp)
return out


def attach_groups(idl, meos_src):
    """Tag catalog functions with their doxygen group.

    Looks up every entry of `idl["functions"]` by its `"name"` in the
    name -> group map built from `meos_src`, and stores a hit under the
    entry's `"group"` key. Entries without a group are left untouched.

    Returns the same (mutated) `idl` together with the number of entries
    that received a group.
    """
    groups_by_name = _name_to_group(meos_src)
    attached = 0
    for fn in idl["functions"]:
        group = groups_by_name.get(fn["name"])
        if not group:
            continue
        fn["group"] = group
        attached += 1
    return idl, attached
86 changes: 86 additions & 0 deletions parser/sqlfn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""Attach the SQL-name map (@sqlfn / @sqlop) to the MEOS-API catalog.

The catalog carries MEOS-C function names + C signatures, but bindings that
emit a SQL/UDF surface (MobilityDB SQL, MobilitySpark UDFs, MobilityDuck, …)
need the user-facing SQL name and operator. Both are machine-extractable from
the doxygen tag chain that already pervades the source:

MEOS-C fn --@csqlfn #MobilityDB_C()--> MobilityDB-C wrapper
MobilityDB-C wrapper --@sqlfn sqlName() / @sqlop @p <op>--> SQL name + op

So: in meos/src `@csqlfn #Wrapper()` sits above the MEOS-C function (→ MEOS-C →
Wrapper); in mobilitydb/src `@sqlfn name()` + `@sqlop @p <op>` sit above
`Datum Wrapper(PG_FUNCTION_ARGS)` (→ Wrapper → name, op). Join on Wrapper.

Adds per function (when the chain resolves): `mdbC`, `sqlfn`, plus `sqlop` when
the wrapper documents an operator and `sqlfnAll` when it exposes several SQL
names.
"""
import re
from pathlib import Path

_CSQLFN = re.compile(r"@csqlfn\s+#(\w+)\s*\(\)")
# After the doxygen close, the MEOS-C definition: an optional return-type line
# (no parens/braces/;/=), then `name(`.
_FNDEF = re.compile(r"\*/\s*\n(?:[^\n(){};=]+\n)?(\w+)\s*\(")
_SQLFN = re.compile(r"@sqlfn\s+(\w+)\s*\(\)")
_SQLOP = re.compile(r"@sqlop\s+@p\s+(\S+)")
_DATUM = re.compile(r"Datum\s+(\w+)\s*\(\s*PG_FUNCTION_ARGS")


def _meos_to_mdb(meos_src):
"""MEOS-C function name -> MobilityDB-C wrapper name (from @csqlfn)."""
out = {}
for cf in Path(meos_src).rglob("*.c"):
text = cf.read_text(errors="ignore")
for m in _CSQLFN.finditer(text):
mdb_c = m.group(1)
fm = _FNDEF.search(text, m.end())
if fm:
out.setdefault(fm.group(1), mdb_c)
return out


def _mdb_to_sql(mdb_src):
    """MobilityDB-C wrapper name -> ordered list of (sqlfn, sqlop).

    A shared PG wrapper can carry more than one @sqlfn (e.g. Temporal_derivative
    is exposed as both derivative() and speed()), so collect ALL of them rather
    than the first. Entries are ordered by sorted file path and then by
    position in the file, so the list (and its first element) does not depend
    on filesystem enumeration order.

    Args:
        mdb_src: root directory of the MobilityDB C sources (str or Path).

    Returns:
        dict of wrapper name -> list of unique `(sqlfn, sqlop)` tuples, where
        `sqlop` is None when the tag's comment block has no `@sqlop @p <op>`.
    """
    out = {}
    # rglob yields entries in filesystem order; sort for a reproducible result.
    for cf in sorted(Path(mdb_src).rglob("*.c")):
        # Explicit encoding: the default follows the locale.
        text = cf.read_text(encoding="utf-8", errors="ignore")
        for m in _SQLFN.finditer(text):
            # @sqlop lives in the SAME doxygen block (before the closing */).
            close = text.find("*/", m.end())
            if close == -1:
                # Unterminated comment: best effort, look in a fixed window
                # after the tag and search the rest of the file.
                block = text[m.start():m.start() + 800]
                dm = _DATUM.search(text, m.end())
            else:
                block = text[m.start():close]
                # Only accept a wrapper that appears before the next comment
                # opens; otherwise a tagged block that is not followed by a
                # wrapper would donate its SQL name to a later, unrelated one.
                nxt = text.find("/*", close + 2)
                dm = _DATUM.search(text, close, len(text) if nxt == -1 else nxt)
            if not dm:
                continue
            op = _SQLOP.search(block)
            entry = (m.group(1), op.group(1) if op else None)
            lst = out.setdefault(dm.group(1), [])
            if entry not in lst:
                lst.append(entry)
    return out


def attach_sqlfn_map(idl, meos_src, mdb_src):
    """Join catalog functions to their SQL names through the wrapper name.

    For each entry of `idl["functions"]` whose `"name"` resolves to a wrapper
    (from `meos_src`) that in turn resolves to at least one SQL entry (from
    `mdb_src`), sets:

    - `"mdbC"`: the wrapper name;
    - `"sqlfn"`: the SQL name of the first entry;
    - `"sqlop"`: the operator of the first entry, only when it has one;
    - `"sqlfnAll"`: every SQL name, only when there is more than one.

    Returns the same (mutated) `idl` together with the number of entries
    that were resolved.
    """
    wrapper_of = _meos_to_mdb(meos_src)
    sql_of = _mdb_to_sql(mdb_src)
    attached = 0
    for fn in idl["functions"]:
        wrapper = wrapper_of.get(fn["name"])
        entries = sql_of.get(wrapper) if wrapper else None
        if not entries:
            continue
        primary_name, primary_op = entries[0]
        fn["mdbC"] = wrapper
        fn["sqlfn"] = primary_name
        if primary_op:
            fn["sqlop"] = primary_op
        # Shared wrapper exposing >1 SQL name: keep them all for the binding.
        if len(entries) > 1:
            fn["sqlfnAll"] = [name for name, _ in entries]
        attached += 1
    return idl, attached
20 changes: 20 additions & 0 deletions run.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import os
import sys
import json
from pathlib import Path

from parser.parser import parse_all_headers, merge_meta
from parser.portable import attach_portable_aliases
from parser.typerecover import recover_collapsed_types
from parser.sqlfn import attach_sqlfn_map
from parser.doxygroup import attach_groups


# Headers directory: the first command-line argument when one is given,
# otherwise ./meos/include.
HEADERS_DIR = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("./meos/include")
Expand Down Expand Up @@ -40,6 +43,23 @@ def main():
file=sys.stderr)
idl = attach_portable_aliases(idl, PORTABLE_PATH)

# 4. Attach the SQL-name map (@sqlfn/@sqlop) from the vendored source.
# The source root is overridable (MDB_SRC_ROOT) so a binding can point the
# @sqlfn/@ingroup extraction at the SAME pinned checkout as the headers,
# keeping the catalog reproducibly equivalent to that pin.
SRC_ROOT = Path(os.environ.get("MDB_SRC_ROOT", "./_mobilitydb"))
MEOS_SRC = SRC_ROOT / "meos" / "src"
MDB_SRC = SRC_ROOT / "mobilitydb" / "src"
if MEOS_SRC.exists() and MDB_SRC.exists():
idl, nsql = attach_sqlfn_map(idl, MEOS_SRC, MDB_SRC)
print(f"[4/4] Attached {nsql} @sqlfn SQL names", file=sys.stderr)

# 5. Attach the doxygen module group (@ingroup) from the vendored source, so
# bindings organize their generated surface like the reference manual.
if MEOS_SRC.exists():
idl, ngrp = attach_groups(idl, MEOS_SRC)
print(f"[5/5] Attached {ngrp} doxygen @ingroup groups", file=sys.stderr)

idl_path = OUTPUT_DIR / "meos-idl.json"
with open(idl_path, "w") as f:
json.dump(idl, f, indent=2)
Expand Down