From ff915c2d2f8af295ffd168018ef7736cd20975c1 Mon Sep 17 00:00:00 2001
From: Brian Arnold <arnoldb@stanford.edu>
Date: Fri, 12 Jun 2026 21:54:16 +0000
Subject: [PATCH 01/12] docs(specs): add pydantic config loader design for
 ENG-607

Reusable pydantic-backed config facility: load_pydantic_config (validate
YAML -> typed model at build time) plus a semantic-type converter so a
validated config flows into pods as a first-class, JSON-hashed input and is
auto-deserialized to the typed model. Schema lives in the wrapped package's
config/ subpackage; YAML stays the authoring format.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 ...026-06-12-pydantic-config-loader-design.md | 169 ++++++++++++++++++
 1 file changed, 169 insertions(+)
 create mode 100644 superpowers/specs/2026-06-12-pydantic-config-loader-design.md

diff --git a/superpowers/specs/2026-06-12-pydantic-config-loader-design.md b/superpowers/specs/2026-06-12-pydantic-config-loader-design.md
new file mode 100644
index 00000000..c7d9cfc8
--- /dev/null
+++ b/superpowers/specs/2026-06-12-pydantic-config-loader-design.md
@@ -0,0 +1,169 @@
+# Design: Strongly-typed, schema-validated pipeline config loading (ENG-607)
+
+**Status:** Approved (design)
+**Date:** 2026-06-12
+**Linear:** ENG-607 (project: Wrap other RawData → ETL repos in Orcapod)
+**Related:** ENG-601 (config is a content-hashed broadcast pod input), PLT-964 (`OrcapodConfig` nested-config pattern)
+
+## Overview
+
+Pipelines wrapped in orcapod (starting with orcapod-spikesorting) are driven by large,
+deeply-nested YAML config files — the spike-sorting config alone has ~9 top-level sections and
+hundreds of parameters. Today such a config is loaded with `yaml.safe_load` into a plain `dict`
+and accessed by string keys throughout the wrapper and its `enigma-ephys` pods. There is no
+validation or typing: a typo'd key, a wrong type, or a missing nested field surfaces as a deep
+failure mid-processing (often only on a Ray worker) or as a silently wrong result.
+
+This design adds a **reusable, pydantic-backed config facility in orcapod-python** so a wrapped
+pipeline can: (1) define its config **schema** once as pydantic models, (2) **validate** a YAML
+config against that schema and load it into a **typed object** at pipeline-build time, and
+(3) pass that validated object into function pods as a **first-class, content-hashed input**,
+where pods receive it already deserialized and typed.
+
+## Goals & Success Criteria
+
+- Loading an invalid config fails **immediately at build time** (before any pod runs) with a
+  clear, field-located error (wrong type, unknown key, out-of-range value, missing required).
+- Pods receive the config as a **typed pydantic model** (attribute access, IDE/type-checker
+  support), not an untyped dict — with **no per-pod deserialization boilerplate**.
+- The config's pod-input **content hash is over its validated, canonical meaning**, so
+  formatting-only YAML edits (comments, key order, whitespace) do **not** bust the cache; only
+  meaningful value changes do. (An improvement over ENG-601's raw-file hashing.)
+- The facility is **reusable** across every wrapped ETL repo, not specific to spike-sorting.
+
+## Authoring model
+
+Two artifacts, complementary (not redundant):
+
+- **Schema** — pydantic model classes, written once by the pipeline developer, living in a
+  `config/` subpackage of the wrapped repo (e.g. `orcapod_spikesorting/config/`). Defines
+  structure, types, constraints, and defaults. Changes rarely.
+- **Values** — the YAML file a scientist edits per run (e.g. `subset_data`, `cache_path`).
+  YAML remains the human-authoring format; nobody hand-writes a pydantic object to configure a
+  run.
+
+## Architecture & data flow
+
+```
+Author (YAML)            Pipeline build (driver)                      Pod (worker)
+─────────────            ───────────────────────                      ────────────
+config.yaml  ──▶  load_pydantic_config(path, SpikeSortingConfig)      def preprocess(rec, config: SpikeSortingConfig):
+                    → yaml.safe_load                                       config.kilosort.batch_size   # typed + validated
+                    → model.model_validate(...)                            ...
+                    → SpikeSortingConfig instance
+                         │
+                         ▼
+                    broadcast as a pod input via a dict/list source.
+                    A registered semantic-type converter maps the
+                    model ⇄ Arrow struct<model qualname, canonical JSON>;
+                    content hash = hash(qualname + canonical JSON).
+                         │  identity = meaning of config, not file formatting
+                         ▼
+                    orcapod hashes + transports; worker reconstructs
+                    the typed model from the struct automatically.
+```
+
+## Components & API
+
+All new code lives in a new module `orcapod/pydantic_config.py` (loader + converter), kept
+separate from `orcapod/config.py` (which is orcapod's *own* `OrcapodConfig` runtime settings —
+a different concept). `pydantic` becomes a dependency of orcapod-python (pydantic v2).
+
+### `load_pydantic_config`
+
+```python
+M = TypeVar("M", bound=pydantic.BaseModel)
+
+def load_pydantic_config(path: str | Path, model_cls: type[M]) -> M:
+    """Read a YAML file, validate it against `model_cls`, return the typed model.
+
+    Raises a clear, file-located error on invalid YAML or schema violation.
+    """
+```
+
+- Named `load_pydantic_config` (not `load_config`, which is already a top-level export for
+  `OrcapodConfig`) to avoid collision and to be explicit that it is pydantic-backed.
+- Reads YAML via `yaml.safe_load`, validates via `model_cls.model_validate(data)`, returns the
+  instance. On `pydantic.ValidationError` (or YAML parse error), re-raise wrapped with the file
+  path for context.
+
+### `OrcapodBaseConfig` (optional base)
+
+```python
+class OrcapodBaseConfig(pydantic.BaseModel):
+    """Recommended base for pipeline config schemas; strict by default."""
+    model_config = ConfigDict(extra="forbid", frozen=True)
+```
+
+- `extra="forbid"` makes typo'd/unknown keys an error. `frozen=True` makes instances immutable
+  (safer as a broadcast input). Use is recommended, not required — a schema author may subclass
+  `pydantic.BaseModel` directly if they need different semantics.
+
+### `PydanticModelConverter` (semantic-type converter)
+
+Modeled directly on `PythonPathStructConverter` (which maps `Path` ⇄ `struct<path: large_string>`
+with file-content hashing). Registered in the `DataContext` semantic registry.
+
+- `can_handle_python_type(t)`: `issubclass(t, pydantic.BaseModel)`.
+- **python → arrow:** `struct<__pydantic_model__: large_string, __pydantic_json__: large_string>`
+  where `__pydantic_model__` is the model's fully-qualified `module:qualname` and
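+  `__pydantic_json__` is canonical JSON (`model.model_dump_json()`; declared fields are emitted in definition order — note that free-form `dict`-typed fields keep their input key order, so YAML key order can still affect the hash for those).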
+- **arrow → python:** import the class from the stored qualname and
+  `model_cls.model_validate_json(json)`. Self-describing — no external type context needed.
+- **content hash:** hash over `(__pydantic_model__, canonical JSON)`, so identity tracks config
+  meaning + schema identity, independent of source-YAML formatting.
+
+### Pipeline wiring (in the wrapped repo)
+
+The broadcast config source is built from the **validated model instance** (via a dict/list
+source, whose values route through the type converter) rather than a `Path` `DataFrameSource`.
+Pods declare a parameter typed as the model (`config: SpikeSortingConfig`) and orcapod injects
+the reconstructed, validated model — handling transport, hashing, and deserialization.
+
+## Error handling
+
+- **Invalid YAML / schema violation:** raised at build time, before any pod runs, with the file
+  path and pydantic's field-level detail.
+- **Unimportable model class on reconstruction (worker):** clear `ImportError` naming the stored
+  qualname (e.g. the wrapped package isn't on the worker path).
+
+## Testing
+
+- **Loader:** valid config → model; wrong-type, unknown-key (with `extra="forbid"`), and
+  missing-required configs → raise with clear messages including the file path.
+- **Converter round-trip:** `model → arrow struct → model` equality.
+- **Hash stability:** formatting-only YAML edits → identical content hash (cache still hits);
+  any value change → different hash.
+- **End-to-end:** a small pipeline where a pod consumes a typed config; confirm the pod receives
+  the model and that a formatting-only config edit yields a cache hit.
+
+## Scope & boundaries
+
+**In scope (orcapod-python, this work):**
+- `orcapod/pydantic_config.py`: `load_pydantic_config`, `OrcapodBaseConfig`,
+  `PydanticModelConverter`.
+- Register the converter in the `DataContext` semantic registry.
+- Add `pydantic` (v2) as a dependency; tests; docs.
+
+**Out of scope (orcapod-spikesorting follow-up):**
+- Define a `config/` subpackage of pydantic models for the spike-sorting config.
+- Swap the broadcast config source from a `Path` `DataFrameSource` to the validated model
+  source; annotate pods with the model type.
+- Migrate `enigma-ephys` dict-key call sites. During the transition, existing functions can be
+  handed `config.model_dump()` (a plain dict), so the migration can be incremental.
+
+## Dependencies & risks
+
+- Adds `pydantic` v2 as an orcapod-python dependency (intended).
+- Content-hash semantics change for configs (meaning-based, not file-bytes). This is desired but
+  means existing caches keyed on the old `Path`-file hash won't match — a one-time recompute when
+  a pipeline migrates to the typed config. Document this.
+- Reconstruction requires the model class to be importable on workers (already true for wrapped
+  packages shipped via Ray `py_modules`).
+
+## Deferred / not now (YAGNI)
+
+- JSON-Schema export for docs/tooling — available for free via `model.model_json_schema()` if/when
+  wanted; not built now.
+- A standalone external schema file (XSD/JSON Schema) as the source of truth — rejected in favor
+  of pydantic models as the single source of truth (avoids a duplicated, drift-prone schema).

From 2e7060f86409ac82d1ce8bca29153b2657720d88 Mon Sep 17 00:00:00 2001
From: Brian Arnold <arnoldb@stanford.edu>
Date: Fri, 12 Jun 2026 22:09:32 +0000
Subject: [PATCH 02/12] docs(plans): add pydantic config loader implementation
 plan (ENG-607)

Task-by-task TDD plan: pydantic dependency, load_pydantic_config +
OrcapodBaseConfig, PydanticModelConverter semantic type, hash-stability tests,
and registration in the production (v0.1.json) and standalone registries.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../2026-06-12-pydantic-config-loader.md      | 584 ++++++++++++++++++
 1 file changed, 584 insertions(+)
 create mode 100644 superpowers/plans/2026-06-12-pydantic-config-loader.md

diff --git a/superpowers/plans/2026-06-12-pydantic-config-loader.md b/superpowers/plans/2026-06-12-pydantic-config-loader.md
new file mode 100644
index 00000000..fb5665cb
--- /dev/null
+++ b/superpowers/plans/2026-06-12-pydantic-config-loader.md
@@ -0,0 +1,584 @@
+# Pydantic Config Loader Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Add a reusable, pydantic-backed config facility to orcapod-python: validate a YAML config against a pydantic schema into a typed model at build time, and make a validated model a first-class, content-hashed orcapod value that pods receive already deserialized.
+
+**Architecture:** A new module `orcapod/pydantic_config.py` provides `load_pydantic_config()` (YAML → validated model), an optional strict base `OrcapodBaseConfig`, and a `PydanticModelConverter` semantic-type converter modeled on `PythonPathStructConverter`. The converter maps any `pydantic.BaseModel` ⇄ an Arrow struct holding the model's fully-qualified class name plus canonical JSON, content-hashing the canonical JSON so identity tracks config *meaning*, not YAML formatting. The converter is registered in the production semantic registry (`contexts/data/v0.1.json`) so the existing `UniversalTypeConverter` and `StarfixArrowHasher` pick it up automatically.
+
+**Tech Stack:** Python 3.12, pydantic v2, PyArrow, PyYAML, pytest, uv.
+
+**Spec:** `superpowers/specs/2026-06-12-pydantic-config-loader-design.md` (ENG-607).
+
+**Conventions:** Run everything via `uv run`. Google-style docstrings, no ReST roles. Conventional Commits. End commit messages with the `Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>` trailer.
+
+---
+
+## File Structure
+
+- Create: `src/orcapod/pydantic_config.py` — `load_pydantic_config`, `OrcapodBaseConfig`, `PydanticModelConverter`.
+- Create: `tests/test_pydantic_config.py` — loader + converter unit/integration tests.
+- Modify: `pyproject.toml` — add `pydantic>=2` to `dependencies`.
+- Modify: `src/orcapod/contexts/data/v0.1.json` — register the `pydantic` converter in `semantic_registry.converters` (production path).
+- Modify: `src/orcapod/hashing/versioned_hashers.py:135-138` — register the converter in the standalone fallback registry for consistency.
+
+---
+
+### Task 1: Add the pydantic dependency
+
+**Files:**
+- Modify: `pyproject.toml` (the `dependencies` list, ~lines 9-28)
+
+- [ ] **Step 1: Add the dependency**
+
+In `pyproject.toml`, add to the `dependencies` array (e.g. after the `"deltalake>=1.0.2",` line):
+
+```toml
+    "pydantic>=2",
+```
+
+- [ ] **Step 2: Sync the environment**
+
+Run: `uv sync`
+Expected: resolves and installs pydantic 2.x with no conflict.
+
+- [ ] **Step 3: Verify import**
+
+Run: `uv run python -c "import pydantic; print(pydantic.VERSION)"`
+Expected: prints a `2.x` version string.
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add pyproject.toml uv.lock
+git commit -m "chore(deps): add pydantic for typed config loading (ENG-607)"
+```
+
+---
+
+### Task 2: `load_pydantic_config` + `OrcapodBaseConfig`
+
+**Files:**
+- Create: `src/orcapod/pydantic_config.py`
+- Test: `tests/test_pydantic_config.py`
+
+- [ ] **Step 1: Write the failing tests**
+
+Create `tests/test_pydantic_config.py`:
+
+```python
+"""Tests for orcapod.pydantic_config (ENG-607)."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from orcapod.pydantic_config import OrcapodBaseConfig, load_pydantic_config
+
+
+class SampleConfig(OrcapodBaseConfig):
+    name: str
+    threshold: float
+    retries: int = 3
+
+
+def _write(tmp_path: Path, text: str) -> Path:
+    p = tmp_path / "config.yaml"
+    p.write_text(text, encoding="utf-8")
+    return p
+
+
+def test_loads_valid_config(tmp_path):
+    path = _write(tmp_path, "name: run1\nthreshold: 6.0\n")
+    cfg = load_pydantic_config(path, SampleConfig)
+    assert isinstance(cfg, SampleConfig)
+    assert cfg.name == "run1"
+    assert cfg.threshold == 6.0
+    assert cfg.retries == 3  # default applied
+
+
+def test_wrong_type_raises_with_path(tmp_path):
+    path = _write(tmp_path, "name: run1\nthreshold: not-a-number\n")
+    with pytest.raises(ValueError) as exc:
+        load_pydantic_config(path, SampleConfig)
+    assert "threshold" in str(exc.value)
+    assert str(path) in str(exc.value)
+
+
+def test_unknown_key_raises(tmp_path):
+    path = _write(tmp_path, "name: run1\nthreshold: 6.0\ntypo_key: 1\n")
+    with pytest.raises(ValueError) as exc:
+        load_pydantic_config(path, SampleConfig)
+    assert "typo_key" in str(exc.value)
+
+
+def test_missing_required_raises(tmp_path):
+    path = _write(tmp_path, "threshold: 6.0\n")
+    with pytest.raises(ValueError) as exc:
+        load_pydantic_config(path, SampleConfig)
+    assert "name" in str(exc.value)
+```
+
+- [ ] **Step 2: Run tests to verify they fail**
+
+Run: `uv run pytest tests/test_pydantic_config.py -q`
+Expected: FAIL — `ModuleNotFoundError: No module named 'orcapod.pydantic_config'`.
+
+- [ ] **Step 3: Implement the loader + base**
+
+Create `src/orcapod/pydantic_config.py`:
+
+```python
+"""Pydantic-backed config loading for orcapod pipelines (ENG-601 / ENG-607).
+
+Provides `load_pydantic_config` (validate a YAML file against a pydantic model)
+and `OrcapodBaseConfig` (a strict base for config schemas). A companion
+`PydanticModelConverter` (also in this module) makes a validated model a
+first-class, content-hashed orcapod value.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TypeVar
+
+import pydantic
+import yaml
+
+M = TypeVar("M", bound=pydantic.BaseModel)
+
+
+class OrcapodBaseConfig(pydantic.BaseModel):
+    """Recommended base for pipeline config schemas.
+
+    Defaults to strict validation: unknown keys are rejected and instances are
+    immutable. Subclass this for pipeline configs; subclass `pydantic.BaseModel`
+    directly only when different semantics are required.
+    """
+
+    model_config = pydantic.ConfigDict(extra="forbid", frozen=True)
+
+
+def load_pydantic_config(path: str | Path, model_cls: type[M]) -> M:
+    """Read a YAML file and validate it against a pydantic model.
+
+    Args:
+        path: Path to the YAML config file.
+        model_cls: The pydantic model class to validate against.
+
+    Returns:
+        A validated instance of `model_cls`.
+
+    Raises:
+        ValueError: If the YAML cannot be parsed or fails validation. The error
+            message includes the file path and the underlying field-level detail.
+    """
+    path = Path(path)
+    try:
+        with open(path, "r", encoding="utf-8") as f:
+            data = yaml.safe_load(f)
+    except yaml.YAMLError as e:
+        raise ValueError(f"Could not parse YAML config {path}: {e}") from e
+
+    try:
+        return model_cls.model_validate(data)
+    except pydantic.ValidationError as e:
+        raise ValueError(f"Config validation failed for {path}:\n{e}") from e
+```
+
+- [ ] **Step 4: Run tests to verify they pass**
+
+Run: `uv run pytest tests/test_pydantic_config.py -q`
+Expected: PASS (4 passed).
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/orcapod/pydantic_config.py tests/test_pydantic_config.py
+git commit -m "feat(pydantic_config): add load_pydantic_config and OrcapodBaseConfig (ENG-607)"
+```
+
+---
+
+### Task 3: `PydanticModelConverter` — model ⇄ Arrow struct round-trip
+
+**Files:**
+- Modify: `src/orcapod/pydantic_config.py`
+- Test: `tests/test_pydantic_config.py`
+
+- [ ] **Step 1: Write the failing tests**
+
+Append to `tests/test_pydantic_config.py`:
+
+```python
+import pyarrow as pa
+import pydantic
+
+from orcapod.pydantic_config import PydanticModelConverter
+
+def _converter() -> PydanticModelConverter:
+    return PydanticModelConverter()
+
+
+def test_converter_python_type_and_struct_signature():
+    conv = _converter()
+    assert conv.python_type is pydantic.BaseModel
+    sig = conv.arrow_struct_type
+    assert pa.types.is_struct(sig)
+    assert {f.name for f in sig} == {"__pydantic_model__", "__pydantic_json__"}
+    assert all(f.type == pa.large_string() for f in sig)
+
+
+def test_converter_can_handle_model_subclass():
+    conv = _converter()
+    assert conv.can_handle_python_type(SampleConfig) is True
+    assert conv.can_handle_python_type(int) is False
+
+
+def test_converter_roundtrip_model_to_struct_to_model():
+    conv = _converter()
+    cfg = SampleConfig(name="run1", threshold=6.0, retries=5)
+    struct = conv.python_to_struct_dict(cfg)
+    assert set(struct.keys()) == {"__pydantic_model__", "__pydantic_json__"}
+    assert struct["__pydantic_model__"].endswith(":SampleConfig")
+    restored = conv.struct_dict_to_python(struct)
+    assert isinstance(restored, SampleConfig)
+    assert restored == cfg
+
+
+def test_converter_can_handle_struct_type_and_is_semantic_struct():
+    conv = _converter()
+    assert conv.can_handle_struct_type(conv.arrow_struct_type) is True
+    assert conv.can_handle_struct_type(pa.struct([pa.field("path", pa.large_string())])) is False
+    cfg = SampleConfig(name="x", threshold=1.0)
+    assert conv.is_semantic_struct(conv.python_to_struct_dict(cfg)) is True
+    assert conv.is_semantic_struct({"path": "/tmp/x"}) is False
+```
+
+- [ ] **Step 2: Run tests to verify they fail**
+
+Run: `uv run pytest tests/test_pydantic_config.py -q`
+Expected: FAIL — `ImportError: cannot import name 'PydanticModelConverter'`.
+
+- [ ] **Step 3: Implement the converter**
+
+Append to `src/orcapod/pydantic_config.py`:
+
+```python
+import importlib
+from typing import Any
+
+from orcapod.semantic_types.semantic_struct_converters import (
+    SemanticStructConverterBase,
+)
+
+# Arrow struct field names for the serialized config.
+_MODEL_FIELD = "__pydantic_model__"  # fully-qualified "module:QualName"
+_JSON_FIELD = "__pydantic_json__"    # canonical JSON of the model
+
+
+def _qualified_name(cls: type) -> str:
+    return f"{cls.__module__}:{cls.__qualname__}"
+
+
+def _import_model(qualified_name: str) -> type[pydantic.BaseModel]:
+    module_path, _, qualname = qualified_name.partition(":")
+    module = importlib.import_module(module_path)
+    obj: Any = module
+    for part in qualname.split("."):
+        obj = getattr(obj, part)
+    return obj
+
+
+class PydanticModelConverter(SemanticStructConverterBase):
+    """Semantic-type converter for pydantic models.
+
+    Maps any `pydantic.BaseModel` instance to an Arrow struct holding the
+    model's fully-qualified class name and its canonical JSON, and back. Content
+    is hashed over (class name + canonical JSON), so identity tracks the config's
+    meaning rather than source-file formatting. Modeled on `PythonPathStructConverter`.
+    """
+
+    def __init__(self) -> None:
+        super().__init__("pydantic")
+        import pyarrow as pa
+
+        self._arrow_struct_type = pa.struct(
+            [
+                pa.field(_MODEL_FIELD, pa.large_string()),
+                pa.field(_JSON_FIELD, pa.large_string()),
+            ]
+        )
+
+    @property
+    def python_type(self) -> type:
+        return pydantic.BaseModel
+
+    @property
+    def arrow_struct_type(self) -> "Any":
+        return self._arrow_struct_type
+
+    def can_handle_python_type(self, python_type: type) -> bool:
+        return isinstance(python_type, type) and issubclass(
+            python_type, pydantic.BaseModel
+        )
+
+    def can_handle_struct_type(self, struct_type: "Any") -> bool:
+        import pyarrow as pa
+
+        if not pa.types.is_struct(struct_type):
+            return False
+        for field in self._arrow_struct_type:
+            if (
+                field.name not in struct_type.names
+                or struct_type[field.name].type != field.type
+            ):
+                return False
+        return True
+
+    def is_semantic_struct(self, struct_dict: dict[str, Any]) -> bool:
+        return set(struct_dict.keys()) == {_MODEL_FIELD, _JSON_FIELD}
+
+    def python_to_struct_dict(self, value: Any) -> dict[str, Any]:
+        if not isinstance(value, pydantic.BaseModel):
+            raise TypeError(f"Expected a pydantic BaseModel, got {type(value)}")
+        return {
+            _MODEL_FIELD: _qualified_name(type(value)),
+            _JSON_FIELD: value.model_dump_json(),
+        }
+
+    def struct_dict_to_python(self, struct_dict: dict[str, Any]) -> Any:
+        qualified_name = struct_dict.get(_MODEL_FIELD)
+        json_str = struct_dict.get(_JSON_FIELD)
+        if qualified_name is None or json_str is None:
+            raise ValueError(
+                f"Missing '{_MODEL_FIELD}'/'{_JSON_FIELD}' in struct dict"
+            )
+        model_cls = _import_model(qualified_name)
+        return model_cls.model_validate_json(json_str)
+
+    def hash_struct_dict(
+        self, struct_dict: dict[str, Any], add_prefix: bool = False
+    ) -> str:
+        qualified_name = struct_dict.get(_MODEL_FIELD)
+        json_str = struct_dict.get(_JSON_FIELD)
+        if qualified_name is None or json_str is None:
+            raise ValueError(
+                f"Missing '{_MODEL_FIELD}'/'{_JSON_FIELD}' in struct dict"
+            )
+        content = f"{qualified_name}\n{json_str}".encode("utf-8")
+        content_hash = self._compute_content_hash(content)
+        return self._format_hash_string(content_hash.digest, add_prefix=add_prefix)
+```
+
+- [ ] **Step 4: Run tests to verify they pass**
+
+Run: `uv run pytest tests/test_pydantic_config.py -q`
+Expected: PASS (all tests, including Task 2's).
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/orcapod/pydantic_config.py tests/test_pydantic_config.py
+git commit -m "feat(pydantic_config): add PydanticModelConverter semantic type (ENG-607)"
+```
+
+---
+
+### Task 4: Hash stability — meaning, not formatting
+
+**Files:**
+- Test: `tests/test_pydantic_config.py`
+
+- [ ] **Step 1: Write the failing tests**
+
+Append to `tests/test_pydantic_config.py`:
+
+```python
+def test_hash_equal_for_equal_values():
+    conv = _converter()
+    a = conv.python_to_struct_dict(SampleConfig(name="run1", threshold=6.0, retries=5))
+    b = conv.python_to_struct_dict(SampleConfig(name="run1", threshold=6.0, retries=5))
+    assert conv.hash_struct_dict(a) == conv.hash_struct_dict(b)
+
+
+def test_hash_differs_for_different_values():
+    conv = _converter()
+    a = conv.python_to_struct_dict(SampleConfig(name="run1", threshold=6.0))
+    b = conv.python_to_struct_dict(SampleConfig(name="run1", threshold=7.0))
+    assert conv.hash_struct_dict(a) != conv.hash_struct_dict(b)
+
+
+def test_hash_stable_across_yaml_formatting(tmp_path):
+    # Two YAMLs that differ only in comments / key order / whitespace
+    # must produce the same validated model and therefore the same hash.
+    yaml_a = "name: run1\nthreshold: 6.0\nretries: 5\n"
+    yaml_b = "# a comment\nretries: 5\nthreshold:   6.0\nname: run1\n"
+    pa_path = _write(tmp_path, yaml_a)
+    cfg_a = load_pydantic_config(pa_path, SampleConfig)
+    pb_path = tmp_path / "b.yaml"
+    pb_path.write_text(yaml_b, encoding="utf-8")
+    cfg_b = load_pydantic_config(pb_path, SampleConfig)
+
+    conv = _converter()
+    ha = conv.hash_struct_dict(conv.python_to_struct_dict(cfg_a))
+    hb = conv.hash_struct_dict(conv.python_to_struct_dict(cfg_b))
+    assert ha == hb
+```
+
+- [ ] **Step 2: Run tests**
+
+Run: `uv run pytest tests/test_pydantic_config.py -q`
+Expected: PASS — the implementation from Task 3 already satisfies these (no new code needed). If `test_hash_stable_across_yaml_formatting` fails, it indicates `model_dump_json()` is non-deterministic for this model; investigate before proceeding.
+
+- [ ] **Step 3: Commit**
+
+```bash
+git add tests/test_pydantic_config.py
+git commit -m "test(pydantic_config): assert hash tracks config meaning, not formatting (ENG-607)"
+```
+
+---
+
+### Task 5: Register the converter in the production + standalone registries
+
+**Files:**
+- Modify: `src/orcapod/contexts/data/v0.1.json` (the `semantic_registry` → `_config` → `converters` object)
+- Modify: `src/orcapod/hashing/versioned_hashers.py:135-138`
+- Test: `tests/test_pydantic_config.py`
+
+- [ ] **Step 1: Write the failing integration test**
+
+Append to `tests/test_pydantic_config.py`:
+
+```python
+from orcapod.contexts import get_default_context
+from orcapod.types import Schema
+
+
+def test_registered_in_default_context_roundtrip():
+    ctx = get_default_context()
+    converter = ctx.type_converter
+
+    cfg = SampleConfig(name="run1", threshold=6.0, retries=5)
+    table = converter.python_dicts_to_arrow_table(
+        [{"config": cfg}], python_schema=Schema({"config": SampleConfig})
+    )
+    # Stored as the pydantic struct, not an opaque blob.
+    assert pa.types.is_struct(table.schema.field("config").type)
+    assert {f.name for f in table.schema.field("config").type} == {
+        "__pydantic_model__",
+        "__pydantic_json__",
+    }
+
+    restored = converter.arrow_table_to_python_dicts(table)
+    assert isinstance(restored[0]["config"], SampleConfig)
+    assert restored[0]["config"] == cfg
+
+
+def test_default_context_hashes_model_stably():
+    ctx = get_default_context()
+    converter = ctx.type_converter
+    schema = Schema({"config": SampleConfig})
+    t1 = converter.python_dicts_to_arrow_table(
+        [{"config": SampleConfig(name="r", threshold=6.0)}], python_schema=schema
+    )
+    t2 = converter.python_dicts_to_arrow_table(
+        [{"config": SampleConfig(name="r", threshold=6.0)}], python_schema=schema
+    )
+    h1 = ctx.arrow_hasher.hash_table(t1)
+    h2 = ctx.arrow_hasher.hash_table(t2)
+    assert h1 == h2
+```
+
+Note: if `arrow_hasher` exposes a different method than `hash_table`, adjust the last two lines to the actual public hashing entry point (confirm by reading `ctx.arrow_hasher`'s class). The first test is the load-bearing one.
+
+- [ ] **Step 2: Run tests to verify they fail**
+
+Run: `uv run pytest tests/test_pydantic_config.py -k "default_context" -q`
+Expected: FAIL — the converter is not yet registered, so `python_dicts_to_arrow_table` does not produce the pydantic struct (it errors or produces a non-struct column).
+
+- [ ] **Step 3: Register in the production JSON registry**
+
+In `src/orcapod/contexts/data/v0.1.json`, inside `semantic_registry._config.converters` (alongside `"path"` and `"upath"`), add:
+
+```json
+      "pydantic": {
+        "_class": "orcapod.pydantic_config.PydanticModelConverter",
+        "_config": {}
+      }
+```
+
+(Place it as a sibling key; mind the trailing commas so the JSON stays valid.)
+
+- [ ] **Step 4: Register in the standalone fallback registry**
+
+In `src/orcapod/hashing/versioned_hashers.py`, after the existing `registry.register_converter("path", path_converter)` (line ~138), add:
+
+```python
+    from orcapod.pydantic_config import PydanticModelConverter
+
+    registry.register_converter("pydantic", PydanticModelConverter())
+```
+
+- [ ] **Step 5: Run tests to verify they pass**
+
+Run: `uv run pytest tests/test_pydantic_config.py -q`
+Expected: PASS (all tests).
+
+- [ ] **Step 6: Run the semantic-types + hashing suites for regressions**
+
+Run: `uv run pytest tests/test_semantic_types tests/test_hashing -q`
+Expected: PASS (no regressions from the new registration).
+
+- [ ] **Step 7: Commit**
+
+```bash
+git add src/orcapod/contexts/data/v0.1.json src/orcapod/hashing/versioned_hashers.py tests/test_pydantic_config.py
+git commit -m "feat(pydantic_config): register PydanticModelConverter in default registries (ENG-607)"
+```
+
+---
+
+### Task 6: Full-suite verification + DESIGN_ISSUES note
+
+**Files:**
+- Modify: `DESIGN_ISSUES.md` (optional — only if a matching issue exists; otherwise skip)
+
+- [ ] **Step 1: Run the full test suite**
+
+Run: `uv run pytest -m "not postgres" -q`
+Expected: PASS (no regressions). Skipped tests are expected and do not indicate a failure.
+
+- [ ] **Step 2: Verify the new module imports cleanly (and type-check it if the repo runs a type checker in CI)**
+
+Run: `uv run python -c "import orcapod.pydantic_config"`
+Expected: imports cleanly. (If the repo uses pyright/mypy in CI, run that on `src/orcapod/pydantic_config.py` and fix any issues.)
+
+- [ ] **Step 3: Final commit (only if Step 2 required edits)**
+
+```bash
+git add -A
+git commit -m "chore(pydantic_config): satisfy type checker (ENG-607)"
+```
+
+---
+
+## Self-Review
+
+**Spec coverage:**
+- Reusable loader in orcapod-python → Task 2 (`load_pydantic_config`). ✓
+- Validate at build time, clear field-located error → Task 2 tests (wrong type / unknown key / missing required, path in message). ✓
+- Typed config is a first-class, content-hashed pod input → Task 3 (converter) + Task 5 (registration; round-trip + struct storage). ✓
+- Pods receive the typed model with no per-pod deserialization → Task 5 `test_registered_in_default_context_roundtrip` proves automatic reconstruction via the type converter (the actual pod parameter wiring is the spike-sorting follow-up, out of scope here). ✓
+- Hash over meaning, not formatting → Task 4 (`test_hash_stable_across_yaml_formatting`). ✓
+- `OrcapodBaseConfig` strict base → Task 2. ✓
+- Add pydantic dependency → Task 1. ✓
+
+**Out of scope (correctly deferred):** spike-sorting `config/` models, source swap, pod annotations, enigma-ephys migration (separate follow-up per spec).
+
+**Type consistency:** `PydanticModelConverter` uses `_MODEL_FIELD`/`_JSON_FIELD` consistently across `python_to_struct_dict`, `struct_dict_to_python`, `is_semantic_struct`, and `hash_struct_dict`. `python_type` returns `pydantic.BaseModel`; registry subclass-matching handles concrete subclasses (verified against `SemanticTypeRegistry.get_converter_for_python_type`).
+
+**Known verification point:** Task 5 Step 1 notes the `arrow_hasher` hashing method name (`hash_table`) must be confirmed against the concrete hasher class; the primary round-trip assertion does not depend on it.

From 8bbc5c10d19ef33c704a142dbdc6c403d2224d24 Mon Sep 17 00:00:00 2001
From: Brian Arnold <arnoldb@stanford.edu>
Date: Fri, 12 Jun 2026 22:16:25 +0000
Subject: [PATCH 03/12] chore(deps): add pydantic for typed config loading
 (ENG-607)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 pyproject.toml | 1 +
 uv.lock        | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index b3c772ef..b4a6e2a8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,6 +17,7 @@ dependencies = [
     "polars>=1.31.0",
     "beartype>=0.21.0",
     "deltalake>=1.0.2",
+    "pydantic>=2",
     "graphviz>=0.21",
     "gitpython>=3.1.45",
     "universal-pathlib>=0.3.8",
diff --git a/uv.lock b/uv.lock
index 446f7ebf..f9e9d0d8 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 3
+revision = 2
 requires-python = ">=3.11.0"
 resolution-markers = [
     "python_full_version >= '3.14' and sys_platform == 'darwin'",
@@ -2301,6 +2301,7 @@ dependencies = [
     { name = "pandas" },
     { name = "polars" },
     { name = "pyarrow" },
+    { name = "pydantic" },
     { name = "pygraphviz" },
     { name = "pymongo" },
     { name = "pyyaml" },
@@ -2380,6 +2381,7 @@ requires-dist = [
     { name = "psycopg", extras = ["binary"], marker = "extra == 'all'", specifier = ">=3.0" },
     { name = "psycopg", extras = ["binary"], marker = "extra == 'postgresql'", specifier = ">=3.0" },
     { name = "pyarrow", specifier = ">=20.0.0" },
+    { name = "pydantic", specifier = ">=2" },
     { name = "pygraphviz", specifier = ">=1.14" },
     { name = "pymongo", specifier = ">=4.15.5" },
     { name = "pyspiral", marker = "extra == 'all'", specifier = ">=0.11.0" },

From 1afbc52c065b6f500c9faeeaf574ada6827daf36 Mon Sep 17 00:00:00 2001
From: Brian Arnold <arnoldb@stanford.edu>
Date: Fri, 12 Jun 2026 22:22:24 +0000
Subject: [PATCH 04/12] feat(pydantic_config): add load_pydantic_config and
 OrcapodBaseConfig (ENG-607)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/orcapod/pydantic_config.py | 55 ++++++++++++++++++++++++++++++++++
 tests/test_pydantic_config.py  | 52 ++++++++++++++++++++++++++++++++
 2 files changed, 107 insertions(+)
 create mode 100644 src/orcapod/pydantic_config.py
 create mode 100644 tests/test_pydantic_config.py

diff --git a/src/orcapod/pydantic_config.py b/src/orcapod/pydantic_config.py
new file mode 100644
index 00000000..7707c7dd
--- /dev/null
+++ b/src/orcapod/pydantic_config.py
@@ -0,0 +1,55 @@
+"""Pydantic-backed config loading for orcapod pipelines (ENG-601 / ENG-607).
+
+Provides `load_pydantic_config` (validate a YAML file against a pydantic model)
+and `OrcapodBaseConfig` (a strict base for config schemas). A companion
+`PydanticModelConverter` (also in this module) makes a validated model a
+first-class, content-hashed orcapod value.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TypeVar
+
+import pydantic
+import yaml
+
+M = TypeVar("M", bound=pydantic.BaseModel)
+
+
+class OrcapodBaseConfig(pydantic.BaseModel):
+    """Recommended base for pipeline config schemas.
+
+    Defaults to strict validation: unknown keys are rejected and instances are
+    immutable. Subclass this for pipeline configs; subclass `pydantic.BaseModel`
+    directly only when different semantics are required.
+    """
+
+    model_config = pydantic.ConfigDict(extra="forbid", frozen=True)
+
+
+def load_pydantic_config(path: str | Path, model_cls: type[M]) -> M:
+    """Read a YAML file and validate it against a pydantic model.
+
+    Args:
+        path: Path to the YAML config file.
+        model_cls: The pydantic model class to validate against.
+
+    Returns:
+        A validated instance of `model_cls`.
+
+    Raises:
+        ValueError: If the YAML cannot be parsed or fails validation. The error
+            message includes the file path and the underlying field-level detail.
+    """
+    path = Path(path)
+    try:
+        with open(path, "r", encoding="utf-8") as f:
+            data = yaml.safe_load(f)
+    except yaml.YAMLError as e:
+        raise ValueError(f"Could not parse YAML config {path}: {e}") from e
+
+    try:
+        return model_cls.model_validate(data)
+    except pydantic.ValidationError as e:
+        raise ValueError(f"Config validation failed for {path}:\n{e}") from e
diff --git a/tests/test_pydantic_config.py b/tests/test_pydantic_config.py
new file mode 100644
index 00000000..894b9b9b
--- /dev/null
+++ b/tests/test_pydantic_config.py
@@ -0,0 +1,52 @@
+"""Tests for orcapod.pydantic_config (ENG-607)."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from orcapod.pydantic_config import OrcapodBaseConfig, load_pydantic_config
+
+
+class SampleConfig(OrcapodBaseConfig):
+    name: str
+    threshold: float
+    retries: int = 3
+
+
+def _write(tmp_path: Path, text: str) -> Path:
+    p = tmp_path / "config.yaml"
+    p.write_text(text, encoding="utf-8")
+    return p
+
+
+def test_loads_valid_config(tmp_path):
+    path = _write(tmp_path, "name: run1\nthreshold: 6.0\n")
+    cfg = load_pydantic_config(path, SampleConfig)
+    assert isinstance(cfg, SampleConfig)
+    assert cfg.name == "run1"
+    assert cfg.threshold == 6.0
+    assert cfg.retries == 3  # default applied
+
+
+def test_wrong_type_raises_with_path(tmp_path):
+    path = _write(tmp_path, "name: run1\nthreshold: not-a-number\n")
+    with pytest.raises(ValueError) as exc:
+        load_pydantic_config(path, SampleConfig)
+    assert "threshold" in str(exc.value)
+    assert str(path) in str(exc.value)
+
+
+def test_unknown_key_raises(tmp_path):
+    path = _write(tmp_path, "name: run1\nthreshold: 6.0\ntypo_key: 1\n")
+    with pytest.raises(ValueError) as exc:
+        load_pydantic_config(path, SampleConfig)
+    assert "typo_key" in str(exc.value)
+
+
+def test_missing_required_raises(tmp_path):
+    path = _write(tmp_path, "threshold: 6.0\n")
+    with pytest.raises(ValueError) as exc:
+        load_pydantic_config(path, SampleConfig)
+    assert "name" in str(exc.value)

From 26fc1fa5458bc04c8b83b374fe699e7489d7fe8f Mon Sep 17 00:00:00 2001
From: Brian Arnold <arnoldb@stanford.edu>
Date: Fri, 12 Jun 2026 22:28:23 +0000
Subject: [PATCH 05/12] fix(pydantic_config): wrap file IO errors as ValueError
 (ENG-607)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/orcapod/pydantic_config.py |  2 ++
 tests/test_pydantic_config.py  | 14 ++++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/src/orcapod/pydantic_config.py b/src/orcapod/pydantic_config.py
index 7707c7dd..88c8a0d0 100644
--- a/src/orcapod/pydantic_config.py
+++ b/src/orcapod/pydantic_config.py
@@ -48,6 +48,8 @@ def load_pydantic_config(path: str | Path, model_cls: type[M]) -> M:
             data = yaml.safe_load(f)
     except yaml.YAMLError as e:
         raise ValueError(f"Could not parse YAML config {path}: {e}") from e
+    except OSError as e:
+        raise ValueError(f"Could not read YAML config {path}: {e}") from e
 
     try:
         return model_cls.model_validate(data)
diff --git a/tests/test_pydantic_config.py b/tests/test_pydantic_config.py
index 894b9b9b..e2fc667a 100644
--- a/tests/test_pydantic_config.py
+++ b/tests/test_pydantic_config.py
@@ -50,3 +50,17 @@ def test_missing_required_raises(tmp_path):
     with pytest.raises(ValueError) as exc:
         load_pydantic_config(path, SampleConfig)
     assert "name" in str(exc.value)
+
+
+def test_missing_file_raises_value_error(tmp_path):
+    missing = tmp_path / "does_not_exist.yaml"
+    with pytest.raises(ValueError) as exc:
+        load_pydantic_config(missing, SampleConfig)
+    assert str(missing) in str(exc.value)
+
+
+def test_empty_file_raises_value_error(tmp_path):
+    path = _write(tmp_path, "")
+    with pytest.raises(ValueError) as exc:
+        load_pydantic_config(path, SampleConfig)
+    assert str(path) in str(exc.value)

From b402058cb2b68301396438b0ad1d836122c5d834 Mon Sep 17 00:00:00 2001
From: Brian Arnold <arnoldb@stanford.edu>
Date: Fri, 12 Jun 2026 22:33:59 +0000
Subject: [PATCH 06/12] feat(pydantic_config): add PydanticModelConverter
 semantic type (ENG-607)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/orcapod/pydantic_config.py | 107 ++++++++++++++++++++++++++++++++-
 tests/test_pydantic_config.py  |  43 ++++++++++++-
 2 files changed, 148 insertions(+), 2 deletions(-)

diff --git a/src/orcapod/pydantic_config.py b/src/orcapod/pydantic_config.py
index 88c8a0d0..a5d2478c 100644
--- a/src/orcapod/pydantic_config.py
+++ b/src/orcapod/pydantic_config.py
@@ -8,12 +8,18 @@
 
 from __future__ import annotations
 
+import importlib
 from pathlib import Path
-from typing import TypeVar
+from typing import TYPE_CHECKING, Any, TypeVar
 
 import pydantic
 import yaml
 
+from orcapod.semantic_types.semantic_struct_converters import SemanticStructConverterBase
+
+if TYPE_CHECKING:
+    import pyarrow as pa
+
 M = TypeVar("M", bound=pydantic.BaseModel)
 
 
@@ -55,3 +61,102 @@ def load_pydantic_config(path: str | Path, model_cls: type[M]) -> M:
         return model_cls.model_validate(data)
     except pydantic.ValidationError as e:
         raise ValueError(f"Config validation failed for {path}:\n{e}") from e
+
+
+# Arrow struct field names for the serialized config.
+_MODEL_FIELD = "__pydantic_model__"  # fully-qualified "module:QualName"
+_JSON_FIELD = "__pydantic_json__"    # model JSON as emitted by model_dump_json()
+
+
+def _qualified_name(cls: type) -> str:
+    return f"{cls.__module__}:{cls.__qualname__}"
+
+
+def _import_model(qualified_name: str) -> type[pydantic.BaseModel]:
+    module_path, _, qualname = qualified_name.partition(":")
+    module = importlib.import_module(module_path)
+    obj: Any = module
+    for part in qualname.split("."):
+        obj = getattr(obj, part)
+    return obj
+
+
+class PydanticModelConverter(SemanticStructConverterBase):
+    """Semantic-type converter for pydantic models.
+
+    Maps any `pydantic.BaseModel` instance to an Arrow struct holding the
+    model's fully-qualified class name and its canonical JSON, and back. Content
+    is hashed over (class name + canonical JSON), so identity tracks the config's
+    meaning rather than source-file formatting. Modeled on `PythonPathStructConverter`.
+    """
+
+    def __init__(self) -> None:
+        super().__init__("pydantic")
+        import pyarrow as pa
+
+        self._arrow_struct_type = pa.struct(
+            [
+                pa.field(_MODEL_FIELD, pa.large_string()),
+                pa.field(_JSON_FIELD, pa.large_string()),
+            ]
+        )
+
+    @property
+    def python_type(self) -> type:
+        return pydantic.BaseModel
+
+    @property
+    def arrow_struct_type(self) -> Any:
+        return self._arrow_struct_type
+
+    def can_handle_python_type(self, python_type: type) -> bool:
+        return isinstance(python_type, type) and issubclass(
+            python_type, pydantic.BaseModel
+        )
+
+    def can_handle_struct_type(self, struct_type: Any) -> bool:
+        import pyarrow as pa
+
+        if not pa.types.is_struct(struct_type):
+            return False
+        for field in self._arrow_struct_type:
+            if (
+                field.name not in struct_type.names
+                or struct_type[field.name].type != field.type
+            ):
+                return False
+        return True
+
+    def is_semantic_struct(self, struct_dict: dict[str, Any]) -> bool:
+        return set(struct_dict.keys()) == {_MODEL_FIELD, _JSON_FIELD}
+
+    def python_to_struct_dict(self, value: Any) -> dict[str, Any]:
+        if not isinstance(value, pydantic.BaseModel):
+            raise TypeError(f"Expected a pydantic BaseModel, got {type(value)}")
+        return {
+            _MODEL_FIELD: _qualified_name(type(value)),
+            _JSON_FIELD: value.model_dump_json(),
+        }
+
+    def struct_dict_to_python(self, struct_dict: dict[str, Any]) -> Any:
+        qualified_name = struct_dict.get(_MODEL_FIELD)
+        json_str = struct_dict.get(_JSON_FIELD)
+        if qualified_name is None or json_str is None:
+            raise ValueError(
+                f"Missing '{_MODEL_FIELD}'/'{_JSON_FIELD}' in struct dict"
+            )
+        model_cls = _import_model(qualified_name)
+        return model_cls.model_validate_json(json_str)
+
+    def hash_struct_dict(
+        self, struct_dict: dict[str, Any], add_prefix: bool = False
+    ) -> str:
+        qualified_name = struct_dict.get(_MODEL_FIELD)
+        json_str = struct_dict.get(_JSON_FIELD)
+        if qualified_name is None or json_str is None:
+            raise ValueError(
+                f"Missing '{_MODEL_FIELD}'/'{_JSON_FIELD}' in struct dict"
+            )
+        content = f"{qualified_name}\n{json_str}".encode("utf-8")
+        content_hash = self._compute_content_hash(content)
+        return self._format_hash_string(content_hash.digest, add_prefix=add_prefix)
diff --git a/tests/test_pydantic_config.py b/tests/test_pydantic_config.py
index e2fc667a..98d48608 100644
--- a/tests/test_pydantic_config.py
+++ b/tests/test_pydantic_config.py
@@ -4,9 +4,11 @@
 
 from pathlib import Path
 
+import pyarrow as pa
+import pydantic
 import pytest
 
-from orcapod.pydantic_config import OrcapodBaseConfig, load_pydantic_config
+from orcapod.pydantic_config import OrcapodBaseConfig, PydanticModelConverter, load_pydantic_config
 
 
 class SampleConfig(OrcapodBaseConfig):
@@ -64,3 +66,42 @@ def test_empty_file_raises_value_error(tmp_path):
     with pytest.raises(ValueError) as exc:
         load_pydantic_config(path, SampleConfig)
     assert str(path) in str(exc.value)
+
+
+def _converter() -> PydanticModelConverter:
+    return PydanticModelConverter()
+
+
+def test_converter_python_type_and_struct_signature():
+    conv = _converter()
+    assert conv.python_type is pydantic.BaseModel
+    sig = conv.arrow_struct_type
+    assert pa.types.is_struct(sig)
+    assert {f.name for f in sig} == {"__pydantic_model__", "__pydantic_json__"}
+    assert all(f.type == pa.large_string() for f in sig)
+
+
+def test_converter_can_handle_model_subclass():
+    conv = _converter()
+    assert conv.can_handle_python_type(SampleConfig) is True
+    assert conv.can_handle_python_type(int) is False
+
+
+def test_converter_roundtrip_model_to_struct_to_model():
+    conv = _converter()
+    cfg = SampleConfig(name="run1", threshold=6.0, retries=5)
+    struct = conv.python_to_struct_dict(cfg)
+    assert set(struct.keys()) == {"__pydantic_model__", "__pydantic_json__"}
+    assert struct["__pydantic_model__"].endswith(":SampleConfig")
+    restored = conv.struct_dict_to_python(struct)
+    assert isinstance(restored, SampleConfig)
+    assert restored == cfg
+
+
+def test_converter_can_handle_struct_type_and_is_semantic_struct():
+    conv = _converter()
+    assert conv.can_handle_struct_type(conv.arrow_struct_type) is True
+    assert conv.can_handle_struct_type(pa.struct([pa.field("path", pa.large_string())])) is False
+    cfg = SampleConfig(name="x", threshold=1.0)
+    assert conv.is_semantic_struct(conv.python_to_struct_dict(cfg)) is True
+    assert conv.is_semantic_struct({"path": "/tmp/x"}) is False

From 4a809de7e2670cd9e07bf31729731d0a4a4fb630 Mon Sep 17 00:00:00 2001
From: Brian Arnold <arnoldb@stanford.edu>
Date: Fri, 12 Jun 2026 22:44:43 +0000
Subject: [PATCH 07/12] fix(pydantic_config): clearer import errors, stricter
 struct check, lazy pyarrow (ENG-607)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/orcapod/pydantic_config.py | 32 ++++++++++++++++++++++++--------
 tests/test_pydantic_config.py  |  9 +++++++++
 2 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/src/orcapod/pydantic_config.py b/src/orcapod/pydantic_config.py
index a5d2478c..c03b053a 100644
--- a/src/orcapod/pydantic_config.py
+++ b/src/orcapod/pydantic_config.py
@@ -16,9 +16,12 @@
 import yaml
 
 from orcapod.semantic_types.semantic_struct_converters import SemanticStructConverterBase
+from orcapod.utils.lazy_module import LazyModule
 
 if TYPE_CHECKING:
     import pyarrow as pa
+else:
+    pa = LazyModule("pyarrow")
 
 M = TypeVar("M", bound=pydantic.BaseModel)
 
@@ -74,10 +77,21 @@ def _qualified_name(cls: type) -> str:
 
 def _import_model(qualified_name: str) -> type[pydantic.BaseModel]:
     module_path, _, qualname = qualified_name.partition(":")
-    module = importlib.import_module(module_path)
+    try:
+        module = importlib.import_module(module_path)
+    except ImportError as e:
+        raise ImportError(
+            f"Cannot import module '{module_path}' for pydantic model "
+            f"'{qualified_name}': {e}"
+        ) from e
     obj: Any = module
     for part in qualname.split("."):
-        obj = getattr(obj, part)
+        try:
+            obj = getattr(obj, part)
+        except AttributeError as e:
+            raise ImportError(
+                f"Cannot resolve '{part}' in '{qualified_name}': {e}"
+            ) from e
     return obj
 
 
@@ -92,8 +106,6 @@ class PydanticModelConverter(SemanticStructConverterBase):
 
     def __init__(self) -> None:
         super().__init__("pydantic")
-        import pyarrow as pa
-
         self._arrow_struct_type = pa.struct(
             [
                 pa.field(_MODEL_FIELD, pa.large_string()),
@@ -106,7 +118,7 @@ def python_type(self) -> type:
         return pydantic.BaseModel
 
     @property
-    def arrow_struct_type(self) -> Any:
+    def arrow_struct_type(self) -> "pa.StructType":
         return self._arrow_struct_type
 
     def can_handle_python_type(self, python_type: type) -> bool:
@@ -115,8 +127,6 @@ def can_handle_python_type(self, python_type: type) -> bool:
         )
 
     def can_handle_struct_type(self, struct_type: Any) -> bool:
-        import pyarrow as pa
-
         if not pa.types.is_struct(struct_type):
             return False
         for field in self._arrow_struct_type:
@@ -128,13 +138,19 @@ def can_handle_struct_type(self, struct_type: Any) -> bool:
         return True
 
     def is_semantic_struct(self, struct_dict: dict[str, Any]) -> bool:
-        return set(struct_dict.keys()) == {_MODEL_FIELD, _JSON_FIELD}
+        return (
+            set(struct_dict.keys()) == {_MODEL_FIELD, _JSON_FIELD}
+            and isinstance(struct_dict[_MODEL_FIELD], str)
+            and isinstance(struct_dict[_JSON_FIELD], str)
+        )
 
     def python_to_struct_dict(self, value: Any) -> dict[str, Any]:
         if not isinstance(value, pydantic.BaseModel):
             raise TypeError(f"Expected a pydantic BaseModel, got {type(value)}")
         return {
             _MODEL_FIELD: _qualified_name(type(value)),
+            # model_dump_json() serialises fields in definition order (pydantic v2),
+            # but dict-valued fields keep insertion order, so this is not canonical.
             _JSON_FIELD: value.model_dump_json(),
         }
 
diff --git a/tests/test_pydantic_config.py b/tests/test_pydantic_config.py
index 98d48608..1c7dae89 100644
--- a/tests/test_pydantic_config.py
+++ b/tests/test_pydantic_config.py
@@ -105,3 +105,12 @@ def test_converter_can_handle_struct_type_and_is_semantic_struct():
     cfg = SampleConfig(name="x", threshold=1.0)
     assert conv.is_semantic_struct(conv.python_to_struct_dict(cfg)) is True
     assert conv.is_semantic_struct({"path": "/tmp/x"}) is False
+
+
+def test_struct_dict_to_python_bad_qualname_raises_importerror():
+    conv = _converter()
+    with pytest.raises(ImportError) as exc:
+        conv.struct_dict_to_python(
+            {"__pydantic_model__": "no.such.module:Nope", "__pydantic_json__": "{}"}
+        )
+    assert "no.such.module:Nope" in str(exc.value)

From cb8f976287fb657eaebd7ce077a23690ec2fe399 Mon Sep 17 00:00:00 2001
From: Brian Arnold <arnoldb@stanford.edu>
Date: Fri, 12 Jun 2026 22:46:09 +0000
Subject: [PATCH 08/12] test(pydantic_config): assert hash tracks config
 meaning, not formatting (ENG-607)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 tests/test_pydantic_config.py | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/tests/test_pydantic_config.py b/tests/test_pydantic_config.py
index 1c7dae89..7626c350 100644
--- a/tests/test_pydantic_config.py
+++ b/tests/test_pydantic_config.py
@@ -114,3 +114,34 @@ def test_struct_dict_to_python_bad_qualname_raises_importerror():
             {"__pydantic_model__": "no.such.module:Nope", "__pydantic_json__": "{}"}
         )
     assert "no.such.module:Nope" in str(exc.value)
+
+
+def test_hash_equal_for_equal_values():
+    conv = _converter()
+    a = conv.python_to_struct_dict(SampleConfig(name="run1", threshold=6.0, retries=5))
+    b = conv.python_to_struct_dict(SampleConfig(name="run1", threshold=6.0, retries=5))
+    assert conv.hash_struct_dict(a) == conv.hash_struct_dict(b)
+
+
+def test_hash_differs_for_different_values():
+    conv = _converter()
+    a = conv.python_to_struct_dict(SampleConfig(name="run1", threshold=6.0))
+    b = conv.python_to_struct_dict(SampleConfig(name="run1", threshold=7.0))
+    assert conv.hash_struct_dict(a) != conv.hash_struct_dict(b)
+
+
+def test_hash_stable_across_yaml_formatting(tmp_path):
+    # Two YAMLs that differ only in comments / key order / whitespace
+    # must produce the same validated model and therefore the same hash.
+    yaml_a = "name: run1\nthreshold: 6.0\nretries: 5\n"
+    yaml_b = "# a comment\nretries: 5\nthreshold:   6.0\nname: run1\n"
+    pa_path = _write(tmp_path, yaml_a)
+    cfg_a = load_pydantic_config(pa_path, SampleConfig)
+    pb_path = tmp_path / "b.yaml"
+    pb_path.write_text(yaml_b, encoding="utf-8")
+    cfg_b = load_pydantic_config(pb_path, SampleConfig)
+
+    conv = _converter()
+    ha = conv.hash_struct_dict(conv.python_to_struct_dict(cfg_a))
+    hb = conv.hash_struct_dict(conv.python_to_struct_dict(cfg_b))
+    assert ha == hb

From 3aea43d4d3daaa6709254aae902918699cad3122 Mon Sep 17 00:00:00 2001
From: Brian Arnold <arnoldb@stanford.edu>
Date: Fri, 12 Jun 2026 22:48:39 +0000
Subject: [PATCH 09/12] feat(pydantic_config): register PydanticModelConverter
 in default registries (ENG-607)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/orcapod/contexts/data/v0.1.json      |  4 +++
 src/orcapod/hashing/versioned_hashers.py |  4 +++
 tests/test_pydantic_config.py            | 43 ++++++++++++++++++++++++
 3 files changed, 51 insertions(+)

diff --git a/src/orcapod/contexts/data/v0.1.json b/src/orcapod/contexts/data/v0.1.json
index 2fb31a70..f0e31f77 100644
--- a/src/orcapod/contexts/data/v0.1.json
+++ b/src/orcapod/contexts/data/v0.1.json
@@ -23,6 +23,10 @@
                     "_config": {
                         "file_hasher": {"_ref": "file_hasher"}
                     }
+                },
+                "pydantic": {
+                    "_class": "orcapod.pydantic_config.PydanticModelConverter",
+                    "_config": {}
                 }
             }
         }
diff --git a/src/orcapod/hashing/versioned_hashers.py b/src/orcapod/hashing/versioned_hashers.py
index f736293b..3627be36 100644
--- a/src/orcapod/hashing/versioned_hashers.py
+++ b/src/orcapod/hashing/versioned_hashers.py
@@ -137,6 +137,10 @@ def get_versioned_semantic_arrow_hasher(
     path_converter: Any = PythonPathStructConverter(file_hasher=file_hasher)
     registry.register_converter("path", path_converter)
 
+    from orcapod.pydantic_config import PydanticModelConverter
+
+    registry.register_converter("pydantic", PydanticModelConverter())
+
     logger.debug(
         "get_versioned_semantic_arrow_hasher: creating StarfixArrowHasher "
         "(hasher_id=%r)",
diff --git a/tests/test_pydantic_config.py b/tests/test_pydantic_config.py
index 7626c350..2d4b81aa 100644
--- a/tests/test_pydantic_config.py
+++ b/tests/test_pydantic_config.py
@@ -145,3 +145,46 @@ def test_hash_stable_across_yaml_formatting(tmp_path):
     ha = conv.hash_struct_dict(conv.python_to_struct_dict(cfg_a))
     hb = conv.hash_struct_dict(conv.python_to_struct_dict(cfg_b))
     assert ha == hb
+
+
+# ---------------------------------------------------------------------------
+# Integration tests — default context registry (ENG-607 Task 5)
+# ---------------------------------------------------------------------------
+
+from orcapod.contexts import get_default_context  # noqa: E402
+from orcapod.types import Schema  # noqa: E402
+
+
+def test_registered_in_default_context_roundtrip():
+    ctx = get_default_context()
+    converter = ctx.type_converter
+
+    cfg = SampleConfig(name="run1", threshold=6.0, retries=5)
+    table = converter.python_dicts_to_arrow_table(
+        [{"config": cfg}], python_schema=Schema({"config": SampleConfig})
+    )
+    # Stored as the pydantic struct, not an opaque blob.
+    assert pa.types.is_struct(table.schema.field("config").type)
+    assert {f.name for f in table.schema.field("config").type} == {
+        "__pydantic_model__",
+        "__pydantic_json__",
+    }
+
+    restored = converter.arrow_table_to_python_dicts(table)
+    assert isinstance(restored[0]["config"], SampleConfig)
+    assert restored[0]["config"] == cfg
+
+
+def test_default_context_hashes_model_stably():
+    ctx = get_default_context()
+    converter = ctx.type_converter
+    schema = Schema({"config": SampleConfig})
+    t1 = converter.python_dicts_to_arrow_table(
+        [{"config": SampleConfig(name="r", threshold=6.0)}], python_schema=schema
+    )
+    t2 = converter.python_dicts_to_arrow_table(
+        [{"config": SampleConfig(name="r", threshold=6.0)}], python_schema=schema
+    )
+    h1 = ctx.arrow_hasher.hash_table(t1)
+    h2 = ctx.arrow_hasher.hash_table(t2)
+    assert h1 == h2

From c8784344fec52f0b09f29dc27402e662a85508d1 Mon Sep 17 00:00:00 2001
From: Brian Arnold <arnoldb@stanford.edu>
Date: Fri, 12 Jun 2026 22:55:21 +0000
Subject: [PATCH 10/12] chore(pydantic_config): add registry-sync note, tidy
 test imports (ENG-607)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/orcapod/hashing/versioned_hashers.py | 2 ++
 tests/test_pydantic_config.py            | 5 ++---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/orcapod/hashing/versioned_hashers.py b/src/orcapod/hashing/versioned_hashers.py
index 3627be36..66abc28f 100644
--- a/src/orcapod/hashing/versioned_hashers.py
+++ b/src/orcapod/hashing/versioned_hashers.py
@@ -135,6 +135,8 @@ def get_versioned_semantic_arrow_hasher(
     registry: Any = SemanticTypeRegistry()
     file_hasher = BasicFileHasher(algorithm="sha256")
     path_converter: Any = PythonPathStructConverter(file_hasher=file_hasher)
+    # NOTE: keep this converter list in sync with the production registry in
+    # src/orcapod/contexts/data/v0.1.json (semantic_registry._config.converters).
     registry.register_converter("path", path_converter)
 
     from orcapod.pydantic_config import PydanticModelConverter
diff --git a/tests/test_pydantic_config.py b/tests/test_pydantic_config.py
index 2d4b81aa..d1effc91 100644
--- a/tests/test_pydantic_config.py
+++ b/tests/test_pydantic_config.py
@@ -8,7 +8,9 @@
 import pydantic
 import pytest
 
+from orcapod.contexts import get_default_context
 from orcapod.pydantic_config import OrcapodBaseConfig, PydanticModelConverter, load_pydantic_config
+from orcapod.types import Schema
 
 
 class SampleConfig(OrcapodBaseConfig):
@@ -151,9 +153,6 @@ def test_hash_stable_across_yaml_formatting(tmp_path):
 # Integration tests — default context registry (ENG-607 Task 5)
 # ---------------------------------------------------------------------------
 
-from orcapod.contexts import get_default_context  # noqa: E402
-from orcapod.types import Schema  # noqa: E402
-
 
 def test_registered_in_default_context_roundtrip():
     ctx = get_default_context()

From b45d165162fb73868350a1b9c7ad11fcc8f5ce93 Mon Sep 17 00:00:00 2001
From: Brian Arnold <arnoldb@stanford.edu>
Date: Fri, 12 Jun 2026 23:04:15 +0000
Subject: [PATCH 11/12] fix(pydantic_config): canonicalize JSON (sorted keys)
 before hashing (ENG-607)

Hash over sorted-key JSON so configs that differ only in dict key order
hash equal -- identity tracks meaning, not formatting. Stored JSON used for
reconstruction is unchanged.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/orcapod/pydantic_config.py | 15 +++++++++++----
 tests/test_pydantic_config.py  | 13 +++++++++++++
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/src/orcapod/pydantic_config.py b/src/orcapod/pydantic_config.py
index c03b053a..8dac32d9 100644
--- a/src/orcapod/pydantic_config.py
+++ b/src/orcapod/pydantic_config.py
@@ -1,4 +1,4 @@
-"""Pydantic-backed config loading for orcapod pipelines (ENG-601 / ENG-607).
+"""Pydantic-backed config loading for orcapod pipelines (ENG-607).
 
 Provides `load_pydantic_config` (validate a YAML file against a pydantic model)
 and `OrcapodBaseConfig` (a strict base for config schemas). A companion
@@ -9,6 +9,7 @@
 from __future__ import annotations
 
 import importlib
+import json
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, TypeVar
 
@@ -100,8 +101,9 @@ class PydanticModelConverter(SemanticStructConverterBase):
 
     Maps any `pydantic.BaseModel` instance to an Arrow struct holding the
     model's fully-qualified class name and its canonical JSON, and back. Content
-    is hashed over (class name + canonical JSON), so identity tracks the config's
-    meaning rather than source-file formatting. Modeled on `PythonPathStructConverter`.
+    is hashed over (class name + sorted-key canonical JSON), so identity tracks
+    the config's meaning rather than source-file formatting or dict key order.
+    Modeled on `PythonPathStructConverter`.
     """
 
     def __init__(self) -> None:
@@ -173,6 +175,11 @@ def hash_struct_dict(
             raise ValueError(
                 f"Missing '{_MODEL_FIELD}'/'{_JSON_FIELD}' in struct dict"
             )
-        content = f"{qualified_name}\n{json_str}".encode("utf-8")
+        # Canonicalize (sorted keys) so semantically-equal configs that differ only
+        # in dict key order hash equal -- identity tracks meaning, not formatting.
+        canonical_json = json.dumps(
+            json.loads(json_str), sort_keys=True, separators=(",", ":")
+        )
+        content = f"{qualified_name}\n{canonical_json}".encode("utf-8")
         content_hash = self._compute_content_hash(content)
         return self._format_hash_string(content_hash.digest, add_prefix=add_prefix)
diff --git a/tests/test_pydantic_config.py b/tests/test_pydantic_config.py
index d1effc91..26eeb0ce 100644
--- a/tests/test_pydantic_config.py
+++ b/tests/test_pydantic_config.py
@@ -19,6 +19,11 @@ class SampleConfig(OrcapodBaseConfig):
     retries: int = 3
 
 
+class DictConfig(OrcapodBaseConfig):
+    name: str
+    params: dict[str, int]
+
+
 def _write(tmp_path: Path, text: str) -> Path:
     p = tmp_path / "config.yaml"
     p.write_text(text, encoding="utf-8")
@@ -174,6 +179,14 @@ def test_registered_in_default_context_roundtrip():
     assert restored[0]["config"] == cfg
 
 
+def test_hash_stable_across_dict_key_order():
+    conv = _converter()
+    a = conv.python_to_struct_dict(DictConfig(name="x", params={"a": 1, "b": 2}))
+    b = conv.python_to_struct_dict(DictConfig(name="x", params={"b": 2, "a": 1}))
+    # Same contents, different insertion order -> must hash equal (meaning, not order).
+    assert conv.hash_struct_dict(a) == conv.hash_struct_dict(b)
+
+
 def test_default_context_hashes_model_stably():
     ctx = get_default_context()
     converter = ctx.type_converter

From c23fe264dee4f1ac94bd36b79cf00851c0e9eace Mon Sep 17 00:00:00 2001
From: Brian Arnold <arnoldb@stanford.edu>
Date: Fri, 12 Jun 2026 23:37:11 +0000
Subject: [PATCH 12/12] feat(pydantic_config): support UPath for object-storage
 config files (ENG-607)

load_pydantic_config now resolves the path through UPath and reads via
read_text, so configs on s3://, gs://, etc. work in addition to local paths.
Per PR review.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/orcapod/pydantic_config.py | 26 +++++++++++++++++---------
 tests/test_pydantic_config.py  | 10 ++++++++++
 2 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/src/orcapod/pydantic_config.py b/src/orcapod/pydantic_config.py
index 8dac32d9..6ecf21e6 100644
--- a/src/orcapod/pydantic_config.py
+++ b/src/orcapod/pydantic_config.py
@@ -15,6 +15,7 @@
 
 import pydantic
 import yaml
+from upath import UPath
 
 from orcapod.semantic_types.semantic_struct_converters import SemanticStructConverterBase
 from orcapod.utils.lazy_module import LazyModule
@@ -38,29 +39,36 @@ class OrcapodBaseConfig(pydantic.BaseModel):
     model_config = pydantic.ConfigDict(extra="forbid", frozen=True)
 
 
-def load_pydantic_config(path: str | Path, model_cls: type[M]) -> M:
+def load_pydantic_config(path: str | Path | UPath, model_cls: type[M]) -> M:
     """Read a YAML file and validate it against a pydantic model.
 
+    The path is resolved through ``UPath``, so local paths and remote object
+    storage (e.g. ``s3://``, ``gs://``) are both supported.
+
     Args:
-        path: Path to the YAML config file.
+        path: Path to the YAML config file. A local path or any ``UPath``-supported
+            URI (e.g. an object-storage location).
         model_cls: The pydantic model class to validate against.
 
     Returns:
         A validated instance of `model_cls`.
 
     Raises:
-        ValueError: If the YAML cannot be parsed or fails validation. The error
-            message includes the file path and the underlying field-level detail.
+        ValueError: If the file cannot be read, the YAML cannot be parsed, or
+            validation fails. The error message includes the file path and the
+            underlying detail.
     """
-    path = Path(path)
+    path = UPath(path)
     try:
-        with open(path, "r", encoding="utf-8") as f:
-            data = yaml.safe_load(f)
-    except yaml.YAMLError as e:
-        raise ValueError(f"Could not parse YAML config {path}: {e}") from e
+        text = path.read_text(encoding="utf-8")
     except OSError as e:
         raise ValueError(f"Could not read YAML config {path}: {e}") from e
 
+    try:
+        data = yaml.safe_load(text)
+    except yaml.YAMLError as e:
+        raise ValueError(f"Could not parse YAML config {path}: {e}") from e
+
     try:
         return model_cls.model_validate(data)
     except pydantic.ValidationError as e:
diff --git a/tests/test_pydantic_config.py b/tests/test_pydantic_config.py
index 26eeb0ce..75acbc62 100644
--- a/tests/test_pydantic_config.py
+++ b/tests/test_pydantic_config.py
@@ -7,6 +7,7 @@
 import pyarrow as pa
 import pydantic
 import pytest
+from upath import UPath
 
 from orcapod.contexts import get_default_context
 from orcapod.pydantic_config import OrcapodBaseConfig, PydanticModelConverter, load_pydantic_config
@@ -39,6 +40,15 @@ def test_loads_valid_config(tmp_path):
     assert cfg.retries == 3  # default applied
 
 
+def test_loads_via_upath(tmp_path):
+    # A UPath wrapping a local path goes through the same read path used for object storage.
+    path = _write(tmp_path, "name: run1\nthreshold: 6.0\n")
+    cfg = load_pydantic_config(UPath(path), SampleConfig)
+    assert isinstance(cfg, SampleConfig)
+    assert cfg.name == "run1"
+    assert cfg.threshold == 6.0
+
+
 def test_wrong_type_raises_with_path(tmp_path):
     path = _write(tmp_path, "name: run1\nthreshold: not-a-number\n")
     with pytest.raises(ValueError) as exc: