From 3d78c28eb22d9bfe8d1386cdd816e624d09b178c Mon Sep 17 00:00:00 2001 From: xuefl Date: Fri, 19 Jun 2026 12:49:02 -0700 Subject: [PATCH 1/3] feat: allow templating per-agent reasoning.effort and context_tier (#262) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per-agent `reasoning.effort` (low/medium/high/xhigh) and `context_tier` (default/long_context) were strict pydantic `Literal` enums, so a templated value like `effort: "{{ workflow.input.eff }}"` was rejected at YAML load — before `--input` / Jinja2 templates are resolved. `model:` already escaped this because it is a free-form `str` resolved at runtime. This implements the issue's Approach 2 (defer enum validation to runtime), mirroring the existing, shipped precedent for the `wait` step's `duration` field (templatable-but-validated). Approach 1 (resolve templates before enum validation in the loader) was evaluated and rejected: the loader has no `--input`/`workflow.input` defaults available, `conductor validate` has no inputs at all, and pre-resolving collapses back into Approach 2's surgery anyway. See PR description for the full grounded comparison. ## Changes Schema (src/conductor/config/schema.py) - Widen `ReasoningConfig.effort` to `ReasoningEffort | str` and add a `@model_validator(mode="after")` that defers `{{`/`{%` templates and validates literals against `get_args(ReasoningEffort)`. - Widen `AgentDef.context_tier` to `ContextTier | str | None` and add `_validate_context_tier()` dispatched from the regular-agent branch of `validate_agent_type` (non-agent types still reject the field outright — a template string is still "not None"). - Docstrings advertise template support, mirroring the `model` docstring. Runtime (src/conductor/executor/agent.py) - After the existing `model` render, render + validate templated `reasoning.effort` / `context_tier` with the full agent context (covers main / retry / dialog / for_each — every path routes through `AgentExecutor.execute`). A `_render_enum_field` helper renders, strips trailing whitespace (the renderer keeps trailing newlines), and validates the resolved literal, raising a clear `ValidationError` on a bad resolution. The provider then sees a concrete enum — no provider changes. Providers (reasoning.py / context_tier.py) - `resolve_*` now `cast(...)` the widened field back to the strict enum: by the time these run (provider execute, after the executor resolves the field) the value is a concrete, validated literal. Validator (src/conductor/config/validator.py) - The reasoning.effort capability cross-check now skips only the value-DEPENDENT membership check for templates. The value-INDEPENDENT "provider supports no reasoning at all" check (`reasoning_effort=None`) still fires for templated efforts — otherwise a templated effort on a no-reasoning provider (e.g. claude-agent-sdk) would pass `validate` while a literal errors, silently dropping operator intent. ## Per pre-impl + DUAL-model post-impl rubber-duck Pre-impl RD: confirmed the `ty`-type-safety angle (the `| str` widening propagates into the `resolve_*` return types) and resolved it with honest `cast()`s guarded by the runtime-resolution guarantee. Post-impl RD ran TWO models in parallel (Claude Opus 4.8 + GPT-5.5) and both independently flagged the validator gap above (GPT P1 / Opus P2) — fixed by reordering the capability check. Opus additionally caught a schema/executor asymmetry: the schema deferred only on `{{` while the executor renders `{{` or `{%`, so a statement-style `{% if %}` template was rejected at load though the executor would render it. Both schema validators now mirror the executor guard, enabling conditional templates end-to-end (verified: `{% if workflow.input.heavy %}xhigh{% else %}low{% endif %}` resolves to xhigh/low per input). Opus verified 8/10 risk areas fully clean (no unrendered template reaches any SDK; dialog/for_each/retry/claude-budget all route through the resolved agent copy). ## Tests - tests/test_config/test_schema.py: templated effort/context_tier deferred (incl. `{% %}` statement templates), literal-invalid still rejected, human_gate/script/workflow/etc still reject templated fields. - tests/test_executor/test_agent.py: runtime render for both fields, static pass-through, no-mutation of the original templated agent, bad-resolution raises, trailing-newline strip. - tests/test_config/test_validator_capabilities.py: templated effort defers the membership check; templated effort on a no-reasoning provider still errors. - tests/test_cli/test_validate.py + tests/fixtures/valid_templated_enums.yaml: `conductor validate` accepts a templated-enum workflow (exit 0). ## Verification - ruff check + format: clean - ty check src: 12 diagnostics (== baseline; 0 introduced — all pre-existing Windows termios / dialog stubs) - pytest: 3526 passed; the 13 failures are identical on a clean upstream/main checkout (pre-existing Windows-env: registry TOML, event_log non-serializable, script stdin tty) — 0 new failures. ## Out of scope (deliberate non-goals) - Workflow-level `runtime.default_reasoning_effort` / `default_context_tier` templating — the issue's acceptance criteria are per-agent only; these remain strict enums (a templated workflow-level default is rejected with a clear message). Can follow up separately. - A generalized `Annotated[...]` "templatable enum" type — starting with `| str` + the duration-style validator to match existing code; generalize only if a third enum field needs it. Closes #262 --- src/conductor/config/schema.py | 77 ++++++- src/conductor/config/validator.py | 11 + src/conductor/executor/agent.py | 65 +++++- src/conductor/providers/context_tier.py | 9 +- src/conductor/providers/reasoning.py | 9 +- tests/fixtures/valid_templated_enums.yaml | 38 ++++ tests/test_cli/test_validate.py | 10 + tests/test_config/test_schema.py | 127 ++++++++++++ .../test_validator_capabilities.py | 38 ++++ tests/test_executor/test_agent.py | 196 +++++++++++++++++- 10 files changed, 570 insertions(+), 10 deletions(-) create mode 100644 tests/fixtures/valid_templated_enums.yaml diff --git a/src/conductor/config/schema.py b/src/conductor/config/schema.py index 3d51127c..0cb07b37 100644 --- a/src/conductor/config/schema.py +++ b/src/conductor/config/schema.py @@ -6,7 +6,7 @@ from __future__ import annotations -from typing import Any, Literal +from typing import Any, Literal, get_args from pydantic import ( BaseModel, @@ -563,10 +563,43 @@ class ReasoningConfig(BaseModel): reasoning: effort: high + + Supports Jinja2 templates:: + + reasoning: + effort: "{{ workflow.input.effort }}" + + A templated ``effort`` is accepted at load time and resolved + validated + at runtime (in :mod:`conductor.executor.agent`), mirroring how ``model`` + and the ``wait`` step's ``duration`` are handled. A *literal* value must + be one of :data:`~conductor.providers.reasoning.ReasoningEffort`. + """ + + effort: ReasoningEffort | str + """Reasoning effort level applied to the agent's model calls. + + Either a literal level (``low`` / ``medium`` / ``high`` / ``xhigh``) or a + ``{{ ... }}`` Jinja2 template resolved at runtime. """ - effort: ReasoningEffort - """Reasoning effort level applied to the agent's model calls.""" + @model_validator(mode="after") + def _validate_effort(self) -> ReasoningConfig: + """Accept literal efforts or defer ``{{ }}`` templates to runtime. + + Mirrors :meth:`AgentDef._validate_wait_duration`: a templated value + (containing ``{{`` or ``{%``) skips literal validation here and is + rendered + validated at execute time (:mod:`conductor.executor.agent`). + A non-templated value must be a valid :data:`ReasoningEffort` literal. + """ + value = self.effort + if isinstance(value, str) and ("{{" in value or "{%" in value): + return self + if value not in get_args(ReasoningEffort): + raise ValueError( + f"reasoning.effort must be one of {list(get_args(ReasoningEffort))} " + f"or a '{{{{ ... }}}}' template (got {value!r})" + ) + return self class AgentDef(BaseModel): @@ -642,7 +675,7 @@ class AgentDef(BaseModel): Supports Jinja2 templates: {{ workflow.input.model_name }} """ - context_tier: ContextTier | None = None + context_tier: ContextTier | str | None = None """Context-window tier for models that support it (Copilot provider only). Set ``context_tier: long_context`` to pin a heavy-reasoning agent to the @@ -657,9 +690,18 @@ class AgentDef(BaseModel): Only applies to provider-backed agents (type='agent' or None). + Supports Jinja2 templates: a ``{{ workflow.input.tier }}`` value is + accepted at load time and resolved + validated at runtime (mirrors + ``model`` and the ``reasoning.effort`` handling). A *literal* value must + be one of :data:`~conductor.providers.context_tier.ContextTier`. + Example YAML:: context_tier: long_context + + Templated:: + + context_tier: "{{ workflow.input.tier }}" """ input: list[str] = Field(default_factory=list) @@ -1417,6 +1459,10 @@ def validate_agent_type(self) -> AgentDef: f"'{self.type or 'agent'}' agents cannot have 'output_type' " "(only 'set' agents support output_type)" ) + # #262: regular agents may carry a literal or templated + # context_tier; validate the literal here and defer templates to + # runtime. (reasoning.effort is validated on ReasoningConfig.) + self._validate_context_tier() if self.type == "workflow" and self.reasoning is not None: raise ValueError("workflow agents cannot have 'reasoning'") if self.type == "workflow" and self.context_tier is not None: @@ -1458,6 +1504,29 @@ def effective_output_schema(self) -> dict[str, OutputField] | None: return self.output return None + def _validate_context_tier(self) -> None: + """Validate ``context_tier`` for a regular (provider-backed) agent. + + Mirrors :meth:`_validate_wait_duration`: an unset (``None``) or + templated (containing ``{{`` or ``{%``) value defers all literal + validation to runtime (rendered + validated in + :mod:`conductor.executor.agent`); a non-templated value must be a + valid :data:`~conductor.providers.context_tier.ContextTier` literal. + + Non-agent step types reject ``context_tier`` outright via their own + ``is not None`` checks in :meth:`validate_agent_type` (a template + string is still "not None"), so this helper is only dispatched from + the regular-agent branch. + """ + value = self.context_tier + if value is None or (isinstance(value, str) and ("{{" in value or "{%" in value)): + return + if value not in get_args(ContextTier): + raise ValueError( + f"context_tier must be one of {list(get_args(ContextTier))} " + f"or a '{{{{ ... }}}}' template (got {value!r})" + ) + def _validate_wait_duration(self) -> None: """Validate ``duration`` for a ``wait`` agent. diff --git a/src/conductor/config/validator.py b/src/conductor/config/validator.py index e5de78c4..c8064621 100644 --- a/src/conductor/config/validator.py +++ b/src/conductor/config/validator.py @@ -1705,12 +1705,23 @@ def _caps_for(name: str) -> ProviderCapabilities | None: if (agent.reasoning is not None and agent.reasoning.effort is not None) else "runtime.default_reasoning_effort" ) + # #262: whether the provider supports reasoning effort AT ALL is a + # value-INDEPENDENT fact known at validate time, so reject even a + # templated effort here — no resolved value could ever be valid on + # a provider with reasoning_effort=None (e.g. claude-agent-sdk, + # which ignores reasoning entirely). Only the membership check + # (does the resolved literal fall in the supported subset?) is + # value-dependent and must be deferred for templates; providers + # that support reasoning re-validate the resolved value at runtime + # (copilot._validate_reasoning_effort_for_model / claude thinking). if supported is None: errors.append( f"Agent '{agent.name}' resolves to {source}={requested!r} " f"but provider '{provider_name}' does not support reasoning " f"effort (capabilities.reasoning_effort=None)." ) + elif isinstance(requested, str) and "{{" in requested: + pass elif requested not in supported: errors.append( f"Agent '{agent.name}' resolves to {source}={requested!r} " diff --git a/src/conductor/executor/agent.py b/src/conductor/executor/agent.py index a0cc8ccf..859c5725 100644 --- a/src/conductor/executor/agent.py +++ b/src/conductor/executor/agent.py @@ -8,12 +8,14 @@ import asyncio import contextlib -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, get_args from conductor.exceptions import ValidationError from conductor.executor.output import parse_json_output, validate_output from conductor.executor.template import TemplateRenderer from conductor.providers.base import AgentOutput, EventCallback +from conductor.providers.context_tier import ContextTier +from conductor.providers.reasoning import ReasoningEffort def _verbose_log(message: str, style: str = "dim") -> None: @@ -112,6 +114,33 @@ def __init__( self.instructions_preamble = instructions_preamble self.renderer = TemplateRenderer() + def _render_enum_field( + self, + *, + value: str, + context: dict[str, Any], + allowed: tuple[str, ...], + field_name: str, + agent_name: str, + ) -> str: + """Render a templated enum field and validate the resolved literal. + + Mirrors the ``model`` rendering above: a ``{{ ... }}`` value is + rendered with the full agent context, stripped (the renderer keeps + trailing newlines), and checked against ``allowed``. Raises a + :class:`~conductor.exceptions.ValidationError` when the resolved + value is not one of the permitted literals so the failure is actionable + at execute time rather than silently forwarded to the provider/SDK. + """ + resolved = self.renderer.render(value, context).strip() + if resolved not in allowed: + raise ValidationError( + f"Agent '{agent_name}': {field_name} template resolved to " + f"{resolved!r}, which is not a valid value.", + suggestion=f"Resolved value must be one of {list(allowed)}.", + ) + return resolved + async def execute( self, agent: AgentDef, @@ -154,6 +183,40 @@ async def execute( rendered_model = self.renderer.render(agent.model, context) agent = agent.model_copy(update={"model": rendered_model}) + # #262: resolve templated reasoning.effort / context_tier the same + # way model is handled above. These fields are strict enums that the + # schema deliberately accepts as templates (deferring literal + # validation to here); render the value with full context, then + # validate the resolved literal so the provider sees a concrete enum. + # The ``isinstance(..., str)`` guards both detect templates and narrow + # the widened ``Enum | str | None`` field types to ``str`` for the + # type checker before passing them to ``_render_enum_field``. + effort = agent.reasoning.effort if agent.reasoning is not None else None + if isinstance(effort, str) and ("{{" in effort or "{%" in effort): + resolved_effort = self._render_enum_field( + value=effort, + context=context, + allowed=get_args(ReasoningEffort), + field_name="reasoning.effort", + agent_name=agent.name, + ) + # ``agent.reasoning`` is not None here (effort came from it). + assert agent.reasoning is not None + agent = agent.model_copy( + update={"reasoning": agent.reasoning.model_copy(update={"effort": resolved_effort})} + ) + + tier = agent.context_tier + if isinstance(tier, str) and ("{{" in tier or "{%" in tier): + resolved_tier = self._render_enum_field( + value=tier, + context=context, + allowed=get_args(ContextTier), + field_name="context_tier", + agent_name=agent.name, + ) + agent = agent.model_copy(update={"context_tier": resolved_tier}) + # Render prompt with context rendered_prompt = self.renderer.render(agent.prompt, context) diff --git a/src/conductor/providers/context_tier.py b/src/conductor/providers/context_tier.py index 8cdae244..4edfab1f 100644 --- a/src/conductor/providers/context_tier.py +++ b/src/conductor/providers/context_tier.py @@ -13,7 +13,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Literal +from typing import TYPE_CHECKING, Literal, cast if TYPE_CHECKING: from conductor.config.schema import AgentDef @@ -39,5 +39,10 @@ def resolve_context_tier( The resolved ``ContextTier``, or ``None`` to send no value. """ if agent.context_tier is not None: - return agent.context_tier + # #262: AgentDef widens ``context_tier`` to ``ContextTier | str`` so a + # ``{{ ... }}`` template survives schema validation. By the time this + # resolver runs (provider execute, after AgentExecutor renders + + # validates the field) the value is a concrete, validated literal, so + # narrowing back to ContextTier is sound. + return cast(ContextTier, agent.context_tier) return runtime_default diff --git a/src/conductor/providers/reasoning.py b/src/conductor/providers/reasoning.py index e2163327..80181846 100644 --- a/src/conductor/providers/reasoning.py +++ b/src/conductor/providers/reasoning.py @@ -11,7 +11,7 @@ from __future__ import annotations from collections.abc import Mapping -from typing import TYPE_CHECKING, Final, Literal +from typing import TYPE_CHECKING, Final, Literal, cast if TYPE_CHECKING: from conductor.config.schema import AgentDef @@ -87,5 +87,10 @@ def resolve_reasoning_effort( set, signalling that no reasoning parameter should be sent to the SDK. """ if agent.reasoning is not None: - return agent.reasoning.effort + # #262: AgentDef widens ``effort`` to ``ReasoningEffort | str`` so a + # ``{{ ... }}`` template survives schema validation. By the time this + # resolver runs (provider execute, after AgentExecutor renders + + # validates the field) the value is a concrete, validated literal, so + # narrowing back to ReasoningEffort is sound. + return cast(ReasoningEffort, agent.reasoning.effort) return runtime_default diff --git a/tests/fixtures/valid_templated_enums.yaml b/tests/fixtures/valid_templated_enums.yaml new file mode 100644 index 00000000..8fe198f0 --- /dev/null +++ b/tests/fixtures/valid_templated_enums.yaml @@ -0,0 +1,38 @@ +# Valid workflow exercising templated reasoning.effort + context_tier (#262). +# Both enum fields carry Jinja2 templates that resolve from workflow.input; +# `conductor validate` must accept this without resolving the templates. +workflow: + name: templated-enums-workflow + description: A workflow with templated reasoning.effort and context_tier + entry_point: thinker + + input: + effort: + type: string + required: false + default: high + tier: + type: string + required: false + default: long_context + topic: + type: string + required: false + default: "the meaning of life" + +agents: + - name: thinker + model: gpt-4 + prompt: "Reason about {{ workflow.input.topic }}" + reasoning: + effort: "{{ workflow.input.effort }}" + context_tier: "{{ workflow.input.tier }}" + output: + answer: + type: string + description: The reasoned answer + routes: + - to: $end + +output: + message: "{{ thinker.output.answer }}" diff --git a/tests/test_cli/test_validate.py b/tests/test_cli/test_validate.py index bd2c7bc8..195962e1 100644 --- a/tests/test_cli/test_validate.py +++ b/tests/test_cli/test_validate.py @@ -44,6 +44,16 @@ def test_validate_valid_full_workflow(self, fixtures_dir: Path) -> None: assert result.exit_code == 0 assert "Validation Successful" in result.output + def test_validate_templated_enums(self, fixtures_dir: Path) -> None: + """#262: a workflow with templated reasoning.effort + context_tier + validates clean (templates are deferred to runtime, not rejected).""" + workflow_file = fixtures_dir / "valid_templated_enums.yaml" + result = runner.invoke(app, ["validate", str(workflow_file)]) + + assert result.exit_code == 0 + assert "Validation Successful" in result.output + assert "templated-enums-workflow" in result.output + def test_validate_malformed_yaml(self, fixtures_dir: Path) -> None: """Test validating a file with malformed YAML.""" workflow_file = fixtures_dir / "invalid_malformed.yaml" diff --git a/tests/test_config/test_schema.py b/tests/test_config/test_schema.py index 5e3cc757..6f7b510d 100644 --- a/tests/test_config/test_schema.py +++ b/tests/test_config/test_schema.py @@ -1424,6 +1424,76 @@ def test_workflow_with_reasoning_raises(self) -> None: assert "workflow agents cannot have 'reasoning'" in str(exc_info.value) +class TestAgentDefReasoningTemplating: + """Tests for templated ``reasoning.effort`` (#262 — defer to runtime).""" + + def test_templated_effort_deferred(self) -> None: + """A ``{{ }}`` effort is accepted at load and preserved verbatim.""" + agent = AgentDef( + name="a", + model="gpt-4", + prompt="test", + reasoning={"effort": "{{ workflow.input.eff }}"}, + ) + assert agent.reasoning is not None + # Not coerced or rejected at schema time — deferred to runtime. + assert agent.reasoning.effort == "{{ workflow.input.eff }}" + + def test_templated_effort_with_surrounding_text_deferred(self) -> None: + """A template embedded in other text is still deferred (any ``{{``).""" + agent = AgentDef( + name="a", + model="gpt-4", + prompt="test", + reasoning={"effort": "{{ workflow.input.eff }}-ish"}, + ) + assert agent.reasoning is not None + assert agent.reasoning.effort == "{{ workflow.input.eff }}-ish" + + def test_statement_style_effort_template_deferred(self) -> None: + """A ``{% %}`` statement template is deferred (matches executor guard). + + The executor renders effort on ``{{`` or ``{%``; the schema must defer + the same set so a conditional like ``{% if %}…{% endif %}`` is not + rejected at load (dual-RD opus P2-2). + """ + tmpl = "{% if workflow.input.heavy %}xhigh{% else %}low{% endif %}" + agent = AgentDef( + name="a", + model="gpt-4", + prompt="test", + reasoning={"effort": tmpl}, + ) + assert agent.reasoning is not None + assert agent.reasoning.effort == tmpl + + @pytest.mark.parametrize("effort", ["ultra", "none", "max"]) + def test_literal_invalid_effort_still_rejected(self, effort: str) -> None: + """A non-templated invalid literal still raises at schema time.""" + with pytest.raises(ValidationError): + AgentDef( + name="a", + model="gpt-4", + prompt="test", + reasoning={"effort": effort}, # type: ignore[arg-type] + ) + + def test_human_gate_with_templated_reasoning_still_raises(self) -> None: + """human_gate forbids reasoning even when the effort is templated. + + A template string is still "not None", so the per-type ban applies. + """ + with pytest.raises(ValidationError) as exc_info: + AgentDef( + name="gate1", + type="human_gate", + prompt="Choose:", + options=[GateOption(label="Ok", value="ok", route="next")], + reasoning={"effort": "{{ workflow.input.eff }}"}, + ) + assert "human_gate agents cannot have 'reasoning'" in str(exc_info.value) + + class TestAgentDefValidator: """Tests for the validator field on AgentDef.""" @@ -1763,6 +1833,63 @@ def test_terminate_with_context_tier_raises(self) -> None: assert "terminate agents cannot have 'context_tier'" in str(exc_info.value) +class TestAgentDefContextTierTemplating: + """Tests for templated ``context_tier`` (#262 — defer to runtime).""" + + def test_templated_context_tier_deferred(self) -> None: + """A ``{{ }}`` context_tier is accepted at load and preserved.""" + agent = AgentDef( + name="a", + model="gpt-4", + prompt="test", + context_tier="{{ workflow.input.tier }}", + ) + # Not coerced or rejected at schema time — deferred to runtime. + assert agent.context_tier == "{{ workflow.input.tier }}" + + def test_statement_style_context_tier_template_deferred(self) -> None: + """A ``{% %}`` statement template is deferred (matches executor guard). + + The executor renders context_tier on ``{{`` or ``{%``; the schema must + defer the same set so a conditional is not rejected at load (dual-RD + opus P2-2). + """ + tmpl = "{% if workflow.input.heavy %}long_context{% else %}default{% endif %}" + agent = AgentDef( + name="a", + model="gpt-4", + prompt="test", + context_tier=tmpl, + ) + assert agent.context_tier == tmpl + + @pytest.mark.parametrize("tier", ["1m", "huge", "long-context"]) + def test_literal_invalid_tier_still_rejected(self, tier: str) -> None: + """A non-templated invalid literal still raises at schema time.""" + with pytest.raises(ValidationError): + AgentDef( + name="a", + model="gpt-4", + prompt="test", + context_tier=tier, # type: ignore[arg-type] + ) + + def test_human_gate_with_templated_context_tier_still_raises(self) -> None: + """human_gate forbids context_tier even when it is templated. + + A template string is still "not None", so the per-type ban applies. + """ + with pytest.raises(ValidationError) as exc_info: + AgentDef( + name="g", + type="human_gate", + prompt="Approve?", + options=[GateOption(label="Ok", value="ok", route="next")], + context_tier="{{ workflow.input.tier }}", + ) + assert "human_gate agents cannot have 'context_tier'" in str(exc_info.value) + + class TestRuntimeConfigDefaultContextTier: """Tests for default_context_tier on RuntimeConfig.""" diff --git a/tests/test_config/test_validator_capabilities.py b/tests/test_config/test_validator_capabilities.py index b5257c1c..af363406 100644 --- a/tests/test_config/test_validator_capabilities.py +++ b/tests/test_config/test_validator_capabilities.py @@ -187,6 +187,44 @@ def test_supported_level_passes(self, patch_caps: Any) -> None: ) validate_workflow_config(config) + def test_templated_effort_defers_capability_check(self, patch_caps: Any) -> None: + """#262: a templated effort can't be checked vs caps until runtime. + + The provider only advertises ``low``/``medium``, but a templated + effort must NOT raise at validate time — the resolved value is + checked by the runtime resolver instead. + """ + patch_caps({"copilot": _caps(reasoning_effort=("low", "medium"))}) + config = _build_workflow( + agents=[ + AgentDef( + name="a", + prompt="hi", + reasoning=ReasoningConfig(effort="{{ workflow.input.eff }}"), + ), + ], + ) + validate_workflow_config(config) # must not raise + + def test_templated_effort_on_no_reasoning_provider_still_errors(self, patch_caps: Any) -> None: + """#262 (dual-RD): whether a provider supports reasoning AT ALL is a + value-INDEPENDENT fact known at validate time. A templated effort on a + provider with ``reasoning_effort=None`` must STILL error — no resolved + value could ever be valid, and the provider ignores reasoning at + runtime, so deferring would silently drop operator intent.""" + patch_caps({"copilot": _caps(reasoning_effort=None)}) + config = _build_workflow( + agents=[ + AgentDef( + name="a", + prompt="hi", + reasoning=ReasoningConfig(effort="{{ workflow.input.eff }}"), + ), + ], + ) + with pytest.raises(ConfigurationError, match="does not support reasoning effort"): + validate_workflow_config(config) + class TestStructuredOutputCrossCheck: def test_schema_against_no_support_errors(self, patch_caps: Any) -> None: diff --git a/tests/test_executor/test_agent.py b/tests/test_executor/test_agent.py index 14260be2..6b92eabe 100644 --- a/tests/test_executor/test_agent.py +++ b/tests/test_executor/test_agent.py @@ -10,7 +10,7 @@ import pytest -from conductor.config.schema import AgentDef, OutputField +from conductor.config.schema import AgentDef, OutputField, ReasoningConfig from conductor.exceptions import TemplateError, ValidationError from conductor.executor.agent import AgentExecutor, resolve_agent_tools from conductor.providers.base import AgentOutput @@ -313,6 +313,200 @@ def mock_handler(agent, prompt, context): assert agent.model == "{{ workflow.input.model_name }}" +class TestAgentExecutorReasoningEffortRendering: + """Tests for templated ``reasoning.effort`` rendering (#262).""" + + @pytest.mark.asyncio + async def test_templated_effort_is_rendered(self) -> None: + """A templated reasoning.effort resolves before the provider runs.""" + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + reasoning=ReasoningConfig(effort="{{ workflow.input.eff }}"), + ) + + captured: dict[str, object] = {} + + def mock_handler(agent, prompt, context): # noqa: ANN001, ANN202 + captured["effort"] = agent.reasoning.effort if agent.reasoning else None + return {"result": "ok"} + + provider = CopilotProvider(mock_handler=mock_handler) + executor = AgentExecutor(provider) + + context = {"workflow": {"input": {"eff": "xhigh"}}} + await executor.execute(agent, context) + + # Provider sees the concrete resolved literal, not the template. + assert captured["effort"] == "xhigh" + + @pytest.mark.asyncio + async def test_static_effort_is_unchanged(self) -> None: + """A literal reasoning.effort passes through untouched.""" + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + reasoning=ReasoningConfig(effort="high"), + ) + + captured: dict[str, object] = {} + + def mock_handler(agent, prompt, context): # noqa: ANN001, ANN202 + captured["effort"] = agent.reasoning.effort if agent.reasoning else None + return {"result": "ok"} + + provider = CopilotProvider(mock_handler=mock_handler) + executor = AgentExecutor(provider) + + await executor.execute(agent, {}) + assert captured["effort"] == "high" + + @pytest.mark.asyncio + async def test_templated_effort_does_not_mutate_original(self) -> None: + """Rendering effort copies the agent, leaving the original template.""" + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + reasoning=ReasoningConfig(effort="{{ workflow.input.eff }}"), + ) + + provider = CopilotProvider(mock_handler=lambda a, p, c: {"result": "ok"}) + executor = AgentExecutor(provider) + + await executor.execute(agent, {"workflow": {"input": {"eff": "low"}}}) + + assert agent.reasoning is not None + assert agent.reasoning.effort == "{{ workflow.input.eff }}" + + @pytest.mark.asyncio + async def test_effort_resolving_to_invalid_value_raises(self) -> None: + """A template resolving to a non-enum value raises a clear error.""" + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + reasoning=ReasoningConfig(effort="{{ workflow.input.eff }}"), + ) + + provider = CopilotProvider(mock_handler=lambda a, p, c: {"result": "ok"}) + executor = AgentExecutor(provider) + + with pytest.raises(ValidationError) as exc_info: + await executor.execute(agent, {"workflow": {"input": {"eff": "ultra"}}}) + assert "reasoning.effort" in str(exc_info.value) + assert "ultra" in str(exc_info.value) + + @pytest.mark.asyncio + async def test_effort_template_trailing_newline_is_stripped(self) -> None: + """A template that renders trailing whitespace still validates.""" + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + reasoning=ReasoningConfig(effort="{{ workflow.input.eff }}\n"), + ) + + captured: dict[str, object] = {} + + def mock_handler(agent, prompt, context): # noqa: ANN001, ANN202 + captured["effort"] = agent.reasoning.effort if agent.reasoning else None + return {"result": "ok"} + + provider = CopilotProvider(mock_handler=mock_handler) + executor = AgentExecutor(provider) + + await executor.execute(agent, {"workflow": {"input": {"eff": "medium"}}}) + assert captured["effort"] == "medium" + + +class TestAgentExecutorContextTierRendering: + """Tests for templated ``context_tier`` rendering (#262).""" + + @pytest.mark.asyncio + async def test_templated_context_tier_is_rendered(self) -> None: + """A templated context_tier resolves before the provider runs.""" + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + context_tier="{{ workflow.input.tier }}", + ) + + captured: dict[str, object] = {} + + def mock_handler(agent, prompt, context): # noqa: ANN001, ANN202 + captured["tier"] = agent.context_tier + return {"result": "ok"} + + provider = CopilotProvider(mock_handler=mock_handler) + executor = AgentExecutor(provider) + + context = {"workflow": {"input": {"tier": "long_context"}}} + await executor.execute(agent, context) + + assert captured["tier"] == "long_context" + + @pytest.mark.asyncio + async def test_static_context_tier_is_unchanged(self) -> None: + """A literal context_tier passes through untouched.""" + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + context_tier="default", + ) + + captured: dict[str, object] = {} + + def mock_handler(agent, prompt, context): # noqa: ANN001, ANN202 + captured["tier"] = agent.context_tier + return {"result": "ok"} + + provider = CopilotProvider(mock_handler=mock_handler) + executor = AgentExecutor(provider) + + await executor.execute(agent, {}) + assert captured["tier"] == "default" + + @pytest.mark.asyncio + async def test_templated_context_tier_does_not_mutate_original(self) -> None: + """Rendering context_tier copies the agent, leaving the template.""" + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + context_tier="{{ workflow.input.tier }}", + ) + + provider = CopilotProvider(mock_handler=lambda a, p, c: {"result": "ok"}) + executor = AgentExecutor(provider) + + await executor.execute(agent, {"workflow": {"input": {"tier": "default"}}}) + + assert agent.context_tier == "{{ workflow.input.tier }}" + + @pytest.mark.asyncio + async def test_context_tier_resolving_to_invalid_value_raises(self) -> None: + """A template resolving to a non-enum value raises a clear error.""" + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + context_tier="{{ workflow.input.tier }}", + ) + + provider = CopilotProvider(mock_handler=lambda a, p, c: {"result": "ok"}) + executor = AgentExecutor(provider) + + with pytest.raises(ValidationError) as exc_info: + await executor.execute(agent, {"workflow": {"input": {"tier": "huge"}}}) + assert "context_tier" in str(exc_info.value) + assert "huge" in str(exc_info.value) + + class TestAgentExecutorWithTools: """Tests for agent execution with tools.""" From f5d5ee52be74b23fc7dd187a837effde1d657c97 Mon Sep 17 00:00:00 2001 From: xuefl Date: Fri, 19 Jun 2026 13:36:32 -0700 Subject: [PATCH 2/3] fix: defer {% %} statement-style templated effort in capability cross-check (#262 pr-review R1-001) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Multi-agent pr-review (claude-opus-4.8 + gpt-5.5, full consensus, 91/100, 0 disputed) caught a self-consistency gap in the validator capability cross-check added by this PR. The schema (`_validate_effort` / `_validate_context_tier`), the runtime executor (`_render_enum_field` guard), and the `context_tier` validator all defer on `{{` OR `{%`, but the `reasoning.effort` capability cross-check in `config/validator.py` deferred the value-dependent membership check only for `{{`-style templates. So a `{% if %}...{% endif %}` effort on a reasoning-capable provider (non-None `reasoning_effort` tuple, e.g. copilot) fell through to `requested not in supported` and made `validate_workflow_config` raise — at odds with this PR's own statement-style support (and the #262 acceptance criterion that `conductor validate` must not reject a templated enum field). Scope was `conductor validate` only; the runtime path was already correct. Fix: mirror the shared `{{`-or-`{%` predicate used everywhere else. Added a validator-capabilities regression test feeding a `{% if %}...{% endif %}` effort through `validate_workflow_config` against a provider with a restricted non-None `reasoning_effort` tuple, asserting it does not raise (the existing test only covered `{{`). Verification: ruff + ty clean (ty == 12 baseline); 3527 passed (the 13 failures are identical on clean upstream/main — pre-existing Windows env). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/conductor/config/validator.py | 2 +- .../test_validator_capabilities.py | 26 +++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/src/conductor/config/validator.py b/src/conductor/config/validator.py index c8064621..fbba4890 100644 --- a/src/conductor/config/validator.py +++ b/src/conductor/config/validator.py @@ -1720,7 +1720,7 @@ def _caps_for(name: str) -> ProviderCapabilities | None: f"but provider '{provider_name}' does not support reasoning " f"effort (capabilities.reasoning_effort=None)." ) - elif isinstance(requested, str) and "{{" in requested: + elif isinstance(requested, str) and ("{{" in requested or "{%" in requested): pass elif requested not in supported: errors.append( diff --git a/tests/test_config/test_validator_capabilities.py b/tests/test_config/test_validator_capabilities.py index af363406..65264b2d 100644 --- a/tests/test_config/test_validator_capabilities.py +++ b/tests/test_config/test_validator_capabilities.py @@ -206,6 +206,32 @@ def test_templated_effort_defers_capability_check(self, patch_caps: Any) -> None ) validate_workflow_config(config) # must not raise + def test_statement_style_templated_effort_defers_capability_check( + self, patch_caps: Any + ) -> None: + """#262 (pr-review R1-001): a ``{% %}`` statement-style effort must + ALSO defer the membership check, not just ``{{ }}``. + + The schema, executor, and context_tier validator all defer on ``{{`` + OR ``{%``; the capability cross-check must mirror that predicate. + Provider supports only ``low``/``medium`` (a restricted non-None + tuple), but a ``{% if %}...{% endif %}`` effort must NOT raise at + validate time — it's resolved + re-validated at runtime. + """ + patch_caps({"copilot": _caps(reasoning_effort=("low", "medium"))}) + config = _build_workflow( + agents=[ + AgentDef( + name="a", + prompt="hi", + reasoning=ReasoningConfig( + effort="{% if workflow.input.heavy %}xhigh{% else %}low{% endif %}" + ), + ), + ], + ) + validate_workflow_config(config) # must not raise + def test_templated_effort_on_no_reasoning_provider_still_errors(self, patch_caps: Any) -> None: """#262 (dual-RD): whether a provider supports reasoning AT ALL is a value-INDEPENDENT fact known at validate time. A templated effort on a From 7d42fb06a119a9196c0399dcf930ec44ee0a82b0 Mon Sep 17 00:00:00 2001 From: xuefl Date: Wed, 24 Jun 2026 14:39:51 -0700 Subject: [PATCH 3/3] =?UTF-8?q?fix(reasoning):=20address=20#263=20review?= =?UTF-8?q?=20=E2=80=94=20shared=20template=20helper=20+=20resolver=20guar?= =?UTF-8?q?ds?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Jason's review feedback on PR #263: - B6: extract the {{ }}/{% %} predicate into conductor/templating.py (is_jinja_template, a TypeGuard[str] leaf module) and route all six enum-template sites + the two new guards through it. _validate_wait_duration ({{-only) and router.py ({{+}}) are intentionally left. - B1/B2: guard resolve_reasoning_effort / resolve_context_tier so an unrendered template raises instead of being cast straight to the SDK (context_tier is forwarded unvalidated — the riskier path). Fix the "AgentDef widens effort" comment (effort is on ReasoningConfig). - B3: agent.py comment — ReasoningEffort/ContextTier are Literal aliases, not Enums (hence get_args). - B4/B5: schema validator docstrings no longer claim to "mirror _validate_wait_duration" (that method is {{-only and never defers None); point at the executor's model-render path instead. - B7: executor test for a {% %} statement template (only {{ }} was covered). - B8: pin the empty-resolution behavior — a conditional with no matching branch resolves to "" and fails closed with an actionable message (consistent with _render_enum_field and the new guards) rather than silently using the default. ruff + ty clean; targeted suites green. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/conductor/config/schema.py | 37 ++++++--- src/conductor/config/validator.py | 3 +- src/conductor/executor/agent.py | 41 +++++++--- src/conductor/providers/context_tier.py | 20 +++-- src/conductor/providers/reasoning.py | 18 +++-- src/conductor/templating.py | 26 ++++++ tests/test_executor/test_agent.py | 80 ++++++++++++++++++ .../test_resolver_template_guards.py | 81 +++++++++++++++++++ tests/test_templating.py | 41 ++++++++++ 9 files changed, 312 insertions(+), 35 deletions(-) create mode 100644 src/conductor/templating.py create mode 100644 tests/test_providers/test_resolver_template_guards.py create mode 100644 tests/test_templating.py diff --git a/src/conductor/config/schema.py b/src/conductor/config/schema.py index 0cb07b37..d6e9725f 100644 --- a/src/conductor/config/schema.py +++ b/src/conductor/config/schema.py @@ -21,6 +21,7 @@ from conductor.duration import parse_duration from conductor.providers.context_tier import ContextTier from conductor.providers.reasoning import ReasoningEffort +from conductor.templating import is_jinja_template BudgetMode = Literal["audit", "enforce"] """How the engine responds when a workflow cost budget is exceeded. @@ -584,15 +585,21 @@ class ReasoningConfig(BaseModel): @model_validator(mode="after") def _validate_effort(self) -> ReasoningConfig: - """Accept literal efforts or defer ``{{ }}`` templates to runtime. - - Mirrors :meth:`AgentDef._validate_wait_duration`: a templated value - (containing ``{{`` or ``{%``) skips literal validation here and is - rendered + validated at execute time (:mod:`conductor.executor.agent`). - A non-templated value must be a valid :data:`ReasoningEffort` literal. + """Accept literal efforts or defer ``{{ }}`` / ``{% %}`` templates. + + A templated value (detected by + :func:`~conductor.templating.is_jinja_template`, matching ``{{`` or + ``{%``) skips literal validation here and is rendered + validated at + execute time (:mod:`conductor.executor.agent`, the same place the + ``model`` field is rendered). A non-templated value must be a valid + :data:`ReasoningEffort` literal. + + Note: this is a broader check than + :meth:`AgentDef._validate_wait_duration`, which intentionally matches + only ``{{``. """ value = self.effort - if isinstance(value, str) and ("{{" in value or "{%" in value): + if is_jinja_template(value): return self if value not in get_args(ReasoningEffort): raise ValueError( @@ -1507,11 +1514,15 @@ def effective_output_schema(self) -> dict[str, OutputField] | None: def _validate_context_tier(self) -> None: """Validate ``context_tier`` for a regular (provider-backed) agent. - Mirrors :meth:`_validate_wait_duration`: an unset (``None``) or - templated (containing ``{{`` or ``{%``) value defers all literal - validation to runtime (rendered + validated in - :mod:`conductor.executor.agent`); a non-templated value must be a - valid :data:`~conductor.providers.context_tier.ContextTier` literal. + An unset (``None``) or templated value (detected by + :func:`~conductor.templating.is_jinja_template`, matching ``{{`` or + ``{%``) defers all literal validation to runtime (rendered + validated + in :mod:`conductor.executor.agent`, alongside ``model``); a + non-templated value must be a valid + :data:`~conductor.providers.context_tier.ContextTier` literal. + + This differs from :meth:`_validate_wait_duration` on two counts: that + method matches only ``{{``, and it does not defer ``None``. Non-agent step types reject ``context_tier`` outright via their own ``is not None`` checks in :meth:`validate_agent_type` (a template @@ -1519,7 +1530,7 @@ def _validate_context_tier(self) -> None: the regular-agent branch. """ value = self.context_tier - if value is None or (isinstance(value, str) and ("{{" in value or "{%" in value)): + if value is None or is_jinja_template(value): return if value not in get_args(ContextTier): raise ValueError( diff --git a/src/conductor/config/validator.py b/src/conductor/config/validator.py index fbba4890..2c3abfdd 100644 --- a/src/conductor/config/validator.py +++ b/src/conductor/config/validator.py @@ -16,6 +16,7 @@ from conductor.exceptions import ConfigurationError from conductor.providers.capabilities import ProviderCapabilities, get_capabilities +from conductor.templating import is_jinja_template if TYPE_CHECKING: from conductor.config.schema import AgentDef, WorkflowConfig @@ -1720,7 +1721,7 @@ def _caps_for(name: str) -> ProviderCapabilities | None: f"but provider '{provider_name}' does not support reasoning " f"effort (capabilities.reasoning_effort=None)." ) - elif isinstance(requested, str) and ("{{" in requested or "{%" in requested): + elif is_jinja_template(requested): pass elif requested not in supported: errors.append( diff --git a/src/conductor/executor/agent.py b/src/conductor/executor/agent.py index 859c5725..6c484f38 100644 --- a/src/conductor/executor/agent.py +++ b/src/conductor/executor/agent.py @@ -16,6 +16,7 @@ from conductor.providers.base import AgentOutput, EventCallback from conductor.providers.context_tier import ContextTier from conductor.providers.reasoning import ReasoningEffort +from conductor.templating import is_jinja_template def _verbose_log(message: str, style: str = "dim") -> None: @@ -134,6 +135,23 @@ def _render_enum_field( """ resolved = self.renderer.render(value, context).strip() if resolved not in allowed: + if not resolved: + # An empty resolution is almost always a conditional template + # (``{% if ... %}``) with no matching branch. Fail closed — the + # same way a non-empty invalid value (below) and the + # provider-side resolver guards do — rather than silently + # treating empty as "unset": to fall back to the runtime + # default, omit the field or add an else-branch emitting the + # desired literal. + raise ValidationError( + f"Agent '{agent_name}': {field_name} template resolved to an empty value.", + suggestion=( + f"A conditional template with no matching branch " + f"produced nothing. Emit one of {list(allowed)}, add an " + f"else-branch, or omit {field_name} to use the runtime " + f"default." + ), + ) raise ValidationError( f"Agent '{agent_name}': {field_name} template resolved to " f"{resolved!r}, which is not a valid value.", @@ -179,20 +197,23 @@ async def execute( ValidationError: If output doesn't match schema or tools are invalid. """ # Render model field if it contains template expressions - if agent.model and ("{{" in agent.model or "{%" in agent.model): + if is_jinja_template(agent.model): rendered_model = self.renderer.render(agent.model, context) agent = agent.model_copy(update={"model": rendered_model}) # #262: resolve templated reasoning.effort / context_tier the same - # way model is handled above. These fields are strict enums that the - # schema deliberately accepts as templates (deferring literal - # validation to here); render the value with full context, then - # validate the resolved literal so the provider sees a concrete enum. - # The ``isinstance(..., str)`` guards both detect templates and narrow - # the widened ``Enum | str | None`` field types to ``str`` for the - # type checker before passing them to ``_render_enum_field``. + # way model is handled above. These fields are strict ``Literal`` + # aliases that the schema deliberately accepts as templates (deferring + # literal validation to here); render the value with full context, then + # validate the resolved literal so the provider sees a concrete value. + # ``is_jinja_template`` both detects templates and narrows the widened + # ``ReasoningEffort | str`` / ``ContextTier | str | None`` field types + # to ``str`` for the type checker before the value reaches + # ``_render_enum_field``. (``ReasoningEffort`` and ``ContextTier`` are + # ``Literal`` aliases, not ``Enum`` types — hence the ``get_args`` + # calls below.) effort = agent.reasoning.effort if agent.reasoning is not None else None - if isinstance(effort, str) and ("{{" in effort or "{%" in effort): + if is_jinja_template(effort): resolved_effort = self._render_enum_field( value=effort, context=context, @@ -207,7 +228,7 @@ async def execute( ) tier = agent.context_tier - if isinstance(tier, str) and ("{{" in tier or "{%" in tier): + if is_jinja_template(tier): resolved_tier = self._render_enum_field( value=tier, context=context, diff --git a/src/conductor/providers/context_tier.py b/src/conductor/providers/context_tier.py index 4edfab1f..5288216c 100644 --- a/src/conductor/providers/context_tier.py +++ b/src/conductor/providers/context_tier.py @@ -15,6 +15,8 @@ from typing import TYPE_CHECKING, Literal, cast +from conductor.templating import is_jinja_template + if TYPE_CHECKING: from conductor.config.schema import AgentDef @@ -40,9 +42,17 @@ def resolve_context_tier( """ if agent.context_tier is not None: # #262: AgentDef widens ``context_tier`` to ``ContextTier | str`` so a - # ``{{ ... }}`` template survives schema validation. By the time this - # resolver runs (provider execute, after AgentExecutor renders + - # validates the field) the value is a concrete, validated literal, so - # narrowing back to ContextTier is sound. - return cast(ContextTier, agent.context_tier) + # ``{{ ... }}`` / ``{% ... %}`` template survives schema validation. By + # the time this resolver runs (provider execute, after AgentExecutor + # renders + validates the field) the value is a concrete, validated + # literal. Guard the invariant so an unrendered template raises here + # rather than being cast straight to the SDK. This matters more than for + # reasoning.effort: Copilot forwards the tier to the SDK unvalidated + # (no advertised supported_context_tiers, so the SDK is the sole + # authority — see ``CopilotProvider``), so a leaked template would + # otherwise reach the SDK raw. + tier = agent.context_tier + if is_jinja_template(tier): + raise ValueError(f"context_tier reached the provider unresolved: {tier!r}") + return cast(ContextTier, tier) return runtime_default diff --git a/src/conductor/providers/reasoning.py b/src/conductor/providers/reasoning.py index 80181846..21b8c0c2 100644 --- a/src/conductor/providers/reasoning.py +++ b/src/conductor/providers/reasoning.py @@ -13,6 +13,8 @@ from collections.abc import Mapping from typing import TYPE_CHECKING, Final, Literal, cast +from conductor.templating import is_jinja_template + if TYPE_CHECKING: from conductor.config.schema import AgentDef @@ -87,10 +89,14 @@ def resolve_reasoning_effort( set, signalling that no reasoning parameter should be sent to the SDK. """ if agent.reasoning is not None: - # #262: AgentDef widens ``effort`` to ``ReasoningEffort | str`` so a - # ``{{ ... }}`` template survives schema validation. By the time this - # resolver runs (provider execute, after AgentExecutor renders + - # validates the field) the value is a concrete, validated literal, so - # narrowing back to ReasoningEffort is sound. - return cast(ReasoningEffort, agent.reasoning.effort) + # #262: ``ReasoningConfig`` widens ``effort`` to ``ReasoningEffort | str`` + # so a ``{{ ... }}`` / ``{% ... %}`` template survives schema validation. + # By the time this resolver runs (provider execute, after AgentExecutor + # renders + validates the field) the value is a concrete, validated + # literal. Guard the invariant so an unrendered template raises here + # rather than being cast straight to the SDK. + effort = agent.reasoning.effort + if is_jinja_template(effort): + raise ValueError(f"reasoning.effort reached the provider unresolved: {effort!r}") + return cast(ReasoningEffort, effort) return runtime_default diff --git a/src/conductor/templating.py b/src/conductor/templating.py new file mode 100644 index 00000000..9ecf69ee --- /dev/null +++ b/src/conductor/templating.py @@ -0,0 +1,26 @@ +"""Jinja template detection, shared across schema validation, the executor's +render step, and the provider resolvers. + +Kept as a leaf module (no intra-``conductor`` imports) so it can be imported +from any layer without risking an import cycle. +""" + +from __future__ import annotations + +from typing import TypeGuard + + +def is_jinja_template(value: object) -> TypeGuard[str]: + """Return ``True`` if ``value`` is a string carrying a Jinja expression + (``{{ ... }}``) or statement (``{% ... %}``) marker. + + Returns a :data:`~typing.TypeGuard` of ``str`` rather than a plain ``bool`` + so a caller that branches on it narrows the value to ``str`` — the + executor's ``_render_enum_field`` path relies on that narrowing. + + Note: this matches *both* ``{{`` and ``{%``. + :meth:`AgentDef._validate_wait_duration` deliberately checks only ``{{`` + (it defers expression templates but not statement templates) and is + intentionally *not* routed through this helper. + """ + return isinstance(value, str) and ("{{" in value or "{%" in value) diff --git a/tests/test_executor/test_agent.py b/tests/test_executor/test_agent.py index 6b92eabe..aac7fcc1 100644 --- a/tests/test_executor/test_agent.py +++ b/tests/test_executor/test_agent.py @@ -421,6 +421,59 @@ def mock_handler(agent, prompt, context): # noqa: ANN001, ANN202 await executor.execute(agent, {"workflow": {"input": {"eff": "medium"}}}) assert captured["effort"] == "medium" + @pytest.mark.asyncio + async def test_statement_template_effort_is_rendered(self) -> None: + """A ``{% %}`` *statement* template (not just a ``{{ }}`` expression) + resolves through the executor. + + Exercises the ``{%`` detection branch end-to-end, which no prior test + covered. + """ + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + reasoning=ReasoningConfig( + effort="{% if workflow.input.deep %}xhigh{% else %}low{% endif %}" + ), + ) + + captured: dict[str, object] = {} + + def mock_handler(agent, prompt, context): # noqa: ANN001, ANN202 + captured["effort"] = agent.reasoning.effort if agent.reasoning else None + return {"result": "ok"} + + provider = CopilotProvider(mock_handler=mock_handler) + executor = AgentExecutor(provider) + + await executor.execute(agent, {"workflow": {"input": {"deep": True}}}) + assert captured["effort"] == "xhigh" + + @pytest.mark.asyncio + async def test_effort_resolving_to_empty_raises(self) -> None: + """A conditional template with no matching branch resolves to empty. + + The executor fails closed (rather than silently falling back to the + runtime default) with an actionable message — a deliberate behavior + choice pinned here. + """ + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + reasoning=ReasoningConfig(effort="{% if workflow.input.deep %}xhigh{% endif %}"), + ) + + provider = CopilotProvider(mock_handler=lambda a, p, c: {"result": "ok"}) + executor = AgentExecutor(provider) + + with pytest.raises(ValidationError) as exc_info: + await executor.execute(agent, {"workflow": {"input": {"deep": False}}}) + message = str(exc_info.value) + assert "reasoning.effort" in message + assert "empty" in message + class TestAgentExecutorContextTierRendering: """Tests for templated ``context_tier`` rendering (#262).""" @@ -506,6 +559,33 @@ async def test_context_tier_resolving_to_invalid_value_raises(self) -> None: assert "context_tier" in str(exc_info.value) assert "huge" in str(exc_info.value) + @pytest.mark.asyncio + async def test_statement_template_context_tier_is_rendered(self) -> None: + """A ``{% %}`` statement template resolves through the executor for + ``context_tier`` too. + + The tier path is the riskier one — Copilot forwards the resolved value + to the SDK unvalidated — so its ``{%`` branch is covered explicitly. + """ + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + context_tier="{% if workflow.input.big %}long_context{% else %}default{% endif %}", + ) + + captured: dict[str, object] = {} + + def mock_handler(agent, prompt, context): # noqa: ANN001, ANN202 + captured["tier"] = agent.context_tier + return {"result": "ok"} + + provider = CopilotProvider(mock_handler=mock_handler) + executor = AgentExecutor(provider) + + await executor.execute(agent, {"workflow": {"input": {"big": True}}}) + assert captured["tier"] == "long_context" + class TestAgentExecutorWithTools: """Tests for agent execution with tools.""" diff --git a/tests/test_providers/test_resolver_template_guards.py b/tests/test_providers/test_resolver_template_guards.py new file mode 100644 index 00000000..50b0de84 --- /dev/null +++ b/tests/test_providers/test_resolver_template_guards.py @@ -0,0 +1,81 @@ +"""Defense-in-depth: provider resolvers reject a template that reached them +unresolved (#262 / #263 review). + +In normal flow ``AgentExecutor`` renders ``reasoning.effort`` / ``context_tier`` +before the provider runs, so the resolvers only ever see concrete literals. The +guards exercised here are the backstop for any path that reaches a resolver +without that render step: they fail loudly instead of casting a raw template +straight to the SDK. +""" + +from __future__ import annotations + +import pytest + +from conductor.config.schema import AgentDef, ReasoningConfig +from conductor.providers.context_tier import resolve_context_tier +from conductor.providers.reasoning import resolve_reasoning_effort + + +def test_resolve_reasoning_effort_rejects_unrendered_expression_template() -> None: + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + reasoning=ReasoningConfig(effort="{{ workflow.input.eff }}"), + ) + with pytest.raises(ValueError, match="reasoning.effort reached the provider unresolved"): + resolve_reasoning_effort(agent, runtime_default=None) + + +def test_resolve_reasoning_effort_rejects_unrendered_statement_template() -> None: + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + reasoning=ReasoningConfig(effort="{% if x %}high{% endif %}"), + ) + with pytest.raises(ValueError, match="unresolved"): + resolve_reasoning_effort(agent, runtime_default=None) + + +def test_resolve_reasoning_effort_passes_literal_through() -> None: + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + reasoning=ReasoningConfig(effort="high"), + ) + assert resolve_reasoning_effort(agent, runtime_default=None) == "high" + + +def test_resolve_context_tier_rejects_unrendered_expression_template() -> None: + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + context_tier="{{ workflow.input.tier }}", + ) + with pytest.raises(ValueError, match="context_tier reached the provider unresolved"): + resolve_context_tier(agent, runtime_default=None) + + +def test_resolve_context_tier_rejects_unrendered_statement_template() -> None: + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + context_tier="{% if x %}long_context{% endif %}", + ) + with pytest.raises(ValueError, match="unresolved"): + resolve_context_tier(agent, runtime_default=None) + + +def test_resolve_context_tier_passes_literal_through() -> None: + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + context_tier="long_context", + ) + assert resolve_context_tier(agent, runtime_default=None) == "long_context" diff --git a/tests/test_templating.py b/tests/test_templating.py new file mode 100644 index 00000000..f8fa3ac9 --- /dev/null +++ b/tests/test_templating.py @@ -0,0 +1,41 @@ +"""Unit tests for the shared ``is_jinja_template`` helper (#263 review).""" + +from __future__ import annotations + +import pytest + +from conductor.templating import is_jinja_template + + +@pytest.mark.parametrize( + "value", + [ + "{{ workflow.input.eff }}", + "{% if x %}high{% endif %}", + "prefix {{ x }} suffix", + "{%- trim -%}", + ], +) +def test_detects_expression_and_statement_templates(value: str) -> None: + assert is_jinja_template(value) is True + + +@pytest.mark.parametrize( + "value", + [ + "high", + "long_context", + "", + "{ not a template }", + "100", + ], +) +def test_rejects_plain_strings(value: str) -> None: + assert is_jinja_template(value) is False + + +@pytest.mark.parametrize("value", [None, 123, ["{{ x }}"], {"k": "{{ x }}"}]) +def test_rejects_non_strings(value: object) -> None: + # ``None`` in particular must be False so the ``model`` render guard can drop + # its old ``agent.model and ...`` short-circuit. + assert is_jinja_template(value) is False