diff --git a/src/conductor/config/schema.py b/src/conductor/config/schema.py index 3d51127c..d6e9725f 100644 --- a/src/conductor/config/schema.py +++ b/src/conductor/config/schema.py @@ -6,7 +6,7 @@ from __future__ import annotations -from typing import Any, Literal +from typing import Any, Literal, get_args from pydantic import ( BaseModel, @@ -21,6 +21,7 @@ from conductor.duration import parse_duration from conductor.providers.context_tier import ContextTier from conductor.providers.reasoning import ReasoningEffort +from conductor.templating import is_jinja_template BudgetMode = Literal["audit", "enforce"] """How the engine responds when a workflow cost budget is exceeded. @@ -563,10 +564,49 @@ class ReasoningConfig(BaseModel): reasoning: effort: high + + Supports Jinja2 templates:: + + reasoning: + effort: "{{ workflow.input.effort }}" + + A templated ``effort`` is accepted at load time and resolved + validated + at runtime (in :mod:`conductor.executor.agent`), mirroring how ``model`` + and the ``wait`` step's ``duration`` are handled. A *literal* value must + be one of :data:`~conductor.providers.reasoning.ReasoningEffort`. + """ + + effort: ReasoningEffort | str + """Reasoning effort level applied to the agent's model calls. + + Either a literal level (``low`` / ``medium`` / ``high`` / ``xhigh``) or a + ``{{ ... }}`` Jinja2 template resolved at runtime. """ - effort: ReasoningEffort - """Reasoning effort level applied to the agent's model calls.""" + @model_validator(mode="after") + def _validate_effort(self) -> ReasoningConfig: + """Accept literal efforts or defer ``{{ }}`` / ``{% %}`` templates. + + A templated value (detected by + :func:`~conductor.templating.is_jinja_template`, matching ``{{`` or + ``{%``) skips literal validation here and is rendered + validated at + execute time (:mod:`conductor.executor.agent`, the same place the + ``model`` field is rendered). A non-templated value must be a valid + :data:`ReasoningEffort` literal. + + Note: this is a broader check than + :meth:`AgentDef._validate_wait_duration`, which intentionally matches + only ``{{``. + """ + value = self.effort + if is_jinja_template(value): + return self + if value not in get_args(ReasoningEffort): + raise ValueError( + f"reasoning.effort must be one of {list(get_args(ReasoningEffort))} " + f"or a '{{{{ ... }}}}' template (got {value!r})" + ) + return self class AgentDef(BaseModel): @@ -642,7 +682,7 @@ class AgentDef(BaseModel): Supports Jinja2 templates: {{ workflow.input.model_name }} """ - context_tier: ContextTier | None = None + context_tier: ContextTier | str | None = None """Context-window tier for models that support it (Copilot provider only). Set ``context_tier: long_context`` to pin a heavy-reasoning agent to the @@ -657,9 +697,18 @@ class AgentDef(BaseModel): Only applies to provider-backed agents (type='agent' or None). + Supports Jinja2 templates: a ``{{ workflow.input.tier }}`` value is + accepted at load time and resolved + validated at runtime (mirrors + ``model`` and the ``reasoning.effort`` handling). A *literal* value must + be one of :data:`~conductor.providers.context_tier.ContextTier`. + Example YAML:: context_tier: long_context + + Templated:: + + context_tier: "{{ workflow.input.tier }}" """ input: list[str] = Field(default_factory=list) @@ -1417,6 +1466,10 @@ def validate_agent_type(self) -> AgentDef: f"'{self.type or 'agent'}' agents cannot have 'output_type' " "(only 'set' agents support output_type)" ) + # #262: regular agents may carry a literal or templated + # context_tier; validate the literal here and defer templates to + # runtime. (reasoning.effort is validated on ReasoningConfig.) + self._validate_context_tier() if self.type == "workflow" and self.reasoning is not None: raise ValueError("workflow agents cannot have 'reasoning'") if self.type == "workflow" and self.context_tier is not None: @@ -1458,6 +1511,33 @@ def effective_output_schema(self) -> dict[str, OutputField] | None: return self.output return None + def _validate_context_tier(self) -> None: + """Validate ``context_tier`` for a regular (provider-backed) agent. + + An unset (``None``) or templated value (detected by + :func:`~conductor.templating.is_jinja_template`, matching ``{{`` or + ``{%``) defers all literal validation to runtime (rendered + validated + in :mod:`conductor.executor.agent`, alongside ``model``); a + non-templated value must be a valid + :data:`~conductor.providers.context_tier.ContextTier` literal. + + This differs from :meth:`_validate_wait_duration` on two counts: that + method matches only ``{{``, and it does not defer ``None``. + + Non-agent step types reject ``context_tier`` outright via their own + ``is not None`` checks in :meth:`validate_agent_type` (a template + string is still "not None"), so this helper is only dispatched from + the regular-agent branch. + """ + value = self.context_tier + if value is None or is_jinja_template(value): + return + if value not in get_args(ContextTier): + raise ValueError( + f"context_tier must be one of {list(get_args(ContextTier))} " + f"or a '{{{{ ... }}}}' template (got {value!r})" + ) + def _validate_wait_duration(self) -> None: """Validate ``duration`` for a ``wait`` agent. diff --git a/src/conductor/config/validator.py b/src/conductor/config/validator.py index e5de78c4..2c3abfdd 100644 --- a/src/conductor/config/validator.py +++ b/src/conductor/config/validator.py @@ -16,6 +16,7 @@ from conductor.exceptions import ConfigurationError from conductor.providers.capabilities import ProviderCapabilities, get_capabilities +from conductor.templating import is_jinja_template if TYPE_CHECKING: from conductor.config.schema import AgentDef, WorkflowConfig @@ -1705,12 +1706,23 @@ def _caps_for(name: str) -> ProviderCapabilities | None: if (agent.reasoning is not None and agent.reasoning.effort is not None) else "runtime.default_reasoning_effort" ) + # #262: whether the provider supports reasoning effort AT ALL is a + # value-INDEPENDENT fact known at validate time, so reject even a + # templated effort here — no resolved value could ever be valid on + # a provider with reasoning_effort=None (e.g. claude-agent-sdk, + # which ignores reasoning entirely). Only the membership check + # (does the resolved literal fall in the supported subset?) is + # value-dependent and must be deferred for templates; providers + # that support reasoning re-validate the resolved value at runtime + # (copilot._validate_reasoning_effort_for_model / claude thinking). if supported is None: errors.append( f"Agent '{agent.name}' resolves to {source}={requested!r} " f"but provider '{provider_name}' does not support reasoning " f"effort (capabilities.reasoning_effort=None)." ) + elif is_jinja_template(requested): + pass elif requested not in supported: errors.append( f"Agent '{agent.name}' resolves to {source}={requested!r} " diff --git a/src/conductor/executor/agent.py b/src/conductor/executor/agent.py index a0cc8ccf..6c484f38 100644 --- a/src/conductor/executor/agent.py +++ b/src/conductor/executor/agent.py @@ -8,12 +8,15 @@ import asyncio import contextlib -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, get_args from conductor.exceptions import ValidationError from conductor.executor.output import parse_json_output, validate_output from conductor.executor.template import TemplateRenderer from conductor.providers.base import AgentOutput, EventCallback +from conductor.providers.context_tier import ContextTier +from conductor.providers.reasoning import ReasoningEffort +from conductor.templating import is_jinja_template def _verbose_log(message: str, style: str = "dim") -> None: @@ -112,6 +115,50 @@ def __init__( self.instructions_preamble = instructions_preamble self.renderer = TemplateRenderer() + def _render_enum_field( + self, + *, + value: str, + context: dict[str, Any], + allowed: tuple[str, ...], + field_name: str, + agent_name: str, + ) -> str: + """Render a templated enum field and validate the resolved literal. + + Mirrors the ``model`` rendering above: a ``{{ ... }}`` value is + rendered with the full agent context, stripped (the renderer keeps + trailing newlines), and checked against ``allowed``. Raises a + :class:`~conductor.exceptions.ValidationError` when the resolved + value is not one of the permitted literals so the failure is actionable + at execute time rather than silently forwarded to the provider/SDK. + """ + resolved = self.renderer.render(value, context).strip() + if resolved not in allowed: + if not resolved: + # An empty resolution is almost always a conditional template + # (``{% if ... %}``) with no matching branch. Fail closed — the + # same way a non-empty invalid value (below) and the + # provider-side resolver guards do — rather than silently + # treating empty as "unset": to fall back to the runtime + # default, omit the field or add an else-branch emitting the + # desired literal. + raise ValidationError( + f"Agent '{agent_name}': {field_name} template resolved to an empty value.", + suggestion=( + f"A conditional template with no matching branch " + f"produced nothing. Emit one of {list(allowed)}, add an " + f"else-branch, or omit {field_name} to use the runtime " + f"default." + ), + ) + raise ValidationError( + f"Agent '{agent_name}': {field_name} template resolved to " + f"{resolved!r}, which is not a valid value.", + suggestion=f"Resolved value must be one of {list(allowed)}.", + ) + return resolved + async def execute( self, agent: AgentDef, @@ -150,10 +197,47 @@ async def execute( ValidationError: If output doesn't match schema or tools are invalid. """ # Render model field if it contains template expressions - if agent.model and ("{{" in agent.model or "{%" in agent.model): + if is_jinja_template(agent.model): rendered_model = self.renderer.render(agent.model, context) agent = agent.model_copy(update={"model": rendered_model}) + # #262: resolve templated reasoning.effort / context_tier the same + # way model is handled above. These fields are strict ``Literal`` + # aliases that the schema deliberately accepts as templates (deferring + # literal validation to here); render the value with full context, then + # validate the resolved literal so the provider sees a concrete value. + # ``is_jinja_template`` both detects templates and narrows the widened + # ``ReasoningEffort | str`` / ``ContextTier | str | None`` field types + # to ``str`` for the type checker before the value reaches + # ``_render_enum_field``. (``ReasoningEffort`` and ``ContextTier`` are + # ``Literal`` aliases, not ``Enum`` types — hence the ``get_args`` + # calls below.) + effort = agent.reasoning.effort if agent.reasoning is not None else None + if is_jinja_template(effort): + resolved_effort = self._render_enum_field( + value=effort, + context=context, + allowed=get_args(ReasoningEffort), + field_name="reasoning.effort", + agent_name=agent.name, + ) + # ``agent.reasoning`` is not None here (effort came from it). + assert agent.reasoning is not None + agent = agent.model_copy( + update={"reasoning": agent.reasoning.model_copy(update={"effort": resolved_effort})} + ) + + tier = agent.context_tier + if is_jinja_template(tier): + resolved_tier = self._render_enum_field( + value=tier, + context=context, + allowed=get_args(ContextTier), + field_name="context_tier", + agent_name=agent.name, + ) + agent = agent.model_copy(update={"context_tier": resolved_tier}) + # Render prompt with context rendered_prompt = self.renderer.render(agent.prompt, context) diff --git a/src/conductor/providers/context_tier.py b/src/conductor/providers/context_tier.py index 8cdae244..5288216c 100644 --- a/src/conductor/providers/context_tier.py +++ b/src/conductor/providers/context_tier.py @@ -13,7 +13,9 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Literal +from typing import TYPE_CHECKING, Literal, cast + +from conductor.templating import is_jinja_template if TYPE_CHECKING: from conductor.config.schema import AgentDef @@ -39,5 +41,18 @@ def resolve_context_tier( The resolved ``ContextTier``, or ``None`` to send no value. """ if agent.context_tier is not None: - return agent.context_tier + # #262: AgentDef widens ``context_tier`` to ``ContextTier | str`` so a + # ``{{ ... }}`` / ``{% ... %}`` template survives schema validation. By + # the time this resolver runs (provider execute, after AgentExecutor + # renders + validates the field) the value is a concrete, validated + # literal. Guard the invariant so an unrendered template raises here + # rather than being cast straight to the SDK. This matters more than for + # reasoning.effort: Copilot forwards the tier to the SDK unvalidated + # (no advertised supported_context_tiers, so the SDK is the sole + # authority — see ``CopilotProvider``), so a leaked template would + # otherwise reach the SDK raw. + tier = agent.context_tier + if is_jinja_template(tier): + raise ValueError(f"context_tier reached the provider unresolved: {tier!r}") + return cast(ContextTier, tier) return runtime_default diff --git a/src/conductor/providers/reasoning.py b/src/conductor/providers/reasoning.py index e2163327..21b8c0c2 100644 --- a/src/conductor/providers/reasoning.py +++ b/src/conductor/providers/reasoning.py @@ -11,7 +11,9 @@ from __future__ import annotations from collections.abc import Mapping -from typing import TYPE_CHECKING, Final, Literal +from typing import TYPE_CHECKING, Final, Literal, cast + +from conductor.templating import is_jinja_template if TYPE_CHECKING: from conductor.config.schema import AgentDef @@ -87,5 +89,14 @@ def resolve_reasoning_effort( set, signalling that no reasoning parameter should be sent to the SDK. """ if agent.reasoning is not None: - return agent.reasoning.effort + # #262: ``ReasoningConfig`` widens ``effort`` to ``ReasoningEffort | str`` + # so a ``{{ ... }}`` / ``{% ... %}`` template survives schema validation. + # By the time this resolver runs (provider execute, after AgentExecutor + # renders + validates the field) the value is a concrete, validated + # literal. Guard the invariant so an unrendered template raises here + # rather than being cast straight to the SDK. + effort = agent.reasoning.effort + if is_jinja_template(effort): + raise ValueError(f"reasoning.effort reached the provider unresolved: {effort!r}") + return cast(ReasoningEffort, effort) return runtime_default diff --git a/src/conductor/templating.py b/src/conductor/templating.py new file mode 100644 index 00000000..9ecf69ee --- /dev/null +++ b/src/conductor/templating.py @@ -0,0 +1,26 @@ +"""Jinja template detection, shared across schema validation, the executor's +render step, and the provider resolvers. + +Kept as a leaf module (no intra-``conductor`` imports) so it can be imported +from any layer without risking an import cycle. +""" + +from __future__ import annotations + +from typing import TypeGuard + + +def is_jinja_template(value: object) -> TypeGuard[str]: + """Return ``True`` if ``value`` is a string carrying a Jinja expression + (``{{ ... }}``) or statement (``{% ... %}``) marker. + + Returns a :data:`~typing.TypeGuard` of ``str`` rather than a plain ``bool`` + so a caller that branches on it narrows the value to ``str`` — the + executor's ``_render_enum_field`` path relies on that narrowing. + + Note: this matches *both* ``{{`` and ``{%``. + :meth:`AgentDef._validate_wait_duration` deliberately checks only ``{{`` + (it defers expression templates but not statement templates) and is + intentionally *not* routed through this helper. + """ + return isinstance(value, str) and ("{{" in value or "{%" in value) diff --git a/tests/fixtures/valid_templated_enums.yaml b/tests/fixtures/valid_templated_enums.yaml new file mode 100644 index 00000000..8fe198f0 --- /dev/null +++ b/tests/fixtures/valid_templated_enums.yaml @@ -0,0 +1,38 @@ +# Valid workflow exercising templated reasoning.effort + context_tier (#262). +# Both enum fields carry Jinja2 templates that resolve from workflow.input; +# `conductor validate` must accept this without resolving the templates. +workflow: + name: templated-enums-workflow + description: A workflow with templated reasoning.effort and context_tier + entry_point: thinker + + input: + effort: + type: string + required: false + default: high + tier: + type: string + required: false + default: long_context + topic: + type: string + required: false + default: "the meaning of life" + +agents: + - name: thinker + model: gpt-4 + prompt: "Reason about {{ workflow.input.topic }}" + reasoning: + effort: "{{ workflow.input.effort }}" + context_tier: "{{ workflow.input.tier }}" + output: + answer: + type: string + description: The reasoned answer + routes: + - to: $end + +output: + message: "{{ thinker.output.answer }}" diff --git a/tests/test_cli/test_validate.py b/tests/test_cli/test_validate.py index bd2c7bc8..195962e1 100644 --- a/tests/test_cli/test_validate.py +++ b/tests/test_cli/test_validate.py @@ -44,6 +44,16 @@ def test_validate_valid_full_workflow(self, fixtures_dir: Path) -> None: assert result.exit_code == 0 assert "Validation Successful" in result.output + def test_validate_templated_enums(self, fixtures_dir: Path) -> None: + """#262: a workflow with templated reasoning.effort + context_tier + validates clean (templates are deferred to runtime, not rejected).""" + workflow_file = fixtures_dir / "valid_templated_enums.yaml" + result = runner.invoke(app, ["validate", str(workflow_file)]) + + assert result.exit_code == 0 + assert "Validation Successful" in result.output + assert "templated-enums-workflow" in result.output + def test_validate_malformed_yaml(self, fixtures_dir: Path) -> None: """Test validating a file with malformed YAML.""" workflow_file = fixtures_dir / "invalid_malformed.yaml" diff --git a/tests/test_config/test_schema.py b/tests/test_config/test_schema.py index 5e3cc757..6f7b510d 100644 --- a/tests/test_config/test_schema.py +++ b/tests/test_config/test_schema.py @@ -1424,6 +1424,76 @@ def test_workflow_with_reasoning_raises(self) -> None: assert "workflow agents cannot have 'reasoning'" in str(exc_info.value) +class TestAgentDefReasoningTemplating: + """Tests for templated ``reasoning.effort`` (#262 — defer to runtime).""" + + def test_templated_effort_deferred(self) -> None: + """A ``{{ }}`` effort is accepted at load and preserved verbatim.""" + agent = AgentDef( + name="a", + model="gpt-4", + prompt="test", + reasoning={"effort": "{{ workflow.input.eff }}"}, + ) + assert agent.reasoning is not None + # Not coerced or rejected at schema time — deferred to runtime. + assert agent.reasoning.effort == "{{ workflow.input.eff }}" + + def test_templated_effort_with_surrounding_text_deferred(self) -> None: + """A template embedded in other text is still deferred (any ``{{``).""" + agent = AgentDef( + name="a", + model="gpt-4", + prompt="test", + reasoning={"effort": "{{ workflow.input.eff }}-ish"}, + ) + assert agent.reasoning is not None + assert agent.reasoning.effort == "{{ workflow.input.eff }}-ish" + + def test_statement_style_effort_template_deferred(self) -> None: + """A ``{% %}`` statement template is deferred (matches executor guard). + + The executor renders effort on ``{{`` or ``{%``; the schema must defer + the same set so a conditional like ``{% if %}…{% endif %}`` is not + rejected at load (dual-RD opus P2-2). + """ + tmpl = "{% if workflow.input.heavy %}xhigh{% else %}low{% endif %}" + agent = AgentDef( + name="a", + model="gpt-4", + prompt="test", + reasoning={"effort": tmpl}, + ) + assert agent.reasoning is not None + assert agent.reasoning.effort == tmpl + + @pytest.mark.parametrize("effort", ["ultra", "none", "max"]) + def test_literal_invalid_effort_still_rejected(self, effort: str) -> None: + """A non-templated invalid literal still raises at schema time.""" + with pytest.raises(ValidationError): + AgentDef( + name="a", + model="gpt-4", + prompt="test", + reasoning={"effort": effort}, # type: ignore[arg-type] + ) + + def test_human_gate_with_templated_reasoning_still_raises(self) -> None: + """human_gate forbids reasoning even when the effort is templated. + + A template string is still "not None", so the per-type ban applies. + """ + with pytest.raises(ValidationError) as exc_info: + AgentDef( + name="gate1", + type="human_gate", + prompt="Choose:", + options=[GateOption(label="Ok", value="ok", route="next")], + reasoning={"effort": "{{ workflow.input.eff }}"}, + ) + assert "human_gate agents cannot have 'reasoning'" in str(exc_info.value) + + class TestAgentDefValidator: """Tests for the validator field on AgentDef.""" @@ -1763,6 +1833,63 @@ def test_terminate_with_context_tier_raises(self) -> None: assert "terminate agents cannot have 'context_tier'" in str(exc_info.value) +class TestAgentDefContextTierTemplating: + """Tests for templated ``context_tier`` (#262 — defer to runtime).""" + + def test_templated_context_tier_deferred(self) -> None: + """A ``{{ }}`` context_tier is accepted at load and preserved.""" + agent = AgentDef( + name="a", + model="gpt-4", + prompt="test", + context_tier="{{ workflow.input.tier }}", + ) + # Not coerced or rejected at schema time — deferred to runtime. + assert agent.context_tier == "{{ workflow.input.tier }}" + + def test_statement_style_context_tier_template_deferred(self) -> None: + """A ``{% %}`` statement template is deferred (matches executor guard). + + The executor renders context_tier on ``{{`` or ``{%``; the schema must + defer the same set so a conditional is not rejected at load (dual-RD + opus P2-2). + """ + tmpl = "{% if workflow.input.heavy %}long_context{% else %}default{% endif %}" + agent = AgentDef( + name="a", + model="gpt-4", + prompt="test", + context_tier=tmpl, + ) + assert agent.context_tier == tmpl + + @pytest.mark.parametrize("tier", ["1m", "huge", "long-context"]) + def test_literal_invalid_tier_still_rejected(self, tier: str) -> None: + """A non-templated invalid literal still raises at schema time.""" + with pytest.raises(ValidationError): + AgentDef( + name="a", + model="gpt-4", + prompt="test", + context_tier=tier, # type: ignore[arg-type] + ) + + def test_human_gate_with_templated_context_tier_still_raises(self) -> None: + """human_gate forbids context_tier even when it is templated. + + A template string is still "not None", so the per-type ban applies. + """ + with pytest.raises(ValidationError) as exc_info: + AgentDef( + name="g", + type="human_gate", + prompt="Approve?", + options=[GateOption(label="Ok", value="ok", route="next")], + context_tier="{{ workflow.input.tier }}", + ) + assert "human_gate agents cannot have 'context_tier'" in str(exc_info.value) + + class TestRuntimeConfigDefaultContextTier: """Tests for default_context_tier on RuntimeConfig.""" diff --git a/tests/test_config/test_validator_capabilities.py b/tests/test_config/test_validator_capabilities.py index b5257c1c..65264b2d 100644 --- a/tests/test_config/test_validator_capabilities.py +++ b/tests/test_config/test_validator_capabilities.py @@ -187,6 +187,70 @@ def test_supported_level_passes(self, patch_caps: Any) -> None: ) validate_workflow_config(config) + def test_templated_effort_defers_capability_check(self, patch_caps: Any) -> None: + """#262: a templated effort can't be checked vs caps until runtime. + + The provider only advertises ``low``/``medium``, but a templated + effort must NOT raise at validate time — the resolved value is + checked by the runtime resolver instead. + """ + patch_caps({"copilot": _caps(reasoning_effort=("low", "medium"))}) + config = _build_workflow( + agents=[ + AgentDef( + name="a", + prompt="hi", + reasoning=ReasoningConfig(effort="{{ workflow.input.eff }}"), + ), + ], + ) + validate_workflow_config(config) # must not raise + + def test_statement_style_templated_effort_defers_capability_check( + self, patch_caps: Any + ) -> None: + """#262 (pr-review R1-001): a ``{% %}`` statement-style effort must + ALSO defer the membership check, not just ``{{ }}``. + + The schema, executor, and context_tier validator all defer on ``{{`` + OR ``{%``; the capability cross-check must mirror that predicate. + Provider supports only ``low``/``medium`` (a restricted non-None + tuple), but a ``{% if %}...{% endif %}`` effort must NOT raise at + validate time — it's resolved + re-validated at runtime. + """ + patch_caps({"copilot": _caps(reasoning_effort=("low", "medium"))}) + config = _build_workflow( + agents=[ + AgentDef( + name="a", + prompt="hi", + reasoning=ReasoningConfig( + effort="{% if workflow.input.heavy %}xhigh{% else %}low{% endif %}" + ), + ), + ], + ) + validate_workflow_config(config) # must not raise + + def test_templated_effort_on_no_reasoning_provider_still_errors(self, patch_caps: Any) -> None: + """#262 (dual-RD): whether a provider supports reasoning AT ALL is a + value-INDEPENDENT fact known at validate time. A templated effort on a + provider with ``reasoning_effort=None`` must STILL error — no resolved + value could ever be valid, and the provider ignores reasoning at + runtime, so deferring would silently drop operator intent.""" + patch_caps({"copilot": _caps(reasoning_effort=None)}) + config = _build_workflow( + agents=[ + AgentDef( + name="a", + prompt="hi", + reasoning=ReasoningConfig(effort="{{ workflow.input.eff }}"), + ), + ], + ) + with pytest.raises(ConfigurationError, match="does not support reasoning effort"): + validate_workflow_config(config) + class TestStructuredOutputCrossCheck: def test_schema_against_no_support_errors(self, patch_caps: Any) -> None: diff --git a/tests/test_executor/test_agent.py b/tests/test_executor/test_agent.py index 14260be2..aac7fcc1 100644 --- a/tests/test_executor/test_agent.py +++ b/tests/test_executor/test_agent.py @@ -10,7 +10,7 @@ import pytest -from conductor.config.schema import AgentDef, OutputField +from conductor.config.schema import AgentDef, OutputField, ReasoningConfig from conductor.exceptions import TemplateError, ValidationError from conductor.executor.agent import AgentExecutor, resolve_agent_tools from conductor.providers.base import AgentOutput @@ -313,6 +313,280 @@ def mock_handler(agent, prompt, context): assert agent.model == "{{ workflow.input.model_name }}" +class TestAgentExecutorReasoningEffortRendering: + """Tests for templated ``reasoning.effort`` rendering (#262).""" + + @pytest.mark.asyncio + async def test_templated_effort_is_rendered(self) -> None: + """A templated reasoning.effort resolves before the provider runs.""" + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + reasoning=ReasoningConfig(effort="{{ workflow.input.eff }}"), + ) + + captured: dict[str, object] = {} + + def mock_handler(agent, prompt, context): # noqa: ANN001, ANN202 + captured["effort"] = agent.reasoning.effort if agent.reasoning else None + return {"result": "ok"} + + provider = CopilotProvider(mock_handler=mock_handler) + executor = AgentExecutor(provider) + + context = {"workflow": {"input": {"eff": "xhigh"}}} + await executor.execute(agent, context) + + # Provider sees the concrete resolved literal, not the template. + assert captured["effort"] == "xhigh" + + @pytest.mark.asyncio + async def test_static_effort_is_unchanged(self) -> None: + """A literal reasoning.effort passes through untouched.""" + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + reasoning=ReasoningConfig(effort="high"), + ) + + captured: dict[str, object] = {} + + def mock_handler(agent, prompt, context): # noqa: ANN001, ANN202 + captured["effort"] = agent.reasoning.effort if agent.reasoning else None + return {"result": "ok"} + + provider = CopilotProvider(mock_handler=mock_handler) + executor = AgentExecutor(provider) + + await executor.execute(agent, {}) + assert captured["effort"] == "high" + + @pytest.mark.asyncio + async def test_templated_effort_does_not_mutate_original(self) -> None: + """Rendering effort copies the agent, leaving the original template.""" + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + reasoning=ReasoningConfig(effort="{{ workflow.input.eff }}"), + ) + + provider = CopilotProvider(mock_handler=lambda a, p, c: {"result": "ok"}) + executor = AgentExecutor(provider) + + await executor.execute(agent, {"workflow": {"input": {"eff": "low"}}}) + + assert agent.reasoning is not None + assert agent.reasoning.effort == "{{ workflow.input.eff }}" + + @pytest.mark.asyncio + async def test_effort_resolving_to_invalid_value_raises(self) -> None: + """A template resolving to a non-enum value raises a clear error.""" + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + reasoning=ReasoningConfig(effort="{{ workflow.input.eff }}"), + ) + + provider = CopilotProvider(mock_handler=lambda a, p, c: {"result": "ok"}) + executor = AgentExecutor(provider) + + with pytest.raises(ValidationError) as exc_info: + await executor.execute(agent, {"workflow": {"input": {"eff": "ultra"}}}) + assert "reasoning.effort" in str(exc_info.value) + assert "ultra" in str(exc_info.value) + + @pytest.mark.asyncio + async def test_effort_template_trailing_newline_is_stripped(self) -> None: + """A template that renders trailing whitespace still validates.""" + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + reasoning=ReasoningConfig(effort="{{ workflow.input.eff }}\n"), + ) + + captured: dict[str, object] = {} + + def mock_handler(agent, prompt, context): # noqa: ANN001, ANN202 + captured["effort"] = agent.reasoning.effort if agent.reasoning else None + return {"result": "ok"} + + provider = CopilotProvider(mock_handler=mock_handler) + executor = AgentExecutor(provider) + + await executor.execute(agent, {"workflow": {"input": {"eff": "medium"}}}) + assert captured["effort"] == "medium" + + @pytest.mark.asyncio + async def test_statement_template_effort_is_rendered(self) -> None: + """A ``{% %}`` *statement* template (not just a ``{{ }}`` expression) + resolves through the executor. + + Exercises the ``{%`` detection branch end-to-end, which no prior test + covered. + """ + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + reasoning=ReasoningConfig( + effort="{% if workflow.input.deep %}xhigh{% else %}low{% endif %}" + ), + ) + + captured: dict[str, object] = {} + + def mock_handler(agent, prompt, context): # noqa: ANN001, ANN202 + captured["effort"] = agent.reasoning.effort if agent.reasoning else None + return {"result": "ok"} + + provider = CopilotProvider(mock_handler=mock_handler) + executor = AgentExecutor(provider) + + await executor.execute(agent, {"workflow": {"input": {"deep": True}}}) + assert captured["effort"] == "xhigh" + + @pytest.mark.asyncio + async def test_effort_resolving_to_empty_raises(self) -> None: + """A conditional template with no matching branch resolves to empty. + + The executor fails closed (rather than silently falling back to the + runtime default) with an actionable message — a deliberate behavior + choice pinned here. + """ + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + reasoning=ReasoningConfig(effort="{% if workflow.input.deep %}xhigh{% endif %}"), + ) + + provider = CopilotProvider(mock_handler=lambda a, p, c: {"result": "ok"}) + executor = AgentExecutor(provider) + + with pytest.raises(ValidationError) as exc_info: + await executor.execute(agent, {"workflow": {"input": {"deep": False}}}) + message = str(exc_info.value) + assert "reasoning.effort" in message + assert "empty" in message + + +class TestAgentExecutorContextTierRendering: + """Tests for templated ``context_tier`` rendering (#262).""" + + @pytest.mark.asyncio + async def test_templated_context_tier_is_rendered(self) -> None: + """A templated context_tier resolves before the provider runs.""" + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + context_tier="{{ workflow.input.tier }}", + ) + + captured: dict[str, object] = {} + + def mock_handler(agent, prompt, context): # noqa: ANN001, ANN202 + captured["tier"] = agent.context_tier + return {"result": "ok"} + + provider = CopilotProvider(mock_handler=mock_handler) + executor = AgentExecutor(provider) + + context = {"workflow": {"input": {"tier": "long_context"}}} + await executor.execute(agent, context) + + assert captured["tier"] == "long_context" + + @pytest.mark.asyncio + async def test_static_context_tier_is_unchanged(self) -> None: + """A literal context_tier passes through untouched.""" + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + context_tier="default", + ) + + captured: dict[str, object] = {} + + def mock_handler(agent, prompt, context): # noqa: ANN001, ANN202 + captured["tier"] = agent.context_tier + return {"result": "ok"} + + provider = CopilotProvider(mock_handler=mock_handler) + executor = AgentExecutor(provider) + + await executor.execute(agent, {}) + assert captured["tier"] == "default" + + @pytest.mark.asyncio + async def test_templated_context_tier_does_not_mutate_original(self) -> None: + """Rendering context_tier copies the agent, leaving the template.""" + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + context_tier="{{ workflow.input.tier }}", + ) + + provider = CopilotProvider(mock_handler=lambda a, p, c: {"result": "ok"}) + executor = AgentExecutor(provider) + + await executor.execute(agent, {"workflow": {"input": {"tier": "default"}}}) + + assert agent.context_tier == "{{ workflow.input.tier }}" + + @pytest.mark.asyncio + async def test_context_tier_resolving_to_invalid_value_raises(self) -> None: + """A template resolving to a non-enum value raises a clear error.""" + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + context_tier="{{ workflow.input.tier }}", + ) + + provider = CopilotProvider(mock_handler=lambda a, p, c: {"result": "ok"}) + executor = AgentExecutor(provider) + + with pytest.raises(ValidationError) as exc_info: + await executor.execute(agent, {"workflow": {"input": {"tier": "huge"}}}) + assert "context_tier" in str(exc_info.value) + assert "huge" in str(exc_info.value) + + @pytest.mark.asyncio + async def test_statement_template_context_tier_is_rendered(self) -> None: + """A ``{% %}`` statement template resolves through the executor for + ``context_tier`` too. + + The tier path is the riskier one — Copilot forwards the resolved value + to the SDK unvalidated — so its ``{%`` branch is covered explicitly. + """ + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + context_tier="{% if workflow.input.big %}long_context{% else %}default{% endif %}", + ) + + captured: dict[str, object] = {} + + def mock_handler(agent, prompt, context): # noqa: ANN001, ANN202 + captured["tier"] = agent.context_tier + return {"result": "ok"} + + provider = CopilotProvider(mock_handler=mock_handler) + executor = AgentExecutor(provider) + + await executor.execute(agent, {"workflow": {"input": {"big": True}}}) + assert captured["tier"] == "long_context" + + class TestAgentExecutorWithTools: """Tests for agent execution with tools.""" diff --git a/tests/test_providers/test_resolver_template_guards.py b/tests/test_providers/test_resolver_template_guards.py new file mode 100644 index 00000000..50b0de84 --- /dev/null +++ b/tests/test_providers/test_resolver_template_guards.py @@ -0,0 +1,81 @@ +"""Defense-in-depth: provider resolvers reject a template that reached them +unresolved (#262 / #263 review). + +In normal flow ``AgentExecutor`` renders ``reasoning.effort`` / ``context_tier`` +before the provider runs, so the resolvers only ever see concrete literals. The +guards exercised here are the backstop for any path that reaches a resolver +without that render step: they fail loudly instead of casting a raw template +straight to the SDK. +""" + +from __future__ import annotations + +import pytest + +from conductor.config.schema import AgentDef, ReasoningConfig +from conductor.providers.context_tier import resolve_context_tier +from conductor.providers.reasoning import resolve_reasoning_effort + + +def test_resolve_reasoning_effort_rejects_unrendered_expression_template() -> None: + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + reasoning=ReasoningConfig(effort="{{ workflow.input.eff }}"), + ) + with pytest.raises(ValueError, match="reasoning.effort reached the provider unresolved"): + resolve_reasoning_effort(agent, runtime_default=None) + + +def test_resolve_reasoning_effort_rejects_unrendered_statement_template() -> None: + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + reasoning=ReasoningConfig(effort="{% if x %}high{% endif %}"), + ) + with pytest.raises(ValueError, match="unresolved"): + resolve_reasoning_effort(agent, runtime_default=None) + + +def test_resolve_reasoning_effort_passes_literal_through() -> None: + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + reasoning=ReasoningConfig(effort="high"), + ) + assert resolve_reasoning_effort(agent, runtime_default=None) == "high" + + +def test_resolve_context_tier_rejects_unrendered_expression_template() -> None: + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + context_tier="{{ workflow.input.tier }}", + ) + with pytest.raises(ValueError, match="context_tier reached the provider unresolved"): + resolve_context_tier(agent, runtime_default=None) + + +def test_resolve_context_tier_rejects_unrendered_statement_template() -> None: + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + context_tier="{% if x %}long_context{% endif %}", + ) + with pytest.raises(ValueError, match="unresolved"): + resolve_context_tier(agent, runtime_default=None) + + +def test_resolve_context_tier_passes_literal_through() -> None: + agent = AgentDef( + name="test", + prompt="Do something", + output=None, + context_tier="long_context", + ) + assert resolve_context_tier(agent, runtime_default=None) == "long_context" diff --git a/tests/test_templating.py b/tests/test_templating.py new file mode 100644 index 00000000..f8fa3ac9 --- /dev/null +++ b/tests/test_templating.py @@ -0,0 +1,41 @@ +"""Unit tests for the shared ``is_jinja_template`` helper (#263 review).""" + +from __future__ import annotations + +import pytest + +from conductor.templating import is_jinja_template + + +@pytest.mark.parametrize( + "value", + [ + "{{ workflow.input.eff }}", + "{% if x %}high{% endif %}", + "prefix {{ x }} suffix", + "{%- trim -%}", + ], +) +def test_detects_expression_and_statement_templates(value: str) -> None: + assert is_jinja_template(value) is True + + +@pytest.mark.parametrize( + "value", + [ + "high", + "long_context", + "", + "{ not a template }", + "100", + ], +) +def test_rejects_plain_strings(value: str) -> None: + assert is_jinja_template(value) is False + + +@pytest.mark.parametrize("value", [None, 123, ["{{ x }}"], {"k": "{{ x }}"}]) +def test_rejects_non_strings(value: object) -> None: + # ``None`` in particular must be False so the ``model`` render guard can drop + # its old ``agent.model and ...`` short-circuit. + assert is_jinja_template(value) is False