EntityProcess · christso · Jun 28, 2026 · Jun 28, 2026
diff --git a/CONCEPTS.md b/CONCEPTS.md
@@ -18,7 +18,7 @@ Shared domain vocabulary for this project — entities, named processes, and sta
 
 **Raw case file** — YAML, JSONL, or directory case data imported with `tests: ./cases.yaml`, string shorthand, or `type: tests`. Raw cases are reusable data inputs; they do not carry imported suite context such as shared `workspace`, shared `input`, or shared `assertions`.
 
-**Wrapper eval** — Eval YAML whose main job is to import task suites and bind runtime policy with an inline `experiment:` block. Wrapper evals may live under an `experiments/` directory, but that path is an optional user-owned convention and AgentV does not infer behavior from it. A wrapper that imports suites with `type: suite` does not define parent workspace fields such as `workspace`, `experiment.workspace`, or legacy `execution.workspace`; imported suites own task environment.
+**Wrapper eval** — Eval YAML whose main job is to import task suites and bind runtime policy with an inline `experiment:` block. Wrapper evals may live under an `experiments/` directory, but that path is an optional user-owned convention and AgentV does not infer behavior from it. A wrapper that imports suites with `type: suite` does not define parent `workspace`; imported suites own task environment.
 
 **Experiment** — The run-policy namespace for how evals are executed: target or target matrix, eval filters, repeat counts, timeouts, workers, budgets, thresholds, and related run knobs. In authored files it lives as inline `experiment:` inside eval YAML; CLI `--experiment` and `experiment.name` choose the result bucket. Lifecycle setup belongs in `workspace.hooks` or `targets[].hooks`, not in a separate experiment artifact.
 

diff --git a/apps/cli/src/commands/eval/run-eval.ts b/apps/cli/src/commands/eval/run-eval.ts
@@ -439,17 +439,22 @@ function normalizeOptions(
   const cliOut = normalizeString(rawOptions.out);
   const configOutputDir = normalizeString(config?.output?.dir);
   const cliWorkspacePath = normalizeString(rawOptions.workspacePath);
+  const configWorkspacePath = normalizeString(yamlExecution?.workspace_path);
   const cliWorkspaceModeRaw = normalizeString(rawOptions.workspaceMode);
   const cliWorkspaceMode = normalizeWorkspaceMode(rawOptions.workspaceMode);
   if (cliWorkspacePath && cliWorkspaceModeRaw && cliWorkspaceMode !== 'static') {
     throw new Error('--workspace-path requires --workspace-mode=static (or omit --workspace-mode)');
   }
-
-  const yamlExecutionRecord = yamlExecution as Record<string, unknown> | undefined;
-  const yamlWorkspaceMode = normalizeWorkspaceMode(yamlExecutionRecord?.workspace_mode);
-  const yamlWorkspacePath = normalizeString(yamlExecutionRecord?.workspace_path);
-  const workspacePath = cliWorkspacePath ?? yamlWorkspacePath;
-  const workspaceMode = cliWorkspacePath ? 'static' : (cliWorkspaceMode ?? yamlWorkspaceMode);
+  const configWorkspaceMode = normalizeWorkspaceMode(yamlExecution?.workspace_mode);
+  if (configWorkspacePath && configWorkspaceMode && configWorkspaceMode !== 'static') {
+    throw new Error(
+      'execution.workspace_path requires execution.workspace_mode: static when both are provided',
+    );
+  }
+  const useConfigWorkspacePath = cliWorkspaceMode === undefined || cliWorkspaceMode === 'static';
+  const workspacePath =
+    cliWorkspacePath ?? (useConfigWorkspacePath ? configWorkspacePath : undefined);
+  const workspaceMode = workspacePath ? 'static' : (cliWorkspaceMode ?? configWorkspaceMode);
   const resultsRepo = normalizeString(rawOptions.resultsRepo);
   const resultsPush = normalizeBoolean(rawOptions.resultsPush);
   const resultsNoPush = normalizeBoolean(rawOptions.noResultsPush);
@@ -776,17 +781,14 @@ function applyExperimentOptions(
           ? [experimentTarget]
           : options.cliTargets;
 
-  const workspaceMode =
-    options.workspaceMode ?? readExperimentWorkspaceMode(experiment.workspace?.mode);
-  const workspacePath = options.workspacePath ?? readExperimentWorkspacePath(experiment.workspace);
   return {
     ...options,
     target: options.target ?? (nextCliTargets.length === 1 ? nextCliTargets[0] : undefined),
     cliTargets: nextCliTargets,
     agentTimeoutSeconds: options.agentTimeoutSeconds ?? experiment.timeoutSeconds,
     workers: options.workers ?? experiment.workers,
-    workspaceMode: workspacePath ? 'static' : workspaceMode,
-    workspacePath,
+    workspaceMode: options.workspaceMode,
+    workspacePath: options.workspacePath,
     budgetUsd: options.budgetUsd ?? experiment.budgetUsd,
     threshold: options.threshold ?? experiment.threshold,
     experimentConfig: experiment,
@@ -923,17 +925,6 @@ function groupTestsByRunPolicy(params: {
   return [...groups.values()];
 }
 
-function readExperimentWorkspaceMode(value: unknown): 'pooled' | 'temp' | 'static' | undefined {
-  return value === 'pooled' || value === 'temp' || value === 'static' ? value : undefined;
-}
-
-function readExperimentWorkspacePath(
-  workspace: Record<string, unknown> | undefined,
-): string | undefined {
-  const value = workspace?.path;
-  return typeof value === 'string' && value.trim().length > 0 ? value.trim() : undefined;
-}
-
 function matchesTestFilter(id: string, filter: string | readonly string[]): boolean {
   return typeof filter === 'string'
     ? micromatch.isMatch(id, filter)

diff --git a/apps/cli/src/commands/eval/task-bundle.ts b/apps/cli/src/commands/eval/task-bundle.ts
@@ -703,15 +703,8 @@ function serializeWorkspace(
   workspace: WorkspaceConfig,
   rewrites: ReadonlyMap<string, string>,
 ): Record<string, unknown> {
-  const {
-    workspaceFileDir: _workspaceFileDir,
-    path: _path,
-    mode,
-    ...portableWorkspace
-  } = workspace;
-  const withoutStaticMode =
-    mode === 'static' ? portableWorkspace : { ...portableWorkspace, ...(mode ? { mode } : {}) };
-  return rewritePathsDeep(withoutStaticMode, rewrites) as Record<string, unknown>;
+  const { workspaceFileDir: _workspaceFileDir, ...portableWorkspace } = workspace;
+  return rewritePathsDeep(portableWorkspace, rewrites) as Record<string, unknown>;
 }
 
 function buildPortableEvalCase(
@@ -827,12 +820,6 @@ async function collectWorkspaceReferences(
       continue;
     }
 
-    if (workspace.path || workspace.mode === 'static') {
-      errors.push(
-        `workspace.path for test "${test.id}" cannot be bundled because it points at an existing static workspace. Use workspace.template, workspace.repos, or workspace.hooks for portable bundles.`,
-      );
-    }
-
     if (workspace.template) {
       references.push({
         kind: 'workspace_template',

diff --git a/apps/cli/src/commands/prepare/index.ts b/apps/cli/src/commands/prepare/index.ts
@@ -292,8 +292,7 @@ async function prepareAttempt(options: {
     evalCases: suite.tests,
     testId: options.testId,
     verbose: false,
-    ...(test.workspace?.path === undefined &&
-      test.workspace?.mode !== 'static' && { workspaceMode: 'temp' }),
+    workspaceMode: 'temp',
     retainOnSuccess: 'keep',
     retainOnFailure: 'keep',
   });

diff --git a/apps/cli/test/eval.integration.test.ts b/apps/cli/test/eval.integration.test.ts
@@ -530,6 +530,31 @@ describe('agentv eval CLI', () => {
     }
   }, 30_000);
 
+  it('uses config.local.yaml workspace_path as a static workspace override', async () => {
+    const fixture = await createFixture();
+    try {
+      const workspacePath = path.join(fixture.baseDir, 'local-config-workspace');
+      await mkdir(workspacePath, { recursive: true });
+      await writeFile(
+        path.join(fixture.suiteDir, '.agentv', 'config.local.yaml'),
+        `execution:\n  workspace_path: ${JSON.stringify(workspacePath)}\n`,
+        'utf8',
+      );
+
+      const result = await runCli(fixture, ['eval', fixture.testFilePath]);
+
+      expect(result.exitCode).toBe(0);
+      const diagnostics = await readDiagnostics(fixture);
+      expect(diagnostics).toMatchObject({
+        workspaceMode: 'static',
+        workspacePath,
+        resultCount: 2,
+      });
+    } finally {
+      await rm(fixture.baseDir, { recursive: true, force: true });
+    }
+  }, 30_000);
+
   it('passes run-level budget tracking through to the evaluator', async () => {
     const fixture = await createFixture();
     try {

diff --git a/apps/web/src/content/docs/docs/evaluation/eval-files.mdx b/apps/web/src/content/docs/docs/evaluation/eval-files.mdx
@@ -25,9 +25,9 @@ experiment format.
 - A **wrapper eval** is eval YAML that imports one or more suites with
   `type: suite` and binds runtime policy in its inline `experiment:` block.
   Wrapper evals can live anywhere in the repo. A wrapper that imports suites
-  with `type: suite` must not define parent workspace fields such as
-  `workspace`, `experiment.workspace`, or legacy `execution.workspace`;
-  imported suites own task environment.
+  with `type: suite` must not define parent `workspace`; imported suites own
+  task environment. Machine-local existing workspace paths belong in CLI flags
+  or `.agentv/config.local.yaml`, not eval YAML.
 
 For example, a reusable task suite can keep the task contract in one file:
 
@@ -78,11 +78,11 @@ The `experiments/` directory in that example is optional and user-owned. AgentV
 does not infer behavior from the path; the wrapper runs because it is eval YAML
 with an inline `experiment:` block. The wrapper owns runtime policy only. Put
 workspace setup in imported child suites. Parent workspace-affecting fields,
-including `workspace`, `experiment.workspace`, and legacy
-`execution.workspace`, are for parent-owned raw cases, including cases imported
-with `type: tests`. `experiment.workspace` is only a runtime `mode`/`path`
-override; repos, hooks, templates, Docker config, and isolation belong in
-top-level or case-level `workspace`.
+including top-level `workspace`, are for parent-owned raw cases, including
+cases imported with `type: tests`. Runtime workspace path overrides belong in
+CLI flags or `.agentv/config.local.yaml`; repos, hooks, templates, Docker
+config, env checks, and isolation belong in top-level or case-level
+`workspace`.
 
 ## YAML Format
 

diff --git a/apps/web/src/content/docs/docs/evaluation/experiments.mdx b/apps/web/src/content/docs/docs/evaluation/experiments.mdx
@@ -201,7 +201,7 @@ prepare files, dependencies, repos, or target-specific runner state.
 | Reset or apply per-case state | `workspace.hooks.before_each` / `workspace.hooks.after_each` |
 | Configure an agent runner or provider variant | `targets[].hooks` |
 | Choose targets, repeats, pass policy, budget, threshold | `experiment` |
-| Override run workspace mode/path without changing task setup | `experiment.workspace.mode` / `experiment.workspace.path` |
+| Bind an existing local workspace directory | `--workspace-path` or `.agentv/config.local.yaml` |
 
 ```yaml
 workspace:
@@ -223,11 +223,11 @@ experiment:
     strategy: pass_at_k
 ```
 
-`experiment.workspace` is intentionally limited to `mode` and `path`, matching
-the `--workspace-mode` and `--workspace-path` CLI flags. Put repos, templates,
-hooks, Docker config, and isolation under top-level or case-level `workspace`.
-Wrapper evals that import child evals with `type: suite` must not define
-`experiment.workspace`; imported suites own the task workspace.
+`experiment.workspace` is not an authored eval YAML field. Existing local
+workspace paths are machine-local bindings: pass `--workspace-path` for a
+one-off run or put `execution.workspace_path` in `.agentv/config.local.yaml`.
+Put repos, templates, hooks, Docker config, env checks, and isolation under
+top-level or case-level `workspace`.
 
 ## Repeat Runs
 

diff --git a/apps/web/src/content/docs/docs/evaluation/running-evals.mdx b/apps/web/src/content/docs/docs/evaluation/running-evals.mdx
@@ -297,14 +297,14 @@ This matches the standard model used by eval frameworks (promptfoo, deepeval, Op
 
 ### Workspace Modes and Finish Policy
 
-Use workspace mode and finish policies instead of multiple conflicting booleans:
+Use runtime workspace flags and finish policies instead of multiple conflicting booleans:
 
 ```bash
 # Mode: pooled | temp | static
 agentv eval evals/my-eval.yaml --workspace-mode pooled
 
-# Static mode path
-agentv eval evals/my-eval.yaml --workspace-mode static --workspace-path /path/to/workspace
+# Existing local workspace path for this run
+agentv eval evals/my-eval.yaml --workspace-path /path/to/workspace
 
 # Pooled reset policy override: standard | full (CLI override)
 agentv eval evals/my-eval.yaml --workspace-clean full
@@ -313,22 +313,22 @@ agentv eval evals/my-eval.yaml --workspace-clean full
 agentv eval evals/my-eval.yaml --retain-on-success cleanup --retain-on-failure keep
 ```
 
-Equivalent eval YAML:
+Portable eval YAML keeps workspace intent under templates, repos, hooks, env,
+Docker, and folder isolation:
 
 ```yaml
 workspace:
-  mode: pooled           # pooled | temp | static
-  path: null             # workspace path for mode=static; auto-materialised when empty/missing
+  isolation: shared      # shared | per_case
   hooks:
     enabled: true        # set false to skip all hooks
     after_each:
       reset: fast        # none | fast | strict
 ```
 
 Notes:
-- Pooling is default for shared workspaces with repos when mode is not specified.
-- `mode: static` (or `--workspace-mode static`) uses `path` / `--workspace-path`. When the path is empty or missing, the workspace is auto-materialised (template copied + repos cloned). Populated directories are reused as-is.
-- Static mode is incompatible with `isolation: per_case`.
+- Pooling is default for shared workspaces with repos.
+- `--workspace-path` uses an existing machine-local directory as-is and implies static runtime mode.
+- Runtime static mode is incompatible with `isolation: per_case`.
 - `hooks.enabled: false` skips all lifecycle hooks (setup, teardown, reset).
 - Pool slots are managed separately (`agentv workspace list|clean`).
 
@@ -562,6 +562,9 @@ Example local overlay:
 ```yaml
 execution:
   keep_workspaces: true
+  # Machine-local existing workspace binding. Do not commit this file.
+  workspace_path: /home/user/workspaces/my-eval
+  workspace_mode: static
 eval_patterns:
   - "local-evals/**/*.eval.yaml"
 ```
@@ -570,6 +573,8 @@ eval_patterns:
 |-------|---------------|------|---------|-------------|
 | `verbose` | `--verbose` | boolean | `false` | Enable verbose logging |
 | `keep_workspaces` | `--keep-workspaces` | boolean | `false` | Always keep temp workspaces after eval |
+| `workspace_path` | `--workspace-path` | string | none | Machine-local existing workspace directory |
+| `workspace_mode` | `--workspace-mode` | `pooled` / `temp` / `static` | none | Machine-local workspace preparation override |
 | `otel_file` | `--otel-file` | string | none | Write OTLP JSON trace to file |
 
 ### TypeScript config (`agentv.config.ts`)

diff --git a/apps/web/src/content/docs/docs/graders/code-graders.mdx b/apps/web/src/content/docs/docs/graders/code-graders.mdx
@@ -366,7 +366,7 @@ Use `expected_output` for reference answers and `output` for the actual final an
 
 ## Workspace Access
 
-When `workspace` is configured in the eval YAML (via `workspace.template`, `workspace.path`, or `workspace.repos`), code graders receive the workspace path in two ways:
+When `workspace` is configured in the eval YAML (via `workspace.template`, `workspace.repos`, or lifecycle hooks), code graders receive the prepared workspace path in two ways:
 
 1. **JSON payload**: `workspace_path` field in the stdin input
 2. **Environment variable**: `AGENTV_WORKSPACE_PATH`

diff --git a/apps/web/src/content/docs/docs/guides/benchmark-provenance.mdx b/apps/web/src/content/docs/docs/guides/benchmark-provenance.mdx
@@ -28,7 +28,7 @@ Use this split when deciding where a benchmark key belongs:
 | `workspace.repos[]` | Yes | Declares repo identity and checkout refs; AgentV resolves acquisition and materializes the checkout. |
 | `workspace.template` | Yes | Copies a workspace template into the run workspace. |
 | `workspace.hooks` | Yes | Runs lifecycle commands with workspace and case context on stdin. |
-| `workspace.isolation`, `workspace.mode`, `workspace.path` | Yes | Controls workspace reuse and materialization. |
+| `workspace.isolation` | Yes | Controls shared vs per-case folder isolation. Runtime workspace paths are machine-local config/CLI bindings, not benchmark provenance. |
 | `experiment` | Yes | Selects targets, thresholds, repeat policy, budgets, workers, and default grader behavior. |
 | `input`, `input_files`, `expected_output` | Yes | Builds the target prompt and passive reference answer. |
 | `assertions` | Yes | Runs deterministic, LLM, composite, or code graders. |
@@ -208,8 +208,7 @@ When one eval references another eval, preserve the task/runtime split:
 - Child `experiment:` blocks are ignored by `type: suite` composition. There is
   no fallback to the child `experiment:` when the parent has no `experiment:`.
 - Child `workspace` setup is preserved for `type: suite` imports. A parent eval
-  that imports any `type: suite` entry must not define parent workspace fields
-  such as `workspace`, `experiment.workspace`, or legacy `execution.workspace`.
+  that imports any `type: suite` entry must not define parent `workspace`.
   Parent workspace context is for parent-owned raw cases, including raw cases
   imported with `type: tests`.
 - A tests-only import can drop child workspace context only when the import mode