diff --git a/apps/cli/src/commands/eval/artifact-writer.ts b/apps/cli/src/commands/eval/artifact-writer.ts
index 6d230755c..32efbaad5 100644
--- a/apps/cli/src/commands/eval/artifact-writer.ts
+++ b/apps/cli/src/commands/eval/artifact-writer.ts
@@ -19,6 +19,8 @@ import {
   buildAggregateGradingArtifact,
   buildIndexArtifactEntry as buildCoreIndexArtifactEntry,
   buildResultIndexArtifact as buildCoreResultIndexArtifact,
+  buildEvalTestTargetKey,
+  buildEvaluationResultTargetKey,
   buildGradingArtifact,
   buildRunSummaryArtifact,
   buildTestTargetKey,
@@ -41,6 +43,8 @@ import {
 export {
   aggregateRunDir,
   buildAggregateGradingArtifact,
+  buildEvalTestTargetKey,
+  buildEvaluationResultTargetKey,
   buildRunSummaryArtifact,
   buildGradingArtifact,
   buildTestTargetKey,
diff --git a/apps/cli/src/commands/eval/progress-display.ts b/apps/cli/src/commands/eval/progress-display.ts
index f5db6a1b2..58805e0bd 100644
--- a/apps/cli/src/commands/eval/progress-display.ts
+++ b/apps/cli/src/commands/eval/progress-display.ts
@@ -65,8 +65,6 @@ export class ProgressDisplay {
   private readonly workers: Map<number, WorkerProgress> = new Map();
   private totalTests = 0;
   private completedTests = 0;
-  private readonly logPaths: string[] = [];
-  private readonly logPathSet = new Set<string>();
   private started = false;
   private finished = false;
   private readonly verbose: boolean;
@@ -133,24 +131,7 @@ export class ProgressDisplay {
   }
 
   addLogPaths(paths: readonly string[]): void {
-    const newPaths: string[] = [];
-    for (const path of paths) {
-      if (this.logPathSet.has(path)) {
-        continue;
-      }
-      this.logPathSet.add(path);
-      newPaths.push(path);
-    }
-
-    if (newPaths.length === 0) {
-      return;
-    }
-
-    this.logPaths.push(...newPaths);
-
-    for (const p of newPaths) {
-      console.log(`Provider log: ${p}`);
-    }
+    void paths;
   }
 
   finish(): void {
diff --git a/apps/cli/src/commands/eval/result-layout.ts b/apps/cli/src/commands/eval/result-layout.ts
index dc02ec6d6..47e8d1fb6 100644
--- a/apps/cli/src/commands/eval/result-layout.ts
+++ b/apps/cli/src/commands/eval/result-layout.ts
@@ -1,4 +1,4 @@
-import { existsSync, statSync } from 'node:fs';
+import { type Dirent, existsSync, readdirSync, statSync } from 'node:fs';
 import path from 'node:path';
 
 export const RESULT_INDEX_FILENAME = 'index.jsonl';
@@ -76,6 +76,37 @@ export function resolveExistingRunPrimaryPath(runDir: string): string | undefine
   return undefined;
 }
 
+/**
+ * Lists the primary manifest of every run bundle at or below `runDir`.
+ *
+ * A directory for which `resolveExistingRunPrimaryPath` yields a path is
+ * recorded and not searched deeper; directories that cannot be read are
+ * skipped. Paths are returned in default `Array.prototype.sort` order.
+ */
+export function discoverRunManifestPaths(runDir: string): readonly string[] {
+  const found: string[] = [];
+  const pending: string[] = [runDir];
+  for (let dir = pending.pop(); dir !== undefined; dir = pending.pop()) {
+    const manifest = resolveExistingRunPrimaryPath(dir);
+    if (manifest) {
+      found.push(manifest);
+      continue;
+    }
+    let children: Dirent<string>[] = [];
+    try {
+      children = readdirSync(dir, { withFileTypes: true });
+    } catch {
+      // Unreadable or vanished directory: nothing to discover beneath it.
+    }
+    for (const child of children) {
+      if (child.isDirectory()) {
+        pending.push(path.join(dir, child.name));
+      }
+    }
+  }
+  return found.sort();
+}
+
 export function isDirectoryPath(filePath: string): boolean {
   try {
     return statSync(filePath).isDirectory();
@@ -90,11 +121,20 @@ export function resolveWorkspaceOrFilePath(filePath: string): string {
   }
 
   const existing = resolveExistingRunPrimaryPath(filePath);
-  if (!existing) {
-    throw new Error(`Result workspace is missing ${RESULT_INDEX_FILENAME}: ${filePath}`);
+  if (existing) {
+    return existing;
   }
 
-  return existing;
+  const nested = discoverRunManifestPaths(filePath);
+  if (nested.length === 1) {
+    return nested[0];
+  }
+  if (nested.length > 1) {
+    throw new Error(
+      `Result workspace contains multiple ${RESULT_INDEX_FILENAME} manifests; pass one bundle directory or manifest: ${filePath}`,
+    );
+  }
+  throw new Error(`Result workspace is missing ${RESULT_INDEX_FILENAME}: ${filePath}`);
 }
 
 export function resolveRunManifestPath(filePath: string): string {
diff --git a/apps/cli/src/commands/eval/run-cache.ts b/apps/cli/src/commands/eval/run-cache.ts
index d30c75536..14969e6be 100644
--- a/apps/cli/src/commands/eval/run-cache.ts
+++ b/apps/cli/src/commands/eval/run-cache.ts
@@ -4,6 +4,7 @@ import path from 'node:path';
 
 import {
   RESULT_INDEX_FILENAME,
+  discoverRunManifestPaths,
   resolveExistingRunPrimaryPath,
   resolveRunIndexPath,
 } from './result-layout.js';
@@ -27,7 +28,11 @@ export interface RunCache {
  */
 export function resolveRunCacheFile(cache: RunCache): string {
   if (cache.lastRunDir) {
-    return resolveExistingRunPrimaryPath(cache.lastRunDir) ?? resolveRunIndexPath(cache.lastRunDir);
+    const direct = resolveExistingRunPrimaryPath(cache.lastRunDir);
+    if (direct) {
+      return direct;
+    }
+    return discoverRunManifestPaths(cache.lastRunDir)[0] ?? resolveRunIndexPath(cache.lastRunDir);
   }
   return '';
 }
@@ -61,14 +66,12 @@ export async function resolveCachedRunDir(cwd: string): Promise<string | undefin
 }
 
 export async function saveRunCache(cwd: string, resultPath: string): Promise<void> {
-  if (path.basename(resultPath) !== RESULT_INDEX_FILENAME) {
-    return;
-  }
-
   const dir = path.join(cwd, '.agentv');
+  const lastRunDir =
+    path.basename(resultPath) === RESULT_INDEX_FILENAME ? path.dirname(resultPath) : resultPath;
   await mkdir(dir, { recursive: true });
   const cache: RunCache = {
-    lastRunDir: path.dirname(resultPath),
+    lastRunDir,
     timestamp: new Date().toISOString(),
   };
   await writeFile(cachePath(cwd), `${JSON.stringify(cache, null, 2)}\n`, 'utf-8');
diff --git a/apps/cli/src/commands/eval/run-eval.ts b/apps/cli/src/commands/eval/run-eval.ts
index 919ccbb36..7490a6cd4 100644
--- a/apps/cli/src/commands/eval/run-eval.ts
+++ b/apps/cli/src/commands/eval/run-eval.ts
@@ -52,7 +52,8 @@ import {
 } from '../results/remote.js';
 import {
   aggregateRunDir,
-  buildTestTargetKey,
+  buildEvalTestTargetKey,
+  buildEvaluationResultTargetKey,
   deduplicateByTestIdTarget,
   parseJsonlResults,
   writeArtifactsFromResults,
@@ -65,6 +66,7 @@ import { ProgressDisplay, type Verdict, type WorkerProgress } from './progress-d
 import {
   buildDefaultRunDirFromName,
   createRunDirName,
+  discoverRunManifestPaths,
   normalizeExperimentName,
 } from './result-layout.js';
 import {
@@ -551,16 +553,6 @@ async function ensureFileExists(filePath: string, description: string): Promise<
   }
 }
 
-function buildDefaultOutputPathForExperiment(
-  cwd: string,
-  resultGroup: string | undefined,
-  runDirName: string,
-): string {
-  const runDir = buildDefaultRunDirFromName(cwd, resultGroup, runDirName);
-  mkdirSync(runDir, { recursive: true });
-  return path.join(runDir, 'index.jsonl');
-}
-
 function deriveEvalResultGroupName(evalFilePath: string | undefined): string {
   if (!evalFilePath) {
     return 'eval';
@@ -1013,6 +1005,94 @@ function applyVerboseOverride(selection: TargetSelection, cliVerbose: boolean):
   };
 }
 
+/**
+ * Makes `value` usable as one path segment: trims it and replaces each of
+ * `/ \ : * ? " < > |` with `_`. Blank input, `.` and `..` yield `fallback`.
+ */
+function safeRunPathSegment(value: string | undefined, fallback: string): string {
+  const segment = (value ?? '').trim().replace(/[/\\:*?"<>|]/g, '_');
+  return segment === '' || segment === '.' || segment === '..' ? fallback : segment;
+}
+
+/**
+ * Reads the variant configured on a `replay` target; other kinds have none.
+ */
+function targetVariantForSelection(selection: TargetSelection): string | undefined {
+  const { resolvedTarget } = selection;
+  return resolvedTarget.kind === 'replay' ? resolvedTarget.config.variant : undefined;
+}
+
+/** Map key for a result's (target, variant) pair; missing values become `'unknown'` / `null`. */
+function resultBundleKey(result: Pick<EvaluationResult, 'target' | 'variant'>): string {
+  const target = result.target ?? 'unknown';
+  const variant = result.variant ?? null;
+  return JSON.stringify({ target, variant });
+}
+
+/** Bundle directory: `<invocationDir>/<target>`, plus `/<variant>` when a variant is set. */
+function resultBundleDir(
+  invocationDir: string,
+  result: Pick<EvaluationResult, 'target' | 'variant'>,
+): string {
+  const targetDir = path.join(invocationDir, safeRunPathSegment(result.target, 'unknown-target'));
+  return result.variant
+    ? path.join(targetDir, safeRunPathSegment(result.variant, 'variant'))
+    : targetDir;
+}
+
+/**
+ * Routes each result to `index.jsonl` in its bundle directory. Writers are cached
+ * per index path as promises set before any `await`, so concurrent appends share
+ * one writer; a failed open is evicted so a later result can retry.
+ */
+class BundleOutputWriter implements OutputWriter {
+  private readonly writers = new Map<string, Promise<OutputWriter>>();
+
+  constructor(
+    private readonly invocationDir: string,
+    private readonly appendMode: boolean,
+  ) {}
+
+  async append(result: EvaluationResult): Promise<void> {
+    await (await this.writerForResult(result)).append(result);
+  }
+
+  async close(): Promise<void> {
+    await Promise.all([...this.writers.values()].map(async (writer) => (await writer).close()));
+  }
+
+  bundleDirs(): readonly string[] {
+    return this.bundleIndexPaths().map((indexPath) => path.dirname(indexPath));
+  }
+
+  bundleIndexPaths(): readonly string[] {
+    return [...this.writers.keys()];
+  }
+
+  private async writerForResult(result: EvaluationResult): Promise<OutputWriter> {
+    const indexPath = path.join(resultBundleDir(this.invocationDir, result), 'index.jsonl');
+    const cached = this.writers.get(indexPath);
+    if (cached) {
+      return cached;
+    }
+    mkdirSync(path.dirname(indexPath), { recursive: true });
+    const opening = (async () => createOutputWriter(indexPath, { append: this.appendMode }))();
+    void opening.catch(() => this.writers.delete(indexPath));
+    this.writers.set(indexPath, opening);
+    return opening;
+  }
+}
+
+/**
+ * Parses every manifest that `discoverRunManifestPaths` finds under `runDir`
+ * and returns the rows concatenated in the order the manifests were listed.
+ */
+async function readExistingResultsFromRunDir(runDir: string): Promise<EvaluationResult[]> {
+  const rows: EvaluationResult[] = [];
+  for (const manifestPath of discoverRunManifestPaths(runDir)) {
+    rows.push(...parseJsonlResults(await readFile(manifestPath, 'utf8')));
+  }
+  return rows;
+}
+
 async function prepareFileMetadata(params: {
   readonly testFilePath: string;
   readonly repoRoot: string;
@@ -1317,6 +1397,7 @@ async function runSingleEvalFile(params: {
 
   // CLI provider verbose logging should only be enabled when --verbose flag is passed
   const resolvedTargetSelection = applyVerboseOverride(selection, options.verbose);
+  const explicitVariant = targetVariantForSelection(resolvedTargetSelection);
   const providerLabel = resolvedTargetSelection.resolvedTarget.kind;
   const targetMessage = options.verbose
     ? `Using target (${resolvedTargetSelection.targetSource}): ${resolvedTargetSelection.targetName} ${buildTargetLabelSuffix(providerLabel, resolvedTargetSelection.resolvedTarget)} via ${resolvedTargetSelection.targetsFilePath}`
@@ -1428,7 +1509,9 @@ async function runSingleEvalFile(params: {
       // Trim output messages for results JSONL based on --output-messages.
       // Each message is trimmed to { role, content } only (no toolCalls, startTime, etc.).
       // Full output with tool calls goes to OTel.
-      const resultWithMetadata = withSourceMetadata(result, testFilePath, options);
+      const resultWithVariant =
+        explicitVariant && !result.variant ? { ...result, variant: explicitVariant } : result;
+      const resultWithMetadata = withSourceMetadata(resultWithVariant, testFilePath, options);
       const trimmedResult = prepareResultForJsonl(resultWithMetadata, options);
       await outputWriter.append(trimmedResult);
 
@@ -1482,7 +1565,15 @@ async function runSingleEvalFile(params: {
     },
   });
 
-  return { results: results.map((result) => withSourceMetadata(result, testFilePath, options)) };
+  return {
+    results: results.map((result) =>
+      withSourceMetadata(
+        explicitVariant && !result.variant ? { ...result, variant: explicitVariant } : result,
+        testFilePath,
+        options,
+      ),
+    ),
+  };
 }
 
 export interface RunEvalResult {
@@ -1647,14 +1738,14 @@ export async function runEvalCommand(
   if (options.resume && !options.retryErrors) {
     const explicitResumeDir = options.outputDir;
     if (explicitResumeDir) {
-      const resumeIndexPath = path.join(path.resolve(explicitResumeDir), 'index.jsonl');
-      if (existsSync(resumeIndexPath)) {
-        const content = await readFile(resumeIndexPath, 'utf8');
-        const existingResults = parseJsonlResults(content);
+      const resumeDir = path.resolve(explicitResumeDir);
+      const resumeIndexPaths = discoverRunManifestPaths(resumeDir);
+      if (resumeIndexPaths.length > 0) {
+        const existingResults = await readExistingResultsFromRunDir(resumeDir);
         resumeSkipKeys = new Set<string>();
         for (const r of existingResults) {
           if (shouldSkipExistingResultForResume(r, options.rerunFailed)) {
-            resumeSkipKeys.add(buildTestTargetKey(r.testId, r.target));
+            resumeSkipKeys.add(buildEvaluationResultTargetKey(r));
           }
         }
         isResumeAppend = true;
@@ -1663,8 +1754,8 @@ export async function runEvalCommand(
           `${modeLabel}: found ${existingResults.length} existing result(s), skipping ${resumeSkipKeys.size} completed.`,
         );
       } else {
-        // No existing index.jsonl — behave like a normal run
-        console.log('Resume: no existing index.jsonl found, starting fresh run.');
+        // No existing bundle index.jsonl — behave like a normal run
+        console.log('Resume: no existing bundle index.jsonl found, starting fresh run.');
       }
     } else {
       console.warn(
@@ -1695,7 +1786,8 @@ export async function runEvalCommand(
     console.log(`Repository root: ${repoRoot}`);
   }
 
-  // Resolve artifact directory (runDir) and primary output path.
+  // Resolve artifact directory. The CLI run dir is an invocation root; each
+  // target/variant writes its own bundle index below it.
   // Precedence: --output > config output.dir > default
   const explicitDir = options.outputDir;
   let runDir: string;
@@ -1705,11 +1797,12 @@ export async function runEvalCommand(
   if (explicitDir) {
     runDir = path.resolve(explicitDir);
     mkdirSync(runDir, { recursive: true });
-    outputPath = path.join(runDir, 'index.jsonl');
+    outputPath = runDir;
   } else {
     // Default: .agentv/results/<eval-name>/<timestamp>/.
-    outputPath = buildDefaultOutputPathForExperiment(cwd, resultGroupName, runDirName);
-    runDir = path.dirname(outputPath);
+    runDir = buildDefaultRunDirFromName(cwd, resultGroupName, runDirName);
+    mkdirSync(runDir, { recursive: true });
+    outputPath = runDir;
   }
   if (!process.env.AGENTV_RUN_TIMESTAMP) {
     process.env.AGENTV_RUN_TIMESTAMP = path.basename(runDir);
@@ -1782,8 +1875,6 @@ export async function runEvalCommand(
     }
   }
 
-  const primaryWritePath = outputPath;
-
   console.log(`Artifact directory: ${runDir}`);
 
   // Log file export paths
@@ -1896,10 +1987,9 @@ export async function runEvalCommand(
     throw new Error('--threshold must be between 0 and 1');
   }
 
-  // Build the output writer. Primary output is always JSONL to the artifact directory.
-  const outputWriter: OutputWriter = await createOutputWriter(primaryWritePath, {
-    append: isResumeAppend,
-  });
+  // Build the output writer. Each target/variant gets a separate bundle index
+  // below the invocation directory.
+  const outputWriter = new BundleOutputWriter(runDir, isResumeAppend);
 
   // Detect matrix mode: multiple targets for any file
   const isMatrixMode = Array.from(fileMetadata.values()).some((meta) => meta.selections.length > 1);
@@ -1908,16 +1998,27 @@ export async function runEvalCommand(
   // When resuming, subtract tests that will be skipped
   let totalEvalCount = 0;
   let resumeSkippedCount = 0;
+  const plannedBundleCounts = new Map<
+    string,
+    { readonly target: string; readonly variant?: string; count: number }
+  >();
   for (const meta of fileMetadata.values()) {
-    const suiteTargetNames = meta.selections.map((s) => s.selection.targetName);
     for (const test of meta.testCases) {
-      const effectiveTargets = suiteTargetNames.length > 0 ? suiteTargetNames : ['unknown'];
-      for (const tn of effectiveTargets) {
-        const key = `${test.id}::${tn}`;
+      for (const { selection } of meta.selections) {
+        const target = selection.targetName;
+        const variant = targetVariantForSelection(selection);
+        const key = buildEvalTestTargetKey(test, target, variant);
         if (resumeSkipKeys?.has(key)) {
           resumeSkippedCount++;
         } else {
           totalEvalCount++;
+          const bundleKey = resultBundleKey({ target, variant });
+          const existing = plannedBundleCounts.get(bundleKey);
+          if (existing) {
+            existing.count += 1;
+          } else {
+            plannedBundleCounts.set(bundleKey, { target, variant, count: 1 });
+          }
         }
       }
     }
@@ -2039,21 +2140,23 @@ export async function runEvalCommand(
     );
   }
 
-  // Write a stub summary.json before dispatching tests, carrying the planned
-  // execution count so an interrupted run can still surface as resumable in
-  // Dashboard (results.length < planned_test_count) even when every recorded row
-  // has execution_status: ok. The end-of-run write preserves this value via
-  // readPlannedTestCount inside aggregateRunDir / writeArtifactsFromResults.
+  // Write a stub summary.json in each planned bundle before dispatching tests,
+  // carrying the planned execution count so an interrupted run can still
+  // surface as resumable in Dashboard. The end-of-run write preserves this
+  // value via readPlannedTestCount inside aggregateRunDir /
+  // writeArtifactsFromResults.
   // Skip on resume — we want to preserve the *original* planned count.
   if (!isResumeAppend && totalEvalCount > 0) {
     const evalFile = activeTestFiles.length === 1 ? activeTestFiles[0] : '';
-    await writeInitialRunSummaryArtifact(runDir, {
-      evalFile,
-      plannedTestCount: totalEvalCount,
-      experiment: normalizeExperimentName(options.experiment),
-      experimentMetadata: runExperimentMetadata,
-      runtimeSource: runtimeSourceMetadata,
-    });
+    for (const bundle of plannedBundleCounts.values()) {
+      await writeInitialRunSummaryArtifact(resultBundleDir(runDir, bundle), {
+        evalFile,
+        plannedTestCount: bundle.count,
+        experiment: normalizeExperimentName(options.experiment),
+        experimentMetadata: runExperimentMetadata,
+        runtimeSource: runtimeSourceMetadata,
+      });
+    }
   }
 
   // Periodic WIP checkpoint loop: push partial results to a unique non-default
@@ -2099,6 +2202,7 @@ export async function runEvalCommand(
         const budgetMsg = `Run budget exceeded ($${fileBudgetTracker.currentCostUsd.toFixed(4)} / $${fileBudgetTracker.budgetCapUsd.toFixed(4)})`;
         console.log(`\n⚠ ${budgetMsg} — skipping ${path.basename(testFilePath)}`);
         for (const { selection } of targetPrep.selections) {
+          const explicitVariant = targetVariantForSelection(selection);
           const skippedResults: EvaluationResult[] = targetPrep.testCases.map((testCase) => ({
             timestamp: new Date().toISOString(),
             testId: testCase.id,
@@ -2121,6 +2225,7 @@ export async function runEvalCommand(
             failureReasonCode: 'budget_exceeded' as const,
             executionError: { message: budgetMsg, stage: 'setup' as const },
             target: selection.targetName,
+            variant: explicitVariant,
           }));
           for (const r of skippedResults) {
             await outputWriter.append(withSourceMetadata(r, testFilePath, fileOptions));
@@ -2143,7 +2248,10 @@ export async function runEvalCommand(
           // --resume / --rerun-failed: skip tests that are already completed
           const filteredTestCases = resumeSkipKeys
             ? applicableTestCases.filter(
-                (test) => !resumeSkipKeys.has(buildTestTargetKey(test.id, targetName)),
+                (test) =>
+                  !resumeSkipKeys.has(
+                    buildEvalTestTargetKey(test, targetName, targetVariantForSelection(selection)),
+                  ),
               )
             : applicableTestCases;
 
@@ -2212,6 +2320,7 @@ export async function runEvalCommand(
             console.error(
               `\n[ERROR] ⚠ Eval file failed: ${path.basename(testFilePath)} — ${message}\n`,
             );
+            const explicitVariant = targetVariantForSelection(selection);
             const errorResults: EvaluationResult[] = filteredTestCases.map((testCase) =>
               withSourceMetadata(
                 {
@@ -2237,6 +2346,7 @@ export async function runEvalCommand(
                   durationMs: 0,
                   tokenUsage: { input: 0, output: 0 },
                   target: selection.targetName,
+                  variant: explicitVariant,
                 },
                 testFilePath,
                 fileOptions,
@@ -2270,12 +2380,9 @@ export async function runEvalCommand(
     // Flush the output writer so all results are on disk before we read back.
     await outputWriter.close().catch(() => undefined);
 
-    // When resuming, compute summary from ALL results (old + new, deduplicated)
-    let summaryResults = allResults;
-    if (isResumeAppend) {
-      const content = await readFile(outputPath, 'utf8');
-      summaryResults = deduplicateByTestIdTarget(parseJsonlResults(content));
-    }
+    // Compute summary from the persisted bundle indexes so resume includes old
+    // rows and normal runs reflect the same manifests Dashboard will read.
+    const summaryResults = deduplicateByTestIdTarget(await readExistingResultsFromRunDir(runDir));
 
     const thresholdOpts =
       hasScopedRunPolicies || hasPerFileRuntimeThresholds
@@ -2305,57 +2412,77 @@ export async function runEvalCommand(
       console.log(formatMatrixSummary(summaryResults));
     }
 
-    // Write artifacts to the run directory (always, not conditional on flags)
+    // Write artifacts to target/variant bundle directories (always, not
+    // conditional on flags). The invocation root is only a container.
     if (allResults.length > 0) {
       const evalFile = activeTestFiles.length === 1 ? activeTestFiles[0] : '';
       const sourceTests = activeSourceTests;
       const taskBundleTargets = buildTaskBundleTargetSelections(activeTestFiles, fileMetadata);
+      const resultsByBundle = new Map<string, EvaluationResult[]>();
+      for (const result of allResults) {
+        const key = resultBundleKey(result);
+        const existing = resultsByBundle.get(key);
+        if (existing) {
+          existing.push(result);
+        } else {
+          resultsByBundle.set(key, [result]);
+        }
+      }
       if (isResumeAppend) {
-        // Resume mode: write per-test artifacts for newly-run tests, then aggregate
-        // from the full index.jsonl (old + new results with deduplication)
+        // Resume mode: write per-test artifacts for newly-run tests, then
+        // aggregate each bundle from its full index.jsonl (old + new results
+        // with deduplication).
         const { writePerTestArtifacts } = await import('./artifact-writer.js');
-        await writePerTestArtifacts(allResults, runDir, {
-          experiment: normalizeExperimentName(options.experiment),
-          resultGroup: resultGroupName,
-          cwd,
-          repoRoot,
-          sourceTests,
-          taskBundleTargets,
-          runtimeSource: runtimeSourceMetadata,
-        });
-        const { summaryPath } = await aggregateRunDir(runDir, {
-          evalFile,
-          experiment: normalizeExperimentName(options.experiment),
-          experimentMetadata: runExperimentMetadata,
-          runtimeSource: runtimeSourceMetadata,
-        });
-        const indexPath = path.join(runDir, 'index.jsonl');
-        console.log(`Artifact workspace updated: ${runDir}`);
-        console.log(`  Index: ${indexPath}`);
-        console.log(`  Per-test artifacts: ${runDir} (${allResults.length} new test directories)`);
-        console.log(`  Summary: ${summaryPath}`);
-      } else {
-        const { testArtifactDir, summaryPath, indexPath } = await writeArtifactsFromResults(
-          allResults,
-          runDir,
-          {
-            evalFile,
+        for (const bundleResults of resultsByBundle.values()) {
+          const bundleDir = resultBundleDir(runDir, bundleResults[0]);
+          await writePerTestArtifacts(bundleResults, bundleDir, {
             experiment: normalizeExperimentName(options.experiment),
-            experimentMetadata: runExperimentMetadata,
             resultGroup: resultGroupName,
             cwd,
             repoRoot,
             sourceTests,
             taskBundleTargets,
             runtimeSource: runtimeSourceMetadata,
-          },
-        );
-        console.log(`Artifact workspace written to: ${runDir}`);
-        console.log(`  Index: ${indexPath}`);
-        console.log(
-          `  Per-test artifacts: ${testArtifactDir} (${allResults.length} test directories)`,
-        );
-        console.log(`  Summary: ${summaryPath}`);
+          });
+          const { summaryPath } = await aggregateRunDir(bundleDir, {
+            evalFile,
+            experiment: normalizeExperimentName(options.experiment),
+            experimentMetadata: runExperimentMetadata,
+            runtimeSource: runtimeSourceMetadata,
+          });
+          const indexPath = path.join(bundleDir, 'index.jsonl');
+          console.log(`Artifact bundle updated: ${bundleDir}`);
+          console.log(`  Index: ${indexPath}`);
+          console.log(
+            `  Per-test artifacts: ${bundleDir} (${bundleResults.length} new test directories)`,
+          );
+          console.log(`  Summary: ${summaryPath}`);
+        }
+      } else {
+        for (const bundleResults of resultsByBundle.values()) {
+          const bundleDir = resultBundleDir(runDir, bundleResults[0]);
+          const { testArtifactDir, summaryPath, indexPath } = await writeArtifactsFromResults(
+            bundleResults,
+            bundleDir,
+            {
+              evalFile,
+              experiment: normalizeExperimentName(options.experiment),
+              experimentMetadata: runExperimentMetadata,
+              resultGroup: resultGroupName,
+              cwd,
+              repoRoot,
+              sourceTests,
+              taskBundleTargets,
+              runtimeSource: runtimeSourceMetadata,
+            },
+          );
+          console.log(`Artifact bundle written to: ${bundleDir}`);
+          console.log(`  Index: ${indexPath}`);
+          console.log(
+            `  Per-test artifacts: ${testArtifactDir} (${bundleResults.length} test directories)`,
+          );
+          console.log(`  Summary: ${summaryPath}`);
+        }
       }
     }
 
@@ -2381,10 +2508,16 @@ export async function runEvalCommand(
     }
 
     if (allResults.length > 0) {
+      const writtenIndexes = outputWriter.bundleIndexPaths();
+      outputPath = writtenIndexes[0] ?? outputPath;
       console.log(`\nResults written to: ${outputPath}`);
+      console.log(`\nResults written under: ${runDir}`);
+      for (const indexPath of writtenIndexes) {
+        console.log(`  ${indexPath}`);
+      }
 
       // Persist last run path for `agentv results` commands
-      await saveRunCache(cwd, outputPath).catch(() => undefined);
+      await saveRunCache(cwd, runDir).catch(() => undefined);
 
       finalExportStatus = await maybeAutoExportRunArtifacts({
         cwd,
diff --git a/apps/cli/src/commands/pipeline/input.ts b/apps/cli/src/commands/pipeline/input.ts
index c71f1fb82..616b09343 100644
--- a/apps/cli/src/commands/pipeline/input.ts
+++ b/apps/cli/src/commands/pipeline/input.ts
@@ -131,8 +131,7 @@ export const evalInputCommand = command({
     }
 
     // Use tests[0].suite — loaders (yaml-parser, jsonl-parser) already apply the
-    // metadata.name → filename-basename → 'eval' fallback. This keeps subagent-mode
-    // artifact layout aligned with CLI mode (artifact-writer.ts:buildArtifactSubdir).
+    // metadata.name → filename-basename → 'eval' fallback for subagent-mode labels.
     const suiteName = tests[0]?.suite?.trim() ?? '';
     const safeSuiteName = suiteName ? suiteName.replace(/[\/\\:*?"<>|]/g, '_') : '';
 
diff --git a/apps/cli/src/commands/pipeline/run.ts b/apps/cli/src/commands/pipeline/run.ts
index ff0992cb6..1f8c7fc0f 100644
--- a/apps/cli/src/commands/pipeline/run.ts
+++ b/apps/cli/src/commands/pipeline/run.ts
@@ -156,8 +156,7 @@ export const evalRunCommand = command({
     }
 
     // Use tests[0].suite — loaders (yaml-parser, jsonl-parser) already apply the
-    // metadata.name → filename-basename → 'eval' fallback. This keeps subagent-mode
-    // artifact layout aligned with CLI mode (artifact-writer.ts:buildArtifactSubdir).
+    // metadata.name → filename-basename → 'eval' fallback for subagent-mode labels.
     const suiteName = tests[0]?.suite?.trim() ?? '';
     const safeSuiteName = suiteName ? suiteName.replace(/[\/\\:*?"<>|]/g, '_') : '';
 
diff --git a/apps/cli/src/commands/results/combine-run.ts b/apps/cli/src/commands/results/combine-run.ts
index ccced82b1..f8af0f8b4 100644
--- a/apps/cli/src/commands/results/combine-run.ts
+++ b/apps/cli/src/commands/results/combine-run.ts
@@ -156,7 +156,11 @@ function latestTimestamp(values: readonly (string | undefined)[]): string | unde
 }
 
 function resultKey(record: ResultManifestRecord, result: EvaluationResult): string {
-  return buildTestTargetKey(record.test_id ?? result.testId, record.target ?? result.target);
+  return buildTestTargetKey(
+    record.test_id ?? result.testId,
+    record.target ?? result.target,
+    record.variant ?? result.variant,
+  );
 }
 
 function loadSources(sources: readonly CombineRunSource[]): LoadedSource[] {
diff --git a/apps/cli/src/commands/results/manifest.ts b/apps/cli/src/commands/results/manifest.ts
index 0925a671c..e2b017eb8 100644
--- a/apps/cli/src/commands/results/manifest.ts
+++ b/apps/cli/src/commands/results/manifest.ts
@@ -29,6 +29,7 @@ export interface ResultManifestRecord {
   readonly category?: string;
   readonly experiment?: string;
   readonly target?: string;
+  readonly variant?: string;
   readonly score: number;
   readonly scores?: readonly Record<string, unknown>[];
   readonly trials?: readonly {
@@ -232,6 +233,7 @@ function hydrateManifestRecord(
     suite: record.suite,
     category: record.category,
     target: record.target,
+    variant: record.variant,
     score: record.score,
     executionStatus: record.execution_status,
     error: record.error,
@@ -310,6 +312,7 @@ export interface LightweightResultRecord {
   readonly suite?: string;
   readonly category?: string;
   readonly target?: string;
+  readonly variant?: string;
   readonly experiment?: string;
   readonly score: number;
   readonly scores?: readonly Record<string, unknown>[];
@@ -329,6 +332,7 @@ export function loadLightweightResults(sourceFile: string): LightweightResultRec
     suite: record.suite,
     category: record.category,
     target: record.target,
+    variant: record.variant,
     experiment: record.experiment,
     score: record.score,
     scores: record.scores,
diff --git a/apps/cli/src/commands/results/serve.ts b/apps/cli/src/commands/results/serve.ts
index 32c64335b..9f609e831 100644
--- a/apps/cli/src/commands/results/serve.ts
+++ b/apps/cli/src/commands/results/serve.ts
@@ -63,6 +63,7 @@ import {
   getProject,
   loadConfig,
   loadProjectRegistry,
+  normalizeCategoryPath,
   normalizeTraceArtifactToTraceSessionResponse,
   omitExternalTraceMetadataKeys,
   readGitResultArtifact,
@@ -1883,30 +1884,7 @@ async function handleRunCategories(c: C, { searchDir, agentvDir, projectId }: Da
   try {
     const loaded = await loadManifestResultsForMeta(searchDir, meta, projectId);
     const { threshold: pass_threshold } = loadStudioConfig(agentvDir);
-    const categoryMap = new Map<string, { results: EvaluationResult[]; suites: Set<string> }>();
-    for (const r of loaded) {
-      const cat = r.category ?? DEFAULT_CATEGORY;
-      const entry = categoryMap.get(cat) ?? {
-        results: [],
-        suites: new Set<string>(),
-      };
-      entry.results.push(r);
-      entry.suites.add(r.suite ?? r.target ?? 'default');
-      categoryMap.set(cat, entry);
-    }
-    const categories = [...categoryMap.entries()].map(([name, entry]) => {
-      const qualitySummary = summarizeQualityResults(entry.results, pass_threshold);
-      return {
-        name,
-        total: qualitySummary.totalCount,
-        passed: qualitySummary.passedCount,
-        failed: qualitySummary.qualityFailureCount,
-        avg_score: qualitySummary.avgScore,
-        execution_error_count: qualitySummary.executionErrorCount,
-        suite_count: entry.suites.size,
-      };
-    });
-    return c.json({ categories });
+    return c.json(buildCategoryRollups(loaded, pass_threshold));
   } catch {
     return c.json({ error: 'Failed to load categories' }, 500);
   }
@@ -1920,7 +1898,10 @@ async function handleCategorySuites(c: C, { searchDir, agentvDir, projectId }: D
   try {
     const loaded = await loadManifestResultsForMeta(searchDir, meta, projectId);
     const { threshold: pass_threshold } = loadStudioConfig(agentvDir);
-    const filtered = loaded.filter((r) => (r.category ?? DEFAULT_CATEGORY) === category);
+    const selectedCategory = normalizeCategoryPath(category);
+    const filtered = loaded.filter((r) =>
+      isCategoryDescendant(categoryPathFromResult(r), selectedCategory),
+    );
     const suiteMap = new Map<string, EvaluationResult[]>();
     for (const r of filtered) {
       const ds = r.suite ?? r.target ?? 'default';
@@ -1945,6 +1926,120 @@ async function handleCategorySuites(c: C, { searchDir, agentvDir, projectId }: D
   }
 }
 
+interface CategoryRollupBucket {
+  readonly results: EvaluationResult[];
+  readonly suites: Set<string>;
+  readonly children: Set<string>;
+}
+
+interface CategoryRollupSummary {
+  readonly name: string;
+  readonly label: string;
+  readonly parent?: string;
+  readonly depth: number;
+  readonly total: number;
+  readonly passed: number;
+  readonly failed: number;
+  readonly avg_score: number;
+  readonly execution_error_count: number;
+  readonly suite_count: number;
+  readonly child_count: number;
+  readonly children?: CategoryRollupSummary[];
+}
+
+function categoryPathFromResult(result: EvaluationResult): string {
+  return normalizeCategoryPath(result.category ?? DEFAULT_CATEGORY);
+}
+
+function categoryPrefixes(category: string): string[] {
+  const parts = category.split('/').filter((part) => part.length > 0);
+  if (parts.length === 0) return [DEFAULT_CATEGORY];
+  return parts.map((_, index) => parts.slice(0, index + 1).join('/'));
+}
+
+function categoryParent(category: string): string | undefined {
+  const parts = category.split('/');
+  return parts.length > 1 ? parts.slice(0, -1).join('/') : undefined;
+}
+
+function categoryLabel(category: string): string {
+  return category.split('/').at(-1) ?? category;
+}
+
+function isCategoryDescendant(category: string, selectedCategory: string): boolean {
+  return category === selectedCategory || category.startsWith(`${selectedCategory}/`);
+}
+
+function summarizeCategoryBucket(
+  name: string,
+  entry: CategoryRollupBucket,
+  passThreshold: number,
+): CategoryRollupSummary {
+  const qualitySummary = summarizeQualityResults(entry.results, passThreshold);
+  const parent = categoryParent(name);
+  return {
+    name,
+    label: categoryLabel(name),
+    ...(parent && { parent }),
+    depth: name.split('/').filter(Boolean).length - 1,
+    total: qualitySummary.totalCount,
+    passed: qualitySummary.passedCount,
+    failed: qualitySummary.qualityFailureCount,
+    avg_score: qualitySummary.avgScore,
+    execution_error_count: qualitySummary.executionErrorCount,
+    suite_count: entry.suites.size,
+    child_count: entry.children.size,
+  };
+}
+
+function buildCategoryRollups(
+  results: readonly EvaluationResult[],
+  passThreshold: number,
+): { categories: CategoryRollupSummary[]; category_tree: CategoryRollupSummary[] } {
+  const categoryMap = new Map<string, CategoryRollupBucket>();
+  const ensureEntry = (name: string): CategoryRollupBucket => {
+    const existing = categoryMap.get(name);
+    if (existing) return existing;
+    const created = { results: [], suites: new Set<string>(), children: new Set<string>() };
+    categoryMap.set(name, created);
+    return created;
+  };
+
+  for (const result of results) {
+    const category = categoryPathFromResult(result);
+    const suite = result.suite ?? result.target ?? 'default';
+    const prefixes = categoryPrefixes(category);
+    for (const prefix of prefixes) {
+      const entry = ensureEntry(prefix);
+      entry.results.push(result);
+      entry.suites.add(suite);
+    }
+    for (let index = 1; index < prefixes.length; index++) {
+      ensureEntry(prefixes[index - 1]).children.add(prefixes[index]);
+    }
+  }
+
+  const categories = [...categoryMap.entries()]
+    .map(([name, entry]) => summarizeCategoryBucket(name, entry, passThreshold))
+    .sort((a, b) => a.name.localeCompare(b.name));
+
+  const summariesByName = new Map(categories.map((summary) => [summary.name, summary]));
+  const buildTreeNode = (summary: CategoryRollupSummary): CategoryRollupSummary => {
+    const children = [...(categoryMap.get(summary.name)?.children ?? [])]
+      .map((childName) => summariesByName.get(childName))
+      .filter((child): child is CategoryRollupSummary => Boolean(child))
+      .sort((a, b) => a.name.localeCompare(b.name))
+      .map(buildTreeNode);
+    return children.length > 0 ? { ...summary, children } : summary;
+  };
+  const categoryTree = categories
+    .filter((summary) => !summary.parent)
+    .sort((a, b) => a.name.localeCompare(b.name))
+    .map(buildTreeNode);
+
+  return { categories, category_tree: categoryTree };
+}
+
 async function handleEvalDetail(c: C, { searchDir, projectId }: DataContext) {
   const filename = c.req.param('filename') ?? '';
   const evalId = c.req.param('evalId') ?? '';
@@ -2449,7 +2544,7 @@ async function handleCompare(c: C, { searchDir, agentvDir, projectId }: DataCont
         }
         entry.tests.push({
           test_id: r.testId,
-          ...(r.category && { category: r.category }),
+          ...(r.category && { category: normalizeCategoryPath(r.category) }),
           score: r.score,
           passed,
           execution_status: r.executionStatus,
@@ -2459,7 +2554,7 @@ async function handleCompare(c: C, { searchDir, agentvDir, projectId }: DataCont
         // Per-run accumulation. Dedupe tests within the run by last-wins.
         runTestMap.set(r.testId, {
           test_id: r.testId,
-          ...(r.category && { category: r.category }),
+          ...(r.category && { category: normalizeCategoryPath(r.category) }),
           score: r.score,
           passed,
           execution_status: r.executionStatus,
diff --git a/apps/cli/test/commands/eval/aggregate.test.ts b/apps/cli/test/commands/eval/aggregate.test.ts
index 9ef6034ca..734b400e5 100644
--- a/apps/cli/test/commands/eval/aggregate.test.ts
+++ b/apps/cli/test/commands/eval/aggregate.test.ts
@@ -1,5 +1,13 @@
 import { afterEach, beforeEach, describe, expect, it } from 'bun:test';
-import { mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
+import {
+  existsSync,
+  mkdirSync,
+  mkdtempSync,
+  readFileSync,
+  readdirSync,
+  rmSync,
+  writeFileSync,
+} from 'node:fs';
 import { tmpdir } from 'node:os';
 import path from 'node:path';
 
@@ -45,12 +53,32 @@ function writeJsonlIndex(dir: string, results: Partial<EvaluationResult>[]): str
   return indexPath;
 }
 
+function readIndexRows(dir: string): Array<{ test_id: string; result_dir: string }> {
+  const indexPath = path.join(dir, 'index.jsonl');
+  if (!existsSync(indexPath)) {
+    return readdirSync(dir)
+      .filter((entry) => /--[a-f0-9]{12}$/.test(entry))
+      .map((entry) => ({ test_id: entry.replace(/--[a-f0-9]{12}$/, ''), result_dir: entry }));
+  }
+  return readFileSync(path.join(dir, 'index.jsonl'), 'utf8')
+    .trim()
+    .split('\n')
+    .filter(Boolean)
+    .map((line) => JSON.parse(line) as { test_id: string; result_dir: string });
+}
+
+function rowRunPath(dir: string, testId: string, ...segments: string[]): string {
+  const row = readIndexRows(dir).find((entry) => entry.test_id === testId);
+  expect(row?.result_dir).toMatch(new RegExp(`^${testId}--[a-f0-9]{12}$`));
+  return path.join(dir, row?.result_dir ?? '', ...segments);
+}
+
 // ---------------------------------------------------------------------------
 // deduplicateByTestIdTarget
 // ---------------------------------------------------------------------------
 
 describe('deduplicateByTestIdTarget', () => {
-  it('keeps last entry per (testId, target) pair', () => {
+  it('keeps last entry per (testId, target, variant) tuple', () => {
     const results = [
       makeResult({ testId: 'a', target: 'x', score: 0.1 }),
       makeResult({ testId: 'a', target: 'x', score: 0.9 }),
@@ -72,6 +100,63 @@ describe('deduplicateByTestIdTarget', () => {
     expect(deduped).toHaveLength(2);
   });
 
+  it('keeps entries with different variants for the same test and target', () => {
+    const results = [
+      makeResult({ testId: 'a', target: 'x', variant: 'baseline', score: 0.3 }),
+      makeResult({ testId: 'a', target: 'x', variant: 'candidate', score: 0.7 }),
+      makeResult({ testId: 'a', target: 'x', variant: 'candidate', score: 0.9 }),
+    ];
+    const deduped = deduplicateByTestIdTarget(results);
+    expect(deduped).toHaveLength(2);
+    expect(deduped.map((r) => [r.variant, r.score])).toEqual([
+      ['baseline', 0.3],
+      ['candidate', 0.9],
+    ]);
+  });
+
+  it('keeps entries with different suites for the same test and target', () => {
+    const results = [
+      makeResult({ suite: 'suite-a', testId: 'a', target: 'x', score: 0.3 }),
+      makeResult({ suite: 'suite-b', testId: 'a', target: 'x', score: 0.7 }),
+    ];
+    const deduped = deduplicateByTestIdTarget(results);
+    expect(deduped).toHaveLength(2);
+    expect(deduped.map((r) => r.suite)).toEqual(['suite-a', 'suite-b']);
+  });
+
+  it('keeps duplicate suite labels from different eval paths', () => {
+    const results = [
+      makeResult({
+        suite: 'duplicate-suite',
+        testId: 'a',
+        target: 'x',
+        source: {
+          evalFilePath: 'evals/a/cases.eval.yaml',
+          evalFileAbsolutePath: '/repo/evals/a/cases.eval.yaml',
+          testId: 'a',
+          testSnapshotYaml: 'id: a\n',
+          graderDefinitions: [],
+          references: [],
+        },
+      }),
+      makeResult({
+        suite: 'duplicate-suite',
+        testId: 'a',
+        target: 'x',
+        source: {
+          evalFilePath: 'evals/b/cases.eval.yaml',
+          evalFileAbsolutePath: '/repo/evals/b/cases.eval.yaml',
+          testId: 'a',
+          testSnapshotYaml: 'id: a\n',
+          graderDefinitions: [],
+          references: [],
+        },
+      }),
+    ];
+    const deduped = deduplicateByTestIdTarget(results);
+    expect(deduped).toHaveLength(2);
+  });
+
   it('handles empty input', () => {
     expect(deduplicateByTestIdTarget([])).toHaveLength(0);
   });
@@ -180,17 +265,17 @@ describe('writePerTestArtifacts', () => {
     await writePerTestArtifacts(results, tmpDir);
 
     const grading1 = JSON.parse(
-      readFileSync(path.join(tmpDir, 'test-1', 'run-1', 'grading.json'), 'utf8'),
+      readFileSync(rowRunPath(tmpDir, 'test-1', 'run-1', 'grading.json'), 'utf8'),
     );
     expect(grading1.assertions).toHaveLength(1);
 
     const timing1 = JSON.parse(
-      readFileSync(path.join(tmpDir, 'test-1', 'run-1', 'timing.json'), 'utf8'),
+      readFileSync(rowRunPath(tmpDir, 'test-1', 'run-1', 'timing.json'), 'utf8'),
     );
     expect(timing1.total_tokens).toBeGreaterThanOrEqual(0);
 
     const grading2 = JSON.parse(
-      readFileSync(path.join(tmpDir, 'test-2', 'run-1', 'grading.json'), 'utf8'),
+      readFileSync(rowRunPath(tmpDir, 'test-2', 'run-1', 'grading.json'), 'utf8'),
     );
     expect(grading2.assertions).toHaveLength(1);
   });
@@ -201,7 +286,7 @@ describe('writePerTestArtifacts', () => {
     await writePerTestArtifacts(results, tmpDir);
 
     const answer = readFileSync(
-      path.join(tmpDir, 'test-1', 'run-1', 'outputs', 'answer.md'),
+      rowRunPath(tmpDir, 'test-1', 'run-1', 'outputs', 'answer.md'),
       'utf8',
     );
     expect(answer).toContain('hello');
diff --git a/apps/cli/test/commands/eval/artifact-writer.test.ts b/apps/cli/test/commands/eval/artifact-writer.test.ts
index d248f474f..2ba014d14 100644
--- a/apps/cli/test/commands/eval/artifact-writer.test.ts
+++ b/apps/cli/test/commands/eval/artifact-writer.test.ts
@@ -10,6 +10,7 @@ import {
   type GraderResult,
   METRICS_SCHEMA_VERSION,
   MetricsArtifactWireSchema,
+  buildResultIndexArtifact,
   buildTraceFromMessages,
   parseYamlValue,
 } from '@agentv/core';
@@ -75,6 +76,33 @@ function makeEvaluatorResult(overrides: Partial<GraderResult> = {}): GraderResul
   } as GraderResult;
 }
 
+async function readIndexLines(indexPath: string): Promise<IndexArtifactEntry[]> {
+  const content = (await readFile(indexPath, 'utf8')).trim();
+  if (!content) return [];
+  return content.split('\n').map((line) => JSON.parse(line) as IndexArtifactEntry);
+}
+
+function escapeRegex(value: string): string {
+  return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+}
+
+function expectRowDir(
+  entry: Pick<IndexArtifactEntry, 'result_dir' | 'test_id'> | undefined,
+  expectedPrefix = entry?.test_id ?? 'unknown',
+): string {
+  expect(entry?.result_dir).toMatch(new RegExp(`^${escapeRegex(expectedPrefix)}--[a-f0-9]{12}$`));
+  return entry?.result_dir ?? '';
+}
+
+function runArtifactPath(
+  rootDir: string,
+  entry: Pick<IndexArtifactEntry, 'result_dir'> | undefined,
+  ...segments: string[]
+): string {
+  expect(entry?.result_dir).toBeTruthy();
+  return path.join(rootDir, entry?.result_dir ?? '', ...segments);
+}
+
 // ---------------------------------------------------------------------------
 // Grading artifact
 // ---------------------------------------------------------------------------
@@ -876,14 +904,25 @@ describe('writeArtifactsFromResults', () => {
       evalFile: 'my-eval.yaml',
     });
 
+    const indexLines = await readIndexLines(paths.indexPath);
+    expect(indexLines).toHaveLength(2);
+    const alphaRowDir = expectRowDir(indexLines[0], 'alpha');
+    const betaRowDir = expectRowDir(indexLines[1], 'beta');
+    expect(alphaRowDir).not.toBe(betaRowDir);
+
     // Check per-test artifact directories
     const artifactEntries = await readdir(paths.testArtifactDir);
-    expect(artifactEntries.sort()).toEqual(['alpha', 'beta', 'index.jsonl', 'summary.json']);
+    expect(artifactEntries.sort()).toEqual([
+      alphaRowDir,
+      betaRowDir,
+      'index.jsonl',
+      'summary.json',
+    ]);
 
-    const alphaEntries = await readdir(path.join(paths.testArtifactDir, 'alpha'));
+    const alphaEntries = await readdir(path.join(paths.testArtifactDir, alphaRowDir));
     expect(alphaEntries.sort()).toEqual(['run-1', 'summary.json']);
 
-    const alphaRunEntries = await readdir(path.join(paths.testArtifactDir, 'alpha', 'run-1'));
+    const alphaRunEntries = await readdir(path.join(paths.testArtifactDir, alphaRowDir, 'run-1'));
     expect(alphaRunEntries.sort()).toEqual([
       'grading.json',
       'metrics.json',
@@ -895,13 +934,16 @@ describe('writeArtifactsFromResults', () => {
     ]);
 
     const alphaGrading: GradingArtifact = JSON.parse(
-      await readFile(path.join(paths.testArtifactDir, 'alpha', 'run-1', 'grading.json'), 'utf8'),
+      await readFile(
+        path.join(paths.testArtifactDir, alphaRowDir, 'run-1', 'grading.json'),
+        'utf8',
+      ),
     );
     expect(alphaGrading.summary).toBeDefined();
     expect(alphaGrading).not.toHaveProperty('execution_metrics');
 
     const alphaTiming: TimingArtifact = JSON.parse(
-      await readFile(path.join(paths.testArtifactDir, 'alpha', 'run-1', 'timing.json'), 'utf8'),
+      await readFile(path.join(paths.testArtifactDir, alphaRowDir, 'run-1', 'timing.json'), 'utf8'),
     );
     expect(alphaTiming.duration_ms).toBe(5000);
 
@@ -910,15 +952,10 @@ describe('writeArtifactsFromResults', () => {
     expect(summary.metadata.tests_run.sort()).toEqual(['alpha', 'beta']);
     expect(summary.timing.duration_ms).toBe(13000);
 
-    const indexLines = (await readFile(paths.indexPath, 'utf8'))
-      .trim()
-      .split('\n')
-      .map((line) => JSON.parse(line) as IndexArtifactEntry);
-    expect(indexLines).toHaveLength(2);
-    expect(indexLines[0]?.summary_path).toBe('alpha/summary.json');
-    expect(indexLines[0]?.grading_path).toBe('alpha/run-1/grading.json');
-    expect(indexLines[0]?.timing_path).toBe('alpha/run-1/timing.json');
-    expect(indexLines[0]?.metrics_path).toBe('alpha/run-1/metrics.json');
+    expect(indexLines[0]?.summary_path).toBe(`${alphaRowDir}/summary.json`);
+    expect(indexLines[0]?.grading_path).toBe(`${alphaRowDir}/run-1/grading.json`);
+    expect(indexLines[0]?.timing_path).toBe(`${alphaRowDir}/run-1/timing.json`);
+    expect(indexLines[0]?.metrics_path).toBe(`${alphaRowDir}/run-1/metrics.json`);
   });
 
   it('writes optional runtime source metadata to summary and index rows', async () => {
@@ -1001,10 +1038,8 @@ describe('writeArtifactsFromResults', () => {
 
     const paths = await writeArtifactsFromResults(results, testDir, { sourceTests });
 
-    const [indexEntry] = (await readFile(paths.indexPath, 'utf8'))
-      .trim()
-      .split('\n')
-      .map((line) => JSON.parse(line) as IndexArtifactEntry);
+    const [indexEntry] = await readIndexLines(paths.indexPath);
+    const repeatRowDir = expectRowDir(indexEntry, 'repeat-case');
     expect(indexEntry?.trials).toEqual([
       { attempt: 0, run_path: 'run-1', score: 0.25, verdict: 'fail' },
       { attempt: 1, run_path: 'run-2', score: 1, verdict: 'pass' },
@@ -1016,19 +1051,19 @@ describe('writeArtifactsFromResults', () => {
       ci95_upper: 1,
       stddev: 0.53,
     });
-    expect(indexEntry?.result_dir).toBe('repeat-case');
-    expect(indexEntry?.summary_path).toBe('repeat-case/summary.json');
+    expect(indexEntry?.result_dir).toBe(repeatRowDir);
+    expect(indexEntry?.summary_path).toBe(`${repeatRowDir}/summary.json`);
     expect(indexEntry?.task_dir).toBeUndefined();
     expect(indexEntry?.input_path).toBeUndefined();
     expect(indexEntry?.grading_path).toBeUndefined();
     expect(indexEntry?.timing_path).toBeUndefined();
     expect(indexEntry?.metrics_path).toBeUndefined();
 
-    const repeatEntries = await readdir(path.join(paths.testArtifactDir, 'repeat-case'));
+    const repeatEntries = await readdir(path.join(paths.testArtifactDir, repeatRowDir));
     expect(repeatEntries.sort()).toEqual(['run-1', 'run-2', 'summary.json']);
 
     const caseSummary = JSON.parse(
-      await readFile(path.join(paths.testArtifactDir, 'repeat-case', 'summary.json'), 'utf8'),
+      await readFile(path.join(paths.testArtifactDir, repeatRowDir, 'summary.json'), 'utf8'),
     ) as Record<string, unknown>;
     expect(caseSummary).toMatchObject({
       total_runs: 2,
@@ -1060,11 +1095,11 @@ describe('writeArtifactsFromResults', () => {
     expect(typeof caseSummary.fingerprint).toBe('string');
 
     await expect(
-      readFile(path.join(paths.testArtifactDir, 'repeat-case', 'grading.json'), 'utf8'),
+      readFile(path.join(paths.testArtifactDir, repeatRowDir, 'grading.json'), 'utf8'),
     ).rejects.toThrow();
 
     for (const runDir of ['run-1', 'run-2']) {
-      const runEntries = await readdir(path.join(paths.testArtifactDir, 'repeat-case', runDir));
+      const runEntries = await readdir(path.join(paths.testArtifactDir, repeatRowDir, runDir));
       expect(runEntries.sort()).toEqual([
         'grading.json',
         'metrics.json',
@@ -1078,7 +1113,7 @@ describe('writeArtifactsFromResults', () => {
 
     const runOneResult = JSON.parse(
       await readFile(
-        path.join(paths.testArtifactDir, 'repeat-case', 'run-1', 'result.json'),
+        path.join(paths.testArtifactDir, repeatRowDir, 'run-1', 'result.json'),
         'utf8',
       ),
     ) as Record<string, unknown>;
@@ -1098,14 +1133,14 @@ describe('writeArtifactsFromResults', () => {
     });
 
     const runTwoAnswer = await readFile(
-      path.join(paths.testArtifactDir, 'repeat-case', 'run-2', 'outputs', 'answer.md'),
+      path.join(paths.testArtifactDir, repeatRowDir, 'run-2', 'outputs', 'answer.md'),
       'utf8',
     );
     expect(runTwoAnswer).toBe('second attempt');
 
     const runTwoResult = JSON.parse(
       await readFile(
-        path.join(paths.testArtifactDir, 'repeat-case', 'run-2', 'result.json'),
+        path.join(paths.testArtifactDir, repeatRowDir, 'run-2', 'result.json'),
         'utf8',
       ),
     ) as Record<string, unknown>;
@@ -1147,16 +1182,19 @@ describe('writeArtifactsFromResults', () => {
       }),
     ];
 
-    await writeArtifactsFromResults(results, testDir);
+    const paths = await writeArtifactsFromResults(results, testDir);
+    const indexLines = await readIndexLines(paths.indexPath);
+    const testOne = indexLines.find((line) => line.test_id === 'test-1');
+    const testTwo = indexLines.find((line) => line.test_id === 'test-2');
 
     const gradingOne: GradingArtifact = JSON.parse(
-      await readFile(path.join(testDir, 'test-1', 'run-1', 'grading.json'), 'utf8'),
+      await readFile(runArtifactPath(testDir, testOne, 'run-1', 'grading.json'), 'utf8'),
     );
     const gradingTwo: GradingArtifact = JSON.parse(
-      await readFile(path.join(testDir, 'test-2', 'run-1', 'grading.json'), 'utf8'),
+      await readFile(runArtifactPath(testDir, testTwo, 'run-1', 'grading.json'), 'utf8'),
     );
     const timingOne: TimingArtifact = JSON.parse(
-      await readFile(path.join(testDir, 'test-1', 'run-1', 'timing.json'), 'utf8'),
+      await readFile(runArtifactPath(testDir, testOne, 'run-1', 'timing.json'), 'utf8'),
     );
 
     expect(gradingOne.summary.total).toBe(1);
@@ -1215,16 +1253,18 @@ describe('writeArtifactsFromResults', () => {
       }),
     ];
 
-    await writeArtifactsFromResults(results, testDir);
+    const paths = await writeArtifactsFromResults(results, testDir);
+    const [indexLine] = await readIndexLines(paths.indexPath);
+    const rowDir = expectRowDir(indexLine, 'transcript-case');
 
-    const transcriptPath = path.join(testDir, 'transcript-case', 'run-1', 'transcript.jsonl');
+    const transcriptPath = runArtifactPath(testDir, indexLine, 'run-1', 'transcript.jsonl');
     const transcriptLines = (await readFile(transcriptPath, 'utf8'))
       .trim()
       .split('\n')
       .map((line) => JSON.parse(line));
 
     const rawTranscriptLines = (
-      await readFile(path.join(testDir, 'transcript-case', 'run-1', 'transcript-raw.jsonl'), 'utf8')
+      await readFile(runArtifactPath(testDir, indexLine, 'run-1', 'transcript-raw.jsonl'), 'utf8')
     )
       .trim()
       .split('\n')
@@ -1275,20 +1315,15 @@ describe('writeArtifactsFromResults', () => {
       message_index: 0,
       role: 'user',
     });
+    await expect(readFile(path.join(testDir, rowDir, 'transcript.json'), 'utf8')).rejects.toThrow();
     await expect(
-      readFile(path.join(testDir, 'transcript-case', 'transcript.json'), 'utf8'),
-    ).rejects.toThrow();
-    await expect(
-      readFile(path.join(testDir, 'transcript-case', 'run-1', 'trace.json'), 'utf8'),
+      readFile(runArtifactPath(testDir, indexLine, 'run-1', 'trace.json'), 'utf8'),
     ).rejects.toThrow();
 
-    const indexLine = JSON.parse(
-      (await readFile(path.join(testDir, 'index.jsonl'), 'utf8')).trim(),
-    );
     expect(indexLine).not.toHaveProperty('trace_path');
-    expect(indexLine.transcript_path).toBe('transcript-case/run-1/transcript.jsonl');
-    expect(indexLine.transcript_raw_path).toBe('transcript-case/run-1/transcript-raw.jsonl');
-    expect(indexLine.metrics_path).toBe('transcript-case/run-1/metrics.json');
+    expect(indexLine?.transcript_path).toBe(`${rowDir}/run-1/transcript.jsonl`);
+    expect(indexLine?.transcript_raw_path).toBe(`${rowDir}/run-1/transcript-raw.jsonl`);
+    expect(indexLine?.metrics_path).toBe(`${rowDir}/run-1/metrics.json`);
     expect(indexLine.metrics_path.endsWith(CANONICAL_METRICS_ARTIFACT_PATH)).toBe(true);
 
     expect(indexLine.artifact_pointers).toBeUndefined();
@@ -1383,16 +1418,15 @@ describe('writeArtifactsFromResults', () => {
       }),
     ];
 
-    await writeArtifactsFromResults(results, testDir);
+    const paths = await writeArtifactsFromResults(results, testDir);
+    const [indexLine] = await readIndexLines(paths.indexPath);
+    const rowDir = expectRowDir(indexLine, 'summary-case');
 
-    const indexLine = JSON.parse(
-      (await readFile(path.join(testDir, 'index.jsonl'), 'utf8')).trim(),
-    );
-    expect(indexLine.metrics_path).toBe('summary-case/run-1/metrics.json');
+    expect(indexLine?.metrics_path).toBe(`${rowDir}/run-1/metrics.json`);
 
     const summary = MetricsArtifactWireSchema.parse(
       JSON.parse(
-        await readFile(path.join(testDir, 'summary-case', 'run-1', 'metrics.json'), 'utf8'),
+        await readFile(runArtifactPath(testDir, indexLine, 'run-1', 'metrics.json'), 'utf8'),
       ),
     );
 
@@ -1410,7 +1444,7 @@ describe('writeArtifactsFromResults', () => {
     });
     expect(summary.source_artifacts).not.toHaveProperty('trace_path');
     await expect(
-      readFile(path.join(testDir, 'summary-case', 'run-1', 'trace.json'), 'utf8'),
+      readFile(runArtifactPath(testDir, indexLine, 'run-1', 'trace.json'), 'utf8'),
     ).rejects.toThrow();
     expect(summary.metrics.total_turns).toBe(2);
     expect(summary.metrics.total_tool_calls).toBe(4);
@@ -1483,7 +1517,7 @@ describe('writeArtifactsFromResults', () => {
     expect(summary).not.toHaveProperty('usage_summary');
 
     const timing = JSON.parse(
-      await readFile(path.join(testDir, 'summary-case', 'run-1', 'timing.json'), 'utf8'),
+      await readFile(runArtifactPath(testDir, indexLine, 'run-1', 'timing.json'), 'utf8'),
     );
     expect(timing).toMatchObject({
       total_tokens: 140,
@@ -1535,24 +1569,27 @@ describe('writeArtifactsFromResults', () => {
       }),
     ];
 
-    await writeArtifactsFromResults(results, testDir);
+    const paths = await writeArtifactsFromResults(results, testDir);
+    const indexLines = await readIndexLines(paths.indexPath);
+    const aggregateRow = indexLines.find((line) => line.test_id === 'aggregate-usage');
+    const estimatedRow = indexLines.find((line) => line.test_id === 'estimated-usage');
 
     const aggregateTiming = JSON.parse(
-      await readFile(path.join(testDir, 'aggregate-usage', 'run-1', 'timing.json'), 'utf8'),
+      await readFile(runArtifactPath(testDir, aggregateRow, 'run-1', 'timing.json'), 'utf8'),
     );
     const estimatedTiming = JSON.parse(
-      await readFile(path.join(testDir, 'estimated-usage', 'run-1', 'timing.json'), 'utf8'),
+      await readFile(runArtifactPath(testDir, estimatedRow, 'run-1', 'timing.json'), 'utf8'),
     );
     const runSummary = JSON.parse(await readFile(path.join(testDir, 'summary.json'), 'utf8'));
 
     MetricsArtifactWireSchema.parse(
       JSON.parse(
-        await readFile(path.join(testDir, 'aggregate-usage', 'run-1', 'metrics.json'), 'utf8'),
+        await readFile(runArtifactPath(testDir, aggregateRow, 'run-1', 'metrics.json'), 'utf8'),
       ),
     );
     MetricsArtifactWireSchema.parse(
       JSON.parse(
-        await readFile(path.join(testDir, 'estimated-usage', 'run-1', 'metrics.json'), 'utf8'),
+        await readFile(runArtifactPath(testDir, estimatedRow, 'run-1', 'metrics.json'), 'utf8'),
       ),
     );
 
@@ -1609,19 +1646,20 @@ describe('writeArtifactsFromResults', () => {
       }),
     ];
 
-    await writeArtifactsFromResults(results, testDir);
+    const paths = await writeArtifactsFromResults(results, testDir);
+    const [indexLine] = await readIndexLines(paths.indexPath);
+    const rowDir = expectRowDir(indexLine, 'raw-log-case');
 
-    const copiedRawLogPath = path.join(testDir, 'raw-log-case', 'run-1', 'provider.log');
+    const copiedRawLogPath = runArtifactPath(testDir, indexLine, 'run-1', 'provider.log');
     await expect(readFile(copiedRawLogPath, 'utf8')).rejects.toThrow();
 
-    const transcriptPath = path.join(testDir, 'raw-log-case', 'run-1', 'transcript-raw.jsonl');
+    const transcriptPath = runArtifactPath(testDir, indexLine, 'run-1', 'transcript-raw.jsonl');
     await expect(readFile(transcriptPath, 'utf8')).resolves.toBe(rawLog);
-    await expect(
-      readFile(path.join(testDir, 'raw-log-case', 'transcript.json'), 'utf8'),
-    ).rejects.toThrow();
+    await expect(readFile(rawLogPath, 'utf8')).resolves.toBe(rawLog);
+    await expect(readFile(path.join(testDir, rowDir, 'transcript.json'), 'utf8')).rejects.toThrow();
 
     const transcriptLines = (
-      await readFile(path.join(testDir, 'raw-log-case', 'run-1', 'transcript.jsonl'), 'utf8')
+      await readFile(runArtifactPath(testDir, indexLine, 'run-1', 'transcript.jsonl'), 'utf8')
     )
       .trim()
       .split('\n')
@@ -1633,12 +1671,9 @@ describe('writeArtifactsFromResults', () => {
       content: [{ type: 'text', text: 'Raw log copied' }],
     });
 
-    const indexLine = JSON.parse(
-      (await readFile(path.join(testDir, 'index.jsonl'), 'utf8')).trim(),
-    );
     expect(indexLine.raw_provider_log_path).toBeUndefined();
-    expect(indexLine.transcript_path).toBe('raw-log-case/run-1/transcript.jsonl');
-    expect(indexLine.transcript_raw_path).toBe('raw-log-case/run-1/transcript-raw.jsonl');
+    expect(indexLine.transcript_path).toBe(`${rowDir}/run-1/transcript.jsonl`);
+    expect(indexLine.transcript_raw_path).toBe(`${rowDir}/run-1/transcript-raw.jsonl`);
     expect(indexLine).not.toHaveProperty('transcript_json_path');
   });
 
@@ -1663,11 +1698,9 @@ describe('writeArtifactsFromResults', () => {
       }),
     ];
 
-    await writeArtifactsFromResults(results, testDir);
+    const paths = await writeArtifactsFromResults(results, testDir);
 
-    const indexLine = JSON.parse(
-      (await readFile(path.join(testDir, 'index.jsonl'), 'utf8')).trim(),
-    );
+    const [indexLine] = await readIndexLines(paths.indexPath);
     expect(indexLine.external_trace).toEqual({
       provider: 'phoenix',
       source: 'codex',
@@ -1685,7 +1718,7 @@ describe('writeArtifactsFromResults', () => {
     expect(JSON.stringify(indexLine)).not.toContain('api_key');
 
     const transcriptJson = await readFile(
-      path.join(testDir, 'external-trace-case', 'run-1', 'transcript.jsonl'),
+      runArtifactPath(testDir, indexLine, 'run-1', 'transcript.jsonl'),
       'utf8',
     );
     expect(transcriptJson).not.toContain('secret');
@@ -1701,33 +1734,29 @@ describe('writeArtifactsFromResults', () => {
       }),
     ];
 
-    await writeArtifactsFromResults(results, testDir);
+    const paths = await writeArtifactsFromResults(results, testDir);
+    const [indexLine] = await readIndexLines(paths.indexPath);
 
-    const transcriptPath = path.join(
-      testDir,
-      'no-transcript-case',
-      'run-1',
-      'transcript-raw.jsonl',
-    );
+    const transcriptPath = runArtifactPath(testDir, indexLine, 'run-1', 'transcript-raw.jsonl');
     await expect(readFile(transcriptPath, 'utf8')).rejects.toThrow();
 
-    const indexLine = JSON.parse(
-      (await readFile(path.join(testDir, 'index.jsonl'), 'utf8')).trim(),
-    );
     expect(indexLine).not.toHaveProperty('transcript_path');
-    expect(indexLine.metrics_path).toBe('no-transcript-case/run-1/metrics.json');
+    expect(indexLine.metrics_path).toBe(
+      `${expectRowDir(indexLine, 'no-transcript-case')}/run-1/metrics.json`,
+    );
     expect(indexLine.artifact_pointers).toBeUndefined();
   });
 
   it('sanitizes test IDs for directory names', async () => {
     const results = [makeResult({ testId: 'path/to:test*1' })];
-    await writeArtifactsFromResults(results, testDir);
+    const paths = await writeArtifactsFromResults(results, testDir);
+    const [indexLine] = await readIndexLines(paths.indexPath);
 
     const artifactEntries = await readdir(testDir);
-    expect(artifactEntries).toContain('path_to_test_1');
+    expect(artifactEntries).toContain(expectRowDir(indexLine, 'path_to_test_1'));
   });
 
-  it('writes artifacts without target subdirectory (one run = one target)', async () => {
+  it('writes artifacts in a deterministic row id directory without target hierarchy', async () => {
     const results = [
       makeResult({
         testId: 'shared-id',
@@ -1739,83 +1768,82 @@ describe('writeArtifactsFromResults', () => {
     ];
 
     const paths = await writeArtifactsFromResults(results, testDir);
-    const indexLines = (await readFile(paths.indexPath, 'utf8')).trim().split('\n').map(JSON.parse);
+    const [indexLine] = await readIndexLines(paths.indexPath);
+    const rowDir = expectRowDir(indexLine, 'shared-id');
 
-    expect(indexLines[0].grading_path).toBe('shared-id/run-1/grading.json');
+    expect(indexLine.grading_path).toBe(`${rowDir}/run-1/grading.json`);
+    expect(rowDir).not.toContain('/');
 
     const grading: GradingArtifact = JSON.parse(
-      await readFile(path.join(testDir, 'shared-id', 'run-1', 'grading.json'), 'utf8'),
+      await readFile(runArtifactPath(testDir, indexLine, 'run-1', 'grading.json'), 'utf8'),
     );
 
     expect(grading.assertions[0].text).toBe('baseline-check');
   });
 
-  it('prefixes artifact paths with suite when present', async () => {
+  it('uses distinct row ids for the same test id across targets', async () => {
     const paths = await writeArtifactsFromResults(
-      [makeResult({ suite: 'eval-top-months-chart', testId: 'shared-id', target: 'baseline' })],
+      [
+        makeResult({ testId: 'shared-id', target: 'mock-alpha', output: 'alpha answer' }),
+        makeResult({ testId: 'shared-id', target: 'mock-beta', output: 'beta answer' }),
+      ],
       testDir,
     );
 
-    const [indexLine] = (await readFile(paths.indexPath, 'utf8'))
-      .trim()
-      .split('\n')
-      .map(JSON.parse);
-    expect(indexLine.grading_path).toBe('eval-top-months-chart/shared-id/run-1/grading.json');
+    const indexLines = await readIndexLines(paths.indexPath);
+    const rowDirs = indexLines.map((line) => expectRowDir(line, 'shared-id'));
+    expect(new Set(rowDirs).size).toBe(2);
+    expect(indexLines.map((line) => line.grading_path)).toEqual(
+      rowDirs.map((rowDir) => `${rowDir}/run-1/grading.json`),
+    );
+    const answers = await Promise.all(
+      indexLines.map((line) =>
+        readFile(runArtifactPath(testDir, line, 'run-1', 'outputs', 'answer.md'), 'utf8'),
+      ),
+    );
+    expect(answers.sort()).toEqual(['alpha answer', 'beta answer']);
   });
 
-  it('does not prefix artifact paths with suite when it matches the result group', async () => {
+  it('uses distinct row ids for the same test id across suites', async () => {
     const paths = await writeArtifactsFromResults(
-      [makeResult({ suite: 'eval-top-months-chart', testId: 'shared-id', target: 'baseline' })],
+      [
+        makeResult({ suite: 'suite-a', testId: 'shared-id', target: 'baseline' }),
+        makeResult({ suite: 'suite-b', testId: 'shared-id', target: 'baseline' }),
+      ],
       testDir,
-      { resultGroup: 'eval-top-months-chart' },
     );
 
-    const [indexLine] = (await readFile(paths.indexPath, 'utf8'))
-      .trim()
-      .split('\n')
-      .map(JSON.parse);
-    expect(indexLine.suite).toBe('eval-top-months-chart');
-    expect(indexLine.grading_path).toBe('shared-id/run-1/grading.json');
+    const indexLines = await readIndexLines(paths.indexPath);
+    const rowDirs = indexLines.map((line) => expectRowDir(line, 'shared-id'));
+    expect(indexLines.map((line) => line.suite).sort()).toEqual(['suite-a', 'suite-b']);
+    expect(new Set(rowDirs).size).toBe(2);
+    expect(rowDirs.every((rowDir) => !rowDir.includes('/'))).toBe(true);
   });
 
-  it('prefixes imported suite artifacts even when the suite matches the result group', async () => {
+  it('uses distinct row ids for duplicate suite labels from different eval paths', async () => {
     const sourceTests = [
       {
         id: 'shared-id',
-        suite: 'eval-top-months-chart',
+        suite: 'duplicate-suite',
         source: {
-          evalFilePath: 'evals/imported.eval.yaml',
-          evalFileAbsolutePath: path.join(testDir, 'evals/imported.eval.yaml'),
-          importedSuiteName: 'eval-top-months-chart',
+          evalFilePath: 'evals/one.eval.yaml',
+          evalFileAbsolutePath: path.join(testDir, 'evals/one.eval.yaml'),
+          evalFileRepoPath: 'evals/one.eval.yaml',
+          importedSuiteName: 'duplicate-suite',
           testId: 'shared-id',
           testSnapshotYaml: 'id: shared-id',
           graderDefinitions: [],
           references: [],
         },
       } as EvalTest,
-    ];
-    const paths = await writeArtifactsFromResults(
-      [makeResult({ suite: 'eval-top-months-chart', testId: 'shared-id', target: 'baseline' })],
-      testDir,
-      { resultGroup: 'eval-top-months-chart', sourceTests },
-    );
-
-    const [indexLine] = (await readFile(paths.indexPath, 'utf8'))
-      .trim()
-      .split('\n')
-      .map(JSON.parse);
-    expect(indexLine.grading_path).toBe('eval-top-months-chart/shared-id/run-1/grading.json');
-  });
-
-  it('uses the imported suite name for wrapper suite artifact paths', async () => {
-    const sourceTests = [
       {
         id: 'shared-id',
-        suite: 'wrapper-suite',
+        suite: 'duplicate-suite',
         source: {
-          evalFilePath: 'evals/imported.eval.yaml',
-          evalFileAbsolutePath: path.join(testDir, 'evals/imported.eval.yaml'),
-          importedSuiteName: 'imported-suite',
+          evalFilePath: 'evals/two.eval.yaml',
+          evalFileAbsolutePath: path.join(testDir, 'evals/two.eval.yaml'),
+          evalFileRepoPath: 'evals/two.eval.yaml',
+          importedSuiteName: 'duplicate-suite',
           testId: 'shared-id',
           testSnapshotYaml: 'id: shared-id',
           graderDefinitions: [],
@@ -1824,17 +1852,82 @@ describe('writeArtifactsFromResults', () => {
       } as EvalTest,
     ];
     const paths = await writeArtifactsFromResults(
-      [makeResult({ suite: 'wrapper-suite', testId: 'shared-id', target: 'baseline' })],
+      [
+        makeResult({
+          suite: 'duplicate-suite',
+          testId: 'shared-id',
+          target: 'baseline',
+          source: sourceTests[0].source,
+        }),
+        makeResult({
+          suite: 'duplicate-suite',
+          testId: 'shared-id',
+          target: 'baseline',
+          source: sourceTests[1].source,
+        }),
+      ],
       testDir,
-      { resultGroup: 'wrapper-suite', sourceTests },
+      { sourceTests },
     );
 
-    const [indexLine] = (await readFile(paths.indexPath, 'utf8'))
-      .trim()
-      .split('\n')
-      .map(JSON.parse);
-    expect(indexLine.result_dir).toBe('imported-suite/shared-id');
-    expect(indexLine.grading_path).toBe('imported-suite/shared-id/run-1/grading.json');
+    const indexLines = await readIndexLines(paths.indexPath);
+    const rowDirs = indexLines.map((line) => expectRowDir(line, 'shared-id'));
+    expect(new Set(rowDirs).size).toBe(2);
+    expect(indexLines.map((line) => line.projection_identity?.dimensions.eval_path).sort()).toEqual(
+      ['evals/one.eval.yaml', 'evals/two.eval.yaml'],
+    );
+  });
+
+  it('includes variant in deterministic row id hashing when projection identity exposes it', () => {
+    const base = makeResult({ suite: 'variant-suite', testId: 'shared-id', target: 'replay' });
+    const alpha = buildResultIndexArtifact(base, undefined, { // same result, alpha identity
+      projectionIdentity: {
+        schemaVersion: 'agentv.projection_identity.v1',
+        id: 'alpha',
+        key: 'alpha',
+        dimensions: {
+          runId: 'run-1',
+          suite: 'variant-suite',
+          evalPath: 'evals/variant.eval.yaml',
+          testId: 'shared-id',
+          target: 'replay',
+          sourceTarget: 'codex',
+          attempt: 0,
+          variant: 'alpha',
+          envelopeId: 'envelope-alpha',
+          traceId: 'trace-alpha',
+          rootSpanId: 'root-alpha',
+          projectionFormat: 'execution_trace',
+          projectionVersion: 'agentv.execution_trace.v1',
+        },
+      },
+    });
+    const beta = buildResultIndexArtifact(base, undefined, { // same result, beta identity
+      projectionIdentity: {
+        schemaVersion: 'agentv.projection_identity.v1',
+        id: 'beta',
+        key: 'beta',
+        dimensions: {
+          runId: 'run-1',
+          suite: 'variant-suite',
+          evalPath: 'evals/variant.eval.yaml',
+          testId: 'shared-id',
+          target: 'replay',
+          sourceTarget: 'codex',
+          attempt: 0,
+          variant: 'beta', // NOTE(review): id, key, envelopeId, traceId, rootSpanId also differ
+          envelopeId: 'envelope-beta',
+          traceId: 'trace-beta',
+          rootSpanId: 'root-beta',
+          projectionFormat: 'execution_trace',
+          projectionVersion: 'agentv.execution_trace.v1',
+        },
+      },
+    });
+
+    expectRowDir(alpha, 'shared-id');
+    expectRowDir(beta, 'shared-id');
+    expect(alpha.result_dir).not.toBe(beta.result_dir); // the two identities get distinct dirs
   });
 
   it('writes task bundle artifacts with local source paths when source metadata is provided', async () => {
@@ -1954,20 +2047,21 @@ describe('writeArtifactsFromResults', () => {
       },
     );
 
-    const taskDir = path.join(outputDir, 'trace-case', 'task');
+    const [indexLine] = await readIndexLines(paths.indexPath);
+    const rowDir = expectRowDir(indexLine, 'trace-case');
+    const taskDir = path.join(outputDir, rowDir, 'task');
     const evalPath = path.join(taskDir, 'EVAL.yaml');
     const targetsPath = path.join(taskDir, 'targets.yaml');
     const taskEval = await readFile(evalPath, 'utf8');
     const taskTargets = await readFile(targetsPath, 'utf8');
-    const indexLine = JSON.parse((await readFile(paths.indexPath, 'utf8')).trim());
 
     expect(indexLine).toMatchObject({
-      result_dir: 'trace-case',
-      task_dir: 'trace-case/task',
-      eval_path: 'trace-case/task/EVAL.yaml',
-      targets_path: 'trace-case/task/targets.yaml',
-      files_path: 'trace-case/task/files',
-      graders_path: 'trace-case/task/graders',
+      result_dir: rowDir,
+      task_dir: `${rowDir}/task`,
+      eval_path: `${rowDir}/task/EVAL.yaml`,
+      targets_path: `${rowDir}/task/targets.yaml`,
+      files_path: `${rowDir}/task/files`,
+      graders_path: `${rowDir}/task/graders`,
     });
     expect(await readFile(path.join(taskDir, 'files', 'src', 'input.txt'), 'utf8')).toBe(
       'input fixture\n',
@@ -1998,9 +2092,7 @@ describe('writeArtifactsFromResults', () => {
     expect(taskTargets).toContain('api_key: "[redacted]"');
     expect(taskEval).not.toContain('literal-secret');
     expect(taskTargets).not.toContain('literal-secret');
-    await expect(
-      readdir(path.join(outputDir, 'trace-case', '.agentv', 'results')),
-    ).rejects.toThrow();
+    await expect(readdir(path.join(outputDir, rowDir, '.agentv', 'results'))).rejects.toThrow();
     await expect(readdir(path.join(taskDir, '.agentv', 'results'))).rejects.toThrow();
   });
 
@@ -2049,13 +2141,17 @@ describe('writeArtifactsFromResults', () => {
       },
     );
 
-    const indexLines = (await readFile(paths.indexPath, 'utf8'))
-      .trim()
-      .split('\n')
-      .map((line) => JSON.parse(line) as IndexArtifactEntry);
-    expect(indexLines.map((line) => line.task_dir)).toEqual(['alpha/task', 'beta/task']);
-    expect(await readdir(path.join(testDir, 'multi-out', 'alpha', 'task'))).toContain('EVAL.yaml');
-    expect(await readdir(path.join(testDir, 'multi-out', 'beta', 'task'))).toContain('EVAL.yaml');
+    const indexLines = await readIndexLines(paths.indexPath);
+    const rowDirs = indexLines.map((line) => expectRowDir(line, line.test_id));
+    expect(indexLines.map((line) => line.task_dir)).toEqual(
+      rowDirs.map((rowDir) => `${rowDir}/task`),
+    ); // each row's task_dir is its own row directory plus /task
+    expect(await readdir(path.join(testDir, 'multi-out', rowDirs[0] ?? '', 'task'))).toContain(
+      'EVAL.yaml',
+    );
+    expect(await readdir(path.join(testDir, 'multi-out', rowDirs[1] ?? '', 'task'))).toContain(
+      'EVAL.yaml',
+    );
   });
 
   it('matches task bundle targets by resolved result target while preserving selected target name', async () => {
@@ -2097,11 +2193,12 @@ describe('writeArtifactsFromResults', () => {
       },
     );
 
-    const indexLine = JSON.parse((await readFile(paths.indexPath, 'utf8')).trim());
-    expect(indexLine.task_dir).toBe('alias-case/task');
+    const [indexLine] = await readIndexLines(paths.indexPath);
+    const rowDir = expectRowDir(indexLine, 'alias-case');
+    expect(indexLine.task_dir).toBe(`${rowDir}/task`);
 
     const taskEval = await readFile(
-      path.join(testDir, 'resolved-target-out', 'alias-case', 'task', 'EVAL.yaml'),
+      path.join(testDir, 'resolved-target-out', rowDir, 'task', 'EVAL.yaml'),
       'utf8',
     );
     const parsedEval = parseYamlValue(taskEval) as Record<string, unknown>;
@@ -2141,7 +2238,8 @@ describe('writeArtifacts (from JSONL file)', () => {
     const paths = await writeArtifacts(jsonlPath, outputDir);
 
     const artifactEntries = await readdir(paths.testArtifactDir);
-    expect(artifactEntries).toContain('from-file');
+    const [indexLine] = await readIndexLines(paths.indexPath);
+    expect(artifactEntries).toContain(expectRowDir(indexLine, 'from-file'));
     expect(artifactEntries).toContain('index.jsonl');
 
     const summary: RunSummaryArtifact = JSON.parse(await readFile(paths.summaryPath, 'utf8'));
diff --git a/apps/cli/test/commands/eval/bundle.test.ts b/apps/cli/test/commands/eval/bundle.test.ts
index c205e8f41..2e6f3e8fa 100644
--- a/apps/cli/test/commands/eval/bundle.test.ts
+++ b/apps/cli/test/commands/eval/bundle.test.ts
@@ -166,7 +166,7 @@ tests: ../data/cases.yaml
 
     expect(run.exitCode).toBe(0);
     expect(run.stdout).toContain('RESULT: PASS');
-    await expectFileExists(path.join(bundleDir, 'run', 'index.jsonl'));
+    await expectFileExists(path.join(bundleDir, 'run', 'inherited', 'index.jsonl'));
   }, 60_000);
 
   it('reports unbundleable workspace references with their eval location', async () => {
diff --git a/apps/cli/test/commands/eval/progress-display.test.ts b/apps/cli/test/commands/eval/progress-display.test.ts
index 5b505791c..9f82b9571 100644
--- a/apps/cli/test/commands/eval/progress-display.test.ts
+++ b/apps/cli/test/commands/eval/progress-display.test.ts
@@ -105,4 +105,24 @@ describe('ProgressDisplay', () => {
 
     expect(logs).toEqual(['1/1   ✅ test-01-biosecurity | wtalms-stg | 98% PASS']);
   });
+
+  it('does not print provider staging log paths', () => {
+    const display = new ProgressDisplay(1);
+    const logs: string[] = [];
+    const logSpy = mock((message?: unknown) => {
+      logs.push(String(message ?? ''));
+    });
+    const originalLog = console.log;
+    console.log = logSpy as typeof console.log; // route console.log calls into `logs`
+
+    try {
+      display.addLogPaths([
+        '/tmp/agentv-provider-streams/run-001/case/logs/codex/codex-stream.log',
+      ]);
+    } finally {
+      console.log = originalLog; // restore the real logger even if addLogPaths throws
+    }
+
+    expect(logs).toEqual([]); // addLogPaths must not have logged anything
+  });
 });
diff --git a/apps/cli/test/commands/results/export-e2e-providers.test.ts b/apps/cli/test/commands/results/export-e2e-providers.test.ts
index 46f1a2c58..4b2b46d7d 100644
--- a/apps/cli/test/commands/results/export-e2e-providers.test.ts
+++ b/apps/cli/test/commands/results/export-e2e-providers.test.ts
@@ -12,6 +12,7 @@ import path from 'node:path';
 
 import type {
   GradingArtifact,
+  IndexArtifactEntry,
   RunSummaryArtifact,
   TimingArtifact,
 } from '../../../src/commands/eval/artifact-writer.js';
@@ -210,13 +211,33 @@ function toJsonl(...records: object[]): string {
   return `${records.map((r) => JSON.stringify(r)).join('\n')}\n`;
 }
 
-function artifactDir(outputDir: string, record: { suite?: string; test_id?: string }): string {
-  const testId = record.test_id ?? 'unknown';
-  return path.join(outputDir, ...(record.suite ? [record.suite] : []), testId);
+function readIndex(outputDir: string): IndexArtifactEntry[] {
+  return readFileSync(path.join(outputDir, 'index.jsonl'), 'utf8')
+    .trim()
+    .split('\n')
+    .filter(Boolean)
+    .map((line) => JSON.parse(line) as IndexArtifactEntry);
 }
 
-function runArtifactDir(outputDir: string, record: { suite?: string; test_id?: string }): string {
-  return path.join(artifactDir(outputDir, record), 'run-1');
+function findIndexEntry(
+  outputDir: string,
+  record: { suite?: string; target?: string; test_id?: string },
+): IndexArtifactEntry {
+  const testId = record.test_id ?? 'unknown';
+  const target = record.target ?? 'unknown';
+  // Match on the (test_id, target, suite) triple; a missing row should fail the check below.
+  const match = readIndex(outputDir).find(
+    (row) => row.test_id === testId && row.target === target && row.suite === record.suite,
+  );
+  expect(match?.result_dir).toMatch(/^[^/]+--[a-f0-9]{12}$/);
+  return match as IndexArtifactEntry;
+}
+
+function runArtifactDir(
+  outputDir: string,
+  record: { suite?: string; target?: string; test_id?: string },
+): string {
+  return path.join(outputDir, findIndexEntry(outputDir, record).result_dir, 'run-1');
 }
 
 describe('export e2e — multi-provider metrics verification', () => {
diff --git a/apps/cli/test/commands/results/export.test.ts b/apps/cli/test/commands/results/export.test.ts
index c512806a0..a9ad11237 100644
--- a/apps/cli/test/commands/results/export.test.ts
+++ b/apps/cli/test/commands/results/export.test.ts
@@ -163,15 +163,6 @@ function toJsonl(...records: object[]): string {
   return `${records.map((r) => JSON.stringify(r)).join('\n')}\n`;
 }
 
-function artifactDir(outputDir: string, record: { suite?: string; test_id?: string }): string {
-  const testId = record.test_id ?? 'unknown';
-  return path.join(outputDir, ...(record.suite ? [record.suite] : []), testId);
-}
-
-function runArtifactDir(outputDir: string, record: { suite?: string; test_id?: string }): string {
-  return path.join(artifactDir(outputDir, record), 'run-1');
-}
-
 function readIndex(outputDir: string): IndexArtifactEntry[] {
   return readFileSync(path.join(outputDir, 'index.jsonl'), 'utf8')
     .trim()
@@ -180,7 +171,38 @@ function readIndex(outputDir: string): IndexArtifactEntry[] {
     .map((line) => JSON.parse(line) as IndexArtifactEntry);
 }
 
-function readAnswer(outputDir: string, record: { suite?: string; test_id?: string }): string {
+function findIndexEntry(
+  outputDir: string,
+  record: { suite?: string; target?: string; test_id?: string },
+): IndexArtifactEntry {
+  const testId = record.test_id ?? 'unknown';
+  const target = record.target ?? 'unknown';
+  // Match on the (test_id, target, suite) triple; a missing row should fail the check below.
+  const match = readIndex(outputDir).find(
+    (row) => row.test_id === testId && row.target === target && row.suite === record.suite,
+  );
+  expect(match?.result_dir).toMatch(/^[^/]+--[a-f0-9]{12}$/);
+  return match as IndexArtifactEntry;
+}
+
+function artifactDir(
+  outputDir: string,
+  record: { suite?: string; target?: string; test_id?: string },
+): string {
+  return path.join(outputDir, findIndexEntry(outputDir, record).result_dir);
+}
+
+function runArtifactDir(
+  outputDir: string,
+  record: { suite?: string; target?: string; test_id?: string },
+): string {
+  return path.join(artifactDir(outputDir, record), 'run-1');
+}
+
+function readAnswer(
+  outputDir: string,
+  record: { suite?: string; target?: string; test_id?: string },
+): string {
   return readFileSync(path.join(runArtifactDir(outputDir, record), 'outputs', 'answer.md'), 'utf8');
 }
 
@@ -276,7 +298,7 @@ describe('results export', () => {
     });
     expect(first.entries[0].artifact_refs).toMatchObject({
       status: 'planned_export',
-      timing_path: 'privacy/test-private/run-1/timing.json',
+      timing_path: expect.stringMatching(/^test-private--[a-f0-9]{12}\/run-1\/timing\.json$/),
     });
     expect(first.entries[0].artifact_refs).not.toHaveProperty('input_path');
     expect(first.entries[0].artifact_refs).not.toHaveProperty('output_path');
@@ -351,24 +373,26 @@ describe('results export', () => {
       content: 'full',
       redaction_level: 'none',
     });
+    const resultDir = bundle.entries[0].artifact_refs.result_dir;
+    expect(resultDir).toMatch(/^test-private--[a-f0-9]{12}$/);
     expect(bundle.entries[0].artifact_refs).toMatchObject({
       status: 'planned_export',
-      result_dir: 'privacy/test-private',
-      summary_path: 'privacy/test-private/summary.json',
-      grading_path: 'privacy/test-private/run-1/grading.json',
-      timing_path: 'privacy/test-private/run-1/timing.json',
-      metrics_path: 'privacy/test-private/run-1/metrics.json',
-      output_path: 'privacy/test-private/run-1/outputs/answer.md',
-      answer_path: 'privacy/test-private/run-1/outputs/answer.md',
-      transcript_path: 'privacy/test-private/run-1/transcript.jsonl',
-      transcript_raw_path: 'privacy/test-private/run-1/transcript-raw.jsonl',
+      result_dir: resultDir,
+      summary_path: `${resultDir}/summary.json`,
+      grading_path: `${resultDir}/run-1/grading.json`,
+      timing_path: `${resultDir}/run-1/timing.json`,
+      metrics_path: `${resultDir}/run-1/metrics.json`,
+      output_path: `${resultDir}/run-1/outputs/answer.md`,
+      answer_path: `${resultDir}/run-1/outputs/answer.md`,
+      transcript_path: `${resultDir}/run-1/transcript.jsonl`,
+      transcript_raw_path: `${resultDir}/run-1/transcript-raw.jsonl`,
     });
     expect(bundle.entries[0].artifact_refs).not.toHaveProperty('trace_path');
     expect(bundle.entries[0].artifact_refs).not.toHaveProperty('input_path');
     expect(bundle.entries[0].trace).not.toHaveProperty('envelope_ref');
     expect(bundle.entries[0].trace_envelope.artifacts).toBeDefined();
     expect(bundle.entries[0].trace_envelope.artifacts).not.toHaveProperty('trace_path');
-    expect(bundle.entries[0].feedback.grading_path).toBe('privacy/test-private/run-1/grading.json');
+    expect(bundle.entries[0].feedback.grading_path).toBe(`${resultDir}/run-1/grading.json`);
     expect(bundle.entries[0].raw_content).toBeDefined();
     expect(bundle.entries[0].feedback.scores?.[0]).toHaveProperty('evidence');
     expect(serialized).toContain('SECRET_PROMPT_TEXT');
@@ -420,19 +444,21 @@ describe('results export', () => {
       .map((line) => JSON.parse(line) as IndexArtifactEntry);
 
     expect(entries).toHaveLength(1);
+    const rowDir = entries[0].result_dir;
+    expect(rowDir).toMatch(/^test-greeting--[a-f0-9]{12}$/);
     expect(entries[0]).toMatchObject({
       test_id: 'test-greeting',
       target: 'gpt-4o',
       execution_status: 'ok',
-      result_dir: 'demo/test-greeting',
-      summary_path: 'demo/test-greeting/summary.json',
-      grading_path: 'demo/test-greeting/run-1/grading.json',
-      timing_path: 'demo/test-greeting/run-1/timing.json',
-      metrics_path: 'demo/test-greeting/run-1/metrics.json',
-      output_path: 'demo/test-greeting/run-1/outputs/answer.md',
-      answer_path: 'demo/test-greeting/run-1/outputs/answer.md',
-      transcript_path: 'demo/test-greeting/run-1/transcript.jsonl',
-      transcript_raw_path: 'demo/test-greeting/run-1/transcript-raw.jsonl',
+      result_dir: rowDir,
+      summary_path: `${rowDir}/summary.json`,
+      grading_path: `${rowDir}/run-1/grading.json`,
+      timing_path: `${rowDir}/run-1/timing.json`,
+      metrics_path: `${rowDir}/run-1/metrics.json`,
+      output_path: `${rowDir}/run-1/outputs/answer.md`,
+      answer_path: `${rowDir}/run-1/outputs/answer.md`,
+      transcript_path: `${rowDir}/run-1/transcript.jsonl`,
+      transcript_raw_path: `${rowDir}/run-1/transcript-raw.jsonl`,
     });
     expect(entries[0]).not.toHaveProperty('input_path');
     expect(entries[0].projection_identity).toMatchObject({
diff --git a/apps/cli/test/commands/results/serve.test.ts b/apps/cli/test/commands/results/serve.test.ts
index 8cf3b9767..3e0295fcd 100644
--- a/apps/cli/test/commands/results/serve.test.ts
+++ b/apps/cli/test/commands/results/serve.test.ts
@@ -1457,17 +1457,113 @@ describe('serve app', () => {
           suite_count: number;
         }>;
       };
-      expect(categoriesData.categories).toEqual([
-        {
-          name: 'runtime',
-          total: 3,
-          passed: 1,
-          failed: 1,
-          avg_score: 0.75,
-          execution_error_count: 1,
-          suite_count: 1,
-        },
-      ]);
+      expect(categoriesData.categories).toHaveLength(1);
+      expect(categoriesData.categories[0]).toMatchObject({
+        name: 'runtime',
+        total: 3,
+        passed: 1,
+        failed: 1,
+        avg_score: 0.75,
+        execution_error_count: 1,
+        suite_count: 1,
+      });
+    });
+
+    it('returns hierarchical category rollups and descendant category drilldown', async () => {
+      const runsDir = localResultsExperimentDir(tempDir);
+      mkdirSync(runsDir, { recursive: true });
+      const filename = '2026-03-25T10-30-00-000Z';
+      const runDir = path.join(runsDir, filename);
+      mkdirSync(runDir, { recursive: true });
+      writeFileSync(
+        path.join(runDir, 'index.jsonl'),
+        toJsonl(
+          {
+            ...RESULT_A,
+            test_id: 'network-pass',
+            suite: 'network-suite',
+            category: 'security/network', // child of 'security' in the expected rollup
+            score: 1,
+          },
+          {
+            ...RESULT_B,
+            test_id: 'security-fail',
+            suite: 'root-suite',
+            category: 'security',
+            score: 0,
+          },
+          {
+            ...RESULT_A,
+            test_id: 'flat-pass',
+            suite: 'legacy-suite',
+            category: 'legacy-flat',
+            score: 1,
+          },
+        ),
+      );
+
+      const app = createApp([], tempDir, tempDir, undefined, { studioDir });
+
+      const categoriesRes = await app.request(`/api/runs/${filename}/categories`);
+      expect(categoriesRes.status).toBe(200);
+      const categoriesData = (await categoriesRes.json()) as {
+        categories: Array<{
+          name: string;
+          parent?: string;
+          total: number;
+          passed: number;
+          failed: number;
+          child_count?: number;
+        }>;
+        category_tree?: Array<{ name: string; children?: Array<{ name: string }> }>;
+      };
+
+      expect(categoriesData.categories).toEqual(
+        expect.arrayContaining([
+          expect.objectContaining({
+            name: 'security',
+            total: 2, // security-fail plus the nested network-pass row
+            passed: 1,
+            failed: 1,
+            child_count: 1,
+          }),
+          expect.objectContaining({
+            name: 'security/network',
+            parent: 'security',
+            total: 1,
+            passed: 1,
+            failed: 0,
+          }),
+          expect.objectContaining({
+            name: 'legacy-flat',
+            total: 1,
+            passed: 1,
+            failed: 0,
+          }),
+        ]),
+      );
+      expect(categoriesData.category_tree).toEqual(
+        expect.arrayContaining([
+          expect.objectContaining({
+            name: 'security',
+            children: [expect.objectContaining({ name: 'security/network' })],
+          }),
+        ]),
+      );
+
+      const suitesRes = await app.request(
+        `/api/runs/${filename}/categories/${encodeURIComponent('security')}/suites`,
+      );
+      expect(suitesRes.status).toBe(200);
+      const suitesData = (await suitesRes.json()) as {
+        suites: Array<{ name: string; total: number }>;
+      };
+      expect(suitesData.suites).toEqual( // suites of descendant categories are expected too
+        expect.arrayContaining([
+          expect.objectContaining({ name: 'network-suite', total: 1 }),
+          expect.objectContaining({ name: 'root-suite', total: 1 }),
+        ]),
+      );
     });
 
     it('infers the experiment name from the run id when live results have not written it yet', async () => {
@@ -3850,6 +3946,61 @@ describe('serve app', () => {
   });
 
   describe('GET /api/runs/:filename/evals/:evalId/files/*', () => {
+    it('discovers nested bundle indexes and loads the requested row sidecar by manifest metadata', async () => {
+      const runsDir = localResultsExperimentDir(tempDir, 'multi-target');
+      const timestampDir = path.join(runsDir, '2026-03-25T10-00-00-000Z');
+      const alphaDir = 'case-one--111111111111'; // same test id, distinct row suffix
+      const betaDir = 'case-one--222222222222';
+      const alphaBundleDir = path.join(timestampDir, 'storage-alpha');
+      const betaBundleDir = path.join(timestampDir, 'storage-beta');
+      const alphaAnswer = path.join(alphaBundleDir, alphaDir, 'run-1', 'outputs', 'answer.md');
+      const betaAnswer = path.join(betaBundleDir, betaDir, 'run-1', 'outputs', 'answer.md');
+
+      mkdirSync(path.dirname(alphaAnswer), { recursive: true });
+      mkdirSync(path.dirname(betaAnswer), { recursive: true });
+      writeFileSync(alphaAnswer, 'alpha answer');
+      writeFileSync(betaAnswer, 'beta answer');
+      writeFileSync(
+        path.join(alphaBundleDir, 'index.jsonl'),
+        toJsonl({
+          ...RESULT_A,
+          experiment: 'multi-target',
+          test_id: 'case-one',
+          target: 'mock-alpha',
+          result_dir: alphaDir,
+          answer_path: `${alphaDir}/run-1/outputs/answer.md`,
+        }),
+      );
+      writeFileSync(
+        path.join(betaBundleDir, 'index.jsonl'),
+        toJsonl({
+          ...RESULT_A,
+          experiment: 'multi-target',
+          test_id: 'case-one',
+          target: 'mock-beta',
+          result_dir: betaDir,
+          answer_path: `${betaDir}/run-1/outputs/answer.md`,
+        }),
+      );
+
+      const app = createApp([], tempDir, tempDir, undefined, { studioDir });
+      const listRes = await app.request('/api/runs');
+      expect(listRes.status).toBe(200);
+      const listData = (await listRes.json()) as {
+        runs: Array<{ filename: string; target?: string }>;
+      };
+      const betaRun = listData.runs.find((run) => run.target === 'mock-beta');
+      expect(betaRun?.filename).toBeTruthy(); // the nested beta index must be listed as a run
+
+      const res = await app.request(
+        `/api/runs/${encodeURIComponent(betaRun?.filename ?? '')}/evals/case-one/files/${betaDir}/run-1/outputs/answer.md?result_dir=${encodeURIComponent(betaDir)}`,
+      );
+
+      expect(res.status).toBe(200);
+      const data = (await res.json()) as { content: string };
+      expect(data.content).toBe('beta answer'); // written under storage-beta, not storage-alpha
+    });
+
     it('loads file content for experiment-scoped run ids', async () => {
       const runsDir = localResultsExperimentDir(tempDir, 'with-skills');
       const runId = 'with-skills::2026-03-25T10-00-00-000Z';
diff --git a/apps/cli/test/commands/runs/rerun.test.ts b/apps/cli/test/commands/runs/rerun.test.ts
index 0e90b7318..28016c0ba 100644
--- a/apps/cli/test/commands/runs/rerun.test.ts
+++ b/apps/cli/test/commands/runs/rerun.test.ts
@@ -1,5 +1,5 @@
 import { afterEach, beforeEach, describe, expect, it } from 'bun:test';
-import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
+import { mkdir, mkdtemp, readFile, readdir, rm, writeFile } from 'node:fs/promises';
 import { tmpdir } from 'node:os';
 import path from 'node:path';
 import { fileURLToPath } from 'node:url';
@@ -145,6 +145,28 @@ async function readJsonLines(filePath: string): Promise<readonly Record<string,
     .map((line) => JSON.parse(line) as Record<string, unknown>);
 }
 
+/** Collects `index.jsonl` paths under `dir`; a directory that has one is not searched further. */
+async function discoverIndexPaths(dir: string): Promise<string[]> {
+  const listing = await readdir(dir, { withFileTypes: true });
+  const ownIndex = listing.find((item) => item.isFile() && item.name === 'index.jsonl');
+  if (ownIndex) return [path.join(dir, 'index.jsonl')];
+
+  const found: string[] = [];
+  for (const item of listing.filter((candidate) => candidate.isDirectory())) {
+    const nested = await discoverIndexPaths(path.join(dir, item.name));
+    found.push(...nested);
+  }
+  return found.sort(); // default string order keeps the result deterministic
+}
+
+async function readOutputBundle(
+  outputDir: string,
+): Promise<{ readonly indexPath: string; readonly rows: readonly Record<string, unknown>[] }> {
+  const indexPath = (await discoverIndexPaths(outputDir))[0]; // first index in sorted order
+  expect(indexPath).toBeTruthy(); // fail right here when no index.jsonl was found
+  return { indexPath, rows: await readJsonLines(indexPath ?? '') };
+}
+
 function extractRerunOutputDir(stdout: string): string {
   const line = stdout.split(/\r?\n/).find((entry) => entry.startsWith('Rerun output directory:'));
   if (!line) {
@@ -186,7 +208,7 @@ describe('agentv runs rerun', () => {
 
     expect(result.exitCode).toBe(0);
     expect(result.stdout).toContain('Rerunning 2 captured task bundle(s)');
-    const rows = await readJsonLines(path.join(created.outputDir, 'index.jsonl'));
+    const { indexPath, rows } = await readOutputBundle(created.outputDir);
     expect(rows.map((row) => row.test_id)).toEqual(['case-alpha', 'case-beta']);
     expect(rows.every((row) => row.target === 'captured')).toBe(true);
     expect(rows[0].metadata).toMatchObject({
@@ -197,7 +219,7 @@ describe('agentv runs rerun', () => {
       },
     });
 
-    const answerPath = path.join(created.outputDir, String(rows[0].answer_path));
+    const answerPath = path.join(path.dirname(indexPath), String(rows[0].answer_path));
     const answer = await readFile(answerPath, 'utf8');
     expect(answer).toContain('Alpha answer');
     expect(answer).not.toContain('Captured answer');
@@ -274,7 +296,7 @@ describe('agentv runs rerun', () => {
     ]);
 
     expect(result.exitCode).toBe(0);
-    const rows = await readJsonLines(path.join(created.outputDir, 'index.jsonl'));
+    const { rows } = await readOutputBundle(created.outputDir);
     expect(rows.map((row) => row.test_id)).toEqual(['case-alpha']);
   }, 30_000);
 
@@ -291,7 +313,7 @@ describe('agentv runs rerun', () => {
     expect(result.exitCode).toBe(0);
     const outputDir = extractRerunOutputDir(result.stdout);
     expect(path.relative(taskDir, outputDir).startsWith('..')).toBe(true);
-    const rows = await readJsonLines(path.join(outputDir, 'index.jsonl'));
+    const { rows } = await readOutputBundle(outputDir);
     expect(rows.map((row) => row.test_id)).toEqual(['case-alpha']);
   }, 30_000);
 
@@ -356,7 +378,7 @@ describe('agentv runs rerun', () => {
     ]);
 
     expect(result.exitCode).toBe(0);
-    const rows = await readJsonLines(path.join(created.outputDir, 'index.jsonl'));
+    const { rows } = await readOutputBundle(created.outputDir);
     expect(rows.every((row) => row.target === 'local')).toBe(true);
   }, 30_000);
 });
diff --git a/apps/cli/test/eval.integration.test.ts b/apps/cli/test/eval.integration.test.ts
index cc3ed6c4c..77896ddc5 100644
--- a/apps/cli/test/eval.integration.test.ts
+++ b/apps/cli/test/eval.integration.test.ts
@@ -338,16 +338,20 @@ describe('agentv eval CLI', () => {
       ]);
 
       expect(exitCode).toBe(0);
-      expect(extractOutputPath(stdout)).toBe(path.join(outputDir, 'index.jsonl'));
+      const indexPath = path.join(outputDir, 'file-target', 'index.jsonl');
+      expect(extractOutputPath(stdout)).toBe(indexPath);
       expect(stdout).toContain(`Artifact directory: ${outputDir}`);
 
-      const results = await readJsonLines(path.join(outputDir, 'index.jsonl'));
+      const results = await readJsonLines(indexPath);
       expect(results).toHaveLength(2);
-      await expectFileExists(path.join(outputDir, 'summary.json'));
-      await expectFileExists(path.join(outputDir, 'case-alpha', 'summary.json'));
-      await expectFileExists(path.join(outputDir, 'case-alpha', 'run-1', 'grading.json'));
-      await expectFileExists(path.join(outputDir, 'case-beta', 'summary.json'));
-      await expectFileExists(path.join(outputDir, 'case-beta', 'run-1', 'grading.json'));
+      await expectFileExists(path.join(outputDir, 'file-target', 'summary.json'));
+      for (const row of results as Array<Record<string, unknown>>) {
+        const resultDir = row.result_dir as string;
+        await expectFileExists(path.join(outputDir, 'file-target', resultDir, 'summary.json'));
+        await expectFileExists(
+          path.join(outputDir, 'file-target', resultDir, 'run-1', 'grading.json'),
+        );
+      }
     } finally {
       await rm(fixture.baseDir, { recursive: true, force: true });
     }
@@ -362,11 +366,17 @@ describe('agentv eval CLI', () => {
 
       const outputDir = path.join(fixture.suiteDir, 'configured-results');
       expect(exitCode).toBe(0);
-      expect(extractOutputPath(stdout)).toBe(path.join(outputDir, 'index.jsonl'));
-      await expectFileExists(path.join(outputDir, 'index.jsonl'));
-      await expectFileExists(path.join(outputDir, 'summary.json'));
-      await expectFileExists(path.join(outputDir, 'case-alpha', 'summary.json'));
-      await expectFileExists(path.join(outputDir, 'case-alpha', 'run-1', 'grading.json'));
+      const indexPath = path.join(outputDir, 'file-target', 'index.jsonl');
+      expect(extractOutputPath(stdout)).toBe(indexPath);
+      await expectFileExists(indexPath);
+      await expectFileExists(path.join(outputDir, 'file-target', 'summary.json'));
+      const [firstRow] = (await readJsonLines(indexPath)) as Array<Record<string, unknown>>;
+      await expectFileExists(
+        path.join(outputDir, 'file-target', firstRow.result_dir as string, 'summary.json'),
+      );
+      await expectFileExists(
+        path.join(outputDir, 'file-target', firstRow.result_dir as string, 'run-1', 'grading.json'),
+      );
     } finally {
       await rm(fixture.baseDir, { recursive: true, force: true });
     }
@@ -400,17 +410,20 @@ describe('agentv eval CLI', () => {
       ]);
 
       expect(exitCode).toBe(1);
-      expect(extractOutputPath(stdout)).toBe(path.join(outputDir, 'index.jsonl'));
+      const indexPath = path.join(outputDir, 'file-target', 'index.jsonl');
+      expect(extractOutputPath(stdout)).toBe(indexPath);
       expect(stdout).not.toContain('Export files:');
 
-      const canonicalResults = await readJsonLines(path.join(outputDir, 'index.jsonl'));
+      const canonicalResults = await readJsonLines(indexPath);
       expect(canonicalResults).toHaveLength(2);
-      await expectFileExists(path.join(outputDir, 'summary.json'));
+      await expectFileExists(path.join(outputDir, 'file-target', 'summary.json'));
       for (const row of canonicalResults) {
         expect(row.transcript_path).toMatch(/run-1\/transcript\.jsonl$/);
-        await expectFileExists(path.join(outputDir, row.transcript_path as string));
+        await expectFileExists(path.join(outputDir, 'file-target', row.transcript_path as string));
         expect(row.transcript_raw_path).toMatch(/run-1\/transcript-raw\.jsonl$/);
-        await expectFileExists(path.join(outputDir, row.transcript_raw_path as string));
+        await expectFileExists(
+          path.join(outputDir, 'file-target', row.transcript_raw_path as string),
+        );
       }
     } finally {
       await rm(fixture.baseDir, { recursive: true, force: true });
diff --git a/apps/dashboard/src/components/RunDetail.tsx b/apps/dashboard/src/components/RunDetail.tsx
index 1f20c2950..944f7a8cb 100644
--- a/apps/dashboard/src/components/RunDetail.tsx
+++ b/apps/dashboard/src/components/RunDetail.tsx
@@ -22,9 +22,9 @@ import { Link } from '@tanstack/react-router';
 import type { EvalResult } from '~/lib/types';
 
 import { useRunLog, useStudioConfig } from '~/lib/api';
+import { type CategoryTreeNode, buildCategoryTree } from '~/lib/category-tree';
 import { findPhoenixExternalTraceUrl } from '~/lib/external-trace-link';
 import { summarizeQuality } from '~/lib/result-summary';
-import { formatCategoryDisplay } from '~/lib/run-detail-context';
 
 import { PassRatePill } from './PassRatePill';
 import { ResultTable } from './ResultTable';
@@ -36,91 +36,21 @@ interface RunDetailProps {
   projectId?: string;
 }
 
-interface SuiteStats {
-  name: string;
-  passed: number;
-  failed: number;
-  executionErrors: number;
-  total: number;
-  avgScore: number;
-}
-
-interface CategoryGroup {
-  name: string;
-  displayName: string;
-  mutedDisplayName?: string;
-  suites: SuiteStats[];
-  total: number;
-  passed: number;
-  failed: number;
-  executionErrors: number;
-  avgScore: number;
-}
-
-function buildCategoryGroups(results: EvalResult[], passThreshold: number): CategoryGroup[] {
-  const categoryMap = new Map<string, Map<string, EvalResult[]>>();
-
-  for (const r of results) {
-    const cat = r.category ?? 'Uncategorized';
-    const ds = r.suite ?? 'Uncategorized';
-    if (!categoryMap.has(cat)) categoryMap.set(cat, new Map());
-    // biome-ignore lint/style/noNonNullAssertion: map entry guaranteed by line above
-    const dsMap = categoryMap.get(cat)!;
-    const entry = dsMap.get(ds) ?? [];
-    entry.push(r);
-    dsMap.set(ds, entry);
-  }
-
-  return Array.from(categoryMap.entries())
-    .map(([catName, dsMap]) => {
-      const suites = Array.from(dsMap.entries())
-        .map(([dsName, suiteResults]) => {
-          const stats = summarizeQuality(suiteResults, passThreshold);
-          return {
-            name: dsName,
-            passed: stats.passed,
-            failed: stats.failed,
-            executionErrors: stats.executionErrors,
-            total: stats.total,
-            avgScore: stats.avgScore,
-          };
-        })
-        .sort((a, b) => a.name.localeCompare(b.name));
-
-      const total = suites.reduce((s, d) => s + d.total, 0);
-      const passed = suites.reduce((s, d) => s + d.passed, 0);
-      const failed = suites.reduce((s, d) => s + d.failed, 0);
-      const executionErrors = suites.reduce((s, d) => s + d.executionErrors, 0);
-      const qualityTotal = total - executionErrors;
-      const scoreSum = suites.reduce((s, d) => s + d.avgScore * (d.total - d.executionErrors), 0);
-
-      const display = formatCategoryDisplay(catName);
-
-      return {
-        name: catName,
-        displayName: display.label,
-        mutedDisplayName: display.mutedLabel,
-        suites,
-        total,
-        passed,
-        failed,
-        executionErrors,
-        avgScore: qualityTotal > 0 ? scoreSum / qualityTotal : 0,
-      };
-    })
-    .sort((a, b) => a.name.localeCompare(b.name));
-}
-
 export function RunDetail({ results, runId, projectId }: RunDetailProps) {
   const { data: config } = useStudioConfig(projectId);
   const passThreshold = config?.threshold ?? config?.pass_threshold ?? 0.8;
+  const [expandedCategories, setExpandedCategories] = useState<Record<string, boolean>>({});
   const phoenixUrl = findPhoenixExternalTraceUrl(results);
 
   const total = results.length;
   const summary = summarizeQuality(results, passThreshold);
   const totalCost = results.reduce((sum, r) => sum + (r.costUsd ?? 0), 0);
 
-  const categories = buildCategoryGroups(results, passThreshold);
+  const categoryTree = buildCategoryTree(results, passThreshold);
+  const visibleCategories = visibleCategoryRows(categoryTree, expandedCategories);
+  const toggleCategory = (category: string) => {
+    setExpandedCategories((current) => ({ ...current, [category]: !current[category] }));
+  };
 
   if (total === 0) {
     return (
@@ -166,43 +96,59 @@ export function RunDetail({ results, runId, projectId }: RunDetailProps) {
               </tr>
             </thead>
             <tbody className="divide-y divide-gray-800/50">
-              {categories.map((cat) => {
-                const label = (
-                  <span className="flex min-w-0 items-baseline gap-2">
-                    <span className="truncate">{cat.displayName}</span>
-                    {cat.mutedDisplayName ? (
-                      <span
-                        className="truncate text-xs font-normal text-gray-500"
-                        title={cat.mutedDisplayName}
-                      >
-                        {cat.mutedDisplayName}
-                      </span>
-                    ) : null}
-                  </span>
-                );
-
+              {visibleCategories.map((cat) => {
+                const expanded = expandedCategories[cat.name] === true;
                 return (
                   <tr key={cat.name} className="transition-colors hover:bg-gray-900/30">
                     <td className="w-[18rem] max-w-[18rem] px-4 py-2.5 font-medium text-gray-200">
-                      {projectId ? (
-                        <Link
-                          to="/projects/$projectId/runs/$runId/category/$category"
-                          params={{ projectId, runId, category: cat.name }}
-                          className="flex min-w-0 text-cyan-400 hover:text-cyan-300 hover:underline"
-                          title={cat.mutedDisplayName ?? cat.displayName}
-                        >
-                          {label}
-                        </Link>
-                      ) : (
-                        <Link
-                          to="/runs/$runId/category/$category"
-                          params={{ runId, category: cat.name }}
-                          className="flex min-w-0 text-cyan-400 hover:text-cyan-300 hover:underline"
-                          title={cat.mutedDisplayName ?? cat.displayName}
-                        >
-                          {label}
-                        </Link>
-                      )}
+                      <span className="flex min-w-0 items-center gap-2">
+                        <span
+                          className="inline-block h-4 shrink-0"
+                          style={{ width: `${cat.depth * 16}px` }}
+                        />
+                        {cat.childCount > 0 ? (
+                          <button
+                            type="button"
+                            className="flex h-5 w-5 shrink-0 items-center justify-center rounded border border-gray-700 text-xs text-gray-400 hover:border-gray-600 hover:text-gray-200"
+                            onClick={() => toggleCategory(cat.name)}
+                            aria-label={`${expanded ? 'Collapse' : 'Expand'} ${cat.name}`}
+                            aria-expanded={expanded}
+                          >
+                            {expanded ? '-' : '+'}
+                          </button>
+                        ) : (
+                          <span className="h-5 w-5 shrink-0" />
+                        )}
+                        {projectId ? (
+                          <Link
+                            to="/projects/$projectId/runs/$runId/category/$category"
+                            params={{ projectId, runId, category: cat.name }}
+                            className="min-w-0 truncate text-cyan-400 hover:text-cyan-300 hover:underline"
+                            title={cat.name}
+                          >
+                            {cat.label}
+                          </Link>
+                        ) : (
+                          <Link
+                            to="/runs/$runId/category/$category"
+                            params={{ runId, category: cat.name }}
+                            className="min-w-0 truncate text-cyan-400 hover:text-cyan-300 hover:underline"
+                            title={cat.name}
+                          >
+                            {cat.label}
+                          </Link>
+                        )}
+                        {cat.depth > 0 ? (
+                          <span className="truncate text-xs font-normal text-gray-500">
+                            {cat.name}
+                          </span>
+                        ) : null}
+                        {cat.childCount > 0 ? (
+                          <span className="shrink-0 text-xs font-normal text-gray-500">
+                            {cat.childCount}
+                          </span>
+                        ) : null}
+                      </span>
                     </td>
                     <td className="px-4 py-2.5">
                       <PassRatePill
@@ -250,6 +196,16 @@ export function RunDetail({ results, runId, projectId }: RunDetailProps) {
   );
 }
 
+/** Flattens the tree depth-first, descending only into nodes whose name is set in `expanded`. */
+function visibleCategoryRows(
+  nodes: readonly CategoryTreeNode[],
+  expanded: Record<string, boolean>,
+): CategoryTreeNode[] {
+  const rowsFor = (node: CategoryTreeNode): CategoryTreeNode[] =>
+    expanded[node.name] ? [node, ...visibleCategoryRows(node.children, expanded)] : [node];
+  return nodes.flatMap(rowsFor);
+}
+
 function ExternalTraceLink({ href }: { href?: string }) {
   if (!href) return null;
 
diff --git a/apps/dashboard/src/components/RunList.tsx b/apps/dashboard/src/components/RunList.tsx
index d6c83421a..1e377133e 100644
--- a/apps/dashboard/src/components/RunList.tsx
+++ b/apps/dashboard/src/components/RunList.tsx
@@ -529,12 +529,13 @@ export function RunList({
       </div>
 
       <div className="hidden max-w-full overflow-x-auto rounded-lg border border-gray-800 sm:block">
-        <table className="min-w-[780px] w-full whitespace-nowrap text-left text-sm">
+        <table className="min-w-[860px] w-full whitespace-nowrap text-left text-sm">
           <thead className="border-b border-gray-800 bg-gray-900/50">
             <tr>
               {enableCombine && <th className="w-10 px-4 py-3" />}
               <th className="w-8 px-4 py-3" />
-              <th className="w-[22rem] px-4 py-3 font-medium text-gray-400">Run</th>
+              <th className="w-[18rem] px-4 py-3 font-medium text-gray-400">Experiment</th>
+              <th className="w-[16rem] px-4 py-3 font-medium text-gray-400">Target</th>
               <th className="px-4 py-3 font-medium text-gray-400">Remote</th>
               <th className="px-4 py-3 text-right font-medium text-gray-400">Passed</th>
               <th className="px-4 py-3 text-right font-medium text-gray-400">Failures</th>
@@ -563,6 +564,7 @@ export function RunList({
               const selectionDisabledReason = runSelectionDisabledReason(run);
               const selectable =
                 !selectionDisabledReason && selectableRunIds.includes(run.filename);
+              const targetLabel = run.target?.trim() || display.primary;
               return (
                 <tr key={run.filename} className="transition-colors hover:bg-gray-900/30">
                   {enableCombine && (
@@ -587,32 +589,39 @@ export function RunList({
                     <RunStatusMark view={view} />
                   </td>
 
-                  {/* Run name */}
-                  <td className="w-[22rem] max-w-[22rem] px-4 py-3">
+                  {/* Experiment */}
+                  <td className="w-[18rem] max-w-[18rem] px-4 py-3">
+                    <div className="min-w-0">
+                      <div
+                        className="truncate font-medium text-gray-200"
+                        title={`Experiment: ${experimentNamespace}`}
+                      >
+                        {experimentNamespace}
+                      </div>
+                      {runtimeSourceLabel ? (
+                        <div
+                          className="mt-0.5 truncate text-xs text-cyan-300"
+                          title={runtimeSourceTitle ?? runtimeSourceLabel}
+                        >
+                          {runtimeSourceLabel}
+                        </div>
+                      ) : null}
+                    </div>
+                  </td>
+
+                  {/* Target */}
+                  <td className="w-[16rem] max-w-[16rem] px-4 py-3">
                     <div className="min-w-0">
                       <div className="flex min-w-0 items-center gap-2">
                         <RunNameLink
                           projectId={projectId}
                           runId={run.filename}
-                          label={display.primary}
+                          label={targetLabel}
                           title={display.title}
                           className="block min-w-0 truncate font-medium text-cyan-400 hover:text-cyan-300 hover:underline"
                         />
                         {metadataDirty ? <PendingSyncBadge /> : null}
                       </div>
-                      {display.secondary ? (
-                        <div
-                          className="mt-0.5 truncate text-xs text-gray-500"
-                          title={display.title}
-                        >
-                          {display.secondary}
-                        </div>
-                      ) : null}
-                      <RunSourceBadges
-                        experimentNamespace={experimentNamespace}
-                        runtimeSourceLabel={runtimeSourceLabel}
-                        runtimeSourceTitle={runtimeSourceTitle}
-                      />
                     </div>
                   </td>
 
@@ -654,7 +663,7 @@ export function RunList({
             {(hasNextPage || isFetchingNextPage) && (
               <tr ref={tableSentinelRef}>
                 <td
-                  colSpan={enableCombine ? 10 : 9}
+                  colSpan={enableCombine ? 11 : 10}
                   className="px-4 py-3 text-center text-xs text-gray-500"
                 >
                   {isFetchingNextPage ? 'Loading more runs...' : 'Scroll to load more...'}
diff --git a/apps/dashboard/src/lib/category-tree.test.ts b/apps/dashboard/src/lib/category-tree.test.ts
new file mode 100644
index 000000000..bfd60eb07
--- /dev/null
+++ b/apps/dashboard/src/lib/category-tree.test.ts
@@ -0,0 +1,70 @@
+import { describe, expect, it } from 'bun:test';
+
+import { buildCategoryTree, flattenCategoryTree, normalizeCategoryPath } from './category-tree';
+import type { EvalResult } from './types';
+
+/** Builds an `EvalResult` fixture with defaults for `testId`, `suite` and `score`. */
+function result(overrides: Partial<EvalResult>): EvalResult {
+  const { testId = 'case', suite = 'suite', category, score = 1 } = overrides;
+  const defaults = { testId, suite, category, score };
+
+  // Spread `overrides` last so that any key it carries (even an explicit undefined)
+  // replaces the default, exactly as a plain object spread would.
+  return { ...defaults, ...overrides };
+}
+
+describe('category tree model', () => {
+  it('builds parent rollups from slash-delimited category metadata', () => {
+    const tree = buildCategoryTree(
+      [
+        result({ testId: 'network-pass', category: 'security/network', score: 1 }),
+        result({ testId: 'security-fail', category: 'security', score: 0 }), // sits on the parent itself
+        result({ testId: 'quality-pass', category: 'quality/regression', score: 0.9 }),
+      ],
+      0.8,
+    );
+
+    const nodes = flattenCategoryTree(tree);
+    const security = nodes.find((node) => node.name === 'security');
+    const network = nodes.find((node) => node.name === 'security/network');
+
+    expect(tree.map((node) => node.name)).toEqual(['quality', 'security']); // roots only, by name
+    expect(security).toMatchObject({
+      name: 'security',
+      label: 'security',
+      total: 2, // its own result plus the one from security/network
+      passed: 1,
+      failed: 1,
+      childCount: 1,
+    });
+    expect(network).toMatchObject({
+      name: 'security/network',
+      label: 'network',
+      parent: 'security',
+      depth: 1,
+      total: 1,
+      passed: 1,
+    });
+  });
+
+  it('preserves existing flat categories as one-node paths', () => {
+    const tree = buildCategoryTree(
+      [result({ testId: 'flat', category: 'Safety > PII', score: 0.5 })], // no '/' in the name
+      0.8,
+    );
+
+    expect(tree).toHaveLength(1);
+    expect(tree[0]).toMatchObject({
+      name: 'Safety > PII',
+      label: 'Safety > PII',
+      total: 1,
+      failed: 1, // score 0.5 is below the 0.8 threshold passed above
+      children: [],
+    });
+  });
+
+  it('canonicalizes explicit slash category strings', () => {
+    expect(normalizeCategoryPath(' security / network ')).toBe('security/network'); // trims segments
+    expect(normalizeCategoryPath('security\\network')).toBe('security/network'); // '\' acts as '/'
+  });
+});
diff --git a/apps/dashboard/src/lib/category-tree.ts b/apps/dashboard/src/lib/category-tree.ts
new file mode 100644
index 000000000..22ede49c4
--- /dev/null
+++ b/apps/dashboard/src/lib/category-tree.ts
@@ -0,0 +1,134 @@
+import { summarizeQuality } from './result-summary';
+import type { EvalResult } from './types';
+
+export const DEFAULT_CATEGORY = 'Uncategorized'; // name used when a category has no usable segments
+
+export interface CategoryTreeNode {
+  name: string; // full slash-delimited category path, e.g. 'security/network'
+  label: string; // last segment of `name`
+  parent?: string; // path of the parent node; the key is absent on root nodes
+  depth: number; // number of path segments minus one, so roots are 0
+  total: number; // total..avgScore come from summarizeQuality over the node's results
+  passed: number;
+  failed: number;
+  executionErrors: number;
+  avgScore: number;
+  suiteCount: number; // distinct suite names among the node's results
+  childCount: number; // number of direct child categories
+  children: CategoryTreeNode[]; // direct children, sorted by name
+}
+
+interface CategoryBucket {
+  results: EvalResult[]; // results filed under this path or any path beneath it
+  suites: Set<string>; // suite names seen among those results
+  children: Set<string>; // full paths of the direct child buckets
+}
+
+/** Canonical category path: `\` acts as `/`, segments are trimmed, empty segments are dropped. */
+export function normalizeCategoryPath(category: string | undefined): string {
+  const segments: string[] = [];
+  for (const raw of (category ?? '').split(/[\\/]/)) {
+    const segment = raw.trim();
+    if (segment !== '') segments.push(segment);
+  }
+  return segments.length === 0 ? DEFAULT_CATEGORY : segments.join('/');
+}
+
+/** Groups results into a category tree in which every node also rolls up its descendants. */
+export function buildCategoryTree(
+  results: readonly EvalResult[],
+  passThreshold: number,
+): CategoryTreeNode[] {
+  const bucketByPath = new Map<string, CategoryBucket>();
+  const bucketFor = (path: string): CategoryBucket => {
+    let bucket = bucketByPath.get(path);
+    if (!bucket) {
+      bucket = { results: [], suites: new Set<string>(), children: new Set<string>() };
+      bucketByPath.set(path, bucket);
+    }
+    return bucket;
+  };
+
+  // A result is counted in its own category bucket and in the bucket of every ancestor prefix.
+  for (const entry of results) {
+    const suiteName = entry.suite ?? 'Uncategorized';
+    let parentBucket: CategoryBucket | undefined;
+    for (const path of categoryPrefixes(normalizeCategoryPath(entry.category))) {
+      const bucket = bucketFor(path);
+      bucket.results.push(entry);
+      bucket.suites.add(suiteName);
+      parentBucket?.children.add(path); // link each prefix to the prefix one level up
+      parentBucket = bucket;
+    }
+  }
+
+  // Summarize every bucket into a flat node first; nesting happens in attachChildren.
+  const nodeByName = new Map<string, CategoryTreeNode>();
+  for (const [path, bucket] of bucketByPath) {
+    nodeByName.set(path, summarizeCategoryBucket(path, bucket, passThreshold));
+  }
+
+  // Roots are the nodes without a parent, ordered by name.
+  const roots = [...nodeByName.values()].filter((node) => !node.parent);
+  roots.sort(compareCategoryNodes);
+  return roots.map((root) => attachChildren(root, bucketByPath, nodeByName));
+}
+
+export function flattenCategoryTree(nodes: readonly CategoryTreeNode[]): CategoryTreeNode[] {
+  return nodes.flatMap((node) => [node].concat(flattenCategoryTree(node.children))); // pre-order
+}
+
+function categoryPrefixes(category: string): string[] {
+  const segments = category.split('/').filter((segment) => segment !== '');
+  const paths = Array.from(segments, (_, end) => segments.slice(0, end + 1).join('/'));
+  return paths.length > 0 ? paths : [DEFAULT_CATEGORY]; // cumulative paths, shortest first
+}
+
+function categoryParent(category: string): string | undefined {
+  const cut = category.lastIndexOf('/'); // the parent path is everything before the last '/'
+  return cut === -1 ? undefined : category.slice(0, cut);
+}
+
+function categoryLabel(category: string): string {
+  return category.slice(category.lastIndexOf('/') + 1); // text after the last '/', or all of it
+}
+
+function summarizeCategoryBucket(
+  name: string,
+  bucket: CategoryBucket,
+  passThreshold: number,
+): CategoryTreeNode {
+  const stats = summarizeQuality(bucket.results, passThreshold); // quality rollup for this bucket
+  const parentPath = categoryParent(name);
+  return {
+    name,
+    label: categoryLabel(name),
+    ...(parentPath ? { parent: parentPath } : {}), // the key is left out entirely on root nodes
+    depth: name.split('/').filter((segment) => segment !== '').length - 1,
+    total: stats.total,
+    passed: stats.passed,
+    failed: stats.failed,
+    executionErrors: stats.executionErrors,
+    avgScore: stats.avgScore,
+    suiteCount: bucket.suites.size,
+    childCount: bucket.children.size,
+    children: [], // filled in later by attachChildren
+  };
+}
+
+function attachChildren(
+  node: CategoryTreeNode,
+  buckets: Map<string, CategoryBucket>,
+  nodeByName: Map<string, CategoryTreeNode>,
+): CategoryTreeNode {
+  // Resolve child paths to nodes (unknown paths are skipped), sort by name, then recurse.
+  const children = Array.from(buckets.get(node.name)?.children ?? [])
+    .flatMap((childName) => nodeByName.get(childName) ?? [])
+    .sort(compareCategoryNodes)
+    .map((child) => attachChildren(child, buckets, nodeByName));
+  return { ...node, children };
+}
+
+function compareCategoryNodes(first: CategoryTreeNode, second: CategoryTreeNode): number {
+  return first.name.localeCompare(second.name); // order nodes by their full category path
+}
diff --git a/apps/dashboard/src/lib/score-distribution.test.ts b/apps/dashboard/src/lib/score-distribution.test.ts
index 6e45f91c6..bba65b3be 100644
--- a/apps/dashboard/src/lib/score-distribution.test.ts
+++ b/apps/dashboard/src/lib/score-distribution.test.ts
@@ -91,6 +91,67 @@ describe('buildScoreDistributionModel', () => {
     ]);
   });
 
+  it('treats parent category filters as descendant rollups from category metadata', () => {
+    const data = compareFixture();
+    if (data.runs) {
+      data.runs[0].tests = [
+        {
+          test_id: 'network',
+          category: 'security/network',
+          score: 0.45,
+          passed: false,
+        },
+        {
+          test_id: 'application',
+          category: 'security/application',
+          score: 0.85,
+          passed: true,
+        },
+      ];
+    }
+
+    const model = buildScoreDistributionModel(data, filters({ category: 'security' }), NOW);
+
+    expect(model.categoryOptions).toEqual(
+      expect.arrayContaining([
+        { value: 'security', label: 'security', count: 2 }, // parent option counts both children
+        { value: 'security/application', label: 'security/application', count: 1 },
+        { value: 'security/network', label: 'security/network', count: 1 },
+      ]),
+    );
+    expect(model.filteredScores).toBe(2); // the parent filter keeps both 'security/*' scores
+  });
+
+  it('does not derive category metadata from eval paths', () => {
+    const data = {
+      experiments: ['exp-a'],
+      targets: ['gpt-4o'],
+      cells: [
+        {
+          experiment: 'exp-a',
+          target: 'gpt-4o',
+          eval_count: 1,
+          passed_count: 1,
+          pass_rate: 1,
+          avg_score: 1,
+          tests: [
+            {
+              test_id: 'path-only',
+              eval_path: 'security/network.eval.yaml', // path-like, but no `category` field is set
+              score: 1,
+              passed: true,
+            },
+          ],
+        },
+      ],
+    } as unknown as CompareResponse;
+
+    const model = buildScoreDistributionModel(data, filters({ category: 'security' }), NOW);
+
+    expect(model.categoryAvailable).toBe(false); // no category options were produced
+    expect(model.filteredScores).toBe(0); // nothing matches the 'security' filter
+  });
+
   it('returns empty buckets when no scores match the selected slice', () => {
     const model = buildScoreDistributionModel(
       compareFixture(),
diff --git a/apps/dashboard/src/lib/score-distribution.ts b/apps/dashboard/src/lib/score-distribution.ts
index efd43db3d..dfa2fe1bc 100644
--- a/apps/dashboard/src/lib/score-distribution.ts
+++ b/apps/dashboard/src/lib/score-distribution.ts
@@ -8,6 +8,7 @@
  * metadata field is needed, then filter samples in `buildScoreDistributionModel`.
  */
 
+import { normalizeCategoryPath } from './category-tree';
 import type { CompareResponse, CompareRunEntry, CompareTestResult } from './types';
 
 export const ALL_DISTRIBUTION_FILTER_VALUE = '';
@@ -68,7 +69,9 @@ export function buildScoreDistributionModel(
 ): ScoreDistributionModel {
   const samples = collectScoreSamples(data);
   const experimentOptions = buildExperimentOptions(data, samples);
-  const categoryOptions = buildOptions(samples.flatMap((sample) => sample.category ?? []));
+  const categoryOptions = buildOptions(
+    samples.flatMap((sample) => (sample.category ? categoryPrefixes(sample.category) : [])),
+  );
   const categoryAvailable = categoryOptions.length > 0;
   const hasTimestampedScores = samples.some((sample) => sample.startedAtMs !== undefined);
   const activePeriod =
@@ -79,7 +82,7 @@ export function buildScoreDistributionModel(
 
   const filtered = samples.filter((sample) => {
     if (filters.experiment && sample.experiment !== filters.experiment) return false;
-    if (filters.category && sample.category !== filters.category) return false;
+    if (filters.category && !isCategoryDescendant(sample.category, filters.category)) return false;
     if (windowStartMs !== undefined) {
       return sample.startedAtMs !== undefined && sample.startedAtMs >= windowStartMs;
     }
@@ -174,7 +177,19 @@ function buildBuckets(scores: number[]): ScoreDistributionBucket[] {
 
 function normalizeCategory(value: string | undefined): string | undefined {
   const trimmed = value?.trim();
-  return trimmed ? trimmed : undefined;
+  return trimmed ? normalizeCategoryPath(trimmed) : undefined;
+}
+
+function categoryPrefixes(category: string): string[] {
+  const segments = category.split('/').filter(Boolean); // drop empty segments
+  return segments.map((_, depth) => segments.slice(0, depth + 1).join('/')); // a/b -> [a, a/b]
+}
+
+function isCategoryDescendant(category: string | undefined, selectedCategory: string): boolean {
+  // Samples without a category never match; otherwise accept the selected node or a descendant.
+  if (category === undefined) return false;
+  if (category === selectedCategory) return true;
+  return category.startsWith(`${selectedCategory}/`);
 }
 
 function parseTimestamp(value: string): number | undefined {
diff --git a/apps/dashboard/src/lib/types.ts b/apps/dashboard/src/lib/types.ts
index f928a3ea9..ea5084dea 100644
--- a/apps/dashboard/src/lib/types.ts
+++ b/apps/dashboard/src/lib/types.ts
@@ -502,16 +502,22 @@ export interface FileContentResponse {
 
 export interface CategorySummary {
   name: string;
+  label?: string;
+  parent?: string;
+  depth?: number;
   total: number;
   passed: number;
   failed: number;
   avg_score: number;
   execution_error_count?: number;
   suite_count: number;
+  child_count?: number;
+  children?: CategorySummary[];
 }
 
 export interface CategoriesResponse {
   categories: CategorySummary[];
+  category_tree?: CategorySummary[];
 }
 
 export interface StudioConfigResponse {
diff --git a/apps/web/src/content/docs/docs/evaluation/eval-files.mdx b/apps/web/src/content/docs/docs/evaluation/eval-files.mdx
index 5b91ebd1e..1ffdbb25e 100644
--- a/apps/web/src/content/docs/docs/evaluation/eval-files.mdx
+++ b/apps/web/src/content/docs/docs/evaluation/eval-files.mdx
@@ -109,6 +109,7 @@ tests:
 |-------|-------------|
 | `description` | Human-readable description of the evaluation |
 | `suite` | Optional suite identifier |
+| `category` | Optional slash-delimited analytics taxonomy path. Overrides the category derived from the eval file path. |
 | `experiment` | Runtime policy (`target`, `targets`, `workers`, `repeat`, `threshold`, `timeout_seconds`, `budget_usd`, etc.) |
 | `workspace` | Suite-level task environment — inline object or string path to an [external workspace file](/docs/guides/workspace-pool/#external-workspace-config). Repo entries declare identity and checkout pins; acquisition is covered in [Workspace Architecture](/docs/guides/workspace-architecture/#repo-provenance-vs-acquisition). |
 | `tests` | Array of individual tests, include entries, or a string path to an external file or directory. Tests and include entries may use scoped `run:` overrides for `threshold`, `repeat`, `timeout_seconds`, and `budget_usd`. |
@@ -154,6 +155,13 @@ tests:
     input: Screen "Acme Corp" against denied parties list
 ```
 
+When `category` is omitted, AgentV derives it from the eval file path. Generic
+filenames (`eval`, `dataset`) do not add a leaf: `security/eval.yaml` becomes
+`security`, and `security/network/dataset.eval.yaml` becomes `security/network`.
+A meaningfully named eval file contributes a leaf, so `security/network.eval.yaml`
+also becomes `security/network`. Existing flat category strings remain valid
+one-node category paths.
+
 ### Suite-level Assertions
 
 The `assertions` field is the canonical way to define suite-level graders. Suite-level assertions are appended to every test's graders unless a test sets `execution.skip_defaults: true`.
diff --git a/packages/core/src/evaluation/category.ts b/packages/core/src/evaluation/category.ts
index 7f4a39e5a..e09bfdde0 100644
--- a/packages/core/src/evaluation/category.ts
+++ b/packages/core/src/evaluation/category.ts
@@ -1,18 +1,52 @@
-/** Default category for eval files without subdirectory structure. */
+/** Default category for eval files without category taxonomy metadata. */
 export const DEFAULT_CATEGORY = 'Uncategorized';
 
+const GENERIC_EVAL_FILE_STEMS = new Set(['eval', 'dataset']);
+
 /**
- * Derive a human-readable category from an eval file's relative path.
+ * Canonicalize analytics category taxonomy paths.
  *
- * Strips the filename and any `evals` directory segments, then joins
- * remaining directories with `/`. Returns {@link DEFAULT_CATEGORY} for files
- * at the root level.
+ * Categories are slash-delimited analytics paths, not filesystem paths. Flat labels
+ * stay valid one-node paths. Backslashes become `/`, empty segments are dropped,
+ * segments are trimmed, and an empty result falls back to {@link DEFAULT_CATEGORY}.
+ */
+export function normalizeCategoryPath(category: string | undefined): string {
+  // Backslashes count as separators; blank segments vanish, so `a//b` and ` a / b ` agree.
+  const segments = (category ?? '')
+    .split(/[/\\]/)
+    .map((segment) => segment.trim())
+    .filter((segment) => segment !== '');
+  // Nothing usable (undefined, empty, or separators only) falls back to the default bucket.
+  return segments.length > 0 ? segments.join('/') : DEFAULT_CATEGORY;
+}
+
+function evalFileStem(fileName: string): string {
+  return fileName.replace(/(?:\.eval)?\.[^.]+$/i, ''); // one pass, so `v1.2.eval.yaml` keeps `v1.2`
+}
+
+/**
+ * Derive a canonical slash-delimited analytics category path from an eval file.
+ *
+ * Generic eval filenames such as `eval.yaml` and `dataset.eval.yaml` do not add
+ * a taxonomy leaf. Meaningfully named eval files such as `network.eval.yaml` do
+ * contribute a leaf. Any `evals` directory segment is treated as organization
+ * only and is removed from the analytics taxonomy.
  */
 export function deriveCategory(relativePath: string): string {
-  const parts = relativePath.split(/[/\\]/);
-  if (parts.length <= 1) {
+  const parts = relativePath
+    .split(/[/\\]/)
+    .map((part) => part.trim())
+    .filter((part) => part.length > 0);
+  const fileName = parts.at(-1);
+  if (!fileName) {
     return DEFAULT_CATEGORY;
   }
-  const dirs = parts.slice(0, -1).filter((d) => d !== 'evals');
-  return dirs.length > 0 ? dirs.join('/') : DEFAULT_CATEGORY;
+
+  const taxonomyParts = parts.slice(0, -1).filter((part) => part !== 'evals');
+  const stem = evalFileStem(fileName).trim();
+  if (stem && !GENERIC_EVAL_FILE_STEMS.has(stem.toLowerCase())) {
+    taxonomyParts.push(stem);
+  }
+
+  return normalizeCategoryPath(taxonomyParts.join('/'));
 }
diff --git a/packages/core/src/evaluation/orchestrator.ts b/packages/core/src/evaluation/orchestrator.ts
index 3d49090e4..bce43a88a 100644
--- a/packages/core/src/evaluation/orchestrator.ts
+++ b/packages/core/src/evaluation/orchestrator.ts
@@ -693,6 +693,7 @@ export async function gradePreparedEvalCase(
     const baseResult = {
       timestamp: timestamp.toISOString(),
       testId: evalCase.id,
+      source: evalCase.source,
       suite: evalCase.suite,
       category: evalCase.category,
       conversationId: evalCase.conversation_id,
@@ -2558,6 +2559,7 @@ async function evaluateCandidate(options: {
   return {
     timestamp: completedAt.toISOString(),
     testId: evalCase.id,
+    source: evalCase.source,
     suite: evalCase.suite,
     category: evalCase.category,
     conversationId: evalCase.conversation_id,
diff --git a/packages/core/src/evaluation/run-artifacts.ts b/packages/core/src/evaluation/run-artifacts.ts
index 2be54ac7d..27dbd2312 100644
--- a/packages/core/src/evaluation/run-artifacts.ts
+++ b/packages/core/src/evaluation/run-artifacts.ts
@@ -8,7 +8,8 @@
  */
 
 import { createHash } from 'node:crypto';
-import { copyFile, mkdir, readFile, writeFile } from 'node:fs/promises';
+import { copyFile, mkdir, readFile, rm, rmdir, writeFile } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
 import path from 'node:path';
 
 import {
@@ -88,8 +89,51 @@ export interface RunRuntimeSourceMetadata {
   readonly source_eval_files?: readonly string[];
 }
 
-export function buildTestTargetKey(testId?: string, target?: string): string {
-  return `${testId ?? 'unknown'}::${target ?? 'unknown'}`;
+export function buildTestTargetKey(testId?: string, target?: string, variant?: string): string {
+  return `${testId ?? 'unknown'}::${target ?? 'unknown'}::${variant ?? ''}`;
+}
+
+function stringField(record: Record<string, unknown> | undefined, key: string): string | undefined {
+  const candidate = record?.[key]; // undefined when the record itself is missing
+  return typeof candidate !== 'string' || candidate.trim() === '' ? undefined : candidate;
+}
+
+function resultProjectionDimensions(result: EvaluationResult): Record<string, unknown> | undefined {
+  // Looks up `projectionIdentity.dimensions` through an untyped view of the result.
+  // Neither level is assumed to be present or well-formed: each is checked with
+  // `isRecord` before use, and anything else collapses to undefined.
+  const { projectionIdentity: identity } = result as unknown as Record<string, unknown>;
+  const dimensions = isRecord(identity) ? identity.dimensions : undefined;
+  return isRecord(dimensions) ? dimensions : undefined;
+}
+
+export function buildEvaluationResultTargetKey(result: EvaluationResult): string {
+  const dims = resultProjectionDimensions(result);
+  const raw = result as unknown as Record<string, unknown>;
+  // Each field prefers the projection dimension, then the result's own value, then a placeholder.
+  const evalPath = stringField(dims, 'evalPath') ?? sourceEvalPath(result, undefined);
+  const key = {
+    eval_path: evalPath ?? stringField(raw, 'evalPath') ?? null,
+    suite: stringField(dims, 'suite') ?? getSuite(result) ?? null,
+    test_id: stringField(dims, 'testId') ?? result.testId ?? 'unknown',
+    target: stringField(dims, 'target') ?? result.target ?? 'unknown',
+    variant: stringField(dims, 'variant') ?? result.variant ?? null,
+  };
+  return JSON.stringify(key); // property order above is part of the serialized key
+}
+
+export function buildEvalTestTargetKey(
+  test: Pick<EvalTest, 'id' | 'suite' | 'source'>,
+  target?: string,
+  variant?: string,
+): string {
+  const { id, suite, source } = test;
+  // Absent values become explicit placeholders so the serialized key always has five properties.
+  const evalPath = evalSourcePath(source) ?? null;
+  const suiteName = suite ?? null;
+  const testId = id ?? 'unknown';
+  const key = { eval_path: evalPath, suite: suiteName, test_id: testId };
+  return JSON.stringify({ ...key, target: target ?? 'unknown', variant: variant ?? null });
 }
 
 export function deduplicateByTestIdTarget(
@@ -97,11 +141,11 @@ export function deduplicateByTestIdTarget(
 ): EvaluationResult[] {
   const seen = new Map<string, number>();
   for (let i = 0; i < results.length; i++) {
-    seen.set(buildTestTargetKey(results[i].testId, results[i].target), i);
+    seen.set(buildEvaluationResultTargetKey(results[i]), i);
   }
   const deduped: EvaluationResult[] = [];
   for (let i = 0; i < results.length; i++) {
-    const key = buildTestTargetKey(results[i].testId, results[i].target);
+    const key = buildEvaluationResultTargetKey(results[i]);
     if (seen.get(key) === i) {
       deduped.push(results[i]);
     }
@@ -300,6 +344,7 @@ export interface RunSummaryArtifact {
     readonly eval_file: string;
     readonly timestamp: string;
     readonly targets: readonly string[];
+    readonly variants?: readonly string[];
     readonly tests_run: readonly string[];
     readonly experiment?: string;
     readonly experiment_config?: ExperimentArtifactMetadata;
@@ -345,6 +390,7 @@ export interface IndexArtifactEntry {
   readonly experiment?: string;
   readonly score: number;
   readonly target: string;
+  readonly variant?: string;
   readonly token_usage?: EvaluationResult['tokenUsage'];
   readonly cost_usd?: number;
   readonly duration_ms?: number;
@@ -889,7 +935,6 @@ async function writeTrialRunArtifacts(params: {
   const envelope = buildTraceEnvelopeSidecar({
     result,
     outputDir: params.outputDir,
-    testDir: runDir,
     evalPath: resolveEnvelopeEvalPath(result, params.testByTestId, params.evalFile),
     experiment: params.experiment,
     runId: attemptRunId,
@@ -1178,13 +1223,18 @@ export function buildRunSummaryArtifact(
   runtimeSource?: RunRuntimeSourceMetadata,
 ): RunSummaryArtifact {
   const targetSet = new Set<string>();
+  const variantSet = new Set<string>();
   const testIdSet = new Set<string>();
   for (const result of results) {
     targetSet.add(result.target ?? 'unknown');
+    if (result.variant) {
+      variantSet.add(result.variant);
+    }
     testIdSet.add(result.testId ?? 'unknown');
   }
 
   const targets = [...targetSet].sort();
+  const variants = [...variantSet].sort();
   const testIds = [...testIdSet].sort();
 
   const runSummary: RunSummaryArtifact['run_summary'] = {};
@@ -1264,6 +1314,7 @@ export function buildRunSummaryArtifact(
       eval_file: evalFile,
       timestamp,
       targets,
+      variants: variants.length > 0 ? variants : undefined,
       tests_run: testIds,
       experiment,
       experiment_config: experimentMetadata,
@@ -1344,25 +1395,62 @@ function safeTestId(testId: string | undefined): string {
   return safeArtifactPathSegment(testId, 'unknown');
 }
 
+const ROW_ID_PREFIX_MAX_LENGTH = 64;
+const ROW_ID_HASH_LENGTH = 12;
+
 function getSuite(result: EvaluationResult): string | undefined {
   return result.suite;
 }
 
+function evalSourcePath(source: EvalTest['source'] | undefined): string | undefined {
+  return source?.evalFileRepoPath ?? source?.evalFilePath; // evalFileRepoPath takes precedence
+}
+
+function sourceEvalPath(
+  result: EvaluationResult,
+  sourceTest: EvalTest | undefined,
+): string | undefined {
+  return evalSourcePath(result.source) ?? evalSourcePath(sourceTest?.source); // result's own source wins
+}
+
+function compactRowIdPrefix(testId: string | undefined): string {
+  // `slice` leaves ids at or under the limit untouched, so no explicit length check is needed.
+  return safeTestId(testId).slice(0, ROW_ID_PREFIX_MAX_LENGTH);
+}
+
+function buildRowArtifactHashInput(
+  result: EvaluationResult,
+  sourceTest?: EvalTest,
+  projectionIdentity?: ProjectionIdentity,
+): {
+  readonly eval_path: string | null;
+  readonly suite: string | null;
+  readonly test_id: string;
+  readonly target: string;
+  readonly variant: string | null;
+} {
+  // Projection dimensions take precedence; the result (and, for the eval path, its source test) fill gaps.
+  const dims = projectionIdentity?.dimensions;
+  const evalPath = dims?.evalPath ?? sourceEvalPath(result, sourceTest) ?? null;
+  const suite = dims?.suite ?? getSuite(result) ?? null;
+  const testId = dims?.testId ?? result.testId ?? 'unknown';
+  const target = dims?.target ?? result.target ?? 'unknown';
+  const variant = dims?.variant ?? result.variant ?? null;
+  return { eval_path: evalPath, suite, test_id: testId, target, variant };
+}
+
 function buildArtifactSubdir(
   result: EvaluationResult,
-  resultGroup?: string,
+  _resultGroup?: string,
   sourceTest?: EvalTest,
+  projectionIdentity?: ProjectionIdentity,
 ): string {
-  const segments = [];
-  const evalSet = getSuite(result);
-  const importedSuiteName = sourceTest?.source?.importedSuiteName;
-  if (importedSuiteName !== undefined) {
-    segments.push(safeArtifactPathSegment(importedSuiteName, 'default'));
-  } else if (evalSet && evalSet !== resultGroup) {
-    segments.push(safeArtifactPathSegment(evalSet, 'default'));
-  }
-  segments.push(safeTestId(result.testId));
-  return path.posix.join(...segments);
+  const hashInput = buildRowArtifactHashInput(result, sourceTest, projectionIdentity);
+  const digest = createHash('sha256')
+    .update(JSON.stringify(hashInput))
+    .digest('hex')
+    .slice(0, ROW_ID_HASH_LENGTH);
+  return `${compactRowIdPrefix(hashInput.test_id)}--${digest}`;
 }
 
 function toRelativeArtifactPath(outputDir: string, filePath: string): string {
@@ -1374,6 +1462,13 @@ function findResultSourceTest(
   testByTestId: ReadonlyMap<string, EvalTest>,
 ): EvalTest | undefined {
   const testId = result.testId ?? 'unknown';
+  const resultSourcePath = evalSourcePath(result.source);
+  if (resultSourcePath) {
+    const sourceMatch = testByTestId.get(sourceTestLookupKey(`source:${resultSourcePath}`, testId));
+    if (sourceMatch) {
+      return sourceMatch;
+    }
+  }
   const suite = getSuite(result);
   if (suite) {
     const suiteMatch = testByTestId.get(sourceTestLookupKey(suite, testId));
@@ -1397,6 +1492,10 @@ function buildSourceTestLookup(
     if (test.suite) {
       lookup.set(sourceTestLookupKey(test.suite, test.id), test);
     }
+    const sourcePath = evalSourcePath(test.source);
+    if (sourcePath) {
+      lookup.set(sourceTestLookupKey(`source:${sourcePath}`, test.id), test);
+    }
     if (!lookup.has(test.id)) {
       lookup.set(test.id, test);
     }
@@ -1422,10 +1521,38 @@ function rawProviderLogSourcePath(result: EvaluationResult): string | undefined
   return sourcePath ? sourcePath : undefined;
 }
 
+function providerStagingRoot(): string {
+  return path.resolve(tmpdir(), 'agentv-provider-streams'); // fixed subdirectory of the OS temp dir
+}
+
+function isAgentvProviderStagingPath(filePath: string): boolean {
+  // The trailing separator rejects the root itself and siblings that merely share its prefix.
+  const rootWithSeparator = `${providerStagingRoot()}${path.sep}`;
+  return path.resolve(filePath).startsWith(rootWithSeparator);
+}
+
+async function cleanupProviderStagingFile(filePath: string): Promise<void> {
+  // Only files under the staging root are ever removed; anything else is left alone.
+  if (!isAgentvProviderStagingPath(filePath)) {
+    return;
+  }
+  await rm(filePath, { force: true });
+
+  // Prune parent directories, walking upward and stopping short of the root itself.
+  const root = providerStagingRoot();
+  const insideRoot = (dir: string): boolean => dir.startsWith(`${root}${path.sep}`);
+  for (let dir = path.dirname(path.resolve(filePath)); insideRoot(dir); dir = path.dirname(dir)) {
+    try {
+      await rmdir(dir);
+    } catch {
+      return; // rmdir failed (for example the directory is not empty): stop pruning
+    }
+  }
+}
+
 interface TraceEnvelopeSidecarParams {
   readonly result: EvaluationResult;
   readonly outputDir: string;
-  readonly testDir: string;
   readonly evalPath?: string;
   readonly experiment?: string;
   readonly runId?: string;
@@ -1438,6 +1565,7 @@ function buildTraceEnvelopeSidecar(params: TraceEnvelopeSidecarParams): TraceEnv
     evalPath: params.evalPath,
     runId: params.runId ?? path.basename(params.outputDir),
     experiment: params.experiment,
+    variant: params.result.variant,
     source: { path: RESULT_INDEX_FILENAME },
     capture: { content: 'full', redactionLevel: 'none', redactedFields: [] },
     artifacts: {
@@ -1478,6 +1606,7 @@ export function buildIndexArtifactEntry(
     conversation_id: result.conversationId,
     score: result.score,
     target: result.target ?? 'unknown',
+    variant: result.variant,
     token_usage: result.tokenUsage,
     cost_usd: result.costUsd,
     duration_ms: result.durationMs,
@@ -1543,7 +1672,12 @@ export function buildResultIndexArtifact(
     runtimeSource?: RunRuntimeSourceMetadata;
   },
 ): ResultIndexArtifact {
-  const artifactSubdir = buildArtifactSubdir(result);
+  const artifactSubdir = buildArtifactSubdir(
+    result,
+    undefined,
+    undefined,
+    options?.projectionIdentity,
+  );
   const hasAnswer = result.output.length > 0;
   const hasTranscript = resultHasExecutionTraceTranscript(result);
   const isSingleRun = !hasPersistedTrialRuns(result);
@@ -1557,6 +1691,7 @@ export function buildResultIndexArtifact(
     conversation_id: result.conversationId,
     score: result.score,
     target: result.target ?? 'unknown',
+    variant: result.variant,
     token_usage: result.tokenUsage,
     cost_usd: result.costUsd,
     duration_ms: result.durationMs,
@@ -1643,6 +1778,7 @@ async function writeRawTranscriptJsonl(
   const rawSource = rawProviderLogSourcePath(result);
   if (rawSource) {
     await copyFile(rawSource, filePath);
+    await cleanupProviderStagingFile(rawSource).catch(() => undefined);
     return;
   }
   await writeGeneratedRawTranscriptJsonl(filePath, result, envelope);
@@ -1694,7 +1830,12 @@ function indexRecordKey(record: unknown): string | undefined {
         ? record.testId
         : undefined;
   const target = typeof record.target === 'string' ? record.target : undefined;
-  return testId ? buildTestTargetKey(testId, target) : undefined;
+  const variant = typeof record.variant === 'string' ? record.variant : undefined;
+  return testId ? buildTestTargetKey(testId, target, variant) : undefined;
+}
+
+function indexRecordReplacementKey(record: unknown): string | undefined {
+  return projectionIdentityRecordKey(record) ?? indexRecordKey(record); // else testId/target/variant key
 }
 
 function projectionIdentityRecordKey(record: unknown): string | undefined {
@@ -1780,7 +1921,10 @@ async function rewriteExistingIndexRecords(
   }
 
   const replacementsByKey = new Map(
-    replacements.map((record) => [buildTestTargetKey(record.test_id, record.target), record]),
+    replacements.flatMap((record) => {
+      const key = indexRecordReplacementKey(record);
+      return key ? [[key, record] as const] : [];
+    }),
   );
   const seen = new Set<string>();
   const records: unknown[] = [];
@@ -1790,7 +1934,7 @@ async function rewriteExistingIndexRecords(
     }
     try {
       const parsed = JSON.parse(line) as unknown;
-      const key = indexRecordKey(parsed);
+      const key = indexRecordReplacementKey(parsed);
       const replacement = key ? replacementsByKey.get(key) : undefined;
       if (key && replacement) {
         records.push(replacement);
@@ -1802,8 +1946,8 @@ async function rewriteExistingIndexRecords(
   }
 
   for (const replacement of replacements) {
-    const key = buildTestTargetKey(replacement.test_id, replacement.target);
-    if (!seen.has(key)) {
+    const key = indexRecordReplacementKey(replacement);
+    if (!key || !seen.has(key)) {
       records.push(replacement);
     }
   }
@@ -2011,14 +2155,11 @@ export async function writePerTestArtifacts(
 
   for (const result of results) {
     const sourceTest = findResultSourceTest(result, testByTestId);
-    const artifactSubdir = buildArtifactSubdir(result, options?.resultGroup, sourceTest);
-    const testDir = path.join(outputDir, artifactSubdir);
-    await mkdir(testDir, { recursive: true });
+    const evalPath = resolveEnvelopeEvalPath(result, testByTestId, options?.evalFile);
     const envelope = buildTraceEnvelopeSidecar({
       result,
       outputDir,
-      testDir,
-      evalPath: resolveEnvelopeEvalPath(result, testByTestId, options?.evalFile),
+      evalPath,
       experiment: options?.experiment,
       runId: options?.runId,
       duplicatePolicy,
@@ -2027,6 +2168,14 @@ export async function writePerTestArtifacts(
     if (!projectionIdentity) {
       throw new Error(`Result ${result.testId ?? 'unknown'} is missing projection identity`);
     }
+    const artifactSubdir = buildArtifactSubdir(
+      result,
+      options?.resultGroup,
+      sourceTest,
+      projectionIdentity,
+    );
+    const testDir = path.join(outputDir, artifactSubdir);
+    await mkdir(testDir, { recursive: true });
     const caseSummaryPath = path.join(testDir, RUN_SUMMARY_FILENAME);
     const aggregateTiming = buildRepeatAggregateTimingArtifact(result);
     const summary = buildRepeatCaseSummaryArtifact(result, aggregateTiming, projectionIdentity.id);
@@ -2130,14 +2279,11 @@ export async function writeArtifactsFromResults(
 
   const plans = results.map((result) => {
     const sourceTest = findResultSourceTest(result, testByTestId);
-    const artifactSubdir = buildArtifactSubdir(result, options?.resultGroup, sourceTest);
-    const testDir = path.join(outputDir, artifactSubdir);
-    const caseSummaryPath = path.join(testDir, RUN_SUMMARY_FILENAME);
+    const evalPath = resolveEnvelopeEvalPath(result, testByTestId, options?.evalFile);
     const envelope = buildTraceEnvelopeSidecar({
       result,
       outputDir,
-      testDir,
-      evalPath: resolveEnvelopeEvalPath(result, testByTestId, options?.evalFile),
+      evalPath,
       experiment: options?.experiment,
       runId: options?.runId,
       duplicatePolicy,
@@ -2146,6 +2292,14 @@ export async function writeArtifactsFromResults(
     if (!projectionIdentity) {
       throw new Error(`Result ${result.testId ?? 'unknown'} is missing projection identity`);
     }
+    const artifactSubdir = buildArtifactSubdir(
+      result,
+      options?.resultGroup,
+      sourceTest,
+      projectionIdentity,
+    );
+    const testDir = path.join(outputDir, artifactSubdir);
+    const caseSummaryPath = path.join(testDir, RUN_SUMMARY_FILENAME);
     const identityId = projectionIdentity.id;
     const isSingleRun = !hasPersistedTrialRuns(result);
     const singleRunDir = path.join(testDir, trialRunDirName(0));
diff --git a/packages/core/src/evaluation/types.ts b/packages/core/src/evaluation/types.ts
index 7d93c1762..647871c25 100644
--- a/packages/core/src/evaluation/types.ts
+++ b/packages/core/src/evaluation/types.ts
@@ -1180,12 +1180,15 @@ export type FailOnError = boolean;
 export interface EvaluationResult {
   readonly timestamp: string;
   readonly testId: string;
+  readonly source?: EvalTestSource;
   readonly suite?: string;
   readonly category?: string;
   readonly conversationId?: string;
   readonly score: number;
   readonly assertions: readonly AssertionEntry[];
   readonly target: string;
+  /** Optional explicit comparable variant. Path segments are not authoritative for this value. */
+  readonly variant?: string;
   /**
    * The target that actually served the response, when different from the
    * primary target. Present only when a fallback target was used.
diff --git a/packages/core/src/evaluation/yaml-parser.ts b/packages/core/src/evaluation/yaml-parser.ts
index df047d03e..4d3faf794 100644
--- a/packages/core/src/evaluation/yaml-parser.ts
+++ b/packages/core/src/evaluation/yaml-parser.ts
@@ -4,6 +4,7 @@ import fg from 'fast-glob';
 import micromatch from 'micromatch';
 import { stringify as stringifyYaml } from 'yaml';
 
+import { normalizeCategoryPath } from './category.js';
 import {
   type ExperimentConfig,
   normalizeExperimentConfig,
@@ -789,10 +790,12 @@ async function loadTestsFromParsedYamlValue(
         ? (renderedCase.window_size as number)
         : undefined;
 
+    const category = normalizeCategoryPath(suite.category ?? options?.category);
+
     const testCase: EvalTest = {
       id,
       suite: suiteName,
-      category: suite.category ?? options?.category,
+      category,
       conversation_id: conversationId,
       question: question,
       input: inputMessages,
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index 00b441734..072bbded2 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -63,6 +63,8 @@ export {
   aggregateRunDir,
   buildAggregateGradingArtifact,
   buildRunSummaryArtifact,
+  buildEvalTestTargetKey,
+  buildEvaluationResultTargetKey,
   buildGradingArtifact,
   buildIndexArtifactEntry,
   buildResultIndexArtifact,
@@ -186,7 +188,7 @@ export {
 } from './projects.js';
 export { syncProject, syncProjects } from './project-sync.js';
 export { trimBaselineResult } from './evaluation/baseline.js';
-export { DEFAULT_CATEGORY, deriveCategory } from './evaluation/category.js';
+export { DEFAULT_CATEGORY, deriveCategory, normalizeCategoryPath } from './evaluation/category.js';
 export * from './observability/index.js';
 
 // Registry exports
diff --git a/packages/core/test/evaluation/category.test.ts b/packages/core/test/evaluation/category.test.ts
index 9b8c62d01..3dc3f70f9 100644
--- a/packages/core/test/evaluation/category.test.ts
+++ b/packages/core/test/evaluation/category.test.ts
@@ -1,27 +1,35 @@
 import { describe, expect, test } from 'bun:test';
 
-import { DEFAULT_CATEGORY, deriveCategory } from '../../src/evaluation/category.js';
+import {
+  DEFAULT_CATEGORY,
+  deriveCategory,
+  normalizeCategoryPath,
+} from '../../src/evaluation/category.js';
 
 describe('deriveCategory', () => {
   test('returns Uncategorized for single-segment path (root-level file)', () => {
     expect(deriveCategory('dataset.eval.yaml')).toBe(DEFAULT_CATEGORY);
   });
 
+  test('uses a meaningful root-level eval filename as a one-node category path', () => {
+    expect(deriveCategory('network.eval.yaml')).toBe('network');
+  });
+
   test('returns Uncategorized when only directory is evals', () => {
     expect(deriveCategory('evals/dataset.eval.yaml')).toBe(DEFAULT_CATEGORY);
   });
 
-  test('strips evals segment and returns remaining directory', () => {
-    expect(deriveCategory('evals/fundamentals/greetings.eval.yaml')).toBe('fundamentals');
+  test('strips evals segment and appends meaningful named eval files as a leaf', () => {
+    expect(deriveCategory('evals/fundamentals/greetings.eval.yaml')).toBe('fundamentals/greetings');
   });
 
-  test('preserves nested directory paths', () => {
+  test('does not append generic eval filenames to nested directory paths', () => {
     expect(deriveCategory('evals/cargowise-customs/layout-engine/eval.yaml')).toBe(
       'cargowise-customs/layout-engine',
     );
   });
 
-  test('handles paths without evals segment', () => {
+  test('handles generic filenames without evals segment', () => {
     expect(deriveCategory('examples/showcase/eval.yaml')).toBe('examples/showcase');
   });
 
@@ -38,4 +46,27 @@ describe('deriveCategory', () => {
   test('returns Uncategorized for just a filename with no directory', () => {
     expect(deriveCategory('eval.yaml')).toBe(DEFAULT_CATEGORY);
   });
+
+  test('matches the hierarchical category derivation contract', () => {
+    expect(deriveCategory('security/eval.yaml')).toBe('security');
+    expect(deriveCategory('security/network.eval.yaml')).toBe('security/network');
+    expect(deriveCategory('security/network/dataset.eval.yaml')).toBe('security/network');
+  });
+});
+
+describe('normalizeCategoryPath', () => {
+  test('canonicalizes explicit slash-delimited taxonomy paths', () => {
+    expect(normalizeCategoryPath(' security / network ')).toBe('security/network');
+    expect(normalizeCategoryPath('security//network')).toBe('security/network');
+    expect(normalizeCategoryPath('security\\network')).toBe('security/network');
+  });
+
+  test('preserves existing flat category strings as one-node paths', () => {
+    expect(normalizeCategoryPath('Safety > PII')).toBe('Safety > PII');
+  });
+
+  test('returns Uncategorized for empty explicit categories', () => {
+    expect(normalizeCategoryPath('  /  ')).toBe(DEFAULT_CATEGORY);
+    expect(normalizeCategoryPath(undefined)).toBe(DEFAULT_CATEGORY);
+  });
 });
diff --git a/packages/core/test/evaluation/evaluate-programmatic-api.test.ts b/packages/core/test/evaluation/evaluate-programmatic-api.test.ts
index cba36f5d2..97be77621 100644
--- a/packages/core/test/evaluation/evaluate-programmatic-api.test.ts
+++ b/packages/core/test/evaluation/evaluate-programmatic-api.test.ts
@@ -141,6 +141,7 @@ describe('evaluate() — programmatic API extensions', () => {
           .trim()
           .split('\n')
           .map((line) => JSON.parse(line) as { result_dir?: string });
+        const resultDir = indexRow?.result_dir;
 
         const summaryArtifact = JSON.parse(
           await readFile(path.join(outputDir, 'summary.json'), 'utf8'),
@@ -153,29 +154,12 @@ describe('evaluate() — programmatic API extensions', () => {
         expect(summaryArtifact.metadata.eval_file).toBe('');
         expect(summaryArtifact.timing.duration_ms).toBeGreaterThanOrEqual(0);
 
-        expect(indexRow?.result_dir).toBe('__programmatic__.yaml/programmatic-artifacts');
+        expect(resultDir).toMatch(/^programmatic-artifacts--[a-f0-9]{12}$/);
+        expect(existsSync(path.join(outputDir, resultDir ?? '', 'run-1', 'grading.json'))).toBe(
+          true,
+        );
         expect(
-          existsSync(
-            path.join(
-              outputDir,
-              '__programmatic__.yaml',
-              'programmatic-artifacts',
-              'run-1',
-              'grading.json',
-            ),
-          ),
-        ).toBe(true);
-        expect(
-          existsSync(
-            path.join(
-              outputDir,
-              '__programmatic__.yaml',
-              'programmatic-artifacts',
-              'run-1',
-              'outputs',
-              'answer.md',
-            ),
-          ),
+          existsSync(path.join(outputDir, resultDir ?? '', 'run-1', 'outputs', 'answer.md')),
         ).toBe(true);
       } finally {
         rmSync(outputDir, { recursive: true, force: true });
diff --git a/packages/core/test/evaluation/orchestrator.test.ts b/packages/core/test/evaluation/orchestrator.test.ts
index 3e7e81e7c..9ddaa1c05 100644
--- a/packages/core/test/evaluation/orchestrator.test.ts
+++ b/packages/core/test/evaluation/orchestrator.test.ts
@@ -1,5 +1,12 @@
 import { afterEach, describe, expect, it, mock } from 'bun:test';
-import { mkdtempSync, readFileSync, readdirSync, writeFileSync } from 'node:fs';
+import {
+  existsSync,
+  mkdirSync,
+  mkdtempSync,
+  readFileSync,
+  readdirSync,
+  writeFileSync,
+} from 'node:fs';
 import { tmpdir } from 'node:os';
 import path from 'node:path';
 
@@ -723,9 +730,13 @@ console.log('spreadsheet: revenue,total\\nQ1,42');`,
     expect(result.failureReasonCode).toBe('provider_error');
   });
 
-  it('stores raw provider logs once as transcript-raw evidence', async () => {
-    const tempDir = mkdtempSync(path.join(tmpdir(), 'agentv-raw-provider-log-'));
-    const rawLogPath = path.join(tempDir, 'provider-native-session.jsonl');
+  it('stores raw provider logs once as transcript-raw evidence and cleans staging files', async () => {
+    const stagingRoot = path.join(tmpdir(), 'agentv-provider-streams');
+    mkdirSync(stagingRoot, { recursive: true });
+    const tempDir = mkdtempSync(path.join(stagingRoot, 'raw-provider-log-'));
+    const rawLogDir = path.join(tempDir, 'suite', 'case-1', 'logs', 'codex');
+    mkdirSync(rawLogDir, { recursive: true });
+    const rawLogPath = path.join(rawLogDir, 'provider-native-session.jsonl');
     writeFileSync(rawLogPath, '{"event":"provider-native"}\n', 'utf8');
 
     const provider = new SequenceProvider('mock', {
@@ -749,8 +760,13 @@ console.log('spreadsheet: revenue,total\\nQ1,42');`,
     const outputDir = path.join(tempDir, 'artifacts');
     await writeArtifactsFromResults([result], outputDir);
 
-    const artifactDir = path.join(outputDir, 'test-dataset', 'case-1');
-    const runDir = path.join(artifactDir, 'run-1');
+    const indexRows = readFileSync(path.join(outputDir, 'index.jsonl'), 'utf8')
+      .trim()
+      .split('\n')
+      .map((line) => JSON.parse(line) as Record<string, string | undefined>);
+    const resultDir = indexRows[0]?.result_dir;
+    expect(resultDir).toMatch(/^case-1--[a-f0-9]{12}$/);
+    const runDir = path.join(outputDir, resultDir ?? '', 'run-1');
     const outputsDir = path.join(runDir, 'outputs');
     expect(readdirSync(runDir)).not.toContain('provider.log');
     expect(readdirSync(runDir)).toContain('transcript-raw.jsonl');
@@ -761,16 +777,11 @@ console.log('spreadsheet: revenue,total\\nQ1,42');`,
     expect(readdirSync(outputsDir)).not.toContain('transcript.jsonl');
     expect(readdirSync(outputsDir)).not.toContain('transcript.json');
 
-    const indexRows = readFileSync(path.join(outputDir, 'index.jsonl'), 'utf8')
-      .trim()
-      .split('\n')
-      .map((line) => JSON.parse(line) as Record<string, unknown>);
     expect(indexRows[0]?.raw_provider_log_path).toBeUndefined();
     expect(indexRows[0]?.trace_path).toBeUndefined();
-    expect(indexRows[0]?.transcript_path).toBe('test-dataset/case-1/run-1/transcript.jsonl');
-    expect(indexRows[0]?.transcript_raw_path).toBe(
-      'test-dataset/case-1/run-1/transcript-raw.jsonl',
-    );
+    expect(indexRows[0]?.transcript_path).toBe(`${resultDir}/run-1/transcript.jsonl`);
+    expect(indexRows[0]?.transcript_raw_path).toBe(`${resultDir}/run-1/transcript-raw.jsonl`);
+    expect(existsSync(rawLogPath)).toBe(false);
   });
 
   it('reports failed progress status for batch item errors', async () => {
diff --git a/packages/core/test/evaluation/yaml-parser-metadata.test.ts b/packages/core/test/evaluation/yaml-parser-metadata.test.ts
index e9653f61d..772aeed95 100644
--- a/packages/core/test/evaluation/yaml-parser-metadata.test.ts
+++ b/packages/core/test/evaluation/yaml-parser-metadata.test.ts
@@ -74,6 +74,19 @@ tests:
     expect(suite.metadata).toBeUndefined();
   });
 
+  it('uses explicit YAML category as a canonical taxonomy path override', async () => {
+    const { filePath, dir } = createTempYaml(`
+category: " security / network "
+tests:
+  - id: test-1
+    input: "Hello"
+    criteria: "Greet"
+`);
+
+    const suite = await loadTestSuite(filePath, dir, { category: 'derived/path' });
+    expect(suite.tests[0].category).toBe('security/network');
+  });
+
   it('still loads tests correctly when metadata is present', async () => {
     const { filePath, dir } = createTempYaml(`
 name: my-eval