diff --git a/CONCEPTS.md b/CONCEPTS.md index 3014bf601..1c66490ae 100644 --- a/CONCEPTS.md +++ b/CONCEPTS.md @@ -16,7 +16,7 @@ Shared domain vocabulary for this project — entities, named processes, and sta **Experiment** — A committed run variant that selects how evals are executed: target or target matrix, setup, scripts, eval filters, repeat counts, timeouts, workers, budgets, and related run knobs. Experiments make A/B setup differences explicit while pointing at stable eval tasks. -**Run manifest** — The root `index.jsonl` file in a run bundle. It is the dashboard and tooling loading contract for per-case result rows and artifact locations, including fields such as `artifact_dir`, `task_dir`, `summary_path`, and `grading_path`. +**Run manifest** — The root `index.jsonl` file in a run bundle. It is the dashboard and tooling loading contract for per-case result rows and artifact locations, including fields such as `result_dir`, `task_dir`, `summary_path`, and `grading_path`. **Artifact sidecar** — A file beside or below a test-case artifact directory that provides evidence for a result, such as `summary.json`, `grading.json`, `result.json`, transcripts, logs, or outputs. Sidecars are evidence, not the primary discovery mechanism for a run. diff --git a/apps/cli/src/commands/eval/artifact-writer.ts b/apps/cli/src/commands/eval/artifact-writer.ts index 23a1dfbef..b9e8b4627 100644 --- a/apps/cli/src/commands/eval/artifact-writer.ts +++ b/apps/cli/src/commands/eval/artifact-writer.ts @@ -91,7 +91,7 @@ export function buildIndexArtifactEntry( result: EvaluationResult, options: { outputDir: string; - artifactDir?: string; + resultDir?: string; gradingPath?: string; timingPath?: string; summaryPath?: string; @@ -115,7 +115,7 @@ export function buildResultIndexArtifact( result: EvaluationResult, taskBundle?: MaterializedTaskBundlePaths, ): ResultIndexArtifact { - const artifactSubdir = (buildCoreResultIndexArtifact(result).artifact_dir ?? '').trim(); + const artifactSubdir = (buildCoreResultIndexArtifact(result).result_dir ?? '').trim(); const extraIndexFields = taskBundle ? { task_dir: path.posix.join(artifactSubdir, 'task'), diff --git a/apps/cli/src/commands/results/combine-run.ts b/apps/cli/src/commands/results/combine-run.ts index fde67f12f..ccced82b1 100644 --- a/apps/cli/src/commands/results/combine-run.ts +++ b/apps/cli/src/commands/results/combine-run.ts @@ -361,7 +361,7 @@ function resolveCombinedExperiment( } const MANIFEST_PATH_FIELDS = [ - 'artifact_dir', + 'result_dir', 'summary_path', 'grading_path', 'timing_path', diff --git a/apps/cli/src/commands/results/manifest.ts b/apps/cli/src/commands/results/manifest.ts index 4679405f0..dcbc980c7 100644 --- a/apps/cli/src/commands/results/manifest.ts +++ b/apps/cli/src/commands/results/manifest.ts @@ -62,7 +62,7 @@ export interface ResultManifestRecord { readonly artifact_pointers?: ResultArtifactPointersWire; readonly external_trace?: ExternalTraceMetadataWire; readonly response_path?: string; - readonly artifact_dir?: string; + readonly result_dir?: string; readonly task_dir?: string; readonly eval_path?: string; readonly targets_path?: string; diff --git a/apps/cli/src/commands/results/projection-bundle.ts b/apps/cli/src/commands/results/projection-bundle.ts index 56fe98a5e..4a2c58fd1 100644 --- a/apps/cli/src/commands/results/projection-bundle.ts +++ b/apps/cli/src/commands/results/projection-bundle.ts @@ -85,7 +85,7 @@ export interface ProjectionBundleEntry { export type ProjectionBundleArtifactRefs = Partial< Pick< IndexArtifactEntry, - | 'artifact_dir' + | 'result_dir' | 'summary_path' | 'grading_path' | 'timing_path' @@ -164,7 +164,7 @@ function artifactRefs( return dropUndefined({ ...metadataRefs, - artifact_dir: indexEntry.artifact_dir, + result_dir: indexEntry.result_dir, summary_path: indexEntry.summary_path, grading_path: indexEntry.grading_path, input_path: indexEntry.input_path, diff --git a/apps/cli/src/commands/results/serve.ts b/apps/cli/src/commands/results/serve.ts index 0e7447208..445421c4f 100644 --- a/apps/cli/src/commands/results/serve.ts +++ b/apps/cli/src/commands/results/serve.ts @@ -644,14 +644,14 @@ function normalizeArtifactRelativePath(relativePath: string): string | undefined return segments.join('/'); } -function requestedArtifactDir(c: C): { value?: string; error?: string } { - const raw = c.req.query('artifact_dir')?.trim(); +function requestedResultDir(c: C): { value?: string; error?: string } { + const raw = c.req.query('result_dir')?.trim(); if (!raw) { return {}; } const normalized = normalizeArtifactRelativePath(raw); if (!normalized) { - return { error: 'Invalid artifact_dir' }; + return { error: 'Invalid result_dir' }; } return { value: normalized }; } @@ -659,7 +659,7 @@ function requestedArtifactDir(c: C): { value?: string; error?: string } { function manifestRecordSelection( records: readonly ResultManifestRecord[], evalId: string, - artifactDir?: string, + resultDir?: string, ): { record: ResultManifestRecord; index: number } | undefined { return records .map((record, index) => ({ record, index })) @@ -667,10 +667,10 @@ function manifestRecordSelection( if (record.test_id !== evalId) { return false; } - if (!artifactDir) { + if (!resultDir) { return true; } - return normalizeArtifactRelativePath(record.artifact_dir ?? '') === artifactDir; + return normalizeArtifactRelativePath(record.result_dir ?? '') === resultDir; }); } @@ -804,14 +804,14 @@ function addTrialRunCatalogEntries( seen: Set, record: ResultManifestRecord, ): void { - const artifactDir = record.artifact_dir - ? normalizeArtifactRelativePath(record.artifact_dir) + const resultDir = record.result_dir + ? normalizeArtifactRelativePath(record.result_dir) : undefined; - if (!artifactDir) return; + if (!resultDir) return; for (const trial of record.trials ?? []) { const runPath = trial.run_path ? normalizeArtifactRelativePath(trial.run_path) : undefined; if (!runPath) continue; - const runDir = path.posix.join(artifactDir, runPath); + const runDir = path.posix.join(resultDir, runPath); addDirectArtifactCatalogEntry( entries, seen, @@ -897,9 +897,9 @@ function artifactTreeCommonDir( const knownPaths = resultArtifactTreeRootPaths(record, catalog); if (knownPaths.length === 0) return undefined; - const artifactDirs = knownPaths.map((p) => path.dirname(p)); - let commonDir = artifactDirs[0]; - for (const dir of artifactDirs) { + const resultDirs = knownPaths.map((p) => path.dirname(p)); + let commonDir = resultDirs[0]; + for (const dir of resultDirs) { while (!dir.startsWith(commonDir)) { const parent = path.dirname(commonDir); if (parent === commonDir) break; @@ -1135,12 +1135,12 @@ function objectField( } function caseTrialArtifactPath( - artifactDir: string | undefined, + resultDir: string | undefined, runPath: string | undefined, filePath: string, ): string | undefined { - if (!artifactDir || !runPath) return undefined; - return path.posix.join(artifactDir, runPath, filePath); + if (!resultDir || !runPath) return undefined; + return path.posix.join(resultDir, runPath, filePath); } function buildRepeatTrialReadModels( @@ -1148,18 +1148,18 @@ function buildRepeatTrialReadModels( record: ResultManifestRecord, ): Array> | undefined { if (!record.trials || record.trials.length === 0) return undefined; - const artifactDir = record.artifact_dir - ? normalizeArtifactRelativePath(record.artifact_dir) + const resultDir = record.result_dir + ? normalizeArtifactRelativePath(record.result_dir) : undefined; return record.trials.map((trial) => { const runPath = trial.run_path ? normalizeArtifactRelativePath(trial.run_path) : undefined; - const metricsPath = caseTrialArtifactPath(artifactDir, runPath, 'metrics.json'); - const timingPath = caseTrialArtifactPath(artifactDir, runPath, 'timing.json'); - const gradingPath = caseTrialArtifactPath(artifactDir, runPath, 'grading.json'); - const transcriptPath = caseTrialArtifactPath(artifactDir, runPath, 'transcript.jsonl'); - const transcriptRawPath = caseTrialArtifactPath(artifactDir, runPath, 'transcript-raw.jsonl'); - const answerPath = caseTrialArtifactPath(artifactDir, runPath, 'outputs/answer.md'); + const metricsPath = caseTrialArtifactPath(resultDir, runPath, 'metrics.json'); + const timingPath = caseTrialArtifactPath(resultDir, runPath, 'timing.json'); + const gradingPath = caseTrialArtifactPath(resultDir, runPath, 'grading.json'); + const transcriptPath = caseTrialArtifactPath(resultDir, runPath, 'transcript.jsonl'); + const transcriptRawPath = caseTrialArtifactPath(resultDir, runPath, 'transcript-raw.jsonl'); + const answerPath = caseTrialArtifactPath(resultDir, runPath, 'outputs/answer.md'); const metrics = readArtifactJsonObject(baseDir, metricsPath); const timing = readArtifactJsonObject(baseDir, timingPath); const toolCalls = objectField(metrics, 'tool_calls'); @@ -1203,7 +1203,7 @@ function attachRunDetailReadModelFields>( return { ...result, ...(record.aggregation && { aggregation: record.aggregation }), - ...(record.artifact_dir && { artifact_dir: record.artifact_dir }), + ...(record.result_dir && { result_dir: record.result_dir }), ...(record.summary_path && { summary_path: record.summary_path }), ...(record.grading_path && { grading_path: record.grading_path }), ...(record.timing_path && { timing_path: record.timing_path }), @@ -1842,14 +1842,14 @@ async function handleEvalDetail(c: C, { searchDir, projectId }: DataContext) { const filename = c.req.param('filename') ?? ''; const evalId = c.req.param('evalId') ?? ''; if (!evalId) return c.json({ error: 'Eval id is required' }, 400); - const artifactDir = requestedArtifactDir(c); - if (artifactDir.error) return c.json({ error: artifactDir.error }, 400); + const resultDir = requestedResultDir(c); + if (resultDir.error) return c.json({ error: resultDir.error }, 400); const meta = await findRunById(searchDir, filename, projectId); if (!meta) return c.json({ error: 'Run not found' }, 404); try { const loaded = await loadManifestResultsForMeta(searchDir, meta, projectId); const records = await parseManifestForMeta(searchDir, meta, projectId); - const selection = manifestRecordSelection(records, evalId, artifactDir.value); + const selection = manifestRecordSelection(records, evalId, resultDir.value); const result = selection ? loaded[selection.index] : undefined; if (!selection || !result) return c.json({ error: 'Eval not found' }, 404); const baseDir = path.dirname(meta.path); @@ -1868,13 +1868,13 @@ async function handleEvalFiles(c: C, { searchDir, projectId }: DataContext) { const filename = c.req.param('filename') ?? ''; const evalId = c.req.param('evalId') ?? ''; if (!evalId) return c.json({ error: 'Eval id is required' }, 400); - const artifactDir = requestedArtifactDir(c); - if (artifactDir.error) return c.json({ error: artifactDir.error }, 400); + const resultDir = requestedResultDir(c); + if (resultDir.error) return c.json({ error: resultDir.error }, 400); const meta = await findRunById(searchDir, filename, projectId); if (!meta) return c.json({ error: 'Run not found' }, 404); try { const records = await parseManifestForMeta(searchDir, meta, projectId); - const selection = manifestRecordSelection(records, evalId, artifactDir.value); + const selection = manifestRecordSelection(records, evalId, resultDir.value); if (!selection) return c.json({ error: 'Eval not found' }, 404); const { record } = selection; @@ -1895,8 +1895,8 @@ async function handleEvalFileContent(c: C, { searchDir, projectId }: DataContext const filename = c.req.param('filename') ?? ''; const evalId = c.req.param('evalId') ?? ''; if (!evalId) return c.json({ error: 'Eval id is required' }, 400); - const artifactDir = requestedArtifactDir(c); - if (artifactDir.error) return c.json({ error: artifactDir.error }, 400); + const resultDir = requestedResultDir(c); + if (resultDir.error) return c.json({ error: resultDir.error }, 400); const meta = await findRunById(searchDir, filename, projectId); if (!meta) return c.json({ error: 'Run not found' }, 404); @@ -1915,7 +1915,7 @@ async function handleEvalFileContent(c: C, { searchDir, projectId }: DataContext await ensureRunReadable(searchDir, meta, projectId); const records = parseResultManifest(readFileSync(meta.path, 'utf8')); - const selection = manifestRecordSelection(records, evalId, artifactDir.value); + const selection = manifestRecordSelection(records, evalId, resultDir.value); if (!selection) return c.json({ error: 'Eval not found' }, 404); const { record } = selection; const catalog = buildResultArtifactCatalog(record, { @@ -1942,14 +1942,14 @@ async function handleEvalTraceSession(c: C, { searchDir, projectId }: DataContex const filename = c.req.param('filename') ?? ''; const evalId = c.req.param('evalId') ?? ''; if (!evalId) return c.json({ error: 'Eval id is required' }, 400); - const artifactDir = requestedArtifactDir(c); - if (artifactDir.error) return c.json({ error: artifactDir.error }, 400); + const resultDir = requestedResultDir(c); + if (resultDir.error) return c.json({ error: resultDir.error }, 400); const meta = await findRunById(searchDir, filename, projectId); if (!meta) return c.json({ error: 'Run not found' }, 404); try { const records = await parseManifestForMeta(searchDir, meta, projectId); - const selection = manifestRecordSelection(records, evalId, artifactDir.value); + const selection = manifestRecordSelection(records, evalId, resultDir.value); if (!selection) return c.json({ error: 'Eval not found' }, 404); const { record } = selection; @@ -2074,14 +2074,14 @@ async function handleEvalTranscript(c: C, { searchDir, projectId }: DataContext) const filename = c.req.param('filename') ?? ''; const evalId = c.req.param('evalId') ?? ''; if (!evalId) return c.json({ error: 'Eval id is required' }, 400); - const artifactDir = requestedArtifactDir(c); - if (artifactDir.error) return c.json({ error: artifactDir.error }, 400); + const resultDir = requestedResultDir(c); + if (resultDir.error) return c.json({ error: resultDir.error }, 400); const meta = await findRunById(searchDir, filename, projectId); if (!meta) return c.json({ error: 'Run not found' }, 404); try { const records = await parseManifestForMeta(searchDir, meta, projectId); - const selection = manifestRecordSelection(records, evalId, artifactDir.value); + const selection = manifestRecordSelection(records, evalId, resultDir.value); if (!selection) return c.json({ error: 'Eval not found' }, 404); const { record } = selection; diff --git a/apps/cli/src/commands/results/validate.ts b/apps/cli/src/commands/results/validate.ts index 49cb206be..b75288498 100644 --- a/apps/cli/src/commands/results/validate.ts +++ b/apps/cli/src/commands/results/validate.ts @@ -37,7 +37,7 @@ interface IndexEntry { readonly summary_path?: string; readonly grading_path?: string; readonly timing_path?: string; - readonly artifact_dir?: string; + readonly result_dir?: string; readonly trials?: readonly { readonly run_path?: string }[]; readonly [key: string]: unknown; } @@ -237,22 +237,22 @@ function checkArtifactFiles(runDir: string, entries: IndexEntry[]): Diagnostic[] } for (const trial of entry.trials ?? []) { - if (!entry.artifact_dir || !trial.run_path) { + if (!entry.result_dir || !trial.run_path) { continue; } - const runDirPath = path.join(runDir, entry.artifact_dir, trial.run_path); + const runDirPath = path.join(runDir, entry.result_dir, trial.run_path); const resultPath = path.join(runDirPath, 'result.json'); const gradingPath = path.join(runDirPath, 'grading.json'); if (!existsSync(resultPath)) { diagnostics.push({ severity: 'error', - message: `${testId}: result.json not found at '${path.posix.join(entry.artifact_dir, trial.run_path, 'result.json')}'`, + message: `${testId}: result.json not found at '${path.posix.join(entry.result_dir, trial.run_path, 'result.json')}'`, }); } if (!existsSync(gradingPath)) { diagnostics.push({ severity: 'error', - message: `${testId}: grading.json not found at '${path.posix.join(entry.artifact_dir, trial.run_path, 'grading.json')}'`, + message: `${testId}: grading.json not found at '${path.posix.join(entry.result_dir, trial.run_path, 'grading.json')}'`, }); } } diff --git a/apps/cli/src/commands/runs/rerun.ts b/apps/cli/src/commands/runs/rerun.ts index 8e7751916..d05c520db 100644 --- a/apps/cli/src/commands/runs/rerun.ts +++ b/apps/cli/src/commands/runs/rerun.ts @@ -32,7 +32,7 @@ interface SelectedTaskBundle { readonly record: ResultManifestRecord; readonly testId: string; readonly sourceTarget: string; - readonly artifactDir: string; + readonly resultDir: string; readonly taskDir: string; readonly evalPath: string; readonly targetsPath: string; @@ -254,10 +254,7 @@ function forbiddenOutputRoots( ): readonly string[] { return [ path.resolve(sourceRunDir), - ...selected.flatMap((bundle) => [ - path.resolve(bundle.artifactDir), - path.resolve(bundle.taskDir), - ]), + ...selected.flatMap((bundle) => [path.resolve(bundle.resultDir), path.resolve(bundle.taskDir)]), ]; } @@ -340,11 +337,11 @@ async function loadSelectedTaskBundles(options: { const taskDir = resolveRelativeRunPath(options.sourceRunDir, record.task_dir) ?? (evalPath ? path.dirname(evalPath) : undefined); - const artifactDir = - resolveRelativeRunPath(options.sourceRunDir, record.artifact_dir) ?? + const resultDir = + resolveRelativeRunPath(options.sourceRunDir, record.result_dir) ?? (taskDir ? path.dirname(taskDir) : undefined); - if (!evalPath || !targetsPath || !taskDir || !artifactDir) { + if (!evalPath || !targetsPath || !taskDir || !resultDir) { throw new Error( `Selected result ${recordLabel} is missing task bundle paths. Re-run requires task/EVAL.yaml and task/targets.yaml.`, ); @@ -357,7 +354,7 @@ async function loadSelectedTaskBundles(options: { record, testId, sourceTarget, - artifactDir, + resultDir, taskDir, evalPath, targetsPath, @@ -386,7 +383,7 @@ function buildSourceMetadataByEvalFile( mode: 'rerun', sourceRunDir: path.resolve(sourceRunDir), sourceIndexPath: path.resolve(indexPath), - sourceArtifactDir: path.resolve(bundle.artifactDir), + sourceResultDir: path.resolve(bundle.resultDir), sourceTaskDir: path.resolve(bundle.taskDir), sourceTestId: bundle.testId, sourceTarget: bundle.sourceTarget, diff --git a/apps/cli/test/commands/eval/artifact-writer.test.ts b/apps/cli/test/commands/eval/artifact-writer.test.ts index c797e0418..921b266a7 100644 --- a/apps/cli/test/commands/eval/artifact-writer.test.ts +++ b/apps/cli/test/commands/eval/artifact-writer.test.ts @@ -991,7 +991,7 @@ describe('writeArtifactsFromResults', () => { ci95_upper: 1, stddev: 0.53, }); - expect(indexEntry?.artifact_dir).toBe('repeat-case'); + expect(indexEntry?.result_dir).toBe('repeat-case'); expect(indexEntry?.summary_path).toBe('repeat-case/summary.json'); expect(indexEntry?.task_dir).toBeUndefined(); expect(indexEntry?.input_path).toBeUndefined(); @@ -1808,7 +1808,7 @@ describe('writeArtifactsFromResults', () => { .trim() .split('\n') .map(JSON.parse); - expect(indexLine.artifact_dir).toBe('imported-suite/shared-id'); + expect(indexLine.result_dir).toBe('imported-suite/shared-id'); expect(indexLine.grading_path).toBe('imported-suite/shared-id/run-1/grading.json'); }); @@ -1937,7 +1937,7 @@ describe('writeArtifactsFromResults', () => { const indexLine = JSON.parse((await readFile(paths.indexPath, 'utf8')).trim()); expect(indexLine).toMatchObject({ - artifact_dir: 'trace-case', + result_dir: 'trace-case', task_dir: 'trace-case/task', eval_path: 'trace-case/task/EVAL.yaml', targets_path: 'trace-case/task/targets.yaml', diff --git a/apps/cli/test/commands/results/combine.test.ts b/apps/cli/test/commands/results/combine.test.ts index bfc2155f7..48b21dbc0 100644 --- a/apps/cli/test/commands/results/combine.test.ts +++ b/apps/cli/test/commands/results/combine.test.ts @@ -187,7 +187,7 @@ describe('results combine', () => { it('copies and rewrites artifact pointers when combining runs', () => { const first = seedRun('run-a', [ result({ - artifact_dir: 'demo/test-a', + result_dir: 'demo/test-a', trace_path: 'demo/test-a/trace.json', transcript_path: 'demo/test-a/transcript.jsonl', metrics_path: 'demo/test-a/metrics.json', @@ -251,7 +251,7 @@ describe('results combine', () => { }); const [record] = readIndex(combined.manifestPath); - expect(record.artifact_dir).toBe('sources/source-1/demo/test-a'); + expect(record.result_dir).toBe('sources/source-1/demo/test-a'); expect(record).not.toHaveProperty('trace_path'); expect(record.transcript_path).toBe('sources/source-1/demo/test-a/transcript.jsonl'); expect(record.metrics_path).toBe('sources/source-1/demo/test-a/metrics.json'); diff --git a/apps/cli/test/commands/results/export.test.ts b/apps/cli/test/commands/results/export.test.ts index 275c1d5be..c512806a0 100644 --- a/apps/cli/test/commands/results/export.test.ts +++ b/apps/cli/test/commands/results/export.test.ts @@ -353,7 +353,7 @@ describe('results export', () => { }); expect(bundle.entries[0].artifact_refs).toMatchObject({ status: 'planned_export', - artifact_dir: 'privacy/test-private', + result_dir: 'privacy/test-private', summary_path: 'privacy/test-private/summary.json', grading_path: 'privacy/test-private/run-1/grading.json', timing_path: 'privacy/test-private/run-1/timing.json', @@ -424,7 +424,7 @@ describe('results export', () => { test_id: 'test-greeting', target: 'gpt-4o', execution_status: 'ok', - artifact_dir: 'demo/test-greeting', + result_dir: 'demo/test-greeting', summary_path: 'demo/test-greeting/summary.json', grading_path: 'demo/test-greeting/run-1/grading.json', timing_path: 'demo/test-greeting/run-1/timing.json', diff --git a/apps/cli/test/commands/runs/rerun.test.ts b/apps/cli/test/commands/runs/rerun.test.ts index 1ca5bdae6..0e90b7318 100644 --- a/apps/cli/test/commands/runs/rerun.test.ts +++ b/apps/cli/test/commands/runs/rerun.test.ts @@ -68,7 +68,7 @@ tests: test_id: options.testId, target: 'captured', score: 0.1, - artifact_dir: options.testId, + result_dir: options.testId, grading_path: `${options.testId}/grading.json`, timing_path: `${options.testId}/timing.json`, output_path: `${options.testId}/outputs/answer.md`, diff --git a/apps/dashboard/src/components/EvalDetail.tsx b/apps/dashboard/src/components/EvalDetail.tsx index 4592197a6..7fc805106 100644 --- a/apps/dashboard/src/components/EvalDetail.tsx +++ b/apps/dashboard/src/components/EvalDetail.tsx @@ -709,12 +709,12 @@ function TrialChecksTab({ onOpenFile: (path: string) => void; }) { const gradingPath = trial.grading_path; - const artifactDir = result.artifact_dir; + const resultDir = result.result_dir; const evalId = result.testId; const { data: gradingContent, isLoading } = projectId && gradingPath - ? useQuery(projectEvalFileContentOptions(projectId, runId, evalId, gradingPath, artifactDir)) - : useEvalFileContent(runId, evalId, gradingPath ?? '', artifactDir); + ? useQuery(projectEvalFileContentOptions(projectId, runId, evalId, gradingPath, resultDir)) + : useEvalFileContent(runId, evalId, gradingPath ?? '', resultDir); const parsed = parseGradingArtifact(gradingContent?.content); if (!gradingPath) { @@ -814,7 +814,7 @@ function RepeatAggregateTranscriptTab({ runId, evalId: result.testId, filePath: transcriptPath, - artifactDir: result.artifact_dir, + resultDir: result.result_dir, raw: true, }) : undefined; @@ -870,19 +870,17 @@ function TrialTranscriptTab({ onOpenFile: (path: string) => void; }) { const evalId = result.testId; - const artifactDir = result.artifact_dir; + const resultDir = result.result_dir; const transcriptPath = trial.transcript_path; const answerPath = trial.answer_path; const { data: transcriptContent, isLoading: isLoadingTranscript } = projectId && transcriptPath - ? useQuery( - projectEvalFileContentOptions(projectId, runId, evalId, transcriptPath, artifactDir), - ) - : useEvalFileContent(runId, evalId, transcriptPath ?? '', artifactDir); + ? useQuery(projectEvalFileContentOptions(projectId, runId, evalId, transcriptPath, resultDir)) + : useEvalFileContent(runId, evalId, transcriptPath ?? '', resultDir); const { data: answerContent } = projectId && answerPath - ? useQuery(projectEvalFileContentOptions(projectId, runId, evalId, answerPath, artifactDir)) - : useEvalFileContent(runId, evalId, answerPath ?? '', artifactDir); + ? useQuery(projectEvalFileContentOptions(projectId, runId, evalId, answerPath, resultDir)) + : useEvalFileContent(runId, evalId, answerPath ?? '', resultDir); const transcriptValue = transcriptContent?.content ?? ''; const parsedTranscript = useMemo(() => parseTranscriptJsonl(transcriptValue), [transcriptValue]); @@ -939,7 +937,7 @@ function TrialTranscriptTab({ runId, evalId, filePath: answerPath, - artifactDir, + resultDir, raw: true, }) : undefined; @@ -948,7 +946,7 @@ function TrialTranscriptTab({ runId, evalId, filePath: transcriptPath, - artifactDir, + resultDir, raw: true, }); const transcriptDownloadHref = artifactFileContentUrl({ @@ -956,7 +954,7 @@ function TrialTranscriptTab({ runId, evalId, filePath: transcriptPath, - artifactDir, + resultDir, download: true, }); @@ -986,14 +984,14 @@ function TranscriptTab({ onOpenFile: (path: string) => void; }) { const evalId = result.testId; - const artifactDir = result.artifact_dir; + const resultDir = result.result_dir; const { data: transcriptData, isLoading: isLoadingTranscript, error: transcriptError, } = projectId - ? useQuery(projectEvalTranscriptOptions(projectId, runId, evalId, artifactDir)) - : useEvalTranscript(runId, evalId, artifactDir); + ? useQuery(projectEvalTranscriptOptions(projectId, runId, evalId, resultDir)) + : useEvalTranscript(runId, evalId, resultDir); const transcriptPath = transcriptData?.transcript_path; const answerPath = transcriptData?.answer_path; const transcriptContent = transcriptData?.status === 'ok' ? (transcriptData.content ?? '') : ''; @@ -1074,7 +1072,7 @@ function TranscriptTab({ runId, evalId, filePath: transcriptPath, - artifactDir, + resultDir, raw: true, })} target="_blank" @@ -1107,7 +1105,7 @@ function TranscriptTab({ runId, evalId, filePath: answerPath, - artifactDir, + resultDir, raw: true, }) : undefined; @@ -1117,7 +1115,7 @@ function TranscriptTab({ runId, evalId, filePath: transcriptPath, - artifactDir, + resultDir, raw: true, }) : undefined; @@ -1127,7 +1125,7 @@ function TranscriptTab({ runId, evalId, filePath: transcriptPath, - artifactDir, + resultDir, download: true, }) : undefined; @@ -1160,12 +1158,12 @@ function FilesTab({ onSelectedPathChange: (path: string) => void; }) { const evalId = result.testId; - const artifactDir = result.artifact_dir; + const resultDir = result.result_dir; // Use project-scoped API hooks when projectId is present const { data: filesData } = projectId - ? useQuery(projectEvalFilesOptions(projectId, runId, evalId, artifactDir)) - : useEvalFiles(runId, evalId, artifactDir); + ? useQuery(projectEvalFilesOptions(projectId, runId, evalId, resultDir)) + : useEvalFiles(runId, evalId, resultDir); const files = filesData?.files ?? []; const [localSelectedPath, setLocalSelectedPath] = useState(null); @@ -1180,9 +1178,9 @@ function FilesTab({ const { data: fileContentData, isLoading: isLoadingContent } = projectId ? useQuery( - projectEvalFileContentOptions(projectId, runId, evalId, effectivePath ?? '', artifactDir), + projectEvalFileContentOptions(projectId, runId, evalId, effectivePath ?? '', resultDir), ) - : useEvalFileContent(runId, evalId, effectivePath ?? '', artifactDir); + : useEvalFileContent(runId, evalId, effectivePath ?? '', resultDir); if (files.length === 0) { return

No artifact files available.

; diff --git a/apps/dashboard/src/components/ResultTable.tsx b/apps/dashboard/src/components/ResultTable.tsx index 37b577ad8..457f1da99 100644 --- a/apps/dashboard/src/components/ResultTable.tsx +++ b/apps/dashboard/src/components/ResultTable.tsx @@ -914,7 +914,7 @@ function ResultDetailPanel({ projectId, runId, evalId: row.testId, - artifactDir: row.result.artifact_dir, + resultDir: row.result.result_dir, }); const title = selectedTrialPath ? `${row.testId} · ${selectedTrialPath}` : row.testId; const showAggregateRepeatDetail = repeatGroup && !selectedTrial; @@ -1007,13 +1007,13 @@ function buildEvalDetailHref(options: { projectId?: string; runId: string; evalId: string; - artifactDir?: string; + resultDir?: string; }): string { const base = options.projectId ? `/projects/${encodeURIComponent(options.projectId)}/evals/${encodeURIComponent(options.runId)}/${encodeURIComponent(options.evalId)}` : `/evals/${encodeURIComponent(options.runId)}/${encodeURIComponent(options.evalId)}`; - if (!options.artifactDir) return base; - return `${base}?artifact_dir=${encodeURIComponent(options.artifactDir)}`; + if (!options.resultDir) return base; + return `${base}?result_dir=${encodeURIComponent(options.resultDir)}`; } function scrollPanelIntoView(panel: HTMLElement | null) { diff --git a/apps/dashboard/src/lib/api.ts b/apps/dashboard/src/lib/api.ts index bdbc59c82..685945b4b 100644 --- a/apps/dashboard/src/lib/api.ts +++ b/apps/dashboard/src/lib/api.ts @@ -104,9 +104,9 @@ function withQueryParams(base: string, params: URLSearchParams): string { return query ? `${base}?${query}` : base; } -function evalArtifactParams(artifactDir?: string): URLSearchParams { +function evalArtifactParams(resultDir?: string): URLSearchParams { const params = new URLSearchParams(); - if (artifactDir) params.set('artifact_dir', artifactDir); + if (resultDir) params.set('result_dir', resultDir); return params; } @@ -115,14 +115,14 @@ export function artifactFileContentUrl(options: { evalId: string; filePath: string; projectId?: string; - artifactDir?: string; + resultDir?: string; raw?: boolean; download?: boolean; }): string { const base = options.projectId ? `${projectApiBase(options.projectId)}/runs/${encodeURIComponent(options.runId)}/evals/${encodeURIComponent(options.evalId)}/files/${encodeArtifactPath(options.filePath)}` : `/api/runs/${encodeURIComponent(options.runId)}/evals/${encodeURIComponent(options.evalId)}/files/${encodeArtifactPath(options.filePath)}`; - const params = evalArtifactParams(options.artifactDir); + const params = evalArtifactParams(options.resultDir); if (options.raw) params.set('raw', '1'); if (options.download) params.set('download', '1'); return withQueryParams(base, params); @@ -175,12 +175,12 @@ export function runSuitesOptions(runId: string) { }); } -export function evalDetailOptions(runId: string, evalId: string, artifactDir?: string) { +export function evalDetailOptions(runId: string, evalId: string, resultDir?: string) { const base = `/api/runs/${encodeURIComponent(runId)}/evals/${encodeURIComponent(evalId)}`; return queryOptions({ - queryKey: ['runs', runId, 'evals', evalId, artifactDir ?? ''], + queryKey: ['runs', runId, 'evals', evalId, resultDir ?? ''], queryFn: () => - fetchJson(withQueryParams(base, evalArtifactParams(artifactDir))), + fetchJson(withQueryParams(base, evalArtifactParams(resultDir))), enabled: !!runId && !!evalId, }); } @@ -222,12 +222,12 @@ export const targetsOptions = queryOptions({ queryFn: () => fetchJson('/api/targets'), }); -export function evalFilesOptions(runId: string, evalId: string, artifactDir?: string) { +export function evalFilesOptions(runId: string, evalId: string, resultDir?: string) { const base = `/api/runs/${encodeURIComponent(runId)}/evals/${encodeURIComponent(evalId)}/files`; return queryOptions({ - queryKey: ['runs', runId, 'evals', evalId, artifactDir ?? '', 'files'], + queryKey: ['runs', runId, 'evals', evalId, resultDir ?? '', 'files'], queryFn: () => - fetchJson(withQueryParams(base, evalArtifactParams(artifactDir))), + fetchJson(withQueryParams(base, evalArtifactParams(resultDir))), enabled: !!runId && !!evalId, }); } @@ -236,24 +236,24 @@ export function evalFileContentOptions( runId: string, evalId: string, filePath: string, - artifactDir?: string, + resultDir?: string, ) { return queryOptions({ - queryKey: ['runs', runId, 'evals', evalId, artifactDir ?? '', 'files', filePath], + queryKey: ['runs', runId, 'evals', evalId, resultDir ?? '', 'files', filePath], queryFn: () => fetchJson( - artifactFileContentUrl({ runId, evalId, filePath, artifactDir }), + artifactFileContentUrl({ runId, evalId, filePath, resultDir }), ), enabled: !!runId && !!evalId && !!filePath, }); } -export function evalTranscriptOptions(runId: string, evalId: string, artifactDir?: string) { +export function evalTranscriptOptions(runId: string, evalId: string, resultDir?: string) { const base = `/api/runs/${encodeURIComponent(runId)}/evals/${encodeURIComponent(evalId)}/transcript`; return queryOptions({ - queryKey: ['runs', runId, 'evals', evalId, artifactDir ?? '', 'transcript'], + queryKey: ['runs', runId, 'evals', evalId, resultDir ?? '', 'transcript'], queryFn: () => - fetchJson(withQueryParams(base, evalArtifactParams(artifactDir))), + fetchJson(withQueryParams(base, evalArtifactParams(resultDir))), enabled: !!runId && !!evalId, }); } @@ -316,8 +316,8 @@ export function useRunSuites(runId: string) { return useQuery(runSuitesOptions(runId)); } -export function useEvalDetail(runId: string, evalId: string, artifactDir?: string) { - return useQuery(evalDetailOptions(runId, evalId, artifactDir)); +export function useEvalDetail(runId: string, evalId: string, resultDir?: string) { + return useQuery(evalDetailOptions(runId, evalId, resultDir)); } export function useIndex() { @@ -340,21 +340,21 @@ export function useTargets() { return useQuery(targetsOptions); } -export function useEvalFiles(runId: string, evalId: string, artifactDir?: string) { - return useQuery(evalFilesOptions(runId, evalId, artifactDir)); +export function useEvalFiles(runId: string, evalId: string, resultDir?: string) { + return useQuery(evalFilesOptions(runId, evalId, resultDir)); } export function useEvalFileContent( runId: string, evalId: string, filePath: string, - artifactDir?: string, + resultDir?: string, ) { - return useQuery(evalFileContentOptions(runId, evalId, filePath, artifactDir)); + return useQuery(evalFileContentOptions(runId, evalId, filePath, resultDir)); } -export function useEvalTranscript(runId: string, evalId: string, artifactDir?: string) { - return useQuery(evalTranscriptOptions(runId, evalId, artifactDir)); +export function useEvalTranscript(runId: string, evalId: string, resultDir?: string) { + return useQuery(evalTranscriptOptions(runId, evalId, resultDir)); } export function useRunCategories(runId: string) { @@ -555,13 +555,13 @@ export function projectEvalDetailOptions( projectId: string, runId: string, evalId: string, - artifactDir?: string, + resultDir?: string, ) { const base = `${projectApiBase(projectId)}/runs/${encodeURIComponent(runId)}/evals/${encodeURIComponent(evalId)}`; return queryOptions({ - queryKey: ['projects', projectId, 'runs', runId, 'evals', evalId, artifactDir ?? ''], + queryKey: ['projects', projectId, 'runs', runId, 'evals', evalId, resultDir ?? ''], queryFn: () => - fetchJson(withQueryParams(base, evalArtifactParams(artifactDir))), + fetchJson(withQueryParams(base, evalArtifactParams(resultDir))), enabled: !!projectId && !!runId && !!evalId, }); } @@ -570,13 +570,13 @@ export function projectEvalFilesOptions( projectId: string, runId: string, evalId: string, - artifactDir?: string, + resultDir?: string, ) { const base = `${projectApiBase(projectId)}/runs/${encodeURIComponent(runId)}/evals/${encodeURIComponent(evalId)}/files`; return queryOptions({ - queryKey: ['projects', projectId, 'runs', runId, 'evals', evalId, artifactDir ?? '', 'files'], + queryKey: ['projects', projectId, 'runs', runId, 'evals', evalId, resultDir ?? '', 'files'], queryFn: () => - fetchJson(withQueryParams(base, evalArtifactParams(artifactDir))), + fetchJson(withQueryParams(base, evalArtifactParams(resultDir))), enabled: !!projectId && !!runId && !!evalId, }); } @@ -586,7 +586,7 @@ export function projectEvalFileContentOptions( runId: string, evalId: string, filePath: string, - artifactDir?: string, + resultDir?: string, ) { return queryOptions({ queryKey: [ @@ -596,13 +596,13 @@ export function projectEvalFileContentOptions( runId, 'evals', evalId, - artifactDir ?? '', + resultDir ?? '', 'files', filePath, ], queryFn: () => fetchJson( - artifactFileContentUrl({ projectId, runId, evalId, filePath, artifactDir }), + artifactFileContentUrl({ projectId, runId, evalId, filePath, resultDir }), ), enabled: !!projectId && !!runId && !!evalId && !!filePath, }); @@ -612,7 +612,7 @@ export function projectEvalTranscriptOptions( projectId: string, runId: string, evalId: string, - artifactDir?: string, + resultDir?: string, ) { const base = `${projectApiBase(projectId)}/runs/${encodeURIComponent(runId)}/evals/${encodeURIComponent(evalId)}/transcript`; return queryOptions({ @@ -623,11 +623,11 @@ export function projectEvalTranscriptOptions( runId, 'evals', evalId, - artifactDir ?? '', + resultDir ?? '', 'transcript', ], queryFn: () => - fetchJson(withQueryParams(base, evalArtifactParams(artifactDir))), + fetchJson(withQueryParams(base, evalArtifactParams(resultDir))), enabled: !!projectId && !!runId && !!evalId, }); } diff --git a/apps/dashboard/src/lib/types.ts b/apps/dashboard/src/lib/types.ts index 74f72e6ae..58ba2a89d 100644 --- a/apps/dashboard/src/lib/types.ts +++ b/apps/dashboard/src/lib/types.ts @@ -250,7 +250,7 @@ export interface EvalResult { source_traceability?: SourceTraceability; trials?: EvalCaseTrial[]; aggregation?: EvalTrialAggregation; - artifact_dir?: string; + result_dir?: string; summary_path?: string; grading_path?: string; timing_path?: string; diff --git a/apps/dashboard/src/routes/evals/$runId.$evalId.tsx b/apps/dashboard/src/routes/evals/$runId.$evalId.tsx index 89246b754..18462ddcd 100644 --- a/apps/dashboard/src/routes/evals/$runId.$evalId.tsx +++ b/apps/dashboard/src/routes/evals/$runId.$evalId.tsx @@ -19,10 +19,10 @@ export const Route = createFileRoute('/evals/$runId/$evalId')({ function EvalDetailPage() { const { runId, evalId } = Route.useParams(); - const artifactDir = + const resultDir = typeof window === 'undefined' ? undefined - : (new URLSearchParams(window.location.search).get('artifact_dir') ?? undefined); + : (new URLSearchParams(window.location.search).get('result_dir') ?? undefined); const { data, isLoading, error } = useRunDetail(runId); const { data: config } = useStudioConfig(); const [showRunEval, setShowRunEval] = useState(false); @@ -46,7 +46,7 @@ function EvalDetailPage() { } const result = data?.results.find( - (r) => r.testId === evalId && (!artifactDir || r.artifact_dir === artifactDir), + (r) => r.testId === evalId && (!resultDir || r.result_dir === resultDir), ); if (!result) { diff --git a/apps/dashboard/src/routes/projects/$projectId_/evals/$runId.$evalId.tsx b/apps/dashboard/src/routes/projects/$projectId_/evals/$runId.$evalId.tsx index 60128f9da..9b00ca7b5 100644 --- a/apps/dashboard/src/routes/projects/$projectId_/evals/$runId.$evalId.tsx +++ b/apps/dashboard/src/routes/projects/$projectId_/evals/$runId.$evalId.tsx @@ -15,10 +15,10 @@ export const Route = createFileRoute('/projects/$projectId_/evals/$runId/$evalId function ProjectEvalDetailPage() { const { projectId, runId, evalId } = Route.useParams(); - const artifactDir = + const resultDir = typeof window === 'undefined' ? undefined - : (new URLSearchParams(window.location.search).get('artifact_dir') ?? undefined); + : (new URLSearchParams(window.location.search).get('result_dir') ?? undefined); const { data, isLoading, error } = useProjectRunDetail(projectId, runId); const { data: config } = useStudioConfig(projectId); const [showRunEval, setShowRunEval] = useState(false); @@ -42,7 +42,7 @@ function ProjectEvalDetailPage() { } const result = data?.results.find( - (r) => r.testId === evalId && (!artifactDir || r.artifact_dir === artifactDir), + (r) => r.testId === evalId && (!resultDir || r.result_dir === resultDir), ); if (!result) { diff --git a/apps/web/src/content/docs/docs/evaluation/running-evals.mdx b/apps/web/src/content/docs/docs/evaluation/running-evals.mdx index 92b577f9f..30ecfa1bb 100644 --- a/apps/web/src/content/docs/docs/evaluation/running-evals.mdx +++ b/apps/web/src/content/docs/docs/evaluation/running-evals.mdx @@ -129,7 +129,7 @@ my-results/ ``` The `index.jsonl` row links to these generated paths with snake_case fields such -as `artifact_dir`, `task_dir`, `eval_path`, `targets_path`, `files_path`, and +as `result_dir`, `task_dir`, `eval_path`, `targets_path`, `files_path`, and `graders_path`. Treat those paths as relative to the run directory. When you need a portable artifact for audit, review, Dashboard inspection, or rerun workflows, share the generated run directory and its `index.jsonl` manifest. Source-side @@ -443,7 +443,7 @@ See the [Import tool docs](/docs/tools/import/) for all providers and options. ## Transcript And Result Artifacts -Each result row's `artifact_dir` is a case-local folder under the timestamped +Each result row's `result_dir` is a case-local folder under the timestamped run bundle. It can include `transcript.jsonl`, `transcript-raw.jsonl`, `grading.json`, `timing.json`, `metrics.json`, and generated outputs under `outputs/`. The run root does not contain a mixed transcript artifact; use each diff --git a/docs/plans/2026-06-21-001-feat-av-quf-results-storage-plan.md b/docs/plans/2026-06-21-001-feat-av-quf-results-storage-plan.md index 60fa834bb..5561bdb74 100644 --- a/docs/plans/2026-06-21-001-feat-av-quf-results-storage-plan.md +++ b/docs/plans/2026-06-21-001-feat-av-quf-results-storage-plan.md @@ -405,7 +405,7 @@ after logical deletion. **Transcript migration:** - Existing runs may have `transcript_path` pointing at - `/outputs/transcript.jsonl`. + `/outputs/transcript.jsonl`. - Migration copies transcript bytes to `agentv/artifacts/v1:transcripts////transcript.jsonl` or the matching object-store key. diff --git a/docs/plans/2026-06-23-001-feat-repeat-runs-flaky-evals-plan.md b/docs/plans/2026-06-23-001-feat-repeat-runs-flaky-evals-plan.md index 17769b29b..0eeefd444 100644 --- a/docs/plans/2026-06-23-001-feat-repeat-runs-flaky-evals-plan.md +++ b/docs/plans/2026-06-23-001-feat-repeat-runs-flaky-evals-plan.md @@ -399,7 +399,7 @@ For trend and compare views, repeat aggregates should be the default unit. Attem "target": "codex", "score": 0.83, "execution_status": "quality_failure", - "artifact_dir": "case-1", + "result_dir": "case-1", "summary_path": "case-1/summary.json", "planned_attempts": 3, "total_attempts": 3, diff --git a/packages/core/src/evaluation/result-row-schema.ts b/packages/core/src/evaluation/result-row-schema.ts index 8dfd9fae3..d5b771031 100644 --- a/packages/core/src/evaluation/result-row-schema.ts +++ b/packages/core/src/evaluation/result-row-schema.ts @@ -20,7 +20,7 @@ const MIGRATION_GUIDANCE = const RESULT_ROW_ALIASES = { answerPath: 'answer_path', - artifactDir: 'artifact_dir', + resultDir: 'result_dir', conversationId: 'conversation_id', costUsd: 'cost_usd', durationMs: 'duration_ms', diff --git a/packages/core/src/evaluation/run-artifacts.ts b/packages/core/src/evaluation/run-artifacts.ts index ef13c67b0..20aa6fb32 100644 --- a/packages/core/src/evaluation/run-artifacts.ts +++ b/packages/core/src/evaluation/run-artifacts.ts @@ -292,7 +292,7 @@ export interface IndexArtifactEntry { readonly failure_stage?: string; readonly failure_reason_code?: string; readonly workspace_path?: string; - readonly artifact_dir?: string; + readonly result_dir?: string; readonly grading_path?: string; readonly timing_path?: string; readonly summary_path?: string; @@ -600,7 +600,7 @@ function toIndexRerunSource(value: unknown): Record | undefined mode: value.mode, source_run_dir: value.sourceRunDir, source_index_path: value.sourceIndexPath, - source_artifact_dir: value.sourceArtifactDir, + source_result_dir: value.sourceResultDir, source_task_dir: value.sourceTaskDir, source_test_id: value.sourceTestId, source_target: value.sourceTarget, @@ -1382,7 +1382,7 @@ export function buildIndexArtifactEntry( result: EvaluationResult, options: { outputDir: string; - artifactDir?: string; + resultDir?: string; gradingPath?: string; timingPath?: string; summaryPath?: string; @@ -1419,8 +1419,8 @@ export function buildIndexArtifactEntry( failure_stage: result.failureStage, failure_reason_code: result.failureReasonCode, workspace_path: result.workspacePath, - artifact_dir: options.artifactDir - ? toRelativeArtifactPath(options.outputDir, options.artifactDir) + result_dir: options.resultDir + ? toRelativeArtifactPath(options.outputDir, options.resultDir) : undefined, grading_path: options.gradingPath ? toRelativeArtifactPath(options.outputDir, options.gradingPath) @@ -1496,7 +1496,7 @@ export function buildResultIndexArtifact( failure_stage: result.failureStage, failure_reason_code: result.failureReasonCode, workspace_path: result.workspacePath, - artifact_dir: artifactSubdir, + result_dir: artifactSubdir, summary_path: path.posix.join(artifactSubdir, RUN_SUMMARY_FILENAME), grading_path: isSingleRun ? path.posix.join(singleRunDir, 'grading.json') : undefined, timing_path: isSingleRun ? path.posix.join(singleRunDir, 'timing.json') : undefined, @@ -2000,7 +2000,7 @@ export async function writePerTestArtifacts( indexRecords.push({ ...buildIndexArtifactEntry(result, { outputDir, - artifactDir: testDir, + resultDir: testDir, summaryPath: caseSummaryPath, gradingPath: singleGradingPath, timingPath: singleTimingPath, @@ -2165,7 +2165,7 @@ export async function writeArtifactsFromResults( const nextRecord = { ...buildIndexArtifactEntry(result, { outputDir, - artifactDir: plan.testDir, + resultDir: plan.testDir, summaryPath: plan.caseSummaryPath, gradingPath: plan.singleGradingPath, timingPath: plan.singleTimingPath, diff --git a/packages/core/test/evaluation/evaluate-programmatic-api.test.ts b/packages/core/test/evaluation/evaluate-programmatic-api.test.ts index b65502f20..cba36f5d2 100644 --- a/packages/core/test/evaluation/evaluate-programmatic-api.test.ts +++ b/packages/core/test/evaluation/evaluate-programmatic-api.test.ts @@ -140,7 +140,7 @@ describe('evaluate() — programmatic API extensions', () => { const [indexRow] = indexContent .trim() .split('\n') - .map((line) => JSON.parse(line) as { artifact_dir?: string }); + .map((line) => JSON.parse(line) as { result_dir?: string }); const summaryArtifact = JSON.parse( await readFile(path.join(outputDir, 'summary.json'), 'utf8'), @@ -153,7 +153,7 @@ describe('evaluate() — programmatic API extensions', () => { expect(summaryArtifact.metadata.eval_file).toBe(''); expect(summaryArtifact.timing.duration_ms).toBeGreaterThanOrEqual(0); - expect(indexRow?.artifact_dir).toBe('__programmatic__.yaml/programmatic-artifacts'); + expect(indexRow?.result_dir).toBe('__programmatic__.yaml/programmatic-artifacts'); expect( existsSync( path.join(