diff --git a/apps/cli/test/commands/eval/artifact-writer.test.ts b/apps/cli/test/commands/eval/artifact-writer.test.ts index c797e0418..a9b351580 100644 --- a/apps/cli/test/commands/eval/artifact-writer.test.ts +++ b/apps/cli/test/commands/eval/artifact-writer.test.ts @@ -1738,6 +1738,52 @@ describe('writeArtifactsFromResults', () => { expect(indexLine.grading_path).toBe('eval-top-months-chart/shared-id/run-1/grading.json'); }); + it('allocates distinct artifact directories for multiple targets with the same suite and test id', async () => { + const paths = await writeArtifactsFromResults( + [ + makeResult({ + suite: 'suite-a', + testId: 'shared-id', + target: 'baseline', + output: 'baseline output', + }), + makeResult({ + suite: 'suite-a', + testId: 'shared-id', + target: 'candidate', + output: 'candidate output', + }), + ], + testDir, + ); + + const indexLines = (await readFile(paths.indexPath, 'utf8')) + .trim() + .split('\n') + .map((line) => JSON.parse(line) as IndexArtifactEntry); + + expect(indexLines.map((line) => line.artifact_dir)).toEqual([ + 'suite-a/shared-id/baseline', + 'suite-a/shared-id/candidate', + ]); + expect(indexLines.map((line) => line.grading_path)).toEqual([ + 'suite-a/shared-id/baseline/run-1/grading.json', + 'suite-a/shared-id/candidate/run-1/grading.json', + ]); + await expect( + readFile( + path.join(testDir, 'suite-a', 'shared-id', 'baseline', 'run-1', 'outputs', 'answer.md'), + 'utf8', + ), + ).resolves.toBe('baseline output'); + await expect( + readFile( + path.join(testDir, 'suite-a', 'shared-id', 'candidate', 'run-1', 'outputs', 'answer.md'), + 'utf8', + ), + ).resolves.toBe('candidate output'); + }); + it('does not prefix artifact paths with suite when it matches the result group', async () => { const paths = await writeArtifactsFromResults( [makeResult({ suite: 'eval-top-months-chart', testId: 'shared-id', target: 'baseline' })], diff --git a/packages/core/src/evaluation/run-artifacts.ts b/packages/core/src/evaluation/run-artifacts.ts index ef13c67b0..a7cc7582a 100644 --- a/packages/core/src/evaluation/run-artifacts.ts +++ b/packages/core/src/evaluation/run-artifacts.ts @@ -1273,6 +1273,10 @@ function safeTestId(testId: string | undefined): string { return safeArtifactPathSegment(testId, 'unknown'); } +function safeTarget(target: string | undefined): string { + return safeArtifactPathSegment(target, 'unknown'); +} + function getSuite(result: EvaluationResult): string | undefined { return result.suite; } @@ -1281,6 +1285,7 @@ function buildArtifactSubdir( result: EvaluationResult, resultGroup?: string, sourceTest?: EvalTest, + targetSegment?: string, ): string { const segments = []; const evalSet = getSuite(result); @@ -1291,9 +1296,47 @@ function buildArtifactSubdir( segments.push(safeArtifactPathSegment(evalSet, 'default')); } segments.push(safeTestId(result.testId)); + if (targetSegment !== undefined) { + segments.push(safeTarget(targetSegment)); + } return path.posix.join(...segments); } +interface ArtifactWritePlan { + readonly result: EvaluationResult; + readonly artifactSubdir: string; +} + +function buildArtifactWritePlans( + results: readonly EvaluationResult[], + resultGroup: string | undefined, + testByTestId: ReadonlyMap, +): readonly ArtifactWritePlan[] { + const basePlans = results.map((result) => { + const sourceTest = findResultSourceTest(result, testByTestId); + return { + result, + sourceTest, + baseSubdir: buildArtifactSubdir(result, resultGroup, sourceTest), + targetSegment: safeTarget(result.target), + }; + }); + const targetsByBaseSubdir = new Map>(); + for (const plan of basePlans) { + const targets = targetsByBaseSubdir.get(plan.baseSubdir) ?? new Set(); + targets.add(plan.targetSegment); + targetsByBaseSubdir.set(plan.baseSubdir, targets); + } + + return basePlans.map((plan) => ({ + result: plan.result, + artifactSubdir: + (targetsByBaseSubdir.get(plan.baseSubdir)?.size ?? 0) > 1 + ? buildArtifactSubdir(plan.result, resultGroup, plan.sourceTest, plan.result.target) + : plan.baseSubdir, + })); +} + function toRelativeArtifactPath(outputDir: string, filePath: string): string { return path.relative(outputDir, filePath).split(path.sep).join('/'); } @@ -1932,10 +1975,10 @@ export async function writePerTestArtifacts( const duplicatePolicy = options?.duplicatePolicy ?? 'update'; const testByTestId = buildSourceTestLookup(options?.sourceTests); const indexRecords: ResultIndexArtifact[] = []; + const artifactPlans = buildArtifactWritePlans(results, options?.resultGroup, testByTestId); - for (const result of results) { - const sourceTest = findResultSourceTest(result, testByTestId); - const artifactSubdir = buildArtifactSubdir(result, options?.resultGroup, sourceTest); + for (const artifactPlan of artifactPlans) { + const { result, artifactSubdir } = artifactPlan; const testDir = path.join(outputDir, artifactSubdir); await mkdir(testDir, { recursive: true }); const envelope = buildTraceEnvelopeSidecar({ @@ -2049,10 +2092,9 @@ export async function writeArtifactsFromResults( const indexRecords: unknown[] = []; const testByTestId = buildSourceTestLookup(options?.sourceTests); const emittedIdentityIds = new Set(); + const artifactPlans = buildArtifactWritePlans(results, options?.resultGroup, testByTestId); - const plans = results.map((result) => { - const sourceTest = findResultSourceTest(result, testByTestId); - const artifactSubdir = buildArtifactSubdir(result, options?.resultGroup, sourceTest); + const plans = artifactPlans.map(({ result, artifactSubdir }) => { const testDir = path.join(outputDir, artifactSubdir); const caseSummaryPath = path.join(testDir, RUN_SUMMARY_FILENAME); const envelope = buildTraceEnvelopeSidecar({