Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions apps/cli/test/commands/eval/artifact-writer.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1738,6 +1738,52 @@ describe('writeArtifactsFromResults', () => {
expect(indexLine.grading_path).toBe('eval-top-months-chart/shared-id/run-1/grading.json');
});

it('allocates distinct artifact directories for multiple targets with the same suite and test id', async () => {
const paths = await writeArtifactsFromResults(
[
makeResult({
suite: 'suite-a',
testId: 'shared-id',
target: 'baseline',
output: 'baseline output',
}),
makeResult({
suite: 'suite-a',
testId: 'shared-id',
target: 'candidate',
output: 'candidate output',
}),
],
testDir,
);

const indexLines = (await readFile(paths.indexPath, 'utf8'))
.trim()
.split('\n')
.map((line) => JSON.parse(line) as IndexArtifactEntry);

expect(indexLines.map((line) => line.artifact_dir)).toEqual([
'suite-a/shared-id/baseline',
'suite-a/shared-id/candidate',
]);
expect(indexLines.map((line) => line.grading_path)).toEqual([
'suite-a/shared-id/baseline/run-1/grading.json',
'suite-a/shared-id/candidate/run-1/grading.json',
]);
await expect(
readFile(
path.join(testDir, 'suite-a', 'shared-id', 'baseline', 'run-1', 'outputs', 'answer.md'),
'utf8',
),
).resolves.toBe('baseline output');
await expect(
readFile(
path.join(testDir, 'suite-a', 'shared-id', 'candidate', 'run-1', 'outputs', 'answer.md'),
'utf8',
),
).resolves.toBe('candidate output');
});

it('does not prefix artifact paths with suite when it matches the result group', async () => {
const paths = await writeArtifactsFromResults(
[makeResult({ suite: 'eval-top-months-chart', testId: 'shared-id', target: 'baseline' })],
Expand Down
54 changes: 48 additions & 6 deletions packages/core/src/evaluation/run-artifacts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1273,6 +1273,10 @@ function safeTestId(testId: string | undefined): string {
return safeArtifactPathSegment(testId, 'unknown');
}

function safeTarget(target: string | undefined): string {
return safeArtifactPathSegment(target, 'unknown');
}

function getSuite(result: EvaluationResult): string | undefined {
return result.suite;
}
Expand All @@ -1281,6 +1285,7 @@ function buildArtifactSubdir(
result: EvaluationResult,
resultGroup?: string,
sourceTest?: EvalTest,
targetSegment?: string,
): string {
const segments = [];
const evalSet = getSuite(result);
Expand All @@ -1291,9 +1296,47 @@ function buildArtifactSubdir(
segments.push(safeArtifactPathSegment(evalSet, 'default'));
}
segments.push(safeTestId(result.testId));
if (targetSegment !== undefined) {
segments.push(safeTarget(targetSegment));
}
return path.posix.join(...segments);
}

interface ArtifactWritePlan {
readonly result: EvaluationResult;
readonly artifactSubdir: string;
}

function buildArtifactWritePlans(
results: readonly EvaluationResult[],
resultGroup: string | undefined,
testByTestId: ReadonlyMap<string, EvalTest>,
): readonly ArtifactWritePlan[] {
const basePlans = results.map((result) => {
const sourceTest = findResultSourceTest(result, testByTestId);
return {
result,
sourceTest,
baseSubdir: buildArtifactSubdir(result, resultGroup, sourceTest),
targetSegment: safeTarget(result.target),
};
});
const targetsByBaseSubdir = new Map<string, Set<string>>();
for (const plan of basePlans) {
const targets = targetsByBaseSubdir.get(plan.baseSubdir) ?? new Set<string>();
targets.add(plan.targetSegment);
targetsByBaseSubdir.set(plan.baseSubdir, targets);
}

return basePlans.map((plan) => ({
result: plan.result,
artifactSubdir:
(targetsByBaseSubdir.get(plan.baseSubdir)?.size ?? 0) > 1
? buildArtifactSubdir(plan.result, resultGroup, plan.sourceTest, plan.result.target)
: plan.baseSubdir,
}));
}

function toRelativeArtifactPath(outputDir: string, filePath: string): string {
return path.relative(outputDir, filePath).split(path.sep).join('/');
}
Expand Down Expand Up @@ -1932,10 +1975,10 @@ export async function writePerTestArtifacts(
const duplicatePolicy = options?.duplicatePolicy ?? 'update';
const testByTestId = buildSourceTestLookup(options?.sourceTests);
const indexRecords: ResultIndexArtifact[] = [];
const artifactPlans = buildArtifactWritePlans(results, options?.resultGroup, testByTestId);

for (const result of results) {
const sourceTest = findResultSourceTest(result, testByTestId);
const artifactSubdir = buildArtifactSubdir(result, options?.resultGroup, sourceTest);
for (const artifactPlan of artifactPlans) {
const { result, artifactSubdir } = artifactPlan;
const testDir = path.join(outputDir, artifactSubdir);
await mkdir(testDir, { recursive: true });
const envelope = buildTraceEnvelopeSidecar({
Expand Down Expand Up @@ -2049,10 +2092,9 @@ export async function writeArtifactsFromResults(
const indexRecords: unknown[] = [];
const testByTestId = buildSourceTestLookup(options?.sourceTests);
const emittedIdentityIds = new Set<string>();
const artifactPlans = buildArtifactWritePlans(results, options?.resultGroup, testByTestId);

const plans = results.map((result) => {
const sourceTest = findResultSourceTest(result, testByTestId);
const artifactSubdir = buildArtifactSubdir(result, options?.resultGroup, sourceTest);
const plans = artifactPlans.map(({ result, artifactSubdir }) => {
const testDir = path.join(outputDir, artifactSubdir);
const caseSummaryPath = path.join(testDir, RUN_SUMMARY_FILENAME);
const envelope = buildTraceEnvelopeSidecar({
Expand Down
Loading