Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions apps/cli/src/commands/results/serve.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1203,6 +1203,7 @@ function attachRunDetailReadModelFields<T extends Record<string, unknown>>(
return {
...result,
...(record.aggregation && { aggregation: record.aggregation }),
...(record.eval_path && { eval_path: record.eval_path }),
...(record.result_dir && { result_dir: record.result_dir }),
...(record.summary_path && { summary_path: record.summary_path }),
...(record.grading_path && { grading_path: record.grading_path }),
Expand Down
16 changes: 14 additions & 2 deletions apps/cli/test/commands/results/serve.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2581,18 +2581,30 @@ describe('serve app', () => {
const filename = '2026-03-25T10-00-00-000Z';
const runDir = path.join(runsDir, filename);
mkdirSync(runDir, { recursive: true });
writeFileSync(path.join(runDir, 'index.jsonl'), toJsonl(RESULT_A, RESULT_B));
writeFileSync(
path.join(runDir, 'index.jsonl'),
toJsonl(
{
...RESULT_A,
eval_path: 'evals/demo.eval.yaml',
result_dir: 'demo/test-greeting',
},
RESULT_B,
),
);

const app = createApp([], tempDir, tempDir, undefined, { studioDir });
const res = await app.request(`/api/runs/${filename}`);
expect(res.status).toBe(200);
const data = (await res.json()) as {
results: { testId: string }[];
results: { testId: string; eval_path?: string; result_dir?: string }[];
source: 'local' | 'remote';
source_label: string;
};
expect(data.results).toHaveLength(2);
expect(data.results[0].testId).toBe('test-greeting');
expect(data.results[0].eval_path).toBe('evals/demo.eval.yaml');
expect(data.results[0].result_dir).toBe('demo/test-greeting');
expect(data.source).toBe('local');
expect(data.source_label).toBe(filename);
});
Expand Down
3 changes: 2 additions & 1 deletion apps/dashboard/src/components/EvalDetail.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -335,8 +335,9 @@ function SourceTab({ result }: { result: EvalResult }) {
<h4 className="mb-3 text-sm font-medium text-gray-300">Traceability</h4>
<dl className="grid gap-3 md:grid-cols-2">
<SourceMetaRow label="Eval file" value={traceability.eval_file?.display_path} />
<SourceMetaRow label="Eval" value={result.eval_path} />
<SourceMetaRow label="Test ID" value={traceability.test_id ?? result.testId} />
<SourceMetaRow label="Suite" value={result.suite} />
<SourceMetaRow label="Legacy suite" value={result.suite} />
<SourceMetaRow label="Category" value={result.category} />
<SourceMetaRow label="Target" value={result.target} />
</dl>
Expand Down
18 changes: 18 additions & 0 deletions apps/dashboard/src/components/EvalSourceLabel.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import { formatEvalSourceDisplay } from '~/lib/run-detail-context';
import type { EvalResult } from '~/lib/types';

/** Props accepted by the EvalSourceLabel component. */
interface EvalSourceLabelProps {
/** The result fields handed to formatEvalSourceDisplay to build the label. */
result: Pick<EvalResult, 'eval_path' | 'suite'>;
/** Extra CSS classes appended after the component's own base classes. */
className?: string;
}

/**
 * Renders a muted, truncating one-line label for the source of an eval result.
 *
 * The text and tooltip both come from `formatEvalSourceDisplay(result)`; when
 * that helper returns a falsy value the component renders nothing.
 *
 * @param result - Result fields forwarded unchanged to `formatEvalSourceDisplay`.
 * @param className - Extra classes appended after the base classes (defaults to `''`).
 * @returns The label `<span>`, or `null` when there is nothing to display.
 */
export function EvalSourceLabel({ result, className = '' }: EvalSourceLabelProps) {
  const sourceDisplay = formatEvalSourceDisplay(result);

  if (sourceDisplay) {
    // Base classes come first so caller-supplied classes are appended after them.
    const spanClasses = `block truncate text-gray-500 ${className}`;
    return (
      <span className={spanClasses} title={sourceDisplay.title}>
        {sourceDisplay.label}
      </span>
    );
  }

  return null;
}
29 changes: 8 additions & 21 deletions apps/dashboard/src/components/ResultTable.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import type React from 'react';
import { Fragment, useEffect, useMemo, useState } from 'react';

import { useFeedback } from '~/lib/api';
import { evalResultPath } from '~/lib/navigation';
import {
RESULT_TABLE_VIEW_PRESETS,
type RepeatRunGroup,
Expand Down Expand Up @@ -651,8 +652,8 @@ function TrialResultCell({
return <TargetCell target={row.targetLabel} tone="text-gray-500" />;
case 'score':
return <PassRatePill rate={trial.score ?? 0} />;
case 'suite':
return <TruncatedMuted value={row.suiteLabel} tone="text-gray-500" />;
case 'eval':
return <TruncatedMuted value={row.evalLabel} tone="text-gray-500" />;
case 'category':
return <TruncatedMuted value={row.categoryLabel} tone="text-gray-500" />;
case 'duration':
Expand Down Expand Up @@ -833,8 +834,8 @@ function ResultCell({
) : (
<PassRatePill rate={row.result.score} />
);
case 'suite':
return <TruncatedMuted value={row.suiteLabel} />;
case 'eval':
return <TruncatedMuted value={row.evalLabel} />;
case 'category':
return <TruncatedMuted value={row.categoryLabel} />;
case 'duration':
Expand Down Expand Up @@ -910,11 +911,10 @@ function ResultDetailPanel({
onOpenTrialDetail: (trial: EvalCaseTrial, initialTab?: DetailTab) => void;
onClose: () => void;
}) {
const evalDetailHref = buildEvalDetailHref({
const evalDetailHref = evalResultPath(runId, row.testId, {
projectId,
runId,
evalId: row.testId,
resultDir: row.result.result_dir,
evalPath: row.result.eval_path,
});
const title = selectedTrialPath ? `${row.testId} · ${selectedTrialPath}` : row.testId;
const showAggregateRepeatDetail = repeatGroup && !selectedTrial;
Expand All @@ -934,7 +934,7 @@ function ResultDetailPanel({
</h4>
<p className="mt-1 truncate text-xs text-gray-500" title={row.targetLabel}>
{row.targetLabel}
{row.suiteLabel ? ` · ${row.suiteLabel}` : ''}
{row.evalLabel ? ` · ${row.evalLabel}` : ''}
</p>
</div>
<div className="flex shrink-0 items-center gap-2">
Expand Down Expand Up @@ -1003,19 +1003,6 @@ function ExpanderCell({
);
}

/**
 * Builds the href of an eval detail page.
 *
 * The path is `/projects/<projectId>/evals/<runId>/<evalId>` when a non-empty
 * `projectId` is supplied and `/evals/<runId>/<evalId>` otherwise. Every
 * dynamic segment is percent-encoded with `encodeURIComponent`. A non-empty
 * `resultDir` is appended as the `result_dir` query parameter.
 *
 * @param options - Identifiers of the target eval result.
 * @returns The encoded, root-relative href.
 */
function buildEvalDetailHref(options: {
  projectId?: string;
  runId: string;
  evalId: string;
  resultDir?: string;
}): string {
  const { projectId, runId, evalId, resultDir } = options;

  // An empty projectId is treated like a missing one: fall back to the unscoped route.
  const segments = projectId
    ? ['projects', projectId, 'evals', runId, evalId]
    : ['evals', runId, evalId];
  const pathname = `/${segments.map((segment) => encodeURIComponent(segment)).join('/')}`;

  if (resultDir) {
    return `${pathname}?result_dir=${encodeURIComponent(resultDir)}`;
  }
  return pathname;
}

function scrollPanelIntoView(panel: HTMLElement | null) {
if (!panel) return;
window.requestAnimationFrame(() => {
Expand Down
68 changes: 52 additions & 16 deletions apps/dashboard/src/components/Sidebar.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,18 @@ import {
useRunList,
useStudioConfig,
} from '~/lib/api';
import { shouldShowSuiteLabels } from '~/lib/run-detail-context';
import {
evalResultIdentityKey,
evalResultSearchParams,
matchesEvalResultIdentity,
} from '~/lib/navigation';
import { shouldShowEvalSourceLabels } from '~/lib/run-detail-context';
import { formatRunDisplay } from '~/lib/run-label';
import { useSidebarContext } from '~/lib/sidebar-context';
import type { EvalResult } from '~/lib/types';

import { BrandName } from './BrandName';
import { EvalSuiteLabel } from './EvalSuiteLabel';
import { EvalSourceLabel } from './EvalSourceLabel';

/** Responsive <aside> wrapper. Handles mobile overlay and desktop static placement. */
function SidebarShell({ children }: { children: ReactNode }) {
Expand Down Expand Up @@ -104,11 +109,11 @@ function SidebarRunText({ display }: { display: ReturnType<typeof formatRunDispl
function EvalSidebarItemContent({
result,
passThreshold,
showSuiteLabel,
showEvalSourceLabel,
}: {
result: EvalResult;
passThreshold: number;
showSuiteLabel: boolean;
showEvalSourceLabel: boolean;
}) {
const passed = isPassing(result.score, passThreshold);

Expand All @@ -119,14 +124,23 @@ function EvalSidebarItemContent({
</span>
<span className="min-w-0 flex-1">
<span className="block truncate">{result.testId}</span>
{showSuiteLabel ? (
<EvalSuiteLabel suite={result.suite} className="mt-1 max-w-full text-[11px] leading-4" />
{showEvalSourceLabel ? (
<EvalSourceLabel result={result} className="mt-1 max-w-full text-[11px] leading-4" />
) : null}
</span>
</>
);
}

/**
 * Reads the eval-identity query parameters of the current location.
 *
 * @returns `resultDir` and `evalPath`, taken from the `result_dir` and
 * `eval_path` entries of `location.search`; each is `undefined` when absent.
 */
function useCurrentEvalIdentitySearch() {
  const { search } = useLocation();
  // NOTE(review): the cast assumes the router exposes search params as an
  // already-parsed object of string values — confirm against the router setup.
  const params = search as Record<string, string | undefined>;
  const { result_dir: resultDir, eval_path: evalPath } = params;
  return { resultDir, evalPath };
}

type ProjectTabId = 'runs' | 'experiments' | 'analytics' | 'targets';

const projectNavItems: { id: ProjectTabId; label: string; description: string }[] = [
Expand Down Expand Up @@ -409,8 +423,9 @@ function RunDetailSidebar({ currentRunId }: { currentRunId: string }) {
function EvalSidebar({ runId, currentEvalId }: { runId: string; currentEvalId: string }) {
const { data } = useRunDetail(runId);
const { data: config } = useStudioConfig();
const currentIdentity = useCurrentEvalIdentitySearch();
const passThreshold = config?.threshold ?? config?.pass_threshold ?? 0.8;
const showSuiteLabels = shouldShowSuiteLabels(data?.results ?? []);
const showEvalSourceLabels = shouldShowEvalSourceLabels(data?.results ?? []);

return (
<SidebarShell>
Expand All @@ -434,13 +449,18 @@ function EvalSidebar({ runId, currentEvalId }: { runId: string; currentEvalId: s
</div>

{data?.results.map((result) => {
const isActive = result.testId === currentEvalId;
const search = evalResultSearchParams({
resultDir: result.result_dir,
evalPath: result.eval_path,
});
const isActive = matchesEvalResultIdentity(result, currentEvalId, currentIdentity);

return (
<Link
key={result.testId}
key={evalResultIdentityKey(result)}
to="/evals/$runId/$evalId"
params={{ runId, evalId: result.testId }}
search={search}
className={`mb-0.5 flex items-start gap-2 rounded-md px-2 py-1.5 text-sm transition-colors ${
isActive
? 'bg-gray-800 text-cyan-400'
Expand All @@ -450,7 +470,7 @@ function EvalSidebar({ runId, currentEvalId }: { runId: string; currentEvalId: s
<EvalSidebarItemContent
result={result}
passThreshold={passThreshold}
showSuiteLabel={showSuiteLabels}
showEvalSourceLabel={showEvalSourceLabels}
/>
</Link>
);
Expand Down Expand Up @@ -490,12 +510,17 @@ function SuiteSidebar({ runId, suite }: { runId: string; suite: string }) {

{suiteResults.map((result) => {
const passed = isPassing(result.score, passThreshold);
const search = evalResultSearchParams({
resultDir: result.result_dir,
evalPath: result.eval_path,
});

return (
<Link
key={result.testId}
key={evalResultIdentityKey(result)}
to="/evals/$runId/$evalId"
params={{ runId, evalId: result.testId }}
search={search}
className="mb-0.5 flex items-center gap-2 rounded-md px-2 py-1.5 text-sm text-gray-400 transition-colors hover:bg-gray-800/50 hover:text-gray-200"
>
<span className={`text-xs ${passed ? 'text-emerald-400' : 'text-red-400'}`}>
Expand Down Expand Up @@ -609,8 +634,9 @@ function ProjectEvalSidebar({
}) {
const { data } = useProjectRunDetail(projectId, runId);
const { data: config } = useStudioConfig(projectId);
const currentIdentity = useCurrentEvalIdentitySearch();
const passThreshold = config?.threshold ?? config?.pass_threshold ?? 0.8;
const showSuiteLabels = shouldShowSuiteLabels(data?.results ?? []);
const showEvalSourceLabels = shouldShowEvalSourceLabels(data?.results ?? []);

return (
<SidebarShell>
Expand All @@ -632,12 +658,17 @@ function ProjectEvalSidebar({
Evaluations
</div>
{data?.results.map((result) => {
const isActive = result.testId === currentEvalId;
const search = evalResultSearchParams({
resultDir: result.result_dir,
evalPath: result.eval_path,
});
const isActive = matchesEvalResultIdentity(result, currentEvalId, currentIdentity);
return (
<Link
key={result.testId}
key={evalResultIdentityKey(result)}
to="/projects/$projectId/evals/$runId/$evalId"
params={{ projectId, runId, evalId: result.testId }}
search={search}
className={`mb-0.5 flex items-start gap-2 rounded-md px-2 py-1.5 text-sm transition-colors ${
isActive
? 'bg-gray-800 text-cyan-400'
Expand All @@ -647,7 +678,7 @@ function ProjectEvalSidebar({
<EvalSidebarItemContent
result={result}
passThreshold={passThreshold}
showSuiteLabel={showSuiteLabels}
showEvalSourceLabel={showEvalSourceLabels}
/>
</Link>
);
Expand Down Expand Up @@ -693,11 +724,16 @@ function ProjectSuiteSidebar({
</div>
{suiteResults.map((result) => {
const passed = isPassing(result.score, passThreshold);
const search = evalResultSearchParams({
resultDir: result.result_dir,
evalPath: result.eval_path,
});
return (
<Link
key={result.testId}
key={evalResultIdentityKey(result)}
to="/projects/$projectId/evals/$runId/$evalId"
params={{ projectId, runId, evalId: result.testId }}
search={search}
className="mb-0.5 flex items-center gap-2 rounded-md px-2 py-1.5 text-sm text-gray-400 transition-colors hover:bg-gray-800/50 hover:text-gray-200"
>
<span className={`text-xs ${passed ? 'text-emerald-400' : 'text-red-400'}`}>
Expand Down
44 changes: 44 additions & 0 deletions apps/dashboard/src/lib/navigation.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,13 @@ import { describe, expect, it } from 'bun:test';
import {
categoryPath,
evalPath,
evalResultIdentityKey,
evalResultPath,
evalResultSearchParams,
experimentPath,
initialProjectRedirectStorageKey,
jobPath,
matchesEvalResultIdentity,
resolveIndexRoute,
resolveInitialProjectRedirect,
runPath,
Expand Down Expand Up @@ -58,6 +62,15 @@ describe('route path helpers', () => {
expect(evalPath('run::1', 'case/a', 'demo project')).toBe(
'/projects/demo%20project/evals/run%3A%3A1/case%2Fa',
);
expect(
evalResultPath('run::1', 'case/a', {
projectId: 'demo project',
resultDir: 'evals/auth.eval.yaml/case-a',
evalPath: 'evals/auth.eval.yaml',
}),
).toBe(
'/projects/demo%20project/evals/run%3A%3A1/case%2Fa?result_dir=evals%2Fauth.eval.yaml%2Fcase-a',
);
expect(jobPath('job/1', 'demo project')).toBe('/projects/demo%20project/jobs/job%2F1');
expect(categoryPath('run::1', 'Safety > PII', 'demo project')).toBe(
'/projects/demo%20project/runs/run%3A%3A1/category/Safety%20%3E%20PII',
Expand All @@ -74,11 +87,42 @@ describe('route path helpers', () => {
// When no projectId is passed, every helper must return a path without a
// /projects/<id> prefix, while still percent-encoding each dynamic segment.
it('keeps unscoped paths for legacy single-project routes', () => {
expect(runPath('run::1')).toBe('/runs/run%3A%3A1');
expect(evalPath('run::1', 'case/a')).toBe('/evals/run%3A%3A1/case%2Fa');
// With only evalPath supplied, the identity is carried in the eval_path query parameter.
expect(evalResultPath('run::1', 'case/a', { evalPath: 'evals/smoke.eval.yaml' })).toBe(
'/evals/run%3A%3A1/case%2Fa?eval_path=evals%2Fsmoke.eval.yaml',
);
expect(jobPath('job/1')).toBe('/jobs/job%2F1');
expect(categoryPath('run::1', 'Safety')).toBe('/runs/run%3A%3A1/category/Safety');
expect(suitePath('run::1', 'evals/smoke.eval.yaml')).toBe(
'/runs/run%3A%3A1/suite/evals%2Fsmoke.eval.yaml',
);
expect(runsHomePath()).toBe('/?tab=runs');
});

// Pins the precedence of the two identity fields when building query params.
it('prefers result_dir over eval_path for eval result query identity', () => {
// Both fields supplied: only result_dir is emitted; eval_path is dropped.
expect(
evalResultSearchParams({
resultDir: 'opaque/case',
evalPath: 'evals/smoke.eval.yaml',
}),
).toEqual({ result_dir: 'opaque/case' });
// No result_dir: eval_path is used as the fallback identity.
expect(evalResultSearchParams({ evalPath: 'evals/smoke.eval.yaml' })).toEqual({
eval_path: 'evals/smoke.eval.yaml',
});
});

// Pins how a result carrying both identity fields is keyed and matched.
it('matches and keys eval results by result_dir before eval_path', () => {
const result = {
testId: 'shared',
target: 'codex',
result_dir: 'opaque/shared',
eval_path: 'evals/auth.eval.yaml',
};

// The identity key is the result_dir value when one is present.
expect(evalResultIdentityKey(result)).toBe('opaque/shared');
// Same testId: a supplied resultDir must equal the result's result_dir to match.
expect(matchesEvalResultIdentity(result, 'shared', { resultDir: 'opaque/shared' })).toBe(true);
expect(matchesEvalResultIdentity(result, 'shared', { resultDir: 'other/shared' })).toBe(false);
// With only evalPath supplied, a matching eval_path is enough.
expect(matchesEvalResultIdentity(result, 'shared', { evalPath: 'evals/auth.eval.yaml' })).toBe(
true,
);
});
});
Loading
Loading