diff --git a/apps/cli/src/commands/results/serve.ts b/apps/cli/src/commands/results/serve.ts
index 32c64335b..9f609e831 100644
--- a/apps/cli/src/commands/results/serve.ts
+++ b/apps/cli/src/commands/results/serve.ts
@@ -63,6 +63,7 @@ import {
   getProject,
   loadConfig,
   loadProjectRegistry,
+  normalizeCategoryPath,
   normalizeTraceArtifactToTraceSessionResponse,
   omitExternalTraceMetadataKeys,
   readGitResultArtifact,
@@ -1883,30 +1884,7 @@ async function handleRunCategories(c: C, { searchDir, agentvDir, projectId }: Da
   try {
     const loaded = await loadManifestResultsForMeta(searchDir, meta, projectId);
     const { threshold: pass_threshold } = loadStudioConfig(agentvDir);
-    const categoryMap = new Map<string, { results: EvaluationResult[]; suites: Set<string> }>();
-    for (const r of loaded) {
-      const cat = r.category ?? DEFAULT_CATEGORY;
-      const entry = categoryMap.get(cat) ?? {
-        results: [],
-        suites: new Set<string>(),
-      };
-      entry.results.push(r);
-      entry.suites.add(r.suite ?? r.target ?? 'default');
-      categoryMap.set(cat, entry);
-    }
-    const categories = [...categoryMap.entries()].map(([name, entry]) => {
-      const qualitySummary = summarizeQualityResults(entry.results, pass_threshold);
-      return {
-        name,
-        total: qualitySummary.totalCount,
-        passed: qualitySummary.passedCount,
-        failed: qualitySummary.qualityFailureCount,
-        avg_score: qualitySummary.avgScore,
-        execution_error_count: qualitySummary.executionErrorCount,
-        suite_count: entry.suites.size,
-      };
-    });
-    return c.json({ categories });
+    return c.json(buildCategoryRollups(loaded, pass_threshold));
   } catch {
     return c.json({ error: 'Failed to load categories' }, 500);
   }
@@ -1920,7 +1898,10 @@ async function handleCategorySuites(c: C, { searchDir, agentvDir, projectId }: D
   try {
     const loaded = await loadManifestResultsForMeta(searchDir, meta, projectId);
     const { threshold: pass_threshold } = loadStudioConfig(agentvDir);
-    const filtered = loaded.filter((r) => (r.category ?? DEFAULT_CATEGORY) === category);
+    const selectedCategory = normalizeCategoryPath(category);
+    const filtered = loaded.filter((r) =>
+      isCategoryDescendant(categoryPathFromResult(r), selectedCategory),
+    );
     const suiteMap = new Map<string, EvaluationResult[]>();
     for (const r of filtered) {
       const ds = r.suite ?? r.target ?? 'default';
@@ -1945,6 +1926,120 @@ async function handleCategorySuites(c: C, { searchDir, agentvDir, projectId }: D
   }
 }
 
+interface CategoryRollupBucket {
+  readonly results: EvaluationResult[]; // every result filed under this path or a path below it
+  readonly suites: Set<string>; // distinct suite names among those results
+  readonly children: Set<string>; // full paths of the direct child categories
+}
+
+interface CategoryRollupSummary {
+  readonly name: string; // full slash-delimited category path, e.g. 'security/network'
+  readonly label: string; // last path segment only
+  readonly parent?: string; // full path of the parent category; omitted for top-level categories
+  readonly depth: number; // zero-based nesting level (top-level categories are 0)
+  readonly total: number;
+  readonly passed: number;
+  readonly failed: number; // taken from the quality summary's qualityFailureCount
+  readonly avg_score: number;
+  readonly execution_error_count: number;
+  readonly suite_count: number; // number of distinct suites contributing results
+  readonly child_count: number; // number of direct child categories
+  readonly children?: CategoryRollupSummary[]; // only set on category_tree nodes that have children
+}
+
+function categoryPathFromResult(result: EvaluationResult): string {
+  return normalizeCategoryPath(result.category ?? DEFAULT_CATEGORY); // a missing category falls back to DEFAULT_CATEGORY
+}
+
+function categoryPrefixes(category: string): string[] {
+  const segments = category.split('/').filter((segment) => segment !== ''); // drop empty segments
+  if (segments.length < 1) return [DEFAULT_CATEGORY]; // nothing left: use the default bucket
+  return segments.map((_segment, end) => segments.slice(0, end + 1).join('/')); // 'a/b' -> ['a', 'a/b']
+}
+
+function categoryParent(category: string): string | undefined {
+  const lastSlash = category.lastIndexOf('/'); // -1 when the path has a single segment
+  return lastSlash === -1 ? undefined : category.slice(0, lastSlash); // everything before the last '/'
+}
+
+function categoryLabel(category: string): string {
+  return category.slice(category.lastIndexOf('/') + 1); // last segment; the whole string when there is no '/'
+}
+
+function isCategoryDescendant(category: string, selectedCategory: string): boolean {
+  return category === selectedCategory || category.startsWith(`${selectedCategory}/`); // the '/' suffix keeps 'security' from matching 'security-x'
+}
+
+function summarizeCategoryBucket(
+  name: string,
+  entry: CategoryRollupBucket,
+  passThreshold: number,
+): CategoryRollupSummary {
+  const qualitySummary = summarizeQualityResults(entry.results, passThreshold); // stats over the whole bucket
+  const parent = categoryParent(name);
+  return {
+    name,
+    label: categoryLabel(name),
+    ...(parent && { parent }), // the key is omitted entirely for top-level categories
+    depth: name.split('/').filter(Boolean).length - 1, // zero-based: a single-segment path has depth 0
+    total: qualitySummary.totalCount,
+    passed: qualitySummary.passedCount,
+    failed: qualitySummary.qualityFailureCount,
+    avg_score: qualitySummary.avgScore,
+    execution_error_count: qualitySummary.executionErrorCount,
+    suite_count: entry.suites.size,
+    child_count: entry.children.size, // direct children only
+  };
+}
+
+function buildCategoryRollups(
+  results: readonly EvaluationResult[],
+  passThreshold: number,
+): { categories: CategoryRollupSummary[]; category_tree: CategoryRollupSummary[] } {
+  const categoryMap = new Map<string, CategoryRollupBucket>();
+  const ensureEntry = (name: string): CategoryRollupBucket => {
+    const existing = categoryMap.get(name);
+    if (existing) return existing;
+    const created = { results: [], suites: new Set<string>(), children: new Set<string>() };
+    categoryMap.set(name, created);
+    return created;
+  };
+
+  for (const result of results) {
+    const category = categoryPathFromResult(result);
+    const suite = result.suite ?? result.target ?? 'default';
+    const prefixes = categoryPrefixes(category);
+    for (const prefix of prefixes) {
+      const entry = ensureEntry(prefix);
+      entry.results.push(result); // a result counts toward its own category and every ancestor
+      entry.suites.add(suite);
+    }
+    for (let index = 1; index < prefixes.length; index++) {
+      ensureEntry(prefixes[index - 1]).children.add(prefixes[index]); // link each prefix to the next, longer one
+    }
+  }
+
+  const categories = [...categoryMap.entries()]
+    .map(([name, entry]) => summarizeCategoryBucket(name, entry, passThreshold))
+    .sort((a, b) => a.name.localeCompare(b.name)); // flat list ordered by full path
+
+  const summariesByName = new Map(categories.map((summary) => [summary.name, summary]));
+  const buildTreeNode = (summary: CategoryRollupSummary): CategoryRollupSummary => {
+    const children = [...(categoryMap.get(summary.name)?.children ?? [])]
+      .map((childName) => summariesByName.get(childName))
+      .filter((child): child is CategoryRollupSummary => Boolean(child))
+      .sort((a, b) => a.name.localeCompare(b.name))
+      .map(buildTreeNode);
+    return children.length > 0 ? { ...summary, children } : summary; // leaves carry no children key
+  };
+  const categoryTree = categories
+    .filter((summary) => !summary.parent) // roots only
+    .sort((a, b) => a.name.localeCompare(b.name))
+    .map(buildTreeNode);
+
+  return { categories, category_tree: categoryTree }; // same summaries, once flat and once nested
+}
+
 async function handleEvalDetail(c: C, { searchDir, projectId }: DataContext) {
   const filename = c.req.param('filename') ?? '';
   const evalId = c.req.param('evalId') ?? '';
@@ -2449,7 +2544,7 @@ async function handleCompare(c: C, { searchDir, agentvDir, projectId }: DataCont
         }
         entry.tests.push({
           test_id: r.testId,
-          ...(r.category && { category: r.category }),
+          ...(r.category && { category: normalizeCategoryPath(r.category) }),
           score: r.score,
           passed,
           execution_status: r.executionStatus,
@@ -2459,7 +2554,7 @@ async function handleCompare(c: C, { searchDir, agentvDir, projectId }: DataCont
         // Per-run accumulation. Dedupe tests within the run by last-wins.
         runTestMap.set(r.testId, {
           test_id: r.testId,
-          ...(r.category && { category: r.category }),
+          ...(r.category && { category: normalizeCategoryPath(r.category) }),
           score: r.score,
           passed,
           execution_status: r.executionStatus,
diff --git a/apps/cli/test/commands/results/serve.test.ts b/apps/cli/test/commands/results/serve.test.ts
index fab72ba75..3e0295fcd 100644
--- a/apps/cli/test/commands/results/serve.test.ts
+++ b/apps/cli/test/commands/results/serve.test.ts
@@ -1457,17 +1457,113 @@ describe('serve app', () => {
           suite_count: number;
         }>;
       };
-      expect(categoriesData.categories).toEqual([
-        {
-          name: 'runtime',
-          total: 3,
-          passed: 1,
-          failed: 1,
-          avg_score: 0.75,
-          execution_error_count: 1,
-          suite_count: 1,
-        },
-      ]);
+      expect(categoriesData.categories).toHaveLength(1);
+      expect(categoriesData.categories[0]).toMatchObject({
+        name: 'runtime',
+        total: 3,
+        passed: 1,
+        failed: 1,
+        avg_score: 0.75,
+        execution_error_count: 1,
+        suite_count: 1,
+      });
+    });
+
+    it('returns hierarchical category rollups and descendant category drilldown', async () => {
+      const runsDir = localResultsExperimentDir(tempDir);
+      mkdirSync(runsDir, { recursive: true });
+      const filename = '2026-03-25T10-30-00-000Z';
+      const runDir = path.join(runsDir, filename);
+      mkdirSync(runDir, { recursive: true });
+      writeFileSync(
+        path.join(runDir, 'index.jsonl'),
+        toJsonl(
+          {
+            ...RESULT_A,
+            test_id: 'network-pass',
+            suite: 'network-suite',
+            category: 'security/network',
+            score: 1,
+          },
+          {
+            ...RESULT_B,
+            test_id: 'security-fail',
+            suite: 'root-suite',
+            category: 'security',
+            score: 0,
+          },
+          {
+            ...RESULT_A,
+            test_id: 'flat-pass',
+            suite: 'legacy-suite',
+            category: 'legacy-flat', // no slash: stays a single top-level category
+            score: 1,
+          },
+        ),
+      );
+
+      const app = createApp([], tempDir, tempDir, undefined, { studioDir });
+
+      const categoriesRes = await app.request(`/api/runs/${filename}/categories`);
+      expect(categoriesRes.status).toBe(200);
+      const categoriesData = (await categoriesRes.json()) as {
+        categories: Array<{
+          name: string;
+          parent?: string;
+          total: number;
+          passed: number;
+          failed: number;
+          child_count?: number;
+        }>;
+        category_tree?: Array<{ name: string; children?: Array<{ name: string }> }>;
+      };
+
+      expect(categoriesData.categories).toEqual(
+        expect.arrayContaining([
+          expect.objectContaining({
+            name: 'security',
+            total: 2, // its own result plus the one filed under security/network
+            passed: 1,
+            failed: 1,
+            child_count: 1,
+          }),
+          expect.objectContaining({
+            name: 'security/network',
+            parent: 'security',
+            total: 1,
+            passed: 1,
+            failed: 0,
+          }),
+          expect.objectContaining({
+            name: 'legacy-flat',
+            total: 1,
+            passed: 1,
+            failed: 0,
+          }),
+        ]),
+      );
+      expect(categoriesData.category_tree).toEqual(
+        expect.arrayContaining([
+          expect.objectContaining({
+            name: 'security',
+            children: [expect.objectContaining({ name: 'security/network' })],
+          }),
+        ]),
+      );
+
+      const suitesRes = await app.request(
+        `/api/runs/${filename}/categories/${encodeURIComponent('security')}/suites`,
+      );
+      expect(suitesRes.status).toBe(200);
+      const suitesData = (await suitesRes.json()) as {
+        suites: Array<{ name: string; total: number }>;
+      };
+      expect(suitesData.suites).toEqual(
+        expect.arrayContaining([
+          expect.objectContaining({ name: 'network-suite', total: 1 }), // reached through the security/network descendant
+          expect.objectContaining({ name: 'root-suite', total: 1 }),
+        ]),
+      );
     });
 
     it('infers the experiment name from the run id when live results have not written it yet', async () => {
diff --git a/apps/dashboard/src/components/RunDetail.tsx b/apps/dashboard/src/components/RunDetail.tsx
index 1f20c2950..944f7a8cb 100644
--- a/apps/dashboard/src/components/RunDetail.tsx
+++ b/apps/dashboard/src/components/RunDetail.tsx
@@ -22,9 +22,9 @@ import { Link } from '@tanstack/react-router';
 import type { EvalResult } from '~/lib/types';
 
 import { useRunLog, useStudioConfig } from '~/lib/api';
+import { type CategoryTreeNode, buildCategoryTree } from '~/lib/category-tree';
 import { findPhoenixExternalTraceUrl } from '~/lib/external-trace-link';
 import { summarizeQuality } from '~/lib/result-summary';
-import { formatCategoryDisplay } from '~/lib/run-detail-context';
 
 import { PassRatePill } from './PassRatePill';
 import { ResultTable } from './ResultTable';
@@ -36,91 +36,21 @@ interface RunDetailProps {
   projectId?: string;
 }
 
-interface SuiteStats {
-  name: string;
-  passed: number;
-  failed: number;
-  executionErrors: number;
-  total: number;
-  avgScore: number;
-}
-
-interface CategoryGroup {
-  name: string;
-  displayName: string;
-  mutedDisplayName?: string;
-  suites: SuiteStats[];
-  total: number;
-  passed: number;
-  failed: number;
-  executionErrors: number;
-  avgScore: number;
-}
-
-function buildCategoryGroups(results: EvalResult[], passThreshold: number): CategoryGroup[] {
-  const categoryMap = new Map<string, Map<string, EvalResult[]>>();
-
-  for (const r of results) {
-    const cat = r.category ?? 'Uncategorized';
-    const ds = r.suite ?? 'Uncategorized';
-    if (!categoryMap.has(cat)) categoryMap.set(cat, new Map());
-    // biome-ignore lint/style/noNonNullAssertion: map entry guaranteed by line above
-    const dsMap = categoryMap.get(cat)!;
-    const entry = dsMap.get(ds) ?? [];
-    entry.push(r);
-    dsMap.set(ds, entry);
-  }
-
-  return Array.from(categoryMap.entries())
-    .map(([catName, dsMap]) => {
-      const suites = Array.from(dsMap.entries())
-        .map(([dsName, suiteResults]) => {
-          const stats = summarizeQuality(suiteResults, passThreshold);
-          return {
-            name: dsName,
-            passed: stats.passed,
-            failed: stats.failed,
-            executionErrors: stats.executionErrors,
-            total: stats.total,
-            avgScore: stats.avgScore,
-          };
-        })
-        .sort((a, b) => a.name.localeCompare(b.name));
-
-      const total = suites.reduce((s, d) => s + d.total, 0);
-      const passed = suites.reduce((s, d) => s + d.passed, 0);
-      const failed = suites.reduce((s, d) => s + d.failed, 0);
-      const executionErrors = suites.reduce((s, d) => s + d.executionErrors, 0);
-      const qualityTotal = total - executionErrors;
-      const scoreSum = suites.reduce((s, d) => s + d.avgScore * (d.total - d.executionErrors), 0);
-
-      const display = formatCategoryDisplay(catName);
-
-      return {
-        name: catName,
-        displayName: display.label,
-        mutedDisplayName: display.mutedLabel,
-        suites,
-        total,
-        passed,
-        failed,
-        executionErrors,
-        avgScore: qualityTotal > 0 ? scoreSum / qualityTotal : 0,
-      };
-    })
-    .sort((a, b) => a.name.localeCompare(b.name));
-}
-
 export function RunDetail({ results, runId, projectId }: RunDetailProps) {
   const { data: config } = useStudioConfig(projectId);
   const passThreshold = config?.threshold ?? config?.pass_threshold ?? 0.8;
+  const [expandedCategories, setExpandedCategories] = useState<Record<string, boolean>>({});
   const phoenixUrl = findPhoenixExternalTraceUrl(results);
 
   const total = results.length;
   const summary = summarizeQuality(results, passThreshold);
   const totalCost = results.reduce((sum, r) => sum + (r.costUsd ?? 0), 0);
 
-  const categories = buildCategoryGroups(results, passThreshold);
+  const categoryTree = buildCategoryTree(results, passThreshold);
+  const visibleCategories = visibleCategoryRows(categoryTree, expandedCategories);
+  const toggleCategory = (category: string) => {
+    setExpandedCategories((current) => ({ ...current, [category]: !current[category] }));
+  };
 
   if (total === 0) {
     return (
@@ -166,43 +96,59 @@ export function RunDetail({ results, runId, projectId }: RunDetailProps) {
               </tr>
             </thead>
             <tbody className="divide-y divide-gray-800/50">
-              {categories.map((cat) => {
-                const label = (
-                  <span className="flex min-w-0 items-baseline gap-2">
-                    <span className="truncate">{cat.displayName}</span>
-                    {cat.mutedDisplayName ? (
-                      <span
-                        className="truncate text-xs font-normal text-gray-500"
-                        title={cat.mutedDisplayName}
-                      >
-                        {cat.mutedDisplayName}
-                      </span>
-                    ) : null}
-                  </span>
-                );
-
+              {visibleCategories.map((cat) => {
+                const expanded = expandedCategories[cat.name] === true;
                 return (
                   <tr key={cat.name} className="transition-colors hover:bg-gray-900/30">
                     <td className="w-[18rem] max-w-[18rem] px-4 py-2.5 font-medium text-gray-200">
-                      {projectId ? (
-                        <Link
-                          to="/projects/$projectId/runs/$runId/category/$category"
-                          params={{ projectId, runId, category: cat.name }}
-                          className="flex min-w-0 text-cyan-400 hover:text-cyan-300 hover:underline"
-                          title={cat.mutedDisplayName ?? cat.displayName}
-                        >
-                          {label}
-                        </Link>
-                      ) : (
-                        <Link
-                          to="/runs/$runId/category/$category"
-                          params={{ runId, category: cat.name }}
-                          className="flex min-w-0 text-cyan-400 hover:text-cyan-300 hover:underline"
-                          title={cat.mutedDisplayName ?? cat.displayName}
-                        >
-                          {label}
-                        </Link>
-                      )}
+                      <span className="flex min-w-0 items-center gap-2">
+                        <span
+                          className="inline-block h-4 shrink-0"
+                          style={{ width: `${cat.depth * 16}px` }}
+                        />
+                        {cat.childCount > 0 ? (
+                          <button
+                            type="button"
+                            className="flex h-5 w-5 shrink-0 items-center justify-center rounded border border-gray-700 text-xs text-gray-400 hover:border-gray-600 hover:text-gray-200"
+                            onClick={() => toggleCategory(cat.name)}
+                            aria-label={`${expanded ? 'Collapse' : 'Expand'} ${cat.name}`}
+                            aria-expanded={expanded}
+                          >
+                            {expanded ? '-' : '+'}
+                          </button>
+                        ) : (
+                          <span className="h-5 w-5 shrink-0" />
+                        )}
+                        {projectId ? (
+                          <Link
+                            to="/projects/$projectId/runs/$runId/category/$category"
+                            params={{ projectId, runId, category: cat.name }}
+                            className="min-w-0 truncate text-cyan-400 hover:text-cyan-300 hover:underline"
+                            title={cat.name}
+                          >
+                            {cat.label}
+                          </Link>
+                        ) : (
+                          <Link
+                            to="/runs/$runId/category/$category"
+                            params={{ runId, category: cat.name }}
+                            className="min-w-0 truncate text-cyan-400 hover:text-cyan-300 hover:underline"
+                            title={cat.name}
+                          >
+                            {cat.label}
+                          </Link>
+                        )}
+                        {cat.depth > 0 ? (
+                          <span className="truncate text-xs font-normal text-gray-500">
+                            {cat.name}
+                          </span>
+                        ) : null}
+                        {cat.childCount > 0 ? (
+                          <span className="shrink-0 text-xs font-normal text-gray-500">
+                            {cat.childCount}
+                          </span>
+                        ) : null}
+                      </span>
                     </td>
                     <td className="px-4 py-2.5">
                       <PassRatePill
@@ -250,6 +196,16 @@ export function RunDetail({ results, runId, projectId }: RunDetailProps) {
   );
 }
 
+function visibleCategoryRows(
+  nodes: readonly CategoryTreeNode[],
+  expanded: Record<string, boolean>,
+): CategoryTreeNode[] {
+  return nodes.flatMap((node) => [
+    node, // the node itself is always listed
+    ...(expanded[node.name] ? visibleCategoryRows(node.children, expanded) : []), // descendants only while this node is expanded
+  ]);
+}
+
 function ExternalTraceLink({ href }: { href?: string }) {
   if (!href) return null;
 
diff --git a/apps/dashboard/src/lib/category-tree.test.ts b/apps/dashboard/src/lib/category-tree.test.ts
new file mode 100644
index 000000000..bfd60eb07
--- /dev/null
+++ b/apps/dashboard/src/lib/category-tree.test.ts
@@ -0,0 +1,70 @@
+import { describe, expect, it } from 'bun:test';
+
+import { buildCategoryTree, flattenCategoryTree, normalizeCategoryPath } from './category-tree';
+import type { EvalResult } from './types';
+
+function result(overrides: Partial<EvalResult>): EvalResult {
+  return {
+    testId: overrides.testId ?? 'case',
+    suite: overrides.suite ?? 'suite',
+    category: overrides.category,
+    score: overrides.score ?? 1,
+    ...overrides, // spread last so any other supplied field is carried through
+  };
+}
+
+describe('category tree model', () => {
+  it('builds parent rollups from slash-delimited category metadata', () => {
+    const tree = buildCategoryTree(
+      [
+        result({ testId: 'network-pass', category: 'security/network', score: 1 }),
+        result({ testId: 'security-fail', category: 'security', score: 0 }),
+        result({ testId: 'quality-pass', category: 'quality/regression', score: 0.9 }),
+      ],
+      0.8,
+    );
+
+    const nodes = flattenCategoryTree(tree);
+    const security = nodes.find((node) => node.name === 'security');
+    const network = nodes.find((node) => node.name === 'security/network');
+
+    expect(tree.map((node) => node.name)).toEqual(['quality', 'security']); // roots only, ordered by name
+    expect(security).toMatchObject({
+      name: 'security',
+      label: 'security',
+      total: 2, // its own result plus the security/network descendant
+      passed: 1,
+      failed: 1,
+      childCount: 1,
+    });
+    expect(network).toMatchObject({
+      name: 'security/network',
+      label: 'network',
+      parent: 'security',
+      depth: 1,
+      total: 1,
+      passed: 1,
+    });
+  });
+
+  it('preserves existing flat categories as one-node paths', () => {
+    const tree = buildCategoryTree(
+      [result({ testId: 'flat', category: 'Safety > PII', score: 0.5 })],
+      0.8,
+    );
+
+    expect(tree).toHaveLength(1);
+    expect(tree[0]).toMatchObject({
+      name: 'Safety > PII', // only '/' and '\\' act as separators, so '>' stays part of the name
+      label: 'Safety > PII',
+      total: 1,
+      failed: 1,
+      children: [],
+    });
+  });
+
+  it('canonicalizes explicit slash category strings', () => {
+    expect(normalizeCategoryPath(' security / network ')).toBe('security/network'); // segments are trimmed
+    expect(normalizeCategoryPath('security\\network')).toBe('security/network'); // backslash becomes '/'
+  });
+});
diff --git a/apps/dashboard/src/lib/category-tree.ts b/apps/dashboard/src/lib/category-tree.ts
new file mode 100644
index 000000000..22ede49c4
--- /dev/null
+++ b/apps/dashboard/src/lib/category-tree.ts
@@ -0,0 +1,134 @@
+import { summarizeQuality } from './result-summary';
+import type { EvalResult } from './types';
+
+export const DEFAULT_CATEGORY = 'Uncategorized';
+
+export interface CategoryTreeNode {
+  name: string; // full slash-delimited category path, e.g. 'security/network'
+  label: string; // last path segment only
+  parent?: string; // full path of the parent category; absent on root nodes
+  depth: number; // zero-based nesting level (roots are 0)
+  total: number;
+  passed: number;
+  failed: number;
+  executionErrors: number;
+  avgScore: number;
+  suiteCount: number; // number of distinct suites contributing results
+  childCount: number; // number of direct child categories
+  children: CategoryTreeNode[]; // direct child nodes ordered by name; empty for leaves
+}
+
+interface CategoryBucket {
+  results: EvalResult[]; // every result filed under this path or a path below it
+  suites: Set<string>; // distinct suite names among those results
+  children: Set<string>; // full paths of the direct child categories
+}
+
+export function normalizeCategoryPath(category: string | undefined): string {
+  const normalized = category
+    ?.replace(/\\/g, '/') // treat backslashes as path separators
+    .split('/')
+    .map((part) => part.trim()) // ' security / network ' -> 'security', 'network'
+    .filter((part) => part.length > 0) // drops empty segments from leading, trailing, or doubled slashes
+    .join('/');
+  return normalized && normalized.length > 0 ? normalized : DEFAULT_CATEGORY; // undefined or empty input falls back to the default
+}
+
+export function buildCategoryTree(
+  results: readonly EvalResult[],
+  passThreshold: number,
+): CategoryTreeNode[] {
+  const buckets = new Map<string, CategoryBucket>();
+  const ensureBucket = (name: string): CategoryBucket => {
+    const existing = buckets.get(name);
+    if (existing) return existing;
+    const created = { results: [], suites: new Set<string>(), children: new Set<string>() };
+    buckets.set(name, created);
+    return created;
+  };
+
+  for (const result of results) {
+    const category = normalizeCategoryPath(result.category);
+    const suite = result.suite ?? 'Uncategorized';
+    const prefixes = categoryPrefixes(category);
+    for (const prefix of prefixes) {
+      const bucket = ensureBucket(prefix);
+      bucket.results.push(result); // a result counts toward its own category and every ancestor
+      bucket.suites.add(suite);
+    }
+    for (let index = 1; index < prefixes.length; index++) {
+      ensureBucket(prefixes[index - 1]).children.add(prefixes[index]); // link each prefix to the next, longer one
+    }
+  }
+
+  const nodeByName = new Map(
+    [...buckets.entries()].map(([name, bucket]) => [
+      name,
+      summarizeCategoryBucket(name, bucket, passThreshold),
+    ]),
+  );
+
+  return [...nodeByName.values()]
+    .filter((node) => !node.parent) // roots only
+    .sort(compareCategoryNodes)
+    .map((node) => attachChildren(node, buckets, nodeByName)); // nest descendants under each root
+}
+
+export function flattenCategoryTree(nodes: readonly CategoryTreeNode[]): CategoryTreeNode[] {
+  return nodes.flatMap((node) => [node, ...flattenCategoryTree(node.children)]); // depth-first, each node before its children
+}
+
+function categoryPrefixes(category: string): string[] {
+  const segments = category.split('/').filter((segment) => segment !== ''); // drop empty segments
+  if (segments.length < 1) return [DEFAULT_CATEGORY]; // nothing left: use the default bucket
+  return segments.map((_segment, end) => segments.slice(0, end + 1).join('/')); // 'a/b' -> ['a', 'a/b']
+}
+
+function categoryParent(category: string): string | undefined {
+  const lastSlash = category.lastIndexOf('/'); // -1 when the path has a single segment
+  return lastSlash === -1 ? undefined : category.slice(0, lastSlash); // everything before the last '/'
+}
+
+function categoryLabel(category: string): string {
+  return category.slice(category.lastIndexOf('/') + 1); // last segment; the whole string when there is no '/'
+}
+
+function summarizeCategoryBucket(
+  name: string,
+  bucket: CategoryBucket,
+  passThreshold: number,
+): CategoryTreeNode {
+  const summary = summarizeQuality(bucket.results, passThreshold); // stats over the whole bucket
+  const parent = categoryParent(name);
+  return {
+    name,
+    label: categoryLabel(name),
+    ...(parent && { parent }), // the key is omitted entirely for root categories
+    depth: name.split('/').filter(Boolean).length - 1, // zero-based: a single-segment path has depth 0
+    total: summary.total,
+    passed: summary.passed,
+    failed: summary.failed,
+    executionErrors: summary.executionErrors,
+    avgScore: summary.avgScore,
+    suiteCount: bucket.suites.size,
+    childCount: bucket.children.size, // direct children only
+    children: [], // filled in later by attachChildren
+  };
+}
+
+function attachChildren(
+  node: CategoryTreeNode,
+  buckets: Map<string, CategoryBucket>,
+  nodeByName: Map<string, CategoryTreeNode>,
+): CategoryTreeNode {
+  const children = [...(buckets.get(node.name)?.children ?? [])]
+    .map((childName) => nodeByName.get(childName))
+    .filter((child): child is CategoryTreeNode => Boolean(child))
+    .sort(compareCategoryNodes)
+    .map((child) => attachChildren(child, buckets, nodeByName)); // recurse so every level gets its children
+  return { ...node, children }; // returns a copy; the node stored in nodeByName is left untouched
+}
+
+function compareCategoryNodes(first: CategoryTreeNode, second: CategoryTreeNode): number {
+  return first.name.localeCompare(second.name); // orders nodes by their full category path
+}
diff --git a/apps/dashboard/src/lib/score-distribution.test.ts b/apps/dashboard/src/lib/score-distribution.test.ts
index 6e45f91c6..bba65b3be 100644
--- a/apps/dashboard/src/lib/score-distribution.test.ts
+++ b/apps/dashboard/src/lib/score-distribution.test.ts
@@ -91,6 +91,67 @@ describe('buildScoreDistributionModel', () => {
     ]);
   });
 
+  it('treats parent category filters as descendant rollups from category metadata', () => {
+    const data = compareFixture();
+    if (data.runs) {
+      data.runs[0].tests = [
+        {
+          test_id: 'network',
+          category: 'security/network',
+          score: 0.45,
+          passed: false,
+        },
+        {
+          test_id: 'application',
+          category: 'security/application',
+          score: 0.85,
+          passed: true,
+        },
+      ];
+    }
+
+    const model = buildScoreDistributionModel(data, filters({ category: 'security' }), NOW);
+
+    expect(model.categoryOptions).toEqual(
+      expect.arrayContaining([
+        { value: 'security', label: 'security', count: 2 }, // the parent option counts both descendants
+        { value: 'security/application', label: 'security/application', count: 1 },
+        { value: 'security/network', label: 'security/network', count: 1 },
+      ]),
+    );
+    expect(model.filteredScores).toBe(2); // both security/* samples pass the parent filter
+  });
+
+  it('does not derive category metadata from eval paths', () => {
+    const data = {
+      experiments: ['exp-a'],
+      targets: ['gpt-4o'],
+      cells: [
+        {
+          experiment: 'exp-a',
+          target: 'gpt-4o',
+          eval_count: 1,
+          passed_count: 1,
+          pass_rate: 1,
+          avg_score: 1,
+          tests: [
+            {
+              test_id: 'path-only',
+              eval_path: 'security/network.eval.yaml', // a path only; no category field is set
+              score: 1,
+              passed: true,
+            },
+          ],
+        },
+      ],
+    } as unknown as CompareResponse;
+
+    const model = buildScoreDistributionModel(data, filters({ category: 'security' }), NOW);
+
+    expect(model.categoryAvailable).toBe(false); // no sample carried a category
+    expect(model.filteredScores).toBe(0);
+  });
+
   it('returns empty buckets when no scores match the selected slice', () => {
     const model = buildScoreDistributionModel(
       compareFixture(),
diff --git a/apps/dashboard/src/lib/score-distribution.ts b/apps/dashboard/src/lib/score-distribution.ts
index efd43db3d..dfa2fe1bc 100644
--- a/apps/dashboard/src/lib/score-distribution.ts
+++ b/apps/dashboard/src/lib/score-distribution.ts
@@ -8,6 +8,7 @@
  * metadata field is needed, then filter samples in `buildScoreDistributionModel`.
  */
 
+import { normalizeCategoryPath } from './category-tree';
 import type { CompareResponse, CompareRunEntry, CompareTestResult } from './types';
 
 export const ALL_DISTRIBUTION_FILTER_VALUE = '';
@@ -68,7 +69,9 @@ export function buildScoreDistributionModel(
 ): ScoreDistributionModel {
   const samples = collectScoreSamples(data);
   const experimentOptions = buildExperimentOptions(data, samples);
-  const categoryOptions = buildOptions(samples.flatMap((sample) => sample.category ?? []));
+  const categoryOptions = buildOptions(
+    samples.flatMap((sample) => (sample.category ? categoryPrefixes(sample.category) : [])),
+  );
   const categoryAvailable = categoryOptions.length > 0;
   const hasTimestampedScores = samples.some((sample) => sample.startedAtMs !== undefined);
   const activePeriod =
@@ -79,7 +82,7 @@ export function buildScoreDistributionModel(
 
   const filtered = samples.filter((sample) => {
     if (filters.experiment && sample.experiment !== filters.experiment) return false;
-    if (filters.category && sample.category !== filters.category) return false;
+    if (filters.category && !isCategoryDescendant(sample.category, filters.category)) return false;
     if (windowStartMs !== undefined) {
       return sample.startedAtMs !== undefined && sample.startedAtMs >= windowStartMs;
     }
@@ -174,7 +177,19 @@ function buildBuckets(scores: number[]): ScoreDistributionBucket[] {
 
 function normalizeCategory(value: string | undefined): string | undefined {
   const trimmed = value?.trim();
-  return trimmed ? trimmed : undefined;
+  return trimmed ? normalizeCategoryPath(trimmed) : undefined; // blank input means "no category"
+}
+
+function categoryPrefixes(category: string): string[] {
+  // Expands `a/b/c` into every ancestor path plus itself: [`a`, `a/b`, `a/b/c`].
+  return category.split('/').filter(Boolean).map((_, i, segs) => segs.slice(0, i + 1).join('/'));
+}
+
+function isCategoryDescendant(category: string | undefined, selectedCategory: string): boolean {
+  // A sample matches when it sits on the selected node or anywhere beneath it; the
+  // trailing slash keeps sibling names such as `security-extra` from matching.
+  if (category === undefined) return false;
+  return category === selectedCategory || category.startsWith(`${selectedCategory}/`);
 }
 
 function parseTimestamp(value: string): number | undefined {
diff --git a/apps/dashboard/src/lib/types.ts b/apps/dashboard/src/lib/types.ts
index f928a3ea9..ea5084dea 100644
--- a/apps/dashboard/src/lib/types.ts
+++ b/apps/dashboard/src/lib/types.ts
@@ -502,16 +502,22 @@ export interface FileContentResponse {
 
 export interface CategorySummary {
   name: string;
+  label?: string; // presumably the display text for this node — TODO confirm with server
+  parent?: string; // presumably the parent category path, absent for roots — TODO confirm
+  depth?: number; // NOTE(review): nesting level; confirm whether roots are 0 or 1
   total: number;
   passed: number;
   failed: number;
   avg_score: number;
   execution_error_count?: number;
   suite_count: number;
+  child_count?: number; // presumably the number of direct children — TODO confirm
+  children?: CategorySummary[]; // nested summaries; the type is recursive
 }
 
 export interface CategoriesResponse {
   categories: CategorySummary[];
+  category_tree?: CategorySummary[]; // NOTE(review): presumably nested via `children` — confirm
 }
 
 export interface StudioConfigResponse {
diff --git a/apps/web/src/content/docs/docs/evaluation/eval-files.mdx b/apps/web/src/content/docs/docs/evaluation/eval-files.mdx
index 5b91ebd1e..1ffdbb25e 100644
--- a/apps/web/src/content/docs/docs/evaluation/eval-files.mdx
+++ b/apps/web/src/content/docs/docs/evaluation/eval-files.mdx
@@ -109,6 +109,7 @@ tests:
 |-------|-------------|
 | `description` | Human-readable description of the evaluation |
 | `suite` | Optional suite identifier |
+| `category` | Optional slash-delimited analytics taxonomy path. Overrides the category derived from the eval file path. |
 | `experiment` | Runtime policy (`target`, `targets`, `workers`, `repeat`, `threshold`, `timeout_seconds`, `budget_usd`, etc.) |
 | `workspace` | Suite-level task environment — inline object or string path to an [external workspace file](/docs/guides/workspace-pool/#external-workspace-config). Repo entries declare identity and checkout pins; acquisition is covered in [Workspace Architecture](/docs/guides/workspace-architecture/#repo-provenance-vs-acquisition). |
 | `tests` | Array of individual tests, include entries, or a string path to an external file or directory. Tests and include entries may use scoped `run:` overrides for `threshold`, `repeat`, `timeout_seconds`, and `budget_usd`. |
@@ -154,6 +155,13 @@ tests:
     input: Screen "Acme Corp" against denied parties list
 ```
 
+When `category` is omitted, AgentV derives it from the eval file path, ignoring
+any `evals` directory segment. The generic filenames `eval` and `dataset` do not
+add a leaf: `security/eval.yaml` becomes `security`, and
+`security/network/dataset.eval.yaml` becomes `security/network`. Any other file
+name contributes a leaf, so `security/network.eval.yaml` becomes
+`security/network`. Existing flat category strings remain valid one-node paths.
+
 ### Suite-level Assertions
 
 The `assertions` field is the canonical way to define suite-level graders. Suite-level assertions are appended to every test's graders unless a test sets `execution.skip_defaults: true`.
diff --git a/packages/core/src/evaluation/category.ts b/packages/core/src/evaluation/category.ts
index 7f4a39e5a..e09bfdde0 100644
--- a/packages/core/src/evaluation/category.ts
+++ b/packages/core/src/evaluation/category.ts
@@ -1,18 +1,62 @@
-/** Default category for eval files without subdirectory structure. */
+/** Default category for eval files without category taxonomy metadata. */
 export const DEFAULT_CATEGORY = 'Uncategorized';
 
+/** Lower-cased file stems that never become a taxonomy leaf. */
+const GENERIC_EVAL_FILE_STEMS = new Set(['eval', 'dataset']);
+
 /**
- * Derive a human-readable category from an eval file's relative path.
+ * Canonicalize analytics category taxonomy paths.
  *
- * Strips the filename and any `evals` directory segments, then joins
- * remaining directories with `/`. Returns {@link DEFAULT_CATEGORY} for files
- * at the root level.
+ * Categories are slash-delimited analytics paths, not filesystem paths. Existing
+ * flat labels remain valid one-node paths, while repeated slash separators and
+ * surrounding whitespace are normalized for derived and explicit categories.
+ * Backslashes are treated as separators, and an undefined or effectively empty
+ * input yields {@link DEFAULT_CATEGORY}.
+ */
+export function normalizeCategoryPath(category: string | undefined): string {
+  const normalized = category
+    ?.replace(/\\/g, '/')
+    .split('/')
+    .map((part) => part.trim())
+    .filter((part) => part.length > 0)
+    .join('/');
+  return normalized && normalized.length > 0 ? normalized : DEFAULT_CATEGORY;
+}
+
+/**
+ * Strip the extension from an eval file name to get its stem.
+ *
+ * Exactly one suffix is removed: `.eval.<ext>` when present, otherwise the last
+ * `.<ext>`. Dots inside the stem survive, so `v1.2.eval.yaml` yields `v1.2`
+ * instead of being truncated to `v1` by a second extension pass.
+ */
+function evalFileStem(fileName: string): string {
+  return fileName.replace(/(?:\.eval)?\.[^.]+$/i, '');
+}
+
+/**
+ * Derive a canonical slash-delimited analytics category path from an eval file.
+ *
+ * Generic eval filenames such as `eval.yaml` and `dataset.eval.yaml` do not add
+ * a taxonomy leaf. Meaningful named eval files such as `network.eval.yaml` do
+ * contribute a leaf. Any `evals` directory segment is treated as organization
+ * only and is removed from the analytics taxonomy.
  */
 export function deriveCategory(relativePath: string): string {
-  const parts = relativePath.split(/[/\\]/);
-  if (parts.length <= 1) {
+  const parts = relativePath
+    .split(/[/\\]/)
+    .map((part) => part.trim())
+    .filter((part) => part.length > 0);
+  const fileName = parts.at(-1);
+  if (!fileName) {
     return DEFAULT_CATEGORY;
   }
-  const dirs = parts.slice(0, -1).filter((d) => d !== 'evals');
-  return dirs.length > 0 ? dirs.join('/') : DEFAULT_CATEGORY;
+
+  const taxonomyParts = parts.slice(0, -1).filter((part) => part !== 'evals');
+  const stem = evalFileStem(fileName).trim();
+  if (stem && !GENERIC_EVAL_FILE_STEMS.has(stem.toLowerCase())) {
+    taxonomyParts.push(stem);
+  }
+
+  return normalizeCategoryPath(taxonomyParts.join('/'));
 }
diff --git a/packages/core/src/evaluation/yaml-parser.ts b/packages/core/src/evaluation/yaml-parser.ts
index df047d03e..4d3faf794 100644
--- a/packages/core/src/evaluation/yaml-parser.ts
+++ b/packages/core/src/evaluation/yaml-parser.ts
@@ -4,6 +4,7 @@ import fg from 'fast-glob';
 import micromatch from 'micromatch';
 import { stringify as stringifyYaml } from 'yaml';
 
+import { normalizeCategoryPath } from './category.js';
 import {
   type ExperimentConfig,
   normalizeExperimentConfig,
@@ -789,10 +790,12 @@ async function loadTestsFromParsedYamlValue(
         ? (renderedCase.window_size as number)
         : undefined;
 
+    const category = normalizeCategoryPath(suite.category ?? options?.category);
+
     const testCase: EvalTest = {
       id,
       suite: suiteName,
-      category: suite.category ?? options?.category,
+      category,
       conversation_id: conversationId,
       question: question,
       input: inputMessages,
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index 141ab1d50..072bbded2 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -188,7 +188,7 @@ export {
 } from './projects.js';
 export { syncProject, syncProjects } from './project-sync.js';
 export { trimBaselineResult } from './evaluation/baseline.js';
-export { DEFAULT_CATEGORY, deriveCategory } from './evaluation/category.js';
+export { DEFAULT_CATEGORY, deriveCategory, normalizeCategoryPath } from './evaluation/category.js';
 export * from './observability/index.js';
 
 // Registry exports
diff --git a/packages/core/test/evaluation/category.test.ts b/packages/core/test/evaluation/category.test.ts
index 9b8c62d01..3dc3f70f9 100644
--- a/packages/core/test/evaluation/category.test.ts
+++ b/packages/core/test/evaluation/category.test.ts
@@ -1,27 +1,35 @@
 import { describe, expect, test } from 'bun:test';
 
-import { DEFAULT_CATEGORY, deriveCategory } from '../../src/evaluation/category.js';
+import {
+  DEFAULT_CATEGORY,
+  deriveCategory,
+  normalizeCategoryPath,
+} from '../../src/evaluation/category.js';
 
 describe('deriveCategory', () => {
   test('returns Uncategorized for single-segment path (root-level file)', () => {
     expect(deriveCategory('dataset.eval.yaml')).toBe(DEFAULT_CATEGORY);
   });
 
+  test('uses a meaningful root-level eval filename as a one-node category path', () => {
+    expect(deriveCategory('network.eval.yaml')).toBe('network');
+  });
+
   test('returns Uncategorized when only directory is evals', () => {
     expect(deriveCategory('evals/dataset.eval.yaml')).toBe(DEFAULT_CATEGORY);
   });
 
-  test('strips evals segment and returns remaining directory', () => {
-    expect(deriveCategory('evals/fundamentals/greetings.eval.yaml')).toBe('fundamentals');
+  test('strips evals segment and appends meaningful named eval files as a leaf', () => {
+    expect(deriveCategory('evals/fundamentals/greetings.eval.yaml')).toBe('fundamentals/greetings');
   });
 
-  test('preserves nested directory paths', () => {
+  test('does not append generic eval filenames to nested directory paths', () => {
     expect(deriveCategory('evals/cargowise-customs/layout-engine/eval.yaml')).toBe(
       'cargowise-customs/layout-engine',
     );
   });
 
-  test('handles paths without evals segment', () => {
+  test('handles generic filenames without evals segment', () => {
     expect(deriveCategory('examples/showcase/eval.yaml')).toBe('examples/showcase');
   });
 
@@ -38,4 +46,27 @@ describe('deriveCategory', () => {
   test('returns Uncategorized for just a filename with no directory', () => {
     expect(deriveCategory('eval.yaml')).toBe(DEFAULT_CATEGORY);
   });
+
+  test('matches the hierarchical category derivation contract', () => {
+    expect(deriveCategory('security/eval.yaml')).toBe('security');
+    expect(deriveCategory('security/network.eval.yaml')).toBe('security/network');
+    expect(deriveCategory('security/network/dataset.eval.yaml')).toBe('security/network');
+  });
+});
+
+describe('normalizeCategoryPath', () => {
+  test('canonicalizes explicit slash-delimited taxonomy paths', () => {
+    expect(normalizeCategoryPath(' security / network ')).toBe('security/network');
+    expect(normalizeCategoryPath('security//network')).toBe('security/network');
+    expect(normalizeCategoryPath('security\\network')).toBe('security/network');
+  });
+
+  test('preserves existing flat category strings as one-node paths', () => {
+    expect(normalizeCategoryPath('Safety > PII')).toBe('Safety > PII');
+  });
+
+  test('returns Uncategorized for empty explicit categories', () => {
+    expect(normalizeCategoryPath('  /  ')).toBe(DEFAULT_CATEGORY);
+    expect(normalizeCategoryPath(undefined)).toBe(DEFAULT_CATEGORY);
+  });
 });
diff --git a/packages/core/test/evaluation/yaml-parser-metadata.test.ts b/packages/core/test/evaluation/yaml-parser-metadata.test.ts
index e9653f61d..772aeed95 100644
--- a/packages/core/test/evaluation/yaml-parser-metadata.test.ts
+++ b/packages/core/test/evaluation/yaml-parser-metadata.test.ts
@@ -74,6 +74,19 @@ tests:
     expect(suite.metadata).toBeUndefined();
   });
 
+  it('uses explicit YAML category as a canonical taxonomy path override', async () => {
+    const { filePath, dir } = createTempYaml(`
+category: " security / network "
+tests:
+  - id: test-1
+    input: "Hello"
+    criteria: "Greet"
+`);
+
+    const suite = await loadTestSuite(filePath, dir, { category: 'derived/path' });
+    expect(suite.tests[0].category).toBe('security/network');
+  });
+
   it('still loads tests correctly when metadata is present', async () => {
     const { filePath, dir } = createTempYaml(`
 name: my-eval