From e74dfa7ee8f6ab7fc8b79c67ffaef2073c7a9cd7 Mon Sep 17 00:00:00 2001
From: Matt Valentine-House <matt@eightbitraptor.com>
Date: Wed, 17 Jun 2026 15:44:13 +0100
Subject: [PATCH] Improve the GC harness Summary output

---
 lib/benchmark_runner.rb            |  17 ++-
 lib/benchmark_runner/cli.rb        |   4 +-
 lib/results_table_builder.rb       | 162 ++++++++++++++++++++---------
 test/benchmark_runner_test.rb      |  28 +++++
 test/results_table_builder_test.rb |  84 +++++++++++++++
 5 files changed, 238 insertions(+), 57 deletions(-)

diff --git a/lib/benchmark_runner.rb b/lib/benchmark_runner.rb
index 5187486b..9f0408cc 100644
--- a/lib/benchmark_runner.rb
+++ b/lib/benchmark_runner.rb
@@ -48,7 +48,7 @@ def write_csv(output_path, ruby_descriptions, table)
     end
 
     # Build output text string with metadata, table, and legend
-    def build_output_text(ruby_descriptions, table, format, bench_failures, include_rss: false, include_gc: false, include_pvalue: false)
+    def build_output_text(ruby_descriptions, table, format, bench_failures, include_rss: false, include_gc: false, include_pvalue: false, gc_table: nil, gc_format: nil)
       base_name, *other_names = ruby_descriptions.keys
 
       output_str = +""
@@ -60,6 +60,11 @@ def build_output_text(ruby_descriptions, table, format, bench_failures, include_
       output_str << "\n"
       output_str << TableFormatter.new(table, format, bench_failures).to_s + "\n"
 
+      if include_gc && gc_table && gc_format
+        output_str << "GC summary:\n"
+        output_str << TableFormatter.new(gc_table, gc_format, {}).to_s + "\n"
+      end
+
       unless other_names.empty?
         output_str << "Legend:\n"
         other_names.each do |name|
@@ -68,10 +73,12 @@ def build_output_text(ruby_descriptions, table, format, bench_failures, include_
           if include_rss
             output_str << "- RSS #{base_name}/#{name}: ratio of #{base_name}/#{name} RSS. Higher is better for #{name}. Above 1 means lower memory usage.\n"
           end
-          if include_gc
-            output_str << "- mark #{base_name}/#{name}: ratio of GC marking time. Higher is better for #{name}. Above 1 represents faster marking.\n"
-            output_str << "- sweep #{base_name}/#{name}: ratio of GC sweeping time. Higher is better for #{name}. Above 1 represents faster sweeping.\n"
-          end
+        end
+        if include_gc && gc_table
+          output_str << "- GC summary compares #{base_name} → comparison. Ratio columns are #{base_name}/comparison; above 1 means the comparison spent less GC time.\n"
+          output_str << "- mark/iter ratio and sweep/iter ratio compare total GC phase time per benchmark iteration, so they include both per-GC cost and GC frequency changes.\n"
+          output_str << "- mark/GC ratio and sweep/GC ratio compare average phase time per GC, isolating whether each GC became cheaper or more expensive.\n"
+          output_str << "- major/iter, minor/iter, and minor GC % show #{base_name} → comparison values, not ratios. Rows with no GC activity are omitted.\n"
         end
         if include_pvalue
           output_str << "- ***: p < 0.001, **: p < 0.01, *: p < 0.05 (Welch's t-test)\n"
diff --git a/lib/benchmark_runner/cli.rb b/lib/benchmark_runner/cli.rb
index 634d207d..0b91dbbe 100644
--- a/lib/benchmark_runner/cli.rb
+++ b/lib/benchmark_runner/cli.rb
@@ -112,7 +112,7 @@ def run
         include_pvalue: args.pvalue,
         zjit_stats: args.zjit_stats
       )
-      table, format = builder.build
+      table, format, gc_table, gc_format = builder.build
 
       output_path = BenchmarkRunner.output_path(args.out_path, out_override: args.out_override)
 
@@ -125,7 +125,7 @@ def run
       BenchmarkRunner.write_csv(output_path, ruby_descriptions, table)
 
       # Save the output in a text file that we can easily refer to
-      output_str = BenchmarkRunner.build_output_text(ruby_descriptions, table, format, bench_failures, include_rss: args.rss, include_gc: builder.include_gc?, include_pvalue: args.pvalue)
+      output_str = BenchmarkRunner.build_output_text(ruby_descriptions, table, format, bench_failures, include_rss: args.rss, include_gc: builder.include_gc?, include_pvalue: args.pvalue, gc_table: gc_table, gc_format: gc_format)
       out_txt_path = output_path + ".txt"
       File.open(out_txt_path, "w") { |f| f.write output_str }
 
diff --git a/lib/results_table_builder.rb b/lib/results_table_builder.rb
index d7ac004d..ddd296b5 100644
--- a/lib/results_table_builder.rb
+++ b/lib/results_table_builder.rb
@@ -33,7 +33,9 @@ def build
       table << row
     end
 
-    [table, format]
+    gc_table = build_gc_summary_table
+
+    [table, format, gc_table, build_gc_summary_format(gc_table)]
   end
 
   private
@@ -49,10 +51,6 @@ def build_header
       header << "#{name} (ms)"
       header << "RSS (MiB)" if @include_rss
       @zjit_stats.each { |stat| header << stat }
-      if @include_gc
-        header << "#{name} mark (ms)"
-        header << "#{name} sweep (ms)"
-      end
     end
 
     @other_names.each do |name|
@@ -72,13 +70,6 @@ def build_header
       end
     end
 
-    if @include_gc
-      @other_names.each do |name|
-        header << "mark #{@base_name}/#{name}"
-        header << "sweep #{@base_name}/#{name}"
-      end
-    end
-
     header
   end
 
@@ -89,10 +80,6 @@ def build_format
       format << "%s"
       format << (@rss_has_samples ? "%s" : "%.1f") if @include_rss
       @zjit_stats.each { format << "%s" }
-      if @include_gc
-        format << "%s"
-        format << "%s"
-      end
     end
 
     @other_names.each do |_name|
@@ -112,14 +99,40 @@ def build_format
       end
     end
 
-    if @include_gc
-      @other_names.each do |_name|
-        format << "%s"
-        format << "%s"
+    format
+  end
+
+  def build_gc_summary_table
+    return nil unless @include_gc && !@other_names.empty?
+
+    rows = [["bench", *(["comparison"] if include_gc_comparison_name?), "mark/iter ratio", "sweep/iter ratio", "mark/GC ratio", "sweep/GC ratio", "major/iter", "minor/iter", "minor GC %"]]
+
+    @bench_names.each do |bench_name|
+      next unless has_complete_data?(bench_name)
+
+      marking_times = extract_gc_times(bench_name, 'gc_marking_time_bench')
+      sweeping_times = extract_gc_times(bench_name, 'gc_sweeping_time_bench')
+      major_counts = extract_gc_times(bench_name, 'gc_major_count_bench')
+      minor_counts = extract_gc_times(bench_name, 'gc_minor_count_bench')
+      base_mark, *other_marks = marking_times
+      base_sweep, *other_sweeps = sweeping_times
+      base_major, *other_majors = major_counts
+      base_minor, *other_minors = minor_counts
+
+      @other_names.each_with_index do |name, i|
+        next unless gc_activity?(base_mark, other_marks[i], base_sweep, other_sweeps[i], base_major, other_majors[i], base_minor, other_minors[i])
+
+        rows << gc_summary_row(bench_name, name, base_mark, other_marks[i], base_sweep, other_sweeps[i], base_major, other_majors[i], base_minor, other_minors[i])
       end
     end
 
-    format
+    rows.size == 1 ? nil : rows
+  end
+
+  def build_gc_summary_format(gc_table)
+    return nil unless gc_table
+
+    Array.new(gc_table.first.size, "%s")
   end
 
   def build_row(bench_name)
@@ -140,42 +153,26 @@ def build_row(bench_name)
       [stat, extract_zjit_stat(bench_name, stat)]
     end
 
-    if @include_gc
-      marking_times = extract_gc_times(bench_name, 'gc_marking_time_bench')
-      sweeping_times = extract_gc_times(bench_name, 'gc_sweeping_time_bench')
-      base_mark, *other_marks = marking_times
-      base_sweep, *other_sweeps = sweeping_times
-    end
-
     row = [bench_name]
-    build_base_columns(row, base_t, base_rss_cell, zjit_stat_values, 0, base_mark, base_sweep)
-    build_comparison_columns(row, other_ts, other_rss_cells, zjit_stat_values, other_marks, other_sweeps)
+    build_base_columns(row, base_t, base_rss_cell, zjit_stat_values, 0)
+    build_comparison_columns(row, other_ts, other_rss_cells, zjit_stat_values)
     build_ratio_columns(row, base_t0, other_t0s, base_t, other_ts)
     build_rss_ratio_columns(row, base_rss, other_rsss)
-    build_gc_ratio_columns(row, base_mark, other_marks, base_sweep, other_sweeps)
 
     row
   end
 
-  def build_base_columns(row, base_t, base_rss, zjit_stat_values, exe_index, base_mark, base_sweep)
+  def build_base_columns(row, base_t, base_rss, zjit_stat_values, exe_index)
     row << format_time_with_stddev(base_t)
     row << base_rss if @include_rss
     zjit_stat_values.each { |_stat, values| row << format_stat(values[exe_index]) }
-    if @include_gc
-      row << format_time_with_stddev(base_mark)
-      row << format_time_with_stddev(base_sweep)
-    end
   end
 
-  def build_comparison_columns(row, other_ts, other_rss_cells, zjit_stat_values, other_marks, other_sweeps)
+  def build_comparison_columns(row, other_ts, other_rss_cells, zjit_stat_values)
     other_ts.each_with_index do |other_t, i|
       row << format_time_with_stddev(other_t)
       row << other_rss_cells[i] if @include_rss
       zjit_stat_values.each { |_stat, values| row << format_stat(values[i + 1]) }
-      if @include_gc
-        row << format_time_with_stddev(other_marks[i])
-        row << format_time_with_stddev(other_sweeps[i])
-      end
     end
   end
 
@@ -211,15 +208,77 @@ def build_rss_ratio_columns(row, base_rss, other_rsss)
     end
   end
 
-  def build_gc_ratio_columns(row, base_mark, other_marks, base_sweep, other_sweeps)
-    return unless @include_gc
+  def include_gc_comparison_name?
+    @other_names.size > 1
+  end
 
-    (other_marks || []).each do |other_mark|
-      row << gc_ratio(base_mark, other_mark)
-    end
-    (other_sweeps || []).each do |other_sweep|
-      row << gc_ratio(base_sweep, other_sweep)
-    end
+  def gc_summary_row(bench_name, name, base_mark, other_mark, base_sweep, other_sweep, base_major, other_major, base_minor, other_minor)
+    row = [bench_name]
+    row << name if include_gc_comparison_name?
+    row.concat([
+      gc_ratio(base_mark, other_mark),
+      gc_ratio(base_sweep, other_sweep),
+      scalar_ratio(gc_time_per_gc(base_mark, base_major, base_minor), gc_time_per_gc(other_mark, other_major, other_minor)),
+      scalar_ratio(gc_time_per_gc(base_sweep, base_major, base_minor), gc_time_per_gc(other_sweep, other_major, other_minor)),
+      gc_count_cell(base_major, other_major),
+      gc_count_cell(base_minor, other_minor),
+      gc_minor_percent_cell(base_major, base_minor, other_major, other_minor),
+    ])
+    row
+  end
+
+  def gc_time_per_gc(time, major, minor)
+    return nil if time.nil? || time.empty? || major.nil? || major.empty? || minor.nil? || minor.empty?
+
+    count = mean(major) + mean(minor)
+    return nil if count == 0.0
+
+    mean(time) / count
+  end
+
+  def gc_activity?(*series)
+    series.any? { |values| values && values.sum > 0.0 }
+  end
+
+  def gc_count_cell(base, other)
+    "%4s  →  %4s" % [format_gc_series_mean(base), format_gc_series_mean(other)]
+  end
+
+  def gc_minor_percent_cell(base_major, base_minor, other_major, other_minor)
+    "%4s  →  %4s" % [format_gc_percent(gc_minor_percent(base_major, base_minor)), format_gc_percent(gc_minor_percent(other_major, other_minor))]
+  end
+
+  def gc_minor_percent(major, minor)
+    return nil if major.nil? || major.empty? || minor.nil? || minor.empty?
+
+    total = major.sum + minor.sum
+    return nil if total == 0.0
+
+    minor.sum.to_f / total
+  end
+
+  def format_gc_series_mean(values)
+    return "N/A" if values.nil? || values.empty?
+
+    "%.1f" % mean(values)
+  end
+
+  def format_gc_scalar(value)
+    return "N/A" if value.nil?
+
+    "%.3f" % value
+  end
+
+  def format_gc_percent(value)
+    return "N/A" if value.nil?
+
+    "%.0f%%" % (100.0 * value)
+  end
+
+  def scalar_ratio(base, other)
+    return "N/A" if base.nil? || other.nil? || other == 0.0
+
+    format_ratio(base / other, nil)
   end
 
   def gc_ratio(base, other)
@@ -358,7 +417,10 @@ def stddev(values)
   end
 
   def stddev_percent(values)
-    100 * stddev(values) / mean(values)
+    values_mean = mean(values)
+    return 0.0 if values_mean == 0.0
+
+    100 * stddev(values) / values_mean
   end
 
   def compute_bench_names
diff --git a/test/benchmark_runner_test.rb b/test/benchmark_runner_test.rb
index e475ed7f..80dee5d7 100644
--- a/test/benchmark_runner_test.rb
+++ b/test/benchmark_runner_test.rb
@@ -409,6 +409,34 @@
       assert_includes result, "- ***: p < 0.001, **: p < 0.01, *: p < 0.05 (Welch's t-test)"
     end
 
+    it 'prints compact GC comparison table and legend when include_gc is true' do
+      ruby_descriptions = {
+        'ruby-base' => 'ruby 3.3.0',
+        'ruby-exp' => 'ruby 3.3.0 experiment'
+      }
+      table = [
+        ['bench', 'ruby-base (ms)', 'ruby-exp (ms)', 'ruby-base/ruby-exp'],
+        ['fib', '100.0', '50.0', '2.000']
+      ]
+      format = ['%s', '%s', '%s', '%s']
+      gc_table = [
+        ['bench', 'mark/iter ratio', 'sweep/iter ratio', 'mark/GC ratio', 'sweep/GC ratio', 'major/iter', 'minor/iter', 'minor GC %'],
+        ['fib', '2.000', '1.250', '1.000', '0.625', ' 2.0  →   1.0', ' 8.0  →   4.0', ' 80%  →   80%']
+      ]
+      gc_format = ['%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s']
+      bench_failures = {}
+
+      result = BenchmarkRunner.build_output_text(
+        ruby_descriptions, table, format, bench_failures, include_gc: true, gc_table: gc_table, gc_format: gc_format
+      )
+
+      assert_includes result, "GC summary:\n"
+      assert_includes result, 'mark/GC ratio'
+      assert_includes result, ' 2.0  →   1.0'
+      assert_includes result, '- GC summary compares ruby-base → comparison. Ratio columns are ruby-base/comparison; above 1 means the comparison spent less GC time.'
+      refute_includes result, 'mark ruby-base/ruby-exp:'
+    end
+
     it 'includes RSS ratio legend when include_rss is true' do
       ruby_descriptions = {
         'ruby-base' => 'ruby 3.3.0',
diff --git a/test/results_table_builder_test.rb b/test/results_table_builder_test.rb
index 303b84f5..89b5c1f3 100644
--- a/test/results_table_builder_test.rb
+++ b/test/results_table_builder_test.rb
@@ -550,6 +550,90 @@
     end
   end
 
+  describe 'GC summary data' do
+    it 'keeps GC columns out of the main table and builds a compact GC comparison table' do
+      bench_data = {
+        'ruby-base' => {
+          'fib' => {
+            'warmup' => [0.1],
+            'bench' => [0.1, 0.1],
+            'rss' => 10 * 1024 * 1024,
+            'gc_marking_time_bench' => [20.0, 20.0],
+            'gc_sweeping_time_bench' => [10.0, 10.0],
+            'gc_major_count_bench' => [2, 2],
+            'gc_minor_count_bench' => [8, 8]
+          }
+        },
+        'ruby-exp' => {
+          'fib' => {
+            'warmup' => [0.05],
+            'bench' => [0.05, 0.05],
+            'rss' => 12 * 1024 * 1024,
+            'gc_marking_time_bench' => [15.0, 15.0],
+            'gc_sweeping_time_bench' => [10.0, 10.0],
+            'gc_major_count_bench' => [1, 1],
+            'gc_minor_count_bench' => [4, 4]
+          }
+        }
+      }
+
+      builder = ResultsTableBuilder.new(
+        executable_names: ['ruby-base', 'ruby-exp'],
+        bench_data: bench_data
+      )
+
+      table, format, gc_table, gc_format = builder.build
+
+      assert_equal ['bench', 'ruby-base (ms)', 'ruby-exp (ms)', 'ruby-exp 1st itr', 'ruby-base/ruby-exp'], table[0]
+      assert_equal ['%s', '%s', '%s', '%.3f', '%s'], format
+
+      assert_equal [
+        'bench', 'mark/iter ratio', 'sweep/iter ratio', 'mark/GC ratio', 'sweep/GC ratio', 'major/iter', 'minor/iter', 'minor GC %'
+      ], gc_table[0]
+      assert_equal ['%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s'], gc_format
+      assert_equal [
+        'fib', '1.333', '1.000', '0.667', '0.500', ' 2.0  →   1.0', ' 8.0  →   4.0', ' 80%  →   80%'
+      ], gc_table[1]
+    end
+
+    it 'omits benchmarks with no GC activity from the GC summary' do
+      bench_data = {
+        'ruby-base' => {
+          'fib' => {
+            'warmup' => [0.1],
+            'bench' => [0.1],
+            'rss' => 10,
+            'gc_marking_time_bench' => [0.0],
+            'gc_sweeping_time_bench' => [0.0],
+            'gc_major_count_bench' => [0],
+            'gc_minor_count_bench' => [0]
+          }
+        },
+        'ruby-exp' => {
+          'fib' => {
+            'warmup' => [0.1],
+            'bench' => [0.1],
+            'rss' => 10,
+            'gc_marking_time_bench' => [0.0],
+            'gc_sweeping_time_bench' => [0.0],
+            'gc_major_count_bench' => [0],
+            'gc_minor_count_bench' => [0]
+          }
+        }
+      }
+
+      builder = ResultsTableBuilder.new(
+        executable_names: ['ruby-base', 'ruby-exp'],
+        bench_data: bench_data
+      )
+
+      _table, _format, gc_table, gc_format = builder.build
+
+      assert_nil gc_table
+      assert_nil gc_format
+    end
+  end
+
   describe 'RSS sampling (rss_samples)' do
     MIB = 1024 * 1024