From e74dfa7ee8f6ab7fc8b79c67ffaef2073c7a9cd7 Mon Sep 17 00:00:00 2001 From: Matt Valentine-House Date: Wed, 17 Jun 2026 15:44:13 +0100 Subject: [PATCH] Improve the GC harness Summary output --- lib/benchmark_runner.rb | 17 ++- lib/benchmark_runner/cli.rb | 4 +- lib/results_table_builder.rb | 162 ++++++++++++++++++++--------- test/benchmark_runner_test.rb | 28 +++++ test/results_table_builder_test.rb | 84 +++++++++++++++ 5 files changed, 238 insertions(+), 57 deletions(-) diff --git a/lib/benchmark_runner.rb b/lib/benchmark_runner.rb index 5187486b..9f0408cc 100644 --- a/lib/benchmark_runner.rb +++ b/lib/benchmark_runner.rb @@ -48,7 +48,7 @@ def write_csv(output_path, ruby_descriptions, table) end # Build output text string with metadata, table, and legend - def build_output_text(ruby_descriptions, table, format, bench_failures, include_rss: false, include_gc: false, include_pvalue: false) + def build_output_text(ruby_descriptions, table, format, bench_failures, include_rss: false, include_gc: false, include_pvalue: false, gc_table: nil, gc_format: nil) base_name, *other_names = ruby_descriptions.keys output_str = +"" @@ -60,6 +60,11 @@ def build_output_text(ruby_descriptions, table, format, bench_failures, include_ output_str << "\n" output_str << TableFormatter.new(table, format, bench_failures).to_s + "\n" + if include_gc && gc_table && gc_format + output_str << "GC summary:\n" + output_str << TableFormatter.new(gc_table, gc_format, {}).to_s + "\n" + end + unless other_names.empty? output_str << "Legend:\n" other_names.each do |name| @@ -68,10 +73,12 @@ def build_output_text(ruby_descriptions, table, format, bench_failures, include_ if include_rss output_str << "- RSS #{base_name}/#{name}: ratio of #{base_name}/#{name} RSS. Higher is better for #{name}. Above 1 means lower memory usage.\n" end - if include_gc - output_str << "- mark #{base_name}/#{name}: ratio of GC marking time. Higher is better for #{name}. Above 1 represents faster marking.\n" - output_str << "- sweep #{base_name}/#{name}: ratio of GC sweeping time. Higher is better for #{name}. Above 1 represents faster sweeping.\n" - end + end + if include_gc && gc_table + output_str << "- GC summary compares #{base_name} → comparison. Ratio columns are #{base_name}/comparison; above 1 means the comparison spent less GC time.\n" + output_str << "- mark/iter ratio and sweep/iter ratio compare total GC phase time per benchmark iteration, so they include both per-GC cost and GC frequency changes.\n" + output_str << "- mark/GC ratio and sweep/GC ratio compare average phase time per GC, isolating whether each GC became cheaper or more expensive.\n" + output_str << "- major/iter, minor/iter, and minor GC % show #{base_name} → comparison values, not ratios. Rows with no GC activity are omitted.\n" end if include_pvalue output_str << "- ***: p < 0.001, **: p < 0.01, *: p < 0.05 (Welch's t-test)\n" diff --git a/lib/benchmark_runner/cli.rb b/lib/benchmark_runner/cli.rb index 634d207d..0b91dbbe 100644 --- a/lib/benchmark_runner/cli.rb +++ b/lib/benchmark_runner/cli.rb @@ -112,7 +112,7 @@ def run include_pvalue: args.pvalue, zjit_stats: args.zjit_stats ) - table, format = builder.build + table, format, gc_table, gc_format = builder.build output_path = BenchmarkRunner.output_path(args.out_path, out_override: args.out_override) @@ -125,7 +125,7 @@ def run BenchmarkRunner.write_csv(output_path, ruby_descriptions, table) # Save the output in a text file that we can easily refer to - output_str = BenchmarkRunner.build_output_text(ruby_descriptions, table, format, bench_failures, include_rss: args.rss, include_gc: builder.include_gc?, include_pvalue: args.pvalue) + output_str = BenchmarkRunner.build_output_text(ruby_descriptions, table, format, bench_failures, include_rss: args.rss, include_gc: builder.include_gc?, include_pvalue: args.pvalue, gc_table: gc_table, gc_format: gc_format) out_txt_path = output_path + ".txt" File.open(out_txt_path, "w") { |f| f.write output_str } diff --git a/lib/results_table_builder.rb b/lib/results_table_builder.rb index d7ac004d..ddd296b5 100644 --- a/lib/results_table_builder.rb +++ b/lib/results_table_builder.rb @@ -33,7 +33,9 @@ def build table << row end - [table, format] + gc_table = build_gc_summary_table + + [table, format, gc_table, build_gc_summary_format(gc_table)] end private @@ -49,10 +51,6 @@ def build_header header << "#{name} (ms)" header << "RSS (MiB)" if @include_rss @zjit_stats.each { |stat| header << stat } - if @include_gc - header << "#{name} mark (ms)" - header << "#{name} sweep (ms)" - end end @other_names.each do |name| @@ -72,13 +70,6 @@ def build_header end end - if @include_gc - @other_names.each do |name| - header << "mark #{@base_name}/#{name}" - header << "sweep #{@base_name}/#{name}" - end - end - header end @@ -89,10 +80,6 @@ def build_format format << "%s" format << (@rss_has_samples ? "%s" : "%.1f") if @include_rss @zjit_stats.each { format << "%s" } - if @include_gc - format << "%s" - format << "%s" - end end @other_names.each do |_name| @@ -112,14 +99,40 @@ def build_format end end - if @include_gc - @other_names.each do |_name| - format << "%s" - format << "%s" + format + end + + def build_gc_summary_table + return nil unless @include_gc && !@other_names.empty? + + rows = [["bench", *(["comparison"] if include_gc_comparison_name?), "mark/iter ratio", "sweep/iter ratio", "mark/GC ratio", "sweep/GC ratio", "major/iter", "minor/iter", "minor GC %"]] + + @bench_names.each do |bench_name| + next unless has_complete_data?(bench_name) + + marking_times = extract_gc_times(bench_name, 'gc_marking_time_bench') + sweeping_times = extract_gc_times(bench_name, 'gc_sweeping_time_bench') + major_counts = extract_gc_times(bench_name, 'gc_major_count_bench') + minor_counts = extract_gc_times(bench_name, 'gc_minor_count_bench') + base_mark, *other_marks = marking_times + base_sweep, *other_sweeps = sweeping_times + base_major, *other_majors = major_counts + base_minor, *other_minors = minor_counts + + @other_names.each_with_index do |name, i| + next unless gc_activity?(base_mark, other_marks[i], base_sweep, other_sweeps[i], base_major, other_majors[i], base_minor, other_minors[i]) + + rows << gc_summary_row(bench_name, name, base_mark, other_marks[i], base_sweep, other_sweeps[i], base_major, other_majors[i], base_minor, other_minors[i]) end end - format + rows.size == 1 ? nil : rows + end + + def build_gc_summary_format(gc_table) + return nil unless gc_table + + Array.new(gc_table.first.size, "%s") end def build_row(bench_name) @@ -140,42 +153,26 @@ def build_row(bench_name) [stat, extract_zjit_stat(bench_name, stat)] end - if @include_gc - marking_times = extract_gc_times(bench_name, 'gc_marking_time_bench') - sweeping_times = extract_gc_times(bench_name, 'gc_sweeping_time_bench') - base_mark, *other_marks = marking_times - base_sweep, *other_sweeps = sweeping_times - end - row = [bench_name] - build_base_columns(row, base_t, base_rss_cell, zjit_stat_values, 0, base_mark, base_sweep) - build_comparison_columns(row, other_ts, other_rss_cells, zjit_stat_values, other_marks, other_sweeps) + build_base_columns(row, base_t, base_rss_cell, zjit_stat_values, 0) + build_comparison_columns(row, other_ts, other_rss_cells, zjit_stat_values) build_ratio_columns(row, base_t0, other_t0s, base_t, other_ts) build_rss_ratio_columns(row, base_rss, other_rsss) - build_gc_ratio_columns(row, base_mark, other_marks, base_sweep, other_sweeps) row end - def build_base_columns(row, base_t, base_rss, zjit_stat_values, exe_index, base_mark, base_sweep) + def build_base_columns(row, base_t, base_rss, zjit_stat_values, exe_index) row << format_time_with_stddev(base_t) row << base_rss if @include_rss zjit_stat_values.each { |_stat, values| row << format_stat(values[exe_index]) } - if @include_gc - row << format_time_with_stddev(base_mark) - row << format_time_with_stddev(base_sweep) - end end - def build_comparison_columns(row, other_ts, other_rss_cells, zjit_stat_values, other_marks, other_sweeps) + def build_comparison_columns(row, other_ts, other_rss_cells, zjit_stat_values) other_ts.each_with_index do |other_t, i| row << format_time_with_stddev(other_t) row << other_rss_cells[i] if @include_rss zjit_stat_values.each { |_stat, values| row << format_stat(values[i + 1]) } - if @include_gc - row << format_time_with_stddev(other_marks[i]) - row << format_time_with_stddev(other_sweeps[i]) - end end end @@ -211,15 +208,77 @@ def build_rss_ratio_columns(row, base_rss, other_rsss) end end - def build_gc_ratio_columns(row, base_mark, other_marks, base_sweep, other_sweeps) - return unless @include_gc + def include_gc_comparison_name? + @other_names.size > 1 + end - (other_marks || []).each do |other_mark| - row << gc_ratio(base_mark, other_mark) - end - (other_sweeps || []).each do |other_sweep| - row << gc_ratio(base_sweep, other_sweep) - end + def gc_summary_row(bench_name, name, base_mark, other_mark, base_sweep, other_sweep, base_major, other_major, base_minor, other_minor) + row = [bench_name] + row << name if include_gc_comparison_name? + row.concat([ + gc_ratio(base_mark, other_mark), + gc_ratio(base_sweep, other_sweep), + scalar_ratio(gc_time_per_gc(base_mark, base_major, base_minor), gc_time_per_gc(other_mark, other_major, other_minor)), + scalar_ratio(gc_time_per_gc(base_sweep, base_major, base_minor), gc_time_per_gc(other_sweep, other_major, other_minor)), + gc_count_cell(base_major, other_major), + gc_count_cell(base_minor, other_minor), + gc_minor_percent_cell(base_major, base_minor, other_major, other_minor), + ]) + row + end + + def gc_time_per_gc(time, major, minor) + return nil if time.nil? || time.empty? || major.nil? || major.empty? || minor.nil? || minor.empty? + + count = mean(major) + mean(minor) + return nil if count == 0.0 + + mean(time) / count + end + + def gc_activity?(*series) + series.any? { |values| values && values.sum > 0.0 } + end + + def gc_count_cell(base, other) + "%4s → %4s" % [format_gc_series_mean(base), format_gc_series_mean(other)] + end + + def gc_minor_percent_cell(base_major, base_minor, other_major, other_minor) + "%4s → %4s" % [format_gc_percent(gc_minor_percent(base_major, base_minor)), format_gc_percent(gc_minor_percent(other_major, other_minor))] + end + + def gc_minor_percent(major, minor) + return nil if major.nil? || major.empty? || minor.nil? || minor.empty? + + total = major.sum + minor.sum + return nil if total == 0.0 + + minor.sum.to_f / total + end + + def format_gc_series_mean(values) + return "N/A" if values.nil? || values.empty? + + "%.1f" % mean(values) + end + + def format_gc_scalar(value) + return "N/A" if value.nil? + + "%.3f" % value + end + + def format_gc_percent(value) + return "N/A" if value.nil? + + "%.0f%%" % (100.0 * value) + end + + def scalar_ratio(base, other) + return "N/A" if base.nil? || other.nil? || other == 0.0 + + format_ratio(base / other, nil) end def gc_ratio(base, other) @@ -358,7 +417,10 @@ def stddev(values) end def stddev_percent(values) - 100 * stddev(values) / mean(values) + values_mean = mean(values) + return 0.0 if values_mean == 0.0 + + 100 * stddev(values) / values_mean end def compute_bench_names diff --git a/test/benchmark_runner_test.rb b/test/benchmark_runner_test.rb index e475ed7f..80dee5d7 100644 --- a/test/benchmark_runner_test.rb +++ b/test/benchmark_runner_test.rb @@ -409,6 +409,34 @@ assert_includes result, "- ***: p < 0.001, **: p < 0.01, *: p < 0.05 (Welch's t-test)" end + it 'prints compact GC comparison table and legend when include_gc is true' do + ruby_descriptions = { + 'ruby-base' => 'ruby 3.3.0', + 'ruby-exp' => 'ruby 3.3.0 experiment' + } + table = [ + ['bench', 'ruby-base (ms)', 'ruby-exp (ms)', 'ruby-base/ruby-exp'], + ['fib', '100.0', '50.0', '2.000'] + ] + format = ['%s', '%s', '%s', '%s'] + gc_table = [ + ['bench', 'mark/iter ratio', 'sweep/iter ratio', 'mark/GC ratio', 'sweep/GC ratio', 'major/iter', 'minor/iter', 'minor GC %'], + ['fib', '2.000', '1.250', '1.000', '0.625', ' 2.0 → 1.0', ' 8.0 → 4.0', ' 80% → 80%'] + ] + gc_format = ['%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s'] + bench_failures = {} + + result = BenchmarkRunner.build_output_text( + ruby_descriptions, table, format, bench_failures, include_gc: true, gc_table: gc_table, gc_format: gc_format + ) + + assert_includes result, "GC summary:\n" + assert_includes result, 'mark/GC ratio' + assert_includes result, ' 2.0 → 1.0' + assert_includes result, '- GC summary compares ruby-base → comparison. Ratio columns are ruby-base/comparison; above 1 means the comparison spent less GC time.' + refute_includes result, 'mark ruby-base/ruby-exp:' + end + it 'includes RSS ratio legend when include_rss is true' do ruby_descriptions = { 'ruby-base' => 'ruby 3.3.0', diff --git a/test/results_table_builder_test.rb b/test/results_table_builder_test.rb index 303b84f5..89b5c1f3 100644 --- a/test/results_table_builder_test.rb +++ b/test/results_table_builder_test.rb @@ -550,6 +550,90 @@ end end + describe 'GC summary data' do + it 'keeps GC columns out of the main table and builds a compact GC comparison table' do + bench_data = { + 'ruby-base' => { + 'fib' => { + 'warmup' => [0.1], + 'bench' => [0.1, 0.1], + 'rss' => 10 * 1024 * 1024, + 'gc_marking_time_bench' => [20.0, 20.0], + 'gc_sweeping_time_bench' => [10.0, 10.0], + 'gc_major_count_bench' => [2, 2], + 'gc_minor_count_bench' => [8, 8] + } + }, + 'ruby-exp' => { + 'fib' => { + 'warmup' => [0.05], + 'bench' => [0.05, 0.05], + 'rss' => 12 * 1024 * 1024, + 'gc_marking_time_bench' => [15.0, 15.0], + 'gc_sweeping_time_bench' => [10.0, 10.0], + 'gc_major_count_bench' => [1, 1], + 'gc_minor_count_bench' => [4, 4] + } + } + } + + builder = ResultsTableBuilder.new( + executable_names: ['ruby-base', 'ruby-exp'], + bench_data: bench_data + ) + + table, format, gc_table, gc_format = builder.build + + assert_equal ['bench', 'ruby-base (ms)', 'ruby-exp (ms)', 'ruby-exp 1st itr', 'ruby-base/ruby-exp'], table[0] + assert_equal ['%s', '%s', '%s', '%.3f', '%s'], format + + assert_equal [ + 'bench', 'mark/iter ratio', 'sweep/iter ratio', 'mark/GC ratio', 'sweep/GC ratio', 'major/iter', 'minor/iter', 'minor GC %' + ], gc_table[0] + assert_equal ['%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s'], gc_format + assert_equal [ + 'fib', '1.333', '1.000', '0.667', '0.500', ' 2.0 → 1.0', ' 8.0 → 4.0', ' 80% → 80%' + ], gc_table[1] + end + + it 'omits benchmarks with no GC activity from the GC summary' do + bench_data = { + 'ruby-base' => { + 'fib' => { + 'warmup' => [0.1], + 'bench' => [0.1], + 'rss' => 10, + 'gc_marking_time_bench' => [0.0], + 'gc_sweeping_time_bench' => [0.0], + 'gc_major_count_bench' => [0], + 'gc_minor_count_bench' => [0] + } + }, + 'ruby-exp' => { + 'fib' => { + 'warmup' => [0.1], + 'bench' => [0.1], + 'rss' => 10, + 'gc_marking_time_bench' => [0.0], + 'gc_sweeping_time_bench' => [0.0], + 'gc_major_count_bench' => [0], + 'gc_minor_count_bench' => [0] + } + } + } + + builder = ResultsTableBuilder.new( + executable_names: ['ruby-base', 'ruby-exp'], + bench_data: bench_data + ) + + _table, _format, gc_table, gc_format = builder.build + + assert_nil gc_table + assert_nil gc_format + end + end + describe 'RSS sampling (rss_samples)' do MIB = 1024 * 1024