Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions lib/benchmark_runner.rb
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def write_csv(output_path, ruby_descriptions, table)
end

# Build output text string with metadata, table, and legend
def build_output_text(ruby_descriptions, table, format, bench_failures, include_rss: false, include_gc: false, include_pvalue: false)
def build_output_text(ruby_descriptions, table, format, bench_failures, include_rss: false, include_gc: false, include_pvalue: false, gc_table: nil, gc_format: nil)
base_name, *other_names = ruby_descriptions.keys

output_str = +""
Expand All @@ -60,6 +60,11 @@ def build_output_text(ruby_descriptions, table, format, bench_failures, include_
output_str << "\n"
output_str << TableFormatter.new(table, format, bench_failures).to_s + "\n"

if include_gc && gc_table && gc_format
output_str << "GC summary:\n"
output_str << TableFormatter.new(gc_table, gc_format, {}).to_s + "\n"
end

unless other_names.empty?
output_str << "Legend:\n"
other_names.each do |name|
Expand All @@ -68,10 +73,12 @@ def build_output_text(ruby_descriptions, table, format, bench_failures, include_
if include_rss
output_str << "- RSS #{base_name}/#{name}: ratio of #{base_name}/#{name} RSS. Higher is better for #{name}. Above 1 means lower memory usage.\n"
end
if include_gc
output_str << "- mark #{base_name}/#{name}: ratio of GC marking time. Higher is better for #{name}. Above 1 represents faster marking.\n"
output_str << "- sweep #{base_name}/#{name}: ratio of GC sweeping time. Higher is better for #{name}. Above 1 represents faster sweeping.\n"
end
end
if include_gc && gc_table
output_str << "- GC summary compares #{base_name} → comparison. Ratio columns are #{base_name}/comparison; above 1 means the comparison spent less GC time.\n"
output_str << "- mark/iter ratio and sweep/iter ratio compare total GC phase time per benchmark iteration, so they include both per-GC cost and GC frequency changes.\n"
output_str << "- mark/GC ratio and sweep/GC ratio compare average phase time per GC, isolating whether each GC became cheaper or more expensive.\n"
output_str << "- major/iter, minor/iter, and minor GC % show #{base_name} → comparison values, not ratios. Rows with no GC activity are omitted.\n"
end
if include_pvalue
output_str << "- ***: p < 0.001, **: p < 0.01, *: p < 0.05 (Welch's t-test)\n"
Expand Down
4 changes: 2 additions & 2 deletions lib/benchmark_runner/cli.rb
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def run
include_pvalue: args.pvalue,
zjit_stats: args.zjit_stats
)
table, format = builder.build
table, format, gc_table, gc_format = builder.build

output_path = BenchmarkRunner.output_path(args.out_path, out_override: args.out_override)

Expand All @@ -125,7 +125,7 @@ def run
BenchmarkRunner.write_csv(output_path, ruby_descriptions, table)

# Save the output in a text file that we can easily refer to
output_str = BenchmarkRunner.build_output_text(ruby_descriptions, table, format, bench_failures, include_rss: args.rss, include_gc: builder.include_gc?, include_pvalue: args.pvalue)
output_str = BenchmarkRunner.build_output_text(ruby_descriptions, table, format, bench_failures, include_rss: args.rss, include_gc: builder.include_gc?, include_pvalue: args.pvalue, gc_table: gc_table, gc_format: gc_format)
out_txt_path = output_path + ".txt"
File.open(out_txt_path, "w") { |f| f.write output_str }

Expand Down
162 changes: 112 additions & 50 deletions lib/results_table_builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ def build
table << row
end

[table, format]
gc_table = build_gc_summary_table

[table, format, gc_table, build_gc_summary_format(gc_table)]
end

private
Expand All @@ -49,10 +51,6 @@ def build_header
header << "#{name} (ms)"
header << "RSS (MiB)" if @include_rss
@zjit_stats.each { |stat| header << stat }
if @include_gc
header << "#{name} mark (ms)"
header << "#{name} sweep (ms)"
end
end

@other_names.each do |name|
Expand All @@ -72,13 +70,6 @@ def build_header
end
end

if @include_gc
@other_names.each do |name|
header << "mark #{@base_name}/#{name}"
header << "sweep #{@base_name}/#{name}"
end
end

header
end

Expand All @@ -89,10 +80,6 @@ def build_format
format << "%s"
format << (@rss_has_samples ? "%s" : "%.1f") if @include_rss
@zjit_stats.each { format << "%s" }
if @include_gc
format << "%s"
format << "%s"
end
end

@other_names.each do |_name|
Expand All @@ -112,14 +99,40 @@ def build_format
end
end

if @include_gc
@other_names.each do |_name|
format << "%s"
format << "%s"
format
end

def build_gc_summary_table
return nil unless @include_gc && !@other_names.empty?

rows = [["bench", *(["comparison"] if include_gc_comparison_name?), "mark/iter ratio", "sweep/iter ratio", "mark/GC ratio", "sweep/GC ratio", "major/iter", "minor/iter", "minor GC %"]]

@bench_names.each do |bench_name|
next unless has_complete_data?(bench_name)

marking_times = extract_gc_times(bench_name, 'gc_marking_time_bench')
sweeping_times = extract_gc_times(bench_name, 'gc_sweeping_time_bench')
major_counts = extract_gc_times(bench_name, 'gc_major_count_bench')
minor_counts = extract_gc_times(bench_name, 'gc_minor_count_bench')
base_mark, *other_marks = marking_times
base_sweep, *other_sweeps = sweeping_times
base_major, *other_majors = major_counts
base_minor, *other_minors = minor_counts

@other_names.each_with_index do |name, i|
next unless gc_activity?(base_mark, other_marks[i], base_sweep, other_sweeps[i], base_major, other_majors[i], base_minor, other_minors[i])

rows << gc_summary_row(bench_name, name, base_mark, other_marks[i], base_sweep, other_sweeps[i], base_major, other_majors[i], base_minor, other_minors[i])
end
end

format
rows.size == 1 ? nil : rows
end

def build_gc_summary_format(gc_table)
return nil unless gc_table

Array.new(gc_table.first.size, "%s")
end

def build_row(bench_name)
Expand All @@ -140,42 +153,26 @@ def build_row(bench_name)
[stat, extract_zjit_stat(bench_name, stat)]
end

if @include_gc
marking_times = extract_gc_times(bench_name, 'gc_marking_time_bench')
sweeping_times = extract_gc_times(bench_name, 'gc_sweeping_time_bench')
base_mark, *other_marks = marking_times
base_sweep, *other_sweeps = sweeping_times
end

row = [bench_name]
build_base_columns(row, base_t, base_rss_cell, zjit_stat_values, 0, base_mark, base_sweep)
build_comparison_columns(row, other_ts, other_rss_cells, zjit_stat_values, other_marks, other_sweeps)
build_base_columns(row, base_t, base_rss_cell, zjit_stat_values, 0)
build_comparison_columns(row, other_ts, other_rss_cells, zjit_stat_values)
build_ratio_columns(row, base_t0, other_t0s, base_t, other_ts)
build_rss_ratio_columns(row, base_rss, other_rsss)
build_gc_ratio_columns(row, base_mark, other_marks, base_sweep, other_sweeps)

row
end

def build_base_columns(row, base_t, base_rss, zjit_stat_values, exe_index, base_mark, base_sweep)
def build_base_columns(row, base_t, base_rss, zjit_stat_values, exe_index)
row << format_time_with_stddev(base_t)
row << base_rss if @include_rss
zjit_stat_values.each { |_stat, values| row << format_stat(values[exe_index]) }
if @include_gc
row << format_time_with_stddev(base_mark)
row << format_time_with_stddev(base_sweep)
end
end

def build_comparison_columns(row, other_ts, other_rss_cells, zjit_stat_values, other_marks, other_sweeps)
def build_comparison_columns(row, other_ts, other_rss_cells, zjit_stat_values)
other_ts.each_with_index do |other_t, i|
row << format_time_with_stddev(other_t)
row << other_rss_cells[i] if @include_rss
zjit_stat_values.each { |_stat, values| row << format_stat(values[i + 1]) }
if @include_gc
row << format_time_with_stddev(other_marks[i])
row << format_time_with_stddev(other_sweeps[i])
end
end
end

Expand Down Expand Up @@ -211,15 +208,77 @@ def build_rss_ratio_columns(row, base_rss, other_rsss)
end
end

def build_gc_ratio_columns(row, base_mark, other_marks, base_sweep, other_sweeps)
return unless @include_gc
def include_gc_comparison_name?
@other_names.size > 1
end

(other_marks || []).each do |other_mark|
row << gc_ratio(base_mark, other_mark)
end
(other_sweeps || []).each do |other_sweep|
row << gc_ratio(base_sweep, other_sweep)
end
def gc_summary_row(bench_name, name, base_mark, other_mark, base_sweep, other_sweep, base_major, other_major, base_minor, other_minor)
row = [bench_name]
row << name if include_gc_comparison_name?
row.concat([
gc_ratio(base_mark, other_mark),
gc_ratio(base_sweep, other_sweep),
scalar_ratio(gc_time_per_gc(base_mark, base_major, base_minor), gc_time_per_gc(other_mark, other_major, other_minor)),
scalar_ratio(gc_time_per_gc(base_sweep, base_major, base_minor), gc_time_per_gc(other_sweep, other_major, other_minor)),
gc_count_cell(base_major, other_major),
gc_count_cell(base_minor, other_minor),
gc_minor_percent_cell(base_major, base_minor, other_major, other_minor),
])
row
end

def gc_time_per_gc(time, major, minor)
return nil if time.nil? || time.empty? || major.nil? || major.empty? || minor.nil? || minor.empty?

count = mean(major) + mean(minor)
return nil if count == 0.0

mean(time) / count
end

def gc_activity?(*series)
series.any? { |values| values && values.sum > 0.0 }
end

def gc_count_cell(base, other)
"%4s → %4s" % [format_gc_series_mean(base), format_gc_series_mean(other)]
end

def gc_minor_percent_cell(base_major, base_minor, other_major, other_minor)
"%4s → %4s" % [format_gc_percent(gc_minor_percent(base_major, base_minor)), format_gc_percent(gc_minor_percent(other_major, other_minor))]
end

def gc_minor_percent(major, minor)
return nil if major.nil? || major.empty? || minor.nil? || minor.empty?

total = major.sum + minor.sum
return nil if total == 0.0

minor.sum.to_f / total
end

def format_gc_series_mean(values)
return "N/A" if values.nil? || values.empty?

"%.1f" % mean(values)
end

def format_gc_scalar(value)
return "N/A" if value.nil?

"%.3f" % value
end

def format_gc_percent(value)
return "N/A" if value.nil?

"%.0f%%" % (100.0 * value)
end

def scalar_ratio(base, other)
return "N/A" if base.nil? || other.nil? || other == 0.0

format_ratio(base / other, nil)
end

def gc_ratio(base, other)
Expand Down Expand Up @@ -358,7 +417,10 @@ def stddev(values)
end

def stddev_percent(values)
100 * stddev(values) / mean(values)
values_mean = mean(values)
return 0.0 if values_mean == 0.0

100 * stddev(values) / values_mean
end

def compute_bench_names
Expand Down
28 changes: 28 additions & 0 deletions test/benchmark_runner_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,34 @@
assert_includes result, "- ***: p < 0.001, **: p < 0.01, *: p < 0.05 (Welch's t-test)"
end

it 'prints compact GC comparison table and legend when include_gc is true' do
ruby_descriptions = {
'ruby-base' => 'ruby 3.3.0',
'ruby-exp' => 'ruby 3.3.0 experiment'
}
table = [
['bench', 'ruby-base (ms)', 'ruby-exp (ms)', 'ruby-base/ruby-exp'],
['fib', '100.0', '50.0', '2.000']
]
format = ['%s', '%s', '%s', '%s']
gc_table = [
['bench', 'mark/iter ratio', 'sweep/iter ratio', 'mark/GC ratio', 'sweep/GC ratio', 'major/iter', 'minor/iter', 'minor GC %'],
['fib', '2.000', '1.250', '1.000', '0.625', ' 2.0 → 1.0', ' 8.0 → 4.0', ' 80% → 80%']
]
gc_format = ['%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s']
bench_failures = {}

result = BenchmarkRunner.build_output_text(
ruby_descriptions, table, format, bench_failures, include_gc: true, gc_table: gc_table, gc_format: gc_format
)

assert_includes result, "GC summary:\n"
assert_includes result, 'mark/GC ratio'
assert_includes result, ' 2.0 → 1.0'
assert_includes result, '- GC summary compares ruby-base → comparison. Ratio columns are ruby-base/comparison; above 1 means the comparison spent less GC time.'
refute_includes result, 'mark ruby-base/ruby-exp:'
end

it 'includes RSS ratio legend when include_rss is true' do
ruby_descriptions = {
'ruby-base' => 'ruby 3.3.0',
Expand Down
Loading
Loading