From 995181359ec86952a409595366397f53c19cd626 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 1 Jul 2026 15:09:40 +0200 Subject: [PATCH 1/2] bench: add arithmetic operation micro-benchmarks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An op-level tier alongside the whole-model builds: one benchmark per (operation, size profile), operands built outside the measured region so a run isolates a single op rather than a whole build. This attributes perf changes to a specific arithmetic path — a build benchmark says "kvl got heavier", an op benchmark says "expr+expr broadcast got heavier". - benchmarks/ops.py: op registry (OpSpec) + size profiles (small 1D×2000; large 3D×3×4×1000 — differ in element count *and* dim count; the asymmetric shape also catches dim-order bugs) + ~30 ops across scaling, var/expr arithmetic, quadratic, reductions and constraint construction. Binary labelled ops carry match/broadcast variants — the alignment-path axis where the interesting regressions live. - benchmarks/drivers/test_ops.py: parametrized driver, one benchmark per (op, profile). - conftest: add test_ops to CODSPEED_MODULES (tracked; memory advisory). 60 benchmarks, ~80s/run with memory. Signal validates: large ≈ 6× small, broadcast ≈ 5× match (the §9 cross-product). --- benchmarks/conftest.py | 1 + benchmarks/drivers/test_ops.py | 27 ++++ benchmarks/ops.py | 260 +++++++++++++++++++++++++++++++++ 3 files changed, 288 insertions(+) create mode 100644 benchmarks/drivers/test_ops.py create mode 100644 benchmarks/ops.py diff --git a/benchmarks/conftest.py b/benchmarks/conftest.py index b9ef6014..58cbdd3d 100644 --- a/benchmarks/conftest.py +++ b/benchmarks/conftest.py @@ -20,6 +20,7 @@ "test_build", "test_to_lp", "test_to_solver", + "test_ops", ) diff --git a/benchmarks/drivers/test_ops.py b/benchmarks/drivers/test_ops.py new file mode 100644 index 00000000..774bd5a8 --- /dev/null +++ b/benchmarks/drivers/test_ops.py @@ -0,0 +1,27 @@ +""" +Arithmetic operation micro-benchmarks. + +One benchmark per ``(operation, size profile)`` — the operands are built in +setup (not measured) and the fixture measures a single ``op(*operands)``. See +:mod:`benchmarks.ops` for the op registry and the size / alignment axes. +""" + +from __future__ import annotations + +from collections.abc import Callable + +import pytest + +from benchmarks.ops import OpSpec, Profile, iter_op_params + +_CASES = iter_op_params() + + +@pytest.mark.parametrize( + "op, profile", + _CASES, + ids=[f"{op.name}[{profile.key}]" for op, profile in _CASES], +) +def test_op(benchmark: Callable[..., object], op: OpSpec, profile: Profile) -> None: + operands = op.setup(profile) + benchmark(op.op, *operands) diff --git a/benchmarks/ops.py b/benchmarks/ops.py new file mode 100644 index 00000000..483e4315 --- /dev/null +++ b/benchmarks/ops.py @@ -0,0 +1,260 @@ +""" +Registry of arithmetic *operation* micro-benchmarks. + +Where :mod:`benchmarks.registry` benchmarks whole model builds, this benchmarks +single operations — ``var * array``, ``expr + expr``, ``expr <= c`` — with the +operands built *outside* the measured region, so a run isolates one op rather +than a whole build. That granularity attributes regressions to a specific path +(a whole-build benchmark says "kvl got heavier"; an op benchmark says "expr+expr +broadcast got heavier"). + +Two axes beyond the op itself: + +- **size profile** — ``small`` (1-D, 2000) is the cheap time/shape signal; + ``large`` (3-D, 3×4×1000) carries the memory signal and exercises multi-dim + broadcast/alignment. They differ in element count *and* dimensionality on + purpose; the asymmetric 3×4×1000 also catches dim-order/transpose bugs. +- **alignment** — for binary labelled ops, ``match`` (identical coords, the fast + path) vs ``broadcast`` (an extra dim → §9 cross-product). Both are where the + alignment-path regressions live, so they're first-class, not incidental. +""" + +from __future__ import annotations + +from collections.abc import Callable +from dataclasses import dataclass + +import numpy as np +import pandas as pd +import xarray as xr + +import linopy + +# --- size profiles ---------------------------------------------------------- + + +@dataclass(frozen=True) +class Profile: + """A benchmark size: named dimensions and their lengths.""" + + key: str + dims: tuple[str, ...] + shape: tuple[int, ...] + + @property + def size(self) -> int: + return int(np.prod(self.shape)) + + +SMALL = Profile("small", ("d0",), (2000,)) +LARGE = Profile("large", ("d0", "d1", "d2"), (3, 4, 1000)) +PROFILES: tuple[Profile, ...] = (SMALL, LARGE) + +# a broadcast operand always adds this one extra dim (kept small so the +# cross-product stays cheap while still exercising the broadcast path) +EXTRA_DIM = "b" +EXTRA_LEN = 5 + + +# --- operand builders (run in setup, never measured) ------------------------ + + +def _coords(dims: tuple[str, ...], shape: tuple[int, ...]) -> dict[str, pd.Index]: + return {d: pd.RangeIndex(n, name=d) for d, n in zip(dims, shape)} + + +def var(profile: Profile, name: str = "x") -> linopy.Variable: + """A variable spanning the profile's dimensions.""" + m = linopy.Model() + return m.add_variables( + coords=list(_coords(profile.dims, profile.shape).values()), + dims=list(profile.dims), + name=name, + ) + + +def array(profile: Profile) -> xr.DataArray: + """A coefficient array matching the profile's dims (the ``match`` case).""" + return xr.DataArray( + np.linspace(-1.0, 1.0, profile.size).reshape(profile.shape), + dims=list(profile.dims), + coords=_coords(profile.dims, profile.shape), + ) + + +def extra_array(_: Profile) -> xr.DataArray: + """An array on a *new* dim — broadcasting it introduces that dim (§9).""" + return xr.DataArray( + np.linspace(1.0, 2.0, EXTRA_LEN), + dims=[EXTRA_DIM], + coords={EXTRA_DIM: pd.RangeIndex(EXTRA_LEN, name=EXTRA_DIM)}, + ) + + +def extra_var(profile: Profile, name: str = "z") -> linopy.Variable: + """A variable on a *new* dim — for var+var broadcast.""" + m = linopy.Model() + return m.add_variables( + coords=[pd.RangeIndex(EXTRA_LEN, name=EXTRA_DIM)], dims=[EXTRA_DIM], name=name + ) + + +def expr(profile: Profile) -> linopy.LinearExpression: + """A linear expression spanning the profile's dims (coeffs vary).""" + return array(profile) * var(profile) + + +# --- op registry ------------------------------------------------------------ + + +@dataclass(frozen=True) +class OpSpec: + """One operation benchmark: build operands, then measure ``op(*operands)``.""" + + name: str + group: str + setup: Callable[[Profile], tuple] + op: Callable[..., object] + profiles: tuple[str, ...] = ("small", "large") + + +OP_REGISTRY: dict[str, OpSpec] = {} + + +def register_op( + name: str, + group: str, + setup: Callable[[Profile], tuple], + op: Callable[..., object], + profiles: tuple[str, ...] = ("small", "large"), +) -> None: + if name in OP_REGISTRY: + raise ValueError(f"op {name!r} already registered") + OP_REGISTRY[name] = OpSpec(name, group, setup, op, profiles) + + +def iter_op_params() -> list[tuple[OpSpec, Profile]]: + """``(op, profile)`` pairs — the pytest parametrize source.""" + by_key = {p.key: p for p in PROFILES} + return [(op, by_key[key]) for op in OP_REGISTRY.values() for key in op.profiles] + + +# --- the operations --------------------------------------------------------- +# Binary labelled ops register a `match` and a `broadcast` variant; the +# alignment case is baked into the operands the setup builds. + +# scaling / construction +register_op("var_mul_scalar", "scale", lambda p: (var(p),), lambda x: 2.0 * x) +register_op("var_div_scalar", "scale", lambda p: (var(p),), lambda x: x / 2.0) +register_op("var_neg", "scale", lambda p: (var(p),), lambda x: -x) +register_op("var_to_linexpr", "scale", lambda p: (var(p),), lambda x: 1 * x) +register_op( + "var_mul_array_match", "scale", lambda p: (var(p), array(p)), lambda x, a: a * x +) +register_op( + "var_mul_array_bcast", + "scale", + lambda p: (var(p), extra_array(p)), + lambda x, a: a * x, +) + +# variable arithmetic +register_op("var_add_scalar", "var_arith", lambda p: (var(p),), lambda x: x + 2.0) +register_op( + "var_add_array_match", "var_arith", lambda p: (var(p), array(p)), lambda x, a: x + a +) +register_op( + "var_add_array_bcast", + "var_arith", + lambda p: (var(p), extra_array(p)), + lambda x, a: x + a, +) +register_op( + "var_add_var_match", + "var_arith", + lambda p: (var(p, "x"), var(p, "y")), + lambda x, y: x + y, +) +register_op( + "var_add_var_bcast", + "var_arith", + lambda p: (var(p, "x"), extra_var(p)), + lambda x, z: x + z, +) +register_op( + "var_sub_var_match", + "var_arith", + lambda p: (var(p, "x"), var(p, "y")), + lambda x, y: x - y, +) + +# quadratic +register_op( + "var_mul_var", "quad", lambda p: (var(p, "x"), var(p, "y")), lambda x, y: x * y +) +register_op( + "expr_mul_var", "quad", lambda p: (expr(p), var(p, "y")), lambda e, y: e * y +) + +# expression arithmetic +register_op("expr_add_scalar", "expr_arith", lambda p: (expr(p),), lambda e: e + 2.0) +register_op( + "expr_add_array_match", + "expr_arith", + lambda p: (expr(p), array(p)), + lambda e, a: e + a, +) +register_op( + "expr_add_array_bcast", + "expr_arith", + lambda p: (expr(p), extra_array(p)), + lambda e, a: e + a, +) +register_op( + "expr_add_var", "expr_arith", lambda p: (expr(p), var(p, "y")), lambda e, y: e + y +) +register_op( + "expr_add_expr_match", + "expr_arith", + lambda p: (expr(p), expr(p)), + lambda a, b: a + b, +) +register_op( + "expr_add_expr_bcast", + "expr_arith", + lambda p: (expr(p), extra_array(p) * var(p)), + lambda a, b: a + b, +) +register_op( + "expr_sub_expr_match", + "expr_arith", + lambda p: (expr(p), expr(p)), + lambda a, b: a - b, +) +register_op("expr_mul_scalar", "expr_arith", lambda p: (expr(p),), lambda e: 2.0 * e) +register_op( + "expr_mul_array_match", + "expr_arith", + lambda p: (expr(p), array(p)), + lambda e, a: a * e, +) +register_op( + "expr_mul_array_bcast", + "expr_arith", + lambda p: (expr(p), extra_array(p)), + lambda e, a: a * e, +) + +# reductions (sum over d0, present in both profiles) +register_op("var_sum_dim", "reduce", lambda p: (var(p),), lambda x: x.sum("d0")) +register_op("expr_sum_dim", "reduce", lambda p: (expr(p),), lambda e: e.sum("d0")) +register_op("expr_sum_all", "reduce", lambda p: (expr(p),), lambda e: e.sum()) + +# constraint construction +register_op("con_le_scalar", "constraint", lambda p: (expr(p),), lambda e: e <= 2.0) +register_op( + "con_le_array", "constraint", lambda p: (expr(p), array(p)), lambda e, a: e <= a +) +register_op( + "con_eq_expr", "constraint", lambda p: (expr(p), expr(p)), lambda a, b: a == b +) From a99bcb484a8d2ebf2724476ffeee4a9a91d1ec6f Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 1 Jul 2026 15:19:30 +0200 Subject: [PATCH 2/2] bench(ops): single 3-D profile; add masking/groupby/merge ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Collapse to one 3-D profile (3×4×1000, ~12 K elements) — CodSpeed records time *and* memory per benchmark, so a second size wasn't buying a separate signal; one multi-dim profile keeps broadcast/alignment coverage with MB-scale ops above the noise floor, and halves the matrix. Benchmark ids drop the size suffix. Add three categories: absence/masking (expr.where / fillna / absence propagation — §4–§7, the semantics-heavy surface), groupby.sum, and an N-way merge (constraint-assembly cost). 35 ops, ~45 s/run with memory. --- benchmarks/drivers/test_ops.py | 14 ++---- benchmarks/ops.py | 83 ++++++++++++++++++++++++++-------- 2 files changed, 68 insertions(+), 29 deletions(-) diff --git a/benchmarks/drivers/test_ops.py b/benchmarks/drivers/test_ops.py index 774bd5a8..230dd8b7 100644 --- a/benchmarks/drivers/test_ops.py +++ b/benchmarks/drivers/test_ops.py @@ -12,16 +12,12 @@ import pytest -from benchmarks.ops import OpSpec, Profile, iter_op_params +from benchmarks.ops import GRID, OpSpec, iter_ops -_CASES = iter_op_params() +_OPS = iter_ops() -@pytest.mark.parametrize( - "op, profile", - _CASES, - ids=[f"{op.name}[{profile.key}]" for op, profile in _CASES], -) -def test_op(benchmark: Callable[..., object], op: OpSpec, profile: Profile) -> None: - operands = op.setup(profile) +@pytest.mark.parametrize("op", _OPS, ids=[op.name for op in _OPS]) +def test_op(benchmark: Callable[..., object], op: OpSpec) -> None: + operands = op.setup(GRID) benchmark(op.op, *operands) diff --git a/benchmarks/ops.py b/benchmarks/ops.py index 483e4315..152ff3ab 100644 --- a/benchmarks/ops.py +++ b/benchmarks/ops.py @@ -8,15 +8,16 @@ (a whole-build benchmark says "kvl got heavier"; an op benchmark says "expr+expr broadcast got heavier"). -Two axes beyond the op itself: - -- **size profile** — ``small`` (1-D, 2000) is the cheap time/shape signal; - ``large`` (3-D, 3×4×1000) carries the memory signal and exercises multi-dim - broadcast/alignment. They differ in element count *and* dimensionality on - purpose; the asymmetric 3×4×1000 also catches dim-order/transpose bugs. -- **alignment** — for binary labelled ops, ``match`` (identical coords, the fast - path) vs ``broadcast`` (an extra dim → §9 cross-product). Both are where the - alignment-path regressions live, so they're first-class, not incidental. +One 3-D size profile (``3×4×1000``, ~12 K elements): multi-dim so it exercises +broadcast/alignment across dims; ~MB-scale ops sit above the memory-measurement +noise floor; the asymmetric shape catches dim-order/transpose bugs. CodSpeed +records time *and* memory on every benchmark, so a second size isn't needed to +separate the two signals. + +The one axis beyond the op itself is **alignment** — for binary labelled ops, +``match`` (identical coords, the fast path) vs ``broadcast`` (an extra dim → §9 +cross-product). That's where the alignment-path regressions live, so it's +first-class, not incidental. """ from __future__ import annotations @@ -46,9 +47,7 @@ def size(self) -> int: return int(np.prod(self.shape)) -SMALL = Profile("small", ("d0",), (2000,)) -LARGE = Profile("large", ("d0", "d1", "d2"), (3, 4, 1000)) -PROFILES: tuple[Profile, ...] = (SMALL, LARGE) +GRID = Profile("grid", ("d0", "d1", "d2"), (3, 4, 1000)) # a broadcast operand always adds this one extra dim (kept small so the # cross-product stays cheap while still exercising the broadcast path) @@ -104,6 +103,27 @@ def expr(profile: Profile) -> linopy.LinearExpression: return array(profile) * var(profile) +def cond(profile: Profile) -> xr.DataArray: + """A boolean mask over the profile's dims (~half the slots).""" + return array(profile) > 0.0 + + +def masked_expr(profile: Profile) -> linopy.LinearExpression: + """An expression carrying absence (§4) — masked in place.""" + return expr(profile).where(cond(profile)) + + +def grouped_expr(profile: Profile) -> linopy.LinearExpression: + """An expression with a coarse ``g`` group coord on the last dim (8 groups).""" + last, n = profile.dims[-1], profile.shape[-1] + g = xr.DataArray( + np.arange(n) * 8 // n, + dims=[last], + coords={last: pd.RangeIndex(n, name=last)}, + ) + return expr(profile).assign_coords(g=g) + + # --- op registry ------------------------------------------------------------ @@ -115,7 +135,6 @@ class OpSpec: group: str setup: Callable[[Profile], tuple] op: Callable[..., object] - profiles: tuple[str, ...] = ("small", "large") OP_REGISTRY: dict[str, OpSpec] = {} @@ -126,17 +145,15 @@ def register_op( group: str, setup: Callable[[Profile], tuple], op: Callable[..., object], - profiles: tuple[str, ...] = ("small", "large"), ) -> None: if name in OP_REGISTRY: raise ValueError(f"op {name!r} already registered") - OP_REGISTRY[name] = OpSpec(name, group, setup, op, profiles) + OP_REGISTRY[name] = OpSpec(name, group, setup, op) -def iter_op_params() -> list[tuple[OpSpec, Profile]]: - """``(op, profile)`` pairs — the pytest parametrize source.""" - by_key = {p.key: p for p in PROFILES} - return [(op, by_key[key]) for op in OP_REGISTRY.values() for key in op.profiles] +def iter_ops() -> list[OpSpec]: + """Every registered op — the pytest parametrize source.""" + return list(OP_REGISTRY.values()) # --- the operations --------------------------------------------------------- @@ -245,7 +262,7 @@ def iter_op_params() -> list[tuple[OpSpec, Profile]]: lambda e, a: a * e, ) -# reductions (sum over d0, present in both profiles) +# reductions register_op("var_sum_dim", "reduce", lambda p: (var(p),), lambda x: x.sum("d0")) register_op("expr_sum_dim", "reduce", lambda p: (expr(p),), lambda e: e.sum("d0")) register_op("expr_sum_all", "reduce", lambda p: (expr(p),), lambda e: e.sum()) @@ -258,3 +275,29 @@ def iter_op_params() -> list[tuple[OpSpec, Profile]]: register_op( "con_eq_expr", "constraint", lambda p: (expr(p), expr(p)), lambda a, b: a == b ) + +# absence / masking (§4–§7) +register_op("expr_where", "mask", lambda p: (expr(p), cond(p)), lambda e, c: e.where(c)) +register_op("expr_fillna", "mask", lambda p: (masked_expr(p),), lambda e: e.fillna(0.0)) +register_op( + "expr_add_masked", + "mask", + lambda p: (expr(p), masked_expr(p)), + lambda a, b: a + b, +) + +# groupby +register_op( + "expr_groupby_sum", + "groupby", + lambda p: (grouped_expr(p),), + lambda e: e.groupby("g").sum(), +) + +# N-way assembly (constraint building sums many terms) +register_op( + "merge_sum", + "merge", + lambda p: tuple(expr(p) for _ in range(8)), + lambda *es: sum(es[1:], es[0]), +)