Skip to content
Open
1 change: 1 addition & 0 deletions doc/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Upcoming Version

*Other*

* Default internal integer labels to ``int32`` (configurable via ``linopy.options["label_dtype"]``; set it to ``np.int64`` for the old behavior), cutting memory by ~25% and speeding up model build by 10-35%. Adding variables or constraints raises ``ValueError`` if the labels would exceed the maximum of the configured label dtype.
* ``add_variables(binary=True, ...)`` now accepts ``lower``/``upper`` bounds, as long as they are 0 or 1. Previously binary bounds could only be set via the ``.lower``/``.upper`` setters after creation. (https://github.com/PyPSA/linopy/issues/776)
* ``add_piecewise_formulation`` gained an ``active_fill`` parameter that gates a partial ``active`` (defined over a subset of the indexed dimension, or masked) as always-active (``1``) or always-off (``0``); without it, a partial ``active`` — which was previously zeroed silently — now raises. Useful when one formulation mixes gated and ungated entities (e.g. committable and non-committable units sharing a ``status``). ``active_fill`` is transitional and will be removed once v1 semantics make ``active.reindex(coords).fillna(value)`` sufficient. (https://github.com/PyPSA/linopy/issues/796)

Expand Down
7 changes: 2 additions & 5 deletions linopy/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from __future__ import annotations

import operator
import os
from collections.abc import Callable, Generator, Hashable, Iterable, Sequence
from functools import cached_property, reduce, wraps
from pathlib import Path
Expand Down Expand Up @@ -159,12 +158,10 @@ def infer_schema_polars(ds: Dataset) -> dict[str, DataTypeClass]:
dict: A dictionary mapping column names to their corresponding Polars data types.
"""
schema: dict[str, DataTypeClass] = {}
np_major_version = int(np.__version__.split(".")[0])
use_int32 = os.name == "nt" and np_major_version < 2
for name, array in ds.items():
name = str(name)
if np.issubdtype(array.dtype, np.integer):
schema[name] = pl.Int32 if use_int32 else pl.Int64
schema[name] = pl.Int32 if array.dtype.itemsize <= 4 else pl.Int64
elif np.issubdtype(array.dtype, np.floating):
schema[name] = pl.Float64
elif np.issubdtype(array.dtype, np.bool_):
Expand Down Expand Up @@ -308,7 +305,7 @@ def save_join(*dataarrays: DataArray, integer_dtype: bool = False) -> Dataset:
)
arrs = xr_align(*dataarrays, join="outer")
if integer_dtype:
arrs = tuple([ds.fillna(-1).astype(int) for ds in arrs])
arrs = tuple([ds.fillna(-1).astype(options["label_dtype"]) for ds in arrs])
return Dataset({ds.name: ds for ds in arrs})


Expand Down
9 changes: 9 additions & 0 deletions linopy/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@

from typing import Any

import numpy as np

_VALID_LABEL_DTYPES = {np.int32, np.int64}


class OptionSettings:
"""Runtime configuration knobs (e.g. display widths). Use as a context manager or set values directly via ``options(key=value)``."""
Expand All @@ -30,6 +34,10 @@ def set_value(self, **kwargs: Any) -> None:
for k, v in kwargs.items():
if k not in self._defaults:
raise KeyError(f"{k} is not a valid setting.")
if k == "label_dtype" and v not in _VALID_LABEL_DTYPES:
raise ValueError(
f"label_dtype must be one of {_VALID_LABEL_DTYPES}, got {v}"
)
self._current_values[k] = v

def get_value(self, name: str) -> Any:
Expand Down Expand Up @@ -62,4 +70,5 @@ def __repr__(self) -> str:
options = OptionSettings(
display_max_rows=14,
display_max_terms=6,
label_dtype=np.int32,
)
9 changes: 6 additions & 3 deletions linopy/constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -748,7 +748,7 @@ def _to_dataset(self, nterm: int) -> Dataset:
# Map active row i -> flat position in full shape via con_labels
active_positions = self.active_positions
coeffs_2d = np.zeros((full_size, nterm), dtype=csr.dtype)
vars_2d = np.full((full_size, nterm), -1, dtype=np.int64)
vars_2d = np.full((full_size, nterm), -1, dtype=options["label_dtype"])
if csr.nnz > 0:
row_indices = np.repeat(active_positions, counts)
term_cols = np.arange(csr.nnz) - np.repeat(csr.indptr[:-1], counts)
Expand All @@ -772,7 +772,7 @@ def _to_dataset(self, nterm: int) -> Dataset:
)
ds = Dataset({"coeffs": coeffs_da, "vars": vars_da})
if self._cindex is not None:
labels_flat = np.full(full_size, -1, dtype=np.int64)
labels_flat = np.full(full_size, -1, dtype=options["label_dtype"])
labels_flat[active_positions] = self._con_labels
ds = assign_multiindex_safe(
ds,
Expand Down Expand Up @@ -2181,7 +2181,10 @@ def flat(self) -> pd.DataFrame:
return pd.DataFrame(columns=["coeffs", "vars", "labels", "key"])
df = pd.concat(dfs, ignore_index=True)
unique_labels = df.labels.unique()
map_labels = pd.Series(np.arange(len(unique_labels)), index=unique_labels)
map_labels = pd.Series(
np.arange(len(unique_labels), dtype=options["label_dtype"]),
index=unique_labels,
)
df["key"] = df.labels.map(map_labels)
return df

Expand Down
10 changes: 6 additions & 4 deletions linopy/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,9 @@ def __init__(self, data: Dataset | Any | None, model: Model) -> None:
)

if np.issubdtype(data.vars, np.floating):
data = assign_multiindex_safe(data, vars=data.vars.fillna(-1).astype(int))
data = assign_multiindex_safe(
data, vars=data.vars.fillna(-1).astype(options["label_dtype"])
)
if not np.issubdtype(data.coeffs, np.floating):
data["coeffs"].values = data.coeffs.values.astype(float)

Expand Down Expand Up @@ -1535,7 +1537,7 @@ def sanitize(self) -> Self:
linopy.LinearExpression
"""
if not np.issubdtype(self.vars.dtype, np.integer):
return self.assign(vars=self.vars.fillna(-1).astype(int))
return self.assign(vars=self.vars.fillna(-1).astype(options["label_dtype"]))

return self

Expand Down Expand Up @@ -1939,12 +1941,12 @@ def _simplify_row(vars_row: np.ndarray, coeffs_row: np.ndarray) -> np.ndarray:
# Combined has dimensions (.., CV_DIM, TERM_DIM)

# Drop terms where all vars are -1 (i.e., empty terms across all coordinates)
vars = combined.isel({CV_DIM: 0}).astype(int)
vars = combined.isel({CV_DIM: 0}).astype(options["label_dtype"])
non_empty_terms = (vars != -1).any(dim=[d for d in vars.dims if d != TERM_DIM])
combined = combined.isel({TERM_DIM: non_empty_terms})

# Extract vars and coeffs from the combined result
vars = combined.isel({CV_DIM: 0}).astype(int)
vars = combined.isel({CV_DIM: 0}).astype(options["label_dtype"])
coeffs = combined.isel({CV_DIM: 1})

# Create new dataset with simplified data
Expand Down
21 changes: 19 additions & 2 deletions linopy/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
replace_by_map,
to_path,
)
from linopy.config import options
from linopy.constants import (
GREATER_EQUAL,
HELPER_DIMS,
Expand Down Expand Up @@ -824,7 +825,15 @@ def add_variables(

start = self._xCounter
end = start + data.labels.size
data.labels.values = np.arange(start, end).reshape(data.labels.shape)
label_dtype = options["label_dtype"]
if end > np.iinfo(label_dtype).max:
raise ValueError(
f"Number of labels ({end}) exceeds the maximum value for "
f"{label_dtype.__name__} ({np.iinfo(label_dtype).max})."
)
data.labels.values = np.arange(
start, end, dtype=options["label_dtype"]
).reshape(data.labels.shape)
self._xCounter += data.labels.size

if mask is not None:
Expand Down Expand Up @@ -969,7 +978,15 @@ def _allocate_constraint_labels(
"""Assign label ranges from the constraint counter and apply an optional mask."""
start = self._cCounter
end = start + data.labels.size
data.labels.values = np.arange(start, end).reshape(data.labels.shape)
label_dtype = options["label_dtype"]
if end > np.iinfo(label_dtype).max:
raise ValueError(
f"Number of labels ({end}) exceeds the maximum value for "
f"{label_dtype.__name__} ({np.iinfo(label_dtype).max})."
)
data.labels.values = np.arange(start, end, dtype=label_dtype).reshape(
data.labels.shape
)
self._cCounter += data.labels.size
if mask is not None:
data.labels.values = np.where(mask.values, data.labels.values, -1)
Expand Down
15 changes: 11 additions & 4 deletions linopy/variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -1272,7 +1272,9 @@ def ffill(self, dim: str, limit: None = None) -> Variable:
.map(DataArray.ffill, dim=dim, limit=limit)
.fillna(self._fill_value)
)
return self.assign_multiindex_safe(labels=data.labels.astype(int))
return self.assign_multiindex_safe(
labels=data.labels.astype(options["label_dtype"])
)

def bfill(self, dim: str, limit: None = None) -> Variable:
"""
Expand All @@ -1299,7 +1301,7 @@ def bfill(self, dim: str, limit: None = None) -> Variable:
.map(DataArray.bfill, dim=dim, limit=limit)
.fillna(self._fill_value)
)
return self.assign(labels=data.labels.astype(int))
return self.assign(labels=data.labels.astype(options["label_dtype"]))

def sanitize(self) -> Variable:
"""
Expand All @@ -1310,7 +1312,9 @@ def sanitize(self) -> Variable:
linopy.Variable
"""
if issubdtype(self.labels.dtype, floating):
return self.assign(labels=self.labels.fillna(-1).astype(int))
return self.assign(
labels=self.labels.fillna(-1).astype(options["label_dtype"])
)
return self

def equals(self, other: Variable) -> bool:
Expand Down Expand Up @@ -2032,7 +2036,10 @@ def flat(self) -> pd.DataFrame:
"""
df = pd.concat([self[k].flat for k in self], ignore_index=True)
unique_labels = df.labels.unique()
map_labels = pd.Series(np.arange(len(unique_labels)), index=unique_labels)
map_labels = pd.Series(
np.arange(len(unique_labels), dtype=options["label_dtype"]),
index=unique_labels,
)
df["key"] = df.labels.map(map_labels)
return df

Expand Down
75 changes: 75 additions & 0 deletions test/test_dtypes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""Tests for int32 default label dtype."""

import numpy as np
import pytest

from linopy import Model
from linopy.config import options


def test_default_label_dtype_is_int32() -> None:
    """The ``label_dtype`` option is ``np.int32`` when nothing overrides it."""
    configured = options["label_dtype"]
    assert configured == np.int32


def test_variable_labels_are_int32() -> None:
    """Labels of a newly added variable carry the default int32 dtype."""
    model = Model()
    var = model.add_variables(lower=0, upper=10, coords=[range(5)], name="x")
    labels_dtype = var.labels.dtype
    assert labels_dtype == np.int32


def test_constraint_labels_are_int32() -> None:
    """Labels of a newly added constraint carry the default int32 dtype."""
    model = Model()
    var = model.add_variables(lower=0, upper=10, coords=[range(5)], name="x")
    model.add_constraints(var >= 1, name="c")
    constraint = model.constraints["c"]
    assert constraint.labels.dtype == np.int32


def test_expression_vars_are_int32() -> None:
    """The ``vars`` array of an expression built from a variable is int32."""
    model = Model()
    var = model.add_variables(lower=0, upper=10, coords=[range(5)], name="x")
    affine = 2 * var + 1
    vars_dtype = affine.vars.dtype
    assert vars_dtype == np.int32


def test_solve_with_int32_labels() -> None:
    """Solve a small LP end-to-end with the default int32 labels.

    Skipped when ``highspy`` is not installed.
    """
    # Call importorskip inside the test body rather than in a decorator:
    # decorator arguments are evaluated at module import, where a failed
    # importorskip skips the entire module instead of just this test.
    pytest.importorskip("highspy", reason="highspy not installed")

    m = Model()
    x = m.add_variables(lower=0, upper=10, name="x")
    y = m.add_variables(lower=0, upper=10, name="y")
    m.add_constraints(x + y <= 15, name="c1")
    m.add_objective(x + 2 * y, sense="max")
    m.solve("highs")
    # Optimum is x=5, y=10 -> 5 + 2 * 10 = 25.
    assert m.objective.value == pytest.approx(25.0)


def test_overflow_guard_variables() -> None:
    """Adding variables past the int32 label range raises ``ValueError``."""
    int32_max = np.iinfo(np.int32).max
    model = Model()
    # Push the variable counter to the edge so five more labels cannot fit.
    model._xCounter = int32_max - 1
    with pytest.raises(ValueError, match="exceeds the maximum"):
        model.add_variables(lower=0, upper=1, coords=[range(5)], name="x")


def test_overflow_guard_constraints() -> None:
    """Adding constraints past the int32 label range raises ``ValueError``."""
    int32_max = np.iinfo(np.int32).max
    model = Model()
    var = model.add_variables(lower=0, upper=1, coords=[range(5)], name="x")
    # Push the constraint counter to the edge so five more labels cannot fit.
    model._cCounter = int32_max - 1
    with pytest.raises(ValueError, match="exceeds the maximum"):
        model.add_constraints(var >= 0, name="c")


def test_label_dtype_option_int64() -> None:
    """With ``label_dtype`` set to int64, labels and expression vars are int64."""
    # NOTE(review): the source lost its indentation; every statement is kept
    # inside the ``with options:`` block — confirm against the original file.
    with options:
        options["label_dtype"] = np.int64
        model = Model()
        var = model.add_variables(lower=0, upper=10, coords=[range(5)], name="x")
        assert var.labels.dtype == np.int64
        affine = 2 * var + 1
        assert affine.vars.dtype == np.int64


def test_label_dtype_rejects_invalid() -> None:
    """Assigning a non-integer dtype to ``label_dtype`` raises ``ValueError``."""
    expected_message = "label_dtype must be one of"
    with pytest.raises(ValueError, match=expected_message):
        options["label_dtype"] = np.float64
Loading