Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ Release Notes
.. Upcoming Version

* Fix compatibility for xpress versions below 9.6 (regression)
* Performance: Up to 50x faster ``repr()`` for variables/constraints via O(log n) label lookup and direct numpy indexing
* Performance: Up to 46x faster ``ncons`` property by replacing ``.flat.labels.unique()`` with direct counting

Version 0.5.8
--------------
Expand Down
175 changes: 169 additions & 6 deletions linopy/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -750,7 +750,118 @@ def get_dims_with_index_levels(
return dims_with_levels


def get_label_position(
class LabelPositionIndex:
    """
    Index for fast O(log n) lookup of label positions using binary search.

    The index keeps the start label of every item in the container, sorted
    in ascending order, together with the matching item names. A lookup
    bisects the start labels to pick a candidate item and then verifies the
    label against that item's actual ``range``.

    The index is built lazily on the first lookup and rebuilt on the next
    lookup after ``invalidate()`` has been called.

    Parameters
    ----------
    obj : Any
        Container object with items() method returning (name, val) pairs,
        where val has .labels and .range attributes. The container must also
        support ``obj[name]`` item access.
    """

    __slots__ = ("_starts", "_names", "_obj", "_built")

    def __init__(self, obj: Any) -> None:
        self._obj = obj
        self._starts: np.ndarray | None = None
        self._names: list[str] | None = None
        self._built = False

    def _build_index(self) -> None:
        """Build the sorted index of label ranges (no-op if already built)."""
        if self._built:
            return

        ranges = []
        for name, val in self._obj.items():
            # Only the start of each range is indexed; the stop is checked
            # against the live object at lookup time.
            start, _ = val.range
            ranges.append((start, name))

        # Sort by start value only, so names are never compared.
        ranges.sort(key=lambda x: x[0])
        self._starts = np.array([r[0] for r in ranges])
        self._names = [r[1] for r in ranges]
        self._built = True

    def invalidate(self) -> None:
        """Invalidate the index (call when items are added/removed)."""
        self._built = False
        self._starts = None
        self._names = None

    def _locate(self, value: int) -> tuple[str, dict, tuple[int, ...]]:
        """
        Resolve a valid (non ``-1``) label to ``(name, coord, index)``.

        Shared implementation behind ``find_single`` and
        ``find_single_with_index`` so both always agree.

        Raises
        ------
        ValueError
            If no item of the container holds ``value`` in its range.
        """
        self._build_index()
        starts = self._starts
        names = self._names
        assert starts is not None and names is not None

        # Right-bisect minus one gives the last item whose start is <= value.
        # The result can never exceed len(starts) - 1, so only the lower
        # bound needs checking.
        idx = int(np.searchsorted(starts, value, side="right")) - 1
        if idx < 0:
            raise ValueError(f"Label {value} is not existent in the model.")

        name = names[idx]
        val = self._obj[name]
        start, stop = val.range

        # The candidate only matches by start; verify the value is in range.
        if value < start or value >= stop:
            raise ValueError(f"Label {value} is not existent in the model.")

        labels = val.labels
        # Offset within the item -> multi-dimensional position in its labels.
        index = np.unravel_index(value - start, labels.shape)
        coord = {dim: labels.indexes[dim][i] for dim, i in zip(labels.dims, index)}
        return name, coord, index

    def find_single(self, value: int) -> tuple[str, dict] | tuple[None, None]:
        """
        Find the name and coordinates for a single label value.

        Returns ``(None, None)`` for the missing-label marker ``-1`` and
        raises ``ValueError`` for labels not covered by any item.
        """
        if value == -1:
            return None, None

        name, coord, _ = self._locate(value)
        return name, coord

    def find_single_with_index(
        self, value: int
    ) -> tuple[str, dict, tuple[int, ...]] | tuple[None, None, None]:
        """
        Find name, coordinates, and raw numpy index for a single label value.

        Returns (name, coord, index) where index is a tuple of integers that
        can be used for direct numpy indexing (e.g., arr.values[index]).
        This avoids the overhead of xarray's .sel() method.

        Returns ``(None, None, None)`` for the missing-label marker ``-1``
        and raises ``ValueError`` for labels not covered by any item.
        """
        if value == -1:
            return None, None, None

        return self._locate(value)


def _get_label_position_linear(
obj: Any, values: int | np.ndarray
) -> (
tuple[str, dict]
Expand All @@ -760,6 +871,9 @@ def get_label_position(
):
"""
Get tuple of name and coordinate for variable labels.

This is the original O(n) implementation that scans through all items.
Used only for testing/benchmarking comparisons.
"""

def find_single(value: int) -> tuple[str, dict] | tuple[None, None]:
Expand Down Expand Up @@ -795,6 +909,53 @@ def find_single(value: int) -> tuple[str, dict] | tuple[None, None]:
raise ValueError("Array's with more than two dimensions is not supported")


def get_label_position(
    obj: Any,
    values: int | np.ndarray,
    index: LabelPositionIndex | None = None,
) -> (
    tuple[str, dict]
    | tuple[None, None]
    | list[tuple[str, dict] | tuple[None, None]]
    | list[list[tuple[str, dict] | tuple[None, None]]]
):
    """
    Get tuple of name and coordinate for variable labels.

    Each label is resolved with ``index.find_single``. Pass a pre-built
    ``index`` to reuse it across calls; when ``index`` is None a temporary
    index over ``obj`` is created for this call only and then discarded.

    Parameters
    ----------
    obj : Any
        Container object with items() method (Variables or Constraints).
    values : int or np.ndarray
        Label value(s) to look up. Scalars, 1-D and 2-D inputs are supported.
    index : LabelPositionIndex, optional
        Pre-built index for fast lookups. If None, one will be created.

    Returns
    -------
    tuple or list
        (name, coord) tuple for single values, a list of tuples for 1-D
        input, or a list of lists for 2-D input. For 2-D input the outer
        list runs over the columns of ``values`` (its transpose is iterated).

    Raises
    ------
    ValueError
        If ``values`` has more than two dimensions.
    """
    if index is None:
        index = LabelPositionIndex(obj)

    if isinstance(values, int):
        return index.find_single(values)

    # asarray avoids copying input that is already an ndarray; the data is
    # only read below.
    values = np.asarray(values)
    ndim = values.ndim
    if ndim == 0:
        return index.find_single(values.item())
    elif ndim == 1:
        return [index.find_single(int(v)) for v in values]
    elif ndim == 2:
        return [[index.find_single(int(v)) for v in col] for col in values.T]
    else:
        raise ValueError("Array's with more than two dimensions is not supported")


def print_coord(coord: dict[str, Any] | Iterable[Any]) -> str:
"""
Format coordinates into a string representation.
Expand Down Expand Up @@ -838,14 +999,16 @@ def print_single_variable(model: Any, label: int) -> str:
return "None"

variables = model.variables
name, coord = variables.get_label_position(label)
name, coord, index = variables.get_label_position_with_index(label)

lower = variables[name].lower.sel(coord).item()
upper = variables[name].upper.sel(coord).item()
var = variables[name]
# Use direct numpy indexing instead of .sel() for performance
lower = var.lower.values[index]
upper = var.upper.values[index]

if variables[name].attrs["binary"]:
if var.attrs["binary"]:
bounds = " ∈ {0, 1}"
elif variables[name].attrs["integer"]:
elif var.attrs["integer"]:
bounds = f" ∈ Z ⋂ [{lower:.4g},...,{upper:.4g}]"
else:
bounds = f" ∈ [{lower:.4g}, {upper:.4g}]"
Expand Down
46 changes: 43 additions & 3 deletions linopy/constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

from linopy import expressions, variables
from linopy.common import (
LabelPositionIndex,
LocIndexer,
align_lines_by_delimiter,
assign_multiindex_safe,
Expand Down Expand Up @@ -696,6 +697,7 @@ class Constraints:

data: dict[str, Constraint]
model: Model
_label_position_index: LabelPositionIndex | None = None

dataset_attrs = ["labels", "coeffs", "vars", "sign", "rhs"]
dataset_names = [
Expand Down Expand Up @@ -792,12 +794,19 @@ def add(self, constraint: Constraint) -> None:
Add a constraint to the constraints constrainer.
"""
self.data[constraint.name] = constraint
self._invalidate_label_position_index()

def remove(self, name: str) -> None:
    """
    Drop the constraint stored under `name` from this container.

    The cached label position index is invalidated afterwards. A missing
    `name` raises ``KeyError`` before any invalidation happens.
    """
    del self.data[name]
    self._invalidate_label_position_index()

def _invalidate_label_position_index(self) -> None:
    """Reset the cached constraint label position index, if one exists."""
    cached = self._label_position_index
    if cached is None:
        # Nothing has been cached yet, so there is nothing to reset.
        return
    cached.invalidate()

@property
def labels(self) -> Dataset:
Expand Down Expand Up @@ -869,9 +878,36 @@ def ncons(self) -> int:
"""
Get the number all constraints effectively used by the model.

These excludes constraints with missing labels.
This excludes constraints with missing labels or where all variables
are masked (vars == -1).
"""
return len(self.flat.labels.unique())
total = 0
for con in self.data.values():
labels = con.labels.values
vars_arr = con.vars.values

# Handle scalar constraint (single constraint, labels is 0-d)
if labels.ndim == 0:
# Scalar: valid if label != -1 and any var != -1
if labels != -1 and (vars_arr != -1).any():
total += 1
continue

# Array constraint: labels has constraint dimensions, vars has
# constraint dimensions + _term dimension
valid_labels = labels != -1

# Check if any variable in each constraint is valid (not -1)
# vars has shape (..., n_terms) where ... matches labels shape
has_valid_var = (vars_arr != -1).any(axis=-1)

active = valid_labels & has_valid_var

if con.mask is not None:
active = active & con.mask.values

total += int(active.sum())
return total

@property
def inequalities(self) -> Constraints:
Expand Down Expand Up @@ -957,8 +993,12 @@ def get_label_position(
):
"""
Get tuple of name and coordinate for constraint labels.

Uses an optimized O(log n) binary search implementation with a cached index.
"""
return get_label_position(self, values)
if self._label_position_index is None:
self._label_position_index = LabelPositionIndex(self)
return get_label_position(self, values, self._label_position_index)

def print_labels(
self, values: Sequence[int], display_max_terms: int | None = None
Expand Down
48 changes: 46 additions & 2 deletions linopy/variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

import linopy.expressions as expressions
from linopy.common import (
LabelPositionIndex,
LocIndexer,
as_dataarray,
assign_multiindex_safe,
Expand Down Expand Up @@ -1166,6 +1167,7 @@ class Variables:

data: dict[str, Variable]
model: Model
_label_position_index: LabelPositionIndex | None = None

dataset_attrs = ["labels", "lower", "upper"]
dataset_names = ["Labels", "Lower bounds", "Upper bounds"]
Expand Down Expand Up @@ -1256,12 +1258,19 @@ def add(self, variable: Variable) -> None:
Add a variable to the variables container.
"""
self.data[variable.name] = variable
self._invalidate_label_position_index()

def remove(self, name: str) -> None:
    """
    Drop the variable stored under `name` from this container.

    The cached label position index is invalidated afterwards. A missing
    `name` raises ``KeyError`` before any invalidation happens.
    """
    del self.data[name]
    self._invalidate_label_position_index()

def _invalidate_label_position_index(self) -> None:
    """Reset the cached variable label position index, if one exists."""
    cached = self._label_position_index
    if cached is None:
        # Nothing has been cached yet, so there is nothing to reset.
        return
    cached.invalidate()

@property
def attrs(self) -> dict[Any, Any]:
Expand Down Expand Up @@ -1321,7 +1330,14 @@ def nvars(self) -> int:

These excludes variables with missing labels.
"""
return len(self.flat.labels.unique())
total = 0
for var in self.data.values():
labels = var.labels.values
if var.mask is not None:
total += int((labels[var.mask.values] != -1).sum())
else:
total += int((labels != -1).sum())
return total

@property
def binaries(self) -> Variables:
Expand Down Expand Up @@ -1418,8 +1434,36 @@ def get_label_range(self, name: str) -> tuple[int, int]:
def get_label_position(self, values: int | ndarray) -> Any:
    """
    Map variable label(s) to their name and coordinate.

    The lookup is delegated to the module-level ``get_label_position``
    helper, using a ``LabelPositionIndex`` that is created on first use
    and then kept on this container for later calls.
    """
    cached = self._label_position_index
    if cached is None:
        # First lookup: create the index and store it for reuse.
        cached = self._label_position_index = LabelPositionIndex(self)
    return get_label_position(self, values, cached)

def get_label_position_with_index(
    self, label: int
) -> tuple[str, dict, tuple[int, ...]] | tuple[None, None, None]:
    """
    Get name, coordinate, and raw numpy index for a single variable label.

    This is an optimized version that also returns the raw index for direct
    numpy array access, avoiding xarray's .sel() overhead.

    The ``LabelPositionIndex`` used for the lookup is created on first use
    and kept on this container for later calls.

    Parameters
    ----------
    label : int
        The variable label to look up.

    Returns
    -------
    tuple
        (name, coord, index) where index is a tuple for numpy indexing,
        or (None, None, None) if label is -1.
    """
    if self._label_position_index is None:
        self._label_position_index = LabelPositionIndex(self)
    return self._label_position_index.find_single_with_index(label)

def print_labels(self, values: list[int]) -> None:
"""
Expand Down
Loading
Loading