diff --git a/nemo_curator/backends/base.py b/nemo_curator/backends/base.py index e302a37eb6..fdf76f3dc7 100644 --- a/nemo_curator/backends/base.py +++ b/nemo_curator/backends/base.py @@ -14,6 +14,7 @@ from abc import ABC, abstractmethod from dataclasses import dataclass +from pathlib import Path from typing import TYPE_CHECKING, Any from nemo_curator.core.utils import ignore_ray_head_node @@ -52,8 +53,22 @@ def __init__(self, config: dict[str, Any] | None = None, ignore_head_node: bool self.ignore_head_node = ignore_head_node or ignore_ray_head_node() @abstractmethod - def execute(self, stages: list["ProcessingStage"], initial_tasks: list[Task] | None = None) -> None: - """Execute the pipeline.""" + def execute( + self, + stages: list["ProcessingStage"], + initial_tasks: list[Task] | None = None, + checkpoint_path: str | Path | None = None, + ) -> None: + """Execute the pipeline. + + Args: + stages: Execution stages to run. + initial_tasks: Initial tasks. Empty list / ``EmptyTask`` is used when ``None``. + checkpoint_path: If provided, lineage records (parents, children, type, + completed flag) for every task that flows through the pipeline are + persisted to an LMDB file at this path. The file is owned by a + single Ray actor and is safe to place on NFS/Lustre. 
+ """ class BaseStageAdapter: diff --git a/nemo_curator/backends/ray_actor_pool/executor.py b/nemo_curator/backends/ray_actor_pool/executor.py index b69d9e52e3..3058b50893 100644 --- a/nemo_curator/backends/ray_actor_pool/executor.py +++ b/nemo_curator/backends/ray_actor_pool/executor.py @@ -14,6 +14,7 @@ import uuid from copy import deepcopy +from pathlib import Path from typing import TYPE_CHECKING import numpy as np @@ -25,6 +26,7 @@ from nemo_curator.backends.base import BaseExecutor from nemo_curator.backends.utils import RayStageSpecKeys, execute_setup_on_node, register_loguru_serializer from nemo_curator.tasks import EmptyTask, Task +from nemo_curator.utils.lineage_store import LINEAGE_ACTOR_NAME, LineageWriterActor from .adapter import RayActorPoolStageAdapter from .raft_adapter import RayActorPoolRAFTAdapter @@ -78,12 +80,19 @@ def __init__( self.show_progress = show_progress self.progress_interval = progress_interval - def execute(self, stages: list["ProcessingStage"], initial_tasks: list[Task] | None = None) -> list[Task]: # noqa: PLR0912 + def execute( # noqa: PLR0912, PLR0915, C901 + self, + stages: list["ProcessingStage"], + initial_tasks: list[Task] | None = None, + checkpoint_path: str | Path | None = None, + ) -> list[Task]: """Execute the pipeline stages using ActorPool. Args: stages: List of processing stages to execute initial_tasks: Initial tasks to process (can be None for empty start) + checkpoint_path: If provided, spawn a :class:`LineageWriterActor` that + records the task DAG to LMDB at this path for the duration of the run. 
Returns: List of final processed tasks @@ -93,10 +102,19 @@ def execute(self, stages: list["ProcessingStage"], initial_tasks: list[Task] | N session_id = uuid.uuid4().bytes + lineage_actor = None try: # Initialize Ray and register loguru serializer register_loguru_serializer() ray.init(ignore_reinit_error=True, runtime_env=_parse_runtime_env(self.config.get("runtime_env", {}))) + if checkpoint_path is not None: + absolute_checkpoint_path = str(Path(checkpoint_path).absolute()) + lineage_actor = LineageWriterActor.options( + name=LINEAGE_ACTOR_NAME, + lifetime="detached", + get_if_exists=True, + ).remote(path=absolute_checkpoint_path) + logger.info(f"Spawned LineageWriterActor; checkpoint at {absolute_checkpoint_path}") # Execute setup on node for all stages BEFORE processing begins execute_setup_on_node(stages, ignore_head_node=self.ignore_head_node) @@ -160,6 +178,12 @@ def execute(self, stages: list["ProcessingStage"], initial_tasks: list[Task] | N return final_results finally: + if lineage_actor is not None: + try: + ray.get(lineage_actor.close.remote()) + except Exception as e: # noqa: BLE001 + logger.warning(f"Failed to close LineageWriterActor: {e}") + ray.kill(lineage_actor) # Clean up all Ray resources including named actors logger.info("Shutting down Ray to clean up all resources...") ray.shutdown() diff --git a/nemo_curator/backends/ray_data/executor.py b/nemo_curator/backends/ray_data/executor.py index 19e78d8c2f..6441346042 100644 --- a/nemo_curator/backends/ray_data/executor.py +++ b/nemo_curator/backends/ray_data/executor.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from pathlib import Path from typing import TYPE_CHECKING, Any import ray @@ -21,6 +22,7 @@ from nemo_curator.backends.base import BaseExecutor from nemo_curator.backends.utils import execute_setup_on_node, register_loguru_serializer from nemo_curator.tasks import EmptyTask, Task +from nemo_curator.utils.lineage_store import LINEAGE_ACTOR_NAME, LineageWriterActor from .adapter import RayDataStageAdapter @@ -41,12 +43,19 @@ class RayDataExecutor(BaseExecutor): def __init__(self, config: dict[str, Any] | None = None, ignore_head_node: bool = False): super().__init__(config, ignore_head_node) - def execute(self, stages: list["ProcessingStage"], initial_tasks: list[Task] | None = None) -> list[Task]: + def execute( + self, + stages: list["ProcessingStage"], + initial_tasks: list[Task] | None = None, + checkpoint_path: str | Path | None = None, + ) -> list[Task]: """Execute the pipeline stages using Ray Data. Args: stages (list[ProcessingStage]): List of processing stages to execute initial_tasks (list[Task], optional): Initial tasks to process (can be None for empty start) + checkpoint_path (str | Path, optional): If provided, spawn a :class:`LineageWriterActor` + that records the task DAG to LMDB at this path for the duration of the run. Returns: list[Task]: List of final processed tasks @@ -60,6 +69,7 @@ def execute(self, stages: list["ProcessingStage"], initial_tasks: list[Task] | N # Initialize with initial tasks if provided, otherwise start with EmptyTask tasks: list[Task] = initial_tasks or [EmptyTask] output_tasks: list[Task] = [] + lineage_actor = None # When runtime_env with pip is used, Ray's pip plugin sets up per-stage virtualenvs # lazily on first task dispatch by cloning the current virtualenv. The NeMo Curator # container's /opt/venv is created with `uv venv --seed` so pip is available in clones. 
@@ -69,6 +79,14 @@ def execute(self, stages: list["ProcessingStage"], initial_tasks: list[Task] | N ray.init( ignore_reinit_error=True, runtime_env={"env_vars": {"RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES": ""}} ) + if checkpoint_path is not None: + absolute_checkpoint_path = str(Path(checkpoint_path).absolute()) + lineage_actor = LineageWriterActor.options( + name=LINEAGE_ACTOR_NAME, + lifetime="detached", + get_if_exists=True, + ).remote(path=absolute_checkpoint_path) + logger.info(f"Spawned LineageWriterActor; checkpoint at {absolute_checkpoint_path}") # Convert tasks to dataset current_dataset = self._tasks_to_dataset(tasks) @@ -97,6 +115,12 @@ def execute(self, stages: list["ProcessingStage"], initial_tasks: list[Task] | N output_tasks = self._dataset_to_tasks(current_dataset) logger.info(f"Pipeline completed. Final results: {len(output_tasks)} tasks") finally: + if lineage_actor is not None: + try: + ray.get(lineage_actor.close.remote()) + except Exception as e: # noqa: BLE001 + logger.warning(f"Failed to close LineageWriterActor: {e}") + ray.kill(lineage_actor) # This ensures we unset all the env vars set above during initialize and kill the pending actors. ray.shutdown() return output_tasks diff --git a/nemo_curator/backends/xenna/executor.py b/nemo_curator/backends/xenna/executor.py index aaf51c8383..8b6e703c73 100644 --- a/nemo_curator/backends/xenna/executor.py +++ b/nemo_curator/backends/xenna/executor.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from pathlib import Path from typing import Any import ray @@ -24,6 +25,7 @@ from nemo_curator.backends.xenna.adapter import create_named_xenna_stage_adapter from nemo_curator.stages.base import ProcessingStage from nemo_curator.tasks import EmptyTask, Task +from nemo_curator.utils.lineage_store import LINEAGE_ACTOR_NAME, LineageWriterActor class XennaExecutor(BaseExecutor): @@ -59,12 +61,20 @@ def __init__(self, config: dict[str, Any] | None = None, ignore_head_node: bool "autoscale_interval_s": 180, } - def execute(self, stages: list[ProcessingStage], initial_tasks: list[Task] | None = None) -> list[Task]: + def execute( + self, + stages: list[ProcessingStage], + initial_tasks: list[Task] | None = None, + checkpoint_path: str | Path | None = None, + ) -> list[Task]: """Execute the pipeline using Cosmos-Xenna. Args: stages (list[ProcessingStage]): The stages to run initial_tasks (list[Task], optional): The initial tasks to run. Empty list of Task is used if not provided. + checkpoint_path (str | Path, optional): If provided, spawn a :class:`LineageWriterActor` + that records the task DAG (parents, children, type, completed flag) to LMDB at + this path for the duration of the run. Returns: list[Task]: List of output tasks from the pipeline @@ -134,6 +144,7 @@ def execute(self, stages: list[ProcessingStage], initial_tasks: list[Task] | Non # Log pipeline configuration logger.info(f"Execution mode: {exec_mode.name}") + lineage_actor = None try: register_loguru_serializer() # Prevent Ray from overriding accelerator env vars when num_gpus=0, letting Xenna manage them instead. 
@@ -146,6 +157,14 @@ def execute(self, stages: list[ProcessingStage], initial_tasks: list[Task] | Non } }, ) + if checkpoint_path is not None: + absolute_checkpoint_path = str(Path(checkpoint_path).absolute()) + lineage_actor = LineageWriterActor.options( + name=LINEAGE_ACTOR_NAME, + lifetime="detached", + get_if_exists=True, + ).remote(path=absolute_checkpoint_path) + logger.info(f"Spawned LineageWriterActor; checkpoint at {absolute_checkpoint_path}") # Run the pipeline (this will re-initialize ray but that'll be a no-op and the ray.init above will take precedence) results = pipelines_v1.run_pipeline(pipeline_spec) logger.info(f"Pipeline completed successfully with {len(results) if results else 0} output tasks") @@ -153,6 +172,12 @@ def execute(self, stages: list[ProcessingStage], initial_tasks: list[Task] | Non logger.error(f"Pipeline execution failed: {e}") raise finally: + if lineage_actor is not None: + try: + ray.get(lineage_actor.close.remote()) + except Exception as e: # noqa: BLE001 + logger.warning(f"Failed to close LineageWriterActor: {e}") + ray.kill(lineage_actor) # This ensures we unset all the env vars set above during initialize and kill the pending actors. ray.shutdown() return results if results else [] diff --git a/nemo_curator/pipeline/pipeline.py b/nemo_curator/pipeline/pipeline.py index 246ffcffc1..f94df43eb9 100644 --- a/nemo_curator/pipeline/pipeline.py +++ b/nemo_curator/pipeline/pipeline.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from pathlib import Path from typing import Any from loguru import logger from nemo_curator.backends.base import BaseExecutor -from nemo_curator.stages.base import CompositeStage, ProcessingStage +from nemo_curator.stages.base import CompositeStage, ProcessingStage, assign_root_lineage from nemo_curator.tasks import Task @@ -174,18 +175,31 @@ def describe(self) -> str: return "\n".join(lines) - def run(self, executor: BaseExecutor | None = None, initial_tasks: list[Task] | None = None) -> list[Task] | None: + def run( + self, + executor: BaseExecutor | None = None, + initial_tasks: list[Task] | None = None, + checkpoint_path: str | Path | None = None, + ) -> list[Task] | None: """Run the pipeline. Args: executor (BaseExecutor): Executor to use initial_tasks (list[Task], optional): Initial tasks to start the pipeline with. Defaults to None. + checkpoint_path (str | Path, optional): If provided, a single LMDB file at this path + records lineage (parents, children, task type, completed flag) for every task that + flows through the pipeline, keyed by ``_udid``. Owned by one Ray actor, so the file + may live on NFS/Lustre. When omitted, no lineage is persisted. Returns: list[Task] | None: List of tasks """ self.build() + if checkpoint_path is not None: + checkpoint_path = Path(checkpoint_path).absolute() + checkpoint_path.parent.mkdir(parents=True, exist_ok=True) + if executor is None: from nemo_curator.backends.xenna import XennaExecutor @@ -212,4 +226,6 @@ def run(self, executor: BaseExecutor | None = None, initial_tasks: list[Task] | "The executor will schedule GPU stages on GPUs not held by Serve." 
) - return executor.execute(self.stages, initial_tasks) + if initial_tasks: + assign_root_lineage(initial_tasks) + return executor.execute(self.stages, initial_tasks, checkpoint_path=checkpoint_path) diff --git a/nemo_curator/stages/base.py b/nemo_curator/stages/base.py index 5761dfeb18..fefdc5971b 100644 --- a/nemo_curator/stages/base.py +++ b/nemo_curator/stages/base.py @@ -25,6 +25,7 @@ from nemo_curator.stages.resources import Resources from nemo_curator.tasks import Task +from nemo_curator.utils.lineage_store import record_lineage if TYPE_CHECKING: from nemo_curator.backends.base import NodeInfo, WorkerMetadata @@ -35,6 +36,58 @@ _STAGE_REGISTRY: dict[str, type[ProcessingStage]] = {} +def assign_child_lineage( + parent_paths: list[str], + result: Task | list[Task] | None, +) -> list[Task]: + """Normalize a stage's ``process()`` result and assign deterministic lineage. + + Each surviving ``children[i]`` gets ``_lineage_path`` and ``_udid`` derived + from ``(parent_paths, i)`` so that the same pipeline run twice on the same + inputs produces byte-identical task IDs. Call this from any custom + ``process_batch`` override to keep outputs consistent with the rest of the + pipeline. + + Children whose ``_udid`` is already set are passed through unchanged. This + happens when a stage mutates and returns the same task instance it received + (e.g. an embedder that writes results onto the input task): the framework + must not treat such a task as a new child of itself. + + Args: + parent_paths: One element per logical parent (typically + ``[task._lineage_path]`` for 1:N stages, or multiple paths for + join/aggregate stages). + result: Whatever ``process()`` (or your custom batch logic) returned for + this parent set — a single task, a list, or ``None``. + + Returns: + The normalized list of children with lineage assigned. May be empty. 
+ """ + if result is None: + return [] + children = result if isinstance(result, list) else [result] + children = [c for c in children if c is not None] + for i, child in enumerate(children): + child._set_lineage(parent_paths, i) + return children + + +def assign_root_lineage(tasks: list[Task]) -> list[Task]: + """Assign deterministic root-level lineage to initial pipeline tasks. + + Each ``tasks[i]`` gets ``_lineage_path = str(i)`` and the corresponding + ``_udid``. Tasks whose ``_udid`` is already set are left untouched + (``_set_lineage`` early-returns), so calling this twice is a no-op. + + Without this step every root carries ``_lineage_path = ""``, and the + empty-string filter in ``_set_lineage`` collapses first-stage children of + different roots onto the same lineage path, producing identical ``_udid``. + """ + for i, task in enumerate(tasks): + task._set_lineage([], i) + return tasks + + class StageMeta(ABCMeta): """Metaclass that automatically registers concrete Stage subclasses. A class is considered *concrete* if it directly inherits from @@ -179,9 +232,26 @@ def process_batch(self, tasks: list[X]) -> list[Y]: - Single task: For 1-to-1 transformations - List of tasks: For 1-to-many transformations - None: If the task should be filtered out - Note: The returned list should have the same length as the input list, - with each element corresponding to the result of processing the task - at the same index. + + Lineage contract: every emitted child must have its ``_lineage_path`` + and ``_udid`` set so the pipeline produces deterministic IDs. The + default implementation below delegates to + :func:`assign_child_lineage` per input task. 
If you override this + method, you are responsible for calling ``assign_child_lineage`` on + each chunk of outputs that share parentage, e.g.:: + + outputs = [] + for task in tasks: + raw = self.my_batched_process(task) + outputs.extend(assign_child_lineage([task._lineage_path], raw)) + return outputs + + In-place returns are supported: if ``process()`` mutates and returns the + same task it received, ``assign_child_lineage`` will preserve that + task's existing ``_lineage_path`` / ``_udid`` rather than treating it as + a new child of itself. + + Outputs that skip this step will carry empty ``_udid``/``_lineage_path``. """ # Default implementation: process tasks one by one # This is only used as a fallback if a stage doesn't override this method @@ -192,10 +262,14 @@ def process_batch(self, tasks: list[X]) -> list[Y]: raise ValueError(msg) result = self.process(task) - if isinstance(result, list): - results.extend(result) - else: - results.append(result) + # Do not forget to call the assign_child_lineage if you have overridden + # the process_batch function. This function generates unique and + # deterministic keys. + children = assign_child_lineage([task._lineage_path], result) + # If you pass a checkpoint_path to the executor, call the record_lineage + # function to build the DAG for resumability. + record_lineage([task._udid], [c._udid for c in children]) + results.extend(children) return results def setup_on_node(self, node_info: NodeInfo | None = None, worker_metadata: WorkerMetadata | None = None) -> None: diff --git a/nemo_curator/tasks/tasks.py b/nemo_curator/tasks/tasks.py index b2836415c1..3bfd4d5d15 100644 --- a/nemo_curator/tasks/tasks.py +++ b/nemo_curator/tasks/tasks.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import hashlib import uuid from abc import ABC, abstractmethod from dataclasses import dataclass, field @@ -40,11 +41,31 @@ class Task(ABC, Generic[T]): _stage_perf: list[StagePerfStats] = field(default_factory=list) _metadata: dict[str, Any] = field(default_factory=dict) _uuid: str = field(init=False, default_factory=lambda: str(uuid.uuid4())) + # `_lineage_path` is the index-based path of this task through the pipeline + # DAG (e.g. "3_0_7" = 4th root task, then 1st child, then 8th grandchild). + # It is propagated to children and hashed into `_udid`, the deterministic + # task id. + _lineage_path: str = field(init=False, default="") + _udid: str = field(init=False, default="") def __post_init__(self) -> None: """Post-initialization hook.""" self.validate() + def _set_lineage(self, parent_lineage_paths: list[str], child_index: int) -> bool: + """Assign deterministic lineage to this task. + + Returns ``True`` if lineage was newly assigned, ``False`` if ``_udid`` + was already set — which signals the task was returned in place by an + earlier stage and its existing lineage must be preserved. + """ + if self._udid: + return False + parts = [*[p for p in parent_lineage_paths if p], str(child_index)] + self._lineage_path = "_".join(parts) + self._udid = hashlib.sha256(self._lineage_path.encode()).hexdigest()[:32] + return True + @property @abstractmethod def num_items(self) -> int: diff --git a/nemo_curator/utils/lineage_store.py b/nemo_curator/utils/lineage_store.py new file mode 100644 index 0000000000..80a905585c --- /dev/null +++ b/nemo_curator/utils/lineage_store.py @@ -0,0 +1,303 @@ +# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""LMDB-backed lineage store for task DAG checkpointing. + +Stores, per task ``_udid``: + +- parent ``_udid``s +- child ``_udid``s +- ``task_type`` ("source" | "middle" | "leaf" | "source_leaf") +- ``completed`` flag (reserved for future resumability work; never auto-set today) + +Architecture: + +- :class:`LineageStore` — direct LMDB owner. Used inside the writer actor for + the active pipeline, and also opened standalone (e.g., after a run finishes, + in a fresh process) to read records. +- :class:`LineageWriterActor` — named Ray actor that wraps a single + :class:`LineageStore`. The only writer during a pipeline run, which is what + lets the file live safely on NFS / Lustre. +- :func:`record_lineage` — write helper called from stages. No-op unless a + :class:`LineageWriterActor` is registered in the cluster. 
+""" + +from __future__ import annotations + +import hashlib +from collections import deque +from dataclasses import dataclass, field +from pathlib import Path + +import lmdb +import ray +from loguru import logger + +LINEAGE_ACTOR_NAME = "nemo_curator_lineage_writer" + +_IN_EDGES_DB = b"in_edges" +_OUT_EDGES_DB = b"out_edges" +_TASK_TYPE_DB = b"task_type" +_COMPLETED_DB = b"completed" + +_DEFAULT_MAP_SIZE = 1 << 34 # 16 GiB; sparse on Linux so effectively free + +_TYPE_SOURCE = b"source" +_TYPE_MIDDLE = b"middle" +_TYPE_LEAF = b"leaf" +_TYPE_SOURCE_LEAF = b"source_leaf" + + +def _classify(has_parent: bool, has_child: bool) -> bytes: + if has_parent and has_child: + return _TYPE_MIDDLE + if has_parent: + return _TYPE_LEAF + if has_child: + return _TYPE_SOURCE + return _TYPE_SOURCE_LEAF + + +def _udid_to_key(udid: str) -> bytes: + return udid.encode("ascii") + + +def _key_to_udid(key: bytes) -> str: + return key.decode("ascii") + + +def _path_to_udid(lineage_path: str) -> str: + """Mirror of the udid derivation in ``Task._set_lineage`` ([tasks.py:59]).""" + return hashlib.sha256(lineage_path.encode()).hexdigest()[:32] + + +@dataclass +class LineageRecord: + parents: list[str] = field(default_factory=list) + children: list[str] = field(default_factory=list) + task_type: str = "source_leaf" + completed: bool = False + + +class LineageStore: + """Direct LMDB owner for the lineage checkpoint. + + Not safe to use from multiple processes concurrently. The writer actor uses + one of these as its backing store during a pipeline run; tests and + post-pipeline inspection tools instantiate one directly to read records. 
+ """ + + def __init__(self, path: str | Path, map_size: int = _DEFAULT_MAP_SIZE): + self._path = str(Path(path).absolute()) + Path(self._path).parent.mkdir(parents=True, exist_ok=True) + self._env = lmdb.open( + self._path, + subdir=False, + lock=False, + max_dbs=4, + map_size=map_size, + metasync=False, + sync=True, + readahead=False, + ) + self._in_db = self._env.open_db(_IN_EDGES_DB, dupsort=True) + self._out_db = self._env.open_db(_OUT_EDGES_DB, dupsort=True) + self._type_db = self._env.open_db(_TASK_TYPE_DB) + self._completed_db = self._env.open_db(_COMPLETED_DB) + + @staticmethod + def _has_dup(txn: lmdb.Transaction, db: lmdb._Database, key: bytes) -> bool: + with txn.cursor(db=db) as cur: + return cur.set_key(key) + + def _record_emission_once(self, parent_udids: list[str], child_udids: list[str]) -> None: + parent_keys = [_udid_to_key(u) for u in parent_udids] + child_keys = [_udid_to_key(u) for u in child_udids] + with self._env.begin(write=True) as txn: + for child_key in child_keys: + for parent_key in parent_keys: + if parent_key == child_key: + # In-place return: don't add a node as its own parent/child. + continue + # In dupsort dbs, the default flags allow multiple distinct values + # per key and silently drop exact (key, value) duplicates. We + # deliberately do NOT pass overwrite=False — that maps to + # MDB_NOOVERWRITE which refuses any new value once the key has + # any existing value, blocking incremental parent attribution. 
+ txn.put(child_key, parent_key, db=self._in_db) + txn.put(parent_key, child_key, db=self._out_db) + + affected = {*child_keys, *parent_keys} + for udid_key in affected: + has_parent = self._has_dup(txn, self._in_db, udid_key) + has_child = self._has_dup(txn, self._out_db, udid_key) + txn.put(udid_key, _classify(has_parent, has_child), db=self._type_db, overwrite=True) + + def record_emission(self, parent_udids: list[str], child_udids: list[str]) -> None: + """Append edges for ``(parent, child)`` pairs and refresh ``task_type`` + for every affected udid. Idempotent under retries and incremental + parent attribution; no-op when ``child_udids`` is empty.""" + if not child_udids: + return + try: + self._record_emission_once(parent_udids, child_udids) + except lmdb.MapFullError: + new_size = self._env.info()["map_size"] * 2 + logger.warning(f"LMDB map full at {self._path}; growing to {new_size} bytes") + self._env.set_mapsize(new_size) + self._record_emission_once(parent_udids, child_udids) + + def mark_completed(self, udid: str) -> None: + with self._env.begin(write=True) as txn: + txn.put(_udid_to_key(udid), b"1", db=self._completed_db, overwrite=True) + + def is_completed(self, udid: str) -> bool: + with self._env.begin() as txn: + return txn.get(_udid_to_key(udid), db=self._completed_db) is not None + + def get(self, udid: str) -> LineageRecord | None: + key = _udid_to_key(udid) + with self._env.begin() as txn: + task_type = txn.get(key, db=self._type_db) + if task_type is None: + return None + parents: list[str] = [] + with txn.cursor(db=self._in_db) as cur: + if cur.set_key(key): + parents = [_key_to_udid(v) for v in cur.iternext_dup()] + children: list[str] = [] + with txn.cursor(db=self._out_db) as cur: + if cur.set_key(key): + children = [_key_to_udid(v) for v in cur.iternext_dup()] + completed = txn.get(key, db=self._completed_db) is not None + return LineageRecord( + parents=parents, + children=children, + task_type=task_type.decode("ascii"), + 
completed=completed, + ) + + def iter_records(self) -> list[tuple[str, LineageRecord]]: + results: list[tuple[str, LineageRecord]] = [] + with self._env.begin() as txn, txn.cursor(db=self._type_db) as cur: + for key, _ in cur: + udid = _key_to_udid(key) + rec = self.get(udid) + if rec is not None: + results.append((udid, rec)) + return results + + def _traverse(self, udid: str, attr: str) -> dict[str, LineageRecord]: + start = self.get(udid) + if start is None: + return {} + result: dict[str, LineageRecord] = {} + queue: deque[str] = deque(getattr(start, attr)) + while queue: + neighbor = queue.popleft() + if neighbor == udid or neighbor in result: + continue + rec = self.get(neighbor) + if rec is None: + continue + result[neighbor] = rec + queue.extend(getattr(rec, attr)) + return result + + def get_all_parents(self, udid: str) -> dict[str, LineageRecord]: + """Return every ancestor of ``udid`` (transitive parents) keyed by udid. + + Excludes ``udid`` itself. Returns ``{}`` when ``udid`` is unknown or + has no parents. + """ + return self._traverse(udid, "parents") + + def get_all_children(self, udid: str) -> dict[str, LineageRecord]: + """Return every descendant of ``udid`` (transitive children) keyed by udid. + + Excludes ``udid`` itself. Returns ``{}`` when ``udid`` is unknown or + has no children. + """ + return self._traverse(udid, "children") + + def close(self) -> None: + if self._env is not None: + self._env.close() + self._env = None # type: ignore[assignment] + + +@ray.remote(num_cpus=0) +class LineageWriterActor: + """Singleton owner of the LMDB env, spawned by the executor when + ``Pipeline.run(checkpoint_path=...)`` is provided. Workers send lineage + events here via :func:`record_lineage`. 
Because it is the only process + that writes to the LMDB file, no cross-process file lock is required and + the file may safely live on NFS or Lustre.""" + + def __init__(self, path: str, map_size: int = _DEFAULT_MAP_SIZE): + self._store = LineageStore(path, map_size=map_size) + + def record_emission(self, parent_udids: list[str], child_udids: list[str]) -> None: + self._store.record_emission(parent_udids, child_udids) + + def mark_completed(self, udid: str) -> None: + self._store.mark_completed(udid) + + def is_completed(self, udid: str) -> bool: + return self._store.is_completed(udid) + + def get(self, udid: str) -> LineageRecord | None: + return self._store.get(udid) + + def iter_records(self) -> list[tuple[str, LineageRecord]]: + return self._store.iter_records() + + def get_all_parents(self, udid: str) -> dict[str, LineageRecord]: + return self._store.get_all_parents(udid) + + def get_all_children(self, udid: str) -> dict[str, LineageRecord]: + return self._store.get_all_children(udid) + + def close(self) -> None: + self._store.close() + + +def record_lineage(parent_udids: list[str], child_udids: list[str]) -> None: + """Persist parent/child edges via the named :class:`LineageWriterActor`. + + No-op when Ray is not initialized or no such actor is registered. The + actor is spawned by the executor only when ``Pipeline.run`` is called + with ``checkpoint_path``, so the absence of the actor is what gates + recording. + + Intended to be called from inside ``process_batch`` right after + :func:`nemo_curator.stages.base.assign_child_lineage`. Pass the parent + tasks' ``_udid`` values (typically ``[task._udid]`` for 1:N stages, or one + udid per parent for joins) and the emitted children's ``_udid`` values. + Empty udids (``EmptyTask`` roots and tasks that haven't been lineage-assigned + yet) are filtered out, so source tasks naturally end up with empty + ``in_edges``. 
+ """ + if not ray.is_initialized(): + return + try: + actor = ray.get_actor(LINEAGE_ACTOR_NAME) + except ValueError: + return + + parent_udids = [u for u in parent_udids if u] + child_udids = [u for u in child_udids if u] + if not child_udids: + return + + ray.get(actor.record_emission.remote(parent_udids, child_udids)) diff --git a/pyproject.toml b/pyproject.toml index a0ab3fac3e..d84c87913a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,6 +60,7 @@ dependencies = [ "fsspec", "hydra-core", "jieba==0.42.1", + "lmdb>=1.4", "loguru", "mecab-python3", "omegaconf", diff --git a/tests/pipelines/test_lineage_integration.py b/tests/pipelines/test_lineage_integration.py new file mode 100644 index 0000000000..4e1ba80f3c --- /dev/null +++ b/tests/pipelines/test_lineage_integration.py @@ -0,0 +1,246 @@ +# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""End-to-end lineage-checkpoint tests. + +Drive a pipeline through the default :meth:`ProcessingStage.process_batch` +(which calls :func:`assign_child_lineage` + :func:`record_lineage` separately) +while a real :class:`LineageWriterActor` is registered, and verify the +resulting on-disk DAG matches the topology. Without an actor, recording is a +true no-op. 
+""" + +import contextlib +from dataclasses import dataclass +from pathlib import Path + +import pytest +import ray + +from nemo_curator.backends.base import BaseStageAdapter +from nemo_curator.pipeline.pipeline import Pipeline +from nemo_curator.stages.base import ProcessingStage, assign_child_lineage +from nemo_curator.tasks import Task +from nemo_curator.utils.lineage_store import ( + LINEAGE_ACTOR_NAME, + LineageWriterActor, + _path_to_udid, + record_lineage, +) + + +@dataclass +class _SimpleTask(Task[list[int]]): + @property + def num_items(self) -> int: + return len(self.data) if self.data is not None else 0 + + def validate(self) -> bool: + return True + + +@dataclass +class _FanOut(ProcessingStage[_SimpleTask, _SimpleTask]): + times: int = 3 + name: str = "fanout" + + def inputs(self) -> tuple[list[str], list[str]]: + return [], [] + + def outputs(self) -> tuple[list[str], list[str]]: + return [], [] + + def process(self, task: _SimpleTask) -> list[_SimpleTask]: + return [ + _SimpleTask(task_id=f"{task.task_id}_{i}", dataset_name=task.dataset_name, data=task.data) + for i in range(self.times) + ] + + +@dataclass +class _Passthrough(ProcessingStage[_SimpleTask, _SimpleTask]): + name: str = "passthrough" + + def inputs(self) -> tuple[list[str], list[str]]: + return [], [] + + def outputs(self) -> tuple[list[str], list[str]]: + return [], [] + + def process(self, task: _SimpleTask) -> _SimpleTask: + return _SimpleTask(task_id=f"{task.task_id}_pt", dataset_name=task.dataset_name, data=task.data) + + +@dataclass +class _FanIn(ProcessingStage[_SimpleTask, _SimpleTask]): + """Override ``process_batch`` to combine the whole batch into one output. 
+ Demonstrates the multi-parent path of the lineage contract — separate + :func:`assign_child_lineage` and :func:`record_lineage` calls.""" + + name: str = "fanin" + + def inputs(self) -> tuple[list[str], list[str]]: + return [], [] + + def outputs(self) -> tuple[list[str], list[str]]: + return [], [] + + def process(self, task: _SimpleTask) -> _SimpleTask: + _ = task + msg = "FanIn only supports batched execution" + raise NotImplementedError(msg) + + def process_batch(self, tasks: list[_SimpleTask]) -> list[_SimpleTask]: + combined: list[int] = [] + for t in tasks: + combined.extend(t.data) + merged = _SimpleTask(task_id="merged", dataset_name=tasks[0].dataset_name, data=combined) + children = assign_child_lineage([t._lineage_path for t in tasks], merged) + record_lineage([t._udid for t in tasks], [c._udid for c in children]) + return children + + +def _drive(pipeline: Pipeline, initial_tasks: list[Task]) -> list[Task]: + current = initial_tasks + for stage in pipeline.stages: + current = BaseStageAdapter(stage).process_batch(current) + return current + + +def _kill_actor_if_present() -> None: + with contextlib.suppress(ValueError): + handle = ray.get_actor(LINEAGE_ACTOR_NAME) + ray.kill(handle) + + +@pytest.fixture +def actor(tmp_path: Path, shared_ray_client: None) -> tuple[object, Path]: # noqa: ARG001 + """Spawn a real :class:`LineageWriterActor` so ``record_lineage`` has somewhere + to write.""" + _kill_actor_if_present() + path = tmp_path / "lineage.mdb" + handle = LineageWriterActor.options( + name=LINEAGE_ACTOR_NAME, + get_if_exists=True, + ).remote(path=str(path)) + try: + yield handle, path + finally: + with contextlib.suppress(Exception): + ray.get(handle.close.remote()) + ray.kill(handle) + + +def test_fanout_passthrough_fanin_records_full_dag(actor: tuple[object, Path]) -> None: + """Drive a 4-stage pipeline and verify the on-disk DAG matches the topology. 
+ + Input ─▶ FanOut(3) ─▶ Passthrough ─▶ FanIn ─▶ Passthrough ─▶ Output + """ + actor_handle, _ = actor + pipeline = Pipeline( + name="fanout_fanin", + stages=[_FanOut(times=3), _Passthrough(name="pt1"), _FanIn(), _Passthrough(name="pt2")], + ) + pipeline.build() + root = _SimpleTask(task_id="r", dataset_name="d", data=[1]) + final = _drive(pipeline, [root]) + + records = dict(ray.get(actor_handle.iter_records.remote())) + + # 3 FanOut outputs, 3 Passthrough-1 outputs, 1 FanIn output, 1 Passthrough-2 output = 8 records. + assert len(records) == 8 + + fanout_paths = ["0", "1", "2"] + pt1_paths = ["0_0", "1_0", "2_0"] + fanin_path = "0_0_1_0_2_0_0" + pt2_path = "0_0_1_0_2_0_0_0" + + fanout_udids = {_path_to_udid(p) for p in fanout_paths} + pt1_udids = {_path_to_udid(p) for p in pt1_paths} + fanin_udid = _path_to_udid(fanin_path) + pt2_udid = _path_to_udid(pt2_path) + + # FanOut roots: source (no parents, have children at the next stage). + for u in fanout_udids: + rec = records[u] + assert rec.parents == [] + assert len(rec.children) == 1 + assert rec.task_type == "source" + + # PT1: each has 1 parent (a FanOut output) and 1 child (the FanIn). + for u in pt1_udids: + rec = records[u] + assert len(rec.parents) == 1 + assert rec.parents[0] in fanout_udids + assert rec.children == [fanin_udid] + assert rec.task_type == "middle" + + # FanIn: 3 parents (the PT1 tasks), 1 child (the PT2 output). + fanin_rec = records[fanin_udid] + assert set(fanin_rec.parents) == pt1_udids + assert fanin_rec.children == [pt2_udid] + assert fanin_rec.task_type == "middle" + + # PT2: 1 parent (the FanIn), 0 children → leaf. + pt2_rec = records[pt2_udid] + assert pt2_rec.parents == [fanin_udid] + assert pt2_rec.children == [] + assert pt2_rec.task_type == "leaf" + + # The final returned task should match the leaf in the store. 
+ assert len(final) == 1 + assert final[0]._udid == pt2_udid + + +def test_actor_exposes_transitive_traversal(actor: tuple[object, Path]) -> None: + """The actor surfaces ``get_all_parents`` / ``get_all_children`` for transitive + DAG inspection. Drives the same fanout/passthrough/fanin/passthrough pipeline + as :func:`test_fanout_passthrough_fanin_records_full_dag` and walks both + directions from the leaf and from one source.""" + actor_handle, _ = actor + pipeline = Pipeline( + name="fanout_fanin_traverse", + stages=[_FanOut(times=3), _Passthrough(name="pt1"), _FanIn(), _Passthrough(name="pt2")], + ) + pipeline.build() + root = _SimpleTask(task_id="r", dataset_name="d", data=[1]) + _drive(pipeline, [root]) + + fanout_udids = {_path_to_udid(p) for p in ["0", "1", "2"]} + pt1_udids = {_path_to_udid(p) for p in ["0_0", "1_0", "2_0"]} + fanin_udid = _path_to_udid("0_0_1_0_2_0_0") + pt2_udid = _path_to_udid("0_0_1_0_2_0_0_0") + + # From the final leaf, every upstream node should be reachable. + ancestors = ray.get(actor_handle.get_all_parents.remote(pt2_udid)) + assert set(ancestors.keys()) == fanout_udids | pt1_udids | {fanin_udid} + + # From one fanout root, descendants are its own pt1 + the shared fanin + pt2. 
+ one_fanout = next(iter(fanout_udids)) + descendants = ray.get(actor_handle.get_all_children.remote(one_fanout)) + descendant_pt1 = pt1_udids & set(descendants.keys()) + assert len(descendant_pt1) == 1 + assert set(descendants.keys()) == descendant_pt1 | {fanin_udid, pt2_udid} + + +def test_no_lineage_recording_when_actor_absent(tmp_path: Path, shared_ray_client: None) -> None: # noqa: ARG001 + """Driving a pipeline with no LineageWriterActor registered must not create any LMDB file.""" + _kill_actor_if_present() + + pipeline = Pipeline(name="no_lineage", stages=[_FanOut(times=2), _Passthrough(name="pt")]) + pipeline.build() + root = _SimpleTask(task_id="r", dataset_name="d", data=[1]) + out = _drive(pipeline, [root]) + assert len(out) == 2 + # No files created anywhere by the lineage subsystem. + assert list(tmp_path.iterdir()) == [] diff --git a/tests/pipelines/test_pipelines.py b/tests/pipelines/test_pipelines.py index 87cc23e324..335dc8f347 100644 --- a/tests/pipelines/test_pipelines.py +++ b/tests/pipelines/test_pipelines.py @@ -12,13 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import hashlib +from dataclasses import dataclass from unittest.mock import Mock, patch import pytest +from nemo_curator.backends.base import BaseStageAdapter from nemo_curator.pipeline.pipeline import Pipeline -from nemo_curator.stages.base import ProcessingStage +from nemo_curator.stages.base import ProcessingStage, assign_child_lineage, assign_root_lineage from nemo_curator.stages.resources import Resources +from nemo_curator.tasks import Task def test_pipeline_uses_xenna_executor_by_default(): @@ -69,3 +73,304 @@ def test_raises_when_ray_serve_active_with_xenna_and_gpu_stages() -> None: with pytest.raises(RuntimeError, match="Cannot run XennaExecutor"): pipeline.run(executor=mock_executor) + + +# --------------------------------------------------------------------------- +# Deterministic _udid / _lineage_path end-to-end +# --------------------------------------------------------------------------- + + +@dataclass +class _SimpleTask(Task[list[int]]): + @property + def num_items(self) -> int: + return len(self.data) if self.data is not None else 0 + + def validate(self) -> bool: + return True + + +@dataclass +class _Repeat(ProcessingStage[_SimpleTask, _SimpleTask]): + times: int = 3 + name: str = "repeat" + + def inputs(self) -> tuple[list[str], list[str]]: + return [], [] + + def outputs(self) -> tuple[list[str], list[str]]: + return [], [] + + def process(self, task: _SimpleTask) -> list[_SimpleTask]: + return [ + _SimpleTask( + task_id=f"{task.task_id}_{i}", + dataset_name=task.dataset_name, + data=task.data, + ) + for i in range(self.times) + ] + + +def _drive(pipeline: Pipeline, initial_tasks: list[Task]) -> list[Task]: + """Walk a built pipeline by hand, threading tasks through BaseStageAdapter + for each stage. 
This is what every real executor does internally; using it + here lets us exercise the determinism contract without needing Ray.""" + assign_root_lineage(initial_tasks) + current = initial_tasks + for stage in pipeline.stages: + current = BaseStageAdapter(stage).process_batch(current) + return current + + +def test_pipeline_udid_deterministic_across_runs(): + def run_once() -> tuple[list[str], list[str]]: + pipeline = Pipeline(name="det", stages=[_Repeat(times=2), _Repeat(times=3)]) + pipeline.build() + root = _SimpleTask(task_id="r", dataset_name="d", data=[1, 2]) + out = _drive(pipeline, [root]) + return [t._lineage_path for t in out], [t._udid for t in out] + + paths_a, udids_a = run_once() + paths_b, udids_b = run_once() + assert paths_a == paths_b + assert udids_a == udids_b + # Root index "0" is prepended by `assign_root_lineage`; subsequent fan-outs + # extend the path one segment at a time per the documented + # "{root_idx}_{child_idx}_{grandchild_idx}" shape. + assert paths_a == [f"0_{i}_{j}" for i in range(2) for j in range(3)] + assert udids_a == [hashlib.sha256(p.encode()).hexdigest()[:32] for p in paths_a] + + +# --------------------------------------------------------------------------- +# Fan-out / passthrough / fan-in topology with explicit expected _udid values +# --------------------------------------------------------------------------- + + +@dataclass +class _Passthrough(ProcessingStage[_SimpleTask, _SimpleTask]): + name: str = "passthrough" + + def inputs(self) -> tuple[list[str], list[str]]: + return [], [] + + def outputs(self) -> tuple[list[str], list[str]]: + return [], [] + + def process(self, task: _SimpleTask) -> _SimpleTask: + return _SimpleTask( + task_id=f"{task.task_id}_pt", + dataset_name=task.dataset_name, + data=task.data, + ) + + +@dataclass +class _FanOut(ProcessingStage[_SimpleTask, _SimpleTask]): + times: int = 3 + name: str = "fanout" + + def inputs(self) -> tuple[list[str], list[str]]: + return [], [] + + def outputs(self) 
-> tuple[list[str], list[str]]: + return [], [] + + def process(self, task: _SimpleTask) -> list[_SimpleTask]: + return [ + _SimpleTask( + task_id=f"{task.task_id}_{i}", + dataset_name=task.dataset_name, + data=task.data, + ) + for i in range(self.times) + ] + + +@dataclass +class _FanIn(ProcessingStage[_SimpleTask, _SimpleTask]): + """Overrides `process_batch` to combine the whole batch into a single + output. Demonstrates the multi-parent path of `assign_child_lineage`.""" + + name: str = "fanin" + + def inputs(self) -> tuple[list[str], list[str]]: + return [], [] + + def outputs(self) -> tuple[list[str], list[str]]: + return [], [] + + def process(self, task: _SimpleTask) -> _SimpleTask: + _ = task + msg = "FanIn only supports batched execution" + raise NotImplementedError(msg) + + def process_batch(self, tasks: list[_SimpleTask]) -> list[_SimpleTask]: + combined: list[int] = [] + for t in tasks: + combined.extend(t.data) + merged = _SimpleTask( + task_id="merged", + dataset_name=tasks[0].dataset_name, + data=combined, + ) + return assign_child_lineage([t._lineage_path for t in tasks], merged) + + +def test_pipeline_udid_fanout_passthrough_fanin_passthrough(): + """End-to-end: a 4-stage pipeline that exercises 1:N, 1:1, N:1, 1:1 and + verifies the exact `_lineage_path` / `_udid` values at every step. 
+ + Pipeline topology: + + Input ─▶ FanOut(3) ─▶ Passthrough ─▶ FanIn ─▶ Passthrough ─▶ Output + + Starting from one root task assigned ``_lineage_path = "0"`` by + ``assign_root_lineage``, the framework should produce the following paths: + + After FanOut: ["0_0", "0_1", "0_2"] + After Passthrough: ["0_0_0", "0_1_0", "0_2_0"] + After FanIn: ["0_0_0_0_1_0_0_2_0_0"] (all 3 parents + idx 0) + After Passthrough: ["0_0_0_0_1_0_0_2_0_0_0"] + """ + pipeline = Pipeline( + name="fanout_fanin", + stages=[ + _FanOut(times=3), + _Passthrough(name="pt1"), + _FanIn(), + _Passthrough(name="pt2"), + ], + ) + pipeline.build() + + root = _SimpleTask(task_id="r", dataset_name="d", data=[1]) + assign_root_lineage([root]) + + # Drive stage-by-stage so we can inspect each intermediate set of tasks. + after_fanout = BaseStageAdapter(pipeline.stages[0]).process_batch([root]) + after_passthrough_1 = BaseStageAdapter(pipeline.stages[1]).process_batch(after_fanout) + after_fanin = BaseStageAdapter(pipeline.stages[2]).process_batch(after_passthrough_1) + after_passthrough_2 = BaseStageAdapter(pipeline.stages[3]).process_batch(after_fanin) + + # Expected paths at every level. + assert [t._lineage_path for t in after_fanout] == ["0_0", "0_1", "0_2"] + assert [t._lineage_path for t in after_passthrough_1] == ["0_0_0", "0_1_0", "0_2_0"] + assert [t._lineage_path for t in after_fanin] == ["0_0_0_0_1_0_0_2_0_0"] + assert [t._lineage_path for t in after_passthrough_2] == ["0_0_0_0_1_0_0_2_0_0_0"] + + # Expected _udid values are exactly sha256(lineage_path)[:32]. 
+ def udid(path: str) -> str: + return hashlib.sha256(path.encode()).hexdigest()[:32] + + assert [t._udid for t in after_fanout] == [udid("0_0"), udid("0_1"), udid("0_2")] + assert [t._udid for t in after_passthrough_1] == [udid("0_0_0"), udid("0_1_0"), udid("0_2_0")] + assert [t._udid for t in after_fanin] == [udid("0_0_0_0_1_0_0_2_0_0")] + assert [t._udid for t in after_passthrough_2] == [udid("0_0_0_0_1_0_0_2_0_0_0")] + + # Uniqueness: every task emitted anywhere in the pipeline has a distinct + # _udid (and a distinct _lineage_path). + all_tasks = [*after_fanout, *after_passthrough_1, *after_fanin, *after_passthrough_2] + all_udids = [t._udid for t in all_tasks] + all_paths = [t._lineage_path for t in all_tasks] + assert len(set(all_udids)) == len(all_udids) + assert len(set(all_paths)) == len(all_paths) + + # Determinism: running the same pipeline shape over the same input again + # yields byte-identical _udid and _lineage_path everywhere. + pipeline2 = Pipeline( + name="fanout_fanin", + stages=[ + _FanOut(times=3), + _Passthrough(name="pt1"), + _FanIn(), + _Passthrough(name="pt2"), + ], + ) + pipeline2.build() + second_run = _drive(pipeline2, [_SimpleTask(task_id="r", dataset_name="d", data=[1])]) + assert [t._lineage_path for t in second_run] == [t._lineage_path for t in after_passthrough_2] + assert [t._udid for t in second_run] == [t._udid for t in after_passthrough_2] + + +# --------------------------------------------------------------------------- +# In-place stages (process() returns the same task) preserve lineage +# --------------------------------------------------------------------------- + + +@dataclass +class _InPlace(ProcessingStage[_SimpleTask, _SimpleTask]): + """Mutates the input task and returns the same instance — the pattern used + by ImageEmbeddingStage and ~28 other stages across audio/image/video.""" + + name: str = "inplace" + + def inputs(self) -> tuple[list[str], list[str]]: + return [], [] + + def outputs(self) -> tuple[list[str], 
list[str]]: + return [], [] + + def process(self, task: _SimpleTask) -> _SimpleTask: + task.data = [*(task.data or []), 0] + return task + + +def test_inplace_stage_preserves_lineage(): + pipeline = Pipeline( + name="inplace", + stages=[_Repeat(times=2), _InPlace(name="ip1"), _InPlace(name="ip2")], + ) + pipeline.build() + + root = _SimpleTask(task_id="r", dataset_name="d", data=[1]) + assign_root_lineage([root]) + after_fanout = BaseStageAdapter(pipeline.stages[0]).process_batch([root]) + after_ip1 = BaseStageAdapter(pipeline.stages[1]).process_batch(after_fanout) + after_ip2 = BaseStageAdapter(pipeline.stages[2]).process_batch(after_ip1) + + # Fan-out gave the children paths "0_0" and "0_1". The two in-place stages + # must NOT extend the lineage path — same instances come back unchanged. + assert [t._lineage_path for t in after_fanout] == ["0_0", "0_1"] + assert [t._lineage_path for t in after_ip1] == ["0_0", "0_1"] + assert [t._lineage_path for t in after_ip2] == ["0_0", "0_1"] + + def udid(path: str) -> str: + return hashlib.sha256(path.encode()).hexdigest()[:32] + + expected_udids = [udid("0_0"), udid("0_1")] + assert [t._udid for t in after_fanout] == expected_udids + assert [t._udid for t in after_ip1] == expected_udids + assert [t._udid for t in after_ip2] == expected_udids + + # Identity check: the in-place stages return the same task instances. + assert all(a is b for a, b in zip(after_fanout, after_ip1, strict=True)) + assert all(a is b for a, b in zip(after_ip1, after_ip2, strict=True)) + + +# --------------------------------------------------------------------------- +# Multiple root tasks must produce distinct _udid through a 1:1 first stage +# --------------------------------------------------------------------------- + + +def test_pipeline_udid_no_collision_across_multiple_roots(): + """Multiple root tasks through a 1:1 first stage must produce distinct _udid. 
+ + Without ``assign_root_lineage`` every root carries ``_lineage_path = ""``; + the empty-string filter in ``_set_lineage`` then collapses all first-stage + children onto the same path ("0"), so their ``_udid`` collides. + """ + pipeline = Pipeline(name="multi_root", stages=[_Passthrough(name="pt")]) + pipeline.build() + + roots = [ + _SimpleTask(task_id="r0", dataset_name="d", data=[1]), + _SimpleTask(task_id="r1", dataset_name="d", data=[2]), + _SimpleTask(task_id="r2", dataset_name="d", data=[3]), + ] + out = _drive(pipeline, roots) + + paths = [t._lineage_path for t in out] + udids = [t._udid for t in out] + assert paths == ["0_0", "1_0", "2_0"] + assert len(set(udids)) == len(udids) diff --git a/tests/tasks/test_tasks.py b/tests/tasks/test_tasks.py index e76959c8e6..53699282e3 100644 --- a/tests/tasks/test_tasks.py +++ b/tests/tasks/test_tasks.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import hashlib from dataclasses import dataclass +from nemo_curator.backends.base import BaseStageAdapter from nemo_curator.stages.base import ProcessingStage from nemo_curator.tasks import Task @@ -69,3 +71,75 @@ def test_fanout_tasks_have_unique_uuid(): assert len(output) == 3 uuids = [t._uuid for t in output] assert len(set(uuids)) == 3, f"Expected unique _uuid per task, got {uuids}" + + +def _sha256_32(s: str) -> str: + return hashlib.sha256(s.encode()).hexdigest()[:32] + + +def test_lineage_path_and_udid_format(): + # Empty parent → just the child index + task = SimpleTask(task_id="root", dataset_name="t", data=[]) + task._set_lineage([], 4) + assert task._lineage_path == "4" + assert task._udid == _sha256_32("4") + + # Single non-empty parent + child = SimpleTask(task_id="c", dataset_name="t", data=[]) + child._set_lineage(["3"], 0) + assert child._lineage_path == "3_0" + assert child._udid == _sha256_32("3_0") + + # Multi-parent join + grandchild = SimpleTask(task_id="g", dataset_name="t", 
data=[]) + grandchild._set_lineage(["3_0", "4_1"], 2) + assert grandchild._lineage_path == "3_0_4_1_2" + assert grandchild._udid == _sha256_32("3_0_4_1_2") + + +def test_fanout_udid_from_empty_root(): + # Driving through the adapter triggers the default process_batch which + # calls assign_child_lineage. Parent _lineage_path is "" (no lineage + # assigned yet), so children get indices as their root paths. + task = _sample_task() + output = BaseStageAdapter(Repeat(times=3)).process_batch([task]) + + assert [t._lineage_path for t in output] == ["0", "1", "2"] + assert [t._udid for t in output] == [_sha256_32("0"), _sha256_32("1"), _sha256_32("2")] + # Original _uuid stays random and unique per task. + assert len({t._uuid for t in output}) == 3 + + +def test_set_lineage_is_idempotent(): + # First assignment fills in path/udid and returns True. + task = SimpleTask(task_id="t", dataset_name="d", data=[]) + assert task._set_lineage(["3"], 0) is True + assert task._lineage_path == "3_0" + assert task._udid == _sha256_32("3_0") + + # Re-assigning with different parent/index is a no-op and returns False. + # This is how the framework detects that a stage returned a task in place. + assert task._set_lineage(["7"], 4) is False + assert task._lineage_path == "3_0" + assert task._udid == _sha256_32("3_0") + + +def test_udid_deterministic_across_runs(): + # Same pipeline run twice over the same input must yield byte-identical + # _udid / _lineage_path sequences. (`_uuid` will differ because it's a + # fresh uuid4 each run; that's expected and not what _udid is for.) 
+ def run_once() -> tuple[list[str], list[str]]: + task = _sample_task() + after_first = BaseStageAdapter(Repeat(times=2)).process_batch([task]) + after_second = BaseStageAdapter(Repeat(times=3)).process_batch(after_first) + return ( + [t._lineage_path for t in after_second], + [t._udid for t in after_second], + ) + + paths_a, udids_a = run_once() + paths_b, udids_b = run_once() + assert paths_a == paths_b + assert udids_a == udids_b + # Sanity: lineage paths follow the documented "{parent_idx}_{child_idx}" shape. + assert paths_a == [f"{i}_{j}" for i in range(2) for j in range(3)] diff --git a/tests/utils/test_lineage_store.py b/tests/utils/test_lineage_store.py new file mode 100644 index 0000000000..7f53964f42 --- /dev/null +++ b/tests/utils/test_lineage_store.py @@ -0,0 +1,382 @@ +# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for :mod:`nemo_curator.utils.lineage_store`. + +The storage-layer tests use :class:`LineageStore` directly. The end-to-end +helper tests spawn a real :class:`LineageWriterActor` and verify that calling +:func:`record_lineage` writes through the actor. 
+""" + +import contextlib +from dataclasses import dataclass +from pathlib import Path + +import pytest +import ray + +from nemo_curator.stages.base import assign_child_lineage +from nemo_curator.tasks import Task +from nemo_curator.utils.lineage_store import ( + LINEAGE_ACTOR_NAME, + LineageStore, + LineageWriterActor, + _classify, + _path_to_udid, + record_lineage, +) + + +@dataclass +class _T(Task[None]): + @property + def num_items(self) -> int: + return 0 + + def validate(self) -> bool: + return True + + +def _make_child(parent_path: str, i: int) -> _T: + """Build a lineage-assigned task as if it were emitted by a stage.""" + t = _T(task_id=f"t{i}", dataset_name="ds", data=None) + t._set_lineage([parent_path], i) + return t + + +# --------------------------------------------------------------------------- # +# Direct LineageStore tests — no Ray required. +# --------------------------------------------------------------------------- # + + +@pytest.fixture +def store(tmp_path: Path) -> LineageStore: + path = tmp_path / "lineage.mdb" + s = LineageStore(str(path)) + try: + yield s + finally: + s.close() + + +def test_classify_truth_table() -> None: + assert _classify(False, False) == b"source_leaf" + assert _classify(False, True) == b"source" + assert _classify(True, False) == b"leaf" + assert _classify(True, True) == b"middle" + + +def test_get_returns_none_for_unknown(store: LineageStore) -> None: + assert store.get("doesnotexist" + "0" * 20) is None + + +def test_records_single_edge_with_types(store: LineageStore) -> None: + parent = "p" * 32 + child = "c" * 32 + store.record_emission([parent], [child]) + + p_rec = store.get(parent) + c_rec = store.get(child) + assert p_rec is not None + assert c_rec is not None + assert p_rec.children == [child] + assert p_rec.parents == [] + assert p_rec.task_type == "source" # provisional: no parents seen yet for `parent` + assert c_rec.parents == [parent] + assert c_rec.children == [] + assert c_rec.task_type == "leaf" + + 
+def test_emission_is_idempotent_under_retry(store: LineageStore) -> None: + parent = "p" * 32 + child = "c" * 32 + store.record_emission([parent], [child]) + store.record_emission([parent], [child]) # retry + + assert store.get(parent).children == [child] # no duplicate edge + assert store.get(child).parents == [parent] + + +def test_incremental_parent_attribution(store: LineageStore) -> None: + """Multiple calls for the same child accumulate parents.""" + p1 = "1" * 32 + p2 = "2" * 32 + child = "c" * 32 + store.record_emission([p1], [child]) + store.record_emission([p2], [child]) + + rec = store.get(child) + assert set(rec.parents) == {p1, p2} + assert rec.task_type == "leaf" + assert set(store.get(p1).children) == {child} + assert set(store.get(p2).children) == {child} + + +def test_type_promotes_monotonically_under_reordering(store: LineageStore) -> None: + """If a node first appears as a parent, it's provisionally `source`. When + its own parent-edge later arrives, it must promote to `middle`.""" + grandparent = "g" * 32 + parent = "p" * 32 + child = "c" * 32 + + # Out-of-order: child created from parent first; parent's own creation arrives later. + store.record_emission([parent], [child]) + assert store.get(parent).task_type == "source" + + store.record_emission([grandparent], [parent]) + assert store.get(parent).task_type == "middle" + assert store.get(grandparent).task_type == "source" + assert store.get(child).task_type == "leaf" + + +def test_source_leaf_classification(store: LineageStore) -> None: + """A task with no parents and no children is `source_leaf`.""" + orphan = "o" * 32 + # Emit a child with no real parents, AND no children of its own. 
+ store.record_emission([], [orphan]) + assert store.get(orphan).task_type == "source_leaf" + + +def test_completed_defaults_false_and_can_be_set(store: LineageStore) -> None: + udid = "x" * 32 + store.record_emission([], [udid]) + assert store.get(udid).completed is False + store.mark_completed(udid) + assert store.is_completed(udid) is True + assert store.get(udid).completed is True + # Idempotent. + store.mark_completed(udid) + assert store.is_completed(udid) is True + + +def test_iter_records_returns_all(store: LineageStore) -> None: + udids = ["a" * 32, "b" * 32, "c" * 32] + # a → b, b → c + store.record_emission([udids[0]], [udids[1]]) + store.record_emission([udids[1]], [udids[2]]) + + all_records = dict(store.iter_records()) + assert set(all_records.keys()) == set(udids) + assert all_records[udids[0]].task_type == "source" + assert all_records[udids[1]].task_type == "middle" + assert all_records[udids[2]].task_type == "leaf" + + +def test_get_all_parents_chain(store: LineageStore) -> None: + """a → b → c: ``get_all_parents(c)`` returns both ``a`` and ``b``.""" + a, b, c = "a" * 32, "b" * 32, "c" * 32 + store.record_emission([a], [b]) + store.record_emission([b], [c]) + + parents = store.get_all_parents(c) + assert set(parents.keys()) == {a, b} + assert parents[b].parents == [a] + assert parents[a].parents == [] + + +def test_get_all_children_chain(store: LineageStore) -> None: + """a → b → c: ``get_all_children(a)`` returns both ``b`` and ``c``.""" + a, b, c = "a" * 32, "b" * 32, "c" * 32 + store.record_emission([a], [b]) + store.record_emission([b], [c]) + + children = store.get_all_children(a) + assert set(children.keys()) == {b, c} + assert children[b].children == [c] + assert children[c].children == [] + + +def test_transitive_diamond_dedup(store: LineageStore) -> None: + """Diamond: a → {b, c} → d. 
``a`` appears once in ``get_all_parents(d)``.""" + a, b, c, d = "a" * 32, "b" * 32, "c" * 32, "d" * 32 + store.record_emission([a], [b]) + store.record_emission([a], [c]) + store.record_emission([b], [d]) + store.record_emission([c], [d]) + + assert set(store.get_all_parents(d).keys()) == {a, b, c} + assert set(store.get_all_children(a).keys()) == {b, c, d} + + +def test_transitive_unknown_returns_empty(store: LineageStore) -> None: + unknown = "u" * 32 + assert store.get_all_parents(unknown) == {} + assert store.get_all_children(unknown) == {} + + +def test_transitive_source_and_leaf_empty(store: LineageStore) -> None: + a, b = "a" * 32, "b" * 32 + store.record_emission([a], [b]) + # Pure source: no ancestors. + assert store.get_all_parents(a) == {} + # Pure leaf: no descendants. + assert store.get_all_children(b) == {} + + +def test_transitive_excludes_self(store: LineageStore) -> None: + a, b, c = "a" * 32, "b" * 32, "c" * 32 + store.record_emission([a], [b]) + store.record_emission([b], [c]) + for udid in (a, b, c): + assert udid not in store.get_all_parents(udid) + assert udid not in store.get_all_children(udid) + + +def test_record_emission_skips_self_loop_in_place_return(store: LineageStore) -> None: + """In-place return: parent and child udids match. 
No self-edge is recorded + and the node stays ``source_leaf`` rather than getting promoted to ``middle``.""" + a = "a" * 32 + store.record_emission([a], [a]) + + rec = store.get(a) + assert rec is not None + assert rec.parents == [] + assert rec.children == [] + assert rec.task_type == "source_leaf" + + +def test_record_emission_keeps_cross_edges_when_one_child_is_self(store: LineageStore) -> None: + """``parents=[a]``, ``children=[a, b]``: only ``a→a`` is dropped; ``a→b`` is kept.""" + a, b = "a" * 32, "b" * 32 + store.record_emission([a], [a, b]) + + a_rec = store.get(a) + b_rec = store.get(b) + assert a_rec is not None + assert b_rec is not None + assert a_rec.parents == [] + assert a_rec.children == [b] + assert a_rec.task_type == "source" + assert b_rec.parents == [a] + assert b_rec.children == [] + assert b_rec.task_type == "leaf" + + +def test_record_emission_keeps_cross_edges_in_multi_parent_self(store: LineageStore) -> None: + """``parents=[a, b]``, ``children=[a, c]``: edges are ``b→a``, ``a→c``, ``b→c``; + only ``a→a`` is dropped.""" + a, b, c = "a" * 32, "b" * 32, "c" * 32 + store.record_emission([a, b], [a, c]) + + a_rec = store.get(a) + b_rec = store.get(b) + c_rec = store.get(c) + assert a_rec is not None + assert b_rec is not None + assert c_rec is not None + assert a_rec.parents == [b] + assert set(a_rec.children) == {c} + assert b_rec.parents == [] + assert set(b_rec.children) == {a, c} + assert set(c_rec.parents) == {a, b} + assert c_rec.children == [] + + +def test_path_to_udid_matches_task_set_lineage() -> None: + """Mirror invariant: hashing a lineage path with ``_path_to_udid`` yields the same + ``_udid`` that ``Task._set_lineage`` would assign.""" + t = _T(task_id="t", dataset_name="ds", data=None) + t._set_lineage(["3_0"], 7) + assert t._lineage_path == "3_0_7" + assert _path_to_udid("3_0_7") == t._udid + + +# --------------------------------------------------------------------------- # +# Actor-routed tests — verify record_lineage → 
LineageWriterActor → LMDB. +# --------------------------------------------------------------------------- # + + +def _kill_actor_if_present() -> None: + """Make sure no leftover writer actor lingers between tests.""" + with contextlib.suppress(ValueError): + handle = ray.get_actor(LINEAGE_ACTOR_NAME) + ray.kill(handle) + + +@pytest.fixture +def actor(tmp_path: Path, shared_ray_client: None) -> tuple[object, Path]: # noqa: ARG001 + """Spawn a real :class:`LineageWriterActor` for the duration of the test.""" + _kill_actor_if_present() + path = tmp_path / "lineage_actor.mdb" + handle = LineageWriterActor.options( + name=LINEAGE_ACTOR_NAME, + get_if_exists=True, + ).remote(path=str(path)) + try: + yield handle, path + finally: + with contextlib.suppress(Exception): + ray.get(handle.close.remote()) + ray.kill(handle) + + +def test_record_lineage_filters_empty_parents(actor: tuple[object, Path]) -> None: + """``record_lineage`` should not record EmptyTask-style empty parent udids.""" + actor_handle, _ = actor + children = assign_child_lineage([""], [_T(task_id="c", dataset_name="ds", data=None) for _ in range(3)]) + record_lineage([""], [c._udid for c in children]) + assert [c._lineage_path for c in children] == ["0", "1", "2"] + + for i, c in enumerate(children): + rec = ray.get(actor_handle.get.remote(c._udid)) + assert rec is not None, f"child {i} not in store" + assert rec.parents == [] + assert rec.task_type == "source_leaf" + + +def test_record_lineage_propagates_lineage(actor: tuple[object, Path]) -> None: + """End-to-end through the actor: drive two stages with separate + :func:`assign_child_lineage` + :func:`record_lineage` calls and check the + DAG ends up in the store with correct types.""" + actor_handle, _ = actor + + # Stage 1: produce three children from an empty root. 
+ parents = assign_child_lineage([""], [_T(task_id=f"p{i}", dataset_name="ds", data=None) for i in range(3)]) + record_lineage([""], [p._udid for p in parents]) + + # Stage 2: each parent produces two children. + grandchildren = [] + for p in parents: + emitted = assign_child_lineage( + [p._lineage_path], + [_T(task_id="g", dataset_name="ds", data=None) for _ in range(2)], + ) + record_lineage([p._udid], [c._udid for c in emitted]) + grandchildren.extend(emitted) + + # Sources: 3 parents, classified `source` because they now have children. + for p in parents: + rec = ray.get(actor_handle.get.remote(p._udid)) + assert rec is not None + assert rec.task_type == "source" + assert len(rec.children) == 2 + assert rec.parents == [] + + # Leaves: 6 grandchildren, each with one parent. + for g in grandchildren: + rec = ray.get(actor_handle.get.remote(g._udid)) + assert rec is not None + assert rec.task_type == "leaf" + assert len(rec.parents) == 1 + assert rec.children == [] + + +def test_record_lineage_is_noop_without_actor(shared_ray_client: None) -> None: # noqa: ARG001 + """When no LineageWriterActor is registered, ``record_lineage`` must not raise + and must not write anything (no actor exists to write to).""" + _kill_actor_if_present() + # If this returned an error path it would raise. We don't have a record store + # to assert "no write" against directly, but the absence of an actor means + # there's literally nowhere for it to write — successful return is the assertion. 
+ child = _make_child("", 0) + record_lineage([""], [child._udid]) diff --git a/uv.lock b/uv.lock index fd786a09ae..4470e57b85 100644 --- a/uv.lock +++ b/uv.lock @@ -1813,17 +1813,49 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/bd/d4/16916f3dc20a3f5455b63c35dcb260b3716f59ce27a93586804e70e431d5/cytoolz-1.1.0.tar.gz", hash = "sha256:13a7bf254c3c0d28b12e2290b82aed0f0977a4c2a2bf84854fcdc7796a29f3b0", size = 642510, upload-time = "2025-10-19T00:44:56.174Z" } wheels = [ + { url = "https://files.pythonhosted.org/packages/df/aa/365953926ee8b4f2e07df7200c0d73632155908c8867af14b2d19cc9f1f7/cytoolz-1.1.0-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:098d628a801dc142e9740126be5624eb7aef1d732bc7a5719f60a2095547b485", size = 2639311, upload-time = "2025-10-19T00:40:22.289Z" }, + { url = "https://files.pythonhosted.org/packages/7c/ee/62beaaee7df208f22590ad07ef8875519af49c52ca39d99460b14a00f15a/cytoolz-1.1.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:779ee4096ed7a82cffab89372ffc339631c285079dbf33dbe7aff1f6174985df", size = 2979532, upload-time = "2025-10-19T00:40:24.006Z" }, + { url = "https://files.pythonhosted.org/packages/c5/04/2211251e450bed111ada1194dc42c461da9aea441de62a01e4085ea6de9f/cytoolz-1.1.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f2ce18dd99533d077e9712f9faa852f389f560351b1efd2f2bdb193a95eddde2", size = 3018632, upload-time = "2025-10-19T00:40:26.175Z" }, { url = "https://files.pythonhosted.org/packages/ed/a2/4a3400e4d07d3916172bf74fede08020d7b4df01595d8a97f1e9507af5ae/cytoolz-1.1.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ac266a34437812cf841cecbfe19f355ab9c3dd1ef231afc60415d40ff12a76e4", size = 2788579, upload-time = "2025-10-19T00:40:27.878Z" }, + { url = 
"https://files.pythonhosted.org/packages/fe/82/bb88caa53a41f600e7763c517d50e2efbbe6427ea395716a92b83f44882a/cytoolz-1.1.0-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:1920b9b9c13d60d0bb6cd14594b3bce0870022eccb430618c37156da5f2b7a55", size = 2593024, upload-time = "2025-10-19T00:40:29.601Z" }, + { url = "https://files.pythonhosted.org/packages/d4/56/faec7696f235521b926ffdf92c102f5b029f072d28e1020364e55b084820/cytoolz-1.1.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:5ab2c97d8aaa522b038cca9187b1153347af22309e7c998b14750c6fdec7b1cb", size = 2654461, upload-time = "2025-10-19T00:40:32.884Z" }, + { url = "https://files.pythonhosted.org/packages/d9/b3/80b8183e7eee44f45bfa3cdd3ebdadf3dd43ffc686f96d442a6c4dded45d/cytoolz-1.1.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:7fc0f1e4e9bb384d26e73c6657bbc26abdae4ff66a95933c00f3d578be89181b", size = 2881589, upload-time = "2025-10-19T00:40:36.315Z" }, + { url = "https://files.pythonhosted.org/packages/8f/05/ac5ba5ddb88a3ba7ecea4bf192194a838af564d22ea7a4812cbb6bd106ce/cytoolz-1.1.0-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:dd3f894ff972da1994d06ac6157d74e40dda19eb31fe5e9b7863ca4278c3a167", size = 2589924, upload-time = "2025-10-19T00:40:38.317Z" }, + { url = "https://files.pythonhosted.org/packages/8e/cd/100483cae3849d24351c8333a815dc6adaf3f04912486e59386d86d9db9a/cytoolz-1.1.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:0846f49cf8a4496bd42659040e68bd0484ce6af819709cae234938e039203ba0", size = 2868059, upload-time = "2025-10-19T00:40:40.025Z" }, { url = "https://files.pythonhosted.org/packages/34/6e/3a7c56b325772d39397fc3aafb4dc054273982097178b6c3917c6dad48de/cytoolz-1.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:16a3af394ade1973226d64bb2f9eb3336adbdea03ed5b134c1bbec5a3b20028e", size = 2721692, upload-time = "2025-10-19T00:40:41.621Z" }, { url = 
"https://files.pythonhosted.org/packages/fd/04/2ab98edeea90311e4029e1643e43d2027b54da61453292d9ea51a103ee87/cytoolz-1.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:ebf06d1c5344fb22fee71bf664234733e55db72d74988f2ecb7294b05e4db30c", size = 945831, upload-time = "2025-10-19T00:40:44.693Z" }, + { url = "https://files.pythonhosted.org/packages/0c/93/9c787f7c909e75670fff467f2504725d06d8c3f51d6dfe22c55a08c8ccd4/cytoolz-1.1.0-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:7d3e405e435320e08c5a1633afaf285a392e2d9cef35c925d91e2a31dfd7a688", size = 2679635, upload-time = "2025-10-19T00:40:57.799Z" }, + { url = "https://files.pythonhosted.org/packages/50/aa/9ee92c302cccf7a41a7311b325b51ebeff25d36c1f82bdc1bbe3f58dc947/cytoolz-1.1.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:923df8f5591e0d20543060c29909c149ab1963a7267037b39eee03a83dbc50a8", size = 2938352, upload-time = "2025-10-19T00:40:59.49Z" }, + { url = "https://files.pythonhosted.org/packages/6a/a3/3b58c5c1692c3bacd65640d0d5c7267a7ebb76204f7507aec29de7063d2f/cytoolz-1.1.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:25db9e4862f22ea0ae2e56c8bec9fc9fd756b655ae13e8c7b5625d7ed1c582d4", size = 3022121, upload-time = "2025-10-19T00:41:01.209Z" }, { url = "https://files.pythonhosted.org/packages/e1/93/c647bc3334355088c57351a536c2d4a83dd45f7de591fab383975e45bff9/cytoolz-1.1.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c7a98deb11ccd8e5d9f9441ef2ff3352aab52226a2b7d04756caaa53cd612363", size = 2857656, upload-time = "2025-10-19T00:41:03.456Z" }, + { url = "https://files.pythonhosted.org/packages/b2/c2/43fea146bf4141deea959e19dcddf268c5ed759dec5c2ed4a6941d711933/cytoolz-1.1.0-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:dce4ee9fc99104bc77efdea80f32ca5a650cd653bcc8a1d984a931153d3d9b58", size 
= 2551284, upload-time = "2025-10-19T00:41:05.347Z" }, + { url = "https://files.pythonhosted.org/packages/45/be/f8524bb9ad8812ad375e61238dcaa3177628234d1b908ad0b74e3657cafd/cytoolz-1.1.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:3b5c5a192abda123ad45ef716ec9082b4cf7d95e9ada8291c5c2cc5558be858b", size = 2722884, upload-time = "2025-10-19T00:41:09.698Z" }, + { url = "https://files.pythonhosted.org/packages/d7/dd/88619f9c8d2b682562c0c886bbb7c35720cb83fda2ac9a41bdd14073d9bd/cytoolz-1.1.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:e7e29a1a03f00b4322196cfe8e2c38da9a6c8d573566052c586df83aacc5663c", size = 2839661, upload-time = "2025-10-19T00:41:13.053Z" }, + { url = "https://files.pythonhosted.org/packages/b8/8d/4478ebf471ee78dd496d254dc0f4ad729cd8e6ba8257de4f0a98a2838ef2/cytoolz-1.1.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:5291b117d71652a817ec164e7011f18e6a51f8a352cc9a70ed5b976c51102fda", size = 2547095, upload-time = "2025-10-19T00:41:16.054Z" }, + { url = "https://files.pythonhosted.org/packages/e6/68/f1dea33367b0b3f64e199c230a14a6b6f243c189020effafd31e970ca527/cytoolz-1.1.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:8caef62f846a9011676c51bda9189ae394cdd6bb17f2946ecaedc23243268320", size = 2870901, upload-time = "2025-10-19T00:41:17.727Z" }, { url = "https://files.pythonhosted.org/packages/4a/9a/33591c09dfe799b8fb692cf2ad383e2c41ab6593cc960b00d1fc8a145655/cytoolz-1.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:de425c5a8e3be7bb3a195e19191d28d9eb3c2038046064a92edc4505033ec9cb", size = 2765422, upload-time = "2025-10-19T00:41:20.075Z" }, { url = "https://files.pythonhosted.org/packages/ad/33/4c9bdf8390dc01d2617c7f11930697157164a52259b6818ddfa2f94f89f4/cytoolz-1.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:07156987f224c6dac59aa18fb8bf91e1412f5463961862716a3381bf429c8699", size = 947989, upload-time = "2025-10-19T00:41:23.288Z" }, { url = 
"https://files.pythonhosted.org/packages/d9/cb/efc1b29e211e0670a6953222afaac84dcbba5cb940b130c0e49858978040/cytoolz-1.1.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:26801c1a165e84786a99e03c9c9973356caaca002d66727b761fb1042878ef06", size = 992632, upload-time = "2025-10-19T00:41:30.612Z" }, + { url = "https://files.pythonhosted.org/packages/db/f5/0083608286ad1716eda7c41f868e85ac549f6fd6b7646993109fa0bdfd98/cytoolz-1.1.0-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:823df012ab90d2f2a0f92fea453528539bf71ac1879e518524cd0c86aa6df7b9", size = 2669312, upload-time = "2025-10-19T00:41:41.55Z" }, + { url = "https://files.pythonhosted.org/packages/47/a8/d16080b575520fe5da00cede1ece4e0a4180ec23f88dcdc6a2f5a90a7f7f/cytoolz-1.1.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2f1fcf9e7e7b3487883ff3f815abc35b89dcc45c4cf81c72b7ee457aa72d197b", size = 2922147, upload-time = "2025-10-19T00:41:43.252Z" }, + { url = "https://files.pythonhosted.org/packages/7e/bc/716c9c1243701e58cad511eb3937fd550e645293c5ed1907639c5d66f194/cytoolz-1.1.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4cdb3fa1772116827f263f25b0cdd44c663b6701346a56411960534a06c082de", size = 2981602, upload-time = "2025-10-19T00:41:45.354Z" }, { url = "https://files.pythonhosted.org/packages/14/bc/571b232996846b27f4ac0c957dc8bf60261e9b4d0d01c8d955e82329544e/cytoolz-1.1.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d1b5c95041741b81430454db65183e133976f45ac3c03454cfa8147952568529", size = 2830103, upload-time = "2025-10-19T00:41:47.959Z" }, + { url = "https://files.pythonhosted.org/packages/5b/55/c594afb46ecd78e4b7e1fb92c947ed041807875661ceda73baaf61baba4f/cytoolz-1.1.0-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:b2079fd9f1a65f4c61e6278c8a6d4f85edf30c606df8d5b32f1add88cbbe2286", size = 2533802, upload-time = "2025-10-19T00:41:49.683Z" }, + { url = "https://files.pythonhosted.org/packages/e2/df/035a408df87f25cfe3611557818b250126cd2281b2104cd88395de205583/cytoolz-1.1.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:06d1c79aa51e6a92a90b0e456ebce2288f03dd6a76c7f582bfaa3eda7692e8a5", size = 2707575, upload-time = "2025-10-19T00:41:53.305Z" }, + { url = "https://files.pythonhosted.org/packages/30/7a/2c3d60682b26058d435416c4e90d4a94db854de5be944dfd069ed1be648a/cytoolz-1.1.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:964b248edc31efc50a65e9eaa0c845718503823439d2fa5f8d2c7e974c2b5409", size = 2819605, upload-time = "2025-10-19T00:41:58.257Z" }, + { url = "https://files.pythonhosted.org/packages/45/92/19b722a1d83cc443fbc0c16e0dc376f8a451437890d3d9ee370358cf0709/cytoolz-1.1.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c9ff2b3c57c79b65cb5be14a18c6fd4a06d5036fb3f33e973a9f70e9ac13ca28", size = 2533559, upload-time = "2025-10-19T00:42:00.324Z" }, + { url = "https://files.pythonhosted.org/packages/1d/15/fa3b7891da51115204416f14192081d3dea0eaee091f123fdc1347de8dd1/cytoolz-1.1.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:22290b73086af600042d99f5ce52a43d4ad9872c382610413176e19fc1d4fd2d", size = 2839171, upload-time = "2025-10-19T00:42:01.881Z" }, { url = "https://files.pythonhosted.org/packages/46/40/d3519d5cd86eebebf1e8b7174ec32dfb6ecec67b48b0cfb92bf226659b5a/cytoolz-1.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a2ade74fccd080ea793382968913ee38d7a35c921df435bbf0a6aeecf0d17574", size = 2743379, upload-time = "2025-10-19T00:42:03.809Z" }, { url = "https://files.pythonhosted.org/packages/d6/a4/fb7eb403c6a4c81e5a30363f34a71adcc8bf5292dc8ea32e2440aa5668f2/cytoolz-1.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:9e2d3fe3b45c3eb7233746f7aca37789be3dceec3e07dcc406d3e045ea0f7bdc", size = 946461, upload-time = "2025-10-19T00:42:07.983Z" }, + 
{ url = "https://files.pythonhosted.org/packages/9a/71/1d1103b819458679277206ad07d78ca6b31c4bb88d6463fd193e19bfb270/cytoolz-1.1.0-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:4d96ff3d381423af1b105295f97de86d1db51732c9566eb37378bab6670c5010", size = 2807149, upload-time = "2025-10-19T00:42:20.964Z" }, + { url = "https://files.pythonhosted.org/packages/1a/d4/3d83a05a21e7d2ed2b9e6daf489999c29934b005de9190272b8a2e3735d0/cytoolz-1.1.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:0ec96b3d537cdf47d4e76ded199f7440715f4c71029b45445cff92c1248808c2", size = 3111608, upload-time = "2025-10-19T00:42:22.684Z" }, + { url = "https://files.pythonhosted.org/packages/51/88/96f68354c3d4af68de41f0db4fe41a23b96a50a4a416636cea325490cfeb/cytoolz-1.1.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:208e2f2ef90a32b0acbff3303d90d89b13570a228d491d2e622a7883a3c68148", size = 3179373, upload-time = "2025-10-19T00:42:24.395Z" }, { url = "https://files.pythonhosted.org/packages/ce/50/ed87a5cd8e6f27ffbb64c39e9730e18ec66c37631db2888ae711909f10c9/cytoolz-1.1.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0d416a81bb0bd517558668e49d30a7475b5445f9bbafaab7dcf066f1e9adba36", size = 3003120, upload-time = "2025-10-19T00:42:26.18Z" }, + { url = "https://files.pythonhosted.org/packages/d3/a7/acde155b050d6eaa8e9c7845c98fc5fb28501568e78e83ebbf44f8855274/cytoolz-1.1.0-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f32e94c91ffe49af04835ee713ebd8e005c85ebe83e7e1fdcc00f27164c2d636", size = 2703225, upload-time = "2025-10-19T00:42:27.93Z" }, + { url = "https://files.pythonhosted.org/packages/89/7a/93e5f860926165538c85e1c5e1670ad3424f158df810f8ccd269da652138/cytoolz-1.1.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = 
"sha256:bf069c5381d757debae891401b88b3a346ba3a28ca45ba9251103b282463fad8", size = 2862950, upload-time = "2025-10-19T00:42:31.803Z" }, + { url = "https://files.pythonhosted.org/packages/71/ca/adfa1fb7949478135a37755cb8e88c20cd6b75c22a05f1128f05f3ab2c60/cytoolz-1.1.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:3e3872c21170f8341656f8692f8939e8800dcee6549ad2474d4c817bdefd62cd", size = 2979049, upload-time = "2025-10-19T00:42:35.377Z" }, + { url = "https://files.pythonhosted.org/packages/70/4c/7bf47a03a4497d500bc73d4204e2d907771a017fa4457741b2a1d7c09319/cytoolz-1.1.0-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:b9ddeff8e8fd65eb1fcefa61018100b2b627e759ea6ad275d2e2a93ffac147bf", size = 2699492, upload-time = "2025-10-19T00:42:37.133Z" }, + { url = "https://files.pythonhosted.org/packages/7e/e7/3d034b0e4817314f07aa465d5864e9b8df9d25cb260a53dd84583e491558/cytoolz-1.1.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:02feeeda93e1fa3b33414eb57c2b0aefd1db8f558dd33fdfcce664a0f86056e4", size = 2995646, upload-time = "2025-10-19T00:42:38.912Z" }, { url = "https://files.pythonhosted.org/packages/c1/62/be357181c71648d9fe1d1ce91cd42c63457dcf3c158e144416fd51dced83/cytoolz-1.1.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:d08154ad45349162b6c37f12d5d1b2e6eef338e657b85e1621e4e6a4a69d64cb", size = 2919481, upload-time = "2025-10-19T00:42:40.85Z" }, { url = "https://files.pythonhosted.org/packages/64/29/39c161e9204a9715321ddea698cbd0abc317e78522c7c642363c20589e71/cytoolz-1.1.0-cp313-cp313t-win_amd64.whl", hash = "sha256:1bb77bc6197e5cb19784b6a42bb0f8427e81737a630d9d7dda62ed31733f9e6c", size = 1004445, upload-time = "2025-10-19T00:42:44.855Z" }, { url = "https://files.pythonhosted.org/packages/f6/8a/606e4c7ed14aa6a86aee6ca84a2cb804754dc6c4905b8f94e09e49f1ce60/cytoolz-1.1.0-pp311-pypy311_pp73-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b7de5718e2113d4efccea3f06055758cdbc17388ecc3341ba4d1d812837d7c1a", size = 
978877, upload-time = "2025-10-19T00:44:50.819Z" }, @@ -2912,12 +2944,18 @@ version = "3.3.0" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/c7/e5/40dbda2736893e3e53d25838e0f19a2b417dfc122b9989c91918db30b5d3/greenlet-3.3.0.tar.gz", hash = "sha256:a82bb225a4e9e4d653dd2fb7b8b2d36e4fb25bc0165422a11e48b88e9e6f78fb", size = 190651, upload-time = "2025-12-04T14:49:44.05Z" } wheels = [ + { url = "https://files.pythonhosted.org/packages/3c/f2/89c5eb0faddc3ff014f1c04467d67dee0d1d334ab81fadbf3744847f8a8a/greenlet-3.3.0-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4243050a88ba61842186cb9e63c7dfa677ec146160b0efd73b855a3d9c7fcf32", size = 590338, upload-time = "2025-12-04T14:57:41.136Z" }, + { url = "https://files.pythonhosted.org/packages/80/d7/db0a5085035d05134f8c089643da2b44cc9b80647c39e93129c5ef170d8f/greenlet-3.3.0-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:670d0f94cd302d81796e37299bcd04b95d62403883b24225c6b5271466612f45", size = 601098, upload-time = "2025-12-04T15:07:11.898Z" }, { url = "https://files.pythonhosted.org/packages/dc/a6/e959a127b630a58e23529972dbc868c107f9d583b5a9f878fb858c46bc1a/greenlet-3.3.0-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6cb3a8ec3db4a3b0eb8a3c25436c2d49e3505821802074969db017b87bc6a948", size = 590206, upload-time = "2025-12-04T14:26:01.254Z" }, { url = "https://files.pythonhosted.org/packages/0a/5f/783a23754b691bfa86bd72c3033aa107490deac9b2ef190837b860996c9f/greenlet-3.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4449a736606bd30f27f8e1ff4678ee193bc47f6ca810d705981cfffd6ce0d8c5", size = 1615483, upload-time = "2025-12-04T14:27:28.083Z" }, { url = "https://files.pythonhosted.org/packages/1d/d5/c339b3b4bc8198b7caa4f2bd9fd685ac9f29795816d8db112da3d04175bb/greenlet-3.3.0-cp311-cp311-win_amd64.whl", hash = 
"sha256:7652ee180d16d447a683c04e4c5f6441bae7ba7b17ffd9f6b3aff4605e9e6f71", size = 301164, upload-time = "2025-12-04T14:42:51.577Z" }, + { url = "https://files.pythonhosted.org/packages/a4/39/f1c8da50024feecd0793dbd5e08f526809b8ab5609224a2da40aad3a7641/greenlet-3.3.0-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e8e18ed6995e9e2c0b4ed264d2cf89260ab3ac7e13555b8032b25a74c6d18655", size = 607742, upload-time = "2025-12-04T14:57:42.349Z" }, + { url = "https://files.pythonhosted.org/packages/77/cb/43692bcd5f7a0da6ec0ec6d58ee7cddb606d055ce94a62ac9b1aa481e969/greenlet-3.3.0-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c024b1e5696626890038e34f76140ed1daf858e37496d33f2af57f06189e70d7", size = 622297, upload-time = "2025-12-04T15:07:13.552Z" }, { url = "https://files.pythonhosted.org/packages/75/b0/6bde0b1011a60782108c01de5913c588cf51a839174538d266de15e4bf4d/greenlet-3.3.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:047ab3df20ede6a57c35c14bf5200fcf04039d50f908270d3f9a7a82064f543b", size = 609885, upload-time = "2025-12-04T14:26:02.368Z" }, { url = "https://files.pythonhosted.org/packages/05/f5/49a9ac2dff7f10091935def9165c90236d8f175afb27cbed38fb1d61ab6b/greenlet-3.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83cd0e36932e0e7f36a64b732a6f60c2fc2df28c351bae79fbaf4f8092fe7614", size = 1636017, upload-time = "2025-12-04T14:27:29.688Z" }, { url = "https://files.pythonhosted.org/packages/6c/79/3912a94cf27ec503e51ba493692d6db1e3cd8ac7ac52b0b47c8e33d7f4f9/greenlet-3.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:a7a34b13d43a6b78abf828a6d0e87d3385680eaf830cd60d20d52f249faabf39", size = 301964, upload-time = "2025-12-04T14:36:58.316Z" }, + { url = "https://files.pythonhosted.org/packages/c2/ff/7c985128f0514271b8268476af89aee6866df5eec04ac17dcfbc676213df/greenlet-3.3.0-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = 
"sha256:7d2d9fd66bfadf230b385fdc90426fcd6eb64db54b40c495b72ac0feb5766c54", size = 610211, upload-time = "2025-12-04T14:57:43.968Z" }, + { url = "https://files.pythonhosted.org/packages/79/07/c47a82d881319ec18a4510bb30463ed6891f2ad2c1901ed5ec23d3de351f/greenlet-3.3.0-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:30a6e28487a790417d036088b3bcb3f3ac7d8babaa7d0139edbaddebf3af9492", size = 624311, upload-time = "2025-12-04T15:07:14.697Z" }, { url = "https://files.pythonhosted.org/packages/fd/8e/424b8c6e78bd9837d14ff7df01a9829fc883ba2ab4ea787d4f848435f23f/greenlet-3.3.0-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:087ea5e004437321508a8d6f20efc4cfec5e3c30118e1417ea96ed1d93950527", size = 612833, upload-time = "2025-12-04T14:26:03.669Z" }, { url = "https://files.pythonhosted.org/packages/1e/37/f31136132967982d698c71a281a8901daf1a8fbab935dce7c0cf15f942cc/greenlet-3.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5375d2e23184629112ca1ea89a53389dddbffcf417dad40125713d88eb5f96e8", size = 1636483, upload-time = "2025-12-04T14:27:30.804Z" }, { url = "https://files.pythonhosted.org/packages/7e/71/ba21c3fb8c5dce83b8c01f458a42e99ffdb1963aeec08fff5a18588d8fd7/greenlet-3.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:9ee1942ea19550094033c35d25d20726e4f1c40d59545815e1128ac58d416d38", size = 301833, upload-time = "2025-12-04T14:32:23.929Z" }, @@ -4336,6 +4374,32 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/ef/11292bb0b85cf4c93447cab5a29f64576ed14d3ab4280e35ddd23486594a/lm_format_enforcer-0.11.3-py3-none-any.whl", hash = "sha256:cf586350875def1ae7a8fba84fcbbfc8371424b6c9d05c1fcba70aa233fbf06f", size = 45418, upload-time = "2025-08-24T19:37:46.325Z" }, ] +[[package]] +name = "lmdb" +version = "2.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/21/44/d94934efaf8f887b6959f131fde740fcaa831edfd13eb5425574637cddd5/lmdb-2.2.0.tar.gz", hash 
= "sha256:53020e20305c043ea6e68089bc242d744fba6073cdb268332299ba6dda2886d4", size = 933189, upload-time = "2026-03-30T01:26:19.049Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/15/20/043bd8851979fb86a7fdb08b4337d319dbccf7f468632418527bad684945/lmdb-2.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a72dba2c63f6d497f1dd1a1e46e30f14dfb8c1fddc5a51ed913993f5ac03736c", size = 112274, upload-time = "2026-03-30T01:25:40.919Z" }, + { url = "https://files.pythonhosted.org/packages/ad/d1/d8f61fda6f837dad050514544560385a0f12e8b94e91079f63632195acc6/lmdb-2.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5c807ce9c514354c4f2e76f97e69002048b7f4a3c97a3eaf82415bf7c5daed77", size = 111129, upload-time = "2026-03-30T01:25:42.31Z" }, + { url = "https://files.pythonhosted.org/packages/19/11/f25fc19a68d8218d1337894b323fae79a4cccdef0994ba1c2714e268a2cd/lmdb-2.2.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a073fada46565c373c8683c67c7c07cc0d3511fef7e122da7052bb5720d2af09", size = 321904, upload-time = "2026-03-30T01:25:43.436Z" }, + { url = "https://files.pythonhosted.org/packages/31/a0/1b95f1d53e207d7f4581950228ae891fd930f5d2aeda1501a95982c7b2a8/lmdb-2.2.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:856b322399dcc1992675b8cf5f56cd54e89d05ea86a89dc5f6fa6d671c7b48f2", size = 324208, upload-time = "2026-03-30T01:25:44.706Z" }, + { url = "https://files.pythonhosted.org/packages/8a/1a/6c5931ee1412a9d8c0c3859ed33bb64ed00ea8ef418413c56524e0372ef3/lmdb-2.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:beacb2aed281cc806cb9a91663ed1a772fecd7a125d16b694cfc7af94a9864be", size = 109793, upload-time = "2026-03-30T01:25:46.148Z" }, + { url = "https://files.pythonhosted.org/packages/2e/36/0ba441a4faddd32376270aabedf915d7a21f5fe031313e18c6998b0138d4/lmdb-2.2.0-cp311-cp311-win_arm64.whl", hash = 
"sha256:e36455ace4c50b5185e4660e19d63533fe5c07840598eeefaad783415a380bab", size = 103680, upload-time = "2026-03-30T01:25:47.222Z" }, + { url = "https://files.pythonhosted.org/packages/b8/a7/9604e594725e2d2d0482669cfd9cba23cc47bd288f076c7e93985e5c046c/lmdb-2.2.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8cc73de748070321986a3a26f51f3693bdd196c20e797d8d2ad0e860b5d2e26c", size = 113096, upload-time = "2026-03-30T01:25:48.293Z" }, + { url = "https://files.pythonhosted.org/packages/05/cf/7b8e13c1253c77a2c41b7786659d64e97f758a13f1fafdb815cf76630eba/lmdb-2.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9b6fecb1e32c55f0a1f3585d637f221e20146bb3ea9997c50fdfa3a58c0c2e41", size = 111656, upload-time = "2026-03-30T01:25:49.36Z" }, + { url = "https://files.pythonhosted.org/packages/94/6a/f059c48e4f3321710825fdb1cdee50d32eea90e0c097441beec1b155788f/lmdb-2.2.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:547e083457b6a0936fe73821f35c019be817877f9a85488be818ec8383ef47a6", size = 329003, upload-time = "2026-03-30T01:25:50.47Z" }, + { url = "https://files.pythonhosted.org/packages/38/22/513c885f284eccd49fc8d1c0a9a9d5da6badd9efc600d482424118df2a67/lmdb-2.2.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dd505c995a595403f69367cbf16bcd5c88cdd208c706d709ba9b1bc2f9a16f69", size = 333140, upload-time = "2026-03-30T01:25:51.68Z" }, + { url = "https://files.pythonhosted.org/packages/f1/9b/8b3c81009230ebbe340e59cf2996626800f291e034ed76535d754b2cf98c/lmdb-2.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:dacf737ad869c6e48e1471dfa4d3e7c6ce2d082a218c069e20c4a138804e5fd2", size = 109668, upload-time = "2026-03-30T01:25:53.091Z" }, + { url = "https://files.pythonhosted.org/packages/0b/68/368099745c1d82d079c490c62cdef5e99bc9a3e9132991e3b82967363d55/lmdb-2.2.0-cp312-cp312-win_arm64.whl", hash = "sha256:653f5e183b04b9124c505c519a3ff691038b4fb459c3211b1323c67bfba53f37", size = 
103760, upload-time = "2026-03-30T01:25:54.374Z" }, + { url = "https://files.pythonhosted.org/packages/64/43/543af71e8fa4c56623bb89c358121ab806426f26685f11539fe5452deffa/lmdb-2.2.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:36e0cbe6b7d59f6e19b448942c5f9e91674f596a802743258f82e926a9a09632", size = 113550, upload-time = "2026-03-30T01:25:55.727Z" }, + { url = "https://files.pythonhosted.org/packages/22/2c/4702d36c0073737554b20d1d62e879a066df963482f8e514866588ddd82d/lmdb-2.2.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e5d7a9dfd279a5884806fd478244961e4483cc6d7eb769caed1d7019a8608c20", size = 112135, upload-time = "2026-03-30T01:25:56.809Z" }, + { url = "https://files.pythonhosted.org/packages/2f/43/d015fea326ed0a634107f29740b002170a462b6d2481e509105c685520f5/lmdb-2.2.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d0dbe7902b2cdb60bf6c893f307ef2b2a5039afd22f029515b86183f05ab1353", size = 332108, upload-time = "2026-03-30T01:25:57.907Z" }, + { url = "https://files.pythonhosted.org/packages/bb/c9/503e7f173994b514936badcbcb7fa9f89a07a3cfe596c6fb95b1b91b8d70/lmdb-2.2.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9c576cdb163ae61a7ef6eecbc20a6025a4abe085491c1dc0c667d726f4926b53", size = 336017, upload-time = "2026-03-30T01:25:59.234Z" }, + { url = "https://files.pythonhosted.org/packages/3e/94/b3b064acfd2f8acf5aaa53fff2c43963dbc1932ba8b8df4e27d75bf6a34a/lmdb-2.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:746eebcd4c0aeaf0eb2f897028929d270c5bc80ef4918500eec16db6f26f3fcc", size = 109574, upload-time = "2026-03-30T01:26:00.324Z" }, + { url = "https://files.pythonhosted.org/packages/b9/10/dc7488d1effc339cd9470f9d22ec0fd7052a3d4fdfae87765ecd41cb2e59/lmdb-2.2.0-cp313-cp313-win_arm64.whl", hash = "sha256:006153aac9fb0415a5f3e8ac88789e5730dba3dd0743cd84c95e3951ff68bc3a", size = 103810, upload-time = "2026-03-30T01:26:01.559Z" }, +] + [[package]] name = 
"locket" version = "1.0.0" @@ -5085,6 +5149,7 @@ dependencies = [ { name = "fsspec" }, { name = "hydra-core" }, { name = "jieba" }, + { name = "lmdb" }, { name = "loguru" }, { name = "mecab-python3" }, { name = "omegaconf" }, @@ -5519,6 +5584,7 @@ requires-dist = [ { name = "jieba", specifier = "==0.42.1" }, { name = "justext", marker = "extra == 'text-cpu'" }, { name = "librosa", marker = "extra == 'audio-common'" }, + { name = "lmdb", specifier = ">=1.4" }, { name = "loguru" }, { name = "lxml", marker = "extra == 'text-cpu'" }, { name = "matplotlib", marker = "extra == 'interleaved-cpu'" }, @@ -8600,9 +8666,11 @@ version = "8.0.0" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/5e/eb/5a0d575de784f9a1f94e2b1288c6886f13f34185e13117ed530f32b6f8a8/pyyaml_ft-8.0.0.tar.gz", hash = "sha256:0c947dce03954c7b5d38869ed4878b2e6ff1d44b08a0d84dc83fdad205ae39ab", size = 141057, upload-time = "2025-06-10T15:32:15.613Z" } wheels = [ + { url = "https://files.pythonhosted.org/packages/5f/c2/e8825f4ff725b7e560d62a3609e31d735318068e1079539ebfde397ea03e/pyyaml_ft-8.0.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cec6c92b4207004b62dfad1f0be321c9f04725e0f271c16247d8b39c3bf3ea42", size = 786772, upload-time = "2025-06-10T15:31:54.712Z" }, { url = "https://files.pythonhosted.org/packages/35/be/58a4dcae8854f2fdca9b28d9495298fd5571a50d8430b1c3033ec95d2d0e/pyyaml_ft-8.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06237267dbcab70d4c0e9436d8f719f04a51123f0ca2694c00dd4b68c338e40b", size = 778723, upload-time = "2025-06-10T15:31:56.093Z" }, { url = "https://files.pythonhosted.org/packages/f0/69/ac02afe286275980ecb2dcdc0156617389b7e0c0a3fcdedf155c67be2b80/pyyaml_ft-8.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7d10175a746be65f6feb86224df5d6bc5c049ebf52b89a88cf1cd78af5a367a8", size = 799159, upload-time = "2025-06-10T15:31:59.675Z" }, { url = 
"https://files.pythonhosted.org/packages/4e/ac/c492a9da2e39abdff4c3094ec54acac9747743f36428281fb186a03fab76/pyyaml_ft-8.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:58e1015098cf8d8aec82f360789c16283b88ca670fe4275ef6c48c5e30b22a96", size = 158779, upload-time = "2025-06-10T15:32:01.029Z" }, + { url = "https://files.pythonhosted.org/packages/f9/66/28d82dbff7f87b96f0eeac79b7d972a96b4980c1e445eb6a857ba91eda00/pyyaml_ft-8.0.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dab0abb46eb1780da486f022dce034b952c8ae40753627b27a626d803926483b", size = 831650, upload-time = "2025-06-10T15:32:08.076Z" }, { url = "https://files.pythonhosted.org/packages/e8/df/161c4566facac7d75a9e182295c223060373d4116dead9cc53a265de60b9/pyyaml_ft-8.0.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd48d639cab5ca50ad957b6dd632c7dd3ac02a1abe0e8196a3c24a52f5db3f7a", size = 815755, upload-time = "2025-06-10T15:32:09.435Z" }, { url = "https://files.pythonhosted.org/packages/d5/d2/e369064aa51009eb9245399fd8ad2c562bd0bcd392a00be44b2a824ded7c/pyyaml_ft-8.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:3bb4b927929b0cb162fb1605392a321e3333e48ce616cdcfa04a839271373255", size = 835581, upload-time = "2025-06-10T15:32:12.897Z" }, { url = "https://files.pythonhosted.org/packages/c0/28/26534bed77109632a956977f60d8519049f545abc39215d086e33a61f1f2/pyyaml_ft-8.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:de04cfe9439565e32f178106c51dd6ca61afaa2907d143835d501d84703d3793", size = 171579, upload-time = "2025-06-10T15:32:14.34Z" }, @@ -8701,15 +8769,23 @@ source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/d3/28/9d808fe62375b9aab5ba92fa9b29371297b067c2790b2d7cda648b1e2f8d/rapidfuzz-3.14.3.tar.gz", hash = "sha256:2491937177868bc4b1e469087601d53f925e8d270ccc21e07404b4b5814b7b5f", size = 57863900, upload-time = "2025-11-01T11:54:52.321Z" } wheels = [ { url = 
"https://files.pythonhosted.org/packages/ed/69/309d8f3a0bb3031fd9b667174cc4af56000645298af7c2931be5c3d14bb4/rapidfuzz-3.14.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cfe8df315ab4e6db4e1be72c5170f8e66021acde22cd2f9d04d2058a9fd8162e", size = 3178495, upload-time = "2025-11-01T11:52:53.005Z" }, + { url = "https://files.pythonhosted.org/packages/10/b7/f9c44a99269ea5bf6fd6a40b84e858414b6e241288b9f2b74af470d222b1/rapidfuzz-3.14.3-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:769f31c60cd79420188fcdb3c823227fc4a6deb35cafec9d14045c7f6743acae", size = 1228443, upload-time = "2025-11-01T11:52:54.991Z" }, + { url = "https://files.pythonhosted.org/packages/f3/b6/983805a844d44670eaae63831024cdc97ada4e9c62abc6b20703e81e7f9b/rapidfuzz-3.14.3-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:834d1e818005ed0d4ae38f6b87b86fad9b0a74085467ece0727d20e15077c094", size = 2530120, upload-time = "2025-11-01T11:52:58.298Z" }, { url = "https://files.pythonhosted.org/packages/b4/cc/2c97beb2b1be2d7595d805682472f1b1b844111027d5ad89b65e16bdbaaa/rapidfuzz-3.14.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:948b00e8476a91f510dd1ec07272efc7d78c275d83b630455559671d4e33b678", size = 4283129, upload-time = "2025-11-01T11:53:00.188Z" }, { url = "https://files.pythonhosted.org/packages/cf/99/5fa23e204435803875daefda73fd61baeabc3c36b8fc0e34c1705aab8c7b/rapidfuzz-3.14.3-cp311-cp311-win_amd64.whl", hash = "sha256:ef6bf930b947bd0735c550683939a032090f1d688dfd8861d6b45307b96fd5c5", size = 1544259, upload-time = "2025-11-01T11:53:03.66Z" }, { url = "https://files.pythonhosted.org/packages/30/83/80d22997acd928eda7deadc19ccd15883904622396d6571e935993e0453a/rapidfuzz-3.14.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6c5f545f454871e6af05753a0172849c82feaf0f521c5ca62ba09e1b382d6382", size = 3154947, upload-time = "2025-11-01T11:53:12.093Z" }, + { url = 
"https://files.pythonhosted.org/packages/5b/cf/9f49831085a16384695f9fb096b99662f589e30b89b4a589a1ebc1a19d34/rapidfuzz-3.14.3-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:07aa0b5d8863e3151e05026a28e0d924accf0a7a3b605da978f0359bb804df43", size = 1223872, upload-time = "2025-11-01T11:53:13.664Z" }, + { url = "https://files.pythonhosted.org/packages/da/86/280038b6b0c2ccec54fb957c732ad6b41cc1fd03b288d76545b9cf98343f/rapidfuzz-3.14.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:6de00eb84c71476af7d3110cf25d8fe7c792d7f5fa86764ef0b4ca97e78ca3ed", size = 2521398, upload-time = "2025-11-01T11:53:17.146Z" }, { url = "https://files.pythonhosted.org/packages/fa/7b/05c26f939607dca0006505e3216248ae2de631e39ef94dd63dbbf0860021/rapidfuzz-3.14.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d7843a1abf0091773a530636fdd2a49a41bcae22f9910b86b4f903e76ddc82dc", size = 4259416, upload-time = "2025-11-01T11:53:19.34Z" }, { url = "https://files.pythonhosted.org/packages/b8/63/d06ecce90e2cf1747e29aeab9f823d21e5877a4c51b79720b2d3be7848f8/rapidfuzz-3.14.3-cp312-cp312-win_amd64.whl", hash = "sha256:b5100fd6bcee4d27f28f4e0a1c6b5127bc8ba7c2a9959cad9eab0bf4a7ab3329", size = 1538989, upload-time = "2025-11-01T11:53:22.428Z" }, { url = "https://files.pythonhosted.org/packages/32/00/ec8597a64f2be301ce1ee3290d067f49f6a7afb226b67d5f15b56d772ba5/rapidfuzz-3.14.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43e38c1305cffae8472572a0584d4ffc2f130865586a81038ca3965301f7c97c", size = 3156759, upload-time = "2025-11-01T11:53:30.777Z" }, + { url = "https://files.pythonhosted.org/packages/61/d5/b41eeb4930501cc899d5a9a7b5c9a33d85a670200d7e81658626dcc0ecc0/rapidfuzz-3.14.3-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:e195a77d06c03c98b3fc06b8a28576ba824392ce40de8c708f96ce04849a052e", size = 1222067, upload-time = "2025-11-01T11:53:32.334Z" }, + { url = 
"https://files.pythonhosted.org/packages/15/ce/4f3ab4c401c5a55364da1ffff8cc879fc97b4e5f4fa96033827da491a973/rapidfuzz-3.14.3-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:a2135b138bcdcb4c3742d417f215ac2d8c2b87bde15b0feede231ae95f09ec41", size = 2526123, upload-time = "2025-11-01T11:53:35.779Z" }, { url = "https://files.pythonhosted.org/packages/c1/4b/54f804975376a328f57293bd817c12c9036171d15cf7292032e3f5820b2d/rapidfuzz-3.14.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:33a325ed0e8e1aa20c3e75f8ab057a7b248fdea7843c2a19ade0008906c14af0", size = 4262874, upload-time = "2025-11-01T11:53:37.866Z" }, { url = "https://files.pythonhosted.org/packages/07/75/fde1f334b0cec15b5946d9f84d73250fbfcc73c236b4bc1b25129d90876b/rapidfuzz-3.14.3-cp313-cp313-win_amd64.whl", hash = "sha256:e6b5e3036976f0fde888687d91be86d81f9ac5f7b02e218913c38285b756be6c", size = 1537011, upload-time = "2025-11-01T11:53:40.92Z" }, { url = "https://files.pythonhosted.org/packages/88/74/f50ea0e24a5880a9159e8fd256b84d8f4634c2f6b4f98028bdd31891d907/rapidfuzz-3.14.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:89acb8cbb52904f763e5ac238083b9fc193bed8d1f03c80568b20e4cef43a519", size = 3165563, upload-time = "2025-11-01T11:53:49.216Z" }, + { url = "https://files.pythonhosted.org/packages/e8/7a/e744359404d7737049c26099423fc54bcbf303de5d870d07d2fb1410f567/rapidfuzz-3.14.3-cp313-cp313t-manylinux_2_31_armv7l.whl", hash = "sha256:7d9af908c2f371bfb9c985bd134e295038e3031e666e4b2ade1e7cb7f5af2f1a", size = 1214727, upload-time = "2025-11-01T11:53:50.883Z" }, + { url = "https://files.pythonhosted.org/packages/70/17/6c0b2b2bff9c8b12e12624c07aa22e922b0c72a490f180fa9183d1ef2c75/rapidfuzz-3.14.3-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:152555187360978119e98ce3e8263d70dd0c40c7541193fc302e9b7125cf8f58", size = 2507596, upload-time = "2025-11-01T11:53:53.835Z" }, { url = 
"https://files.pythonhosted.org/packages/c3/d1/87852a7cbe4da7b962174c749a47433881a63a817d04f3e385ea9babcd9e/rapidfuzz-3.14.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:52619d25a09546b8db078981ca88939d72caa6b8701edd8b22e16482a38e799f", size = 4273595, upload-time = "2025-11-01T11:53:55.961Z" }, { url = "https://files.pythonhosted.org/packages/0b/0c/71ef356adc29e2bdf74cd284317b34a16b80258fa0e7e242dd92cc1e6d10/rapidfuzz-3.14.3-cp313-cp313t-win_amd64.whl", hash = "sha256:656e52b054d5b5c2524169240e50cfa080b04b1c613c5f90a2465e84888d6f15", size = 1576797, upload-time = "2025-11-01T11:53:59.455Z" }, { url = "https://files.pythonhosted.org/packages/22/20/9d30b4a1ab26aac22fff17d21dec7e9089ccddfe25151d0a8bb57001dc3d/rapidfuzz-3.14.3-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1e6eefec45625c634926a9fd46c9e4f31118ac8f3156fff9494422cee45207e6", size = 3101472, upload-time = "2025-11-01T11:54:47.255Z" }, @@ -11546,16 +11622,32 @@ version = "3.6.0" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/02/84/30869e01909fb37a6cc7e18688ee8bf1e42d57e7e0777636bd47524c43c7/xxhash-3.6.0.tar.gz", hash = "sha256:f0162a78b13a0d7617b2845b90c763339d1f1d82bb04a4b07f4ab535cc5e05d6", size = 85160, upload-time = "2025-10-02T14:37:08.097Z" } wheels = [ + { url = "https://files.pythonhosted.org/packages/c4/ef/3a9b05eb527457d5db13a135a2ae1a26c80fecd624d20f3e8dcc4cb170f3/xxhash-3.6.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6812c25fe0d6c36a46ccb002f40f27ac903bf18af9f6dd8f9669cb4d176ab18f", size = 212384, upload-time = "2025-10-02T14:34:19.182Z" }, + { url = "https://files.pythonhosted.org/packages/0f/18/ccc194ee698c6c623acbf0f8c2969811a8a4b6185af5e824cd27b9e4fd3e/xxhash-3.6.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4ccbff013972390b51a18ef1255ef5ac125c92dc9143b2d1909f59abc765540e", 
size = 445749, upload-time = "2025-10-02T14:34:20.659Z" }, { url = "https://files.pythonhosted.org/packages/a5/86/cf2c0321dc3940a7aa73076f4fd677a0fb3e405cb297ead7d864fd90847e/xxhash-3.6.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:297b7fbf86c82c550e12e8fb71968b3f033d27b874276ba3624ea868c11165a8", size = 193880, upload-time = "2025-10-02T14:34:22.431Z" }, + { url = "https://files.pythonhosted.org/packages/67/74/b044fcd6b3d89e9b1b665924d85d3f400636c23590226feb1eb09e1176ce/xxhash-3.6.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:08d45aef063a4531b785cd72de4887766d01dc8f362a515693df349fdb825e0c", size = 210867, upload-time = "2025-10-02T14:34:27.203Z" }, + { url = "https://files.pythonhosted.org/packages/bc/fd/3ce73bf753b08cb19daee1eb14aa0d7fe331f8da9c02dd95316ddfe5275e/xxhash-3.6.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:929142361a48ee07f09121fe9e96a84950e8d4df3bb298ca5d88061969f34d7b", size = 414012, upload-time = "2025-10-02T14:34:28.409Z" }, { url = "https://files.pythonhosted.org/packages/ba/b3/5a4241309217c5c876f156b10778f3ab3af7ba7e3259e6d5f5c7d0129eb2/xxhash-3.6.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:51312c768403d8540487dbbfb557454cfc55589bbde6424456951f7fcd4facb3", size = 191409, upload-time = "2025-10-02T14:34:29.696Z" }, { url = "https://files.pythonhosted.org/packages/65/79/9d24d7f53819fe301b231044ea362ce64e86c74f6e8c8e51320de248b3e5/xxhash-3.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:26734cdc2d4ffe449b41d186bbeac416f704a482ed835d375a5c0cb02bc63fef", size = 31481, upload-time = "2025-10-02T14:34:32.062Z" }, + { url = "https://files.pythonhosted.org/packages/38/86/fb6b6130d8dd6b8942cc17ab4d90e223653a89aa32ad2776f8af7064ed13/xxhash-3.6.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2aa5ee3444c25b69813663c9f8067dcfaa2e126dc55e8dddf40f4d1c25d7effa", size = 212163, upload-time = "2025-10-02T14:34:39.872Z" }, + 
{ url = "https://files.pythonhosted.org/packages/ee/dc/e84875682b0593e884ad73b2d40767b5790d417bde603cceb6878901d647/xxhash-3.6.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f7f99123f0e1194fa59cc69ad46dbae2e07becec5df50a0509a808f90a0f03f0", size = 445411, upload-time = "2025-10-02T14:34:41.569Z" }, { url = "https://files.pythonhosted.org/packages/11/4f/426f91b96701ec2f37bb2b8cec664eff4f658a11f3fa9d94f0a887ea6d2b/xxhash-3.6.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:49e03e6fe2cac4a1bc64952dd250cf0dbc5ef4ebb7b8d96bce82e2de163c82a2", size = 193883, upload-time = "2025-10-02T14:34:43.249Z" }, + { url = "https://files.pythonhosted.org/packages/58/ca/faa05ac19b3b622c7c9317ac3e23954187516298a091eb02c976d0d3dd45/xxhash-3.6.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:843b52f6d88071f87eba1631b684fcb4b2068cd2180a0224122fe4ef011a9374", size = 210655, upload-time = "2025-10-02T14:34:47.571Z" }, + { url = "https://files.pythonhosted.org/packages/d4/7a/06aa7482345480cc0cb597f5c875b11a82c3953f534394f620b0be2f700c/xxhash-3.6.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:7d14a6cfaf03b1b6f5f9790f76880601ccc7896aff7ab9cd8978a939c1eb7e0d", size = 414001, upload-time = "2025-10-02T14:34:49.273Z" }, { url = "https://files.pythonhosted.org/packages/23/07/63ffb386cd47029aa2916b3d2f454e6cc5b9f5c5ada3790377d5430084e7/xxhash-3.6.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:418daf3db71e1413cfe211c2f9a528456936645c17f46b5204705581a45390ae", size = 191431, upload-time = "2025-10-02T14:34:50.798Z" }, { url = "https://files.pythonhosted.org/packages/13/5d/0d125536cbe7565a83d06e43783389ecae0c0f2ed037b48ede185de477c0/xxhash-3.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:c0f2ab8c715630565ab8991b536ecded9416d615538be8ecddce43ccf26cbc7c", size = 31534, upload-time = "2025-10-02T14:34:53.276Z" }, + { url = 
"https://files.pythonhosted.org/packages/84/7a/c2b3d071e4bb4a90b7057228a99b10d51744878f4a8a6dd643c8bd897620/xxhash-3.6.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ba284920194615cb8edf73bf52236ce2e1664ccd4a38fdb543506413529cc546", size = 212241, upload-time = "2025-10-02T14:35:02.207Z" }, + { url = "https://files.pythonhosted.org/packages/81/5f/640b6eac0128e215f177df99eadcd0f1b7c42c274ab6a394a05059694c5a/xxhash-3.6.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4b54219177f6c6674d5378bd862c6aedf64725f70dd29c472eaae154df1a2e89", size = 445471, upload-time = "2025-10-02T14:35:03.61Z" }, { url = "https://files.pythonhosted.org/packages/5e/1e/3c3d3ef071b051cc3abbe3721ffb8365033a172613c04af2da89d5548a87/xxhash-3.6.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:42c36dd7dbad2f5238950c377fcbf6811b1cdb1c444fab447960030cea60504d", size = 193936, upload-time = "2025-10-02T14:35:05.013Z" }, + { url = "https://files.pythonhosted.org/packages/d7/fd/2c0a00c97b9e18f72e1f240ad4e8f8a90fd9d408289ba9c7c495ed7dc05c/xxhash-3.6.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:6f2580ffab1a8b68ef2b901cde7e55fa8da5e4be0977c68f78fc80f3c143de42", size = 210689, upload-time = "2025-10-02T14:35:09.438Z" }, + { url = "https://files.pythonhosted.org/packages/93/86/5dd8076a926b9a95db3206aba20d89a7fc14dd5aac16e5c4de4b56033140/xxhash-3.6.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:40c391dd3cd041ebc3ffe6f2c862f402e306eb571422e0aa918d8070ba31da11", size = 414068, upload-time = "2025-10-02T14:35:11.162Z" }, { url = "https://files.pythonhosted.org/packages/af/3c/0bb129170ee8f3650f08e993baee550a09593462a5cddd8e44d0011102b1/xxhash-3.6.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f205badabde7aafd1a31e8ca2a3e5a763107a71c397c4481d6a804eb5063d8bd", size = 191495, upload-time = "2025-10-02T14:35:12.971Z" }, { url = 
"https://files.pythonhosted.org/packages/86/15/9bc32671e9a38b413a76d24722a2bf8784a132c043063a8f5152d390b0f9/xxhash-3.6.0-cp313-cp313-win_amd64.whl", hash = "sha256:757320d45d2fbcce8f30c42a6b2f47862967aea7bf458b9625b4bbe7ee390392", size = 31542, upload-time = "2025-10-02T14:35:15.21Z" }, + { url = "https://files.pythonhosted.org/packages/bc/68/c4c80614716345d55071a396cf03d06e34b5f4917a467faf43083c995155/xxhash-3.6.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3ed0df1b11a79856df5ffcab572cbd6b9627034c1c748c5566fa79df9048a7c5", size = 214833, upload-time = "2025-10-02T14:35:23.32Z" }, + { url = "https://files.pythonhosted.org/packages/7e/e9/ae27c8ffec8b953efa84c7c4a6c6802c263d587b9fc0d6e7cea64e08c3af/xxhash-3.6.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0e4edbfc7d420925b0dd5e792478ed393d6e75ff8fc219a6546fb446b6a417b1", size = 448348, upload-time = "2025-10-02T14:35:25.111Z" }, { url = "https://files.pythonhosted.org/packages/d7/6b/33e21afb1b5b3f46b74b6bd1913639066af218d704cc0941404ca717fc57/xxhash-3.6.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fba27a198363a7ef87f8c0f6b171ec36b674fe9053742c58dd7e3201c1ab30ee", size = 196070, upload-time = "2025-10-02T14:35:26.586Z" }, + { url = "https://files.pythonhosted.org/packages/0d/98/e8de5baa5109394baf5118f5e72ab21a86387c4f89b0e77ef3e2f6b0327b/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:f01375c0e55395b814a679b3eea205db7919ac2af213f4a6682e01220e5fe292", size = 213304, upload-time = "2025-10-02T14:35:31.222Z" }, + { url = "https://files.pythonhosted.org/packages/7b/1d/71056535dec5c3177eeb53e38e3d367dd1d16e024e63b1cee208d572a033/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:d706dca2d24d834a4661619dcacf51a75c16d65985718d6a7d73c1eeeb903ddf", size = 416930, upload-time = "2025-10-02T14:35:32.517Z" }, { url = 
"https://files.pythonhosted.org/packages/dc/6c/5cbde9de2cd967c322e651c65c543700b19e7ae3e0aae8ece3469bf9683d/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5f059d9faeacd49c0215d66f4056e1326c80503f51a1532ca336a385edadd033", size = 193787, upload-time = "2025-10-02T14:35:33.827Z" }, { url = "https://files.pythonhosted.org/packages/ad/e6/e8cf858a2b19d6d45820f072eff1bea413910592ff17157cabc5f1227a16/xxhash-3.6.0-cp313-cp313t-win_amd64.whl", hash = "sha256:b1e420ef35c503869c4064f4a2f2b08ad6431ab7b229a05cce39d74268bca6b8", size = 31799, upload-time = "2025-10-02T14:35:36.165Z" }, { url = "https://files.pythonhosted.org/packages/62/b2/5ac99a041a29e58e95f907876b04f7067a0242cb85b5f39e726153981503/xxhash-3.6.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c6dc31591899f5e5666f04cc2e529e69b4072827085c1ef15294d91a004bc1bd", size = 32481, upload-time = "2025-10-02T14:37:05.869Z" },