diff --git a/pyrit/datasets/seed_datasets/remote/cbt_bench_dataset.py b/pyrit/datasets/seed_datasets/remote/cbt_bench_dataset.py index 59774e5327..c3f35e9299 100644 --- a/pyrit/datasets/seed_datasets/remote/cbt_bench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/cbt_bench_dataset.py @@ -2,6 +2,7 @@ # Licensed under the MIT license. import logging +import warnings from typing import Any from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( @@ -38,7 +39,7 @@ def __init__( *, source: str = "Psychotherapy-LLM/CBT-Bench", config: str = "core_fine_seed", - split: str = "train", + split: str | None = None, ) -> None: """ Initialize the CBT-Bench dataset loader. @@ -46,11 +47,20 @@ def __init__( Args: source: HuggingFace dataset identifier. Defaults to "Psychotherapy-LLM/CBT-Bench". config: Dataset configuration/subset to load. Defaults to "core_fine_seed". - split: Dataset split to load. Defaults to "train". + split: **Deprecated.** Every config of ``Psychotherapy-LLM/CBT-Bench`` publishes + only the ``"train"`` split, so this kwarg has no effect. It will be removed + in v0.16.0. """ + if split is not None: + warnings.warn( + "'split' is deprecated and will be removed in v0.16.0. " + "Every config of Psychotherapy-LLM/CBT-Bench publishes only the 'train' " + "split, so this kwarg has no effect.", + DeprecationWarning, + stacklevel=2, + ) self.source = source self.config = config - self.split = split @property def dataset_name(self) -> str: @@ -76,7 +86,7 @@ async def fetch_dataset_async(self, *, cache: bool = True) -> SeedDataset: data = await self._fetch_from_huggingface_async( dataset_name=self.source, config=self.config, - split=self.split, + split="train", cache=cache, ) diff --git a/pyrit/datasets/seed_datasets/remote/darkbench_dataset.py b/pyrit/datasets/seed_datasets/remote/darkbench_dataset.py index 761148fa8b..09c120c41a 100644 --- a/pyrit/datasets/seed_datasets/remote/darkbench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/darkbench_dataset.py @@ -1,6 +1,8 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. +import warnings + from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) @@ -33,7 +35,7 @@ def __init__( *, dataset_name: str = "apart/darkbench", config: str = "default", - split: str = "train", + split: str | None = None, ) -> None: """ Initialize the DarkBench dataset loader. @@ -41,11 +43,20 @@ def __init__( Args: dataset_name: HuggingFace dataset identifier. Defaults to "apart/darkbench". config: Dataset configuration. Defaults to "default". - split: Dataset split to load. Defaults to "train". + split: **Deprecated.** Upstream ``apart/darkbench`` publishes only the + ``"train"`` split, so this kwarg has no effect. It will be removed in + v0.16.0. """ + if split is not None: + warnings.warn( + "'split' is deprecated and will be removed in v0.16.0. " + "Upstream apart/darkbench publishes only the 'train' split, " + "so this kwarg has no effect.", + DeprecationWarning, + stacklevel=2, + ) self.hf_dataset_name = dataset_name self.config = config - self.split = split @property def dataset_name(self) -> str: @@ -70,7 +81,7 @@ async def fetch_dataset_async(self, *, cache: bool = True) -> SeedDataset: data = await self._fetch_from_huggingface_async( dataset_name=self.hf_dataset_name, config=self.config, - split=self.split, + split="train", cache=cache, data_files="darkbench.tsv", ) diff --git a/pyrit/datasets/seed_datasets/remote/forbidden_questions_dataset.py b/pyrit/datasets/seed_datasets/remote/forbidden_questions_dataset.py index a301aad8c1..1cceee18d6 100644 --- a/pyrit/datasets/seed_datasets/remote/forbidden_questions_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/forbidden_questions_dataset.py @@ -2,6 +2,7 @@ # Licensed under the MIT license. import logging +import warnings from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, @@ -34,17 +35,28 @@ def __init__( self, *, source: str = "TrustAIRLab/forbidden_question_set", - split: str = "default", + split: str | None = None, ) -> None: """ Initialize the Forbidden Questions dataset loader. Args: source: HuggingFace dataset identifier. Defaults to "TrustAIRLab/forbidden_question_set". - split: Dataset split to load. Defaults to "default". + split: **Deprecated.** This kwarg was misforwarded to HuggingFace as ``config``, + and ``TrustAIRLab/forbidden_question_set`` publishes only one config + (``"default"``) with one split (``"train"``), so it never did anything + useful. It will be removed in v0.16.0. """ + if split is not None: + warnings.warn( + "'split' is deprecated and will be removed in v0.16.0. " + "It was misforwarded to HuggingFace as 'config', and " + "TrustAIRLab/forbidden_question_set publishes only one config ('default') " + "with one split ('train'), so this kwarg has no effect.", + DeprecationWarning, + stacklevel=2, + ) self.source = source - self.split = split @property def dataset_name(self) -> str: @@ -66,7 +78,7 @@ async def fetch_dataset_async(self, *, cache: bool = True) -> SeedDataset: # Load from HuggingFace data = await self._fetch_from_huggingface_async( dataset_name=self.source, - config=self.split, + config="default", split="train", cache=cache, ) diff --git a/pyrit/datasets/seed_datasets/remote/harmful_qa_dataset.py b/pyrit/datasets/seed_datasets/remote/harmful_qa_dataset.py index 6ec8c44e6c..9ed8306ba6 100644 --- a/pyrit/datasets/seed_datasets/remote/harmful_qa_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/harmful_qa_dataset.py @@ -2,6 +2,7 @@ # Licensed under the MIT license. import logging +import warnings from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, @@ -36,15 +37,24 @@ class _HarmfulQADataset(_RemoteDatasetLoader): def __init__( self, *, - split: str = "train", + split: str | None = None, ) -> None: """ Initialize the HarmfulQA dataset loader. Args: - split: Dataset split to load. Defaults to "train". + split: **Deprecated.** Upstream ``declare-lab/HarmfulQA`` publishes only the + ``"train"`` split, so this kwarg has no effect. It will be removed in + v0.16.0. """ - self.split = split + if split is not None: + warnings.warn( + "'split' is deprecated and will be removed in v0.16.0. " + "Upstream declare-lab/HarmfulQA publishes only the 'train' split, " + "so this kwarg has no effect.", + DeprecationWarning, + stacklevel=2, + ) @property def dataset_name(self) -> str: @@ -65,7 +75,7 @@ async def fetch_dataset_async(self, *, cache: bool = True) -> SeedDataset: data = await self._fetch_from_huggingface_async( dataset_name=self.HF_DATASET_NAME, - split=self.split, + split="train", cache=cache, ) diff --git a/pyrit/datasets/seed_datasets/remote/hixstest_dataset.py b/pyrit/datasets/seed_datasets/remote/hixstest_dataset.py index 6e0984f930..63b0439879 100644 --- a/pyrit/datasets/seed_datasets/remote/hixstest_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/hixstest_dataset.py @@ -3,6 +3,7 @@ import logging import os +import warnings from enum import Enum from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( @@ -68,7 +69,7 @@ def __init__( self, *, language: HiXSTestLanguage = HiXSTestLanguage.HINDI, - split: str = "train", + split: str | None = None, token: str | None = None, ) -> None: """ @@ -78,16 +79,25 @@ def __init__( language: Which language to use as the primary ``SeedPrompt.value``. Defaults to ``HiXSTestLanguage.HINDI`` (the dataset's intended language). Pass ``HiXSTestLanguage.ENGLISH`` to use the English translation instead. - split: Dataset split to load. Defaults to "train" (the only split). + split: **Deprecated.** Upstream ``walledai/HiXSTest`` publishes only the + ``"train"`` split, so this kwarg has no effect. It will be removed in + v0.16.0. token: Hugging Face authentication token. If not provided, reads from the ``HUGGINGFACE_TOKEN`` environment variable. Raises: ValueError: If ``language`` is not a ``HiXSTestLanguage`` instance. """ + if split is not None: + warnings.warn( + "'split' is deprecated and will be removed in v0.16.0. " + "Upstream walledai/HiXSTest publishes only the 'train' split, " + "so this kwarg has no effect.", + DeprecationWarning, + stacklevel=2, + ) self._validate_enum(language, HiXSTestLanguage, "language") self.language = language - self.split = split self.token = token if token is not None else os.environ.get("HUGGINGFACE_TOKEN") @property @@ -113,7 +123,7 @@ async def fetch_dataset_async(self, *, cache: bool = True) -> SeedDataset: data = await self._fetch_from_huggingface_async( dataset_name=self.HF_DATASET_NAME, - split=self.split, + split="train", cache=cache, token=self.token, ) diff --git a/pyrit/datasets/seed_datasets/remote/or_bench_dataset.py b/pyrit/datasets/seed_datasets/remote/or_bench_dataset.py index 129533a636..793da84814 100644 --- a/pyrit/datasets/seed_datasets/remote/or_bench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/or_bench_dataset.py @@ -2,6 +2,7 @@ # Licensed under the MIT license. import logging +import warnings from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, @@ -36,14 +37,23 @@ class _ORBenchBaseDataset(_RemoteDatasetLoader): modalities: tuple[Modality, ...] = (Modality.TEXT,) tags: frozenset[str] = frozenset({"default", "safety", "refusal"}) - def __init__(self, *, split: str = "train") -> None: + def __init__(self, *, split: str | None = None) -> None: """ Initialize the OR-Bench dataset loader. Args: - split: Dataset split to load. Defaults to "train". + split: **Deprecated.** Every config of ``bench-llm/OR-Bench`` publishes only + the ``"train"`` split, so this kwarg has no effect. It will be removed in + v0.16.0. """ - self.split = split + if split is not None: + warnings.warn( + "'split' is deprecated and will be removed in v0.16.0. " + "Every config of bench-llm/OR-Bench publishes only the 'train' split, " + "so this kwarg has no effect.", + DeprecationWarning, + stacklevel=2, + ) async def fetch_dataset_async(self, *, cache: bool = True) -> SeedDataset: """ @@ -60,7 +70,7 @@ async def fetch_dataset_async(self, *, cache: bool = True) -> SeedDataset: data = await self._fetch_from_huggingface_async( dataset_name=self.HF_DATASET_NAME, config=self.CONFIG, - split=self.split, + split="train", cache=cache, ) diff --git a/pyrit/datasets/seed_datasets/remote/sgxstest_dataset.py b/pyrit/datasets/seed_datasets/remote/sgxstest_dataset.py index 0ad3fd2687..86efef86c6 100644 --- a/pyrit/datasets/seed_datasets/remote/sgxstest_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/sgxstest_dataset.py @@ -3,6 +3,7 @@ import logging import os +import warnings from enum import Enum from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( @@ -74,7 +75,7 @@ def __init__( self, *, label: SGXSTestLabel = SGXSTestLabel.UNSAFE, - split: str = "train", + split: str | None = None, token: str | None = None, ) -> None: """ @@ -84,18 +85,26 @@ def __init__( label: Which subset of prompts to load. Defaults to ``SGXSTestLabel.UNSAFE`` (the truly-harmful prompts). Use ``SGXSTestLabel.SAFE`` for the over-refusal targets or ``SGXSTestLabel.ALL`` for the full 200-prompt set. - split: Dataset split to load. Defaults to "train" (the only split currently - published by the upstream dataset). + split: **Deprecated.** Upstream ``walledai/SGXSTest`` publishes only the + ``"train"`` split, so this kwarg has no effect. It will be removed in + v0.16.0. token: Hugging Face authentication token. If not provided, reads from the HUGGINGFACE_TOKEN env var. Raises: ValueError: If ``label`` is not an SGXSTestLabel member. """ + if split is not None: + warnings.warn( + "'split' is deprecated and will be removed in v0.16.0. " + "Upstream walledai/SGXSTest publishes only the 'train' split, " + "so this kwarg has no effect.", + DeprecationWarning, + stacklevel=2, + ) self._validate_enum(value=label, enum_cls=SGXSTestLabel, label="label") self.label = label - self.split = split self.token = token if token is not None else os.environ.get("HUGGINGFACE_TOKEN") @property @@ -122,7 +131,7 @@ async def fetch_dataset_async(self, *, cache: bool = True) -> SeedDataset: data = await self._fetch_from_huggingface_async( dataset_name=self.HF_DATASET_NAME, - split=self.split, + split="train", cache=cache, token=self.token, ) diff --git a/pyrit/datasets/seed_datasets/remote/simple_safety_tests_dataset.py b/pyrit/datasets/seed_datasets/remote/simple_safety_tests_dataset.py index 51ae2115ef..2c729134a5 100644 --- a/pyrit/datasets/seed_datasets/remote/simple_safety_tests_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/simple_safety_tests_dataset.py @@ -2,6 +2,7 @@ # Licensed under the MIT license. import logging +import warnings from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, @@ -36,15 +37,24 @@ class _SimpleSafetyTestsDataset(_RemoteDatasetLoader): def __init__( self, *, - split: str = "test", + split: str | None = None, ) -> None: """ Initialize the SimpleSafetyTests dataset loader. Args: - split: Dataset split to load. Defaults to "test". + split: **Deprecated.** Upstream ``Bertievidgen/SimpleSafetyTests`` publishes + only the ``"test"`` split, so this kwarg has no effect. It will be + removed in v0.16.0. """ - self.split = split + if split is not None: + warnings.warn( + "'split' is deprecated and will be removed in v0.16.0. " + "Upstream Bertievidgen/SimpleSafetyTests publishes only the 'test' " + "split, so this kwarg has no effect.", + DeprecationWarning, + stacklevel=2, + ) @property def dataset_name(self) -> str: @@ -65,7 +75,7 @@ async def fetch_dataset_async(self, *, cache: bool = True) -> SeedDataset: data = await self._fetch_from_huggingface_async( dataset_name=self.HF_DATASET_NAME, - split=self.split, + split="test", cache=cache, ) diff --git a/tests/unit/datasets/test_cbt_bench_dataset.py b/tests/unit/datasets/test_cbt_bench_dataset.py index 787fb3b5c1..93dbee5f50 100644 --- a/tests/unit/datasets/test_cbt_bench_dataset.py +++ b/tests/unit/datasets/test_cbt_bench_dataset.py @@ -87,11 +87,10 @@ async def test_fetch_dataset(self, mock_cbt_bench_data): assert first_prompt.metadata["core_belief_fine_grained"] == ["I am unlovable", "I am immoral"] async def test_fetch_dataset_with_custom_config(self, mock_cbt_bench_data): - """Test fetching with custom HuggingFace config and split.""" + """Test fetching with custom HuggingFace config.""" loader = _CBTBenchDataset( source="custom/cbt-bench", config="core_major_seed", - split="test", ) with patch.object(loader, "_fetch_from_huggingface_async", return_value=mock_cbt_bench_data) as mock_fetch: @@ -102,9 +101,14 @@ async def test_fetch_dataset_with_custom_config(self, mock_cbt_bench_data): call_kwargs = mock_fetch.call_args.kwargs assert call_kwargs["dataset_name"] == "custom/cbt-bench" assert call_kwargs["config"] == "core_major_seed" - assert call_kwargs["split"] == "test" + assert call_kwargs["split"] == "train" assert call_kwargs["cache"] is False + def test_split_kwarg_emits_deprecation_warning(self): + """Passing the deprecated ``split`` kwarg emits a DeprecationWarning.""" + with pytest.warns(DeprecationWarning, match="'split' is deprecated"): + _CBTBenchDataset(split="train") + async def test_fetch_dataset_situation_only(self, mock_cbt_bench_data_missing_thoughts): """Test that items with only situation (no thoughts) still work.""" loader = _CBTBenchDataset() diff --git a/tests/unit/datasets/test_darkbench_dataset.py b/tests/unit/datasets/test_darkbench_dataset.py index 34cf300d93..937141f0e9 100644 --- a/tests/unit/datasets/test_darkbench_dataset.py +++ b/tests/unit/datasets/test_darkbench_dataset.py @@ -32,7 +32,7 @@ async def test_fetch_dataset(mock_darkbench_data): async def test_fetch_dataset_passes_config(mock_darkbench_data): - loader = _DarkBenchDataset(config="custom", split="test") + loader = _DarkBenchDataset(config="custom") with patch.object( loader, "_fetch_from_huggingface_async", new=AsyncMock(return_value=mock_darkbench_data) @@ -42,7 +42,13 @@ async def test_fetch_dataset_passes_config(mock_darkbench_data): mock_fetch.assert_called_once() call_kwargs = mock_fetch.call_args.kwargs assert call_kwargs["config"] == "custom" - assert call_kwargs["split"] == "test" + assert call_kwargs["split"] == "train" + + +def test_split_kwarg_emits_deprecation_warning(): + """Passing the deprecated ``split`` kwarg emits a DeprecationWarning.""" + with pytest.warns(DeprecationWarning, match="'split' is deprecated"): + _DarkBenchDataset(split="train") def test_dataset_name(): diff --git a/tests/unit/datasets/test_harmful_qa_dataset.py b/tests/unit/datasets/test_harmful_qa_dataset.py index 1926e37413..d3a1af2548 100644 --- a/tests/unit/datasets/test_harmful_qa_dataset.py +++ b/tests/unit/datasets/test_harmful_qa_dataset.py @@ -55,3 +55,8 @@ def test_dataset_name(self): """Test dataset_name property.""" loader = _HarmfulQADataset() assert loader.dataset_name == "harmful_qa" + + def test_split_kwarg_emits_deprecation_warning(self): + """Passing the deprecated ``split`` kwarg emits a DeprecationWarning.""" + with pytest.warns(DeprecationWarning, match="'split' is deprecated"): + _HarmfulQADataset(split="train") diff --git a/tests/unit/datasets/test_hixstest_dataset.py b/tests/unit/datasets/test_hixstest_dataset.py index 4793a018ca..dc8cf272b7 100644 --- a/tests/unit/datasets/test_hixstest_dataset.py +++ b/tests/unit/datasets/test_hixstest_dataset.py @@ -61,6 +61,11 @@ def test_init_explicit_token_overrides_env(self): loader = _HiXSTestDataset(token="explicit-token") assert loader.token == "explicit-token" + def test_split_kwarg_emits_deprecation_warning(self): + """Passing the deprecated ``split`` kwarg emits a DeprecationWarning.""" + with pytest.warns(DeprecationWarning, match="'split' is deprecated"): + _HiXSTestDataset(split="train") + async def test_fetch_dataset_hindi_default(self, mock_hixstest_data): """By default, the Hindi prompt is the SeedPrompt value and both texts are in metadata.""" loader = _HiXSTestDataset() @@ -106,7 +111,7 @@ async def test_fetch_dataset_english(self, mock_hixstest_data): assert first_prompt.metadata["category"] == "मारना" async def test_fetch_dataset_passes_token_and_split(self, mock_hixstest_data): - """The loader forwards the configured token and split to _fetch_from_huggingface_async.""" + """The loader forwards the configured token and the hardcoded 'train' split to _fetch_from_huggingface_async.""" loader = _HiXSTestDataset(token="my-token") mock_fetch = AsyncMock(return_value=mock_hixstest_data) diff --git a/tests/unit/datasets/test_or_bench_dataset.py b/tests/unit/datasets/test_or_bench_dataset.py index 18c6da496a..016d236c0c 100644 --- a/tests/unit/datasets/test_or_bench_dataset.py +++ b/tests/unit/datasets/test_or_bench_dataset.py @@ -94,3 +94,10 @@ def test_dataset_name(self): """Test dataset_name property.""" loader = _ORBenchToxicDataset() assert loader.dataset_name == "or_bench_toxic" + + +def test_split_kwarg_emits_deprecation_warning(): + """All OR-Bench loaders inherit the deprecated ``split`` kwarg from the base class.""" + for cls in (_ORBench80KDataset, _ORBenchHardDataset, _ORBenchToxicDataset): + with pytest.warns(DeprecationWarning, match="'split' is deprecated"): + cls(split="train") diff --git a/tests/unit/datasets/test_seed_dataset_provider.py b/tests/unit/datasets/test_seed_dataset_provider.py index f81d755c61..3118e7ed9a 100644 --- a/tests/unit/datasets/test_seed_dataset_provider.py +++ b/tests/unit/datasets/test_seed_dataset_provider.py @@ -332,7 +332,6 @@ async def test_fetch_dataset_with_custom_config(self, mock_darkbench_data): loader = _DarkBenchDataset( dataset_name="custom/darkbench", config="custom_config", - split="test", ) with patch.object(loader, "_fetch_from_huggingface_async", return_value=mock_darkbench_data) as mock_fetch: @@ -343,7 +342,9 @@ async def test_fetch_dataset_with_custom_config(self, mock_darkbench_data): call_kwargs = mock_fetch.call_args.kwargs assert call_kwargs["dataset_name"] == "custom/darkbench" assert call_kwargs["config"] == "custom_config" - assert call_kwargs["split"] == "test" + # split is hardcoded at the call site since upstream apart/darkbench + # publishes only the "train" split (constructor kwarg is deprecated) + assert call_kwargs["split"] == "train" class TestMetadataParsingRemote: diff --git a/tests/unit/datasets/test_sgxstest_dataset.py b/tests/unit/datasets/test_sgxstest_dataset.py index eaf3cc36d3..919f0e0e6d 100644 --- a/tests/unit/datasets/test_sgxstest_dataset.py +++ b/tests/unit/datasets/test_sgxstest_dataset.py @@ -100,8 +100,8 @@ async def test_fetch_dataset_empty_after_filter_raises(self): await loader.fetch_dataset_async() async def test_fetch_dataset_passes_token_and_split(self, mock_sgxstest_data): - """Test that the loader forwards token and split to _fetch_from_huggingface_async.""" - loader = _SGXSTestDataset(split="train", token="hf_test_token") + """Test that the loader forwards token and the hardcoded 'train' split to _fetch_from_huggingface_async.""" + loader = _SGXSTestDataset(token="hf_test_token") mock_fetch = AsyncMock(return_value=mock_sgxstest_data) with patch.object(loader, "_fetch_from_huggingface_async", new=mock_fetch): @@ -114,6 +114,11 @@ async def test_fetch_dataset_passes_token_and_split(self, mock_sgxstest_data): assert kwargs["cache"] is False assert kwargs["token"] == "hf_test_token" + def test_split_kwarg_emits_deprecation_warning(self): + """Passing the deprecated ``split`` kwarg emits a DeprecationWarning.""" + with pytest.warns(DeprecationWarning, match="'split' is deprecated"): + _SGXSTestDataset(split="train") + def test_invalid_label_raises(self): """Passing a non-SGXSTestLabel value should raise.""" with pytest.raises(ValueError, match="Expected SGXSTestLabel"): diff --git a/tests/unit/datasets/test_simple_remote_datasets.py b/tests/unit/datasets/test_simple_remote_datasets.py index 47ad817929..6f35c7146e 100644 --- a/tests/unit/datasets/test_simple_remote_datasets.py +++ b/tests/unit/datasets/test_simple_remote_datasets.py @@ -145,3 +145,9 @@ async def test_fetch_dataset(loader_class): assert all(isinstance(p, SeedPrompt) for p in dataset.seeds) actual_values = {seed.value for seed in dataset.seeds} assert actual_values == config["expected_values"] + + +def test_forbidden_questions_split_kwarg_emits_deprecation_warning(): + """Passing the deprecated ``split`` kwarg emits a DeprecationWarning.""" + with pytest.warns(DeprecationWarning, match="'split' is deprecated"): + _ForbiddenQuestionsDataset(split="default") diff --git a/tests/unit/datasets/test_simple_safety_tests_dataset.py b/tests/unit/datasets/test_simple_safety_tests_dataset.py index 1eab818212..66d6d555a1 100644 --- a/tests/unit/datasets/test_simple_safety_tests_dataset.py +++ b/tests/unit/datasets/test_simple_safety_tests_dataset.py @@ -55,3 +55,8 @@ def test_dataset_name(self): """Test dataset_name property.""" loader = _SimpleSafetyTestsDataset() assert loader.dataset_name == "simple_safety_tests" + + def test_split_kwarg_emits_deprecation_warning(self): + """Passing the deprecated ``split`` kwarg emits a DeprecationWarning.""" + with pytest.warns(DeprecationWarning, match="'split' is deprecated"): + _SimpleSafetyTestsDataset(split="test")