diff --git a/pyrit/datasets/seed_datasets/remote/cbt_bench_dataset.py b/pyrit/datasets/seed_datasets/remote/cbt_bench_dataset.py
index 59774e5327..c3f35e9299 100644
--- a/pyrit/datasets/seed_datasets/remote/cbt_bench_dataset.py
+++ b/pyrit/datasets/seed_datasets/remote/cbt_bench_dataset.py
@@ -2,6 +2,7 @@
 # Licensed under the MIT license.
 
 import logging
+import warnings
 from typing import Any
 
 from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import (
@@ -38,7 +39,7 @@ def __init__(
         *,
         source: str = "Psychotherapy-LLM/CBT-Bench",
         config: str = "core_fine_seed",
-        split: str = "train",
+        split: str | None = None,
     ) -> None:
         """
         Initialize the CBT-Bench dataset loader.
@@ -46,11 +47,20 @@ def __init__(
         Args:
             source: HuggingFace dataset identifier. Defaults to "Psychotherapy-LLM/CBT-Bench".
             config: Dataset configuration/subset to load. Defaults to "core_fine_seed".
-            split: Dataset split to load. Defaults to "train".
+            split: **Deprecated.** Every config of ``Psychotherapy-LLM/CBT-Bench`` publishes
+                only the ``"train"`` split, so this kwarg has no effect. It will be removed
+                in v0.16.0.
         """
+        if split is not None:
+            warnings.warn(
+                "'split' is deprecated and will be removed in v0.16.0. "
+                "Every config of Psychotherapy-LLM/CBT-Bench publishes only the 'train' "
+                "split, so this kwarg has no effect.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
         self.source = source
         self.config = config
-        self.split = split
 
     @property
     def dataset_name(self) -> str:
@@ -76,7 +86,7 @@ async def fetch_dataset_async(self, *, cache: bool = True) -> SeedDataset:
         data = await self._fetch_from_huggingface_async(
             dataset_name=self.source,
             config=self.config,
-            split=self.split,
+            split="train",
             cache=cache,
         )
 
diff --git a/pyrit/datasets/seed_datasets/remote/darkbench_dataset.py b/pyrit/datasets/seed_datasets/remote/darkbench_dataset.py
index 761148fa8b..09c120c41a 100644
--- a/pyrit/datasets/seed_datasets/remote/darkbench_dataset.py
+++ b/pyrit/datasets/seed_datasets/remote/darkbench_dataset.py
@@ -1,6 +1,8 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.
 
+import warnings
+
 from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import (
     _RemoteDatasetLoader,
 )
@@ -33,7 +35,7 @@ def __init__(
         *,
         dataset_name: str = "apart/darkbench",
         config: str = "default",
-        split: str = "train",
+        split: str | None = None,
     ) -> None:
         """
         Initialize the DarkBench dataset loader.
@@ -41,11 +43,20 @@ def __init__(
         Args:
             dataset_name: HuggingFace dataset identifier. Defaults to "apart/darkbench".
             config: Dataset configuration. Defaults to "default".
-            split: Dataset split to load. Defaults to "train".
+            split: **Deprecated.** Upstream ``apart/darkbench`` publishes only the
+                ``"train"`` split, so this kwarg has no effect. It will be removed in
+                v0.16.0.
         """
+        if split is not None:
+            warnings.warn(
+                "'split' is deprecated and will be removed in v0.16.0. "
+                "Upstream apart/darkbench publishes only the 'train' split, "
+                "so this kwarg has no effect.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
         self.hf_dataset_name = dataset_name
         self.config = config
-        self.split = split
 
     @property
     def dataset_name(self) -> str:
@@ -70,7 +81,7 @@ async def fetch_dataset_async(self, *, cache: bool = True) -> SeedDataset:
         data = await self._fetch_from_huggingface_async(
             dataset_name=self.hf_dataset_name,
             config=self.config,
-            split=self.split,
+            split="train",
             cache=cache,
             data_files="darkbench.tsv",
         )
diff --git a/pyrit/datasets/seed_datasets/remote/forbidden_questions_dataset.py b/pyrit/datasets/seed_datasets/remote/forbidden_questions_dataset.py
index a301aad8c1..1cceee18d6 100644
--- a/pyrit/datasets/seed_datasets/remote/forbidden_questions_dataset.py
+++ b/pyrit/datasets/seed_datasets/remote/forbidden_questions_dataset.py
@@ -2,6 +2,7 @@
 # Licensed under the MIT license.
 
 import logging
+import warnings
 
 from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import (
     _RemoteDatasetLoader,
@@ -34,17 +35,28 @@ def __init__(
         self,
         *,
         source: str = "TrustAIRLab/forbidden_question_set",
-        split: str = "default",
+        split: str | None = None,
     ) -> None:
         """
         Initialize the Forbidden Questions dataset loader.
 
         Args:
             source: HuggingFace dataset identifier. Defaults to "TrustAIRLab/forbidden_question_set".
-            split: Dataset split to load. Defaults to "default".
+            split: **Deprecated.** This kwarg was misforwarded to HuggingFace as ``config``,
+                and ``TrustAIRLab/forbidden_question_set`` publishes only one config
+                (``"default"``) with one split (``"train"``), so it never did anything
+                useful. It will be removed in v0.16.0.
         """
+        if split is not None:
+            warnings.warn(
+                "'split' is deprecated and will be removed in v0.16.0. "
+                "It was misforwarded to HuggingFace as 'config', and "
+                "TrustAIRLab/forbidden_question_set publishes only one config ('default') "
+                "with one split ('train'), so this kwarg has no effect.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
         self.source = source
-        self.split = split
 
     @property
     def dataset_name(self) -> str:
@@ -66,7 +78,7 @@ async def fetch_dataset_async(self, *, cache: bool = True) -> SeedDataset:
         # Load from HuggingFace
         data = await self._fetch_from_huggingface_async(
             dataset_name=self.source,
-            config=self.split,
+            config="default",
             split="train",
             cache=cache,
         )
diff --git a/pyrit/datasets/seed_datasets/remote/harmful_qa_dataset.py b/pyrit/datasets/seed_datasets/remote/harmful_qa_dataset.py
index 6ec8c44e6c..9ed8306ba6 100644
--- a/pyrit/datasets/seed_datasets/remote/harmful_qa_dataset.py
+++ b/pyrit/datasets/seed_datasets/remote/harmful_qa_dataset.py
@@ -2,6 +2,7 @@
 # Licensed under the MIT license.
 
 import logging
+import warnings
 
 from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import (
     _RemoteDatasetLoader,
@@ -36,15 +37,24 @@ class _HarmfulQADataset(_RemoteDatasetLoader):
     def __init__(
         self,
         *,
-        split: str = "train",
+        split: str | None = None,
     ) -> None:
         """
         Initialize the HarmfulQA dataset loader.
 
         Args:
-            split: Dataset split to load. Defaults to "train".
+            split: **Deprecated.** Upstream ``declare-lab/HarmfulQA`` publishes only the
+                ``"train"`` split, so this kwarg has no effect. It will be removed in
+                v0.16.0.
         """
-        self.split = split
+        if split is not None:
+            warnings.warn(
+                "'split' is deprecated and will be removed in v0.16.0. "
+                "Upstream declare-lab/HarmfulQA publishes only the 'train' split, "
+                "so this kwarg has no effect.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
 
     @property
     def dataset_name(self) -> str:
@@ -65,7 +75,7 @@ async def fetch_dataset_async(self, *, cache: bool = True) -> SeedDataset:
 
         data = await self._fetch_from_huggingface_async(
             dataset_name=self.HF_DATASET_NAME,
-            split=self.split,
+            split="train",
             cache=cache,
         )
 
diff --git a/pyrit/datasets/seed_datasets/remote/hixstest_dataset.py b/pyrit/datasets/seed_datasets/remote/hixstest_dataset.py
index 6e0984f930..63b0439879 100644
--- a/pyrit/datasets/seed_datasets/remote/hixstest_dataset.py
+++ b/pyrit/datasets/seed_datasets/remote/hixstest_dataset.py
@@ -3,6 +3,7 @@
 
 import logging
 import os
+import warnings
 from enum import Enum
 
 from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import (
@@ -68,7 +69,7 @@ def __init__(
         self,
         *,
         language: HiXSTestLanguage = HiXSTestLanguage.HINDI,
-        split: str = "train",
+        split: str | None = None,
         token: str | None = None,
     ) -> None:
         """
@@ -78,16 +79,25 @@ def __init__(
             language: Which language to use as the primary ``SeedPrompt.value``.
                 Defaults to ``HiXSTestLanguage.HINDI`` (the dataset's intended language).
                 Pass ``HiXSTestLanguage.ENGLISH`` to use the English translation instead.
-            split: Dataset split to load. Defaults to "train" (the only split).
+            split: **Deprecated.** Upstream ``walledai/HiXSTest`` publishes only the
+                ``"train"`` split, so this kwarg has no effect. It will be removed in
+                v0.16.0.
             token: Hugging Face authentication token. If not provided, reads from the
                 ``HUGGINGFACE_TOKEN`` environment variable.
 
         Raises:
             ValueError: If ``language`` is not a ``HiXSTestLanguage`` instance.
         """
+        if split is not None:
+            warnings.warn(
+                "'split' is deprecated and will be removed in v0.16.0. "
+                "Upstream walledai/HiXSTest publishes only the 'train' split, "
+                "so this kwarg has no effect.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
         self._validate_enum(language, HiXSTestLanguage, "language")
         self.language = language
-        self.split = split
         self.token = token if token is not None else os.environ.get("HUGGINGFACE_TOKEN")
 
     @property
@@ -113,7 +123,7 @@ async def fetch_dataset_async(self, *, cache: bool = True) -> SeedDataset:
 
         data = await self._fetch_from_huggingface_async(
             dataset_name=self.HF_DATASET_NAME,
-            split=self.split,
+            split="train",
             cache=cache,
             token=self.token,
         )
diff --git a/pyrit/datasets/seed_datasets/remote/or_bench_dataset.py b/pyrit/datasets/seed_datasets/remote/or_bench_dataset.py
index 129533a636..793da84814 100644
--- a/pyrit/datasets/seed_datasets/remote/or_bench_dataset.py
+++ b/pyrit/datasets/seed_datasets/remote/or_bench_dataset.py
@@ -2,6 +2,7 @@
 # Licensed under the MIT license.
 
 import logging
+import warnings
 
 from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import (
     _RemoteDatasetLoader,
@@ -36,14 +37,23 @@ class _ORBenchBaseDataset(_RemoteDatasetLoader):
     modalities: tuple[Modality, ...] = (Modality.TEXT,)
     tags: frozenset[str] = frozenset({"default", "safety", "refusal"})
 
-    def __init__(self, *, split: str = "train") -> None:
+    def __init__(self, *, split: str | None = None) -> None:
         """
         Initialize the OR-Bench dataset loader.
 
         Args:
-            split: Dataset split to load. Defaults to "train".
+            split: **Deprecated.** Every config of ``bench-llm/OR-Bench`` publishes only
+                the ``"train"`` split, so this kwarg has no effect. It will be removed in
+                v0.16.0.
         """
-        self.split = split
+        if split is not None:
+            warnings.warn(
+                "'split' is deprecated and will be removed in v0.16.0. "
+                "Every config of bench-llm/OR-Bench publishes only the 'train' split, "
+                "so this kwarg has no effect.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
 
     async def fetch_dataset_async(self, *, cache: bool = True) -> SeedDataset:
         """
@@ -60,7 +70,7 @@ async def fetch_dataset_async(self, *, cache: bool = True) -> SeedDataset:
         data = await self._fetch_from_huggingface_async(
             dataset_name=self.HF_DATASET_NAME,
             config=self.CONFIG,
-            split=self.split,
+            split="train",
             cache=cache,
         )
 
diff --git a/pyrit/datasets/seed_datasets/remote/sgxstest_dataset.py b/pyrit/datasets/seed_datasets/remote/sgxstest_dataset.py
index 0ad3fd2687..86efef86c6 100644
--- a/pyrit/datasets/seed_datasets/remote/sgxstest_dataset.py
+++ b/pyrit/datasets/seed_datasets/remote/sgxstest_dataset.py
@@ -3,6 +3,7 @@
 
 import logging
 import os
+import warnings
 from enum import Enum
 
 from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import (
@@ -74,7 +75,7 @@ def __init__(
         self,
         *,
         label: SGXSTestLabel = SGXSTestLabel.UNSAFE,
-        split: str = "train",
+        split: str | None = None,
         token: str | None = None,
     ) -> None:
         """
@@ -84,18 +85,26 @@ def __init__(
             label: Which subset of prompts to load. Defaults to ``SGXSTestLabel.UNSAFE``
                 (the truly-harmful prompts). Use ``SGXSTestLabel.SAFE`` for the
                 over-refusal targets or ``SGXSTestLabel.ALL`` for the full 200-prompt set.
-            split: Dataset split to load. Defaults to "train" (the only split currently
-                published by the upstream dataset).
+            split: **Deprecated.** Upstream ``walledai/SGXSTest`` publishes only the
+                ``"train"`` split, so this kwarg has no effect. It will be removed in
+                v0.16.0.
             token: Hugging Face authentication token. If not provided, reads from
                 the HUGGINGFACE_TOKEN env var.
 
         Raises:
             ValueError: If ``label`` is not an SGXSTestLabel member.
         """
+        if split is not None:
+            warnings.warn(
+                "'split' is deprecated and will be removed in v0.16.0. "
+                "Upstream walledai/SGXSTest publishes only the 'train' split, "
+                "so this kwarg has no effect.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
         self._validate_enum(value=label, enum_cls=SGXSTestLabel, label="label")
 
         self.label = label
-        self.split = split
         self.token = token if token is not None else os.environ.get("HUGGINGFACE_TOKEN")
 
     @property
@@ -122,7 +131,7 @@ async def fetch_dataset_async(self, *, cache: bool = True) -> SeedDataset:
 
         data = await self._fetch_from_huggingface_async(
             dataset_name=self.HF_DATASET_NAME,
-            split=self.split,
+            split="train",
             cache=cache,
             token=self.token,
         )
diff --git a/pyrit/datasets/seed_datasets/remote/simple_safety_tests_dataset.py b/pyrit/datasets/seed_datasets/remote/simple_safety_tests_dataset.py
index 51ae2115ef..2c729134a5 100644
--- a/pyrit/datasets/seed_datasets/remote/simple_safety_tests_dataset.py
+++ b/pyrit/datasets/seed_datasets/remote/simple_safety_tests_dataset.py
@@ -2,6 +2,7 @@
 # Licensed under the MIT license.
 
 import logging
+import warnings
 
 from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import (
     _RemoteDatasetLoader,
@@ -36,15 +37,24 @@ class _SimpleSafetyTestsDataset(_RemoteDatasetLoader):
     def __init__(
         self,
         *,
-        split: str = "test",
+        split: str | None = None,
     ) -> None:
         """
         Initialize the SimpleSafetyTests dataset loader.
 
         Args:
-            split: Dataset split to load. Defaults to "test".
+            split: **Deprecated.** Upstream ``Bertievidgen/SimpleSafetyTests`` publishes
+                only the ``"test"`` split, so this kwarg has no effect. It will be
+                removed in v0.16.0.
         """
-        self.split = split
+        if split is not None:
+            warnings.warn(
+                "'split' is deprecated and will be removed in v0.16.0. "
+                "Upstream Bertievidgen/SimpleSafetyTests publishes only the 'test' "
+                "split, so this kwarg has no effect.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
 
     @property
     def dataset_name(self) -> str:
@@ -65,7 +75,7 @@ async def fetch_dataset_async(self, *, cache: bool = True) -> SeedDataset:
 
         data = await self._fetch_from_huggingface_async(
             dataset_name=self.HF_DATASET_NAME,
-            split=self.split,
+            split="test",
             cache=cache,
         )
 
diff --git a/tests/unit/datasets/test_cbt_bench_dataset.py b/tests/unit/datasets/test_cbt_bench_dataset.py
index 787fb3b5c1..93dbee5f50 100644
--- a/tests/unit/datasets/test_cbt_bench_dataset.py
+++ b/tests/unit/datasets/test_cbt_bench_dataset.py
@@ -87,11 +87,10 @@ async def test_fetch_dataset(self, mock_cbt_bench_data):
             assert first_prompt.metadata["core_belief_fine_grained"] == ["I am unlovable", "I am immoral"]
 
     async def test_fetch_dataset_with_custom_config(self, mock_cbt_bench_data):
-        """Test fetching with custom HuggingFace config and split."""
+        """Test fetching with custom HuggingFace config."""
         loader = _CBTBenchDataset(
             source="custom/cbt-bench",
             config="core_major_seed",
-            split="test",
         )
 
         with patch.object(loader, "_fetch_from_huggingface_async", return_value=mock_cbt_bench_data) as mock_fetch:
@@ -102,9 +101,14 @@ async def test_fetch_dataset_with_custom_config(self, mock_cbt_bench_data):
             call_kwargs = mock_fetch.call_args.kwargs
             assert call_kwargs["dataset_name"] == "custom/cbt-bench"
             assert call_kwargs["config"] == "core_major_seed"
-            assert call_kwargs["split"] == "test"
+            assert call_kwargs["split"] == "train"
             assert call_kwargs["cache"] is False
 
+    def test_split_kwarg_emits_deprecation_warning(self):
+        """Any explicit ``split`` (even the always-used ``"train"``) must trigger a DeprecationWarning."""
+        with pytest.warns(DeprecationWarning, match="'split' is deprecated"):
+            _CBTBenchDataset(split="train")
+
     async def test_fetch_dataset_situation_only(self, mock_cbt_bench_data_missing_thoughts):
         """Test that items with only situation (no thoughts) still work."""
         loader = _CBTBenchDataset()
diff --git a/tests/unit/datasets/test_darkbench_dataset.py b/tests/unit/datasets/test_darkbench_dataset.py
index 34cf300d93..937141f0e9 100644
--- a/tests/unit/datasets/test_darkbench_dataset.py
+++ b/tests/unit/datasets/test_darkbench_dataset.py
@@ -32,7 +32,7 @@ async def test_fetch_dataset(mock_darkbench_data):
 
 
 async def test_fetch_dataset_passes_config(mock_darkbench_data):
-    loader = _DarkBenchDataset(config="custom", split="test")
+    loader = _DarkBenchDataset(config="custom")
 
     with patch.object(
         loader, "_fetch_from_huggingface_async", new=AsyncMock(return_value=mock_darkbench_data)
@@ -42,7 +42,13 @@ async def test_fetch_dataset_passes_config(mock_darkbench_data):
         mock_fetch.assert_called_once()
         call_kwargs = mock_fetch.call_args.kwargs
         assert call_kwargs["config"] == "custom"
-        assert call_kwargs["split"] == "test"
+        assert call_kwargs["split"] == "train"
+
+
+def test_split_kwarg_emits_deprecation_warning():
+    """Any explicit ``split`` (even the always-used ``"train"``) must trigger a DeprecationWarning."""
+    with pytest.warns(DeprecationWarning, match="'split' is deprecated"):
+        _DarkBenchDataset(split="train")
 
 
 def test_dataset_name():
diff --git a/tests/unit/datasets/test_harmful_qa_dataset.py b/tests/unit/datasets/test_harmful_qa_dataset.py
index 1926e37413..d3a1af2548 100644
--- a/tests/unit/datasets/test_harmful_qa_dataset.py
+++ b/tests/unit/datasets/test_harmful_qa_dataset.py
@@ -55,3 +55,8 @@ def test_dataset_name(self):
         """Test dataset_name property."""
         loader = _HarmfulQADataset()
         assert loader.dataset_name == "harmful_qa"
+
+    def test_split_kwarg_emits_deprecation_warning(self):
+        """Any explicit ``split`` (even the always-used ``"train"``) must trigger a DeprecationWarning."""
+        with pytest.warns(DeprecationWarning, match="'split' is deprecated"):
+            _HarmfulQADataset(split="train")
diff --git a/tests/unit/datasets/test_hixstest_dataset.py b/tests/unit/datasets/test_hixstest_dataset.py
index 4793a018ca..dc8cf272b7 100644
--- a/tests/unit/datasets/test_hixstest_dataset.py
+++ b/tests/unit/datasets/test_hixstest_dataset.py
@@ -61,6 +61,11 @@ def test_init_explicit_token_overrides_env(self):
             loader = _HiXSTestDataset(token="explicit-token")
             assert loader.token == "explicit-token"
 
+    def test_split_kwarg_emits_deprecation_warning(self):
+        """Any explicit ``split`` (even the always-used ``"train"``) must trigger a DeprecationWarning."""
+        with pytest.warns(DeprecationWarning, match="'split' is deprecated"):
+            _HiXSTestDataset(split="train")
+
     async def test_fetch_dataset_hindi_default(self, mock_hixstest_data):
         """By default, the Hindi prompt is the SeedPrompt value and both texts are in metadata."""
         loader = _HiXSTestDataset()
@@ -106,7 +111,7 @@ async def test_fetch_dataset_english(self, mock_hixstest_data):
             assert first_prompt.metadata["category"] == "मारना"
 
     async def test_fetch_dataset_passes_token_and_split(self, mock_hixstest_data):
-        """The loader forwards the configured token and split to _fetch_from_huggingface_async."""
+        """The loader forwards the configured token and the hardcoded 'train' split to _fetch_from_huggingface_async."""
         loader = _HiXSTestDataset(token="my-token")
 
         mock_fetch = AsyncMock(return_value=mock_hixstest_data)
diff --git a/tests/unit/datasets/test_or_bench_dataset.py b/tests/unit/datasets/test_or_bench_dataset.py
index 18c6da496a..016d236c0c 100644
--- a/tests/unit/datasets/test_or_bench_dataset.py
+++ b/tests/unit/datasets/test_or_bench_dataset.py
@@ -94,3 +94,10 @@ def test_dataset_name(self):
         """Test dataset_name property."""
         loader = _ORBenchToxicDataset()
         assert loader.dataset_name == "or_bench_toxic"
+
+
+@pytest.mark.parametrize("loader_cls", [_ORBench80KDataset, _ORBenchHardDataset, _ORBenchToxicDataset])
+def test_split_kwarg_emits_deprecation_warning(loader_cls):
+    """Each OR-Bench loader inherits the deprecated ``split`` kwarg and must warn when it is passed."""
+    with pytest.warns(DeprecationWarning, match="'split' is deprecated"):
+        loader_cls(split="train")
diff --git a/tests/unit/datasets/test_seed_dataset_provider.py b/tests/unit/datasets/test_seed_dataset_provider.py
index f81d755c61..3118e7ed9a 100644
--- a/tests/unit/datasets/test_seed_dataset_provider.py
+++ b/tests/unit/datasets/test_seed_dataset_provider.py
@@ -332,7 +332,6 @@ async def test_fetch_dataset_with_custom_config(self, mock_darkbench_data):
         loader = _DarkBenchDataset(
             dataset_name="custom/darkbench",
             config="custom_config",
-            split="test",
         )
 
         with patch.object(loader, "_fetch_from_huggingface_async", return_value=mock_darkbench_data) as mock_fetch:
@@ -343,7 +342,9 @@ async def test_fetch_dataset_with_custom_config(self, mock_darkbench_data):
             call_kwargs = mock_fetch.call_args.kwargs
             assert call_kwargs["dataset_name"] == "custom/darkbench"
             assert call_kwargs["config"] == "custom_config"
-            assert call_kwargs["split"] == "test"
+            # split is hardcoded at the call site since upstream apart/darkbench
+            # publishes only the "train" split (constructor kwarg is deprecated)
+            assert call_kwargs["split"] == "train"
 
 
 class TestMetadataParsingRemote:
diff --git a/tests/unit/datasets/test_sgxstest_dataset.py b/tests/unit/datasets/test_sgxstest_dataset.py
index eaf3cc36d3..919f0e0e6d 100644
--- a/tests/unit/datasets/test_sgxstest_dataset.py
+++ b/tests/unit/datasets/test_sgxstest_dataset.py
@@ -100,8 +100,8 @@ async def test_fetch_dataset_empty_after_filter_raises(self):
                 await loader.fetch_dataset_async()
 
     async def test_fetch_dataset_passes_token_and_split(self, mock_sgxstest_data):
-        """Test that the loader forwards token and split to _fetch_from_huggingface_async."""
-        loader = _SGXSTestDataset(split="train", token="hf_test_token")
+        """Test that the loader forwards token and the hardcoded 'train' split to _fetch_from_huggingface_async."""
+        loader = _SGXSTestDataset(token="hf_test_token")
 
         mock_fetch = AsyncMock(return_value=mock_sgxstest_data)
         with patch.object(loader, "_fetch_from_huggingface_async", new=mock_fetch):
@@ -114,6 +114,11 @@ async def test_fetch_dataset_passes_token_and_split(self, mock_sgxstest_data):
         assert kwargs["cache"] is False
         assert kwargs["token"] == "hf_test_token"
 
+    def test_split_kwarg_emits_deprecation_warning(self):
+        """Any explicit ``split`` (even the always-used ``"train"``) must trigger a DeprecationWarning."""
+        with pytest.warns(DeprecationWarning, match="'split' is deprecated"):
+            _SGXSTestDataset(split="train")
+
     def test_invalid_label_raises(self):
         """Passing a non-SGXSTestLabel value should raise."""
         with pytest.raises(ValueError, match="Expected SGXSTestLabel"):
diff --git a/tests/unit/datasets/test_simple_remote_datasets.py b/tests/unit/datasets/test_simple_remote_datasets.py
index 47ad817929..6f35c7146e 100644
--- a/tests/unit/datasets/test_simple_remote_datasets.py
+++ b/tests/unit/datasets/test_simple_remote_datasets.py
@@ -145,3 +145,9 @@ async def test_fetch_dataset(loader_class):
     assert all(isinstance(p, SeedPrompt) for p in dataset.seeds)
     actual_values = {seed.value for seed in dataset.seeds}
     assert actual_values == config["expected_values"]
+
+
+def test_forbidden_questions_split_kwarg_emits_deprecation_warning():
+    """Any explicit ``split`` (even the former default ``"default"``) must trigger a DeprecationWarning."""
+    with pytest.warns(DeprecationWarning, match="'split' is deprecated"):
+        _ForbiddenQuestionsDataset(split="default")
diff --git a/tests/unit/datasets/test_simple_safety_tests_dataset.py b/tests/unit/datasets/test_simple_safety_tests_dataset.py
index 1eab818212..66d6d555a1 100644
--- a/tests/unit/datasets/test_simple_safety_tests_dataset.py
+++ b/tests/unit/datasets/test_simple_safety_tests_dataset.py
@@ -55,3 +55,8 @@ def test_dataset_name(self):
         """Test dataset_name property."""
         loader = _SimpleSafetyTestsDataset()
         assert loader.dataset_name == "simple_safety_tests"
+
+    def test_split_kwarg_emits_deprecation_warning(self):
+        """Any explicit ``split`` (even the always-used ``"test"``) must trigger a DeprecationWarning."""
+        with pytest.warns(DeprecationWarning, match="'split' is deprecated"):
+            _SimpleSafetyTestsDataset(split="test")