diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 8af5182c..2daa81ba 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -60,13 +60,14 @@ jobs:
         if: matrix.install-profile == 'nlp'
         run: |
           pip install -e ".[test,cli,nlp]" -r requirements-test.txt
-          python -m spacy download en_core_web_sm
+          python -m spacy download en_core_web_lg

       - name: Install dependencies (nlp-advanced)
         if: matrix.install-profile == 'nlp-advanced'
         run: |
           pip install -e ".[test,cli,nlp,nlp-advanced]" -r requirements-test.txt
-          python -m spacy download en_core_web_sm
+          python -m spacy download en_core_web_lg
+          datafog download-model urchade/gliner_multi_pii-v1 --engine gliner

       - name: Run tests (core)
         if: matrix.install-profile == 'core'
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 53a39dfc..91a459aa 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -128,7 +128,8 @@ jobs:
           python -m pip install --upgrade pip
           pip install -e ".[all,test]"
           pip install -r requirements-test.txt
-          pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1.tar.gz
+          python -m spacy download en_core_web_lg
+          datafog download-model urchade/gliner_multi_pii-v1 --engine gliner

       - name: Run tests with segfault protection
         run: |
diff --git a/datafog/client.py b/datafog/client.py
index a76a30dd..c3d493c8 100644
--- a/datafog/client.py
+++ b/datafog/client.py
@@ -181,7 +181,7 @@ def download_model(
     Download a model for specified engine.

     Examples:
-        spaCy: datafog download-model en_core_web_sm --engine spacy
+        spaCy: datafog download-model en_core_web_lg --engine spacy
         GLiNER: datafog download-model urchade/gliner_multi_pii-v1 --engine gliner
     """
     if engine == "spacy":
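
Note on the workflow changes above: both CI and release now pin the large spaCy model and fetch the GLiNER weights explicitly, so a missing model fails the pipeline at install time rather than mid-test. A minimal preflight sketch of the same check for local environments follows; it is not part of datafog's API, and it assumes `spacy` and `huggingface_hub` are importable (the model names are the ones pinned in the workflows):

```python
import spacy.util
from huggingface_hub import snapshot_download
from huggingface_hub.utils import LocalEntryNotFoundError


def models_are_available() -> bool:
    # spaCy models install as regular packages, so a package check suffices.
    if not spacy.util.is_package("en_core_web_lg"):
        return False
    try:
        # local_files_only=True never touches the network; it raises if the
        # GLiNER weights are not already in the local Hugging Face cache.
        snapshot_download("urchade/gliner_multi_pii-v1", local_files_only=True)
    except LocalEntryNotFoundError:
        return False
    return True


if not models_are_available():
    raise SystemExit(
        "Run: python -m spacy download en_core_web_lg && "
        "datafog download-model urchade/gliner_multi_pii-v1 --engine gliner"
    )
```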
" + f"Download it explicitly with: datafog download-model {model_name} --engine spacy" + ) from exc return [ent for ent in nlp.pipe_labels["ner"]] diff --git a/datafog/processing/image_processing/donut_processor.py b/datafog/processing/image_processing/donut_processor.py index 7e100585..50022b6a 100644 --- a/datafog/processing/image_processing/donut_processor.py +++ b/datafog/processing/image_processing/donut_processor.py @@ -6,14 +6,10 @@ from images of documents. """ -import importlib -import importlib.util import json import logging import os import re -import subprocess -import sys from typing import TYPE_CHECKING, Any from .image_downloader import ImageDownloader @@ -43,13 +39,12 @@ def __init__(self, model_path="naver-clova-ix/donut-base-finetuned-cord-v2"): self.model_path = model_path self.downloader = ImageDownloader() - def ensure_installed(self, package_name): - try: - importlib.import_module(package_name) - except ImportError: - subprocess.check_call( - [sys.executable, "-m", "pip", "install", package_name] - ) + @staticmethod + def _missing_dependency_message(package_name: str) -> str: + return ( + f"Donut OCR requires {package_name}. " + "Install with: pip install datafog[nlp-advanced,ocr]" + ) def preprocess_image(self, image: "Image.Image") -> Any: import numpy as np @@ -86,40 +81,40 @@ async def extract_text_from_image(self, image: "Image.Image") -> str: "PYTEST_DONUT=yes is set, running actual OCR in test environment" ) - # Only import torch and transformers when actually needed and not in test environment try: - # Check if torch is available before trying to import it - try: - # Try to find the module without importing it - spec = importlib.util.find_spec("torch") - if spec is None: - # If we're in a test that somehow bypassed the IN_TEST_ENV check, - # still return a mock result instead of failing - logging.warning("torch module not found, returning mock result") - return json.dumps({"text": "Mock OCR text (torch not available)"}) - - # Ensure dependencies are installed - self.ensure_installed("torch") - self.ensure_installed("transformers") - except ImportError: - # If importlib.util is not available, fall back to direct try/except - pass - - # Import dependencies only when needed try: import torch + except ImportError as exc: + raise ImportError(self._missing_dependency_message("torch")) from exc + + try: from transformers import DonutProcessor as TransformersDonutProcessor from transformers import VisionEncoderDecoderModel except ImportError as e: - logging.warning(f"Import error: {e}, returning mock result") - return json.dumps({"text": f"Mock OCR text (import error: {e})"}) + raise ImportError( + self._missing_dependency_message("transformers") + ) from e # Preprocess the image image_np = self.preprocess_image(image) # Initialize model components - processor = TransformersDonutProcessor.from_pretrained(self.model_path) - model = VisionEncoderDecoderModel.from_pretrained(self.model_path) + try: + processor = TransformersDonutProcessor.from_pretrained( + self.model_path, + local_files_only=True, + ) + model = VisionEncoderDecoderModel.from_pretrained( + self.model_path, + local_files_only=True, + ) + except OSError as exc: + raise RuntimeError( + f"Donut model {self.model_path!r} is not available locally. " + "Download it explicitly before using Donut OCR, or pass a local " + "model path." 
diff --git a/datafog/processing/spark_processing/pyspark_udfs.py b/datafog/processing/spark_processing/pyspark_udfs.py
index 413e6ef3..2d7e2bc5 100644
--- a/datafog/processing/spark_processing/pyspark_udfs.py
+++ b/datafog/processing/spark_processing/pyspark_udfs.py
@@ -2,17 +2,16 @@
 PySpark UDFs for PII annotation and related utilities.

 This module provides functions for PII (Personally Identifiable Information) annotation
-using SpaCy models in a PySpark environment. It includes utilities for installing
-dependencies, creating and broadcasting PII annotator UDFs, and performing PII annotation
-on text data.
+using SpaCy models in a PySpark environment. It includes utilities for validating
+dependencies, creating and broadcasting PII annotator UDFs, and performing PII
+annotation on text data.
 """

 import importlib
-import subprocess
-import sys

 PII_ANNOTATION_LABELS = ["DATE_TIME", "LOC", "NRP", "ORG", "PER"]
 MAXIMAL_STRING_SIZE = 1000000
+DEFAULT_SPACY_MODEL = "en_core_web_lg"


 def pii_annotator(text: str, broadcasted_nlp) -> list[list[str]]:
@@ -45,7 +44,7 @@ def pii_annotator(text: str, broadcasted_nlp) -> list[list[str]]:


 def broadcast_pii_annotator_udf(
-    spark_session=None, spacy_model: str = "en_core_web_lg"
+    spark_session=None, spacy_model: str = DEFAULT_SPACY_MODEL
 ):
     """Broadcast PII annotator across Spark cluster and create UDF"""
     ensure_installed("pyspark")
@@ -69,5 +68,14 @@ def broadcast_pii_annotator_udf(
 def ensure_installed(package_name):
     try:
         importlib.import_module(package_name)
-    except ImportError:
-        subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
+    except ImportError as exc:
+        if package_name == "pyspark":
+            extra = "distributed"
+        elif package_name == "spacy":
+            extra = "nlp"
+        else:
+            extra = "all"
+        raise ImportError(
+            f"{package_name} is required for Spark PII UDF support. "
+            f"Install with: pip install datafog[{extra}]"
+        ) from exc
diff --git a/datafog/processing/text_processing/gliner_annotator.py b/datafog/processing/text_processing/gliner_annotator.py
index cbaeca8c..5a195f89 100644
--- a/datafog/processing/text_processing/gliner_annotator.py
+++ b/datafog/processing/text_processing/gliner_annotator.py
@@ -79,14 +79,18 @@ def create(

         try:
             # Load the GLiNER model
-            model = GLiNER.from_pretrained(model_name)
+            model = GLiNER.from_pretrained(model_name, local_files_only=True)
             logging.info(f"Successfully loaded GLiNER model: {model_name}")

             return cls(model=model, entity_types=entity_types, model_name=model_name)

         except Exception as e:
             logging.error(f"Failed to load GLiNER model {model_name}: {str(e)}")
-            raise
+            raise RuntimeError(
+                f"GLiNER model {model_name!r} is not available locally. "
+                "Download it explicitly with: "
+                f"datafog download-model {model_name} --engine gliner"
+            ) from e

     def annotate(self, text: str) -> Dict[str, List[str]]:
         """
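
For GLiNER, `local_files_only=True` splits the model lifecycle into an explicit online download and a guaranteed-offline load. A sketch of that round trip, assuming the `gliner` package is installed and that the CLI's download step amounts to a plain `from_pretrained` call (an assumption, since the CLI internals are not shown in this diff):

```python
from gliner import GLiNER

MODEL = "urchade/gliner_multi_pii-v1"

# One-time, network-allowed download into the local Hugging Face cache
# (roughly what `datafog download-model ... --engine gliner` is assumed to do).
GLiNER.from_pretrained(MODEL)

# Runtime load: never touches the network, raises if the cache is missing.
model = GLiNER.from_pretrained(MODEL, local_files_only=True)
print(model.predict_entities("Jane Doe <jane@example.com>", ["person", "email"]))
```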
" + "Download it explicitly with: " + f"datafog download-model {model_name} --engine gliner" + ) from e def annotate(self, text: str) -> Dict[str, List[str]]: """ diff --git a/datafog/processing/text_processing/spacy_pii_annotator.py b/datafog/processing/text_processing/spacy_pii_annotator.py index e871db8a..d2d1d765 100644 --- a/datafog/processing/text_processing/spacy_pii_annotator.py +++ b/datafog/processing/text_processing/spacy_pii_annotator.py @@ -24,39 +24,34 @@ "WORK_OF_ART", ] MAXIMAL_STRING_SIZE = 1000000 +DEFAULT_SPACY_MODEL = "en_core_web_lg" class SpacyPIIAnnotator(BaseModel): model_config = ConfigDict(arbitrary_types_allowed=True) nlp: Any + model_name: str = DEFAULT_SPACY_MODEL @classmethod - def create(cls) -> "SpacyPIIAnnotator": - import spacy - + def create(cls, model_name: str = DEFAULT_SPACY_MODEL) -> "SpacyPIIAnnotator": try: - nlp = spacy.load("en_core_web_lg") - except OSError: - import subprocess - import sys + import spacy + except ImportError as exc: + raise ImportError( + "SpaCy engine requires the nlp extra. " + "Install with: pip install datafog[nlp]" + ) from exc - interpreter_location = sys.executable - subprocess.run( - [ - interpreter_location, - "-m", - "pip", - "install", - "--no-deps", - "--no-cache-dir", - "https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1-py3-none-any.whl", - ], - check=True, - ) - nlp = spacy.load("en_core_web_lg") + try: + nlp = spacy.load(model_name) + except OSError as exc: + raise ImportError( + f"spaCy model {model_name!r} is not installed. " + f"Download it explicitly with: datafog download-model {model_name} --engine spacy" + ) from exc - return cls(nlp=nlp) + return cls(nlp=nlp, model_name=model_name) def annotate(self, text: str) -> Dict[str, List[str]]: try: diff --git a/datafog/services/spark_service.py b/datafog/services/spark_service.py index 5b7db28b..bf7d2e48 100644 --- a/datafog/services/spark_service.py +++ b/datafog/services/spark_service.py @@ -1,14 +1,12 @@ """ Spark service for data processing and analysis. -Provides a wrapper around PySpark functionality, including session creation, -JSON reading, and package management. +Provides a wrapper around PySpark functionality, including session creation and +JSON reading. """ import importlib import os -import subprocess -import sys from typing import List @@ -16,14 +14,13 @@ class SparkService: """ Manages Spark operations and dependencies. - Initializes a Spark session, handles imports, and provides methods for - data reading and package installation. + Initializes a Spark session, handles imports, and provides methods for data + reading. """ def __init__(self, master=None): self.master = master - # Ensure pyspark is installed first self.ensure_installed("pyspark") # Now import necessary modules after ensuring pyspark is installed @@ -84,16 +81,8 @@ def read_json(self, path: str) -> List[dict]: def ensure_installed(self, package_name): try: importlib.import_module(package_name) - except ImportError: - print(f"Installing {package_name}...") - try: - subprocess.check_call( - [sys.executable, "-m", "pip", "install", package_name] - ) - print(f"{package_name} installed successfully.") - except subprocess.CalledProcessError as e: - print(f"Failed to install {package_name}: {e}") - raise ImportError( - f"Could not install {package_name}. " - f"Please install it manually with 'pip install {package_name}'." - ) + except ImportError as exc: + raise ImportError( + f"{package_name} is required for Spark support. 
" + "Install with: pip install datafog[distributed]" + ) from exc diff --git a/setup.py b/setup.py index 43eaff3b..7c3b7992 100644 --- a/setup.py +++ b/setup.py @@ -45,6 +45,7 @@ distributed_deps = [ "pandas>=2.0.0", "numpy>=1.24.0", + "pyspark>=3.5.0", ] web_deps = [ diff --git a/tests/test_donut_lazy_import.py b/tests/test_donut_lazy_import.py index a0b62ab9..80c9ec09 100644 --- a/tests/test_donut_lazy_import.py +++ b/tests/test_donut_lazy_import.py @@ -1,5 +1,4 @@ import sys -from unittest.mock import patch from datafog.services.image_service import ImageService @@ -46,21 +45,5 @@ def test_lazy_import_mechanism(): # Verify that the extract_text_from_image method exists assert hasattr(processor, "extract_text_from_image") - # Mock importlib.import_module to prevent actual imports - with patch("importlib.import_module") as mock_import: - # Set up the mock to return a dummy module - mock_import.return_value = type("DummyModule", (), {}) - - # Mock the ensure_installed method to prevent actual installation - with patch.object(processor, "ensure_installed"): - # Try to call extract_text_from_image which should trigger imports - try: - # We don't actually need to run it asynchronously for this test - # Just call the method directly to see if it tries to import - processor.ensure_installed("torch") - except Exception: - # Ignore any exceptions - pass - - # Verify ensure_installed was called - assert processor.ensure_installed.called + # Runtime package installation helpers should not exist on the processor. + assert not hasattr(processor, "ensure_installed") diff --git a/tests/test_gliner_annotator.py b/tests/test_gliner_annotator.py index bde66d02..fbf0bbdf 100644 --- a/tests/test_gliner_annotator.py +++ b/tests/test_gliner_annotator.py @@ -53,7 +53,8 @@ def test_gliner_annotator_creation_with_dependencies(self, mock_gliner_module): assert "person" in annotator.entity_types assert "email" in annotator.entity_types mock_gliner_class.from_pretrained.assert_called_with( - "urchade/gliner_multi_pii-v1" + "urchade/gliner_multi_pii-v1", + local_files_only=True, ) def test_gliner_annotator_custom_model(self, mock_gliner_module): @@ -69,7 +70,10 @@ def test_gliner_annotator_custom_model(self, mock_gliner_module): assert annotator.model_name == "urchade/gliner_base" assert annotator.entity_types == custom_entities - mock_gliner_class.from_pretrained.assert_called_with("urchade/gliner_base") + mock_gliner_class.from_pretrained.assert_called_with( + "urchade/gliner_base", + local_files_only=True, + ) def test_gliner_annotate_text(self, mock_gliner_module): """Test GLiNER text annotation.""" @@ -353,23 +357,29 @@ def test_text_service_valid_engines(self): elif engine in ["spacy", "auto"]: # Mock spaCy dependencies - with patch( - "datafog.processing.text_processing.spacy_pii_annotator.SpacyPIIAnnotator" - ): - from datafog.services.text_service import TextService + from datafog.services.text_service import TextService - service = TextService(engine=engine) - assert service.engine == engine + with patch.object(TextService, "_ensure_spacy_available"): + with patch.object( + TextService, + "_create_spacy_annotator", + return_value=Mock(), + ): + service = TextService(engine=engine) + assert service.engine == engine elif engine in ["gliner", "smart"]: # Mock GLiNER dependencies - with patch( - "datafog.processing.text_processing.gliner_annotator.GLiNERAnnotator" - ): - from datafog.services.text_service import TextService + from datafog.services.text_service import TextService - service = 
diff --git a/tests/test_runtime_dependency_safety.py b/tests/test_runtime_dependency_safety.py
new file mode 100644
index 00000000..adc787ff
--- /dev/null
+++ b/tests/test_runtime_dependency_safety.py
@@ -0,0 +1,83 @@
+import sys
+from pathlib import Path
+
+import pytest
+
+
+def test_runtime_code_does_not_install_packages() -> None:
+    blocked_snippets = [
+        "subprocess.check_call",
+        "subprocess.run",
+        '"-m", "pip"',
+        '"pip", "install"',
+        "'pip', 'install'",
+    ]
+    offenders = []
+
+    for path in Path("datafog").rglob("*.py"):
+        source = path.read_text()
+        for snippet in blocked_snippets:
+            if snippet in source:
+                offenders.append(f"{path}: {snippet}")
+
+    assert offenders == []
+
+
+def test_spacy_pii_missing_model_requires_explicit_download(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    class FakeSpacy:
+        @staticmethod
+        def load(_model_name):
+            raise OSError("model not installed")
+
+    monkeypatch.setitem(sys.modules, "spacy", FakeSpacy())
+
+    from datafog.processing.text_processing.spacy_pii_annotator import SpacyPIIAnnotator
+
+    with pytest.raises(ImportError, match="Download it explicitly"):
+        SpacyPIIAnnotator.create()
+
+
+def test_spark_missing_dependency_requires_explicit_install(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    from datafog.services import spark_service
+
+    def missing_module(_package_name):
+        raise ImportError("missing")
+
+    monkeypatch.setattr(spark_service.importlib, "import_module", missing_module)
+
+    service = spark_service.SparkService.__new__(spark_service.SparkService)
+    with pytest.raises(ImportError, match=r"datafog\[distributed\]"):
+        service.ensure_installed("pyspark")
+
+
+def test_spark_udf_missing_dependency_requires_explicit_install(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    from datafog.processing.spark_processing import pyspark_udfs
+
+    def missing_module(_package_name):
+        raise ImportError("missing")
+
+    monkeypatch.setattr(pyspark_udfs.importlib, "import_module", missing_module)
+
+    with pytest.raises(ImportError, match=r"datafog\[nlp\]"):
+        pyspark_udfs.ensure_installed("spacy")
+
+
+@pytest.mark.asyncio
+async def test_donut_missing_dependency_requires_explicit_install(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    from datafog.processing.image_processing import donut_processor
+
+    monkeypatch.setattr(donut_processor, "IN_TEST_ENV", False)
+    monkeypatch.setitem(sys.modules, "torch", None)
+
+    processor = donut_processor.DonutProcessor()
+
+    with pytest.raises(ImportError, match=r"datafog\[nlp-advanced,ocr\]"):
+        await processor.extract_text_from_image(object())
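
Taken together, the patch replaces implicit runtime installs with a fail-fast contract: missing dependencies and models surface as ImportError/RuntimeError with an explicit install or download command. The resulting user-facing behavior, using the default model name from the diff (the printed message paraphrases the format defined above):

```python
from datafog.processing.text_processing.spacy_pii_annotator import SpacyPIIAnnotator

try:
    annotator = SpacyPIIAnnotator.create()  # defaults to en_core_web_lg
except ImportError as exc:
    # e.g. "spaCy model 'en_core_web_lg' is not installed. Download it
    # explicitly with: datafog download-model en_core_web_lg --engine spacy"
    print(exc)
```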