From 82d5f654c35a84cebc26885a3ece0269cbc1bb77 Mon Sep 17 00:00:00 2001
From: Faolain <Faolain@users.noreply.github.com>
Date: Mon, 14 Apr 2025 19:48:54 -0400
Subject: [PATCH 1/4] test: improve coverage of IPFS retrieval tests

---
 tests/test_ipfs_retrieval.py | 360 +++++++++++++++++++++++++++++++----
 1 file changed, 324 insertions(+), 36 deletions(-)

diff --git a/tests/test_ipfs_retrieval.py b/tests/test_ipfs_retrieval.py
index 357ee39..8812234 100644
--- a/tests/test_ipfs_retrieval.py
+++ b/tests/test_ipfs_retrieval.py
@@ -1,9 +1,11 @@
 import os
 import json
+from typing import Dict, Any  # Import Dict and Any
+from unittest.mock import patch, mock_open, MagicMock
 
 import pytest
 import requests
-from unittest.mock import patch, mock_open
+from multiformats import CID  # Import CID
 
 import dclimate_zarr_client.ipfs_retrieval as ipfs_retrieval
 from dclimate_zarr_client.ipfs_retrieval import (
@@ -13,7 +15,12 @@
     DatasetNotFoundError,
     IpfsConnectionError,
     StacCatalogError,
+    # Import internal functions for testing
+    _get_ipfs_store,
+    fetch_json_from_cid,  # noqa: E402 Testing private member
+    _get_host,  # noqa: E402 Testing private member
 )
+from py_hamt import IPFSStore  # Import IPFSStore
 
 
 # import xarray as xr
@@ -26,6 +33,218 @@
 pytestmark = pytest.mark.usefixtures("check_ipfs_connection")
 
 
+# --- Type Hinting --- # Define type alias if needed, or use directly
+MonkeyPatch = pytest.MonkeyPatch  # Common practice for pytest
+MockIPFSStore = MagicMock  # Alias for clarity
+
+
+# --- Tests for _get_ipfs_store ---
+
+
+def test_get_ipfs_store_defaults(monkeypatch: MonkeyPatch):
+    """Test store creation uses defaults when no args/env vars are set."""
+    monkeypatch.delenv("IPFS_GATEWAY_URI_STEM", raising=False)
+    monkeypatch.delenv("IPFS_RPC_URI_STEM", raising=False)
+    with patch("dclimate_zarr_client.ipfs_retrieval.IPFSStore") as mock_store_class:
+        store = _get_ipfs_store()
+        mock_store_class.assert_called_once_with()  # Called with no args, uses internal defaults
+        assert isinstance(
+            store, MagicMock
+        )  # We mocked the class, so instance is MagicMock
+
+
+def test_get_ipfs_store_args_override_env(monkeypatch: MonkeyPatch):
+    """Test that function arguments override environment variables."""
+    monkeypatch.setenv("IPFS_GATEWAY_URI_STEM", "http://env-gateway:8080")
+    monkeypatch.setenv("IPFS_RPC_URI_STEM", "http://env-rpc:5001")
+    with patch("dclimate_zarr_client.ipfs_retrieval.IPFSStore") as mock_store_class:
+        store = _get_ipfs_store(
+            gateway_uri_stem="http://arg-gateway:8080",
+            rpc_uri_stem="http://arg-rpc:5001",
+        )
+        mock_store_class.assert_called_once_with(
+            gateway_uri_stem="http://arg-gateway:8080",
+            rpc_uri_stem="http://arg-rpc:5001",
+        )
+        assert isinstance(store, MagicMock)
+
+
+def test_get_ipfs_store_env_vars(monkeypatch: MonkeyPatch):
+    """Test that environment variables are used when no args are provided."""
+    monkeypatch.setenv("IPFS_GATEWAY_URI_STEM", "http://env-gateway:8080")
+    monkeypatch.setenv("IPFS_RPC_URI_STEM", "http://env-rpc:5001")
+    with patch("dclimate_zarr_client.ipfs_retrieval.IPFSStore") as mock_store_class:
+        store = _get_ipfs_store()
+        mock_store_class.assert_called_once_with(
+            gateway_uri_stem="http://env-gateway:8080",
+            rpc_uri_stem="http://env-rpc:5001",
+        )
+        assert isinstance(store, MagicMock)
+
+
+def test_get_ipfs_store_mixed_args_env(monkeypatch: MonkeyPatch):
+    """Test using a mix of args and env vars (args should take precedence)."""
+    monkeypatch.setenv("IPFS_GATEWAY_URI_STEM", "http://env-gateway:8080")
+    monkeypatch.delenv("IPFS_RPC_URI_STEM", raising=False)  # RPC not set in env
+    with patch("dclimate_zarr_client.ipfs_retrieval.IPFSStore") as mock_store_class:
+        store = _get_ipfs_store(
+            rpc_uri_stem="http://arg-rpc:5001"
+        )  # Provide RPC as arg
+        mock_store_class.assert_called_once_with(
+            gateway_uri_stem="http://env-gateway:8080",  # Gateway comes from env
+            rpc_uri_stem="http://arg-rpc:5001",  # RPC comes from arg
+        )
+        assert isinstance(store, MagicMock)
+
+
+# --- Tests for fetch_json_from_cid ---
+
+
+@pytest.fixture
+def mock_ipfs_store() -> MockIPFSStore:
+    """Fixture to create a mock IPFSStore instance."""
+    store = MagicMock(spec=IPFSStore)
+    # Set default URIs for error messages if needed
+    store.gateway_uri_stem = "http://mock-gateway:8080"
+    store.rpc_uri_stem = "http://mock-rpc:5001"
+    return store
+
+
+def test_fetch_json_from_cid_success(mock_ipfs_store: MockIPFSStore):
+    """Test successful fetching and decoding of JSON from CID."""
+    valid_cid_str = (
+        "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi"  # Example CID
+    )
+    json_data: Dict[str, Any] = {"key": "value"}
+    mock_ipfs_store.load.return_value = json.dumps(json_data).encode("utf-8")
+
+    result: Dict[str, Any] = fetch_json_from_cid(valid_cid_str, mock_ipfs_store)
+
+    assert result == json_data
+    mock_ipfs_store.load.assert_called_once()
+    # Check that CID.decode was implicitly called by store.load mock (or explicitly if store expects CID obj)
+    # For MagicMock, we check the arg type if needed, assuming it passes the string
+    call_args = mock_ipfs_store.load.call_args[0]
+    assert isinstance(call_args[0], CID)
+    assert str(call_args[0]) == valid_cid_str
+
+
+def test_fetch_json_from_cid_success_with_prefix(mock_ipfs_store: MockIPFSStore):
+    """Test successful fetching when CID string has /ipfs/ prefix."""
+    cid_str_no_prefix = "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi"
+    cid_str_with_prefix = f"/ipfs/{cid_str_no_prefix}"
+    json_data: Dict[str, Any] = {"key": "value"}
+    mock_ipfs_store.load.return_value = json.dumps(json_data).encode("utf-8")
+
+    result: Dict[str, Any] = fetch_json_from_cid(cid_str_with_prefix, mock_ipfs_store)
+
+    assert result == json_data
+    mock_ipfs_store.load.assert_called_once()
+    call_args = mock_ipfs_store.load.call_args[0]
+    assert isinstance(call_args[0], CID)
+    assert str(call_args[0]) == cid_str_no_prefix  # Prefix should be stripped
+
+
+def test_fetch_json_from_cid_invalid_cid_string(mock_ipfs_store: MockIPFSStore):
+    """Test StacCatalogError when CID string is invalid."""
+    invalid_cid_str = "this-is-not-a-cid"
+    with pytest.raises(
+        StacCatalogError, match=f"Failed to decode CID string '{invalid_cid_str}'"
+    ):
+        fetch_json_from_cid(invalid_cid_str, mock_ipfs_store)
+    mock_ipfs_store.load.assert_not_called()  # Should fail before calling load
+
+
+def test_fetch_json_from_cid_load_returns_none(mock_ipfs_store: MockIPFSStore):
+    """Test StacCatalogError when ipfs_store.load returns None or empty bytes."""
+    valid_cid_str = "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi"
+    mock_ipfs_store.load.return_value = b""  # Empty bytes
+
+    with pytest.raises(
+        StacCatalogError, match=f"No data returned for CID: {valid_cid_str}"
+    ):
+        fetch_json_from_cid(valid_cid_str, mock_ipfs_store)
+    mock_ipfs_store.load.assert_called_once()
+
+
+def test_fetch_json_from_cid_json_decode_error(mock_ipfs_store: MockIPFSStore):
+    """Test StacCatalogError when fetched data is not valid JSON."""
+    valid_cid_str = "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi"
+    mock_ipfs_store.load.return_value = b"this is not json"
+
+    with pytest.raises(
+        StacCatalogError, match=f"Failed to decode JSON from CID {valid_cid_str}"
+    ):
+        fetch_json_from_cid(valid_cid_str, mock_ipfs_store)
+    mock_ipfs_store.load.assert_called_once()
+
+
+def test_fetch_json_from_cid_timeout_error(mock_ipfs_store: MockIPFSStore):
+    """Test IpfsConnectionError when ipfs_store.load raises Timeout."""
+    valid_cid_str = "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi"
+    mock_ipfs_store.load.side_effect = requests.exceptions.Timeout("Request timed out")
+
+    with pytest.raises(
+        IpfsConnectionError, match=f"Timeout fetching CID {valid_cid_str}"
+    ):
+        fetch_json_from_cid(valid_cid_str, mock_ipfs_store)
+    mock_ipfs_store.load.assert_called_once()
+
+
+def test_fetch_json_from_cid_connection_error(mock_ipfs_store: MockIPFSStore):
+    """Test IpfsConnectionError when ipfs_store.load raises connection-related error."""
+    valid_cid_str = "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi"
+    # Simulate different connection error messages
+    errors_to_test = [
+        requests.exceptions.ConnectionError("Connection refused"),
+        requests.exceptions.RequestException("Max retries exceeded"),
+        requests.exceptions.RequestException("Failed to establish a new connection"),
+    ]
+    for error in errors_to_test:
+        mock_ipfs_store.load.side_effect = error
+        with pytest.raises(
+            IpfsConnectionError,
+            match=f"Failed to connect via IPFSStore.*to fetch CID {valid_cid_str}",
+        ):
+            fetch_json_from_cid(valid_cid_str, mock_ipfs_store)
+        mock_ipfs_store.load.assert_called_once()
+        mock_ipfs_store.load.reset_mock()  # Reset mock for next error in loop
+
+
+def test_fetch_json_from_cid_generic_load_error(mock_ipfs_store: MockIPFSStore):
+    """Test StacCatalogError for other exceptions during ipfs_store.load."""
+    valid_cid_str = "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi"
+    mock_ipfs_store.load.side_effect = RuntimeError("Some other IPFSStore error")
+
+    with pytest.raises(
+        StacCatalogError,
+        match=f"Error fetching data for CID {valid_cid_str} via IPFSStore",
+    ):
+        fetch_json_from_cid(valid_cid_str, mock_ipfs_store)
+    mock_ipfs_store.load.assert_called_once()
+
+
+# --- Tests for _get_host ---
+
+
+def test_get_host_default():
+    """Test _get_host uses the default localhost when env var is not set."""
+    # Import DEFAULT_HOST to use in assertion
+    from dclimate_zarr_client.ipfs_retrieval import DEFAULT_HOST
+
+    with patch.dict(os.environ, {}, clear=True):  # Ensure env var is not set
+        assert _get_host() == DEFAULT_HOST  # Check default URI
+        # If IPFS_HOST is not set, _get_host IGNORES the uri argument and returns DEFAULT_HOST
+        assert _get_host("/custom/uri") == DEFAULT_HOST
+
+
+def test_get_host_from_env(monkeypatch: MonkeyPatch):
+    """Test _get_host uses IPFS_HOST environment variable."""
+    monkeypatch.setenv("IPFS_HOST", "http://my-ipfs-node:5002")
+    assert _get_host() == "http://my-ipfs-node:5002/api/v0"
+    assert _get_host("/other/uri") == "http://my-ipfs-node:5002/other/uri"
+
+
 # --- Tests for get_ipns_name_hash (Legacy/Utility Function) ---
 # These tests remain as UNIT tests, mocking requests and file system,
 # as they test the specific fallback logic of this function, not STAC traversal.
@@ -245,21 +464,6 @@ def test_list_datasets_functional():
 #                 assert "Failed to retrieve dataset list" in str(exc.value)
 
 
-# def test_list_datasets_local_cache_empty():
-#     """
-#     Test that if the endpoint fails AND local cache file is empty,
-#     list_datasets() raises RuntimeError (no data to parse).
-#     """
-#     with patch("requests.get") as mock_requests_get:
-#         mock_requests_get.side_effect = requests.RequestException("Simulated error")
-
-#         with patch("os.path.exists", return_value=True):
-#             with patch("builtins.open", mock_open(read_data="")):
-#                 with pytest.raises(RuntimeError) as exc:
-#                     list_datasets()
-#                 assert "Failed to retrieve dataset list" in str(exc.value)
-
-
 # def test_geo_temporal_query():
 #     ds_bytes = geo_temporal_query(
 #         "cpc-precip-conus",
@@ -312,14 +516,28 @@ def get_cache_path():
     return os.path.join(package_dir, "cids.json")
 
 
+# --- Helper Functions for Mocking --- #
+
+
+def mock_exists_true(*args, **kwargs) -> bool:
+    return True
+
+
+def mock_exists_false(*args, **kwargs) -> bool:
+    return False
+
+
 # Use monkeypatch fixture for modifying builtins like open
-def test_update_cache_no_update(monkeypatch):
+def test_update_cache_no_update(monkeypatch: MonkeyPatch):
     cached_data = {"dataset": "hash1"}
     new_data = {"dataset": "hash1"}
     file_path = get_cache_path()
 
+    # Mock os.path.exists needed if called before open
+    monkeypatch.setattr("os.path.exists", mock_exists_true)
+
     m = mock_open(read_data=json.dumps(cached_data))
-    monkeypatch.setattr("builtins.open", m)
+    monkeypatch.setattr("builtins.open", m)  # Correct: Patch built-in open
 
     update_cache_if_changed(new_data)
 
@@ -327,13 +545,17 @@ def test_update_cache_no_update(monkeypatch):
     m.assert_called_once_with(file_path, "r")
 
 
-def test_update_cache_update(monkeypatch):
+def test_update_cache_update(monkeypatch: MonkeyPatch):
     cached_data = {"dataset": "hash1"}
     new_data = {"dataset": "hash2"}
     file_path = get_cache_path()
 
+    # Mock os.path.exists needed if called before open
+    monkeypatch.setattr("os.path.exists", mock_exists_true)
+
     m = mock_open(read_data=json.dumps(cached_data))
-    monkeypatch.setattr("builtins.open", m)
+    # Patch open in the correct namespace where it's called
+    monkeypatch.setattr("builtins.open", m)  # Correct: Patch built-in open
 
     update_cache_if_changed(new_data)
 
@@ -342,21 +564,28 @@ def test_update_cache_update(monkeypatch):
     calls = m.call_args_list
     assert calls[0].args == (file_path, "r")
     assert calls[1].args == (file_path, "w")
-    # Remove the assertion checking the specific write content
-    # handle = m() # Remove this
-    # handle.write.assert_called_once_with(json.dumps(new_data)) # Remove this
+    # Check the content written (optional, but good practice)
+    # Ensure the mock handle captures the write
+    # Note: mock_open's write checking can be tricky.
+    # A simpler check is often sufficient unless exact content is critical.
+    # handle = m()
+    # handle.write.assert_called_once_with(json.dumps(new_data))
 
 
-def test_update_cache_file_not_found(monkeypatch):
+def test_update_cache_file_not_found(monkeypatch: MonkeyPatch):
     new_data = {"dataset": "hash2"}
     file_path = get_cache_path()
 
+    # No need to mock os.path.exists as the function doesn't use it
+
     # Mock 'open' to raise FileNotFoundError on first call (read), succeed on second (write)
-    # Create a mock handle instance for the successful write call return value
     mock_write_handle = mock_open().return_value
     m = mock_open()
-    m.side_effect = [FileNotFoundError, mock_write_handle]  # Read fails, Write succeeds
-    monkeypatch.setattr("builtins.open", m)
+    # First call (open "r") raises FileNotFoundError
+    # Second call (open "w") returns the mock handle
+    m.side_effect = [FileNotFoundError, mock_write_handle]
+    # Patch open in the correct namespace
+    monkeypatch.setattr("builtins.open", m)  # Correct: Patch built-in open
 
     update_cache_if_changed(new_data)
 
@@ -365,16 +594,19 @@ def test_update_cache_file_not_found(monkeypatch):
     calls = m.call_args_list
     assert calls[0].args == (file_path, "r")
     assert calls[1].args == (file_path, "w")
-    # Remove the lines trying to get handle and assert write
-    # handle = m() # Remove this
-    # handle.write.assert_called_once_with(json.dumps(new_data)) # Remove this
+    # Optionally check write content if needed:
+    # handle = calls[1]._extract_mock_return_value() # Get the handle returned by the 2nd call
+    # handle.write.assert_called_once_with(json.dumps(new_data))
 
 
-def test_update_cache_decode_error(monkeypatch):
+def test_update_cache_decode_error(monkeypatch: MonkeyPatch):
     """Test when the existing cache file has invalid JSON."""
     new_data = {"dataset": "hash2"}
     file_path = get_cache_path()
 
+    # Mock os.path.exists needed if called before open
+    monkeypatch.setattr("os.path.exists", mock_exists_true)
+
     # Create separate mock handles for read and write attempts
     # The read handle will simulate having invalid data
     read_handle = mock_open(read_data="invalid json").return_value
@@ -383,16 +615,18 @@ def test_update_cache_decode_error(monkeypatch):
     m = mock_open()
     # Define side effect: return read_handle on first call, write_handle on second
     m.side_effect = [read_handle, write_handle]
+    # Patch open in the correct namespace
+    monkeypatch.setattr("builtins.open", m)  # Correct: Patch built-in open
 
     # Mock json.load to raise error when the read_handle is passed to it
     # Patch it in the correct namespace where it's used
     mock_json_load = patch(
-        "dclimate_zarr_client.ipfs_retrieval.json.load",
+        "json.load",  # Patching built-in json directly
         side_effect=json.JSONDecodeError("err", "doc", 0),
-    ).start()
-
-    monkeypatch.setattr("builtins.open", m)
+    ).start()  # No need for .start()/.stop() if using 'with patch(...)'
 
+    # Use 'with patch' for cleaner setup/teardown
+    # with patch("json.load", side_effect=json.JSONDecodeError("err", "doc", 0)):
     update_cache_if_changed(new_data)
 
     # Assert open was called twice: read attempt (failed decode), write attempt
@@ -401,5 +635,59 @@ def test_update_cache_decode_error(monkeypatch):
     assert calls[0].args == (file_path, "r")  # Read attempt
     assert calls[1].args == (file_path, "w")  # Write attempt
 
-    # Clean up the patch for json.load
+    # Clean up the patch for json.load if using start/stop
     mock_json_load.stop()
+
+
+# --- Test Legacy get_ipns_name_hash Errors ---
+# (Ensure these cover JSONDecodeError during fallback read)
+
+
+def test_get_ipns_name_hash_local_cache_malformed_json_during_fallback():
+    """Test DatasetNotFoundError when local cache read fails during fallback"""
+    with patch(
+        "requests.get", side_effect=requests.RequestException("Simulated error")
+    ):
+        with patch("os.path.exists", return_value=True):
+            # Mock update_cache_if_changed to avoid file writes during test setup if called early
+            with patch("dclimate_zarr_client.ipfs_retrieval.update_cache_if_changed"):
+                # Mock open specifically within the fallback block
+                with patch(
+                    "dclimate_zarr_client.ipfs_retrieval.open",
+                    mock_open(read_data="INVALID JSON!!"),
+                    create=True,  # Allow create=True if needed by mock_open internals
+                ) as mock_file_open:
+                    # Mock json.load raising the error when called by get_ipns_name_hash
+                    with patch(
+                        "json.load", side_effect=json.JSONDecodeError("err", "doc", 0)
+                    ):
+                        with pytest.raises(
+                            DatasetNotFoundError, match="Invalid dataset name"
+                        ):
+                            get_ipns_name_hash("cpc-precip-conus")
+                    # Assert the mocked open was called for reading the cache
+                    mock_file_open.assert_called_once_with(get_cache_path(), "r")
+
+
+def test_get_ipns_name_hash_local_cache_empty_during_fallback():
+    """Test DatasetNotFoundError when local cache is empty during fallback"""
+    with patch(
+        "requests.get", side_effect=requests.RequestException("Simulated error")
+    ):
+        with patch("os.path.exists", return_value=True):
+            with patch("dclimate_zarr_client.ipfs_retrieval.update_cache_if_changed"):
+                with patch(
+                    "dclimate_zarr_client.ipfs_retrieval.open",
+                    mock_open(read_data=""),  # Empty file
+                    create=True,
+                ) as mock_file_open:
+                    # json.load will raise JSONDecodeError on empty string
+                    with patch(
+                        "json.load",
+                        side_effect=json.JSONDecodeError("Expecting value", "", 0),
+                    ):
+                        with pytest.raises(
+                            DatasetNotFoundError, match="Invalid dataset name"
+                        ):
+                            get_ipns_name_hash("cpc-precip-conus")
+                    mock_file_open.assert_called_once_with(get_cache_path(), "r")

From e9583ec92aa83611ed60bdb8a00553fd5ba45a53 Mon Sep 17 00:00:00 2001
From: Faolain <Faolain@users.noreply.github.com>
Date: Tue, 15 Apr 2025 01:28:57 -0400
Subject: [PATCH 2/4] tests: more tests using mocks for fetching json from ipns

---
 dclimate_zarr_client/ipfs_retrieval.py |  29 +--
 tests/test_ipfs_retrieval.py           | 263 +++++++++++++++++++++----
 2 files changed, 242 insertions(+), 50 deletions(-)

diff --git a/dclimate_zarr_client/ipfs_retrieval.py b/dclimate_zarr_client/ipfs_retrieval.py
index 7f186e5..ff116a7 100644
--- a/dclimate_zarr_client/ipfs_retrieval.py
+++ b/dclimate_zarr_client/ipfs_retrieval.py
@@ -185,7 +185,6 @@ def fetch_json_from_ipns(
         last_error = StacCatalogError(
             f"Unexpected error (nocache=true): {type(e).__name__}: {e}"
         )
-
     # --- Attempt 2: GET without nocache (if Attempt 1 failed) ---
     logger.info(
         f"Retrying fetch JSON via Gateway GET without nocache for: {ipns_name_for_url}"
@@ -204,20 +203,34 @@ def fetch_json_from_ipns(
             allow_redirects=True,
         )  # Retry
         response.raise_for_status()
-        json_content = response.json()
+        json_content = response.json()  # This call may raise JSONDecodeError
         logger.info(
             f"Successfully fetched JSON from IPNS '{ipns_name}' (nocache=false)"
         )
         return json_content
 
+    except json.JSONDecodeError as e:
+        # Handle JSON decode errors explicitly on the retry attempt.
+        response_text = response.text[:500] if response else "[No Response]"
+        status_code = response.status_code if response else "[No Status]"
+        err_msg = (
+            f"Invalid JSON fetching IPNS '{ipns_name}' (retry) via Gateway {gateway_base}: {e}. "
+            f"Response text: {response_text[:100]}"
+        )
+        if last_error:
+            err_msg += f" | Initial error (nocache=true): {type(last_error).__name__}: {last_error}"
+        raise StacCatalogError(err_msg) from e
+
     except requests.exceptions.ConnectionError as e:
         raise IpfsConnectionError(
             f"Connection error during IPNS fetch retry for '{ipns_name}' via Gateway {gateway_base}. Details: {e}"
         ) from e
+
     except requests.exceptions.Timeout as e:
         raise IpfsConnectionError(
             f"Timeout ({timeout}s) during IPNS fetch retry for '{ipns_name}' via Gateway {gateway_base}."
         ) from e
+
     except requests.exceptions.RequestException as e:  # Includes HTTP errors on retry
         err_msg = (
             f"Error fetching IPNS '{ipns_name}' (retry) via Gateway {gateway_base}: {e}"
@@ -231,18 +244,12 @@ def fetch_json_from_ipns(
             except Exception:
                 response_text = "[Could not read response text]"
             err_msg += f" Status Code: {status_code}, Response: {response_text}"
-        if last_error:
-            err_msg += f" | Initial error (nocache=true): {type(last_error).__name__}: {last_error}"
-        raise StacCatalogError(
-            err_msg
-        ) from e  # Raise as StacCatalogError as it prevents catalog reading
-    except json.JSONDecodeError as e:
-        err_msg = f"Invalid JSON fetching IPNS '{ipns_name}' (retry) via Gateway {gateway_base}: {e}. Response text: {response.text[:500] if response else '[No Response]'}"
         if last_error:
             err_msg += f" | Initial error (nocache=true): {type(last_error).__name__}: {last_error}"
         raise StacCatalogError(err_msg) from e
-    except Exception as e:  # Catch any other unexpected error during retry
-        err_msg = f"Unexpected error during IPNS fetch retry for '{ipns_name}' via Gateway: {e}"
+
+    except Exception as e:  # Catch any other unexpected exceptions
+        err_msg = f"Unexpected error during IPNS fetch retry for '{ipns_name}' via Gateway {gateway_base}: {e}"
         if last_error:
             err_msg += f" | Initial error (nocache=true): {type(last_error).__name__}: {last_error}"
         raise StacCatalogError(err_msg) from e
diff --git a/tests/test_ipfs_retrieval.py b/tests/test_ipfs_retrieval.py
index 8812234..ab1f339 100644
--- a/tests/test_ipfs_retrieval.py
+++ b/tests/test_ipfs_retrieval.py
@@ -9,7 +9,7 @@
 
 import dclimate_zarr_client.ipfs_retrieval as ipfs_retrieval
 from dclimate_zarr_client.ipfs_retrieval import (
-    # Keep imports for functions still tested here
+    fetch_json_from_ipns,
     get_ipns_name_hash,
     update_cache_if_changed,
     DatasetNotFoundError,
@@ -22,20 +22,42 @@
 )
 from py_hamt import IPFSStore  # Import IPFSStore
 
-
-# import xarray as xr
-
 # Import constants/configs
 from dclimate_zarr_client.client import DCLIMATE_STAC_CATALOG_IPNS
 from .conftest import KNOWN_STAC_DATASET_ID, KNOWN_STAC_DATASET_ID_2
-# Apply IPFS check fixture to relevant tests/module
 
-pytestmark = pytest.mark.usefixtures("check_ipfs_connection")
+# Apply IPFS check fixture ONLY to tests that actually need functional IPFS
+# Most tests in this file should be mocked unit tests.
+# pytestmark = pytest.mark.usefixtures("check_ipfs_connection") # Remove module-level mark
 
+# --- Type Hinting ---
+MonkeyPatch = pytest.MonkeyPatch
+MockIPFSStore = MagicMock
+MockResponse = MagicMock  # Alias for requests.Response mock
 
-# --- Type Hinting --- # Define type alias if needed, or use directly
-MonkeyPatch = pytest.MonkeyPatch  # Common practice for pytest
-MockIPFSStore = MagicMock  # Alias for clarity
+
+# --- Fixtures ---
+@pytest.fixture
+def mock_ipfs_store() -> MockIPFSStore:
+    """Fixture to create a mock IPFSStore instance."""
+    store = MagicMock(spec=IPFSStore)
+    store.gateway_uri_stem = "http://mock-gateway:8080"
+    store.rpc_uri_stem = "http://mock-rpc:5001"
+    return store
+
+
+@pytest.fixture
+def mock_requests_get(mocker) -> MagicMock:
+    """Fixture to mock requests.get."""
+    return mocker.patch("requests.get")
+
+
+# --- Helper to get cache path ---
+def get_cache_path():
+    # Helper to get the expected cache file path within the package
+    # Need to ensure this reflects the actual location used by the code
+    package_dir = os.path.dirname(ipfs_retrieval.__file__)
+    return os.path.join(package_dir, "cids.json")
 
 
 # --- Tests for _get_ipfs_store ---
@@ -97,29 +119,15 @@ def test_get_ipfs_store_mixed_args_env(monkeypatch: MonkeyPatch):
         assert isinstance(store, MagicMock)
 
 
-# --- Tests for fetch_json_from_cid ---
-
-
-@pytest.fixture
-def mock_ipfs_store() -> MockIPFSStore:
-    """Fixture to create a mock IPFSStore instance."""
-    store = MagicMock(spec=IPFSStore)
-    # Set default URIs for error messages if needed
-    store.gateway_uri_stem = "http://mock-gateway:8080"
-    store.rpc_uri_stem = "http://mock-rpc:5001"
-    return store
+# --- Tests for fetch_json_from_cid (Unit/Mocked) ---
 
 
 def test_fetch_json_from_cid_success(mock_ipfs_store: MockIPFSStore):
     """Test successful fetching and decoding of JSON from CID."""
-    valid_cid_str = (
-        "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi"  # Example CID
-    )
+    valid_cid_str = "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi"
     json_data: Dict[str, Any] = {"key": "value"}
     mock_ipfs_store.load.return_value = json.dumps(json_data).encode("utf-8")
-
     result: Dict[str, Any] = fetch_json_from_cid(valid_cid_str, mock_ipfs_store)
-
     assert result == json_data
     mock_ipfs_store.load.assert_called_once()
     # Check that CID.decode was implicitly called by store.load mock (or explicitly if store expects CID obj)
@@ -135,9 +143,7 @@ def test_fetch_json_from_cid_success_with_prefix(mock_ipfs_store: MockIPFSStore)
     cid_str_with_prefix = f"/ipfs/{cid_str_no_prefix}"
     json_data: Dict[str, Any] = {"key": "value"}
     mock_ipfs_store.load.return_value = json.dumps(json_data).encode("utf-8")
-
     result: Dict[str, Any] = fetch_json_from_cid(cid_str_with_prefix, mock_ipfs_store)
-
     assert result == json_data
     mock_ipfs_store.load.assert_called_once()
     call_args = mock_ipfs_store.load.call_args[0]
@@ -159,7 +165,6 @@ def test_fetch_json_from_cid_load_returns_none(mock_ipfs_store: MockIPFSStore):
     """Test StacCatalogError when ipfs_store.load returns None or empty bytes."""
     valid_cid_str = "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi"
     mock_ipfs_store.load.return_value = b""  # Empty bytes
-
     with pytest.raises(
         StacCatalogError, match=f"No data returned for CID: {valid_cid_str}"
     ):
@@ -171,7 +176,6 @@ def test_fetch_json_from_cid_json_decode_error(mock_ipfs_store: MockIPFSStore):
     """Test StacCatalogError when fetched data is not valid JSON."""
     valid_cid_str = "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi"
     mock_ipfs_store.load.return_value = b"this is not json"
-
     with pytest.raises(
         StacCatalogError, match=f"Failed to decode JSON from CID {valid_cid_str}"
     ):
@@ -183,7 +187,6 @@ def test_fetch_json_from_cid_timeout_error(mock_ipfs_store: MockIPFSStore):
     """Test IpfsConnectionError when ipfs_store.load raises Timeout."""
     valid_cid_str = "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi"
     mock_ipfs_store.load.side_effect = requests.exceptions.Timeout("Request timed out")
-
     with pytest.raises(
         IpfsConnectionError, match=f"Timeout fetching CID {valid_cid_str}"
     ):
@@ -225,8 +228,6 @@ def test_fetch_json_from_cid_generic_load_error(mock_ipfs_store: MockIPFSStore):
 
 
 # --- Tests for _get_host ---
-
-
 def test_get_host_default():
     """Test _get_host uses the default localhost when env var is not set."""
     # Import DEFAULT_HOST to use in assertion
@@ -245,6 +246,197 @@ def test_get_host_from_env(monkeypatch: MonkeyPatch):
     assert _get_host("/other/uri") == "http://my-ipfs-node:5002/other/uri"
 
 
+# --- NEW: Tests for fetch_json_from_ipns (Mocked Error Paths) ---
+
+
+class TestFetchJsonFromIpnsErrors:
+    MOCK_IPNS = "k51qzi5uqu5dk89atnl883sr0g1cb2py631ckz9ng45qhk6dg0pj141jtxtx6l"
+    MOCK_GATEWAY = "http://mock-gateway:8080"
+    EXPECTED_URL = f"{MOCK_GATEWAY}/ipns/{MOCK_IPNS}"
+
+    @pytest.fixture(autouse=True)
+    def setup_mocks(self, monkeypatch, mock_requests_get):
+        # Mock _get_ipfs_store to return a predictable gateway
+        mock_store = MagicMock(spec=IPFSStore)
+        mock_store.gateway_uri_stem = self.MOCK_GATEWAY
+        monkeypatch.setattr(
+            ipfs_retrieval, "_get_ipfs_store", lambda *args, **kwargs: mock_store
+        )
+        self.mock_requests_get = mock_requests_get
+
+    def mock_response(
+        self, status_code=200, json_data=None, text=None, raise_for_status_error=None
+    ) -> MockResponse:
+        mock_resp = MagicMock(spec=requests.Response)
+        mock_resp.status_code = status_code
+        mock_resp.raise_for_status.side_effect = raise_for_status_error
+        if json_data is not None:
+            mock_resp.json.return_value = json_data
+            # If json_data is provided, requests usually sets text as well
+            mock_resp.text = json.dumps(json_data) if text is None else text
+        else:
+            mock_resp.json.side_effect = requests.exceptions.JSONDecodeError(
+                "Expecting value", "doc", 0
+            )
+            mock_resp.text = text if text is not None else "Invalid JSON"
+        return mock_resp
+
+    def test_fetch_json_from_ipns_empty_name(self):
+        with pytest.raises(ValueError, match="IPNS name cannot be empty"):
+            fetch_json_from_ipns("")
+
+    def test_fetch_json_from_ipns_initial_timeout_then_success(self):
+        """Simulate timeout on first try, success on retry."""
+        mock_resp_success = self.mock_response(
+            status_code=200, json_data={"type": "Catalog"}
+        )
+        self.mock_requests_get.side_effect = [
+            requests.exceptions.Timeout("Initial timeout"),
+            mock_resp_success,
+        ]
+
+        result = fetch_json_from_ipns(self.MOCK_IPNS)
+
+        assert result == {"type": "Catalog"}
+        assert self.mock_requests_get.call_count == 2
+        # Check params: first call with nocache=true, second without
+        assert self.mock_requests_get.call_args_list[0].kwargs["params"] == {
+            "nocache": "true"
+        }
+        assert self.mock_requests_get.call_args_list[1].kwargs["params"] == {}
+
+    def test_fetch_json_from_ipns_initial_connection_error(self):
+        """Simulate ConnectionError on first try (should raise immediately)."""
+        self.mock_requests_get.side_effect = requests.exceptions.ConnectionError(
+            "Gateway down"
+        )
+
+        with pytest.raises(IpfsConnectionError, match="Connection error fetching IPNS"):
+            fetch_json_from_ipns(self.MOCK_IPNS)
+        assert self.mock_requests_get.call_count == 1
+
+    def test_fetch_json_from_ipns_initial_json_decode_then_success(self):
+        """Simulate JSON decode error on first try, success on retry."""
+        mock_resp_bad_json = self.mock_response(
+            status_code=200, text="<html>error page</html>"
+        )
+        mock_resp_success = self.mock_response(
+            status_code=200, json_data={"type": "Catalog"}
+        )
+        self.mock_requests_get.side_effect = [mock_resp_bad_json, mock_resp_success]
+
+        result = fetch_json_from_ipns(self.MOCK_IPNS)
+
+        assert result == {"type": "Catalog"}
+        assert self.mock_requests_get.call_count == 2
+        assert self.mock_requests_get.call_args_list[0].kwargs["params"] == {
+            "nocache": "true"
+        }
+        assert self.mock_requests_get.call_args_list[1].kwargs["params"] == {}
+
+    def test_fetch_json_from_ipns_initial_500_error_then_success(self):
+        """Simulate 500 HTTP error on first try, success on retry."""
+        mock_resp_500 = self.mock_response(
+            status_code=500,
+            text="Server Error",
+            raise_for_status_error=requests.exceptions.HTTPError("500 Error"),
+        )
+        mock_resp_success = self.mock_response(
+            status_code=200, json_data={"type": "Catalog"}
+        )
+        self.mock_requests_get.side_effect = [mock_resp_500, mock_resp_success]
+
+        result = fetch_json_from_ipns(self.MOCK_IPNS)
+
+        assert result == {"type": "Catalog"}
+        assert self.mock_requests_get.call_count == 2
+
+    def test_fetch_json_from_ipns_retry_timeout(self):
+        """Simulate failure on first try (e.g., timeout) AND timeout on retry."""
+        self.mock_requests_get.side_effect = [
+            requests.exceptions.Timeout("Initial timeout"),
+            requests.exceptions.Timeout("Retry timeout"),
+        ]
+
+        with pytest.raises(
+            IpfsConnectionError, match="Timeout .* during IPNS fetch retry"
+        ):
+            fetch_json_from_ipns(self.MOCK_IPNS)
+        assert self.mock_requests_get.call_count == 2
+
+    def test_fetch_json_from_ipns_retry_connection_error(self):
+        """Simulate failure on first try AND ConnectionError on retry."""
+        self.mock_requests_get.side_effect = [
+            requests.exceptions.Timeout("Initial timeout"),
+            requests.exceptions.ConnectionError("Retry connection failed"),
+        ]
+
+        with pytest.raises(
+            IpfsConnectionError, match="Connection error during IPNS fetch retry"
+        ):
+            fetch_json_from_ipns(self.MOCK_IPNS)
+        assert self.mock_requests_get.call_count == 2
+
+    def test_fetch_json_from_ipns_retry_json_decode_error(self):
+        """Simulate failure on first try AND JSON decode error on retry."""
+        mock_resp_bad_json_retry = self.mock_response(
+            status_code=200, text="Retry also bad json"
+        )
+        self.mock_requests_get.side_effect = [
+            requests.exceptions.Timeout("Initial timeout"),
+            mock_resp_bad_json_retry,
+        ]
+
+        with pytest.raises(
+            StacCatalogError, match="Invalid JSON fetching IPNS .* \\(retry\\)"
+        ):
+            fetch_json_from_ipns(self.MOCK_IPNS)
+        assert self.mock_requests_get.call_count == 2
+
+    def test_fetch_json_from_ipns_retry_http_error(self):
+        """Simulate failure on first try AND HTTP error on retry."""
+        mock_resp_503_retry = self.mock_response(
+            status_code=503,
+            text="Service Unavailable",
+            raise_for_status_error=requests.exceptions.HTTPError("503 Error"),
+        )
+        self.mock_requests_get.side_effect = [
+            requests.exceptions.Timeout("Initial timeout"),
+            mock_resp_503_retry,
+        ]
+
+        with pytest.raises(
+            StacCatalogError, match="Error fetching IPNS .* \\(retry\\) via Gateway"
+        ):
+            fetch_json_from_ipns(self.MOCK_IPNS)
+        assert self.mock_requests_get.call_count == 2
+
+    def test_fetch_json_from_ipns_initial_other_exception_then_success(self):
+        """Simulate generic exception on first try, success on retry."""
+        mock_resp_success = self.mock_response(
+            status_code=200, json_data={"type": "Catalog"}
+        )
+        self.mock_requests_get.side_effect = [
+            RuntimeError("Unexpected issue"),
+            mock_resp_success,
+        ]
+        result = fetch_json_from_ipns(self.MOCK_IPNS)
+        assert result == {"type": "Catalog"}
+        assert self.mock_requests_get.call_count == 2
+
+    def test_fetch_json_from_ipns_retry_other_exception(self):
+        """Simulate failure on first try AND generic exception on retry."""
+        self.mock_requests_get.side_effect = [
+            requests.exceptions.Timeout("Initial timeout"),
+            RuntimeError("Unexpected issue on retry"),
+        ]
+        with pytest.raises(
+            StacCatalogError, match="Unexpected error during IPNS fetch retry"
+        ):
+            fetch_json_from_ipns(self.MOCK_IPNS)
+        assert self.mock_requests_get.call_count == 2
+
+
 # --- Tests for get_ipns_name_hash (Legacy/Utility Function) ---
 # These tests remain as UNIT tests, mocking requests and file system,
 # as they test the specific fallback logic of this function, not STAC traversal.
@@ -509,13 +701,6 @@ def test_list_datasets_functional():
 # These remain unit tests using mocks for file I/O.
 
 
-def get_cache_path():
-    # Helper to get the expected cache file path within the package
-    # Need to ensure this reflects the actual location used by the code
-    package_dir = os.path.dirname(ipfs_retrieval.__file__)
-    return os.path.join(package_dir, "cids.json")
-
-
 # --- Helper Functions for Mocking --- #
 
 

From 55df8227da0afd23f91c48c1082c11833c5639a4 Mon Sep 17 00:00:00 2001
From: Faolain <Faolain@users.noreply.github.com>
Date: Tue, 15 Apr 2025 01:54:53 -0400
Subject: [PATCH 3/4] fix: missing legacy support and update tests

---
 dclimate_zarr_client/ipfs_retrieval.py |   2 +-
 tests/test_ipfs_retrieval.py           | 242 ++++++++++++++++++++++++-
 2 files changed, 242 insertions(+), 2 deletions(-)

diff --git a/dclimate_zarr_client/ipfs_retrieval.py b/dclimate_zarr_client/ipfs_retrieval.py
index ff116a7..ce21ab2 100644
--- a/dclimate_zarr_client/ipfs_retrieval.py
+++ b/dclimate_zarr_client/ipfs_retrieval.py
@@ -314,7 +314,7 @@ def get_dataset_hamt_cid_from_stac(
                 logger.warning(
                     f"Skipping child link with unexpected string href format (expected dict): {link}"
                 )
-                # If needed: collections_to_visit.append(href_obj[6:])
+                collections_to_visit.append(href_obj[6:])
             else:
                 logger.warning(
                     f"Skipping invalid child link format in root catalog: {link}"
diff --git a/tests/test_ipfs_retrieval.py b/tests/test_ipfs_retrieval.py
index ab1f339..8cde0f5 100644
--- a/tests/test_ipfs_retrieval.py
+++ b/tests/test_ipfs_retrieval.py
@@ -1,7 +1,7 @@
 import os
 import json
 from typing import Dict, Any  # Import Dict and Any
-from unittest.mock import patch, mock_open, MagicMock
+from unittest.mock import patch, mock_open, MagicMock, ANY
 
 import pytest
 import requests
@@ -10,6 +10,7 @@
 import dclimate_zarr_client.ipfs_retrieval as ipfs_retrieval
 from dclimate_zarr_client.ipfs_retrieval import (
     fetch_json_from_ipns,
+    get_dataset_hamt_cid_from_stac,
     get_ipns_name_hash,
     update_cache_if_changed,
     DatasetNotFoundError,
@@ -437,6 +438,245 @@ def test_fetch_json_from_ipns_retry_other_exception(self):
         assert self.mock_requests_get.call_count == 2
 
 
+# --- NEW: Tests for get_dataset_hamt_cid_from_stac (Mocked Error Paths) ---
+
+
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_ipns")
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_cid")
+def test_get_hamt_cid_root_catalog_fetch_error(mock_fetch_cid, mock_fetch_ipns):
+    """Test error when fetching the root catalog fails."""
+    mock_fetch_ipns.side_effect = IpfsConnectionError("Cannot connect to gateway")
+    with pytest.raises(StacCatalogError, match="Failed to fetch root catalog"):
+        get_dataset_hamt_cid_from_stac(
+            DCLIMATE_STAC_CATALOG_IPNS, KNOWN_STAC_DATASET_ID
+        )
+    mock_fetch_ipns.assert_called_once()
+    mock_fetch_cid.assert_not_called()
+
+
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_ipns")
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_cid")
+def test_get_hamt_cid_invalid_root_catalog_format(mock_fetch_cid, mock_fetch_ipns):
+    """Test error when root catalog JSON is not a valid STAC Catalog."""
+    mock_fetch_ipns.return_value = {"not": "a catalog"}
+    with pytest.raises(StacCatalogError, match="Invalid root catalog format"):
+        get_dataset_hamt_cid_from_stac(
+            DCLIMATE_STAC_CATALOG_IPNS, KNOWN_STAC_DATASET_ID
+        )
+    mock_fetch_ipns.assert_called_once()
+    mock_fetch_cid.assert_not_called()
+
+
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_ipns")
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_cid")
+def test_get_hamt_cid_no_child_links(mock_fetch_cid, mock_fetch_ipns):
+    """Test error when root catalog has no valid child links."""
+    mock_fetch_ipns.return_value = {
+        "type": "Catalog",
+        "id": "root",
+        "links": [{"rel": "self", "href": "."}],  # No child links
+    }
+    with pytest.raises(StacCatalogError, match="No valid child collection links found"):
+        get_dataset_hamt_cid_from_stac(
+            DCLIMATE_STAC_CATALOG_IPNS, KNOWN_STAC_DATASET_ID
+        )
+    mock_fetch_ipns.assert_called_once()
+    mock_fetch_cid.assert_not_called()
+
+
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_ipns")
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_cid")
+def test_get_hamt_cid_invalid_child_link_formats(mock_fetch_cid, mock_fetch_ipns):
+    """Test that invalid child links are skipped and only the legacy string href is fetched."""
+    mock_fetch_ipns.return_value = {
+        "type": "Catalog",
+        "id": "root",
+        "links": [
+            {
+                "rel": "child",
+                "type": "application/json",
+                "href": {"/": 123},
+            },  # Invalid CID value
+            {
+                "rel": "child",
+                "type": "application/json",
+                "href": "/ipfs/legacy_cid_string",
+            },  # Legacy string (warning expected)
+            {
+                "rel": "child",
+                "type": "application/json",
+                "href": "not_a_dict_or_ipfs_string",
+            },  # Invalid format
+            {"rel": "child", "type": "application/json"},  # Missing href
+            {
+                "rel": "child",
+                "type": "application/xml",
+                "href": {"/": "cid1"},
+            },  # Wrong type
+            {
+                "rel": "item",
+                "type": "application/json",
+                "href": {"/": "cid2"},
+            },  # Wrong rel
+        ],
+    }
+    # Expect DatasetNotFoundError: only the legacy string href is followed, and the mock yields no Collection
+    with pytest.raises(
+        DatasetNotFoundError, match=f"Dataset ID '{KNOWN_STAC_DATASET_ID}' not found"
+    ):
+        get_dataset_hamt_cid_from_stac(
+            DCLIMATE_STAC_CATALOG_IPNS, KNOWN_STAC_DATASET_ID
+        )
+    mock_fetch_ipns.assert_called_once()
+    mock_fetch_cid.assert_called_once()  # One legacy string was allowed
+
+
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_ipns")
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_cid")
+def test_get_hamt_cid_collection_fetch_error(mock_fetch_cid, mock_fetch_ipns):
+    """Test scenario where fetching a collection fails but others might succeed."""
+    target_dataset = "dataset-in-good-collection"
+    good_collection_cid = "bafyGoodCollection"
+    bad_collection_cid = "bafyBadCollection"
+
+    mock_fetch_ipns.return_value = {
+        "type": "Catalog",
+        "id": "root",
+        "links": [
+            {
+                "rel": "child",
+                "type": "application/json",
+                "href": {"/": bad_collection_cid},
+            },
+            {
+                "rel": "child",
+                "type": "application/json",
+                "href": {"/": good_collection_cid},
+            },
+        ],
+    }
+    # Mock fetch_json_from_cid behavior
+    good_item_cid = "bafyGoodItem"
+    good_hamt_cid = "bafyGoodHAMT"
+
+    def fetch_cid_side_effect(cid_str, store):
+        if cid_str == bad_collection_cid:
+            raise IpfsConnectionError("Failed to fetch bad collection")
+        elif cid_str == good_collection_cid:
+            return {
+                "type": "Collection",
+                "id": "good-collection",
+                "links": [
+                    {
+                        "rel": "item",
+                        "type": "application/json",
+                        "href": {"/": good_item_cid},
+                    }
+                ],
+            }
+        elif cid_str == good_item_cid:
+            return {
+                "type": "Feature",
+                "id": target_dataset,
+                "assets": {"hamt-zarr": {"href": f"/ipfs/{good_hamt_cid}"}},
+            }
+        else:
+            raise ValueError(f"Unexpected CID requested: {cid_str}")
+
+    mock_fetch_cid.side_effect = fetch_cid_side_effect
+
+    # Should succeed by finding the dataset in the good collection
+    result_cid = get_dataset_hamt_cid_from_stac(
+        DCLIMATE_STAC_CATALOG_IPNS, target_dataset
+    )
+    assert result_cid == good_hamt_cid
+    assert (
+        mock_fetch_cid.call_count == 3
+    )  # Bad collection fetch fails, good collection + good item fetches succeed
+
+
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_ipns")
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_cid")
+def test_get_hamt_cid_invalid_collection_format(mock_fetch_cid, mock_fetch_ipns):
+    """Test skipping invalid collection format."""
+    collection_cid = "bafyValidCollectionLink"
+    mock_fetch_ipns.return_value = {
+        "type": "Catalog",
+        "id": "root",
+        "links": [
+            {"rel": "child", "type": "application/json", "href": {"/": collection_cid}}
+        ],
+    }
+    mock_fetch_cid.return_value = {"not": "a collection"}  # Invalid format
+
+    with pytest.raises(
+        DatasetNotFoundError, match=f"Dataset ID '{KNOWN_STAC_DATASET_ID}' not found"
+    ):
+        get_dataset_hamt_cid_from_stac(
+            DCLIMATE_STAC_CATALOG_IPNS, KNOWN_STAC_DATASET_ID
+        )
+    mock_fetch_cid.assert_called_once_with(
+        collection_cid, ANY
+    )  # Assuming default store used internally
+
+
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_ipns")
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_cid")
+def test_get_hamt_cid_item_fetch_error(mock_fetch_cid, mock_fetch_ipns):
+    """Test scenario where fetching an item fails but the target is in another item."""
+    target_dataset = KNOWN_STAC_DATASET_ID
+    collection_cid = "bafyCollectionWithItems"
+    bad_item_cid = "bafyBadItem"
+    good_item_cid = "bafyGoodItem"
+    good_hamt_cid = "bafyTargetHAMT"
+
+    mock_fetch_ipns.return_value = {
+        "type": "Catalog",
+        "id": "root",
+        "links": [
+            {"rel": "child", "type": "application/json", "href": {"/": collection_cid}}
+        ],
+    }
+
+    def fetch_cid_side_effect(cid_str, store):
+        if cid_str == collection_cid:
+            return {
+                "type": "Collection",
+                "id": "collection",
+                "links": [
+                    {
+                        "rel": "item",
+                        "type": "application/json",
+                        "href": {"/": bad_item_cid},
+                    },
+                    {
+                        "rel": "item",
+                        "type": "application/json",
+                        "href": {"/": good_item_cid},
+                    },
+                ],
+            }
+        elif cid_str == bad_item_cid:
+            raise StacCatalogError("Failed to fetch bad item")
+        elif cid_str == good_item_cid:
+            return {
+                "type": "Feature",
+                "id": target_dataset,
+                "assets": {"hamt-zarr": {"href": f"/ipfs/{good_hamt_cid}"}},
+            }
+        else:
+            raise ValueError(f"Unexpected CID requested: {cid_str}")
+
+    mock_fetch_cid.side_effect = fetch_cid_side_effect
+
+    result_cid = get_dataset_hamt_cid_from_stac(
+        DCLIMATE_STAC_CATALOG_IPNS, target_dataset
+    )
+    assert result_cid == good_hamt_cid
+    # Called for collection, bad item (failed), good item (success)
+    assert mock_fetch_cid.call_count == 3
+
+
 # --- Tests for get_ipns_name_hash (Legacy/Utility Function) ---
 # These tests remain as UNIT tests, mocking requests and file system,
 # as they test the specific fallback logic of this function, not STAC traversal.

From 7bca52efac23e10d1fea5b2f86414408e56846ab Mon Sep 17 00:00:00 2001
From: Faolain <Faolain@users.noreply.github.com>
Date: Mon, 9 Jun 2025 01:59:51 -0400
Subject: [PATCH 4/4] test: extensive mocking tests

---
 dclimate_zarr_client/ipfs_retrieval.py | 176 ++++++-----
 tests/test_geotemporal_utils.py        |  32 ++
 tests/test_ipfs_retrieval.py           | 392 +++++++++++++++++++++++++
 3 files changed, 526 insertions(+), 74 deletions(-)
 create mode 100644 tests/test_geotemporal_utils.py

diff --git a/dclimate_zarr_client/ipfs_retrieval.py b/dclimate_zarr_client/ipfs_retrieval.py
index ce21ab2..9bd2b77 100644
--- a/dclimate_zarr_client/ipfs_retrieval.py
+++ b/dclimate_zarr_client/ipfs_retrieval.py
@@ -260,8 +260,9 @@ def get_dataset_hamt_cid_from_stac(
     root_catalog_ipns: str,
     target_dataset_id: str,
     gateway_uri_stem: str | None = None,
-    rpc_uri_stem: str
-    | None = None,  # Keep rpc_uri_stem for IPFSStore config if needed by fetch_json_from_cid
+    rpc_uri_stem: (
+        str | None
+    ) = None,  # Keep rpc_uri_stem for IPFSStore config if needed by fetch_json_from_cid
 ) -> str:
     """
     Traverses the dClimate STAC catalog starting from a root IPNS name
@@ -330,14 +331,15 @@ def get_dataset_hamt_cid_from_stac(
     for collection_cid in collections_to_visit:
         logger.debug(f"Fetching collection content for CID: {collection_cid}")
         try:
-            # *** Use fetch_json_from_cid ***
+            # --- collection JSON ---
             collection = fetch_json_from_cid(collection_cid, ipfs_store)
             if (
                 not isinstance(collection, dict)
                 or collection.get("type") != "Collection"
             ):
                 logger.warning(
-                    f"Skipping invalid collection format for CID {collection_cid}. Type: {collection.get('type')}"
+                    f"Skipping invalid collection format for CID {collection_cid}. "
+                    f"Type: {collection.get('type')}"
                 )
                 continue
 
@@ -345,98 +347,116 @@ def get_dataset_hamt_cid_from_stac(
             for link in collection.get("links", []):
                 if link.get("rel") == "item" and link.get("type") == "application/json":
                     item_href_obj = link.get("href")
-                    item_cid = None  # Reset item_cid for each link
+                    item_cid = None  # reset each link
 
-                    # *** MODIFIED: Handle dict href for item links ***
+                    # --- handle IPLD or legacy /ipfs/ links ---
                     if isinstance(item_href_obj, dict):
-                        item_cid = item_href_obj.get("/")  # Extract item CID string
+                        item_cid = item_href_obj.get("/")  # IPLD dict
                     elif isinstance(item_href_obj, str) and item_href_obj.startswith(
                         "/ipfs/"
                     ):
                         logger.warning(
-                            f"Found item link with legacy string href format in {collection_cid}: {link}"
+                            f"Found legacy string href in {collection_cid}: {link}"
                         )
-                        item_cid = item_href_obj[6:]
+                        item_cid = item_href_obj[6:]  # strip "/ipfs/"
                     else:
                         logger.warning(
-                            f"Skipping invalid item link format in collection {collection_cid}: {link}"
+                            f"Skipping invalid item link in {collection_cid}: {link}"
                         )
-                        continue  # Skip this link if format is wrong
+                        continue
 
-                    if isinstance(item_cid, str):
-                        items_found_in_collection += 1
-                        # logger.debug(f"Fetching item content for CID: {item_cid}") # Can be verbose
-                        try:
-                            # *** Use fetch_json_from_cid with the extracted item CID string ***
-                            item = fetch_json_from_cid(item_cid, ipfs_store)
+                    if not isinstance(item_cid, str):
+                        # IPLD dict href without a string "/" value (no warning is logged)
+                        continue
 
-                            if (
-                                not isinstance(item, dict)
-                                or item.get("type") != "Feature"
-                            ):
-                                logger.warning(
-                                    f"Skipping invalid item format for CID {item_cid}. Type: {item.get('type')}"
-                                )
-                                continue
+                    items_found_in_collection += 1
+                    item_id: str | None = None  # keep in scope for except
+                    try:
+                        # --- item JSON ---
+                        item = fetch_json_from_cid(item_cid, ipfs_store)
 
-                            item_id = item.get("id")
-                            if item_id == target_dataset_id:
-                                logger.info(
-                                    f"Found matching item for '{target_dataset_id}' with CID {item_cid} in collection {collection_cid}"
-                                )
-                                hamt_asset = item.get("assets", {}).get("hamt-zarr", {})
-                                hamt_cid_href = hamt_asset.get(
-                                    "href"
-                                )  # This should be the /ipfs/ string
-
-                                if not isinstance(
-                                    hamt_cid_href, str
-                                ) or not hamt_cid_href.startswith("/ipfs/"):
-                                    raise StacCatalogError(
-                                        f"STAC Item '{item_id}' (CID: {item_cid}) is missing a valid string 'assets.hamt-zarr.href' starting with /ipfs/. Found: '{hamt_cid_href}' (type: {type(hamt_cid_href).__name__})"
-                                    )
+                        if not isinstance(item, dict) or item.get("type") != "Feature":
+                            logger.warning(
+                                f"Skipping invalid item format for CID {item_cid}. "
+                                f"Type: {item.get('type')}"
+                            )
+                            continue
 
-                                hamt_cid_str = hamt_cid_href[
-                                    6:
-                                ]  # Slice the /ipfs/ prefix
-                                logger.info(
-                                    f"Successfully extracted HAMT CID for '{target_dataset_id}': {hamt_cid_str}"
-                                )
-                                _stac_hamt_cid_cache[target_dataset_id] = hamt_cid_str
-                                return hamt_cid_str
+                        item_id = item.get("id")
+                        if item_id != target_dataset_id:
+                            # not the dataset we're looking for
+                            continue
 
-                        except (StacCatalogError, IpfsConnectionError) as item_err:
-                            # Log error but continue searching other items/collections
-                            logger.error(
-                                f"Error processing item {item_cid} in collection {collection_cid}, continuing search: {item_err}"
-                            )
-                        except (
-                            Exception
-                        ) as item_err:  # Catch unexpected errors during item processing
-                            logger.error(
-                                f"Unexpected error processing item {item_cid} in collection {collection_cid}, continuing search: {type(item_err).__name__}: {item_err}"
+                        logger.info(
+                            f"Found matching item for '{target_dataset_id}' "
+                            f"(CID {item_cid}) in collection {collection_cid}"
+                        )
+                        hamt_asset = item.get("assets", {}).get("hamt-zarr", {})
+                        hamt_cid_href = hamt_asset.get("href")  # expected "/ipfs/<cid>"
+
+                        if not isinstance(
+                            hamt_cid_href, str
+                        ) or not hamt_cid_href.startswith("/ipfs/"):
+                            raise StacCatalogError(
+                                f"STAC Item '{item_id}' (CID: {item_cid}) is missing a "
+                                f"valid string 'assets.hamt-zarr.href' starting with "
+                                f"/ipfs/. Found: '{hamt_cid_href}' "
+                                f"(type: {type(hamt_cid_href).__name__})"
                             )
-                    # else: Invalid item CID extracted, already logged warning
+
+                        hamt_cid_str = hamt_cid_href[6:]  # drop "/ipfs/"
+                        logger.info(
+                            f"Successfully extracted HAMT CID for '{target_dataset_id}': "
+                            f"{hamt_cid_str}"
+                        )
+                        _stac_hamt_cid_cache[target_dataset_id] = hamt_cid_str
+                        return hamt_cid_str
+
+                    # ── error handling ──────────────────────────────────────────
+                    except StacCatalogError as item_err:
+                        # If the failing item *is* the target dataset, bubble it up.
+                        if item_id == target_dataset_id:
+                            raise item_err
+                        logger.error(
+                            f"Error processing non-target item {item_cid} in collection "
+                            f"{collection_cid}: {item_err}"
+                        )
+                    except IpfsConnectionError as item_err:
+                        logger.error(
+                            f"IPFS error processing item {item_cid} in collection "
+                            f"{collection_cid}: {item_err}"
+                        )
+                    except Exception as item_err:
+                        logger.error(
+                            f"Unexpected error processing item {item_cid} in collection "
+                            f"{collection_cid}: {type(item_err).__name__}: {item_err}"
+                        )
 
             logger.debug(
-                f"Finished searching {items_found_in_collection} items in collection {collection.get('id', collection_cid)}."
+                f"Finished searching {items_found_in_collection} items in collection "
+                f"{collection.get('id', collection_cid)}."
             )
 
-        except (StacCatalogError, IpfsConnectionError) as col_err:
-            # Log error but continue searching other collections
+        except StacCatalogError as col_err:
+            # Propagate to the caller: this is the malformed-target error re-raised above,
+            # but it also catches a StacCatalogError from fetching the collection itself.
+            raise col_err
+        except IpfsConnectionError as col_err:
+            # ← still swallow network errors so other collections can be tried
             logger.error(
-                f"Error processing collection {collection_cid}, continuing search: {col_err}"
+                f"IPFS error processing collection {collection_cid}, continuing search: "
+                f"{col_err}"
             )
-        except (
-            Exception
-        ) as col_err:  # Catch unexpected errors during collection processing
+        except Exception as col_err:
             logger.error(
-                f"Unexpected error processing collection {collection_cid}, continuing search: {type(col_err).__name__}: {col_err}"
+                f"Unexpected error processing collection {collection_cid}, continuing "
+                f"search: {type(col_err).__name__}: {col_err}"
             )
 
-    # If loop completes without finding the dataset
+    # If the loop completes without returning
     raise DatasetNotFoundError(
-        f"Dataset ID '{target_dataset_id}' not found after searching all collections in the STAC catalog rooted at IPNS '{root_catalog_ipns}'."
+        f"Dataset ID '{target_dataset_id}' not found after searching all collections "
+        f"in the STAC catalog rooted at IPNS '{root_catalog_ipns}'."
     )
 
 
@@ -675,6 +695,9 @@ def _get_dataset_by_ipfs_cid(
         raise StacCatalogError(
             f"Zarr metadata not found at CID {ipfs_cid}. Is it a valid Zarr root? Error: {e}"
         ) from e
+    except ValueError:
+        # Let ValueErrors propagate, e.g. from invalid CID format
+        raise
     except Exception as e:
         # Catch other potential errors (e.g., Zarr format errors, py-hamt errors)
         logger.error(
@@ -883,10 +906,15 @@ def list_datasets(
                             )
                     # else: Invalid item CID extracted, already logged warning
 
-        except (StacCatalogError, IpfsConnectionError) as col_err:
-            # Log and skip this specific collection if fetching/parsing fails
-            logger.warning(
-                f"Skipping collection {collection_cid} during list due to error: {col_err}"
+        # Propagate a StacCatalogError raised while processing this collection.
+        except StacCatalogError as col_err:
+            raise col_err
+
+        # Still swallow IPFS/network problems so the remaining collections get listed.
+        except IpfsConnectionError as col_err:
+            logger.error(
+                f"IPFS error processing collection {collection_cid}, continuing search: "
+                f"{col_err}"
             )
         except Exception as col_err:  # Catch unexpected errors
             logger.warning(
diff --git a/tests/test_geotemporal_utils.py b/tests/test_geotemporal_utils.py
new file mode 100644
index 0000000..f402d6e
--- /dev/null
+++ b/tests/test_geotemporal_utils.py
@@ -0,0 +1,32 @@
+import numpy as np
+import pytest
+
+from dclimate_zarr_client.geotemporal_data import _haversine, _check_input_parameters
+from dclimate_zarr_client import dclimate_zarr_errors as errors
+
+
+def test_haversine_single_points():
+    dist = _haversine(0, 0, 0, 1)
+    assert dist == pytest.approx(111.195, rel=1e-3)
+
+    dist = _haversine(36.12, -86.67, 33.94, -118.40)
+    assert dist == pytest.approx(2886.44, rel=1e-2)
+
+
+def test_haversine_arrays():
+    lats1 = np.array([0, 10])
+    lons1 = np.array([0, 0])
+    lats2 = np.array([0, 20])
+    lons2 = np.array([1, 0])
+    dists = _haversine(lats1, lons1, lats2, lons2)
+    assert np.allclose(dists, [111.195, 1111.95], rtol=1e-3)
+
+
+def test_check_input_parameters_invalid_period():
+    with pytest.raises(errors.InvalidTimePeriodError):
+        _check_input_parameters(time_period="decade")
+
+
+def test_check_input_parameters_invalid_method():
+    with pytest.raises(errors.InvalidAggregationMethodError):
+        _check_input_parameters(agg_method="average")
diff --git a/tests/test_ipfs_retrieval.py b/tests/test_ipfs_retrieval.py
index 8cde0f5..82dbd6c 100644
--- a/tests/test_ipfs_retrieval.py
+++ b/tests/test_ipfs_retrieval.py
@@ -1,4 +1,5 @@
 import os
+import re
 import json
 from typing import Dict, Any  # Import Dict and Any
 from unittest.mock import patch, mock_open, MagicMock, ANY
@@ -9,10 +10,14 @@
 
 import dclimate_zarr_client.ipfs_retrieval as ipfs_retrieval
 from dclimate_zarr_client.ipfs_retrieval import (
+    _stac_hamt_cid_cache,
     fetch_json_from_ipns,
     get_dataset_hamt_cid_from_stac,
     get_ipns_name_hash,
+    _get_dataset_by_ipfs_cid,
     update_cache_if_changed,
+    _get_single_metadata,
+    list_datasets,
     DatasetNotFoundError,
     IpfsConnectionError,
     StacCatalogError,
@@ -677,6 +682,393 @@ def fetch_cid_side_effect(cid_str, store):
     assert mock_fetch_cid.call_count == 3
 
 
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_ipns")
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_cid")
+def test_get_hamt_cid_invalid_item_format(mock_fetch_cid, mock_fetch_ipns):
+    """Test skipping invalid item format."""
+    # Reset the shared HAMT-CID cache so results from earlier tests cannot leak in.
+    _stac_hamt_cid_cache.clear()
+
+    collection_cid = "bafyCollection"
+    item_cid = "bafyItem"
+    mock_fetch_ipns.return_value = {
+        "type": "Catalog",
+        "id": "dclimate-stac-catalog",
+        "links": [
+            {"rel": "child", "type": "application/json", "href": {"/": collection_cid}}
+        ],
+    }
+    mock_fetch_cid.side_effect = [
+        # First call returns collection
+        {
+            "type": "Collection",
+            "id": "collection",
+            "links": [
+                {"rel": "item", "type": "application/json", "href": {"/": item_cid}}
+            ],
+        },
+        # Second call returns invalid item
+        {"not": "a feature"},
+    ]
+    with pytest.raises(
+        DatasetNotFoundError, match=f"Dataset ID '{KNOWN_STAC_DATASET_ID}' not found"
+    ):
+        get_dataset_hamt_cid_from_stac(
+            DCLIMATE_STAC_CATALOG_IPNS, KNOWN_STAC_DATASET_ID
+        )
+    assert mock_fetch_cid.call_count == 2
+
+
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_ipns")
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_cid")
+def test_get_hamt_cid_item_missing_hamt_asset(mock_fetch_cid, mock_fetch_ipns):
+    """Test error if the found item doesn't have the 'hamt-zarr' asset href."""
+    target_dataset = KNOWN_STAC_DATASET_ID
+    collection_cid = "bafyCollection"
+    item_cid = "bafyItem"
+    mock_fetch_ipns.return_value = {
+        "type": "Catalog",
+        "id": "root",
+        "links": [
+            {"rel": "child", "type": "application/json", "href": {"/": collection_cid}}
+        ],
+    }
+    mock_fetch_cid.side_effect = [
+        {
+            "type": "Collection",
+            "id": "collection",
+            "links": [
+                {"rel": "item", "type": "application/json", "href": {"/": item_cid}}
+            ],
+        },
+        {
+            "type": "Feature",
+            "id": target_dataset,
+            "assets": {},
+        },  # Missing assets.hamt-zarr.href
+    ]
+    with pytest.raises(
+        StacCatalogError, match="missing a valid string 'assets.hamt-zarr.href'"
+    ):
+        get_dataset_hamt_cid_from_stac(DCLIMATE_STAC_CATALOG_IPNS, target_dataset)
+    assert mock_fetch_cid.call_count == 2
+
+
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_ipns")
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_cid")
+def test_get_hamt_cid_item_invalid_hamt_asset_href(mock_fetch_cid, mock_fetch_ipns):
+    """Test error if the hamt-zarr href is not a valid /ipfs/ string."""
+    target_dataset = KNOWN_STAC_DATASET_ID
+    collection_cid = "bafyCollection"
+    item_cid = "bafyItem"
+    mock_fetch_ipns.return_value = {
+        "type": "Catalog",
+        "id": "root",
+        "links": [
+            {"rel": "child", "type": "application/json", "href": {"/": collection_cid}}
+        ],
+    }
+    mock_fetch_cid.side_effect = [
+        {
+            "type": "Collection",
+            "id": "collection",
+            "links": [
+                {"rel": "item", "type": "application/json", "href": {"/": item_cid}}
+            ],
+        },
+        {
+            "type": "Feature",
+            "id": target_dataset,
+            "assets": {"hamt-zarr": {"href": "not-an-ipfs-link"}},
+        },
+    ]
+    with pytest.raises(
+        StacCatalogError, match="missing a valid string 'assets.hamt-zarr.href'"
+    ):
+        get_dataset_hamt_cid_from_stac(DCLIMATE_STAC_CATALOG_IPNS, target_dataset)
+    assert mock_fetch_cid.call_count == 2
+
+
+# --- Tests for _get_dataset_by_ipfs_cid (Unit/Mocked Error Paths) ---
+
+
+def test_get_dataset_by_ipfs_cid_empty():
+    """Test ValueError if ipfs_cid is empty."""
+    with pytest.raises(ValueError, match="IPFS CID cannot be empty"):
+        _get_dataset_by_ipfs_cid("")
+
+
+def test_get_dataset_by_ipfs_cid_invalid_format():
+    """Test ValueError if ipfs_cid is not a valid CID format."""
+    with pytest.raises(ValueError, match="Invalid IPFS CID format"):
+        _get_dataset_by_ipfs_cid("this-is-definitely-not-a-cid")
+
+
+@patch("dclimate_zarr_client.ipfs_retrieval.xr.open_zarr")
+@patch("dclimate_zarr_client.ipfs_retrieval.IPFSZarr3")
+@patch("dclimate_zarr_client.ipfs_retrieval.HAMT")
+@patch("dclimate_zarr_client.ipfs_retrieval._get_ipfs_store")
+def test_get_dataset_by_ipfs_cid_zarr_not_found(
+    mock_get_store, mock_hamt, mock_ipfs_zarr3, mock_open_zarr
+):
+    """Test StacCatalogError if Zarr metadata (e.g., .zgroup) is missing."""
+    valid_cid = (
+        "bafybeicg2rebjoofv4kbyovkw7af3rpiitvnl6i7ckcywaq6za2eflbka4"  # Example CID
+    )
+    mock_open_zarr.side_effect = FileNotFoundError(
+        "[Errno 2] No such file or directory: '.zgroup'"
+    )  # Simulate xr.open_zarr error
+
+    with pytest.raises(
+        StacCatalogError, match=f"Zarr metadata not found at CID {valid_cid}"
+    ):
+        _get_dataset_by_ipfs_cid(valid_cid)
+
+    mock_get_store.assert_called_once()
+    mock_hamt.assert_called_once()
+    mock_ipfs_zarr3.assert_called_once()
+    mock_open_zarr.assert_called_once()
+
+
+@patch("dclimate_zarr_client.ipfs_retrieval.xr.open_zarr")
+@patch("dclimate_zarr_client.ipfs_retrieval.IPFSZarr3")
+@patch("dclimate_zarr_client.ipfs_retrieval.HAMT")
+@patch("dclimate_zarr_client.ipfs_retrieval._get_ipfs_store")
+def test_get_dataset_by_ipfs_cid_connection_error(
+    mock_get_store, mock_hamt, mock_ipfs_zarr3, mock_open_zarr
+):
+    """Test IpfsConnectionError if loading data fails due to connection issues."""
+    valid_cid = "bafybeicg2rebjoofv4kbyovkw7af3rpiitvnl6i7ckcywaq6za2eflbka4"
+    # Simulate error during xr.open_zarr which tries to read from the store
+    mock_open_zarr.side_effect = requests.exceptions.ConnectionError(
+        "Connection refused during zarr open"
+    )
+
+    with pytest.raises(
+        IpfsConnectionError,
+        match=f"IPFS connection failed while loading dataset from CID {valid_cid}",
+    ):
+        _get_dataset_by_ipfs_cid(valid_cid)
+
+
+@patch("dclimate_zarr_client.ipfs_retrieval.xr.open_zarr")
+@patch("dclimate_zarr_client.ipfs_retrieval.IPFSZarr3")
+@patch("dclimate_zarr_client.ipfs_retrieval.HAMT")
+@patch("dclimate_zarr_client.ipfs_retrieval._get_ipfs_store")
+def test_get_dataset_by_ipfs_cid_other_runtime_error(
+    mock_get_store, mock_hamt, mock_ipfs_zarr3, mock_open_zarr
+):
+    """Test generic RuntimeError for other failures during loading."""
+    valid_cid = "bafybeicg2rebjoofv4kbyovkw7af3rpiitvnl6i7ckcywaq6za2eflbka4"
+    mock_open_zarr.side_effect = Exception("Some Zarr parsing error")
+
+    with pytest.raises(
+        RuntimeError, match=f"Failed to load Zarr dataset from IPFS CID {valid_cid}"
+    ):
+        _get_dataset_by_ipfs_cid(valid_cid)
+
+
+# --- Tests for list_datasets (Mocked Error Paths) ---
+
+
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_ipns")
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_cid")
+def test_list_datasets_root_catalog_fetch_error(mock_fetch_cid, mock_fetch_ipns):
+    """Test list_datasets fails if root catalog fetch fails."""
+    mock_fetch_ipns.side_effect = StacCatalogError("Cannot fetch root")
+    with pytest.raises(
+        StacCatalogError,
+        match="Failed to fetch or parse root catalog.*for listing datasets",
+    ):
+        list_datasets(root_catalog_ipns=DCLIMATE_STAC_CATALOG_IPNS)
+
+
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_ipns")
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_cid")
+def test_list_datasets_no_collections(mock_fetch_cid, mock_fetch_ipns):
+    """Test list_datasets returns empty list if no child collections found."""
+    mock_fetch_ipns.return_value = {"type": "Catalog", "id": "root", "links": []}
+    result = list_datasets(root_catalog_ipns=DCLIMATE_STAC_CATALOG_IPNS)
+    assert result == []
+    mock_fetch_cid.assert_not_called()
+
+
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_ipns")
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_cid")
+def test_list_datasets_fails_on_bad_collection(mock_fetch_cid, mock_fetch_ipns):
+    """Test list_datasets re-raises StacCatalogError from a collection fetch."""
+    good_collection_cid = "bafyGoodCollection"
+    bad_collection_cid = "bafyBadCollection"
+    good_item_cid = "bafyGoodItem"
+    good_item_id = "good-dataset-id"
+
+    mock_fetch_ipns.return_value = {
+        "type": "Catalog",
+        "id": "root",
+        "links": [
+            {
+                "rel": "child",
+                "type": "application/json",
+                "href": {"/": bad_collection_cid},
+            },
+            {
+                "rel": "child",
+                "type": "application/json",
+                "href": {"/": good_collection_cid},
+            },
+        ],
+    }
+
+    def fetch_cid_side_effect(cid_str, store):
+        if cid_str == bad_collection_cid:
+            raise StacCatalogError("Cannot load bad collection")
+        elif cid_str == good_collection_cid:
+            return {
+                "type": "Collection",
+                "id": "good",
+                "links": [
+                    {
+                        "rel": "item",
+                        "type": "application/json",
+                        "href": {"/": good_item_cid},
+                    }
+                ],
+            }
+        elif cid_str == good_item_cid:
+            return {
+                "type": "Feature",
+                "id": good_item_id,
+                "assets": {"hamt-zarr": {"href": "/ipfs/bafyHAMT"}},
+            }
+        else:
+            raise ValueError("Unknown CID")
+
+    mock_fetch_cid.side_effect = fetch_cid_side_effect
+
+    with pytest.raises(StacCatalogError, match="Cannot load bad collection"):
+        list_datasets(root_catalog_ipns=DCLIMATE_STAC_CATALOG_IPNS)
+
+
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_ipns")
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_cid")
+def test_list_datasets_skips_bad_item(mock_fetch_cid, mock_fetch_ipns):
+    """Test list_datasets skips items that fail to load or parse."""
+    collection_cid = "bafyCollection"
+    bad_item_cid = "bafyBadItem"
+    good_item_cid = "bafyGoodItem"
+    invalid_format_item_cid = "bafyInvalidFormatItem"
+    missing_id_item_cid = "bafyMissingIdItem"
+
+    good_item_id = "good-dataset-id"
+
+    mock_fetch_ipns.return_value = {
+        "type": "Catalog",
+        "id": "root",
+        "links": [
+            {"rel": "child", "type": "application/json", "href": {"/": collection_cid}}
+        ],
+    }
+
+    def fetch_cid_side_effect(cid_str, store):
+        if cid_str == collection_cid:
+            return {
+                "type": "Collection",
+                "id": "coll",
+                "links": [
+                    {
+                        "rel": "item",
+                        "type": "application/json",
+                        "href": {"/": bad_item_cid},
+                    },
+                    {
+                        "rel": "item",
+                        "type": "application/json",
+                        "href": {"/": good_item_cid},
+                    },
+                    {
+                        "rel": "item",
+                        "type": "application/json",
+                        "href": {"/": invalid_format_item_cid},
+                    },
+                    {
+                        "rel": "item",
+                        "type": "application/json",
+                        "href": {"/": missing_id_item_cid},
+                    },
+                ],
+            }
+        elif cid_str == bad_item_cid:
+            raise IpfsConnectionError("Cannot load bad item")
+        elif cid_str == good_item_cid:
+            return {
+                "type": "Feature",
+                "id": good_item_id,
+                "assets": {"hamt-zarr": {"href": "/ipfs/bafyHAMT"}},
+            }
+        elif cid_str == invalid_format_item_cid:
+            return {"not": "a feature"}
+        elif cid_str == missing_id_item_cid:
+            return {"type": "Feature", "assets": {}}  # Missing ID
+        else:
+            raise ValueError("Unknown CID")
+
+    mock_fetch_cid.side_effect = fetch_cid_side_effect
+
+    result = list_datasets(root_catalog_ipns=DCLIMATE_STAC_CATALOG_IPNS)
+    assert result == [good_item_id]  # Only the good dataset is listed
+    # Called for collection, bad item, good item, invalid item, missing ID item
+    assert mock_fetch_cid.call_count == 5
+
+
+@patch("dclimate_zarr_client.ipfs_retrieval.fetch_json_from_ipns")
+def test_list_datasets_uses_default_root_catalog(mock_fetch_ipns):
+    """Test that list_datasets uses DCLIMATE_STAC_CATALOG_IPNS if none provided."""
+    # Mock fetch_ipns to return a minimal valid catalog
+    mock_fetch_ipns.return_value = {"type": "Catalog", "id": "root", "links": []}
+    list_datasets()  # Call without root_catalog_ipns argument
+    # Assert fetch_json_from_ipns was called with the default IPNS name
+    mock_fetch_ipns.assert_called_once_with(
+        DCLIMATE_STAC_CATALOG_IPNS, gateway_uri_stem=None
+    )
+
+
+# --- Tests for Legacy/Utility Functions (get_ipns_name_hash, update_cache_if_changed covered) ---
+# --- Tests for _get_single_metadata (Legacy/Utility Function, mocked) ---
+
+
+def test_get_single_metadata_success(mock_requests_get: MagicMock):
+    """Test _get_single_metadata successfully fetches and parses JSON."""
+    ipfs_hash = "QmSomeHash"
+    expected_metadata = {"prop": "value", "links": []}
+    mock_response = MagicMock(spec=requests.Response)
+    mock_response.raise_for_status.return_value = None
+    mock_response.json.return_value = expected_metadata
+    mock_requests_get.return_value = mock_response
+
+    metadata = _get_single_metadata(ipfs_hash)
+
+    assert metadata == expected_metadata
+    expected_url_pattern = rf".*/ipfs/{ipfs_hash}"
+    call_args, call_kwargs = mock_requests_get.call_args
+    assert re.match(expected_url_pattern, call_args[0]), (
+        f"URL {call_args[0]} does not match pattern {expected_url_pattern}"
+    )
+    mock_response.raise_for_status.assert_called_once()
+    mock_response.json.assert_called_once()
+
+
+def test_get_single_metadata_http_error(mock_requests_get: MagicMock):
+    """Test _get_single_metadata raises HTTPError."""
+    ipfs_hash = "QmSomeHash"
+    mock_response = MagicMock(spec=requests.Response)
+    http_error = requests.exceptions.HTTPError("404 Not Found")
+    mock_response.raise_for_status.side_effect = http_error
+    mock_requests_get.return_value = mock_response
+
+    with pytest.raises(requests.exceptions.HTTPError):
+        _get_single_metadata(ipfs_hash)
+
+
 # --- Tests for get_ipns_name_hash (Legacy/Utility Function) ---
 # These tests remain as UNIT tests, mocking requests and file system,
 # as they test the specific fallback logic of this function, not STAC traversal.