Retrieve dClimate GIS Zarr datasets stored on IPFS
Uses STAC (SpatioTemporal Asset Catalog) for dataset discovery and py-hamt to access Zarr data structures stored efficiently on IPFS.
Filtering and aggregation are packaged into convenience functions optimized for flexibility and performance.
Looking for JavaScript? Check out our JavaScript client for Node.js and browser environments.
from datetime import datetime
import dclimate_client_py as client
from dclimate_client_py import dClimateClient
# --- Recommended: Using dClimateClient (async context manager) ---
async def main():
    """Load the ERA5 ``temperature_2m`` dataset and query it two ways.

    First loads the dataset with ``load_dataset`` and chains a point and
    time-range query on the returned wrapper, then repeats the load with
    ``select_dataset``, which takes the request and the selection together.
    Results are printed; nothing is returned.
    """
    # The client manages IPFS connections automatically
    # No need to import or configure KuboCAS directly!
    async with dClimateClient() as dclimate:
        # Load datasets by name from the internal catalog
        # For datasets with multiple variants, you must specify which variant
        # Returns a tuple: (dataset, metadata)
        dataset, metadata = await dclimate.load_dataset(
            dataset="temperature_2m",
            collection="era5",  # Can also pass "ecmwf_era5"
            organization="ecmwf",
            variant="finalized",  # Required for multi-variant datasets
            return_xarray=False  # Returns GeotemporalData wrapper (default)
        )

        # Check metadata about what was loaded
        print(f"Loaded: {metadata['slug']}")
        print(f"CID: {metadata['cid']}")
        print(f"Timestamp: {metadata.get('timestamp')}")  # If available from URL fetch
        print(f"Source: {metadata['source']}")  # 'stac' or 'direct_cid'

        # Apply queries using the GeotemporalData interface
        dataset_filtered = dataset.point(latitude=40.875, longitude=-104.875)
        dataset_filtered = dataset_filtered.time_range(
            datetime(2023, 1, 1),
            datetime(2023, 1, 5)
        )
        data_dict = dataset_filtered.as_dict()
        print(data_dict['data'])

        # Or load and select in one call.
        western_europe, metadata = await dclimate.select_dataset(
            request={
                "dataset": "temperature_2m",
                "collection": "era5",
                "organization": "ecmwf",
                "variant": "finalized",
            },
            selection={
                # Bounds are [west, south, east, north].
                "bounds": [-12, 35, 16, 60],
                "time_range": {
                    "start": datetime(2024, 1, 1),
                    "end": datetime(2024, 1, 7, 23),
                },
            },
        )
# ERA5 land datasets
#
# ERA5 and ERA5-Land datasets are separate dataset IDs within the ECMWF ERA5
# collection. Use list_datasets() or list_available_datasets() to inspect the
# exact names before loading.
async def main_era5_land():
    """Load the ERA5 and ERA5-Land total-precipitation datasets side by side.

    Shows that the land dataset is requested from the same collection and
    organization as the non-land one and differs only in its ``dataset`` name.
    """
    async with dClimateClient() as dclimate:
        # Non-land ERA5 total precipitation
        precip, precip_metadata = await dclimate.load_dataset(
            dataset="precipitation_total",
            collection="era5",
            organization="ecmwf",
            variant="finalized",
        )

        # ERA5-Land total precipitation
        land_precip, land_metadata = await dclimate.load_dataset(
            dataset="precipitation_total_land",
            collection="era5",
            organization="ecmwf",
            variant="finalized",
        )

        # ERA5-Land wind datasets follow the same pattern:
        # dataset="wind_u_10m_land" or dataset="wind_v_10m_land"
# Custom IPFS endpoints (optional)
async def main_custom_ipfs():
    """Load a dataset through explicitly configured IPFS endpoints.

    Passes ``gateway_base_url`` and ``rpc_base_url`` to ``dClimateClient``
    instead of relying on its defaults, then loads ``temperature_2m``.
    """
    async with dClimateClient(
        gateway_base_url="https://ipfs.io",
        rpc_base_url="http://localhost:5001"
    ) as dclimate:
        dataset, metadata = await dclimate.load_dataset(
            dataset="temperature_2m",
            collection="era5",
            organization="ecmwf",
            variant="finalized"
        )
        # Query dataset...
# Get raw xarray.Dataset directly
async def main_xarray():
    """Load ``temperature_2m`` as a raw ``xarray.Dataset`` and print it.

    Sets ``return_xarray=True`` so ``load_dataset`` hands back the
    ``xarray.Dataset`` rather than the default wrapper, then prints the
    dataset and the CID recorded in its metadata.
    """
    async with dClimateClient() as dclimate:
        xr_dataset, metadata = await dclimate.load_dataset(
            dataset="temperature_2m",
            collection="era5",
            organization="ecmwf",
            variant="finalized",
            return_xarray=True  # Returns xarray.Dataset
        )
        print(xr_dataset)
        print(f"Dataset CID: {metadata['cid']}")
# List available datasets from the STAC catalog
from dclimate_client_py import list_available_datasets, load_stac_catalog
# Load the STAC catalog
stac_catalog = load_stac_catalog("https://ipfs-gateway.dclimate.net")

# List all available datasets
datasets = list_available_datasets(stac_catalog)

# Print one summary per collection: title, id, optional organization,
# then the dataset types it contains.
for collection_id, info in datasets.items():
    print(
        f"Collection: {info['title']} ({collection_id})"
        + (f" | org: {info['organization']}" if info.get('organization') else "")
    )
    print(f" Dataset types: {', '.join(info['types'])}")

# The Python client also exposes a Siren REST client for metrics and regions.
from dclimate_client_py import (
dClimateClient,
SirenApiKeyAuth,
SirenMetricQuery,
SirenOptions,
)
async def main():
    """List Siren regions and print a sample of one metric for the first region.

    Builds a ``dClimateClient`` configured with Siren API-key auth, lists the
    regions, then requests the ``average_precip`` metric for January 2025 for
    the first region returned and prints its first three entries.
    """
    client = dClimateClient(
        siren=SirenOptions(
            auth=SirenApiKeyAuth()  # reads SIREN_API_KEY and SIREN_ACCOUNT_ID from env
        )
    )

    regions = await client.list_regions()
    print(f"Loaded {len(regions)} regions")
    if not regions:
        # Nothing to query: regions[0] below would raise IndexError.
        return

    data = await client.get_metric_data(
        SirenMetricQuery(
            region_id=regions[0].id,
            metric="average_precip",
            start_date="2025-01-01",
            end_date="2025-01-31",
        )
    )
    print(data[:3])


# x402 is included in the default install.
More examples can be found at dClimate Jupyter Notebooks. To run your own IPFS gateway, follow the instructions for installing ipfs. For additional assistance, find us on Discord; if you are an organization or business, reach out to us at community at dclimate dot net.
uv venv .venv
source .venv/bin/activate # macOS/Linux
.\.venv\Scripts\activate # Windows
uv sync --extra dev --extra testing
uv run pytest tests/
uv run pytest --cov=dclimate_client_py tests/ --cov-report=xml
- Optionally, you can run your own IPFS server to host your own datasets or connect to others.
Entry point to the code. Contains geo_temporal_query, which combines all possible subsetting
and aggregation logic in a single function. It can output the data as either a dict
or bytes representing an xarray dataset.
Various exceptions to be raised for bad or invalid user input.
Functions to manipulate xarray datasets. Contains polygon, rectangle, circle and point spatial
subsetting options, as well as temporal subsetting. Also allows for both spatial and temporal
aggregations.
STAC (SpatioTemporal Asset Catalog) integration for dClimate datasets. Provides functions to:
- Fetch the latest STAC catalog CID from the dClimate IPFS gateway
- Load and navigate STAC catalogs stored on IPFS
- Resolve dataset names to IPFS CIDs using the STAC catalog structure
- List all available datasets and collections
Uses a custom IPFSStacIO implementation to transparently resolve ipfs:// URIs via HTTP gateways, allowing pystac to work seamlessly with IPFS-hosted catalogs.
Functions for loading Zarr datasets from IPFS using py-hamt. Handles interaction with IPFS gateways and RPC endpoints through the KuboCAS interface.