diff --git a/pyproject.toml b/pyproject.toml
index 0f899185c505d7..ff60933a2d39fb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -89,7 +89,7 @@ dependencies = [
   "sentry-ophio>=1.1.3",
   "sentry-protos>=0.8.11",
   "sentry-redis-tools>=0.5.0",
-  "sentry-relay>=0.9.26",
+  "sentry-relay>=0.9.27",
   "sentry-sdk[http2]>=2.47.0",
   "sentry-usage-accountant>=0.0.10",
   # remove once there are no unmarked transitive dependencies on setuptools
diff --git a/src/sentry/conf/server.py b/src/sentry/conf/server.py
index 6bbb2650b790c8..82d6e0f2455637 100644
--- a/src/sentry/conf/server.py
+++ b/src/sentry/conf/server.py
@@ -1205,10 +1205,15 @@ def SOCIAL_AUTH_DEFAULT_USERNAME() -> str:
         "task": "relocation:sentry.relocation.transfer.find_relocation_transfer_region",
         "schedule": crontab("*/5", "*", "*", "*", "*"),
     },
+    # TODO(constantinius): Remove fetch-ai-model-costs once all consumers have migrated to fetch-ai-model-metadata
     "fetch-ai-model-costs": {
         "task": "ai_agent_monitoring:sentry.tasks.ai_agent_monitoring.fetch_ai_model_costs",
         "schedule": crontab("*/30", "*", "*", "*", "*"),
     },
+    "fetch-ai-model-metadata": {
+        "task": "ai_agent_monitoring:sentry.tasks.ai_agent_monitoring.fetch_ai_model_metadata",
+        "schedule": crontab("*/30", "*", "*", "*", "*"),
+    },
     "llm-issue-detection": {
         "task": "issues:sentry.tasks.llm_issue_detection.run_llm_issue_detection",
         "schedule": crontab("0", "*", "*", "*", "*"),
diff --git a/src/sentry/relay/config/ai_model_costs.py b/src/sentry/relay/config/ai_model_costs.py
index 2acdd0075a0d0e..3f35c2f8dcdd14 100644
--- a/src/sentry/relay/config/ai_model_costs.py
+++ b/src/sentry/relay/config/ai_model_costs.py
@@ -11,13 +11,20 @@
 type ModelId = str
 
 
-# Cache key for storing AI model costs
+# Legacy cache key for AI model costs (v2 flat format)
+# TODO(constantinius): Remove once all consumers have migrated to AI_MODEL_METADATA_CACHE_KEY
 AI_MODEL_COSTS_CACHE_KEY = "ai-model-costs:v2"
-# Cache timeout: 30 days (we re-fetch every 30 minutes, so this provides more than enough overlap)
 AI_MODEL_COSTS_CACHE_TTL = 30 * 24 * 60 * 60
 
+# Cache key for storing LLM model metadata (v1 nested format)
+AI_MODEL_METADATA_CACHE_KEY = "ai-model-metadata:v1"
+# Cache timeout: 30 days (we re-fetch every 30 minutes, so this provides more than enough overlap)
+AI_MODEL_METADATA_CACHE_TTL = 30 * 24 * 60 * 60
+
 
 class AIModelCostV2(TypedDict):
+    """Legacy flat format. TODO(constantinius): Remove once all consumers have migrated."""
+
     inputPerToken: float
     outputPerToken: float
     outputReasoningPerToken: float
@@ -26,18 +33,34 @@ class AIModelCostV2(TypedDict):
 
 
 class AIModelCosts(TypedDict):
+    """Legacy config type. TODO(constantinius): Remove once all consumers have migrated."""
+
     version: Required[int]
     models: Required[dict[ModelId, AIModelCostV2]]
 
 
+class AIModelCost(TypedDict):
+    inputPerToken: float
+    outputPerToken: float
+    outputReasoningPerToken: float
+    inputCachedPerToken: float
+    inputCacheWritePerToken: float
+
+
+class AIModelMetadata(TypedDict, total=False):
+    costs: Required[AIModelCost]
+    contextSize: int
+
+
+class AIModelMetadataConfig(TypedDict):
+    version: Required[int]
+    models: Required[dict[ModelId, AIModelMetadata]]
+
+
 def ai_model_costs_config() -> AIModelCosts | None:
     """
-    Get AI model costs configuration.
-    AI model costs are set in cache by a cron job,
-    if there are no costs, it should be investigated why.
-
-    Returns:
-        AIModelCosts object containing cost information for AI models
+    Legacy: Get AI model costs configuration.
+    TODO(constantinius): Remove once all consumers have migrated to ai_model_metadata_config.
     """
     if settings.SENTRY_AIR_GAP:
         return None
@@ -47,7 +70,29 @@ def ai_model_costs_config() -> AIModelCosts | None:
         return cached_costs
 
     if not settings.IS_DEV:
-        # in dev environment, we don't want to log this
         logger.warning("Empty model costs")
 
     return None
+
+
+def ai_model_metadata_config() -> AIModelMetadataConfig | None:
+    """
+    Return the LLM model metadata configuration stored in cache.
+
+    The cache entry is written by a cron job; a missing entry outside of
+    dev is logged as a warning because it should be investigated.
+
+    Returns:
+        The cached AIModelMetadataConfig (costs and context sizes per model),
+        or None when running air-gapped or when nothing is cached.
+    """
+    if settings.SENTRY_AIR_GAP:
+        return None
+
+    metadata_config = cache.get(AI_MODEL_METADATA_CACHE_KEY)
+    if metadata_config is None and not settings.IS_DEV:
+        # the warning is deliberately skipped in dev environments
+        logger.warning("Empty LLM model metadata")
+
+    # either the cached config or None when the cache entry is missing
+    return metadata_config
diff --git a/src/sentry/relay/globalconfig.py b/src/sentry/relay/globalconfig.py
index 8ee3b4b449ae33..35934957d1d36b 100644
--- a/src/sentry/relay/globalconfig.py
+++ b/src/sentry/relay/globalconfig.py
@@ -1,7 +1,12 @@
 from typing import Any, TypedDict
 
 import sentry.options
-from sentry.relay.config.ai_model_costs import AIModelCosts, ai_model_costs_config
+from sentry.relay.config.ai_model_costs import (
+    AIModelCosts,
+    AIModelMetadataConfig,
+    ai_model_costs_config,
+    ai_model_metadata_config,
+)
 from sentry.relay.config.measurements import MeasurementsConfig, get_measurements_config
 from sentry.relay.config.metric_extraction import (
     MetricExtractionGroups,
@@ -39,7 +44,10 @@ class SpanOpDefaults(TypedDict):
 
 class GlobalConfig(TypedDict, total=False):
     measurements: MeasurementsConfig
-    aiModelCosts: AIModelCosts | None
+    aiModelCosts: (
+        AIModelCosts | None
+    )  # TODO(constantinius): Remove once all consumers use aiModelMetadata
+    aiModelMetadata: AIModelMetadataConfig | None
     metricExtraction: MetricExtractionGroups
     filters: GenericFiltersConfig | None
     spanOpDefaults: SpanOpDefaults
@@ -78,7 +86,8 @@ def get_global_config() -> GlobalConfig:
 
     global_config: GlobalConfig = {
         "measurements": get_measurements_config(),
-        "aiModelCosts": ai_model_costs_config(),
+        "aiModelCosts": ai_model_costs_config(),  # TODO(constantinius): Remove once all consumers use aiModelMetadata
+        "aiModelMetadata": ai_model_metadata_config(),
         "metricExtraction": global_metric_extraction_groups(),
         "spanOpDefaults": span_op_defaults(),
     }
diff --git a/src/sentry/tasks/ai_agent_monitoring.py b/src/sentry/tasks/ai_agent_monitoring.py
index f4f0bd2898568c..fca779eba9a3f1 100644
--- a/src/sentry/tasks/ai_agent_monitoring.py
+++ b/src/sentry/tasks/ai_agent_monitoring.py
@@ -8,8 +8,13 @@
 from sentry.relay.config.ai_model_costs import (
     AI_MODEL_COSTS_CACHE_KEY,
     AI_MODEL_COSTS_CACHE_TTL,
+    AI_MODEL_METADATA_CACHE_KEY,
+    AI_MODEL_METADATA_CACHE_TTL,
+    AIModelCost,
     AIModelCosts,
     AIModelCostV2,
+    AIModelMetadata,
+    AIModelMetadataConfig,
     ModelId,
 )
 from sentry.silo.base import SiloMode
@@ -329,3 +334,243 @@ def safe_float_conversion(value: Any) -> float:
             return 0.0
 
     return 0.0
+
+
+# ---------------------------------------------------------------------------
+# New task: fetch_ai_model_metadata
+# Fetches model costs + context size into the new AIModelMetadata format.
+# Runs alongside fetch_ai_model_costs during the migration period.
+# ---------------------------------------------------------------------------
+
+
+def _add_glob_model_names_metadata(models_dict: dict[ModelId, AIModelMetadata]) -> None:
+    """Register normalized and prefix-glob aliases for every model ID, in place."""
+    # Snapshot the keys first: aliases are inserted while we iterate.
+    for original_id in list(models_dict):
+        normalized_id = _normalize_model_id(original_id)
+        # Entries that already exist are never overwritten by an alias.
+        if normalized_id != original_id:
+            models_dict.setdefault(normalized_id, models_dict[original_id])
+
+        glob_id = _create_prefix_glob_model_name(normalized_id)
+        if glob_id not in models_dict:
+            models_dict[glob_id] = models_dict[normalized_id]
+
+
+@instrumented_task(
+    name="sentry.tasks.ai_agent_monitoring.fetch_ai_model_metadata",
+    namespace=ai_agent_monitoring_tasks,
+    processing_deadline_duration=35,
+    expires=30,
+    silo_mode=SiloMode.CELL,
+)
+def fetch_ai_model_metadata() -> None:
+    """
+    Fetch LLM model metadata (costs, context size) from the OpenRouter and
+    models.dev APIs and store the merged result in cache.
+
+    OpenRouter entries take precedence; models.dev only contributes model IDs
+    that OpenRouter did not provide. A failure in either fetch is logged and
+    re-raised before anything is written to cache. No-op when air-gapped.
+    """
+    if settings.SENTRY_AIR_GAP:
+        return
+
+    merged_models: dict[ModelId, AIModelMetadata] = {}
+
+    # OpenRouter is the primary source, so it seeds the dictionary
+    try:
+        merged_models.update(_fetch_openrouter_models_metadata())
+    except Exception as e:
+        logger.warning(
+            "Failed to fetch LLM model metadata from OpenRouter API", extra={"error": str(e)}
+        )
+        raise
+
+    # models.dev only fills in model IDs that OpenRouter did not provide
+    try:
+        for model_id, model_metadata in _fetch_models_dev_models_metadata().items():
+            merged_models.setdefault(model_id, model_metadata)
+    except Exception as e:
+        logger.warning(
+            "Failed to fetch LLM model metadata from models.dev API", extra={"error": str(e)}
+        )
+        raise
+
+    # Add glob versions of model names for flexible matching
+    _add_glob_model_names_metadata(merged_models)
+
+    cache.set(
+        AI_MODEL_METADATA_CACHE_KEY,
+        AIModelMetadataConfig(version=1, models=merged_models),
+        AI_MODEL_METADATA_CACHE_TTL,
+    )
+
+
+def _fetch_openrouter_models_metadata() -> dict[ModelId, AIModelMetadata]:
+    """Fetch model metadata from OpenRouter API, keyed by bare model name.
+
+    Raises ValueError on an unexpected response shape. Example response:
+    {
+        "data": [
+            {
+                "id": "openai/gpt-4o-mini",
+                "name": "OpenAI: GPT-4o Mini",
+                "context_length": 1000000,
+                "pricing": {
+                    "prompt": "0.0000003",
+                    "completion": "0.00000165",
+                    "internal_reasoning": "0.0000003",
+                    "input_cache_read": "0.0000003",
+                    "input_cache_write": "0.00000125",
+                },
+            },
+        ]
+    }
+    """
+    response = safe_urlopen(OPENROUTER_MODELS_API_URL)
+    response.raise_for_status()
+
+    data = response.json()
+
+    if not isinstance(data, dict) or "data" not in data:
+        raise ValueError("Invalid OpenRouter response format: missing 'data' field")
+
+    models_data = data["data"]
+    if not isinstance(models_data, list):
+        raise ValueError("Invalid OpenRouter response format: 'data' field is not a list")
+
+    models_dict: dict[ModelId, AIModelMetadata] = {}
+
+    for model_data in models_data:
+        if not isinstance(model_data, dict):
+            continue
+
+        model_id = model_data.get("id")
+        if not isinstance(model_id, str) or not model_id:
+            continue
+
+        # OpenRouter prefixes the model ID with the provider (e.g. openai/gpt-4o-mini),
+        # but our SDKs only send the bare model name (e.g. gpt-4o-mini)
+        if "/" in model_id:
+            model_id = model_id.split("/", maxsplit=1)[1]
+
+        # A null or non-object "pricing" must not abort the whole fetch: use zero costs
+        pricing = model_data.get("pricing")
+        if not isinstance(pricing, dict):
+            pricing = {}
+
+        # OpenRouter provides costs as strings, we need to convert to float
+        try:
+            model_cost = AIModelCost(
+                inputPerToken=safe_float_conversion(pricing.get("prompt")),
+                outputPerToken=safe_float_conversion(pricing.get("completion")),
+                outputReasoningPerToken=safe_float_conversion(pricing.get("internal_reasoning")),
+                inputCachedPerToken=safe_float_conversion(pricing.get("input_cache_read")),
+                inputCacheWritePerToken=safe_float_conversion(pricing.get("input_cache_write")),
+            )
+
+            metadata = AIModelMetadata(costs=model_cost)
+            context_length = model_data.get("context_length")
+            if isinstance(context_length, int) and context_length > 0:
+                metadata["contextSize"] = context_length
+
+            models_dict[model_id] = metadata
+        except (ValueError, TypeError) as e:
+            logger.warning(
+                "fetch_ai_model_metadata.openrouter_model_parse_error",
+                extra={"model_id": model_id, "error": str(e)},
+            )
+            continue
+
+    return models_dict
+
+
+def _fetch_models_dev_models_metadata() -> dict[ModelId, AIModelMetadata]:
+    """Fetch model metadata from models.dev API, keyed by model name without provider prefix.
+
+    Raises ValueError when the response is not a dict. Example response shape:
+    {
+        "openai": {
+            "models": {
+                "gpt-4": {
+                    "cost": {
+                        "input": 0.0000003,
+                        "output": 0.00000165,
+                        "cache_read": 0.0000003,
+                        "cache_write": 0.00000125,
+                    },
+                    "limit": {
+                        "context": 128000,
+                        "output": 16384,
+                    }
+                }
+            }
+        }
+    }
+    """
+    response = safe_urlopen(MODELS_DEV_API_URL)
+    response.raise_for_status()
+
+    data = response.json()
+
+    if not isinstance(data, dict):
+        raise ValueError("Invalid models.dev response format: expected dict")
+
+    models_dict: dict[ModelId, AIModelMetadata] = {}
+
+    for provider_name, provider_data in data.items():
+        if not isinstance(provider_data, dict):
+            continue
+
+        models = provider_data.get("models", {})
+        if not isinstance(models, dict):
+            continue
+
+        for model_id, model_data in models.items():
+            if not isinstance(model_data, dict):
+                continue
+
+            cost_data = model_data.get("cost", {})
+            if not isinstance(cost_data, dict) or not cost_data:
+                # Skip models whose cost data is missing, empty, or not a dict
+                continue
+
+            # models.dev may prefix the model ID with the provider name,
+            # e.g. google/gemini-2.0-flash-001. Our SDKs only send the bare model name
+            # (e.g. gemini-2.0-flash-001), so everything up to the first "/" is dropped.
+            if "/" in model_id:
+                model_id = model_id.split("/", maxsplit=1)[1]
+
+            # models.dev lists prices per 1M tokens; divide to get per-token costs
+            try:
+                model_cost = AIModelCost(
+                    inputPerToken=safe_float_conversion(cost_data.get("input"))
+                    / 1000000,  # per-1M-token price -> per-token price
+                    outputPerToken=safe_float_conversion(cost_data.get("output"))
+                    / 1000000,  # per-1M-token price -> per-token price
+                    outputReasoningPerToken=0.0,  # models.dev doesn't provide reasoning costs
+                    inputCachedPerToken=safe_float_conversion(cost_data.get("cache_read"))
+                    / 1000000,  # per-1M-token price -> per-token price
+                    inputCacheWritePerToken=safe_float_conversion(cost_data.get("cache_write"))
+                    / 1000000,  # per-1M-token price -> per-token price
+                )
+
+                metadata = AIModelMetadata(costs=model_cost)
+
+                limit_data = model_data.get("limit", {})
+                if isinstance(limit_data, dict):
+                    context_size = limit_data.get("context")
+                    if isinstance(context_size, int) and context_size > 0:
+                        metadata["contextSize"] = context_size
+
+                models_dict[model_id] = metadata  # a later provider overwrites an earlier one
+
+            except (ValueError, TypeError) as e:
+                logger.warning(
+                    "fetch_ai_model_metadata.models_dev_model_parse_error",
+                    extra={"model_id": model_id, "provider": provider_name, "error": str(e)},
+                )
+                continue
+
+    return models_dict
diff --git a/tests/sentry/tasks/test_ai_agent_monitoring.py b/tests/sentry/tasks/test_ai_agent_monitoring.py
index f3740df344b4c4..9b212ec9a4b672 100644
--- a/tests/sentry/tasks/test_ai_agent_monitoring.py
+++ b/tests/sentry/tasks/test_ai_agent_monitoring.py
@@ -1,19 +1,31 @@
 import pytest
 import responses
 
-from sentry.relay.config.ai_model_costs import AI_MODEL_COSTS_CACHE_KEY, AIModelCosts
+from sentry.relay.config.ai_model_costs import (
+    AI_MODEL_COSTS_CACHE_KEY,
+    AI_MODEL_METADATA_CACHE_KEY,
+    AIModelMetadataConfig,
+)
 from sentry.tasks.ai_agent_monitoring import (
     MODELS_DEV_API_URL,
     OPENROUTER_MODELS_API_URL,
     fetch_ai_model_costs,
+    fetch_ai_model_metadata,
 )
 from sentry.testutils.cases import TestCase
 from sentry.utils.cache import cache
 
 
-def _get_ai_model_costs_from_cache() -> AIModelCosts | None:
+def _get_metadata_from_cache() -> AIModelMetadataConfig | None:
+    """
+    Utility function to retrieve LLM model metadata from cache.
     """
-    Utility function to retrieve AI model costs from cache.
+    return cache.get(AI_MODEL_METADATA_CACHE_KEY)
+
+
+def _get_legacy_costs_from_cache():
+    """
+    Utility function to retrieve legacy AI model costs from cache.
     """
     return cache.get(AI_MODEL_COSTS_CACHE_KEY)
 
@@ -121,7 +133,11 @@ def _get_ai_model_costs_from_cache() -> AIModelCosts | None:
                     "output": 1.6 * 1000000,  # models.dev have prices per 1M tokens
                     "cache_read": 0.1 * 1000000,  # models.dev have prices per 1M tokens
                     "cache_write": 0.2 * 1000000,  # models.dev have prices per 1M tokens
-                }
+                },
+                "limit": {
+                    "context": 1048576,
+                    "output": 32768,
+                },
             },
             "gpt-4": {  # This should be skipped since it exists in OpenRouter
                 "cost": {
@@ -129,7 +145,11 @@ def _get_ai_model_costs_from_cache() -> AIModelCosts | None:
                     "output": 0.2 * 1000000,  # models.dev have prices per 1M tokens
                     "cache_read": 0.05 * 1000000,  # models.dev have prices per 1M tokens
                     "cache_write": 0.15 * 1000000,  # models.dev have prices per 1M tokens
-                }
+                },
+                "limit": {
+                    "context": 8192,
+                    "output": 4096,
+                },
             },
         }
     },
@@ -141,7 +161,11 @@ def _get_ai_model_costs_from_cache() -> AIModelCosts | None:
                     "output": 10 * 1000000,  # models.dev have prices per 1M tokens
                     "cache_read": 0.31 * 1000000,  # models.dev have prices per 1M tokens
                     "cache_write": 0.62 * 1000000,  # models.dev have prices per 1M tokens
-                }
+                },
+                "limit": {
+                    "context": 1048576,
+                    "output": 65536,
+                },
             },
             "google/gemini-2.0-flash-001": {  # Test provider prefix stripping
                 "cost": {
@@ -149,7 +173,11 @@ def _get_ai_model_costs_from_cache() -> AIModelCosts | None:
                     "output": 0.3 * 1000000,  # models.dev have prices per 1M tokens
                     "cache_read": 0.01875 * 1000000,  # models.dev have prices per 1M tokens
                     "cache_write": 0.0375 * 1000000,  # models.dev have prices per 1M tokens
-                }
+                },
+                "limit": {
+                    "context": 1048576,
+                    "output": 8192,
+                },
             },
         }
     },
@@ -157,10 +185,11 @@ def _get_ai_model_costs_from_cache() -> AIModelCosts | None:
 }
 
 
-class FetchAIModelCostsTest(TestCase):
+class FetchAIModelMetadataTest(TestCase):
     def setUp(self) -> None:
         super().setUp()
         # Clear cache before each test
+        cache.delete(AI_MODEL_METADATA_CACHE_KEY)
         cache.delete(AI_MODEL_COSTS_CACHE_KEY)
 
     def _mock_openrouter_api_response(self, mock_response: dict):
@@ -180,100 +209,105 @@ def _mock_models_dev_api_response(self, mock_response: dict):
         )
 
     @responses.activate
-    def test_fetch_ai_model_costs_success_both_apis(self) -> None:
+    def test_fetch_ai_model_metadata_success_both_apis(self) -> None:
         """Test successful fetching and caching from both APIs"""
         self._mock_openrouter_api_response(MOCK_OPENROUTER_API_RESPONSE)
         self._mock_models_dev_api_response(MOCK_MODELS_DEV_API_RESPONSE)
 
-        fetch_ai_model_costs()
+        fetch_ai_model_metadata()
 
         # Verify the data was cached correctly
-        cached_data = _get_ai_model_costs_from_cache()
+        cached_data = _get_metadata_from_cache()
         assert cached_data is not None
-        assert cached_data.get("version") == 2
-        assert cached_data.get("costs") is None
+        assert cached_data.get("version") == 1
         assert cached_data.get("models") is not None
 
         models = cached_data.get("models")
         assert models is not None
 
         # Check OpenRouter models
-        gpt4_model = models["gpt-4"]
-        assert gpt4_model.get("inputPerToken") == 0.0000003  # OpenRouter price, not models.dev
-        assert gpt4_model.get("outputPerToken") == 0.00000165
-        assert gpt4_model.get("outputReasoningPerToken") == 0.0
-        assert gpt4_model.get("inputCachedPerToken") == 0.0000015
-        assert gpt4_model.get("inputCacheWritePerToken") == 0.00001875
-
-        gpt5_model = models["gpt-5"]
-        assert gpt5_model.get("inputPerToken") == 0.00000055
-        assert gpt5_model.get("outputPerToken") == 0.0000022
-        assert gpt5_model.get("outputReasoningPerToken") == 0.00000055
-        assert gpt5_model.get("inputCachedPerToken") == 0.00000055
-        assert gpt5_model.get("inputCacheWritePerToken") == 0.000006875
+        gpt4 = models["gpt-4"]
+        assert gpt4["costs"]["inputPerToken"] == 0.0000003  # OpenRouter price, not models.dev
+        assert gpt4["costs"]["outputPerToken"] == 0.00000165
+        assert gpt4["costs"]["outputReasoningPerToken"] == 0.0
+        assert gpt4["costs"]["inputCachedPerToken"] == 0.0000015
+        assert gpt4["costs"]["inputCacheWritePerToken"] == 0.00001875
+        assert gpt4.get("contextSize") == 1000000  # OpenRouter context_length
+
+        gpt5 = models["gpt-5"]
+        assert gpt5["costs"]["inputPerToken"] == 0.00000055
+        assert gpt5["costs"]["outputPerToken"] == 0.0000022
+        assert gpt5["costs"]["outputReasoningPerToken"] == 0.00000055
+        assert gpt5["costs"]["inputCachedPerToken"] == 0.00000055
+        assert gpt5["costs"]["inputCacheWritePerToken"] == 0.000006875
+        assert gpt5.get("contextSize") == 128000  # OpenRouter context_length
 
         # Check models.dev models
-        gpt41_mini_model = models["gpt-4.1-mini"]
-        assert gpt41_mini_model.get("inputPerToken") == 0.4
-        assert gpt41_mini_model.get("outputPerToken") == 1.6
+        gpt41_mini = models["gpt-4.1-mini"]
+        assert gpt41_mini["costs"]["inputPerToken"] == 0.4
+        assert gpt41_mini["costs"]["outputPerToken"] == 1.6
         assert (
-            gpt41_mini_model.get("outputReasoningPerToken") == 0.0
+            gpt41_mini["costs"]["outputReasoningPerToken"] == 0.0
         )  # models.dev doesn't provide this
-        assert gpt41_mini_model.get("inputCachedPerToken") == 0.1
-        assert gpt41_mini_model.get("inputCacheWritePerToken") == 0.2
-
-        gemini_model = models["gemini-2.5-pro"]
-        assert gemini_model.get("inputPerToken") == 1.25
-        assert gemini_model.get("outputPerToken") == 10
-        assert gemini_model.get("outputReasoningPerToken") == 0.0
-        assert gemini_model.get("inputCachedPerToken") == 0.31
-        assert gemini_model.get("inputCacheWritePerToken") == 0.62
+        assert gpt41_mini["costs"]["inputCachedPerToken"] == 0.1
+        assert gpt41_mini["costs"]["inputCacheWritePerToken"] == 0.2
+        assert gpt41_mini.get("contextSize") == 1048576  # models.dev limit.context
+
+        gemini = models["gemini-2.5-pro"]
+        assert gemini["costs"]["inputPerToken"] == 1.25
+        assert gemini["costs"]["outputPerToken"] == 10
+        assert gemini["costs"]["outputReasoningPerToken"] == 0.0
+        assert gemini["costs"]["inputCachedPerToken"] == 0.31
+        assert gemini["costs"]["inputCacheWritePerToken"] == 0.62
+        assert gemini.get("contextSize") == 1048576  # models.dev limit.context
 
         # Check models.dev model with provider prefix (should be stripped)
-        gemini_flash_model = models["gemini-2.0-flash-001"]
-        assert gemini_flash_model.get("inputPerToken") == 0.075
-        assert gemini_flash_model.get("outputPerToken") == 0.3
-        assert gemini_flash_model.get("outputReasoningPerToken") == 0.0
-        assert gemini_flash_model.get("inputCachedPerToken") == 0.01875
-        assert gemini_flash_model.get("inputCacheWritePerToken") == 0.0375
+        gemini_flash = models["gemini-2.0-flash-001"]
+        assert gemini_flash["costs"]["inputPerToken"] == 0.075
+        assert gemini_flash["costs"]["outputPerToken"] == 0.3
+        assert gemini_flash["costs"]["outputReasoningPerToken"] == 0.0
+        assert gemini_flash["costs"]["inputCachedPerToken"] == 0.01875
+        assert gemini_flash["costs"]["inputCacheWritePerToken"] == 0.0375
+        assert gemini_flash.get("contextSize") == 1048576  # models.dev limit.context
 
     @responses.activate
-    def test_fetch_ai_model_costs_success_openrouter_only(self) -> None:
+    def test_fetch_ai_model_metadata_success_openrouter_only(self) -> None:
         """Test successful fetching when only OpenRouter succeeds"""
         self._mock_openrouter_api_response(MOCK_OPENROUTER_API_RESPONSE)
         # Also mock models.dev to return empty response to avoid real network call
         self._mock_models_dev_api_response({})
 
-        fetch_ai_model_costs()
+        fetch_ai_model_metadata()
 
         # Verify the data was cached correctly
-        cached_data = _get_ai_model_costs_from_cache()
+        cached_data = _get_metadata_from_cache()
         assert cached_data is not None
-        assert cached_data.get("version") == 2
-        assert cached_data.get("costs") is None
+        assert cached_data.get("version") == 1
         assert cached_data.get("models") is not None
 
         models = cached_data.get("models")
         assert models is not None
 
         # Check first model with cache pricing
-        gpt4_model = models["gpt-4"]
-        assert gpt4_model.get("inputPerToken") == 0.0000003
-        assert gpt4_model.get("outputPerToken") == 0.00000165
-        assert gpt4_model.get("outputReasoningPerToken") == 0.0
-        assert gpt4_model.get("inputCachedPerToken") == 0.0000015
-        assert gpt4_model.get("inputCacheWritePerToken") == 0.00001875
+        gpt4 = models["gpt-4"]
+        assert gpt4["costs"]["inputPerToken"] == 0.0000003
+        assert gpt4["costs"]["outputPerToken"] == 0.00000165
+        assert gpt4["costs"]["outputReasoningPerToken"] == 0.0
+        assert gpt4["costs"]["inputCachedPerToken"] == 0.0000015
+        assert gpt4["costs"]["inputCacheWritePerToken"] == 0.00001875
+        assert gpt4.get("contextSize") == 1000000
 
         # Check second model with all pricing fields
-        gpt5_model = models["gpt-5"]
-        assert gpt5_model.get("inputPerToken") == 0.00000055
-        assert gpt5_model.get("outputPerToken") == 0.0000022
-        assert gpt5_model.get("outputReasoningPerToken") == 0.00000055
-        assert gpt5_model.get("inputCachedPerToken") == 0.00000055
-        assert gpt5_model.get("inputCacheWritePerToken") == 0.000006875
+        gpt5 = models["gpt-5"]
+        assert gpt5["costs"]["inputPerToken"] == 0.00000055
+        assert gpt5["costs"]["outputPerToken"] == 0.0000022
+        assert gpt5["costs"]["outputReasoningPerToken"] == 0.00000055
+        assert gpt5["costs"]["inputCachedPerToken"] == 0.00000055
+        assert gpt5["costs"]["inputCacheWritePerToken"] == 0.000006875
+        assert gpt5.get("contextSize") == 128000
 
     @responses.activate
-    def test_fetch_ai_model_costs_missing_pricing(self) -> None:
+    def test_fetch_ai_model_metadata_missing_pricing(self) -> None:
         """Test handling of models with missing pricing data"""
         mock_openrouter_response = {
             "data": [
@@ -317,46 +351,48 @@ def test_fetch_ai_model_costs_missing_pricing(self) -> None:
         self._mock_openrouter_api_response(mock_openrouter_response)
         self._mock_models_dev_api_response(mock_models_dev_response)
 
-        fetch_ai_model_costs()
+        fetch_ai_model_metadata()
 
         # Verify only valid models are cached
-        cached_data = _get_ai_model_costs_from_cache()
+        cached_data = _get_metadata_from_cache()
         assert cached_data is not None
         models = cached_data.get("models")
         assert models is not None
 
         # Check valid model
-        gpt4_model = models["gpt-4"]
-        assert gpt4_model.get("inputPerToken") == 0.03
-        assert gpt4_model.get("outputPerToken") == 0.06
-        assert gpt4_model.get("outputReasoningPerToken") == 0.0  # Missing should default to 0.0
-        assert gpt4_model.get("inputCachedPerToken") == 0.0
-        assert gpt4_model.get("inputCacheWritePerToken") == 0.0
+        gpt4 = models["gpt-4"]
+        assert gpt4["costs"]["inputPerToken"] == 0.03
+        assert gpt4["costs"]["outputPerToken"] == 0.06
+        assert gpt4["costs"]["outputReasoningPerToken"] == 0.0  # Missing should default to 0.0
+        assert gpt4["costs"]["inputCachedPerToken"] == 0.0
+        assert gpt4["costs"]["inputCacheWritePerToken"] == 0.0
+        assert "contextSize" not in gpt4  # No context_length in response
 
         # Check model with invalid pricing (should default to 0.0)
-        another_model = models["another-model"]
-        assert another_model.get("inputPerToken") == 0.0  # Invalid "invalid" -> 0.0
-        assert another_model.get("outputPerToken") == 0.02
-        assert another_model.get("inputCacheWritePerToken") == 0.0
+        another = models["another-model"]
+        assert another["costs"]["inputPerToken"] == 0.0  # Invalid "invalid" -> 0.0
+        assert another["costs"]["outputPerToken"] == 0.02
+        assert another["costs"]["inputCacheWritePerToken"] == 0.0
 
         # Check model with no pricing (should default to 0.0)
-        no_pricing_model = models["no-pricing-model"]
-        assert no_pricing_model.get("inputPerToken") == 0.0
-        assert no_pricing_model.get("outputPerToken") == 0.0
-        assert no_pricing_model.get("outputReasoningPerToken") == 0.0
-        assert no_pricing_model.get("inputCachedPerToken") == 0.0
-        assert no_pricing_model.get("inputCacheWritePerToken") == 0.0
+        no_pricing = models["no-pricing-model"]
+        assert no_pricing["costs"]["inputPerToken"] == 0.0
+        assert no_pricing["costs"]["outputPerToken"] == 0.0
+        assert no_pricing["costs"]["outputReasoningPerToken"] == 0.0
+        assert no_pricing["costs"]["inputCachedPerToken"] == 0.0
+        assert no_pricing["costs"]["inputCacheWritePerToken"] == 0.0
 
         # Check models.dev model
-        models_dev_model = models["model-with-pricing"]
-        assert models_dev_model.get("inputPerToken") == 0.1
-        assert models_dev_model.get("outputPerToken") == 0.2
-        assert models_dev_model.get("outputReasoningPerToken") == 0.0
-        assert models_dev_model.get("inputCachedPerToken") == 0.0
-        assert models_dev_model.get("inputCacheWritePerToken") == 0.0
+        models_dev = models["model-with-pricing"]
+        assert models_dev["costs"]["inputPerToken"] == 0.1
+        assert models_dev["costs"]["outputPerToken"] == 0.2
+        assert models_dev["costs"]["outputReasoningPerToken"] == 0.0
+        assert "contextSize" not in models_dev  # No limit.context in response
+        assert models_dev["costs"]["inputCachedPerToken"] == 0.0
+        assert models_dev["costs"]["inputCacheWritePerToken"] == 0.0
 
     @responses.activate
-    def test_fetch_ai_model_costs_openrouter_invalid_response(self) -> None:
+    def test_fetch_ai_model_metadata_openrouter_invalid_response(self) -> None:
         """Test handling of invalid OpenRouter API response format"""
         # Invalid response - missing 'data' field
         mock_response = {"invalid": "response"}
@@ -364,14 +400,14 @@ def test_fetch_ai_model_costs_openrouter_invalid_response(self) -> None:
         self._mock_openrouter_api_response(mock_response)
 
         with pytest.raises(ValueError, match="Invalid OpenRouter response format"):
-            fetch_ai_model_costs()
+            fetch_ai_model_metadata()
 
         # Verify nothing was cached
-        cached_data = _get_ai_model_costs_from_cache()
+        cached_data = _get_metadata_from_cache()
         assert cached_data is None
 
     @responses.activate
-    def test_fetch_ai_model_costs_models_dev_invalid_response(self) -> None:
+    def test_fetch_ai_model_metadata_models_dev_invalid_response(self) -> None:
         """Test handling of invalid models.dev API response format"""
         # Valid OpenRouter response
         self._mock_openrouter_api_response(MOCK_OPENROUTER_API_RESPONSE)
@@ -385,14 +421,14 @@ def test_fetch_ai_model_costs_models_dev_invalid_response(self) -> None:
         )
 
         with pytest.raises(ValueError, match="Invalid models.dev response format"):
-            fetch_ai_model_costs()
+            fetch_ai_model_metadata()
 
         # Verify nothing was cached due to models.dev failure
-        cached_data = _get_ai_model_costs_from_cache()
+        cached_data = _get_metadata_from_cache()
         assert cached_data is None
 
     @responses.activate
-    def test_fetch_ai_model_costs_openrouter_http_error(self) -> None:
+    def test_fetch_ai_model_metadata_openrouter_http_error(self) -> None:
         """Test handling of OpenRouter HTTP errors"""
         responses.add(
             responses.GET,
@@ -401,14 +437,14 @@ def test_fetch_ai_model_costs_openrouter_http_error(self) -> None:
         )
 
         with pytest.raises(Exception):
-            fetch_ai_model_costs()
+            fetch_ai_model_metadata()
 
         # Verify nothing was cached
-        cached_data = _get_ai_model_costs_from_cache()
+        cached_data = _get_metadata_from_cache()
         assert cached_data is None
 
     @responses.activate
-    def test_fetch_ai_model_costs_models_dev_http_error(self) -> None:
+    def test_fetch_ai_model_metadata_models_dev_http_error(self) -> None:
         """Test handling of models.dev HTTP errors"""
         # Valid OpenRouter response
         self._mock_openrouter_api_response(MOCK_OPENROUTER_API_RESPONSE)
@@ -421,14 +457,14 @@ def test_fetch_ai_model_costs_models_dev_http_error(self) -> None:
         )
 
         with pytest.raises(Exception):
-            fetch_ai_model_costs()
+            fetch_ai_model_metadata()
 
         # Verify nothing was cached due to models.dev failure
-        cached_data = _get_ai_model_costs_from_cache()
+        cached_data = _get_metadata_from_cache()
         assert cached_data is None
 
     @responses.activate
-    def test_fetch_ai_model_costs_timeout(self) -> None:
+    def test_fetch_ai_model_metadata_timeout(self) -> None:
         """Test handling of request timeout"""
         import requests
 
@@ -439,19 +475,19 @@ def test_fetch_ai_model_costs_timeout(self) -> None:
         )
 
         with pytest.raises(requests.exceptions.Timeout):
-            fetch_ai_model_costs()
+            fetch_ai_model_metadata()
 
         # Verify nothing was cached
-        cached_data = _get_ai_model_costs_from_cache()
+        cached_data = _get_metadata_from_cache()
         assert cached_data is None
 
-    def test_get_ai_model_costs_from_cache_empty(self) -> None:
+    def test_get_metadata_from_cache_empty(self) -> None:
         """Test retrieving from empty cache"""
-        cached_data = _get_ai_model_costs_from_cache()
+        cached_data = _get_metadata_from_cache()
         assert cached_data is None
 
     @responses.activate
-    def test_fetch_ai_model_costs_with_normalized_and_prefix_glob_names(self) -> None:
+    def test_fetch_ai_model_metadata_with_normalized_and_prefix_glob_names(self) -> None:
         """Test that normalized and prefix glob versions of model names are added correctly"""
         # Mock responses with models that have dates/versions that should be normalized
         mock_openrouter_response = {
@@ -502,10 +538,10 @@ def test_fetch_ai_model_costs_with_normalized_and_prefix_glob_names(self) -> Non
         self._mock_openrouter_api_response(mock_openrouter_response)
         self._mock_models_dev_api_response(mock_models_dev_response)
 
-        fetch_ai_model_costs()
+        fetch_ai_model_metadata()
 
         # Verify the data was cached correctly
-        cached_data = _get_ai_model_costs_from_cache()
+        cached_data = _get_metadata_from_cache()
         assert cached_data is not None
         models = cached_data.get("models")
         assert models is not None
@@ -520,7 +556,7 @@ def test_fetch_ai_model_costs_with_normalized_and_prefix_glob_names(self) -> Non
         # Check normalized versions were added (dates/versions removed)
         assert "gpt-4o-mini" in models
         assert "claude-3-5-sonnet" in models
-        assert "claude-3-5-haiku" in models  # @ is not part of the date pattern
+        assert "claude-3-5-haiku" in models
         assert "o3-pro" in models
 
         # Check prefix glob versions of normalized models were added
@@ -533,50 +569,112 @@ def test_fetch_ai_model_costs_with_normalized_and_prefix_glob_names(self) -> Non
         # Verify normalized versions have same pricing as original models
         gpt4o_mini_original = models["gpt-4o-mini-20250522"]
         gpt4o_mini_normalized = models["gpt-4o-mini"]
-        assert gpt4o_mini_original.get("inputPerToken") == gpt4o_mini_normalized.get(
-            "inputPerToken"
+        assert (
+            gpt4o_mini_original["costs"]["inputPerToken"]
+            == gpt4o_mini_normalized["costs"]["inputPerToken"]
         )
-        assert gpt4o_mini_original.get("outputPerToken") == gpt4o_mini_normalized.get(
-            "outputPerToken"
+        assert (
+            gpt4o_mini_original["costs"]["outputPerToken"]
+            == gpt4o_mini_normalized["costs"]["outputPerToken"]
         )
 
         claude_sonnet_original = models["claude-3-5-sonnet-20241022"]
         claude_sonnet_normalized = models["claude-3-5-sonnet"]
-        assert claude_sonnet_original.get("inputPerToken") == claude_sonnet_normalized.get(
-            "inputPerToken"
+        assert (
+            claude_sonnet_original["costs"]["inputPerToken"]
+            == claude_sonnet_normalized["costs"]["inputPerToken"]
         )
-        assert claude_sonnet_original.get("outputPerToken") == claude_sonnet_normalized.get(
-            "outputPerToken"
+        assert (
+            claude_sonnet_original["costs"]["outputPerToken"]
+            == claude_sonnet_normalized["costs"]["outputPerToken"]
         )
 
         claude_haiku_original = models["claude-3-5-haiku@20241022"]
         claude_haiku_normalized = models["claude-3-5-haiku"]
-        assert claude_haiku_original.get("inputPerToken") == claude_haiku_normalized.get(
-            "inputPerToken"
+        assert (
+            claude_haiku_original["costs"]["inputPerToken"]
+            == claude_haiku_normalized["costs"]["inputPerToken"]
         )
-        assert claude_haiku_original.get("outputPerToken") == claude_haiku_normalized.get(
-            "outputPerToken"
+        assert (
+            claude_haiku_original["costs"]["outputPerToken"]
+            == claude_haiku_normalized["costs"]["outputPerToken"]
         )
 
         o3_pro_original = models["o3-pro-2025-06-10"]
         o3_pro_normalized = models["o3-pro"]
-        assert o3_pro_original.get("inputPerToken") == o3_pro_normalized.get("inputPerToken")
-        assert o3_pro_original.get("outputPerToken") == o3_pro_normalized.get("outputPerToken")
+        assert (
+            o3_pro_original["costs"]["inputPerToken"] == o3_pro_normalized["costs"]["inputPerToken"]
+        )
+        assert (
+            o3_pro_original["costs"]["outputPerToken"]
+            == o3_pro_normalized["costs"]["outputPerToken"]
+        )
 
         # Verify prefix glob versions have same pricing as normalized models
         gpt4_normalized = models["gpt-4"]
         gpt4_prefix_glob = models["*gpt-4"]
-        assert gpt4_normalized.get("inputPerToken") == gpt4_prefix_glob.get("inputPerToken")
-        assert gpt4_normalized.get("outputPerToken") == gpt4_prefix_glob.get("outputPerToken")
+        assert (
+            gpt4_normalized["costs"]["inputPerToken"] == gpt4_prefix_glob["costs"]["inputPerToken"]
+        )
+        assert (
+            gpt4_normalized["costs"]["outputPerToken"]
+            == gpt4_prefix_glob["costs"]["outputPerToken"]
+        )
 
         gpt4o_mini_prefix_glob = models["*gpt-4o-mini"]
-        assert gpt4o_mini_normalized.get("inputPerToken") == gpt4o_mini_prefix_glob.get(
-            "inputPerToken"
+        assert (
+            gpt4o_mini_normalized["costs"]["inputPerToken"]
+            == gpt4o_mini_prefix_glob["costs"]["inputPerToken"]
         )
-        assert gpt4o_mini_normalized.get("outputPerToken") == gpt4o_mini_prefix_glob.get(
-            "outputPerToken"
+        assert (
+            gpt4o_mini_normalized["costs"]["outputPerToken"]
+            == gpt4o_mini_prefix_glob["costs"]["outputPerToken"]
         )
 
+    @responses.activate
+    def test_fetch_ai_model_metadata_does_not_write_legacy_cache(self) -> None:
+        """Test that the new task only writes the new cache, not the legacy one"""
+        self._mock_openrouter_api_response(MOCK_OPENROUTER_API_RESPONSE)
+        self._mock_models_dev_api_response(MOCK_MODELS_DEV_API_RESPONSE)
+
+        fetch_ai_model_metadata()
+
+        # New cache should be populated
+        new_data = _get_metadata_from_cache()
+        assert new_data is not None
+        assert new_data.get("version") == 1
+
+        # Legacy cache should NOT be populated by this task
+        legacy_data = _get_legacy_costs_from_cache()
+        assert legacy_data is None
+
+    @responses.activate
+    def test_fetch_ai_model_costs_independent(self) -> None:
+        """Test that the legacy task writes only the legacy cache, independently"""
+        self._mock_openrouter_api_response(MOCK_OPENROUTER_API_RESPONSE)
+        self._mock_models_dev_api_response(MOCK_MODELS_DEV_API_RESPONSE)
+
+        fetch_ai_model_costs()
+
+        # Legacy cache should be populated
+        legacy_data = _get_legacy_costs_from_cache()
+        assert legacy_data is not None
+        assert legacy_data.get("version") == 2
+
+        legacy_models = legacy_data.get("models")
+        assert legacy_models is not None
+
+        # Legacy format: flat cost fields, no nested "costs", no contextSize
+        gpt4 = legacy_models["gpt-4"]
+        assert gpt4.get("inputPerToken") == 0.0000003
+        assert gpt4.get("outputPerToken") == 0.00000165
+        assert "costs" not in gpt4
+        assert "contextSize" not in gpt4
+
+        # New cache should NOT be populated by this task
+        new_data = _get_metadata_from_cache()
+        assert new_data is None
+
     def test_normalize_model_id(self) -> None:
         """Test model ID normalization with various date and version formats"""
         from sentry.tasks.ai_agent_monitoring import _normalize_model_id
diff --git a/uv.lock b/uv.lock
index 6484457d5c5d24..6252f2588e22a7 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 2
+revision = 3
 requires-python = ">=3.13"
 resolution-markers = [
     "(python_full_version >= '3.14' and sys_platform == 'darwin') or (python_full_version >= '3.14' and sys_platform == 'linux')",
@@ -2374,7 +2374,7 @@ requires-dist = [
     { name = "sentry-ophio", specifier = ">=1.1.3" },
     { name = "sentry-protos", specifier = ">=0.8.11" },
     { name = "sentry-redis-tools", specifier = ">=0.5.0" },
-    { name = "sentry-relay", specifier = ">=0.9.26" },
+    { name = "sentry-relay", specifier = ">=0.9.27" },
     { name = "sentry-sdk", extras = ["http2"], specifier = ">=2.47.0" },
     { name = "sentry-usage-accountant", specifier = ">=0.0.10" },
     { name = "setuptools", specifier = ">=70.0.0" },
@@ -2570,15 +2570,15 @@ wheels = [
 
 [[package]]
 name = "sentry-relay"
-version = "0.9.26"
+version = "0.9.27"
 source = { registry = "https://pypi.devinfra.sentry.io/simple" }
 dependencies = [
     { name = "milksnake", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
 ]
 wheels = [
-    { url = "https://pypi.devinfra.sentry.io/wheels/sentry_relay-0.9.26-py2.py3-none-macosx_14_0_arm64.whl", hash = "sha256:6d02f4901526b0221afbb7bb7757a175f5edc001621a5f81445714a29152ff1f" },
-    { url = "https://pypi.devinfra.sentry.io/wheels/sentry_relay-0.9.26-py2.py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:00886a61dbc5d83941e95bc0d97b900e9b455e135a05553172474dd398112523" },
-    { url = "https://pypi.devinfra.sentry.io/wheels/sentry_relay-0.9.26-py2.py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:8b7e020e64c03905e8df28c9fafd808fbfae8d7a7c4a2bd067282d5bf8590da6" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/sentry_relay-0.9.27-py2.py3-none-macosx_14_0_arm64.whl", hash = "sha256:ae370c69cc3699210e99f4a44b84e50291c15e455573cb708318d05859b6c7a8" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/sentry_relay-0.9.27-py2.py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:5c56279d945cfc6b94f8dd4a42e10b6c0a73628e99e1f1cbb63111197f66bd96" },
+    { url = "https://pypi.devinfra.sentry.io/wheels/sentry_relay-0.9.27-py2.py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:6976c0b4d1e721700f8a84eafb25cc411e359c1d76ac41a0fec97677b893da43" },
 ]
 
 [[package]]