diff --git a/pyproject.toml b/pyproject.toml index 0f899185c505d7..ff60933a2d39fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,7 +89,7 @@ dependencies = [ "sentry-ophio>=1.1.3", "sentry-protos>=0.8.11", "sentry-redis-tools>=0.5.0", - "sentry-relay>=0.9.26", + "sentry-relay>=0.9.27", "sentry-sdk[http2]>=2.47.0", "sentry-usage-accountant>=0.0.10", # remove once there are no unmarked transitive dependencies on setuptools diff --git a/src/sentry/conf/server.py b/src/sentry/conf/server.py index 6bbb2650b790c8..82d6e0f2455637 100644 --- a/src/sentry/conf/server.py +++ b/src/sentry/conf/server.py @@ -1205,10 +1205,15 @@ def SOCIAL_AUTH_DEFAULT_USERNAME() -> str: "task": "relocation:sentry.relocation.transfer.find_relocation_transfer_region", "schedule": crontab("*/5", "*", "*", "*", "*"), }, + # TODO(constantinius): Remove fetch-ai-model-costs once all consumers have migrated to fetch-ai-model-metadata "fetch-ai-model-costs": { "task": "ai_agent_monitoring:sentry.tasks.ai_agent_monitoring.fetch_ai_model_costs", "schedule": crontab("*/30", "*", "*", "*", "*"), }, + "fetch-ai-model-metadata": { + "task": "ai_agent_monitoring:sentry.tasks.ai_agent_monitoring.fetch_ai_model_metadata", + "schedule": crontab("*/30", "*", "*", "*", "*"), + }, "llm-issue-detection": { "task": "issues:sentry.tasks.llm_issue_detection.run_llm_issue_detection", "schedule": crontab("0", "*", "*", "*", "*"), diff --git a/src/sentry/relay/config/ai_model_costs.py b/src/sentry/relay/config/ai_model_costs.py index 2acdd0075a0d0e..3f35c2f8dcdd14 100644 --- a/src/sentry/relay/config/ai_model_costs.py +++ b/src/sentry/relay/config/ai_model_costs.py @@ -11,13 +11,20 @@ type ModelId = str -# Cache key for storing AI model costs +# Legacy cache key for AI model costs (v2 flat format) +# TODO(constantinius): Remove once all consumers have migrated to AI_MODEL_METADATA_CACHE_KEY AI_MODEL_COSTS_CACHE_KEY = "ai-model-costs:v2" -# Cache timeout: 30 days (we re-fetch every 30 minutes, so this provides more than enough overlap) AI_MODEL_COSTS_CACHE_TTL = 30 * 24 * 60 * 60 +# Cache key for storing LLM model metadata (v1 nested format) +AI_MODEL_METADATA_CACHE_KEY = "ai-model-metadata:v1" +# Cache timeout: 30 days (we re-fetch every 30 minutes, so this provides more than enough overlap) +AI_MODEL_METADATA_CACHE_TTL = 30 * 24 * 60 * 60 + class AIModelCostV2(TypedDict): + """Legacy flat format. TODO(constantinius): Remove once all consumers have migrated.""" + inputPerToken: float outputPerToken: float outputReasoningPerToken: float @@ -26,18 +33,34 @@ class AIModelCostV2(TypedDict): class AIModelCosts(TypedDict): + """Legacy config type. TODO(constantinius): Remove once all consumers have migrated.""" + version: Required[int] models: Required[dict[ModelId, AIModelCostV2]] +class AIModelCost(TypedDict): + inputPerToken: float + outputPerToken: float + outputReasoningPerToken: float + inputCachedPerToken: float + inputCacheWritePerToken: float + + +class AIModelMetadata(TypedDict, total=False): + costs: Required[AIModelCost] + contextSize: int + + +class AIModelMetadataConfig(TypedDict): + version: Required[int] + models: Required[dict[ModelId, AIModelMetadata]] + + def ai_model_costs_config() -> AIModelCosts | None: """ - Get AI model costs configuration. - AI model costs are set in cache by a cron job, - if there are no costs, it should be investigated why. - - Returns: - AIModelCosts object containing cost information for AI models + Legacy: Get AI model costs configuration. + TODO(constantinius): Remove once all consumers have migrated to ai_model_metadata_config. """ if settings.SENTRY_AIR_GAP: return None @@ -47,7 +70,29 @@ def ai_model_costs_config() -> AIModelCosts | None: return cached_costs if not settings.IS_DEV: - # in dev environment, we don't want to log this logger.warning("Empty model costs") return None + + +def ai_model_metadata_config() -> AIModelMetadataConfig | None: + """ + Get LLM model metadata configuration. + LLM model metadata is set in cache by a cron job, + if there is no metadata, it should be investigated why. + + Returns: + AIModelMetadataConfig containing cost and context size information for LLM models + """ + if settings.SENTRY_AIR_GAP: + return None + + cached_metadata = cache.get(AI_MODEL_METADATA_CACHE_KEY) + if cached_metadata is not None: + return cached_metadata + + if not settings.IS_DEV: + # in dev environment, we don't want to log this + logger.warning("Empty LLM model metadata") + + return None diff --git a/src/sentry/relay/globalconfig.py b/src/sentry/relay/globalconfig.py index 8ee3b4b449ae33..35934957d1d36b 100644 --- a/src/sentry/relay/globalconfig.py +++ b/src/sentry/relay/globalconfig.py @@ -1,7 +1,12 @@ from typing import Any, TypedDict import sentry.options -from sentry.relay.config.ai_model_costs import AIModelCosts, ai_model_costs_config +from sentry.relay.config.ai_model_costs import ( + AIModelCosts, + AIModelMetadataConfig, + ai_model_costs_config, + ai_model_metadata_config, +) from sentry.relay.config.measurements import MeasurementsConfig, get_measurements_config from sentry.relay.config.metric_extraction import ( MetricExtractionGroups, @@ -39,7 +44,10 @@ class SpanOpDefaults(TypedDict): class GlobalConfig(TypedDict, total=False): measurements: MeasurementsConfig - aiModelCosts: AIModelCosts | None + aiModelCosts: ( + AIModelCosts | None + ) # TODO(constantinius): Remove once all consumers use aiModelMetadata + aiModelMetadata: AIModelMetadataConfig | None metricExtraction: MetricExtractionGroups filters: GenericFiltersConfig | None spanOpDefaults: SpanOpDefaults @@ -78,7 +86,8 @@ def get_global_config() -> GlobalConfig: global_config: GlobalConfig = { "measurements": get_measurements_config(), - "aiModelCosts": ai_model_costs_config(), + "aiModelCosts": ai_model_costs_config(), # TODO(constantinius): Remove once all consumers use aiModelMetadata + "aiModelMetadata": ai_model_metadata_config(), "metricExtraction": global_metric_extraction_groups(), "spanOpDefaults": span_op_defaults(), } diff --git a/src/sentry/tasks/ai_agent_monitoring.py b/src/sentry/tasks/ai_agent_monitoring.py index f4f0bd2898568c..fca779eba9a3f1 100644 --- a/src/sentry/tasks/ai_agent_monitoring.py +++ b/src/sentry/tasks/ai_agent_monitoring.py @@ -8,8 +8,13 @@ from sentry.relay.config.ai_model_costs import ( AI_MODEL_COSTS_CACHE_KEY, AI_MODEL_COSTS_CACHE_TTL, + AI_MODEL_METADATA_CACHE_KEY, + AI_MODEL_METADATA_CACHE_TTL, + AIModelCost, AIModelCosts, AIModelCostV2, + AIModelMetadata, + AIModelMetadataConfig, ModelId, ) from sentry.silo.base import SiloMode @@ -329,3 +334,243 @@ def safe_float_conversion(value: Any) -> float: return 0.0 return 0.0 + + +# --------------------------------------------------------------------------- +# New task: fetch_ai_model_metadata +# Fetches model costs + context size into the new AIModelMetadata format. +# Runs alongside fetch_ai_model_costs during the migration period. +# --------------------------------------------------------------------------- + + +def _add_glob_model_names_metadata(models_dict: dict[ModelId, AIModelMetadata]) -> None: + """Add glob versions of model names to the metadata models dictionary.""" + model_ids = list(models_dict.keys()) + + for model_id in model_ids: + normalized_model_id = _normalize_model_id(model_id) + if normalized_model_id != model_id and normalized_model_id not in models_dict: + models_dict[normalized_model_id] = models_dict[model_id] + + prefix_glob_name = _create_prefix_glob_model_name(normalized_model_id) + if prefix_glob_name not in models_dict: + models_dict[prefix_glob_name] = models_dict[normalized_model_id] + + +@instrumented_task( + name="sentry.tasks.ai_agent_monitoring.fetch_ai_model_metadata", + namespace=ai_agent_monitoring_tasks, + processing_deadline_duration=35, + expires=30, + silo_mode=SiloMode.CELL, +) +def fetch_ai_model_metadata() -> None: + """ + Fetch LLM model metadata (costs, context size) from OpenRouter and models.dev APIs + and store them in cache. + + This task fetches model pricing and context size data from both sources and + converts it to the AIModelMetadata format. + OpenRouter data takes precedence over models.dev data. + """ + if settings.SENTRY_AIR_GAP: + return + + models_dict: dict[ModelId, AIModelMetadata] = {} + + # Fetch from OpenRouter API (takes precedence) + try: + openrouter_models = _fetch_openrouter_models_metadata() + models_dict.update(openrouter_models) + except Exception as e: + logger.warning( + "Failed to fetch LLM model metadata from OpenRouter API", extra={"error": str(e)} + ) + raise + + # Fetch from models.dev API (only add models not already present) + try: + models_dev_models = _fetch_models_dev_models_metadata() + for model_id, model_metadata in models_dev_models.items(): + if model_id not in models_dict: + models_dict[model_id] = model_metadata + except Exception as e: + logger.warning( + "Failed to fetch LLM model metadata from models.dev API", extra={"error": str(e)} + ) + raise + + # Add glob versions of model names for flexible matching + _add_glob_model_names_metadata(models_dict) + + metadata_config: AIModelMetadataConfig = {"version": 1, "models": models_dict} + cache.set(AI_MODEL_METADATA_CACHE_KEY, metadata_config, AI_MODEL_METADATA_CACHE_TTL) + + +def _fetch_openrouter_models_metadata() -> dict[ModelId, AIModelMetadata]: + """Fetch model metadata from OpenRouter API. + + Example response: + { + "data": [ + { + "id": "openai/gpt-4o-mini", + "name": "OpenAI: GPT-4o Mini", + "context_length": 1000000, + "pricing": { + "prompt": "0.0000003", + "completion": "0.00000165", + "internal_reasoning": "0.0000003", + "input_cache_read": "0.0000003", + "input_cache_write": "0.00000125", + }, + }, + ] + } + """ + response = safe_urlopen(OPENROUTER_MODELS_API_URL) + response.raise_for_status() + + data = response.json() + + if not isinstance(data, dict) or "data" not in data: + raise ValueError("Invalid OpenRouter response format: missing 'data' field") + + models_data = data["data"] + if not isinstance(models_data, list): + raise ValueError("Invalid OpenRouter response format: 'data' field is not a list") + + models_dict: dict[ModelId, AIModelMetadata] = {} + + for model_data in models_data: + if not isinstance(model_data, dict): + continue + + model_id = model_data.get("id") + if not model_id: + continue + + # OpenRouter includes provider name in the model ID, e.g. openai/gpt-4o-mini + # We need to extract the model name, since our SDKs only send the model name + # (e.g. gpt-4o-mini) + if "/" in model_id: + model_id = model_id.split("/", maxsplit=1)[1] + + pricing = model_data.get("pricing", {}) + + # OpenRouter provides costs as strings, we need to convert to float + try: + model_cost = AIModelCost( + inputPerToken=safe_float_conversion(pricing.get("prompt")), + outputPerToken=safe_float_conversion(pricing.get("completion")), + outputReasoningPerToken=safe_float_conversion(pricing.get("internal_reasoning")), + inputCachedPerToken=safe_float_conversion(pricing.get("input_cache_read")), + inputCacheWritePerToken=safe_float_conversion(pricing.get("input_cache_write")), + ) + + metadata = AIModelMetadata(costs=model_cost) + + context_length = model_data.get("context_length") + if isinstance(context_length, int) and context_length > 0: + metadata["contextSize"] = context_length + + models_dict[model_id] = metadata + + except (ValueError, TypeError) as e: + logger.warning( + "fetch_ai_model_metadata.openrouter_model_parse_error", + extra={"model_id": model_id, "error": str(e)}, + ) + continue + + return models_dict + + +def _fetch_models_dev_models_metadata() -> dict[ModelId, AIModelMetadata]: + """Fetch model metadata from models.dev API. + + Example response: + { + "openai": { + "models": { + "gpt-4": { + "cost": { + "input": 0.0000003, + "output": 0.00000165, + "cache_read": 0.0000003, + "cache_write": 0.00000125, + }, + "limit": { + "context": 128000, + "output": 16384, + } + } + } + } + } + """ + response = safe_urlopen(MODELS_DEV_API_URL) + response.raise_for_status() + + data = response.json() + + if not isinstance(data, dict): + raise ValueError("Invalid models.dev response format: expected dict") + + models_dict: dict[ModelId, AIModelMetadata] = {} + + for provider_name, provider_data in data.items(): + if not isinstance(provider_data, dict): + continue + + models = provider_data.get("models", {}) + if not isinstance(models, dict): + continue + + for model_id, model_data in models.items(): + if not isinstance(model_data, dict): + continue + + cost_data = model_data.get("cost", {}) + if not isinstance(cost_data, dict) or not cost_data: + # Skip models with no cost data or empty cost data + continue + + # models.dev may include provider name in the model ID, e.g. google/gemini-2.0-flash-001 + # We need to extract the model name, since our SDKs only send the model name + # (e.g. gemini-2.0-flash-001) + if "/" in model_id: + model_id = model_id.split("/", maxsplit=1)[1] + + # models.dev provides costs as numbers, but for extra safety convert to our format + try: + model_cost = AIModelCost( + inputPerToken=safe_float_conversion(cost_data.get("input")) + / 1000000, # models.dev have prices per 1M tokens + outputPerToken=safe_float_conversion(cost_data.get("output")) + / 1000000, # models.dev have price per 1M tokens + outputReasoningPerToken=0.0, # models.dev doesn't provide reasoning costs + inputCachedPerToken=safe_float_conversion(cost_data.get("cache_read")) + / 1000000, # models.dev have price per 1M tokens + inputCacheWritePerToken=safe_float_conversion(cost_data.get("cache_write")) + / 1000000, # models.dev have price per 1M tokens + ) + + metadata = AIModelMetadata(costs=model_cost) + + limit_data = model_data.get("limit", {}) + if isinstance(limit_data, dict): + context_size = limit_data.get("context") + if isinstance(context_size, int) and context_size > 0: + metadata["contextSize"] = context_size + + models_dict[model_id] = metadata + + except (ValueError, TypeError) as e: + logger.warning( + "fetch_ai_model_metadata.models_dev_model_parse_error", + extra={"model_id": model_id, "provider": provider_name, "error": str(e)}, + ) + continue + + return models_dict diff --git a/tests/sentry/tasks/test_ai_agent_monitoring.py b/tests/sentry/tasks/test_ai_agent_monitoring.py index f3740df344b4c4..9b212ec9a4b672 100644 --- a/tests/sentry/tasks/test_ai_agent_monitoring.py +++ b/tests/sentry/tasks/test_ai_agent_monitoring.py @@ -1,19 +1,31 @@ import pytest import responses -from sentry.relay.config.ai_model_costs import AI_MODEL_COSTS_CACHE_KEY, AIModelCosts +from sentry.relay.config.ai_model_costs import ( + AI_MODEL_COSTS_CACHE_KEY, + AI_MODEL_METADATA_CACHE_KEY, + AIModelMetadataConfig, +) from sentry.tasks.ai_agent_monitoring import ( MODELS_DEV_API_URL, OPENROUTER_MODELS_API_URL, fetch_ai_model_costs, + fetch_ai_model_metadata, ) from sentry.testutils.cases import TestCase from sentry.utils.cache import cache -def _get_ai_model_costs_from_cache() -> AIModelCosts | None: +def _get_metadata_from_cache() -> AIModelMetadataConfig | None: + """ + Utility function to retrieve LLM model metadata from cache. """ - Utility function to retrieve AI model costs from cache. + return cache.get(AI_MODEL_METADATA_CACHE_KEY) + + +def _get_legacy_costs_from_cache(): + """ + Utility function to retrieve legacy AI model costs from cache. """ return cache.get(AI_MODEL_COSTS_CACHE_KEY) @@ -121,7 +133,11 @@ def _get_ai_model_costs_from_cache() -> AIModelCosts | None: "output": 1.6 * 1000000, # models.dev have prices per 1M tokens "cache_read": 0.1 * 1000000, # models.dev have prices per 1M tokens "cache_write": 0.2 * 1000000, # models.dev have prices per 1M tokens - } + }, + "limit": { + "context": 1048576, + "output": 32768, + }, }, "gpt-4": { # This should be skipped since it exists in OpenRouter "cost": { @@ -129,7 +145,11 @@ def _get_ai_model_costs_from_cache() -> AIModelCosts | None: "output": 0.2 * 1000000, # models.dev have prices per 1M tokens "cache_read": 0.05 * 1000000, # models.dev have prices per 1M tokens "cache_write": 0.15 * 1000000, # models.dev have prices per 1M tokens - } + }, + "limit": { + "context": 8192, + "output": 4096, + }, }, } }, @@ -141,7 +161,11 @@ def _get_ai_model_costs_from_cache() -> AIModelCosts | None: "output": 10 * 1000000, # models.dev have prices per 1M tokens "cache_read": 0.31 * 1000000, # models.dev have prices per 1M tokens "cache_write": 0.62 * 1000000, # models.dev have prices per 1M tokens - } + }, + "limit": { + "context": 1048576, + "output": 65536, + }, }, "google/gemini-2.0-flash-001": { # Test provider prefix stripping "cost": { @@ -149,7 +173,11 @@ def _get_ai_model_costs_from_cache() -> AIModelCosts | None: "output": 0.3 * 1000000, # models.dev have prices per 1M tokens "cache_read": 0.01875 * 1000000, # models.dev have prices per 1M tokens "cache_write": 0.0375 * 1000000, # models.dev have prices per 1M tokens - } + }, + "limit": { + "context": 1048576, + "output": 8192, + }, }, } }, @@ -157,10 +185,11 @@ def _get_ai_model_costs_from_cache() -> AIModelCosts | None: } -class FetchAIModelCostsTest(TestCase): +class FetchAIModelMetadataTest(TestCase): def setUp(self) -> None: super().setUp() # Clear cache before each test + cache.delete(AI_MODEL_METADATA_CACHE_KEY) cache.delete(AI_MODEL_COSTS_CACHE_KEY) def _mock_openrouter_api_response(self, mock_response: dict): @@ -180,100 +209,105 @@ def _mock_models_dev_api_response(self, mock_response: dict): ) @responses.activate - def test_fetch_ai_model_costs_success_both_apis(self) -> None: + def test_fetch_ai_model_metadata_success_both_apis(self) -> None: """Test successful fetching and caching from both APIs""" self._mock_openrouter_api_response(MOCK_OPENROUTER_API_RESPONSE) self._mock_models_dev_api_response(MOCK_MODELS_DEV_API_RESPONSE) - fetch_ai_model_costs() + fetch_ai_model_metadata() # Verify the data was cached correctly - cached_data = _get_ai_model_costs_from_cache() + cached_data = _get_metadata_from_cache() assert cached_data is not None - assert cached_data.get("version") == 2 - assert cached_data.get("costs") is None + assert cached_data.get("version") == 1 assert cached_data.get("models") is not None models = cached_data.get("models") assert models is not None # Check OpenRouter models - gpt4_model = models["gpt-4"] - assert gpt4_model.get("inputPerToken") == 0.0000003 # OpenRouter price, not models.dev - assert gpt4_model.get("outputPerToken") == 0.00000165 - assert gpt4_model.get("outputReasoningPerToken") == 0.0 - assert gpt4_model.get("inputCachedPerToken") == 0.0000015 - assert gpt4_model.get("inputCacheWritePerToken") == 0.00001875 - - gpt5_model = models["gpt-5"] - assert gpt5_model.get("inputPerToken") == 0.00000055 - assert gpt5_model.get("outputPerToken") == 0.0000022 - assert gpt5_model.get("outputReasoningPerToken") == 0.00000055 - assert gpt5_model.get("inputCachedPerToken") == 0.00000055 - assert gpt5_model.get("inputCacheWritePerToken") == 0.000006875 + gpt4 = models["gpt-4"] + assert gpt4["costs"]["inputPerToken"] == 0.0000003 # OpenRouter price, not models.dev + assert gpt4["costs"]["outputPerToken"] == 0.00000165 + assert gpt4["costs"]["outputReasoningPerToken"] == 0.0 + assert gpt4["costs"]["inputCachedPerToken"] == 0.0000015 + assert gpt4["costs"]["inputCacheWritePerToken"] == 0.00001875 + assert gpt4.get("contextSize") == 1000000 # OpenRouter context_length + + gpt5 = models["gpt-5"] + assert gpt5["costs"]["inputPerToken"] == 0.00000055 + assert gpt5["costs"]["outputPerToken"] == 0.0000022 + assert gpt5["costs"]["outputReasoningPerToken"] == 0.00000055 + assert gpt5["costs"]["inputCachedPerToken"] == 0.00000055 + assert gpt5["costs"]["inputCacheWritePerToken"] == 0.000006875 + assert gpt5.get("contextSize") == 128000 # OpenRouter context_length # Check models.dev models - gpt41_mini_model = models["gpt-4.1-mini"] - assert gpt41_mini_model.get("inputPerToken") == 0.4 - assert gpt41_mini_model.get("outputPerToken") == 1.6 + gpt41_mini = models["gpt-4.1-mini"] + assert gpt41_mini["costs"]["inputPerToken"] == 0.4 + assert gpt41_mini["costs"]["outputPerToken"] == 1.6 assert ( - gpt41_mini_model.get("outputReasoningPerToken") == 0.0 + gpt41_mini["costs"]["outputReasoningPerToken"] == 0.0 ) # models.dev doesn't provide this - assert gpt41_mini_model.get("inputCachedPerToken") == 0.1 - assert gpt41_mini_model.get("inputCacheWritePerToken") == 0.2 - - gemini_model = models["gemini-2.5-pro"] - assert gemini_model.get("inputPerToken") == 1.25 - assert gemini_model.get("outputPerToken") == 10 - assert gemini_model.get("outputReasoningPerToken") == 0.0 - assert gemini_model.get("inputCachedPerToken") == 0.31 - assert gemini_model.get("inputCacheWritePerToken") == 0.62 + assert gpt41_mini["costs"]["inputCachedPerToken"] == 0.1 + assert gpt41_mini["costs"]["inputCacheWritePerToken"] == 0.2 + assert gpt41_mini.get("contextSize") == 1048576 # models.dev limit.context + + gemini = models["gemini-2.5-pro"] + assert gemini["costs"]["inputPerToken"] == 1.25 + assert gemini["costs"]["outputPerToken"] == 10 + assert gemini["costs"]["outputReasoningPerToken"] == 0.0 + assert gemini["costs"]["inputCachedPerToken"] == 0.31 + assert gemini["costs"]["inputCacheWritePerToken"] == 0.62 + assert gemini.get("contextSize") == 1048576 # models.dev limit.context # Check models.dev model with provider prefix (should be stripped) - gemini_flash_model = models["gemini-2.0-flash-001"] - assert gemini_flash_model.get("inputPerToken") == 0.075 - assert gemini_flash_model.get("outputPerToken") == 0.3 - assert gemini_flash_model.get("outputReasoningPerToken") == 0.0 - assert gemini_flash_model.get("inputCachedPerToken") == 0.01875 - assert gemini_flash_model.get("inputCacheWritePerToken") == 0.0375 + gemini_flash = models["gemini-2.0-flash-001"] + assert gemini_flash["costs"]["inputPerToken"] == 0.075 + assert gemini_flash["costs"]["outputPerToken"] == 0.3 + assert gemini_flash["costs"]["outputReasoningPerToken"] == 0.0 + assert gemini_flash["costs"]["inputCachedPerToken"] == 0.01875 + assert gemini_flash["costs"]["inputCacheWritePerToken"] == 0.0375 + assert gemini_flash.get("contextSize") == 1048576 # models.dev limit.context @responses.activate - def test_fetch_ai_model_costs_success_openrouter_only(self) -> None: + def test_fetch_ai_model_metadata_success_openrouter_only(self) -> None: """Test successful fetching when only OpenRouter succeeds""" self._mock_openrouter_api_response(MOCK_OPENROUTER_API_RESPONSE) # Also mock models.dev to return empty response to avoid real network call self._mock_models_dev_api_response({}) - fetch_ai_model_costs() + fetch_ai_model_metadata() # Verify the data was cached correctly - cached_data = _get_ai_model_costs_from_cache() + cached_data = _get_metadata_from_cache() assert cached_data is not None - assert cached_data.get("version") == 2 - assert cached_data.get("costs") is None + assert cached_data.get("version") == 1 assert cached_data.get("models") is not None models = cached_data.get("models") assert models is not None # Check first model with cache pricing - gpt4_model = models["gpt-4"] - assert gpt4_model.get("inputPerToken") == 0.0000003 - assert gpt4_model.get("outputPerToken") == 0.00000165 - assert gpt4_model.get("outputReasoningPerToken") == 0.0 - assert gpt4_model.get("inputCachedPerToken") == 0.0000015 - assert gpt4_model.get("inputCacheWritePerToken") == 0.00001875 + gpt4 = models["gpt-4"] + assert gpt4["costs"]["inputPerToken"] == 0.0000003 + assert gpt4["costs"]["outputPerToken"] == 0.00000165 + assert gpt4["costs"]["outputReasoningPerToken"] == 0.0 + assert gpt4["costs"]["inputCachedPerToken"] == 0.0000015 + assert gpt4["costs"]["inputCacheWritePerToken"] == 0.00001875 + assert gpt4.get("contextSize") == 1000000 # Check second model with all pricing fields - gpt5_model = models["gpt-5"] - assert gpt5_model.get("inputPerToken") == 0.00000055 - assert gpt5_model.get("outputPerToken") == 0.0000022 - assert gpt5_model.get("outputReasoningPerToken") == 0.00000055 - assert gpt5_model.get("inputCachedPerToken") == 0.00000055 - assert gpt5_model.get("inputCacheWritePerToken") == 0.000006875 + gpt5 = models["gpt-5"] + assert gpt5["costs"]["inputPerToken"] == 0.00000055 + assert gpt5["costs"]["outputPerToken"] == 0.0000022 + assert gpt5["costs"]["outputReasoningPerToken"] == 0.00000055 + assert gpt5["costs"]["inputCachedPerToken"] == 0.00000055 + assert gpt5["costs"]["inputCacheWritePerToken"] == 0.000006875 + assert gpt5.get("contextSize") == 128000 @responses.activate - def test_fetch_ai_model_costs_missing_pricing(self) -> None: + def test_fetch_ai_model_metadata_missing_pricing(self) -> None: """Test handling of models with missing pricing data""" mock_openrouter_response = { "data": [ @@ -317,46 +351,48 @@ def test_fetch_ai_model_costs_missing_pricing(self) -> None: self._mock_openrouter_api_response(mock_openrouter_response) self._mock_models_dev_api_response(mock_models_dev_response) - fetch_ai_model_costs() + fetch_ai_model_metadata() # Verify only valid models are cached - cached_data = _get_ai_model_costs_from_cache() + cached_data = _get_metadata_from_cache() assert cached_data is not None models = cached_data.get("models") assert models is not None # Check valid model - gpt4_model = models["gpt-4"] - assert gpt4_model.get("inputPerToken") == 0.03 - assert gpt4_model.get("outputPerToken") == 0.06 - assert gpt4_model.get("outputReasoningPerToken") == 0.0 # Missing should default to 0.0 - assert gpt4_model.get("inputCachedPerToken") == 0.0 - assert gpt4_model.get("inputCacheWritePerToken") == 0.0 + gpt4 = models["gpt-4"] + assert gpt4["costs"]["inputPerToken"] == 0.03 + assert gpt4["costs"]["outputPerToken"] == 0.06 + assert gpt4["costs"]["outputReasoningPerToken"] == 0.0 # Missing should default to 0.0 + assert gpt4["costs"]["inputCachedPerToken"] == 0.0 + assert gpt4["costs"]["inputCacheWritePerToken"] == 0.0 + assert "contextSize" not in gpt4 # No context_length in response # Check model with invalid pricing (should default to 0.0) - another_model = models["another-model"] - assert another_model.get("inputPerToken") == 0.0 # Invalid "invalid" -> 0.0 - assert another_model.get("outputPerToken") == 0.02 - assert another_model.get("inputCacheWritePerToken") == 0.0 + another = models["another-model"] + assert another["costs"]["inputPerToken"] == 0.0 # Invalid "invalid" -> 0.0 + assert another["costs"]["outputPerToken"] == 0.02 + assert another["costs"]["inputCacheWritePerToken"] == 0.0 # Check model with no pricing (should default to 0.0) - no_pricing_model = models["no-pricing-model"] - assert no_pricing_model.get("inputPerToken") == 0.0 - assert no_pricing_model.get("outputPerToken") == 0.0 - assert no_pricing_model.get("outputReasoningPerToken") == 0.0 - assert no_pricing_model.get("inputCachedPerToken") == 0.0 - assert no_pricing_model.get("inputCacheWritePerToken") == 0.0 + no_pricing = models["no-pricing-model"] + assert no_pricing["costs"]["inputPerToken"] == 0.0 + assert no_pricing["costs"]["outputPerToken"] == 0.0 + assert no_pricing["costs"]["outputReasoningPerToken"] == 0.0 + assert no_pricing["costs"]["inputCachedPerToken"] == 0.0 + assert no_pricing["costs"]["inputCacheWritePerToken"] == 0.0 # Check models.dev model - models_dev_model = models["model-with-pricing"] - assert models_dev_model.get("inputPerToken") == 0.1 - assert models_dev_model.get("outputPerToken") == 0.2 - assert models_dev_model.get("outputReasoningPerToken") == 0.0 - assert models_dev_model.get("inputCachedPerToken") == 0.0 - assert models_dev_model.get("inputCacheWritePerToken") == 0.0 + models_dev = models["model-with-pricing"] + assert models_dev["costs"]["inputPerToken"] == 0.1 + assert models_dev["costs"]["outputPerToken"] == 0.2 + assert models_dev["costs"]["outputReasoningPerToken"] == 0.0 + assert "contextSize" not in models_dev # No limit.context in response + assert models_dev["costs"]["inputCachedPerToken"] == 0.0 + assert models_dev["costs"]["inputCacheWritePerToken"] == 0.0 @responses.activate - def test_fetch_ai_model_costs_openrouter_invalid_response(self) -> None: + def test_fetch_ai_model_metadata_openrouter_invalid_response(self) -> None: """Test handling of invalid OpenRouter API response format""" # Invalid response - missing 'data' field mock_response = {"invalid": "response"} @@ -364,14 +400,14 @@ def test_fetch_ai_model_costs_openrouter_invalid_response(self) -> None: self._mock_openrouter_api_response(mock_response) with pytest.raises(ValueError, match="Invalid OpenRouter response format"): - fetch_ai_model_costs() + fetch_ai_model_metadata() # Verify nothing was cached - cached_data = _get_ai_model_costs_from_cache() + cached_data = _get_metadata_from_cache() assert cached_data is None @responses.activate - def test_fetch_ai_model_costs_models_dev_invalid_response(self) -> None: + def test_fetch_ai_model_metadata_models_dev_invalid_response(self) -> None: """Test handling of invalid models.dev API response format""" # Valid OpenRouter response self._mock_openrouter_api_response(MOCK_OPENROUTER_API_RESPONSE) @@ -385,14 +421,14 @@ def test_fetch_ai_model_costs_models_dev_invalid_response(self) -> None: ) with pytest.raises(ValueError, match="Invalid models.dev response format"): - fetch_ai_model_costs() + fetch_ai_model_metadata() # Verify nothing was cached due to models.dev failure - cached_data = _get_ai_model_costs_from_cache() + cached_data = _get_metadata_from_cache() assert cached_data is None @responses.activate - def test_fetch_ai_model_costs_openrouter_http_error(self) -> None: + def test_fetch_ai_model_metadata_openrouter_http_error(self) -> None: """Test handling of OpenRouter HTTP errors""" responses.add( responses.GET, @@ -401,14 +437,14 @@ def test_fetch_ai_model_costs_openrouter_http_error(self) -> None: ) with pytest.raises(Exception): - fetch_ai_model_costs() + fetch_ai_model_metadata() # Verify nothing was cached - cached_data = _get_ai_model_costs_from_cache() + cached_data = _get_metadata_from_cache() assert cached_data is None @responses.activate - def test_fetch_ai_model_costs_models_dev_http_error(self) -> None: + def test_fetch_ai_model_metadata_models_dev_http_error(self) -> None: """Test handling of models.dev HTTP errors""" # Valid OpenRouter response self._mock_openrouter_api_response(MOCK_OPENROUTER_API_RESPONSE) @@ -421,14 +457,14 @@ def test_fetch_ai_model_costs_models_dev_http_error(self) -> None: ) with pytest.raises(Exception): - fetch_ai_model_costs() + fetch_ai_model_metadata() # Verify nothing was cached due to models.dev failure - cached_data = _get_ai_model_costs_from_cache() + cached_data = _get_metadata_from_cache() assert cached_data is None @responses.activate - def test_fetch_ai_model_costs_timeout(self) -> None: + def test_fetch_ai_model_metadata_timeout(self) -> None: """Test handling of request timeout""" import requests @@ -439,19 +475,19 @@ def test_fetch_ai_model_costs_timeout(self) -> None: ) with pytest.raises(requests.exceptions.Timeout): - fetch_ai_model_costs() + fetch_ai_model_metadata() # Verify nothing was cached - cached_data = _get_ai_model_costs_from_cache() + cached_data = _get_metadata_from_cache() assert cached_data is None - def test_get_ai_model_costs_from_cache_empty(self) -> None: + def test_get_metadata_from_cache_empty(self) -> None: """Test retrieving from empty cache""" - cached_data = _get_ai_model_costs_from_cache() + cached_data = _get_metadata_from_cache() assert cached_data is None @responses.activate - def test_fetch_ai_model_costs_with_normalized_and_prefix_glob_names(self) -> None: + def test_fetch_ai_model_metadata_with_normalized_and_prefix_glob_names(self) -> None: """Test that normalized and prefix glob versions of model names are added correctly""" # Mock responses with models that have dates/versions that should be normalized mock_openrouter_response = { @@ -502,10 +538,10 @@ def test_fetch_ai_model_costs_with_normalized_and_prefix_glob_names(self) -> Non self._mock_openrouter_api_response(mock_openrouter_response) self._mock_models_dev_api_response(mock_models_dev_response) - fetch_ai_model_costs() + fetch_ai_model_metadata() # Verify the data was cached correctly - cached_data = _get_ai_model_costs_from_cache() + cached_data = _get_metadata_from_cache() assert cached_data is not None models = cached_data.get("models") assert models is not None @@ -520,7 +556,7 @@ def test_fetch_ai_model_costs_with_normalized_and_prefix_glob_names(self) -> Non # Check normalized versions were added (dates/versions removed) assert "gpt-4o-mini" in models assert "claude-3-5-sonnet" in models - assert "claude-3-5-haiku" in models # @ is not part of the date pattern + assert "claude-3-5-haiku" in models assert "o3-pro" in models # Check prefix glob versions of normalized models were added @@ -533,50 +569,112 @@ def test_fetch_ai_model_costs_with_normalized_and_prefix_glob_names(self) -> Non # Verify normalized versions have same pricing as original models gpt4o_mini_original = models["gpt-4o-mini-20250522"] gpt4o_mini_normalized = models["gpt-4o-mini"] - assert gpt4o_mini_original.get("inputPerToken") == gpt4o_mini_normalized.get( - "inputPerToken" + assert ( + gpt4o_mini_original["costs"]["inputPerToken"] + == gpt4o_mini_normalized["costs"]["inputPerToken"] ) - assert gpt4o_mini_original.get("outputPerToken") == gpt4o_mini_normalized.get( - "outputPerToken" + assert ( + gpt4o_mini_original["costs"]["outputPerToken"] + == gpt4o_mini_normalized["costs"]["outputPerToken"] ) claude_sonnet_original = models["claude-3-5-sonnet-20241022"] claude_sonnet_normalized = models["claude-3-5-sonnet"] - assert claude_sonnet_original.get("inputPerToken") == claude_sonnet_normalized.get( - "inputPerToken" + assert ( + claude_sonnet_original["costs"]["inputPerToken"] + == claude_sonnet_normalized["costs"]["inputPerToken"] ) - assert claude_sonnet_original.get("outputPerToken") == claude_sonnet_normalized.get( - "outputPerToken" + assert ( + claude_sonnet_original["costs"]["outputPerToken"] + == claude_sonnet_normalized["costs"]["outputPerToken"] ) claude_haiku_original = models["claude-3-5-haiku@20241022"] claude_haiku_normalized = models["claude-3-5-haiku"] - assert claude_haiku_original.get("inputPerToken") == claude_haiku_normalized.get( - "inputPerToken" + assert ( + claude_haiku_original["costs"]["inputPerToken"] + == claude_haiku_normalized["costs"]["inputPerToken"] ) - assert claude_haiku_original.get("outputPerToken") == claude_haiku_normalized.get( - "outputPerToken" + assert ( + claude_haiku_original["costs"]["outputPerToken"] + == claude_haiku_normalized["costs"]["outputPerToken"] ) o3_pro_original = models["o3-pro-2025-06-10"] o3_pro_normalized = models["o3-pro"] - assert o3_pro_original.get("inputPerToken") == o3_pro_normalized.get("inputPerToken") - assert o3_pro_original.get("outputPerToken") == o3_pro_normalized.get("outputPerToken") + assert ( + o3_pro_original["costs"]["inputPerToken"] == o3_pro_normalized["costs"]["inputPerToken"] + ) + assert ( + o3_pro_original["costs"]["outputPerToken"] + == o3_pro_normalized["costs"]["outputPerToken"] + ) # Verify prefix glob versions have same pricing as normalized models gpt4_normalized = models["gpt-4"] gpt4_prefix_glob = models["*gpt-4"] - assert gpt4_normalized.get("inputPerToken") == gpt4_prefix_glob.get("inputPerToken") - assert gpt4_normalized.get("outputPerToken") == gpt4_prefix_glob.get("outputPerToken") + assert ( + gpt4_normalized["costs"]["inputPerToken"] == gpt4_prefix_glob["costs"]["inputPerToken"] + ) + assert ( + gpt4_normalized["costs"]["outputPerToken"] + == gpt4_prefix_glob["costs"]["outputPerToken"] + ) gpt4o_mini_prefix_glob = models["*gpt-4o-mini"] - assert gpt4o_mini_normalized.get("inputPerToken") == gpt4o_mini_prefix_glob.get( - "inputPerToken" + assert ( + gpt4o_mini_normalized["costs"]["inputPerToken"] + == gpt4o_mini_prefix_glob["costs"]["inputPerToken"] ) - assert gpt4o_mini_normalized.get("outputPerToken") == gpt4o_mini_prefix_glob.get( - "outputPerToken" + assert ( + gpt4o_mini_normalized["costs"]["outputPerToken"] + == gpt4o_mini_prefix_glob["costs"]["outputPerToken"] ) + @responses.activate + def test_fetch_ai_model_metadata_does_not_write_legacy_cache(self) -> None: + """Test that the new task only writes the new cache, not the legacy one""" + self._mock_openrouter_api_response(MOCK_OPENROUTER_API_RESPONSE) + self._mock_models_dev_api_response(MOCK_MODELS_DEV_API_RESPONSE) + + fetch_ai_model_metadata() + + # New cache should be populated + new_data = _get_metadata_from_cache() + assert new_data is not None + assert new_data.get("version") == 1 + + # Legacy cache should NOT be populated by this task + legacy_data = _get_legacy_costs_from_cache() + assert legacy_data is None + + @responses.activate + def test_fetch_ai_model_costs_independent(self) -> None: + """Test that the legacy task writes only the legacy cache, independently""" + self._mock_openrouter_api_response(MOCK_OPENROUTER_API_RESPONSE) + self._mock_models_dev_api_response(MOCK_MODELS_DEV_API_RESPONSE) + + fetch_ai_model_costs() + + # Legacy cache should be populated + legacy_data = _get_legacy_costs_from_cache() + assert legacy_data is not None + assert legacy_data.get("version") == 2 + + legacy_models = legacy_data.get("models") + assert legacy_models is not None + + # Legacy format: flat cost fields, no nested "costs", no contextSize + gpt4 = legacy_models["gpt-4"] + assert gpt4.get("inputPerToken") == 0.0000003 + assert gpt4.get("outputPerToken") == 0.00000165 + assert "costs" not in gpt4 + assert "contextSize" not in gpt4 + + # New cache should NOT be populated by this task + new_data = _get_metadata_from_cache() + assert new_data is None + def test_normalize_model_id(self) -> None: """Test model ID normalization with various date and version formats""" from sentry.tasks.ai_agent_monitoring import _normalize_model_id diff --git a/uv.lock b/uv.lock index 6484457d5c5d24..6252f2588e22a7 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.13" resolution-markers = [ "(python_full_version >= '3.14' and sys_platform == 'darwin') or (python_full_version >= '3.14' and sys_platform == 'linux')", @@ -2374,7 +2374,7 @@ requires-dist = [ { name = "sentry-ophio", specifier = ">=1.1.3" }, { name = "sentry-protos", specifier = ">=0.8.11" }, { name = "sentry-redis-tools", specifier = ">=0.5.0" }, - { name = "sentry-relay", specifier = ">=0.9.26" }, + { name = "sentry-relay", specifier = ">=0.9.27" }, { name = "sentry-sdk", extras = ["http2"], specifier = ">=2.47.0" }, { name = "sentry-usage-accountant", specifier = ">=0.0.10" }, { name = "setuptools", specifier = ">=70.0.0" }, @@ -2570,15 +2570,15 @@ wheels = [ [[package]] name = "sentry-relay" -version = "0.9.26" +version = "0.9.27" source = { registry = "https://pypi.devinfra.sentry.io/simple" } dependencies = [ { name = "milksnake", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, ] wheels = [ - { url = "https://pypi.devinfra.sentry.io/wheels/sentry_relay-0.9.26-py2.py3-none-macosx_14_0_arm64.whl", hash = "sha256:6d02f4901526b0221afbb7bb7757a175f5edc001621a5f81445714a29152ff1f" }, - { url = "https://pypi.devinfra.sentry.io/wheels/sentry_relay-0.9.26-py2.py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:00886a61dbc5d83941e95bc0d97b900e9b455e135a05553172474dd398112523" }, - { url = "https://pypi.devinfra.sentry.io/wheels/sentry_relay-0.9.26-py2.py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:8b7e020e64c03905e8df28c9fafd808fbfae8d7a7c4a2bd067282d5bf8590da6" }, + { url = "https://pypi.devinfra.sentry.io/wheels/sentry_relay-0.9.27-py2.py3-none-macosx_14_0_arm64.whl", hash = "sha256:ae370c69cc3699210e99f4a44b84e50291c15e455573cb708318d05859b6c7a8" }, + { url = "https://pypi.devinfra.sentry.io/wheels/sentry_relay-0.9.27-py2.py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:5c56279d945cfc6b94f8dd4a42e10b6c0a73628e99e1f1cbb63111197f66bd96" }, + { url = "https://pypi.devinfra.sentry.io/wheels/sentry_relay-0.9.27-py2.py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:6976c0b4d1e721700f8a84eafb25cc411e359c1d76ac41a0fec97677b893da43" }, ] [[package]]