BerriAI
diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py
Lines changed: 35 additions & 2 deletions
diff --git a/litellm/litellm_core_utils/get_litellm_params.py b/litellm/litellm_core_utils/get_litellm_params.py
Lines changed: 7 additions & 0 deletions
diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
Lines changed: 5 additions & 0 deletions
diff --git a/litellm/litellm_core_utils/llm_cost_calc/utils.py b/litellm/litellm_core_utils/llm_cost_calc/utils.py
Lines changed: 46 additions & 0 deletions
diff --git a/litellm/llms/openai/cost_calculation.py b/litellm/llms/openai/cost_calculation.py
Lines changed: 8 additions & 1 deletion
diff --git a/litellm/llms/openai/data_residency.py b/litellm/llms/openai/data_residency.py
Lines changed: 41 additions & 0 deletions
@@ -24,6 +24,7 @@
 from litellm.litellm_core_utils.llm_cost_calc.utils import (
     CostCalculatorUtils,
     _generic_cost_per_character,
+    _get_regional_uplift_multiplier,
     _get_service_tier_cost_key,
     _parse_prompt_tokens_details,
     calculate_cost_component,
@@ -312,6 +313,10 @@ def cost_per_token(  # noqa: PLR0915
     audio_transcription_file_duration: float = 0.0,  # for audio transcription calls - the file time in seconds
     ### SERVICE TIER ###
     service_tier: Optional[str] = None,  # for OpenAI service tier pricing
+    ### DATA RESIDENCY ###
+    data_residency: Optional[
+        str
+    ] = None,  # for OpenAI regional-processing uplift (e.g. "eu", "us")
     response: Optional[Any] = None,
     ### REQUEST MODEL ###
     request_model: Optional[str] = None,  # original request model for router detection
@@ -493,6 +498,7 @@ def cost_per_token(  # noqa: PLR0915
                 usage=usage_block,
                 custom_llm_provider=custom_llm_provider,
                 service_tier=service_tier,
+                data_residency=data_residency,
             )
 
         return prompt_cost, completion_cost
@@ -521,14 +527,18 @@ def cost_per_token(  # noqa: PLR0915
         or call_type == CallTypes.retrieve_batch
     ):
         return batch_cost_calculator(
-            usage=usage_block, model=model, custom_llm_provider=custom_llm_provider
+            usage=usage_block,
+            model=model,
+            custom_llm_provider=custom_llm_provider,
+            data_residency=data_residency,
         )
     elif call_type == "atranscription" or call_type == "transcription":
         if _transcription_usage_has_token_details(usage_block):
             return openai_cost_per_token(
                 model=model_without_prefix,
                 usage=usage_block,
                 service_tier=service_tier,
+                data_residency=data_residency,
             )
 
         return openai_cost_per_second(
@@ -579,7 +589,10 @@ def cost_per_token(  # noqa: PLR0915
         )
     elif custom_llm_provider == "openai":
         return openai_cost_per_token(
-            model=model, usage=usage_block, service_tier=service_tier
+            model=model,
+            usage=usage_block,
+            service_tier=service_tier,
+            data_residency=data_residency,
         )
     elif custom_llm_provider == "databricks":
         return databricks_cost_per_token(model=model, usage=usage_block)
@@ -631,6 +644,7 @@ def cost_per_token(  # noqa: PLR0915
                 usage=usage_block,
                 custom_llm_provider=custom_llm_provider,
                 service_tier=service_tier,
+                data_residency=data_residency,
             )
 
         if (
@@ -1117,6 +1131,10 @@ def completion_cost(  # noqa: PLR0915
     litellm_logging_obj: Optional[LitellmLoggingObject] = None,
     ### SERVICE TIER ###
     service_tier: Optional[str] = None,  # for OpenAI service tier pricing
+    ### DATA RESIDENCY ###
+    data_residency: Optional[
+        str
+    ] = None,  # for OpenAI regional-processing uplift (e.g. "eu", "us")
 ) -> float:
     """
     Calculate the cost of a given completion call fot GPT-3.5-turbo, llama2, any litellm supported llm.
@@ -1516,6 +1534,7 @@ def completion_cost(  # noqa: PLR0915
                         combined_usage_object=cost_per_token_usage_object,
                         custom_llm_provider=custom_llm_provider,
                         litellm_model_name=model,
+                        data_residency=data_residency,
                     )
                 elif call_type == _MCP_CALL_TYPE:
                     from litellm.proxy._experimental.mcp_server.cost_calculator import (
@@ -1600,6 +1619,7 @@ def completion_cost(  # noqa: PLR0915
                     audio_transcription_file_duration=audio_transcription_file_duration,
                     rerank_billed_units=rerank_billed_units,
                     service_tier=service_tier,
+                    data_residency=data_residency,
                     response=completion_response,
                     request_model=request_model_for_cost,
                 )
@@ -1811,6 +1831,10 @@ def response_cost_calculator(
     litellm_logging_obj: Optional[LitellmLoggingObject] = None,
     ### SERVICE TIER ###
     service_tier: Optional[str] = None,  # for OpenAI service tier pricing
+    ### DATA RESIDENCY ###
+    data_residency: Optional[
+        str
+    ] = None,  # for OpenAI regional-processing uplift (e.g. "eu", "us")
 ) -> float:
     """
     Returns
@@ -1844,6 +1868,7 @@ def response_cost_calculator(
                 router_model_id=router_model_id,
                 litellm_logging_obj=litellm_logging_obj,
                 service_tier=service_tier,
+                data_residency=data_residency,
             )
         return response_cost
     except Exception as e:
@@ -2202,6 +2227,7 @@ def batch_cost_calculator(
     model: str,
     custom_llm_provider: Optional[str] = None,
     model_info: Optional[ModelInfo] = None,
+    data_residency: Optional[str] = None,
 ) -> Tuple[float, float]:
     """
     Calculate the cost of a batch job.
@@ -2286,6 +2312,11 @@ def batch_cost_calculator(
             usage.completion_tokens * (output_cost_per_token) / 2
         )  # batch cost is usually half of the regular token cost
 
+    uplift = _get_regional_uplift_multiplier(model_info, data_residency)
+    if uplift != 1.0:
+        total_prompt_cost *= uplift
+        total_completion_cost *= uplift
+
     return total_prompt_cost, total_completion_cost
 
 
@@ -2431,6 +2462,7 @@ def handle_realtime_stream_cost_calculation(
     combined_usage_object: Usage,
     custom_llm_provider: str,
     litellm_model_name: str,
+    data_residency: Optional[str] = None,
 ) -> float:
     """
     Handles the cost calculation for realtime stream responses.
@@ -2461,6 +2493,7 @@ def handle_realtime_stream_cost_calculation(
                 model=model_name,
                 usage=combined_usage_object,
                 custom_llm_provider=custom_llm_provider,
+                data_residency=data_residency,
             )
         except Exception:
             continue
 
@@ -1,5 +1,7 @@
 from typing import Optional
 
+from litellm.llms.openai.data_residency import infer_openai_data_residency
+
 # Pre-define optional kwargs keys as frozenset for O(1) lookups
 # These are extracted from kwargs only if present, avoiding unnecessary .get() calls
 _OPTIONAL_KWARGS_KEYS = frozenset(
@@ -103,6 +105,10 @@ def get_litellm_params(
     if litellm_trace_id is None:
         litellm_trace_id = _meta.get("trace_id") or _meta.get("session_id")
 
+    data_residency: Optional[str] = infer_openai_data_residency(
+        custom_llm_provider, api_base
+    )
+
     # Build base dict with explicit parameters (always included)
     litellm_params = {
         "acompletion": acompletion,
@@ -112,6 +118,7 @@ def get_litellm_params(
         "verbose": verbose,
         "custom_llm_provider": custom_llm_provider,
         "api_base": api_base,
+        "data_residency": data_residency,
         "litellm_call_id": litellm_call_id,
         "model_alias_map": model_alias_map,
         "completion_call_id": completion_call_id,
 
@@ -1546,6 +1546,11 @@ def _response_cost_calculator(
                     if self.optional_params
                     else None
                 ),
+                "data_residency": (
+                    self.litellm_params.get("data_residency")
+                    if hasattr(self, "litellm_params") and self.litellm_params
+                    else None
+                ),
             }
         except Exception as e:  # error creating kwargs for cost calculation
             debug_info = StandardLoggingModelCostFailureDebugInformation(
 
@@ -9,6 +9,7 @@
     CacheCreationTokenDetails,
     CallTypes,
     CompletionTokensDetailsWrapper,
+    DataResidency,
     ImageResponse,
     ModelInfo,
     PassthroughCallTypes,
@@ -617,11 +618,46 @@ def _calculate_input_cost(
     return prompt_cost
 
 
+def _get_regional_uplift_multiplier(
+    model_info: ModelInfo, data_residency: Optional[str]
+) -> float:
+    """
+    Resolve the per-model regional-processing uplift multiplier for a given
+    data-residency region.
+
+    OpenAI applies a flat percentage uplift (e.g. +10%) on all token costs for
+    requests served from a regionalized hostname (eu./us.api.openai.com). The
+    multiplier is stored on the model entry as
+    ``regional_processing_uplift_multiplier_<region>`` (e.g. 1.10).
+
+    Args:
+        model_info: Model entry to read the multiplier from.
+        data_residency: Region name, matched case-insensitively against the
+            ``DataResidency`` enum values.
+
+    Returns:
+        The configured multiplier as a float, or 1.0 (no uplift) when any of
+        the following holds:
+          - ``data_residency`` is ``None`` or not a string,
+          - the region is not a ``DataResidency`` value,
+          - ``model_info`` is empty/``None`` or has no multiplier for the region,
+          - the configured value is not a number, is NaN/infinite, or is
+            negative.
+
+    This function never raises for bad configuration: cost tracking falls back
+    to the un-uplifted price instead of failing the request.
+    """
+    import math
+
+    # Also covers ``None``; a non-string value cannot name a region and would
+    # otherwise fail on ``.lower()``.
+    if not isinstance(data_residency, str):
+        return 1.0
+    residency = data_residency.lower()
+    if residency not in {r.value for r in DataResidency}:
+        return 1.0
+    # NOTE(review): batch_cost_calculator declares ``model_info`` as Optional
+    # and forwards it here; guard so a missing entry means "no uplift".
+    if not model_info:
+        return 1.0
+    multiplier = model_info.get(f"regional_processing_uplift_multiplier_{residency}")
+    if multiplier is None:
+        return 1.0
+    try:
+        uplift = float(cast(float, multiplier))
+    except (TypeError, ValueError):
+        verbose_logger.exception(
+            "Invalid regional_processing_uplift_multiplier_%s for model; "
+            "defaulting to 1.0",
+            residency,
+        )
+        return 1.0
+    # ``float()`` happily accepts "nan"/"inf" and negative numbers; multiplying
+    # costs by any of those would silently corrupt spend tracking.
+    if not math.isfinite(uplift) or uplift < 0:
+        verbose_logger.warning(
+            "Out-of-range regional_processing_uplift_multiplier_%s for model "
+            "(%r); defaulting to 1.0",
+            residency,
+            multiplier,
+        )
+        return 1.0
+    return uplift
+
+
 def generic_cost_per_token(  # noqa: PLR0915
     model: str,
     usage: Usage,
     custom_llm_provider: str,
     service_tier: Optional[str] = None,
+    data_residency: Optional[str] = None,
 ) -> Tuple[float, float]:
     """
     Calculates the cost per token for a given model, prompt tokens, and completion tokens.
@@ -631,6 +667,8 @@ def generic_cost_per_token(  # noqa: PLR0915
     Input:
         - model: str, the model name without provider prefix
         - usage: LiteLLM Usage block, containing anthropic caching information
+        - data_residency: optional OpenAI data-residency region (e.g. "eu", "us"),
+          used to apply the per-model regional-processing uplift multiplier.
 
     Returns:
         Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
@@ -781,6 +819,14 @@ def generic_cost_per_token(  # noqa: PLR0915
         )
         completion_cost += float(image_tokens) * _output_cost_per_image_token
 
+    ## REGIONAL DATA-RESIDENCY UPLIFT
+    # Applied as a flat multiplier across all token costs for the request
+    # when the upstream is a regionalized OpenAI host (eu./us.api.openai.com).
+    uplift = _get_regional_uplift_multiplier(model_info, data_residency)
+    if uplift != 1.0:
+        prompt_cost *= uplift
+        completion_cost *= uplift
+
     return prompt_cost, completion_cost
 
 
 
@@ -19,14 +19,20 @@ def cost_router(call_type: CallTypes) -> Literal["cost_per_token", "cost_per_sec
 
 
 def cost_per_token(
-    model: str, usage: Usage, service_tier: Optional[str] = None
+    model: str,
+    usage: Usage,
+    service_tier: Optional[str] = None,
+    data_residency: Optional[str] = None,
 ) -> Tuple[float, float]:
     """
     Calculates the cost per token for a given model, prompt tokens, and completion tokens.
 
     Input:
         - model: str, the model name without provider prefix
         - usage: LiteLLM Usage block, containing anthropic caching information
+        - data_residency: optional OpenAI data-residency region (e.g. "eu", "us"),
+          inferred from api_base. Applies the model's regional-processing
+          uplift multiplier when set.
 
     Returns:
         Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
@@ -37,6 +43,7 @@ def cost_per_token(
         usage=usage,
         custom_llm_provider="openai",
         service_tier=service_tier,
+        data_residency=data_residency,
     )
     # ### Non-cached text tokens
     # non_cached_text_tokens = usage.prompt_tokens
 
@@ -0,0 +1,41 @@
+"""
+Helpers for resolving OpenAI data-residency (regional processing) from an
+api_base URL.
+
+OpenAI enforces hostname-per-region for projects with geography restrictions
+enabled and rejects requests sent to the wrong host, so the api_base hostname
+is the authoritative signal of which region a request was processed in.
+"""
+
+from typing import Dict, Optional
+from urllib.parse import urlparse
+
+# Mapping of OpenAI regional hostnames to the corresponding data-residency
+# value used by the cost calculator. See
+# https://developers.openai.com/api/docs/pricing for the regional-processing
+# uplift these hostnames trigger.
+_OPENAI_REGIONAL_HOSTS: Dict[str, str] = {
+    "eu.api.openai.com": "eu",
+    "us.api.openai.com": "us",
+}
+
+
+def infer_openai_data_residency(
+    custom_llm_provider: Optional[str], api_base: Optional[str]
+) -> Optional[str]:
+    """
+    Derive the OpenAI data-residency region from an api_base URL.
+
+    Args:
+        custom_llm_provider: Provider name; only ``"openai"`` is considered.
+        api_base: Base URL the request is sent to. Must include a scheme
+            (``https://...``) for the hostname to be recognised.
+
+    Returns:
+        ``"eu"`` for the EU regional host, ``"us"`` for the US regional host,
+        and ``None`` for the default global host, any non-OpenAI provider,
+        any non-OpenAI URL, or an ``api_base`` that is missing, malformed, or
+        not a string.
+
+    The match is on the exact hostname (case-insensitive, optional trailing
+    dot), so look-alike hosts such as ``eu.api.openai.com.example.org`` do not
+    qualify. This function never raises: it runs on the request path, so an
+    unexpected ``api_base`` must not break the call.
+    """
+    if custom_llm_provider != "openai":
+        return None
+    # ``api_base`` is annotated as str but originates from caller kwargs; a
+    # non-str object (e.g. a URL instance) makes urlparse raise AttributeError,
+    # which the handler below would not catch.
+    if not isinstance(api_base, str) or not api_base:
+        return None
+    try:
+        host = urlparse(api_base).hostname
+    except (TypeError, ValueError):
+        # e.g. "Invalid IPv6 URL" for an unbalanced '[' in the netloc.
+        return None
+    if not host:
+        return None
+    # A fully-qualified name may carry a trailing root dot; it denotes the
+    # same host, so normalise it away before the lookup.
+    return _OPENAI_REGIONAL_HOSTS.get(host.lower().rstrip("."))