|
24 | 24 | from litellm.litellm_core_utils.llm_cost_calc.utils import ( |
25 | 25 | CostCalculatorUtils, |
26 | 26 | _generic_cost_per_character, |
| 27 | + _get_regional_uplift_multiplier, |
27 | 28 | _get_service_tier_cost_key, |
28 | 29 | _parse_prompt_tokens_details, |
29 | 30 | calculate_cost_component, |
@@ -312,6 +313,10 @@ def cost_per_token( # noqa: PLR0915 |
312 | 313 | audio_transcription_file_duration: float = 0.0, # for audio transcription calls - the file time in seconds |
313 | 314 | ### SERVICE TIER ### |
314 | 315 | service_tier: Optional[str] = None, # for OpenAI service tier pricing |
| 316 | + ### DATA RESIDENCY ### |
| 317 | + data_residency: Optional[ |
| 318 | + str |
| 319 | + ] = None, # for OpenAI regional-processing uplift (e.g. "eu", "us") |
315 | 320 | response: Optional[Any] = None, |
316 | 321 | ### REQUEST MODEL ### |
317 | 322 | request_model: Optional[str] = None, # original request model for router detection |
@@ -493,6 +498,7 @@ def cost_per_token( # noqa: PLR0915 |
493 | 498 | usage=usage_block, |
494 | 499 | custom_llm_provider=custom_llm_provider, |
495 | 500 | service_tier=service_tier, |
| 501 | + data_residency=data_residency, |
496 | 502 | ) |
497 | 503 |
|
498 | 504 | return prompt_cost, completion_cost |
@@ -521,14 +527,18 @@ def cost_per_token( # noqa: PLR0915 |
521 | 527 | or call_type == CallTypes.retrieve_batch |
522 | 528 | ): |
523 | 529 | return batch_cost_calculator( |
524 | | - usage=usage_block, model=model, custom_llm_provider=custom_llm_provider |
| 530 | + usage=usage_block, |
| 531 | + model=model, |
| 532 | + custom_llm_provider=custom_llm_provider, |
| 533 | + data_residency=data_residency, |
525 | 534 | ) |
526 | 535 | elif call_type == "atranscription" or call_type == "transcription": |
527 | 536 | if _transcription_usage_has_token_details(usage_block): |
528 | 537 | return openai_cost_per_token( |
529 | 538 | model=model_without_prefix, |
530 | 539 | usage=usage_block, |
531 | 540 | service_tier=service_tier, |
| 541 | + data_residency=data_residency, |
532 | 542 | ) |
533 | 543 |
|
534 | 544 | return openai_cost_per_second( |
@@ -579,7 +589,10 @@ def cost_per_token( # noqa: PLR0915 |
579 | 589 | ) |
580 | 590 | elif custom_llm_provider == "openai": |
581 | 591 | return openai_cost_per_token( |
582 | | - model=model, usage=usage_block, service_tier=service_tier |
| 592 | + model=model, |
| 593 | + usage=usage_block, |
| 594 | + service_tier=service_tier, |
| 595 | + data_residency=data_residency, |
583 | 596 | ) |
584 | 597 | elif custom_llm_provider == "databricks": |
585 | 598 | return databricks_cost_per_token(model=model, usage=usage_block) |
@@ -631,6 +644,7 @@ def cost_per_token( # noqa: PLR0915 |
631 | 644 | usage=usage_block, |
632 | 645 | custom_llm_provider=custom_llm_provider, |
633 | 646 | service_tier=service_tier, |
| 647 | + data_residency=data_residency, |
634 | 648 | ) |
635 | 649 |
|
636 | 650 | if ( |
@@ -1117,6 +1131,10 @@ def completion_cost( # noqa: PLR0915 |
1117 | 1131 | litellm_logging_obj: Optional[LitellmLoggingObject] = None, |
1118 | 1132 | ### SERVICE TIER ### |
1119 | 1133 | service_tier: Optional[str] = None, # for OpenAI service tier pricing |
| 1134 | + ### DATA RESIDENCY ### |
| 1135 | + data_residency: Optional[ |
| 1136 | + str |
| 1137 | + ] = None, # for OpenAI regional-processing uplift (e.g. "eu", "us") |
1120 | 1138 | ) -> float: |
1121 | 1139 | """ |
1122 | 1140 | Calculate the cost of a given completion call for GPT-3.5-turbo, llama2, any litellm supported llm. |
@@ -1516,6 +1534,7 @@ def completion_cost( # noqa: PLR0915 |
1516 | 1534 | combined_usage_object=cost_per_token_usage_object, |
1517 | 1535 | custom_llm_provider=custom_llm_provider, |
1518 | 1536 | litellm_model_name=model, |
| 1537 | + data_residency=data_residency, |
1519 | 1538 | ) |
1520 | 1539 | elif call_type == _MCP_CALL_TYPE: |
1521 | 1540 | from litellm.proxy._experimental.mcp_server.cost_calculator import ( |
@@ -1600,6 +1619,7 @@ def completion_cost( # noqa: PLR0915 |
1600 | 1619 | audio_transcription_file_duration=audio_transcription_file_duration, |
1601 | 1620 | rerank_billed_units=rerank_billed_units, |
1602 | 1621 | service_tier=service_tier, |
| 1622 | + data_residency=data_residency, |
1603 | 1623 | response=completion_response, |
1604 | 1624 | request_model=request_model_for_cost, |
1605 | 1625 | ) |
@@ -1811,6 +1831,10 @@ def response_cost_calculator( |
1811 | 1831 | litellm_logging_obj: Optional[LitellmLoggingObject] = None, |
1812 | 1832 | ### SERVICE TIER ### |
1813 | 1833 | service_tier: Optional[str] = None, # for OpenAI service tier pricing |
| 1834 | + ### DATA RESIDENCY ### |
| 1835 | + data_residency: Optional[ |
| 1836 | + str |
| 1837 | + ] = None, # for OpenAI regional-processing uplift (e.g. "eu", "us") |
1814 | 1838 | ) -> float: |
1815 | 1839 | """ |
1816 | 1840 | Returns |
@@ -1844,6 +1868,7 @@ def response_cost_calculator( |
1844 | 1868 | router_model_id=router_model_id, |
1845 | 1869 | litellm_logging_obj=litellm_logging_obj, |
1846 | 1870 | service_tier=service_tier, |
| 1871 | + data_residency=data_residency, |
1847 | 1872 | ) |
1848 | 1873 | return response_cost |
1849 | 1874 | except Exception as e: |
@@ -2202,6 +2227,7 @@ def batch_cost_calculator( |
2202 | 2227 | model: str, |
2203 | 2228 | custom_llm_provider: Optional[str] = None, |
2204 | 2229 | model_info: Optional[ModelInfo] = None, |
| 2230 | + data_residency: Optional[str] = None, |
2205 | 2231 | ) -> Tuple[float, float]: |
2206 | 2232 | """ |
2207 | 2233 | Calculate the cost of a batch job. |
@@ -2286,6 +2312,11 @@ def batch_cost_calculator( |
2286 | 2312 | usage.completion_tokens * (output_cost_per_token) / 2 |
2287 | 2313 | ) # batch cost is usually half of the regular token cost |
2288 | 2314 |
|
| 2315 | + uplift = _get_regional_uplift_multiplier(model_info, data_residency) |
| 2316 | + if uplift != 1.0: |
| 2317 | + total_prompt_cost *= uplift |
| 2318 | + total_completion_cost *= uplift |
| 2319 | + |
2289 | 2320 | return total_prompt_cost, total_completion_cost |
2290 | 2321 |
|
2291 | 2322 |
|
@@ -2431,6 +2462,7 @@ def handle_realtime_stream_cost_calculation( |
2431 | 2462 | combined_usage_object: Usage, |
2432 | 2463 | custom_llm_provider: str, |
2433 | 2464 | litellm_model_name: str, |
| 2465 | + data_residency: Optional[str] = None, |
2434 | 2466 | ) -> float: |
2435 | 2467 | """ |
2436 | 2468 | Handles the cost calculation for realtime stream responses. |
@@ -2461,6 +2493,7 @@ def handle_realtime_stream_cost_calculation( |
2461 | 2493 | model=model_name, |
2462 | 2494 | usage=combined_usage_object, |
2463 | 2495 | custom_llm_provider=custom_llm_provider, |
| 2496 | + data_residency=data_residency, |
2464 | 2497 | ) |
2465 | 2498 | except Exception: |
2466 | 2499 | continue |
|
0 commit comments