Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 15 additions & 5 deletions litellm/litellm_core_utils/llm_cost_calc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,12 +583,22 @@ def generic_cost_per_token(
reasoning_tokens = completion_tokens_details["reasoning_tokens"]
image_tokens = completion_tokens_details["image_tokens"]

# Only assume all tokens are text if there's NO breakdown at all
# If image_tokens, audio_tokens, or reasoning_tokens exist, respect text_tokens=0
# Handle text_tokens calculation:
# 1. If text_tokens is explicitly provided and > 0, use it
# 2. If there's a breakdown (reasoning/audio/image tokens), calculate text_tokens as the remainder
# 3. If no breakdown at all, assume all completion_tokens are text_tokens
has_token_breakdown = image_tokens > 0 or audio_tokens > 0 or reasoning_tokens > 0
if text_tokens == 0 and not has_token_breakdown:
text_tokens = usage.completion_tokens
is_text_tokens_total = True
if text_tokens == 0:
if has_token_breakdown:
# Calculate text tokens as remainder when we have a breakdown
# This handles cases like OpenAI's reasoning models where text_tokens isn't provided
text_tokens = max(
0, usage.completion_tokens - reasoning_tokens - audio_tokens - image_tokens
)
else:
# No breakdown at all, all tokens are text tokens
text_tokens = usage.completion_tokens
is_text_tokens_total = True
## TEXT COST
completion_cost = float(text_tokens) * completion_base_cost

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -809,3 +809,54 @@ def test_bedrock_anthropic_prompt_caching():
assert completion_cost >= 0
assert round(prompt_cost, 3) == 0.111
assert round(completion_cost, 5) == 0.00820


def test_reasoning_tokens_without_text_tokens_gpt5_nano():
    """
    Regression test for GitHub issue #18599:
    https://github.com/BerriAI/litellm/issues/18599

    The usage built here reports reasoning_tokens but carries no text_tokens
    value. The test expects the whole completion_tokens count to be billed
    at the output rate, i.e. the remainder
        completion_tokens - reasoning_tokens - audio_tokens - image_tokens
    must be charged as text rather than dropped.
    """
    # 977 completion tokens in total, 768 of them reasoning tokens.
    # text_tokens is deliberately omitted: that omission is the scenario
    # under test (977 - 768 = 209 tokens that must still be billed).
    details = CompletionTokensDetailsWrapper(
        reasoning_tokens=768,
        audio_tokens=0,
    )
    usage = Usage(
        prompt_tokens=17,
        completion_tokens=977,
        total_tokens=994,
        completion_tokens_details=details,
    )

    prompt_cost, completion_cost = generic_cost_per_token(
        model="gpt-5-nano",
        usage=usage,
        custom_llm_provider="openai",
    )

    # Rates this test assumes for gpt-5-nano: $0.05/1M input, $0.40/1M output.
    expected_prompt_cost = 17 * 0.05 / 1_000_000
    # Every completion token is expected to be charged, not only the reasoning ones.
    expected_completion_cost = 977 * 0.40 / 1_000_000

    assert abs(prompt_cost - expected_prompt_cost) < 1e-10, (
        f"Prompt cost incorrect: {prompt_cost} vs {expected_prompt_cost}"
    )
    assert abs(completion_cost - expected_completion_cost) < 1e-10, (
        f"Completion cost incorrect: {completion_cost} vs {expected_completion_cost}"
    )

    # Guard against the original bug: a cost derived from reasoning tokens alone.
    reasoning_only_cost = 768 * 0.40 / 1_000_000
    assert abs(completion_cost - reasoning_only_cost) > 1e-6, (
        "Bug detected: Cost calculation is using only reasoning_tokens instead of all completion_tokens!"
    )
Loading