Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,21 @@ async def handle(
)

# --- Advisor sub-call (always non-streaming, no tools) ---
# Forward the parent request's proxy auth/attribution context
# (litellm_metadata, user_api_key_dict, proxy_server_request, ...) so
# the advisor sub-call is logged and cost-attributed to the
# originating key/user, exactly like the executor leg above (which
# spreads **kwargs). Without it the proxy cost-tracking callback skips
# the SpendLogs write entirely (it requires a non-None key/user/team),
# so advisor spend is invisible in per-user logs. litellm_logging_obj
# is excluded so the advisor leg gets its own logging object and its
# spend is not double-counted against the parent request's call id;
# api_key/api_base are excluded because they are passed explicitly.
advisor_passthrough = {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Medium: Credential forwarding into advisor sub-call

advisor_tool["model"] comes from the request, but this forwards every parent kwarg except api_key/api_base. For routed deployments that use other credential fields like aws_access_key_id, aws_secret_access_key, vertex_credentials, or litellm_credential_name, a caller can invoke an arbitrary advisor model with the parent deployment's cloud credentials instead of only inheriting attribution context.

Suggested change
advisor_passthrough = {
advisor_attribution_keys = {
"litellm_metadata",
"user_api_key_dict",
"proxy_server_request",
}
advisor_passthrough = {
k: v for k, v in kwargs.items() if k in advisor_attribution_keys
}

k: v
for k, v in kwargs.items()
if k not in ("litellm_logging_obj", "api_key", "api_base")
}
advisor_response: AnthropicMessagesResponse = await _call_messages_handler(
model=advisor_model,
messages=advisor_messages,
Expand All @@ -163,6 +178,7 @@ async def handle(
},
api_key=advisor_api_key,
api_base=advisor_api_base,
**advisor_passthrough,
)

advisor_text = _extract_response_text(advisor_response)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -364,3 +364,70 @@ async def fake_pre_request_hooks(
assert captured["thinking"] == {"type": "enabled", "budget_tokens": 2048}
assert captured["system"] == "Hook overrode the system prompt."
assert captured["temperature"] == 0.1


# ---------------------------------------------------------------------------
# Advisor sub-call is attributed to the originating key/user (SpendLogs)
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_advisor_subcall_forwards_proxy_attribution():
"""
The advisor sub-call must inherit the parent request's proxy
auth/attribution context (litellm_metadata / user_api_key_dict /
proxy_server_request) so it is logged and cost-attributed to the originating
key/user, exactly like the executor leg. Without it the proxy cost-tracking
callback skips the SpendLogs write entirely.

litellm_logging_obj must NOT be forwarded: the advisor leg owns its own
logging object so its spend is not double-counted against the parent request.
"""
from litellm.llms.anthropic.experimental_pass_through.messages.interceptors.advisor import (
AdvisorOrchestrationHandler,
)

sentinel_meta = {"user_api_key_alias": "team-a", "user_api_key": "sk-" + "a" * 32}
sentinel_key = object()
sentinel_psr = {"url": "/v1/messages"}

captured = []
executor_calls = 0

async def mock_handler(
model, messages, tools, stream, max_tokens, custom_llm_provider, **kwargs
):
nonlocal executor_calls
captured.append({"tools": tools, "kwargs": kwargs})
if tools is None:
return _text_resp("Some advice.", model="claude-opus-4-6") # advisor leg
executor_calls += 1
if executor_calls == 1:
return _advisor_call_resp() # executor → requests advisor (once)
return _text_resp("Final answer.") # executor → final

with patch(
"litellm.llms.anthropic.experimental_pass_through.messages.interceptors.advisor._call_messages_handler",
side_effect=mock_handler,
):
await AdvisorOrchestrationHandler().handle(
model="openai/gpt-4o-mini",
messages=MESSAGES,
tools=[ADVISOR_TOOL],
stream=False,
max_tokens=512,
custom_llm_provider="openai",
litellm_metadata=sentinel_meta,
user_api_key_dict=sentinel_key,
proxy_server_request=sentinel_psr,
litellm_logging_obj=object(),
)

advisor_legs = [c for c in captured if c["tools"] is None]
assert advisor_legs, "advisor sub-call (tools=None) must have fired"
adv = advisor_legs[0]["kwargs"]
assert adv.get("litellm_metadata") == sentinel_meta
assert adv.get("user_api_key_dict") is sentinel_key
assert adv.get("proxy_server_request") == sentinel_psr
# Own logging object → not stamped onto the parent request.
assert "litellm_logging_obj" not in adv
Comment on lines +409 to +433

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Test doesn't cover api_key/api_base exclusion

The filter in advisor_passthrough explicitly excludes api_key and api_base to prevent a caller-supplied key from silently overriding the advisor's own credentials (advisor_api_key/advisor_api_base). If that filter were accidentally removed, the advisor would call the provider with the wrong key, but this test wouldn't catch it. Adding api_key="sk-parent" and api_base="https://parent.example.com" to the handle() invocation and asserting they don't appear in adv would close this gap.

Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!

Loading