From 6018c7d54515a3d145fa901d82e7d995695b6a02 Mon Sep 17 00:00:00 2001 From: Brendan Smith-Elion Date: Mon, 15 Jun 2026 16:02:19 -0400 Subject: [PATCH] fix(advisor): attribute advisor sub-call spend to the originating key/user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The advisor orchestration sub-call did not forward the parent request's proxy auth/attribution context (litellm_metadata / user_api_key_dict / proxy_server_request) that the executor leg already spreads via **kwargs. With no key/user/team in scope the proxy cost-tracking callback skips the SpendLogs write entirely, so advisor spend is attributed to nobody — it runs on resolved provider credentials and is visible only in raw provider invocation logs, never in per-user litellm logs. Forward the parent request's remaining kwargs (which carry the proxy context) to the advisor leg, excluding litellm_logging_obj so the advisor sub-call mints its own logging object and its spend is not double-counted against the parent request's call id (api_key/api_base are also excluded as they are passed explicitly). Co-Authored-By: Claude Opus 4.8 (1M context) --- .../messages/interceptors/advisor.py | 16 +++++ .../messages/test_advisor_integration.py | 67 +++++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/litellm/llms/anthropic/experimental_pass_through/messages/interceptors/advisor.py b/litellm/llms/anthropic/experimental_pass_through/messages/interceptors/advisor.py index c7c110ff3e3..eba6f8980c9 100644 --- a/litellm/llms/anthropic/experimental_pass_through/messages/interceptors/advisor.py +++ b/litellm/llms/anthropic/experimental_pass_through/messages/interceptors/advisor.py @@ -149,6 +149,21 @@ async def handle( ) # --- Advisor sub-call (always non-streaming, no tools) --- + # Forward the parent request's proxy auth/attribution context + # (litellm_metadata, user_api_key_dict, proxy_server_request, ...) 
so + # the advisor sub-call is logged and cost-attributed to the + # originating key/user, exactly like the executor leg above (which + # spreads **kwargs). Without it the proxy cost-tracking callback skips + # the SpendLogs write entirely (it requires a non-None key/user/team), + # so advisor spend is invisible in per-user logs. litellm_logging_obj + # is excluded so the advisor leg gets its own logging object and its + # spend is not double-counted against the parent request's call id; + # api_key/api_base are excluded because they are passed explicitly. + advisor_passthrough = { + k: v + for k, v in kwargs.items() + if k not in ("litellm_logging_obj", "api_key", "api_base") + } advisor_response: AnthropicMessagesResponse = await _call_messages_handler( model=advisor_model, messages=advisor_messages, @@ -163,6 +178,7 @@ async def handle( }, api_key=advisor_api_key, api_base=advisor_api_base, + **advisor_passthrough, ) advisor_text = _extract_response_text(advisor_response) diff --git a/tests/test_litellm/llms/anthropic/experimental_pass_through/messages/test_advisor_integration.py b/tests/test_litellm/llms/anthropic/experimental_pass_through/messages/test_advisor_integration.py index 414ba8f0f5c..7fda6570b5a 100644 --- a/tests/test_litellm/llms/anthropic/experimental_pass_through/messages/test_advisor_integration.py +++ b/tests/test_litellm/llms/anthropic/experimental_pass_through/messages/test_advisor_integration.py @@ -364,3 +364,70 @@ async def fake_pre_request_hooks( assert captured["thinking"] == {"type": "enabled", "budget_tokens": 2048} assert captured["system"] == "Hook overrode the system prompt." 
assert captured["temperature"] == 0.1 + + +# --------------------------------------------------------------------------- +# Advisor sub-call is attributed to the originating key/user (SpendLogs) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_advisor_subcall_forwards_proxy_attribution(): + """ + The advisor sub-call must inherit the parent request's proxy + auth/attribution context (litellm_metadata / user_api_key_dict / + proxy_server_request) so it is logged and cost-attributed to the originating + key/user, exactly like the executor leg. Without it the proxy cost-tracking + callback skips the SpendLogs write entirely. + + litellm_logging_obj must NOT be forwarded: the advisor leg owns its own + logging object so its spend is not double-counted against the parent request. + """ + from litellm.llms.anthropic.experimental_pass_through.messages.interceptors.advisor import ( + AdvisorOrchestrationHandler, + ) + + sentinel_meta = {"user_api_key_alias": "team-a", "user_api_key": "sk-" + "a" * 32} + sentinel_key = object() + sentinel_psr = {"url": "/v1/messages"} + + captured = [] + executor_calls = 0 + + async def mock_handler( + model, messages, tools, stream, max_tokens, custom_llm_provider, **kwargs + ): + nonlocal executor_calls + captured.append({"tools": tools, "kwargs": kwargs}) + if tools is None: + return _text_resp("Some advice.", model="claude-opus-4-6") # advisor leg + executor_calls += 1 + if executor_calls == 1: + return _advisor_call_resp() # executor → requests advisor (once) + return _text_resp("Final answer.") # executor → final + + with patch( + "litellm.llms.anthropic.experimental_pass_through.messages.interceptors.advisor._call_messages_handler", + side_effect=mock_handler, + ): + await AdvisorOrchestrationHandler().handle( + model="openai/gpt-4o-mini", + messages=MESSAGES, + tools=[ADVISOR_TOOL], + stream=False, + max_tokens=512, + custom_llm_provider="openai", + 
litellm_metadata=sentinel_meta, + user_api_key_dict=sentinel_key, + proxy_server_request=sentinel_psr, + litellm_logging_obj=object(), + ) + + advisor_legs = [c for c in captured if c["tools"] is None] + assert advisor_legs, "advisor sub-call (tools=None) must have fired" + adv = advisor_legs[0]["kwargs"] + assert adv.get("litellm_metadata") == sentinel_meta + assert adv.get("user_api_key_dict") is sentinel_key + assert adv.get("proxy_server_request") == sentinel_psr + # Own logging object → not stamped onto the parent request. + assert "litellm_logging_obj" not in adv