From 6018c7d54515a3d145fa901d82e7d995695b6a02 Mon Sep 17 00:00:00 2001
From: Brendan Smith-Elion <brendan.smith-elion@arcadia.io>
Date: Mon, 15 Jun 2026 16:02:19 -0400
Subject: [PATCH] fix(advisor): attribute advisor sub-call spend to the
 originating key/user
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The advisor orchestration sub-call did not forward the parent request's proxy
auth/attribution context (litellm_metadata / user_api_key_dict /
proxy_server_request) that the executor leg already spreads via **kwargs. With
no key/user/team in scope the proxy cost-tracking callback skips the SpendLogs
write entirely, so advisor spend is attributed to nobody — it runs on resolved
provider credentials and is visible only in raw provider invocation logs, never
in per-user litellm logs.

Forward the proxy context to the advisor leg, excluding litellm_logging_obj so
the advisor sub-call mints its own logging object and its spend is not
double-counted against the parent request's call id. api_key and api_base are
also excluded because the advisor call already passes them explicitly.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../messages/interceptors/advisor.py          | 16 +++++
 .../messages/test_advisor_integration.py      | 67 +++++++++++++++++++
 2 files changed, 83 insertions(+)

diff --git a/litellm/llms/anthropic/experimental_pass_through/messages/interceptors/advisor.py b/litellm/llms/anthropic/experimental_pass_through/messages/interceptors/advisor.py
index c7c110ff3e3..eba6f8980c9 100644
--- a/litellm/llms/anthropic/experimental_pass_through/messages/interceptors/advisor.py
+++ b/litellm/llms/anthropic/experimental_pass_through/messages/interceptors/advisor.py
@@ -149,6 +149,21 @@ async def handle(
             )
 
             # --- Advisor sub-call (always non-streaming, no tools) ---
+            # Forward the parent request's proxy auth/attribution context
+            # (litellm_metadata, user_api_key_dict, proxy_server_request, ...) so
+            # the advisor sub-call is logged and cost-attributed to the
+            # originating key/user, exactly like the executor leg above (which
+            # spreads **kwargs). Without it the proxy cost-tracking callback skips
+            # the SpendLogs write entirely (it requires a non-None key/user/team),
+            # so advisor spend is invisible in per-user logs. litellm_logging_obj
+            # is excluded so the advisor leg gets its own logging object and its
+            # spend is not double-counted against the parent request's call id;
+            # api_key/api_base are excluded because they are passed explicitly.
+            advisor_passthrough = {
+                k: v
+                for k, v in kwargs.items()
+                if k not in ("litellm_logging_obj", "api_key", "api_base")
+            }
             advisor_response: AnthropicMessagesResponse = await _call_messages_handler(
                 model=advisor_model,
                 messages=advisor_messages,
@@ -163,6 +178,7 @@ async def handle(
                 },
                 api_key=advisor_api_key,
                 api_base=advisor_api_base,
+                **advisor_passthrough,
             )
 
             advisor_text = _extract_response_text(advisor_response)
diff --git a/tests/test_litellm/llms/anthropic/experimental_pass_through/messages/test_advisor_integration.py b/tests/test_litellm/llms/anthropic/experimental_pass_through/messages/test_advisor_integration.py
index 414ba8f0f5c..7fda6570b5a 100644
--- a/tests/test_litellm/llms/anthropic/experimental_pass_through/messages/test_advisor_integration.py
+++ b/tests/test_litellm/llms/anthropic/experimental_pass_through/messages/test_advisor_integration.py
@@ -364,3 +364,70 @@ async def fake_pre_request_hooks(
     assert captured["thinking"] == {"type": "enabled", "budget_tokens": 2048}
     assert captured["system"] == "Hook overrode the system prompt."
     assert captured["temperature"] == 0.1
+
+
+# ---------------------------------------------------------------------------
+# Advisor sub-call is attributed to the originating key/user (SpendLogs)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_advisor_subcall_forwards_proxy_attribution():
+    """
+    The advisor sub-call must inherit the parent request's proxy
+    auth/attribution context (litellm_metadata / user_api_key_dict /
+    proxy_server_request) so it is logged and cost-attributed to the originating
+    key/user, exactly like the executor leg. Without it the proxy cost-tracking
+    callback skips the SpendLogs write entirely.
+
+    litellm_logging_obj must NOT be forwarded: the advisor leg owns its own
+    logging object so its spend is not double-counted against the parent request.
+    """
+    from litellm.llms.anthropic.experimental_pass_through.messages.interceptors.advisor import (
+        AdvisorOrchestrationHandler,
+    )
+
+    sentinel_meta = {"user_api_key_alias": "team-a", "user_api_key": "sk-" + "a" * 32}
+    sentinel_key = object()  # unique object; checked by identity below
+    sentinel_psr = {"url": "/v1/messages"}
+
+    captured = []  # one {"tools", "kwargs"} record per mocked handler call
+    executor_calls = 0  # incremented only for calls made with tools set
+
+    async def mock_handler(
+        model, messages, tools, stream, max_tokens, custom_llm_provider, **kwargs
+    ):
+        nonlocal executor_calls
+        captured.append({"tools": tools, "kwargs": kwargs})
+        if tools is None:
+            return _text_resp("Some advice.", model="claude-opus-4-6")  # advisor leg
+        executor_calls += 1
+        if executor_calls == 1:
+            return _advisor_call_resp()  # executor → requests advisor (once)
+        return _text_resp("Final answer.")  # executor → final
+
+    with patch(
+        "litellm.llms.anthropic.experimental_pass_through.messages.interceptors.advisor._call_messages_handler",
+        side_effect=mock_handler,
+    ):
+        await AdvisorOrchestrationHandler().handle(
+            model="openai/gpt-4o-mini",
+            messages=MESSAGES,
+            tools=[ADVISOR_TOOL],
+            stream=False,
+            max_tokens=512,
+            custom_llm_provider="openai",
+            litellm_metadata=sentinel_meta,
+            user_api_key_dict=sentinel_key,
+            proxy_server_request=sentinel_psr,
+            litellm_logging_obj=object(),  # asserted absent from the advisor leg
+        )
+
+    advisor_legs = [c for c in captured if c["tools"] is None]
+    assert advisor_legs, "advisor sub-call (tools=None) must have fired"
+    adv = advisor_legs[0]["kwargs"]  # extra kwargs seen by the first advisor call
+    assert adv.get("litellm_metadata") == sentinel_meta
+    assert adv.get("user_api_key_dict") is sentinel_key
+    assert adv.get("proxy_server_request") == sentinel_psr
+    # Own logging object → not stamped onto the parent request.
+    assert "litellm_logging_obj" not in adv