From 8323ea3d16b62852abf7247b4e70605a1e750669 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 25 May 2026 16:56:19 +0000 Subject: [PATCH 1/4] fix(otel): end proxy SERVER span on management-endpoint success without http_request MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit management_endpoint_wrapper only ran async_management_endpoint_success_hook — which stamps http.response.status_code=200 and end()s the parent SERVER span — when the handler declared an http_request param. 45 of 65 wrapped endpoints (all /key/*, /user/*, /mcp/*, ...) lack it, so on success their SERVER span, created in auth, was never ended and thus never exported. Mirror the failure branch: invoke the success hook regardless, falling back to func.__name__ for the route. Add a wrapper-level regression test (fails before this change) and an otel_verify/ end-to-end harness (config + curl matrix + span verifier) verifying PRs #28273, #28362, #28364, #28405 against a running proxy. https://claude.ai/code/session_016u6Pe2S2zBVrUuFF1N6GkJ --- litellm/proxy/management_helpers/utils.py | 36 ++-- otel_verify/.gitignore | 7 + otel_verify/config.yaml | 18 ++ otel_verify/otel_file_exporter.py | 36 ++++ otel_verify/run_matrix.sh | 65 ++++++++ otel_verify/setup.sh | 22 +++ otel_verify/verify_guardrail.py | 92 +++++++++++ otel_verify/verify_spans.py | 154 ++++++++++++++++++ .../test_otel_admin_endpoints.py | 46 ++++++ 9 files changed, 463 insertions(+), 13 deletions(-) create mode 100644 otel_verify/.gitignore create mode 100644 otel_verify/config.yaml create mode 100644 otel_verify/otel_file_exporter.py create mode 100755 otel_verify/run_matrix.sh create mode 100755 otel_verify/setup.sh create mode 100644 otel_verify/verify_guardrail.py create mode 100644 otel_verify/verify_spans.py diff --git a/litellm/proxy/management_helpers/utils.py b/litellm/proxy/management_helpers/utils.py index b7d5cc30c49..f6d500ac42a 100644 --- a/litellm/proxy/management_helpers/utils.py +++ b/litellm/proxy/management_helpers/utils.py @@ -468,25 +468,35 @@ async def wrapper(*args, **kwargs): from litellm.proxy.proxy_server import open_telemetry_logger if open_telemetry_logger is not None: + # Run the success hook regardless of whether the handler + # declares an ``http_request`` parameter. The hook ends the + # parent SERVER span; gating it on ``http_request`` leaked the + # span (created in auth, never ended → never exported) for the + # many management endpoints without that param (e.g. + # ``/key/generate``). Fall back to ``func.__name__`` for the + # route, mirroring the failure branch. if _http_request: _route = _http_request.url.path _request_body: dict = await _read_request_body( request=_http_request ) - _response = dict(result) if result is not None else None - - logging_payload = ManagementEndpointLoggingPayload( - route=_route, - request_data=_request_body, - response=_response, - start_time=start_time, - end_time=end_time, - ) + else: + _route = func.__name__ + _request_body = {} + _response = dict(result) if result is not None else None + + logging_payload = ManagementEndpointLoggingPayload( + route=_route, + request_data=_request_body, + response=_response, + start_time=start_time, + end_time=end_time, + ) - await open_telemetry_logger.async_management_endpoint_success_hook( # type: ignore - logging_payload=logging_payload, - parent_otel_span=parent_otel_span, - ) + await open_telemetry_logger.async_management_endpoint_success_hook( # type: ignore + logging_payload=logging_payload, + parent_otel_span=parent_otel_span, + ) # Delete updated/deleted info from cache _delete_api_key_from_cache(kwargs=kwargs) diff --git a/otel_verify/.gitignore b/otel_verify/.gitignore new file mode 100644 index 00000000000..2bd1837fc99 --- /dev/null +++ b/otel_verify/.gitignore @@ -0,0 +1,7 @@ +# Runtime artifacts produced by a verification run (incl. a live virtual key) +.team_id +.team_key +spans.jsonl +spans_admin.jsonl +proxy.log +__pycache__/ diff --git a/otel_verify/config.yaml b/otel_verify/config.yaml new file mode 100644 index 00000000000..ec25ba75699 --- /dev/null +++ b/otel_verify/config.yaml @@ -0,0 +1,18 @@ +model_list: + - model_name: gpt-mock + litellm_params: + model: openai/gpt-4o-mini + api_key: sk-fake-unused + +litellm_settings: + callbacks: ["otel_file_exporter.otel_logger"] + +guardrails: + - guardrail_name: "verify-guardrail" + litellm_params: + guardrail: verify_guardrail.VerifyGuardrail + mode: "pre_call" + default_on: true + +general_settings: + master_key: sk-1234 diff --git a/otel_verify/otel_file_exporter.py b/otel_verify/otel_file_exporter.py new file mode 100644 index 00000000000..467dd918ba4 --- /dev/null +++ b/otel_verify/otel_file_exporter.py @@ -0,0 +1,36 @@ +"""Custom OTEL logger that writes every finished span as one compact JSON line. + +Registered as a litellm callback (``otel_file_exporter.otel_logger``). Because it +subclasses litellm's ``OpenTelemetry`` integration, its ``__init__`` registers +itself as ``proxy_server.open_telemetry_logger`` (first-registered-wins), so the +proxy's SERVER-span lifecycle flows through it exactly like ``callbacks: ["otel"]`` +would — but spans land in a file we can parse instead of pretty-printed stdout. +""" + +import os +import threading + +from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult + +from litellm.integrations.opentelemetry import OpenTelemetry, OpenTelemetryConfig + +_SPAN_FILE = os.getenv("OTEL_SPAN_FILE", "/tmp/otel_spans.jsonl") +_lock = threading.Lock() + + +class FileSpanExporter(SpanExporter): + def export(self, spans): + with _lock, open(_SPAN_FILE, "a") as f: + for span in spans: + f.write(span.to_json(indent=None) + "\n") + return SpanExportResult.SUCCESS + + def shutdown(self): + return None + + def force_flush(self, timeout_millis: int = 30000) -> bool: + return True + + +# SimpleSpanProcessor is used for non-string exporters, so spans flush on .end(). +otel_logger = OpenTelemetry(config=OpenTelemetryConfig(exporter=FileSpanExporter())) diff --git a/otel_verify/run_matrix.sh b/otel_verify/run_matrix.sh new file mode 100755 index 00000000000..094a3236012 --- /dev/null +++ b/otel_verify/run_matrix.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +# OTEL fidelity verification matrix. Each case uses a deterministic traceparent +# so its spans can be looked up by trace_id. Prints "LABEL -> HTTP ". +set -u +BASE=http://localhost:4000 +MASTER="sk-1234" +TEAM_KEY="$(cat "$(dirname "$0")/.team_key")" + +tp() { echo "00-$1-0000000000000001-01"; } # build a traceparent from a 32-hex trace id + +run() { # label traceid curl-args... + local label="$1" tid="$2"; shift 2 + local code + code=$(curl -s -o /dev/null -w "%{http_code}" -H "traceparent: $(tp "$tid")" "$@") + printf "%-28s trace=%s -> HTTP %s\n" "$label" "$tid" "$code" +} + +J="Content-Type: application/json" + +# ---- #28405 http.response.status_code + #28273 team attrs (success/failure) ---- +run "T01_chat_200_team" 00000000000000000000000000000001 -X POST $BASE/v1/chat/completions \ + -H "Authorization: Bearer $TEAM_KEY" -H "$J" \ + -d '{"model":"gpt-mock","messages":[{"role":"user","content":"hello"}],"mock_response":"hi there"}' + +run "T02_chat_400_badjson" 00000000000000000000000000000002 -X POST $BASE/v1/chat/completions \ + -H "Authorization: Bearer $TEAM_KEY" -H "$J" -d '{ this is not valid json' + +run "T03_chat_401_badkey" 00000000000000000000000000000003 -X POST $BASE/v1/chat/completions \ + -H "Authorization: Bearer sk-does-not-exist" -H "$J" \ + -d '{"model":"gpt-mock","messages":[{"role":"user","content":"hi"}],"mock_response":"x"}' + +run "T04_chat_400_unknownmodel" 00000000000000000000000000000004 -X POST $BASE/v1/chat/completions \ + -H "Authorization: Bearer $MASTER" -H "$J" \ + -d '{"model":"definitely-not-a-real-model","messages":[{"role":"user","content":"hi"}]}' + +run "T05_chat_429_team" 00000000000000000000000000000005 -X POST $BASE/v1/chat/completions \ + -H "Authorization: Bearer $TEAM_KEY" -H "$J" \ + -d '{"model":"gpt-mock","messages":[{"role":"user","content":"hi"}],"mock_response":"litellm.RateLimitError"}' + +run "T06_chat_500_team" 00000000000000000000000000000006 -X POST $BASE/v1/chat/completions \ + -H "Authorization: Bearer $TEAM_KEY" -H "$J" \ + -d '{"model":"gpt-mock","messages":[{"role":"user","content":"hi"}],"mock_response":"litellm.InternalServerError"}' + +run "T07_messages_200_team" 00000000000000000000000000000007 -X POST $BASE/v1/messages \ + -H "Authorization: Bearer $TEAM_KEY" -H "$J" \ + -d '{"model":"gpt-mock","max_tokens":50,"messages":[{"role":"user","content":"hi"}],"mock_response":"hi there"}' + +# ---- #28405 admin-endpoint paths ---- +run "T08_admin_200_keygen" 00000000000000000000000000000008 -X POST $BASE/key/generate \ + -H "Authorization: Bearer $MASTER" -H "$J" -d '{"models":["gpt-mock"]}' + +run "T09_admin_500_keygen" 00000000000000000000000000000009 -X POST $BASE/key/generate \ + -H "Authorization: Bearer $MASTER" -H "$J" -d '{"duration":"not-a-valid-duration"}' + +run "T10_admin_422_keygen" 00000000000000000000000000000010 -X POST $BASE/key/generate \ + -H "Authorization: Bearer $MASTER" -H "$J" -d '{"models":"should-be-a-list"}' + +# ---- #28362 serialize guardrail_response + #28364 guardrail span on failure ---- +run "T11_guardrail_block_400" 00000000000000000000000000000011 -X POST $BASE/v1/chat/completions \ + -H "Authorization: Bearer $TEAM_KEY" -H "$J" \ + -d '{"model":"gpt-mock","messages":[{"role":"user","content":"please blockme now"}]}' + +run "T12_guardrail_allow_200" 00000000000000000000000000000012 -X POST $BASE/v1/chat/completions \ + -H "Authorization: Bearer $TEAM_KEY" -H "$J" \ + -d '{"model":"gpt-mock","messages":[{"role":"user","content":"please scanme now"}],"mock_response":"clean"}' diff --git a/otel_verify/setup.sh b/otel_verify/setup.sh new file mode 100755 index 00000000000..5c95383e7d0 --- /dev/null +++ b/otel_verify/setup.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Create a team + a virtual key (with allow_client_mock_response so the matrix +# can drive deterministic mock successes/errors). Writes .team_id / .team_key +# which run_matrix.sh and verify_spans.py read. Re-run after a fresh proxy/db. +set -euo pipefail +BASE="${BASE:-http://localhost:4000}" +MASTER="${MASTER_KEY:-sk-1234}" +HERE="$(dirname "$0")" + +team=$(curl -s -X POST "$BASE/team/new" -H "Authorization: Bearer $MASTER" \ + -H "Content-Type: application/json" -d '{"team_alias":"otel-verify-team"}') +team_id=$(python3 -c "import json,sys; print(json.load(sys.stdin)['team_id'])" <<<"$team") + +key=$(curl -s -X POST "$BASE/key/generate" -H "Authorization: Bearer $MASTER" \ + -H "Content-Type: application/json" \ + -d "{\"team_id\":\"$team_id\",\"models\":[\"gpt-mock\"],\"metadata\":{\"allow_client_mock_response\":true}}") +team_key=$(python3 -c "import json,sys; print(json.load(sys.stdin)['key'])" <<<"$key") + +printf '%s' "$team_id" > "$HERE/.team_id" +printf '%s' "$team_key" > "$HERE/.team_key" +echo "team_id=$team_id" +echo "team_key=$team_key (saved to .team_key)" diff --git a/otel_verify/verify_guardrail.py b/otel_verify/verify_guardrail.py new file mode 100644 index 00000000000..bf9a1eceea5 --- /dev/null +++ b/otel_verify/verify_guardrail.py @@ -0,0 +1,92 @@ +"""A self-contained guardrail that mimics a Bedrock pre-call guardrail. + +Drives the OTEL guardrail-span code paths added in #28362 (serialize +guardrail_response as JSON) and #28364 (emit guardrail span on the failure +path + surface guardrail_status / guardrail_action / violation_categories), +without needing AWS. + +Trigger words in the prompt: + - "blockme": records an intervention then raises -> failure path + - "scanme" : records a successful scan then allows -> success path + - otherwise: no-op (records nothing) +""" + +import time + +from fastapi import HTTPException + +from litellm.caching.caching import DualCache +from litellm.integrations.custom_guardrail import CustomGuardrail +from litellm.proxy._types import UserAPIKeyAuth +from litellm.types.utils import CallTypesLiteral + +# Shape mirrors a real Bedrock ApplyGuardrail response so the OTEL integration's +# violation-category extraction has realistic input. +BEDROCK_RESPONSE = { + "action": "GUARDRAIL_INTERVENED", + "assessments": [ + { + "topicPolicy": { + "topics": [{"name": "Fiduciary Advice", "action": "BLOCKED"}] + }, + "contentPolicy": { + "filters": [{"type": "VIOLENCE", "action": "BLOCKED"}] + }, + } + ], +} + + +class VerifyGuardrail(CustomGuardrail): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + @staticmethod + def _content(data: dict) -> str: + parts = [] + for m in data.get("messages") or []: + c = m.get("content") + if isinstance(c, str): + parts.append(c) + return " ".join(parts).lower() + + async def async_pre_call_hook( + self, + user_api_key_dict: UserAPIKeyAuth, + cache: DualCache, + data: dict, + call_type: CallTypesLiteral, + ): + content = self._content(data) + now = time.time() + + if "blockme" in content: + self.add_standard_logging_guardrail_information_to_request_data( + guardrail_json_response=BEDROCK_RESPONSE, + request_data=data, + guardrail_status="guardrail_intervened", + guardrail_provider="bedrock", + start_time=now, + end_time=now + 0.01, + tracing_detail={ + "guardrail_action": "GUARDRAIL_INTERVENED", + "violation_categories": ["Fiduciary Advice", "VIOLENCE"], + }, + ) + raise HTTPException( + status_code=400, + detail={"error": "Blocked by verify-guardrail"}, + ) + + if "scanme" in content: + self.add_standard_logging_guardrail_information_to_request_data( + guardrail_json_response=BEDROCK_RESPONSE, + request_data=data, + guardrail_status="success", + guardrail_provider="bedrock", + start_time=now, + end_time=now + 0.01, + tracing_detail={"guardrail_action": "NONE"}, + ) + + return data diff --git a/otel_verify/verify_spans.py b/otel_verify/verify_spans.py new file mode 100644 index 00000000000..2ef3b3aebbf --- /dev/null +++ b/otel_verify/verify_spans.py @@ -0,0 +1,154 @@ +"""Parse spans.jsonl (one compact span JSON per line), group by trace_id, and +assert the behavior each of the four PRs introduced. Prints a PASS/FAIL matrix.""" + +import json +import os +import sys +from datetime import datetime + +SERVER = "Received Proxy Server Request" +FAILED = "Failed Proxy Server Request" + +# trace_id (32 hex) -> expectation spec +EXPECT = { + "00000000000000000000000000000001": {"label": "T01 chat 200 (team)", "code": 200, "otel": "OK", + "team_on": [SERVER, "litellm_request", "raw_gen_ai_request"]}, + "00000000000000000000000000000002": {"label": "T02 chat 400 invalid-json", "code": 400, "otel": "ERROR"}, + "00000000000000000000000000000003": {"label": "T03 chat 401 bad-key", "code": 401, "otel": "ERROR"}, + "00000000000000000000000000000004": {"label": "T04 chat 400 unknown-model", "code": 400, "otel": "ERROR"}, + "00000000000000000000000000000005": {"label": "T05 chat 429 (team)", "code": 429, "otel": "ERROR", + "team_on": [SERVER, FAILED]}, + "00000000000000000000000000000006": {"label": "T06 chat 500 (team)", "code": 500, "otel": "ERROR"}, + "00000000000000000000000000000007": {"label": "T07 messages 200 (team)", "code": 200, "otel": "OK"}, + "00000000000000000000000000000008": {"label": "T08 admin 200 key/generate", "code": 200, "otel": "OK"}, + "00000000000000000000000000000009": {"label": "T09 admin 500 key/generate", "code": 500, "otel": "ERROR"}, + "00000000000000000000000000000010": {"label": "T10 admin 422 key/generate", "code": 422, "otel": "ERROR"}, + "00000000000000000000000000000011": {"label": "T11 guardrail block 400", "code": 400, "otel": "ERROR", + "guardrail": {"guardrail_status": "guardrail_intervened", + "guardrail_action": "GUARDRAIL_INTERVENED", + "categories": ["Fiduciary Advice", "VIOLENCE"]}}, + "00000000000000000000000000000012": {"label": "T12 guardrail allow 200", "code": 200, "otel": "OK", + "guardrail": {"guardrail_status": "success"}}, +} + +def _read(path, default=""): + try: + with open(os.path.join(os.path.dirname(__file__), path)) as f: + return f.read().strip() + except OSError: + return default + + +# Written by setup.sh; the team_alias is fixed by that script. +TEAM_ID = os.getenv("TEAM_ID") or _read(".team_id") +TEAM_ALIAS = os.getenv("TEAM_ALIAS", "otel-verify-team") + + +def norm_tid(raw): + return raw[2:] if raw.startswith("0x") else raw + + +def duration_ms(span): + try: + a = datetime.fromisoformat(span["start_time"].replace("Z", "+00:00")) + b = datetime.fromisoformat(span["end_time"].replace("Z", "+00:00")) + return (b - a).total_seconds() * 1000 + except Exception: + return None + + +def main(): + traces = {} + for line in open(sys.argv[1] if len(sys.argv) > 1 else "spans.jsonl"): + line = line.strip() + if not line: + continue + s = json.loads(line) + tid = norm_tid(s["context"]["trace_id"]) + traces.setdefault(tid, []).append(s) + + all_pass = True + for tid, spec in EXPECT.items(): + spans = traces.get(tid, []) + by_name = {} + for s in spans: + by_name.setdefault(s["name"], s).update() if False else by_name.setdefault(s["name"], s) + checks = [] + + # --- #28405: status code / route / path / duration on SERVER span --- + server = next((s for s in spans if s["name"] == SERVER), None) + if server is None: + checks.append(("#28405 SERVER span present", False, "missing")) + else: + a = server.get("attributes") or {} + code = a.get("http.response.status_code") + checks.append(("#28405 http.response.status_code", code == spec["code"] and isinstance(code, int), + f"{code!r}")) + checks.append(("#28405 url.path present", bool(a.get("url.path")), a.get("url.path"))) + checks.append(("#28405 http.route present", bool(a.get("http.route")), a.get("http.route"))) + d = duration_ms(server) + checks.append(("#28405 duration>0", d is not None and d > 0, f"{d:.1f}ms" if d else d)) + ostat = (server.get("status") or {}).get("status_code") + checks.append(("#28405 otel status", ostat == spec["otel"], ostat)) + + # --- #28273: team attrs on the listed spans --- + for name in spec.get("team_on", []): + sp = next((s for s in spans if s["name"] == name), None) + if sp is None: + checks.append((f"#28273 team attrs on {name}", False, "span missing")) + continue + a = sp.get("attributes") or {} + ok = a.get("metadata.user_api_key_team_id") == TEAM_ID and \ + a.get("metadata.user_api_key_team_alias") == TEAM_ALIAS + checks.append((f"#28273 team attrs on {name}", ok, + f"{a.get('metadata.user_api_key_team_id')}/{a.get('metadata.user_api_key_team_alias')}")) + + # --- #28362 + #28364: guardrail span --- + g = spec.get("guardrail") + if g is not None: + gs = next((s for s in spans if s["name"] == "guardrail"), None) + if gs is None: + checks.append(("#28364 guardrail span emitted", False, "missing")) + else: + a = gs.get("attributes") or {} + checks.append(("#28364 guardrail span emitted", True, "present")) + checks.append(("#28364 guardrail_status", + a.get("guardrail_status") == g["guardrail_status"], a.get("guardrail_status"))) + # #28362: guardrail_response must be valid JSON, not a python repr + gr = a.get("guardrail_response") + json_ok = False + detail = "absent" + if gr is not None: + try: + parsed = json.loads(gr) + json_ok = isinstance(parsed, (dict, list)) and "'" not in gr + detail = "valid JSON dict" if json_ok else f"not-json: {gr[:40]}" + except Exception: + detail = f"NOT JSON (python repr?): {gr[:40]}" + checks.append(("#28362 guardrail_response is JSON", json_ok, detail)) + if "guardrail_action" in g: + checks.append(("#28364 guardrail_action", + a.get("guardrail_action") == g["guardrail_action"], a.get("guardrail_action"))) + if "categories" in g: + cats_raw = a.get("guardrail_violation_categories") + cats_ok = False + try: + cats_ok = sorted(json.loads(cats_raw)) == sorted(g["categories"]) + except Exception: + pass + checks.append(("#28364 violation_categories", cats_ok, cats_raw)) + + trace_pass = all(ok for _, ok, _ in checks) + all_pass = all_pass and trace_pass + spans_seen = ",".join(sorted({s["name"] for s in spans})) + print(f"\n{'='*78}\n{spec['label']} [trace …{tid[-4:]}] {'PASS' if trace_pass else 'FAIL'}") + print(f" spans: {spans_seen}") + for name, ok, detail in checks: + print(f" [{'OK' if ok else 'XX'}] {name:42} {detail}") + + print(f"\n{'='*78}\nOVERALL: {'ALL PASS' if all_pass else 'FAILURES PRESENT'}") + return 0 if all_pass else 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/test_litellm/integrations/open_telemetry/test_otel_admin_endpoints.py b/tests/test_litellm/integrations/open_telemetry/test_otel_admin_endpoints.py index b1a3b834c3d..a3b8c0cc70c 100644 --- a/tests/test_litellm/integrations/open_telemetry/test_otel_admin_endpoints.py +++ b/tests/test_litellm/integrations/open_telemetry/test_otel_admin_endpoints.py @@ -180,3 +180,49 @@ def test_admin_endpoint_failure_stamps_server_span( expected_url_path=path, where=f"{path} {expected_status}", ) + + +def test_management_wrapper_success_ends_server_span_without_http_request( + server_span_factory, otel_with_exporter, monkeypatch +): + """Regression: management endpoints whose handler does not declare an + ``http_request`` parameter (``/key/generate``, ``/user/new``, ``/mcp/*``, + ...) must still get their parent SERVER span stamped + ended on success. + + The success hook itself stamps 200 and ``end()``s the parent, but the + wrapper only invoked it when ``http_request`` was present — so on success + the span (created in auth) was never ended and never exported. This drives + the real wrapper around an ``http_request``-less handler and asserts the + SERVER span reaches the exporter with status 200. + """ + import litellm.proxy.proxy_server as proxy_server + from litellm.proxy.management_helpers import utils as mgmt_utils + + otel, exporter = otel_with_exporter + monkeypatch.setattr(proxy_server, "open_telemetry_logger", otel, raising=False) + + async def _noop_alert(*args, **kwargs): + return None + + monkeypatch.setattr(mgmt_utils, "send_management_endpoint_alert", _noop_alert) + + server_span = server_span_factory(KEY_GENERATE_PATH) + + @mgmt_utils.management_endpoint_wrapper + async def fake_generate_key_fn(data=None, user_api_key_dict=None): + # No ``http_request`` parameter — mirrors generate_key_fn et al. + return {"key": "sk-xyz", "key_name": "k"} + + asyncio.run( + fake_generate_key_fn( + data={}, + user_api_key_dict=_real_user_api_key_dict(server_span), + ) + ) + + assert_server_span_attrs( + exporter, + expected_status=200, + expected_url_path=KEY_GENERATE_PATH, + where="management wrapper success without http_request", + ) From 432233ee43eaf411a25a070bc3e582100823597b Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 25 May 2026 17:04:23 +0000 Subject: [PATCH 2/4] style(otel_verify): black-format harness scripts, drop dead code Re-runs CI against the corrected base branch and tidies the verification harness (Black formatting + removal of an unused by_name block). https://claude.ai/code/session_016u6Pe2S2zBVrUuFF1N6GkJ --- otel_verify/verify_guardrail.py | 4 +- otel_verify/verify_spans.py | 160 ++++++++++++++++++++++++-------- 2 files changed, 124 insertions(+), 40 deletions(-) diff --git a/otel_verify/verify_guardrail.py b/otel_verify/verify_guardrail.py index bf9a1eceea5..81058c42573 100644 --- a/otel_verify/verify_guardrail.py +++ b/otel_verify/verify_guardrail.py @@ -29,9 +29,7 @@ "topicPolicy": { "topics": [{"name": "Fiduciary Advice", "action": "BLOCKED"}] }, - "contentPolicy": { - "filters": [{"type": "VIOLENCE", "action": "BLOCKED"}] - }, + "contentPolicy": {"filters": [{"type": "VIOLENCE", "action": "BLOCKED"}]}, } ], } diff --git a/otel_verify/verify_spans.py b/otel_verify/verify_spans.py index 2ef3b3aebbf..04f43117487 100644 --- a/otel_verify/verify_spans.py +++ b/otel_verify/verify_spans.py @@ -11,26 +11,77 @@ # trace_id (32 hex) -> expectation spec EXPECT = { - "00000000000000000000000000000001": {"label": "T01 chat 200 (team)", "code": 200, "otel": "OK", - "team_on": [SERVER, "litellm_request", "raw_gen_ai_request"]}, - "00000000000000000000000000000002": {"label": "T02 chat 400 invalid-json", "code": 400, "otel": "ERROR"}, - "00000000000000000000000000000003": {"label": "T03 chat 401 bad-key", "code": 401, "otel": "ERROR"}, - "00000000000000000000000000000004": {"label": "T04 chat 400 unknown-model", "code": 400, "otel": "ERROR"}, - "00000000000000000000000000000005": {"label": "T05 chat 429 (team)", "code": 429, "otel": "ERROR", - "team_on": [SERVER, FAILED]}, - "00000000000000000000000000000006": {"label": "T06 chat 500 (team)", "code": 500, "otel": "ERROR"}, - "00000000000000000000000000000007": {"label": "T07 messages 200 (team)", "code": 200, "otel": "OK"}, - "00000000000000000000000000000008": {"label": "T08 admin 200 key/generate", "code": 200, "otel": "OK"}, - "00000000000000000000000000000009": {"label": "T09 admin 500 key/generate", "code": 500, "otel": "ERROR"}, - "00000000000000000000000000000010": {"label": "T10 admin 422 key/generate", "code": 422, "otel": "ERROR"}, - "00000000000000000000000000000011": {"label": "T11 guardrail block 400", "code": 400, "otel": "ERROR", - "guardrail": {"guardrail_status": "guardrail_intervened", - "guardrail_action": "GUARDRAIL_INTERVENED", - "categories": ["Fiduciary Advice", "VIOLENCE"]}}, - "00000000000000000000000000000012": {"label": "T12 guardrail allow 200", "code": 200, "otel": "OK", - "guardrail": {"guardrail_status": "success"}}, + "00000000000000000000000000000001": { + "label": "T01 chat 200 (team)", + "code": 200, + "otel": "OK", + "team_on": [SERVER, "litellm_request", "raw_gen_ai_request"], + }, + "00000000000000000000000000000002": { + "label": "T02 chat 400 invalid-json", + "code": 400, + "otel": "ERROR", + }, + "00000000000000000000000000000003": { + "label": "T03 chat 401 bad-key", + "code": 401, + "otel": "ERROR", + }, + "00000000000000000000000000000004": { + "label": "T04 chat 400 unknown-model", + "code": 400, + "otel": "ERROR", + }, + "00000000000000000000000000000005": { + "label": "T05 chat 429 (team)", + "code": 429, + "otel": "ERROR", + "team_on": [SERVER, FAILED], + }, + "00000000000000000000000000000006": { + "label": "T06 chat 500 (team)", + "code": 500, + "otel": "ERROR", + }, + "00000000000000000000000000000007": { + "label": "T07 messages 200 (team)", + "code": 200, + "otel": "OK", + }, + "00000000000000000000000000000008": { + "label": "T08 admin 200 key/generate", + "code": 200, + "otel": "OK", + }, + "00000000000000000000000000000009": { + "label": "T09 admin 500 key/generate", + "code": 500, + "otel": "ERROR", + }, + "00000000000000000000000000000010": { + "label": "T10 admin 422 key/generate", + "code": 422, + "otel": "ERROR", + }, + "00000000000000000000000000000011": { + "label": "T11 guardrail block 400", + "code": 400, + "otel": "ERROR", + "guardrail": { + "guardrail_status": "guardrail_intervened", + "guardrail_action": "GUARDRAIL_INTERVENED", + "categories": ["Fiduciary Advice", "VIOLENCE"], + }, + }, + "00000000000000000000000000000012": { + "label": "T12 guardrail allow 200", + "code": 200, + "otel": "OK", + "guardrail": {"guardrail_status": "success"}, + }, } + def _read(path, default=""): try: with open(os.path.join(os.path.dirname(__file__), path)) as f: @@ -70,9 +121,6 @@ def main(): all_pass = True for tid, spec in EXPECT.items(): spans = traces.get(tid, []) - by_name = {} - for s in spans: - by_name.setdefault(s["name"], s).update() if False else by_name.setdefault(s["name"], s) checks = [] # --- #28405: status code / route / path / duration on SERVER span --- @@ -82,12 +130,27 @@ def main(): else: a = server.get("attributes") or {} code = a.get("http.response.status_code") - checks.append(("#28405 http.response.status_code", code == spec["code"] and isinstance(code, int), - f"{code!r}")) - checks.append(("#28405 url.path present", bool(a.get("url.path")), a.get("url.path"))) - checks.append(("#28405 http.route present", bool(a.get("http.route")), a.get("http.route"))) + checks.append( + ( + "#28405 http.response.status_code", + code == spec["code"] and isinstance(code, int), + f"{code!r}", + ) + ) + checks.append( + ("#28405 url.path present", bool(a.get("url.path")), a.get("url.path")) + ) + checks.append( + ( + "#28405 http.route present", + bool(a.get("http.route")), + a.get("http.route"), + ) + ) d = duration_ms(server) - checks.append(("#28405 duration>0", d is not None and d > 0, f"{d:.1f}ms" if d else d)) + checks.append( + ("#28405 duration>0", d is not None and d > 0, f"{d:.1f}ms" if d else d) + ) ostat = (server.get("status") or {}).get("status_code") checks.append(("#28405 otel status", ostat == spec["otel"], ostat)) @@ -98,10 +161,17 @@ def main(): checks.append((f"#28273 team attrs on {name}", False, "span missing")) continue a = sp.get("attributes") or {} - ok = a.get("metadata.user_api_key_team_id") == TEAM_ID and \ - a.get("metadata.user_api_key_team_alias") == TEAM_ALIAS - checks.append((f"#28273 team attrs on {name}", ok, - f"{a.get('metadata.user_api_key_team_id')}/{a.get('metadata.user_api_key_team_alias')}")) + ok = ( + a.get("metadata.user_api_key_team_id") == TEAM_ID + and a.get("metadata.user_api_key_team_alias") == TEAM_ALIAS + ) + checks.append( + ( + f"#28273 team attrs on {name}", + ok, + f"{a.get('metadata.user_api_key_team_id')}/{a.get('metadata.user_api_key_team_alias')}", + ) + ) # --- #28362 + #28364: guardrail span --- g = spec.get("guardrail") @@ -112,8 +182,13 @@ def main(): else: a = gs.get("attributes") or {} checks.append(("#28364 guardrail span emitted", True, "present")) - checks.append(("#28364 guardrail_status", - a.get("guardrail_status") == g["guardrail_status"], a.get("guardrail_status"))) + checks.append( + ( + "#28364 guardrail_status", + a.get("guardrail_status") == g["guardrail_status"], + a.get("guardrail_status"), + ) + ) # #28362: guardrail_response must be valid JSON, not a python repr gr = a.get("guardrail_response") json_ok = False @@ -122,18 +197,27 @@ def main(): try: parsed = json.loads(gr) json_ok = isinstance(parsed, (dict, list)) and "'" not in gr - detail = "valid JSON dict" if json_ok else f"not-json: {gr[:40]}" + detail = ( + "valid JSON dict" if json_ok else f"not-json: {gr[:40]}" + ) except Exception: detail = f"NOT JSON (python repr?): {gr[:40]}" checks.append(("#28362 guardrail_response is JSON", json_ok, detail)) if "guardrail_action" in g: - checks.append(("#28364 guardrail_action", - a.get("guardrail_action") == g["guardrail_action"], a.get("guardrail_action"))) + checks.append( + ( + "#28364 guardrail_action", + a.get("guardrail_action") == g["guardrail_action"], + a.get("guardrail_action"), + ) + ) if "categories" in g: cats_raw = a.get("guardrail_violation_categories") cats_ok = False try: - cats_ok = sorted(json.loads(cats_raw)) == sorted(g["categories"]) + cats_ok = sorted(json.loads(cats_raw)) == sorted( + g["categories"] + ) except Exception: pass checks.append(("#28364 violation_categories", cats_ok, cats_raw)) @@ -141,7 +225,9 @@ def main(): trace_pass = all(ok for _, ok, _ in checks) all_pass = all_pass and trace_pass spans_seen = ",".join(sorted({s["name"] for s in spans})) - print(f"\n{'='*78}\n{spec['label']} [trace …{tid[-4:]}] {'PASS' if trace_pass else 'FAIL'}") + print( + f"\n{'='*78}\n{spec['label']} [trace …{tid[-4:]}] {'PASS' if trace_pass else 'FAIL'}" + ) print(f" spans: {spans_seen}") for name, ok, detail in checks: print(f" [{'OK' if ok else 'XX'}] {name:42} {detail}") From 8b0a6c4ecd6f5ceba89e97e43a2f7b79c2ecd816 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 25 May 2026 17:11:28 +0000 Subject: [PATCH 3/4] refactor(otel): extract management-endpoint span emission to fix PLR0915 Adding the success-path fix pushed management_endpoint_wrapper to 52 statements (ruff PLR0915 limit is 50). Extract the shared success/failure OTEL span emission into _emit_management_endpoint_otel_span, which also de-duplicates the two near-identical inline blocks. Behavior is unchanged: the parent SERVER span is stamped + ended on both paths, including for handlers without an http_request param. https://claude.ai/code/session_016u6Pe2S2zBVrUuFF1N6GkJ --- litellm/proxy/management_helpers/utils.py | 130 ++++++++++++---------- 1 file changed, 69 insertions(+), 61 deletions(-) diff --git a/litellm/proxy/management_helpers/utils.py b/litellm/proxy/management_helpers/utils.py index f6d500ac42a..750bbd732be 100644 --- a/litellm/proxy/management_helpers/utils.py +++ b/litellm/proxy/management_helpers/utils.py @@ -2,7 +2,7 @@ ## Helper utils for the management endpoints (keys/users/teams) from datetime import datetime from functools import wraps -from typing import List, Optional, Tuple +from typing import Any, Callable, List, Optional, Tuple from fastapi import HTTPException, Request @@ -435,6 +435,58 @@ async def send_management_endpoint_alert( ) +async def _emit_management_endpoint_otel_span( + func: Callable, + kwargs: dict, + parent_otel_span: Any, + start_time: datetime, + end_time: datetime, + result: Any = None, + exception: Optional[Exception] = None, +) -> None: + """Stamp + end the parent OTEL SERVER span for a management endpoint. + + Routes the request/response (or exception) through the OTEL success/failure + hook. Falls back to ``func.__name__`` for the route when the handler has no + ``http_request`` param — endpoints like ``/key/generate`` never receive one, + and gating the hook on it leaked their SERVER span (created in auth, never + ended → never exported). Always emitting keeps both success and failure + paths consistent. + """ + from litellm.proxy.proxy_server import open_telemetry_logger + + if open_telemetry_logger is None: + return + + http_request: Optional[Request] = kwargs.get("http_request") + if http_request is not None: + route = http_request.url.path + request_body: dict = await _read_request_body(request=http_request) + else: + route = func.__name__ + request_body = {} + + logging_payload = ManagementEndpointLoggingPayload( + route=route, + request_data=request_body, + response=dict(result) if (exception is None and result is not None) else None, + start_time=start_time, + end_time=end_time, + exception=exception, + ) + + if exception is None: + await open_telemetry_logger.async_management_endpoint_success_hook( + logging_payload=logging_payload, + parent_otel_span=parent_otel_span, + ) + else: + await open_telemetry_logger.async_management_endpoint_failure_hook( + logging_payload=logging_payload, + parent_otel_span=parent_otel_span, + ) + + def management_endpoint_wrapper(func): """ This wrapper does the following: @@ -446,7 +498,6 @@ def management_endpoint_wrapper(func): @wraps(func) async def wrapper(*args, **kwargs): start_time = datetime.now() - _http_request: Optional[Request] = None try: result = await func(*args, **kwargs) end_time = datetime.now() @@ -462,41 +513,16 @@ async def wrapper(*args, **kwargs): user_api_key_dict=user_api_key_dict, function_name=func.__name__, ) - _http_request = kwargs.get("http_request", None) parent_otel_span = getattr(user_api_key_dict, "parent_otel_span", None) if parent_otel_span is not None: - from litellm.proxy.proxy_server import open_telemetry_logger - - if open_telemetry_logger is not None: - # Run the success hook regardless of whether the handler - # declares an ``http_request`` parameter. The hook ends the - # parent SERVER span; gating it on ``http_request`` leaked the - # span (created in auth, never ended → never exported) for the - # many management endpoints without that param (e.g. - # ``/key/generate``). Fall back to ``func.__name__`` for the - # route, mirroring the failure branch. - if _http_request: - _route = _http_request.url.path - _request_body: dict = await _read_request_body( - request=_http_request - ) - else: - _route = func.__name__ - _request_body = {} - _response = dict(result) if result is not None else None - - logging_payload = ManagementEndpointLoggingPayload( - route=_route, - request_data=_request_body, - response=_response, - start_time=start_time, - end_time=end_time, - ) - - await open_telemetry_logger.async_management_endpoint_success_hook( # type: ignore - logging_payload=logging_payload, - parent_otel_span=parent_otel_span, - ) + await _emit_management_endpoint_otel_span( + func=func, + kwargs=kwargs, + parent_otel_span=parent_otel_span, + start_time=start_time, + end_time=end_time, + result=result, + ) # Delete updated/deleted info from cache _delete_api_key_from_cache(kwargs=kwargs) @@ -519,32 +545,14 @@ async def wrapper(*args, **kwargs): ) parent_otel_span = getattr(user_api_key_dict, "parent_otel_span", None) if parent_otel_span is not None: - from litellm.proxy.proxy_server import open_telemetry_logger - - if open_telemetry_logger is not None: - _http_request = kwargs.get("http_request") - if _http_request: - _route = _http_request.url.path - _request_body: dict = await _read_request_body( - request=_http_request - ) - else: - _route = func.__name__ - _request_body = {} - - logging_payload = ManagementEndpointLoggingPayload( - route=_route, - request_data=_request_body, - response=None, - start_time=start_time, - end_time=end_time, - exception=e, - ) - - await open_telemetry_logger.async_management_endpoint_failure_hook( # type: ignore - logging_payload=logging_payload, - parent_otel_span=parent_otel_span, - ) + await _emit_management_endpoint_otel_span( + func=func, + kwargs=kwargs, + parent_otel_span=parent_otel_span, + start_time=start_time, + end_time=end_time, + exception=e, + ) raise e From 9a0af3b3d25a5a27618d7992f9ae29e643c6ff61 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 25 May 2026 17:20:55 +0000 Subject: [PATCH 4/4] test(otel): cover management-wrapper span emission; drop dead kwargs guards Bring patch coverage of _emit_management_endpoint_otel_span and the wrapper to 100%: add tests for the failure path, the http_request-present path, the no-OTEL-logger early return, and the non-blocking post-success error path. Remove two unreachable `if kwargs is None` guards (**kwargs is always a dict). https://claude.ai/code/session_016u6Pe2S2zBVrUuFF1N6GkJ --- litellm/proxy/management_helpers/utils.py | 4 - .../test_otel_admin_endpoints.py | 138 +++++++++++++++++- 2 files changed, 134 insertions(+), 8 deletions(-) diff --git a/litellm/proxy/management_helpers/utils.py b/litellm/proxy/management_helpers/utils.py index 750bbd732be..142863ba69c 100644 --- a/litellm/proxy/management_helpers/utils.py +++ b/litellm/proxy/management_helpers/utils.py @@ -502,8 +502,6 @@ async def wrapper(*args, **kwargs): result = await func(*args, **kwargs) end_time = datetime.now() try: - if kwargs is None: - kwargs = {} user_api_key_dict: UserAPIKeyAuth = ( kwargs.get("user_api_key_dict") or UserAPIKeyAuth() ) @@ -538,8 +536,6 @@ async def wrapper(*args, **kwargs): except Exception as e: end_time = datetime.now() - if kwargs is None: - kwargs = {} user_api_key_dict: UserAPIKeyAuth = ( kwargs.get("user_api_key_dict") or UserAPIKeyAuth() ) diff --git a/tests/test_litellm/integrations/open_telemetry/test_otel_admin_endpoints.py b/tests/test_litellm/integrations/open_telemetry/test_otel_admin_endpoints.py index a3b8c0cc70c..34103449dad 100644 --- a/tests/test_litellm/integrations/open_telemetry/test_otel_admin_endpoints.py +++ b/tests/test_litellm/integrations/open_telemetry/test_otel_admin_endpoints.py @@ -3,6 +3,7 @@ import asyncio from datetime import datetime +from unittest.mock import MagicMock import pytest @@ -14,6 +15,7 @@ from ._helpers import ( HttpStatusException, assert_server_span_attrs, + get_server_span, make_fastapi_http_exception, make_httpx_status_error, ) @@ -28,6 +30,10 @@ def _real_user_api_key_dict(parent_span): ) +async def _noop_alert(*args, **kwargs): + return None + + async def _drive_admin_failure(*, otel, exception, parent_span, route): payload = ManagementEndpointLoggingPayload( route=route, @@ -200,10 +206,6 @@ def test_management_wrapper_success_ends_server_span_without_http_request( otel, exporter = otel_with_exporter monkeypatch.setattr(proxy_server, "open_telemetry_logger", otel, raising=False) - - async def _noop_alert(*args, **kwargs): - return None - monkeypatch.setattr(mgmt_utils, "send_management_endpoint_alert", _noop_alert) server_span = server_span_factory(KEY_GENERATE_PATH) @@ -226,3 +228,131 @@ async def fake_generate_key_fn(data=None, user_api_key_dict=None): expected_url_path=KEY_GENERATE_PATH, where="management wrapper success without http_request", ) + + +def test_management_wrapper_failure_ends_server_span( + server_span_factory, otel_with_exporter, monkeypatch +): + """When the handler raises, the wrapper must route through the failure hook + and stamp + end the parent SERVER span with the error status — even for an + ``http_request``-less handler (route falls back to ``func.__name__``).""" + import litellm.proxy.proxy_server as proxy_server + from litellm.proxy.management_helpers import utils as mgmt_utils + + otel, exporter = otel_with_exporter + monkeypatch.setattr(proxy_server, "open_telemetry_logger", otel, raising=False) + + server_span = server_span_factory(KEY_GENERATE_PATH) + + @mgmt_utils.management_endpoint_wrapper + async def failing_fn(data=None, user_api_key_dict=None): + raise HttpStatusException(500, "boom") + + with pytest.raises(HttpStatusException): + asyncio.run( + failing_fn(data={}, user_api_key_dict=_real_user_api_key_dict(server_span)) + ) + + assert_server_span_attrs( + exporter, + expected_status=500, + expected_url_path=KEY_GENERATE_PATH, + where="management wrapper failure", + ) + + +def test_management_wrapper_success_with_http_request( + server_span_factory, otel_with_exporter, monkeypatch +): + """Cover the branch where the handler DOES declare ``http_request``: the + route comes from ``http_request.url.path`` and the body is read from it.""" + import litellm.proxy.proxy_server as proxy_server + from litellm.proxy.management_helpers import utils as mgmt_utils + + otel, exporter = otel_with_exporter + monkeypatch.setattr(proxy_server, "open_telemetry_logger", otel, raising=False) + monkeypatch.setattr(mgmt_utils, "send_management_endpoint_alert", _noop_alert) + + async def _fake_body(request=None): + return {"team_alias": "t"} + + monkeypatch.setattr(mgmt_utils, "_read_request_body", _fake_body) + + server_span = server_span_factory("/team/new") + http_request = MagicMock() + http_request.url.path = "/team/new" + + @mgmt_utils.management_endpoint_wrapper + async def fake_new_team(data=None, http_request=None, user_api_key_dict=None): + return {"team_id": "t-1"} + + asyncio.run( + fake_new_team( + data={}, + http_request=http_request, + user_api_key_dict=_real_user_api_key_dict(server_span), + ) + ) + + assert_server_span_attrs( + exporter, + expected_status=200, + expected_url_path="/team/new", + where="management wrapper success with http_request", + ) + + +def test_management_wrapper_noop_when_otel_logger_absent( + server_span_factory, otel_with_exporter, monkeypatch +): + """When no OTEL logger is registered, the helper early-returns and no SERVER + span is exported — and the handler result is still returned unchanged.""" + import litellm.proxy.proxy_server as proxy_server + from litellm.proxy.management_helpers import utils as mgmt_utils + + _otel, exporter = otel_with_exporter + monkeypatch.setattr(proxy_server, "open_telemetry_logger", None, raising=False) + monkeypatch.setattr(mgmt_utils, "send_management_endpoint_alert", _noop_alert) + + server_span = server_span_factory(KEY_GENERATE_PATH) + + @mgmt_utils.management_endpoint_wrapper + async def fake_fn(data=None, user_api_key_dict=None): + return {"ok": True} + + result = asyncio.run( + fake_fn(data={}, user_api_key_dict=_real_user_api_key_dict(server_span)) + ) + + assert result == {"ok": True} + assert get_server_span(exporter) is None + + +def test_management_wrapper_swallows_post_success_errors( + server_span_factory, otel_with_exporter, monkeypatch +): + """A failure in post-success bookkeeping (cache invalidation, alerting) must + not propagate — the handler result is returned regardless (non-blocking).""" + import litellm.proxy.proxy_server as proxy_server + from litellm.proxy.management_helpers import utils as mgmt_utils + + otel, _exporter = otel_with_exporter + monkeypatch.setattr(proxy_server, "open_telemetry_logger", otel, raising=False) + monkeypatch.setattr(mgmt_utils, "send_management_endpoint_alert", _noop_alert) + + def _boom(*args, **kwargs): + raise RuntimeError("cache backend down") + + monkeypatch.setattr(mgmt_utils, "_delete_api_key_from_cache", _boom) + + server_span = server_span_factory(KEY_GENERATE_PATH) + + @mgmt_utils.management_endpoint_wrapper + async def fake_fn(data=None, user_api_key_dict=None): + return {"ok": True} + + result = asyncio.run( + fake_fn(data={}, user_api_key_dict=_real_user_api_key_dict(server_span)) + ) + + assert result == {"ok": True}