From 8323ea3d16b62852abf7247b4e70605a1e750669 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 25 May 2026 16:56:19 +0000
Subject: [PATCH 1/4] fix(otel): end proxy SERVER span on management-endpoint
 success without http_request
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

management_endpoint_wrapper only ran async_management_endpoint_success_hook —
which stamps http.response.status_code=200 and end()s the parent SERVER span —
when the handler declared an http_request param. 45 of 65 wrapped endpoints
(all /key/*, /user/*, /mcp/*, ...) lack it, so on success their SERVER span,
created in auth, was never ended and thus never exported. Mirror the failure
branch: invoke the success hook regardless, falling back to func.__name__ for
the route.

Add a wrapper-level regression test (fails before this change) and an
otel_verify/ end-to-end harness (config + curl matrix + span verifier)
verifying PRs #28273, #28362, #28364, #28405 against a running proxy.

https://claude.ai/code/session_016u6Pe2S2zBVrUuFF1N6GkJ
---
 litellm/proxy/management_helpers/utils.py     |  36 ++--
 otel_verify/.gitignore                        |   7 +
 otel_verify/config.yaml                       |  18 ++
 otel_verify/otel_file_exporter.py             |  36 ++++
 otel_verify/run_matrix.sh                     |  65 ++++++++
 otel_verify/setup.sh                          |  22 +++
 otel_verify/verify_guardrail.py               |  92 +++++++++++
 otel_verify/verify_spans.py                   | 154 ++++++++++++++++++
 .../test_otel_admin_endpoints.py              |  46 ++++++
 9 files changed, 463 insertions(+), 13 deletions(-)
 create mode 100644 otel_verify/.gitignore
 create mode 100644 otel_verify/config.yaml
 create mode 100644 otel_verify/otel_file_exporter.py
 create mode 100755 otel_verify/run_matrix.sh
 create mode 100755 otel_verify/setup.sh
 create mode 100644 otel_verify/verify_guardrail.py
 create mode 100644 otel_verify/verify_spans.py

diff --git a/litellm/proxy/management_helpers/utils.py b/litellm/proxy/management_helpers/utils.py
index b7d5cc30c49..f6d500ac42a 100644
--- a/litellm/proxy/management_helpers/utils.py
+++ b/litellm/proxy/management_helpers/utils.py
@@ -468,25 +468,35 @@ async def wrapper(*args, **kwargs):
                     from litellm.proxy.proxy_server import open_telemetry_logger
 
                     if open_telemetry_logger is not None:
+                        # Run the success hook regardless of whether the handler
+                        # declares an ``http_request`` parameter. The hook ends the
+                        # parent SERVER span; gating it on ``http_request`` leaked the
+                        # span (created in auth, never ended → never exported) for the
+                        # many management endpoints without that param (e.g.
+                        # ``/key/generate``). Fall back to ``func.__name__`` for the
+                        # route, mirroring the failure branch.
                         if _http_request:
                             _route = _http_request.url.path
                             _request_body: dict = await _read_request_body(
                                 request=_http_request
                             )
-                            _response = dict(result) if result is not None else None
-
-                            logging_payload = ManagementEndpointLoggingPayload(
-                                route=_route,
-                                request_data=_request_body,
-                                response=_response,
-                                start_time=start_time,
-                                end_time=end_time,
-                            )
+                        else:
+                            _route = func.__name__
+                            _request_body = {}
+                        _response = dict(result) if result is not None else None
+
+                        logging_payload = ManagementEndpointLoggingPayload(
+                            route=_route,
+                            request_data=_request_body,
+                            response=_response,
+                            start_time=start_time,
+                            end_time=end_time,
+                        )
 
-                            await open_telemetry_logger.async_management_endpoint_success_hook(  # type: ignore
-                                logging_payload=logging_payload,
-                                parent_otel_span=parent_otel_span,
-                            )
+                        await open_telemetry_logger.async_management_endpoint_success_hook(  # type: ignore
+                            logging_payload=logging_payload,
+                            parent_otel_span=parent_otel_span,
+                        )
 
                 # Delete updated/deleted info from cache
                 _delete_api_key_from_cache(kwargs=kwargs)
diff --git a/otel_verify/.gitignore b/otel_verify/.gitignore
new file mode 100644
index 00000000000..2bd1837fc99
--- /dev/null
+++ b/otel_verify/.gitignore
@@ -0,0 +1,7 @@
+# Runtime artifacts produced by a verification run (incl. a live virtual key)
+.team_id
+.team_key
+spans.jsonl
+spans_admin.jsonl
+proxy.log
+__pycache__/
diff --git a/otel_verify/config.yaml b/otel_verify/config.yaml
new file mode 100644
index 00000000000..ec25ba75699
--- /dev/null
+++ b/otel_verify/config.yaml
@@ -0,0 +1,18 @@
+model_list:
+  - model_name: gpt-mock
+    litellm_params:
+      model: openai/gpt-4o-mini
+      api_key: sk-fake-unused
+
+litellm_settings:
+  callbacks: ["otel_file_exporter.otel_logger"]
+
+guardrails:
+  - guardrail_name: "verify-guardrail"
+    litellm_params:
+      guardrail: verify_guardrail.VerifyGuardrail
+      mode: "pre_call"
+      default_on: true
+
+general_settings:
+  master_key: sk-1234
diff --git a/otel_verify/otel_file_exporter.py b/otel_verify/otel_file_exporter.py
new file mode 100644
index 00000000000..467dd918ba4
--- /dev/null
+++ b/otel_verify/otel_file_exporter.py
@@ -0,0 +1,36 @@
+"""Custom OTEL logger that writes every finished span as one compact JSON line.
+
+Registered as a litellm callback (``otel_file_exporter.otel_logger``). Because it
+subclasses litellm's ``OpenTelemetry`` integration, its ``__init__`` registers
+itself as ``proxy_server.open_telemetry_logger`` (first-registered-wins), so the
+proxy's SERVER-span lifecycle flows through it exactly like ``callbacks: ["otel"]``
+would — but spans land in a file we can parse instead of pretty-printed stdout.
+"""
+
+import os
+import threading
+
+from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
+
+from litellm.integrations.opentelemetry import OpenTelemetry, OpenTelemetryConfig
+
+_SPAN_FILE = os.getenv("OTEL_SPAN_FILE", "/tmp/otel_spans.jsonl")
+_lock = threading.Lock()
+
+
+class FileSpanExporter(SpanExporter):
+    def export(self, spans):
+        with _lock, open(_SPAN_FILE, "a") as f:
+            for span in spans:
+                f.write(span.to_json(indent=None) + "\n")
+        return SpanExportResult.SUCCESS
+
+    def shutdown(self):
+        return None
+
+    def force_flush(self, timeout_millis: int = 30000) -> bool:
+        return True
+
+
+# SimpleSpanProcessor is used for non-string exporters, so spans flush on .end().
+otel_logger = OpenTelemetry(config=OpenTelemetryConfig(exporter=FileSpanExporter()))
diff --git a/otel_verify/run_matrix.sh b/otel_verify/run_matrix.sh
new file mode 100755
index 00000000000..094a3236012
--- /dev/null
+++ b/otel_verify/run_matrix.sh
@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+# OTEL fidelity verification matrix. Each case uses a deterministic traceparent
+# so its spans can be looked up by trace_id. Prints "LABEL -> HTTP <code>".
+set -u
+BASE=http://localhost:4000
+MASTER="sk-1234"
+TEAM_KEY="$(cat "$(dirname "$0")/.team_key")"
+
+tp() { echo "00-$1-0000000000000001-01"; }   # build a traceparent from a 32-hex trace id
+
+run() {  # label  traceid  curl-args...
+  local label="$1" tid="$2"; shift 2
+  local code
+  code=$(curl -s -o /dev/null -w "%{http_code}" -H "traceparent: $(tp "$tid")" "$@")
+  printf "%-28s trace=%s -> HTTP %s\n" "$label" "$tid" "$code"
+}
+
+J="Content-Type: application/json"
+
+# ---- #28405 http.response.status_code + #28273 team attrs (success/failure) ----
+run "T01_chat_200_team"       00000000000000000000000000000001 -X POST $BASE/v1/chat/completions \
+    -H "Authorization: Bearer $TEAM_KEY" -H "$J" \
+    -d '{"model":"gpt-mock","messages":[{"role":"user","content":"hello"}],"mock_response":"hi there"}'
+
+run "T02_chat_400_badjson"    00000000000000000000000000000002 -X POST $BASE/v1/chat/completions \
+    -H "Authorization: Bearer $TEAM_KEY" -H "$J" -d '{ this is not valid json'
+
+run "T03_chat_401_badkey"     00000000000000000000000000000003 -X POST $BASE/v1/chat/completions \
+    -H "Authorization: Bearer sk-does-not-exist" -H "$J" \
+    -d '{"model":"gpt-mock","messages":[{"role":"user","content":"hi"}],"mock_response":"x"}'
+
+run "T04_chat_400_unknownmodel" 00000000000000000000000000000004 -X POST $BASE/v1/chat/completions \
+    -H "Authorization: Bearer $MASTER" -H "$J" \
+    -d '{"model":"definitely-not-a-real-model","messages":[{"role":"user","content":"hi"}]}'
+
+run "T05_chat_429_team"       00000000000000000000000000000005 -X POST $BASE/v1/chat/completions \
+    -H "Authorization: Bearer $TEAM_KEY" -H "$J" \
+    -d '{"model":"gpt-mock","messages":[{"role":"user","content":"hi"}],"mock_response":"litellm.RateLimitError"}'
+
+run "T06_chat_500_team"       00000000000000000000000000000006 -X POST $BASE/v1/chat/completions \
+    -H "Authorization: Bearer $TEAM_KEY" -H "$J" \
+    -d '{"model":"gpt-mock","messages":[{"role":"user","content":"hi"}],"mock_response":"litellm.InternalServerError"}'
+
+run "T07_messages_200_team"   00000000000000000000000000000007 -X POST $BASE/v1/messages \
+    -H "Authorization: Bearer $TEAM_KEY" -H "$J" \
+    -d '{"model":"gpt-mock","max_tokens":50,"messages":[{"role":"user","content":"hi"}],"mock_response":"hi there"}'
+
+# ---- #28405 admin-endpoint paths ----
+run "T08_admin_200_keygen"    00000000000000000000000000000008 -X POST $BASE/key/generate \
+    -H "Authorization: Bearer $MASTER" -H "$J" -d '{"models":["gpt-mock"]}'
+
+run "T09_admin_500_keygen"    00000000000000000000000000000009 -X POST $BASE/key/generate \
+    -H "Authorization: Bearer $MASTER" -H "$J" -d '{"duration":"not-a-valid-duration"}'
+
+run "T10_admin_422_keygen"    00000000000000000000000000000010 -X POST $BASE/key/generate \
+    -H "Authorization: Bearer $MASTER" -H "$J" -d '{"models":"should-be-a-list"}'
+
+# ---- #28362 serialize guardrail_response + #28364 guardrail span on failure ----
+run "T11_guardrail_block_400" 00000000000000000000000000000011 -X POST $BASE/v1/chat/completions \
+    -H "Authorization: Bearer $TEAM_KEY" -H "$J" \
+    -d '{"model":"gpt-mock","messages":[{"role":"user","content":"please blockme now"}]}'
+
+run "T12_guardrail_allow_200" 00000000000000000000000000000012 -X POST $BASE/v1/chat/completions \
+    -H "Authorization: Bearer $TEAM_KEY" -H "$J" \
+    -d '{"model":"gpt-mock","messages":[{"role":"user","content":"please scanme now"}],"mock_response":"clean"}'
diff --git a/otel_verify/setup.sh b/otel_verify/setup.sh
new file mode 100755
index 00000000000..5c95383e7d0
--- /dev/null
+++ b/otel_verify/setup.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+# Create a team + a virtual key (with allow_client_mock_response so the matrix
+# can drive deterministic mock successes/errors). Writes .team_id / .team_key
+# which run_matrix.sh and verify_spans.py read. Re-run after a fresh proxy/db.
+set -euo pipefail
+BASE="${BASE:-http://localhost:4000}"
+MASTER="${MASTER_KEY:-sk-1234}"
+HERE="$(dirname "$0")"
+
+team=$(curl -s -X POST "$BASE/team/new" -H "Authorization: Bearer $MASTER" \
+  -H "Content-Type: application/json" -d '{"team_alias":"otel-verify-team"}')
+team_id=$(python3 -c "import json,sys; print(json.load(sys.stdin)['team_id'])" <<<"$team")
+
+key=$(curl -s -X POST "$BASE/key/generate" -H "Authorization: Bearer $MASTER" \
+  -H "Content-Type: application/json" \
+  -d "{\"team_id\":\"$team_id\",\"models\":[\"gpt-mock\"],\"metadata\":{\"allow_client_mock_response\":true}}")
+team_key=$(python3 -c "import json,sys; print(json.load(sys.stdin)['key'])" <<<"$key")
+
+printf '%s' "$team_id"  > "$HERE/.team_id"
+printf '%s' "$team_key" > "$HERE/.team_key"
+echo "team_id=$team_id"
+echo "team_key=$team_key (saved to .team_key)"
diff --git a/otel_verify/verify_guardrail.py b/otel_verify/verify_guardrail.py
new file mode 100644
index 00000000000..bf9a1eceea5
--- /dev/null
+++ b/otel_verify/verify_guardrail.py
@@ -0,0 +1,92 @@
+"""A self-contained guardrail that mimics a Bedrock pre-call guardrail.
+
+Drives the OTEL guardrail-span code paths added in #28362 (serialize
+guardrail_response as JSON) and #28364 (emit guardrail span on the failure
+path + surface guardrail_status / guardrail_action / violation_categories),
+without needing AWS.
+
+Trigger words in the prompt:
+  - "blockme": records an intervention then raises -> failure path
+  - "scanme" : records a successful scan then allows -> success path
+  - otherwise: no-op (records nothing)
+"""
+
+import time
+
+from fastapi import HTTPException
+
+from litellm.caching.caching import DualCache
+from litellm.integrations.custom_guardrail import CustomGuardrail
+from litellm.proxy._types import UserAPIKeyAuth
+from litellm.types.utils import CallTypesLiteral
+
+# Shape mirrors a real Bedrock ApplyGuardrail response so the OTEL integration's
+# violation-category extraction has realistic input.
+BEDROCK_RESPONSE = {
+    "action": "GUARDRAIL_INTERVENED",
+    "assessments": [
+        {
+            "topicPolicy": {
+                "topics": [{"name": "Fiduciary Advice", "action": "BLOCKED"}]
+            },
+            "contentPolicy": {
+                "filters": [{"type": "VIOLENCE", "action": "BLOCKED"}]
+            },
+        }
+    ],
+}
+
+
+class VerifyGuardrail(CustomGuardrail):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+    @staticmethod
+    def _content(data: dict) -> str:
+        parts = []
+        for m in data.get("messages") or []:
+            c = m.get("content")
+            if isinstance(c, str):
+                parts.append(c)
+        return " ".join(parts).lower()
+
+    async def async_pre_call_hook(
+        self,
+        user_api_key_dict: UserAPIKeyAuth,
+        cache: DualCache,
+        data: dict,
+        call_type: CallTypesLiteral,
+    ):
+        content = self._content(data)
+        now = time.time()
+
+        if "blockme" in content:
+            self.add_standard_logging_guardrail_information_to_request_data(
+                guardrail_json_response=BEDROCK_RESPONSE,
+                request_data=data,
+                guardrail_status="guardrail_intervened",
+                guardrail_provider="bedrock",
+                start_time=now,
+                end_time=now + 0.01,
+                tracing_detail={
+                    "guardrail_action": "GUARDRAIL_INTERVENED",
+                    "violation_categories": ["Fiduciary Advice", "VIOLENCE"],
+                },
+            )
+            raise HTTPException(
+                status_code=400,
+                detail={"error": "Blocked by verify-guardrail"},
+            )
+
+        if "scanme" in content:
+            self.add_standard_logging_guardrail_information_to_request_data(
+                guardrail_json_response=BEDROCK_RESPONSE,
+                request_data=data,
+                guardrail_status="success",
+                guardrail_provider="bedrock",
+                start_time=now,
+                end_time=now + 0.01,
+                tracing_detail={"guardrail_action": "NONE"},
+            )
+
+        return data
diff --git a/otel_verify/verify_spans.py b/otel_verify/verify_spans.py
new file mode 100644
index 00000000000..2ef3b3aebbf
--- /dev/null
+++ b/otel_verify/verify_spans.py
@@ -0,0 +1,154 @@
+"""Parse spans.jsonl (one compact span JSON per line), group by trace_id, and
+assert the behavior each of the four PRs introduced. Prints a PASS/FAIL matrix."""
+
+import json
+import os
+import sys
+from datetime import datetime
+
+SERVER = "Received Proxy Server Request"
+FAILED = "Failed Proxy Server Request"
+
+# trace_id (32 hex) -> expectation spec
+EXPECT = {
+    "00000000000000000000000000000001": {"label": "T01 chat 200 (team)", "code": 200, "otel": "OK",
+        "team_on": [SERVER, "litellm_request", "raw_gen_ai_request"]},
+    "00000000000000000000000000000002": {"label": "T02 chat 400 invalid-json", "code": 400, "otel": "ERROR"},
+    "00000000000000000000000000000003": {"label": "T03 chat 401 bad-key", "code": 401, "otel": "ERROR"},
+    "00000000000000000000000000000004": {"label": "T04 chat 400 unknown-model", "code": 400, "otel": "ERROR"},
+    "00000000000000000000000000000005": {"label": "T05 chat 429 (team)", "code": 429, "otel": "ERROR",
+        "team_on": [SERVER, FAILED]},
+    "00000000000000000000000000000006": {"label": "T06 chat 500 (team)", "code": 500, "otel": "ERROR"},
+    "00000000000000000000000000000007": {"label": "T07 messages 200 (team)", "code": 200, "otel": "OK"},
+    "00000000000000000000000000000008": {"label": "T08 admin 200 key/generate", "code": 200, "otel": "OK"},
+    "00000000000000000000000000000009": {"label": "T09 admin 500 key/generate", "code": 500, "otel": "ERROR"},
+    "00000000000000000000000000000010": {"label": "T10 admin 422 key/generate", "code": 422, "otel": "ERROR"},
+    "00000000000000000000000000000011": {"label": "T11 guardrail block 400", "code": 400, "otel": "ERROR",
+        "guardrail": {"guardrail_status": "guardrail_intervened",
+                       "guardrail_action": "GUARDRAIL_INTERVENED",
+                       "categories": ["Fiduciary Advice", "VIOLENCE"]}},
+    "00000000000000000000000000000012": {"label": "T12 guardrail allow 200", "code": 200, "otel": "OK",
+        "guardrail": {"guardrail_status": "success"}},
+}
+
+def _read(path, default=""):
+    try:
+        with open(os.path.join(os.path.dirname(__file__), path)) as f:
+            return f.read().strip()
+    except OSError:
+        return default
+
+
+# Written by setup.sh; the team_alias is fixed by that script.
+TEAM_ID = os.getenv("TEAM_ID") or _read(".team_id")
+TEAM_ALIAS = os.getenv("TEAM_ALIAS", "otel-verify-team")
+
+
+def norm_tid(raw):
+    return raw[2:] if raw.startswith("0x") else raw
+
+
+def duration_ms(span):
+    try:
+        a = datetime.fromisoformat(span["start_time"].replace("Z", "+00:00"))
+        b = datetime.fromisoformat(span["end_time"].replace("Z", "+00:00"))
+        return (b - a).total_seconds() * 1000
+    except Exception:
+        return None
+
+
+def main():
+    traces = {}
+    for line in open(sys.argv[1] if len(sys.argv) > 1 else "spans.jsonl"):
+        line = line.strip()
+        if not line:
+            continue
+        s = json.loads(line)
+        tid = norm_tid(s["context"]["trace_id"])
+        traces.setdefault(tid, []).append(s)
+
+    all_pass = True
+    for tid, spec in EXPECT.items():
+        spans = traces.get(tid, [])
+        by_name = {}
+        for s in spans:
+            by_name.setdefault(s["name"], s).update() if False else by_name.setdefault(s["name"], s)
+        checks = []
+
+        # --- #28405: status code / route / path / duration on SERVER span ---
+        server = next((s for s in spans if s["name"] == SERVER), None)
+        if server is None:
+            checks.append(("#28405 SERVER span present", False, "missing"))
+        else:
+            a = server.get("attributes") or {}
+            code = a.get("http.response.status_code")
+            checks.append(("#28405 http.response.status_code", code == spec["code"] and isinstance(code, int),
+                           f"{code!r}"))
+            checks.append(("#28405 url.path present", bool(a.get("url.path")), a.get("url.path")))
+            checks.append(("#28405 http.route present", bool(a.get("http.route")), a.get("http.route")))
+            d = duration_ms(server)
+            checks.append(("#28405 duration>0", d is not None and d > 0, f"{d:.1f}ms" if d else d))
+            ostat = (server.get("status") or {}).get("status_code")
+            checks.append(("#28405 otel status", ostat == spec["otel"], ostat))
+
+        # --- #28273: team attrs on the listed spans ---
+        for name in spec.get("team_on", []):
+            sp = next((s for s in spans if s["name"] == name), None)
+            if sp is None:
+                checks.append((f"#28273 team attrs on {name}", False, "span missing"))
+                continue
+            a = sp.get("attributes") or {}
+            ok = a.get("metadata.user_api_key_team_id") == TEAM_ID and \
+                 a.get("metadata.user_api_key_team_alias") == TEAM_ALIAS
+            checks.append((f"#28273 team attrs on {name}", ok,
+                           f"{a.get('metadata.user_api_key_team_id')}/{a.get('metadata.user_api_key_team_alias')}"))
+
+        # --- #28362 + #28364: guardrail span ---
+        g = spec.get("guardrail")
+        if g is not None:
+            gs = next((s for s in spans if s["name"] == "guardrail"), None)
+            if gs is None:
+                checks.append(("#28364 guardrail span emitted", False, "missing"))
+            else:
+                a = gs.get("attributes") or {}
+                checks.append(("#28364 guardrail span emitted", True, "present"))
+                checks.append(("#28364 guardrail_status",
+                               a.get("guardrail_status") == g["guardrail_status"], a.get("guardrail_status")))
+                # #28362: guardrail_response must be valid JSON, not a python repr
+                gr = a.get("guardrail_response")
+                json_ok = False
+                detail = "absent"
+                if gr is not None:
+                    try:
+                        parsed = json.loads(gr)
+                        json_ok = isinstance(parsed, (dict, list)) and "'" not in gr
+                        detail = "valid JSON dict" if json_ok else f"not-json: {gr[:40]}"
+                    except Exception:
+                        detail = f"NOT JSON (python repr?): {gr[:40]}"
+                checks.append(("#28362 guardrail_response is JSON", json_ok, detail))
+                if "guardrail_action" in g:
+                    checks.append(("#28364 guardrail_action",
+                                   a.get("guardrail_action") == g["guardrail_action"], a.get("guardrail_action")))
+                if "categories" in g:
+                    cats_raw = a.get("guardrail_violation_categories")
+                    cats_ok = False
+                    try:
+                        cats_ok = sorted(json.loads(cats_raw)) == sorted(g["categories"])
+                    except Exception:
+                        pass
+                    checks.append(("#28364 violation_categories", cats_ok, cats_raw))
+
+        trace_pass = all(ok for _, ok, _ in checks)
+        all_pass = all_pass and trace_pass
+        spans_seen = ",".join(sorted({s["name"] for s in spans}))
+        print(f"\n{'='*78}\n{spec['label']}  [trace …{tid[-4:]}]   {'PASS' if trace_pass else 'FAIL'}")
+        print(f"  spans: {spans_seen}")
+        for name, ok, detail in checks:
+            print(f"    [{'OK' if ok else 'XX'}] {name:42} {detail}")
+
+    print(f"\n{'='*78}\nOVERALL: {'ALL PASS' if all_pass else 'FAILURES PRESENT'}")
+    return 0 if all_pass else 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tests/test_litellm/integrations/open_telemetry/test_otel_admin_endpoints.py b/tests/test_litellm/integrations/open_telemetry/test_otel_admin_endpoints.py
index b1a3b834c3d..a3b8c0cc70c 100644
--- a/tests/test_litellm/integrations/open_telemetry/test_otel_admin_endpoints.py
+++ b/tests/test_litellm/integrations/open_telemetry/test_otel_admin_endpoints.py
@@ -180,3 +180,49 @@ def test_admin_endpoint_failure_stamps_server_span(
         expected_url_path=path,
         where=f"{path} {expected_status}",
     )
+
+
+def test_management_wrapper_success_ends_server_span_without_http_request(
+    server_span_factory, otel_with_exporter, monkeypatch
+):
+    """Regression: management endpoints whose handler does not declare an
+    ``http_request`` parameter (``/key/generate``, ``/user/new``, ``/mcp/*``,
+    ...) must still get their parent SERVER span stamped + ended on success.
+
+    The success hook itself stamps 200 and ``end()``s the parent, but the
+    wrapper only invoked it when ``http_request`` was present — so on success
+    the span (created in auth) was never ended and never exported. This drives
+    the real wrapper around an ``http_request``-less handler and asserts the
+    SERVER span reaches the exporter with status 200.
+    """
+    import litellm.proxy.proxy_server as proxy_server
+    from litellm.proxy.management_helpers import utils as mgmt_utils
+
+    otel, exporter = otel_with_exporter
+    monkeypatch.setattr(proxy_server, "open_telemetry_logger", otel, raising=False)
+
+    async def _noop_alert(*args, **kwargs):
+        return None
+
+    monkeypatch.setattr(mgmt_utils, "send_management_endpoint_alert", _noop_alert)
+
+    server_span = server_span_factory(KEY_GENERATE_PATH)
+
+    @mgmt_utils.management_endpoint_wrapper
+    async def fake_generate_key_fn(data=None, user_api_key_dict=None):
+        # No ``http_request`` parameter — mirrors generate_key_fn et al.
+        return {"key": "sk-xyz", "key_name": "k"}
+
+    asyncio.run(
+        fake_generate_key_fn(
+            data={},
+            user_api_key_dict=_real_user_api_key_dict(server_span),
+        )
+    )
+
+    assert_server_span_attrs(
+        exporter,
+        expected_status=200,
+        expected_url_path=KEY_GENERATE_PATH,
+        where="management wrapper success without http_request",
+    )

From 432233ee43eaf411a25a070bc3e582100823597b Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 25 May 2026 17:04:23 +0000
Subject: [PATCH 2/4] style(otel_verify): black-format harness scripts, drop
 dead code

Re-runs CI against the corrected base branch and tidies the verification
harness (Black formatting + removal of an unused by_name block).

https://claude.ai/code/session_016u6Pe2S2zBVrUuFF1N6GkJ
---
 otel_verify/verify_guardrail.py |   4 +-
 otel_verify/verify_spans.py     | 160 ++++++++++++++++++++++++--------
 2 files changed, 124 insertions(+), 40 deletions(-)

diff --git a/otel_verify/verify_guardrail.py b/otel_verify/verify_guardrail.py
index bf9a1eceea5..81058c42573 100644
--- a/otel_verify/verify_guardrail.py
+++ b/otel_verify/verify_guardrail.py
@@ -29,9 +29,7 @@
             "topicPolicy": {
                 "topics": [{"name": "Fiduciary Advice", "action": "BLOCKED"}]
             },
-            "contentPolicy": {
-                "filters": [{"type": "VIOLENCE", "action": "BLOCKED"}]
-            },
+            "contentPolicy": {"filters": [{"type": "VIOLENCE", "action": "BLOCKED"}]},
         }
     ],
 }
diff --git a/otel_verify/verify_spans.py b/otel_verify/verify_spans.py
index 2ef3b3aebbf..04f43117487 100644
--- a/otel_verify/verify_spans.py
+++ b/otel_verify/verify_spans.py
@@ -11,26 +11,77 @@
 
 # trace_id (32 hex) -> expectation spec
 EXPECT = {
-    "00000000000000000000000000000001": {"label": "T01 chat 200 (team)", "code": 200, "otel": "OK",
-        "team_on": [SERVER, "litellm_request", "raw_gen_ai_request"]},
-    "00000000000000000000000000000002": {"label": "T02 chat 400 invalid-json", "code": 400, "otel": "ERROR"},
-    "00000000000000000000000000000003": {"label": "T03 chat 401 bad-key", "code": 401, "otel": "ERROR"},
-    "00000000000000000000000000000004": {"label": "T04 chat 400 unknown-model", "code": 400, "otel": "ERROR"},
-    "00000000000000000000000000000005": {"label": "T05 chat 429 (team)", "code": 429, "otel": "ERROR",
-        "team_on": [SERVER, FAILED]},
-    "00000000000000000000000000000006": {"label": "T06 chat 500 (team)", "code": 500, "otel": "ERROR"},
-    "00000000000000000000000000000007": {"label": "T07 messages 200 (team)", "code": 200, "otel": "OK"},
-    "00000000000000000000000000000008": {"label": "T08 admin 200 key/generate", "code": 200, "otel": "OK"},
-    "00000000000000000000000000000009": {"label": "T09 admin 500 key/generate", "code": 500, "otel": "ERROR"},
-    "00000000000000000000000000000010": {"label": "T10 admin 422 key/generate", "code": 422, "otel": "ERROR"},
-    "00000000000000000000000000000011": {"label": "T11 guardrail block 400", "code": 400, "otel": "ERROR",
-        "guardrail": {"guardrail_status": "guardrail_intervened",
-                       "guardrail_action": "GUARDRAIL_INTERVENED",
-                       "categories": ["Fiduciary Advice", "VIOLENCE"]}},
-    "00000000000000000000000000000012": {"label": "T12 guardrail allow 200", "code": 200, "otel": "OK",
-        "guardrail": {"guardrail_status": "success"}},
+    "00000000000000000000000000000001": {
+        "label": "T01 chat 200 (team)",
+        "code": 200,
+        "otel": "OK",
+        "team_on": [SERVER, "litellm_request", "raw_gen_ai_request"],
+    },
+    "00000000000000000000000000000002": {
+        "label": "T02 chat 400 invalid-json",
+        "code": 400,
+        "otel": "ERROR",
+    },
+    "00000000000000000000000000000003": {
+        "label": "T03 chat 401 bad-key",
+        "code": 401,
+        "otel": "ERROR",
+    },
+    "00000000000000000000000000000004": {
+        "label": "T04 chat 400 unknown-model",
+        "code": 400,
+        "otel": "ERROR",
+    },
+    "00000000000000000000000000000005": {
+        "label": "T05 chat 429 (team)",
+        "code": 429,
+        "otel": "ERROR",
+        "team_on": [SERVER, FAILED],
+    },
+    "00000000000000000000000000000006": {
+        "label": "T06 chat 500 (team)",
+        "code": 500,
+        "otel": "ERROR",
+    },
+    "00000000000000000000000000000007": {
+        "label": "T07 messages 200 (team)",
+        "code": 200,
+        "otel": "OK",
+    },
+    "00000000000000000000000000000008": {
+        "label": "T08 admin 200 key/generate",
+        "code": 200,
+        "otel": "OK",
+    },
+    "00000000000000000000000000000009": {
+        "label": "T09 admin 500 key/generate",
+        "code": 500,
+        "otel": "ERROR",
+    },
+    "00000000000000000000000000000010": {
+        "label": "T10 admin 422 key/generate",
+        "code": 422,
+        "otel": "ERROR",
+    },
+    "00000000000000000000000000000011": {
+        "label": "T11 guardrail block 400",
+        "code": 400,
+        "otel": "ERROR",
+        "guardrail": {
+            "guardrail_status": "guardrail_intervened",
+            "guardrail_action": "GUARDRAIL_INTERVENED",
+            "categories": ["Fiduciary Advice", "VIOLENCE"],
+        },
+    },
+    "00000000000000000000000000000012": {
+        "label": "T12 guardrail allow 200",
+        "code": 200,
+        "otel": "OK",
+        "guardrail": {"guardrail_status": "success"},
+    },
 }
 
+
 def _read(path, default=""):
     try:
         with open(os.path.join(os.path.dirname(__file__), path)) as f:
@@ -70,9 +121,6 @@ def main():
     all_pass = True
     for tid, spec in EXPECT.items():
         spans = traces.get(tid, [])
-        by_name = {}
-        for s in spans:
-            by_name.setdefault(s["name"], s).update() if False else by_name.setdefault(s["name"], s)
         checks = []
 
         # --- #28405: status code / route / path / duration on SERVER span ---
@@ -82,12 +130,27 @@ def main():
         else:
             a = server.get("attributes") or {}
             code = a.get("http.response.status_code")
-            checks.append(("#28405 http.response.status_code", code == spec["code"] and isinstance(code, int),
-                           f"{code!r}"))
-            checks.append(("#28405 url.path present", bool(a.get("url.path")), a.get("url.path")))
-            checks.append(("#28405 http.route present", bool(a.get("http.route")), a.get("http.route")))
+            checks.append(
+                (
+                    "#28405 http.response.status_code",
+                    code == spec["code"] and isinstance(code, int),
+                    f"{code!r}",
+                )
+            )
+            checks.append(
+                ("#28405 url.path present", bool(a.get("url.path")), a.get("url.path"))
+            )
+            checks.append(
+                (
+                    "#28405 http.route present",
+                    bool(a.get("http.route")),
+                    a.get("http.route"),
+                )
+            )
             d = duration_ms(server)
-            checks.append(("#28405 duration>0", d is not None and d > 0, f"{d:.1f}ms" if d else d))
+            checks.append(
+                ("#28405 duration>0", d is not None and d > 0, f"{d:.1f}ms" if d else d)
+            )
             ostat = (server.get("status") or {}).get("status_code")
             checks.append(("#28405 otel status", ostat == spec["otel"], ostat))
 
@@ -98,10 +161,17 @@ def main():
                 checks.append((f"#28273 team attrs on {name}", False, "span missing"))
                 continue
             a = sp.get("attributes") or {}
-            ok = a.get("metadata.user_api_key_team_id") == TEAM_ID and \
-                 a.get("metadata.user_api_key_team_alias") == TEAM_ALIAS
-            checks.append((f"#28273 team attrs on {name}", ok,
-                           f"{a.get('metadata.user_api_key_team_id')}/{a.get('metadata.user_api_key_team_alias')}"))
+            ok = (
+                a.get("metadata.user_api_key_team_id") == TEAM_ID
+                and a.get("metadata.user_api_key_team_alias") == TEAM_ALIAS
+            )
+            checks.append(
+                (
+                    f"#28273 team attrs on {name}",
+                    ok,
+                    f"{a.get('metadata.user_api_key_team_id')}/{a.get('metadata.user_api_key_team_alias')}",
+                )
+            )
 
         # --- #28362 + #28364: guardrail span ---
         g = spec.get("guardrail")
@@ -112,8 +182,13 @@ def main():
             else:
                 a = gs.get("attributes") or {}
                 checks.append(("#28364 guardrail span emitted", True, "present"))
-                checks.append(("#28364 guardrail_status",
-                               a.get("guardrail_status") == g["guardrail_status"], a.get("guardrail_status")))
+                checks.append(
+                    (
+                        "#28364 guardrail_status",
+                        a.get("guardrail_status") == g["guardrail_status"],
+                        a.get("guardrail_status"),
+                    )
+                )
                 # #28362: guardrail_response must be valid JSON, not a python repr
                 gr = a.get("guardrail_response")
                 json_ok = False
@@ -122,18 +197,27 @@ def main():
                     try:
                         parsed = json.loads(gr)
                         json_ok = isinstance(parsed, (dict, list)) and "'" not in gr
-                        detail = "valid JSON dict" if json_ok else f"not-json: {gr[:40]}"
+                        detail = (
+                            "valid JSON dict" if json_ok else f"not-json: {gr[:40]}"
+                        )
                     except Exception:
                         detail = f"NOT JSON (python repr?): {gr[:40]}"
                 checks.append(("#28362 guardrail_response is JSON", json_ok, detail))
                 if "guardrail_action" in g:
-                    checks.append(("#28364 guardrail_action",
-                                   a.get("guardrail_action") == g["guardrail_action"], a.get("guardrail_action")))
+                    checks.append(
+                        (
+                            "#28364 guardrail_action",
+                            a.get("guardrail_action") == g["guardrail_action"],
+                            a.get("guardrail_action"),
+                        )
+                    )
                 if "categories" in g:
                     cats_raw = a.get("guardrail_violation_categories")
                     cats_ok = False
                     try:
-                        cats_ok = sorted(json.loads(cats_raw)) == sorted(g["categories"])
+                        cats_ok = sorted(json.loads(cats_raw)) == sorted(
+                            g["categories"]
+                        )
                     except Exception:
                         pass
                     checks.append(("#28364 violation_categories", cats_ok, cats_raw))
@@ -141,7 +225,9 @@ def main():
         trace_pass = all(ok for _, ok, _ in checks)
         all_pass = all_pass and trace_pass
         spans_seen = ",".join(sorted({s["name"] for s in spans}))
-        print(f"\n{'='*78}\n{spec['label']}  [trace …{tid[-4:]}]   {'PASS' if trace_pass else 'FAIL'}")
+        print(
+            f"\n{'='*78}\n{spec['label']}  [trace …{tid[-4:]}]   {'PASS' if trace_pass else 'FAIL'}"
+        )
         print(f"  spans: {spans_seen}")
         for name, ok, detail in checks:
             print(f"    [{'OK' if ok else 'XX'}] {name:42} {detail}")

From 8b0a6c4ecd6f5ceba89e97e43a2f7b79c2ecd816 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 25 May 2026 17:11:28 +0000
Subject: [PATCH 3/4] refactor(otel): extract management-endpoint span emission
 to fix PLR0915

Adding the success-path fix pushed management_endpoint_wrapper to 52
statements (ruff PLR0915 limit is 50). Extract the shared success/failure
OTEL span emission into _emit_management_endpoint_otel_span, which also
de-duplicates the two near-identical inline blocks. Behavior is unchanged:
the parent SERVER span is stamped + ended on both paths, including for
handlers without an http_request param.

https://claude.ai/code/session_016u6Pe2S2zBVrUuFF1N6GkJ
---
 litellm/proxy/management_helpers/utils.py | 130 ++++++++++++----------
 1 file changed, 69 insertions(+), 61 deletions(-)

diff --git a/litellm/proxy/management_helpers/utils.py b/litellm/proxy/management_helpers/utils.py
index f6d500ac42a..750bbd732be 100644
--- a/litellm/proxy/management_helpers/utils.py
+++ b/litellm/proxy/management_helpers/utils.py
@@ -2,7 +2,7 @@
 ## Helper utils for the management endpoints (keys/users/teams)
 from datetime import datetime
 from functools import wraps
-from typing import List, Optional, Tuple
+from typing import Any, Callable, List, Optional, Tuple
 
 from fastapi import HTTPException, Request
 
@@ -435,6 +435,58 @@ async def send_management_endpoint_alert(
             )
 
 
+async def _emit_management_endpoint_otel_span(
+    func: Callable,
+    kwargs: dict,
+    parent_otel_span: Any,
+    start_time: datetime,
+    end_time: datetime,
+    result: Any = None,
+    exception: Optional[Exception] = None,
+) -> None:
+    """Stamp + end the parent OTEL SERVER span for a management endpoint.
+
+    Routes the request/response (or exception) through the OTEL success/failure
+    hook. Falls back to ``func.__name__`` for the route when the handler has no
+    ``http_request`` param — endpoints like ``/key/generate`` never receive one,
+    and gating the hook on it leaked their SERVER span (created in auth, never
+    ended → never exported). Always emitting keeps both success and failure
+    paths consistent.
+    """
+    from litellm.proxy.proxy_server import open_telemetry_logger
+
+    if open_telemetry_logger is None:
+        return
+
+    http_request: Optional[Request] = kwargs.get("http_request")
+    if http_request is not None:
+        route = http_request.url.path
+        request_body: dict = await _read_request_body(request=http_request)
+    else:
+        route = func.__name__
+        request_body = {}
+
+    logging_payload = ManagementEndpointLoggingPayload(
+        route=route,
+        request_data=request_body,
+        response=dict(result) if (exception is None and result is not None) else None,
+        start_time=start_time,
+        end_time=end_time,
+        exception=exception,
+    )
+
+    if exception is None:
+        await open_telemetry_logger.async_management_endpoint_success_hook(
+            logging_payload=logging_payload,
+            parent_otel_span=parent_otel_span,
+        )
+    else:
+        await open_telemetry_logger.async_management_endpoint_failure_hook(
+            logging_payload=logging_payload,
+            parent_otel_span=parent_otel_span,
+        )
+
+
 def management_endpoint_wrapper(func):
     """
     This wrapper does the following:
@@ -446,7 +498,6 @@ def management_endpoint_wrapper(func):
     @wraps(func)
     async def wrapper(*args, **kwargs):
         start_time = datetime.now()
-        _http_request: Optional[Request] = None
         try:
             result = await func(*args, **kwargs)
             end_time = datetime.now()
@@ -462,41 +513,16 @@ async def wrapper(*args, **kwargs):
                     user_api_key_dict=user_api_key_dict,
                     function_name=func.__name__,
                 )
-                _http_request = kwargs.get("http_request", None)
                 parent_otel_span = getattr(user_api_key_dict, "parent_otel_span", None)
                 if parent_otel_span is not None:
-                    from litellm.proxy.proxy_server import open_telemetry_logger
-
-                    if open_telemetry_logger is not None:
-                        # Run the success hook regardless of whether the handler
-                        # declares an ``http_request`` parameter. The hook ends the
-                        # parent SERVER span; gating it on ``http_request`` leaked the
-                        # span (created in auth, never ended → never exported) for the
-                        # many management endpoints without that param (e.g.
-                        # ``/key/generate``). Fall back to ``func.__name__`` for the
-                        # route, mirroring the failure branch.
-                        if _http_request:
-                            _route = _http_request.url.path
-                            _request_body: dict = await _read_request_body(
-                                request=_http_request
-                            )
-                        else:
-                            _route = func.__name__
-                            _request_body = {}
-                        _response = dict(result) if result is not None else None
-
-                        logging_payload = ManagementEndpointLoggingPayload(
-                            route=_route,
-                            request_data=_request_body,
-                            response=_response,
-                            start_time=start_time,
-                            end_time=end_time,
-                        )
-
-                        await open_telemetry_logger.async_management_endpoint_success_hook(  # type: ignore
-                            logging_payload=logging_payload,
-                            parent_otel_span=parent_otel_span,
-                        )
+                    await _emit_management_endpoint_otel_span(
+                        func=func,
+                        kwargs=kwargs,
+                        parent_otel_span=parent_otel_span,
+                        start_time=start_time,
+                        end_time=end_time,
+                        result=result,
+                    )
 
                 # Delete updated/deleted info from cache
                 _delete_api_key_from_cache(kwargs=kwargs)
@@ -519,32 +545,14 @@ async def wrapper(*args, **kwargs):
             )
             parent_otel_span = getattr(user_api_key_dict, "parent_otel_span", None)
             if parent_otel_span is not None:
-                from litellm.proxy.proxy_server import open_telemetry_logger
-
-                if open_telemetry_logger is not None:
-                    _http_request = kwargs.get("http_request")
-                    if _http_request:
-                        _route = _http_request.url.path
-                        _request_body: dict = await _read_request_body(
-                            request=_http_request
-                        )
-                    else:
-                        _route = func.__name__
-                        _request_body = {}
-
-                    logging_payload = ManagementEndpointLoggingPayload(
-                        route=_route,
-                        request_data=_request_body,
-                        response=None,
-                        start_time=start_time,
-                        end_time=end_time,
-                        exception=e,
-                    )
-
-                    await open_telemetry_logger.async_management_endpoint_failure_hook(  # type: ignore
-                        logging_payload=logging_payload,
-                        parent_otel_span=parent_otel_span,
-                    )
+                await _emit_management_endpoint_otel_span(
+                    func=func,
+                    kwargs=kwargs,
+                    parent_otel_span=parent_otel_span,
+                    start_time=start_time,
+                    end_time=end_time,
+                    exception=e,
+                )
 
             raise e
 

From 9a0af3b3d25a5a27618d7992f9ae29e643c6ff61 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 25 May 2026 17:20:55 +0000
Subject: [PATCH 4/4] test(otel): cover management-wrapper span emission; drop
 dead kwargs guards

Bring patch coverage of _emit_management_endpoint_otel_span and the wrapper
to 100%: add tests for the failure path, the http_request-present path, the
no-OTEL-logger early return, and the non-blocking post-success error path.
Remove two unreachable `if kwargs is None` guards (**kwargs is always a dict).

https://claude.ai/code/session_016u6Pe2S2zBVrUuFF1N6GkJ
---
 litellm/proxy/management_helpers/utils.py     |   4 -
 .../test_otel_admin_endpoints.py              | 138 +++++++++++++++++-
 2 files changed, 134 insertions(+), 8 deletions(-)

diff --git a/litellm/proxy/management_helpers/utils.py b/litellm/proxy/management_helpers/utils.py
index 750bbd732be..142863ba69c 100644
--- a/litellm/proxy/management_helpers/utils.py
+++ b/litellm/proxy/management_helpers/utils.py
@@ -502,8 +502,6 @@ async def wrapper(*args, **kwargs):
             result = await func(*args, **kwargs)
             end_time = datetime.now()
             try:
-                if kwargs is None:
-                    kwargs = {}
                 user_api_key_dict: UserAPIKeyAuth = (
                     kwargs.get("user_api_key_dict") or UserAPIKeyAuth()
                 )
@@ -538,8 +536,6 @@ async def wrapper(*args, **kwargs):
         except Exception as e:
             end_time = datetime.now()
 
-            if kwargs is None:
-                kwargs = {}
             user_api_key_dict: UserAPIKeyAuth = (
                 kwargs.get("user_api_key_dict") or UserAPIKeyAuth()
             )
diff --git a/tests/test_litellm/integrations/open_telemetry/test_otel_admin_endpoints.py b/tests/test_litellm/integrations/open_telemetry/test_otel_admin_endpoints.py
index a3b8c0cc70c..34103449dad 100644
--- a/tests/test_litellm/integrations/open_telemetry/test_otel_admin_endpoints.py
+++ b/tests/test_litellm/integrations/open_telemetry/test_otel_admin_endpoints.py
@@ -3,6 +3,7 @@
 
 import asyncio
 from datetime import datetime
+from unittest.mock import MagicMock
 
 import pytest
 
@@ -14,6 +15,7 @@
 from ._helpers import (
     HttpStatusException,
     assert_server_span_attrs,
+    get_server_span,
     make_fastapi_http_exception,
     make_httpx_status_error,
 )
@@ -28,6 +30,10 @@ def _real_user_api_key_dict(parent_span):
     )
 
 
+async def _noop_alert(*args, **kwargs):
+    return None
+
+
 async def _drive_admin_failure(*, otel, exception, parent_span, route):
     payload = ManagementEndpointLoggingPayload(
         route=route,
@@ -200,10 +206,6 @@ def test_management_wrapper_success_ends_server_span_without_http_request(
 
     otel, exporter = otel_with_exporter
     monkeypatch.setattr(proxy_server, "open_telemetry_logger", otel, raising=False)
-
-    async def _noop_alert(*args, **kwargs):
-        return None
-
     monkeypatch.setattr(mgmt_utils, "send_management_endpoint_alert", _noop_alert)
 
     server_span = server_span_factory(KEY_GENERATE_PATH)
@@ -226,3 +228,131 @@ async def fake_generate_key_fn(data=None, user_api_key_dict=None):
         expected_url_path=KEY_GENERATE_PATH,
         where="management wrapper success without http_request",
     )
+
+
+def test_management_wrapper_failure_ends_server_span(
+    server_span_factory, otel_with_exporter, monkeypatch
+):
+    """When the handler raises, the wrapper must route through the failure hook
+    and stamp + end the parent SERVER span with the error status — even for an
+    ``http_request``-less handler (route falls back to ``func.__name__``)."""
+    import litellm.proxy.proxy_server as proxy_server
+    from litellm.proxy.management_helpers import utils as mgmt_utils
+
+    otel, exporter = otel_with_exporter
+    monkeypatch.setattr(proxy_server, "open_telemetry_logger", otel, raising=False)
+
+    server_span = server_span_factory(KEY_GENERATE_PATH)
+
+    @mgmt_utils.management_endpoint_wrapper
+    async def failing_fn(data=None, user_api_key_dict=None):
+        raise HttpStatusException(500, "boom")
+
+    with pytest.raises(HttpStatusException):
+        asyncio.run(
+            failing_fn(data={}, user_api_key_dict=_real_user_api_key_dict(server_span))
+        )
+
+    assert_server_span_attrs(
+        exporter,
+        expected_status=500,
+        expected_url_path=KEY_GENERATE_PATH,
+        where="management wrapper failure",
+    )
+
+
+def test_management_wrapper_success_with_http_request(
+    server_span_factory, otel_with_exporter, monkeypatch
+):
+    """Cover the branch where the handler DOES declare ``http_request``: the
+    route comes from ``http_request.url.path`` and the body is read from it."""
+    import litellm.proxy.proxy_server as proxy_server
+    from litellm.proxy.management_helpers import utils as mgmt_utils
+
+    otel, exporter = otel_with_exporter
+    monkeypatch.setattr(proxy_server, "open_telemetry_logger", otel, raising=False)
+    monkeypatch.setattr(mgmt_utils, "send_management_endpoint_alert", _noop_alert)
+
+    async def _fake_body(request=None):
+        return {"team_alias": "t"}
+
+    monkeypatch.setattr(mgmt_utils, "_read_request_body", _fake_body)
+
+    server_span = server_span_factory("/team/new")
+    http_request = MagicMock()
+    http_request.url.path = "/team/new"
+
+    @mgmt_utils.management_endpoint_wrapper
+    async def fake_new_team(data=None, http_request=None, user_api_key_dict=None):
+        return {"team_id": "t-1"}
+
+    asyncio.run(
+        fake_new_team(
+            data={},
+            http_request=http_request,
+            user_api_key_dict=_real_user_api_key_dict(server_span),
+        )
+    )
+
+    assert_server_span_attrs(
+        exporter,
+        expected_status=200,
+        expected_url_path="/team/new",
+        where="management wrapper success with http_request",
+    )
+
+
+def test_management_wrapper_noop_when_otel_logger_absent(
+    server_span_factory, otel_with_exporter, monkeypatch
+):
+    """When no OTEL logger is registered, the helper early-returns and no SERVER
+    span is exported — and the handler result is still returned unchanged."""
+    import litellm.proxy.proxy_server as proxy_server
+    from litellm.proxy.management_helpers import utils as mgmt_utils
+
+    _otel, exporter = otel_with_exporter
+    monkeypatch.setattr(proxy_server, "open_telemetry_logger", None, raising=False)
+    monkeypatch.setattr(mgmt_utils, "send_management_endpoint_alert", _noop_alert)
+
+    server_span = server_span_factory(KEY_GENERATE_PATH)
+
+    @mgmt_utils.management_endpoint_wrapper
+    async def fake_fn(data=None, user_api_key_dict=None):
+        return {"ok": True}
+
+    result = asyncio.run(
+        fake_fn(data={}, user_api_key_dict=_real_user_api_key_dict(server_span))
+    )
+
+    assert result == {"ok": True}
+    assert get_server_span(exporter) is None
+
+
+def test_management_wrapper_swallows_post_success_errors(
+    server_span_factory, otel_with_exporter, monkeypatch
+):
+    """A failure in post-success bookkeeping (cache invalidation, alerting) must
+    not propagate — the handler result is returned regardless (non-blocking)."""
+    import litellm.proxy.proxy_server as proxy_server
+    from litellm.proxy.management_helpers import utils as mgmt_utils
+
+    otel, _exporter = otel_with_exporter
+    monkeypatch.setattr(proxy_server, "open_telemetry_logger", otel, raising=False)
+    monkeypatch.setattr(mgmt_utils, "send_management_endpoint_alert", _noop_alert)
+
+    def _boom(*args, **kwargs):
+        raise RuntimeError("cache backend down")
+
+    monkeypatch.setattr(mgmt_utils, "_delete_api_key_from_cache", _boom)
+
+    server_span = server_span_factory(KEY_GENERATE_PATH)
+
+    @mgmt_utils.management_endpoint_wrapper
+    async def fake_fn(data=None, user_api_key_dict=None):
+        return {"ok": True}
+
+    result = asyncio.run(
+        fake_fn(data={}, user_api_key_dict=_real_user_api_key_dict(server_span))
+    )
+
+    assert result == {"ok": True}