Skip to content

Commit d04373f

Browse files
authored
Add granian as an ASGI-compliant web server. Provides better throughput stability (#26027)
* Add granian as an ASGI-compliant web server. Provides better stability and a 10-20 RPS improvement under standard LT conditions. TODO: Verify poetry lock details and add locust numbers to PR * Update granian version in license_cache.json and pyproject.toml to 2.5.7 * Enhance proxy CLI tests by adding SSL initialization checks for Granian server. Remove Python version skip conditions and implement tests to ensure that both the SSL certificate and key are required for server initialization. * Update uv lock to fix granian import error
1 parent 07bcd2c commit d04373f

5 files changed

Lines changed: 285 additions & 9 deletions

File tree

license_cache.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

litellm/proxy/proxy_cli.py

Lines changed: 111 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import subprocess
77
import sys
88
import urllib.parse as urlparse
9+
from pathlib import Path
910
from typing import TYPE_CHECKING, Any, Optional, Union
1011

1112
import click
@@ -293,6 +294,62 @@ def _init_hypercorn_server(
293294
# hypercorn serve raises a type warning when passing a fast api app - even though fast API is a valid type
294295
asyncio.run(serve(app, config)) # type: ignore
295296

297+
@staticmethod
def _init_granian_server(
    host: str,
    port: int,
    num_workers: int,
    ssl_certfile_path: Optional[str],
    ssl_keyfile_path: Optional[str],
    max_requests_before_restart: Optional[int],
    ciphers: Optional[str],
    granian_runtime_threads: Optional[int] = None,
) -> None:
    """
    Start the LiteLLM proxy under the Granian ASGI server.

    The application is handed to Granian as the import string
    ``litellm.proxy.proxy_server:app`` rather than as an object.

    Args:
        host: Address to bind to.
        port: Port to listen on.
        num_workers: Requested worker count; values below 1 are raised to 1.
        ssl_certfile_path: Certificate file path, or None for no SSL.
        ssl_keyfile_path: Private-key file path, or None for no SSL.
        max_requests_before_restart: Not applied here; a warning is printed
            when it is set.
        ciphers: Not applied here; a warning is printed when it is set.
        granian_runtime_threads: Forwarded as ``runtime_threads`` when given;
            otherwise the option is left out of the Granian call.

    Raises:
        click.ClickException: If only one of the two SSL paths is provided.
    """
    # Imported lazily so that granian is only needed when this server is chosen.
    from granian import Granian
    from granian.constants import Interfaces

    print(  # noqa
        f"\033[1;32mLiteLLM Proxy: Starting server on {host}:{port} using Granian\033[0m\n"
    )

    # Options accepted for parity with the other servers but not forwarded.
    if max_requests_before_restart is not None:
        restart_warning = (
            "\033[1;33mLiteLLM: --max_requests_before_restart is not supported by Granian "
            "(Granian uses workers_lifetime in seconds, not a per-request limit).\033[0m\n"
        )
        print(restart_warning)  # noqa
    if ciphers is not None:
        ciphers_warning = "\033[1;33mLiteLLM: --ciphers is not applied when using --run_granian.\033[0m\n"
        print(ciphers_warning)  # noqa

    server_options: dict[str, Any] = dict(
        target="litellm.proxy.proxy_server:app",
        address=host,
        port=port,
        workers=max(1, num_workers),
        interface=Interfaces.ASGI,
        websockets=True,
    )
    if granian_runtime_threads is not None:
        server_options["runtime_threads"] = granian_runtime_threads

    # SSL is all-or-nothing: exactly one of the two paths is a usage error.
    if (ssl_certfile_path is None) != (ssl_keyfile_path is None):
        raise click.ClickException(
            "Both --ssl_certfile_path and --ssl_keyfile_path are required for SSL."
        )
    if ssl_certfile_path is not None and ssl_keyfile_path is not None:
        print(  # noqa
            f"\033[1;32mLiteLLM Proxy: Using SSL with certfile: {ssl_certfile_path} and keyfile: {ssl_keyfile_path}\033[0m\n"
        )
        server_options["ssl_cert"] = Path(ssl_certfile_path)
        server_options["ssl_key"] = Path(ssl_keyfile_path)

    server = Granian(**server_options)
    server.serve()
352+
296353
@staticmethod
297354
def _run_gunicorn_server(
298355
host: str,
@@ -483,9 +540,23 @@ def _maybe_setup_prometheus_multiproc_dir(
483540
@click.option(
484541
"--num_workers",
485542
default=DEFAULT_NUM_WORKERS_LITELLM_PROXY,
486-
help="Number of uvicorn / gunicorn workers to spin up. Default is 1 (from DEFAULT_NUM_WORKERS_LITELLM_PROXY)",
543+
help=(
544+
"Number of worker processes for uvicorn / gunicorn, or Granian worker processes "
545+
"(--workers). Default is 1 (from DEFAULT_NUM_WORKERS_LITELLM_PROXY). "
546+
"With --run_granian, use --granian_threads for runtime threads per worker."
547+
),
487548
envvar="NUM_WORKERS",
488549
)
550+
@click.option(
551+
"--granian_threads",
552+
default=None,
553+
type=click.IntRange(min=1),
554+
help=(
555+
"Only with --run_granian: runtime threads per worker process "
556+
"(Granian --runtime-threads / GRANIAN_RUNTIME_THREADS). Omit to use Granian's default (1)."
557+
),
558+
envvar="GRANIAN_RUNTIME_THREADS",
559+
)
489560
@click.option("--api_base", default=None, help="API base URL.")
490561
@click.option(
491562
"--api_version",
@@ -624,6 +695,15 @@ def _maybe_setup_prometheus_multiproc_dir(
624695
is_flag=True,
625696
help="Starts proxy via hypercorn, instead of uvicorn (supports HTTP/2)",
626697
)
698+
@click.option(
699+
"--run_granian",
700+
default=False,
701+
is_flag=True,
702+
help=(
703+
"Starts proxy via Granian (Rust ASGI server) instead of uvicorn. "
704+
"Requires Python 3.10+ and the `granian` package."
705+
),
706+
)
627707
@click.option(
628708
"--ssl_keyfile_path",
629709
default=None,
@@ -728,6 +808,7 @@ def run_server( # noqa: PLR0915
728808
test,
729809
local,
730810
num_workers,
811+
granian_threads,
731812
test_async,
732813
iam_token_db_auth,
733814
num_requests,
@@ -737,6 +818,7 @@ def run_server( # noqa: PLR0915
737818
version,
738819
run_gunicorn,
739820
run_hypercorn,
821+
run_granian,
740822
ssl_keyfile_path,
741823
ssl_certfile_path,
742824
ciphers,
@@ -821,12 +903,22 @@ def run_server( # noqa: PLR0915
821903
config=config,
822904
use_queue=use_queue,
823905
)
824-
try:
825-
import uvicorn
826-
except Exception:
827-
raise ImportError(
828-
"uvicorn, gunicorn needs to be imported. Run - `pip install 'litellm[proxy]'`"
829-
)
906+
if run_granian:
907+
try:
908+
import granian # noqa: F401
909+
except ImportError as e:
910+
raise ImportError(
911+
"granian must be installed to use --run_granian. "
912+
"Run `pip install granian` or `pip install 'litellm[proxy]'` "
913+
"(Granian requires Python 3.10+)."
914+
) from e
915+
else:
916+
try:
917+
import uvicorn
918+
except Exception:
919+
raise ImportError(
920+
"uvicorn, gunicorn needs to be imported. Run - `pip install 'litellm[proxy]'`"
921+
)
830922

831923
db_connection_pool_limit = 100
832924
# Starts optional due to config fallback checks; guaranteed non-None before use.
@@ -1112,7 +1204,7 @@ def run_server( # noqa: PLR0915
11121204
# Optional: recycle uvicorn workers after N requests
11131205
if max_requests_before_restart is not None:
11141206
uvicorn_args["limit_max_requests"] = max_requests_before_restart
1115-
if run_gunicorn is False and run_hypercorn is False:
1207+
if run_gunicorn is False and run_hypercorn is False and run_granian is False:
11161208
if ssl_certfile_path is not None and ssl_keyfile_path is not None:
11171209
print( # noqa
11181210
f"\033[1;32mLiteLLM Proxy: Using SSL with certfile: {ssl_certfile_path} and keyfile: {ssl_keyfile_path}\033[0m\n" # noqa
@@ -1154,6 +1246,17 @@ def run_server( # noqa: PLR0915
11541246
ssl_keyfile_path=ssl_keyfile_path,
11551247
ciphers=ciphers,
11561248
)
1249+
elif run_granian is True:
1250+
ProxyInitializationHelpers._init_granian_server(
1251+
host=host,
1252+
port=port,
1253+
num_workers=num_workers,
1254+
ssl_certfile_path=ssl_certfile_path,
1255+
ssl_keyfile_path=ssl_keyfile_path,
1256+
max_requests_before_restart=max_requests_before_restart,
1257+
ciphers=ciphers,
1258+
granian_runtime_threads=granian_threads,
1259+
)
11571260

11581261

11591262
if __name__ == "__main__":

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ Documentation = "https://docs.litellm.ai"
4040
proxy = [
4141
"gunicorn==23.0.0",
4242
"uvicorn==0.33.0",
43+
"granian==2.5.7",
4344
"uvloop==0.21.0; sys_platform != 'win32'",
4445
"fastapi==0.124.4",
4546
"backoff==2.2.1",

tests/test_litellm/proxy/test_proxy_cli.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
import os
22
import sys
3+
from pathlib import Path
4+
from types import SimpleNamespace
35
from unittest.mock import AsyncMock, MagicMock, patch
46

7+
import click
8+
import fastapi
59
import pytest
610

711
sys.path.insert(
@@ -231,6 +235,96 @@ def test_init_hypercorn_server(self, mock_print, mock_asyncio_run):
231235
mock_app, "localhost", 8000, "cert.pem", "key.pem", "ECDHE"
232236
)
233237

238+
def test_init_granian_server(self):
    """Granian receives the expected default options and is then served."""
    # Skip BEFORE patching: ``patch("granian.Granian")`` imports the target
    # module when it is entered, so using it as a decorator would raise
    # ModuleNotFoundError ahead of this skip when granian is not installed.
    pytest.importorskip("granian")
    mock_server = MagicMock()
    fake_interfaces = SimpleNamespace(ASGI="asgi")
    with patch("granian.Granian") as mock_granian_cls, patch(
        "builtins.print"
    ), patch("granian.constants.Interfaces", fake_interfaces):
        mock_granian_cls.return_value = mock_server
        ProxyInitializationHelpers._init_granian_server(
            host="0.0.0.0",
            port=4000,
            num_workers=2,
            ssl_certfile_path=None,
            ssl_keyfile_path=None,
            max_requests_before_restart=None,
            ciphers=None,
            granian_runtime_threads=None,
        )
    mock_granian_cls.assert_called_once()
    call_kwargs = mock_granian_cls.call_args.kwargs
    assert call_kwargs["target"] == "litellm.proxy.proxy_server:app"
    assert call_kwargs["address"] == "0.0.0.0"
    assert call_kwargs["port"] == 4000
    assert call_kwargs["workers"] == 2
    assert call_kwargs["interface"] == "asgi"
    assert call_kwargs["websockets"] is True
    # No explicit thread count was requested, so the option must be omitted.
    assert "runtime_threads" not in call_kwargs
    mock_server.serve.assert_called_once()
266+
267+
def test_init_granian_server_runtime_threads(self):
    """An explicit thread count is forwarded to Granian as ``runtime_threads``."""
    # Skip BEFORE patching: ``patch("granian.Granian")`` imports the target
    # module when it is entered, so using it as a decorator would raise
    # ModuleNotFoundError ahead of this skip when granian is not installed.
    pytest.importorskip("granian")
    mock_server = MagicMock()
    fake_interfaces = SimpleNamespace(ASGI="asgi")
    with patch("granian.Granian") as mock_granian_cls, patch(
        "builtins.print"
    ), patch("granian.constants.Interfaces", fake_interfaces):
        mock_granian_cls.return_value = mock_server
        ProxyInitializationHelpers._init_granian_server(
            host="0.0.0.0",
            port=4000,
            num_workers=1,
            ssl_certfile_path=None,
            ssl_keyfile_path=None,
            max_requests_before_restart=None,
            ciphers=None,
            granian_runtime_threads=4,
        )
    assert mock_granian_cls.call_args.kwargs["runtime_threads"] == 4
286+
287+
def test_init_granian_server_ssl(self):
    """Both SSL paths are passed to Granian as ``Path`` objects."""
    # Skip BEFORE patching: ``patch("granian.Granian")`` imports the target
    # module when it is entered, so using it as a decorator would raise
    # ModuleNotFoundError ahead of this skip when granian is not installed.
    pytest.importorskip("granian")
    mock_server = MagicMock()
    fake_interfaces = SimpleNamespace(ASGI="asgi")
    with patch("granian.Granian") as mock_granian_cls, patch(
        "builtins.print"
    ), patch("granian.constants.Interfaces", fake_interfaces):
        mock_granian_cls.return_value = mock_server
        ProxyInitializationHelpers._init_granian_server(
            host="0.0.0.0",
            port=4000,
            num_workers=1,
            ssl_certfile_path="/path/to/cert.pem",
            ssl_keyfile_path="/path/to/key.pem",
            max_requests_before_restart=None,
            ciphers=None,
            granian_runtime_threads=None,
        )
    call_kwargs = mock_granian_cls.call_args.kwargs
    assert call_kwargs["ssl_cert"] == Path("/path/to/cert.pem")
    assert call_kwargs["ssl_key"] == Path("/path/to/key.pem")
    mock_server.serve.assert_called_once()
309+
310+
def test_init_granian_server_ssl_requires_cert_and_key(self):
    """Supplying only a certificate is rejected before Granian is constructed."""
    # Skip BEFORE patching: ``patch("granian.Granian")`` imports the target
    # module when it is entered, so using it as a decorator would raise
    # ModuleNotFoundError ahead of this skip when granian is not installed.
    pytest.importorskip("granian")
    fake_interfaces = SimpleNamespace(ASGI="asgi")
    with patch("granian.Granian") as mock_granian_cls, patch(
        "granian.constants.Interfaces", fake_interfaces
    ):
        with pytest.raises(click.ClickException, match="Both --ssl_certfile_path"):
            ProxyInitializationHelpers._init_granian_server(
                host="0.0.0.0",
                port=4000,
                num_workers=1,
                ssl_certfile_path="/path/to/cert.pem",
                ssl_keyfile_path=None,
                max_requests_before_restart=None,
                ciphers=None,
                granian_runtime_threads=None,
            )
    mock_granian_cls.assert_not_called()
327+
234328
@patch("subprocess.Popen")
235329
def test_run_ollama_serve(self, mock_popen):
236330
# Execute

0 commit comments

Comments
 (0)