
Commit 86d0aa7

mudler authored and localai-bot committed
feat(vllm-omni): add new backend (mudler#8188)
* feat(vllm-omni): add new backend

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* default to py3.12

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
1 parent 42f8fc4 commit 86d0aa7

14 files changed: 975 additions & 2 deletions

.github/workflows/backend.yml

Lines changed: 26 additions & 0 deletions
@@ -170,6 +170,19 @@ jobs:
           dockerfile: "./backend/Dockerfile.python"
           context: "./"
           ubuntu-version: '2404'
+        - build-type: 'cublas'
+          cuda-major-version: "12"
+          cuda-minor-version: "9"
+          platforms: 'linux/amd64'
+          tag-latest: 'auto'
+          tag-suffix: '-gpu-nvidia-cuda-12-vllm-omni'
+          runs-on: 'arc-runner-set'
+          base-image: "ubuntu:24.04"
+          skip-drivers: 'false'
+          backend: "vllm-omni"
+          dockerfile: "./backend/Dockerfile.python"
+          context: "./"
+          ubuntu-version: '2404'
         - build-type: 'cublas'
           cuda-major-version: "12"
           cuda-minor-version: "9"

@@ -653,6 +666,19 @@ jobs:
           dockerfile: "./backend/Dockerfile.python"
           context: "./"
           ubuntu-version: '2404'
+        - build-type: 'hipblas'
+          cuda-major-version: ""
+          cuda-minor-version: ""
+          platforms: 'linux/amd64'
+          tag-latest: 'auto'
+          tag-suffix: '-gpu-rocm-hipblas-vllm-omni'
+          runs-on: 'arc-runner-set'
+          base-image: "rocm/dev-ubuntu-24.04:6.4.4"
+          skip-drivers: 'false'
+          backend: "vllm-omni"
+          dockerfile: "./backend/Dockerfile.python"
+          context: "./"
+          ubuntu-version: '2404'
         - build-type: 'hipblas'
           cuda-major-version: ""
           cuda-minor-version: ""
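
Each new matrix entry drives one container build, with tag-suffix appended to the image tag; this matches the image URIs registered in backend/index.yaml later in this commit. A quick way to verify the published artifacts (a sketch; it assumes CI has already pushed these tags to quay.io):

    # Pull the CUDA 12 and ROCm builds produced by the new matrix entries
    $ docker pull quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-vllm-omni
    $ docker pull quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-vllm-omni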

Makefile

Lines changed: 6 additions & 2 deletions
@@ -1,5 +1,5 @@
 # Disable parallel execution for backend builds
-.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm backends/moonshine backends/pocket-tts backends/qwen-tts
+.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts
 
 GOCMD=go
 GOTEST=$(GOCMD) test

@@ -314,6 +314,7 @@ prepare-test-extra: protogen-python
 	$(MAKE) -C backend/python/diffusers
 	$(MAKE) -C backend/python/chatterbox
 	$(MAKE) -C backend/python/vllm
+	$(MAKE) -C backend/python/vllm-omni
 	$(MAKE) -C backend/python/vibevoice
 	$(MAKE) -C backend/python/moonshine
 	$(MAKE) -C backend/python/pocket-tts

@@ -324,6 +325,7 @@ test-extra: prepare-test-extra
 	$(MAKE) -C backend/python/diffusers test
 	$(MAKE) -C backend/python/chatterbox test
 	$(MAKE) -C backend/python/vllm test
+	$(MAKE) -C backend/python/vllm-omni test
 	$(MAKE) -C backend/python/vibevoice test
 	$(MAKE) -C backend/python/moonshine test
 	$(MAKE) -C backend/python/pocket-tts test

@@ -455,6 +457,7 @@ BACKEND_KITTEN_TTS = kitten-tts|python|.|false|true
 BACKEND_NEUTTS = neutts|python|.|false|true
 BACKEND_KOKORO = kokoro|python|.|false|true
 BACKEND_VLLM = vllm|python|.|false|true
+BACKEND_VLLM_OMNI = vllm-omni|python|.|false|true
 BACKEND_DIFFUSERS = diffusers|python|.|--progress=plain|true
 BACKEND_CHATTERBOX = chatterbox|python|.|false|true
 BACKEND_VIBEVOICE = vibevoice|python|.|--progress=plain|true

@@ -501,6 +504,7 @@ $(eval $(call generate-docker-build-target,$(BACKEND_KITTEN_TTS)))
 $(eval $(call generate-docker-build-target,$(BACKEND_NEUTTS)))
 $(eval $(call generate-docker-build-target,$(BACKEND_KOKORO)))
 $(eval $(call generate-docker-build-target,$(BACKEND_VLLM)))
+$(eval $(call generate-docker-build-target,$(BACKEND_VLLM_OMNI)))
 $(eval $(call generate-docker-build-target,$(BACKEND_DIFFUSERS)))
 $(eval $(call generate-docker-build-target,$(BACKEND_CHATTERBOX)))
 $(eval $(call generate-docker-build-target,$(BACKEND_VIBEVOICE)))

@@ -512,7 +516,7 @@ $(eval $(call generate-docker-build-target,$(BACKEND_QWEN_TTS)))
 docker-save-%: backend-images
 	docker save local-ai-backend:$* -o backend-images/$*.tar
 
-docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-transformers docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-bark docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts
+docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-bark docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts
 
 ########################################################
 ### END Backends
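
The BACKEND_VLLM_OMNI entry reuses the same pipe-separated field layout as the existing backend definitions, and the generate-docker-build-target call derives a per-backend Docker target from it (docker-build-vllm-omni, as listed in docker-build-backends above). A local build-and-export sketch (assumes a working Docker setup; the output path follows the docker-save-% pattern rule):

    # Build the backend image, then export it as backend-images/vllm-omni.tar
    $ make docker-build-vllm-omni
    $ make docker-save-vllm-omni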

backend/index.yaml

Lines changed: 52 additions & 0 deletions
@@ -142,6 +142,31 @@
     amd: "rocm-vllm"
     intel: "intel-vllm"
     nvidia-cuda-12: "cuda12-vllm"
+- &vllm-omni
+  name: "vllm-omni"
+  license: apache-2.0
+  urls:
+    - https://github.com/vllm-project/vllm-omni
+  tags:
+    - text-to-image
+    - image-generation
+    - text-to-video
+    - video-generation
+    - text-to-speech
+    - TTS
+    - multimodal
+    - LLM
+  icon: https://raw.githubusercontent.com/vllm-project/vllm/main/docs/assets/logos/vllm-logo-text-dark.png
+  description: |
+    vLLM-Omni is a unified interface for multimodal generation with vLLM.
+    It supports image generation (text-to-image, image editing), video generation
+    (text-to-video, image-to-video), text generation with multimodal inputs, and
+    text-to-speech generation. Only supports NVIDIA (CUDA) and ROCm platforms.
+  alias: "vllm-omni"
+  capabilities:
+    nvidia: "cuda12-vllm-omni"
+    amd: "rocm-vllm-omni"
+    nvidia-cuda-12: "cuda12-vllm-omni"
 - &mlx
   name: "mlx"
   uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-mlx"

@@ -973,6 +998,33 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-vllm"
   mirrors:
     - localai/localai-backends:master-gpu-intel-vllm
+# vllm-omni
+- !!merge <<: *vllm-omni
+  name: "vllm-omni-development"
+  capabilities:
+    nvidia: "cuda12-vllm-omni-development"
+    amd: "rocm-vllm-omni-development"
+    nvidia-cuda-12: "cuda12-vllm-omni-development"
+- !!merge <<: *vllm-omni
+  name: "cuda12-vllm-omni"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-vllm-omni"
+  mirrors:
+    - localai/localai-backends:latest-gpu-nvidia-cuda-12-vllm-omni
+- !!merge <<: *vllm-omni
+  name: "rocm-vllm-omni"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-vllm-omni"
+  mirrors:
+    - localai/localai-backends:latest-gpu-rocm-hipblas-vllm-omni
+- !!merge <<: *vllm-omni
+  name: "cuda12-vllm-omni-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-vllm-omni"
+  mirrors:
+    - localai/localai-backends:master-gpu-nvidia-cuda-12-vllm-omni
+- !!merge <<: *vllm-omni
+  name: "rocm-vllm-omni-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-vllm-omni"
+  mirrors:
+    - localai/localai-backends:master-gpu-rocm-hipblas-vllm-omni
 # rfdetr
 - !!merge <<: *rfdetr
   name: "rfdetr-development"

backend/python/vllm-omni/Makefile

Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
+.PHONY: vllm-omni
+vllm-omni:
+	bash install.sh
+
+.PHONY: run
+run: vllm-omni
+	@echo "Running vllm-omni..."
+	bash run.sh
+	@echo "vllm-omni run."
+
+.PHONY: test
+test: vllm-omni
+	@echo "Testing vllm-omni..."
+	bash test.sh
+	@echo "vllm-omni tested."
+
+.PHONY: protogen-clean
+protogen-clean:
+	$(RM) backend_pb2_grpc.py backend_pb2.py
+
+.PHONY: clean
+clean: protogen-clean
+	rm -rf venv __pycache__
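
The default vllm-omni target simply wraps install.sh, mirroring the other Python backends, which is what lets the root Makefile's prepare-test-extra and test-extra hooks call into this directory. Running it standalone (a sketch; assumes the Python toolchain that install.sh expects is present):

    # Create the backend's venv via install.sh, then run test.sh against it
    $ make -C backend/python/vllm-omni
    $ make -C backend/python/vllm-omni test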
