From 2a6a0c57de51866432e543cf0623d9b586f5d668 Mon Sep 17 00:00:00 2001 From: functionstackx <47992694+functionstackx@users.noreply.github.com> Date: Sun, 17 May 2026 22:26:03 -0400 Subject: [PATCH 1/2] Update dsv4-fp4-b200-vllm (+mtp) vLLM image to v0.21.0 Update vLLM image from v0.20.0-cu130 (20d/18d old) to v0.21.0 Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/configs/nvidia-master.yaml | 4 ++-- perf-changelog.yaml | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index 95d0c38ec..eb66fa482 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -1734,7 +1734,7 @@ dsv4-fp4-b200-sglang: - { tp: 8, ep: 8, dp-attn: true, conc-start: 256, conc-end: 512 } dsv4-fp4-b200-vllm: - image: vllm/vllm-openai:v0.20.0-cu130 + image: vllm/vllm-openai:v0.21.0 model: deepseek-ai/DeepSeek-V4-Pro model-prefix: dsv4 runner: b200-dsv4 @@ -1822,7 +1822,7 @@ dsv4-fp4-b200-trt-mtp: # MTP variant of dsv4-fp4-b200-vllm. Mirrors the base search space and adds # --speculative-config '{"method":"mtp","num_speculative_tokens":2}'. dsv4-fp4-b200-vllm-mtp: - image: vllm/vllm-openai:v0.20.0-cu130 + image: vllm/vllm-openai:v0.21.0 model: deepseek-ai/DeepSeek-V4-Pro model-prefix: dsv4 runner: b200-dsv4 diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 2bdf9f729..5e7ad2ae6 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -2653,3 +2653,10 @@ description: - "Update SGLang image from v0.5.9-cu129-amd64 (74d old) to v0.5.12-cu130" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1458 + +- config-keys: + - dsv4-fp4-b200-vllm + - dsv4-fp4-b200-vllm-mtp + description: + - "Update vLLM image from v0.20.0-cu130 (20d/18d old) to v0.21.0" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/XXX From d7d4dc4d5ca76f0c332e4bd9eca88f1ab1dab9ea Mon Sep 17 00:00:00 2001 From: functionstackx <47992694+functionstackx@users.noreply.github.com> Date: Sun, 17 May 2026 22:26:07 -0400 Subject: [PATCH 2/2] chore: fill pr-link for #1476 --- perf-changelog.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 5e7ad2ae6..478b4e28d 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -2659,4 +2659,4 @@ - dsv4-fp4-b200-vllm-mtp description: - "Update vLLM image from v0.20.0-cu130 (20d/18d old) to v0.21.0" - pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/XXX + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1476