From d0ad16d8b917fa0fea470247f7a407e026cd2847 Mon Sep 17 00:00:00 2001 From: functionstackx <47992694+functionstackx@users.noreply.github.com> Date: Thu, 25 Jun 2026 22:13:02 -0400 Subject: [PATCH] chore: update MiniMax M3 FP8 MI355X MTP image --- .github/configs/amd-master.yaml | 2 +- perf-changelog.yaml | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index bdbfafc22..0607e1e44 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -2562,7 +2562,7 @@ minimaxm3-fp8-mi355x-vllm: # acceptance dilutes in big batches, and the draft weights + draft KV shave # headroom — tp2-ep2 is dropped since its KV headroom was already thin. minimaxm3-fp8-mi355x-vllm-mtp: - image: vllm/vllm-openai-rocm:minimax-m3 + image: vllm/vllm-openai-rocm:nightly-3f5a1e1733200760169ff31ebe60a271072b199e model: MiniMaxAI/MiniMax-M3-MXFP8 model-prefix: minimaxm3 runner: mi355x diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 001911868..fd37ecc38 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -4229,3 +4229,10 @@ - "Reuse the pinned vllm/vllm-openai-rocm:nightly-3f5a1e1733200760169ff31ebe60a271072b199e image, text-only target path, TRITON_ATTN, automatic tool choice, MiniMax-M3 parsers, VLLM_USE_BREAKABLE_CUDAGRAPH=0, default KV-cache dtype, and automatic MoE backend selection." - "Pass --use-chat-template for MTP acceptance and mirror the existing MiniMax-M3 MXFP8 MI355X MTP TP/EP/DP-attention search space at 1k1k and 8k1k." pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1939 + +- config-keys: + - minimaxm3-fp8-mi355x-vllm-mtp + description: + - "Update the MiniMax-M3 MXFP8 MI355X vLLM EAGLE3 benchmark image from vllm/vllm-openai-rocm:minimax-m3 to vllm/vllm-openai-rocm:nightly-3f5a1e1733200760169ff31ebe60a271072b199e." + - "Benchmark configuration, EAGLE3 draft model, serving flags, and search space are unchanged." + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1941