diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index bdbfafc22..0607e1e44 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -2562,7 +2562,7 @@ minimaxm3-fp8-mi355x-vllm: # acceptance dilutes in big batches, and the draft weights + draft KV shave # headroom — tp2-ep2 is dropped since its KV headroom was already thin. minimaxm3-fp8-mi355x-vllm-mtp: - image: vllm/vllm-openai-rocm:minimax-m3 + image: vllm/vllm-openai-rocm:nightly-3f5a1e1733200760169ff31ebe60a271072b199e model: MiniMaxAI/MiniMax-M3-MXFP8 model-prefix: minimaxm3 runner: mi355x diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 001911868..fd37ecc38 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -4229,3 +4229,10 @@ - "Reuse the pinned vllm/vllm-openai-rocm:nightly-3f5a1e1733200760169ff31ebe60a271072b199e image, text-only target path, TRITON_ATTN, automatic tool choice, MiniMax-M3 parsers, VLLM_USE_BREAKABLE_CUDAGRAPH=0, default KV-cache dtype, and automatic MoE backend selection." - "Pass --use-chat-template for MTP acceptance and mirror the existing MiniMax-M3 MXFP8 MI355X MTP TP/EP/DP-attention search space at 1k1k and 8k1k." pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1939 + +- config-keys: + - minimaxm3-fp8-mi355x-vllm-mtp + description: + - "Update the MiniMax-M3 MXFP8 MI355X vLLM EAGLE3 benchmark image from vllm/vllm-openai-rocm:minimax-m3 to vllm/vllm-openai-rocm:nightly-3f5a1e1733200760169ff31ebe60a271072b199e." + - "Benchmark configuration, EAGLE3 draft model, serving flags, and search space are unchanged." + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1941