From 695b0cc2882128a9f23081f1ca1a3a0d073d183a Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Fri, 17 Apr 2026 11:00:45 -0500 Subject: [PATCH] Revert "[AMD][MI300X] Expand GPT-OSS FP4 TP=1 concurrency from 64 to 256 (#1053)" [slip-sweep] This reverts commit 31f066cbb3fd2d5a1c803a33cbeefba8aeb4bc8b. --- .github/configs/amd-master.yaml | 2 +- perf-changelog.yaml | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index b35716e78..70eabb616 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -470,7 +470,7 @@ gptoss-fp4-mi300x-vllm: - isl: 1024 osl: 1024 search-space: - - { tp: 1, conc-start: 64, conc-end: 256 } + - { tp: 1, conc-start: 64, conc-end: 64 } - { tp: 2, conc-start: 4, conc-end: 64 } - { tp: 4, conc-start: 4, conc-end: 64 } - { tp: 8, conc-start: 4, conc-end: 16 } diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 245014a4f..23aceac9f 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1405,13 +1405,6 @@ pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1048 - config-keys: - - gptoss-fp4-mi300x-vllm - description: - - "Expand GPT-OSS 120B FP4 MI300X TP=1 concurrency from 64 to 256 for 1k1k" - - "Higher concurrency improves MoE weight amortization: 8552 total TPS at conc=256 vs 4016 at conc=64 (2.1x)" - pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1053 - -- config-keys: - dsr1-fp4-b300-sglang description: - "Add DeepSeek-R1-0528 FP4 B300 SGLang benchmark (non-MTP)"