From fa2ebe5a2c6a611ee84bf71f68c77bb6eb8ccabb Mon Sep 17 00:00:00 2001
From: Xin Li <xinli@nvidia.com>
Date: Sun, 14 Jun 2026 21:33:25 -0400
Subject: [PATCH 1/8] dsr1-fp8-gb300-dynamo-trt: pin image to
 tensorrtllm-runtime:1.3.0-dev.1-cuda13

---
 .github/configs/nvidia-master.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml
index 5f7d16d60..8574f755f 100644
--- a/.github/configs/nvidia-master.yaml
+++ b/.github/configs/nvidia-master.yaml
@@ -6587,7 +6587,7 @@ dsr1-fp4-gb300-dynamo-sglang:
           dp-attn: true
 
 dsr1-fp8-gb300-dynamo-trt:
-  image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post2
+  image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.3.0-dev.1-cuda13
   model: deepseek-ai/DeepSeek-R1-0528
   model-prefix: dsr1
   runner: gb300

From f6f41361eb389fd0119ca2c842caec26b3c4a851 Mon Sep 17 00:00:00 2001
From: Xin Li <xinli@nvidia.com>
Date: Sun, 14 Jun 2026 21:36:08 -0400
Subject: [PATCH 2/8] dsr1-fp8-gb300-dynamo-trt: pin image to
 tensorrtllm-runtime:1.3.0-dev.1-cuda13, fix gsm8k accuracy

---
 perf-changelog.yaml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 86254474d..707c04b00 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -3814,3 +3814,9 @@
   description:
     - "Extend MiniMax-M3 MXFP8 H100/H200 non-MTP sweeps to concurrency 1 on the latency rows (H100: TP8; H200: TP4 and TP8) and add full TEP coverage from conc 1 to 256 (H100: TP8+EP8; H200: TP4+EP4 and TP8+EP8, incl. a new TP4+EP4 row for 8k1k). H200 TP8+EP8 upper bound moves 512->256 (high concurrency stays covered by the TP8+EP8 dp-attn DEP rows). DEP rows unchanged"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1761
+
+- config-keys:
+    - dsr1-fp8-gb300-dynamo-trt
+  description:
+    - "Fix gsm8k accuracy at 88% instead of 95% for a single point"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/

From 4c442b64964db8cffd5acab099977f608cf96f2a Mon Sep 17 00:00:00 2001
From: Xin Li <xinli@nvidia.com>
Date: Sun, 14 Jun 2026 23:58:19 -0400
Subject: [PATCH 3/8] perf changelog update

---
 perf-changelog.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 707c04b00..7b8b281f9 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -3819,4 +3819,4 @@
     - dsr1-fp8-gb300-dynamo-trt
   description:
     - "Fix gsm8k accuracy at 88% instead of 95% for a single point"
-  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1767

From d3b807ccdf6ae0d0295fcf04e6facd2a209fc799 Mon Sep 17 00:00:00 2001
From: Xin Li <xinli@nvidia.com>
Date: Mon, 15 Jun 2026 00:07:47 -0400
Subject: [PATCH 4/8] change runner

---
 .github/configs/nvidia-master.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml
index 8574f755f..857972476 100644
--- a/.github/configs/nvidia-master.yaml
+++ b/.github/configs/nvidia-master.yaml
@@ -6590,7 +6590,7 @@ dsr1-fp8-gb300-dynamo-trt:
   image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.3.0-dev.1-cuda13
   model: deepseek-ai/DeepSeek-R1-0528
   model-prefix: dsr1
-  runner: gb300
+  runner: gb300-nv
   precision: fp8
   framework: dynamo-trt
   multinode: true

From 3a9d26c63306dd02b7f0d5854646cb69600dedbb Mon Sep 17 00:00:00 2001
From: Xin Li <xinli@nvidia.com>
Date: Mon, 15 Jun 2026 22:37:34 -0400
Subject: [PATCH 5/8] perf change log

---
 perf-changelog.yaml | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index f647d3b8f..fdb9b5f94 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -3841,18 +3841,6 @@
     - "Switch fixed-seq-len search space from TP8 to TP4 for both isl=1024 and isl=8192 scenarios"
     - "Expand isl=8192 coverage: add TP4 dp-attn sweep (conc 32–2048) and TP4 TP-only sweep (conc 1–32)"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1762
-  
-- config-keys:
-    - dsv4-fp4-gb300-dynamo-trt
-    - dsv4-fp4-gb300-dynamo-trt-mtp
-  description:
-    - "Add DeepSeek-V4-Pro MXFP4 GB300 disaggregated TRT-LLM benchmarks via Dynamo (27 STP + 27 MTP configs)"
-    - "New configs: dsv4-fp4-gb300-dynamo-trt (STP) and dsv4-fp4-gb300-dynamo-trt-mtp (MTP)"
-    - "Covers ISL 1024/OSL 1024 (14 STP + 14 MTP) and ISL 8192/OSL 1024 (13 STP + 13 MTP)"
-    - "Container: nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:1.3.0-deepseek-v4-dev.1"
-    - "Recipes sourced from NVIDIA/srt-slurm branch sa-submission-q2-2026"
-    - "Runner script updated to support dsv4 model prefix with dynamo-trt framework on GB300"
-  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1689
 
 - config-keys:
     - minimaxm3-fp8-b200-vllm
@@ -3867,3 +3855,16 @@
     - "Align MiniMax-M3 B200 vLLM fixed-sequence serving with MiniMax-M2.5 FP8 B200 settings by setting VLLM_FLOAT32_MATMUL_PRECISION=high and restoring max cudagraph capture size 2048."
     - "Add TP4+EP4 coverage for MiniMax-M3 B200: DP-attention rows for 1k1k/8k1k and the missing non-DP-attention row for 8k1k."
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1779
+
+- config-keys:
+    - dsv4-fp4-gb300-dynamo-trt
+    - dsv4-fp4-gb300-dynamo-trt-mtp
+  description:
+    - "Add DeepSeek-V4-Pro MXFP4 GB300 disaggregated TRT-LLM benchmarks via Dynamo (27 STP + 27 MTP configs)"
+    - "New configs: dsv4-fp4-gb300-dynamo-trt (STP) and dsv4-fp4-gb300-dynamo-trt-mtp (MTP)"
+    - "Covers ISL 1024/OSL 1024 (14 STP + 14 MTP) and ISL 8192/OSL 1024 (13 STP + 13 MTP)"
+    - "Container: nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:1.3.0-deepseek-v4-dev.1"
+    - "Recipes sourced from NVIDIA/srt-slurm branch sa-submission-q2-2026"
+    - "Runner script updated to support dsv4 model prefix with dynamo-trt framework on GB300"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1689
+

From 7c4c6454911f5465ddc9e426802fb4c35b5ac5ca Mon Sep 17 00:00:00 2001
From: Xin Li <xinli@nvidia.com>
Date: Mon, 15 Jun 2026 22:39:12 -0400
Subject: [PATCH 6/8] fix perf change log

---
 perf-changelog.yaml | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index fdb9b5f94..9cec83a85 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -3832,15 +3832,23 @@
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1761
 
 - config-keys:
-    - dsr1-fp8-gb300-dynamo-trt
-  description:
-    - "Fix gsm8k accuracy at 88% instead of 95% for a single point"
-  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1767
     - dsv4-fp4-mi355x-sglang
   description:
     - "Switch fixed-seq-len search space from TP8 to TP4 for both isl=1024 and isl=8192 scenarios"
     - "Expand isl=8192 coverage: add TP4 dp-attn sweep (conc 32–2048) and TP4 TP-only sweep (conc 1–32)"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1762
+  
+- config-keys:
+    - dsv4-fp4-gb300-dynamo-trt
+    - dsv4-fp4-gb300-dynamo-trt-mtp
+  description:
+    - "Add DeepSeek-V4-Pro MXFP4 GB300 disaggregated TRT-LLM benchmarks via Dynamo (27 STP + 27 MTP configs)"
+    - "New configs: dsv4-fp4-gb300-dynamo-trt (STP) and dsv4-fp4-gb300-dynamo-trt-mtp (MTP)"
+    - "Covers ISL 1024/OSL 1024 (14 STP + 14 MTP) and ISL 8192/OSL 1024 (13 STP + 13 MTP)"
+    - "Container: nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:1.3.0-deepseek-v4-dev.1"
+    - "Recipes sourced from NVIDIA/srt-slurm branch sa-submission-q2-2026"
+    - "Runner script updated to support dsv4 model prefix with dynamo-trt framework on GB300"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1689
 
 - config-keys:
     - minimaxm3-fp8-b200-vllm
@@ -3857,14 +3865,7 @@
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1779
 
 - config-keys:
-    - dsv4-fp4-gb300-dynamo-trt
-    - dsv4-fp4-gb300-dynamo-trt-mtp
+    - dsr1-fp8-gb300-dynamo-trt
   description:
-    - "Add DeepSeek-V4-Pro MXFP4 GB300 disaggregated TRT-LLM benchmarks via Dynamo (27 STP + 27 MTP configs)"
-    - "New configs: dsv4-fp4-gb300-dynamo-trt (STP) and dsv4-fp4-gb300-dynamo-trt-mtp (MTP)"
-    - "Covers ISL 1024/OSL 1024 (14 STP + 14 MTP) and ISL 8192/OSL 1024 (13 STP + 13 MTP)"
-    - "Container: nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:1.3.0-deepseek-v4-dev.1"
-    - "Recipes sourced from NVIDIA/srt-slurm branch sa-submission-q2-2026"
-    - "Runner script updated to support dsv4 model prefix with dynamo-trt framework on GB300"
-  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1689
-
+    - "Fix gsm8k accuracy at 88% instead of 95% for a single point"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1767

From 728eb321dd4b1decd81b2d460cb39aa369a0c9c8 Mon Sep 17 00:00:00 2001
From: Xin Li <119016172+xinli-sw@users.noreply.github.com>
Date: Tue, 16 Jun 2026 01:06:06 -0400
Subject: [PATCH 7/8] Enhance gsm8k accuracy fix description in changelog

Updated description for gsm8k accuracy fix to include config updates.
---
 perf-changelog.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 9cec83a85..33fd0451b 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -3867,5 +3867,5 @@
 - config-keys:
     - dsr1-fp8-gb300-dynamo-trt
   description:
-    - "Fix gsm8k accuracy at 88% instead of 95% for a single point"
+    - "Fix gsm8k accuracy at 88% instead of 95% for a single point, also update all configs for DSR1 TRTLLM FP8 to reflect latest released image usage"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1767

From f867bd42585cca316b31a058c9276fad6bdbab00 Mon Sep 17 00:00:00 2001
From: Xin Li <119016172+xinli-sw@users.noreply.github.com>
Date: Wed, 17 Jun 2026 00:02:52 -0400
Subject: [PATCH 8/8] Refine accuracy fix description for DSR1 TRTLLM

Updated description for gsm8k accuracy fix and config updates.
---
 perf-changelog.yaml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 33fd0451b..208d37dea 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -3867,5 +3867,8 @@
 - config-keys:
     - dsr1-fp8-gb300-dynamo-trt
   description:
-    - "Fix gsm8k accuracy at 88% instead of 95% for a single point, also update all configs for DSR1 TRTLLM FP8 to reflect latest released image usage"
+    - "Fix gsm8k accuracy measuring 88% instead of 95% for a single point."
+    - "In the previous submission, there was a numeric issue causing accuracy degradation and performance anomaly in some MTP points at certain concurrency."
+    - "This issue is now fixed in the latest TRTLLM release."
+    - "Also update all configs for DSR1 TRTLLM FP8 to reflect latest released image usage."
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1767