SemiAnalysisAI · cquil11 · Jun 17, 2026 · Jun 15, 2026 · Jun 15, 2026 · Jun 15, 2026
@@ -6587,10 +6587,10 @@ dsr1-fp4-gb300-dynamo-sglang:
           dp-attn: true
 
 dsr1-fp8-gb300-dynamo-trt:
-  image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post2
+  image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.3.0-dev.1-cuda13
   model: deepseek-ai/DeepSeek-R1-0528
   model-prefix: dsr1
-  runner: gb300
+  runner: gb300-nv
   precision: fp8
   framework: dynamo-trt
   multinode: true

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
@@ -3909,3 +3909,13 @@
   description:
     - "Use the Marlin MoE backend for MiniMax-M3 B200/B300 TP-only vLLM configurations by adding --moe-backend marlin when expert parallelism is disabled."
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1809
+
+- config-keys:
+    - dsr1-fp8-gb300-dynamo-trt
+  description:
+    - "Fix gsm8k accuracy of 88% instead of the expected 95% for a single point."
+    - "In the previous submission, there was a numeric issue causing accuracy degradation and performance anomalies in some MTP points at certain concurrencies."
+    - "This issue is now fixed in the latest TRTLLM release."
+    - "Also update all configs for DSR1 TRTLLM FP8 to use the latest released image."
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1767
+