diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/1p1d-dep2-tep8-1k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/1p1d-dep2-tep8-1k1k.yaml index e1c908228..e76827af3 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/1p1d-dep2-tep8-1k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/1p1d-dep2-tep8-1k1k.yaml @@ -28,13 +28,11 @@ backend: connector: null prefill_environment: - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" VLLM_FLOAT32_MATMUL_PRECISION: "high" decode_environment: - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" VLLM_FLOAT32_MATMUL_PRECISION: "high" vllm_config: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/1p1d-dep2-tp8-marlin-1k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/1p1d-dep2-tp8-marlin-1k1k.yaml index 452d748b4..c8362cb32 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/1p1d-dep2-tp8-marlin-1k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/1p1d-dep2-tp8-marlin-1k1k.yaml @@ -28,13 +28,11 @@ backend: connector: null prefill_environment: - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" VLLM_FLOAT32_MATMUL_PRECISION: "high" decode_environment: - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" VLLM_FLOAT32_MATMUL_PRECISION: "high" vllm_config: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/1p2d-dep2-dep4-1k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/1p2d-dep2-dep4-1k1k.yaml index 625927baf..349a125bb 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/1p2d-dep2-dep4-1k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/1p2d-dep2-dep4-1k1k.yaml @@ -28,13 +28,11 @@ backend: connector: null prefill_environment: - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" VLLM_FLOAT32_MATMUL_PRECISION: "high" decode_environment: - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" VLLM_FLOAT32_MATMUL_PRECISION: "high" vllm_config: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/2p1d-dep2-dep8-1k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/2p1d-dep2-dep8-1k1k.yaml index 951cdfc94..0f790c79b 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/2p1d-dep2-dep8-1k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/2p1d-dep2-dep8-1k1k.yaml @@ -28,13 +28,11 @@ backend: connector: null prefill_environment: - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" VLLM_FLOAT32_MATMUL_PRECISION: "high" decode_environment: - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" VLLM_FLOAT32_MATMUL_PRECISION: "high" vllm_config: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/2p1d-dep2-tep8-1k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/2p1d-dep2-tep8-1k1k.yaml index 495fc9b78..1372ff29a 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/2p1d-dep2-tep8-1k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/2p1d-dep2-tep8-1k1k.yaml @@ -28,13 +28,11 @@ backend: connector: null prefill_environment: - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" VLLM_FLOAT32_MATMUL_PRECISION: "high" decode_environment: - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" VLLM_FLOAT32_MATMUL_PRECISION: "high" vllm_config: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/2p2d-dep2-tep8-1k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/2p2d-dep2-tep8-1k1k.yaml index 58c533504..4447d971b 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/2p2d-dep2-tep8-1k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/2p2d-dep2-tep8-1k1k.yaml @@ -28,13 +28,11 @@ backend: connector: null prefill_environment: - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" VLLM_FLOAT32_MATMUL_PRECISION: "high" decode_environment: - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" VLLM_FLOAT32_MATMUL_PRECISION: "high" vllm_config: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/3p2d-dep2-tep8-1k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/3p2d-dep2-tep8-1k1k.yaml index 165072cc5..b03d644d2 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/3p2d-dep2-tep8-1k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/3p2d-dep2-tep8-1k1k.yaml @@ -28,13 +28,11 @@ backend: connector: null prefill_environment: - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" VLLM_FLOAT32_MATMUL_PRECISION: "high" decode_environment: - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" VLLM_FLOAT32_MATMUL_PRECISION: "high" vllm_config: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/1p2d-dep2-dep8-8k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/1p2d-dep2-dep8-8k1k.yaml index cb9256e25..890014563 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/1p2d-dep2-dep8-8k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/1p2d-dep2-dep8-8k1k.yaml @@ -29,13 +29,11 @@ backend: prefill_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" decode_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" vllm_config: prefill: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/2p2d-dep2-dep8-8k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/2p2d-dep2-dep8-8k1k.yaml index 7a805df7f..6d9ecc425 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/2p2d-dep2-dep8-8k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/2p2d-dep2-dep8-8k1k.yaml @@ -29,13 +29,11 @@ backend: prefill_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" decode_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" vllm_config: prefill: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/2p2d-dep2-tep8-8k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/2p2d-dep2-tep8-8k1k.yaml index 8dc4c5c06..90d816592 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/2p2d-dep2-tep8-8k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/2p2d-dep2-tep8-8k1k.yaml @@ -29,13 +29,11 @@ backend: prefill_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" decode_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" vllm_config: prefill: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/3p2d-dep2-dep8-8k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/3p2d-dep2-dep8-8k1k.yaml index 8ff86e9d9..84d580452 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/3p2d-dep2-dep8-8k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/3p2d-dep2-dep8-8k1k.yaml @@ -29,13 +29,11 @@ backend: prefill_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" decode_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" vllm_config: prefill: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/3p2d-dep2-tep8-8k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/3p2d-dep2-tep8-8k1k.yaml index a90bfcf32..f272b21bc 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/3p2d-dep2-tep8-8k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/3p2d-dep2-tep8-8k1k.yaml @@ -29,13 +29,11 @@ backend: prefill_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" decode_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" vllm_config: prefill: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/4p2d-dep2-dep8-8k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/4p2d-dep2-dep8-8k1k.yaml index 8feb79b71..b087b0926 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/4p2d-dep2-dep8-8k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/4p2d-dep2-dep8-8k1k.yaml @@ -29,13 +29,11 @@ backend: prefill_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" decode_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" vllm_config: prefill: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/4p3d-dep2-dep4-8k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/4p3d-dep2-dep4-8k1k.yaml index 5639c2eda..94c36243e 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/4p3d-dep2-dep4-8k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/4p3d-dep2-dep4-8k1k.yaml @@ -29,13 +29,11 @@ backend: prefill_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" decode_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" vllm_config: prefill: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/5p2d-dep2-tep8-8k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/5p2d-dep2-tep8-8k1k.yaml index 1f517d815..94f546ec2 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/5p2d-dep2-tep8-8k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/5p2d-dep2-tep8-8k1k.yaml @@ -29,13 +29,11 @@ backend: prefill_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" decode_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" vllm_config: prefill: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/5p2d-dep2-tp8-marlin-8k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/5p2d-dep2-tp8-marlin-8k1k.yaml index cf1840dfe..e77e77600 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/5p2d-dep2-tp8-marlin-8k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/5p2d-dep2-tp8-marlin-8k1k.yaml @@ -29,13 +29,11 @@ backend: prefill_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" decode_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" vllm_config: prefill: diff --git a/runners/launch_b300-nv.sh b/runners/launch_b300-nv.sh index c32516c9e..f2a83e4b3 100644 --- a/runners/launch_b300-nv.sh +++ b/runners/launch_b300-nv.sh @@ -85,7 +85,7 @@ elif [[ $FRAMEWORK == "dynamo-vllm" && $MODEL_PREFIX == "minimaxm2.5" && $PRECIS elif [[ $FRAMEWORK == "dynamo-vllm" && $MODEL_PREFIX == "minimaxm3" && $PRECISION == "fp8" ]]; then git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR" cd "$SRT_REPO_DIR" || exit 1 - git checkout main + git checkout sa-submission-q2-2026 mkdir -p recipes/vllm/minimax-m3 cp -rT "$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3" recipes/vllm/minimax-m3 else