From fa925b9889b502bc1eb1e46af3b144a6a0d909a3 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Wed, 17 Jun 2026 21:38:07 -0700 Subject: [PATCH 1/2] point to sa-submission-q2-2026 branch instead --- runners/launch_b300-nv.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runners/launch_b300-nv.sh b/runners/launch_b300-nv.sh index c32516c9e..f2a83e4b3 100644 --- a/runners/launch_b300-nv.sh +++ b/runners/launch_b300-nv.sh @@ -85,7 +85,7 @@ elif [[ $FRAMEWORK == "dynamo-vllm" && $MODEL_PREFIX == "minimaxm2.5" && $PRECIS elif [[ $FRAMEWORK == "dynamo-vllm" && $MODEL_PREFIX == "minimaxm3" && $PRECISION == "fp8" ]]; then git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR" cd "$SRT_REPO_DIR" || exit 1 - git checkout main + git checkout sa-submission-q2-2026 mkdir -p recipes/vllm/minimax-m3 cp -rT "$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3" recipes/vllm/minimax-m3 else From 63214b5514476a6041aff2db1456cfb767351176 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Wed, 17 Jun 2026 22:18:56 -0700 Subject: [PATCH 2/2] fix UCX_* settings to fix nixl handshake failure --- .../vllm/minimax-m3/b300-fp8/1k1k/1p1d-dep2-tep8-1k1k.yaml | 6 ++---- .../minimax-m3/b300-fp8/1k1k/1p1d-dep2-tp8-marlin-1k1k.yaml | 6 ++---- .../vllm/minimax-m3/b300-fp8/1k1k/1p2d-dep2-dep4-1k1k.yaml | 6 ++---- .../vllm/minimax-m3/b300-fp8/1k1k/2p1d-dep2-dep8-1k1k.yaml | 6 ++---- .../vllm/minimax-m3/b300-fp8/1k1k/2p1d-dep2-tep8-1k1k.yaml | 6 ++---- .../vllm/minimax-m3/b300-fp8/1k1k/2p2d-dep2-tep8-1k1k.yaml | 6 ++---- .../vllm/minimax-m3/b300-fp8/1k1k/3p2d-dep2-tep8-1k1k.yaml | 6 ++---- .../vllm/minimax-m3/b300-fp8/8k1k/1p2d-dep2-dep8-8k1k.yaml | 6 ++---- .../vllm/minimax-m3/b300-fp8/8k1k/2p2d-dep2-dep8-8k1k.yaml | 6 ++---- .../vllm/minimax-m3/b300-fp8/8k1k/2p2d-dep2-tep8-8k1k.yaml | 6 ++---- .../vllm/minimax-m3/b300-fp8/8k1k/3p2d-dep2-dep8-8k1k.yaml | 6 ++---- .../vllm/minimax-m3/b300-fp8/8k1k/3p2d-dep2-tep8-8k1k.yaml | 6 ++---- .../vllm/minimax-m3/b300-fp8/8k1k/4p2d-dep2-dep8-8k1k.yaml | 6 ++---- .../vllm/minimax-m3/b300-fp8/8k1k/4p3d-dep2-dep4-8k1k.yaml | 6 ++---- .../vllm/minimax-m3/b300-fp8/8k1k/5p2d-dep2-tep8-8k1k.yaml | 6 ++---- .../minimax-m3/b300-fp8/8k1k/5p2d-dep2-tp8-marlin-8k1k.yaml | 6 ++---- 16 files changed, 32 insertions(+), 64 deletions(-) diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/1p1d-dep2-tep8-1k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/1p1d-dep2-tep8-1k1k.yaml index e1c908228..e76827af3 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/1p1d-dep2-tep8-1k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/1p1d-dep2-tep8-1k1k.yaml @@ -28,13 +28,11 @@ backend: connector: null prefill_environment: - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" VLLM_FLOAT32_MATMUL_PRECISION: "high" decode_environment: - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" VLLM_FLOAT32_MATMUL_PRECISION: "high" vllm_config: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/1p1d-dep2-tp8-marlin-1k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/1p1d-dep2-tp8-marlin-1k1k.yaml index 452d748b4..c8362cb32 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/1p1d-dep2-tp8-marlin-1k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/1p1d-dep2-tp8-marlin-1k1k.yaml @@ -28,13 +28,11 @@ backend: connector: null prefill_environment: - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" VLLM_FLOAT32_MATMUL_PRECISION: "high" decode_environment: - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" VLLM_FLOAT32_MATMUL_PRECISION: "high" vllm_config: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/1p2d-dep2-dep4-1k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/1p2d-dep2-dep4-1k1k.yaml index 625927baf..349a125bb 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/1p2d-dep2-dep4-1k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/1p2d-dep2-dep4-1k1k.yaml @@ -28,13 +28,11 @@ backend: connector: null prefill_environment: - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" VLLM_FLOAT32_MATMUL_PRECISION: "high" decode_environment: - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" VLLM_FLOAT32_MATMUL_PRECISION: "high" vllm_config: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/2p1d-dep2-dep8-1k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/2p1d-dep2-dep8-1k1k.yaml index 951cdfc94..0f790c79b 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/2p1d-dep2-dep8-1k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/2p1d-dep2-dep8-1k1k.yaml @@ -28,13 +28,11 @@ backend: connector: null prefill_environment: - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" VLLM_FLOAT32_MATMUL_PRECISION: "high" decode_environment: - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" VLLM_FLOAT32_MATMUL_PRECISION: "high" vllm_config: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/2p1d-dep2-tep8-1k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/2p1d-dep2-tep8-1k1k.yaml index 495fc9b78..1372ff29a 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/2p1d-dep2-tep8-1k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/2p1d-dep2-tep8-1k1k.yaml @@ -28,13 +28,11 @@ backend: connector: null prefill_environment: - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" VLLM_FLOAT32_MATMUL_PRECISION: "high" decode_environment: - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" VLLM_FLOAT32_MATMUL_PRECISION: "high" vllm_config: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/2p2d-dep2-tep8-1k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/2p2d-dep2-tep8-1k1k.yaml index 58c533504..4447d971b 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/2p2d-dep2-tep8-1k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/2p2d-dep2-tep8-1k1k.yaml @@ -28,13 +28,11 @@ backend: connector: null prefill_environment: - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" VLLM_FLOAT32_MATMUL_PRECISION: "high" decode_environment: - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" VLLM_FLOAT32_MATMUL_PRECISION: "high" vllm_config: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/3p2d-dep2-tep8-1k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/3p2d-dep2-tep8-1k1k.yaml index 165072cc5..b03d644d2 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/3p2d-dep2-tep8-1k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/1k1k/3p2d-dep2-tep8-1k1k.yaml @@ -28,13 +28,11 @@ backend: connector: null prefill_environment: - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" VLLM_FLOAT32_MATMUL_PRECISION: "high" decode_environment: - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" VLLM_FLOAT32_MATMUL_PRECISION: "high" vllm_config: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/1p2d-dep2-dep8-8k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/1p2d-dep2-dep8-8k1k.yaml index cb9256e25..890014563 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/1p2d-dep2-dep8-8k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/1p2d-dep2-dep8-8k1k.yaml @@ -29,13 +29,11 @@ backend: prefill_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" decode_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" vllm_config: prefill: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/2p2d-dep2-dep8-8k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/2p2d-dep2-dep8-8k1k.yaml index 7a805df7f..6d9ecc425 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/2p2d-dep2-dep8-8k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/2p2d-dep2-dep8-8k1k.yaml @@ -29,13 +29,11 @@ backend: prefill_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" decode_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" vllm_config: prefill: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/2p2d-dep2-tep8-8k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/2p2d-dep2-tep8-8k1k.yaml index 8dc4c5c06..90d816592 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/2p2d-dep2-tep8-8k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/2p2d-dep2-tep8-8k1k.yaml @@ -29,13 +29,11 @@ backend: prefill_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" decode_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" vllm_config: prefill: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/3p2d-dep2-dep8-8k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/3p2d-dep2-dep8-8k1k.yaml index 8ff86e9d9..84d580452 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/3p2d-dep2-dep8-8k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/3p2d-dep2-dep8-8k1k.yaml @@ -29,13 +29,11 @@ backend: prefill_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" decode_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" vllm_config: prefill: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/3p2d-dep2-tep8-8k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/3p2d-dep2-tep8-8k1k.yaml index a90bfcf32..f272b21bc 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/3p2d-dep2-tep8-8k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/3p2d-dep2-tep8-8k1k.yaml @@ -29,13 +29,11 @@ backend: prefill_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" decode_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" vllm_config: prefill: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/4p2d-dep2-dep8-8k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/4p2d-dep2-dep8-8k1k.yaml index 8feb79b71..b087b0926 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/4p2d-dep2-dep8-8k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/4p2d-dep2-dep8-8k1k.yaml @@ -29,13 +29,11 @@ backend: prefill_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" decode_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" vllm_config: prefill: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/4p3d-dep2-dep4-8k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/4p3d-dep2-dep4-8k1k.yaml index 5639c2eda..94c36243e 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/4p3d-dep2-dep4-8k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/4p3d-dep2-dep4-8k1k.yaml @@ -29,13 +29,11 @@ backend: prefill_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" decode_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" vllm_config: prefill: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/5p2d-dep2-tep8-8k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/5p2d-dep2-tep8-8k1k.yaml index 1f517d815..94f546ec2 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/5p2d-dep2-tep8-8k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/5p2d-dep2-tep8-8k1k.yaml @@ -29,13 +29,11 @@ backend: prefill_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" decode_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" vllm_config: prefill: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/5p2d-dep2-tp8-marlin-8k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/5p2d-dep2-tp8-marlin-8k1k.yaml index cf1840dfe..e77e77600 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/5p2d-dep2-tp8-marlin-8k1k.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/5p2d-dep2-tp8-marlin-8k1k.yaml @@ -29,13 +29,11 @@ backend: prefill_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" decode_environment: VLLM_FLOAT32_MATMUL_PRECISION: high - UCX_NET_DEVICES: "all" - UCX_TLS: "rc,cuda_ipc,cuda_copy,sm,self,tcp" + UCX_TLS: "cuda_copy,rc" vllm_config: prefill: