diff --git a/.github/scripts/monitor_slurm_job.sh b/.github/scripts/monitor_slurm_job.sh
index ba7587ec70..1142e97057 100755
--- a/.github/scripts/monitor_slurm_job.sh
+++ b/.github/scripts/monitor_slurm_job.sh
@@ -9,11 +9,17 @@ cleanup() {
   if [ -n "${tail_pid:-}" ]; then
     kill "${tail_pid}" 2>/dev/null || true
   fi
-  # Cancel the SLURM job if the monitor is exiting due to an error
-  # (e.g., the CI runner is being killed). Don't cancel on success.
+  # Cancel the SLURM job only if it is still active in the scheduler.
+  # If the job already left the queue (squeue returns empty), it has finished
+  # and run_monitored_slurm_job.sh will recover via sacct — don't cancel it.
   if [ "${monitor_success:-0}" -ne 1 ] && [ -n "${job_id:-}" ]; then
-    echo "Monitor exiting abnormally — cancelling SLURM job $job_id"
-    scancel "$job_id" 2>/dev/null || true
+    active_state=$(squeue -j "$job_id" -h -o '%T' 2>/dev/null | head -n1 | tr -d ' ' || echo "")
+    if [ -n "$active_state" ]; then
+      echo "Monitor exiting abnormally — cancelling SLURM job $job_id (state: $active_state)"
+      scancel "$job_id" 2>/dev/null || true
+    else
+      echo "Monitor exiting abnormally — SLURM job $job_id already left queue, not cancelling"
+    fi
   fi
 }
 trap cleanup EXIT
@@ -56,9 +62,11 @@ get_job_state() {
 }
 
 # Check if a state is terminal (job is done, for better or worse)
+# PREEMPTED is intentionally excluded: with --requeue the job restarts under
+# the same job ID and we must keep monitoring rather than exiting early.
 is_terminal_state() {
   case "$1" in
-    COMPLETED|FAILED|CANCELLED|CANCELLED+|TIMEOUT|OUT_OF_MEMORY|NODE_FAIL|BOOT_FAIL|DEADLINE|PREEMPTED|REVOKED)
+    COMPLETED|FAILED|CANCELLED|CANCELLED+|TIMEOUT|OUT_OF_MEMORY|NODE_FAIL|BOOT_FAIL|DEADLINE|REVOKED)
       return 0 ;;
     *)
       return 1 ;;
@@ -74,7 +82,7 @@ while [ ! -f "$output_file" ]; do
   state=$(get_job_state "$job_id")
 
   case "$state" in
-    PENDING|CONFIGURING)
+    PENDING|CONFIGURING|PREEMPTED)
       unknown_count=0
       sleep 5
       ;;
diff --git a/.github/scripts/prebuild-case-optimization.sh b/.github/scripts/prebuild-case-optimization.sh
index 87f26fdb5f..130f523c07 100755
--- a/.github/scripts/prebuild-case-optimization.sh
+++ b/.github/scripts/prebuild-case-optimization.sh
@@ -21,6 +21,8 @@ case "$cluster" in
     *) echo "ERROR: Unknown cluster '$cluster'"; exit 1 ;;
 esac
 
+rm -rf build
+
 . ./mfc.sh load -c "$flag" -m g
 source .github/scripts/gpu-opts.sh
 
diff --git a/.github/scripts/retry-build.sh b/.github/scripts/retry-build.sh
index b82a2e5d8d..38ac08b217 100755
--- a/.github/scripts/retry-build.sh
+++ b/.github/scripts/retry-build.sh
@@ -1,30 +1,13 @@
 #!/bin/bash
-# Provides retry_build(): 3-attempt loop with configurable cleanup.
-# Set RETRY_CLEAN_CMD to override cleanup (default: rm -rf build/staging build/install build/lock.yaml).
+# Provides retry_build(): 2-attempt loop.
+# On failure of attempt 1, nukes the entire build directory before attempt 2.
 # Set RETRY_VALIDATE_CMD to run a post-build validation; failure triggers a retry.
 # Usage: source .github/scripts/retry-build.sh
 #        retry_build ./mfc.sh build -j 8 --gpu acc
 
-# Try normal cleanup; if it fails, escalate to cache nuke.
-_retry_clean() {
-    local clean_cmd="$1"
-    if eval "$clean_cmd" 2>/dev/null; then
-        return 0
-    fi
-    echo "  Normal cleanup failed."
-    if type _cache_nuke > /dev/null 2>&1; then
-        echo "  Escalating to NFS cache nuke..."
-        _cache_nuke
-    else
-        echo "  _cache_nuke not available, best-effort rm."
-        rm -rf build/staging build/install build/lock.yaml 2>/dev/null || true
-    fi
-}
-
 retry_build() {
-    local clean_cmd="${RETRY_CLEAN_CMD:-rm -rf build/staging build/install build/lock.yaml}"
     local validate_cmd="${RETRY_VALIDATE_CMD:-}"
-    local max_attempts=3
+    local max_attempts=2
     local attempt=1
     while [ $attempt -le $max_attempts ]; do
         echo "Build attempt $attempt of $max_attempts..."
@@ -33,8 +16,8 @@ retry_build() {
                 if ! eval "$validate_cmd"; then
                     echo "Post-build validation failed on attempt $attempt."
                     if [ $attempt -lt $max_attempts ]; then
-                        echo "Cleaning and retrying in 5s..."
-                        _retry_clean "$clean_cmd"
+                        echo "  Nuking build directory before retry..."
+                        rm -rf build 2>/dev/null || true
                         sleep 5
                         attempt=$((attempt + 1))
                         continue
@@ -48,8 +31,8 @@ retry_build() {
             return 0
         fi
         if [ $attempt -lt $max_attempts ]; then
-            echo "Build failed on attempt $attempt. Retrying in 30s..."
-            _retry_clean "$clean_cmd"
+            echo "  Build failed — nuking build directory before retry..."
+            rm -rf build 2>/dev/null || true
             sleep 30
         else
             echo "Build failed after $max_attempts attempts."
diff --git a/.github/scripts/run_monitored_slurm_job.sh b/.github/scripts/run_monitored_slurm_job.sh
index 905520c45e..6fb9e254ec 100644
--- a/.github/scripts/run_monitored_slurm_job.sh
+++ b/.github/scripts/run_monitored_slurm_job.sh
@@ -25,8 +25,10 @@ if [ "$monitor_exit" -ne 0 ]; then
     echo "Monitor exited with code $monitor_exit; re-checking SLURM job $job_id final state..."
     # Give the SLURM epilog time to finalize if the job just finished
     sleep 30
-    final_state=$(sacct -j "$job_id" -n -X -P -o State 2>/dev/null | head -n1 | cut -d'|' -f1 | tr -d ' ' || echo "UNKNOWN")
-    final_exit=$(sacct -j "$job_id" --format=ExitCode --noheader --parsable2 2>/dev/null | head -n1 | tr -d ' ' || echo "")
+    final_state=$(sacct -j "$job_id" -n -X -P -o State 2>/dev/null | head -n1 | cut -d'|' -f1 | tr -d ' ' || true)
+    final_state="${final_state:-UNKNOWN}"
+    final_exit=$(sacct -j "$job_id" -X --format=ExitCode --noheader --parsable2 2>/dev/null | head -n1 | tr -d ' ' || true)
+    final_exit="${final_exit:-}"
     echo "Final SLURM state=$final_state exit=$final_exit"
     if [ "$final_state" = "COMPLETED" ] && [ "$final_exit" = "0:0" ]; then
         echo "SLURM job $job_id completed successfully despite monitor failure — continuing."
diff --git a/.github/scripts/run_parallel_benchmarks.sh b/.github/scripts/run_parallel_benchmarks.sh
index be9b5c5a94..8c562b911e 100755
--- a/.github/scripts/run_parallel_benchmarks.sh
+++ b/.github/scripts/run_parallel_benchmarks.sh
@@ -20,6 +20,31 @@ echo "=========================================="
 echo "Starting parallel benchmark jobs..."
 echo "=========================================="
 
+# For Phoenix GPU benchmarks, select a consistent GPU partition before launching
+# both parallel jobs so PR and master always land on the same GPU type.
+if [ "$device" = "gpu" ] && [ "$cluster" = "phoenix" ]; then
+    echo "Selecting Phoenix GPU partition for benchmark consistency..."
+    # Prefer older/smaller partitions first (rtx6000, l40s, v100) to leave
+    # large modern nodes (h200, h100, a100) free for production workloads.
+    # rtx6000 has the most nodes and gives the most consistent baselines.
+    BENCH_GPU_PARTITION=""
+    for part in gpu-rtx6000 gpu-l40s gpu-v100 gpu-h200 gpu-h100 gpu-a100; do
+        # || true: grep -c exits 1 on zero matches (or when sinfo returns no output
+        # for an unknown partition); suppress so set -euo pipefail doesn't abort.
+        idle=$(sinfo -p "$part" --noheader -o "%t" 2>/dev/null | grep -cE "^(idle|mix)" || true)
+        if [ "${idle:-0}" -gt 0 ]; then
+            BENCH_GPU_PARTITION="$part"
+            echo "Selected GPU partition: $BENCH_GPU_PARTITION ($idle idle/mix nodes)"
+            break
+        fi
+    done
+    if [ -z "$BENCH_GPU_PARTITION" ]; then
+        echo "WARNING: No idle GPU partition found; falling back to gpu-rtx6000 (may queue)"
+        BENCH_GPU_PARTITION="gpu-rtx6000"
+    fi
+    export BENCH_GPU_PARTITION
+fi
+
 # Run both jobs with monitoring using dedicated script from PR
 # Use stdbuf for line-buffered output and prefix each line for clarity
 (set -o pipefail; stdbuf -oL -eL bash "${SCRIPT_DIR}/submit_and_monitor_bench.sh" pr "$device" "$interface" "$cluster" 2>&1 | while IFS= read -r line; do echo "[PR] $line"; done) &
@@ -40,6 +65,8 @@ wait "$pr_pid"
 pr_exit=$?
 if [ "$pr_exit" -ne 0 ]; then
   echo "PR job exited with code: $pr_exit"
+  echo "Last 50 lines of PR job log:"
+  tail -n 50 "pr/bench-${device}-${interface}.out" 2>/dev/null || echo "  Could not read PR log"
 else
   echo "PR job completed successfully"
 fi
@@ -48,6 +75,8 @@ wait "$master_pid"
 master_exit=$?
 if [ "$master_exit" -ne 0 ]; then
   echo "Master job exited with code: $master_exit"
+  echo "Last 50 lines of master job log:"
+  tail -n 50 "master/bench-${device}-${interface}.out" 2>/dev/null || echo "  Could not read master log"
 else
   echo "Master job completed successfully"
 fi
diff --git a/.github/scripts/setup-build-cache.sh b/.github/scripts/setup-build-cache.sh
deleted file mode 100755
index 7e47175f6e..0000000000
--- a/.github/scripts/setup-build-cache.sh
+++ /dev/null
@@ -1,101 +0,0 @@
-#!/bin/bash
-# Sets up a persistent build cache for self-hosted CI runners.
-# Creates a symlink: ./build -> <cache_root>/<key>/build
-#
-# Each runner gets its own cache keyed by (cluster, device, interface, runner).
-# This avoids cross-runner path issues entirely — CMake's absolute paths are
-# always correct because the same runner always uses the same workspace path.
-#
-# Usage: source .github/scripts/setup-build-cache.sh <cluster> <device> <interface>
-
-_cache_cluster="${1:?Usage: setup-build-cache.sh <cluster> <device> <interface>}"
-_cache_device="${2:?}"
-_cache_interface="${3:-none}"
-_cache_runner="${RUNNER_NAME:?RUNNER_NAME not set}"
-
-# Select cache root based on cluster (each HPC system has its own persistent storage).
-case "$_cache_cluster" in
-    phoenix)
-        _cache_root="/storage/coda1/d-coc/0/sbryngelson3/.mfc-ci-cache" ;;
-    frontier|frontier_amd)
-        _cache_root="/lustre/orion/cfd154/scratch/sbryngelson/.mfc-ci-cache" ;;
-    *)
-        echo "=== Build Cache Setup ==="
-        echo "  No cache root configured for cluster '$_cache_cluster' — skipping."
-        echo "========================="
-        return 0 2>/dev/null || exit 0 ;;
-esac
-
-_cache_key="${_cache_cluster}-${_cache_device}-${_cache_interface}-${_cache_runner}"
-_cache_base="${_cache_root}/${_cache_key}/build"
-
-# Check if the cache directory is healthy (readable, writable, no stale handles).
-_cache_healthy() {
-    local dir="$1"
-    if ! ls "$dir" > /dev/null 2>&1; then
-        echo "  Health check FAILED: cannot list $dir"
-        return 1
-    fi
-    if [ -e "$dir/lock.yaml" ] && ! stat "$dir/lock.yaml" > /dev/null 2>&1; then
-        echo "  Health check FAILED: cannot stat $dir/lock.yaml"
-        return 1
-    fi
-    local probe="$dir/.nfs_probe.$$"
-    if ! touch "$probe" 2>/dev/null || ! rm -f "$probe" 2>/dev/null; then
-        echo "  Health check FAILED: cannot write/remove probe in $dir"
-        rm -f "$probe" 2>/dev/null
-        return 1
-    fi
-    return 0
-}
-
-# Nuclear recovery: rename stale cache out of the way and create a fresh one.
-# Uses mv (operates on parent directory entry) which works even when children
-# have stale file handles that prevent rm -rf from succeeding.
-_cache_nuke() {
-    local base="${1:-$_cache_base}"
-    local stale_name="${base}.stale.$(date +%s)"
-    echo "  NFS cache nuke: parking stale dir -> $stale_name"
-    if mv "$base" "$stale_name" 2>/dev/null; then
-        echo "  NFS cache nuke: renamed successfully"
-    else
-        echo "  NFS cache nuke: mv failed, trying rm -rf as fallback"
-        rm -rf "$base" 2>/dev/null || true
-    fi
-    mkdir -p "$base"
-    echo "  NFS cache nuke: fresh cache created at $base"
-}
-
-mkdir -p "$_cache_base"
-_cache_dir="$(cd "$_cache_base" && pwd -P)"
-
-echo "=== Build Cache Setup ==="
-echo "  Cache key: $_cache_key"
-echo "  Cache dir: $_cache_dir"
-
-# Pre-flight: detect stale NFS handles before wasting a build attempt.
-if ! _cache_healthy "$_cache_dir"; then
-    echo "  Stale NFS cache detected — nuking and recreating."
-    _cache_nuke "$_cache_base"
-    _cache_dir="$(cd "$_cache_base" && pwd -P)"
-fi
-
-# Replace any existing build/ (real dir or stale symlink) with a symlink
-# to our runner-specific cache directory.
-# Use unlink for symlinks to avoid rm -rf following the link and deleting
-# the shared cache contents (which another runner may be using).
-if [ -L "build" ]; then
-    unlink "build"
-elif [ -e "build" ]; then
-    rm -rf "build"
-fi
-
-ln -s "$_cache_dir" "build"
-
-echo "  Symlink: build -> $_cache_dir"
-
-# Garbage-collect stale cache dirs parked by _cache_nuke more than 7 days ago.
-_cache_parent="$(dirname "$_cache_base")"
-find "$_cache_parent" -maxdepth 1 -name "*.stale.*" -mtime +7 -exec rm -rf {} + 2>/dev/null || true
-
-echo "========================="
diff --git a/.github/scripts/submit_and_monitor_bench.sh b/.github/scripts/submit_and_monitor_bench.sh
index c081c8692a..e0a6eb7384 100755
--- a/.github/scripts/submit_and_monitor_bench.sh
+++ b/.github/scripts/submit_and_monitor_bench.sh
@@ -14,12 +14,18 @@ device="$2"
 interface="$3"
 cluster="$4"
 
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
 echo "[$dir] Submitting benchmark for $device-$interface on $cluster..."
 cd "$dir"
 
-# Submit and monitor job (submit.sh auto-detects bench mode from script name)
-bash .github/workflows/$cluster/submit.sh \
-    .github/workflows/$cluster/bench.sh "$device" "$interface"
+# Always use the PR's submit.sh so both master and PR builds benefit from the
+# run_monitored_slurm_job.sh SIGKILL recovery wrapper.  The bench script is
+# still resolved relative to the current directory (master/ or pr/) so the
+# correct branch code is benchmarked.  SLURM_SUBMIT_DIR ensures the job runs
+# in the right directory regardless of which submit.sh is invoked.
+PR_SUBMIT="${SCRIPT_DIR}/../workflows/${cluster}/submit.sh"
+bash "$PR_SUBMIT" .github/workflows/$cluster/bench.sh "$device" "$interface"
 
 # Verify the YAML output file was created
 job_slug="bench-$device-$interface"
diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
index b45fc45e40..8a1c848493 100644
--- a/.github/workflows/bench.yml
+++ b/.github/workflows/bench.yml
@@ -85,6 +85,7 @@ jobs:
             device: gpu
             interface: omp
             build_script: "bash .github/workflows/frontier_amd/build.sh gpu omp bench"
+    continue-on-error: ${{ matrix.cluster == 'frontier' || matrix.cluster == 'frontier_amd' }}
     runs-on:
       group: ${{ matrix.group }}
       labels: ${{ matrix.labels }}
@@ -106,7 +107,7 @@ jobs:
         if: matrix.build_script != ''
         uses: nick-fields/retry@v3
         with:
-          max_attempts: 3
+          max_attempts: 2
           retry_wait_seconds: 60
           timeout_minutes: 150
           command: |
@@ -118,13 +119,20 @@ jobs:
             wait $pid2; e2=$?
             [ $e1 -eq 0 ] && [ $e2 -eq 0 ]
           on_retry_command: |
-            (cd pr     && ./mfc.sh clean) &
-            (cd master && ./mfc.sh clean) &
-            wait
+            rm -rf pr/build master/build
 
       - name: Bench (Master v. PR)
         run: bash pr/.github/scripts/run_parallel_benchmarks.sh ${{ matrix.device }} ${{ matrix.interface }} ${{ matrix.cluster }}
 
+      - name: Cancel SLURM Jobs
+        if: cancelled()
+        run: |
+          find . -name "*.slurm_job_id" | while read -r f; do
+            job_id=$(cat "$f")
+            echo "Cancelling SLURM job $job_id"
+            scancel "$job_id" 2>/dev/null || true
+          done
+
       - name: Generate & Post Comment
         if: always()
         run: |
@@ -137,6 +145,29 @@ jobs:
           cat pr/bench-${{ matrix.device }}-${{ matrix.interface }}.* 2>/dev/null || true
           cat master/bench-${{ matrix.device }}-${{ matrix.interface }}.* 2>/dev/null || true
 
+      - name: Print Per-Case Logs
+        if: always()
+        run: |
+          passed=() failed=()
+          for out in pr/build/benchmarks/*/*.out master/build/benchmarks/*/*.out; do
+            [ -f "$out" ] || continue
+            [ -f "${out%.out}.yaml" ] && passed+=("$out") || failed+=("$out")
+          done
+
+          echo "=== Per-Case Summary: ${#failed[@]} failed, ${#passed[@]} passed ==="
+          for out in "${failed[@]}"; do echo "  [FAILED] $out"; done
+          for out in "${passed[@]}"; do echo "  [PASSED] $out"; done
+
+          if [ ${#failed[@]} -gt 0 ]; then
+            echo ""
+            echo "=== Failed Case Logs ==="
+            for out in "${failed[@]}"; do
+              echo "--- $out ---"
+              cat "$out"
+              echo ""
+            done
+          fi
+
       # All other runners (non-Phoenix) just run without special env
       - name: Archive Logs (Frontier)
         if: always() && matrix.cluster != 'phoenix'
diff --git a/.github/workflows/frontier/bench.sh b/.github/workflows/frontier/bench.sh
index b60f8541a2..b896feb17c 100644
--- a/.github/workflows/frontier/bench.sh
+++ b/.github/workflows/frontier/bench.sh
@@ -2,8 +2,11 @@
 
 source .github/scripts/bench-preamble.sh
 
+# Cap parallel jobs at 64 to avoid overwhelming MPI daemons on large nodes.
+n_jobs=$(( $(nproc) > 64 ? 64 : $(nproc) ))
+
 if [ "$job_device" = "gpu" ]; then
     ./mfc.sh bench --mem 4 -j $n_ranks -o "$job_slug.yaml" -- -c $job_cluster $device_opts -n $n_ranks
 else
-    ./mfc.sh bench --mem 1 -j $(nproc) -o "$job_slug.yaml" -- -c $job_cluster $device_opts -n $n_ranks
+    ./mfc.sh bench --mem 1 -j $n_jobs -o "$job_slug.yaml" -- -c $job_cluster $device_opts -n $n_ranks
 fi
diff --git a/.github/workflows/phoenix/bench.sh b/.github/workflows/phoenix/bench.sh
index 10a38d0eea..abaf76f33d 100644
--- a/.github/workflows/phoenix/bench.sh
+++ b/.github/workflows/phoenix/bench.sh
@@ -2,6 +2,10 @@
 
 source .github/scripts/bench-preamble.sh
 
+# Cap parallel jobs at 64 to avoid overwhelming MPI daemons on large nodes
+# (GNR nodes have 192 cores but nproc is too aggressive for build/bench).
+n_jobs=$(( $(nproc) > 64 ? 64 : $(nproc) ))
+
 tmpbuild=/storage/project/r-sbryngelson3-0/sbryngelson3/mytmp_build
 currentdir=$tmpbuild/run-$(( RANDOM % 900 ))
 mkdir -p $tmpbuild
@@ -18,9 +22,9 @@ fi
 rm -rf build
 
 source .github/scripts/retry-build.sh
-RETRY_CLEAN_CMD="./mfc.sh clean" retry_build ./mfc.sh build -j $(nproc) $build_opts || exit 1
+retry_build ./mfc.sh build -j $n_jobs $build_opts || exit 1
 
-./mfc.sh bench $bench_opts -j $(nproc) -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n $n_ranks
+./mfc.sh bench $bench_opts -j $n_jobs -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n $n_ranks
 
 sleep 10
 rm -rf "$currentdir" || true
diff --git a/.github/workflows/phoenix/submit-job.sh b/.github/workflows/phoenix/submit-job.sh
new file mode 100755
index 0000000000..caa6bd2175
--- /dev/null
+++ b/.github/workflows/phoenix/submit-job.sh
@@ -0,0 +1,123 @@
+#!/bin/bash
+# Submit a SLURM job without waiting for it to complete.
+# Writes the job ID to <job_slug>.slurm_job_id so a separate monitor step can wait.
+# On rerun: if an id file for this slug exists, any still-live job it names is cancelled and a fresh job is submitted.
+#
+# Usage: submit-job.sh [script.sh] [cpu|gpu] [none|acc|omp]
+
+set -euo pipefail
+
+# Ignore SIGHUP to survive login node session drops
+trap '' HUP
+
+usage() {
+    echo "Usage: $0 [script.sh] [cpu|gpu] [none|acc|omp]"
+}
+
+if [ -z "${1:-}" ]; then
+    usage
+    exit 1
+fi
+
+sbatch_script_contents=$(cat "$1")
+
+# Detect job type from submitted script basename
+script_basename="$(basename "$1" .sh)"
+case "$script_basename" in
+    bench*) job_type="bench" ;;
+    *)      job_type="test"  ;;
+esac
+
+sbatch_cpu_opts="\
+#SBATCH -p cpu-small               # partition
+#SBATCH --ntasks-per-node=24       # Number of cores per node required
+#SBATCH --mem-per-cpu=2G           # Memory per core\
+"
+
+if [ "$job_type" = "bench" ]; then
+    bench_partition="${BENCH_GPU_PARTITION:-gpu-rtx6000}"  # falls back to gpu-rtx6000 when the caller did not export a partition
+    [ "${2:-}" != "gpu" ] || echo "Submitting bench GPU job to partition: $bench_partition (BENCH_GPU_PARTITION=${BENCH_GPU_PARTITION:-<unset, using default>})"  # CPU submissions use sbatch_cpu_opts, so only announce the GPU partition for GPU jobs
+    sbatch_gpu_opts="\
+#SBATCH -p $bench_partition
+#SBATCH --ntasks-per-node=4       # Number of cores per node required
+#SBATCH -G2\
+"
+    sbatch_time="#SBATCH -t 04:00:00"
+else
+    sbatch_gpu_opts="\
+#SBATCH -p gpu-v100,gpu-a100,gpu-h100,gpu-l40s,gpu-h200
+#SBATCH --ntasks-per-node=4       # Number of cores per node required
+#SBATCH -G2\
+"
+    sbatch_time="#SBATCH -t 03:00:00"
+fi
+
+if [ "$2" = "cpu" ]; then
+    sbatch_device_opts="$sbatch_cpu_opts"
+elif [ "$2" = "gpu" ]; then
+    sbatch_device_opts="$sbatch_gpu_opts"
+else
+    usage
+    exit 1
+fi
+
+job_slug="$(basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g')-$2-$3"
+output_file="$job_slug.out"
+id_file="${job_slug}.slurm_job_id"
+
+# On rerun, cancel any existing job for this slug and submit a fresh one.
+# If the job is still live (RUNNING/PENDING), scancel it first as a safety net
+# in case the "Cancel SLURM Jobs" step did not fire (e.g. runner was SIGKILL'd).
+if [ -f "$id_file" ]; then
+    existing_id=$(cat "$id_file")
+    state=$(sacct -j "$existing_id" -n -X -P -o State 2>/dev/null | head -n1 | cut -d'|' -f1 | tr -d ' ' || true)  # || true: a failed or empty sacct query falls through to UNKNOWN below
+    case "${state:-UNKNOWN}" in
+        RUNNING|PENDING|REQUEUED|COMPLETING)
+            echo "Cancelling stale SLURM job $existing_id (state=$state) before resubmission"
+            scancel "$existing_id" 2>/dev/null || true
+            ;;
+        *)
+            echo "Stale job $existing_id (state=${state:-UNKNOWN}) — submitting fresh"
+            ;;
+    esac
+    rm -f "$id_file" "$output_file"  # also drop the previous run's output: the monitor waits for this file to appear, and a stale copy would end that wait before the new job starts
+fi
+
+submit_output=$(sbatch <<EOT
+#!/bin/bash
+#SBATCH -Jshb-$job_slug            # Job name
+#SBATCH --account=gts-sbryngelson3 # charge account
+#SBATCH -N1                        # Number of nodes required
+$sbatch_device_opts
+$sbatch_time
+#SBATCH -q embers                  # QOS Name
+#SBATCH --requeue                  # Auto-requeue on preemption
+#SBATCH -o$output_file             # Combined output and error messages file
+
+set -e
+set -x
+
+cd "\$SLURM_SUBMIT_DIR"
+echo "Running in \$(pwd):"
+
+job_slug="$job_slug"
+job_device="$2"
+job_interface="$3"
+
+. ./mfc.sh load -c p -m $2
+
+$sbatch_script_contents
+
+EOT
+)
+
+job_id=$(echo "$submit_output" | grep -oE '[0-9]+' | head -n1 || true)  # first number only; || true keeps set -e/pipefail from aborting here so the check below can report the failure
+if [ -z "$job_id" ]; then
+    echo "ERROR: Failed to submit job. sbatch output:"
+    echo "$submit_output"
+    exit 1
+fi
+
+echo "Submitted batch job $job_id"
+echo "$job_id" > "$id_file"
+echo "Job ID written to $id_file"
diff --git a/.github/workflows/phoenix/submit.sh b/.github/workflows/phoenix/submit.sh
index 786489d1c4..0c009bd001 100755
--- a/.github/workflows/phoenix/submit.sh
+++ b/.github/workflows/phoenix/submit.sh
@@ -1,6 +1,10 @@
 #!/bin/bash
+# Submit a SLURM job and wait for it to complete.
+# Delegates submission (including cancellation of any stale job for the same slug) to submit-job.sh, then monitors.
+#
+# Usage: submit.sh [script.sh] [cpu|gpu] [none|acc|omp]
 
-set -e
+set -euo pipefail
 
 # Ignore SIGHUP to survive login node session drops
 trap '' HUP
@@ -9,90 +13,22 @@ usage() {
     echo "Usage: $0 [script.sh] [cpu|gpu] [none|acc|omp]"
 }
 
-if [ ! -z "$1" ]; then
-    sbatch_script_contents=`cat $1`
-else
+if [ -z "${1:-}" ]; then
     usage
     exit 1
 fi
 
-# Detect job type from submitted script basename
-script_basename="$(basename "$1" .sh)"
-case "$script_basename" in
-    bench*) job_type="bench" ;;
-    *)      job_type="test"  ;;
-esac
-
-sbatch_cpu_opts="\
-#SBATCH -p cpu-small               # partition
-#SBATCH --ntasks-per-node=24       # Number of cores per node required
-#SBATCH --mem-per-cpu=2G           # Memory per core\
-"
-
-if [ "$job_type" = "bench" ]; then
-    sbatch_gpu_opts="\
-#SBATCH -CL40S
-#SBATCH --ntasks-per-node=4       # Number of cores per node required
-#SBATCH -G2\
-"
-    sbatch_time="#SBATCH -t 04:00:00"
-else
-    sbatch_gpu_opts="\
-#SBATCH -p gpu-v100,gpu-a100,gpu-h100,gpu-l40s
-#SBATCH --ntasks-per-node=4       # Number of cores per node required
-#SBATCH -G2\
-"
-    sbatch_time="#SBATCH -t 03:00:00"
-fi
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 
-if [ "$2" = "cpu" ]; then
-    sbatch_device_opts="$sbatch_cpu_opts"
-elif [ "$2" = "gpu" ]; then
-    sbatch_device_opts="$sbatch_gpu_opts"
-else
-    usage
-    exit 1
-fi
+# Submit (submit-job.sh cancels any still-live job recorded for this slug, then submits a fresh one)
+bash "$SCRIPT_DIR/submit-job.sh" "$@"
 
-job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2-$3"
+# Derive the same job slug and file paths as submit-job.sh.
+# NOTE: this sed pipeline must stay identical to the one in submit-job.sh —
+# if they diverge the id-file will not be found and the monitor will fail.
+job_slug="$(basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g')-$2-$3"
 output_file="$job_slug.out"
+id_file="${job_slug}.slurm_job_id"
 
-submit_output=$(sbatch <<EOT
-#!/bin/bash
-#SBATCH -Jshb-$job_slug            # Job name
-#SBATCH --account=gts-sbryngelson3 # charge account
-#SBATCH -N1                        # Number of nodes required
-$sbatch_device_opts
-$sbatch_time
-#SBATCH -q embers                  # QOS Name
-#SBATCH --requeue                  # Auto-requeue on preemption
-#SBATCH -o$output_file             # Combined output and error messages file
-
-set -e
-set -x
-
-cd "\$SLURM_SUBMIT_DIR"
-echo "Running in $(pwd):"
-
-job_slug="$job_slug"
-job_device="$2"
-job_interface="$3"
-
-. ./mfc.sh load -c p -m $2
-
-$sbatch_script_contents
-
-EOT
-)
-
-job_id=$(echo "$submit_output" | grep -oE '[0-9]+')
-if [ -z "$job_id" ]; then
-    echo "ERROR: Failed to submit job. sbatch output:"
-    echo "$submit_output"
-    exit 1
-fi
-
-echo "Submitted batch job $job_id"
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+job_id=$(cat "$id_file")
 bash "$SCRIPT_DIR/../../scripts/run_monitored_slurm_job.sh" "$job_id" "$output_file"
diff --git a/.github/workflows/phoenix/test.sh b/.github/workflows/phoenix/test.sh
index c8a5af2132..d073c54bde 100644
--- a/.github/workflows/phoenix/test.sh
+++ b/.github/workflows/phoenix/test.sh
@@ -5,8 +5,8 @@ build_opts="$gpu_opts"
 
 rm -rf build
 
-# Build with retry; smoke-test cached binaries to catch architecture mismatches
-# (SIGILL from binaries compiled on a different compute node).
+# Build with retry; smoke-test the freshly built syscheck binary to catch
+# architecture mismatches (SIGILL from binaries compiled on a different compute node).
 source .github/scripts/retry-build.sh
 RETRY_VALIDATE_CMD='syscheck_bin=$(find build/install -name syscheck -type f 2>/dev/null | head -1); [ -z "$syscheck_bin" ] || "$syscheck_bin" > /dev/null 2>&1' \
     retry_build ./mfc.sh test -v --dry-run -j 8 $build_opts || exit 1
@@ -19,4 +19,4 @@ if [ "$job_device" = "gpu" ]; then
     n_test_threads=$((ngpus * 2))
 fi
 
-./mfc.sh test -v --max-attempts 3 -a -j $n_test_threads $device_opts -- -c phoenix
+./mfc.sh test -v --max-attempts 3 -a -j $n_test_threads $device_opts ${build_opts:---no-gpu} -- -c phoenix
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 49934c6973..9ce6dda24c 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -137,16 +137,12 @@ jobs:
           printenv | sort > /tmp/env_after
           diff /tmp/env_before /tmp/env_after | grep '^>' | sed 's/^> //' >> $GITHUB_ENV
 
-      - name: Set up Python 3.14
-        uses: actions/setup-python@v5
-        with:
-          python-version: '3.14'
-
       - name: Build
         run:  |
-          /bin/bash mfc.sh test -v --dry-run -j $(nproc) --${{ matrix.debug }} --${{ matrix.mpi }} --${{ matrix.precision }} $TEST_ALL
+          /bin/bash mfc.sh test -v --dry-run -j $(nproc) --${{ matrix.debug }} --${{ matrix.mpi }} $PRECISION $TEST_ALL
         env:
           TEST_ALL: ${{ matrix.mpi == 'mpi' && '--test-all' || '' }}
+          PRECISION: ${{ matrix.precision != '' && format('--{0}', matrix.precision) || '' }}
 
       - name: Test
         run:  |
@@ -159,7 +155,10 @@ jobs:
     name: "${{ matrix.cluster_name }} (${{ matrix.device }}${{ matrix.interface != 'none' && format('-{0}', matrix.interface) || '' }}${{ matrix.shard != '' && format(' [{0}]', matrix.shard) || '' }})"
     if: github.repository == 'MFlowCode/MFC' && needs.file-changes.outputs.checkall == 'true' && github.event.pull_request.draft != true
     needs: [lint-gate, file-changes]
-    continue-on-error: false
+    # Frontier CCE compiler is periodically broken by toolchain updates (e.g.
+    # cpe/25.03 introduced an IPA SIGSEGV in CCE 19.0.0). Allow Frontier to
+    # fail without blocking PR merges; Phoenix remains a hard gate.
+    continue-on-error: ${{ matrix.runner == 'frontier' }}
     timeout-minutes: 480
     strategy:
       matrix:
@@ -237,7 +236,9 @@ jobs:
       - name: Clone
         uses: actions/checkout@v4
         with:
-          clean: true
+          # clean: false preserves .slurm_job_id files across reruns so
+          # submit-job.sh can detect and cancel stale SLURM jobs on retry.
+          clean: false
 
       - name: Build
         if:   matrix.cluster != 'phoenix'
@@ -249,7 +250,18 @@ jobs:
           command: bash .github/workflows/${{ matrix.cluster }}/build.sh ${{ matrix.device }} ${{ matrix.interface }}
           on_retry_command: rm -rf build
 
+      - name: Submit SLURM Test Job
+        if:   matrix.cluster == 'phoenix'
+        run:  bash .github/workflows/phoenix/submit-job.sh .github/workflows/phoenix/test.sh ${{ matrix.device }} ${{ matrix.interface }}
+
+      - name: Monitor SLURM Test Job
+        if:   matrix.cluster == 'phoenix'
+        run: |
+          slug="test-${{ matrix.device }}-${{ matrix.interface }}"
+          bash .github/scripts/run_monitored_slurm_job.sh "$(cat ${slug}.slurm_job_id)" "${slug}.out"
+
       - name: Test
+        if:   matrix.cluster != 'phoenix'
         run:  bash .github/workflows/${{ matrix.cluster }}/submit.sh .github/workflows/${{ matrix.cluster }}/test.sh ${{ matrix.device }} ${{ matrix.interface }} ${{ matrix.shard }}
 
       - name: Cancel SLURM Jobs
@@ -287,7 +299,8 @@ jobs:
     name: "Case Opt | ${{ matrix.cluster_name }} (${{ matrix.device }}-${{ matrix.interface }})"
     if: github.repository == 'MFlowCode/MFC' && needs.file-changes.outputs.checkall == 'true' && github.event.pull_request.draft != true
     needs: [lint-gate, file-changes]
-    continue-on-error: false
+    # Frontier is non-blocking for the same reason as the self job above.
+    continue-on-error: ${{ matrix.runner == 'frontier' }}
     timeout-minutes: 480
     strategy:
       matrix:
@@ -324,7 +337,7 @@ jobs:
       - name: Clone
         uses: actions/checkout@v4
         with:
-          clean: true
+          clean: false
 
       - name: Pre-Build (SLURM)
         if:   matrix.cluster == 'phoenix'
@@ -334,7 +347,18 @@ jobs:
         if:   matrix.cluster != 'phoenix'
         run:  bash .github/scripts/prebuild-case-optimization.sh ${{ matrix.cluster }} ${{ matrix.device }} ${{ matrix.interface }}
 
+      - name: Submit Case-Optimization Tests
+        if:   matrix.cluster == 'phoenix'
+        run:  bash .github/workflows/phoenix/submit-job.sh .github/scripts/run_case_optimization.sh ${{ matrix.device }} ${{ matrix.interface }}
+
+      - name: Monitor Case-Optimization Tests
+        if:   matrix.cluster == 'phoenix'
+        run: |
+          slug="run-case-optimization-${{ matrix.device }}-${{ matrix.interface }}"
+          bash .github/scripts/run_monitored_slurm_job.sh "$(cat ${slug}.slurm_job_id)" "${slug}.out"
+
       - name: Run Case-Optimization Tests
+        if:   matrix.cluster != 'phoenix'
         run:  bash .github/workflows/${{ matrix.cluster }}/submit.sh .github/scripts/run_case_optimization.sh ${{ matrix.device }} ${{ matrix.interface }}
 
       - name: Cancel SLURM Jobs
diff --git a/benchmarks/5eq_rk3_weno3_hllc/case.py b/benchmarks/5eq_rk3_weno3_hllc/case.py
index 5ecc327e8f..fa09426ffe 100644
--- a/benchmarks/5eq_rk3_weno3_hllc/case.py
+++ b/benchmarks/5eq_rk3_weno3_hllc/case.py
@@ -191,8 +191,8 @@
             "cyl_coord": "F",
             "dt": dt,
             "t_step_start": 0,
-            "t_step_stop": ARGS["steps"] if ARGS["steps"] is not None else int(7 * (5 * size + 5)),
-            "t_step_save": ARGS["steps"] if ARGS["steps"] is not None else int(7 * (5 * size + 5)),
+            "t_step_stop": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)),
+            "t_step_save": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)),
             # Simulation Algorithm Parameters
             "num_patches": 3,
             "model_eqns": 2,
diff --git a/benchmarks/hypo_hll/case.py b/benchmarks/hypo_hll/case.py
index 1663a507aa..f8d0928a01 100644
--- a/benchmarks/hypo_hll/case.py
+++ b/benchmarks/hypo_hll/case.py
@@ -44,8 +44,8 @@
             "p": Nz,
             "dt": 1e-8,
             "t_step_start": 0,
-            "t_step_stop": ARGS["steps"] if ARGS["steps"] is not None else int(7 * (5 * size + 5)),
-            "t_step_save": ARGS["steps"] if ARGS["steps"] is not None else int(7 * (5 * size + 5)),
+            "t_step_stop": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)),
+            "t_step_save": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)),
             # Simulation Algorithm Parameters
             "num_patches": 2,
             "model_eqns": 2,
diff --git a/benchmarks/ibm/case.py b/benchmarks/ibm/case.py
index e16cb620b7..303cf7fcaf 100644
--- a/benchmarks/ibm/case.py
+++ b/benchmarks/ibm/case.py
@@ -48,8 +48,8 @@
             "p": Nz,
             "dt": mydt,
             "t_step_start": 0,
-            "t_step_stop": ARGS["steps"] if ARGS["steps"] is not None else int(7 * (5 * size + 5)),
-            "t_step_save": ARGS["steps"] if ARGS["steps"] is not None else int(7 * (5 * size + 5)),
+            "t_step_stop": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)),
+            "t_step_save": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)),
             # Simulation Algorithm Parameters
             "num_patches": 1,
             "model_eqns": 2,
diff --git a/benchmarks/igr/case.py b/benchmarks/igr/case.py
index 469bff1fa9..4ceed76257 100644
--- a/benchmarks/igr/case.py
+++ b/benchmarks/igr/case.py
@@ -63,8 +63,8 @@
             "cyl_coord": "F",
             "dt": dt,
             "t_step_start": 0,
-            "t_step_stop": ARGS["steps"] if ARGS["steps"] is not None else int(7 * (5 * size + 5)),
-            "t_step_save": ARGS["steps"] if ARGS["steps"] is not None else int(7 * (5 * size + 5)),
+            "t_step_stop": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)),
+            "t_step_save": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)),
             # Simulation Algorithm Parameters
             "num_patches": 1,
             "model_eqns": 2,
diff --git a/benchmarks/viscous_weno5_sgb_acoustic/case.py b/benchmarks/viscous_weno5_sgb_acoustic/case.py
index 9f1351b0c1..83bdc43e9c 100644
--- a/benchmarks/viscous_weno5_sgb_acoustic/case.py
+++ b/benchmarks/viscous_weno5_sgb_acoustic/case.py
@@ -94,8 +94,8 @@
             "p": Nz,
             "dt": dt,
             "t_step_start": 0,
-            "t_step_stop": ARGS["steps"] if ARGS["steps"] is not None else int(6 * (5 * size + 5)),
-            "t_step_save": ARGS["steps"] if ARGS["steps"] is not None else int(6 * (5 * size + 5)),
+            "t_step_stop": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)),
+            "t_step_save": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)),
             # Simulation Algorithm Parameters
             "num_patches": 2,
             "model_eqns": 2,
diff --git a/toolchain/mfc/bench.py b/toolchain/mfc/bench.py
index 74f7469482..58b90e965b 100644
--- a/toolchain/mfc/bench.py
+++ b/toolchain/mfc/bench.py
@@ -228,8 +228,7 @@ def diff():
                     grind_time_value = lhs_summary[target.name]["grind"] / rhs_summary[target.name]["grind"]
                     speedups[i] += f" & Grind: {grind_time_value:.2f}"
                     if grind_time_value < 0.95:
-                        cons.print(f"[bold red]Error[/bold red]: Benchmarking failed since grind time speedup for {target.name} below acceptable threshold (<0.95) - Case: {slug}")
-                        err = 1
+                        cons.print(f"[bold yellow]Warning[/bold yellow]: Grind time speedup for {target.name} below threshold (<0.95) - Case: {slug}")
             except Exception as e:
                 cons.print(
                     f"[bold red]ERROR[/bold red]: Failed to compute speedup for {target.name} in {slug}: {e}\n"
diff --git a/toolchain/mfc/build.py b/toolchain/mfc/build.py
index 6430f7ad35..08ff6d7510 100644
--- a/toolchain/mfc/build.py
+++ b/toolchain/mfc/build.py
@@ -1,6 +1,7 @@
 import os, typing, hashlib, dataclasses, subprocess, re, time, sys, threading, queue
 
 from rich.panel import Panel
+from rich.text  import Text
 from rich.progress import Progress, SpinnerColumn, BarColumn, TextColumn, TimeElapsedColumn, TaskProgressColumn
 
 from .case    import Case
@@ -273,14 +274,14 @@ def _show_build_error(result: subprocess.CompletedProcess, stage: str):
         stdout_text = result.stdout if isinstance(result.stdout, str) else result.stdout.decode('utf-8', errors='replace')
         stdout_text = stdout_text.strip()
         if stdout_text:
-            cons.raw.print(Panel(stdout_text, title="Output", border_style="yellow"))
+            cons.raw.print(Panel(Text(stdout_text), title="Output", border_style="yellow"))
 
     # Show stderr if available
     if result.stderr:
         stderr_text = result.stderr if isinstance(result.stderr, str) else result.stderr.decode('utf-8', errors='replace')
         stderr_text = stderr_text.strip()
         if stderr_text:
-            cons.raw.print(Panel(stderr_text, title="Errors", border_style="red"))
+            cons.raw.print(Panel(Text(stderr_text), title="Errors", border_style="red"))
 
     cons.print()