From f108f1e745286da797743153c2deef57204ae8f9 Mon Sep 17 00:00:00 2001 From: Vance Ingalls Date: Tue, 12 May 2026 21:12:39 +0000 Subject: [PATCH] perf(engine): scale worker-count caps with cpu count Follow-up to hf#732. The hybrid layered/shader-blend path saturates well past 6 DOM workers on multi-core hosts, but the engine's `calculateOptimalWorkers` clamped both the explicit `--workers N` request and the `auto` default to a hardcoded ceiling of 10 / 6 respectively. On the 96-core validation host, w=8, w=12, w=16 all collapsed to 6-10 internally, masking whether the wall is algorithmically bound or just cpu-starved by the cap. Two changes: * `ABSOLUTE_MAX_WORKERS`: 10 -> 24. Explicit `--workers 16` now surfaces 16 DOM sessions instead of being silently truncated to 10. The new ceiling is still finite because CDP-protocol dispatch serializes through Node's main event loop; past ~24 we expect noise to dominate signal. * `DEFAULT_SAFE_MAX_WORKERS` (a constant) -> `defaultSafeMaxWorkers()` returning `Math.max(6, Math.min(16, Math.floor(cpuCount / 8)))`. On 8/16/32-core: 6 (unchanged). On 64-core: 8. On 96-core: 12. On 128-core: 16. The /8 divisor leaves headroom for each Chrome worker's SwiftShader compositor + the in-process shader-blend `worker_threads` pool, both CPU-heavy. No change to the `auto` default on typical hosts (<=32 cores); only the ceiling for explicit `--workers N` requests rises from 10 to 24 on every host. Unlocks high-core hosts to consume more parallelism, which is a prerequisite for the hybrid shader-transition path stacked on top of this PR. Tests: existing 7 parallelCoordinator tests pass unchanged. PR 1 of 5 in the hf#732 decomposition stack. 
-- Vai Co-Authored-By: Vai --- .../src/services/parallelCoordinator.ts | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/packages/engine/src/services/parallelCoordinator.ts b/packages/engine/src/services/parallelCoordinator.ts index bbee9a15e..4b21019bd 100644 --- a/packages/engine/src/services/parallelCoordinator.ts +++ b/packages/engine/src/services/parallelCoordinator.ts @@ -64,8 +64,23 @@ export interface WorkerSizingConfig extends Partial< const MEMORY_PER_WORKER_MB = 256; const MIN_WORKERS = 1; -const ABSOLUTE_MAX_WORKERS = 10; -const DEFAULT_SAFE_MAX_WORKERS = 6; +// Hard ceiling on explicit `--workers N` requests. Above this, the cost of +// CDP-protocol dispatch through Node's main event loop and OS scheduling +// noise overwhelms any further parallelism. Bumped from 10 → 24 in hf#732 +// follow-up so high-core hosts (32-96+ cores) can actually surface the +// hardware to renders that are CPU-bound on DOM capture. +const ABSOLUTE_MAX_WORKERS = 24; +// `auto` concurrency picks this many workers as the upper bound. Bumped +// from a hardcoded 6 → CPU-scaled value (floor(cpuCount/8), floor at 6, +// ceiling at 16) in hf#732 follow-up. Rationale: the prior fixed cap of 6 +// left ~90 cores idle on the validation host and forced users to pass +// `--workers N` to opt in. Now `auto` matches what a thoughtful operator +// would pick by hand. The /8 divisor leaves headroom for each Chrome +// worker's SwiftShader compositor + the shader-blend thread pool, both of +// which are themselves CPU-heavy. 
+function defaultSafeMaxWorkers(): number { + return Math.max(6, Math.min(16, Math.floor(cpus().length / 8))); +} const MIN_FRAMES_PER_WORKER = 30; export function calculateOptimalWorkers( @@ -79,7 +94,7 @@ export function calculateOptimalWorkers( if (concurrency !== "auto") { return Math.max(MIN_WORKERS, Math.min(ABSOLUTE_MAX_WORKERS, Math.floor(concurrency))); } - return DEFAULT_SAFE_MAX_WORKERS; + return defaultSafeMaxWorkers(); })(); const effectiveCoresPerWorker = config?.coresPerWorker ?? DEFAULT_CONFIG.coresPerWorker; const effectiveMinParallelFrames = config?.minParallelFrames ?? DEFAULT_CONFIG.minParallelFrames;