From f6291ce4b24d2043863b5ff40a4545500df77648 Mon Sep 17 00:00:00 2001 From: doudouOUC Date: Tue, 19 May 2026 08:34:34 +0800 Subject: [PATCH] fix(serve): unbreak E2E after #4271 (capabilities + clientCount) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two regressions introduced by #4271 (MCP guardrail push events) had been failing every main E2E run since the PR landed. Both fixes are in integration tests; no source changes. 1. `qwen serve — capabilities envelope > advertises all baseline capabilities`. `mcp_guardrail_events` was added to `SERVE_CAPABILITY_REGISTRY` and to the unit baseline list (`packages/cli/src/serve/server.test.ts:119`) but not to the integration test's hand-maintained list. Same drift class as #4268 / #4284. Fix: append the tag in registry order. 2. `MCP child amplification (P1 baseline) > clientCount matches external pgrep observation`. The test (added by #4271, never passed CI) asserted `pgrep_observed === MCP_SERVERS_CONFIGURED`, ignoring that an ACP child runs TWO `Config` objects — bootstrap (`runAcpAgent` → `config.initialize`) + per-session (`newSessionConfig` → `config.initialize`) — each with its own `McpClientManager`. After one session, pgrep observes 2×N grandchildren while `/workspace/mcp` snapshot (`buildWorkspaceMcpStatus(this.config)`) reads only the bootstrap manager (=N). Fix: encode the 2× architectural amplification literally so a future follow-up that unifies the managers fails this assertion and forces an explicit update; keep `clientCount === MCP_SERVERS_CONFIGURED` and the original `clientCount ≤ pgrep` over-report guard intact. Verified locally: both tests pass on first attempt (no retries) via `vitest run --root ./integration-tests cli/qwen-serve-routes.test.ts cli/qwen-serve-baseline.test.ts`. 🤖 Generated with [Qwen Code](https://github.com/QwenLM/qwen-code) --- .../cli/qwen-serve-baseline.test.ts | 87 ++++++++++--------- .../cli/qwen-serve-routes.test.ts | 1 + 2 files changed, 48 insertions(+), 40 deletions(-) diff --git a/integration-tests/cli/qwen-serve-baseline.test.ts b/integration-tests/cli/qwen-serve-baseline.test.ts index 97ca6b48d92..97f5674c151 100644 --- a/integration-tests/cli/qwen-serve-baseline.test.ts +++ b/integration-tests/cli/qwen-serve-baseline.test.ts @@ -464,27 +464,40 @@ async function measureRssAtSessionCount(sessionCount: number): Promise<{ }, 120_000); // PR 14b cross-check: validate the daemon's in-process MCP - // accounting against external `pgrep -P` measurement. The - // snapshot at `GET /workspace/mcp` exposes `clientCount` - // (live CONNECTED clients, `getMcpClientAccounting().total`) - // — that's the field SDK consumers and dashboards actually - // see, and it's the same source the push-event channel - // (`mcp_budget_warning` / `mcp_child_refused_batch`) reads. - // If `clientCount` diverges from what `pgrep -P` observes for - // the daemon's MCP grandchildren, the events lie. + // accounting on `GET /workspace/mcp` (`clientCount`, the field + // SDK consumers and dashboards see, and the same source the + // push-event channel — `mcp_budget_warning` / + // `mcp_child_refused_batch` — reads) against external `pgrep -P` + // measurement. // - // (Codex round 3 doc fix — codex/copilot finding: this test - // was named "in-process subprocessCount matches external pgrep" - // but actually asserts on `clientCount`. The snapshot's - // `clientCount` and the manager-internal `subprocessCount` - // (`stdio + websocket`) match for stdio-only fixtures, so the - // assertion is numerically correct — but the test name now - // matches the field it actually validates.) + // Architectural note (PR 22a): a `qwen serve` ACP child runs + // two `Config` objects, each carrying its own + // `McpClientManager`. The bootstrap Config (`runAcpAgent` → + // `config.initialize`) discovers MCP servers when the child + // starts, and `/workspace/mcp` reads its manager via + // `buildWorkspaceMcpStatus(this.config)` (`acpAgent.ts:1399`). + // The per-session Config (`newSessionConfig` → + // `config.initialize`) spawns a SECOND set of MCP children for + // the SAME servers — its accounting is NOT what the + // workspace-level snapshot reflects. So pgrep observes + // `(1 + sessionCount) * MCP_SERVERS_CONFIGURED` grandchildren + // while `clientCount` stays at `MCP_SERVERS_CONFIGURED`. + // + // What this test validates: + // 1. `clientCount` is exactly the configured server count + // (bootstrap manager accounting is honest). + // 2. pgrep observes the architectural 2×N grandchildren after + // one session is created — encoded literally so a future + // refactor that unifies bootstrap + session managers (#4175 + // follow-up to drop the duplicate discovery) fails this + // assertion and forces a deliberate test update. + // 3. `clientCount` NEVER exceeds the observed pgrep count — + // the original "snapshot must never over-report" guard. // // Skip-gated like the parent describe (POSIX, non-sandbox); - // idle MCP fixtures are stdio-only so `clientCount` should - // equal `mcpGrandchildren.length` exactly (no amplification - // slack required). + // idle MCP fixtures are stdio-only so the relationship between + // `clientCount` and pgrep is exact (no amplification slack + // required at idle). it('clientCount matches external pgrep observation', async () => { const ws = makeTempWorkspace('mcp-counter'); let daemon: SpawnedDaemon | undefined; @@ -498,34 +511,28 @@ async function measureRssAtSessionCount(sessionCount: number): Promise<{ daemon = await spawnDaemon({ workspaceCwd: ws }); await daemon.client.createOrAttachSession({ workspaceCwd: ws }); - // Wait for MCP grandchildren to be observable via pgrep, - // then read both numbers atomically (pgrep first to lock - // the comparison floor, snapshot second so the daemon - // can't sneak in a new connect between the two reads). + // Wait until the OS sees the FULL post-session set + // (`MCP_SERVERS_CONFIGURED * 2` grandchildren — see the + // architectural note above), then read the snapshot. + // pgrep first to lock the comparison floor; snapshot + // second so the daemon can't sneak in a new connect + // between the two reads. + const expectedGrandchildren = MCP_SERVERS_CONFIGURED * 2; const observed = await waitForMcpGrandchildren( daemon.daemon.pid!, - MCP_SERVERS_CONFIGURED, + expectedGrandchildren, ); const snapshot = await daemon.client.workspaceMcp(); - // PR 14b invariant: stdio-only fixtures → - // `clientCount === mcpGrandchildren.length`. The PR 14 - // amplification slack - // (`MCP_SERVERS_CONFIGURED * mcpAmplificationFactor`) is - // for connect-storm transient overhead, not steady-state - // counter drift. At idle the daemon's accounting MUST - // match `pgrep -P` exactly (no zombies, no orphans). - // - // `clientCount` is the snapshot's authoritative live - // count; validating it against pgrep closes the loop on - // PR 14b's event-source assumption (the push events read - // the same accounting). + // (1) Bootstrap manager accounting is honest. expect(snapshot.clientCount).toBe(MCP_SERVERS_CONFIGURED); - expect(observed.mcpGrandchildren.length).toBe(MCP_SERVERS_CONFIGURED); - // Defense-in-depth: even if a future race lets the OS - // observe a process the daemon already considers - // disconnected, `clientCount` must NEVER exceed the - // observed pgrep count. Equality at idle, `<=` always. + // (2) pgrep observes both managers' children. If a future + // refactor unifies them, change this to + // `MCP_SERVERS_CONFIGURED` (and update the architectural + // note above). + expect(observed.mcpGrandchildren.length).toBe(expectedGrandchildren); + // (3) Snapshot never over-reports OS reality. Holds under + // both the current 2× regime and the unified 1× future. expect(snapshot.clientCount).toBeLessThanOrEqual( observed.mcpGrandchildren.length, ); diff --git a/integration-tests/cli/qwen-serve-routes.test.ts b/integration-tests/cli/qwen-serve-routes.test.ts index 6ae6272ddf8..2878b96b194 100644 --- a/integration-tests/cli/qwen-serve-routes.test.ts +++ b/integration-tests/cli/qwen-serve-routes.test.ts @@ -217,6 +217,7 @@ describe('qwen serve — capabilities envelope', () => { 'session_close', 'session_metadata', 'mcp_guardrails', + 'mcp_guardrail_events', 'workspace_file_read', 'workspace_file_bytes', 'workspace_file_write',