From c84f712d7addcf5145539e7a89abbacf6d62cb45 Mon Sep 17 00:00:00 2001 From: wenshao Date: Tue, 28 Apr 2026 13:59:27 +0800 Subject: [PATCH 1/2] feat(core): wire background shells into the task_stop tool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase B follow-up #1 from #3634, unblocked by #3471 (control plane) merging in. The model can now cancel a managed background shell with the same `task_stop` tool it uses for subagents — no more falling back to `kill ` via BashTool. Lookup order: subagent registry first (existing behavior), then the background shell registry as a fallback. Agent IDs follow `-` and shell IDs follow `bg_<8 hex chars>`, so the two namespaces cannot collide in practice; the order is fixed for determinism (a defensive test pins agent-wins-over-shell). The shell cancel path resolves through the entry's own AbortController (which `BackgroundShellRegistry.cancel` triggers); the child process exit handler then settles the registry to `cancelled` and the on-disk output file is preserved for inspection via `/tasks` or a direct `Read`. This matches Phase B's "registry's own AbortController is the cancellation source of truth" design without needing the in-flight notification framework that subagents use. Tests: 7 task-stop tests (was 4) — added cancel-shell happy path, NOT_RUNNING for already-exited shell, and a defensive agent-takes-precedence-on-id-collision case. --- packages/core/src/tools/task-stop.test.ts | 86 +++++++++++++++++++++++ packages/core/src/tools/task-stop.ts | 83 +++++++++++++++------- 2 files changed, 144 insertions(+), 25 deletions(-) diff --git a/packages/core/src/tools/task-stop.test.ts b/packages/core/src/tools/task-stop.test.ts index d5df835a3ea..c202af47d2a 100644 --- a/packages/core/src/tools/task-stop.test.ts +++ b/packages/core/src/tools/task-stop.test.ts @@ -7,18 +7,22 @@ import { describe, it, expect, beforeEach } from 'vitest'; import { TaskStopTool } from './task-stop.js'; import { BackgroundTaskRegistry } from '../agents/background-tasks.js'; +import { BackgroundShellRegistry } from '../services/backgroundShellRegistry.js'; import type { Config } from '../config/config.js'; import { ToolErrorType } from './tool-error.js'; describe('TaskStopTool', () => { let registry: BackgroundTaskRegistry; + let shellRegistry: BackgroundShellRegistry; let config: Config; let tool: TaskStopTool; beforeEach(() => { registry = new BackgroundTaskRegistry(); + shellRegistry = new BackgroundShellRegistry(); config = { getBackgroundTaskRegistry: () => registry, + getBackgroundShellRegistry: () => shellRegistry, } as unknown as Config; tool = new TaskStopTool(config); }); @@ -91,4 +95,86 @@ describe('TaskStopTool', () => { expect(result.llmContent).toContain('Search for auth code'); expect(result.returnDisplay).toContain('Search for auth code'); }); + + describe('background shell support', () => { + it('cancels a running background shell', async () => { + const ac = new AbortController(); + shellRegistry.register({ + shellId: 'bg_a1b2c3d4', + command: 'npm run dev', + cwd: '/work', + status: 'running', + startTime: Date.now(), + outputPath: '/tmp/bg-out/shell-bg_a1b2c3d4.output', + abortController: ac, + }); + + const result = await tool.validateBuildAndExecute( + { task_id: 'bg_a1b2c3d4' }, + new AbortController().signal, + ); + + expect(result.error).toBeUndefined(); + expect(result.llmContent).toContain('background shell "bg_a1b2c3d4"'); + expect(result.llmContent).toContain('npm run dev'); + expect(result.llmContent).toContain('/tmp/bg-out/shell-bg_a1b2c3d4.output'); + expect(shellRegistry.get('bg_a1b2c3d4')!.status).toBe('cancelled'); + expect(ac.signal.aborted).toBe(true); + }); + + it('returns NOT_RUNNING when the shell already exited', async () => { + shellRegistry.register({ + shellId: 'bg_done', + command: 'true', + cwd: '/work', + status: 'running', + startTime: Date.now() - 1000, + outputPath: '/tmp/bg-out/shell-bg_done.output', + abortController: new AbortController(), + }); + shellRegistry.complete('bg_done', 0, Date.now()); + + const result = await tool.validateBuildAndExecute( + { task_id: 'bg_done' }, + new AbortController().signal, + ); + + expect(result.error?.type).toBe(ToolErrorType.TASK_STOP_NOT_RUNNING); + expect(result.llmContent).toContain('Background shell "bg_done"'); + expect(result.llmContent).toContain('completed'); + }); + + it('prefers an agent over a shell when both have the same id (defensive)', async () => { + // IDs cannot collide in practice (different naming schemes), but the + // tool's lookup order should still be deterministic if they ever do. + const agentAc = new AbortController(); + const shellAc = new AbortController(); + registry.register({ + agentId: 'shared-id', + description: 'agent', + status: 'running', + startTime: Date.now(), + abortController: agentAc, + }); + shellRegistry.register({ + shellId: 'shared-id', + command: 'shell-cmd', + cwd: '/work', + status: 'running', + startTime: Date.now(), + outputPath: '/tmp/x.out', + abortController: shellAc, + }); + + const result = await tool.validateBuildAndExecute( + { task_id: 'shared-id' }, + new AbortController().signal, + ); + + expect(result.llmContent).toContain('background agent'); + expect(agentAc.signal.aborted).toBe(true); + expect(shellAc.signal.aborted).toBe(false); + expect(shellRegistry.get('shared-id')!.status).toBe('running'); + }); + }); }); diff --git a/packages/core/src/tools/task-stop.ts b/packages/core/src/tools/task-stop.ts index 655b704508b..ff06a0cc068 100644 --- a/packages/core/src/tools/task-stop.ts +++ b/packages/core/src/tools/task-stop.ts @@ -40,45 +40,78 @@ class TaskStopInvocation extends BaseToolInvocation< } async execute(_signal: AbortSignal): Promise { - const registry = this.config.getBackgroundTaskRegistry(); - const entry = registry.get(this.params.task_id); + const taskId = this.params.task_id; - if (!entry) { + // Subagent registry first (Phase A control plane). Agent IDs follow the + // pattern `-`, so they cannot collide with shell + // IDs (which are `bg_<8 hex chars>` from the background shell pool). + const agentRegistry = this.config.getBackgroundTaskRegistry(); + const agentEntry = agentRegistry.get(taskId); + if (agentEntry) { + if (agentEntry.status !== 'running') { + return notRunningError('agent', taskId, agentEntry.status); + } + agentRegistry.cancel(taskId); + // The terminal task-notification is emitted by the agent's own handler + // (via registry.complete/fail) rather than cancel(), so the parent + // model still receives the agent's real partial/final result — not just + // a bare "cancelled" message — once the reasoning loop unwinds. + const desc = agentEntry.description; return { - llmContent: `Error: No background task found with ID "${this.params.task_id}".`, - returnDisplay: 'Task not found.', - error: { - message: `Task not found: ${this.params.task_id}`, - type: ToolErrorType.TASK_STOP_NOT_FOUND, - }, + llmContent: + `Cancellation requested for background agent "${taskId}". ` + + `A final task-notification carrying the agent's last result will ` + + `follow.\nDescription: ${desc}`, + returnDisplay: `Cancelled: ${desc}`, }; } - if (entry.status !== 'running') { + // Background shell registry (Phase B). Settles asynchronously when the + // child process exits in response to the AbortController; the registry + // entry's terminal state (`cancelled`) and final exit code/output stay + // observable via `/tasks` and the on-disk output file. + const shellRegistry = this.config.getBackgroundShellRegistry(); + const shellEntry = shellRegistry.get(taskId); + if (shellEntry) { + if (shellEntry.status !== 'running') { + return notRunningError('shell', taskId, shellEntry.status); + } + shellRegistry.cancel(taskId, Date.now()); return { - llmContent: `Error: Background task "${this.params.task_id}" is not running (status: ${entry.status}).`, - returnDisplay: `Task not running (${entry.status}).`, - error: { - message: `Task is ${entry.status}: ${this.params.task_id}`, - type: ToolErrorType.TASK_STOP_NOT_RUNNING, - }, + llmContent: + `Cancellation requested for background shell "${taskId}". ` + + `Final status will be visible via /tasks; captured output remains ` + + `at ${shellEntry.outputPath}.\nCommand: ${shellEntry.command}`, + returnDisplay: `Cancelled shell: ${shellEntry.command}`, }; } - registry.cancel(this.params.task_id); - - // The terminal task-notification is emitted by the task's own handler - // (via registry.complete/fail) rather than cancel(), so the parent model - // still receives the task's real partial/final result — not just a bare - // "cancelled" message — once the reasoning loop unwinds. - const desc = entry.description; return { - llmContent: `Cancellation requested for background task "${this.params.task_id}". A final task-notification carrying the task's last result will follow.\nDescription: ${desc}`, - returnDisplay: `Cancelled: ${desc}`, + llmContent: `Error: No background task found with ID "${taskId}".`, + returnDisplay: 'Task not found.', + error: { + message: `Task not found: ${taskId}`, + type: ToolErrorType.TASK_STOP_NOT_FOUND, + }, }; } } +function notRunningError( + kind: 'agent' | 'shell', + taskId: string, + status: string, +): ToolResult { + return { + llmContent: `Error: Background ${kind} "${taskId}" is not running (status: ${status}).`, + returnDisplay: `Task not running (${status}).`, + error: { + message: `${kind} is ${status}: ${taskId}`, + type: ToolErrorType.TASK_STOP_NOT_RUNNING, + }, + }; +} + export class TaskStopTool extends BaseDeclarativeTool< TaskStopParams, ToolResult From 7a69ea11e03395a0b363bc8b9b8633aec2e8a326 Mon Sep 17 00:00:00 2001 From: wenshao Date: Wed, 29 Apr 2026 06:35:07 +0800 Subject: [PATCH 2/2] fix(core): defer shell terminal transition until spawn handler settles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit @doudouOUC noticed that the previous task_stop path called `BackgroundShellRegistry.cancel(id, Date.now())`, which marked the entry `cancelled` immediately. The spawn handler's settle path only records real exit info via cancel/complete/fail when the entry is still `running`, so the cancel-vs-exit race could permanently hide a real completed/failed result and `/tasks` would show a terminal endTime while the process was still draining. Add a `requestCancel(id)` method to `BackgroundShellRegistry` that triggers the entry's AbortController only; status stays `running` until the settle path observes the abort and records the real terminal state. The immediate-mark `cancel(id, endTime)` is reserved for `abortAll()` / shutdown, where the CLI process is tearing down anyway and there is no settle handler to wait for. Tests updated: - `task-stop.test.ts` cancel-shell happy path now asserts the entry stays `running` with `endTime` undefined post-stop, and the abort signal fires (the settle path's contract, not task_stop's, is the one that flips status). - 3 new `requestCancel` tests in `backgroundShellRegistry.test.ts`: running → abort+still-running, terminal entry no-op, unknown id no-op. --- .../services/backgroundShellRegistry.test.ts | 32 +++++++++++++++++++ .../src/services/backgroundShellRegistry.ts | 23 +++++++++++++ packages/core/src/tools/task-stop.test.ts | 7 +++- packages/core/src/tools/task-stop.ts | 11 +++++-- 4 files changed, 69 insertions(+), 4 deletions(-) diff --git a/packages/core/src/services/backgroundShellRegistry.test.ts b/packages/core/src/services/backgroundShellRegistry.test.ts index 193492d27f4..e6c7b85c2e6 100644 --- a/packages/core/src/services/backgroundShellRegistry.test.ts +++ b/packages/core/src/services/backgroundShellRegistry.test.ts @@ -101,6 +101,38 @@ describe('BackgroundShellRegistry', () => { }); }); + describe('requestCancel', () => { + it('aborts the signal but leaves status running and endTime undefined', () => { + const reg = new BackgroundShellRegistry(); + const ac = new AbortController(); + reg.register(makeEntry({ shellId: 'a', abortController: ac })); + + reg.requestCancel('a'); + + const e = reg.get('a')!; + expect(e.status).toBe('running'); + expect(e.endTime).toBeUndefined(); + expect(ac.signal.aborted).toBe(true); + }); + + it('is a no-op on a terminal entry', () => { + const reg = new BackgroundShellRegistry(); + const ac = new AbortController(); + reg.register(makeEntry({ shellId: 'a', abortController: ac })); + reg.complete('a', 0, 1500); + + reg.requestCancel('a'); + + expect(reg.get('a')!.status).toBe('completed'); + expect(ac.signal.aborted).toBe(false); + }); + + it('is a no-op for unknown id', () => { + const reg = new BackgroundShellRegistry(); + expect(() => reg.requestCancel('missing')).not.toThrow(); + }); + }); + describe('abortAll', () => { it('cancels every running entry and leaves terminal entries alone', () => { const reg = new BackgroundShellRegistry(); diff --git a/packages/core/src/services/backgroundShellRegistry.ts b/packages/core/src/services/backgroundShellRegistry.ts index b8b2e5951e5..1b74c7be107 100644 --- a/packages/core/src/services/backgroundShellRegistry.ts +++ b/packages/core/src/services/backgroundShellRegistry.ts @@ -92,6 +92,29 @@ export class BackgroundShellRegistry { entry.abortController.abort(); } + /** + * Request cancellation without marking the entry terminal. + * + * Triggers the entry's AbortController so the spawn handler can tear the + * process down, but leaves `status='running'` until the settle path + * observes the abort and records the real exit moment + outcome via + * `complete()` / `fail()` / `cancel()`. This keeps the registry honest: + * a cancelled shell only shows its terminal `endTime` once the process + * has actually drained, and a cancel-vs-exit race can't permanently hide + * a real completed/failed result. + * + * Used by the `task_stop` tool path; the immediate-mark `cancel()` above + * is reserved for `abortAll()` / shutdown, where the CLI process is + * tearing down anyway and there is no settle handler to wait for. + * + * Idempotent: no-op on entries that aren't `running`. + */ + requestCancel(shellId: string): void { + const entry = this.entries.get(shellId); + if (!entry || entry.status !== 'running') return; + entry.abortController.abort(); + } + /** * Cancel every still-running entry. Called on session/Config shutdown so * background shells don't outlive the CLI process and leak orphaned diff --git a/packages/core/src/tools/task-stop.test.ts b/packages/core/src/tools/task-stop.test.ts index c202af47d2a..acd0ca14052 100644 --- a/packages/core/src/tools/task-stop.test.ts +++ b/packages/core/src/tools/task-stop.test.ts @@ -118,7 +118,12 @@ describe('TaskStopTool', () => { expect(result.llmContent).toContain('background shell "bg_a1b2c3d4"'); expect(result.llmContent).toContain('npm run dev'); expect(result.llmContent).toContain('/tmp/bg-out/shell-bg_a1b2c3d4.output'); - expect(shellRegistry.get('bg_a1b2c3d4')!.status).toBe('cancelled'); + // task_stop only requests cancellation — the entry stays `running` + // until the spawn handler observes the abort and settles the entry + // with the real exit moment. Without this guarantee, /tasks would + // report a terminal-but-still-draining shell. + expect(shellRegistry.get('bg_a1b2c3d4')!.status).toBe('running'); + expect(shellRegistry.get('bg_a1b2c3d4')!.endTime).toBeUndefined(); expect(ac.signal.aborted).toBe(true); }); diff --git a/packages/core/src/tools/task-stop.ts b/packages/core/src/tools/task-stop.ts index ff06a0cc068..c67ed2153a8 100644 --- a/packages/core/src/tools/task-stop.ts +++ b/packages/core/src/tools/task-stop.ts @@ -76,12 +76,17 @@ class TaskStopInvocation extends BaseToolInvocation< if (shellEntry.status !== 'running') { return notRunningError('shell', taskId, shellEntry.status); } - shellRegistry.cancel(taskId, Date.now()); + // requestCancel triggers the AbortController only — the registry's + // settle path records the real terminal status + endTime once the + // process actually drains. Calling cancel(id, Date.now()) here would + // mark the entry terminal immediately and lose the real exit info. + shellRegistry.requestCancel(taskId); return { llmContent: `Cancellation requested for background shell "${taskId}". ` + - `Final status will be visible via /tasks; captured output remains ` + - `at ${shellEntry.outputPath}.\nCommand: ${shellEntry.command}`, + `Final status will be visible via /tasks once the process drains; ` + + `captured output remains at ${shellEntry.outputPath}.\n` + + `Command: ${shellEntry.command}`, returnDisplay: `Cancelled shell: ${shellEntry.command}`, }; }