From cdfef5190e5b5be24b23a36def36b7ea3122fff3 Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Tue, 9 Jun 2026 10:48:34 +0800 Subject: [PATCH] feat(core): layered tool-output truncation, per-message budget, per-tool limits Bound tool output before it enters conversation history, mirroring Claude Code's three-layer model: - Single-result: a per-tool/global char threshold spills oversized output to a temp file with a recoverable preview + read_file pointer; media parts are kept, with a token-aware fallback, 0600 owner-only perms on the spilled file, and a sentinel re-entrancy guard. - Per-message: when one batch of tool results exceeds toolOutputBatchBudget (default 200k chars), the largest results are offloaded to disk; a warning is logged if it still can't get under budget. - Per-tool: DeclarativeTool.maxOutputChars/truncateKeep getters (shell 30k, grep 20k, read-file self-managed, mcp 500k, agent 32k/tail). Truncation runs in CoreToolScheduler, deferring PostToolUse/skill metadata injection until after truncation so reminders are never bisected. Per-tool budgets are char-only so the global line cap can't undercut a self-managed tool (read-file) or a declared char budget (grep). The re-entrancy guard matches the truncation prefix at the start of the output so an injected copy of the phrase can't bypass the budget. Adds the toolOutputBatchBudget setting (core + CLI schema) and supporting tests. --- .../acp-integration/acpAgent.worktree.test.ts | 1 + packages/cli/src/config/config.ts | 1 + packages/cli/src/config/settingsSchema.ts | 11 + packages/core/src/config/config.ts | 18 + .../core/src/core/coreToolScheduler.test.ts | 538 ++++++++++++++++++ packages/core/src/core/coreToolScheduler.ts | 250 +++++++- packages/core/src/test-utils/mock-tool.ts | 16 + packages/core/src/tools/agent/agent.ts | 8 + packages/core/src/tools/grep.ts | 4 + packages/core/src/tools/mcp-tool.test.ts | 6 +- packages/core/src/tools/mcp-tool.ts | 14 + packages/core/src/tools/read-file.ts | 8 + packages/core/src/tools/ripGrep.ts | 4 + packages/core/src/tools/shell.test.ts | 36 ++ packages/core/src/tools/shell.ts | 13 + packages/core/src/tools/tools.ts | 21 + packages/core/src/utils/truncation.test.ts | 334 ++++++++++- packages/core/src/utils/truncation.ts | 220 ++++++- .../schemas/settings.schema.json | 5 + 19 files changed, 1465 insertions(+), 43 deletions(-) diff --git a/packages/cli/src/acp-integration/acpAgent.worktree.test.ts b/packages/cli/src/acp-integration/acpAgent.worktree.test.ts index 0511a5c6f50..3e241c57104 100644 --- a/packages/cli/src/acp-integration/acpAgent.worktree.test.ts +++ b/packages/cli/src/acp-integration/acpAgent.worktree.test.ts @@ -118,6 +118,7 @@ vi.mock('@qwen-code/qwen-code-core', () => ({ })), SessionService: vi.fn(), SESSION_TITLE_MAX_LENGTH: 200, + DEFAULT_TOOL_OUTPUT_BATCH_BUDGET: 200_000, tokenLimit: vi.fn(), SessionStartSource: { Startup: 'startup', Resume: 'resume' }, SessionEndReason: { PromptInputExit: 'prompt_input_exit', Other: 'other' }, diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index cdeff7c9c0d..101c07f96fd 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -1909,6 +1909,7 @@ export async function loadCliConfig( skipStartupContext: settings.model?.skipStartupContext ?? false, truncateToolOutputThreshold: settings.tools?.truncateToolOutputThreshold, truncateToolOutputLines: settings.tools?.truncateToolOutputLines, + toolOutputBatchBudget: settings.tools?.toolOutputBatchBudget, eventEmitter: appEvents, gitCoAuthor: settings.general?.gitCoAuthor, output: { diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index eb51ad19c9c..169029986e4 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -16,6 +16,7 @@ import type { import { ApprovalMode, DEFAULT_STOP_HOOK_BLOCK_CAP, + DEFAULT_TOOL_OUTPUT_BATCH_BUDGET, DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES, DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD, } from '@qwen-code/qwen-code-core'; @@ -1962,6 +1963,16 @@ const SETTINGS_SCHEMA = { description: 'The number of lines to keep when truncating tool output.', showInDialog: false, }, + toolOutputBatchBudget: { + type: 'number', + label: 'Tool Output Batch Budget', + category: 'General', + requiresRestart: true, + default: DEFAULT_TOOL_OUTPUT_BATCH_BUDGET, + description: + 'Per-message budget (characters) for the combined output of one batch of tool calls; the largest results are offloaded to disk when exceeded. Set to -1 to disable.', + showInDialog: false, + }, computerUse: { type: 'object', label: 'Computer Use', diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 0b507054d3a..463296133e4 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -536,6 +536,12 @@ export interface ExtensionInstallMetadata { export const DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD = 25_000; export const DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES = 1000; +/** + * Per-message budget (chars) for the combined model-facing output of one + * batch of tool calls. When a batch's total output exceeds this, the largest + * results are offloaded to disk (with a recoverable pointer). `<= 0` disables. + */ +export const DEFAULT_TOOL_OUTPUT_BATCH_BUDGET = 200_000; export class MCPServerConfig { constructor( @@ -795,6 +801,7 @@ export interface ConfigParameters { skipLoopDetection?: boolean; truncateToolOutputThreshold?: number; truncateToolOutputLines?: number; + toolOutputBatchBudget?: number; eventEmitter?: EventEmitter; output?: OutputSettings; inputFormat?: InputFormat; @@ -1182,6 +1189,7 @@ export class Config { private readonly fileExclusions: FileExclusions; private readonly truncateToolOutputThreshold: number; private readonly truncateToolOutputLines: number; + private readonly toolOutputBatchBudget: number; private readonly eventEmitter?: EventEmitter; private readonly channel: string | undefined; private readonly jsonFd: number | undefined; @@ -1376,6 +1384,8 @@ export class Config { DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD; this.truncateToolOutputLines = params.truncateToolOutputLines ?? DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES; + this.toolOutputBatchBudget = + params.toolOutputBatchBudget ?? DEFAULT_TOOL_OUTPUT_BATCH_BUDGET; this.channel = params.channel; this.jsonFd = params.jsonFd; this.jsonFile = params.jsonFile; @@ -3806,6 +3816,14 @@ export class Config { return this.truncateToolOutputLines; } + getToolOutputBatchBudget(): number { + if (this.toolOutputBatchBudget <= 0) { + return Number.POSITIVE_INFINITY; + } + + return this.toolOutputBatchBudget; + } + getOutputFormat(): OutputFormat { return this.outputFormat; } diff --git a/packages/core/src/core/coreToolScheduler.test.ts b/packages/core/src/core/coreToolScheduler.test.ts index 7d8812119a4..ff72ec9f85b 100644 --- a/packages/core/src/core/coreToolScheduler.test.ts +++ b/packages/core/src/core/coreToolScheduler.test.ts @@ -270,6 +270,7 @@ vi.mock('../telemetry/session-tracing.js', () => ({ vi.mock('fs/promises', () => ({ writeFile: vi.fn(), + mkdir: vi.fn(), })); vi.mock('../ide/ide-client.js', () => ({ @@ -680,6 +681,7 @@ describe('CoreToolScheduler', () => { onAllToolCallsComplete?: ReturnType; onToolCallsUpdate?: ReturnType; memoryMonitor?: { scheduleCheck: () => void }; + toolOutputBatchBudget?: number; }) { const ensureTool = vi.fn( async (name: string) => @@ -726,6 +728,8 @@ describe('CoreToolScheduler', () => { getTruncateToolOutputThreshold: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD, getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES, + getToolOutputBatchBudget: () => + options.toolOutputBatchBudget ?? Number.POSITIVE_INFINITY, getToolRegistry: () => mockToolRegistry, getCwd: () => '/repo', getUseModelRouter: () => false, @@ -815,6 +819,540 @@ describe('CoreToolScheduler', () => { ); }); + function outputOfFirstCall( + onAllToolCallsComplete: ReturnType, + ): string { + const completionCalls = onAllToolCallsComplete.mock + .calls as unknown as Array<[ToolCall[]]>; + const call = completionCalls[0]?.[0]?.[0]; + return call && 'response' in call + ? ((call.response.responseParts[0]?.functionResponse?.response?.[ + 'output' + ] as string) ?? '') + : ''; + } + + it('truncates oversized model-facing string output before recording results', async () => { + const execute = vi.fn().mockResolvedValue({ + llmContent: 'a'.repeat(200_000), + returnDisplay: 'big output', + }); + const toolsByName = new Map([ + ['bigTool', new MockTool({ name: 'bigTool', execute })], + ]); + const { scheduler, onAllToolCallsComplete } = + createSchedulerForLegacyToolTests({ toolsByName }); + + await scheduler.schedule( + [ + { + callId: 'c-big', + name: 'bigTool', + args: {}, + isClientInitiated: false, + prompt_id: 'p-big', + }, + ], + new AbortController().signal, + ); + + const output = outputOfFirstCall(onAllToolCallsComplete); + expect(output).toContain( + 'Tool output was too large and has been truncated', + ); + expect(output.length).toBeLessThan(200_000); + }); + + it('leaves small model-facing output untouched', async () => { + const execute = vi.fn().mockResolvedValue({ + llmContent: 'small output', + returnDisplay: 'small', + }); + const toolsByName = new Map([ + ['smallTool', new MockTool({ name: 'smallTool', execute })], + ]); + const { scheduler, onAllToolCallsComplete } = + createSchedulerForLegacyToolTests({ toolsByName }); + + await scheduler.schedule( + [ + { + callId: 'c-small', + name: 'smallTool', + args: {}, + isClientInitiated: false, + prompt_id: 'p-small', + }, + ], + new AbortController().signal, + ); + + expect(outputOfFirstCall(onAllToolCallsComplete)).toBe('small output'); + }); + + it('applies the per-tool budget for a tool invoked via a legacy alias', async () => { + // Regression (C1): limitsTool read getTool(request.name) with the raw alias + // ('task'), which the registry stores only under the canonical name + // ('agent') — so the per-tool maxOutputChars was silently dropped and the + // global default applied. schedule() already resolved scheduledCall.tool + // canonically, so the budget must come from there. + const execute = vi.fn().mockResolvedValue({ + llmContent: 'a'.repeat(8000), // > 5k per-tool budget, < 25k global default + returnDisplay: 'big', + }); + const toolsByName = new Map([ + [ + ToolNames.AGENT, + new MockTool({ name: ToolNames.AGENT, execute, maxOutputChars: 5000 }), + ], + ]); + const { scheduler, onAllToolCallsComplete } = + createSchedulerForLegacyToolTests({ toolsByName }); + + await scheduler.schedule( + [ + { + callId: 'c-alias', + name: 'task', // legacy alias → AGENT + args: {}, + isClientInitiated: false, + prompt_id: 'p-alias', + }, + ], + new AbortController().signal, + ); + + // Per-tool 5k budget applied via scheduledCall.tool. Pre-fix: getTool('task') + // is undefined → global 25k → the 8k output would pass untruncated. + expect(outputOfFirstCall(onAllToolCallsComplete)).toContain( + 'Tool output was too large and has been truncated', + ); + }); + + it('keeps PostToolUse additionalContext intact after truncating oversized output', async () => { + const execute = vi.fn().mockResolvedValue({ + llmContent: 'a'.repeat(200_000), + returnDisplay: 'big output', + }); + const toolsByName = new Map([ + ['bigHookTool', new MockTool({ name: 'bigHookTool', execute })], + ]); + const messageBus = { + request: vi + .fn() + .mockImplementation(async (request: { eventName: string }) => { + if (request.eventName === 'PostToolUse') { + return { + type: MessageBusType.HOOK_EXECUTION_RESPONSE, + correlationId: 'PostToolUse-hook', + success: true, + output: { + hookSpecificOutput: { + additionalContext: 'POSTHOOK_CONTEXT_MARKER', + }, + }, + }; + } + return { + type: MessageBusType.HOOK_EXECUTION_RESPONSE, + correlationId: `${request.eventName}-hook`, + success: true, + output: { decision: 'allow' }, + }; + }), + }; + const { scheduler, onAllToolCallsComplete } = + createSchedulerForLegacyToolTests({ + toolsByName, + approvalMode: ApprovalMode.DEFAULT, + messageBus, + disableHooks: false, + }); + + await scheduler.schedule( + [ + { + callId: 'c-bh', + name: 'bigHookTool', + args: {}, + isClientInitiated: false, + prompt_id: 'p-bh', + }, + ], + new AbortController().signal, + ); + + const output = outputOfFirstCall(onAllToolCallsComplete); + // The body was truncated... + expect(output).toContain( + 'Tool output was too large and has been truncated', + ); + // ...yet the hook's additionalContext survived intact: it is appended + // AFTER truncation, so the head/tail truncator never bisects it. + expect(output).toContain('POSTHOOK_CONTEXT_MARKER'); + }); + + it('appends PostToolUse additionalContext AFTER truncation so a head-keep tool cannot drop it', async () => { + // Discriminating reorder guard: with keep='head' the metadata marker lands + // at the tail. Only truncate-THEN-append preserves it — the reverted + // append-then-truncate order drops the tail marker because the head + // truncator keeps the head of the oversized body and discards the rest. + const execute = vi.fn().mockResolvedValue({ + llmContent: 'a'.repeat(200_000), + returnDisplay: 'big output', + }); + const toolsByName = new Map([ + [ + 'headHookTool', + new MockTool({ + name: 'headHookTool', + execute, + maxOutputChars: 30_000, + truncateKeep: 'head', + }), + ], + ]); + const messageBus = { + request: vi + .fn() + .mockImplementation(async (request: { eventName: string }) => { + if (request.eventName === 'PostToolUse') { + return { + type: MessageBusType.HOOK_EXECUTION_RESPONSE, + correlationId: 'PostToolUse-hook', + success: true, + output: { + hookSpecificOutput: { + additionalContext: 'POSTHOOK_HEAD_MARKER', + }, + }, + }; + } + return { + type: MessageBusType.HOOK_EXECUTION_RESPONSE, + correlationId: `${request.eventName}-hook`, + success: true, + output: { decision: 'allow' }, + }; + }), + }; + const { scheduler, onAllToolCallsComplete } = + createSchedulerForLegacyToolTests({ + toolsByName, + approvalMode: ApprovalMode.DEFAULT, + messageBus, + disableHooks: false, + }); + + await scheduler.schedule( + [ + { + callId: 'c-hh', + name: 'headHookTool', + args: {}, + isClientInitiated: false, + prompt_id: 'p-hh', + }, + ], + new AbortController().signal, + ); + + const output = outputOfFirstCall(onAllToolCallsComplete); + // Body truncated head-only, yet the tail marker survived because it was + // appended after truncation. + expect(output).toContain( + 'Tool output was too large and has been truncated', + ); + expect(output).toContain('POSTHOOK_HEAD_MARKER'); + }); + + it('offloads the largest tool outputs when a batch exceeds the budget', async () => { + // Both outputs are individually under the single-result threshold (25k), + // so PR-A truncation leaves them alone; only their SUM (12k) exceeds the + // per-message batch budget (10k), so the largest is offloaded. + const bigExecute = vi.fn().mockResolvedValue({ + llmContent: 'a'.repeat(9000), + returnDisplay: 'big', + }); + const smallExecute = vi.fn().mockResolvedValue({ + llmContent: 'b'.repeat(3000), + returnDisplay: 'small', + }); + const toolsByName = new Map([ + [ + 'bigBatchTool', + new MockTool({ name: 'bigBatchTool', execute: bigExecute }), + ], + [ + 'smallBatchTool', + new MockTool({ name: 'smallBatchTool', execute: smallExecute }), + ], + ]); + const { scheduler, onAllToolCallsComplete } = + createSchedulerForLegacyToolTests({ + toolsByName, + toolOutputBatchBudget: 10_000, + }); + + await scheduler.schedule( + [ + { + callId: 'big', + name: 'bigBatchTool', + args: {}, + isClientInitiated: false, + prompt_id: 'p', + }, + { + callId: 'small', + name: 'smallBatchTool', + args: {}, + isClientInitiated: false, + prompt_id: 'p', + }, + ], + new AbortController().signal, + ); + + await vi.waitFor(() => { + expect(onAllToolCallsComplete).toHaveBeenCalled(); + }); + + const completionCalls = onAllToolCallsComplete.mock + .calls as unknown as Array<[ToolCall[]]>; + const calls = completionCalls[0][0]; + const outputOf = (name: string) => { + const c = calls.find((call) => call.request.name === name); + return c && 'response' in c + ? ((c.response.responseParts[0]?.functionResponse?.response?.[ + 'output' + ] as string) ?? '') + : ''; + }; + + // Largest output is offloaded to disk (recoverable pointer). + expect(outputOf('bigBatchTool')).toContain( + 'Tool output was too large and has been truncated', + ); + // Smaller output stays untouched (batch back under budget after offload). + expect(outputOf('smallBatchTool')).toBe('b'.repeat(3000)); + }); + + it('preserves PostToolBatch additionalContext in the offload preview tail', async () => { + // The PostToolBatch hook context is appended to the TAIL of the last call. + // When that call is the batch's largest and gets offloaded, the offload + // preview uses keep:'both' (head + tail), so the tail-resident context + // survives in the preview — the model still sees the hook guidance, and the + // full output (context included) is recoverable from the spill file. + const bigExecute = vi.fn().mockResolvedValue({ + llmContent: 'a'.repeat(9000), + returnDisplay: 'big', + }); + const smallExecute = vi.fn().mockResolvedValue({ + llmContent: 'b'.repeat(3000), + returnDisplay: 'small', + }); + const toolsByName = new Map([ + [ + 'smallBatchTool', + new MockTool({ name: 'smallBatchTool', execute: smallExecute }), + ], + [ + 'bigBatchTool', + new MockTool({ name: 'bigBatchTool', execute: bigExecute }), + ], + ]); + const messageBus = { + request: vi + .fn() + .mockImplementation(async (request: { eventName: string }) => { + if (request.eventName === 'PostToolBatch') { + return { + type: MessageBusType.HOOK_EXECUTION_RESPONSE, + correlationId: 'PostToolBatch-hook', + success: true, + output: { + hookSpecificOutput: { + hookEventName: 'PostToolBatch', + additionalContext: 'POSTBATCH_MARKER', + }, + }, + }; + } + return { + type: MessageBusType.HOOK_EXECUTION_RESPONSE, + correlationId: `${request.eventName}-hook`, + success: true, + output: { decision: 'allow' }, + }; + }), + }; + const { scheduler, onAllToolCallsComplete } = + createSchedulerForLegacyToolTests({ + toolsByName, + toolOutputBatchBudget: 10_000, + messageBus, + disableHooks: false, + }); + + // big is scheduled last, so it is the call PostToolBatch context attaches + // to — and it is also the batch's largest, so it gets offloaded. + await scheduler.schedule( + [ + { + callId: 'small', + name: 'smallBatchTool', + args: {}, + isClientInitiated: false, + prompt_id: 'p', + }, + { + callId: 'big', + name: 'bigBatchTool', + args: {}, + isClientInitiated: false, + prompt_id: 'p', + }, + ], + new AbortController().signal, + ); + + await vi.waitFor(() => { + expect(onAllToolCallsComplete).toHaveBeenCalled(); + }); + const calls = ( + onAllToolCallsComplete.mock.calls as unknown as Array<[ToolCall[]]> + )[0][0]; + const outputOf = (name: string) => { + const c = calls.find((call) => call.request.name === name); + return c && 'response' in c + ? ((c.response.responseParts[0]?.functionResponse?.response?.[ + 'output' + ] as string) ?? '') + : ''; + }; + + const bigOutput = outputOf('bigBatchTool'); + // big is offloaded (largest), yet the PostToolBatch context survives + // because it is appended after the budget pass. + expect(bigOutput).toContain( + 'Tool output was too large and has been truncated', + ); + expect(bigOutput).toContain('POSTBATCH_MARKER'); + }); + + it('applies a tool-declared maxOutputChars instead of the global threshold', async () => { + // Both tools emit the SAME 8k output (under the global 25k threshold). + // tinyTool declares a 5k per-tool budget → its output IS truncated. + // defaultTool declares nothing → falls back to global 25k → NOT truncated. + const make = () => + vi.fn().mockResolvedValue({ + llmContent: 'a'.repeat(8000), + returnDisplay: 'x', + }); + const toolsByName = new Map([ + [ + 'tinyTool', + new MockTool({ + name: 'tinyTool', + execute: make(), + maxOutputChars: 5000, + }), + ], + ['defaultTool', new MockTool({ name: 'defaultTool', execute: make() })], + ]); + const { scheduler, onAllToolCallsComplete } = + createSchedulerForLegacyToolTests({ toolsByName }); + + await scheduler.schedule( + [ + { + callId: '1', + name: 'tinyTool', + args: {}, + isClientInitiated: false, + prompt_id: 'p', + }, + { + callId: '2', + name: 'defaultTool', + args: {}, + isClientInitiated: false, + prompt_id: 'p', + }, + ], + new AbortController().signal, + ); + + await vi.waitFor(() => { + expect(onAllToolCallsComplete).toHaveBeenCalled(); + }); + + const calls = ( + onAllToolCallsComplete.mock.calls as unknown as Array<[ToolCall[]]> + )[0][0]; + const outputOf = (name: string) => { + const c = calls.find((call) => call.request.name === name); + return c && 'response' in c + ? ((c.response.responseParts[0]?.functionResponse?.response?.[ + 'output' + ] as string) ?? '') + : ''; + }; + + expect(outputOf('tinyTool')).toContain( + 'Tool output was too large and has been truncated', + ); + expect(outputOf('defaultTool')).toBe('a'.repeat(8000)); + }); + + it('exempts a self-managed (Infinity maxOutputChars) tool from the line cap', async () => { + // 2000 short lines: ~4k chars (well under any char budget) but over the + // global 1000-line cap. A tool that declares Infinity maxOutputChars + // self-manages its size (e.g. ReadFile paging), so the scheduler must NOT + // apply the global line cap to it. + const content = Array(2000).fill('x').join('\n'); + const execute = vi.fn().mockResolvedValue({ + llmContent: content, + returnDisplay: 'x', + }); + const toolsByName = new Map([ + [ + 'selfManaged', + new MockTool({ + name: 'selfManaged', + execute, + maxOutputChars: Number.POSITIVE_INFINITY, + }), + ], + ]); + const { scheduler, onAllToolCallsComplete } = + createSchedulerForLegacyToolTests({ toolsByName }); + + await scheduler.schedule( + [ + { + callId: 'c', + name: 'selfManaged', + args: {}, + isClientInitiated: false, + prompt_id: 'p', + }, + ], + new AbortController().signal, + ); + + await vi.waitFor(() => { + expect(onAllToolCallsComplete).toHaveBeenCalled(); + }); + + const output = outputOfFirstCall(onAllToolCallsComplete); + expect(output).not.toContain( + 'Tool output was too large and has been truncated', + ); + expect(output).toBe(content); + }); + it('schedules a memory pressure check after tool execution', async () => { const execute = vi.fn().mockResolvedValue({ llmContent: 'ok', diff --git a/packages/core/src/core/coreToolScheduler.ts b/packages/core/src/core/coreToolScheduler.ts index 92865362ab5..5fd0a2430f6 100644 --- a/packages/core/src/core/coreToolScheduler.ts +++ b/packages/core/src/core/coreToolScheduler.ts @@ -32,6 +32,11 @@ import { import { NotificationType } from '../hooks/types.js'; import type { PostToolBatchToolCall } from '../hooks/types.js'; import type { MessageBus } from '../confirmation-bus/message-bus.js'; +import { + truncateLlmContent, + truncateToolOutput, + TOOL_OUTPUT_TRUNCATED_PREFIX, +} from '../utils/truncation.js'; const debugLogger = createDebugLogger('TOOL_SCHEDULER'); import { @@ -687,6 +692,24 @@ function toParts(input: PartListUnion): Part[] { return parts; } +/** + * Per-message offload: when a batch of tool results collectively exceeds the + * budget, the largest results are spilled to disk and replaced with a small + * preview + recoverable pointer. This is the preview size used for that spill. + */ +const BATCH_OFFLOAD_PREVIEW_CHARS = 2000; + +/** Total model-facing string output across a completed call's responseParts. */ +function batchResponseOutputSize(call: CompletedToolCall): number { + if (call.status !== 'success') return 0; + let size = 0; + for (const part of call.response.responseParts) { + const output = part.functionResponse?.response?.['output']; + if (typeof output === 'string') size += output.length; + } + return size; +} + const VALIDATION_RETRY_LOOP_THRESHOLD = 3; /** Directive injected when a tool call repeatedly fails validation. */ @@ -3088,8 +3111,13 @@ export class CoreToolScheduler { if (toolResult.error === undefined) { let content = toolResult.llmContent; - const contentLength = - typeof content === 'string' ? content.length : undefined; + + // Deferred metadata: PostToolUse hook context and skill/rule reminders + // are captured here and appended AFTER the model-facing truncation + // below, so the head/tail truncator never bisects a + // envelope or hook-injected context. + let postToolUseAdditionalContext: string | undefined; + let reminderEnvelope: string | undefined; // PostToolUse Hook if (hooksEnabled && messageBus) { @@ -3129,12 +3157,10 @@ export class CoreToolScheduler { }, ); - // Append additional context from hook if provided + // Capture additional context from hook; appended after the + // model-facing truncation below. if (postHookResult.additionalContext) { - content = appendAdditionalContext( - content, - postHookResult.additionalContext, - ); + postToolUseAdditionalContext = postHookResult.additionalContext; } // Check if hook requested to stop execution @@ -3236,10 +3262,103 @@ export class CoreToolScheduler { if (reminderBlocks.length > 0) { const body = escapeSystemReminderTags(reminderBlocks.join('\n\n')); - content = appendAdditionalContext( - content, - `\n${body}\n`, - ); + // Capture; appended after the model-facing truncation below. + reminderEnvelope = `\n${body}\n`; + } + } + + // --- Model-facing output truncation --- + // 1) Truncate the raw tool output FIRST (per-tool budget if the tool + // declares one, else the global threshold), so the head/tail + // truncator never bisects the hook/skill metadata appended below. + // Read the per-tool budget from the already-resolved tool instance. + // schedule() resolved scheduledCall.tool from the CANONICAL name, so + // this also covers legacy aliases (e.g. 'task' → agent) that + // getTool(toolName) — keyed by the raw request name — would miss, + // silently dropping maxOutputChars / truncateKeep. + const limitsTool = scheduledCall.tool; + const perToolMax = limitsTool.maxOutputChars; + const perToolKeep = limitsTool.truncateKeep; + // Per-tool budgets are char-only (mirror CC's maxResultSizeChars): when + // a tool declares its own char budget, the global LINE cap must not + // undercut it — otherwise read-file's Infinity exemption (self-managed + // paging) and grep's char budget get silently capped at 1000 lines. + const perToolLines = + perToolMax !== undefined ? Number.POSITIVE_INFINITY : undefined; + const promptIdForTruncation = scheduledCall.request.prompt_id; + try { + const truncated = await truncateLlmContent( + this.config, + toolName, + content, + { threshold: perToolMax, lines: perToolLines, keep: perToolKeep }, + promptIdForTruncation, + ); + content = truncated.content; + } catch (truncErr) { + // A truncation/IO failure must never demote a successful tool call + // to an error — keep the content and warn. + debugLogger.warn( + `TRUNCATION failed for ${toolName}: ${ + truncErr instanceof Error ? truncErr.message : String(truncErr) + }`, + ); + } + + // 2) Append the deferred metadata now that the body is bounded. + if (postToolUseAdditionalContext) { + content = appendAdditionalContext( + content, + postToolUseAdditionalContext, + ); + } + if (reminderEnvelope) { + content = appendAdditionalContext(content, reminderEnvelope); + } + + // 3) Combined second pass: if metadata was appended and the assembled + // string blew past a doubled budget, bound it once more. Skip when + // the body was already persisted (contains the sentinel) to avoid + // nesting truncation headers. Only the string path is bounded here; + // Part[] outputs (e.g. MCP) rely on the per-message batch budget as + // their second-level bound — re-truncating a Part[] would mean + // re-merging text parts, not worth it for the rare large-metadata case. + if ( + (postToolUseAdditionalContext || reminderEnvelope) && + typeof content === 'string' && + !content.startsWith(TOOL_OUTPUT_TRUNCATED_PREFIX) + ) { + const baseThreshold = + perToolMax ?? this.config.getTruncateToolOutputThreshold(); + // Match the first pass's char-only semantics for per-tool budgets; + // only the global path keeps a (doubled) line cap. + const combinedLines = + perToolMax !== undefined + ? Number.POSITIVE_INFINITY + : this.config.getTruncateToolOutputLines() * 2; + if (content.length > baseThreshold * 2) { + try { + const recombined = await truncateToolOutput( + this.config, + toolName, + content, + { + threshold: baseThreshold * 2, + lines: combinedLines, + keep: perToolKeep, + }, + promptIdForTruncation, + ); + content = recombined.content; + } catch (truncErr) { + debugLogger.warn( + `TRUNCATION (combined) failed for ${toolName}: ${ + truncErr instanceof Error + ? truncErr.message + : String(truncErr) + }`, + ); + } } } @@ -3257,6 +3376,11 @@ export class CoreToolScheduler { ); } + // Computed AFTER truncation so it reflects the model-facing length, + // consistent with the batch-offload path (which also updates it). + const contentLength = + typeof content === 'string' ? content.length : undefined; + const response = convertToFunctionResponse(toolName, callId, content); const successResponse: ToolCallResponseInfo = { callId, @@ -3536,6 +3660,11 @@ export class CoreToolScheduler { ); } + // Per-message budget: offload the largest results if the batch's + // combined model-facing output exceeds the budget, before recording + // and notifying so both consumers see the same (bounded) version. + completedCalls = await this.applyBatchOutputBudget(completedCalls); + for (const call of completedCalls) { logToolCall(this.config, new ToolCallEvent(call)); } @@ -3587,6 +3716,105 @@ export class CoreToolScheduler { } } + /** + * Per-message tool-result budget. When the combined model-facing output of a + * completed batch exceeds `toolOutputBatchBudget`, the largest results are + * offloaded to disk (greedily, largest first) until the batch is back under + * budget. Idempotent: already-persisted / media-bearing results are skipped. + */ + private async applyBatchOutputBudget( + completedCalls: CompletedToolCall[], + ): Promise { + const budget = + this.config.getToolOutputBatchBudget?.() ?? Number.POSITIVE_INFINITY; + if (!Number.isFinite(budget)) return completedCalls; + + const sizes = completedCalls.map(batchResponseOutputSize); + let total = sizes.reduce((sum, size) => sum + size, 0); + if (total <= budget) return completedCalls; + + // Offload the largest results first until back under budget. + const order = completedCalls + .map((_, i) => i) + .sort((a, b) => sizes[b] - sizes[a]); + + const result = [...completedCalls]; + let offloaded = 0; + for (const i of order) { + if (total <= budget) break; + const replaced = await this.offloadCallOutput(result[i]); + if (!replaced) continue; + total -= sizes[i] - batchResponseOutputSize(replaced); + result[i] = replaced; + offloaded++; + } + if (offloaded > 0) { + debugLogger.info( + `Batch output budget (${budget} chars): offloaded ${offloaded} largest result(s) to disk.`, + ); + } + if (total > budget) { + // Could not get under budget — e.g. a single per-tool result whose + // ceiling (MCP's 500k) exceeds the 200k batch budget, or results already + // persisted (sentinel-bearing) and therefore skipped. Surface it instead + // of silently exceeding the per-message budget. + debugLogger.warn( + `Batch output budget (${budget} chars) still exceeded after offloading ${offloaded}: ${total} chars across ${completedCalls.length} result(s).`, + ); + } + return result; + } + + /** + * Spill a single completed call's text output to disk, replacing it with a + * small preview + recoverable pointer. Returns null (skip) for non-success, + * multi-part, media-bearing, or already-persisted results. + */ + private async offloadCallOutput( + call: CompletedToolCall, + ): Promise { + if (call.status !== 'success') return null; + const parts = call.response.responseParts; + if (parts.length !== 1) return null; + const fr = parts[0]?.functionResponse; + if (!fr) return null; + const output = fr.response?.['output']; + if (typeof output !== 'string') return null; + if (fr.parts && fr.parts.length > 0) return null; // media present + if (output.startsWith(TOOL_OUTPUT_TRUNCATED_PREFIX)) return null; // already + + let truncated: { content: string; outputFile?: string }; + try { + truncated = await truncateToolOutput( + this.config, + call.request.name, + output, + { threshold: BATCH_OFFLOAD_PREVIEW_CHARS }, + call.request.prompt_id, + ); + } catch { + return null; // offload failure must not break the batch + } + if (!truncated.outputFile) return null; + + return { + ...call, + response: { + ...call.response, + responseParts: [ + { + functionResponse: { + id: fr.id, + name: fr.name, + response: { output: truncated.content }, + }, + }, + ], + contentLength: truncated.content.length, + }, + }; + } + private notifyToolCallsUpdate(): void { if (this.onToolCallsUpdate) { this.onToolCallsUpdate([...this.toolCalls]); diff --git a/packages/core/src/test-utils/mock-tool.ts b/packages/core/src/test-utils/mock-tool.ts index a4434a5b676..f8ec340d46a 100644 --- a/packages/core/src/test-utils/mock-tool.ts +++ b/packages/core/src/test-utils/mock-tool.ts @@ -30,6 +30,8 @@ interface MockToolOptions { shouldDefer?: boolean; alwaysLoad?: boolean; searchHint?: string; + maxOutputChars?: number; + truncateKeep?: 'head' | 'tail' | 'both'; getDefaultPermission?: () => Promise; getConfirmationDetails?: ( signal: AbortSignal, @@ -96,6 +98,17 @@ export class MockTool extends BaseDeclarativeTool< updateOutput?: (output: string) => void, ) => Promise; + private readonly _maxOutputChars?: number; + private readonly _truncateKeep: 'head' | 'tail' | 'both'; + + override get maxOutputChars(): number | undefined { + return this._maxOutputChars; + } + + override get truncateKeep(): 'head' | 'tail' | 'both' { + return this._truncateKeep; + } + constructor(options: MockToolOptions) { super( options.name, @@ -110,6 +123,9 @@ export class MockTool extends BaseDeclarativeTool< options.searchHint, ); + this._maxOutputChars = options.maxOutputChars; + this._truncateKeep = options.truncateKeep ?? 'both'; + if (options.getDefaultPermission) { this.getDefaultPermission = options.getDefaultPermission; } else { diff --git a/packages/core/src/tools/agent/agent.ts b/packages/core/src/tools/agent/agent.ts index 66f3994faec..2705c968498 100644 --- a/packages/core/src/tools/agent/agent.ts +++ b/packages/core/src/tools/agent/agent.ts @@ -437,6 +437,14 @@ export async function createApprovalModeOverride( export class AgentTool extends BaseDeclarativeTool { static readonly Name: string = ToolNames.AGENT; + override get maxOutputChars(): number { + return 32_000; + } + + override get truncateKeep(): 'tail' { + return 'tail'; + } + private subagentManager: SubagentManager; private availableSubagents: SubagentConfig[] = BuiltinAgentRegistry.getBuiltinAgents(); diff --git a/packages/core/src/tools/grep.ts b/packages/core/src/tools/grep.ts index 2347c6ebb23..c3825f7c798 100644 --- a/packages/core/src/tools/grep.ts +++ b/packages/core/src/tools/grep.ts @@ -596,6 +596,10 @@ class GrepToolInvocation extends BaseToolInvocation< export class GrepTool extends BaseDeclarativeTool { static readonly Name = ToolNames.GREP; + override get maxOutputChars(): number { + return 20_000; + } + constructor(private readonly config: Config) { super( GrepTool.Name, diff --git a/packages/core/src/tools/mcp-tool.test.ts b/packages/core/src/tools/mcp-tool.test.ts index 0873dfbd3ec..214d528d8f2 100644 --- a/packages/core/src/tools/mcp-tool.test.ts +++ b/packages/core/src/tools/mcp-tool.test.ts @@ -879,7 +879,7 @@ describe('DiscoveredMCPTool', () => { } as any; it('should truncate large text results from direct client execution', async () => { - const largeText = 'Line of text content\n'.repeat(200); // ~4200 chars, well over THRESHOLD + const largeText = 'Line of text content\n'.repeat(25000); // ~525k chars, over the 500k MCP char budget const mockMcpClient: McpDirectClient = { callTool: vi.fn(async () => ({ content: [{ type: 'text', text: largeText }], @@ -912,7 +912,7 @@ describe('DiscoveredMCPTool', () => { }); it('should truncate large text results from callable tool execution', async () => { - const largeText = 'Line of text content\n'.repeat(200); + const largeText = 'Line of text content\n'.repeat(25000); const mockMcpToolResponseParts: Part[] = [ { functionResponse: { @@ -1011,7 +1011,7 @@ describe('DiscoveredMCPTool', () => { }); it('should truncate only text parts in mixed content', async () => { - const largeText = 'Line of text content\n'.repeat(200); + const largeText = 'Line of text content\n'.repeat(25000); const mockMcpClient: McpDirectClient = { callTool: vi.fn(async () => ({ content: [ diff --git a/packages/core/src/tools/mcp-tool.ts b/packages/core/src/tools/mcp-tool.ts index 0d00183f776..6f3b9e30368 100644 --- a/packages/core/src/tools/mcp-tool.ts +++ b/packages/core/src/tools/mcp-tool.ts @@ -439,6 +439,13 @@ class DiscoveredMCPToolInvocation extends BaseToolInvocation< this.cliConfig, `mcp__${this.serverName}__${this.serverToolName}`, part.text, + // Per-tool char budget; mirrors DiscoveredMCPTool.maxOutputChars + // (10x the global default, since MCP servers return large structured + // output). char-only (lines: Infinity) so the global line cap can't + // undercut the 500k char budget — many short lines (structured JSON, + // tables) would otherwise truncate while chars remain. Consistent + // with the shell tool's in-tool truncation. + { threshold: 500_000, lines: Number.POSITIVE_INFINITY }, ); result.push({ text: truncated.content }); } else { @@ -457,6 +464,13 @@ export class DiscoveredMCPTool extends BaseDeclarativeTool< ToolParams, ToolResult > { + // MCP servers often return large structured payloads; allow 10x the global + // budget (mirrors Claude Code's MCP `maxResultSizeChars`) before the + // scheduler offloads. truncateTextParts uses the same ceiling per text part. + override get maxOutputChars(): number { + return 500_000; + } + constructor( private readonly mcpTool: CallableTool, readonly serverName: string, diff --git a/packages/core/src/tools/read-file.ts b/packages/core/src/tools/read-file.ts index 8721fbb079f..06b7354806b 100644 --- a/packages/core/src/tools/read-file.ts +++ b/packages/core/src/tools/read-file.ts @@ -380,6 +380,14 @@ export class ReadFileTool extends BaseDeclarativeTool< > { static readonly Name: string = ToolNames.READ_FILE; + // Self-managed: ReadFile controls its own size via line-based paging + // (offset/limit, default 2000 lines), so it is exempt from the scheduler's + // char-based truncation. Oversized reads are bounded by the per-message + // batch budget instead. + override get maxOutputChars(): number { + return Number.POSITIVE_INFINITY; + } + constructor(private config: Config) { super( ReadFileTool.Name, diff --git a/packages/core/src/tools/ripGrep.ts b/packages/core/src/tools/ripGrep.ts index eddb46688a6..7b0f2bda762 100644 --- a/packages/core/src/tools/ripGrep.ts +++ b/packages/core/src/tools/ripGrep.ts @@ -490,6 +490,10 @@ export class RipGrepTool extends BaseDeclarativeTool< > { static readonly Name = ToolNames.GREP; + override get maxOutputChars(): number { + return 20_000; + } + constructor(private readonly config: Config) { super( RipGrepTool.Name, diff --git a/packages/core/src/tools/shell.test.ts b/packages/core/src/tools/shell.test.ts index 25a6e952897..0bb8a696a81 100644 --- a/packages/core/src/tools/shell.test.ts +++ b/packages/core/src/tools/shell.test.ts @@ -1509,6 +1509,42 @@ describe('ShellTool', () => { } }); + it('truncates shell output char-only so the line cap cannot undercut the char budget', async () => { + // Regression (C2): the in-tool truncateToolOutput call omitted `lines`, + // so it fell back to the config line cap (default 1000). Many-short-line + // output (find /, ls -R) then got line-truncated while the 30k char + // budget still had room — contradicting the per-tool char-only contract. + // Pin that shell declares lines: Infinity. + const truncationModule = await import('../utils/truncation.js'); + const spy = vi + .spyOn(truncationModule, 'truncateToolOutput') + .mockResolvedValue({ content: 'unused', outputFile: undefined }); + try { + const invocation = shellTool.build({ + command: 'find /', + is_background: false, + }); + const promise = invocation.execute(mockAbortSignal); + await vi.advanceTimersByTimeAsync(1_000); + resolveShellExecution({ + output: 'short line\n'.repeat(50), + exitCode: 0, + }); + await promise; + + // Shell must pass lines: Infinity so the global line cap can't + // undercut its declared 30k char budget. + expect(spy).toHaveBeenCalledWith( + expect.anything(), + ShellTool.Name, + expect.any(String), + expect.objectContaining({ lines: Number.POSITIVE_INFINITY }), + ); + } finally { + spy.mockRestore(); + } + }); + it('threshold scales with the user-supplied timeout (not the default)', async () => { // User explicitly sets timeout: 600_000 (10 min) because they // expect a long command. Threshold is half that, so a 100s diff --git a/packages/core/src/tools/shell.ts b/packages/core/src/tools/shell.ts index be3944f27f9..534e8259496 100644 --- a/packages/core/src/tools/shell.ts +++ b/packages/core/src/tools/shell.ts @@ -2166,6 +2166,15 @@ export class ShellToolInvocation extends BaseToolInvocation< this.config, ShellTool.Name, llmContent, + // Per-tool char budget; mirrors ShellTool.maxOutputChars. keep='both' + // preserves the command's start AND its trailing exit/error summary + // (where shell failures report). Kept in-tool (not deferred to the + // scheduler) so the long-run hint below is appended OUTSIDE the + // truncation envelope; the scheduler's sentinel makes its later pass a + // no-op here. lines: Infinity keeps this char-only so the global line + // cap can't undercut the declared 30k char budget — many short lines + // (e.g. `find /`, `ls -R`) would otherwise truncate while chars remain. + { threshold: 30_000, keep: 'both', lines: Number.POSITIVE_INFINITY }, ); if (truncatedResult.outputFile) { @@ -4258,6 +4267,10 @@ export class ShellTool extends BaseDeclarativeTool< > { static Name: string = ToolNames.SHELL; + override get maxOutputChars(): number { + return 30_000; + } + constructor(private readonly config: Config) { super( ShellTool.Name, diff --git a/packages/core/src/tools/tools.ts b/packages/core/src/tools/tools.ts index df518e1e93e..8577d920ab9 100644 --- a/packages/core/src/tools/tools.ts +++ b/packages/core/src/tools/tools.ts @@ -233,6 +233,27 @@ export abstract class DeclarativeTool< }; } + /** + * Max model-facing characters for this tool's output before the scheduler + * spills it to disk (mirrors Claude Code's per-tool `maxResultSizeChars`). + * - `undefined` → use the global truncation threshold. + * - `Infinity` → self-managed (the tool does its own size control, e.g. + * ReadFile's line-based paging), exempt from scheduler char truncation. + * Override in subclasses to opt into a per-tool budget. + */ + get maxOutputChars(): number | undefined { + return undefined; + } + + /** + * Direction kept when this tool's oversized output is truncated: `'head'` + * (beginning, e.g. shell), `'tail'` (end, e.g. background agents), or + * `'both'` (first + last, the default). + */ + get truncateKeep(): 'head' | 'tail' | 'both' { + return 'both'; + } + /** * Projects tool params for the AUTO approval mode classifier. * diff --git a/packages/core/src/utils/truncation.test.ts b/packages/core/src/utils/truncation.test.ts index 9350f7844da..795b523ecee 100644 --- a/packages/core/src/utils/truncation.test.ts +++ b/packages/core/src/utils/truncation.test.ts @@ -5,11 +5,22 @@ */ import { describe, it, expect, vi, beforeEach } from 'vitest'; -import { truncateAndSaveToFile } from './truncation.js'; +import type { Part } from '@google/genai'; +import { + truncateAndSaveToFile, + truncateToolOutput, + truncateLlmContent, + TOOL_OUTPUT_TRUNCATED_PREFIX, +} from './truncation.js'; import * as fs from 'node:fs/promises'; import * as path from 'node:path'; +import type { Config } from '../config/config.js'; +import { logToolOutputTruncated } from '../telemetry/loggers.js'; vi.mock('node:fs/promises'); +vi.mock('../telemetry/loggers.js', () => ({ + logToolOutputTruncated: vi.fn(), +})); describe('truncateAndSaveToFile', () => { const mockWriteFile = vi.mocked(fs.writeFile); @@ -145,6 +156,7 @@ describe('truncateAndSaveToFile', () => { expect(mockWriteFile).toHaveBeenCalledWith( path.join(projectTempDir, `${fileName}.output`), content, + { mode: 0o600 }, ); // Effective lines = min(1000, 40000/5) = 1000 (line limit is binding) @@ -185,6 +197,7 @@ describe('truncateAndSaveToFile', () => { expect(mockWriteFile).toHaveBeenCalledWith( path.join(projectTempDir, `${fileName}.output`), content, + { mode: 0o600 }, ); expect(result.content).toContain( @@ -268,7 +281,9 @@ describe('truncateAndSaveToFile', () => { const expectedPath = path.join(projectTempDir, `${fileName}.output`); expect(result.outputFile).toBe(expectedPath); - expect(mockWriteFile).toHaveBeenCalledWith(expectedPath, content); + expect(mockWriteFile).toHaveBeenCalledWith(expectedPath, content, { + mode: 0o600, + }); }); it('should include helpful instructions in truncated message', async () => { @@ -314,6 +329,319 @@ describe('truncateAndSaveToFile', () => { ); const expectedPath = path.join(projectTempDir, 'passwd.output'); - expect(mockWriteFile).toHaveBeenCalledWith(expectedPath, content); + expect(mockWriteFile).toHaveBeenCalledWith(expectedPath, content, { + mode: 0o600, + }); + }); + + describe('keep direction', () => { + // 2000 lines, line-limit (1000) is the binding constraint so truncation + // fires. Unique markers at both ends let us assert which side is kept. + const makeContent = () => + [ + 'FIRST_UNIQUE_LINE', + ...Array(1998).fill('filler'), + 'LAST_UNIQUE_LINE', + ].join('\n'); + + beforeEach(() => { + mockWriteFile.mockResolvedValue(undefined); + }); + + it("keep='head' retains only the beginning", async () => { + const result = await truncateAndSaveToFile( + makeContent(), + 'f', + '/tmp', + THRESHOLD, + TRUNCATE_LINES, + 'head', + ); + expect(result.content).toContain('FIRST_UNIQUE_LINE'); + expect(result.content).not.toContain('LAST_UNIQUE_LINE'); + }); + + it("keep='tail' retains only the end", async () => { + const result = await truncateAndSaveToFile( + makeContent(), + 'f', + '/tmp', + THRESHOLD, + TRUNCATE_LINES, + 'tail', + ); + expect(result.content).toContain('LAST_UNIQUE_LINE'); + expect(result.content).not.toContain('FIRST_UNIQUE_LINE'); + }); + + it("keep='both' (default) retains both ends", async () => { + const result = await truncateAndSaveToFile( + makeContent(), + 'f', + '/tmp', + THRESHOLD, + TRUNCATE_LINES, + ); + expect(result.content).toContain('FIRST_UNIQUE_LINE'); + expect(result.content).toContain('LAST_UNIQUE_LINE'); + }); + + it("keep='tail' does not leak a whole line when the per-line tail budget rounds to zero", async () => { + // Regression: slice(-0) === slice(0) returned the ENTIRE line when the + // remaining tail budget for the triggering line was <= ellipsis length. + // The 58-char C line consumes the tiny budget down to 2, so the 60k B + // line hits sliceLen=0 and (pre-fix) leaked whole into the preview. + const content = + 'H'.repeat(50_000) + '\n' + 'B'.repeat(60_000) + '\n' + 'C'.repeat(58); + const result = await truncateAndSaveToFile( + content, + 'f', + '/tmp', + 100, + TRUNCATE_LINES, + 'tail', + ); + expect(result.outputFile).toBeDefined(); + // The bound must hold: the 60k line must NOT appear whole in the preview. + expect(result.content.length).toBeLessThan(2_000); + }); + }); + + describe('token-aware fallback', () => { + beforeEach(() => { + mockWriteFile.mockResolvedValue(undefined); + }); + + it('returns original content when truncation would not save space', async () => { + // Content barely over a tiny threshold: the wrapper (instructions + + // file pointer) is longer than the original, so truncating wastes + // effort and loses recoverability for no benefit. + const content = 'x'.repeat(60); + const result = await truncateAndSaveToFile( + content, + 'f', + '/tmp', + 50, + 1000, + ); + + expect(result).toEqual({ content }); + expect(result.outputFile).toBeUndefined(); + expect(mockWriteFile).not.toHaveBeenCalled(); + }); + + it('still truncates when the wrapped output is genuinely smaller', async () => { + // Large content: wrapper is far smaller than the original, so the + // fallback must NOT trigger. + const content = 'a'.repeat(2_000_000); + const result = await truncateAndSaveToFile( + content, + 'f', + '/tmp', + THRESHOLD, + TRUNCATE_LINES, + ); + + expect(result.outputFile).toBeDefined(); + expect(result.content.length).toBeLessThan(content.length); + expect(mockWriteFile).toHaveBeenCalled(); + }); + }); +}); + +describe('truncateToolOutput', () => { + const mockWriteFile = vi.mocked(fs.writeFile); + const mockMkdir = vi.mocked(fs.mkdir); + const mockConfig = { + getTruncateToolOutputThreshold: () => 25_000, + getTruncateToolOutputLines: () => 1000, + storage: { getProjectTempDir: () => '/tmp' }, + } as unknown as Config; + + beforeEach(() => { + vi.clearAllMocks(); + mockMkdir.mockResolvedValue(undefined); + mockWriteFile.mockResolvedValue(undefined); + }); + + it('skips storage for a char-only no-op (no temp dir resolution needed)', async () => { + // Fast path: a char-only budget (lines:Infinity) with content within the + // char threshold must return without resolving the temp dir, so a + // storage-less config (e.g. some MCP tests) doesn't blow up. + const getProjectTempDir = vi.fn(() => '/tmp'); + const cfg = { + getTruncateToolOutputThreshold: () => 25_000, + getTruncateToolOutputLines: () => 1000, + storage: { getProjectTempDir }, + } as unknown as Config; + const result = await truncateToolOutput(cfg, 'mcp', 'small output', { + threshold: 500_000, + lines: Number.POSITIVE_INFINITY, + }); + expect(result.content).toBe('small output'); + expect(result.outputFile).toBeUndefined(); + expect(getProjectTempDir).not.toHaveBeenCalled(); + }); + + it('uses limits.threshold to override the config threshold', async () => { + // The config threshold (25k) would NOT truncate this ~1k content, but the + // per-call limits override forces a small threshold that does. + const content = 'x'.repeat(500) + '\n' + 'y'.repeat(500); + const result = await truncateToolOutput(mockConfig, 'shell', content, { + threshold: 100, + lines: 1000, + }); + expect(result.outputFile).toBeDefined(); + }); + + it('passes promptId into the telemetry event', async () => { + const content = 'a'.repeat(200_000); + await truncateToolOutput( + mockConfig, + 'shell', + content, + undefined, + 'prompt-123', + ); + expect(logToolOutputTruncated).toHaveBeenCalled(); + const event = vi.mocked(logToolOutputTruncated).mock.calls[0][1]; + expect(event.prompt_id).toBe('prompt-123'); + }); +}); + +describe('truncateLlmContent', () => { + const mockWriteFile = vi.mocked(fs.writeFile); + const mockMkdir = vi.mocked(fs.mkdir); + const mockConfig = { + getTruncateToolOutputThreshold: () => 25_000, + getTruncateToolOutputLines: () => 1000, + storage: { getProjectTempDir: () => '/tmp' }, + } as unknown as Config; + + beforeEach(() => { + vi.clearAllMocks(); + mockMkdir.mockResolvedValue(undefined); + mockWriteFile.mockResolvedValue(undefined); + }); + + it('truncates a large string and returns an outputFile', async () => { + const result = await truncateLlmContent( + mockConfig, + 'shell', + 'a'.repeat(200_000), + ); + expect(typeof result.content).toBe('string'); + expect(result.outputFile).toBeDefined(); + expect(result.content as string).toContain( + 'Tool output was too large and has been truncated', + ); + }); + + it('replaces empty output with a no-output marker', async () => { + const result = await truncateLlmContent(mockConfig, 'shell', ' '); + expect(result.content).toBe('(shell completed with no output)'); + expect(mockWriteFile).not.toHaveBeenCalled(); + }); + + it('is idempotent when content is already truncated', async () => { + const already = + 'Tool output was too large and has been truncated\nThe full output...'; + const result = await truncateLlmContent(mockConfig, 'shell', already); + expect(result.content).toBe(already); + expect(result.outputFile).toBeUndefined(); + expect(mockWriteFile).not.toHaveBeenCalled(); + }); + + it('truncates text parts but preserves media parts in Part[]', async () => { + const content: Part[] = [ + { text: 'a'.repeat(200_000) }, + { inlineData: { mimeType: 'image/png', data: 'BASE64DATA' } }, + ]; + const result = await truncateLlmContent(mockConfig, 'shell', content); + + expect(Array.isArray(result.content)).toBe(true); + const parts = result.content as Part[]; + // Media part is preserved verbatim. + expect(parts.some((p) => p.inlineData?.data === 'BASE64DATA')).toBe(true); + // Text part is truncated. + const textPart = parts.find((p) => p.text !== undefined); + expect(textPart?.text).toContain( + 'Tool output was too large and has been truncated', + ); + expect(result.outputFile).toBeDefined(); + }); + + it('leaves small Part[] text untouched', async () => { + const content: Part[] = [ + { text: 'small output' }, + { inlineData: { mimeType: 'image/png', data: 'BASE64DATA' } }, + ]; + const result = await truncateLlmContent(mockConfig, 'shell', content); + expect(result.outputFile).toBeUndefined(); + expect(result.content).toEqual(content); + expect(mockWriteFile).not.toHaveBeenCalled(); + }); + + it('bounds Part[] text even when the disk write fails', async () => { + // On a disk-write failure the Part[] path must still return a bounded + // preview (matching the string path) rather than leaking the original + // oversized content back into history. + mockWriteFile.mockRejectedValue(new Error('ENOSPC')); + const content: Part[] = [ + { text: 'a'.repeat(200_000) }, + { inlineData: { mimeType: 'image/png', data: 'BASE64DATA' } }, + ]; + const result = await truncateLlmContent(mockConfig, 'shell', content); + + const parts = result.content as Part[]; + const textPart = parts.find((p) => p.text !== undefined); + // Bounded: far smaller than the 200k original, carrying the failure note. + expect(textPart?.text?.length ?? 0).toBeLessThan(50_000); + expect(textPart?.text).toContain('Could not save full output to file'); + // Media part is still preserved. + expect(parts.some((p) => p.inlineData?.data === 'BASE64DATA')).toBe(true); + }); + + it('still truncates a string when the sentinel appears mid-output, not as a prefix', async () => { + // Only a genuine truncation prefix (at position 0) should short-circuit. + // A tool whose own output merely contains the phrase somewhere in the + // middle must still be truncated — otherwise attacker-controlled output + // could embed the phrase to bypass the budget. + const body = + 'normal output line\n'.repeat(100) + + `${TOOL_OUTPUT_TRUNCATED_PREFIX}\n` + + 'b'.repeat(200_000); + const result = await truncateLlmContent(mockConfig, 'shell', body); + expect(result.outputFile).toBeDefined(); + expect(typeof result.content).toBe('string'); + }); + + it('truncates a Part[] when the sentinel appears mid-stream, not as a part prefix', async () => { + // Part[] idempotency must mirror the string path: only a part that STARTS + // with the sentinel counts as already-truncated. A hostile or echoed part + // that merely contains the phrase mid-stream must not bypass the budget. + const content: Part[] = [ + { + text: + 'normal intro line\n' + + `${TOOL_OUTPUT_TRUNCATED_PREFIX}\n` + + 'b'.repeat(200_000), + }, + ]; + const result = await truncateLlmContent(mockConfig, 'mcp_tool', content); + expect(result.outputFile).toBeDefined(); + }); + + it('leaves a Part[] untouched when any part already starts with the sentinel', async () => { + // A genuinely pre-truncated part (e.g. MCP truncateTextParts output, which + // starts with the sentinel) must still short-circuit re-truncation even + // when it is not the first part and the combined content is over budget. + const content: Part[] = [ + { text: 'small intro' }, + { text: `${TOOL_OUTPUT_TRUNCATED_PREFIX}\n` + 'x'.repeat(200_000) }, + ]; + const result = await truncateLlmContent(mockConfig, 'mcp_tool', content); + expect(result.outputFile).toBeUndefined(); + expect(result.content).toEqual(content); }); }); diff --git a/packages/core/src/utils/truncation.ts b/packages/core/src/utils/truncation.ts index 35a11260736..e7dfaeeb749 100644 --- a/packages/core/src/utils/truncation.ts +++ b/packages/core/src/utils/truncation.ts @@ -7,11 +7,21 @@ import * as fs from 'node:fs/promises'; import * as path from 'node:path'; import * as crypto from 'node:crypto'; +import type { Part, PartListUnion } from '@google/genai'; import { ReadFileTool } from '../tools/read-file.js'; import type { Config } from '../config/config.js'; import { logToolOutputTruncated } from '../telemetry/loggers.js'; import { ToolOutputTruncatedEvent } from '../telemetry/types.js'; +/** + * Stable prefix every truncated tool output starts with. Used as an + * idempotency sentinel so content that was already truncated (by a tool's own + * path — e.g. MCP `truncateTextParts` — or by a prior pass) is not truncated + * again, which would nest headers and spill a duplicate file. + */ +export const TOOL_OUTPUT_TRUNCATED_PREFIX = + 'Tool output was too large and has been truncated'; + /** * Truncates large tool output and saves the full content to a temp file. * Used by the shell tool to prevent excessively large outputs from being @@ -27,7 +37,17 @@ export async function truncateAndSaveToFile( projectTempDir: string, threshold: number, truncateLines: number, + keep: 'head' | 'tail' | 'both' = 'both', ): Promise<{ content: string; outputFile?: string }> { + // Fast path: when no line cap applies (per-tool char budgets pass + // truncateLines = Infinity) and content is within the char threshold, return + // early without splitting. read-file (Infinity threshold) and other + // self-managed tools otherwise allocate a full line array on every call only + // to reach a guaranteed no-op. + if (content.length <= threshold && !Number.isFinite(truncateLines)) { + return { content }; + } + const lines = content.split('\n'); // Check both constraints: character threshold and line limit. @@ -35,16 +55,35 @@ export async function truncateAndSaveToFile( return { content }; } - // Build head and tail within both line and character budgets. + // Build head and tail within both line and character budgets. The `keep` + // direction decides how the line/character budget is split: + // - 'both' (default): head 1/5 + tail 4/5 (preserves first & last). + // - 'head': all budget to the beginning (mirrors CC's Bash tool). + // - 'tail': all budget to the end (mirrors CC's Task tool). const effectiveLines = Math.min(truncateLines, lines.length); - const headCount = Math.max(Math.floor(effectiveLines / 5), 1); - const tailCount = effectiveLines - headCount; + let headCount: number; + let tailCount: number; + if (keep === 'head') { + headCount = effectiveLines; + tailCount = 0; + } else if (keep === 'tail') { + headCount = 0; + tailCount = effectiveLines; + } else { + headCount = Math.max(Math.floor(effectiveLines / 5), 1); + tailCount = effectiveLines - headCount; + } const separator = '\n\n---\n... [CONTENT TRUNCATED] ...\n---\n\n'; const ellipsis = '...'; // Collect head lines within budget. If a single line exceeds the // remaining budget, include a truncated slice of it. - const headBudget = Math.floor(threshold / 5); + const headBudget = + keep === 'head' + ? threshold + : keep === 'tail' + ? 0 + : Math.floor(threshold / 5); const beginning: string[] = []; let headChars = 0; for (let i = 0; i < Math.min(headCount, lines.length); i++) { @@ -62,7 +101,8 @@ export async function truncateAndSaveToFile( // Collect tail lines within remaining budget. If a single line exceeds // the remaining budget, include a truncated slice of it. - const tailBudget = Math.max(threshold - headChars - separator.length, 0); + const tailBudget = + keep === 'head' ? 0 : Math.max(threshold - headChars - separator.length, 0); const end: string[] = []; let tailChars = 0; const tailStart = Math.max(lines.length - tailCount, beginning.length); @@ -71,7 +111,10 @@ export async function truncateAndSaveToFile( if (remaining <= 0) break; if (lines[i].length + 1 > remaining) { const sliceLen = Math.max(remaining - ellipsis.length, 0); - end.unshift(ellipsis + lines[i].slice(-sliceLen)); + // slice(-0) === slice(0) returns the WHOLE line, so guard the zero case + // explicitly: sliceLen === 0 means no budget for any tail chars (the head + // branch's slice(0, 0) already yields '' correctly). + end.unshift(ellipsis + (sliceLen > 0 ? lines[i].slice(-sliceLen) : '')); tailChars = tailBudget; break; } @@ -79,23 +122,45 @@ export async function truncateAndSaveToFile( tailChars += lines[i].length + 1; } - const truncatedContent = beginning.join('\n') + separator + end.join('\n'); + // Compose by direction: head-only ends with the separator (content + // removed after), tail-only starts with it (content removed before), + // both keeps the existing head+separator+tail shape. + const truncatedContent = + keep === 'head' + ? beginning.join('\n') + separator + : keep === 'tail' + ? separator + end.join('\n') + : beginning.join('\n') + separator + end.join('\n'); // Sanitize fileName to prevent path traversal. const safeFileName = `${path.basename(fileName)}.output`; const outputFile = path.join(projectTempDir, safeFileName); - try { - await fs.mkdir(projectTempDir, { recursive: true }); - await fs.writeFile(outputFile, content); - - return { - content: `Tool output was too large and has been truncated. + const wrappedMessage = `${TOOL_OUTPUT_TRUNCATED_PREFIX}. The full output has been saved to: ${outputFile} To read the complete output, use the ${ReadFileTool.Name} tool with the absolute file path above. The truncated output below shows the beginning and end of the content. The marker '... [CONTENT TRUNCATED] ...' indicates where content was removed. Truncated part of the output: -${truncatedContent}`, +${truncatedContent}`; + + // Token-aware fallback: if the wrapped (truncated + instructions) output is + // not actually smaller than the original, truncating wastes effort and + // loses recoverability for no benefit — keep the original untouched. + if (wrappedMessage.length >= content.length) { + return { content }; + } + + try { + // Restrictive perms: tool output can contain secrets, and this PR widens + // persistence to every string-returning tool. The spilled file is created + // owner-only (0o600). We don't set a mode on the temp dir: it is shared + // with the logger/checkpoints and is normally created earlier without one, + // and mkdir would not tighten an already-existing directory anyway. + await fs.mkdir(projectTempDir, { recursive: true }); + await fs.writeFile(outputFile, content, { mode: 0o600 }); + + return { + content: wrappedMessage, outputFile, }; } catch (_error) { @@ -119,14 +184,33 @@ export async function truncateToolOutput( config: Config, toolName: string, content: string, + limits?: { + threshold?: number; + lines?: number; + keep?: 'head' | 'tail' | 'both'; + }, + promptId?: string, ): Promise<{ content: string; outputFile?: string }> { - const threshold = config.getTruncateToolOutputThreshold(); - const lines = config.getTruncateToolOutputLines(); + // Per-call `limits` override the global config thresholds. Used for + // per-tool budgets (maxOutputChars) and the combined second-pass that runs + // a 2x budget after hook/reminder metadata is appended. + const threshold = + limits?.threshold ?? config.getTruncateToolOutputThreshold(); + const lines = limits?.lines ?? config.getTruncateToolOutputLines(); + const keep = limits?.keep ?? 'both'; if (threshold <= 0 || lines <= 0) { return { content }; } + // Fast path: when no line cap applies (char-only budgets pass lines:Infinity) + // and content is within the char threshold, there is nothing to spill — + // return before resolving the temp dir, so callers that never truncate (e.g. + // small MCP outputs) don't require storage to be configured. + if (content.length <= threshold && !Number.isFinite(lines)) { + return { content }; + } + const originalLength = content.length; const fileName = `${toolName}_${crypto.randomBytes(6).toString('hex')}`; const result = await truncateAndSaveToFile( @@ -135,20 +219,104 @@ export async function truncateToolOutput( config.storage.getProjectTempDir(), threshold, lines, + keep, ); if (result.outputFile) { - logToolOutputTruncated( - config, - new ToolOutputTruncatedEvent('', { - toolName, - originalContentLength: originalLength, - truncatedContentLength: result.content.length, - threshold, - lines, - }), - ); + try { + logToolOutputTruncated( + config, + new ToolOutputTruncatedEvent(promptId ?? '', { + toolName, + originalContentLength: originalLength, + truncatedContentLength: result.content.length, + threshold, + lines, + }), + ); + } catch { + // Telemetry must never break a successful truncation. + } } return result; } + +/** + * Unified truncation entry for the tool scheduler. Handles both string and + * Part[] `llmContent`: + * - string is truncated directly; + * - Part[] has its text parts merged and truncated, while media parts + * (inlineData/fileData) are preserved verbatim; + * - empty output is replaced with a no-output marker; + * - already-truncated content passes through unchanged (idempotent). + */ +export async function truncateLlmContent( + config: Config, + toolName: string, + content: PartListUnion, + limits?: { + threshold?: number; + lines?: number; + keep?: 'head' | 'tail' | 'both'; + }, + promptId?: string, +): Promise<{ content: PartListUnion; outputFile?: string }> { + // --- string path --- + if (typeof content === 'string') { + if (content.trim() === '') { + return { content: `(${toolName} completed with no output)` }; + } + // Idempotency: a genuine truncation prefix sits at position 0. Use + // startsWith (not includes) so a tool whose own output merely contains the + // phrase mid-stream is still bounded rather than passing through unbounded. + if (content.startsWith(TOOL_OUTPUT_TRUNCATED_PREFIX)) { + return { content }; + } + return truncateToolOutput(config, toolName, content, limits, promptId); + } + + // --- Part[] / single Part path --- + const parts: Part[] = (Array.isArray(content) ? content : [content]).map( + (p) => (typeof p === 'string' ? ({ text: p } as Part) : p), + ); + const textParts = parts.filter((p) => p.text !== undefined); + const mediaParts = parts.filter((p) => p.text === undefined); + const combined = textParts.map((p) => p.text).join('\n'); + + if (combined.trim() === '' && mediaParts.length === 0) { + return { content: `(${toolName} completed with no output)` }; + } + // Idempotency, mirroring the string path: a part counts as already-truncated + // only if it STARTS with the sentinel (MCP's truncateTextParts emits such a + // part). Checking each part's prefix — not `combined.includes` — preserves + // the multi-part dedup intent while preventing a part that merely contains + // the phrase mid-stream from bypassing the budget. + if (textParts.some((p) => p.text?.startsWith(TOOL_OUTPUT_TRUNCATED_PREFIX))) { + return { content }; + } + + const result = await truncateToolOutput( + config, + toolName, + combined, + limits, + promptId, + ); + + // Unchanged (within budget or token-aware fallback): leave the original + // Part[] untouched so part structure and ordering are preserved. Compare + // content identity rather than `outputFile` so a truncated-but-unsaved + // result (a disk-write failure returns a bounded preview with no file) still + // bounds the output, matching the string path. + if (result.content === combined) { + return { content }; + } + + // Truncated: collapse text into a single truncated part, then re-append the + // preserved media parts. + return { + content: [{ text: result.content } as Part, ...mediaParts], + outputFile: result.outputFile, + }; +} diff --git a/packages/vscode-ide-companion/schemas/settings.schema.json b/packages/vscode-ide-companion/schemas/settings.schema.json index f95e9307a48..b54c83d4821 100644 --- a/packages/vscode-ide-companion/schemas/settings.schema.json +++ b/packages/vscode-ide-companion/schemas/settings.schema.json @@ -903,6 +903,11 @@ "type": "number", "default": 1000 }, + "toolOutputBatchBudget": { + "description": "Per-message budget (characters) for the combined output of one batch of tool calls; the largest results are offloaded to disk when exceeded. Set to -1 to disable.", + "type": "number", + "default": 200000 + }, "computerUse": { "description": "Cross-platform desktop automation via the upstream open-computer-use MCP server. Tools: list_apps, get_app_state, click, type_text, scroll, drag, press_key, perform_secondary_action, set_value. On first invocation, the upstream binary is fetched via npx and the user is walked through macOS Accessibility / Screen Recording permissions if needed.", "type": "object",