From 52eab19934f0d282ed558f637ff3f4a1ca415c62 Mon Sep 17 00:00:00 2001 From: yiliang114 <1204183885@qq.com> Date: Sat, 30 May 2026 23:30:52 +0800 Subject: [PATCH 1/5] fix(core,cli): replace full-history structuredClone with shallow/tail variants to prevent OOM on resume Several UI and service call sites clone the entire chat history via structuredClone(getHistory()) every turn. On a resumed session with thousands of entries, each clone allocates 150-200 MB transiently. When multiple async side-requests overlap (suggestion generation, auto-title, checkpointing), multiple clones coexist on the heap, pushing V8 past its limit within 10 turns (2 GB heap cap). Changes: - AppContainer.tsx: use getHistoryTail(40, true) instead of getHistory(true) + slice(-40) - btwCommand.ts: same pattern, use getHistoryTail(40, true) - sessionTitle.ts: use getHistoryShallow() (read-only filtering) - sessionRecap.ts: use getHistoryShallow() (read-only filtering) - useGeminiStream.ts: use getHistoryShallow() for checkpoint serialization (only needs to survive JSON.stringify) Closes #4624 --- packages/cli/src/ui/AppContainer.tsx | 9 +++++---- packages/cli/src/ui/commands/btwCommand.ts | 7 +------ packages/cli/src/ui/hooks/useGeminiStream.ts | 2 +- packages/core/src/services/sessionRecap.ts | 2 +- packages/core/src/services/sessionTitle.ts | 2 +- 5 files changed, 9 insertions(+), 13 deletions(-) diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx index 6fbc512dd2f..70bc31e3fb4 100644 --- a/packages/cli/src/ui/AppContainer.tsx +++ b/packages/cli/src/ui/AppContainer.tsx @@ -2108,10 +2108,11 @@ export const AppContainer = (props: AppContainerProps) => { const ac = new AbortController(); suggestionAbortRef.current = ac; - // Use curated history to avoid invalid/empty entries causing API errors - const fullHistory = geminiClient.getChat().getHistory(true); - const conversationHistory = - fullHistory.length > 40 ? 
fullHistory.slice(-40) : fullHistory; + // Only clone the tail — full structuredClone of a large resumed session + // causes transient heap peaks that trigger OOM (#4624). + const conversationHistory = geminiClient + .getChat() + .getHistoryTail(40, true); generatePromptSuggestion(config, conversationHistory, ac.signal, { enableCacheSharing: settings.merged.ui?.enableCacheSharing === true, }) diff --git a/packages/cli/src/ui/commands/btwCommand.ts b/packages/cli/src/ui/commands/btwCommand.ts index 801b6a275f4..08af2a0ceef 100644 --- a/packages/cli/src/ui/commands/btwCommand.ts +++ b/packages/cli/src/ui/commands/btwCommand.ts @@ -65,12 +65,7 @@ function getBtwCacheSafeParams( ) { const generationConfig = chat.getGenerationConfig(); if (generationConfig) { - const fullHistory = geminiClient.getHistory(true); - const maxHistoryEntries = 40; - const history = - fullHistory.length > maxHistoryEntries - ? fullHistory.slice(-maxHistoryEntries) - : fullHistory; + const history = geminiClient.getHistoryTail(40, true); return { generationConfig, diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index 91272d1e4de..bae8e2dfacf 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -2462,7 +2462,7 @@ export const useGeminiStream = ( const toolName = toolCall.request.name; const fileName = path.basename(filePath); const toolCallWithSnapshotFileName = `${timestamp}-${fileName}-${toolName}.json`; - const clientHistory = await geminiClient?.getHistory(); + const clientHistory = geminiClient?.getHistoryShallow(); const toolCallWithSnapshotFilePath = path.join( checkpointDir, toolCallWithSnapshotFileName, diff --git a/packages/core/src/services/sessionRecap.ts b/packages/core/src/services/sessionRecap.ts index 4f0ba75da60..32e8583c0a9 100644 --- a/packages/core/src/services/sessionRecap.ts +++ b/packages/core/src/services/sessionRecap.ts @@ -49,7 +49,7 @@ export async 
function generateSessionRecap( const geminiClient = config.getGeminiClient(); if (!geminiClient) return null; - const fullHistory = geminiClient.getChat().getHistory(); + const fullHistory = geminiClient.getChat().getHistoryShallow(); if (fullHistory.length < 2) return null; const dialog = filterToDialog(fullHistory); diff --git a/packages/core/src/services/sessionTitle.ts b/packages/core/src/services/sessionTitle.ts index 331b5e86361..bb88d83b2bc 100644 --- a/packages/core/src/services/sessionTitle.ts +++ b/packages/core/src/services/sessionTitle.ts @@ -113,7 +113,7 @@ export async function tryGenerateSessionTitle( const geminiClient = config.getGeminiClient(); if (!geminiClient) return { ok: false, reason: 'no_client' }; - const fullHistory = geminiClient.getChat().getHistory(); + const fullHistory = geminiClient.getChat().getHistoryShallow(); if (fullHistory.length < 2) return { ok: false, reason: 'empty_history' }; const dialog = filterToDialog(fullHistory); From 981bbcfb463fdacc9797bb6b783a38845d0a1d21 Mon Sep 17 00:00:00 2001 From: yiliang114 <1204183885@qq.com> Date: Sat, 30 May 2026 23:39:41 +0800 Subject: [PATCH 2/5] fix(test): update mocks for getHistoryShallow/getHistoryTail in sessionTitle and btwCommand tests --- packages/cli/src/ui/commands/btwCommand.test.ts | 9 +++++++++ packages/core/src/services/sessionTitle.test.ts | 11 +++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/packages/cli/src/ui/commands/btwCommand.test.ts b/packages/cli/src/ui/commands/btwCommand.test.ts index 5aebbbb1a3e..83424c6cf84 100644 --- a/packages/cli/src/ui/commands/btwCommand.test.ts +++ b/packages/cli/src/ui/commands/btwCommand.test.ts @@ -176,6 +176,11 @@ describe('btwCommand', () => { .mockReturnValue([ { role: 'user', parts: [{ text: '杭州天气如何?' }] }, ]), + getHistoryTail: vi + .fn() + .mockReturnValue([ + { role: 'user', parts: [{ text: '杭州天气如何?' 
}] }, + ]), getChat: vi.fn().mockReturnValue({ getGenerationConfig: vi.fn().mockReturnValue({ systemInstruction: 'You are helpful', @@ -229,6 +234,10 @@ describe('btwCommand', () => { { role: 'user', parts: [{ text: '杭州天气如何?' }] }, { role: 'user', parts: [{ text: '请顺便解释一下湿度怎么看' }] }, ]), + getHistoryTail: vi.fn().mockReturnValue([ + { role: 'user', parts: [{ text: '杭州天气如何?' }] }, + { role: 'user', parts: [{ text: '请顺便解释一下湿度怎么看' }] }, + ]), getChat: vi.fn().mockReturnValue({ getGenerationConfig: vi.fn().mockReturnValue({ systemInstruction: 'live system prompt', diff --git a/packages/core/src/services/sessionTitle.test.ts b/packages/core/src/services/sessionTitle.test.ts index 05bb83b9db4..bdf29c28c67 100644 --- a/packages/core/src/services/sessionTitle.test.ts +++ b/packages/core/src/services/sessionTitle.test.ts @@ -33,6 +33,7 @@ function makeConfig(opts: MockOptions): { getGeminiClient: vi.fn(() => ({ getChat: () => ({ getHistory: () => opts.history ?? [], + getHistoryShallow: () => opts.history ?? 
[], }), })), getBaseLlmClient: vi.fn(() => ({ generateJson })), @@ -202,7 +203,10 @@ describe('tryGenerateSessionTitle', () => { getFastModel: vi.fn(() => 'qwen-turbo'), getModel: vi.fn(() => 'qwen-plus'), getGeminiClient: vi.fn(() => ({ - getChat: () => ({ getHistory: () => history }), + getChat: () => ({ + getHistory: () => history, + getHistoryShallow: () => history, + }), })), getBaseLlmClient: vi.fn(() => ({ generateJson })), } as unknown as Config; @@ -240,7 +244,10 @@ describe('tryGenerateSessionTitle', () => { getFastModel: vi.fn(() => 'qwen-turbo'), getModel: vi.fn(() => 'qwen-plus'), getGeminiClient: vi.fn(() => ({ - getChat: () => ({ getHistory: () => history }), + getChat: () => ({ + getHistory: () => history, + getHistoryShallow: () => history, + }), })), getBaseLlmClient: vi.fn(() => ({ generateJson })), } as unknown as Config; From 8ddd8627fe25b0d1385dafc8bb2141ad77f1f9cb Mon Sep 17 00:00:00 2001 From: yiliang114 <1204183885@qq.com> Date: Sun, 31 May 2026 00:19:46 +0800 Subject: [PATCH 3/5] fix(cli): migrate remaining getHistory() clone sites to shallow/tail variants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - AppContainer.tsx rewind path: getHistory() → getHistoryShallow() (only used read-only by computeApiTruncationIndex) - Session.ts ACP rewind: getHistory() → getHistoryShallow() (only walks entries to compute truncation index) - Session.ts stop-hook: getHistory() + filter(role === 'model').pop() + text-part join → getLastModelMessageText() (scans backward from the end of the history, no full-history clone) --- .../cli/src/acp-integration/session/Session.ts | 14 ++++---------- packages/cli/src/ui/AppContainer.tsx | 2 +- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/packages/cli/src/acp-integration/session/Session.ts b/packages/cli/src/acp-integration/session/Session.ts index df4be4e7df3..38b6ef652a6 100644 --- a/packages/cli/src/acp-integration/session/Session.ts +++ b/packages/cli/src/acp-integration/session/Session.ts @@ -402,7 +402,7 @@ export 
class Session implements SessionContext { } const chat = this.config.getGeminiClient()!.getChat(); - const apiHistory = chat.getHistory(); + const apiHistory = chat.getHistoryShallow(); const apiTruncateIndex = this.#computeApiTruncationIndexForUserTurn( apiHistory, targetTurnIndex, @@ -895,16 +895,10 @@ export class Session implements SessionContext { return { stopReason: 'end_turn' }; } - // Get response text from the chat history - const history = this.#getCurrentChat().getHistory(); - const lastModelMessage = history - .filter((msg: Content) => msg.role === 'model') - .pop(); + // Extract last model text without cloning the full history. const responseText = - lastModelMessage?.parts - ?.filter((p: Part): p is { text: string } & Part => 'text' in p) - .map((p: { text: string }) => p.text) - .join('') || '[no response text]'; + this.#getCurrentChat().getLastModelMessageText?.() || + '[no response text]'; const response = await messageBus.request< HookExecutionRequest, diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx index 70bc31e3fb4..edc15f2ddd9 100644 --- a/packages/cli/src/ui/AppContainer.tsx +++ b/packages/cli/src/ui/AppContainer.tsx @@ -2464,7 +2464,7 @@ export const AppContainer = (props: AppContainerProps) => { apiTruncateIndex = computeApiTruncationIndex( historyManager.history, userItem.id, - geminiClient.getHistory(), + geminiClient.getHistoryShallow(), ); if (apiTruncateIndex < 0) { historyManager.addItem( From b02bf69e62e38eeb0b0aa4275d46a952ca18f557 Mon Sep 17 00:00:00 2001 From: yiliang114 <1204183885@qq.com> Date: Sun, 31 May 2026 00:11:45 +0800 Subject: [PATCH 4/5] fix(core): use client-level getHistoryShallow with fallback sessionTitle.ts and sessionRecap.ts were calling chat.getHistoryShallow() directly, bypassing the client-level wrapper that provides a getHistory() fallback when the chat implementation doesn't support shallow reads. Use geminiClient.getHistoryShallow() instead. 
Update test mocks to match the new call site. --- packages/cli/src/ui/AppContainer.tsx | 4 +--- packages/core/src/services/sessionRecap.ts | 2 +- packages/core/src/services/sessionTitle.test.ts | 6 +++--- packages/core/src/services/sessionTitle.ts | 2 +- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx index edc15f2ddd9..e8362122bb7 100644 --- a/packages/cli/src/ui/AppContainer.tsx +++ b/packages/cli/src/ui/AppContainer.tsx @@ -2110,9 +2110,7 @@ export const AppContainer = (props: AppContainerProps) => { // Only clone the tail — full structuredClone of a large resumed session // causes transient heap peaks that trigger OOM (#4624). - const conversationHistory = geminiClient - .getChat() - .getHistoryTail(40, true); + const conversationHistory = geminiClient.getHistoryTail(40, true); generatePromptSuggestion(config, conversationHistory, ac.signal, { enableCacheSharing: settings.merged.ui?.enableCacheSharing === true, }) diff --git a/packages/core/src/services/sessionRecap.ts b/packages/core/src/services/sessionRecap.ts index 32e8583c0a9..147f2594f4f 100644 --- a/packages/core/src/services/sessionRecap.ts +++ b/packages/core/src/services/sessionRecap.ts @@ -49,7 +49,7 @@ export async function generateSessionRecap( const geminiClient = config.getGeminiClient(); if (!geminiClient) return null; - const fullHistory = geminiClient.getChat().getHistoryShallow(); + const fullHistory = geminiClient.getHistoryShallow(); if (fullHistory.length < 2) return null; const dialog = filterToDialog(fullHistory); diff --git a/packages/core/src/services/sessionTitle.test.ts b/packages/core/src/services/sessionTitle.test.ts index bdf29c28c67..2da674f754d 100644 --- a/packages/core/src/services/sessionTitle.test.ts +++ b/packages/core/src/services/sessionTitle.test.ts @@ -31,9 +31,9 @@ function makeConfig(opts: MockOptions): { getFastModel: vi.fn(() => opts.fastModel ?? 
undefined), getModel: vi.fn(() => 'qwen-plus'), getGeminiClient: vi.fn(() => ({ + getHistoryShallow: () => opts.history ?? [], getChat: () => ({ getHistory: () => opts.history ?? [], - getHistoryShallow: () => opts.history ?? [], }), })), getBaseLlmClient: vi.fn(() => ({ generateJson })), @@ -203,9 +203,9 @@ describe('tryGenerateSessionTitle', () => { getFastModel: vi.fn(() => 'qwen-turbo'), getModel: vi.fn(() => 'qwen-plus'), getGeminiClient: vi.fn(() => ({ + getHistoryShallow: () => history, getChat: () => ({ getHistory: () => history, - getHistoryShallow: () => history, }), })), getBaseLlmClient: vi.fn(() => ({ generateJson })), @@ -244,9 +244,9 @@ describe('tryGenerateSessionTitle', () => { getFastModel: vi.fn(() => 'qwen-turbo'), getModel: vi.fn(() => 'qwen-plus'), getGeminiClient: vi.fn(() => ({ + getHistoryShallow: () => history, getChat: () => ({ getHistory: () => history, - getHistoryShallow: () => history, }), })), getBaseLlmClient: vi.fn(() => ({ generateJson })), diff --git a/packages/core/src/services/sessionTitle.ts b/packages/core/src/services/sessionTitle.ts index bb88d83b2bc..b82c2028ad4 100644 --- a/packages/core/src/services/sessionTitle.ts +++ b/packages/core/src/services/sessionTitle.ts @@ -113,7 +113,7 @@ export async function tryGenerateSessionTitle( const geminiClient = config.getGeminiClient(); if (!geminiClient) return { ok: false, reason: 'no_client' }; - const fullHistory = geminiClient.getChat().getHistoryShallow(); + const fullHistory = geminiClient.getHistoryShallow(); if (fullHistory.length < 2) return { ok: false, reason: 'empty_history' }; const dialog = filterToDialog(fullHistory); From 5fbf8a4d27d028bc87285e7beb5ddbfdb7c0eaf6 Mon Sep 17 00:00:00 2001 From: yiliang114 <1204183885@qq.com> Date: Sun, 31 May 2026 01:28:08 +0800 Subject: [PATCH 5/5] fix(test): add getHistoryShallow and getLastModelMessageText to Session test mocks Session.ts now calls chat.getHistoryShallow() in rewindToTurn and chat.getLastModelMessageText() in the 
Stop hook. Update all mockChat instances in Session.test.ts to provide these methods. Also switch the capability guard in btwCommand.ts to check for getHistoryTail, the method getBtwCacheSafeParams now calls, instead of getHistory. --- .../acp-integration/session/Session.test.ts | 32 +++++++++++++++++-- packages/cli/src/ui/commands/btwCommand.ts | 2 +- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/packages/cli/src/acp-integration/session/Session.test.ts b/packages/cli/src/acp-integration/session/Session.test.ts index 750b5e34869..be6763a98a5 100644 --- a/packages/cli/src/acp-integration/session/Session.test.ts +++ b/packages/cli/src/acp-integration/session/Session.test.ts @@ -196,6 +196,8 @@ describe('Session', () => { sendMessageStream: vi.fn(), addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), + getHistoryShallow: vi.fn().mockReturnValue([]), + getLastModelMessageText: vi.fn().mockReturnValue(''), setHistory: vi.fn(), truncateHistory: vi.fn(), stripThoughtsFromHistory: vi.fn(), @@ -329,6 +331,7 @@ describe('Session', () => { { role: 'model', parts: [{ text: 'second reply' }] }, ]; vi.mocked(mockChat.getHistory).mockReturnValue(history); + vi.mocked(mockChat.getHistoryShallow).mockReturnValue(history); const result = session.rewindToTurn(1); @@ -348,6 +351,7 @@ describe('Session', () => { { role: 'model', parts: [{ text: 'first reply' }] }, ]; vi.mocked(mockChat.getHistory).mockReturnValue(history); + vi.mocked(mockChat.getHistoryShallow).mockReturnValue(history); const result = session.rewindToTurn(0); @@ -356,9 +360,9 @@ describe('Session', () => { }); it('rejects unreachable user turns', () => { - vi.mocked(mockChat.getHistory).mockReturnValue([ - { role: 'user', parts: [{ text: 'first' }] }, - ]); + const history: Content[] = [{ role: 'user', parts: [{ text: 'first' }] }]; + vi.mocked(mockChat.getHistory).mockReturnValue(history); + vi.mocked(mockChat.getHistoryShallow).mockReturnValue(history); expect(() => session.rewindToTurn(2)).toThrow( 'Cannot rewind to the requested turn', @@ -853,6 +857,8 @@ describe('Session', () => { sendMessageStream: 
vi.fn().mockResolvedValue(createEmptyStream()), addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), + getHistoryShallow: vi.fn().mockReturnValue([]), + getLastModelMessageText: vi.fn().mockReturnValue(''), } as unknown as GeminiChat; mockChat.sendMessageStream = vi @@ -1207,6 +1213,8 @@ describe('Session', () => { sendMessageStream: vi.fn().mockResolvedValue(createEmptyStream()), addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), + getHistoryShallow: vi.fn().mockReturnValue([]), + getLastModelMessageText: vi.fn().mockReturnValue(''), } as unknown as GeminiChat; mockConfig.getSessionTokenLimit = vi.fn().mockReturnValue(100); mockGeminiClient.tryCompressChat @@ -1523,6 +1531,9 @@ describe('Session', () => { .mockReturnValue([ { role: 'model', parts: [{ text: 'response text' }] }, ]); + mockChat.getLastModelMessageText = vi + .fn() + .mockReturnValue('response text'); mockChat.sendMessageStream = vi .fn() .mockResolvedValueOnce(createEmptyStream()) @@ -1584,6 +1595,9 @@ describe('Session', () => { .mockReturnValue([ { role: 'model', parts: [{ text: 'response text' }] }, ]); + mockChat.getLastModelMessageText = vi + .fn() + .mockReturnValue('response text'); mockChat.sendMessageStream = vi .fn() .mockResolvedValueOnce(createEmptyStream()) @@ -1655,6 +1669,9 @@ describe('Session', () => { .mockReturnValue([ { role: 'model', parts: [{ text: 'response text' }] }, ]); + mockChat.getLastModelMessageText = vi + .fn() + .mockReturnValue('response text'); mockChat.sendMessageStream = vi .fn() .mockResolvedValue(createEmptyStream()); @@ -2405,6 +2422,9 @@ describe('Session', () => { .mockReturnValue([ { role: 'model', parts: [{ text: 'response text' }] }, ]); + mockChat.getLastModelMessageText = vi + .fn() + .mockReturnValue('response text'); mockChat.sendMessageStream = vi.fn().mockResolvedValue( createStreamWithChunks([ @@ -2458,6 +2478,9 @@ describe('Session', () => { .mockReturnValue([ { role: 'model', parts: [{ text: 'response text' }] }, ]); + 
mockChat.getLastModelMessageText = vi + .fn() + .mockReturnValue('response text'); mockChat.sendMessageStream = vi .fn() .mockResolvedValue(createEmptyStream()); @@ -2503,6 +2526,9 @@ describe('Session', () => { .mockReturnValue([ { role: 'model', parts: [{ text: 'response text' }] }, ]); + mockChat.getLastModelMessageText = vi + .fn() + .mockReturnValue('response text'); mockChat.sendMessageStream = vi .fn() .mockResolvedValue(createEmptyStream()); diff --git a/packages/cli/src/ui/commands/btwCommand.ts b/packages/cli/src/ui/commands/btwCommand.ts index 08af2a0ceef..0bba5f62f4a 100644 --- a/packages/cli/src/ui/commands/btwCommand.ts +++ b/packages/cli/src/ui/commands/btwCommand.ts @@ -55,7 +55,7 @@ function getBtwCacheSafeParams( geminiClient && typeof geminiClient === 'object' && typeof geminiClient.getChat === 'function' && - typeof geminiClient.getHistory === 'function' + typeof geminiClient.getHistoryTail === 'function' ) { const chat = geminiClient.getChat(); if (