diff --git a/packages/cli/src/acp-integration/session/Session.test.ts b/packages/cli/src/acp-integration/session/Session.test.ts index 750b5e34869..be6763a98a5 100644 --- a/packages/cli/src/acp-integration/session/Session.test.ts +++ b/packages/cli/src/acp-integration/session/Session.test.ts @@ -196,6 +196,8 @@ describe('Session', () => { sendMessageStream: vi.fn(), addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), + getHistoryShallow: vi.fn().mockReturnValue([]), + getLastModelMessageText: vi.fn().mockReturnValue(''), setHistory: vi.fn(), truncateHistory: vi.fn(), stripThoughtsFromHistory: vi.fn(), @@ -329,6 +331,7 @@ describe('Session', () => { { role: 'model', parts: [{ text: 'second reply' }] }, ]; vi.mocked(mockChat.getHistory).mockReturnValue(history); + vi.mocked(mockChat.getHistoryShallow).mockReturnValue(history); const result = session.rewindToTurn(1); @@ -348,6 +351,7 @@ describe('Session', () => { { role: 'model', parts: [{ text: 'first reply' }] }, ]; vi.mocked(mockChat.getHistory).mockReturnValue(history); + vi.mocked(mockChat.getHistoryShallow).mockReturnValue(history); const result = session.rewindToTurn(0); @@ -356,9 +360,9 @@ describe('Session', () => { }); it('rejects unreachable user turns', () => { - vi.mocked(mockChat.getHistory).mockReturnValue([ - { role: 'user', parts: [{ text: 'first' }] }, - ]); + const history: Content[] = [{ role: 'user', parts: [{ text: 'first' }] }]; + vi.mocked(mockChat.getHistory).mockReturnValue(history); + vi.mocked(mockChat.getHistoryShallow).mockReturnValue(history); expect(() => session.rewindToTurn(2)).toThrow( 'Cannot rewind to the requested turn', @@ -853,6 +857,8 @@ describe('Session', () => { sendMessageStream: vi.fn().mockResolvedValue(createEmptyStream()), addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), + getHistoryShallow: vi.fn().mockReturnValue([]), + getLastModelMessageText: vi.fn().mockReturnValue(''), } as unknown as GeminiChat; mockChat.sendMessageStream = vi @@ -1207,6 +1213,8 @@ describe('Session', () => { sendMessageStream: vi.fn().mockResolvedValue(createEmptyStream()), addHistory: vi.fn(), getHistory: vi.fn().mockReturnValue([]), + getHistoryShallow: vi.fn().mockReturnValue([]), + getLastModelMessageText: vi.fn().mockReturnValue(''), } as unknown as GeminiChat; mockConfig.getSessionTokenLimit = vi.fn().mockReturnValue(100); mockGeminiClient.tryCompressChat @@ -1523,6 +1531,9 @@ describe('Session', () => { .mockReturnValue([ { role: 'model', parts: [{ text: 'response text' }] }, ]); + mockChat.getLastModelMessageText = vi + .fn() + .mockReturnValue('response text'); mockChat.sendMessageStream = vi .fn() .mockResolvedValueOnce(createEmptyStream()) @@ -1584,6 +1595,9 @@ describe('Session', () => { .mockReturnValue([ { role: 'model', parts: [{ text: 'response text' }] }, ]); + mockChat.getLastModelMessageText = vi + .fn() + .mockReturnValue('response text'); mockChat.sendMessageStream = vi .fn() .mockResolvedValueOnce(createEmptyStream()) @@ -1655,6 +1669,9 @@ describe('Session', () => { .mockReturnValue([ { role: 'model', parts: [{ text: 'response text' }] }, ]); + mockChat.getLastModelMessageText = vi + .fn() + .mockReturnValue('response text'); mockChat.sendMessageStream = vi .fn() .mockResolvedValue(createEmptyStream()); @@ -2405,6 +2422,9 @@ describe('Session', () => { .mockReturnValue([ { role: 'model', parts: [{ text: 'response text' }] }, ]); + mockChat.getLastModelMessageText = vi + .fn() + .mockReturnValue('response text'); mockChat.sendMessageStream = vi.fn().mockResolvedValue( createStreamWithChunks([ @@ -2458,6 +2478,9 @@ describe('Session', () => { .mockReturnValue([ { role: 'model', parts: [{ text: 'response text' }] }, ]); + mockChat.getLastModelMessageText = vi + .fn() + .mockReturnValue('response text'); mockChat.sendMessageStream = vi .fn() .mockResolvedValue(createEmptyStream()); @@ -2503,6 +2526,9 @@ describe('Session', () => { .mockReturnValue([ { role: 'model', parts: [{ text: 'response text' }] }, ]); + mockChat.getLastModelMessageText = vi + .fn() + .mockReturnValue('response text'); mockChat.sendMessageStream = vi .fn() .mockResolvedValue(createEmptyStream()); diff --git a/packages/cli/src/acp-integration/session/Session.ts b/packages/cli/src/acp-integration/session/Session.ts index df4be4e7df3..38b6ef652a6 100644 --- a/packages/cli/src/acp-integration/session/Session.ts +++ b/packages/cli/src/acp-integration/session/Session.ts @@ -402,7 +402,7 @@ export class Session implements SessionContext { } const chat = this.config.getGeminiClient()!.getChat(); - const apiHistory = chat.getHistory(); + const apiHistory = chat.getHistoryShallow(); const apiTruncateIndex = this.#computeApiTruncationIndexForUserTurn( apiHistory, targetTurnIndex, @@ -895,16 +895,10 @@ export class Session implements SessionContext { return { stopReason: 'end_turn' }; } - // Get response text from the chat history - const history = this.#getCurrentChat().getHistory(); - const lastModelMessage = history - .filter((msg: Content) => msg.role === 'model') - .pop(); + // Extract last model text without cloning the full history. const responseText = - lastModelMessage?.parts - ?.filter((p: Part): p is { text: string } & Part => 'text' in p) - .map((p: { text: string }) => p.text) - .join('') || '[no response text]'; + this.#getCurrentChat().getLastModelMessageText?.() || + '[no response text]'; const response = await messageBus.request< HookExecutionRequest, diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx index 6fbc512dd2f..e8362122bb7 100644 --- a/packages/cli/src/ui/AppContainer.tsx +++ b/packages/cli/src/ui/AppContainer.tsx @@ -2108,10 +2108,9 @@ export const AppContainer = (props: AppContainerProps) => { const ac = new AbortController(); suggestionAbortRef.current = ac; - // Use curated history to avoid invalid/empty entries causing API errors - const fullHistory = geminiClient.getChat().getHistory(true); - const conversationHistory = - fullHistory.length > 40 ? fullHistory.slice(-40) : fullHistory; + // Only clone the tail — full structuredClone of a large resumed session + // causes transient heap peaks that trigger OOM (#4624). + const conversationHistory = geminiClient.getHistoryTail(40, true); generatePromptSuggestion(config, conversationHistory, ac.signal, { enableCacheSharing: settings.merged.ui?.enableCacheSharing === true, }) @@ -2463,7 +2462,7 @@ export const AppContainer = (props: AppContainerProps) => { apiTruncateIndex = computeApiTruncationIndex( historyManager.history, userItem.id, - geminiClient.getHistory(), + geminiClient.getHistoryShallow(), ); if (apiTruncateIndex < 0) { historyManager.addItem( diff --git a/packages/cli/src/ui/commands/btwCommand.test.ts b/packages/cli/src/ui/commands/btwCommand.test.ts index 5aebbbb1a3e..83424c6cf84 100644 --- a/packages/cli/src/ui/commands/btwCommand.test.ts +++ b/packages/cli/src/ui/commands/btwCommand.test.ts @@ -176,6 +176,11 @@ describe('btwCommand', () => { .mockReturnValue([ { role: 'user', parts: [{ text: '杭州天气如何?' }] }, ]), + getHistoryTail: vi + .fn() + .mockReturnValue([ + { role: 'user', parts: [{ text: '杭州天气如何?' }] }, + ]), getChat: vi.fn().mockReturnValue({ getGenerationConfig: vi.fn().mockReturnValue({ systemInstruction: 'You are helpful', @@ -229,6 +234,10 @@ describe('btwCommand', () => { { role: 'user', parts: [{ text: '杭州天气如何?' }] }, { role: 'user', parts: [{ text: '请顺便解释一下湿度怎么看' }] }, ]), + getHistoryTail: vi.fn().mockReturnValue([ + { role: 'user', parts: [{ text: '杭州天气如何?' }] }, + { role: 'user', parts: [{ text: '请顺便解释一下湿度怎么看' }] }, + ]), getChat: vi.fn().mockReturnValue({ getGenerationConfig: vi.fn().mockReturnValue({ systemInstruction: 'live system prompt', diff --git a/packages/cli/src/ui/commands/btwCommand.ts b/packages/cli/src/ui/commands/btwCommand.ts index 801b6a275f4..0bba5f62f4a 100644 --- a/packages/cli/src/ui/commands/btwCommand.ts +++ b/packages/cli/src/ui/commands/btwCommand.ts @@ -55,7 +55,7 @@ function getBtwCacheSafeParams( geminiClient && typeof geminiClient === 'object' && typeof geminiClient.getChat === 'function' && - typeof geminiClient.getHistory === 'function' + typeof geminiClient.getHistoryTail === 'function' ) { const chat = geminiClient.getChat(); if ( @@ -65,12 +65,7 @@ function getBtwCacheSafeParams( ) { const generationConfig = chat.getGenerationConfig(); if (generationConfig) { - const fullHistory = geminiClient.getHistory(true); - const maxHistoryEntries = 40; - const history = - fullHistory.length > maxHistoryEntries - ? fullHistory.slice(-maxHistoryEntries) - : fullHistory; + const history = geminiClient.getHistoryTail(40, true); return { generationConfig, diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index 91272d1e4de..bae8e2dfacf 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -2462,7 +2462,7 @@ export const useGeminiStream = ( const toolName = toolCall.request.name; const fileName = path.basename(filePath); const toolCallWithSnapshotFileName = `${timestamp}-${fileName}-${toolName}.json`; - const clientHistory = await geminiClient?.getHistory(); + const clientHistory = geminiClient?.getHistoryShallow(); const toolCallWithSnapshotFilePath = path.join( checkpointDir, toolCallWithSnapshotFileName, diff --git a/packages/core/src/services/sessionRecap.ts b/packages/core/src/services/sessionRecap.ts index 4f0ba75da60..147f2594f4f 100644 --- a/packages/core/src/services/sessionRecap.ts +++ b/packages/core/src/services/sessionRecap.ts @@ -49,7 +49,7 @@ export async function generateSessionRecap( const geminiClient = config.getGeminiClient(); if (!geminiClient) return null; - const fullHistory = geminiClient.getChat().getHistory(); + const fullHistory = geminiClient.getHistoryShallow(); if (fullHistory.length < 2) return null; const dialog = filterToDialog(fullHistory); diff --git a/packages/core/src/services/sessionTitle.test.ts b/packages/core/src/services/sessionTitle.test.ts index 05bb83b9db4..2da674f754d 100644 --- a/packages/core/src/services/sessionTitle.test.ts +++ b/packages/core/src/services/sessionTitle.test.ts @@ -31,6 +31,7 @@ function makeConfig(opts: MockOptions): { getFastModel: vi.fn(() => opts.fastModel ?? undefined), getModel: vi.fn(() => 'qwen-plus'), getGeminiClient: vi.fn(() => ({ + getHistoryShallow: () => opts.history ?? [], getChat: () => ({ getHistory: () => opts.history ?? [], }), @@ -202,7 +203,10 @@ describe('tryGenerateSessionTitle', () => { getFastModel: vi.fn(() => 'qwen-turbo'), getModel: vi.fn(() => 'qwen-plus'), getGeminiClient: vi.fn(() => ({ - getChat: () => ({ getHistory: () => history }), + getHistoryShallow: () => history, + getChat: () => ({ + getHistory: () => history, + }), })), getBaseLlmClient: vi.fn(() => ({ generateJson })), } as unknown as Config; @@ -240,7 +244,10 @@ describe('tryGenerateSessionTitle', () => { getFastModel: vi.fn(() => 'qwen-turbo'), getModel: vi.fn(() => 'qwen-plus'), getGeminiClient: vi.fn(() => ({ - getChat: () => ({ getHistory: () => history }), + getHistoryShallow: () => history, + getChat: () => ({ + getHistory: () => history, + }), })), getBaseLlmClient: vi.fn(() => ({ generateJson })), } as unknown as Config; diff --git a/packages/core/src/services/sessionTitle.ts b/packages/core/src/services/sessionTitle.ts index 331b5e86361..b82c2028ad4 100644 --- a/packages/core/src/services/sessionTitle.ts +++ b/packages/core/src/services/sessionTitle.ts @@ -113,7 +113,7 @@ export async function tryGenerateSessionTitle( const geminiClient = config.getGeminiClient(); if (!geminiClient) return { ok: false, reason: 'no_client' }; - const fullHistory = geminiClient.getChat().getHistory(); + const fullHistory = geminiClient.getHistoryShallow(); if (fullHistory.length < 2) return { ok: false, reason: 'empty_history' }; const dialog = filterToDialog(fullHistory);