diff --git a/packages/core/src/core/openaiContentGenerator/converter.test.ts b/packages/core/src/core/openaiContentGenerator/converter.test.ts index 039c4733875..85810759c8a 100644 --- a/packages/core/src/core/openaiContentGenerator/converter.test.ts +++ b/packages/core/src/core/openaiContentGenerator/converter.test.ts @@ -1474,6 +1474,504 @@ describe('OpenAIContentConverter', () => { }); }); + it('should drop tool responses that are not adjacent to their assistant tool call', () => { + const request: GenerateContentParameters = { + model: 'models/test', + contents: [ + { + role: 'model', + parts: [ + { + functionCall: { + id: 'call_a', + name: 'read_file', + args: { path: 'a.txt' }, + }, + }, + { + functionCall: { + id: 'call_b', + name: 'grep', + args: { pattern: 'needle' }, + }, + }, + ], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + id: 'call_a', + name: 'read_file', + response: { output: 'A' }, + }, + }, + ], + }, + { + role: 'user', + parts: [{ text: 'history text inserted between tool results' }], + }, + { + role: 'model', + parts: [ + { + functionCall: { + id: 'call_c', + name: 'list_files', + args: {}, + }, + }, + ], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + id: 'call_c', + name: 'list_files', + response: { output: 'C' }, + }, + }, + { + functionResponse: { + id: 'call_b', + name: 'grep', + response: { output: 'B' }, + }, + }, + ], + }, + ], + }; + + const messages = converter.convertGeminiRequestToOpenAI( + request, + requestContext, + ); + const assistantWithCallA = messages.find( + (message): message is OpenAI.Chat.ChatCompletionAssistantMessageParam => + message.role === 'assistant' && + 'tool_calls' in message && + Array.isArray(message.tool_calls) && + message.tool_calls.some((toolCall) => toolCall.id === 'call_a'), + ); + + expect( + assistantWithCallA?.tool_calls?.map((toolCall) => toolCall.id), + ).toEqual(['call_a']); + + const toolCallIds = messages + .filter( + (message): message is OpenAI.Chat.ChatCompletionToolMessageParam => + message.role === 'tool' && 'tool_call_id' in message, + ) + .map((message) => message.tool_call_id); + + expect(toolCallIds).toEqual(['call_a', 'call_c']); + }); + + it('should keep assistant text when all tool calls are orphaned', () => { + const request: GenerateContentParameters = { + model: 'models/test', + contents: [ + { + role: 'model', + parts: [ + { text: 'I can answer without the tool.' }, + { + functionCall: { + id: 'call_missing', + name: 'read_file', + args: { path: 'missing.txt' }, + }, + }, + ], + }, + { + role: 'user', + parts: [{ text: 'continue' }], + }, + ], + }; + + const messages = converter.convertGeminiRequestToOpenAI( + request, + requestContext, + ); + const assistant = messages.find( + (message): message is OpenAI.Chat.ChatCompletionAssistantMessageParam => + message.role === 'assistant', + ); + + expect(assistant?.content).toBe('I can answer without the tool.'); + expect('tool_calls' in (assistant ?? {})).toBe(false); + expect(messages.some((message) => message.role === 'tool')).toBe(false); + }); + + it('should drop assistant-only tool calls when all responses are orphaned', () => { + const request: GenerateContentParameters = { + model: 'models/test', + contents: [ + { + role: 'model', + parts: [ + { + functionCall: { + id: 'call_missing', + name: 'read_file', + args: { path: 'missing.txt' }, + }, + }, + ], + }, + { + role: 'user', + parts: [{ text: 'break adjacency' }], + }, + { + role: 'user', + parts: [{ text: 'continue' }], + }, + ], + }; + + const messages = converter.convertGeminiRequestToOpenAI( + request, + requestContext, + ); + + expect(messages.some((message) => message.role === 'assistant')).toBe( + false, + ); + expect(messages.some((message) => message.role === 'tool')).toBe(false); + }); + + it('should keep a tool response after an empty-id tool message', () => { + const request: GenerateContentParameters = { + model: 'models/test', + contents: [ + { + role: 'model', + parts: [ + { + functionCall: { + id: 'call_a', + name: 'read_file', + args: { path: 'a.txt' }, + }, + }, + ], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'empty_id', + response: { output: 'no id' }, + }, + }, + { + functionResponse: { + id: 'call_a', + name: 'read_file', + response: { output: 'A' }, + }, + }, + ], + }, + ], + }; + + const messages = converter.convertGeminiRequestToOpenAI( + request, + requestContext, + ); + const toolCallIds = messages + .filter( + (message): message is OpenAI.Chat.ChatCompletionToolMessageParam => + message.role === 'tool' && 'tool_call_id' in message, + ) + .map((message) => message.tool_call_id); + + expect(toolCallIds).toEqual(['call_a']); + }); + + it('should clean after merging consecutive assistant turns', () => { + const request: GenerateContentParameters = { + model: 'models/test', + contents: [ + { + role: 'model', + parts: [ + { + functionCall: { + id: 'call_a', + name: 'read_file', + args: { path: 'a.txt' }, + }, + }, + ], + }, + { + role: 'model', + parts: [{ text: 'A short follow-up.' }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + id: 'call_a', + name: 'read_file', + response: { output: 'A' }, + }, + }, + ], + }, + ], + }; + + const messages = converter.convertGeminiRequestToOpenAI( + request, + requestContext, + ); + + expect(messages[0]).toMatchObject({ + role: 'assistant', + content: 'A short follow-up.', + }); + expect( + ( + messages[0] as OpenAI.Chat.ChatCompletionAssistantMessageParam + ).tool_calls?.map((toolCall) => toolCall.id), + ).toEqual(['call_a']); + expect(messages[1]).toMatchObject({ + role: 'tool', + tool_call_id: 'call_a', + }); + }); + + it('should keep split media after all adjacent tool responses across content items', () => { + const request: GenerateContentParameters = { + model: 'models/test', + contents: [ + { + role: 'model', + parts: [ + { + functionCall: { id: 'call_a', name: 'shot_a', args: {} }, + }, + { + functionCall: { id: 'call_b', name: 'shot_b', args: {} }, + }, + ], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + id: 'call_a', + name: 'shot_a', + response: { output: 'A' }, + parts: [ + { inlineData: { mimeType: 'image/png', data: 'aaa' } }, + ], + }, + }, + ], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + id: 'call_b', + name: 'shot_b', + response: { output: 'B' }, + parts: [ + { inlineData: { mimeType: 'image/png', data: 'bbb' } }, + ], + }, + }, + ], + }, + ], + }; + const strictContext: RequestContext = { + ...requestContext, + splitToolMedia: true, + }; + + const messages = converter.convertGeminiRequestToOpenAI( + request, + strictContext, + ); + const assistantIndex = messages.findIndex( + (message) => message.role === 'assistant', + ); + + expect(messages[assistantIndex + 1]).toMatchObject({ + role: 'tool', + tool_call_id: 'call_a', + }); + expect(messages[assistantIndex + 2]).toMatchObject({ + role: 'tool', + tool_call_id: 'call_b', + }); + expect(messages[assistantIndex + 3]?.role).toBe('user'); + expect(messages[assistantIndex + 4]?.role).toBe('user'); + }); + + it('should not keep split media from orphaned tool responses', () => { + const request: GenerateContentParameters = { + model: 'models/test', + contents: [ + { + role: 'model', + parts: [ + { + functionCall: { id: 'call_a', name: 'shot_a', args: {} }, + }, + ], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + id: 'call_x', + name: 'shot_x', + response: { output: 'X' }, + parts: [ + { inlineData: { mimeType: 'image/png', data: 'xxx' } }, + ], + }, + }, + ], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + id: 'call_a', + name: 'shot_a', + response: { output: 'A' }, + }, + }, + ], + }, + ], + }; + const strictContext: RequestContext = { + ...requestContext, + splitToolMedia: true, + }; + + const messages = converter.convertGeminiRequestToOpenAI( + request, + strictContext, + ); + + expect(messages.map((message) => message.role)).toEqual([ + 'assistant', + 'tool', + ]); + expect(messages[1]).toMatchObject({ + role: 'tool', + tool_call_id: 'call_a', + }); + }); + + it('should merge assistant turns created by orphan cleanup', () => { + const request: GenerateContentParameters = { + model: 'models/test', + contents: [ + { + role: 'model', + parts: [ + { + functionCall: { id: 'call_a', name: 'read_file', args: {} }, + }, + ], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + id: 'call_a', + name: 'read_file', + response: { output: 'A' }, + }, + }, + ], + }, + { + role: 'model', + parts: [{ text: 'Next I will call another tool.' }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + id: 'call_orphan', + name: 'stale_tool', + response: { output: 'stale' }, + }, + }, + ], + }, + { + role: 'model', + parts: [ + { + functionCall: { id: 'call_b', name: 'grep', args: {} }, + }, + ], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + id: 'call_b', + name: 'grep', + response: { output: 'B' }, + }, + }, + ], + }, + ], + }; + + const messages = converter.convertGeminiRequestToOpenAI( + request, + requestContext, + ); + + for (let index = 1; index < messages.length; index += 1) { + expect([messages[index - 1].role, messages[index].role]).not.toEqual([ + 'assistant', + 'assistant', + ]); + } + expect( + messages + .filter( + (message): message is OpenAI.Chat.ChatCompletionToolMessageParam => + message.role === 'tool' && 'tool_call_id' in message, + ) + .map((message) => message.tool_call_id), + ).toEqual(['call_a', 'call_b']); + }); + describe('assistant message with reasoning-only content (issue #3421)', () => { /** * Regression tests for https://github.com/QwenLM/qwen-code/issues/3421 @@ -1514,6 +2012,41 @@ describe('OpenAIContentConverter', () => { ).toBe('I reasoned about it.'); }); + it('should keep reasoning content when orphaned tool calls are removed', () => { + const request: GenerateContentParameters = { + model: 'models/test', + contents: [ + { + role: 'model', + parts: [ + { text: 'I need to inspect this.', thought: true }, + { + functionCall: { + id: 'call_missing', + name: 'read_file', + args: {}, + }, + }, + ], + }, + { role: 'user', parts: [{ text: 'break adjacency' }] }, + ], + }; + + const messages = converter.convertGeminiRequestToOpenAI( + request, + requestContext, + ); + + const assistantMsg = messages.find((m) => m.role === 'assistant'); + expect(assistantMsg).toBeDefined(); + expect((assistantMsg as { content: unknown }).content).toBe(''); + expect( + (assistantMsg as { reasoning_content?: string }).reasoning_content, + ).toBe('I need to inspect this.'); + expect('tool_calls' in (assistantMsg ?? {})).toBe(false); + }); + it('should keep content null when assistant has only tool_calls and no reasoning', () => { const request: GenerateContentParameters = { model: 'models/test', diff --git a/packages/core/src/core/openaiContentGenerator/converter.ts b/packages/core/src/core/openaiContentGenerator/converter.ts index d0892d09de0..b0161807475 100644 --- a/packages/core/src/core/openaiContentGenerator/converter.ts +++ b/packages/core/src/core/openaiContentGenerator/converter.ts @@ -29,6 +29,7 @@ import { } from '../../utils/schemaConverter.js'; const debugLogger = createDebugLogger('CONVERTER'); +const SPLIT_TOOL_MEDIA_TEXT = '(attached media from previous tool call)'; /** * Extended usage type that supports both OpenAI standard format and alternative formats @@ -380,11 +381,11 @@ export function convertGeminiRequestToOpenAI( // Handle contents processContents(request.contents, messages, requestContext); - // Clean up orphaned tool calls and merge consecutive assistant messages + messages = mergeConsecutiveAssistantMessages(messages); if (options.cleanOrphanToolCalls) { messages = cleanOrphanedToolCalls(messages); + messages = mergeConsecutiveAssistantMessages(messages); } - messages = mergeConsecutiveAssistantMessages(messages); return messages; } @@ -645,7 +646,7 @@ function processContent( content: [ { type: 'text', - text: '(attached media from previous tool call)', + text: SPLIT_TOOL_MEDIA_TEXT, }, ...accumulatedSplitMedia, ] as unknown as OpenAI.Chat.ChatCompletionContentPartText[], @@ -1387,51 +1388,126 @@ function mapGeminiFinishReasonToOpenAI( } } +/** Type guard: is this an assistant message with at least one tool call? */ +function hasToolCalls( + message: OpenAI.Chat.ChatCompletionMessageParam, +): message is OpenAI.Chat.ChatCompletionAssistantMessageParam & { + tool_calls: OpenAI.Chat.ChatCompletionMessageToolCall[]; +} { + return ( + message.role === 'assistant' && + 'tool_calls' in message && + Array.isArray(message.tool_calls) && + message.tool_calls.length > 0 + ); +} + +function isSplitToolMediaMessage( + message: OpenAI.Chat.ChatCompletionMessageParam, +): boolean { + if ( + message.role !== 'user' || + !('content' in message) || + !Array.isArray(message.content) + ) { + return false; + } + + const firstPart = message.content[0] as + | { type?: string; text?: string } + | undefined; + return firstPart?.type === 'text' && firstPart.text === SPLIT_TOOL_MEDIA_TEXT; +} + /** * Clean up orphaned tool calls from message history to prevent OpenAI API errors. + * + * Assumes consecutive assistant messages have already been merged. */ function cleanOrphanedToolCalls( messages: OpenAI.Chat.ChatCompletionMessageParam[], ): OpenAI.Chat.ChatCompletionMessageParam[] { const cleaned: OpenAI.Chat.ChatCompletionMessageParam[] = []; - const toolCallIds = new Set(); - const toolResponseIds = new Set(); + const adjacentToolResponseIdsByAssistant = new Map>(); + const validToolResponseIndexesByAssistant = new Map(); + const splitMediaIndexesByAssistant = new Map(); + const emittedWithAssistant = new Set(); + + for (let index = 0; index < messages.length; index += 1) { + const message = messages[index]; + if (hasToolCalls(message)) { + const toolCallIds = new Set( + message.tool_calls + .map((toolCall) => toolCall.id) + .filter((id): id is string => Boolean(id)), + ); + const adjacentToolResponseIds = new Set(); + const toolResponseIndexes: number[] = []; + const splitMediaIndexes: number[] = []; + let lastToolResponseMatchesAssistant = false; + + for ( + let nextIndex = index + 1; + nextIndex < messages.length; + nextIndex += 1 + ) { + const nextMessage = messages[nextIndex]; + if (nextMessage.role === 'tool' && 'tool_call_id' in nextMessage) { + if (!nextMessage.tool_call_id) { + lastToolResponseMatchesAssistant = false; + continue; + } - // First pass: collect all tool call IDs and tool response IDs - for (const message of messages) { - if ( - message.role === 'assistant' && - 'tool_calls' in message && - message.tool_calls - ) { - for (const toolCall of message.tool_calls) { - if (toolCall.id) { - toolCallIds.add(toolCall.id); + if (toolCallIds.has(nextMessage.tool_call_id)) { + adjacentToolResponseIds.add(nextMessage.tool_call_id); + toolResponseIndexes.push(nextIndex); + lastToolResponseMatchesAssistant = true; + } else { + lastToolResponseMatchesAssistant = false; + } + + // Other tool responses in this block may belong to another assistant. + continue; + } + + if (isSplitToolMediaMessage(nextMessage)) { + if (lastToolResponseMatchesAssistant) { + splitMediaIndexes.push(nextIndex); + } + continue; + } + + if (nextMessage.role === 'assistant' && !hasToolCalls(nextMessage)) { + // Consecutive assistant turns are merged before cleanup. + continue; } + + break; } - } else if ( - message.role === 'tool' && - 'tool_call_id' in message && - message.tool_call_id - ) { - toolResponseIds.add(message.tool_call_id); + + adjacentToolResponseIdsByAssistant.set(index, adjacentToolResponseIds); + validToolResponseIndexesByAssistant.set(index, toolResponseIndexes); + splitMediaIndexesByAssistant.set(index, splitMediaIndexes); } } - // Second pass: filter out orphaned messages - for (const message of messages) { - if ( - message.role === 'assistant' && - 'tool_calls' in message && - message.tool_calls - ) { - // Filter out tool calls that don't have corresponding responses + for (let index = 0; index < messages.length; index += 1) { + if (emittedWithAssistant.has(index)) { + continue; + } + + const message = messages[index]; + if (hasToolCalls(message)) { + const reasoningContent = ( + message as ExtendedChatCompletionAssistantMessageParam + ).reasoning_content; + const adjacentToolResponseIds = + adjacentToolResponseIdsByAssistant.get(index) ?? new Set(); const validToolCalls = message.tool_calls.filter( - (toolCall) => toolCall.id && toolResponseIds.has(toolCall.id), + (toolCall) => toolCall.id && adjacentToolResponseIds.has(toolCall.id), ); if (validToolCalls.length > 0) { - // Keep the message but only with valid tool calls const cleanedMessage = { ...message }; ( cleanedMessage as OpenAI.Chat.ChatCompletionMessageParam & { @@ -1439,103 +1515,56 @@ function cleanOrphanedToolCalls( } ).tool_calls = validToolCalls; cleaned.push(cleanedMessage); - } else if ( - typeof message.content === 'string' && - message.content.trim() - ) { - // Keep the message if it has text content, but remove tool calls - const cleanedMessage = { ...message }; - delete ( - cleanedMessage as OpenAI.Chat.ChatCompletionMessageParam & { - tool_calls?: OpenAI.Chat.ChatCompletionMessageToolCall[]; - } - ).tool_calls; - cleaned.push(cleanedMessage); - } - // If no valid tool calls and no content, skip the message entirely - } else if ( - message.role === 'tool' && - 'tool_call_id' in message && - message.tool_call_id - ) { - // Only keep tool responses that have corresponding tool calls - if (toolCallIds.has(message.tool_call_id)) { - cleaned.push(message); - } - } else { - // Keep all other messages as-is - cleaned.push(message); - } - } - - // Final validation: ensure every assistant message with tool_calls has corresponding tool responses - const finalCleaned: OpenAI.Chat.ChatCompletionMessageParam[] = []; - const finalToolCallIds = new Set(); - // Collect all remaining tool call IDs - for (const message of cleaned) { - if ( - message.role === 'assistant' && - 'tool_calls' in message && - message.tool_calls - ) { - for (const toolCall of message.tool_calls) { - if (toolCall.id) { - finalToolCallIds.add(toolCall.id); + for (const toolResponseIndex of validToolResponseIndexesByAssistant.get( + index, + ) ?? []) { + const toolResponse = messages[toolResponseIndex]; + if (toolResponse) { + cleaned.push(toolResponse); + emittedWithAssistant.add(toolResponseIndex); + } } - } - } - } - - // Verify all tool calls have responses - const finalToolResponseIds = new Set(); - for (const message of cleaned) { - if ( - message.role === 'tool' && - 'tool_call_id' in message && - message.tool_call_id - ) { - finalToolResponseIds.add(message.tool_call_id); - } - } - - // Remove any remaining orphaned tool calls - for (const message of cleaned) { - if ( - message.role === 'assistant' && - 'tool_calls' in message && - message.tool_calls - ) { - const finalValidToolCalls = message.tool_calls.filter( - (toolCall) => toolCall.id && finalToolResponseIds.has(toolCall.id), - ); - if (finalValidToolCalls.length > 0) { - const cleanedMessage = { ...message }; - ( - cleanedMessage as OpenAI.Chat.ChatCompletionMessageParam & { - tool_calls?: OpenAI.Chat.ChatCompletionMessageToolCall[]; + for (const splitMediaIndex of splitMediaIndexesByAssistant.get(index) ?? + []) { + const splitMediaMessage = messages[splitMediaIndex]; + if (splitMediaMessage) { + cleaned.push(splitMediaMessage); + emittedWithAssistant.add(splitMediaIndex); } - ).tool_calls = finalValidToolCalls; - finalCleaned.push(cleanedMessage); + } } else if ( - typeof message.content === 'string' && - message.content.trim() + (typeof message.content === 'string' && message.content.trim()) || + reasoningContent ) { + // Keep text/reasoning content, but remove orphaned tool calls. const cleanedMessage = { ...message }; delete ( cleanedMessage as OpenAI.Chat.ChatCompletionMessageParam & { tool_calls?: OpenAI.Chat.ChatCompletionMessageToolCall[]; } ).tool_calls; - finalCleaned.push(cleanedMessage); + cleaned.push(cleanedMessage); + } else { + debugLogger.debug( + `cleanOrphanedToolCalls: dropping assistant with ${message.tool_calls.length} orphaned tool call(s) and no text/reasoning content`, + ); } + } else if (message.role === 'tool' && 'tool_call_id' in message) { + debugLogger.debug( + `cleanOrphanedToolCalls: dropping orphaned tool response ${message.tool_call_id || ''}`, + ); + } else if (isSplitToolMediaMessage(message)) { + debugLogger.debug( + 'cleanOrphanedToolCalls: dropping orphaned split tool media message', + ); } else { - finalCleaned.push(message); + cleaned.push(message); } } - return finalCleaned; + return cleaned; } /**