From 687a35d3cc96cc0c4207f8c8643c42c7f6486d30 Mon Sep 17 00:00:00 2001
From: Drew Stone <drewstone329@gmail.com>
Date: Sat, 13 Jun 2026 18:59:20 -0600
Subject: [PATCH] =?UTF-8?q?fix(tool-loop):=20strict-model=20tool-call=20hi?=
 =?UTF-8?q?story=20(assistant+tool=5Fcalls=20/=20role:tool)=20=E2=80=94=20?=
 =?UTF-8?q?not=20a=20user-message=20fold?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

runToolLoop and streamToolLoop folded every tool outcome into a single
{ role: 'user', content: 'Tool results:\n…' } message and dropped the
assistant turn's tool_calls. A strict tool-history validator (Claude, and
any OpenAI-compatible provider that validates) can't see its own tool use
that way and re-issues the same call in a loop.

The loop now appends OpenAI function-calling shape: the assistant turn that
emitted the calls is preserved as an assistant message carrying its
tool_calls array (content null when the turn was tool-only), and each result
is its own { role: 'tool', tool_call_id, content } message keyed to its call.
A missing tool-call id is derived deterministically from the tool name so the
assistant entry and its result still match.

ToolLoopMessage widens additively: tool_calls / tool_call_id are optional and
content is string | null. A streamTurn that reads only role + content is
unaffected; one that forwards the whole message array to an OpenAI-compatible
endpoint now sends correct tool history.
---
 src/index.ts          |   1 +
 src/tool-loop.test.ts | 177 +++++++++++++++++++++++++++++++++++++-----
 src/tool-loop.ts      |  79 ++++++++++++++++---
 3 files changed, 227 insertions(+), 30 deletions(-)

diff --git a/src/index.ts b/src/index.ts
index 9eb0749f..d021b9cc 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -224,6 +224,7 @@ export {
   type StreamToolLoopYield,
   streamToolLoop,
   type ToolCallOutcome,
+  type ToolLoopAssistantToolCall,
   type ToolLoopCall,
   type ToolLoopEvent,
   type ToolLoopMessage,
diff --git a/src/tool-loop.test.ts b/src/tool-loop.test.ts
index 2af92276..97423db6 100644
--- a/src/tool-loop.test.ts
+++ b/src/tool-loop.test.ts
@@ -7,6 +7,7 @@ import {
   type ToolCallOutcome,
   type ToolLoopCall,
   type ToolLoopEvent,
+  type ToolLoopMessage,
 } from './tool-loop'
 
 const isExec = (n: string) => n === 'submit_proposal' || n === 'schedule_followup'
@@ -31,8 +32,8 @@ describe('runToolLoop', () => {
 
   it('executes a tool call, folds the result back, re-runs to the final answer', async () => {
     const calls: ToolLoopCall[] = []
-    const streamTurn = async function* (messages: Array<{ role: string; content: string }>) {
-      if (!messages.some((m) => m.content.includes('Tool results'))) {
+    const streamTurn = async function* (messages: ToolLoopMessage[]) {
+      if (!messages.some((m) => m.role === 'tool')) {
         yield { type: 'text', text: 'Routing. ' } as ToolLoopEvent
         yield {
           type: 'tool_call',
@@ -57,6 +58,113 @@ describe('runToolLoop', () => {
     expect(r.finalText).toBe('Routing. Done.')
   })
 
+  it('appends tool history in OpenAI shape: assistant+tool_calls then role:tool per result, no user fold', async () => {
+    const seen: ToolLoopMessage[][] = []
+    const streamTurn = async function* (messages: ToolLoopMessage[]) {
+      seen.push(messages)
+      if (!messages.some((m) => m.role === 'tool')) {
+        yield { type: 'text', text: 'Routing. ' } as ToolLoopEvent
+        yield {
+          type: 'tool_call',
+          call: { toolName: 'submit_proposal', toolCallId: 'p1', args: { title: 'A' } },
+        } as ToolLoopEvent
+        return
+      }
+      yield { type: 'text', text: 'Done.' } as ToolLoopEvent
+    }
+
+    await runToolLoop({
+      systemPrompt: 's',
+      userMessage: 'u',
+      streamTurn,
+      executeToolCall: async () => ({ ok: true, result: { id: 1 } }),
+      isExecutableTool: isExec,
+    })
+
+    // The second turn's history carries the model's own tool use in OpenAI shape.
+    const second = seen[1] ?? []
+    const assistant = second.find((m) => m.role === 'assistant' && m.tool_calls)
+    expect(assistant).toMatchObject({
+      role: 'assistant',
+      content: 'Routing.',
+      tool_calls: [{ id: 'p1', type: 'function', function: { name: 'submit_proposal' } }],
+    })
+    const tool = second.find((m) => m.role === 'tool')
+    expect(tool).toMatchObject({ role: 'tool', tool_call_id: 'p1' })
+    // The broken shape — a user message folding results — must not appear.
+    expect(
+      second.some((m) => m.role === 'user' && (m.content ?? '').includes('Tool results')),
+    ).toBe(false)
+  })
+
+  it('derives a stable tool_call_id from the tool name when the model omits one', async () => {
+    const seen: ToolLoopMessage[][] = []
+    const streamTurn = async function* (messages: ToolLoopMessage[]) {
+      seen.push(messages)
+      if (!messages.some((m) => m.role === 'tool')) {
+        yield {
+          type: 'tool_call',
+          call: { toolName: 'submit_proposal', args: { title: 'A' } },
+        } as ToolLoopEvent
+        return
+      }
+      yield { type: 'text', text: 'ok' } as ToolLoopEvent
+    }
+
+    await runToolLoop({
+      systemPrompt: 's',
+      userMessage: 'u',
+      streamTurn,
+      executeToolCall: async () => ({ ok: true, result: {} }),
+      isExecutableTool: isExec,
+    })
+
+    const second = seen[1] ?? []
+    const assistant = second.find((m) => m.role === 'assistant' && m.tool_calls)
+    const tool = second.find((m) => m.role === 'tool')
+    // A tool-only turn carries null content and a name-derived id matching its result.
+    expect(assistant?.content).toBeNull()
+    expect(assistant?.tool_calls?.[0]?.id).toBe('call_submit_proposal')
+    expect(tool?.tool_call_id).toBe('call_submit_proposal')
+  })
+
+  it('a strict model that re-issues on a user fold completes on the tool-history shape', async () => {
+    // This model mimics a strict tool-history validator (Claude, OpenAI-compat):
+    // if its own tool call is folded into a user message it cannot see the
+    // result and re-issues the same call forever; given the assistant+tool_calls
+    // / role:tool shape it reads the result back and finishes.
+    let calls = 0
+    const streamTurn = async function* (messages: ToolLoopMessage[]) {
+      const sawToolResult = messages.some((m) => m.role === 'tool')
+      const wasFoldedIntoUser = messages.some(
+        (m) => m.role === 'user' && (m.content ?? '').includes('Tool results'),
+      )
+      if (sawToolResult && !wasFoldedIntoUser) {
+        yield { type: 'text', text: 'Final answer.' } as ToolLoopEvent
+        return
+      }
+      // No readable result yet — re-issue the call (the looping pathology).
+      calls++
+      yield {
+        type: 'tool_call',
+        call: { toolName: 'submit_proposal', toolCallId: 'p1', args: { title: 'A' } },
+      } as ToolLoopEvent
+    }
+
+    const r = await runToolLoop({
+      systemPrompt: 's',
+      userMessage: 'u',
+      streamTurn,
+      executeToolCall: async () => ({ ok: true, result: { id: 1 } }),
+      isExecutableTool: isExec,
+    })
+
+    expect(r.stopReason).toBe('completed')
+    expect(r.finalText).toBe('Final answer.')
+    // One call, then it reads the result and stops — not a stuck loop.
+    expect(calls).toBe(1)
+  })
+
   it('ignores non-executable tool calls', async () => {
     const stream = async function* () {
       yield { type: 'text', text: 'done' } as ToolLoopEvent
@@ -196,8 +304,8 @@ describe('runToolLoop', () => {
   })
 
   it('turns an executor throw into a failed outcome', async () => {
-    const streamTurn = async function* (messages: Array<{ role: string; content: string }>) {
-      if (!messages.some((m) => m.content.includes('Tool results'))) {
+    const streamTurn = async function* (messages: ToolLoopMessage[]) {
+      if (!messages.some((m) => m.role === 'tool')) {
         yield {
           type: 'tool_call',
           call: { toolName: 'submit_proposal', args: {} },
@@ -224,8 +332,8 @@ describe('runToolLoop', () => {
 
   it('emits general before/after hook events around loop, turn, and tool call boundaries', async () => {
     const events: RuntimeHookEvent[] = []
-    const streamTurn = async function* (messages: Array<{ role: string; content: string }>) {
-      if (!messages.some((m) => m.content.includes('Tool results'))) {
+    const streamTurn = async function* (messages: ToolLoopMessage[]) {
+      if (!messages.some((m) => m.role === 'tool')) {
         yield {
           type: 'tool_call',
           call: { toolName: 'submit_proposal', toolCallId: 'p1', args: { title: 'A' } },
@@ -270,8 +378,8 @@ describe('runToolLoop', () => {
   it('emits one failure-recovery decision point before the next model turn', async () => {
     const order: string[] = []
     const points: RuntimeDecisionPoint[] = []
-    const streamTurn = async function* (messages: Array<{ role: string; content: string }>) {
-      if (!messages.some((m) => m.content.includes('Tool results'))) {
+    const streamTurn = async function* (messages: ToolLoopMessage[]) {
+      if (!messages.some((m) => m.role === 'tool')) {
         order.push('first-turn')
         yield { type: 'text', text: 'I will call the tool.' } as ToolLoopEvent
         yield {
@@ -331,8 +439,8 @@ describe('runToolLoop', () => {
 
   it('does not emit a failure-recovery decision point for successful tool results', async () => {
     const points: RuntimeDecisionPoint[] = []
-    const streamTurn = async function* (messages: Array<{ role: string; content: string }>) {
-      if (!messages.some((m) => m.content.includes('Tool results'))) {
+    const streamTurn = async function* (messages: ToolLoopMessage[]) {
+      if (!messages.some((m) => m.role === 'tool')) {
         yield {
           type: 'tool_call',
           call: { toolName: 'submit_proposal', args: {} },
@@ -360,8 +468,8 @@ describe('runToolLoop', () => {
 
   it('isolates hook failures from the tool loop', async () => {
     const hookErrors: string[] = []
-    const streamTurn = async function* (messages: Array<{ role: string; content: string }>) {
-      if (!messages.some((m) => m.content.includes('Tool results'))) {
+    const streamTurn = async function* (messages: ToolLoopMessage[]) {
+      if (!messages.some((m) => m.role === 'tool')) {
         yield {
           type: 'tool_call',
           call: { toolName: 'submit_proposal', args: {} },
@@ -437,10 +545,8 @@ describe('streamToolLoop', () => {
   }
 
   it('yields each raw event + each tool_result and drives the loop', async () => {
-    const streamTurn = async function* (
-      messages: Array<{ role: string; content: string }>,
-    ): AsyncIterable<Raw> {
-      if (!messages.some((m) => m.content.includes('Tool results'))) {
+    const streamTurn = async function* (messages: ToolLoopMessage[]): AsyncIterable<Raw> {
+      if (!messages.some((m) => m.role === 'tool')) {
         yield { type: 'text_delta', text: 'Routing. ' }
         yield { type: 'tool_call', toolName: 'submit_proposal', toolCallId: 'p1', args: {} }
         return
@@ -460,6 +566,39 @@ describe('streamToolLoop', () => {
     expect(ys.filter((y) => y.kind === 'tool_result').length).toBe(1)
   })
 
+  it('appends tool history in OpenAI shape (assistant+tool_calls then role:tool), no user fold', async () => {
+    const seen: ToolLoopMessage[][] = []
+    const streamTurn = async function* (messages: ToolLoopMessage[]): AsyncIterable<Raw> {
+      seen.push(messages)
+      if (!messages.some((m) => m.role === 'tool')) {
+        yield { type: 'text_delta', text: 'Routing. ' }
+        yield { type: 'tool_call', toolName: 'submit_proposal', toolCallId: 'p1', args: { a: 1 } }
+        return
+      }
+      yield { type: 'text_delta', text: 'Done.' }
+    }
+    for await (const _ of streamToolLoop<Raw>({
+      systemPrompt: 's',
+      userMessage: 'u',
+      streamTurn,
+      ...seams,
+      executeToolCall: async () => ({ ok: true, result: {} }),
+    })) {
+      // drain
+    }
+    const second = seen[1] ?? []
+    expect(second.find((m) => m.role === 'assistant' && m.tool_calls)).toMatchObject({
+      tool_calls: [{ id: 'p1', type: 'function', function: { name: 'submit_proposal' } }],
+    })
+    expect(second.find((m) => m.role === 'tool')).toMatchObject({
+      role: 'tool',
+      tool_call_id: 'p1',
+    })
+    expect(
+      second.some((m) => m.role === 'user' && (m.content ?? '').includes('Tool results')),
+    ).toBe(false)
+  })
+
   it('emits one capped signal with backstop stopReason when the model never stops', async () => {
     const streamTurn = async function* (): AsyncIterable<Raw> {
       yield { type: 'tool_call', toolName: 'submit_proposal', args: {} }
@@ -499,10 +638,8 @@ describe('streamToolLoop', () => {
 
   it('emits a failure-recovery decision point without changing streamed yields', async () => {
     const points: RuntimeDecisionPoint[] = []
-    const streamTurn = async function* (
-      messages: Array<{ role: string; content: string }>,
-    ): AsyncIterable<Raw> {
-      if (!messages.some((m) => m.content.includes('Tool results'))) {
+    const streamTurn = async function* (messages: ToolLoopMessage[]): AsyncIterable<Raw> {
+      if (!messages.some((m) => m.role === 'tool')) {
         yield { type: 'text_delta', text: 'Trying. ' }
         yield { type: 'tool_call', toolName: 'submit_proposal', toolCallId: 'p1', args: {} }
         return
diff --git a/src/tool-loop.ts b/src/tool-loop.ts
index 0f0d86e1..71daa2f9 100644
--- a/src/tool-loop.ts
+++ b/src/tool-loop.ts
@@ -41,7 +41,62 @@ const FAILURE_RECOVERY_ACTIONS = ['retry', 'verify', 'continue', 'stop']
  *  stuck-loop detection. The window resets on any different call. */
 const STUCK_LOOP_THRESHOLD = 3
 
-export type ToolLoopMessage = { role: string; content: string }
+/** One OpenAI-shaped tool-call entry carried on an assistant message. */
+export interface ToolLoopAssistantToolCall {
+  id: string
+  type: 'function'
+  function: { name: string; arguments: string }
+}
+
+/**
+ * A message in the running conversation the loop sends to `streamTurn`.
+ *
+ * The base `{ role, content }` covers `system` / `user` / plain `assistant`
+ * turns. Two optional fields carry the OpenAI function-calling contract so a
+ * strict model (Claude, and any OpenAI-compatible provider that validates tool
+ * history) reads its own tool use back instead of re-issuing the same call:
+ *
+ *   - an assistant turn that emitted tool calls carries `tool_calls`, and its
+ *     `content` is `null` when the turn was tool-only;
+ *   - each tool result is its own `{ role: 'tool', tool_call_id, content }`
+ *     message keyed to the call that produced it.
+ *
+ * Widening is additive: a `streamTurn` that reads only `role` + `content` still
+ * works; one that forwards the whole message to an OpenAI-compatible endpoint
+ * now sends correct tool history.
+ */
+export type ToolLoopMessage = {
+  role: string
+  content: string | null
+  tool_calls?: ToolLoopAssistantToolCall[]
+  tool_call_id?: string
+}
+
+/** A tool-call id is required to key a `role: 'tool'` result back to its call.
+ *  When the model omitted one, derive a stable id from the tool name so the
+ *  assistant `tool_calls` entry and its `tool` result still match. */
+function toolCallId(call: ToolLoopCall): string {
+  return call.toolCallId ?? `call_${call.toolName}`
+}
+
+/** The assistant turn that emitted `pending`, in OpenAI shape: text content
+ *  (null when the turn was tool-only) plus its `tool_calls` array. */
+function assistantToolCallMessage(turnText: string, pending: ToolLoopCall[]): ToolLoopMessage {
+  return {
+    role: 'assistant',
+    content: turnText.trim() || null,
+    tool_calls: pending.map((call) => ({
+      id: toolCallId(call),
+      type: 'function',
+      function: { name: call.toolName, arguments: JSON.stringify(call.args) },
+    })),
+  }
+}
+
+/** One `role: 'tool'` result message keyed to its call by `tool_call_id`. */
+function toolResultMessage(call: ToolLoopCall, content: string): ToolLoopMessage {
+  return { role: 'tool', tool_call_id: toolCallId(call), content }
+}
 
 function defaultRender(label: string, outcome: ToolCallOutcome): string {
   if (outcome.ok) return `- ${label} → ok: ${JSON.stringify(outcome.result)}`
@@ -158,8 +213,9 @@ export async function runToolLoop(opts: RunToolLoopOptions): Promise<ToolLoopRes
       return { finalText, toolResults, turns, stopReason: 'backstop', cappedOut: true }
     }
 
-    if (turnText.trim()) messages.push({ role: 'assistant', content: turnText })
-    const lines: string[] = []
+    // The assistant turn that emitted the calls, carrying its tool_calls array,
+    // so a strict model reads its own tool use back in OpenAI shape.
+    messages.push(assistantToolCallMessage(turnText, pending))
     const outcomes: ExecutedToolCall[] = []
     for (const [callIndex, call] of pending.entries()) {
       // Stuck-loop detection: hash the first pending call each turn (a model
@@ -198,6 +254,7 @@ export async function runToolLoop(opts: RunToolLoopOptions): Promise<ToolLoopRes
         if (accumulatedCostUsd >= opts.maxCostUsd) {
           const label = labelFor(call)
           toolResults.push({ call, label, outcome })
+          messages.push(toolResultMessage(call, render(label, outcome)))
           observer.toolCallAfter(toolTurn, callEventId, call, outcome)
           observer.turnAfter(toolTurn, turnEventId, {
             pendingToolCalls: pending.length,
@@ -211,8 +268,9 @@ export async function runToolLoop(opts: RunToolLoopOptions): Promise<ToolLoopRes
       const label = labelFor(call)
       const rendered = render(label, outcome)
       toolResults.push({ call, label, outcome })
-      lines.push(rendered)
       outcomes.push({ call, label, outcome, rendered })
+      // One role:'tool' message per result, keyed to its call by tool_call_id.
+      messages.push(toolResultMessage(call, rendered))
       observer.toolCallAfter(toolTurn, callEventId, call, outcome)
     }
     observer.failureRecovery({
@@ -230,7 +288,6 @@ export async function runToolLoop(opts: RunToolLoopOptions): Promise<ToolLoopRes
       })),
       failedToolCalls: outcomes.filter((item) => !item.outcome.ok).length,
     })
-    messages.push({ role: 'user', content: `Tool results:\n${lines.join('\n')}` })
   }
   observer.loopAfter({ turns, toolResults: toolResults.length, stopReason: 'completed' })
   return { finalText, toolResults, turns, stopReason: 'completed', cappedOut: false }
@@ -329,8 +386,9 @@ export async function* streamToolLoop<Raw>(
       return
     }
 
-    if (turnText.trim()) messages.push({ role: 'assistant', content: turnText })
-    const lines: string[] = []
+    // The assistant turn that emitted the calls, carrying its tool_calls array,
+    // so a strict model reads its own tool use back in OpenAI shape.
+    messages.push(assistantToolCallMessage(turnText, pending))
     const outcomes: ExecutedToolCall[] = []
     for (const [callIndex, call] of pending.entries()) {
       // Stuck-loop detection.
@@ -375,6 +433,7 @@ export async function* streamToolLoop<Raw>(
             label,
             outcome,
           }
+          messages.push(toolResultMessage(call, render(label, outcome)))
           observer.toolCallAfter(toolTurn, callEventId, call, outcome)
           observer.turnAfter(toolTurn, turnEventId, {
             pendingToolCalls: pending.length,
@@ -395,8 +454,9 @@ export async function* streamToolLoop<Raw>(
         outcome,
       }
       const rendered = render(label, outcome)
-      lines.push(rendered)
       outcomes.push({ call, label, outcome, rendered })
+      // One role:'tool' message per result, keyed to its call by tool_call_id.
+      messages.push(toolResultMessage(call, rendered))
       observer.toolCallAfter(toolTurn, callEventId, call, outcome)
     }
     observer.failureRecovery({
@@ -414,7 +474,6 @@ export async function* streamToolLoop<Raw>(
       })),
       failedToolCalls: outcomes.filter((item) => !item.outcome.ok).length,
     })
-    messages.push({ role: 'user', content: `Tool results:\n${lines.join('\n')}` })
   }
 }
 
@@ -663,7 +722,7 @@ function renderDecisionContext(
   turnText: string,
   outcomes: ExecutedToolCall[],
 ): string {
-  const recent = messages.slice(-6).map((message) => `[${message.role}]\n${message.content}`)
+  const recent = messages.slice(-6).map((message) => `[${message.role}]\n${message.content ?? ''}`)
   const assistant = turnText.trim() ? [`[assistant]\n${turnText}`] : []
   const toolResults = [`[tool results]\n${outcomes.map((item) => item.rendered).join('\n')}`]
   return trimText(