diff --git a/CLAUDE.md b/CLAUDE.md
index f3bba77a..ea7a28cc 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -62,7 +62,7 @@ Types that stay in THIS repo because they're runtime-shaped (coupled to a runnin
 - `run-loop.ts` — `runLoop`, the round-synchronous leaf kernel. Per round: `driver.plan()`→N tasks→one sandbox/iteration (bounded by `maxConcurrency`, round-robin `agentRuns`)→`streamPrompt`→`output.parse`→`validator.validate`→`driver.decide`. Owns iteration accounting, concurrency, abort, cost+token aggregation, trace emission, box teardown. Exports `defaultSelectWinner` (best-valid-score, ties→earliest) — the single-sourced selection the personify combinators reuse.
 - `supervise/` — the recursive execution atom (keystone): `Scope` + `Supervisor` over the open `Executor` port, spawn/settle on a **conserved budget pool** so equal-compute holds by construction; journal→replay/resume. `runtime.ts` also holds `createExecutor({backend})` — the ONE built-in executor (backend-as-data: `router`/`router-tools`/`bridge`/`cli`/`sandbox`; `router-tools` is the off-box tool-using agentic loop — chat→tool_calls→`executeToolCall`→repeat — over the router's tool-calling, no sandbox); the per-backend bodies are internal case-arms, BYO agents implement `Executor` directly.
 - `personify/` — the content-free generic combinators (`fanout`/`loopUntil`/`widen`/`panel`/`verify`/`pipeline`) + `definePersona`/`runPersonified` + the cross-run `Corpus` + `createScopeAnalyst` (the analyst-on-scope steer firewall).
-- the **agent-driver** is the canonical "drive an agent" path: an `AgentProfile` driving another `AgentProfile` via the coordination toolbox (`createCoordinationTools`, `src/mcp/tools/coordination.ts`) over the `Scope`/`Supervisor`, plus `runAgentic`/`defineStrategy`/`runPersonified` (`strategy.ts`/`personify/persona.ts`) on the Supervisor. Child→parent messages ride ONE typed pipe — `createEventBus` (`supervise/event-bus.ts`): settled outputs, `ask_parent` questions, and analyst findings are all `CoordinationEvent` kinds, delivered pass-through (`subscribe`/`onEvent`, immediate) AND queued for the driver to pull (`await_event`, kind-filterable; `await_next` is the settled-only view). The pull queue is **priority-ordered** — a blocking question (urgency→priority: `blocks-run`=20/`blocks-step`=10) is bumped ahead of queued settles/findings; ties FIFO by `seq`. Observability is first-class: every event is stamped (`seq`/`at`/`priority`), the full `history()` is an audit/replay trail, `stats()` counts throughput (both surfaced on `CoordinationTools` and the MCP handle). `analyzeOnSettle` auto-fires trace analysts when a worker settles `done`, re-entering each result as a `finding` on the same bus (cost-governed opt-in; the firewall stays in the analyst registry). The in-process queue and a future cross-box durable mailbox share this one interface. `assertTraceDerivedFindings` (`personify/analyst.ts`) is the steer-firewall (selector≠judge). `types.ts` holds `Driver`/`AgentRunSpec`/`OutputAdapter`/`Validator`/`Iteration`/`LoopResult`/`SandboxClient` + the `LoopTraceEvent` union. `sandbox-run.ts` is `openSandboxRun` — the one run/stream/resume sandbox seam; `inline-sandbox-client.ts` is `inlineSandboxClient` — the one adapter presenting any non-box `Executor` as a `SandboxClient` for `runLoop`. `loop-dispatch.ts` adapts `runLoop`→agent-eval campaigns; `report-usage.ts` forwards token usage so the integrity guard sees a real backend.
+- the **agent-driver** is the canonical "drive an agent" path: an `AgentProfile` driving another `AgentProfile` via the coordination toolbox (`createCoordinationTools`, `src/mcp/tools/coordination.ts`) over the `Scope`/`Supervisor`, plus `runAgentic`/`defineStrategy`/`runPersonified` (`strategy.ts`/`personify/persona.ts`) on the Supervisor. Child→parent messages ride ONE typed pipe — `createEventBus` (`supervise/event-bus.ts`): settled outputs, `ask_parent` questions, and analyst findings are all `CoordinationEvent` kinds, delivered pass-through (`subscribe`/`onEvent`, immediate) AND queued for the driver to pull (`await_event({kinds?})` — the ONE wait verb; `kinds:['settled']` = next finished worker, omit = also questions/findings). The pull queue is **priority-ordered** — a blocking question (urgency→priority: `blocks-run`=20/`blocks-step`=10) is bumped ahead of queued settles/findings; ties FIFO by `seq`. The bus is **bidirectional**: UP (settled/question/finding) is queued+pullable; DOWN (`steer_worker` for any live worker — instruction/correction/continuation; `answer_question` routes an answer down) goes to the child inbox via `scope.send`→`deliver` AND records a `queue:false` event (history + subscribers, never pulled back). The receive end is `createInbox` (`supervise/inbox.ts`), which the owned tool-loop executor (`routerToolsInlineExecutor`) exposes as `Executor.deliver`: QUEUED messages flush at each step boundary AND before the worker may settle (it can't finish with an unread steer); a FORCEFUL `steer_worker({interrupt:true})` aborts the in-flight turn so the worker re-plans immediately. Black-box CLI harnesses can't be interrupted mid-step, so there the down-leg degrades to the next spawn. Observability is first-class: every event both ways is stamped (`seq`/`at`/`priority`), the full `history()` is an audit/replay trail, `stats()` counts throughput (both surfaced on `CoordinationTools` and the MCP handle). `analyzeOnSettle` auto-fires trace analysts when a worker settles `done`, re-entering each result as a `finding` on the same bus (cost-governed opt-in; the firewall stays in the analyst registry). Trace analysis is **substrate-agnostic** via `TraceSource` (`supervise/trace-source.ts`) — a worker's tool calls as agent-eval `ToolSpan`s from EITHER an owned loop (`createPushTraceSource`; `routerToolsInlineExecutor`'s `onToolStep` feeds `record`) OR a sandbox/fleet box (`sandboxSessionTraceSource(box, sessionId)` decodes `box.messages()` session parts; `decodeToolPart` is defensive across OpenAI + harness shapes). Two consumers ride a source: ONLINE `watchTrace` (`detector-monitor.ts`) folds live spans through agent-eval's published streaming kernel (`repeatedActionDetector`/`errorStreakDetector`, the SAME kernel `control-runtime` folds) → `onSignal` → a `finding`; SETTLE `analyzeTrace` (`trajectory-recorder.ts`) collects the spans and runs the published BATCH analyzers (`buildTrajectory`/`stuckLoopView`/`toolWasteView`). `ToolSpan` is the common currency; detection logic + the failure taxonomy live in agent-eval — never reimplement here. Production target = sandbox/fleet; the owned-loop push path is for local/router/cli-bridge. The in-process queue and a future cross-box durable mailbox share this one interface. `assertTraceDerivedFindings` (`personify/analyst.ts`) is the steer-firewall (selector≠judge). `types.ts` holds `Driver`/`AgentRunSpec`/`OutputAdapter`/`Validator`/`Iteration`/`LoopResult`/`SandboxClient` + the `LoopTraceEvent` union. `sandbox-run.ts` is `openSandboxRun` — the one run/stream/resume sandbox seam; `inline-sandbox-client.ts` is `inlineSandboxClient` — the one adapter presenting any non-box `Executor` as a `SandboxClient` for `runLoop`. `loop-dispatch.ts` adapts `runLoop`→agent-eval campaigns; `report-usage.ts` forwards token usage so the integrity guard sees a real backend.
 
 Two substrates coexist for the same "recursive agent decision" atom: the round-synchronous `runLoop` kernel (the leaf, what most sandbox benches drive today) and the reactive `Scope`/`Supervisor`+combinators (the canonical core — the agent-driver, `runAgentic`/`defineStrategy`/`runPersonified`). Prefer the latter for new recursive/keystone work. Both run over the one `Executor` port.
 
diff --git a/bench/src/atom-humaneval.mts b/bench/src/atom-humaneval.mts
index 64c43f5f..43d4d6e4 100644
--- a/bench/src/atom-humaneval.mts
+++ b/bench/src/atom-humaneval.mts
@@ -143,7 +143,7 @@ function humanEvalWorker(task: HumanEvalTask, label: string): Agent<unknown, unk
   }
 }
 
-const driverSystem = `You are an orchestrator driving worker agents to solve a Python coding task. You do NOT write code yourself. Each worker independently attempts the task and is graded by a deterministic, hidden test suite. Tools: spawn_worker (dispatch one attempt; the "profile" argument may be {} and "task" a short note), await_next (collect the next settled worker — its result tells you valid:true if its tests PASSED, valid:false if they failed), and stopping (reply with NO tool call) once a worker has DELIVERED. Spawn one worker, await it; if it delivered, stop; if not, spawn another, up to ${K} workers total. You cannot declare success yourself — only a delivered (valid:true) worker counts.`
+const driverSystem = `You are an orchestrator driving worker agents to solve a Python coding task. You do NOT write code yourself. Each worker independently attempts the task and is graded by a deterministic, hidden test suite. Tools: spawn_worker (dispatch one attempt; the "profile" argument may be {} and "task" a short note), await_event (collect the next settled worker — its result tells you valid:true if its tests PASSED, valid:false if they failed), and stopping (reply with NO tool call) once a worker has DELIVERED. Spawn one worker, await it; if it delivered, stop; if not, spawn another, up to ${K} workers total. You cannot declare success yourself — only a delivered (valid:true) worker counts.`
 
 interface TaskOutcome {
   taskId: string
diff --git a/bench/src/atom-mcp-e2e.mts b/bench/src/atom-mcp-e2e.mts
index 3da19796..0c588607 100644
--- a/bench/src/atom-mcp-e2e.mts
+++ b/bench/src/atom-mcp-e2e.mts
@@ -175,7 +175,7 @@ async function main(): Promise<void> {
           messages: [
             {
               role: 'user',
-              content: `${TASK}\n\nYou are a SUPERVISOR. You have the "supervise" skill and a "coordination" MCP with tools spawn_worker, await_next, stop. Do NOT write code yourself. Author a worker profile (a JSON object with name + a rich systemPrompt telling the worker exactly what to implement) and call spawn_worker with it, then await_next, and stop once a worker delivered (valid:true).`,
+              content: `${TASK}\n\nYou are a SUPERVISOR. You have the "supervise" skill and a "coordination" MCP with tools spawn_worker, await_event, stop. Do NOT write code yourself. Author a worker profile (a JSON object with name + a rich systemPrompt telling the worker exactly what to implement) and call spawn_worker with it, then await_event, and stop once a worker delivered (valid:true).`,
             },
           ],
           cwd: supCwd,
diff --git a/bench/src/mcp-mount-probe.mts b/bench/src/mcp-mount-probe.mts
index a9b39382..699e1408 100644
--- a/bench/src/mcp-mount-probe.mts
+++ b/bench/src/mcp-mount-probe.mts
@@ -3,7 +3,7 @@
  * actually MOUNT my coordination MCP and CALL spawn_worker — landing on a real Scope.spawn?
  *
  * Serves the coordination MCP over a live Scope, then asks the bridge's opencode (with that MCP in
- * its config) to call spawn_worker + await_next. If the Scope spawned+settled, the in-box driving
+ * its config) to call spawn_worker + await_event. If the Scope spawned+settled, the in-box driving
  * path is real. No mock.
  *
  *   ROUTER_BASE=http://127.0.0.1:3355/v1 TANGLE_API_KEY=<bridge-bearer> \
@@ -86,9 +86,9 @@ async function main(): Promise<void> {
             {
               role: 'user',
               content:
-                'You have an MCP server named "coordination" with tools: spawn_worker, await_next, stop. ' +
-                'Call spawn_worker with arguments {"profile":{},"task":"hello"}. Then call await_next. ' +
-                'Then reply with exactly what await_next returned.',
+                'You have an MCP server named "coordination" with tools: spawn_worker, await_event, stop. ' +
+                'Call spawn_worker with arguments {"profile":{},"task":"hello"}. Then call await_event. ' +
+                'Then reply with exactly what await_event returned.',
             },
           ],
           mcp.url,
diff --git a/bench/src/profiles.ts b/bench/src/profiles.ts
index 877db868..00ae9838 100644
--- a/bench/src/profiles.ts
+++ b/bench/src/profiles.ts
@@ -21,7 +21,7 @@ export const OPERATOR_TOOLS = [
   'run_analyst', // run an analyst over a worker's trace → findings (selector≠judge: trace, not score)
   'observe_worker', // a worker's in-flight trace, or its last finished episode/shot
   'spawn_worker', // start a worker (or a sub-analyst) — drive many; parallelize when independent
-  'steer_worker', // send a running/parked worker its next instruction / an interrupt
+  'steer_worker', // send a live worker a message down: instruction, course-correction, or continuation (interrupt? for forceful)
   'stop', // declare the task complete (verified) or abandon a line
 ] as const
 
@@ -95,7 +95,7 @@ export const driverProfile: RoleProfile = {
     '  analysts are cheap; make them when a worker’s failure mode needs a focused lens.',
     '- observe_worker(worker): the worker’s IN-FLIGHT trace if it is still running, else its last',
     '  finished episode/shot.',
-    '- spawn_worker(profile, task) / steer_worker(worker, instruction) / stop.',
+    '- spawn_worker(profile, task) / steer_worker(worker, instruction, interrupt?) / stop.',
     '- the artifact’s own tools (read/edit/run) — use them to inspect the workspace and to contribute',
     '  decisive work yourself.',
     '',
diff --git a/docs/architecture-visual.md b/docs/architecture-visual.md
index bb5c6b58..a619e8e3 100644
--- a/docs/architecture-visual.md
+++ b/docs/architecture-visual.md
@@ -107,7 +107,7 @@ that keeps it honest.
                         ▼
         Scope: spawn child agent(s) → run → settle → verdict on the artifact
                         │
-                        └──▶ await_next → terminal? → winner = argmax(valid score)
+                        └──▶ await_event → terminal? → winner = argmax(valid score)
 ```
 
 The firewall is the load-bearing line: the **analyst reads the trace and may not cite the score**, so
diff --git a/docs/execution-model.md b/docs/execution-model.md
index 2639d69e..8b2ff9e2 100644
--- a/docs/execution-model.md
+++ b/docs/execution-model.md
@@ -49,11 +49,11 @@ Before, each bench hand-rolled its own pseudo-box client. Now there is **one exe
                     │  each round it decides the TOPOLOGY MOVE ─────┐ this IS
                     │   refine │ fanout │ select │ stop          │ │ "topology grown
                     │  then drives workers via the toolbox:      │ │  by LLM decision"
-                    │   spawn_worker · await_next · steer_worker │ │ (driver.ts:52)
+                    │  spawn_worker · await_event · steer_worker │ │ (driver.ts:52)
                     └───────────────┬────────────────────────────┘ │
        spawn_worker(profile,task) ──┤  reserves budget (fails       │
        steer_worker(id,msg) ────────┤  CLOSED if the pool is dry)   │
-       await_next ──────────────────┘                               │
+       await_event ─────────────────┘                               │
                     ┌───────────────┼───────────────┐               │
                     ▼               ▼                ▼               │
              ┌───────────┐   ┌───────────┐   ┌───────────┐          │
@@ -113,7 +113,7 @@ Before, each bench hand-rolled its own pseudo-box client. Now there is **one exe
         └─ 4. settle  ──►  pool.reconcile(ticket, actualSpend)
                                             │
                                             ▼
-                              await_next wakes the driver with this child's result
+                              await_event wakes the driver with this child's result
 ```
 
 **Net:** the "unified thing" is the `Executor` port. Everything that runs work — a router call, a cli-bridge turn, a `claude -p` subprocess, a full sandbox rollout, or a BYO agent — is an `Executor`, chosen by data via `createExecutor`, metered by one budget pool. Drivers and workers are both `act`s over that port; the only structural difference is the driver carries the operator toolbox (so it can spawn/steer) and the worker does not.
diff --git a/docs/glossary.md b/docs/glossary.md
index 9ce1a335..ba8db08e 100644
--- a/docs/glossary.md
+++ b/docs/glossary.md
@@ -28,12 +28,12 @@ Two substrates run the same "recursive agent decision" atom — the round-synchr
 
 ## Topology (how the shape grows — by LLM decision, not a fixed script)
 
-The shape grows by LLM decision through the **coordination toolbox** over a live `Scope`: the driver `AgentProfile` calls `spawn_worker` (branch), `await_next` (react), `steer_worker` (interrupt), `stop` — and `runAgentic`/`defineStrategy` package the common depth/breadth shapes on the Supervisor.
+The shape grows by LLM decision through the **coordination toolbox** over a live `Scope`: the driver `AgentProfile` calls `spawn_worker` (branch), `await_event` (react), `steer_worker` (interrupt), `stop` — and `runAgentic`/`defineStrategy` package the common depth/breadth shapes on the Supervisor.
 
 | Term | Meaning | Anchor |
 |---|---|---|
 | **Strategy** (`sample`/`refine`) | A `defineStrategy(name, body)` value run through the Supervisor as one recursive `Agent.act`: `sample` = breadth/best-of-N, `refine` = depth/iterate-with-feedback. The harness-verified topology, NOT a fixed script. | `strategy.ts` (`defineStrategy`, `sample`, `refine`) |
-| **Coordination toolbox** | The driver's per-step move set as MCP tools over a live `Scope`: `spawn_worker` (branch N) · `await_next` (react) · `steer_worker` (interrupt) · `observe_worker` · `stop`. This **is** "topology grown through LLM decisions". | `mcp/tools/coordination.ts` (`createCoordinationTools`) |
+| **Coordination toolbox** | The driver's per-step move set as MCP tools over a live `Scope`: `spawn_worker` (branch N) · `await_event` (react) · `steer_worker` (interrupt) · `observe_worker` · `stop`. This **is** "topology grown through LLM decisions". | `mcp/tools/coordination.ts` (`createCoordinationTools`) |
 | **AnalystFn / `critique`** | `(history, task?) → correction`. The firewalled steer — trajectory in, never the score. `llmAnalyst` (one router call); the strategy author calls it via `ctx.critique`. | `bench/src/sandbox-run.ts:50,58` (`llmAnalyst`); `strategy.ts` (`ctx.critique`) |
 
 ## The executor port (the unified execution seam)
@@ -61,8 +61,8 @@ The shape grows by LLM decision through the **coordination toolbox** over a live
 | Term | Meaning | Anchor |
 |---|---|---|
 | **Agent.act** | The recursive atom: `act(task, scope) → Out`. A driver IS an `act` that spawns into its `scope`; replay-safe. The Supervisor calls `root.act(task, scope)`. | `supervise/types.ts:50`; `supervisor.ts:145` |
-| **Coordination toolbox ("Scope-as-MCP")** | The operator/driver verbs exposed as MCP tools over a live `Scope`: `spawn_worker`→`scope.spawn`, `await_next`→`scope.next` (the wake event), `steer_worker`→`scope.send` (chat/interrupt a running child), `observe_worker`→`scope.view`, `stop`, `list_analysts`/`run_analyst`. **Built + tested**, public on the `./mcp` subpath. This is how an LLM driver spawns and talks to its sub-agents. | `mcp/tools/coordination.ts`; tests `tests/loops/coordination.test.ts` |
+| **Coordination toolbox ("Scope-as-MCP")** | The operator/driver verbs exposed as MCP tools over a live `Scope`: `spawn_worker`→`scope.spawn`, `await_event`→`scope.next` (the wake event), `steer_worker`→`scope.send` (chat/interrupt a running child), `observe_worker`→`scope.view`, `stop`, `list_analysts`/`run_analyst`. **Built + tested**, public on the `./mcp` subpath. This is how an LLM driver spawns and talks to its sub-agents. | `mcp/tools/coordination.ts`; tests `tests/loops/coordination.test.ts` |
 | **Scope.send / deliver** | The "steer a live worker" verb the toolbox's `steer_worker` binds to: `scope.send(nodeId, msg)` → child executor's `deliver()` inbox. **In-process binding is real**; the cross-box (A2A) binding is task #13. | `supervise/scope.ts:290` |
 | **Agent Bus / A2A** | The cross-process agent↔agent transport for the same verbs — **designed, not adopted**. The in-process toolbox works today; this is the unfinished edge. | task #13; `docs/agent-bus-protocol.md` |
 
-**One agent CALLING another** today = the coordination toolbox (`spawn_worker`/`steer_worker`/`await_next`) over a live `Scope`, in-process — real and tested. The cross-box transport (A2A) is the thin part. The dominant *control* model is **topology-by-LLM-decision** (the driver's coordination-tool moves, packaged as `runAgentic`/`defineStrategy` shapes). `src/conversation/` is multi-*turn*, not agent-to-agent.
+**One agent CALLING another** today = the coordination toolbox (`spawn_worker`/`steer_worker`/`await_event`) over a live `Scope`, in-process — real and tested. The cross-box transport (A2A) is the thin part. The dominant *control* model is **topology-by-LLM-decision** (the driver's coordination-tool moves, packaged as `runAgentic`/`defineStrategy` shapes). `src/conversation/` is multi-*turn*, not agent-to-agent.
diff --git a/package.json b/package.json
index 91ff0110..053a581b 100644
--- a/package.json
+++ b/package.json
@@ -106,7 +106,7 @@
   },
   "devDependencies": {
     "@biomejs/biome": "^2.4.15",
-    "@tangle-network/agent-eval": "^0.92.0",
+    "@tangle-network/agent-eval": "^0.93.0",
     "@tangle-network/sandbox": "^0.6.0",
     "@types/node": "^25.9.3",
     "playwright": "^1.61.0",
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 488953e0..867449b8 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -16,8 +16,8 @@ importers:
         specifier: ^2.4.15
         version: 2.4.15
       '@tangle-network/agent-eval':
-        specifier: ^0.92.0
-        version: 0.92.0(@tangle-network/sandbox@0.6.1(viem@2.48.8(typescript@5.9.3)(zod@4.4.2)))(typescript@5.9.3)
+        specifier: ^0.93.0
+        version: 0.93.0(@tangle-network/sandbox@0.6.1(viem@2.48.8(typescript@5.9.3)(zod@4.4.2)))(typescript@5.9.3)
       '@tangle-network/sandbox':
         specifier: ^0.6.0
         version: 0.6.1(viem@2.48.8(typescript@5.9.3)(zod@4.4.2))
@@ -467,8 +467,8 @@ packages:
     engines: {node: '>=20'}
     hasBin: true
 
-  '@tangle-network/agent-eval@0.92.0':
-    resolution: {integrity: sha512-Sj/ejnn74ILewRM+48gkWbZnEInB0xQgF6bJC5DwpVeMdankuCuljZVR34Y66Ba+eg+YwNxf3BJUEX0aV4LB5w==}
+  '@tangle-network/agent-eval@0.93.0':
+    resolution: {integrity: sha512-EPpluyrHTPnBQxi2Mbx6LGkmPLndk8XhsY4iPIxxhuv23GGoL6FYvrwPq/eu+uxTwmfIVwdJsKchk2nj/ke1tA==}
     engines: {node: '>=20'}
     hasBin: true
     peerDependencies:
@@ -1394,7 +1394,7 @@ snapshots:
       - typescript
       - utf-8-validate
 
-  '@tangle-network/agent-eval@0.92.0(@tangle-network/sandbox@0.6.1(viem@2.48.8(typescript@5.9.3)(zod@4.4.2)))(typescript@5.9.3)':
+  '@tangle-network/agent-eval@0.93.0(@tangle-network/sandbox@0.6.1(viem@2.48.8(typescript@5.9.3)(zod@4.4.2)))(typescript@5.9.3)':
     dependencies:
       '@asteasolutions/zod-to-openapi': 8.5.0(zod@4.4.3)
       '@ax-llm/ax': 19.0.45(zod@4.4.3)
diff --git a/skills/loop-writer/SKILL.md b/skills/loop-writer/SKILL.md
index aa563980..254d3770 100644
--- a/skills/loop-writer/SKILL.md
+++ b/skills/loop-writer/SKILL.md
@@ -110,7 +110,7 @@ const result = await createSupervisor<Task, Output>().run(driver, task, supervis
 ```
 
 When the driver lives in a sandbox, expose the same verbs through
-`createCoordinationTools`: `spawn_worker`, `await_next`, `observe_worker`,
+`createCoordinationTools`: `spawn_worker`, `await_event`, `observe_worker`,
 `steer_worker`, `list_questions`, `answer_question`, `ask_parent`, `stop`, and
 optional analyst tools.
 
diff --git a/skills/supervise/SKILL.md b/skills/supervise/SKILL.md
index ffe5889b..35239342 100644
--- a/skills/supervise/SKILL.md
+++ b/skills/supervise/SKILL.md
@@ -15,7 +15,7 @@ You are a supervisor. You do NOT do the work yourself — you design and drive s
    - `skills` — the skill files the worker should carry (by name), OR `systemPrompt` — rich, specific instructions for this sub-task.
    - `model` — the model best suited to this sub-task (optional).
    Write the instructions a power user would write — never a one-liner. **Never spawn a worker with an empty profile.** The quality of the worker is the quality of the profile you author.
-3. **Await** each worker with `await_next`. Its result reports `valid: true` only if the worker's deployable check passed.
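+3. **Await** each worker with `await_event` (pass `kinds: ["settled"]` to wait only for finished workers; omit `kinds` to also receive questions and findings). A settled result reports `valid: true` only if the worker's deployable check passed.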
 4. **On failure**, author a *new* worker whose profile names the specific failure and how to fix it — never blindly retry the same profile.
 5. **Stop** (reply with no tool call) once the work is delivered. Only a delivered (`valid: true`) worker counts; you cannot declare done yourself.
 
diff --git a/src/mcp/tools/coordination.ts b/src/mcp/tools/coordination.ts
index 41239fbd..fbb7cf1f 100644
--- a/src/mcp/tools/coordination.ts
+++ b/src/mcp/tools/coordination.ts
@@ -17,7 +17,7 @@ import type {
 import { type BusRecord, type BusStats, createEventBus } from '../../runtime/supervise/event-bus'
 import type { McpToolDescriptor } from '../server'
 
-/** A worker the driver has drained via `await_next`. */
+/** A worker the driver has drained via `await_event`. */
 export interface SettledWorker {
   readonly id: string
   readonly status: 'done' | 'down'
@@ -71,12 +71,23 @@ export interface AnalystFindingEvent {
   readonly findings: unknown
 }
 
-/** Every message a worker/sub-driver/analyst sends up to the driver — the one typed pipe. New kinds
- *  are additive: a `{type:'question'}` consumer keeps matching. */
+/** A parent→child message (the down-leg): recorded for observability, delivered via the child inbox,
+ *  never pulled back by the parent. `delivered` mirrors whether the live child accepted it. */
+export interface DownMessageEvent {
+  readonly toWorker: string // id of the worker the message is addressed to
+  readonly instruction: string // message text: the steer instruction, or the answer for `answer`
+  readonly delivered: boolean // the `scope.send` result captured when the message was sent
+}
+
+/** Every message on the one typed pipe. UP (child→parent): question / settled / finding — queued for
+ *  the driver to `pull`. DOWN (parent→child): steer / answer — published with `queue: false`, so
+ *  record-only (history + subscribers), and routed to the child inbox. New kinds are additive. */
 export type CoordinationEvent =
   | { readonly type: 'question'; readonly question: QuestionRecord }
   | { readonly type: 'settled'; readonly worker: SettledWorker }
   | { readonly type: 'finding'; readonly finding: AnalystFindingEvent }
+  | { readonly type: 'steer'; readonly down: DownMessageEvent }
+  | { readonly type: 'answer'; readonly down: DownMessageEvent; readonly questionId: string }
 
 export type MakeWorkerAgent = (profile: unknown) => SuperviseAgent<unknown, unknown>
 
@@ -101,11 +112,16 @@ export interface CoordinationTools {
   stopReason(): string | undefined
   settled(): ReadonlyArray<SettledWorker>
   questions(): ReadonlyArray<QuestionRecord>
-  /** The full ordered log of every bus event (settled / question / finding) — the observability
-   *  audit + replay trail. Each record carries seq, timestamp, and priority. */
+  /** The full ordered log of every bus event — UP (settled / question / finding) and DOWN
+   *  (steer / answer) — the observability audit + replay trail. Each record carries seq,
+   *  timestamp, and priority. */
   history(): ReadonlyArray<BusRecord<CoordinationEvent>>
   /** Bus throughput counters (published / pulled / by-kind) for live dashboards. */
   stats(): BusStats
+  /** Raise a `finding` on the bus from outside the settle hook — the seam an ONLINE detector
+   *  (mid-run, on the worker pipe) uses to tell the driver "this worker is looping/erroring" the
+   *  moment it happens, instead of only at settle. Queued for `await_event` + pass-through. */
+  raiseFinding(finding: AnalystFindingEvent): Promise<void>
 }
 
 const idArg = { type: 'string', description: 'The workerId returned by spawn_worker.' } as const
@@ -189,6 +205,24 @@ export function createCoordinationTools(opts: CoordinationToolsOptions): Coordin
     return true
   }
 
+  // Down-leg recorder: publishes a parent→child message with `queue: false`, so it lands in the
+  // audit trail (history + subscribers) but is never enqueued for the parent to pull back.
+  // The overloads make `questionId` mandatory for `answer`; the body still passes it through `str`.
+  function sendDown(type: 'steer', down: DownMessageEvent): Promise<void>
+  function sendDown(type: 'answer', down: DownMessageEvent, questionId: string): Promise<void>
+  async function sendDown(
+    type: 'steer' | 'answer',
+    down: DownMessageEvent,
+    questionId?: string,
+  ): Promise<void> {
+    const event: CoordinationEvent =
+      type === 'steer'
+        ? { type, down }
+        : { type, down, questionId: str(questionId, 'questionId') }
+    // Build the event first, then publish it outside the pull queue.
+    await bus.publish(event, { queue: false })
+  }
+
   // Consumer projection: the wire shape the driver sees for a pulled bus event.
   const projectEvent = (ev: CoordinationEvent): Record<string, unknown> => {
     if (ev.type === 'settled') {
@@ -205,7 +239,11 @@ export function createCoordinationTools(opts: CoordinationToolsOptions): Coordin
         : { type: 'settled', settled: w.id, status: 'down', reason: w.reason }
     }
     if (ev.type === 'question') return { type: 'question', question: ev.question }
-    return { type: 'finding', ...ev.finding }
+    if (ev.type === 'finding') return { type: 'finding', ...ev.finding }
+    if (ev.type === 'answer') return { type: 'answer', ...ev.down, questionId: ev.questionId }
+    // Down-leg events (`answer` above, `steer` here) are record-only — never queued, so the
+    // driver never pulls them; both are projected defensively for completeness.
+    return { type: ev.type, ...ev.down }
   }
 
   const nextQuestionId = (from: string): string => `${from}:q${questionSeq++}`
@@ -330,48 +368,45 @@ export function createCoordinationTools(opts: CoordinationToolsOptions): Coordin
     },
     {
       name: 'steer_worker',
-      description: 'Deliver an out-of-band instruction to a running worker inbox.',
+      description:
+        'Send a message DOWN to a still-LIVE worker (parent→child): a new instruction, a course ' +
+        'correction, or a continuation. The worker drains it at its next step boundary — and before ' +
+        'it may settle, so it cannot finish while a message it never read is pending. A worker that ' +
+        'already settled is gone (returns delivered:false) — spawn a fresh one instead.',
       inputSchema: {
         type: 'object',
         properties: {
           workerId: idArg,
           instruction: { type: 'string', description: 'What the worker should do next.' },
+          interrupt: {
+            type: 'boolean',
+            description:
+              'true = forceful: abort the worker’s in-flight inference so it re-plans on the NEXT ' +
+              'turn (a tool already mid-execution finishes first; only the owned tool-loop honors this). ' +
+              'false/omitted = queued: it flushes at the next step boundary (and before it may settle).',
+          },
         },
         required: ['workerId', 'instruction'],
       },
-      handler: (raw) => {
+      handler: async (raw) => {
         const a = obj(raw)
-        const delivered = opts.scope.send(str(a.workerId, 'workerId'), {
-          steer: str(a.instruction, 'instruction'),
-        })
-        return Promise.resolve({ delivered })
-      },
-    },
-    {
-      name: 'await_next',
-      description:
-        'Wait for the next spawned worker to settle. Returns { idle: true } when none are live. ' +
-        '(A settle also fires any analyze-on-settle lenses, whose findings queue for await_event.)',
-      inputSchema: { type: 'object', properties: {} },
-      handler: async () => {
-        if (bus.pending(['settled']) === 0 && !(await drainSettlement())) return { idle: true }
-        const ev = bus.pull(['settled'])
-        if (!ev || ev.type !== 'settled') return { idle: true }
-        const w = ev.worker
-        return w.status === 'done'
-          ? { settled: w.id, status: 'done', score: w.score, valid: w.valid, outRef: w.outRef }
-          : { settled: w.id, status: 'down', reason: w.reason }
+        const workerId = str(a.workerId, 'workerId')
+        const instruction = str(a.instruction, 'instruction')
+        const interrupt = a.interrupt === true
+        const delivered = opts.scope.send(workerId, { steer: instruction, interrupt })
+        await sendDown('steer', { toWorker: workerId, instruction, delivered })
+        return { delivered }
       },
     },
     {
       name: 'await_event',
       description:
-        'Pull the next message a worker, sub-driver, or analyst sent up — the unified inbox. An ' +
-        "event is one of: a settled worker output ('settled'), a question needing your answer " +
-        "('question', from ask_parent / the worker's ask-user), or a trace-analyst finding " +
-        "('finding', from analyze-on-settle). Optional `kinds` filters which to wait for. Returns " +
-        '{ idle: true } when nothing is queued and no workers are live. Prefer this over await_next ' +
-        'when you also want questions and findings, not just settlements.',
+        'Wait for and pull the next message a worker, sub-driver, or analyst sent up — the unified ' +
+        "inbox. An event is one of: a settled worker output ('settled'), a question needing your " +
+        "answer ('question', from ask_parent / the worker's ask-user), or a trace-analyst finding " +
+        "('finding', from analyze-on-settle). Pass kinds:['settled'] for just the next finished " +
+        'worker; omit `kinds` to also receive questions and findings. Returns { idle: true } when ' +
+        'nothing is queued and no workers are live.',
       inputSchema: {
         type: 'object',
         properties: {
@@ -422,17 +457,29 @@ export function createCoordinationTools(opts: CoordinationToolsOptions): Coordin
         },
         required: ['questionId'],
       },
-      handler: (raw) => {
+      handler: async (raw) => {
         const a = obj(raw)
         const questionId = str(a.questionId, 'questionId')
         if (typeof a.answer === 'string' && a.answer.length > 0) {
-          return Promise.resolve({
-            question: decideQuestion(questionId, {
-              kind: 'answer',
-              answer: a.answer,
-              by: typeof a.by === 'string' && a.by.length > 0 ? a.by : 'user',
-            }),
+          const answer = a.answer
+          const question = decideQuestion(questionId, {
+            kind: 'answer',
+            answer,
+            by: typeof a.by === 'string' && a.by.length > 0 ? a.by : 'user',
           })
+          // Route the answer DOWN to the worker that asked, unparking it, and record the down-leg.
+          // A blocking question parked the worker, so deliver forcefully — it should resume on the
+          // answer immediately, not wait for its next step boundary.
+          const interrupt = question.urgency === 'blocks-run' || question.urgency === 'blocks-step'
+          const delivered = opts.scope.send(question.from, { answer, questionId, interrupt })
+          await sendDown(
+            'answer',
+            { toWorker: question.from, instruction: answer, delivered },
+            questionId,
+          )
+          // Surface `delivered` like steer_worker — the caller must see whether the answer actually
+          // reached a live worker (false when it already settled or has no inbox).
+          return { question, delivered }
         }
         if (typeof a.deferReason === 'string' && a.deferReason.length > 0) {
           return Promise.resolve({
@@ -549,6 +596,7 @@ export function createCoordinationTools(opts: CoordinationToolsOptions): Coordin
   return {
     tools,
     history: () => bus.history(),
+    raiseFinding: (finding) => bus.publish({ type: 'finding', finding }).then(() => undefined),
     stats: () => bus.stats(),
     isStopped: () => stopped,
     stopReason: () => reason,
diff --git a/src/runtime/index.ts b/src/runtime/index.ts
index 904692c8..1281f827 100644
--- a/src/runtime/index.ts
+++ b/src/runtime/index.ts
@@ -316,6 +316,12 @@ export {
 // Supervisor-as-MCP: serve the coordination verbs as a real HTTP MCP over a live Scope, so any
 // harness (claude-code / codex / opencode) BECOMES the supervisor by mounting one MCP server.
 export { type CoordinationMcpHandle, serveCoordinationMcp } from './supervise/coordination-mcp'
+// The ONLINE analyst: watch a TraceSource and raise a `finding` the moment a worker loops/error-storms.
+export {
+  defaultToolDetectors,
+  type WatchTraceOptions,
+  watchTrace,
+} from './supervise/detector-monitor'
 // The recursive driver-executor: a spawned child can BE a driver (agents drive agents),
 // resolved through `withDriverExecutor` and run over a nested `Scope` one depth deeper on
 // the SAME conserved pool.
@@ -336,6 +342,9 @@ export {
   type EventBus,
   type PublishOptions,
 } from './supervise/event-bus'
+// The down-leg receive end: a per-worker inbox an executor exposes as `Executor.deliver`; the loop
+// drains it at the step boundary + before settle (queued) or aborts the turn (forceful interrupt).
+export { createInbox, type Inbox, type InboxMessage } from './supervise/inbox'
 // The production `DriverChat`: adapt the router's tool-calling to the seam a
 // `coordinationDriverAgent` drives. The one turnkey piece a consumer needs to run the driver
 // brain in-process — tests script a mock `DriverChat`, production passes `routerDriverChat(cfg)`.
@@ -365,6 +374,22 @@ export {
   createRootHandle,
   createSupervisor,
 } from './supervise/supervisor'
+// The substrate-agnostic trace source: a worker's tool calls as agent-eval `ToolSpan`s, from an
+// OWNED loop (push) OR a sandbox box session (message parts). The common currency for both analysts.
+export {
+  createPartsTraceSource,
+  createPushTraceSource,
+  decodeToolPart,
+  type SessionMessageLike,
+  type SessionTraceBox,
+  sandboxSessionTraceSource,
+  type ToolStepInput,
+  type TraceSource,
+  toToolSpan,
+} from './supervise/trace-source'
+// The SETTLE-time analyzer: collect a TraceSource's spans and run agent-eval's published batch
+// analyzers (buildTrajectory / stuckLoopView / toolWasteView) — the post-hoc half.
+export { analyzeTrace, type TrajectoryAnalysis } from './supervise/trajectory-recorder'
 export type {
   Agent,
   AgentSpec,
diff --git a/src/runtime/supervise/authoring.ts b/src/runtime/supervise/authoring.ts
index 1239d688..4b5cf45d 100644
--- a/src/runtime/supervise/authoring.ts
+++ b/src/runtime/supervise/authoring.ts
@@ -53,7 +53,7 @@ export function supervisorSkill(opts?: { goal?: string }): string {
     '   • systemPrompt: rich, specific instructions for THIS sub-task — tell the worker exactly what to produce, how to use its tools fully, and what "done" means. Never a one-liner; write the prompt a power-user would write.',
     '   • model: the model best suited to this sub-task (omit to use the default).',
     '   NEVER spawn a worker with an empty profile. The quality of the worker IS the quality of the profile you write.',
-    '3. await_next to collect each worker. Its result says valid:true only if the deployable check passed.',
+    "3. await_event (kinds:['settled']) to collect each worker. Its result says valid:true only if the deployable check passed.",
     '4. If a worker did NOT deliver, AUTHOR A NEW worker whose systemPrompt names the SPECIFIC failure and how to fix it — never just retry the same prompt.',
     '5. Stop (reply with no tool call) once the work is delivered. You cannot declare done yourself — only a delivered (valid:true) worker counts.',
     ...(opts?.goal ? ['', `The goal: ${opts.goal}`] : []),
diff --git a/src/runtime/supervise/coordination-mcp.ts b/src/runtime/supervise/coordination-mcp.ts
index 67296b85..7a494dd7 100644
--- a/src/runtime/supervise/coordination-mcp.ts
+++ b/src/runtime/supervise/coordination-mcp.ts
@@ -1,7 +1,7 @@
 /**
  * @experimental
  *
- * Serve the coordination verbs (spawn_worker / await_next / observe_worker / steer_worker / stop)
+ * Serve the coordination verbs (spawn_worker / await_event / observe_worker / steer_worker / stop)
  * as a real HTTP MCP server over a LIVE `Scope`. This is the keystone that lets a coding-harness
  * agent (opencode via the cli-bridge, claude-code, codex) BE the supervisor: it mounts this MCP
  * (`mcp.mcpServers.coordination`) and calls `spawn_worker` as a native tool, which lands on
@@ -35,6 +35,8 @@ export interface CoordinationMcpHandle {
   history: CoordinationTools['history']
   /** Bus throughput counters for live dashboards. */
   stats: CoordinationTools['stats']
+  /** Raise a `finding` on the bus from an online detector watching a worker's live pipe. */
+  raiseFinding: CoordinationTools['raiseFinding']
   close(): Promise<void>
 }
 
@@ -119,6 +121,7 @@ export async function serveCoordinationMcp(opts: {
     isStopped: () => coord.isStopped(),
     history: () => coord.history(),
     stats: () => coord.stats(),
+    raiseFinding: (finding) => coord.raiseFinding(finding),
     close: () =>
       new Promise<void>((resolve) => {
         server.close(() => resolve())
diff --git a/src/runtime/supervise/detector-monitor.ts b/src/runtime/supervise/detector-monitor.ts
new file mode 100644
index 00000000..7235bba6
--- /dev/null
+++ b/src/runtime/supervise/detector-monitor.ts
@@ -0,0 +1,54 @@
+/**
+ * @experimental
+ *
+ * The ONLINE analyst: watch a `TraceSource` and fold each tool span through agent-eval's published
+ * streaming detector kernel (`repeatedActionDetector`/`errorStreakDetector` — the SAME kernel the
+ * control loop folds), firing `onSignal` the moment a worker loops or error-storms. Substrate-
+ * agnostic: it consumes spans from any source (owned router/bridge loop OR a sandbox box session),
+ * never the raw tool seam. Detection logic + the failure taxonomy live in agent-eval; not reimplemented.
+ */
+
+import {
+  argHash,
+  type DetectorSignal,
+  errorStreakDetector,
+  observeAll,
+  repeatedActionDetector,
+  type StreamingDetector,
+  type ToolSpan,
+} from '@tangle-network/agent-eval'
+import type { TraceSource } from './trace-source'
+
+export interface WatchTraceOptions {
+  /** The detectors to run online. Defaults to a stuck-loop + error-streak panel. */
+  readonly detectors?: ReadonlyArray<StreamingDetector>
+  /** Fired for each signal a detector raises — the seam that raises a `finding` on the bus. */
+  readonly onSignal?: (signal: DetectorSignal, span: ToolSpan) => void | Promise<void>
+}
+
+/** The default online panel for a tool-call pipe: a worker repeating the same call, or hammering
+ *  consecutive errors. (No-progress needs a domain progress-probe, so it is opt-in, not default.) */
+export function defaultToolDetectors(): StreamingDetector[] {
+  return [repeatedActionDetector({ maxRepeated: 3 }), errorStreakDetector({ maxErrors: 3 })]
+}
+
+/** Subscribe to a `TraceSource` and run the streaming detectors over its live spans. Returns an
+ *  unsubscribe. Neither an `argHash` failure nor a failing `onSignal` escapes the side-channel. */
+export function watchTrace(source: TraceSource, opts: WatchTraceOptions = {}): () => void {
+  const detectors = opts.detectors ?? defaultToolDetectors()
+  return source.onSpan((span) => {
+    let fingerprint: string
+    try {
+      // Same fingerprint scheme as agent-eval's batch stuck-loop view: tool name + hashed args.
+      fingerprint = `${span.toolName}|${argHash(span.args)}`
+    } catch {
+      fingerprint = `${span.toolName}|<unhashable>`
+    }
+    const signals = observeAll(detectors, {
+      actionFingerprint: fingerprint,
+      ...(span.status ? { status: span.status } : {}),
+      label: span.toolName,
+    })
+    for (const s of signals) void (async () => opts.onSignal?.(s, span))().catch(() => undefined)
+  })
+}
diff --git a/src/runtime/supervise/event-bus.ts b/src/runtime/supervise/event-bus.ts
index c5d06702..26522496 100644
--- a/src/runtime/supervise/event-bus.ts
+++ b/src/runtime/supervise/event-bus.ts
@@ -40,6 +40,10 @@ export interface PublishOptions {
   /** Higher = pulled ahead of lower-priority queued events (default 0). A blocking question sets
    *  this so it bumps to the front of the driver's inbox. */
   readonly priority?: number
+  /** Whether the event enters the pull queue (default true). Set `false` for record-only events —
+   *  the parent→child down-leg (steer / answer / resume): they belong in `history()` and reach
+   *  `subscribe` observers, but the parent must never `pull` its own outbound message back. */
+  readonly queue?: boolean
 }
 
 export interface BusStats {
@@ -56,8 +60,6 @@ export interface EventBus<E extends BusEvent> {
   /** Remove and return the highest-priority QUEUED event whose type is in `kinds` (any if omitted),
    *  ties broken FIFO by `seq`; `undefined` when nothing matches. */
   pull(kinds?: ReadonlyArray<E['type']>): E | undefined
-  /** Like `pull` but non-destructive — inspect the next event without consuming it. */
-  peek(kinds?: ReadonlyArray<E['type']>): E | undefined
   /** Register a pass-through handler; it receives the stamped record of every event published after
    *  registration. Returns an unsubscribe fn. */
   subscribe(handler: (record: BusRecord<E>) => void | Promise<void>): () => void
@@ -99,7 +101,8 @@ export function createEventBus<E extends BusEvent>(now: () => number = Date.now)
   return {
     async publish(event, opts) {
       const record: BusRecord<E> = { seq: seq++, at: now(), priority: opts?.priority ?? 0, event }
-      queue.push(record)
+      // Record-only events (the down-leg) skip the pull queue but still hit the log + subscribers.
+      if (opts?.queue !== false) queue.push(record)
       log.push(record)
       byKind[event.type] = (byKind[event.type] ?? 0) + 1
       // Sequential, not Promise.all: a subscriber that steers off this event must observe a
@@ -113,10 +116,6 @@ export function createEventBus<E extends BusEvent>(now: () => number = Date.now)
       pulled++
       return queue.splice(i, 1)[0]?.event
     },
-    peek(kinds) {
-      const i = bestIndex(kinds)
-      return i < 0 ? undefined : queue[i]?.event
-    },
     subscribe(handler) {
       subscribers.push(handler)
       return () => {
diff --git a/src/runtime/supervise/inbox.ts b/src/runtime/supervise/inbox.ts
new file mode 100644
index 00000000..42ef61a7
--- /dev/null
+++ b/src/runtime/supervise/inbox.ts
@@ -0,0 +1,83 @@
+/**
+ * @experimental
+ *
+ * The worker-side receive end of the down-leg: a per-worker inbox an executor exposes as
+ * `Executor.deliver`. The driver's `steer_worker` / `answer_question` land here,
+ * and the worker's agent loop drains them at two points (Drew's two delivery modes):
+ *
+ *   - QUEUED (default): the message accumulates and is FLUSHED at the next step boundary — folded
+ *     into the conversation before the next think. A worker is also forced to flush BEFORE it may
+ *     settle, so it can never finish while a steer/answer it never read is still pending.
+ *   - FORCEFUL (`interrupt: true`): trips `freshInterrupt()`'s signal so the loop can abort its
+ *     in-flight turn immediately, then re-plan with the message folded in — breaking the worker out
+ *     of a wrong path mid-task instead of waiting for it to finish the step.
+ *
+ * `deliver` never throws — a malformed message is ignored, per the `Executor.deliver` contract.
+ */
+
+export interface InboxMessage {
+  readonly kind: 'steer' | 'answer'
+  readonly text: string
+  /** Forceful messages abort the in-flight turn; queued ones wait for the boundary flush. */
+  readonly interrupt: boolean
+  /** Present for an `answer` — the question id it resolves. */
+  readonly questionId?: string
+}
+
+export interface Inbox {
+  /** The `Executor.deliver` implementation — accept a raw down-message from `Scope.send`. */
+  deliver(msg: unknown): void
+  /** Remove and return all pending messages (the flush). */
+  drain(): InboxMessage[]
+  pending(): number
+  /** Open a fresh per-turn interrupt signal; a later forceful `deliver` aborts it. The loop links
+   *  this into the signal it passes to its inference call, then re-plans when it fires. */
+  freshInterrupt(): AbortSignal
+  /** Render drained messages as ONE operator turn to fold into the worker's conversation. */
+  fold(messages: ReadonlyArray<InboxMessage>): string
+}
+
+function parseDown(msg: unknown): InboxMessage | undefined {
+  if (!msg || typeof msg !== 'object') return undefined
+  const m = msg as Record<string, unknown>
+  const interrupt = m.interrupt === true
+  if (typeof m.steer === 'string') return { kind: 'steer', text: m.steer, interrupt }
+  if (typeof m.answer === 'string')
+    return {
+      kind: 'answer',
+      text: m.answer,
+      interrupt,
+      ...(typeof m.questionId === 'string' ? { questionId: m.questionId } : {}),
+    }
+  return undefined
+}
+
+export function createInbox(): Inbox {
+  const pending: InboxMessage[] = []
+  let live: AbortController | null = null
+  return {
+    deliver(msg) {
+      const m = parseDown(msg)
+      if (!m) return
+      pending.push(m)
+      // A forceful message aborts the turn currently in flight (if any).
+      if (m.interrupt && live && !live.signal.aborted) live.abort()
+    },
+    drain() {
+      return pending.splice(0, pending.length)
+    },
+    pending: () => pending.length,
+    freshInterrupt() {
+      live = new AbortController()
+      return live.signal
+    },
+    fold(messages) {
+      const lines = messages.map((m) => {
+        if (m.kind === 'answer')
+          return `- Answer to your question${m.questionId ? ` (${m.questionId})` : ''}: ${m.text}`
+        return `- New instruction from your supervisor: ${m.text}`
+      })
+      return `[SUPERVISOR] Out-of-band message(s) — address these before continuing:\n${lines.join('\n')}`
+    },
+  }
+}
diff --git a/src/runtime/supervise/runtime.ts b/src/runtime/supervise/runtime.ts
index b28431b5..ccc28ddf 100644
--- a/src/runtime/supervise/runtime.ts
+++ b/src/runtime/supervise/runtime.ts
@@ -40,6 +40,7 @@ import type {
   SandboxClient,
 } from '../types'
 import { zeroTokenUsage } from '../util'
+import { createInbox } from './inbox'
 import type {
   AgentSpec,
   DefaultVerdict,
@@ -243,6 +244,13 @@ export interface RouterToolsSeam {
   model?: string
   tools: ReadonlyArray<ToolSpec>
   executeToolCall: (name: string, args: Record<string, unknown>, task: unknown) => Promise<string>
+  /** Online observer of each tool step — the seam a `DetectorMonitor` taps to watch the live pipe
+   *  (raise a `finding` when the worker loops/errors). Called after every tool call resolves. */
+  onToolStep?: (step: {
+    toolName: string
+    args: Record<string, unknown>
+    status: 'ok' | 'error'
+  }) => void
   /** Max inference turns. Default 200 (runaway backstop — set far above any
    *  legitimate workflow). For tighter per-workflow limits use a cost budget
    *  or wall-clock deadline at the call site. */
@@ -286,37 +294,81 @@ export const routerToolsInlineExecutor: ExecutorFactory<unknown> = (spec, ctx) =
   abortIfSignalled()
   if (!ctx.signal.aborted) ctx.signal.addEventListener('abort', abortIfSignalled, { once: true })
 
+  // The down-leg receive end: the driver's steer/answer messages land here via `Scope.send`.
+  const inbox = createInbox()
+
   let artifact: ExecutorResult<unknown> | undefined
 
   return {
     runtime: 'router' as Runtime,
+    deliver: (m) => inbox.deliver(m),
     async execute(task, signal): Promise<ExecutorResult<unknown>> {
       const started = Date.now()
-      const linked = linkSignals(signal, controller.signal)
       const messages: Array<Record<string, unknown>> = [
         ...(taskToMessages(task, spec) as Array<Record<string, unknown>>),
       ]
       const tokens = zeroTokenUsage()
       let turns = 0
       let lastText = ''
+      // Fold any queued down-messages into the conversation as one operator turn (the boundary flush).
+      const flush = () => {
+        const pending = inbox.drain()
+        if (pending.length) messages.push({ role: 'user', content: inbox.fold(pending) })
+        return pending.length > 0
+      }
+
+      // The external abort sources (caller signal + executor teardown), merged ONCE — so we don't
+      // re-register listeners on these long-lived signals every turn.
+      const external = mergeAbortSignals(signal, controller.signal)
 
       for (let t = 0; t < maxTurns; t += 1) {
+        // QUEUED messages flush at the step boundary, before this turn's inference.
+        flush()
+        // A forceful (interrupt) message aborts THIS turn so the worker re-plans immediately. The
+        // per-turn controller fires on `external` OR a fresh interrupt; its listener on `external` is
+        // removed after the turn (`cleanup`) so nothing accumulates across turns.
+        const interruptSig = inbox.freshInterrupt()
+        const turnController = new AbortController()
+        const abortTurn = () => turnController.abort()
+        if (external.aborted) turnController.abort()
+        else external.addEventListener('abort', abortTurn)
+        interruptSig.addEventListener('abort', abortTurn, { once: true })
+        const cleanup = () => external.removeEventListener('abort', abortTurn)
+        let res: Response
+        try {
+          res = await fetch(`${seam.routerBaseUrl.replace(/\/$/, '')}/chat/completions`, {
+            method: 'POST',
+            headers: {
+              'content-type': 'application/json',
+              authorization: `Bearer ${seam.routerKey}`,
+            },
+            body: JSON.stringify({
+              model,
+              messages,
+              tools: seam.tools,
+              tool_choice: 'auto',
+              temperature: 0.2,
+            }),
+            signal: turnController.signal,
+          })
+        } catch (e) {
+          cleanup()
+          // Re-plan ONLY when a forceful inbox message aborted this turn (a real AbortError, with the
+          // interrupt — not the external teardown/budget signal). The re-planned turn still consumes a
+          // loop slot (so interrupt spam is bounded by maxTurns, not a hang) but does not bill a turn.
+          // Any other error — incl. a network fault coincident with an interrupt — is fatal: rethrow.
+          const interruptAbort =
+            e instanceof DOMException &&
+            e.name === 'AbortError' &&
+            interruptSig.aborted &&
+            !signal.aborted &&
+            !controller.signal.aborted
+          if (interruptAbort) continue
+          throw e
+        }
+        cleanup()
+        // The inference completed — count the turn now (an interrupted, re-planned turn doesn't bill).
         turns += 1
-        const res = await fetch(`${seam.routerBaseUrl.replace(/\/$/, '')}/chat/completions`, {
-          method: 'POST',
-          headers: {
-            'content-type': 'application/json',
-            authorization: `Bearer ${seam.routerKey}`,
-          },
-          body: JSON.stringify({
-            model,
-            messages,
-            tools: seam.tools,
-            tool_choice: 'auto',
-            temperature: 0.2,
-          }),
-          ...(linked ? { signal: linked } : {}),
-        })
         if (!res.ok) {
           throw new ValidationError(
             `routerToolsInlineExecutor: router ${res.status}: ${(await res.text()).slice(0, 200)}`,
@@ -331,7 +383,12 @@ export const routerToolsInlineExecutor: ExecutorFactory<unknown> = (spec, ctx) =
         const msg = data.choices?.[0]?.message
         if (msg?.content) lastText = msg.content
         const toolCalls = msg?.tool_calls ?? []
-        if (toolCalls.length === 0) break // the model answered — loop done
+        if (toolCalls.length === 0) {
+          // Before settling, flush once more — a worker may not finish while a steer/answer it never
+          // read is still pending. If anything flushed, keep going; otherwise it is truly done.
+          if (flush()) continue
+          break
+        }
 
         // Record the assistant turn verbatim, then run each call on the host and
         // fold the result back as a `tool` message for the next turn.
@@ -360,8 +417,24 @@ export const routerToolsInlineExecutor: ExecutorFactory<unknown> = (spec, ctx) =
             })
             continue
           }
-          const result = await seam.executeToolCall(tc?.function?.name ?? '', args, task)
+          const toolName = tc?.function?.name ?? ''
+          let result: string
+          let status: 'ok' | 'error' = 'ok'
+          try {
+            result = await seam.executeToolCall(toolName, args, task)
+          } catch (e) {
+            status = 'error'
+            result = `error: ${e instanceof Error ? e.message : String(e)}`
+          }
           messages.push({ role: 'tool', tool_call_id: id, content: result })
+          // Feed the online detector pipe (stuck-loop / error-streak) — a worker repeating the same
+          // call or hammering errors is caught mid-run, not only at settle. This is an observability
+          // side-channel: a throwing monitor must never crash the production inference loop.
+          try {
+            seam.onToolStep?.({ toolName, args, status })
+          } catch {
+            // ignore — monitoring must not break the worker
+          }
         }
       }
 
@@ -962,6 +1035,21 @@ function linkSignals(a: AbortSignal, b: AbortSignal): AbortSignal | undefined {
   return c.signal
 }
 
+/** Combine N abort signals into one that fires when ANY does. Node-portable (no `AbortSignal.any`,
+ *  which needs >=20.3 — the package floor is >=20). */
+function mergeAbortSignals(...signals: AbortSignal[]): AbortSignal {
+  const c = new AbortController()
+  const onAbort = () => c.abort()
+  for (const s of signals) {
+    if (s.aborted) {
+      c.abort()
+      break
+    }
+    s.addEventListener('abort', onAbort, { once: true })
+  }
+  return c.signal
+}
+
 // Re-export the verdict + spend surface so a consumer importing the runtime
 // built-ins gets the budget vocabulary from one place.
 export type { DefaultVerdict, Executor, ExecutorResult, Spend, UsageEvent }
diff --git a/src/runtime/supervise/trace-source.ts b/src/runtime/supervise/trace-source.ts
new file mode 100644
index 00000000..3c5dd4ac
--- /dev/null
+++ b/src/runtime/supervise/trace-source.ts
@@ -0,0 +1,211 @@
+/**
+ * @experimental
+ *
+ * `TraceSource` — the ONE substrate-agnostic source of a worker's tool-call trace. The online
+ * detectors and the settle-time analyzers consume agent-eval `ToolSpan`s from here, regardless of
+ * whether the worker is:
+ *   - an OWNED tool loop (router-tools, cli-bridge tool dispatch) → push spans as we dispatch them;
+ *   - a SANDBOX / fleet box → read the harness's tool calls off the session (`streamPrompt` parts
+ *     live, `session.messages()` / `findCompletedTurn` at settle).
+ *
+ * The common currency is agent-eval's `ToolSpan` (so the same detectors + `buildTrajectory`/
+ * `stuckLoopView`/`toolWasteView` run over any source). A source exposes two lanes:
+ *   - `onSpan` — live spans for ONLINE detection (best-effort; a black-box box may only collect).
+ *   - `collect` — the full span set at settle for the BATCH analyzers (always available).
+ *
+ * This module imports NO substrate SDK — it decodes generic message parts / OpenAI tool-call shapes.
+ * The sandbox wiring (`sandboxSessionTraceSource`) is the thin adapter that feeds box session parts in.
+ */
+
+import type { ToolSpan } from '@tangle-network/agent-eval'
+
+export interface ToolStepInput {
+  readonly toolName: string
+  readonly args: unknown
+  readonly status?: 'ok' | 'error'
+  readonly result?: unknown
+}
+
+export interface TraceSource {
+  /** Subscribe to tool spans as they are produced (ONLINE). Returns an unsubscribe. A source that
+   *  only exposes its trace at the end registers nothing and returns a no-op. */
+  onSpan(handler: (span: ToolSpan) => void): () => void
+  /** The full set of tool spans for the run (SETTLE / batch). Always available. */
+  collect(): Promise<ToolSpan[]>
+}
+
+/** Project a normalized tool step into the canonical agent-eval `ToolSpan`. */
+export function toToolSpan(input: ToolStepInput, runId: string, seq: number, at: number): ToolSpan {
+  return {
+    spanId: `${runId}-t${seq}`,
+    runId,
+    kind: 'tool',
+    name: input.toolName,
+    toolName: input.toolName,
+    args: input.args,
+    status: input.status ?? 'ok',
+    startedAt: at,
+    endedAt: at,
+    ...(input.result !== undefined ? { result: input.result } : {}),
+  }
+}
+
+/** Decode a single harness message part / OpenAI tool-call into a tool step, or `undefined` if it is
+ *  not a tool call. Defensive across the shapes a harness or the OpenAI API emit:
+ *    - OpenAI: `{ type:'function'|'tool_call', function:{ name, arguments } }` or `{ name, arguments }`
+ *    - harness part: `{ type:'tool'|'tool-call'|'tool_use'|'tool-invocation', tool/name/toolName,
+ *      args/input/arguments, state/status }`; `tool`: name string or `{ name|toolName, args }`
+ *  Unknown args strings are left as-is (the detector hashes them); never throws. */
+export function decodeToolPart(part: unknown): ToolStepInput | undefined {
+  if (!part || typeof part !== 'object') return undefined
+  const p = part as Record<string, unknown>
+  const type = typeof p.type === 'string' ? p.type.toLowerCase() : ''
+  const nested = p.function ?? p.tool ?? p.toolInvocation
+  const fn = nested && typeof nested === 'object' ? (nested as Record<string, unknown>) : undefined
+
+  const isOpenAiToolCall = type === 'function' || type === 'tool_call' || !!p.function
+  const isHarnessTool =
+    type.includes('tool') || (typeof p.toolName === 'string' && p.toolName.length > 0)
+  if (!isOpenAiToolCall && !isHarnessTool) return undefined
+
+  const name =
+    (typeof fn?.name === 'string' && fn.name) ||
+    (typeof p.toolName === 'string' && p.toolName) ||
+    (typeof p.name === 'string' && p.name) ||
+    (typeof fn?.toolName === 'string' && fn.toolName) ||
+    (typeof p.tool === 'string' && p.tool) ||
+    ''
+  if (!name) return undefined
+
+  const rawArgs = fn?.arguments ?? p.args ?? p.input ?? p.arguments ?? fn?.input ?? fn?.args
+  const args = typeof rawArgs === 'string' ? safeParse(rawArgs) : (rawArgs ?? {})
+
+  const state =
+    (typeof p.state === 'string' && p.state) || (typeof p.status === 'string' && p.status)
+  const failed = state === 'error' || !!p.error
+  const succeeded = state === 'completed' || state === 'result'
+  const status: 'ok' | 'error' | undefined = failed ? 'error' : succeeded ? 'ok' : undefined
+
+  return { toolName: name, args, ...(status ? { status } : {}) }
+}
+
+function safeParse(s: string): unknown {
+  try {
+    return JSON.parse(s)
+  } catch {
+    return s
+  }
+}
+
+let runSeq = 0
+
+/** A push source for OWNED tool loops (router-tools / cli-bridge tool dispatch): the loop calls
+ *  `record(step)` for each tool call; it becomes a span, fan-out to live subscribers + buffered for
+ *  `collect`. */
+export function createPushTraceSource(opts: { runId?: string; now?: () => number } = {}): {
+  source: TraceSource
+  record: (input: ToolStepInput) => ToolSpan
+} {
+  const runId = opts.runId ?? `push-${runSeq++}`
+  const now = opts.now ?? Date.now
+  const spans: ToolSpan[] = []
+  const subs = new Set<(span: ToolSpan) => void>()
+  return {
+    record(input) {
+      const span = toToolSpan(input, runId, spans.length, now())
+      spans.push(span)
+      for (const fn of subs) fn(span)
+      return span
+    },
+    source: {
+      onSpan(handler) {
+        subs.add(handler)
+        return () => subs.delete(handler)
+      },
+      collect: () => Promise.resolve([...spans]),
+    },
+  }
+}
+
+/** A source backed by harness message PARTS (sandbox session, cli-bridge). `collect` reads the full
+ *  part list and decodes the tool calls; `subscribeParts`, when given, streams parts live for
+ *  online detection. The caller supplies how to get parts (e.g. `box.session(id).messages()`
+ *  flat-mapped to parts) — keeping this module free of any substrate SDK. */
+export function createPartsTraceSource(opts: {
+  collectParts: () => Promise<ReadonlyArray<unknown>>
+  subscribeParts?: (onPart: (part: unknown) => void) => () => void
+  runId?: string
+  now?: () => number
+}): TraceSource {
+  const runId = opts.runId ?? `parts-${runSeq++}`
+  const now = opts.now ?? Date.now
+  const subs = new Set<(span: ToolSpan) => void>()
+  let liveSeq = 0
+  let unsub: (() => void) | undefined
+  const startLive = () => {
+    if (unsub || !opts.subscribeParts) return
+    unsub = opts.subscribeParts((part) => {
+      const step = decodeToolPart(part)
+      if (!step) return
+      const span = toToolSpan(step, runId, liveSeq++, now())
+      for (const fn of subs) fn(span)
+    })
+  }
+  return {
+    onSpan(handler) {
+      subs.add(handler)
+      startLive()
+      return () => {
+        subs.delete(handler)
+        if (subs.size === 0 && unsub) {
+          unsub()
+          unsub = undefined
+        }
+      }
+    },
+    async collect() {
+      const parts = await opts.collectParts()
+      const spans: ToolSpan[] = []
+      for (const part of parts) {
+        const step = decodeToolPart(part)
+        if (step) spans.push(toToolSpan(step, runId, spans.length, now()))
+      }
+      return spans
+    },
+  }
+}
+
+/** A harness session message carrying parts (the shape `box.messages()` returns). Structurally typed
+ *  so this works with the real `@tangle-network/sandbox` box AND a test double, no SDK import. */
+export interface SessionMessageLike {
+  readonly parts?: ReadonlyArray<unknown>
+}
+
+/** The minimal box surface this needs: list a session's messages (incl. mid-turn partials). */
+export interface SessionTraceBox {
+  messages(opts: { sessionId: string }): Promise<ReadonlyArray<SessionMessageLike>>
+}
+
+/** The SANDBOX / fleet trace source: read a box session's message parts and decode the harness's tool
+ *  calls into spans. `collect` (settle) is the solid path — `box.messages({sessionId})` → parts → spans;
+ *  black-box harnesses aren't mid-step interruptible, so online steering is the owned-loop's job and a
+ *  live `subscribe` is opt-in (pass `subscribeParts` from `streamPrompt` when the harness streams parts). */
+export function sandboxSessionTraceSource(
+  box: SessionTraceBox,
+  sessionId: string,
+  opts: {
+    subscribeParts?: (onPart: (part: unknown) => void) => () => void
+    runId?: string
+    now?: () => number
+  } = {},
+): TraceSource {
+  return createPartsTraceSource({
+    collectParts: async () => {
+      const msgs = await box.messages({ sessionId })
+      return msgs.flatMap((m) => (m.parts ? [...m.parts] : []))
+    },
+    ...(opts.subscribeParts ? { subscribeParts: opts.subscribeParts } : {}),
+    runId: opts.runId ?? `box-${sessionId}`,
+    ...(opts.now ? { now: opts.now } : {}),
+  })
+}
diff --git a/src/runtime/supervise/trajectory-recorder.ts b/src/runtime/supervise/trajectory-recorder.ts
new file mode 100644
index 00000000..65dc479c
--- /dev/null
+++ b/src/runtime/supervise/trajectory-recorder.ts
@@ -0,0 +1,45 @@
+/**
+ * @experimental
+ *
+ * The SETTLE-time analyst: when a worker finishes, collect its tool spans from a `TraceSource` and run
+ * agent-eval's PUBLISHED batch analyzers over them — `buildTrajectory` (structured run summary),
+ * `stuckLoopView` (full-run repeated-call view, complementing the online consecutive detector), and
+ * `toolWasteView`. Substrate-agnostic: the spans come from any source (an owned loop's buffer OR a
+ * sandbox box session). No analysis reimplemented — this is the thin bridge into agent-eval's analyzers.
+ */
+
+import { buildTrajectory, InMemoryTraceStore } from '@tangle-network/agent-eval'
+import { stuckLoopView, toolWasteView } from '@tangle-network/agent-eval/pipelines'
+import type { TraceSource } from './trace-source'
+
+export interface TrajectoryAnalysis {
+  /** Structured run summary (tool-call count, step order). Each step has one timestamp, so per-span
+   *  duration is 0; loop/waste detection relies on call PATTERNS + cross-span windows, not durations. */
+  readonly trajectory: Awaited<ReturnType<typeof buildTrajectory>>
+  /** Full-run repeated-call view (total occurrences + window) — catches a loop whose repeats are
+   *  interleaved with other calls, which the online consecutive-call detector does not see. */
+  readonly stuckLoop: Awaited<ReturnType<typeof stuckLoopView>>
+  /** Wasted-vs-total tool-call ratio for the run. */
+  readonly toolWaste: Awaited<ReturnType<typeof toolWasteView>>
+}
+
+/** Gather every span from `source`, then run agent-eval's batch analyzers on them under one `runId`. */
+export async function analyzeTrace(
+  source: TraceSource,
+  runId = 'worker',
+): Promise<TrajectoryAnalysis> {
+  const collected = await source.collect()
+  const store = new InMemoryTraceStore()
+  // The analyzers filter by runId, so each span is rewritten onto the shared `runId` (with a positional
+  // spanId) before it is stored — the whole trace is then visible whatever ids the source assigned.
+  for (const [index, span] of collected.entries()) {
+    if (span) await store.appendSpan({ ...span, runId, spanId: `${runId}-t${index}` })
+  }
+  const pending = [
+    buildTrajectory(store, runId),
+    stuckLoopView(store, { runId }),
+    toolWasteView(store, { runId }),
+  ] as const
+  const [trajectory, stuckLoop, toolWaste] = await Promise.all(pending)
+  return { trajectory, stuckLoop, toolWaste }
+}
diff --git a/tests/loops/completion-gate.test.ts b/tests/loops/completion-gate.test.ts
index 7b3604d5..85203bd9 100644
--- a/tests/loops/completion-gate.test.ts
+++ b/tests/loops/completion-gate.test.ts
@@ -163,7 +163,7 @@ function gatedWorkerLeaf(
 
 const spawnAwaitStop: DriverTurn[] = [
   { toolCalls: [{ name: 'spawn_worker', arguments: { profile: { kind: 'worker' }, task: 'go' } }] },
-  { toolCalls: [{ name: 'await_next', arguments: {} }] },
+  { toolCalls: [{ name: 'await_event', arguments: {} }] },
   { content: 'stop' },
 ]
 
@@ -236,8 +236,8 @@ describe('completion-oracle settle — settled ⟺ DELIVERED (Foreman 0/18)', ()
       },
       {
         toolCalls: [
-          { name: 'await_next', arguments: {} },
-          { name: 'await_next', arguments: {} },
+          { name: 'await_event', arguments: {} },
+          { name: 'await_event', arguments: {} },
         ],
       },
       { content: 'stop' },
@@ -282,7 +282,7 @@ describe('completion-oracle settle — settled ⟺ DELIVERED (Foreman 0/18)', ()
           { name: 'spawn_worker', arguments: { profile: { kind: 'driver' }, task: 'delegate' } },
         ],
       },
-      { toolCalls: [{ name: 'await_next', arguments: {} }] },
+      { toolCalls: [{ name: 'await_event', arguments: {} }] },
       { content: 'stop' },
     ]
     const root = coordinationDriverAgent(driverOpts('root', scriptedChat(rootTurns), makeAgent))
diff --git a/tests/loops/coordination-driver.test.ts b/tests/loops/coordination-driver.test.ts
index 720550cc..944898d6 100644
--- a/tests/loops/coordination-driver.test.ts
+++ b/tests/loops/coordination-driver.test.ts
@@ -127,7 +127,7 @@ describe('coordinationDriverAgent — the driver BRAIN (LLM tool-loop drives rea
             { name: 'spawn_worker', arguments: { profile: { kind: 'worker' }, task: 'go' } },
           ],
         },
-        { toolCalls: [{ name: 'await_next', arguments: {} }] },
+        { toolCalls: [{ name: 'await_event', arguments: {} }] },
         { content: 'done' },
       ],
       seen,
@@ -149,11 +149,11 @@ describe('coordinationDriverAgent — the driver BRAIN (LLM tool-loop drives rea
     expect(result.kind).toBe('winner')
 
     // Feed-back proof: by turn 2 (the 3rd chat call), the conversation the driver saw contains a
-    // `tool` message carrying the await_next settlement — i.e. the tool RESULT was folded back.
+    // `tool` message carrying the await_event settlement — i.e. the tool RESULT was folded back.
     const turn2Convo = seen[2]!
     const toolMsgs = turn2Convo.filter((m) => m.role === 'tool')
-    expect(toolMsgs.length).toBeGreaterThanOrEqual(2) // spawn_worker result + await_next result
-    expect(toolMsgs.some((m) => m.name === 'await_next' && m.content.includes('done'))).toBe(true)
+    expect(toolMsgs.length).toBeGreaterThanOrEqual(2) // spawn_worker result + await_event result
+    expect(toolMsgs.some((m) => m.name === 'await_event' && m.content.includes('done'))).toBe(true)
 
     // A real worker spawn is recorded in the journal (not a mock-bypassed result).
     const root_tree = (await journal.loadTree('cd')) as SpawnEvent[]
@@ -200,7 +200,7 @@ describe('coordinationDriverAgent — the driver BRAIN (LLM tool-loop drives rea
             { name: 'spawn_worker', arguments: { profile: { kind: 'worker' }, task: 'sub' } },
           ],
         },
-        { toolCalls: [{ name: 'await_next', arguments: {} }] },
+        { toolCalls: [{ name: 'await_event', arguments: {} }] },
         { content: 'mid done' },
       ],
     }
@@ -213,7 +213,7 @@ describe('coordinationDriverAgent — the driver BRAIN (LLM tool-loop drives rea
             { name: 'spawn_worker', arguments: { profile: midProfile, task: 'delegate' } },
           ],
         },
-        { toolCalls: [{ name: 'await_next', arguments: {} }] },
+        { toolCalls: [{ name: 'await_event', arguments: {} }] },
         { content: 'root done' },
       ],
       rootSeen,
@@ -237,7 +237,7 @@ describe('coordinationDriverAgent — the driver BRAIN (LLM tool-loop drives rea
     // recorded the worker's settlement fed back — proof the inner agent reasoned, not scripted-bypassed.
     expect(midSeen.length).toBeGreaterThanOrEqual(2)
     const midToolMsgs = midSeen[midSeen.length - 1]!.filter((m) => m.role === 'tool')
-    expect(midToolMsgs.some((m) => m.name === 'await_next')).toBe(true)
+    expect(midToolMsgs.some((m) => m.name === 'await_event')).toBe(true)
 
     // A SEPARATE nested tree exists under the root — the mid driver's sub-tree, holding the
     // worker spawn. A non-recursive build (mid as a leaf) could not produce a nested tree.
diff --git a/tests/loops/coordination-mcp.test.ts b/tests/loops/coordination-mcp.test.ts
index 45de14ab..3fbbf03a 100644
--- a/tests/loops/coordination-mcp.test.ts
+++ b/tests/loops/coordination-mcp.test.ts
@@ -73,7 +73,7 @@ describe('coordination MCP over a live Scope — the real keystone (HTTP → MCP
             name: 'spawn_worker',
             arguments: { profile: {}, task: 'go' },
           })
-          await jsonRpc(mcp.url, 'tools/call', { name: 'await_next', arguments: {} })
+          await jsonRpc(mcp.url, 'tools/call', { name: 'await_event', arguments: {} })
           observed = { toolsList: toolsList.result, settled: mcp.settled() }
           const done = mcp.settled().filter((w) => w.status === 'done' && w.valid === true)
           return done[0]?.outRef ? await blobs.get(done[0].outRef) : undefined
@@ -102,6 +102,6 @@ describe('coordination MCP over a live Scope — the real keystone (HTTP → MCP
       (t) => t.name,
     )
     expect(names).toContain('spawn_worker')
-    expect(names).toContain('await_next')
+    expect(names).toContain('await_event')
   })
 })
diff --git a/tests/loops/coordination.test.ts b/tests/loops/coordination.test.ts
index 96f0cea6..5b795931 100644
--- a/tests/loops/coordination.test.ts
+++ b/tests/loops/coordination.test.ts
@@ -2,6 +2,7 @@ import { describe, expect, it } from 'vitest'
 import { createMcpServer } from '../../src/mcp/server'
 import { createCoordinationTools } from '../../src/mcp/tools/coordination'
 import type { Agent, ResultBlobStore, Scope, Spend } from '../../src/runtime'
+import { createPushTraceSource, watchTrace } from '../../src/runtime'
 
 const zeroSpend = (): Spend => ({ iterations: 0, tokens: { input: 0, output: 0 }, usd: 0, ms: 0 })
 
@@ -114,13 +115,13 @@ describe('coordination tools', () => {
     expect(
       await tool(tb, 'steer_worker').handler({ workerId: 'w0', instruction: 'do X next' }),
     ).toEqual({ delivered: true })
-    expect(sent).toEqual([{ id: 'w0', msg: { steer: 'do X next' } }])
+    expect(sent).toEqual([{ id: 'w0', msg: { steer: 'do X next', interrupt: false } }])
     expect(await tool(tb, 'steer_worker').handler({ workerId: 'gone', instruction: 'x' })).toEqual({
       delivered: false,
     })
   })
 
-  it('await_next drains settlements into the driver ledger', async () => {
+  it('await_event(settled) drains settlements into the driver ledger', async () => {
     const { scope } = mockScope()
     const settlements = [
       {
@@ -143,14 +144,15 @@ describe('coordination tools', () => {
       makeWorkerAgent,
       perWorker: { maxIterations: 1, maxTokens: 10 },
     })
-    expect(await tool(tb, 'await_next').handler({})).toEqual({
+    expect(await tool(tb, 'await_event').handler({ kinds: ['settled'] })).toEqual({
+      type: 'settled',
       settled: 'w7',
       status: 'done',
       score: 0.83,
       valid: true,
       outRef: 'blob:w7',
     })
-    expect(await tool(tb, 'await_next').handler({})).toEqual({ idle: true })
+    expect(await tool(tb, 'await_event').handler({ kinds: ['settled'] })).toEqual({ idle: true })
     expect(tb.settled()).toEqual([
       { id: 'w7', status: 'done', score: 0.83, valid: true, outRef: 'blob:w7' },
     ])
@@ -179,16 +181,27 @@ describe('coordination tools', () => {
       stopped: false,
       error: 'unresolved-blocking-questions',
     })
-    await tool(tb, 'answer_question').handler({
-      questionId: r.question.id,
-      answer: 'Target v2.',
-      by: 'user',
-    })
+    // The driver-1 asker is not a live worker in the mock scope → the answer reports delivered:false.
+    expect(
+      await tool(tb, 'answer_question').handler({
+        questionId: r.question.id,
+        answer: 'Target v2.',
+        by: 'user',
+      }),
+    ).toMatchObject({ question: { status: 'answered' }, delivered: false })
     expect(await tool(tb, 'stop').handler({ reason: 'answered and verified' })).toEqual({
       stopped: true,
     })
     expect(tb.questions()[0]).toMatchObject({ status: 'answered' })
-    expect(emitted).toEqual([{ type: 'question', question: expect.objectContaining(r.question) }])
+    // The pass-through trail records BOTH legs: the question up, then the answer routed down.
+    expect(emitted).toEqual([
+      { type: 'question', question: expect.objectContaining(r.question) },
+      {
+        type: 'answer',
+        questionId: r.question.id,
+        down: { toWorker: 'driver-1', instruction: 'Target v2.', delivered: false },
+      },
+    ])
   })
 
   it('list_analysts surfaces the menu and run_analyst applies a lens to a settled worker', async () => {
@@ -264,6 +277,69 @@ describe('coordination tools', () => {
     expect(tb.stats()).toMatchObject({ published: 2, pulled: 2, byKind: { question: 2 } })
   })
 
+  it('steer_worker routes down + records in history but is never pulled back', async () => {
+    const { scope, sent } = mockScope()
+    const emitted: Array<{ type: string }> = []
+    const tb = createCoordinationTools({
+      scope,
+      blobs,
+      makeWorkerAgent,
+      perWorker: { maxIterations: 1, maxTokens: 10 },
+      onEvent: (e) => emitted.push(e),
+    })
+    expect(
+      await tool(tb, 'steer_worker').handler({
+        workerId: 'w0',
+        instruction: 'do X',
+        interrupt: true,
+      }),
+    ).toEqual({ delivered: true })
+    // A steer to a worker with no live inbox reports delivered:false.
+    expect(await tool(tb, 'steer_worker').handler({ workerId: 'gone', instruction: 'x' })).toEqual({
+      delivered: false,
+    })
+    // The forceful steer reached the child inbox (down delivery)...
+    expect(sent).toEqual([{ id: 'w0', msg: { steer: 'do X', interrupt: true } }])
+    // ...and both attempts were recorded for observability (pass-through + history)...
+    expect(emitted.map((e) => e.type)).toEqual(['steer', 'steer'])
+    expect(tb.history().map((r) => r.event.type)).toEqual(['steer', 'steer'])
+    // ...but the parent never pulls its own outbound messages back.
+    expect(await tool(tb, 'await_event').handler({})).toEqual({ idle: true })
+  })
+
+  it('answer_question routes the answer down to a LIVE worker and surfaces delivered:true', async () => {
+    const { scope, sent } = mockScope()
+    const emitted: Array<{ type: string }> = []
+    const tb = createCoordinationTools({
+      scope,
+      blobs,
+      makeWorkerAgent,
+      perWorker: { maxIterations: 1, maxTokens: 10 },
+      onEvent: (e) => emitted.push(e),
+    })
+    // The question originates from the live worker w0, so the answer routes back to its inbox.
+    const r = (await tool(tb, 'ask_parent').handler({
+      from: 'w0',
+      level: 'worker',
+      question: 'which path?',
+      reason: 'ambiguous',
+      urgency: 'blocks-step',
+    })) as { question: { id: string } }
+    expect(
+      await tool(tb, 'answer_question').handler({ questionId: r.question.id, answer: 'path B' }),
+    ).toEqual({
+      question: expect.objectContaining({ id: r.question.id, status: 'answered' }),
+      delivered: true,
+    })
+    // The answer reached w0's inbox shaped { answer, questionId, interrupt }: the question was
+    // blocks-step, so the answer is delivered FORCEFULLY (interrupt: true) to unpark the worker now...
+    expect(sent).toEqual([
+      { id: 'w0', msg: { answer: 'path B', questionId: r.question.id, interrupt: true } },
+    ])
+    // ...and both legs are on the trail: question up, answer down.
+    expect(emitted.map((e) => e.type)).toEqual(['question', 'answer'])
+  })
+
   it('analyze-on-settle auto-runs lenses and await_event surfaces settled + finding', async () => {
     const { scope } = mockScope()
     const settlements = [
@@ -348,6 +424,66 @@ describe('coordination tools', () => {
     expect(await tool(tb, 'await_event').handler({ kinds: ['settled'] })).toEqual({ idle: true })
   })
 
+  it('await_event reports idle:false (no leak) when the only live event mismatches the kinds filter', async () => {
+    const { scope } = mockScope()
+    const settlements = [
+      {
+        kind: 'done' as const,
+        handle: { id: 'w9', label: 'w', status: 'done' as const, abort() {} },
+        out: {},
+        outRef: 'blob:w9',
+        verdict: { valid: true, score: 1 },
+        spent: zeroSpend(),
+        seq: 0,
+      },
+    ]
+    const tb = createCoordinationTools({
+      scope: { ...scope, next: () => Promise.resolve(settlements.shift() ?? null) } as typeof scope,
+      blobs,
+      makeWorkerAgent,
+      perWorker: { maxIterations: 1, maxTokens: 10 },
+    })
+    // A worker is settle-able, but the driver only wants questions: await_event drains the cursor
+    // (progress was made → idle:false) WITHOUT leaking the settled event to a question-only pull.
+    expect(await tool(tb, 'await_event').handler({ kinds: ['question'] })).toEqual({ idle: false })
+    // The drained settled event was queued, not lost — a caller that asks for it still gets it.
+    expect(await tool(tb, 'await_event').handler({ kinds: ['settled'] })).toMatchObject({
+      settled: 'w9',
+    })
+  })
+
+  it('an ONLINE detector raises a finding on the bus that the driver pulls (the live pipe → bus chain)', async () => {
+    const { scope } = mockScope()
+    const tb = createCoordinationTools({
+      scope,
+      blobs,
+      makeWorkerAgent,
+      perWorker: { maxIterations: 1, maxTokens: 10 },
+    })
+    // watchTrace over the worker's TraceSource raises a finding via raiseFinding when it loops.
+    const { source, record } = createPushTraceSource({ runId: 'w0' })
+    watchTrace(source, {
+      onSignal: (s) => {
+        void tb.raiseFinding({ fromWorker: 'w0', analyst: `online:${s.detector}`, findings: s })
+      },
+    })
+    // The worker loops on the same tool call → the stuck-loop detector trips mid-run.
+    record({ toolName: 'grep', args: { q: 'x' } })
+    record({ toolName: 'grep', args: { q: 'x' } })
+    record({ toolName: 'grep', args: { q: 'x' } })
+    // The driver pulls the finding off the bus mid-run — no need to wait for settle.
+    const ev = (await tool(tb, 'await_event').handler({ kinds: ['finding'] })) as {
+      type: string
+      fromWorker: string
+      analyst: string
+    }
+    expect(ev).toMatchObject({
+      type: 'finding',
+      fromWorker: 'w0',
+      analyst: 'online:repeated-action',
+    })
+  })
+
   it('createMcpServer serves coordination tools alongside built-ins; a shadow throws', () => {
     const { scope } = mockScope()
     const tb = createCoordinationTools({
diff --git a/tests/loops/detector-monitor.test.ts b/tests/loops/detector-monitor.test.ts
new file mode 100644
index 00000000..afbecb7a
--- /dev/null
+++ b/tests/loops/detector-monitor.test.ts
@@ -0,0 +1,59 @@
+import type { DetectorSignal } from '@tangle-network/agent-eval'
+import { describe, expect, it } from 'vitest'
+import { createPushTraceSource, watchTrace } from '../../src/runtime'
+
+describe('watchTrace (online analyst over a TraceSource)', () => {
+  it('raises a stuck-loop signal when a worker repeats the same tool call', () => {
+    const signals: DetectorSignal[] = []
+    const { source, record } = createPushTraceSource({ runId: 'r' })
+    watchTrace(source, { onSignal: (s) => signals.push(s) })
+    record({ toolName: 'run_tests', args: { path: 'src/' } })
+    record({ toolName: 'run_tests', args: { path: 'src/' } })
+    record({ toolName: 'run_tests', args: { path: 'src/' } }) // 3rd → trip
+    expect(signals).toHaveLength(1)
+    expect(signals[0]).toMatchObject({
+      detector: 'repeated-action',
+      streak: 3,
+      failureClass: 'tool_recovery_failure',
+    })
+  })
+
+  it('does NOT signal when args differ (real progress through distinct calls)', () => {
+    const signals: DetectorSignal[] = []
+    const { source, record } = createPushTraceSource()
+    watchTrace(source, { onSignal: (s) => signals.push(s) })
+    record({ toolName: 'edit', args: { file: 'a.ts' } })
+    record({ toolName: 'edit', args: { file: 'b.ts' } })
+    record({ toolName: 'edit', args: { file: 'c.ts' } })
+    expect(signals).toHaveLength(0)
+  })
+
+  it('raises an error-streak signal when consecutive tool calls error', () => {
+    const signals: DetectorSignal[] = []
+    const { source, record } = createPushTraceSource()
+    watchTrace(source, { onSignal: (s) => signals.push(s) })
+    record({ toolName: 'build', args: { n: 1 }, status: 'error' })
+    record({ toolName: 'build', args: { n: 2 }, status: 'error' })
+    record({ toolName: 'build', args: { n: 3 }, status: 'error' })
+    expect(signals.some((s) => s.detector === 'error-streak' && s.streak === 3)).toBe(true)
+  })
+
+  it('never throws on unhashable (circular) args — observability must not crash the worker', () => {
+    const { source, record } = createPushTraceSource()
+    watchTrace(source)
+    const circular: Record<string, unknown> = {}
+    circular.self = circular
+    expect(() => record({ toolName: 'x', args: circular })).not.toThrow()
+  })
+
+  it('unsubscribe stops watching', () => {
+    const signals: DetectorSignal[] = []
+    const { source, record } = createPushTraceSource()
+    const off = watchTrace(source, { onSignal: (s) => signals.push(s) })
+    record({ toolName: 't', args: {} })
+    off()
+    record({ toolName: 't', args: {} })
+    record({ toolName: 't', args: {} }) // would be the 3rd, but we unsubscribed
+    expect(signals).toHaveLength(0)
+  })
+})
diff --git a/tests/loops/event-bus.test.ts b/tests/loops/event-bus.test.ts
index b8b2226a..f121e16b 100644
--- a/tests/loops/event-bus.test.ts
+++ b/tests/loops/event-bus.test.ts
@@ -58,15 +58,6 @@ describe('event bus', () => {
     expect(bus.pull()).toEqual({ type: 'finding', claim: 'late finding' })
   })
 
-  it('peek is non-destructive and respects priority', async () => {
-    const bus = createEventBus<E>()
-    await bus.publish({ type: 'settled', id: 'w1' })
-    await bus.publish({ type: 'question', q: 'q' }, { priority: 10 })
-    expect(bus.peek()).toEqual({ type: 'question', q: 'q' })
-    expect(bus.pending()).toBe(2) // peek consumed nothing
-    expect(bus.pull()).toEqual({ type: 'question', q: 'q' })
-  })
-
   it('history is the full ordered audit trail; stats count throughput', async () => {
     const bus = createEventBus<E>(fakeClock())
     await bus.publish({ type: 'settled', id: 'w1' })
@@ -82,6 +73,24 @@ describe('event bus', () => {
     })
   })
 
+  it('queue:false records to history + subscribers but never enters the pull queue', async () => {
+    const bus = createEventBus<E>()
+    const seen: string[] = []
+    bus.subscribe((r) => {
+      seen.push(r.event.type)
+    })
+    await bus.publish({ type: 'settled', id: 'w1' })
+    await bus.publish({ type: 'question', q: 'down-leg' }, { queue: false })
+    // The record-only event reached subscribers + the audit log...
+    expect(seen).toEqual(['settled', 'question'])
+    expect(bus.history().map((r) => r.event.type)).toEqual(['settled', 'question'])
+    expect(bus.stats()).toMatchObject({ published: 2 })
+    // ...but is invisible to the pull queue: only the queued settled is pending/pullable.
+    expect(bus.pending()).toBe(1)
+    expect(bus.pull()).toEqual({ type: 'settled', id: 'w1' })
+    expect(bus.pull()).toBeUndefined()
+  })
+
   it('unsubscribe stops delivery', async () => {
     const bus = createEventBus<BusEvent>()
     const seen: string[] = []
diff --git a/tests/loops/inbox.test.ts b/tests/loops/inbox.test.ts
new file mode 100644
index 00000000..8c1e84c2
--- /dev/null
+++ b/tests/loops/inbox.test.ts
@@ -0,0 +1,146 @@
+import type { AgentProfile } from '@tangle-network/sandbox'
+import { afterEach, describe, expect, it, vi } from 'vitest'
+import { type AgentSpec, createExecutor, createInbox } from '../../src/runtime'
+
+describe('worker inbox (down-leg receive end)', () => {
+  it('parses the down-message shapes; ignores malformed', () => {
+    const inbox = createInbox()
+    inbox.deliver({ steer: 'do X' })
+    inbox.deliver({ answer: 'use v2', questionId: 'q1' })
+    inbox.deliver({ junk: true }) // ignored, never throws
+    inbox.deliver(null)
+    const drained = inbox.drain()
+    expect(drained).toEqual([
+      { kind: 'steer', text: 'do X', interrupt: false },
+      { kind: 'answer', text: 'use v2', interrupt: false, questionId: 'q1' },
+    ])
+    // drain is destructive
+    expect(inbox.pending()).toBe(0)
+  })
+
+  it('folds queued messages into one operator turn', () => {
+    const inbox = createInbox()
+    inbox.deliver({ steer: 'switch to recursion' })
+    inbox.deliver({ answer: 'v2', questionId: 'q7' })
+    const folded = inbox.fold(inbox.drain())
+    expect(folded).toContain('[SUPERVISOR]')
+    expect(folded).toContain('New instruction from your supervisor: switch to recursion')
+    expect(folded).toContain('Answer to your question (q7): v2')
+  })
+
+  it('a forceful message aborts the live turn signal; a queued one does not', () => {
+    const inbox = createInbox()
+    const sig = inbox.freshInterrupt()
+    expect(sig.aborted).toBe(false)
+    inbox.deliver({ steer: 'note for later' }) // queued — no interrupt
+    expect(sig.aborted).toBe(false)
+    inbox.deliver({ steer: 'STOP, wrong path', interrupt: true }) // forceful
+    expect(sig.aborted).toBe(true)
+  })
+
+  it('each freshInterrupt is independent — a stale signal is not re-aborted', () => {
+    const inbox = createInbox()
+    const first = inbox.freshInterrupt()
+    inbox.deliver({ steer: 'x', interrupt: true })
+    expect(first.aborted).toBe(true)
+    // A new turn opens a fresh signal; the prior forceful message does not abort it.
+    const second = inbox.freshInterrupt()
+    expect(second.aborted).toBe(false)
+  })
+})
+
+describe('router-tools executor drains the inbox', () => {
+  afterEach(() => vi.unstubAllGlobals())
+
+  const noToolReply = () =>
+    new Response(
+      JSON.stringify({
+        choices: [{ message: { content: 'done', tool_calls: [] } }],
+        usage: { prompt_tokens: 1, completion_tokens: 1 },
+      }),
+      { status: 200, headers: { 'content-type': 'application/json' } },
+    )
+
+  it('a worker may not settle while a steer is pending — it flushes, folds it in, and continues', async () => {
+    const bodies: Array<{ messages: Array<{ role: string; content: string }> }> = []
+    let calls = 0
+    let deliver: (m: unknown) => void = () => {}
+    vi.stubGlobal(
+      'fetch',
+      vi.fn(async (_url: string, init?: { body?: string }) => {
+        bodies.push(JSON.parse(init?.body ?? '{}'))
+        calls += 1
+        // The driver steers the worker WHILE it is mid-turn, just as it first tries to finish.
+        if (calls === 1) deliver({ steer: 'also handle the wide-char edge case' })
+        return noToolReply()
+      }),
+    )
+
+    const factory = createExecutor({
+      backend: 'router-tools',
+      model: 'test-model',
+      routerBaseUrl: 'http://router.test',
+      routerKey: 'k',
+      tools: [],
+      executeToolCall: async () => '',
+    })
+    const spec: AgentSpec = {
+      profile: { name: 'w', prompt: { systemPrompt: 'sys' } } as unknown as AgentProfile,
+      harness: null,
+    } as AgentSpec
+    const exec = factory(spec, { signal: new AbortController().signal, seams: {} })
+    deliver = (m) => exec.deliver?.(m)
+
+    await exec.execute('implement wcwidth', new AbortController().signal)
+
+    // Turn 1 saw no tool calls but DID NOT settle — the pending steer forced a second turn...
+    expect(calls).toBe(2)
+    // ...and that second turn's conversation carries the folded steer.
+    const turn2 = bodies[1]?.messages ?? []
+    expect(turn2.some((m) => m.content?.includes('also handle the wide-char edge case'))).toBe(true)
+  })
+
+  it('a FORCEFUL steer aborts the in-flight turn; the worker re-plans and the aborted turn is free', async () => {
+    const bodies: Array<{ messages: Array<{ role: string; content: string }> }> = []
+    let calls = 0
+    let deliver: (m: unknown) => void = () => {}
+    vi.stubGlobal(
+      'fetch',
+      vi.fn(async (_url: string, init?: { body?: string; signal?: AbortSignal }) => {
+        calls += 1
+        if (calls === 1) {
+          // The driver forcefully interrupts mid-inference; the stub then rejects like an aborted fetch.
+          deliver({ steer: 'STOP — wrong file, edit src/core.ts', interrupt: true })
+          throw new DOMException('aborted', 'AbortError')
+        }
+        bodies.push(JSON.parse(init?.body ?? '{}'))
+        return noToolReply()
+      }),
+    )
+
+    const factory = createExecutor({
+      backend: 'router-tools',
+      model: 'test-model',
+      routerBaseUrl: 'http://router.test',
+      routerKey: 'k',
+      tools: [],
+      executeToolCall: async () => '',
+    })
+    const spec: AgentSpec = {
+      profile: { name: 'w', prompt: { systemPrompt: 'sys' } } as unknown as AgentProfile,
+      harness: null,
+    } as AgentSpec
+    const exec = factory(spec, { signal: new AbortController().signal, seams: {} })
+    deliver = (m) => exec.deliver?.(m)
+
+    const result = await exec.execute('edit the file', new AbortController().signal)
+
+    // The aborted turn was discarded and the worker re-planned on turn 2 (recorded as bodies[0])...
+    expect(calls).toBe(2)
+    // ...which carries the forceful steer, and the aborted turn did NOT count toward iterations.
+    expect(
+      bodies[0]?.messages.some((m) => m.content?.includes('wrong file, edit src/core.ts')),
+    ).toBe(true)
+    expect(result.spent.iterations).toBe(1)
+  })
+})
diff --git a/tests/loops/router-driver-chat.test.ts b/tests/loops/router-driver-chat.test.ts
index 2ac433e7..0ff357d3 100644
--- a/tests/loops/router-driver-chat.test.ts
+++ b/tests/loops/router-driver-chat.test.ts
@@ -102,11 +102,11 @@ describe('routerDriverChat — the production DriverChat seam over the router to
   it('omits empty-string content (truthy check), not just null', async () => {
     routerMock.mockResolvedValue({
       content: '',
-      toolCalls: [{ id: 'c1', name: 'await_next', arguments: '{}' }],
+      toolCalls: [{ id: 'c1', name: 'await_event', arguments: '{}' }],
     })
     const turn = await routerDriverChat(cfg).next({ system: 'S', messages: [], tools: [] })
     expect(turn.content).toBeUndefined()
-    expect(turn.toolCalls).toEqual([{ id: 'c1', name: 'await_next', arguments: {} }])
+    expect(turn.toolCalls).toEqual([{ id: 'c1', name: 'await_event', arguments: {} }])
   })
 
   it('honors a custom temperature', async () => {
diff --git a/tests/loops/supervisor-authoring.test.ts b/tests/loops/supervisor-authoring.test.ts
index bb30a8e1..23de1991 100644
--- a/tests/loops/supervisor-authoring.test.ts
+++ b/tests/loops/supervisor-authoring.test.ts
@@ -97,8 +97,8 @@ describe('supervisor authoring — the supervisor DESIGNS each worker (profile),
       },
       {
         toolCalls: [
-          { name: 'await_next', arguments: {} },
-          { name: 'await_next', arguments: {} },
+          { name: 'await_event', arguments: {} },
+          { name: 'await_event', arguments: {} },
         ],
       },
       { content: 'done' },
diff --git a/tests/loops/trace-source.test.ts b/tests/loops/trace-source.test.ts
new file mode 100644
index 00000000..89ad204e
--- /dev/null
+++ b/tests/loops/trace-source.test.ts
@@ -0,0 +1,80 @@
+import { describe, expect, it } from 'vitest'
+import {
+  analyzeTrace,
+  createPushTraceSource,
+  decodeToolPart,
+  type SessionTraceBox,
+  sandboxSessionTraceSource,
+} from '../../src/runtime'
+
+describe('decodeToolPart — defensive across harness + OpenAI shapes', () => {
+  it('decodes an OpenAI tool_call (function shape, string args)', () => {
+    expect(
+      decodeToolPart({ type: 'function', function: { name: 'run', arguments: '{"path":"src/"}' } }),
+    ).toEqual({ toolName: 'run', args: { path: 'src/' } })
+  })
+
+  it('decodes a harness tool part (type=tool, input/state)', () => {
+    expect(
+      decodeToolPart({
+        type: 'tool',
+        toolName: 'edit',
+        input: { file: 'a.ts' },
+        state: 'completed',
+      }),
+    ).toEqual({ toolName: 'edit', args: { file: 'a.ts' }, status: 'ok' })
+  })
+
+  it('flags an errored tool part', () => {
+    expect(decodeToolPart({ type: 'tool-call', name: 'build', args: {}, state: 'error' })).toEqual({
+      toolName: 'build',
+      args: {},
+      status: 'error',
+    })
+  })
+
+  it('returns undefined for non-tool parts (text, reasoning)', () => {
+    expect(decodeToolPart({ type: 'text', text: 'hi' })).toBeUndefined()
+    expect(decodeToolPart({ type: 'reasoning', text: '...' })).toBeUndefined()
+    expect(decodeToolPart(null)).toBeUndefined()
+  })
+})
+
+describe('sandboxSessionTraceSource — the production (box) path', () => {
+  // A mock box matching the real `box.messages({sessionId})` surface; messages carry harness parts.
+  const box: SessionTraceBox = {
+    messages: async () => [
+      {
+        parts: [
+          { type: 'text', text: 'working' },
+          { type: 'tool', toolName: 'grep', input: { q: 'x' } },
+        ],
+      },
+      { parts: [{ type: 'tool', toolName: 'grep', input: { q: 'x' } }] },
+      { parts: [{ type: 'tool', toolName: 'grep', input: { q: 'x' } }] },
+    ],
+  }
+
+  it('collects the harness tool calls from session parts and the batch analyzer sees the loop', async () => {
+    const source = sandboxSessionTraceSource(box, 'sess-1')
+    const spans = await source.collect()
+    expect(spans.map((s) => s.toolName)).toEqual(['grep', 'grep', 'grep'])
+    const analysis = await analyzeTrace(source)
+    expect(analysis.trajectory.toolCalls).toBe(3)
+    expect(analysis.stuckLoop.findings.find((f) => f.toolName === 'grep')?.occurrences).toBe(3)
+  })
+})
+
+describe('createPushTraceSource — owned-loop path', () => {
+  it('records steps as spans, fans out live, and buffers for collect', async () => {
+    const seen: string[] = []
+    const { source, record } = createPushTraceSource({ runId: 'p' })
+    const off = source.onSpan((s) => seen.push(s.toolName))
+    record({ toolName: 'a', args: {} })
+    record({ toolName: 'b', args: {} })
+    off()
+    record({ toolName: 'c', args: {} }) // after unsubscribe — buffered but not streamed
+    expect(seen).toEqual(['a', 'b'])
+    expect((await source.collect()).map((s) => s.toolName)).toEqual(['a', 'b', 'c'])
+  })
+})
diff --git a/tests/loops/trajectory-recorder.test.ts b/tests/loops/trajectory-recorder.test.ts
new file mode 100644
index 00000000..3ae3fcec
--- /dev/null
+++ b/tests/loops/trajectory-recorder.test.ts
@@ -0,0 +1,35 @@
+import { describe, expect, it } from 'vitest'
+import { analyzeTrace, createPushTraceSource } from '../../src/runtime'
+
+const fakeClock = () => {
+  let ticks = 0 // each reading advances the clock by one 10ms step
+  return () => ++ticks * 10
+}
+
+describe('analyzeTrace (settle-time agent-eval analyzers over a TraceSource)', () => {
+  it('collects spans and the batch stuck-loop view detects a repeated call', async () => {
+    const { source, record } = createPushTraceSource({ runId: 'r1', now: fakeClock() })
+    // A loop interleaved with another call — the FULL-run view still catches it.
+    record({ toolName: 'run_tests', args: { path: 'src/' } })
+    record({ toolName: 'read', args: { f: 'log.txt' } })
+    record({ toolName: 'run_tests', args: { path: 'src/' } })
+    record({ toolName: 'run_tests', args: { path: 'src/' } })
+
+    const analysis = await analyzeTrace(source)
+
+    expect(analysis.trajectory.toolCalls).toBe(4)
+    const loop = analysis.stuckLoop.findings.find((f) => f.toolName === 'run_tests')
+    expect(loop?.occurrences).toBe(3)
+    expect(loop?.windowMs).toBeGreaterThan(0)
+  })
+
+  it('does not flag distinct calls as a loop', async () => {
+    const { source, record } = createPushTraceSource({ now: fakeClock() })
+    record({ toolName: 'edit', args: { f: 'a.ts' } })
+    record({ toolName: 'edit', args: { f: 'b.ts' } })
+    record({ toolName: 'edit', args: { f: 'c.ts' } })
+    const analysis = await analyzeTrace(source)
+    expect(analysis.trajectory.toolCalls).toBe(3)
+    expect(analysis.stuckLoop.findings).toHaveLength(0)
+  })
+})