From 21608ef30c885adf1f644e38551b1ddf8218fa15 Mon Sep 17 00:00:00 2001
From: Drew Stone <drewstone329@gmail.com>
Date: Tue, 23 Jun 2026 16:26:15 -0600
Subject: [PATCH 1/2] docs(examples): prune + clarify the example set for
 tip-top DX
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Land a developer in examples/, let them read ONE short example, and feel
the power. Three confirmed defects fixed plus the centerpiece added.

- DELETE examples/coder-loop/: its .ts was refactored off runLoop to
  worktreeLoopRunner, and its README still taught the deleted coderProfile
  export, so it no longer demonstrated "the same runLoop kernel" it claimed.
  researcher-loop is now the primary runLoop teacher.

- ADD examples/driver-loop/: the centerpiece. A multi-round refine driver
  whose plan() READS the last worker's output from history and COMPOSES the
  next prompt FROM it — "the fold" every supervisor is built on, made visible
  with heavy plain-language comments. A second labeled section contrasts it
  with multishot so round vs shot sit side by side. Offline, e2e-proven
  (round 0 rejected -> driver folds -> round 1 passes via the corrected prompt).

- IMPROVE stale-API docs and teaching comments:
  - mcp-delegation/README + fleet-delegation/README: replace the deleted
    delegate_code/delegate_research tools with the generic delegate verb +
    the MCP_ENABLE_DELEGATE=1 gate + the always-on feedback/status/history trio.
  - product-eval/README: teach runPersonaConversation (evalPersona was deleted
    in 0.76.0); the .ts was already correct.
  - researcher-loop / ui-audit / self-improving-loop / supervisor-loop:
    inline-define round and shot the first time each appears, and point at
    driver-loop/ as the example that actually shows the fold.
  - delegate: import from the @tangle-network/agent-runtime/loops subpath
    instead of a relative dist path; strip emoji from console output.

- INDEX examples/README.md: rewrite as an ordered "use this when" path over
  tiers (cores -> driver/supervisor -> runLoop kernel -> production runtime ->
  self-improvement), add a vocabulary block, add the missing driver-loop /
  supervise / delegate / intelligence-drop-in rows, drop coder-loop.

All edits stay inside examples/; build + typecheck + typecheck:examples +
Biome lint are green, and driver-loop and researcher-loop run offline.
---
 examples/README.md                            | 159 +++++-----
 examples/coder-loop/README.md                 | 109 -------
 examples/coder-loop/coder-loop.ts             |  72 -----
 examples/delegate/e2e-delegate-real.ts        |   2 +-
 examples/driver-loop/README.md                |  72 +++++
 examples/driver-loop/driver-loop.ts           | 279 ++++++++++++++++++
 examples/fleet-delegation/README.md           |   6 +-
 examples/mcp-delegation/README.md             |  29 +-
 examples/product-eval/README.md               |  19 +-
 examples/researcher-loop/README.md            |  22 +-
 examples/researcher-loop/researcher-loop.ts   |   5 +
 .../self-improving-loop.ts                    |   5 +
 examples/supervise/supervise.ts               |   2 +-
 examples/supervisor-loop/run-bridge.ts        |   4 +-
 examples/supervisor-loop/run-sandbox.ts       |  25 +-
 .../supervisor-loop/run-supervisor-mcp.ts     |   4 +-
 examples/supervisor-loop/shared.ts            |   9 +-
 examples/ui-audit/README.md                   |   4 +-
 examples/ui-audit/ui-audit.ts                 |   4 +
 19 files changed, 528 insertions(+), 303 deletions(-)
 delete mode 100644 examples/coder-loop/README.md
 delete mode 100644 examples/coder-loop/coder-loop.ts
 create mode 100644 examples/driver-loop/README.md
 create mode 100644 examples/driver-loop/driver-loop.ts

diff --git a/examples/README.md b/examples/README.md
index ca9a8bf4..084429f8 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -1,114 +1,135 @@
 # agent-runtime examples
 
-Ordered as a learning progression — each example introduces one concept on top of the previous one. The first three cover the package's three cores: the production chat/task runtime, the optimization suite, and the recursive Supervisor. The rest go deeper into each.
+A learning path. Read the examples in order — each one adds a single concept on top of the last.
+The fastest way to feel the package is to read **ONE** example: [`driver-loop/`](./driver-loop/)
+(below), which shows the move every supervisor is built on.
 
-Every example imports from `@tangle-network/agent-runtime` (the same surface consumers use), not from relative paths. All of them are typechecked by `pnpm run typecheck:examples` (wired into `pnpm run typecheck`).
+Every example imports from `@tangle-network/agent-runtime` (the surface consumers use), not from
+relative paths, and all of them are typechecked by `pnpm run typecheck:examples`.
 
-Era tags: **production runtime** (`runAgentTask` / `handleChatTurn` — what every product runs), **loops suite** (`Environment` / `defineStrategy` / `runBenchmark` — the optimization layer), **supervisor core** (`Scope` / `Supervisor` / personify — the recursive atom; prefer it for new recursive work), **runLoop kernel** (the round-synchronous driver loop), **infra** (transports, MCP, observability).
+## Vocabulary
 
-## Start here — the three cores
+These words appear in every example. The clearest demonstration of all of them is
+[`driver-loop/`](./driver-loop/).
 
-| # | Example | Era | One sentence |
-|---|---|---|---|
-| 1 | [`chat-handler/`](./chat-handler/) | production runtime | `handleChatTurn` — the production chat turn lifecycle every product runs |
-| 2 | [`strategy-suite/`](./strategy-suite/) | loops suite | `Environment` + `defineStrategy` + `runBenchmark` — author and compare optimization strategies against your own check (needs `TANGLE_API_KEY`) |
-| 3 | [`recursive-supervisor/`](./recursive-supervisor/) | supervisor core | One `Agent` spawning children through `scope.spawn` on a conserved budget pool, plus the `fanout` combinator (offline) |
+- **round** — one driver cycle: `plan → run workers → decide` (the `runLoop` kernel runs this once per round).
+- **shot** — one independent worker attempt; **multishot** plays N shots in parallel.
+- **sample** — best-of-N shots (breadth); **refine** — iterate-with-critique across rounds (depth).
+- **the fold** — a driver reading the last worker's output and writing the next instruction *from* it.
 
-## The production runtime, deeper
+## Tier 0 — the three cores (one example per core)
 
-| # | Example | Era | One sentence |
-|---|---|---|---|
-| 4 | [`knowledge-gating/`](./knowledge-gating/) | production runtime | The minimal `AgentAdapter` + `requiredKnowledge` + readiness gating |
-| 5 | [`sanitized-telemetry-streaming/`](./sanitized-telemetry-streaming/) | production runtime | Redaction-by-default telemetry collectors (streaming + non-streaming) |
-| 6 | [`runtime-run/`](./runtime-run/) | production runtime | `startRuntimeRun` + cost ledger persistence |
-| 7 | [`stream-backends/`](./stream-backends/) | infra | The three stream transports (iterable / sandbox / OpenAI-compatible) + SSE helpers, side by side |
+| # | Example | Use this when… |
+|---|---|---|
+| 1 | [`chat-handler/`](./chat-handler/) | You're wiring a product's chat turn — the `handleChatTurn` lifecycle every product runs. |
+| 2 | [`strategy-suite/`](./strategy-suite/) | You want to compare optimization strategies (sample vs refine vs your own) against your own pass/fail check (needs `TANGLE_API_KEY`). |
+| 3 | [`recursive-supervisor/`](./recursive-supervisor/) | You want the raw recursive atom: one `Agent` spawning children on a conserved budget pool, shown twice (raw `scope.spawn` + the `fanout` combinator, offline). |
 
-## Delegation + tools
+## Tier 1 — the driver loop & supervisor (the heart of the product)
 
-| # | Example | Era | One sentence |
-|---|---|---|---|
-| 8 | [`mcp-delegation/`](./mcp-delegation/) | infra | Mount `agent-runtime-mcp` in an `AgentProfile` — exposes `delegate_code`, `delegate_research`, `delegate_feedback`, `delegation_status`, `delegation_history` (plus `delegate_ui_audit` when a UI-audit runner is wired) |
-| 9 | [`fleet-delegation/`](./fleet-delegation/) | infra | `TANGLE_FLEET_ID` flips delegation from sibling-sandbox to fleet-workspace topology |
+| # | Example | Use this when… |
+|---|---|---|
+| 4 | [`driver-loop/`](./driver-loop/) | **You want to SEE the fold** — a driver reads the last worker's output and composes the next prompt from it (plan → run → decide → re-plan). The seam that makes everything else click. Offline. |
+| 5 | [`supervise/`](./supervise/) | You want the one-call headline: `supervise(profile, goal)` — a router-brained supervisor with all scaffolding defaulted (needs `TANGLE_API_KEY`). |
+| 6 | [`supervisor-loop/`](./supervisor-loop/) | You want that same supervisor over a real worker backend — sandbox box / local cli-bridge / coordination MCP — with the **worker backend as the only knob**. |
+| 7 | [`delegate/`](./delegate/) | You want the one-call `delegate(intent)` proven e2e: a worker does real on-disk filesystem work, the gate settles only when the file exists, cost rides through (needs `TANGLE_API_KEY`). |
 
-## The loops suite, deeper — search, evals, and the RSI verb
+## Tier 2 — the runLoop kernel (the leaf the benches drive)
 
-| # | Example | Era | One sentence |
-|---|---|---|---|
-| 9c | [`strategy-evolution/`](./strategy-evolution/) | loops suite | `runStrategyEvolution` + `promotionGate` — the policy-search journey: author candidate strategies from losses, advance a champion, promote on a fresh holdout slice (needs `TANGLE_API_KEY`) |
-| 9d | [`product-eval/`](./product-eval/) | loops suite | `evalPersona` — user-sim product evals in one call: scripted + LLM-adversarial personas, plus the `runPersonaDispatch` → `runProfileMatrix` scored path (needs `TANGLE_API_KEY`; offline-testable via a `backendFor` override) |
+The round-synchronous kernel: `driver.plan()` → N tasks → one sandbox per iteration → `output.parse`
+→ `validator.validate` → `driver.decide`. The drivers below are single-round and content-blind on
+purpose — read [`driver-loop/`](./driver-loop/) for the contrast (a driver that re-plans from output).
 
-## The supervisor core, deeper — an agent drives N agents
+| # | Example | Use this when… |
+|---|---|---|
+| 8 | [`researcher-loop/`](./researcher-loop/) | You want the canonical `runLoop` + inline fanout driver, with a validator that hard-fails a namespace leak so the kernel prunes the bad candidate (needs the optional `@tangle-network/agent-knowledge` peer). |
+| 9 | [`ui-audit/`](./ui-audit/) | You want the smallest end-to-end `runLoop` over a real client (Playwright + stub judge), persisting findings. |
 
-| # | Example | Era | One sentence |
-|---|---|---|---|
-| 9b | [`supervisor-loop/`](./supervisor-loop/) | supervisor core | One LLM SUPERVISOR (`driverAgent`) spawns + drives N worker agents to a checked completion on one conserved pool — the SAME code over `router-tools` / `sandbox` (a box) / `bridge` (local cli-bridge), swapping only the worker-leaf seam |
+## Tier 3 — the production runtime, deeper
 
-## The runLoop kernel (driver-planned fanout)
+| # | Example | Use this when… |
+|---|---|---|
+| 10 | [`knowledge-gating/`](./knowledge-gating/) | You want readiness gating: the loop BLOCKS when a required-knowledge confidence is below threshold (also the smallest `runAgentTask`). |
+| 11 | [`runtime-run/`](./runtime-run/) | You want the run-record + cost-ledger persistence lifecycle for dashboards. |
+| 12 | [`stream-backends/`](./stream-backends/) | You want to pick a stream transport (iterable / sandbox / OpenAI-compatible) — the "pick your backend" map (OpenAI section needs `OPENAI_API_KEY`). |
+| 13 | [`sanitized-telemetry-streaming/`](./sanitized-telemetry-streaming/) | You want redaction-by-default telemetry on the stream (and the `task.intent` PII footgun). |
 
-The round-synchronous kernel: `driver.plan()` → N tasks → one sandbox per iteration → parse → validate → `driver.decide`. The drivers below are hand-written inline (`plan` + `decide` — two functions); for new recursive work prefer the supervisor core (#3).
+## Tier 4 — delegation over MCP
 
-| # | Example | Era | One sentence |
-|---|---|---|---|
-| 10 | [`coder-loop/`](./coder-loop/) | runLoop kernel | `coderProfile` + `runLoop` + an inline fanout driver — kernel picks the winner |
-| 11 | [`researcher-loop/`](./researcher-loop/) | runLoop kernel | `researcherProfile` (from `@tangle-network/agent-knowledge/profiles`) + the namespace-leak hard-fail validator |
-| 12 | [`ui-audit/`](./ui-audit/) | runLoop kernel | `uiAuditorProfile` + an in-process `SandboxClient` (Playwright + stub judge) + Markdown findings writer |
+| # | Example | Use this when… |
+|---|---|---|
+| 14 | [`mcp-delegation/`](./mcp-delegation/) | You want to mount `agent-runtime-mcp` in an `AgentProfile`. Exposes the generic `delegate` verb (opt in with `MCP_ENABLE_DELEGATE=1`) plus the always-on `delegate_feedback` / `delegation_status` / `delegation_history` trio (and `delegate_ui_audit` when a UI-audit runner is wired). Needs `pnpm build` first. |
+| 15 | [`fleet-delegation/`](./fleet-delegation/) | You want `TANGLE_FLEET_ID` to flip delegation from sibling-sandbox to fleet-workspace topology. |
 
-## Self-improvement + observability
+## Tier 5 — self-improvement & intelligence
 
-| # | Example | Era | One sentence |
-|---|---|---|---|
-| 13 | [`self-improving-loop/`](./self-improving-loop/) | loops suite (pedagogical) | The v0 → judge → analyst → mutation → v1 → gate cycle, offline; production paths are `selfImprove` (agent-eval) and `runStrategyEvolution` (#2's subpath) |
-| 13b | [`improve/`](./improve/) | loops suite | `improve(profile, findings, opts)` — the one pluggable RSI verb (held-out-gated surface optimization), offline with a scripted generator |
-| 13c | [`intelligence-recommend/`](./intelligence-recommend/) | loops suite | The intelligence loop end to end, offline: `recordTrace` → derived `AnalystFinding`s → `improve()` → a gated candidate (the first example connecting the two halves) |
-| 14 | [`agents-of-all-shapes/`](./agents-of-all-shapes/) | infra | Any framework's traces → one OTel GenAI contract → in-process `InsightReport` (the only example with a CI test) |
+| # | Example | Use this when… |
+|---|---|---|
+| 16 | [`strategy-evolution/`](./strategy-evolution/) | You want the full policy-search + holdout gate: author candidates from losses, promote a champion only if a paired-bootstrap CI says it isn't luck (needs `TANGLE_API_KEY`). |
+| 17 | [`improve/`](./improve/) | You want the one supported RSI verb: `improve(profile, findings, opts)` — optimize one profile surface, ship only if it clears the held-out gate. Offline. |
+| 18 | [`self-improving-loop/`](./self-improving-loop/) | You want the unrolled internals of #17: v0 → judge → analyst → mutation → v1 → gate, with the "which substrate owns each phase" map. Offline. |
+| 19 | [`intelligence-recommend/`](./intelligence-recommend/) | You want the intelligence loop offline: trace → findings → `improve()` → gated candidate. |
+| 20 | [`intelligence-drop-in/`](./intelligence-drop-in/) | You want to wrap any agent with `withTangleIntelligence` and ship one trace per call (best-effort; off = passthrough). |
+| 21 | [`agents-of-all-shapes/`](./agents-of-all-shapes/) | You want proof that any framework's traces converge on one OTel contract → one `InsightReport` (the CI-tested example). |
+| 22 | [`product-eval/`](./product-eval/) | You want user-sim product evals: a persona over a multi-round conversation via `runPersonaConversation`, then score the transcript (`maxTurns` is a ceiling, not a target). Needs `TANGLE_API_KEY`; offline via a `backendFor` override. |
 
 ## Conventions
 
-- Examples are synthetic unless noted. `strategy-suite`, `strategy-evolution`, and `product-eval` need `TANGLE_API_KEY` (`improve` and `intelligence-recommend` run fully offline); `stream-backends`' OpenAI section needs `OPENAI_API_KEY` (the rest of it runs offline); `mcp-delegation` needs `pnpm build` first so the local MCP bin exists; `researcher-loop` needs the optional `@tangle-network/agent-knowledge` peer.
-- Where domain types are needed (`SandboxBox`, evidence stores), the example defines them inline — comments call out which parts are *yours* to provide vs *the runtime's* contract.
-- No example creates its own throwaway `package.json` — they run from this repo's tsx so changes to the runtime are picked up immediately.
+- Examples are synthetic unless noted. `strategy-suite`, `strategy-evolution`, `product-eval`,
+  `supervise`, and `delegate` need `TANGLE_API_KEY`; `stream-backends`' OpenAI section needs
+  `OPENAI_API_KEY` (the rest of it runs offline); `mcp-delegation` needs `pnpm build` first so the
+  local MCP bin exists; `researcher-loop` needs the optional `@tangle-network/agent-knowledge` peer;
+  `supervisor-loop` needs a real worker backend. Everything else runs without credentials.
+- Where domain types are needed (`SandboxBox`, evidence stores), the example defines them inline —
+  comments call out which parts are *yours* to provide vs *the runtime's* contract.
+- No example creates its own throwaway `package.json` — they run from this repo's tsx so changes to
+  the runtime are picked up immediately.
 
 ## Run
 
-From the agent-runtime repo root, in the suggested learning order:
+From the agent-runtime repo root, in the learning order above:
 
 ```bash
-# The three cores
+# Tier 0 — the three cores
 pnpm tsx examples/chat-handler/chat-handler.ts
 TANGLE_API_KEY=... pnpm tsx examples/strategy-suite/strategy-suite.ts
 pnpm tsx examples/recursive-supervisor/recursive-supervisor.ts
 
-# Production runtime, deeper
+# Tier 1 — driver loop & supervisor (the heart)
+pnpm tsx examples/driver-loop/driver-loop.ts                       # SEE THE FOLD (offline)
+TANGLE_API_KEY=... pnpm tsx examples/supervise/supervise.ts        # the one-call supervisor
+WORKER_MODEL=opencode/anthropic/claude-sonnet-4-5 \
+  pnpm tsx examples/supervisor-loop/run-bridge.ts                  # same supervisor, local cli-bridge backend
+TANGLE_API_KEY=... pnpm tsx examples/delegate/e2e-delegate-real.ts # delegate(intent), e2e
+
+# Tier 2 — the runLoop kernel
+pnpm tsx examples/researcher-loop/researcher-loop.ts
+pnpm dlx tsx examples/ui-audit/ui-audit.ts /tmp/ui-audit-demo https://example.com
+
+# Tier 3 — production runtime, deeper
 pnpm tsx examples/knowledge-gating/knowledge-gating.ts
-pnpm tsx examples/sanitized-telemetry-streaming/sanitized-telemetry-streaming.ts
 pnpm tsx examples/runtime-run/runtime-run.ts
 pnpm tsx examples/stream-backends/stream-backends.ts
+pnpm tsx examples/sanitized-telemetry-streaming/sanitized-telemetry-streaming.ts
 
-# Delegation
+# Tier 4 — delegation over MCP
 pnpm build  # mcp-delegation needs dist/mcp/bin.js
 pnpm tsx examples/mcp-delegation/mcp-delegation.ts
 pnpm tsx examples/fleet-delegation/fleet-delegation.ts
 
-# Supervisor core, deeper — one agent drives N workers (bridge = local cli-bridge path)
-TANGLE_API_KEY=... pnpm tsx examples/supervise/supervise.ts   # router brain + router-tools workers (the one-call entry)
-WORKER_MODEL=opencode/anthropic/claude-sonnet-4-5 pnpm tsx examples/supervisor-loop/run-bridge.ts  # local harness CLIs via ~/code/cli-bridge
-
-# runLoop kernel
-pnpm tsx examples/coder-loop/coder-loop.ts
-pnpm tsx examples/researcher-loop/researcher-loop.ts
-pnpm dlx tsx examples/ui-audit/ui-audit.ts /tmp/ui-audit-demo https://example.com
-
-# The loops suite, deeper — search + evals
-TANGLE_API_KEY=... pnpm tsx examples/strategy-evolution/strategy-evolution.ts  # policy search → holdout gate
-TANGLE_API_KEY=... pnpm tsx examples/product-eval/product-eval.ts              # user-sim product evals (evalPersona)
-
-# Self-improvement + observability
+# Tier 5 — self-improvement & intelligence
+TANGLE_API_KEY=... pnpm tsx examples/strategy-evolution/strategy-evolution.ts
+pnpm tsx examples/improve/improve.ts
 pnpm tsx examples/self-improving-loop/self-improving-loop.ts
-pnpm tsx examples/improve/improve.ts                                   # improve() — the RSI verb (offline)
-pnpm tsx examples/intelligence-recommend/intelligence-recommend.ts    # traces → findings → improve() (offline)
+pnpm tsx examples/intelligence-recommend/intelligence-recommend.ts
+pnpm tsx examples/intelligence-drop-in/intelligence-drop-in.ts
 pnpm tsx examples/agents-of-all-shapes/run.ts
+TANGLE_API_KEY=... pnpm tsx examples/product-eval/product-eval.ts
 ```
 
 ## Tracing
 
-The kernels emit `loop.*` trace events as they run; with `OTEL_EXPORTER_OTLP_ENDPOINT` set they export as OTel GenAI spans (see the root README § Tracing). `agents-of-all-shapes/` (#14) shows the full traces → insights pipe; the `agent-stack-adoption` skill documents the end-to-end production ingestion pipeline.
+The kernels emit `loop.*` trace events as they run; with `OTEL_EXPORTER_OTLP_ENDPOINT` set they
+export as OTel GenAI spans (see the root README § Tracing). `agents-of-all-shapes/` (#21) shows the
+full traces → insights pipe; the `agent-stack-adoption` skill documents the end-to-end production
+ingestion pipeline.
diff --git a/examples/coder-loop/README.md b/examples/coder-loop/README.md
deleted file mode 100644
index ba3f278c..00000000
--- a/examples/coder-loop/README.md
+++ /dev/null
@@ -1,109 +0,0 @@
-# coder-loop
-
-`coderProfile()` + `runLoop()` + an inline fanout `Driver` — the smallest
-end-to-end coder loop. Two parallel iterations attempt the same goal; the
-validator scores test + typecheck + diff size; the kernel picks the
-highest-scoring valid winner.
-
-`runLoop` is the round-synchronous kernel: `driver.plan()` → N tasks → one
-sandbox per iteration → `output.parse` → `validator.validate` →
-`driver.decide`. For new recursive/multi-level work, prefer the reactive
-`Scope`/`Supervisor` core and the personify combinators (`fanout` does this
-example's topology generically) — see
-[`examples/recursive-supervisor/`](../recursive-supervisor/).
-
-## Topology
-
-The driver is ~5 lines, hand-written in `coder-loop.ts`: a single-round
-fanout whose `plan()` returns two copies of the task only when `history` is
-empty (round 0), then `[]` forever after — it spawns N, scores, and picks; it
-never refines. Each of the N tasks becomes its own iteration, and every
-iteration runs the same `output.parse` → `validator.validate` pipeline
-independently before the driver votes.
-
-```mermaid
-flowchart TD
-  task["CoderTask\ngoal: add util.ts add(a,b)"] --> plan0
-
-  subgraph round0["round 0 — driver.plan(task, history=[])"]
-    plan0["inline fanout driver\nreturns [task, task]"]
-  end
-
-  plan0 --> reserve["kernel reserves 2 iteration slots\nrunBatch dispatches in parallel\n(bounded by maxConcurrency)"]
-
-  reserve --> wA
-  reserve --> wB
-
-  subgraph A["iteration 0 — worker A"]
-    direction TB
-    wA["sandboxClient.create()\n→ box.streamPrompt()"] --> evA["events:\nllm_call (costUsd 0.0036)\nresult { branch util-add-A }"]
-    evA --> parseA["output.parse → CoderOutput\ntyped arrow fn\nexport const add = (a:number,b:number):number"]
-    parseA --> valA["validator.validate\ntests pass · typecheck PASS\ndiff 2 ≤ 50 · no forbidden paths"]
-    valA --> verA["DefaultVerdict\nvalid = true · score ≈ 0.992"]
-  end
-
-  subgraph B["iteration 1 — worker B"]
-    direction TB
-    wB["sandboxClient.create()\n→ box.streamPrompt()"] --> evB["events:\nllm_call (costUsd 0.0036)\nresult { branch util-add-B }"]
-    evB --> parseB["output.parse → CoderOutput\nuntyped params\nexport function add(a, b)"]
-    parseB --> valB["validator.validate\ntests pass · typecheck FAIL (TS7006)\ndiff 3 ≤ 50"]
-    valB --> verB["DefaultVerdict\nvalid = false · rejected"]
-  end
-
-  verA --> plan1
-  verB --> plan1
-
-  subgraph round1["round 1 — driver.plan(task, history=[2 done])"]
-    plan1["returns []\nmoveKind = stop (no refine)"]
-  end
-
-  plan1 --> decide["driver.decide(history)\ndefaultSelector: filter valid,\nsort by verdict.score desc,\ntie-break iterationIndex asc"]
-
-  decide --> winner["decision = pick-winner\nwinner = iteration 0 (A)"]
-  verB -.->|invalid, dropped| decide
-
-  verA -.->|costUsd 0.0036| cost
-  verB -.->|costUsd 0.0036| cost
-  cost["result.costUsd = 0.0072\n(sum of per-iteration costUsd)"]
-  winner --> cost
-
-  classDef win fill:#1b5e20,stroke:#2e7d32,color:#fff
-  classDef lose fill:#5d1a1a,stroke:#b71c1c,color:#fff
-  class verA,winner win
-  class verB lose
-```
-
-## Run
-
-```bash
-pnpm tsx examples/coder-loop/coder-loop.ts
-```
-
-## What it shows
-
-- How `coderProfile({ task, harness })` bundles `profile`, `taskToPrompt`,
-  `output` (event-stream → `CoderOutput`), `validator` (test + typecheck +
-  diff cap + forbidden-path enforcement), and `agentRunSpec` together.
-- How a hand-written `Driver` (`plan` + `decide`) makes the kernel plan N
-  parallel iterations and pick the winning output — the whole `Driver`
-  contract is two functions.
-- How the synthetic `sandboxClient` mirrors the production
-  `@tangle-network/sandbox` `Sandbox` surface — swap it for `new Sandbox(...)`
-  when you wire to production.
-- How `result.winner` carries the typed `CoderOutput`, the verdict, and the
-  iteration index — everything you need to merge the patch in CI.
-
-## Wire to production
-
-Swap the synthetic `sandboxClient` for:
-
-```ts
-import { Sandbox } from '@tangle-network/sandbox'
-
-const sandboxClient = new Sandbox({ apiKey: process.env.TANGLE_API_KEY! })
-```
-
-Then `runLoop` creates a fresh sandbox per iteration via `sandboxClient.create()`
-and streams the prompt through `box.streamPrompt(taskToPrompt(task))`. Each
-iteration's events feed the same `output.parse` → `validator.validate`
-pipeline.
diff --git a/examples/coder-loop/coder-loop.ts b/examples/coder-loop/coder-loop.ts
deleted file mode 100644
index 9cbacb52..00000000
--- a/examples/coder-loop/coder-loop.ts
+++ /dev/null
@@ -1,72 +0,0 @@
-// worktreeLoopRunner — the smallest end-to-end coder loop on the generic recursive path:
-// author one AgentProfile per harness, fan them out over worktree-CLI leaves, gate each on
-// patchDelivered, and pick the winning patch with the shared valid-only selector. See README.md.
-
-import { worktreeLoopRunner } from '@tangle-network/agent-runtime'
-import type { AgentProfile } from '@tangle-network/sandbox'
-
-const profile = (name: string): AgentProfile => ({
-  name,
-  prompt: { systemPrompt: `You are ${name}. Deliver a minimal, correct patch.` },
-})
-
-// ── Offline test seams ───────────────────────────────────────────────────
-// A fake git that hands every worktree the same one-line patch, a no-op harness
-// runner, and a passing check runner. Production callers leave these unset (the
-// runner drives the real claude/codex/opencode CLIs on real worktrees).
-const patch = [
-  'diff --git a/util.ts b/util.ts',
-  '--- a/util.ts',
-  '+++ b/util.ts',
-  '+export const add = (a: number, b: number): number => a + b',
-].join('\n')
-
-async function main(): Promise<void> {
-  const runner = worktreeLoopRunner({
-    repoRoot: '/tmp/coder-loop-example',
-    taskPrompt: 'add util.ts that exports add(a, b)',
-    budget: { maxIterations: 50, maxTokens: 500_000 },
-    harnesses: [
-      { name: 'claude', profile: profile('claude'), harness: 'claude' },
-      { name: 'opencode', profile: profile('opencode'), harness: 'opencode' },
-    ],
-    testCmd: 'node -e \'require("./util").add(1,2)===3 || process.exit(1)\'',
-    typecheckCmd: 'pnpm typecheck',
-    require: ['tests', 'typecheck'],
-    maxDiffLines: 50,
-    forbiddenPaths: ['secrets/', 'node_modules/'],
-    runGit: (args: readonly string[]) => {
-      if (args[0] === 'diff' && args.includes('--shortstat')) {
-        return {
-          stdout: ' 1 file changed, 1 insertion(+), 0 deletions(-)\n',
-          stderr: '',
-          exitCode: 0,
-        }
-      }
-      if (args[0] === 'diff') return { stdout: patch, stderr: '', exitCode: 0 }
-      if (args[0] === 'rev-parse') return { stdout: 'base\n', stderr: '', exitCode: 0 }
-      return { stdout: '', stderr: '', exitCode: 0 }
-    },
-    runHarness: async () => ({
-      exitCode: 0,
-      stdout: 'done',
-      stderr: '',
-      killedBySignal: null,
-      durationMs: 1,
-      timedOut: false,
-    }),
-    runCommand: async () => ({ exitCode: 0, output: 'green' }),
-  })
-
-  const winner = await runner(new AbortController().signal)
-  console.log(`winning branch: ${winner.branch}`)
-  console.log(`  diff (${winner.stats.insertions} insertions):`)
-  for (const line of winner.patch.split('\n')) console.log(`    ${line}`)
-  console.log(`  tests passed: ${winner.checks?.tests?.passed ?? '(not run)'}`)
-  console.log(`  typecheck passed: ${winner.checks?.typecheck?.passed ?? '(not run)'}`)
-}
-
-main().catch((err) => {
-  console.error(err)
-  process.exit(1)
-})
diff --git a/examples/delegate/e2e-delegate-real.ts b/examples/delegate/e2e-delegate-real.ts
index 2608352c..60f9823c 100644
--- a/examples/delegate/e2e-delegate-real.ts
+++ b/examples/delegate/e2e-delegate-real.ts
@@ -11,7 +11,7 @@
 import { existsSync, mkdirSync, mkdtempSync, readFileSync, writeFileSync } from 'node:fs'
 import { tmpdir } from 'node:os'
 import { dirname, join, resolve } from 'node:path'
-import { delegate, type ExecutorConfig } from '../../dist/loops.js'
+import { delegate, type ExecutorConfig } from '@tangle-network/agent-runtime/loops'
 
 const routerBaseUrl = process.env.TANGLE_ROUTER_URL ?? 'https://router.tangle.tools/v1'
 const routerKey = process.env.TANGLE_API_KEY
diff --git a/examples/driver-loop/README.md b/examples/driver-loop/README.md
new file mode 100644
index 00000000..cb38b5ac
--- /dev/null
+++ b/examples/driver-loop/README.md
@@ -0,0 +1,72 @@
+# driver-loop
+
+**See the fold.** This is the single most important example in the set: a driver that
+*reads the last worker's output and writes the next instruction from it*. That read-then-rewrite
+move — "the fold" — is what every supervisor in this repo is built on. Once you've seen it here,
+`supervise()`, the coordination MCP, and the self-improvement loop all read as variations of it.
+
+Runs fully offline (a scripted worker, no credentials):
+
+```bash
+pnpm tsx examples/driver-loop/driver-loop.ts
+```
+
+## Vocabulary
+
+These words are used across every example and defined here.
+
+| Term | Meaning |
+|---|---|
+| **round** | One full driver cycle: `plan → run workers → decide`. The `runLoop` kernel runs exactly this, once per round. |
+| **shot** | One independent worker attempt. A single round can run many shots (a fanout). |
+| **multishot** | N shots played in parallel. |
+| **sample** | A strategy: take the best of N shots (breadth). |
+| **refine** | A strategy: iterate-with-critique *across rounds* (depth) — what SECTION 1 of this example does. |
+
+## What the example shows
+
+**SECTION 1 — ROUNDS (refine), the centerpiece.** A multi-round driver:
+
+- **Round 0** — `driver.plan(task, history=[])`: no history yet, so it runs the worker once. The
+  worker drafts a release note but forgets a required word, so the validator **rejects** it.
+- **Round 1** — `driver.plan(task, history=[1 rejected])`: the driver READS the rejected draft
+  and its verdict out of `history`, then COMPOSES a corrective prompt *from that output* ("your
+  draft was X, it was rejected because Y — rewrite it to mention Z"). The worker obeys the new
+  prompt and the validator **passes**.
+
+The two load-bearing lines in `driver-loop.ts` are commented `THE FOLD, PART 1: INGEST` (where it
+reads `history[history.length-1].output`) and `THE FOLD, PART 2: GENERATE` (where it builds the
+next prompt). In production a router LLM does that composition — it reads the folded worker output
+from its tool-result messages and writes the next spawn's prompt. Here it's plain code so the seam
+is visible.
+
+```mermaid
+flowchart TD
+  task["NoteTask\nprompt: draft a release note"] --> plan0
+  subgraph r0["ROUND 0 — plan(task, history=[])"]
+    plan0["driver runs the worker once"]
+  end
+  plan0 --> w0["worker → 'Shipped one-click restore for failed deploys.'"]
+  w0 --> v0{"validator: mentions 'rollback'?"}
+  v0 -->|no — REJECT| fold["THE FOLD\ndriver reads the rejected draft\n+ builds a corrective prompt from it"]
+  subgraph r1["ROUND 1 — plan(task, history=[1 rejected])"]
+    fold
+  end
+  fold --> w1["worker → '…with an instant rollback path…'"]
+  w1 --> v1{"validator: mentions 'rollback'?"}
+  v1 -->|yes — PASS| done["decide → pick-winner"]
+```
+
+**SECTION 2 — SHOTS (multishot), the contrast.** Three independent attempts at the same task,
+in parallel, with **no fold between them**. This is the *other* axis: a round refines depth-wise
+(each round improves on the last); a shot explores breadth-wise (many tries at once). Seeing them
+side by side is the cleanest way to internalize round vs shot.
+
+## Where this goes next
+
+- `examples/supervise/` — the one-call `supervise(profile, goal)` where a router LLM does the fold
+  for you.
+- `examples/supervisor-loop/` — the same supervisor over a real worker backend (sandbox box /
+  local cli-bridge), worker backend as the only knob.
+- `examples/researcher-loop/` and `examples/ui-audit/` — `runLoop` drivers that are *single-round*
+  and *content-blind* on purpose (they never fold); read those to see the contrast with this one.
diff --git a/examples/driver-loop/driver-loop.ts b/examples/driver-loop/driver-loop.ts
new file mode 100644
index 00000000..6044a3bd
--- /dev/null
+++ b/examples/driver-loop/driver-loop.ts
@@ -0,0 +1,279 @@
+/**
+ * driver-loop — SEE THE FOLD.
+ *
+ * This is the one concept that makes the whole supervisor/driver story click: a driver
+ * does not just count iterations. It READS the last worker's actual output and WRITES the
+ * next instruction FROM that output. That read-then-rewrite is "the fold". Everything else
+ * in this repo — supervise(), the coordination MCP, the self-improvement loop — is built on
+ * top of this single move.
+ *
+ * ── Vocabulary (used everywhere, defined here) ──────────────────────────────────────────
+ *   • round      — one full driver cycle: plan → run workers → decide. The `runLoop` kernel
+ *                  calls plan(), runs the planned workers, then calls decide(), once per round.
+ *   • shot       — one independent worker attempt/sample. A round can run many shots (a fanout).
+ *   • multishot  — N shots played in parallel (see SECTION 2 below).
+ *   • sample     — a strategy: take the best of N shots (breadth).
+ *   • refine     — a strategy: iterate-with-critique ACROSS rounds (depth) — this file's SECTION 1.
+ *
+ * SECTION 1 (the centerpiece) is a multi-ROUND refine driver. Round 0 asks the worker to draft
+ * a release note; the validator rejects it for missing a required word; the driver READS that
+ * rejected draft and BUILDS a corrective prompt from it; round 1 re-runs with that prompt and
+ * passes. SECTION 2 contrasts it with a multi-SHOT run so the two axes sit side by side.
+ *
+ * Fully offline — the worker is a scripted client keyed on the prompt, so it runs with zero
+ * credentials (the same offline pattern self-improving-loop uses).
+ *
+ * Run:  pnpm tsx examples/driver-loop/driver-loop.ts
+ */
+
+import {
+  type MultishotPersona,
+  type MultishotShape,
+  runMultishot,
+} from '@tangle-network/agent-eval/multishot'
+import {
+  type DefaultVerdict,
+  type Driver,
+  type OutputAdapter,
+  runLoop,
+  type Validator,
+} from '@tangle-network/agent-runtime/loops'
+import type { AgentProfile, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox'
+
+// ── The task + what "good" means ────────────────────────────────────────────────────────
+// The agent must draft a one-line release note that mentions the word "rollback". A real
+// product would validate something richer; the required word keeps the example deterministic.
+interface NoteTask {
+  feature: string
+  /** The next instruction the worker should run. The DRIVER rewrites this between rounds. */
+  prompt: string
+}
+interface NoteOutput {
+  note: string
+}
+const requiredWord = 'rollback'
+
+// ── The worker (scripted, offline) ──────────────────────────────────────────────────────
+// A worker is just something that takes a prompt and streams back events. Here we fake it:
+// the FIRST prompt produces a draft that forgets the required word (so it will be rejected);
+// any prompt that mentions the required word produces a corrected draft. That keyed behavior
+// is what lets the example PROVE the fold worked: round 1 only passes because the driver put
+// the right correction into the prompt.
+function scriptedWorkerClient(): { create(): Promise<SandboxInstance> } {
+  return {
+    async create(): Promise<SandboxInstance> {
+      return {
+        id: `worker-${Math.random().toString(36).slice(2, 8)}`,
+        async *streamPrompt(prompt: string): AsyncIterable<SandboxEvent> {
+          yield {
+            type: 'llm_call',
+            data: { model: 'scripted', tokensIn: 200, tokensOut: 40, costUsd: 0.0006 },
+          }
+          // The worker "obeys" the prompt: if the driver's corrective prompt told it to
+          // mention the required word, it does; otherwise it ships the naive first draft.
+          const note = prompt.toLowerCase().includes(requiredWord)
+            ? 'Shipped one-click restore with an instant rollback path if a deploy goes bad.'
+            : 'Shipped one-click restore for failed deploys.'
+          yield { type: 'result', data: { result: { note } satisfies NoteOutput } }
+        },
+      } as unknown as SandboxInstance
+    },
+  }
+}
+
+// ── The output adapter: raw event stream → typed output ─────────────────────────────────
+const output: OutputAdapter<NoteOutput> = {
+  parse(events: SandboxEvent[]): NoteOutput {
+    for (const ev of events) {
+      if (ev.type === 'result') {
+        const r = (ev as { data?: { result?: unknown } }).data?.result
+        if (r && typeof r === 'object' && 'note' in r) return r as NoteOutput
+      }
+    }
+    return { note: '' }
+  },
+}
+
+// ── The validator: the pass/fail check the driver reads to decide whether to refine ──────
+const validator: Validator<NoteOutput> = {
+  validate(out: NoteOutput): Promise<DefaultVerdict> {
+    const valid = out.note.toLowerCase().includes(requiredWord)
+    return Promise.resolve({
+      valid,
+      score: valid ? 1 : 0,
+      notes: valid ? 'mentions rollback' : `missing required word "${requiredWord}"`,
+    })
+  },
+}
+
+// ── THE DRIVER — this is the example ────────────────────────────────────────────────────
+// A driver is two functions: plan() (what to run this round) and decide() (are we done?).
+// The fold lives inside plan(): on round > 0 it READS history (the last worker's real output
+// + its verdict) and COMPOSES the next prompt FROM that output.
+//
+// Decision values: the kernel STOPS the loop when decide() returns a TERMINAL value
+// ('stop' | 'pick-winner' | 'fail' | 'done'). Any other string is non-terminal → the loop
+// runs another round. That's the footgun for a refine driver: if decide() returned 'fail'
+// after a failing round 0, the loop would stop BEFORE it ever got to refine. So we return the
+// non-terminal 'refine' to keep going, and only the terminal 'pick-winner'/'fail' when truly done.
+type NoteDecision = 'refine' | 'pick-winner' | 'fail'
+
+function refineDriver(maxRounds: number): Driver<NoteTask, NoteOutput, NoteDecision> {
+  return {
+    name: 'refine',
+    async plan(task, history) {
+      // ROUND 0 — no history yet, so just run the initial task once.
+      if (history.length === 0) return [task]
+
+      // We already passed? Stop refining (return [] → no more workers this round).
+      const last = history[history.length - 1]
+      if (last?.verdict?.valid) return []
+
+      // Round cap: stop even if still failing.
+      if (history.length >= maxRounds) return []
+
+      // ── THE FOLD, PART 1: INGEST the last worker's actual output ────────────────────────
+      // `history[history.length - 1].output` is the real answer the previous worker produced;
+      // `.verdict` is how it scored. This read is what separates a driver from a counter.
+      const draft = last?.output?.note ?? '(empty draft)'
+      const why = last?.verdict?.notes ?? 'failed validation'
+
+      // ── THE FOLD, PART 2: GENERATE the next prompt FROM that output ──────────────────────
+      // We build the NEXT instruction out of what we just read. In a real supervisor a router
+      // LLM does this composition (it reads the folded worker output via its tool-result
+      // messages and writes the next spawn's prompt); here we do it in plain code so the seam
+      // is visible. The corrective prompt deliberately names the required word so the scripted
+      // worker can obey it — proving the loop's behavior changed BECAUSE of the fold.
+      const correctedPrompt =
+        `Your previous draft was: "${draft}". It was rejected because ${why}. ` +
+        `Rewrite the release note for "${task.feature}" so it explicitly mentions the ` +
+        `${requiredWord} path. Keep it to one line.`
+
+      return [{ ...task, prompt: correctedPrompt }]
+    },
+
+    // decide() runs after each round, AND once more when plan() returns [] (the finalize pass).
+    //   • a valid winner exists        → 'pick-winner' (terminal: we're done, ship it)
+    //   • no winner but rounds remain  → 'refine'      (NON-terminal: loop runs plan() again)
+    //   • no winner and out of rounds  → 'fail'        (terminal: give up)
+    decide(history): NoteDecision {
+      if (history.some((it) => it.verdict?.valid)) return 'pick-winner'
+      return history.length < maxRounds ? 'refine' : 'fail'
+    },
+  }
+}
+
+// ── SECTION 1: run the refine (multi-round) driver ──────────────────────────────────────
+async function runRefine(): Promise<void> {
+  console.log('── SECTION 1 · ROUNDS (refine) — driver reads worker output, rewrites the prompt')
+
+  const task: NoteTask = {
+    feature: 'one-click restore',
+    prompt: 'Write a one-line release note for the one-click restore feature.',
+  }
+
+  const result = await runLoop<NoteTask, NoteOutput, NoteDecision>({
+    driver: refineDriver(3),
+    agentRun: {
+      profile: { name: 'note-writer' } as AgentProfile,
+      // Each round's task carries the prompt the driver authored; this is how the rewritten
+      // instruction actually reaches the worker.
+      taskToPrompt: (t) => t.prompt,
+    },
+    output,
+    validator,
+    task,
+    ctx: { sandboxClient: scriptedWorkerClient() },
+    maxIterations: 5,
+  })
+
+  // One iteration == one round here (the driver runs a single worker per round).
+  for (const it of result.iterations) {
+    const verdict = it.verdict?.valid ? 'PASS' : 'reject'
+    console.log(`   ROUND ${it.index}: [${verdict}] note = "${it.output?.note ?? ''}"`)
+    if (!it.verdict?.valid && it.index < result.iterations.length - 1) {
+      console.log('            └─ driver folds this rejected output into round', it.index + 1)
+    }
+  }
+  console.log(`   decision: ${result.decision}`)
+  if (result.winner) console.log(`   winner: round ${result.winner.iterationIndex}`)
+  console.log()
+}
+
+// ── SECTION 2: contrast — SHOTS (multishot), the OTHER axis ──────────────────────────────
+// A round refines DEPTH-wise (each round improves on the last). A shot explores BREADTH-wise:
+// N independent attempts at the SAME task, in parallel, no fold between them. runMultishot is
+// the substrate primitive for that. We run it with a mocked router so it stays offline.
+interface SimplePersona extends MultishotPersona {
+  id: string
+}
+async function runShots(): Promise<void> {
+  console.log('── SECTION 2 · SHOTS (multishot) — N independent attempts, no fold between them')
+
+  const restore = installMockRouter([
+    { text: 'Attempt A: one-click restore with a rollback path.' },
+    { text: 'Attempt B: one-click restore, instant rollback if a deploy fails.' },
+    { text: 'Attempt C: one-click restore; rollback included.' },
+  ])
+  process.env.TANGLE_API_KEY ??= 'test-key'
+  try {
+    const profile: AgentProfile = {
+      name: 'note-writer',
+      prompt: { systemPrompt: 'Write a one-line release note that mentions rollback.' },
+    }
+    const shape: MultishotShape<SimplePersona> = {
+      buildOpener: () => 'Write the release note.',
+      buildDriverSystemPrompt: () => 'You are drafting a release note.',
+    }
+    // Three personas == three shots; they run independently. There is no round-to-round fold.
+    for (const id of ['shot-0', 'shot-1', 'shot-2']) {
+      const res = await runMultishot({
+        profile,
+        persona: { id } as SimplePersona,
+        shape,
+        maxTurns: 1,
+      })
+      // Grab the worker's reply: the last non-user, non-tool message in the transcript.
+      const reply = [...res.transcript]
+        .reverse()
+        .find((m) => m.role !== 'user' && m.role !== 'tool')
+      console.log(`   ${id} (parallel): "${reply?.content ?? ''}"`)
+    }
+  } finally {
+    restore()
+  }
+  console.log()
+  console.log('   ROUND vs SHOT: a round folds the last output into the next prompt (depth);')
+  console.log('   a shot is one independent attempt; multishot plays N shots at once (breadth).')
+}
+
+// Minimal offline router stub (same pattern as self-improving-loop) so SECTION 2 needs no creds.
+function installMockRouter(replies: Array<{ text: string }>): () => void {
+  const original = global.fetch
+  let i = 0
+  global.fetch = (async () => {
+    const r = replies[i++ % replies.length]
+    return {
+      ok: true,
+      status: 200,
+      json: async () => ({
+        choices: [{ message: { content: r?.text ?? '' } }],
+        usage: { prompt_tokens: 80, completion_tokens: 20 },
+      }),
+      text: async () => 'ok',
+    } as Response
+  }) as typeof fetch
+  return () => {
+    global.fetch = original
+  }
+}
+
+async function main(): Promise<void> {
+  await runRefine()
+  await runShots()
+}
+
+main().catch((err) => {
+  console.error(err)
+  process.exit(1)
+})
diff --git a/examples/fleet-delegation/README.md b/examples/fleet-delegation/README.md
index 125345e7..eccdefc6 100644
--- a/examples/fleet-delegation/README.md
+++ b/examples/fleet-delegation/README.md
@@ -77,9 +77,9 @@ flowchart TD
     end
 ```
 
-- **Sibling** (default): each `delegate_code` / `delegate_research` spawns
-  a fresh sandbox via `sandboxClient.create()`. Worker output flows back
-  through the MCP response — there is no shared filesystem.
+- **Sibling** (default): each `delegate` call spawns a fresh sandbox via
+  `sandboxClient.create()`. Worker output flows back through the MCP
+  response — there is no shared filesystem.
 - **Fleet** (set `TANGLE_FLEET_ID`): each delegation lands on an existing
   machine in the parent fleet. The fleet's shared-workspace policy means
   the worker sees the caller's filesystem and any diff lands in-place.
diff --git a/examples/mcp-delegation/README.md b/examples/mcp-delegation/README.md
index 5d24af73..0b9c16b5 100644
--- a/examples/mcp-delegation/README.md
+++ b/examples/mcp-delegation/README.md
@@ -1,7 +1,7 @@
 # mcp-delegation
 
 How a product mounts the `agent-runtime-mcp` server into its `AgentProfile`,
-plus a tiny stdio JSON-RPC client that proves the server exposes all five
+plus a tiny stdio JSON-RPC client that proves the server exposes the always-on
 delegation tools.
 
 ## Run
@@ -14,22 +14,26 @@ pnpm tsx examples/mcp-delegation/mcp-delegation.ts
 The first block prints the `mcp['agent-runtime-delegation']` entry a
 product passes to `sandboxClient.create({ backend: { profile } })`. The
 second block spawns the locally-built `dist/mcp/bin.js`, calls
-`tools/list` over stdio JSON-RPC, and asserts the five canonical tools
-are present.
+`tools/list` over stdio JSON-RPC, and asserts the always-on tools are
+present.
 
 ## What it shows
 
 - The literal `AgentProfileMcpServer` shape consumers paste into their own
   product's profile composer.
 - The bin's expected env: `TANGLE_API_KEY` for live delegations,
+  `MCP_ENABLE_DELEGATE=1` to opt the generic `delegate` verb in, and
   `AGENT_RUNTIME_MCP_ALLOW_NO_KEY=1` for the diagnostic mode the smoke leg
   uses when no key is set.
-- The five canonical tools every consumer expects:
-  - `delegate_code` — async coder dispatch
-  - `delegate_research` — async researcher dispatch
-  - `delegate_feedback` — append-only rating store
-  - `delegation_status` — poll for `pending` / `running` / `completed`
-  - `delegation_history` — read past delegations newest-first
+- The delegation tools:
+  - `delegate` — the ONE generic verb: a supervisor that authors + drives its
+    own worker and returns the delivered output with its real spend. Replaces
+    the old hardcoded `delegate_code` / `delegate_research`. Registers ONLY when
+    `MCP_ENABLE_DELEGATE=1` AND a real sandbox key resolves.
+  - `delegate_feedback` — append-only rating store (always on)
+  - `delegation_status` — poll for `pending` / `running` / `completed` (always on)
+  - `delegation_history` — read past delegations newest-first (always on)
+  - `delegate_ui_audit` — served only when a UI-audit runner is wired in
 
 ## Production wiring
 
@@ -46,6 +50,7 @@ const profile: AgentProfile = {
       env: {
         TANGLE_API_KEY: process.env.TANGLE_API_KEY!,
         SANDBOX_BASE_URL: 'https://sandbox.tangle.tools',
+        MCP_ENABLE_DELEGATE: '1', // opt the generic `delegate` verb in (off by default)
       },
       enabled: true,
     },
@@ -54,8 +59,10 @@ const profile: AgentProfile = {
 ```
 
 Pass `profile` to `sandboxClient.create({ backend: { profile } })`. The
-sandbox-side agent harness now sees the five delegation tools mid-turn,
-and can fan work out to coders/researchers without blocking the chat.
+sandbox-side agent harness now sees the delegation tools mid-turn, and can
+fan work out via the generic `delegate` verb without blocking the chat.
+Omit `MCP_ENABLE_DELEGATE` and only the always-on trio
+(`delegate_feedback` / `delegation_status` / `delegation_history`) is exposed.
 
 See [`fleet-delegation`](../fleet-delegation/) for the multi-machine
 variant where delegations dispatch into a shared workspace instead of
diff --git a/examples/product-eval/README.md b/examples/product-eval/README.md
index 2ad594de..7c228ed0 100644
--- a/examples/product-eval/README.md
+++ b/examples/product-eval/README.md
@@ -1,12 +1,13 @@
 # product-eval
 
-User-sim product evals in one call — `evalPersona`, plus the `runPersonaDispatch` → matrix path.
+User-sim product evals — `runPersonaConversation` (the persona loop) + the
+`runPersonaDispatch` → matrix path.
 
-A product eval runs the **agent under test** against a **persona** (a simulated user) over a
-multi-round conversation, then scores the transcript. `evalPersona(worker, persona, opts)` is the
-one-call entry — you author a worker `AgentProfile` and a persona, and it defaults the two seams
-`runPersonaConversation` otherwise makes you hand-wire: the backend (from `opts.{apiKey,baseUrl,model}`)
-and the system prompt (`p.prompt?.systemPrompt`).
+A product eval runs the **agent under test** against a **persona** (a simulated
+user) over a multi-round conversation, then scores the transcript.
+`runPersonaConversation` is the loop runner: you author a worker `AgentProfile`
+and a persona, and supply two seams — `backendFor` (turn a profile into a
+runnable backend) and `systemPromptOf` (render its system prompt).
 
 Three cells, smallest to largest:
 
@@ -27,6 +28,6 @@ Optional env: `WORKER_MODEL` (the agent under test, default `gpt-4o-mini`), `ROU
 
 ## Offline
 
-`evalPersona` and `runPersonaDispatch` both take a `backendFor` override — pass a fake backend and the
-whole loop runs with no credentials and no network. See `src/conversation/eval-persona.test.ts` for
-the `$0` offline pattern (it is part of `pnpm test`).
+Both `runPersonaConversation` and `runPersonaDispatch` take a `backendFor` seam — pass a fake
+backend and the whole loop runs with no credentials and no network. See
+`src/conversation/run-persona.test.ts` for the `$0` offline pattern (it is part of `pnpm test`).
diff --git a/examples/researcher-loop/README.md b/examples/researcher-loop/README.md
index c5d02d7d..96193a07 100644
--- a/examples/researcher-loop/README.md
+++ b/examples/researcher-loop/README.md
@@ -1,17 +1,19 @@
 # researcher-loop
 
 `researcherProfile()` (from `@tangle-network/agent-knowledge/profiles`) +
-`runLoop()` + an inline fanout `Driver` — the researcher-flavoured
-counterpart to [`coder-loop`](../coder-loop). Two parallel researcher
-iterations attempt the same question; the validator scores citation density +
-namespace scoping + per-item provenance; the kernel picks the
-highest-scoring valid winner.
+`runLoop()` + an inline fanout `Driver` — the primary, smallest example of the
+`runLoop` kernel. Two parallel researcher attempts answer the same question;
+the validator scores citation density + namespace scoping + per-item
+provenance; the kernel picks the highest-scoring valid winner.
 
-Same `runLoop` kernel and inline fanout driver as
-[`coder-loop`](../coder-loop), only the profile differs. The load-bearing
-branch below is candidate B: it leaks an item into `other-tenant`, so the
-validator hard-fails the entire output and the kernel prunes it — leaving A
-as the sole winner.
+A **round** is one `plan → run workers → decide` cycle. This driver is
+**single-round**: `plan()` returns two copies of the task on round 0, then `[]`
+forever after — so it spawns two workers, scores both, and picks once. It never
+reads a worker's output to write the next instruction. To see a driver that
+*does* re-plan from worker output (the supervisor pattern), read
+[`driver-loop/`](../driver-loop). The load-bearing branch below is candidate B:
+it leaks an item into `other-tenant`, so the validator hard-fails the entire
+output and the kernel prunes it — leaving A as the sole winner.
 
 ```mermaid
 flowchart TD
diff --git a/examples/researcher-loop/researcher-loop.ts b/examples/researcher-loop/researcher-loop.ts
index 36ebe00d..b46c1123 100644
--- a/examples/researcher-loop/researcher-loop.ts
+++ b/examples/researcher-loop/researcher-loop.ts
@@ -145,6 +145,11 @@ async function main(): Promise<void> {
   const { output, validator, agentRunSpec } = researcherProfile({ task })
   const driver: Driver<ResearchTask, ResearchOutput, 'pick-winner' | 'fail'> = {
     name: 'fanout',
+    // A "round" = one plan → run workers → decide cycle. This driver is SINGLE-ROUND:
+    // it returns two copies of the task on round 0 (history empty) → two parallel
+    // workers (a "fanout"), then [] forever after → it spawns, scores, and picks ONCE.
+    // It never reads a worker's output to build the next prompt. For a driver that
+    // re-plans from worker output (the supervisor fold), see examples/driver-loop/.
     plan: async (task, history) => (history.length === 0 ? [task, task] : []),
     decide: (history) => (history.some((i) => i.verdict?.valid === true) ? 'pick-winner' : 'fail'),
   }
diff --git a/examples/self-improving-loop/self-improving-loop.ts b/examples/self-improving-loop/self-improving-loop.ts
index 59d7bf53..43384d7a 100644
--- a/examples/self-improving-loop/self-improving-loop.ts
+++ b/examples/self-improving-loop/self-improving-loop.ts
@@ -171,6 +171,11 @@ async function runVariant(profile: AgentProfile, scriptedReplies: ScriptedReply[
       score: { composite: number }
     }> = []
     for (const persona of PERSONAS) {
+      // One `runMultishot` call drives ONE conversation for one persona; `maxTurns: 1` caps
+      // it at a single turn. The personas run one after another in this loop, NOT in
+      // parallel, and nothing from one persona's run is folded into the next one's prompt.
+      // For a driver that DOES fold each worker output into the next prompt, and for the
+      // shot/round/turn vocabulary, see examples/driver-loop/.
       const result = await runMultishot({ profile, persona, shape, maxTurns: 1 })
       const score = await runJudge(conversationJudge, { transcript: result.transcript, persona })
       runs.push({ persona, result, score })
diff --git a/examples/supervise/supervise.ts b/examples/supervise/supervise.ts
index 68f33e83..d393e96a 100644
--- a/examples/supervise/supervise.ts
+++ b/examples/supervise/supervise.ts
@@ -46,4 +46,4 @@ const result = await supervise(
   },
 )
 
-console.log(result.kind === 'winner' ? '✓ delivered' : `✗ no winner (${result.kind})`)
+console.log(result.kind === 'winner' ? '[OK] delivered' : `[--] no winner (${result.kind})`)
diff --git a/examples/supervisor-loop/run-bridge.ts b/examples/supervisor-loop/run-bridge.ts
index ec09f611..fcf5c3e0 100644
--- a/examples/supervisor-loop/run-bridge.ts
+++ b/examples/supervisor-loop/run-bridge.ts
@@ -94,8 +94,8 @@ async function main(): Promise<void> {
 
   console.log(
     result.kind === 'winner'
-      ? `✅ delivered: ${JSON.stringify(result.out)}`
-      : `❌ no winner (${result.reason}, ${result.downCount} down)`,
+      ? `[OK] delivered: ${JSON.stringify(result.out)}`
+      : `[--] no winner (${result.reason}, ${result.downCount} down)`,
   )
 }
 
diff --git a/examples/supervisor-loop/run-sandbox.ts b/examples/supervisor-loop/run-sandbox.ts
index c1c2eb37..aabcfa73 100644
--- a/examples/supervisor-loop/run-sandbox.ts
+++ b/examples/supervisor-loop/run-sandbox.ts
@@ -1,17 +1,24 @@
 /**
- * The sandbox path — each worker is a coding harness running in a real Tangle sandbox box.
+ * SANDBOXED SUPERVISOR — a supervisor that drives workers inside real Tangle sandbox boxes.
+ *
+ * The three-step shape:
+ *   1. the supervisor AUTHORS a worker `AgentProfile` (its standing instructions + harness),
+ *   2. each worker runs `runLoop` INSIDE a real box — `createExecutor({ backend: 'sandbox',
+ *      harness, sandboxClient })` composes the kernel as a single-task leaf in a box running
+ *      `harness` (opencode / claude-code / codex),
+ *   3. the supervisor reads each box's settled output and drives the next worker until the
+ *      deliverable check passes.
  *
  *   TANGLE_API_KEY=sk-... SANDBOX_BASE_URL=https://... pnpm tsx examples/supervisor-loop/run-sandbox.ts
  *
  * The supervisor is the canonical one-call `supervise()`; this runner supplies only the
- * load-bearing sandbox seam — a real `SandboxClient` + `backend: 'sandbox'` (each worker leaf
- * is `createExecutor({ backend: 'sandbox', harness, sandboxClient })`, which composes `runLoop`
- * as a single-task leaf inside a box running `harness`).
+ * load-bearing sandbox seam — a real `SandboxClient` + `backend: 'sandbox'`. The WORKER BACKEND
+ * is the only knob: swap `backend: 'sandbox'` for `'bridge'` and the IDENTICAL supervisor drives
+ * local harness CLIs instead (see run-bridge.ts).
  *
  * The driver brain defaults to the router (the box key is already in hand); set DRIVER=scripted
- * for the offline brain. The IDENTICAL supervisor runs against local harness CLIs by swapping
- * the one backend value to `bridge` — see run-bridge.ts. For a fully offline, no-creds wiring
- * check, see tests/loops/coordination-driver.test.ts and tests/supervisor-loop-example.test.ts.
+ * for the offline brain. For a fully offline, no-creds wiring check, see
+ * tests/loops/coordination-driver.test.ts and tests/supervisor-loop-example.test.ts.
  */
 
 import {
@@ -75,8 +82,8 @@ async function main(): Promise<void> {
 
   console.log(
     result.kind === 'winner'
-      ? `✅ delivered: ${JSON.stringify(result.out)}`
-      : `❌ no winner (${result.reason}, ${result.downCount} down)`,
+      ? `[OK] delivered: ${JSON.stringify(result.out)}`
+      : `[--] no winner (${result.reason}, ${result.downCount} down)`,
   )
 }
 
diff --git a/examples/supervisor-loop/run-supervisor-mcp.ts b/examples/supervisor-loop/run-supervisor-mcp.ts
index be020878..0d698b3e 100644
--- a/examples/supervisor-loop/run-supervisor-mcp.ts
+++ b/examples/supervisor-loop/run-supervisor-mcp.ts
@@ -166,11 +166,11 @@ async function main(): Promise<void> {
   console.log('\n── verdict ──')
   if (result.kind === 'winner') {
     console.log(
-      `✅ supervisor drove a worker via the coordination MCP to a CHECKED delivery on backend "${backend.backend}".`,
+      `[OK] supervisor drove a worker via the coordination MCP to a CHECKED delivery on backend "${backend.backend}".`,
     )
     console.log(`   winner output: ${JSON.stringify(result.out)}`)
   } else {
-    console.log(`❌ no delivery (result=${result.kind}) — see supervisor transcript above`)
+    console.log(`[--] no delivery (result=${result.kind}) — see supervisor transcript above`)
     process.exitCode = 1
   }
 }
diff --git a/examples/supervisor-loop/shared.ts b/examples/supervisor-loop/shared.ts
index b85f26d8..a0e8e5e8 100644
--- a/examples/supervisor-loop/shared.ts
+++ b/examples/supervisor-loop/shared.ts
@@ -68,9 +68,12 @@ export function scriptedSupervisorChat(workerCount: number, labelPrefix = 'solve
 
   let i = 0
   return (messages) => {
-    // A real brain reads `messages` (the folded tool results) to decide; the
-    // scripted one advances its fixed plan. Touch `messages` so the shape is
-    // exercised.
+    // A real brain READS `messages` (the folded worker outputs + tool results) and
+    // composes its next move FROM them — that read is "the fold". This scripted brain
+    // deliberately IGNORES `messages` and advances a fixed plan, so do NOT mistake this
+    // for the supervisor pattern. To see a driver that actually reads the last worker's
+    // output and builds the next instruction from it, read examples/driver-loop/.
+    // We touch `messages` only so the shape is exercised:
     void messages.length
     const turn = turns[Math.min(i, turns.length - 1)] ?? { content: '', toolCalls: [] }
     i += 1
diff --git a/examples/ui-audit/README.md b/examples/ui-audit/README.md
index 7a5eabdc..a1b2812d 100644
--- a/examples/ui-audit/README.md
+++ b/examples/ui-audit/README.md
@@ -6,8 +6,8 @@ The example uses a **stub judge** so it runs without an API key and demonstrates
 
 ## What the example shows
 
-- A custom `SandboxClient` — the in-process browser+judge client — satisfies the kernel contract WITHOUT a real sandbox-SDK harness. The kernel does `client.create() → box.streamPrompt() → box.delete()` exactly as it does for `coderProfile`; the work happens in-process.
-- A custom `Driver` (`lensCyclingDriver`) plans one iteration per lens in a fixed order. Supply your own `Driver` that authors its topology from the trace for richer policies.
+- A custom `SandboxClient` — the in-process browser+judge client — satisfies the kernel contract WITHOUT a real sandbox-SDK harness. The kernel does `client.create() → box.streamPrompt() → box.delete()` exactly as it does for any profile (e.g. `researcherProfile`); the work happens in-process.
+- A custom `Driver` (`lensCyclingDriver`) plans one iteration per lens in a fixed order. It is **content-blind**: it cycles a fixed lens list off `history.length` and never reads a worker's output to decide what to do next. For a driver that re-plans *from* worker output, see [`driver-loop/`](../driver-loop). Supply your own `Driver` that authors its topology from the trace for richer policies.
 - `appendFindings(workspaceDir, findings)` and `writeAuditIndex(workspaceDir)` persist self-contained GitHub-issue Markdown files plus a registry + index.
 
 ## Run
diff --git a/examples/ui-audit/ui-audit.ts b/examples/ui-audit/ui-audit.ts
index a78dab87..a6e4bb85 100644
--- a/examples/ui-audit/ui-audit.ts
+++ b/examples/ui-audit/ui-audit.ts
@@ -68,6 +68,10 @@ function lensCyclingDriver(
 ): Driver<UiAuditTask, UiAuditOutput, 'complete' | 'failed'> {
   let cursor = 0
   return {
+    // This driver is CONTENT-BLIND by design: it cycles a fixed lens list off
+    // `history.length` and never reads a worker's output. So "driver" here is just
+    // a counter, NOT the output-driven re-planner. For a driver that builds the next
+    // prompt FROM the last worker's output (the fold), see examples/driver-loop/.
     // plan() returns Task[] — one lens per iteration, [] once all lenses are
     // cycled. The empty plan is what ends the loop: neither 'complete' nor
     // 'failed' is a terminal Decision (isTerminalDecision = stop|fail|done|

From 56b143c1002f41ae34895a311667e1da9e2fd0b0 Mon Sep 17 00:00:00 2001
From: Drew Stone <drewstone329@gmail.com>
Date: Tue, 23 Jun 2026 16:40:59 -0600
Subject: [PATCH 2/2] =?UTF-8?q?docs(examples):=20align=20driver-loop=20voc?=
 =?UTF-8?q?abulary=20=E2=80=94=20a=20shot=20is=20one=20driver-worker=20exc?=
 =?UTF-8?q?hange?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

shot = round = turn = one (driver prompts worker -> worker output -> driver) exchange;
'many shots' is the sequence where each output folds into the next prompt. Remove the
runMultishot 'parallel shots' section: runMultishot is a multi-turn conversation, not a
fanout, so it mislabeled the breadth axis. Point to researcher-loop for fanout instead.
---
 examples/driver-loop/README.md      |  34 +++---
 examples/driver-loop/driver-loop.ts | 170 ++++++++--------------------
 2 files changed, 67 insertions(+), 137 deletions(-)

diff --git a/examples/driver-loop/README.md b/examples/driver-loop/README.md
index cb38b5ac..fb0d1c2d 100644
--- a/examples/driver-loop/README.md
+++ b/examples/driver-loop/README.md
@@ -13,23 +13,23 @@ pnpm tsx examples/driver-loop/driver-loop.ts
 
 ## Vocabulary
 
-These words are used across every example and defined here.
+These words are used across every example. The key thing: **a shot, a round, and a turn are the
+same atom** — one driver↔worker exchange. "Many shots" is the *sequence* of them, not a fanout.
 
 | Term | Meaning |
 |---|---|
-| **round** | One full driver cycle: `plan → run workers → decide`. The `runLoop` kernel runs exactly this, once per round. |
-| **shot** | One independent worker attempt/sample. A single round can run many shots (a fanout). |
-| **multishot** | N shots played in parallel. |
-| **sample** | A strategy: take the best of N shots (breadth). |
-| **refine** | A strategy: iterate-with-critique *across rounds* (depth) — what SECTION 1 of this example does. |
+| **shot** = **round** = **turn** | ONE driver↔worker exchange: `driver ──prompt──▶ worker ──output (+traces/analysis)──▶ driver`. (`runLoop` increments a "round"; the multi-turn conversation primitive calls it a "turn"; people say "shot". Same atom.) |
+| **the loop** (*"many shots"*) | A **sequence** of shots where each output **folds** into the next prompt: `prompt0 ▶ worker ▶ output0 ▶ driver ▶ prompt1 ▶ worker ▶ …`. Each shot builds on the last. **This example.** |
+| **refine** | The strategy this file uses: keep taking shots, folding the last output into the next prompt, until a check passes (depth). |
+| **fanout** (*best-of-N*) | A **different** axis: N *independent* shots with **no fold** between them, keep the best (breadth). This is **not** "many shots" in the looping sense — see `examples/researcher-loop`. |
 
 ## What the example shows
 
-**SECTION 1 — ROUNDS (refine), the centerpiece.** A multi-round driver:
+A multi-shot **refine** driver:
 
-- **Round 0** — `driver.plan(task, history=[])`: no history yet, so it runs the worker once. The
+- **Shot 0** — `driver.plan(task, history=[])`: no history yet, so it runs the worker once. The
   worker drafts a release note but forgets a required word, so the validator **rejects** it.
-- **Round 1** — `driver.plan(task, history=[1 rejected])`: the driver READS the rejected draft
+- **Shot 1** — `driver.plan(task, history=[1 rejected])`: the driver READS the rejected draft
   and its verdict out of `history`, then COMPOSES a corrective prompt *from that output* ("your
   draft was X, it was rejected because Y — rewrite it to mention Z"). The worker obeys the new
   prompt and the validator **passes**.
@@ -43,13 +43,13 @@ is visible.
 ```mermaid
 flowchart TD
   task["NoteTask\nprompt: draft a release note"] --> plan0
-  subgraph r0["ROUND 0 — plan(task, history=[])"]
+  subgraph s0["SHOT 0 — plan(task, history=[])"]
     plan0["driver runs the worker once"]
   end
   plan0 --> w0["worker → 'Shipped one-click restore for failed deploys.'"]
   w0 --> v0{"validator: mentions 'rollback'?"}
   v0 -->|no — REJECT| fold["THE FOLD\ndriver reads the rejected draft\n+ builds a corrective prompt from it"]
-  subgraph r1["ROUND 1 — plan(task, history=[1 rejected])"]
+  subgraph s1["SHOT 1 — plan(task, history=[1 rejected])"]
     fold
   end
   fold --> w1["worker → '…with an instant rollback path…'"]
@@ -57,10 +57,10 @@ flowchart TD
   v1 -->|yes — PASS| done["decide → pick-winner"]
 ```
 
-**SECTION 2 — SHOTS (multishot), the contrast.** Three independent attempts at the same task,
-in parallel, with **no fold between them**. This is the *other* axis: a round refines depth-wise
-(each round improves on the last); a shot explores breadth-wise (many tries at once). Seeing them
-side by side is the cleanest way to internalize round vs shot.
+**Refine vs fanout (the other axis).** This file refines *depth*-wise: each shot improves on the
+last by folding its output forward. The orthogonal move is *breadth* — fire N independent shots at
+once with no fold between them and keep the best (a fanout / best-of-N). That's a different example: see
+`examples/researcher-loop`, whose driver runs one `plan → run workers → decide` cycle and is content-blind on purpose.
 
 ## Where this goes next
 
@@ -68,5 +68,5 @@ side by side is the cleanest way to internalize round vs shot.
   for you.
 - `examples/supervisor-loop/` — the same supervisor over a real worker backend (sandbox box /
   local cli-bridge), worker backend as the only knob.
-- `examples/researcher-loop/` and `examples/ui-audit/` — `runLoop` drivers that are *single-round*
-  and *content-blind* on purpose (they never fold); read those to see the contrast with this one.
+- `examples/researcher-loop/` — a `runLoop` driver that runs a *single* `plan → run workers → decide` cycle and is
+  *content-blind* on purpose (a fanout, never a fold); read it to see the breadth axis next to this file's depth axis.
diff --git a/examples/driver-loop/driver-loop.ts b/examples/driver-loop/driver-loop.ts
index 6044a3bd..574bfe81 100644
--- a/examples/driver-loop/driver-loop.ts
+++ b/examples/driver-loop/driver-loop.ts
@@ -1,24 +1,33 @@
 /**
  * driver-loop — SEE THE FOLD.
  *
- * This is the one concept that makes the whole supervisor/driver story click: a driver
- * does not just count iterations. It READS the last worker's actual output and WRITES the
- * next instruction FROM that output. That read-then-rewrite is "the fold". Everything else
- * in this repo — supervise(), the coordination MCP, the self-improvement loop — is built on
- * top of this single move.
+ * The one concept that makes the whole supervisor/driver story click: a driver does not just
+ * count attempts. It READS the last worker's output and WRITES the next instruction FROM it.
+ * That read-then-rewrite is "the fold". supervise(), the coordination MCP, and the
+ * self-improvement loop are all built on this single move.
  *
- * ── Vocabulary (used everywhere, defined here) ──────────────────────────────────────────
- *   • round      — one full driver cycle: plan → run workers → decide. The `runLoop` kernel
- *                  calls plan(), runs the planned workers, then calls decide(), once per round.
- *   • shot       — one independent worker attempt/sample. A round can run many shots (a fanout).
- *   • multishot  — N shots played in parallel (see SECTION 2 below).
- *   • sample     — a strategy: take the best of N shots (breadth).
- *   • refine     — a strategy: iterate-with-critique ACROSS rounds (depth) — this file's SECTION 1.
+ * ── Vocabulary (one exchange, three names — all the SAME atom) ────────────────────────────
  *
- * SECTION 1 (the centerpiece) is a multi-ROUND refine driver. Round 0 asks the worker to draft
- * a release note; the validator rejects it for missing a required word; the driver READS that
- * rejected draft and BUILDS a corrective prompt from it; round 1 re-runs with that prompt and
- * passes. SECTION 2 contrasts it with a multi-SHOT run so the two axes sit side by side.
+ *   • shot = round = turn — ONE driver↔worker exchange:
+ *
+ *         driver ──prompt──▶ worker ──output (+ traces / analysis)──▶ driver
+ *
+ *     The driver sends a prompt, the worker runs, its output comes back, the driver reads it.
+ *     (`runLoop` increments a "round"; the multi-turn conversation primitive calls it a "turn";
+ *      people say "shot". Same atom — pick whichever word you like.)
+ *
+ *   • the loop ("many shots") — a SEQUENCE of shots where each output FOLDS into the next prompt:
+ *
+ *         prompt0 ▶ worker ▶ output0 ▶ driver ▶ prompt1 ▶ worker ▶ output1 ▶ driver ▶ …
+ *
+ *     Each shot builds on the last. THIS FILE is exactly that, and it's almost always what you want.
+ *
+ *   • fanout (breadth / best-of-N) — a DIFFERENT axis: N independent shots with NO fold between
+ *     them, keep the best. That is NOT "many shots" in the looping sense. See examples/researcher-loop.
+ *
+ * This file is a multi-shot REFINE driver. Shot 0 drafts a release note; the validator rejects it
+ * for a missing word; the driver READS that rejected draft and BUILDS a corrective prompt from it;
+ * shot 1 re-runs with that prompt and passes — proving the loop's behavior changed BECAUSE of the fold.
  *
  * Fully offline — the worker is a scripted client keyed on the prompt, so it runs with zero
  * credentials (the same offline pattern self-improving-loop uses).
@@ -26,11 +35,6 @@
  * Run:  pnpm tsx examples/driver-loop/driver-loop.ts
  */
 
-import {
-  type MultishotPersona,
-  type MultishotShape,
-  runMultishot,
-} from '@tangle-network/agent-eval/multishot'
 import {
   type DefaultVerdict,
   type Driver,
@@ -45,7 +49,7 @@ import type { AgentProfile, SandboxEvent, SandboxInstance } from '@tangle-networ
 // product would validate something richer; the required word keeps the example deterministic.
 interface NoteTask {
   feature: string
-  /** The next instruction the worker should run. The DRIVER rewrites this between rounds. */
+  /** The next instruction the worker should run. The DRIVER rewrites this between shots. */
   prompt: string
 }
 interface NoteOutput {
@@ -57,7 +61,7 @@ const requiredWord = 'rollback'
 // A worker is just something that takes a prompt and streams back events. Here we fake it:
 // the FIRST prompt produces a draft that forgets the required word (so it will be rejected);
 // any prompt that mentions the required word produces a corrected draft. That keyed behavior
-// is what lets the example PROVE the fold worked: round 1 only passes because the driver put
+// is what lets the example PROVE the fold worked: shot 1 only passes because the driver put
 // the right correction into the prompt.
 function scriptedWorkerClient(): { create(): Promise<SandboxInstance> } {
   return {
@@ -107,30 +111,30 @@ const validator: Validator<NoteOutput> = {
 }
 
 // ── THE DRIVER — this is the example ────────────────────────────────────────────────────
-// A driver is two functions: plan() (what to run this round) and decide() (are we done?).
-// The fold lives inside plan(): on round > 0 it READS history (the last worker's real output
+// A driver is two functions: plan() (what to run this shot) and decide() (are we done?).
+// The fold lives inside plan(): on shot > 0 it READS history (the last worker's real output
 // + its verdict) and COMPOSES the next prompt FROM that output.
 //
 // Decision values: the kernel STOPS the loop when decide() returns a TERMINAL value
 // ('stop' | 'pick-winner' | 'fail' | 'done'). Any other string is non-terminal → the loop
-// runs another round. That's the footgun for a refine driver: if decide() returned 'fail'
-// after a failing round 0, the loop would stop BEFORE it ever got to refine. So we return the
+// runs another shot. That's the footgun for a refine driver: if decide() returned 'fail'
+// after a failing shot 0, the loop would stop BEFORE it ever got to refine. So we return the
 // non-terminal 'refine' to keep going, and only the terminal 'pick-winner'/'fail' when truly done.
 type NoteDecision = 'refine' | 'pick-winner' | 'fail'
 
-function refineDriver(maxRounds: number): Driver<NoteTask, NoteOutput, NoteDecision> {
+function refineDriver(maxShots: number): Driver<NoteTask, NoteOutput, NoteDecision> {
   return {
     name: 'refine',
     async plan(task, history) {
-      // ROUND 0 — no history yet, so just run the initial task once.
+      // SHOT 0 — no history yet, so just run the initial task once.
       if (history.length === 0) return [task]
 
-      // We already passed? Stop refining (return [] → no more workers this round).
+      // We already passed? Stop refining (return [] → no more workers).
       const last = history[history.length - 1]
       if (last?.verdict?.valid) return []
 
-      // Round cap: stop even if still failing.
-      if (history.length >= maxRounds) return []
+      // Shot cap: stop even if still failing.
+      if (history.length >= maxShots) return []
 
       // ── THE FOLD, PART 1: INGEST the last worker's actual output ────────────────────────
       // `history[history.length - 1].output` is the real answer the previous worker produced;
@@ -152,20 +156,20 @@ function refineDriver(maxRounds: number): Driver<NoteTask, NoteOutput, NoteDecis
       return [{ ...task, prompt: correctedPrompt }]
     },
 
-    // decide() runs after each round, AND once more when plan() returns [] (the finalize pass).
-    //   • a valid winner exists        → 'pick-winner' (terminal: we're done, ship it)
-    //   • no winner but rounds remain  → 'refine'      (NON-terminal: loop runs plan() again)
-    //   • no winner and out of rounds  → 'fail'        (terminal: give up)
+    // decide() runs after each shot, AND once more when plan() returns [] (the finalize pass).
+    //   • a valid winner exists       → 'pick-winner' (terminal: we're done, ship it)
+    //   • no winner but shots remain  → 'refine'      (NON-terminal: loop runs plan() again)
+    //   • no winner and out of shots  → 'fail'        (terminal: give up)
     decide(history): NoteDecision {
       if (history.some((it) => it.verdict?.valid)) return 'pick-winner'
-      return history.length < maxRounds ? 'refine' : 'fail'
+      return history.length < maxShots ? 'refine' : 'fail'
     },
   }
 }
 
-// ── SECTION 1: run the refine (multi-round) driver ──────────────────────────────────────
-async function runRefine(): Promise<void> {
-  console.log('── SECTION 1 · ROUNDS (refine) — driver reads worker output, rewrites the prompt')
+// ── Run the refine (multi-shot) driver ──────────────────────────────────────────────────
+async function main(): Promise<void> {
+  console.log('driver-loop · the driver reads each shot’s output and rewrites the next prompt\n')
 
   const task: NoteTask = {
     feature: 'one-click restore',
@@ -176,7 +180,7 @@ async function runRefine(): Promise<void> {
     driver: refineDriver(3),
     agentRun: {
       profile: { name: 'note-writer' } as AgentProfile,
-      // Each round's task carries the prompt the driver authored; this is how the rewritten
+      // Each shot's task carries the prompt the driver authored; this is how the rewritten
       // instruction actually reaches the worker.
       taskToPrompt: (t) => t.prompt,
     },
@@ -187,90 +191,16 @@ async function runRefine(): Promise<void> {
     maxIterations: 5,
   })
 
-  // One iteration == one round here (the driver runs a single worker per round).
+  // One iteration == one shot here (each plan() call returns at most one task, so one worker runs).
   for (const it of result.iterations) {
     const verdict = it.verdict?.valid ? 'PASS' : 'reject'
-    console.log(`   ROUND ${it.index}: [${verdict}] note = "${it.output?.note ?? ''}"`)
+    console.log(`SHOT ${it.index}: [${verdict}] note = "${it.output?.note ?? ''}"`)
     if (!it.verdict?.valid && it.index < result.iterations.length - 1) {
-      console.log('            └─ driver folds this rejected output into round', it.index + 1)
+      console.log(`         └─ driver folds this rejected output into shot ${it.index + 1}`)
     }
   }
-  console.log(`   decision: ${result.decision}`)
-  if (result.winner) console.log(`   winner: round ${result.winner.iterationIndex}`)
-  console.log()
-}
-
-// ── SECTION 2: contrast — SHOTS (multishot), the OTHER axis ──────────────────────────────
-// A round refines DEPTH-wise (each round improves on the last). A shot explores BREADTH-wise:
-// N independent attempts at the SAME task, in parallel, no fold between them. runMultishot is
-// the substrate primitive for that. We run it with a mocked router so it stays offline.
-interface SimplePersona extends MultishotPersona {
-  id: string
-}
-async function runShots(): Promise<void> {
-  console.log('── SECTION 2 · SHOTS (multishot) — N independent attempts, no fold between them')
-
-  const restore = installMockRouter([
-    { text: 'Attempt A: one-click restore with a rollback path.' },
-    { text: 'Attempt B: one-click restore, instant rollback if a deploy fails.' },
-    { text: 'Attempt C: one-click restore; rollback included.' },
-  ])
-  process.env.TANGLE_API_KEY ??= 'test-key'
-  try {
-    const profile: AgentProfile = {
-      name: 'note-writer',
-      prompt: { systemPrompt: 'Write a one-line release note that mentions rollback.' },
-    }
-    const shape: MultishotShape<SimplePersona> = {
-      buildOpener: () => 'Write the release note.',
-      buildDriverSystemPrompt: () => 'You are drafting a release note.',
-    }
-    // Three personas == three shots; they run independently. There is no round-to-round fold.
-    for (const id of ['shot-0', 'shot-1', 'shot-2']) {
-      const res = await runMultishot({
-        profile,
-        persona: { id } as SimplePersona,
-        shape,
-        maxTurns: 1,
-      })
-      // Grab the worker's reply: the last non-user, non-tool message in the transcript.
-      const reply = [...res.transcript]
-        .reverse()
-        .find((m) => m.role !== 'user' && m.role !== 'tool')
-      console.log(`   ${id} (parallel): "${reply?.content ?? ''}"`)
-    }
-  } finally {
-    restore()
-  }
-  console.log()
-  console.log('   ROUND vs SHOT: a round folds the last output into the next prompt (depth);')
-  console.log('   a shot is one independent attempt; multishot plays N shots at once (breadth).')
-}
-
-// Minimal offline router stub (same pattern as self-improving-loop) so SECTION 2 needs no creds.
-function installMockRouter(replies: Array<{ text: string }>): () => void {
-  const original = global.fetch
-  let i = 0
-  global.fetch = (async () => {
-    const r = replies[i++ % replies.length]
-    return {
-      ok: true,
-      status: 200,
-      json: async () => ({
-        choices: [{ message: { content: r?.text ?? '' } }],
-        usage: { prompt_tokens: 80, completion_tokens: 20 },
-      }),
-      text: async () => 'ok',
-    } as Response
-  }) as typeof fetch
-  return () => {
-    global.fetch = original
-  }
-}
-
-async function main(): Promise<void> {
-  await runRefine()
-  await runShots()
+  console.log(`\ndecision: ${result.decision}`)
+  if (result.winner) console.log(`winner: shot ${result.winner.iterationIndex}`)
 }
 
 main().catch((err) => {