diff --git a/bench/HARNESS.md b/bench/HARNESS.md
index da1faa55..8d72346a 100644
--- a/bench/HARNESS.md
+++ b/bench/HARNESS.md
@@ -164,6 +164,17 @@ via the router, is graded by the runnable checker, and that `BenchScore` is the
 Offline plumbing test (no creds): `tsx src/gate.test.mts`. The gate runs through the SAME recursive
 atom every personified loop uses.
 
+## "Supervisor" (iterate/decompose) vs blind — through the PUBLISHED suite
+The supervisor-vs-blind gate is NOT a bespoke harness: it is `runBenchmark([sample, refine, …])`
+over an Environment. blind = `sample` (best-of-k); "supervisor" = `refine`/`sampleThenRefine`
+(depth: attempt→firewalled-analyst-steer→retry — *"a multi-agent team is just a Strategy whose driver
+spawns several agents"*). Equal compute by the substrate's CONSERVED budget; the deployable check is
+the Environment's `score`; the can't-fake-the-check firewall is built in. Run it on the HARD real
+domain via `commit0-env-run.mts` (above) or the toy `strategy-demo.mts` (offline). The LLM
+agent-driver (an LLM that itself decides spawns via the coordination MCP) is the SEPARATE product
+path — `atom-mcp-e2e.mts` / `atom-commit0.mts` — not a strategy. Evolve any strategy on a frozen
+holdout with `runStrategyEvolution`.
+
 ## Generate a fresh corpus + gate it
 The rollout generators now live with their domains: the recursive gate
 (`gate-cli.mts`) and the optimization-suite env runs (`commit0-env-run.mts`,
diff --git a/bench/src/atom-humaneval.mts b/bench/src/atom-humaneval.mts
index 0a394b78..64c43f5f 100644
--- a/bench/src/atom-humaneval.mts
+++ b/bench/src/atom-humaneval.mts
@@ -189,7 +189,7 @@ async function driveTask(
   })
   const tree = await journal.loadTree(runId)
   const tokens = (tree ?? [])
-    .filter((e): e is Extract<(typeof tree)[number], { kind: 'settled' }> => e.kind === 'settled')
+    .filter((e): e is Extract<NonNullable<typeof tree>[number], { kind: 'settled' }> => e.kind === 'settled')
     .reduce((s, e) => s + e.spent.tokens.input + e.spent.tokens.output, 0)
   const replay = renderReplayHtml(recorder.timeline(runId), {
     title: `${task.taskId} · driver=${driverCfg.model}`,
diff --git a/bench/src/atom-mcp-e2e.mts b/bench/src/atom-mcp-e2e.mts
index 04c8db76..3da19796 100644
--- a/bench/src/atom-mcp-e2e.mts
+++ b/bench/src/atom-mcp-e2e.mts
@@ -13,7 +13,7 @@
  */
 
 import { execFileSync } from 'node:child_process'
-import { cpSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs'
+import { mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs'
 import { tmpdir } from 'node:os'
 import { dirname, join } from 'node:path'
 import { fileURLToPath } from 'node:url'
@@ -26,9 +26,12 @@ import {
   createSupervisor,
   type Executor,
   type ExecutorResult,
+  gitWorkspace,
   InMemoryResultBlobStore,
   InMemorySpawnJournal,
+  runInWorkspace,
   type Scope,
+  type Workspace,
 } from '../../src/runtime/index'
 import { asAuthoredProfile } from '../../src/runtime/supervise/authoring'
 import { serveCoordinationMcp } from '../../src/runtime/supervise/coordination-mcp'
@@ -41,14 +44,28 @@ const SKILL_MD = readFileSync(join(REPO, 'skills', 'supervise', 'SKILL.md'), 'ut
 
 const TASK = 'In solution.py, implement add(a, b) so it returns the sum a + b and test_solution.py passes.'
 
-function makeTaskTemplate(): string {
-  const dir = mkdtempSync(join(tmpdir(), 'e2e-task-'))
-  writeFileSync(join(dir, 'solution.py'), 'def add(a, b):\n    raise NotImplementedError\n')
+/** Seed a bare git repo with the failing task — the SHARED workspace ref every worker clones. Returns its path (a temp dir the caller removes). */
+function seedWorkspaceRepo(): string {
+  const git = (args: string[], cwd?: string): void => {
+    execFileSync('git', ['-c', 'core.hooksPath=/dev/null', '-c', 'user.email=t@t', '-c', 'user.name=t', ...args], {
+      cwd,
+      stdio: 'pipe',
+    })
+  }
+  const bare = mkdtempSync(join(tmpdir(), 'e2e-ws-')) // init IN the tmp dir: a sibling `<tmp>.git` path would strand it
+  git(['init', '--bare', '-b', 'main', bare])
+  const seed = mkdtempSync(join(tmpdir(), 'e2e-seed-'))
+  git(['clone', bare, seed])
+  writeFileSync(join(seed, 'solution.py'), 'def add(a, b):\n    raise NotImplementedError\n')
   writeFileSync(
-    join(dir, 'test_solution.py'),
+    join(seed, 'test_solution.py'),
     'from solution import add\nassert add(2, 3) == 5\nassert add(-1, 1) == 0\nassert add(0, 0) == 0\nprint("PASS")\n',
   )
-  return dir
+  git(['add', '-A'], seed)
+  git(['commit', '-m', 'task'], seed)
+  git(['push', 'origin', 'main'], seed)
+  rmSync(seed, { recursive: true, force: true })
+  return bare
 }
 
 /** The deployable check: run the test in the worker's cwd. Exit 0 = delivered. No LLM judge. */
@@ -83,35 +100,41 @@ async function bridgeChat(opts: {
 
 const transcripts: Array<{ who: string; said: string; delivered?: boolean }> = []
 
-/** A WORKER = a real opencode coding session in its OWN cwd, graded by the real test. */
-function makeWorker(rawProfile: unknown, templateDir: string, n: number): Agent<unknown, unknown> {
+/** A WORKER = a real opencode coding session in a clone of the SHARED workspace, graded by the
+ *  real test; its delivery is committed back so the next worker builds on it (not isolated). */
+function makeWorker(rawProfile: unknown, ws: Workspace, n: number): Agent<unknown, unknown> {
   const p = asAuthoredProfile(rawProfile)
   const name = p?.name ?? `worker-${n}`
   let artifact: ExecutorResult<unknown> | undefined
   const inner: Executor<unknown> = {
     runtime: 'router',
     async execute() {
-      const cwd = mkdtempSync(join(tmpdir(), 'e2e-worker-'))
-      cpSync(templateDir, cwd, { recursive: true })
       const sys = p?.systemPrompt ?? TASK
-      const said = await bridgeChat({
-        messages: [
-          {
-            role: 'user',
-            content: `${sys}\n\nYou are working in the current directory. Edit the files so that running \`python3 test_solution.py\` prints PASS. Do it now.`,
-          },
-        ],
-        cwd,
-      })
-      const delivered = checkPasses(cwd)
-      transcripts.push({ who: name, said: said.slice(0, 300), delivered })
+      const run = await runInWorkspace(
+        ws,
+        async (cwd) => {
+          const said = await bridgeChat({
+            messages: [
+              {
+                role: 'user',
+                content: `${sys}\n\nYou are working in the current directory (it already holds prior workers' committed progress). Edit the files so that running \`python3 test_solution.py\` prints PASS. Do it now.`,
+              },
+            ],
+            cwd,
+          })
+          const valid = checkPasses(cwd)
+          transcripts.push({ who: name, said: said.slice(0, 300), delivered: valid })
+          return { valid, value: said.slice(0, 120), message: `${name}: ${valid ? 'delivered' : 'wip'}` }
+        },
+        { tmpPrefix: 'e2e-worker-', commitOnInvalid: true },
+      )
+      const delivered = run.valid
       artifact = {
         outRef: contentAddress(`${name}:${delivered}`),
-        out: { worker: name, delivered, profileSystemPrompt: sys.slice(0, 120) },
+        out: { worker: name, delivered, rev: run.commit?.ok ? run.commit.rev : undefined, profileSystemPrompt: sys.slice(0, 120) },
         verdict: { valid: delivered, score: delivered ? 1 : 0 },
         spent: { iterations: 1, tokens: { input: 0, output: 0 }, usd: 0, ms: 0 },
       }
-      rmSync(cwd, { recursive: true, force: true })
       return artifact
     },
     teardown: () => Promise.resolve({ destroyed: true }),
@@ -125,8 +148,9 @@ function makeWorker(rawProfile: unknown, templateDir: string, n: number): Agent<
 }
 
 async function main(): Promise<void> {
-  console.log(`atom-mcp-e2e: model=${MODEL}  (real boxes, real MCP, real test)`)
-  const templateDir = makeTaskTemplate()
+  console.log(`atom-mcp-e2e: model=${MODEL}  (real boxes, real MCP, real test, shared workspace)`)
+  const bareRef = seedWorkspaceRepo()
+  const ws = gitWorkspace({ ref: bareRef })
   const blobs = new InMemoryResultBlobStore()
   let n = 0
 
@@ -136,7 +160,7 @@ async function main(): Promise<void> {
       const mcp = await serveCoordinationMcp({
         scope,
         blobs,
-        makeWorkerAgent: (raw) => makeWorker(raw, templateDir, n++),
+        makeWorkerAgent: (raw) => makeWorker(raw, ws, n++),
         perWorker: { maxIterations: 2, maxTokens: 200_000 },
       })
       // The supervisor's cwd carries the REAL skill file (opencode loads it from the cwd skill dirs).
@@ -178,7 +202,7 @@ async function main(): Promise<void> {
     maxDepth: 4,
     now: () => Date.now(),
   })
-  rmSync(templateDir, { recursive: true, force: true })
+  rmSync(bareRef, { recursive: true, force: true })
 
   console.log('\n── transcripts (real driver↔worker) ──')
   for (const t of transcripts) {
diff --git a/docs/research/interactive-sessions-spec.md b/docs/research/interactive-sessions-spec.md
new file mode 100644
index 00000000..022bf777
--- /dev/null
+++ b/docs/research/interactive-sessions-spec.md
@@ -0,0 +1,75 @@
+# Spec — interactive (tmux) harness sessions + live streaming
+
+**Vision (one sentence):** instead of headless one-shot CLI calls, each agent in a supervised run is a **live, interactive harness session in its own tmux window** (driveable, observable, resumable), the whole agent tree is one tmux session, and it streams to a browser — composing with the recorded animated replay.
+
+**Why now:** the whole real chain already delivers — an opencode supervisor drives opencode workers via the coordination MCP, and a real deployable check gates delivery (`bench/src/atom-mcp-e2e.mts`, `972707f`). Two gaps remain: (a) the agents run *headless* (one prompt → output), so you can't watch or interact with them, and (b) the harness-specific glue lives in a bench script, not the substrate. This spec closes both with a real, generalized capability.
+
+## Placement — who owns what (obeys the AgentProfile law + the layering)
+
+The law: *an agent IS its AgentProfile; you change behavior by authoring the profile and letting the substrate materialize it — never specialize the runtime to a harness.* That decides the split cleanly:
+
+| Layer | Owns | Why |
+|---|---|---|
+| **agent-runtime** (this repo) | The **recursion + the ports**: the coordination MCP over the Scope (`serveCoordinationMcp`, done), a generic **`session` Executor** that opens/drives/observes a session via the substrate's API (NOT tmux-aware), the shared `Workspace` seam, the journal→replay. | The runtime stays harness-agnostic. It drives; it never spawns tmux or knows what opencode is. |
+| **agent-dev-container** (adc) | The **materialization**: given an `AgentProfile` + cwd + mcp config, stand up the harness as an **interactive tmux window** (the TUI, not `run`), materialize the FULL profile (skills as real SKILL.md files, tools, model, mcp), capture (`pipe-pane`) + stream (`ttyd`). Exposes a **session API** (create / send / stream / status / kill). | "the container where the agents actually live" — Drew. This is the harness-specific layer; it belongs in the substrate, never the runtime. |
+| **cli-bridge** | Stays the *headless* harness materializer (the test target + the fast path). Optionally grows the same session API for local runs. | Already proven; the adc is the richer/interactive home. |
+| **sandbox SDK** | The `AgentProfile` manifest + box abstraction the adc is a flavor of. | Where the profile shape + `resources.skills` materialization already live. |
+
+**The seam** = a small **session API** the adc exposes and the runtime's `session` Executor consumes:
+`POST /sessions {profile, cwd, mcp} → {id, ttydUrl}` · `POST /sessions/:id/send {text}` · `GET /sessions/:id/stream` (SSE: harness output + a done/settle signal) · `GET /sessions/:id/status` · `DELETE /sessions/:id`. The runtime drives the recursion through the coordination MCP; the substrate drives the *harness* through this API.
+
+## Where the issue goes
+- **Primary issue → `tangle-network/agent-dev-container`** (the materialization + the session API + ttyd). This spec is the design ref.
+- **Companion issue → `tangle-network/agent-runtime`** (the generic `session` Executor + the shared `Workspace` wiring + replay-compose). Small; mostly the executor seam.
+- **Track on `ops-board`** (lane: eng, owner: claude) with measurable done-criteria = the e2e checklist below.
+
+## End-to-end checklist (the map to "done")
+
+### Phase 0 — preconditions (DONE)
+- [x] Coordination MCP over a live Scope (`serveCoordinationMcp`, real test).
+- [x] Proof a coding harness mounts + calls it (`mcp-mount-probe`).
+- [x] Whole headless e2e delivers (`atom-mcp-e2e`).
+- [x] Standard `skills/supervise/SKILL.md`.
+
+### Phase 1 — substrate: AgentProfile materialization (adc + bridge)  *(Drew's "materialize the entire profile")*
+- [ ] Materialize `resources.skills` as real `SKILL.md` files in the harness skill dir (opencode `~/.config/opencode/skill/` + project `.opencode/skill/`; verify the exact dir per harness) — loaded natively, NOT a prompt note.
+- [ ] Materialize tools, model, system prompt, mcp (mcp already works — `type:'http'`).
+- [ ] One `materializeAgentProfile(profile, dir)` per harness; remove the bench script's cwd-writes.
+- [ ] Exit: a profile with a skill drives behavior with zero prompt-stuffing (probe: agent uses a skill it was never told about in the prompt).
+
+### Phase 2 — substrate: interactive tmux session + session API (adc)
+- [ ] `tmux new-session`/`new-window` per run/agent; run the harness in **interactive** mode (TUI), one window per agent, named by agent id.
+- [ ] Drive: send the prompt (send-keys or the harness's stdin protocol); detect completion (harness done-signal / sentinel) → emit a settle event.
+- [ ] Capture: `pipe-pane` → a transcript stream (for the journal).
+- [ ] The session API (create/send/stream/status/kill) over HTTP.
+- [ ] Resource governance: max concurrent windows, per-session timeout, cleanup on settle/crash.
+- [ ] Exit: `POST /sessions` with a profile → a live tmux window you can `tmux attach` to; `/stream` yields output + a done signal.
+
+### Phase 3 — runtime: the generic `session` Executor (agent-runtime)
+- [ ] A `session` backend on the `Executor` port: `execute` calls the substrate session API (create → send task → stream until done) and settles with the result; `deliver` → `POST /sessions/:id/send` (steer); `teardown` → `DELETE /sessions/:id` (kill). Harness-agnostic.
+- [ ] Wire `makeWorkerAgent` (coordination MCP) → the `session` executor, selected by the worker's `AgentProfile.backend`.
+- [ ] Exit: `spawn_worker` → a worker that runs as a live interactive session, settles on its deployable check.
+
+### Phase 4 — shared workspace (agent-runtime)  *(the e2e's open design point)*
+- [ ] Supervisor + its workers share ONE `Workspace` (gitWorkspace) — workers branch/worktree, deliver back so the supervisor (and the next worker) build on one artifact. Fixes the "files missing" confusion.
+- [ ] Exit: a 2-worker run where worker-2 builds on worker-1's committed output.
+
+### Phase 5 — streaming + viz (adc + the viewer)
+- [ ] `ttyd` serves the run's tmux session over a websocket; auth (bearer); a stable URL per run.
+- [ ] A viewer page: the live tmux stream (now) beside the **animated replay** (the recorded tree) + the topology — one screen, live + history.
+- [ ] Exit: open the URL, watch the supervisor + worker panes work in real time; scrub the replay after.
+
+### Phase 6 — prove it e2e (no mock)
+- [ ] The whole chain on interactive sessions: supervisor (tmux) authors profiles → `spawn_worker` → worker (tmux) codes in the shared workspace → real test gates → delivered — all streamed live, all journaled, replayable.
+- [ ] Retire `atom-mcp-e2e`'s harness-specific shortcuts (now: author profiles, the substrate materializes).
+- [ ] Exit: a recorded run URL + the replay + green deployable check.
+
+## Open design points (decide during Phase 2–4)
+- **Interactive vs headless harness mode:** does opencode/claude-code expose a driveable interactive TUI, or do we run `run` *inside* the pane for the live-output view? (Headless-in-a-pane is the cheap first cut; true interactive is the goal.)
+- **Completion detection** in a TUI (sentinel vs a harness done event).
+- **Session lifecycle:** resume after a crash (the journal already supports replay/resume — extend to sessions).
+- **Security:** ttyd exposure + the coordination MCP exposure (bind to localhost / use an authenticated tunnel).
+- **Concurrency:** N agents = N windows; the adc's resource limits.
+
+## Net
+The runtime is essentially done for this (coordination MCP + the executor port + replay). The new work is a **substrate capability in the adc** (interactive tmux sessions + full-profile materialization + ttyd), reached through one small session API and one generic `session` executor in the runtime. Nothing here specializes the runtime to a harness.
diff --git a/skills/build-with-agent-runtime/SKILL.md b/skills/build-with-agent-runtime/SKILL.md
index 52401922..bfd4251a 100644
--- a/skills/build-with-agent-runtime/SKILL.md
+++ b/skills/build-with-agent-runtime/SKILL.md
@@ -68,7 +68,7 @@ signature + the exact "do NOT build".
 | **Gate: ship/hold from a `BenchmarkReport`** (per-task cells) | `promotionGate({ report, incumbent, candidate })` — `/runtime` | canonical-api §3.4 |
 | **Run the full multi-generation flywheel + certify** | `runStrategyEvolution(config)` — `/runtime` | canonical-api §3.4 |
 | **Compose the prod sandbox profile** (eval/prod parity) | `composeProductionAgentProfile(base, opts)` — `/mcp` | canonical-api §3.2 |
-| **Observe a run** (cost/time waterfall, live tree, OTLP) | `createWaterfallCollector` / `createTopologyView` / `createOtelExporter` via `composeRuntimeHooks(...)` — root | canonical-api §3.5 |
+| **Observe a run** (cost/time waterfall, live tree, OTLP) | `createWaterfallCollector` / `createOtelExporter` via `composeRuntimeHooks(...)` — root; `createTopologyView` / `renderTopologyTree` — `/topology` | canonical-api §3.5 |
 | **State any A/B claim** | `pairedLift` (bench) over `pairedBootstrap`/`heldoutSignificance` (substrate) | canonical-api §3.5 |
 | **Observe/ship with billing-boundary** | `withTangleIntelligence(agent, { project, effort })` — `/intelligence` | canonical-api §7 (now live on main — verify) |
 
diff --git a/src/runtime/index.ts b/src/runtime/index.ts
index fc72d48a..e4a924e1 100644
--- a/src/runtime/index.ts
+++ b/src/runtime/index.ts
@@ -311,6 +311,9 @@ export {
   isDriverSpec,
   withDriverExecutor,
 } from './supervise/driver-executor'
+// Supervisor-as-MCP: serve the coordination verbs as a real HTTP MCP over a live Scope, so any
+// harness (claude-code / codex / opencode) BECOMES the supervisor by mounting one MCP server.
+export { type CoordinationMcpHandle, serveCoordinationMcp } from './supervise/coordination-mcp'
 // The ONE built-in executor entrypoint: backend-as-data (`createExecutor({backend})`).
 // The per-backend factories are internal case-arms; BYO agents implement `Executor`.
 export {
@@ -407,7 +410,9 @@ export {
   gitWorkspace,
   jjWorkspace,
   localShell,
+  runInWorkspace,
   type Shell,
   type Workspace,
   type WorkspaceCommit,
+  type WorkspaceRun,
 } from './workspace'
diff --git a/src/runtime/workspace.ts b/src/runtime/workspace.ts
index 3246a460..569aa66a 100644
--- a/src/runtime/workspace.ts
+++ b/src/runtime/workspace.ts
@@ -132,6 +132,43 @@ export function jjWorkspace(opts: GitWorkspaceOptions): Workspace {
   }
 }
 
+export interface WorkspaceRun<T> { // outcome of one `runInWorkspace` call
+  readonly valid: boolean // the `valid` flag the body reported, passed through unchanged
+  readonly value: T // the body's `value`, passed through unchanged
+  /** Result of the commit attempt; set only when one was made (valid body, or `commitOnInvalid`). */
+  readonly commit?: WorkspaceCommit
+}
+
+/**
+ * Run a worker `body` inside a FRESH clone of a shared `Workspace`, then commit its work back so
+ * the next worker (or the supervisor) builds on it — the seam that turns isolated per-worker cwds
+ * into one compounding artifact. `body` gets a real materialized dir; its work is committed when
+ * it reports `valid`, or regardless if `opts.commitOnInvalid` is set (a conflict is returned, never
+ * thrown). The clone is always removed (`finally`); durable state lives only in the ref.
+ */
+export async function runInWorkspace<T>(
+  ws: Workspace,
+  body: (cwd: string) => Promise<{ valid: boolean; value: T; message?: string }>,
+  opts: { tmpPrefix?: string; commitOnInvalid?: boolean } = {},
+): Promise<WorkspaceRun<T>> {
+  const { mkdtempSync, rmSync } = await import('node:fs')
+  const { tmpdir } = await import('node:os')
+  const { join } = await import('node:path')
+  const dir = mkdtempSync(join(tmpdir(), opts.tmpPrefix ?? 'ws-run-'))
+  try {
+    await ws.materialize(dir)
+    const r = await body(dir)
+    if (r.valid || opts.commitOnInvalid) {
+      const message = r.message ?? (r.valid ? 'worker: delivered' : 'worker: wip')
+      const commit = await ws.commit(dir, message)
+      return { valid: r.valid, value: r.value, commit }
+    }
+    return { valid: r.valid, value: r.value }
+  } finally {
+    rmSync(dir, { recursive: true, force: true })
+  }
+}
+
 function tail(s: string): string {
   return s.slice(-400)
 }
diff --git a/tests/loops/workspace.test.ts b/tests/loops/workspace.test.ts
index 38b40543..01bd9fc8 100644
--- a/tests/loops/workspace.test.ts
+++ b/tests/loops/workspace.test.ts
@@ -3,7 +3,16 @@ import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'no
 import { tmpdir } from 'node:os'
 import { join } from 'node:path'
 import { afterEach, beforeEach, describe, expect, it } from 'vitest'
-import { gitWorkspace, jjWorkspace } from '../../src/runtime/workspace'
+import { gitWorkspace, jjWorkspace, runInWorkspace } from '../../src/runtime/workspace'
+
+const hasPython = (() => { // python3 is optional — the runInWorkspace test skips unless it runs
+  try {
+    execFileSync('python3', ['--version'], { stdio: 'pipe' })
+    return true
+  } catch { // spawn failure or non-zero exit → treat python3 as unavailable
+    return false
+  }
+})()
 
 /** jj is optional and absent in CI — its block skips unless the binary is present. */
 const hasJj = (() => {
@@ -66,6 +75,66 @@ describe('gitWorkspace', () => {
     expect(readFileSync(join(w2, 'a.txt'), 'utf-8')).toBe('one\n')
   })
 
+  it.skipIf(!hasPython)(
+    'runInWorkspace: a second worker builds on the first, gated by a real test',
+    async () => {
+      // Seed the shared ref with a failing task (two functions to implement, one test).
+      const seed = fresh()
+      await gitWorkspace({ ref: bare }).materialize(seed)
+      writeFileSync(
+        join(seed, 'solution.py'),
+        'def add(a, b):\n    raise NotImplementedError\n\n\ndef mul(a, b):\n    raise NotImplementedError\n',
+      )
+      writeFileSync(
+        join(seed, 'test_solution.py'),
+        'from solution import add, mul\nassert add(2, 3) == 5\nassert mul(2, 3) == 6\nprint("PASS")\n',
+      )
+      git(['add', '-A'], seed)
+      git(['commit', '-m', 'task'], seed)
+      git(['push', 'origin', 'main'], seed)
+
+      const ws = gitWorkspace({ ref: bare })
+      const runTest = (cwd: string): boolean => {
+        try {
+          execFileSync('python3', ['test_solution.py'], { cwd, stdio: 'pipe', timeout: 30_000 })
+          return true
+        } catch {
+          return false
+        }
+      }
+
+      // Worker 1 implements add() only — the test still fails (mul missing), so only `commitOnInvalid` gets its WIP committed.
+      const r1 = await runInWorkspace(
+        ws,
+        async (cwd) => {
+          const src = readFileSync(join(cwd, 'solution.py'), 'utf-8').replace(
+            'def add(a, b):\n    raise NotImplementedError',
+            'def add(a, b):\n    return a + b',
+          )
+          writeFileSync(join(cwd, 'solution.py'), src)
+          return { valid: runTest(cwd), value: 'w1', message: 'w1: add()' }
+        },
+        { commitOnInvalid: true },
+      )
+      expect(r1.valid).toBe(false) // mul() is still unimplemented
+      expect(r1.commit).toMatchObject({ ok: true }) // …yet the WIP reached the shared ref
+
+      // Worker 2 materializes a FRESH clone — it must already see worker 1's add(), then finish mul().
+      const r2 = await runInWorkspace(ws, async (cwd) => {
+        const before = readFileSync(join(cwd, 'solution.py'), 'utf-8')
+        expect(before).toContain('return a + b') // compounding: it built on worker 1
+        const src = before.replace(
+          'def mul(a, b):\n    raise NotImplementedError',
+          'def mul(a, b):\n    return a * b',
+        )
+        writeFileSync(join(cwd, 'solution.py'), src)
+        return { valid: runTest(cwd), value: 'w2', message: 'w2: mul()' }
+      })
+      expect(r2.valid).toBe(true) // both functions implemented → the real test passes
+      expect(r2.commit).toMatchObject({ ok: true }) // valid runs commit without `commitOnInvalid`
+    },
+  )
+
   it('returns a typed conflict instead of overwriting concurrent edits', async () => {
     const ws = gitWorkspace({ ref: bare })
     const w1 = fresh()