diff --git a/bench/src/swe-bench-env.test.ts b/bench/src/swe-bench-env.test.ts
new file mode 100644
index 00000000..30dbdc02
--- /dev/null
+++ b/bench/src/swe-bench-env.test.ts
@@ -0,0 +1,71 @@
+import { mkdtempSync, realpathSync, rmSync, symlinkSync, writeFileSync } from 'node:fs'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+import { afterAll, describe, expect, it } from 'vitest'
+import { isInsideJail, isTestPath, jailPath } from './swe-bench-env'
+
+describe('isTestPath', () => { // covers the directory rule and the three filename rules
+  it('flags test directories and test-named python files', () => {
+    expect(isTestPath('tests/test_models.py')).toBe(true) // directory rule and `test_` rule both apply
+    expect(isTestPath('pkg/test/helpers.py')).toBe(true) // nested `test/` directory segment
+    expect(isTestPath('pkg/tests/helpers.py')).toBe(true) // nested `tests/` directory segment
+    expect(isTestPath('test_models.py')).toBe(true) // `test_` prefix, no directory
+    expect(isTestPath('models_test.py')).toBe(true) // `_test.py` suffix
+    expect(isTestPath('conftest.py')).toBe(true) // conftest at the top level
+    expect(isTestPath('pkg/conftest.py')).toBe(true) // conftest inside a package
+  })
+
+  it('does not flag ordinary source files', () => {
+    expect(isTestPath('src/foo.py')).toBe(false)
+    expect(isTestPath('pkg/models.py')).toBe(false)
+    // `testing.py` has no `test_` / `_test.py` / `conftest.py` match, so it counts as source.
+    expect(isTestPath('pkg/testing.py')).toBe(false)
+    // `latest/` ends in `test/`, but the directory rule needs `test` to start a path segment.
+    expect(isTestPath('latest/foo.py')).toBe(false)
+  })
+})
+
+describe('jailPath', () => { // pure string filter: nothing here touches the filesystem
+  const root = '/work/repo' // placeholder only: jailPath ignores its root argument
+
+  it('rejects `..` traversal and absolute paths', () => {
+    expect(jailPath(root, '../x')).toBeNull() // leading `..`
+    expect(jailPath(root, 'a/../../etc/passwd')).toBeNull() // `..` buried mid-path
+    expect(jailPath(root, '/etc/passwd')).toBeNull() // leading `/`
+  })
+
+  it('accepts in-repo relative paths and strips a leading `./`', () => {
+    expect(jailPath(root, 'src/a.py')).toBe('src/a.py') // returned unchanged
+    expect(jailPath(root, './a.py')).toBe('a.py') // `./` prefix removed
+    expect(jailPath(root, 'a.py')).toBe('a.py') // returned unchanged
+  })
+})
+
+describe('isInsideJail (realpath containment)', () => {
+  // Mirrors the `resolveInJail` closure in `call()`: realpath-resolve a workspace-relative path, then
+  // assert containment. Offline — operates on a throwaway temp dir, no git clone, no network.
+  const dir = mkdtempSync(join(tmpdir(), 'swe-jail-'))
+  const jailRoot = realpathSync(dir) // resolve once: the temp dir may itself sit behind a symlink
+  afterAll(() => rmSync(dir, { recursive: true, force: true }))
+
+  it('admits a real file inside the jail', () => {
+    const inside = join(dir, 'a.py')
+    writeFileSync(inside, 'x = 1\n')
+    expect(isInsideJail(jailRoot, realpathSync(inside))).toBe(true)
+    expect(isInsideJail(jailRoot, jailRoot)).toBe(true) // the root itself counts as inside
+  })
+
+  it('rejects reading through a symlink that escapes the jail', () => {
+    // A repo could ship `escape -> /etc`; following it must not let the agent read /etc/passwd.
+    const link = join(dir, 'escape')
+    symlinkSync('/etc', link)
+    // `resolveInJail` does `realpathSync(join(ws.dir, relPath))` then this containment check.
+    const real = realpathSync(join(dir, 'escape/passwd'))
+    expect(real).toBe(realpathSync('/etc/passwd')) // `/etc` is itself a symlink on some hosts (macOS)
+    expect(isInsideJail(jailRoot, real)).toBe(false)
+  })
+
+  it('rejects a sibling dir that shares the jail-root prefix', () => {
+    expect(isInsideJail('/tmp/swe-x', '/tmp/swe-x-evil/secret')).toBe(false) // pure string case, no I/O
+  })
+})
diff --git a/bench/src/swe-bench-env.ts b/bench/src/swe-bench-env.ts
index ccfb0931..e82b2c1a 100644
--- a/bench/src/swe-bench-env.ts
+++ b/bench/src/swe-bench-env.ts
@@ -13,22 +13,40 @@
  * memorization. Always report this; never claim a "clean" frontier number from this arena alone.
  */
 import { execFile } from 'node:child_process'
-import { existsSync, lstatSync, mkdtempSync, readdirSync, readFileSync, rmSync, writeFileSync } from 'node:fs'
+import { existsSync, lstatSync, mkdtempSync, readdirSync, readFileSync, realpathSync, rmSync, writeFileSync } from 'node:fs'
 import { tmpdir } from 'node:os'
-import { join } from 'node:path'
+import { join, sep } from 'node:path'
 import { promisify } from 'node:util'
 import type { AgenticSurface, AgenticTask, AgenticTool, ArtifactHandle, SurfaceScore } from '@tangle-network/agent-runtime/loops'
 import { createSweBenchAdapter } from './benchmarks/swe-bench'
 import type { BenchTask } from './benchmarks/types'
 
 const exec = promisify(execFile)
-const isTestPath = (p: string) => /(^|\/)(tests?)\//.test(p) || /test_.*\.py$|_test\.py$|conftest\.py$/.test(p)
+export const isTestPath = (p: string) => /(^|\/)(tests?)\//.test(p) || /test_.*\.py$|_test\.py$|conftest\.py$/.test(p) // NOTE(review): the filename patterns are unanchored, so e.g. `pkg/latest_x.py` also matches — confirm intended
+
+/**
+ * Cheap string pre-filter for an agent-supplied repo-relative path, applied before the path is
+ * joined to a workspace root: rejects a leading `/` and any path containing `..` (even inside a
+ * file name such as `a..b`), and strips one leading `./`. Returns the cleaned relative path, or
+ * `null` if it must be refused. Pure and side-effect-free — `_root` is unused (symlinks are handled
+ * by the realpath jail, not this filter); it is accepted so call sites mirror the realpath check.
+ */
+export const jailPath = (_root: string, p: string): string | null => {
+  if (p.startsWith('/') || p.includes('..')) return null
+  return p.replace(/^\.\//, '')
+}
+
+/**
+ * Containment predicate for the realpath jail: true iff `real` (an already-resolved absolute path)
+ * equals `jailRoot` or starts with `jailRoot + sep`; the separator stops a sibling such as
+ * `/tmp/swe-x-evil` from matching `/tmp/swe-x`. Pure string check; pass `jailRoot` with no trailing `sep`.
+ */
+export const isInsideJail = (jailRoot: string, real: string): boolean => real === jailRoot || real.startsWith(jailRoot + sep)
 
 interface Ws {
   dir: string
   task: BenchTask
 }
-const workspaces = new Map<string, Ws>()
 
 /** Build the SWE-bench Environment + a DISJOINT-slice task supplier over the Verified split. The
  *  supplier keys tasks by dataset offset so `runStrategyEvolution`'s train [0,trainN) and holdout
@@ -41,6 +59,8 @@ export async function createSweBenchEnvironment(poolN = 80): Promise<{
   const adapter = createSweBenchAdapter()
   const pool = await adapter.loadTasks({ limit: poolN, split: 'test' })
   const byId = new Map(pool.map((t) => [t.id, t]))
+  // Each environment owns its workspace registry so concurrent environments don't share state.
+  const workspaces = new Map<string, Ws>()
 
   const environment: AgenticSurface = {
     name: 'swe-bench-verified',
@@ -70,9 +90,18 @@ export async function createSweBenchEnvironment(poolN = 80): Promise<{
     async call(handle, name, args) {
       const ws = workspaces.get(handle.id)
       if (!ws) return 'ERROR: workspace closed'
-      const safe = (p: string): string | null => {
-        if (p.startsWith('/') || p.includes('..')) return null
-        return p.replace(/^\.\//, '')
+      // Cheap pre-filter: reject absolute paths and `..` traversal, strip a leading `./`. The real
+      // boundary is the realpath jail check below (resolveInJail) — `safe` only normalizes the string
+      // form. `ws.dir` is passed for signature symmetry; the filter itself is root-independent.
+      const safe = (p: string): string | null => jailPath(ws.dir, p)
+      // Resolve `relPath` to an absolute path and assert it stays inside the workspace AFTER following
+      // symlinks (a repo symlink targeting /etc/passwd would otherwise escape the string-only jail).
+      // realpathSync throws on a missing target; both callers catch that and return the message, as the
+      // old read did. NOTE(review): edit_file runs isTestPath on the requested path, not the resolved one.
+      const jailRoot = realpathSync(ws.dir)
+      const resolveInJail = (relPath: string): string | null => {
+        const real = realpathSync(join(ws.dir, relPath))
+        return isInsideJail(jailRoot, real) ? real : null
       }
       if (name === 'list_files') {
         const sub = safe(String(args.dir ?? '')) ?? ''
@@ -106,8 +135,15 @@ export async function createSweBenchEnvironment(poolN = 80): Promise<{
       if (name === 'read_file') {
         const p = safe(String(args.path ?? ''))
         if (!p) return 'ERROR: invalid path'
+        let real: string | null
         try {
-          const c = readFileSync(join(ws.dir, p), 'utf8')
+          real = resolveInJail(p)
+        } catch (e) {
+          return `(error: ${(e as Error).message})`
+        }
+        if (!real) return `ERROR: path ${p} escapes the workspace`
+        try {
+          const c = readFileSync(real, 'utf8')
           return c.length > 24_000 ? `${c.slice(0, 24_000)}\n...[truncated]` : c
         } catch (e) {
           return `(error: ${(e as Error).message})`
@@ -119,9 +155,16 @@ export async function createSweBenchEnvironment(poolN = 80): Promise<{
         if (isTestPath(p)) return 'REJECTED: editing test files is forbidden (the evaluation runs hidden tests).'
         const oldStr = String(args.old_string ?? '')
         const newStr = String(args.new_string ?? '')
+        let real: string | null
+        try {
+          real = resolveInJail(p)
+        } catch (e) {
+          return `(cannot read ${p}: ${(e as Error).message})`
+        }
+        if (!real) return `ERROR: path ${p} escapes the workspace`
         let content: string
         try {
-          content = readFileSync(join(ws.dir, p), 'utf8')
+          content = readFileSync(real, 'utf8')
         } catch (e) {
           return `(cannot read ${p}: ${(e as Error).message})`
         }
@@ -129,7 +172,7 @@ export async function createSweBenchEnvironment(poolN = 80): Promise<{
         const count = content.split(oldStr).length - 1
         if (count === 0) return `ERROR: old_string not found in ${p}. read_file it and copy EXACT text.`
         if (count > 1) return `ERROR: old_string appears ${count}× in ${p} — add surrounding context to make it unique.`
-        writeFileSync(join(ws.dir, p), content.replace(oldStr, newStr))
+        writeFileSync(real, content.replace(oldStr, newStr))
         return `edited ${p}: replaced 1 occurrence`
       }
       return `ERROR: unknown tool ${name}`
diff --git a/bench/swe-self-improve.mts b/bench/src/swe-self-improve.mts
similarity index 96%
rename from bench/swe-self-improve.mts
rename to bench/src/swe-self-improve.mts
index a1d72cff..069f3b31 100644
--- a/bench/swe-self-improve.mts
+++ b/bench/src/swe-self-improve.mts
@@ -4,14 +4,14 @@
  * draws a disjoint holdout slice and gates once — adaptive reuse is impossible). CONTAMINATION CAVEAT
  * applies (public fixes may be memorized) — reported, never claimed clean.
  *
- *   CALIBRATE first (cost gate):  TANGLE_API_KEY=… CALIBRATE=1 N=3 tsx bench/swe-self-improve.mts
- *   Full run:                     TANGLE_API_KEY=… TRAIN_N=6 HOLDOUT_N=8 GENERATIONS=2 tsx bench/swe-self-improve.mts
+ *   CALIBRATE first (cost gate):  TANGLE_API_KEY=… CALIBRATE=1 N=3 tsx bench/src/swe-self-improve.mts
+ *   Full run:                     TANGLE_API_KEY=… TRAIN_N=6 HOLDOUT_N=8 GENERATIONS=2 tsx bench/src/swe-self-improve.mts
  */
 import { mkdtempSync, rmSync } from 'node:fs'
 import { join } from 'node:path'
 import { createChatClient } from '@tangle-network/agent-eval'
 import { refine, runAgentic, runStrategyEvolution, sample } from '@tangle-network/agent-runtime/loops'
-import { createSweBenchEnvironment } from './src/swe-bench-env'
+import { createSweBenchEnvironment } from './swe-bench-env'
 
 async function main(): Promise<void> {
   const routerKey = process.env.TANGLE_API_KEY
diff --git a/docs/api/mcp.md b/docs/api/mcp.md
index 1cbb79b3..4892b9c4 100644
--- a/docs/api/mcp.md
+++ b/docs/api/mcp.md
@@ -3956,40 +3956,6 @@ Defined in: [mcp/tools/coordination.ts:56](https://github.com/tangle-network/age
 
 ***
 
-### AnalystRegistry
-
-Defined in: [mcp/tools/coordination.ts:62](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L62)
-
-#### Properties
-
-##### kinds
-
-> `readonly` **kinds**: readonly `object`[]
-
-Defined in: [mcp/tools/coordination.ts:63](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L63)
-
-##### run
-
-> `readonly` **run**: (`kindId`, `trace`) => `Promise`\<`unknown`\>
-
-Defined in: [mcp/tools/coordination.ts:64](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L64)
-
-###### Parameters
-
-###### kindId
-
-`string`
-
-###### trace
-
-`unknown`
-
-###### Returns
-
-`Promise`\<`unknown`\>
-
-***
-
 ### CoordinationToolsOptions
 
 Defined in: [mcp/tools/coordination.ts:94](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L94)
@@ -4010,7 +3976,7 @@ Defined in: [mcp/tools/coordination.ts:96](https://github.com/tangle-network/age
 
 ##### makeWorkerAgent
 
-> `readonly` **makeWorkerAgent**: [`MakeWorkerAgent`](#makeworkeragent)
+> `readonly` **makeWorkerAgent**: [`MakeWorkerAgent`](runtime.md#makeworkeragent)
 
 Defined in: [mcp/tools/coordination.ts:97](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L97)
 
@@ -4022,7 +3988,7 @@ Defined in: [mcp/tools/coordination.ts:98](https://github.com/tangle-network/age
 
 ##### analysts?
 
-> `readonly` `optional` **analysts?**: [`AnalystRegistry`](#analystregistry)
+> `readonly` `optional` **analysts?**: [`AnalystRegistry`](runtime.md#analystregistry)
 
 Defined in: [mcp/tools/coordination.ts:99](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L99)
 
@@ -5812,24 +5778,6 @@ Defined in: [mcp/tools/coordination.ts:60](https://github.com/tangle-network/age
 
 ***
 
-### MakeWorkerAgent
-
-> **MakeWorkerAgent** = (`profile`) => [`Agent`](runtime.md#agent)\<`unknown`, `unknown`\>
-
-Defined in: [mcp/tools/coordination.ts:92](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L92)
-
-#### Parameters
-
-##### profile
-
-`unknown`
-
-#### Returns
-
-[`Agent`](runtime.md#agent)\<`unknown`, `unknown`\>
-
-***
-
 ### DelegateResult
 
 > **DelegateResult** = \{ `status`: `"winner"`; `out`: `unknown`; `outRef`: `string`; `spentTotal`: [`Spend`](runtime.md#spend); \} \| \{ `status`: `"no-winner"`; `reason`: `string`; `spentTotal`: [`Spend`](runtime.md#spend); \}
@@ -7748,6 +7696,18 @@ Re-exports [mcpToolsForRuntimeMcpSubset](index.md#mcptoolsforruntimemcpsubset)
 
 ***
 
+### AnalystRegistry
+
+Re-exports [AnalystRegistry](runtime.md#analystregistry)
+
+***
+
 ### CoordinationEvent
 
 Re-exports [CoordinationEvent](runtime.md#coordinationevent)
+
+***
+
+### MakeWorkerAgent
+
+Re-exports [MakeWorkerAgent](runtime.md#makeworkeragent)
diff --git a/docs/api/primitive-catalog.md b/docs/api/primitive-catalog.md
index 27526a6b..17362d32 100644
--- a/docs/api/primitive-catalog.md
+++ b/docs/api/primitive-catalog.md
@@ -337,7 +337,7 @@ Import from `@tangle-network/agent-runtime/intelligence` — 60 exports.
 
 ### Recursive atom + loop kernel (alias of ./runtime)
 
-Import from `@tangle-network/agent-runtime/loops` — 381 exports.
+Import from `@tangle-network/agent-runtime/loops` — 383 exports.
 
 | Symbol | Kind | Summary |
 |---|---|---|
@@ -487,6 +487,7 @@ Import from `@tangle-network/agent-runtime/loops` — 381 exports.
 | `AgentTurnInput` | interface | _(no summary — add a TSDoc line at the declaration)_ |
 | `AgentTurnResult` | interface | _(no summary — add a TSDoc line at the declaration)_ |
 | `AnalystFinding` | interface | Unified envelope every analyst emits. Schema-versioned so renderers |
+| `AnalystRegistry` | interface | _(no summary — add a TSDoc line at the declaration)_ |
 | `AnytimeReport` | interface | _(no summary — add a TSDoc line at the declaration)_ |
 | `AnytimeStrategySummary` | interface | _(no summary — add a TSDoc line at the declaration)_ |
 | `AnytimeTaskCurve` | interface | anytimeReport — time-to-satisfactory-output metrics, derived entirely from the |
@@ -702,6 +703,7 @@ Import from `@tangle-network/agent-runtime/loops` — 381 exports.
 | `LoopShape` | type | A reusable act-body factory. Given the persona's content + seams (`ShapeContext`), it |
 | `LoopTraceEvent` | type | _(no summary — add a TSDoc line at the declaration)_ |
 | `LoopUntil` | type | `loopUntil(spec)` — build the iterative-deepening combinator. `seed` is the initial state. |
+| `MakeWorkerAgent` | type | _(no summary — add a TSDoc line at the declaration)_ |
 | `MountRecorder` | type | Records a mounted resource into the run's provenance manifest. Passed to |
 | `Outcome` | type | The terminal contract Drew wants: a loop returns a FINISHED deliverable, or the concrete |
 | `Panel` | type | `panel(spec)` — build the M-judge write-only-merge combinator. |
diff --git a/docs/api/runtime.md b/docs/api/runtime.md
index eac1add0..4269b2e1 100644
--- a/docs/api/runtime.md
+++ b/docs/api/runtime.md
@@ -412,6 +412,40 @@ The last artifact read error, if the abort fired during the retry loop.
 
 ## Interfaces
 
+### AnalystRegistry
+
+Defined in: [mcp/tools/coordination.ts:62](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L62)
+
+#### Properties
+
+##### kinds
+
+> `readonly` **kinds**: readonly `object`[]
+
+Defined in: [mcp/tools/coordination.ts:63](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L63)
+
+##### run
+
+> `readonly` **run**: (`kindId`, `trace`) => `Promise`\<`unknown`\>
+
+Defined in: [mcp/tools/coordination.ts:64](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L64)
+
+###### Parameters
+
+###### kindId
+
+`string`
+
+###### trace
+
+`unknown`
+
+###### Returns
+
+`Promise`\<`unknown`\>
+
+***
+
 ### WorktreeCommandResult
 
 Defined in: [mcp/worktree-harness.ts:39](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/worktree-harness.ts#L39)
@@ -7231,7 +7265,7 @@ What the spawn was supposed to produce — surfaced in traces/reports.
 
 ### DriverAgentOptions
 
-Defined in: [runtime/supervise/coordination-driver.ts:45](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L45)
+Defined in: [runtime/supervise/coordination-driver.ts:46](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L46)
 
 #### Properties
 
@@ -7239,13 +7273,13 @@ Defined in: [runtime/supervise/coordination-driver.ts:45](https://github.com/tan
 
 > `readonly` **name**: `string`
 
-Defined in: [runtime/supervise/coordination-driver.ts:46](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L46)
+Defined in: [runtime/supervise/coordination-driver.ts:47](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L47)
 
 ##### brain
 
 > `readonly` **brain**: [`ToolLoopChat`](#toolloopchat)
 
-Defined in: [runtime/supervise/coordination-driver.ts:50](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L50)
+Defined in: [runtime/supervise/coordination-driver.ts:51](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L51)
 
 The driver-LLM seam — ONE inference turn over the conversation + the coordination tool specs
  (the canonical `ToolLoopChat`): a scripted mock offline, the router's tool-calling in
@@ -7255,15 +7289,15 @@ The driver-LLM seam — ONE inference turn over the conversation + the coordinat
 
 > `readonly` **blobs**: [`ResultBlobStore`](#resultblobstore)
 
-Defined in: [runtime/supervise/coordination-driver.ts:52](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L52)
+Defined in: [runtime/supervise/coordination-driver.ts:53](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L53)
 
 Shared blob store — `observe_agent` reads settled outputs through it.
 
 ##### makeWorkerAgent
 
-> `readonly` **makeWorkerAgent**: [`MakeWorkerAgent`](mcp.md#makeworkeragent)
+> `readonly` **makeWorkerAgent**: [`MakeWorkerAgent`](#makeworkeragent)
 
-Defined in: [runtime/supervise/coordination-driver.ts:54](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L54)
+Defined in: [runtime/supervise/coordination-driver.ts:55](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L55)
 
 Resolve a spawned `profile` to a worker LEAF or a driver child (the recursion seam).
 
@@ -7271,7 +7305,7 @@ Resolve a spawned `profile` to a worker LEAF or a driver child (the recursion se
 
 > `readonly` **perWorker**: [`Budget`](#budget-10)
 
-Defined in: [runtime/supervise/coordination-driver.ts:56](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L56)
+Defined in: [runtime/supervise/coordination-driver.ts:57](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L57)
 
 Per-child budget reserved from the conserved pool on each spawn.
 
@@ -7279,16 +7313,35 @@ Per-child budget reserved from the conserved pool on each spawn.
 
 > `readonly` `optional` **maxLiveWorkers?**: `number`
 
-Defined in: [runtime/supervise/coordination-driver.ts:59](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L59)
+Defined in: [runtime/supervise/coordination-driver.ts:60](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L60)
 
 Hard cap on simultaneously-LIVE workers — `spawn_agent` fails closed once this many are in
  flight (a concurrency fence on top of the conserved-pool fence). Omit/`<= 0` = no cap.
 
+##### analysts?
+
+> `readonly` `optional` **analysts?**: [`AnalystRegistry`](#analystregistry)
+
+Defined in: [runtime/supervise/coordination-driver.ts:63](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L63)
+
+The analyst lenses available to the driver. Required for `analyzeOnSettle` (and `run_analyst`).
+ Unset → no analyst feed (status quo: the driver gets settled outputs, no findings).
+
+##### analyzeOnSettle?
+
+> `readonly` `optional` **analyzeOnSettle?**: readonly `string`[]
+
+Defined in: [runtime/supervise/coordination-driver.ts:67](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L67)
+
+Analyst kind ids run AUTOMATICALLY when a worker settles `done` — each result re-enters as a
+ `finding` the driver pulls and composes its next steer from. The UP-leg of the self-improving
+ loop. Omit/empty = no auto-analysis (status quo). Requires `analysts`.
+
 ##### systemPrompt
 
 > `readonly` **systemPrompt**: `string` \| ((`task`) => `string`)
 
-Defined in: [runtime/supervise/coordination-driver.ts:62](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L62)
+Defined in: [runtime/supervise/coordination-driver.ts:70](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L70)
 
 The driver's stance — a string, or built from the task (the worker-driver prompt /
  the generator). INJECTED so the prompt is a pluggable, optimizable role.
@@ -7297,7 +7350,7 @@ The driver's stance — a string, or built from the task (the worker-driver prom
 
 > `readonly` `optional` **extraTools?**: readonly `object`[]
 
-Defined in: [runtime/supervise/coordination-driver.ts:67](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L67)
+Defined in: [runtime/supervise/coordination-driver.ts:75](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L75)
 
 WORK tools the driver may call DIRECTLY (alongside the coordination verbs) — so the driver is
  not a pure manager but a full agent that can ACT (do simple work itself) OR SPAWN (delegate).
@@ -7308,7 +7361,7 @@ WORK tools the driver may call DIRECTLY (alongside the coordination verbs) — s
 
 > `readonly` `optional` **executeExtraTool?**: (`name`, `args`) => `Promise`\<`string` \| `null` \| `undefined`\>
 
-Defined in: [runtime/supervise/coordination-driver.ts:74](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L74)
+Defined in: [runtime/supervise/coordination-driver.ts:82](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L82)
 
 Runs an `extraTools` call. Returns a string result, or null/undefined to signal "not handled"
  so the call falls through to the coordination dispatch. Required iff `extraTools` is set.
@@ -7331,7 +7384,7 @@ Runs an `extraTools` call. Returns a string result, or null/undefined to signal
 
 > `readonly` `optional` **maxTurns?**: `number`
 
-Defined in: [runtime/supervise/coordination-driver.ts:82](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L82)
+Defined in: [runtime/supervise/coordination-driver.ts:90](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L90)
 
 Max driver turns before the loop force-finalizes on the best settled child. Default 16.
  `0` lifts the turn-COUNT cap: the loop is bounded instead by the conserved budget pool,
@@ -7342,7 +7395,7 @@ Max driver turns before the loop force-finalizes on the best settled child. Defa
 
 > `readonly` `optional` **now?**: () => `number`
 
-Defined in: [runtime/supervise/coordination-driver.ts:85](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L85)
+Defined in: [runtime/supervise/coordination-driver.ts:93](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L93)
 
 Injected clock for the in-loop absolute-deadline guard — keeps the deadline check
  deterministic in tests. Defaults to `Date.now`.
@@ -7355,7 +7408,7 @@ Injected clock for the in-loop absolute-deadline guard — keeps the deadline ch
 
 > `readonly` `optional` **compaction?**: [`ToolLoopCompactionOptions`](#toolloopcompactionoptions)
 
-Defined in: [runtime/supervise/coordination-driver.ts:94](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L94)
+Defined in: [runtime/supervise/coordination-driver.ts:102](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L102)
 
 Give the driver brain a chapter-lifecycle on its OWN context window. The LLM-brain front doors
  lose to a dumb-Ralph respawn because the brain re-bills its whole coordination transcript every
@@ -8221,7 +8274,7 @@ The completion oracle for backend-derived workers (settled ⟺ delivered). Stron
 
 ##### makeWorkerAgent?
 
-> `readonly` `optional` **makeWorkerAgent?**: [`MakeWorkerAgent`](mcp.md#makeworkeragent)
+> `readonly` `optional` **makeWorkerAgent?**: [`MakeWorkerAgent`](#makeworkeragent)
 
 Defined in: [runtime/supervise/supervise.ts:56](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L56)
 
@@ -8301,11 +8354,31 @@ Hard cap on simultaneously-LIVE workers — `spawn_agent` fails closed once this
  flight. The conserved pool bounds TOTAL work; this bounds SIMULTANEOUS work (live boxes/
  sandboxes a real fleet runs at once). Omit/`<= 0` = no cap (the pool stays the only fence).
 
+##### analysts?
+
+> `readonly` `optional` **analysts?**: [`AnalystRegistry`](#analystregistry)
+
+Defined in: [runtime/supervise/supervise.ts:84](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L84)
+
+Analyst lenses available to the driver. Required for `analyzeOnSettle`. Unset → status quo
+ (the driver receives settled worker outputs, no analyst findings).
+
+##### analyzeOnSettle?
+
+> `readonly` `optional` **analyzeOnSettle?**: readonly `string`[]
+
+Defined in: [runtime/supervise/supervise.ts:89](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L89)
+
+Analyst kind ids run AUTOMATICALLY when a worker settles `done` — each re-enters as a `finding`
+ the driver pulls (`await_event`) and composes its next steer from. The self-improving UP-leg,
+ threaded to the driver at this level (propagate to sub-drivers via a recursive `makeWorkerAgent`).
+ Omit/empty = status quo (no analyst feed). Requires `analysts`.
+
 ##### blobs?
 
 > `readonly` `optional` **blobs?**: [`ResultBlobStore`](#resultblobstore)
 
-Defined in: [runtime/supervise/supervise.ts:83](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L83)
+Defined in: [runtime/supervise/supervise.ts:91](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L91)
 
 Worker output store. Defaults to in-memory.
 
@@ -8313,19 +8386,19 @@ Worker output store. Defaults to in-memory.
 
 > `readonly` `optional` **maxDepth?**: `number`
 
-Defined in: [runtime/supervise/supervise.ts:84](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L84)
+Defined in: [runtime/supervise/supervise.ts:92](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L92)
 
 ##### maxTurns?
 
 > `readonly` `optional` **maxTurns?**: `number`
 
-Defined in: [runtime/supervise/supervise.ts:85](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L85)
+Defined in: [runtime/supervise/supervise.ts:93](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L93)
 
 ##### compaction?
 
 > `readonly` `optional` **compaction?**: [`ToolLoopCompactionOptions`](#toolloopcompactionoptions)
 
-Defined in: [runtime/supervise/supervise.ts:91](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L91)
+Defined in: [runtime/supervise/supervise.ts:99](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L99)
 
 Give the supervisor brain a chapter-lifecycle on its OWN context window (router arm only): once
  its coordination transcript exceeds `thresholdTokens` it distills to a compact progress note and
@@ -8337,13 +8410,13 @@ Give the supervisor brain a chapter-lifecycle on its OWN context window (router
 
 > `readonly` `optional` **runId?**: `string`
 
-Defined in: [runtime/supervise/supervise.ts:92](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L92)
+Defined in: [runtime/supervise/supervise.ts:100](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L100)
 
 ##### now?
 
 > `readonly` `optional` **now?**: () => `number`
 
-Defined in: [runtime/supervise/supervise.ts:93](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L93)
+Defined in: [runtime/supervise/supervise.ts:101](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L101)
 
 ###### Returns
 
@@ -8353,7 +8426,7 @@ Defined in: [runtime/supervise/supervise.ts:93](https://github.com/tangle-networ
 
 > `readonly` `optional` **allowedModels?**: readonly `string`[]
 
-Defined in: [runtime/supervise/supervise.ts:97](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L97)
+Defined in: [runtime/supervise/supervise.ts:105](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L105)
 
 Restrict the run to this subset of models. When set, every configured model — the
  supervisor router model, the profile's model, and the backend's model — must be a member,
@@ -8416,7 +8489,7 @@ Defined in: [runtime/supervise/supervisor-agent.ts:70](https://github.com/tangle
 
 ##### makeWorkerAgent
 
-> `readonly` **makeWorkerAgent**: [`MakeWorkerAgent`](mcp.md#makeworkeragent)
+> `readonly` **makeWorkerAgent**: [`MakeWorkerAgent`](#makeworkeragent)
 
 Defined in: [runtime/supervise/supervisor-agent.ts:72](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L72)
 
@@ -8495,17 +8568,34 @@ Runs an `extraTools` call; null/undefined falls through to the coordination disp
 
 `Promise`\<`string` \| `null` \| `undefined`\>
 
+##### analysts?
+
+> `readonly` `optional` **analysts?**: [`AnalystRegistry`](#analystregistry)
+
+Defined in: [runtime/supervise/supervisor-agent.ts:98](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L98)
+
+Analyst lenses available to the driver (both arms). Required for `analyzeOnSettle`.
+
+##### analyzeOnSettle?
+
+> `readonly` `optional` **analyzeOnSettle?**: readonly `string`[]
+
+Defined in: [runtime/supervise/supervisor-agent.ts:101](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L101)
+
+Analyst kinds run on each worker-settle → a `finding` the driver composes its next steer from
+ (the self-improving UP-leg). Unset/empty = status quo (no analyst feed). Requires `analysts`.
+
 ##### maxTurns?
 
 > `readonly` `optional` **maxTurns?**: `number`
 
-Defined in: [runtime/supervise/supervisor-agent.ts:97](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L97)
+Defined in: [runtime/supervise/supervisor-agent.ts:102](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L102)
 
 ##### compaction?
 
 > `readonly` `optional` **compaction?**: [`ToolLoopCompactionOptions`](#toolloopcompactionoptions)
 
-Defined in: [runtime/supervise/supervisor-agent.ts:101](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L101)
+Defined in: [runtime/supervise/supervisor-agent.ts:106](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L106)
 
 Give the supervisor brain a chapter-lifecycle on its OWN context window (router arm only) — it
  distills its coordination transcript to a compact progress note once it exceeds the threshold,
@@ -12109,6 +12199,24 @@ Every message on the one typed pipe. UP (child→parent): question / settled / f
 
 ***
 
+### MakeWorkerAgent
+
+> **MakeWorkerAgent** = (`profile`) => [`Agent`](#agent)\<`unknown`, `unknown`\>
+
+Defined in: [mcp/tools/coordination.ts:92](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L92)
+
+#### Parameters
+
+##### profile
+
+`unknown`
+
+#### Returns
+
+[`Agent`](#agent)\<`unknown`, `unknown`\>
+
+***
+
 ### InProcessOnPrompt
 
 > **InProcessOnPrompt** = (`prompt`, `ctx`) => `SandboxEvent`[] \| `AsyncIterable`\<`SandboxEvent`\> \| `Promise`\<`SandboxEvent`[]\>
@@ -15394,7 +15502,7 @@ executor has produced its output. The inner `score` is preserved; only `valid` i
 
 > **driverAgent**(`opts`): [`Agent`](#agent)\<`unknown`, `unknown`\>
 
-Defined in: [runtime/supervise/coordination-driver.ts:157](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L157)
+Defined in: [runtime/supervise/coordination-driver.ts:165](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L165)
 
 Build the intelligent recursive driver. Its `act` is the LLM tool-loop; spawn it as a
 `driverChild` (`driver-executor.ts`) to run it inside a nested scope, recursively.
@@ -15415,7 +15523,7 @@ Build the intelligent recursive driver. Its `act` is the LLM tool-loop; spawn it
 
 > **finalizeBestDelivered**(`settled`, `blobs`): `Promise`\<`unknown`\>
 
-Defined in: [runtime/supervise/coordination-driver.ts:356](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L356)
+Defined in: [runtime/supervise/coordination-driver.ts:373](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L373)
 
 Keep-best finalize under the completion-oracle: return the highest-scoring DELIVERED child's
  output (settled `done` AND `valid` — its deliverable check passed). Returns undefined when no
@@ -15463,7 +15571,7 @@ Stand up the coordination MCP over a live scope. The HOST address is `127.0.0.1`
 
 ###### makeWorkerAgent
 
-[`MakeWorkerAgent`](mcp.md#makeworkeragent)
+[`MakeWorkerAgent`](#makeworkeragent)
 
 ###### perWorker
 
@@ -15486,7 +15594,7 @@ Hard cap on simultaneously-LIVE workers — `spawn_agent` fails closed once this
 
 ###### analysts?
 
-[`AnalystRegistry`](mcp.md#analystregistry)
+[`AnalystRegistry`](#analystregistry)
 
 Trace-analyst lenses the driver can run (`run_analyst`) or auto-fire on settle.
 
@@ -15806,7 +15914,7 @@ Fail loud on a `down` settlement: only a `done` child is an iteration.
 
 ### workerFromBackend()
 
-> **workerFromBackend**(`backend`, `deliverable?`): [`MakeWorkerAgent`](mcp.md#makeworkeragent)
+> **workerFromBackend**(`backend`, `deliverable?`): [`MakeWorkerAgent`](#makeworkeragent)
 
 Defined in: [runtime/supervise/supervise.ts:26](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L26)
 
@@ -15826,7 +15934,7 @@ Build the worker seam from a backend (WHERE workers run) + an optional completio
 
 #### Returns
 
-[`MakeWorkerAgent`](mcp.md#makeworkeragent)
+[`MakeWorkerAgent`](#makeworkeragent)
 
 ***
 
@@ -15834,7 +15942,7 @@ Build the worker seam from a backend (WHERE workers run) + an optional completio
 
 > **supervise**(`profile`, `task`, `opts`): `Promise`\<[`SupervisedResult`](#supervisedresult)\<`unknown`\>\>
 
-Defined in: [runtime/supervise/supervise.ts:108](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L108)
+Defined in: [runtime/supervise/supervise.ts:116](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L116)
 
 #### Parameters
 
@@ -15860,7 +15968,7 @@ Defined in: [runtime/supervise/supervise.ts:108](https://github.com/tangle-netwo
 
 > **supervisorAgent**(`profile`, `deps`): [`Agent`](#agent)\<`unknown`, `unknown`\>
 
-Defined in: [runtime/supervise/supervisor-agent.ts:104](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L104)
+Defined in: [runtime/supervise/supervisor-agent.ts:109](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L109)
 
 #### Parameters
 
diff --git a/examples/ablation-suite/ablation.ts b/examples/ablation-suite/ablation.ts
index 9fe2837e..cf9ac757 100644
--- a/examples/ablation-suite/ablation.ts
+++ b/examples/ablation-suite/ablation.ts
@@ -7,10 +7,13 @@
  * just burns tokens. One-knob-delta design (baseline + each single knob flipped) keeps it O(N), not 2^N.
  *
  * STATUS — honest: the framework + the cost autopsy are real; knobs are wired incrementally. WIRED:
- * `topology` (single/fanout/fanout-refine = refine/sample/sampleThenRefine) + `budget`. The rest are
- * DECLARED knobs that FAIL LOUD if set (no silent no-op — you must not think GEPA ran when it didn't);
- * each is a tracked next-increment over a real substrate primitive (named in the throw). Validate the
- * framework on the cheap contamination-proof task, THEN point `environment`/`tasks` at SWE-bench.
+ * `topology` (single/fanout/fanout-refine = refine/sample/sampleThenRefine) + `budget`; `driverSteer`
+ * (the supervisor brain spawns + steers a graded worker, analyst up-leg on — via `selfImprovingSupervisor`)
+ * and `optimize:'gepa'` (GEPA-tune the driver's compose-prompt on a DISJOINT train slice, freeze, then
+ * drive — via `optimizeDriverPrompt`; implies `driverSteer`). STILL DECLARED + FAIL LOUD: `halo`,
+ * `persistentArtifact` (no silent no-op — each names its substrate primitive in the throw). Note: the
+ * driverSteer/optimize arms report real resolve, $, tokens, and latency, but NOT `shots`/completions
+ * counts (N/A for a multi-worker supervised run, not a real zero). Validate on the cheap contamination-proof task, THEN point `environment`/`tasks` at SWE-bench.
  */
 import { pairedBootstrap } from '@tangle-network/agent-eval'
 import {
@@ -23,6 +26,14 @@ import {
   sampleThenRefine,
 } from '@tangle-network/agent-runtime/loops'
 import { codingEnv, codingTasks } from '../self-improving-coder/self-improving-coder'
+import { optimizeDriverPrompt } from './gepa-driver-prompt'
+import { selfImprovingSupervisor } from './self-improving-supervisor'
+
+/** The baseline driver/steerer standing instruction — the compose-next-prompt the GEPA pass mutates
+ *  (its `baselinePrompt`) and the prompt the supervisor runs with when `optimize` is off. Kept terse:
+ *  GEPA earns the lift, this is only the floor. */
+const baselineDriverPrompt =
+  'You are a driver coordinating one worker on a coding task. Read the worker’s settled output and the analyst finding, then steer the next attempt: name the concrete next action, require the worker to verify the change took, and only stop once every required check passes.'
 
 export interface AblationKnobs {
   /** WIRED → strategy: single=`refine` (iterate one artifact), fanout=`sample` (N parallel, pick best),
@@ -55,16 +66,6 @@ const unwiredKnobs: Array<{
   isSet: (v: unknown) => boolean
   prim: string
 }> = [
-  {
-    k: 'driverSteer',
-    isSet: (v) => v === true,
-    prim: 'supervise(driverProfile,{backend,analyzeOnSettle}) — driver composes the steer from the analyst finding',
-  },
-  {
-    k: 'optimize',
-    isSet: (v) => !!v && v !== 'off',
-    prim: "selfImprove() w/ executable JudgeConfig optimizing the driver's compose-prompt on TRAIN, frozen",
-  },
   { k: 'halo', isSet: (v) => v === true, prim: 'HALO analyst option' },
   { k: 'persistentArtifact', isSet: (v) => v === true, prim: 'openSandboxRun resume' },
 ]
@@ -99,6 +100,16 @@ export async function runAblation(opts: {
     maxTokens?: number
     innerTurns?: number
   }
+  /** The DRIVER brain's own router substrate (used by the `driverSteer`/`optimize` arms). Defaults to
+   *  the worker's router + model. The supervisor's inference is separate compute from the worker's, so
+   *  it carries its own model knob. */
+  supervisor?: {
+    routerBaseUrl?: string
+    routerKey?: string
+    model?: string
+    /** Reflection model for the GEPA optimize pass (defaults to the supervisor/worker model). */
+    reflectionModel?: string
+  }
   onArm?: (r: ArmResult) => void
 }): Promise<ArmResult[]> {
   // ONE held-out set, shared across all arms — the fair-comparison invariant.
@@ -110,6 +121,13 @@ export async function runAblation(opts: {
       knobs: { ...opts.base, ...d.knob } as AblationKnobs,
     })),
   ]
+  // The driver brain's router substrate (the `driverSteer`/`optimize` arms) — defaults to the worker's
+  // router + model. The supervisor's inference is separate compute from the worker's.
+  const supervisorRouter = {
+    baseUrl: opts.supervisor?.routerBaseUrl ?? opts.worker.routerBaseUrl,
+    apiKey: opts.supervisor?.routerKey ?? opts.worker.routerKey,
+    model: opts.supervisor?.model ?? opts.worker.model,
+  }
   const results: ArmResult[] = []
   for (const arm of arms) {
     for (const u of unwiredKnobs) {
@@ -118,34 +136,108 @@ export async function runAblation(opts: {
           `ablation: knob '${u.k}'=${JSON.stringify(arm.knobs[u.k])} (arm "${arm.name}") is DECLARED but not yet wired — wire it over ${u.prim} before claiming it ran. (No silent no-op.)`,
         )
     }
+    // `optimize` implies `driverSteer`: a tuned compose-prompt only has effect through the driver loop.
+    const driverSteer = arm.knobs.driverSteer === true || arm.knobs.optimize === 'gepa'
+
+    // The `optimize:'gepa'` knob: BEFORE the held-out arm runs, GEPA-tune the driver's compose-prompt on
+    // a DISJOINT train slice (offset past the held-out window so train ∩ holdout = ∅), freeze the winner,
+    // and use it for this arm's driverSteer runs. Off → the baseline standing prompt drives the loop.
+    let driverPrompt = baselineDriverPrompt
+    let gepaUsd = 0
+    if (arm.knobs.optimize === 'gepa') {
+      const opt = await optimizeDriverPrompt({
+        surface: opts.environment,
+        tasks: opts.tasks,
+        trainOffset: opts.holdoutOffset + opts.holdoutN,
+        trainN: opts.holdoutN,
+        baselinePrompt: baselineDriverPrompt,
+        worker: opts.worker,
+        ...(opts.supervisor?.reflectionModel !== undefined
+          ? { reflectionModel: opts.supervisor.reflectionModel }
+          : {}),
+      })
+      driverPrompt = opt.systemPrompt
+      gepaUsd = opt.usd // the TRAIN-side GEPA cost, counted into this arm's $ (the fair-cost invariant)
+      console.log(
+        `ablation: arm "${arm.name}" GEPA driver-prompt ${opt.shipped ? 'SHIPPED' : 'kept-baseline'} (train lift ${(100 * opt.lift).toFixed(0)}pp)`,
+      )
+    }
+
     let resolved = 0
     let ti = 0
     let to = 0
-    let usd = 0
+    let usd = gepaUsd // seed with the TRAIN-side GEPA optimization cost so the arm's $ is honest
     let ms = 0
     let shots = 0
     let comps = 0
     const perTask: number[] = []
     for (const t of tasks) {
-      const r = await runAgentic({
-        surface: opts.environment,
-        task: t,
-        strategy: topologyStrategy[arm.knobs.topology],
-        budget: arm.knobs.budget,
-        routerBaseUrl: opts.worker.routerBaseUrl,
-        routerKey: opts.worker.routerKey,
-        model: opts.worker.model,
-        ...(opts.worker.maxTokens !== undefined ? { maxTokens: opts.worker.maxTokens } : {}),
-        ...(opts.worker.innerTurns !== undefined ? { innerTurns: opts.worker.innerTurns } : {}),
-      })
-      if (r.resolved) resolved++
-      perTask.push(r.resolved ? 1 : 0)
-      ti += r.tokens.input
-      to += r.tokens.output
-      usd += r.usd
-      ms += r.ms
-      shots += r.shots
-      comps += r.completions
+      try {
+        if (driverSteer) {
+          // The driver-steered path: the supervisor brain spawns + steers a graded worker on a conserved
+          // pool, with the analyst up-leg on. `selfImprovingSupervisor` reports the deployable outcome +
+          // the FULL conserved spend (driver inference + all worker work: $, tokens, latency). `shots`
+          // stays 0 — a multi-worker supervised run has no single refine-shot count (N/A, not a real zero).
+          const sup = await selfImprovingSupervisor({
+            surface: opts.environment,
+            task: t,
+            driverPrompt,
+            worker: {
+              routerBaseUrl: opts.worker.routerBaseUrl,
+              routerKey: opts.worker.routerKey,
+              model: opts.worker.model,
+              ...(opts.worker.maxTokens !== undefined ? { maxTokens: opts.worker.maxTokens } : {}),
+              ...(opts.worker.innerTurns !== undefined
+                ? { innerTurns: opts.worker.innerTurns }
+                : {}),
+              budget: arm.knobs.budget,
+            },
+            budget: {
+              // Pool for the driver's turns PLUS several worker spawns (each reserves ~innerTurns+2
+              // iterations) so the analyst up-leg can drive a spawn-refine loop, not stall after one
+              // worker. The autopsy measures the real cost; this is intentionally not equal-k.
+              maxIterations: arm.knobs.budget * ((opts.worker.innerTurns ?? 6) + 2) + 16,
+              maxTokens: (opts.worker.maxTokens ?? 4000) * Math.max(4, arm.knobs.budget * 3),
+            },
+            analyze: true,
+            router: supervisorRouter,
+          })
+          if (sup.resolved) resolved++
+          perTask.push(sup.resolved ? 1 : 0)
+          usd += sup.usd
+          ti += sup.tokensIn
+          to += sup.tokensOut
+          ms += sup.ms
+        } else {
+          const r = await runAgentic({
+            surface: opts.environment,
+            task: t,
+            strategy: topologyStrategy[arm.knobs.topology],
+            budget: arm.knobs.budget,
+            routerBaseUrl: opts.worker.routerBaseUrl,
+            routerKey: opts.worker.routerKey,
+            model: opts.worker.model,
+            ...(opts.worker.maxTokens !== undefined ? { maxTokens: opts.worker.maxTokens } : {}),
+            ...(opts.worker.innerTurns !== undefined ? { innerTurns: opts.worker.innerTurns } : {}),
+          })
+          if (r.resolved) resolved++
+          perTask.push(r.resolved ? 1 : 0)
+          ti += r.tokens.input
+          to += r.tokens.output
+          usd += r.usd
+          ms += r.ms
+          shots += r.shots
+          comps += r.completions
+        }
+      } catch (e) {
+        // One task throw (network/quota/etc.) must not lose the whole arm's accumulated data:
+        // count it as unresolved and keep going so the arm returns partial results. Warn loud.
+        const msg = e instanceof Error ? e.message : String(e)
+        console.warn(
+          `ablation: arm "${arm.name}" task "${t.id}" failed (counted unresolved): ${msg}`,
+        )
+        perTask.push(0)
+      }
     }
     const n = tasks.length
     const res: ArmResult = {
@@ -217,19 +309,29 @@ async function main(): Promise<void> {
     maxTokens: 4000,
     innerTurns: Number(process.env.INNER_TURNS ?? 6),
   }
-  console.log(`═══ ABLATION (cheap contamination-proof task) — worker=${worker.model} ═══`)
+  const supervisor = {
+    model: process.env.SUPERVISOR_MODEL ?? worker.model,
+    reflectionModel: process.env.REFLECTION_MODEL ?? 'gemini-2.5-pro',
+  }
+  console.log(
+    `═══ ABLATION (cheap contamination-proof task) — worker=${worker.model} driver=${supervisor.model} ═══`,
+  )
   const results = await runAblation({
     environment: codingEnv,
     tasks: codingTasks,
     holdoutOffset: 100, // a fixed disjoint held-out slice
     holdoutN: Number(process.env.HOLDOUT_N ?? 6),
     base: { topology: 'single', budget: Number(process.env.BUDGET ?? 2) },
-    // one-knob-delta: flip ONLY topology (the wired knob) vs baseline.
+    // one-knob-delta: flip ONLY one knob vs baseline. topology is the cheap free arm; driverSteer adds
+    // the driver brain; optimize tunes that brain's compose-prompt on a disjoint train slice first.
     deltas: [
       { name: 'fanout', knob: { topology: 'fanout' } },
       { name: 'fanout-refine', knob: { topology: 'fanout-refine' } },
+      { name: 'driver-steer', knob: { driverSteer: true } },
+      { name: 'driver-gepa', knob: { optimize: 'gepa' } },
     ],
     worker,
+    supervisor,
     onArm: (r) =>
       console.log(
         `  ${r.name}: ${(100 * r.resolve).toFixed(0)}% resolve, $${r.costUsd.toFixed(4)}, ${(r.latencyMs / 1000).toFixed(0)}s`,
diff --git a/examples/ablation-suite/gepa-driver-prompt.ts b/examples/ablation-suite/gepa-driver-prompt.ts
new file mode 100644
index 00000000..2ed130ac
--- /dev/null
+++ b/examples/ablation-suite/gepa-driver-prompt.ts
@@ -0,0 +1,167 @@
+/**
+ * gepa-driver-prompt — GEPA-optimize the driver's compose-next-prompt on TRAIN, executable-graded,
+ * frozen, held-out-certified, and return the winner.
+ *
+ * This is the `optimize: 'gepa'` knob from the ablation board (ablation.ts), wired over the real
+ * substrate: agent-eval's `selfImprove` (the held-out-gated closed loop) driven by `gepaProposer`
+ * (the reflective prompt mutator). It is NOT `improve()` — `improve()` writes the winner back into an
+ * `AgentProfile` field, but the steerer prompt (what the driver composes the next round's instruction
+ * from) is not a profile field. So we call `selfImprove` directly with the steerer string as the
+ * `baselineSurface` the proposer mutates.
+ *
+ * The grading is EXECUTABLE, never an LLM judge: each candidate steerer runs a real `refine` rollout
+ * over the surface (its harness-verified `resolved`/`score`), and the `JudgeConfig` reads those
+ * outcomes straight off the artifact. A candidate's fitness IS the resolve it actually earned on the
+ * environment's own check — there is no model in the scoring loop to flatter it.
+ *
+ * The candidate steerer reaches the run through `refine`'s built-in analyst steerer
+ * (`AgenticOptions.analystInstruction`): the closest in-strategy proxy for "the driver's
+ * compose-next-prompt", since `refine`'s between-shot analyst IS the thing that composes the next
+ * instruction from the trajectory. GEPA tunes that instruction; the held-out gate certifies it.
+ */
+
+import {
+  type DispatchContext,
+  gepaProposer,
+  type JudgeConfig,
+  type MutableSurface,
+  type Scenario,
+  selfImprove,
+} from '@tangle-network/agent-eval/contract'
+import {
+  type AgenticRunResult,
+  type AgenticSurface,
+  type AgenticTask,
+  refine,
+  runAgentic,
+} from '@tangle-network/agent-runtime/loops'
+
+/** One TRAIN scenario: the coding task carried as the scenario's domain payload. The agent reads
+ *  `scenario.task` to run the rollout; the judge reads the artifact the rollout produced. */
+interface DriverPromptScenario extends Scenario {
+  task: AgenticTask
+}
+
+/** The default reflection model — a model the Tangle router actually serves. The substrate default
+ *  (`anthropic/claude-sonnet-4.6`) is NOT served by the router, so `gepaProposer` would fail every
+ *  reflection call; callers should pass their own, but this keeps the zero-config path live. */
+const defaultReflectionModel = 'gemini-2.5-pro'
+
+/** The mutation levers offered to the reflective proposer — what a steerer-prompt rewrite may change.
+ *  These orient the model toward the kinds of edits that move a compose-next-prompt's effectiveness. */
+const steererMutationPrimitives = [
+  'sharpen what the reviewer must check on the trajectory before recommending an action',
+  'make the recommended next actions more concrete and tool-grounded',
+  'add an explicit verify-it-took step after each change',
+  'tighten the COMPLETE / continue decision so it stops only when every required change is verified',
+]
+
+/** GEPA-tune the driver steerer prompt on the TRAIN slice `tasks(trainOffset, trainN)`. Resolves to the
+ *  promoted prompt ONLY when the held-out gate ships it (else `baselinePrompt`), plus the gate's `lift`,
+ *  the `shipped` flag, and `usd` — the total optimization cost the caller charges to the arm. */
+export async function optimizeDriverPrompt(opts: {
+  surface: AgenticSurface
+  tasks: (offset: number, n: number) => Promise<AgenticTask[]>
+  trainOffset: number
+  trainN: number
+  baselinePrompt: string
+  worker: {
+    routerBaseUrl: string
+    routerKey: string
+    model: string
+    maxTokens?: number
+    innerTurns?: number
+  }
+  reflectionModel?: string
+}): Promise<{ systemPrompt: string; lift: number; shipped: boolean; usd: number }> {
+  const { surface, worker } = opts
+
+  // TRAIN scenarios — the disjoint training slice. `selfImprove` splits a held-out fraction off these
+  // for the gate, so the winner is certified on tasks the proposer never optimized against.
+  const trainTasks = await opts.tasks(opts.trainOffset, opts.trainN)
+  const scenarios: DriverPromptScenario[] = trainTasks.map((task) => ({
+    id: task.id,
+    kind: 'coding',
+    task,
+  }))
+
+  // The agent under improvement: it receives the CURRENT candidate steerer (the surface string) and
+  // runs a real `refine` rollout with that steerer as the analyst instruction. The returned artifact
+  // is the harness-verified `AgenticRunResult` — `resolved`/`score` come from `surface.score`, not a
+  // self-report, so the candidate cannot fabricate a win.
+  const agent = async (
+    candidate: MutableSurface,
+    scenario: DriverPromptScenario,
+    _ctx: DispatchContext,
+  ): Promise<AgenticRunResult> => {
+    // A `CodeSurface` is not a prompt — only the string steerer is optimized here, so fail loud.
+    if (typeof candidate !== 'string') {
+      throw new Error(
+        `optimizeDriverPrompt: candidate surface is a CodeSurface, not a steerer prompt — this loop optimizes the string steerer only`,
+      )
+    }
+    return runAgentic({
+      surface,
+      task: scenario.task,
+      strategy: refine,
+      budget: worker.innerTurns ? Math.max(2, Math.ceil(worker.innerTurns / 2)) : 2,
+      routerBaseUrl: worker.routerBaseUrl,
+      routerKey: worker.routerKey,
+      model: worker.model,
+      // The candidate steerer drives the run via refine's built-in between-shot analyst.
+      analystInstruction: candidate,
+      ...(worker.maxTokens !== undefined ? { maxTokens: worker.maxTokens } : {}),
+      ...(worker.innerTurns !== undefined ? { innerTurns: worker.innerTurns } : {}),
+    })
+  }
+
+  // The EXECUTABLE judge — no LLM in the scoring loop. Composite = the artifact's harness-verified
+  // resolve fraction; the `resolved` dimension is the binary deployable pass. A thrown judge would be
+  // recorded as a failed cell, so a missing/non-finite score grades as 0 rather than throwing.
+  const judge: JudgeConfig<AgenticRunResult, DriverPromptScenario> = {
+    name: 'surface-resolve',
+    dimensions: [
+      { key: 'resolved', description: 'the surface verifier passed every check (1) or not (0)' },
+      { key: 'score', description: 'the surface verifier pass fraction in [0,1]' },
+    ],
+    score: ({ artifact }) => {
+      const score = Number.isFinite(artifact.score) ? artifact.score : 0
+      return {
+        dimensions: { resolved: artifact.resolved ? 1 : 0, score },
+        composite: score,
+        notes: `executable grade: resolved=${artifact.resolved} score=${score.toFixed(3)}`,
+      }
+    },
+  }
+
+  const result = await selfImprove<DriverPromptScenario, AgenticRunResult>({
+    agent,
+    scenarios,
+    judge,
+    baselineSurface: opts.baselinePrompt,
+    proposer: gepaProposer({
+      llm: { baseUrl: worker.routerBaseUrl, apiKey: worker.routerKey },
+      model: opts.reflectionModel ?? defaultReflectionModel,
+      target:
+        'the driver compose-next-prompt (the between-shot steerer that turns the trajectory into the next instruction)',
+      mutationPrimitives: steererMutationPrimitives,
+    }),
+    // One generation, two candidates, a third of TRAIN held out for the gate — the cheap proof shape;
+    // raise generations/populationSize for a deeper search once the cheap run is green.
+    budget: { generations: 1, populationSize: 2, holdoutFraction: 0.34 },
+  })
+
+  // Freeze the winner ONLY when the held-out gate ships it; an uncertified candidate must not drive
+  // the arm, so fall back to the baseline. The string guard keeps a `CodeSurface` out of the return.
+  const shipped = result.gateDecision === 'ship'
+  const winner = result.winner.surface
+  const systemPrompt = shipped && typeof winner === 'string' ? winner : opts.baselinePrompt
+  return {
+    systemPrompt,
+    lift: result.lift,
+    shipped,
+    // The TRAIN-side optimization cost (baseline + every generation) — counted into the arm's $ so the
+    // cost-aware ablation never hides the price of GEPA behind the held-out run alone.
+    usd: result.totalCostUsd,
+  }
+}
diff --git a/examples/ablation-suite/self-improving-supervisor.ts b/examples/ablation-suite/self-improving-supervisor.ts
new file mode 100644
index 00000000..217bf423
--- /dev/null
+++ b/examples/ablation-suite/self-improving-supervisor.ts
@@ -0,0 +1,128 @@
+/**
+ * self-improving-supervisor — the one-call DX recipe for the driver-steered supervisor over a graded
+ * task. It composes three already-built seams instead of hand-wiring a loop:
+ *
+ *   surfaceWorkerSeam   → WHERE the worker runs + the completion oracle that makes "settled ⟺ delivered"
+ *   supervise()         → the LLM driver brain that spawns + steers the worker on a conserved budget
+ *   analysts/onSettle   → the self-improving UP-leg: when a worker settles, an analyst reads its output
+ *                          and re-enters a short `finding` the driver composes its next steer from
+ *
+ * `analyze` is the one knob that flips the up-leg on: off → the driver sees raw settled outputs; on →
+ * the driver also receives a one-line analyst read of each settled worker (the steer firewall stays in
+ * the analyst registry — the analyst summarizes, it never decides the verdict).
+ */
+import {
+  type AgenticSurface,
+  type AgenticTask,
+  type SupervisorProfile,
+  supervise,
+} from '@tangle-network/agent-runtime/loops'
+import { type SurfaceWorkerOut, surfaceWorkerSeam } from './surface-worker'
+
+export interface SelfImprovingSupervisorOptions {
+  /** The agentic surface the worker acts on (grading + task generation live here). */
+  readonly surface: AgenticSurface
+  /** The single graded task the supervisor must resolve. */
+  readonly task: AgenticTask
+  /** The driver brain's standing instruction — the optimized prompt from the GEPA pass, or a baseline. */
+  readonly driverPrompt: string
+  /** WHERE the worker runs (router substrate + model + inner-loop bounds). Threaded to the seam. */
+  readonly worker: {
+    readonly routerBaseUrl: string
+    readonly routerKey: string
+    readonly model: string
+    readonly maxTokens?: number
+    readonly innerTurns?: number
+    readonly budget?: number
+  }
+  /** The conserved compute pool for the whole supervised run. */
+  readonly budget: { readonly maxIterations: number; readonly maxTokens: number }
+  /** Flip the self-improving up-leg on: feed the driver a one-line analyst read of each settled worker. */
+  readonly analyze?: boolean
+  /** The supervisor brain's router substrate (the driver's own inference). */
+  readonly router: { readonly baseUrl: string; readonly apiKey: string; readonly model: string }
+}
+
+/** The minimal one-lens registry used only when `analyze` is on: a single `progress` lens that reads
+ *  the worker's settled output and hands the driver a short summary (the up-leg). It declares its kind
+ *  so `analyzeOnSettle:['progress']` resolves; `run` returns the `{ summary }` read and does not throw
+ *  on a missing (`undefined`) trace. The shape is validated structurally at the `supervise` call site. */
+function progressAnalyst() {
+  return {
+    kinds: [
+      {
+        id: 'progress',
+        description: "Summarize the worker's settled output for the driver's next steer.",
+        area: 'progress',
+      },
+    ],
+    run: async (_kindId: string, trace: unknown) => {
+      // `trace` is the worker's settled blob — normally a SurfaceWorkerOut; read its real fields. Any
+      // other value is dumped as JSON (`stringify` yields undefined for `undefined`, hence the `??`).
+      const w = (trace ?? {}) as Partial<SurfaceWorkerOut>
+      const summary =
+        typeof w === 'object' && w !== null && 'resolved' in w
+          ? `worker ${w.resolved ? 'RESOLVED' : 'did NOT resolve'} — score ${(100 * (w.score ?? 0)).toFixed(0)}%, ${w.shots ?? '?'} shot(s)${w.summary ? `: ${w.summary}` : ''}`
+          : `worker produced: ${(JSON.stringify(trace) ?? String(trace)).slice(0, 400)}`
+      return { summary }
+    },
+  }
+}
+
+/** Drive ONE graded task through the driver-steered supervisor and report what it delivered:
+ *  `resolved`/`score` read off the winning worker's surface-checked output (false/0 when no winner), plus
+ *  the run's total spend — `usd`, `tokensIn`/`tokensOut`, `ms` — taken from `spentTotal` either way. */
+export async function selfImprovingSupervisor(opts: SelfImprovingSupervisorOptions): Promise<{
+  resolved: boolean
+  score: number
+  usd: number
+  tokensIn: number
+  tokensOut: number
+  ms: number
+}> {
+  const { surface, task, worker, router } = opts
+  // The seam supplies both halves of the graded worker: the agent factory and its completion oracle.
+  const { makeWorkerAgent, deliverable } = surfaceWorkerSeam({ surface, task, worker })
+
+  const driver: SupervisorProfile = { name: 'driver', systemPrompt: opts.driverPrompt }
+
+  // Give each worker a small slice of the conserved pool (its inner turns + 2 iterations). The default
+  // per-worker reservation forwards `budget.maxIterations` unchanged, i.e. the WHOLE pool, so only one
+  // worker could ever spawn and the analyst up-leg would have no refined re-spawn to steer.
+  const perWorker = {
+    maxIterations: (worker.innerTurns ?? 6) + 2,
+    maxTokens: worker.maxTokens ?? 4000,
+  }
+
+  // The analyst up-leg is opt-in: with `analyze` off, neither option is passed to `supervise` at all.
+  const upLeg = opts.analyze
+    ? { analysts: progressAnalyst(), analyzeOnSettle: ['progress'] as const }
+    : {}
+
+  const run = await supervise(driver, task, {
+    makeWorkerAgent,
+    deliverable,
+    budget: opts.budget,
+    perWorker,
+    router: {
+      routerBaseUrl: router.baseUrl,
+      routerKey: router.apiKey,
+      model: router.model,
+    },
+    ...upLeg,
+  })
+
+  // A winner's `out` is the driver's finalize output (the best-delivered worker's blob), not a verdict
+  // field, so the surface-checked grade is read off it; any other outcome reports unresolved, score 0.
+  const delivered = run.kind === 'winner' ? (run.out as SurfaceWorkerOut | undefined) : undefined
+  const { usd, tokens, ms } = run.spentTotal
+  return {
+    resolved: delivered?.resolved ?? false,
+    score: delivered?.score ?? 0,
+    // Whole-run spend (driver inference + all worker work), so this arm's cost columns are real.
+    usd,
+    tokensIn: tokens.input,
+    tokensOut: tokens.output,
+    ms,
+  }
+}
diff --git a/examples/ablation-suite/surface-worker.ts b/examples/ablation-suite/surface-worker.ts
new file mode 100644
index 00000000..fbe84c38
--- /dev/null
+++ b/examples/ablation-suite/surface-worker.ts
@@ -0,0 +1,147 @@
+/**
+ * surface-worker — the GRADED-worker seam for the self-improving supervisor.
+ *
+ * `supervise()` spawns workers by resolving a profile through `makeWorkerAgent` to an `Agent` whose
+ * `executorSpec` carries a leaf `Executor`. This seam makes that worker actually WORK the
+ * `AgenticSurface` task: each spawned worker runs ONE `runAgentic({ surface, task, strategy: refine })`
+ * — the canonical depth tool loop over the surface — and settles with the surface's score as its
+ * verdict. So the driver can spawn/steer workers and read a real, surface-checked result, not a
+ * self-report.
+ *
+ * The paired `deliverable` is the completion oracle: settled ⟺ resolved. A worker that ran but
+ * didn't drive the artifact to its final checked state settles `valid:false`, so a keep-best driver
+ * never counts it as done (the Foreman 0/18 lesson — "done" means the check passed).
+ *
+ * v1 SIMPLIFICATION: the worker IGNORES the driver's brief — every spawn is a fresh `refine` attempt
+ * on the SAME task. The driver's intelligence in v1 is allocation (how many workers, when to stop),
+ * not per-worker instruction authoring; threading a per-worker brief into the surface tool loop is the
+ * next increment.
+ */
+
+import type {
+  Agent,
+  AgentProfile,
+  AgentSpec,
+  Executor,
+  ExecutorResult,
+  Spend,
+} from '@tangle-network/agent-runtime/loops'
+import {
+  type AgenticSurface,
+  type AgenticTask,
+  type DeliverableSpec,
+  type MakeWorkerAgent,
+  refine,
+  runAgentic,
+} from '@tangle-network/agent-runtime/loops'
+
+/** What the worker executor settles with — the surface verdict the driver + deliverable read.
+ *  `resolved` is the surface check's pass/fail (settled ⟺ resolved); `score` is the partial-credit
+ *  fraction; `shots` and `summary` are bookkeeping for traces/reports. */
+export interface SurfaceWorkerOut {
+  readonly resolved: boolean // copied from the `runAgentic` result; the deliverable check reads it
+  readonly score: number // rendered as `100 * score` percent in `summary`
+  readonly shots: number // shot count reported by `runAgentic` for this attempt
+  readonly summary: string // one-line recap: shots, percent, resolved/unresolved
+}
+
+export interface SurfaceWorkerOptions {
+  readonly surface: AgenticSurface // handed to `runAgentic` unchanged on every spawn
+  readonly task: AgenticTask // the ONE task every spawned worker attempts; `task.id` labels the deliverable
+  readonly worker: {
+    readonly routerBaseUrl: string // forwarded to `runAgentic`
+    readonly routerKey: string // forwarded to `runAgentic`
+    readonly model: string // forwarded to `runAgentic`
+    readonly maxTokens?: number // forwarded to `runAgentic` only when set (key omitted otherwise)
+    readonly innerTurns?: number // forwarded to `runAgentic` only when set (key omitted otherwise)
+    /** refine shot budget for ONE worker attempt (max steered shots). Defaults to 1. */
+    readonly budget?: number
+  }
+}
+
+/** One spawned worker = one `runAgentic` refine attempt over the surface task. Each `execute` caches
+ *  its result (a re-run overwrites it) for `resultArtifact()`, the replay source the scope journals. */
+function surfaceWorkerExecutor(opts: SurfaceWorkerOptions): Executor<SurfaceWorkerOut> {
+  const { surface, task, worker } = opts
+  let artifact: ExecutorResult<SurfaceWorkerOut> | undefined // undefined until an `execute` completes
+  return {
+    runtime: 'surface-worker',
+    // v1: the worker ignores the spawn `task` (the driver's brief) — each spawn is a fresh refine
+    // attempt on the SAME surface task. `runAgentic` already stamps real tokens/usd/ms from its
+    // conserved pool, so we forward those as the worker's Spend (no re-pricing here).
+    async execute(): Promise<ExecutorResult<SurfaceWorkerOut>> {
+      const r = await runAgentic({
+        surface,
+        task,
+        strategy: refine,
+        budget: worker.budget ?? 1, // default: a single refine shot
+        routerBaseUrl: worker.routerBaseUrl,
+        routerKey: worker.routerKey,
+        model: worker.model,
+        ...(worker.maxTokens !== undefined ? { maxTokens: worker.maxTokens } : {}),
+        ...(worker.innerTurns !== undefined ? { innerTurns: worker.innerTurns } : {}),
+      })
+      const out: SurfaceWorkerOut = {
+        resolved: r.resolved,
+        score: r.score,
+        shots: r.shots,
+        summary: `refine ${r.shots} shot(s) → ${(100 * r.score).toFixed(0)}% (${
+          r.resolved ? 'resolved' : 'unresolved'
+        })`,
+      }
+      const spent: Spend = {
+        iterations: r.completions, // the run's completion count is charged as iterations
+        tokens: r.tokens,
+        usd: r.usd,
+        ms: r.ms,
+      }
+      artifact = {
+        outRef: `surface-worker:${task.id}:${r.shots}:${r.resolved ? 'ok' : 'no'}`,
+        out,
+        verdict: { valid: r.resolved, score: r.score }, // valid mirrors the surface check result
+        spent,
+      }
+      return artifact
+    },
+    teardown: () => Promise.resolve({ destroyed: true }), // holds no resources; always reports destroyed
+    resultArtifact() {
+      if (!artifact) throw new Error('surfaceWorkerExecutor: resultArtifact before execute')
+      return artifact // the most recent `execute` result
+    },
+  }
+}
+
+/**
+ * Build the graded-worker seam: a `makeWorkerAgent` `supervise()` spawns through, and the matching
+ * `deliverable` (settled ⟺ resolved). Hand both to `supervise(profile, intent, { makeWorkerAgent,
+ * deliverable, budget })` — every spawned worker then works the surface task (`opts.task`, shared by
+ * all spawns) and settles with the surface-checked verdict.
+ */
+export function surfaceWorkerSeam(opts: SurfaceWorkerOptions): {
+  makeWorkerAgent: MakeWorkerAgent
+  deliverable: DeliverableSpec<unknown>
+} {
+  const makeWorkerAgent: MakeWorkerAgent = (rawProfile) => {
+    const p = (rawProfile ?? {}) as { name?: unknown } // tolerate a null/undefined profile
+    const name = typeof p.name === 'string' && p.name.length > 0 ? p.name : 'surface-worker'
+    // harness:null is unused — the BYO `executor` overrides harness resolution entirely (the scope
+    // resolves a BYO `spec.executor` first). `act` is never called for a spawned child.
+    const spec: AgentSpec = {
+      profile: rawProfile as AgentProfile, // passed through as-is (NOTE(review): fallback name not applied here)
+      harness: null,
+      executor: surfaceWorkerExecutor(opts) as Executor<unknown>, // fresh executor (own cached artifact) per spawn
+    }
+    return { name, act: async () => '', executorSpec: spec } as Agent<unknown, unknown> & {
+      executorSpec: AgentSpec
+    }
+  }
+
+  // The completion oracle: DELIVERED ⟺ the worker resolved the surface check. The driver's keep-best
+  // / stop decision rides on this `valid`, never on a worker self-report.
+  const deliverable: DeliverableSpec<unknown> = {
+    describe: `resolve the surface task ${opts.task.id} (every required check passes)`,
+    check: (out) => (out as SurfaceWorkerOut | undefined)?.resolved === true, // strict: anything but `true` fails
+  }
+
+  return { makeWorkerAgent, deliverable }
+}
diff --git a/src/runtime/index.ts b/src/runtime/index.ts
index e4bad3e0..8a889a78 100644
--- a/src/runtime/index.ts
+++ b/src/runtime/index.ts
@@ -37,8 +37,14 @@ export {
 } from '../durable/spawn-journal'
 // The typed coordination-bus event (up: settled/question/finding; down: steer/answer) — surfaced
 // here so a host folding the bus onto its own timeline (the supervise-topology observability) can
-// type its `onEvent` subscriber without reaching into the `/mcp` subpath.
-export type { CoordinationEvent } from './../mcp/tools/coordination'
+// type its `onEvent` subscriber without reaching into the `/mcp` subpath. `MakeWorkerAgent` (the
+// worker-seam type `supervise`/`workerFromBackend` traffic in) and `AnalystRegistry` (the analyst
+// lenses `supervise` accepts) ride alongside: a host types its own seam from the loop layer, not `/mcp`.
+export type {
+  AnalystRegistry,
+  CoordinationEvent,
+  MakeWorkerAgent,
+} from './../mcp/tools/coordination'
 export {
   type AnytimeReport,
   type AnytimeStrategySummary,
diff --git a/src/runtime/supervise/coordination-driver.ts b/src/runtime/supervise/coordination-driver.ts
index 219c5749..62b0aeca 100644
--- a/src/runtime/supervise/coordination-driver.ts
+++ b/src/runtime/supervise/coordination-driver.ts
@@ -28,6 +28,7 @@
 import { ValidationError } from '../../errors'
 import type { McpToolDescriptor } from '../../mcp/server'
 import {
+  type AnalystRegistry,
   coordinationVerbNames,
   createCoordinationTools,
   type MakeWorkerAgent,
@@ -57,6 +58,13 @@ export interface DriverAgentOptions {
   /** Hard cap on simultaneously-LIVE workers — `spawn_agent` fails closed once this many are in
    *  flight (a concurrency fence on top of the conserved-pool fence). Omit/`<= 0` = no cap. */
   readonly maxLiveWorkers?: number
+  /** The analyst lenses available to the driver. Required for `analyzeOnSettle` (and `run_analyst`).
+   *  Unset → no analyst feed (status quo: the driver gets settled outputs, no findings). */
+  readonly analysts?: AnalystRegistry
+  /** Analyst kind ids run AUTOMATICALLY when a worker settles `done` — each result re-enters as a
+   *  `finding` the driver pulls and composes its next steer from. The UP-leg of the self-improving
+   *  loop. Omit/empty = no auto-analysis (status quo). Requires `analysts`. */
+  readonly analyzeOnSettle?: ReadonlyArray<string>
   /** The driver's stance — a string, or built from the task (the worker-driver prompt /
    *  the generator). INJECTED so the prompt is a pluggable, optimizable role. */
   readonly systemPrompt: string | ((task: unknown) => string)
@@ -165,6 +173,13 @@ export function driverAgent(opts: DriverAgentOptions): Agent<unknown, unknown> {
       'driverAgent: extraTools requires executeExtraTool (how to run a work-tool call)',
     )
   }
+  // Fail loud on a half-wired analyst seam (matches the extraTools pattern): analyze-on-settle with no
+  // lens registry is a silent no-op the house rules forbid — the driver would get no findings, no error.
+  if ((opts.analyzeOnSettle?.length ?? 0) > 0 && !opts.analysts) {
+    throw new ValidationError(
+      'driverAgent: analyzeOnSettle requires analysts (the lens registry the kinds resolve against)',
+    )
+  }
   // A work tool that shadows a coordination verb would leave the driver unable to coordinate.
   // Validate against the reserved verb set HERE (construction), so the conflict fails loud — not
   // buried inside act() where the supervisor would swallow the throw into a quiet no-winner.
@@ -199,6 +214,8 @@ export function driverAgent(opts: DriverAgentOptions): Agent<unknown, unknown> {
         makeWorkerAgent: opts.makeWorkerAgent,
         perWorker: opts.perWorker,
         ...(opts.maxLiveWorkers !== undefined ? { maxLiveWorkers: opts.maxLiveWorkers } : {}),
+        ...(opts.analysts ? { analysts: opts.analysts } : {}),
+        ...(opts.analyzeOnSettle ? { analyzeOnSettle: opts.analyzeOnSettle } : {}),
       })
       const byName = new Map<string, McpToolDescriptor>(coord.tools.map((t) => [t.name, t]))
       const toolSpecs: ToolSpec[] = [
diff --git a/src/runtime/supervise/supervise.ts b/src/runtime/supervise/supervise.ts
index 5760d73f..46fc8444 100644
--- a/src/runtime/supervise/supervise.ts
+++ b/src/runtime/supervise/supervise.ts
@@ -9,7 +9,7 @@
  */
 import type { AgentProfile } from '@tangle-network/sandbox'
 import { ValidationError } from '../../errors'
-import type { MakeWorkerAgent } from '../../mcp/tools/coordination'
+import type { AnalystRegistry, MakeWorkerAgent } from '../../mcp/tools/coordination'
 import type { RouterConfig } from '../router-client'
 import type { ToolLoopChat, ToolLoopCompactionOptions } from '../tool-loop'
 import { type DeliverableSpec, gateOnDeliverable } from './completion-gate'
@@ -79,6 +79,14 @@ export interface SuperviseOptions {
    *  flight. The conserved pool bounds TOTAL work; this bounds SIMULTANEOUS work (live boxes/
    *  sandboxes a real fleet runs at once). Omit/`<= 0` = no cap (the pool stays the only fence). */
   readonly maxLiveWorkers?: number
+  /** Analyst lenses available to the driver. Required for `analyzeOnSettle`. Unset → status quo
+   *  (the driver receives settled worker outputs, no analyst findings). */
+  readonly analysts?: AnalystRegistry
+  /** Analyst kind ids run AUTOMATICALLY when a worker settles `done` — each re-enters as a `finding`
+   *  the driver pulls (`await_event`) and composes its next steer from. The self-improving UP-leg,
+   *  threaded to the driver at this level (propagate to sub-drivers via a recursive `makeWorkerAgent`).
+   *  Omit/empty = status quo (no analyst feed). Requires `analysts`. */
+  readonly analyzeOnSettle?: ReadonlyArray<string>
   /** Worker output store. Defaults to in-memory. */
   readonly blobs?: ResultBlobStore
   readonly maxDepth?: number
@@ -140,6 +148,8 @@ export function supervise(profile: SupervisorProfile, task: unknown, opts: Super
     ...(opts.driveHarness ? { driveHarness: opts.driveHarness } : {}),
     ...(opts.extraTools ? { extraTools: opts.extraTools } : {}),
     ...(opts.executeExtraTool ? { executeExtraTool: opts.executeExtraTool } : {}),
+    ...(opts.analysts ? { analysts: opts.analysts } : {}),
+    ...(opts.analyzeOnSettle ? { analyzeOnSettle: opts.analyzeOnSettle } : {}),
     ...(opts.maxTurns !== undefined ? { maxTurns: opts.maxTurns } : {}),
     ...(opts.compaction ? { compaction: opts.compaction } : {}),
   })
diff --git a/src/runtime/supervise/supervisor-agent.ts b/src/runtime/supervise/supervisor-agent.ts
index 5b4a5fc6..86691e86 100644
--- a/src/runtime/supervise/supervisor-agent.ts
+++ b/src/runtime/supervise/supervisor-agent.ts
@@ -14,7 +14,7 @@
  * oracle (`finalizeBestDelivered` — the best DELIVERED child, never the driver's own prose).
  */
 import { ValidationError } from '../../errors'
-import type { MakeWorkerAgent } from '../../mcp/tools/coordination'
+import type { AnalystRegistry, MakeWorkerAgent } from '../../mcp/tools/coordination'
 import { type RouterConfig, routerBrain } from '../router-client'
 import type { ToolLoopChat, ToolLoopCompactionOptions } from '../tool-loop'
 import { driverAgent, finalizeBestDelivered } from './coordination-driver'
@@ -94,6 +94,11 @@ export interface SupervisorAgentDeps {
     name: string,
     args: Record<string, unknown>,
   ) => Promise<string | null | undefined>
+  /** Analyst lenses available to the driver (both arms). Required for `analyzeOnSettle`. */
+  readonly analysts?: AnalystRegistry
+  /** Analyst kinds run on each worker-settle → a `finding` the driver composes its next steer from
+   *  (the self-improving UP-leg). Unset/empty = status quo (no analyst feed). Requires `analysts`. */
+  readonly analyzeOnSettle?: ReadonlyArray<string>
   readonly maxTurns?: number
   /** Give the supervisor brain a chapter-lifecycle on its OWN context window (router arm only) — it
    *  distills its coordination transcript to a compact progress note once it exceeds the threshold,
@@ -129,6 +134,8 @@ export function supervisorAgent(
       ...(deps.maxLiveWorkers !== undefined ? { maxLiveWorkers: deps.maxLiveWorkers } : {}),
       ...(deps.extraTools ? { extraTools: deps.extraTools } : {}),
       ...(deps.executeExtraTool ? { executeExtraTool: deps.executeExtraTool } : {}),
+      ...(deps.analysts ? { analysts: deps.analysts } : {}),
+      ...(deps.analyzeOnSettle ? { analyzeOnSettle: deps.analyzeOnSettle } : {}),
       ...(deps.maxTurns !== undefined ? { maxTurns: deps.maxTurns } : {}),
       ...(deps.compaction ? { compaction: deps.compaction } : {}),
     })
@@ -150,6 +157,8 @@ export function supervisorAgent(
         makeWorkerAgent: deps.makeWorkerAgent,
         perWorker: deps.perWorker,
         ...(deps.maxLiveWorkers !== undefined ? { maxLiveWorkers: deps.maxLiveWorkers } : {}),
+        ...(deps.analysts ? { analysts: deps.analysts } : {}),
+        ...(deps.analyzeOnSettle ? { analyzeOnSettle: deps.analyzeOnSettle } : {}),
       })
       try {
         await driveHarness({ profile, task, scope, coordinationMcpUrl: mcp.url })
diff --git a/tests/loops/coordination-driver.test.ts b/tests/loops/coordination-driver.test.ts
index fe6de46d..8d0e1eeb 100644
--- a/tests/loops/coordination-driver.test.ts
+++ b/tests/loops/coordination-driver.test.ts
@@ -559,3 +559,35 @@ describe('driverAgent — the driver can ACT (call work tools itself), not only
     expect(() => driverAgent(opts)).toThrow(/collides with a coordination verb/)
   })
 })
+
+describe('driverAgent — the analyst up-leg (analysts + analyzeOnSettle pass-through)', () => {
+  const noWorker = (_p: unknown): Agent<unknown, unknown> => // stub factory: these cases only construct the driver
+    ({
+      name: 'w',
+      act: async () => '',
+      executorSpec: { profile: { name: 'w' } as AgentProfile, harness: null },
+    }) as Agent<unknown, unknown> & { executorSpec: AgentSpec }
+  const analysts = { // minimal registry exposing the single 'progress' kind used below
+    kinds: [{ id: 'progress', description: 'read the settled output', area: 'progress' }],
+    run: async () => ({ note: 'ok' }),
+  }
+
+  it('fails loud when analyzeOnSettle is set without analysts (matches the extraTools guard)', () => {
+    expect(() =>
+      driverAgent({
+        ...driverOpts('x', scriptedBrain([]), noWorker),
+        analyzeOnSettle: ['progress'], // no `analysts` supplied → construction must throw
+      }),
+    ).toThrow(/analyzeOnSettle requires analysts/)
+  })
+
+  it('constructs when both analysts and analyzeOnSettle are provided (the up-leg wired)', () => {
+    expect(() =>
+      driverAgent({
+        ...driverOpts('x', scriptedBrain([]), noWorker),
+        analysts, // registry present, so the guard is satisfied
+        analyzeOnSettle: ['progress'],
+      }),
+    ).not.toThrow()
+  })
+})