diff --git a/bench/HARNESS.md b/bench/HARNESS.md
index 8d72346a..fa3661a1 100644
--- a/bench/HARNESS.md
+++ b/bench/HARNESS.md
@@ -202,7 +202,7 @@ in a `.venv`/Docker subprocess → parse its JSON report → `{resolved,score}`)
 copy of the process/venv/Docker/temp/report plumbing; commit0+appworld also share its
 stdin-piping runner (`runVenvScriptStdin`).
 - **Real, runnable with ZERO extra deps:** finsearchcomp (GitHub dataset + fixtures + LLM judge — the gate bench), hotpotqa + simpleqa + frames (HF/web QA + F1/LLM judge; `*_FIXTURES=1` offline), **aec-bench** (real GitHub task tree + fixtures; judge = the task's own `tests/verify.py` over python3 stdlib — **deterministic, graded per-field partial credit, no Docker, no LLM** → the candidate non-oracle correctable-middle-band bench for the open gate).
-- **Real code, needs an external harness/tools to run (fail loud with the exact install/Docker fix; never a fabricated score):** swe-bench + terminal-bench (`bench/.venv` + Docker), **commit0** (ISOLATED `bench/.venv-commit0` via `python3 -m venv bench/.venv-commit0 && bench/.venv-commit0/bin/pip install commit0 datasets` — its deps conflict with the shared `.venv`; override dir with `COMMIT0_VENV` — plus Docker; judge = official pytest harness, graded (passed+xfail)/total; the rollout prompt stages in-box (clones `commit-0/<repo>` @ `base_commit`, emits `git diff`); `COMMIT0_FIXTURES=1` for offline listing), **programbench** (`pip install programbench` + Docker on linux/amd64 + HF blobs; judge = official cleanroom eval, graded passed/total; `PROGRAMBENCH_FIXTURES=1` offline), **appworld** (`pip install appworld` + `appworld install` + `appworld download data`; judge = AppWorld's own `world.evaluate()`, graded passes/num_tests — NO committed fixture: task data exists only after `download data`, so loadTasks fails loud rather than fabricate a task), mind2web, cad-design + cadbench + cadgenbench (openscad/blender/build123d).
+- **Real code, needs an external harness/tools to run (fail loud with the exact install/Docker fix; never a fabricated score):** swe-bench + terminal-bench (`bench/.venv` + Docker), **commit0** (ISOLATED `bench/.venv-commit0` via `python3 -m venv bench/.venv-commit0 && bench/.venv-commit0/bin/pip install commit0 datasets` — its deps conflict with the shared `.venv`; override dir with `COMMIT0_VENV` — plus Docker; judge = official pytest harness, graded (passed+xfail)/total; the rollout prompt stages in-box (clones `commit-0/<repo>` @ `base_commit`, emits `git diff`); `COMMIT0_FIXTURES=1` for offline listing), **programbench** (`pip install programbench` + Docker on linux/amd64 + HF blobs; judge = official cleanroom eval, graded passed/total; `PROGRAMBENCH_FIXTURES=1` offline), **appworld** (`pip install appworld` + `appworld install` + `appworld download data`; judge = AppWorld's own `world.evaluate()`, graded passes/num_tests — NO committed fixture: task data exists only after `download data`, so loadTasks fails loud rather than fabricate a task), **dabstep** (`DABSTEP_DIR=/path/to/EnvCommons/DABStep` with the released `dataset.csv`, `splits/*.txt`, `files/*`, and `grade.py`; judge delegates to official `grade.py`; `DABSTEP_FIXTURES=1` only tests adapter plumbing and does not fabricate benchmark scores), mind2web, cad-design + cadbench + cadgenbench (openscad/blender/build123d).
 - **goldArtifact:** aec-bench returns the task's real `golden_pass.md` (verify-judge works fully offline). commit0 / programbench / appworld return `undefined` — the oracle is a git ref / stripped source / engine-bundled solution, not a portable string; judge correctness is proven by a real solve through the harness, not a synthetic gold (documented + fail-loud, not a fake).
 - **Absent (not built):** swe-gym, swe-bench-multimodal, and the rest of the survey set.
 Every unbuilt/scaffold adapter fails LOUD (throws with the integration step) rather than faking a score — no silent zeros in any corpus. Offline fixture tests: `benchmarks/{aec-bench,commit0,programbench,appworld}.test.mts` (`tsx --test`).
diff --git a/bench/fixtures/dabstep.json b/bench/fixtures/dabstep.json
new file mode 100644
index 00000000..59626579
--- /dev/null
+++ b/bench/fixtures/dabstep.json
@@ -0,0 +1,22 @@
+[
+  {
+    "task_id": 1,
+    "instructions": "Using the payment files, answer this calibration task with the exact integer 42.",
+    "all_golds_by_task": [
+      {
+        "kind": "number",
+        "value": 42.0
+      }
+    ]
+  },
+  {
+    "task_id": 2,
+    "instructions": "Using the payment files, answer this calibration task with the card scheme nexpay.",
+    "all_golds_by_task": [
+      {
+        "kind": "scheme",
+        "value": "nexpay"
+      }
+    ]
+  }
+]
diff --git a/bench/package.json b/bench/package.json
index f1d97625..f3fae32a 100644
--- a/bench/package.json
+++ b/bench/package.json
@@ -2,7 +2,7 @@
   "name": "@tangle-network/agent-bench",
   "version": "0.1.0",
   "type": "module",
-  "description": "The unified benchmark suite for agent-runtime agents: 18 adapters (commit0, enterpriseops-gym, trata-hedge, finsearchcomp, swe-bench, humaneval, …) behind one resolveAdapter registry, each with a real deterministic judge. Score any profile/skill/prompt change against them. Map: bench/HARNESS.md.",
+  "description": "The unified benchmark suite for agent-runtime agents: 19 adapters (commit0, enterpriseops-gym, trata-hedge, finsearchcomp, dabstep, swe-bench, humaneval, …) behind one resolveAdapter registry, each with a real deterministic judge. Score any profile/skill/prompt change against them. Map: bench/HARNESS.md.",
   "main": "src/index.ts",
   "types": "src/index.ts",
   "exports": {
@@ -18,7 +18,7 @@
   },
   "dependencies": {
     "@tangle-network/agent-eval": "^0.100.0",
-    "@tangle-network/agent-runtime": "^0.78.0",
+    "@tangle-network/agent-runtime": "^0.79.3",
     "@tangle-network/sandbox": "^0.9.3"
   },
   "devDependencies": {
@@ -27,6 +27,10 @@
   },
   "files": [
     "src",
+    "fixtures",
+    "scripts",
+    "tb_agents/*.py",
+    "steerers",
     "README.md"
   ],
   "publishConfig": {
diff --git a/bench/pnpm-lock.yaml b/bench/pnpm-lock.yaml
index 618cf533..8526b7af 100644
--- a/bench/pnpm-lock.yaml
+++ b/bench/pnpm-lock.yaml
@@ -9,21 +9,21 @@ importers:
   .:
     dependencies:
       '@tangle-network/agent-eval':
-        specifier: ^0.89.0
-        version: 0.89.0(@tangle-network/sandbox@0.4.3(viem@2.52.0(typescript@5.9.3)(zod@4.4.3)))(typescript@5.9.3)
+        specifier: ^0.100.0
+        version: 0.100.0(typescript@6.0.3)
       '@tangle-network/agent-runtime':
-        specifier: file:..
-        version: file:..(@tangle-network/agent-eval@0.89.0(@tangle-network/sandbox@0.4.3(viem@2.52.0(typescript@5.9.3)(zod@4.4.3)))(typescript@5.9.3))(@tangle-network/sandbox@0.4.3(viem@2.52.0(typescript@5.9.3)(zod@4.4.3)))
+        specifier: ^0.79.3
+        version: 0.79.3(@tangle-network/agent-eval@0.100.0(typescript@6.0.3))(@tangle-network/agent-interface@0.14.0)(@tangle-network/sandbox@0.9.5(viem@2.52.0(typescript@6.0.3)(zod@4.4.3)))
       '@tangle-network/sandbox':
-        specifier: ^0.4.3
-        version: 0.4.3(viem@2.52.0(typescript@5.9.3)(zod@4.4.3))
+        specifier: ^0.9.3
+        version: 0.9.5(viem@2.52.0(typescript@6.0.3)(zod@4.4.3))
     devDependencies:
       tsx:
         specifier: ^4.19.0
         version: 4.22.4
       typescript:
-        specifier: ^5.7.0
-        version: 5.9.3
+        specifier: ^6.0.3
+        version: 6.0.3
 
 packages:
 
@@ -248,32 +248,39 @@ packages:
   '@scure/bip39@2.2.0':
     resolution: {integrity: sha512-T/Bj/YvYMNkIPq6EENO6/rcs2e7qTNuyoUXf0KBFDmp0ZDu0H2X4Lq6yC3i0c8PcWkov5EbW+yQZZbdMmk154A==}
 
-  '@tangle-network/agent-eval@0.89.0':
-    resolution: {integrity: sha512-ifpIj1rjaE0KtL0yteX/5kreSDyDvygs6iCee+OVSrZmxZD6ZfW/0iIm+2e15elrIAiW10UJo+tUBqp1Zqu+Lg==}
+  '@tangle-network/agent-core@0.3.4':
+    resolution: {integrity: sha512-Hvz3ABRouNtBmRvGqPxifAO2yuILneJMylWH5jW/jeS2F03RvqkGYuXyGXWWLqosYbb3hVAvSEe4Ykm2FMGEDQ==}
+
+  '@tangle-network/agent-eval@0.100.0':
+    resolution: {integrity: sha512-yBupVJJAqHozhe1BL5xBuDObjvNsoY+XmJo7qfpw/w7rehAXbKliBb4k3XS1G55+GaYPjFA+xwPzlEDQISpMRw==}
     engines: {node: '>=20'}
     hasBin: true
-    peerDependencies:
-      '@tangle-network/sandbox': '>=0.2.1 <0.5.0'
-    peerDependenciesMeta:
-      '@tangle-network/sandbox':
-        optional: true
 
   '@tangle-network/agent-integrations@0.29.0':
     resolution: {integrity: sha512-Avn4oBDTRP5v/3o1xq++uu/9+Rhl2hscIggeFPBGjtVYwhvbsSZL9pRrF3LfjqL9rjx9AocZOdsZC6MXrxKnkg==}
     engines: {node: '>=20'}
     hasBin: true
 
-  '@tangle-network/agent-runtime@file:..':
-    resolution: {directory: .., type: directory}
+  '@tangle-network/agent-interface@0.10.1':
+    resolution: {integrity: sha512-yehY/0EgKvu8lG6jIVoZCtMPLkj8VEWwasuAtuph2RaB9MKE5wuxRF647O6jw8KufNZ3aQ2UVVWpZ19dGCbs6w==}
+
+  '@tangle-network/agent-interface@0.13.0':
+    resolution: {integrity: sha512-CeTPGRLoXqpt0h+BCyFgZPkfU1zyRpWmqfD+85i/uk+uvbqxkfI+JprfKVf3tBsQuCgJPSjPt5qjdW8n3h2BVg==}
+
+  '@tangle-network/agent-interface@0.14.0':
+    resolution: {integrity: sha512-9CyGhIpl90E7v4MTm3b1ti3Bp7BfPigk2Nafgi21Lg0U+QxlNB656F2JmVpUuSbOo9aGZPtg5nXu5EBTlV5a1g==}
+
+  '@tangle-network/agent-runtime@0.79.3':
+    resolution: {integrity: sha512-CIQ09F9zK8agXbPvilRySCX3QB8XssnYx95VHsonWs5D4M5kXn3v+dXzz1aPbnOCxveEHLyiE7zApUyj3WU1yA==}
     engines: {node: '>=20'}
     hasBin: true
     peerDependencies:
-      '@tangle-network/agent-eval': '>=0.83.0 <1.0.0'
-      '@tangle-network/agent-knowledge': '>=1.3.0 <2.0.0'
-      '@tangle-network/sandbox': '>=0.1.2 <0.7.0'
+      '@tangle-network/agent-eval': '>=0.97.0 <1.0.0'
+      '@tangle-network/agent-interface': '>=0.14.0 <1.0.0'
+      '@tangle-network/sandbox': '>=0.8.0 <1.0.0'
       playwright: ^1.40.0
     peerDependenciesMeta:
-      '@tangle-network/agent-knowledge':
+      '@tangle-network/agent-interface':
         optional: true
       '@tangle-network/sandbox':
         optional: true
@@ -300,8 +307,8 @@ packages:
       viem:
         optional: true
 
-  '@tangle-network/sandbox@0.4.3':
-    resolution: {integrity: sha512-6QE3Nuhkd8f+OlpRJbumHTAG4wKR+ESXT47UE0fjTf7ndRWLnhE4RZ7YRtHVo/Q9ZZr0FGH1mwM+6tW0NAT1bA==}
+  '@tangle-network/sandbox@0.9.5':
+    resolution: {integrity: sha512-yvX2OX6uISBVnMQ+v6Upkesa3u8yj6BHxsfcS6p8Vze+M4WBpyhkwA+onzFHuo9rti557ItZn8yDu4a/klljvQ==}
     peerDependencies:
       '@mastra/core': ^1.36.0
       '@modelcontextprotocol/sdk': ^1.29.0
@@ -385,8 +392,8 @@ packages:
     engines: {node: '>=18.0.0'}
     hasBin: true
 
-  typescript@5.9.3:
-    resolution: {integrity: sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==}
+  typescript@6.0.3:
+    resolution: {integrity: sha512-y2TvuxSZPDyQakkFRPZHKFm+KKVqIisdg9/CZwm9ftvKXLP8NRWj38/ODjNbr43SsoXqNuAisEf1GdCxqWcdBw==}
     engines: {node: '>=14.17'}
     hasBin: true
 
@@ -558,16 +565,20 @@ snapshots:
       '@noble/hashes': 2.2.0
       '@scure/base': 2.2.0
 
-  '@tangle-network/agent-eval@0.89.0(@tangle-network/sandbox@0.4.3(viem@2.52.0(typescript@5.9.3)(zod@4.4.3)))(typescript@5.9.3)':
+  '@tangle-network/agent-core@0.3.4':
+    dependencies:
+      '@tangle-network/agent-interface': 0.14.0
+      zod: 4.4.3
+
+  '@tangle-network/agent-eval@0.100.0(typescript@6.0.3)':
     dependencies:
       '@asteasolutions/zod-to-openapi': 8.5.0(zod@4.4.3)
       '@ax-llm/ax': 19.0.45(zod@4.4.3)
       '@hono/node-server': 2.0.4(hono@4.12.23)
-      '@tangle-network/tcloud': 0.4.12(typescript@5.9.3)(zod@4.4.3)
+      '@tangle-network/agent-interface': 0.10.1
+      '@tangle-network/tcloud': 0.4.12(typescript@6.0.3)(zod@4.4.3)
       hono: 4.12.23
       zod: 4.4.3
-    optionalDependencies:
-      '@tangle-network/sandbox': 0.4.3(viem@2.52.0(typescript@5.9.3)(zod@4.4.3))
     transitivePeerDependencies:
       - '@mastra/core'
       - '@modelcontextprotocol/sdk'
@@ -579,32 +590,48 @@ snapshots:
 
   '@tangle-network/agent-integrations@0.29.0': {}
 
-  '@tangle-network/agent-runtime@file:..(@tangle-network/agent-eval@0.89.0(@tangle-network/sandbox@0.4.3(viem@2.52.0(typescript@5.9.3)(zod@4.4.3)))(typescript@5.9.3))(@tangle-network/sandbox@0.4.3(viem@2.52.0(typescript@5.9.3)(zod@4.4.3)))':
+  '@tangle-network/agent-interface@0.10.1':
+    dependencies:
+      zod: 4.4.3
+
+  '@tangle-network/agent-interface@0.13.0':
     dependencies:
-      '@tangle-network/agent-eval': 0.89.0(@tangle-network/sandbox@0.4.3(viem@2.52.0(typescript@5.9.3)(zod@4.4.3)))(typescript@5.9.3)
+      zod: 4.4.3
+
+  '@tangle-network/agent-interface@0.14.0':
+    dependencies:
+      zod: 4.4.3
+
+  '@tangle-network/agent-runtime@0.79.3(@tangle-network/agent-eval@0.100.0(typescript@6.0.3))(@tangle-network/agent-interface@0.14.0)(@tangle-network/sandbox@0.9.5(viem@2.52.0(typescript@6.0.3)(zod@4.4.3)))':
+    dependencies:
+      '@tangle-network/agent-eval': 0.100.0(typescript@6.0.3)
     optionalDependencies:
-      '@tangle-network/sandbox': 0.4.3(viem@2.52.0(typescript@5.9.3)(zod@4.4.3))
+      '@tangle-network/agent-interface': 0.14.0
+      '@tangle-network/sandbox': 0.9.5(viem@2.52.0(typescript@6.0.3)(zod@4.4.3))
 
-  '@tangle-network/sandbox@0.3.0(viem@2.52.0(typescript@5.9.3)(zod@4.4.3))':
+  '@tangle-network/sandbox@0.3.0(viem@2.52.0(typescript@6.0.3)(zod@4.4.3))':
     dependencies:
       '@tangle-network/agent-integrations': 0.29.0
     optionalDependencies:
-      viem: 2.52.0(typescript@5.9.3)(zod@4.4.3)
+      viem: 2.52.0(typescript@6.0.3)(zod@4.4.3)
 
-  '@tangle-network/sandbox@0.4.3(viem@2.52.0(typescript@5.9.3)(zod@4.4.3))':
+  '@tangle-network/sandbox@0.9.5(viem@2.52.0(typescript@6.0.3)(zod@4.4.3))':
+    dependencies:
+      '@tangle-network/agent-core': 0.3.4
+      '@tangle-network/agent-interface': 0.13.0
     optionalDependencies:
-      viem: 2.52.0(typescript@5.9.3)(zod@4.4.3)
+      viem: 2.52.0(typescript@6.0.3)(zod@4.4.3)
 
   '@tangle-network/tcloud-attestation@0.1.1': {}
 
-  '@tangle-network/tcloud@0.4.12(typescript@5.9.3)(zod@4.4.3)':
+  '@tangle-network/tcloud@0.4.12(typescript@6.0.3)(zod@4.4.3)':
     dependencies:
       '@scure/bip32': 2.2.0
       '@scure/bip39': 2.2.0
-      '@tangle-network/sandbox': 0.3.0(viem@2.52.0(typescript@5.9.3)(zod@4.4.3))
+      '@tangle-network/sandbox': 0.3.0(viem@2.52.0(typescript@6.0.3)(zod@4.4.3))
       '@tangle-network/tcloud-attestation': 0.1.1
       commander: 14.0.3
-      viem: 2.52.0(typescript@5.9.3)(zod@4.4.3)
+      viem: 2.52.0(typescript@6.0.3)(zod@4.4.3)
     transitivePeerDependencies:
       - '@mastra/core'
       - '@modelcontextprotocol/sdk'
@@ -615,9 +642,9 @@ snapshots:
       - utf-8-validate
       - zod
 
-  abitype@1.2.3(typescript@5.9.3)(zod@4.4.3):
+  abitype@1.2.3(typescript@6.0.3)(zod@4.4.3):
     optionalDependencies:
-      typescript: 5.9.3
+      typescript: 6.0.3
       zod: 4.4.3
 
   commander@14.0.3: {}
@@ -668,7 +695,7 @@ snapshots:
     dependencies:
       yaml: 2.9.0
 
-  ox@0.14.27(typescript@5.9.3)(zod@4.4.3):
+  ox@0.14.27(typescript@6.0.3)(zod@4.4.3):
     dependencies:
       '@adraffy/ens-normalize': 1.11.1
       '@noble/ciphers': 1.3.0
@@ -676,10 +703,10 @@ snapshots:
       '@noble/hashes': 1.8.0
       '@scure/bip32': 1.7.0
       '@scure/bip39': 1.6.0
-      abitype: 1.2.3(typescript@5.9.3)(zod@4.4.3)
+      abitype: 1.2.3(typescript@6.0.3)(zod@4.4.3)
       eventemitter3: 5.0.1
     optionalDependencies:
-      typescript: 5.9.3
+      typescript: 6.0.3
     transitivePeerDependencies:
       - zod
 
@@ -689,20 +716,20 @@ snapshots:
     optionalDependencies:
       fsevents: 2.3.3
 
-  typescript@5.9.3: {}
+  typescript@6.0.3: {}
 
-  viem@2.52.0(typescript@5.9.3)(zod@4.4.3):
+  viem@2.52.0(typescript@6.0.3)(zod@4.4.3):
     dependencies:
       '@noble/curves': 1.9.1
       '@noble/hashes': 1.8.0
       '@scure/bip32': 1.7.0
       '@scure/bip39': 1.6.0
-      abitype: 1.2.3(typescript@5.9.3)(zod@4.4.3)
+      abitype: 1.2.3(typescript@6.0.3)(zod@4.4.3)
       isows: 1.0.7(ws@8.20.1)
-      ox: 0.14.27(typescript@5.9.3)(zod@4.4.3)
+      ox: 0.14.27(typescript@6.0.3)(zod@4.4.3)
       ws: 8.20.1
     optionalDependencies:
-      typescript: 5.9.3
+      typescript: 6.0.3
     transitivePeerDependencies:
       - bufferutil
       - utf-8-validate
diff --git a/bench/scripts/dabstep_judge.py b/bench/scripts/dabstep_judge.py
new file mode 100644
index 00000000..1d45acf1
--- /dev/null
+++ b/bench/scripts/dabstep_judge.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+"""DABStep judge bridge.
+
+Reads {"prediction": str, "golds": list} from stdin and delegates scoring to
+the official DABStep grade.py module. This script owns no grading semantics.
+"""
+
+import argparse
+import importlib.util
+import json
+import sys
+from pathlib import Path
+
+
+def load_grade(grade_file: Path):
+    spec = importlib.util.spec_from_file_location("dabstep_grade", grade_file)
+    if spec is None or spec.loader is None:
+        raise RuntimeError(f"could not import DABStep grade file: {grade_file}")
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return module.grade
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(description="Score one DABStep answer")
+    parser.add_argument("--grade-file", required=True)
+    args = parser.parse_args()
+
+    try:
+        payload = json.loads(sys.stdin.read())
+        prediction = payload["prediction"]
+        golds = payload["golds"]
+        correct = bool(load_grade(Path(args.grade_file))(prediction, golds))
+        print(json.dumps({"correct": correct, "score": 1.0 if correct else 0.0}))
+        return 0
+    except Exception as exc:
+        print(json.dumps({"error": str(exc)}))
+        return 1
+
+
+if __name__ == "__main__":  # executed as a script: main()'s return value becomes the process exit status
+    raise SystemExit(main())
diff --git a/bench/src/adapters.ts b/bench/src/adapters.ts
index 715f16ae..0f549d7a 100644
--- a/bench/src/adapters.ts
+++ b/bench/src/adapters.ts
@@ -11,6 +11,7 @@ import { createCadBenchAdapter } from './benchmarks/cadbench'
 import { createCadDesignAdapter } from './benchmarks/cad-design'
 import { createCadGenBenchAdapter } from './benchmarks/cadgenbench'
 import { createCommit0Adapter } from './benchmarks/commit0'
+import { createDabstepAdapter } from './benchmarks/dabstep'
 import { createEnterpriseOpsGymAdapter } from './benchmarks/enterpriseops-gym'
 import { createFinsearchcompAdapter } from './benchmarks/finsearchcomp'
 import { createFramesAdapter } from './benchmarks/frames'
@@ -32,6 +33,7 @@ export const ADAPTERS: Record<string, () => BenchmarkAdapter> = {
   // delegates to the benchmark's own harness and fails loud when it/Docker is absent.
   'aec-bench': createAecBenchAdapter,
   commit0: createCommit0Adapter,
+  dabstep: createDabstepAdapter,
   programbench: createProgrambenchAdapter,
   appworld: createAppWorldAdapter,
   // AppWorld's native interactive protocol — the worker is the in-engine ReAct
diff --git a/bench/src/benchmarks/dabstep.test.mts b/bench/src/benchmarks/dabstep.test.mts
new file mode 100644
index 00000000..09792e7d
--- /dev/null
+++ b/bench/src/benchmarks/dabstep.test.mts
@@ -0,0 +1,70 @@
+/**
+ * Offline DABStep adapter test. Official live tasks need a DABStep checkout with
+ * the released dataset.csv. Fixture mode only exercises adapter plumbing; it
+ * never scores benchmark rows without the official grade.py.
+ */
+import assert from 'node:assert/strict'
+import { mkdtemp, rm } from 'node:fs/promises'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+import { test } from 'node:test'
+import { createDabstepAdapter, dabstepAnswerOutput } from './dabstep'
+
+process.env.DABSTEP_FIXTURES = '1' // createDabstepAdapter() samples this when called, so adapters built below start in fixture mode
+type Events = Parameters<typeof dabstepAnswerOutput.parse>[0]
+const stream = (text: string): Events => [{ data: { finalText: text } }] as unknown as Events // one-event stream carrying `text` as finalText
+
+test('loadTasks fixtures expose DABStep prompt and resource metadata shape', async () => {
+  const adapter = createDabstepAdapter()
+  const tasks = await adapter.loadTasks({ ids: ['1'] })
+  assert.equal(tasks.length, 1)
+  assert.equal(tasks[0].id, '1')
+  assert.match(tasks[0].prompt, /DABStep data-analysis task/)
+  const meta = tasks[0].metadata as Record<string, unknown>
+  assert.equal(meta.taskId, 1)
+  assert.equal(Array.isArray(meta.golds), true)
+})
+
+test('answer OutputAdapter extracts final fenced answer when present', () => {
+  assert.equal(dabstepAnswerOutput.parse(stream('work\n```answer\n42\n```')), '42')
+  assert.equal(dabstepAnswerOutput.parse(stream('Final Answer: 42')), 'Final Answer: 42') // no fence: the whole text is the answer
+})
+
+test('goldArtifact exposes the fixture oracle without scoring it as a benchmark result', async () => {
+  const adapter = createDabstepAdapter()
+  const [task] = await adapter.loadTasks({ ids: ['1'] })
+  const gold = await adapter.goldArtifact(task)
+  assert.equal(gold, '42') // fixture gold value 42.0 is stringified by goldArtifact
+})
+
+test('judge fails loud without an official DABSTEP_DIR/grade.py', async () => {
+  const adapter = createDabstepAdapter()
+  const [task] = await adapter.loadTasks({ ids: ['1'] })
+  delete process.env.DABSTEP_DIR
+  await assert.rejects(adapter.judge(task, '42'), /DABSTEP_DIR is required/)
+})
+
+test('preflight is fixture-safe and live mode fails loud without DABSTEP_DIR', async () => {
+  await createDabstepAdapter().preflight() // built while DABSTEP_FIXTURES=1, so preflight returns without checking DABSTEP_DIR
+  delete process.env.DABSTEP_FIXTURES
+  delete process.env.DABSTEP_DIR
+  try {
+    await assert.rejects(createDabstepAdapter().preflight(), /DABSTEP_DIR is required/)
+  } finally {
+    process.env.DABSTEP_FIXTURES = '1' // restored even if the assertion fails, matching the try/finally of the next test
+  }
+})
+
+test('live preflight fails loud when the checkout is missing released dataset.csv', async () => {
+  delete process.env.DABSTEP_FIXTURES
+  const dir = await mkdtemp(join(tmpdir(), 'dabstep-missing-dataset-'))
+  process.env.DABSTEP_DIR = dir
+  try {
+    const adapter = createDabstepAdapter()
+    await assert.rejects(adapter.preflight(), /released dataset\.csv/)
+  } finally {
+    process.env.DABSTEP_FIXTURES = '1'
+    delete process.env.DABSTEP_DIR
+    await rm(dir, { recursive: true, force: true })
+  }
+})
diff --git a/bench/src/benchmarks/dabstep.ts b/bench/src/benchmarks/dabstep.ts
new file mode 100644
index 00000000..0fb17932
--- /dev/null
+++ b/bench/src/benchmarks/dabstep.ts
@@ -0,0 +1,212 @@
+/**
+ * DABStep adapter (EnvCommons/DABStep) — data-analysis questions over synthetic
+ * payment files. Worker artifact = final answer text. Judge = the official
+ * DABStep `grade.py` normalization/matching function. No LLM judge.
+ *
+ * Live tasks require `DABSTEP_DIR` pointing at an official DABStep checkout that
+ * includes `dataset.csv`, `splits/*.txt`, `files/*`, and `grade.py`. The adapter
+ * exposes `metadata.resourceRoot` so runners can mount the benchmark files into
+ * AgentProfile.resources.files; it does not paste the dataset into prompt text.
+ */
+
+import { join } from 'node:path'
+import { access, readFile, stat } from 'node:fs/promises'
+import type { OutputAdapter } from '@tangle-network/agent-runtime/loops'
+import { benchRoot, runVenvPython, runVenvScriptStdin } from './_harness'
+import type { BenchmarkAdapter, BenchScore, BenchTask, LoadOptions } from './types'
+
+const FIXTURES = join(benchRoot, 'fixtures', 'dabstep.json') // fixture rows read by loadFixtures when DABSTEP_FIXTURES=1
+const DEFAULT_SPLIT = 'easy' // split used by preflight and by loadTasks when opts.split is unset
+
+interface DabstepFixtureRow { // one task row: an entry of fixtures/dabstep.json or of the live loader script's JSON output
+  task_id: number // becomes BenchTask.id (stringified) and metadata.taskId
+  instructions: string // task text placed after the fixed prompt preamble
+  all_golds_by_task: Array<Record<string, unknown>> // gold answers; copied to metadata.golds
+}
+
+interface DabstepMeta { // shape stored in BenchTask.metadata by rowToTask and checked again by readMeta
+  taskId: number
+  split: string
+  golds: Array<Record<string, unknown>> // sent to the judge script as `golds`
+  resourceRoot?: string // `<dir>/files`; present only when the task was built with a checkout dir
+}
+
+function dabstepDir(): string | undefined { return process.env.DABSTEP_DIR } // read on every call, never cached
+function gradeFile(dir: string): string { return join(dir, 'grade.py') } // grade.py at the top of the checkout
+function resourceRoot(dir: string): string { return join(dir, 'files') } // the checkout's `files` directory
+
+/** Resolves when fs `access(path)` succeeds; otherwise throws an Error naming `label`, `path` and the underlying reason. */
+async function assertFile(path: string, label: string): Promise<void> {
+  await access(path).catch((cause: unknown) => {
+    const why = cause instanceof Error ? cause.message : cause
+    throw new Error(`DABStep: missing ${label} at ${path} (${why})`)
+  })
+}
+
+/** Checks, in order, dataset.csv, the split file, grade.py and the `files` directory under `dir`; throws at the first one missing. */
+async function assertOfficialFiles(dir: string, split: string): Promise<void> {
+  await assertFile(join(dir, 'dataset.csv'), 'released dataset.csv')
+  await assertFile(join(dir, 'splits', `${split}.txt`), `${split} split file`)
+  await assertFile(gradeFile(dir), 'official grade.py')
+  const filesDir = resourceRoot(dir)
+  const problem = await stat(filesDir).then(
+    (info) => (info.isDirectory() ? null : 'not a directory'),
+    (cause: unknown) => (cause instanceof Error ? cause.message : String(cause)),
+  )
+  if (problem !== null) throw new Error(`DABStep: missing benchmark files directory at ${filesDir} (${problem})`)
+}
+
+export const dabstepAnswerOutput: OutputAdapter<string> = {
+  parse(events) { // last non-empty finalText/text/result string, narrowed to its last bare/text/answer fenced block if one exists
+    let text = ''
+    for (const ev of events) {
+      const d = (ev as { data?: Record<string, unknown> })?.data
+      const t = d?.finalText ?? d?.text ?? d?.result
+      if (typeof t === 'string' && t.length > 0) text = t
+    }
+    const fenced = [...text.matchAll(/```([^\n`]*)\n([\s\S]*?)```/g)] // pair every fence, whatever its info string, so a code block's closing fence is never read as an opener
+    return (fenced.filter((m) => /^(?:text|answer)?$/.test((m[1] ?? '').trim())).at(-1)?.[2] ?? text).trim()
+  },
+}
+
+/** Builds the BenchTask for one row: fixed preamble plus the row's instructions as prompt, judge inputs as metadata. */
+function rowToTask(row: DabstepFixtureRow, split: string, dir?: string): BenchTask {
+  const metadata: DabstepMeta = { taskId: row.task_id, split, golds: row.all_golds_by_task }
+  // resourceRoot is attached only when a checkout dir is supplied (the fixture loader passes none).
+  if (dir) metadata.resourceRoot = resourceRoot(dir)
+  // Two preamble lines, a blank separator, then the task's own instructions.
+  const promptLines = [
+    'Solve this DABStep data-analysis task using the mounted payment files.',
+    'Use code or shell commands as needed, then return only the final answer.',
+    '',
+    row.instructions,
+  ]
+  return {
+    id: String(row.task_id),
+    split,
+    prompt: promptLines.join('\n'),
+    metadata: metadata as unknown as Record<string, unknown>,
+  }
+}
+
+/** Returns task.metadata as DabstepMeta after checking `taskId` is a number and `golds` is an array; throws otherwise. */
+function readMeta(task: BenchTask): DabstepMeta {
+  const { metadata } = task
+  const wellFormed = !!metadata && typeof metadata.taskId === 'number' && Array.isArray(metadata.golds)
+  if (wellFormed) return metadata as unknown as DabstepMeta
+  throw new Error(`dabstep task ${task.id} missing metadata — loadTasks did not populate it`)
+}
+
+/** Turns rows into tasks, keeps those matching `opts.ids` or else the first `opts.limit`, and throws when none remain. */
+function selectRows(rows: DabstepFixtureRow[], opts: LoadOptions, split: string, dir?: string): BenchTask[] {
+  const all = rows.map((row) => rowToTask(row, split, dir))
+  // ids take precedence: limit is consulted only when no ids were passed (slice(0, undefined) keeps everything).
+  const wanted = opts.ids ? new Set(opts.ids) : undefined
+  const picked = wanted
+    ? all.filter((task) => wanted.has(task.id))
+    : all.slice(0, opts.limit)
+  if (picked.length > 0) return picked
+  throw new Error(`DABStep: no tasks matched ${JSON.stringify(opts)} for split=${split}`)
+}
+
+async function loadFixtures(opts: LoadOptions, split: string): Promise<BenchTask[]> { // fixture mode: parse FIXTURES, warn, select (no checkout dir, so no resourceRoot)
+  const fixtureRows: DabstepFixtureRow[] = JSON.parse(await readFile(FIXTURES, 'utf8'))
+  console.warn(`[dabstep] DABSTEP_FIXTURES=1 — loading ${fixtureRows.length} adapter fixtures from ${FIXTURES}`)
+  return selectRows(fixtureRows, opts, split)
+}
+
+async function loadOfficialTasks(dir: string, opts: LoadOptions, split: string): Promise<BenchTask[]> { // reads dataset.csv + splits/<split>.txt under `dir` through an inline Python script, then builds tasks that carry resourceRoot
+  const script = `
+import ast, csv, json, sys
+from pathlib import Path
+
+root = Path(sys.argv[1])
+split = sys.argv[2]
+limit = None if sys.argv[3] == "" else int(sys.argv[3])
+ids = set(json.loads(sys.argv[4]))
+dataset = root / "dataset.csv"
+split_file = root / "splits" / f"{split}.txt"
+if not dataset.exists():
+    raise SystemExit(f"missing official DABStep dataset.csv at {dataset}")
+if not split_file.exists():
+    raise SystemExit(f"missing official DABStep split file at {split_file}")
+split_ids = {int(line.strip()) for line in split_file.read_text().splitlines() if line.strip()}
+out = []
+with dataset.open(newline="") as f:
+    for row in csv.DictReader(f):
+        task_id = int(row["task_id"])
+        if task_id not in split_ids:
+            continue
+        if ids and str(task_id) not in ids:
+            continue
+        out.append({
+            "task_id": task_id,
+            "instructions": f"{row['question']}\\n{row['guidelines']}",
+            "all_golds_by_task": ast.literal_eval(str(row["all_golds_by_task"])),
+        })
+        if limit is not None and len(out) >= limit:
+            break
+if not out:
+    raise SystemExit(f"no DABStep rows matched split={split} ids={sorted(ids)} limit={limit}")
+print(json.dumps(out))
+`
+  const stdout = await runVenvPython(script, [dir, split, (opts.ids || opts.limit === undefined) ? '' : String(opts.limit), JSON.stringify(opts.ids ?? [])]) // limit is withheld when ids are given so live mode matches selectRows, where ids win over limit
+  return selectRows(JSON.parse(stdout) as DabstepFixtureRow[], opts, split, dir)
+}
+
+/** Creates the DABStep adapter. DABSTEP_FIXTURES is sampled once, at creation; DABSTEP_DIR is re-read on every preflight/loadTasks/judge call. */
+export function createDabstepAdapter(): BenchmarkAdapter {
+  const fixturesMode = process.env.DABSTEP_FIXTURES === '1'
+  return {
+    name: 'dabstep',
+    output: dabstepAnswerOutput,
+    /** No-op in fixture mode; otherwise requires DABSTEP_DIR, checks the checkout layout and loads one task as a smoke test. */
+    async preflight() {
+      if (fixturesMode) return
+      const dir = dabstepDir()
+      if (!dir) {
+        throw new Error('DABSTEP_DIR is required. Fix: clone https://github.com/EnvCommons/DABStep, add the released dataset.csv under that checkout, then set DABSTEP_DIR=/path/to/DABStep.')
+      }
+      await assertOfficialFiles(dir, DEFAULT_SPLIT)
+      await loadOfficialTasks(dir, { limit: 1 }, DEFAULT_SPLIT)
+    },
+    /** Loads tasks for `opts.split` (default: DEFAULT_SPLIT) from the fixture file or, outside fixture mode, from DABSTEP_DIR. */
+    async loadTasks(opts: LoadOptions = {}) {
+      const split = opts.split ?? DEFAULT_SPLIT
+      if (fixturesMode) return loadFixtures(opts, split)
+      const dir = dabstepDir()
+      if (!dir) throw new Error('DABSTEP_DIR is required to load official DABStep tasks')
+      return loadOfficialTasks(dir, opts, split)
+    },
+    /** String form of the first gold's `value`; undefined when there are no golds or that gold has no `value`. */
+    async goldArtifact(task: BenchTask) {
+      const meta = readMeta(task)
+      const first = meta.golds[0]
+      if (!first) return undefined
+      const value = first.value
+      return value === undefined ? undefined : String(value)
+    },
+    /** Pipes {prediction, golds} to scripts/dabstep_judge.py with the checkout's grade.py; throws unless a well-formed report comes back. */
+    async judge(task: BenchTask, artifact: string): Promise<BenchScore> {
+      const meta = readMeta(task)
+      const dir = dabstepDir()
+      if (!dir) throw new Error('DABSTEP_DIR is required to judge DABStep tasks with the official grade.py')
+      const stdout = await runVenvScriptStdin(
+        join(benchRoot, 'scripts', 'dabstep_judge.py'),
+        ['--grade-file', gradeFile(dir)],
+        JSON.stringify({ prediction: artifact, golds: meta.golds }),
+        { cwd: benchRoot },
+      )
+      const lastLine = stdout.trim().split('\n').at(-1) ?? ''
+      const report = JSON.parse(lastLine || '{}') as { correct?: boolean; score?: number; error?: string }
+      // Fail loud: an `error` key (even an empty string) or a missing boolean `correct` is a harness failure, never a silent 0.
+      if (report.error !== undefined) throw new Error(`DABStep judge error for ${task.id}: ${report.error}`)
+      if (typeof report.correct !== 'boolean') throw new Error(`DABStep judge returned no verdict for ${task.id}: ${lastLine}`)
+      return {
+        resolved: report.correct,
+        score: typeof report.score === 'number' ? report.score : report.correct ? 1 : 0,
+        detail: JSON.stringify({ taskId: meta.taskId, split: meta.split, correct: report.correct }),
+      }
+    },
+  }
+}
diff --git a/bench/src/decoder-live.mts b/bench/src/decoder-live.mts
index dc15bae7..8525694a 100644
--- a/bench/src/decoder-live.mts
+++ b/bench/src/decoder-live.mts
@@ -47,7 +47,7 @@ async function main(): Promise<number> {
       },
       profile: { name: 'decoder-live' },
     },
-  } as never)) as Record<string, (...a: never[]) => unknown> & { id?: string }
+  } as never)) as unknown as Record<string, (...a: never[]) => unknown> & { id?: string }
 
   try {
     console.error('[live] box', box.id, '— waiting for running…')
diff --git a/bench/src/trata-gepa.mts b/bench/src/trata-gepa.mts
index 7c2993c1..d760408d 100644
--- a/bench/src/trata-gepa.mts
+++ b/bench/src/trata-gepa.mts
@@ -367,7 +367,7 @@ async function main(): Promise<void> {
       apiKey: routerKey,
       model: reflectModel,
     },
-    driverTarget:
+    proposerTarget:
       'a FINANCIAL ANALYST SYSTEM INSTRUCTION: the directive given to an agent that produces an investment memo from embedded earnings call transcripts, SEC filings, financial statements, and investor presentations. ' +
       'The memo is scored by a rubric with 4-6 analytical themes, each requiring 2-4 specific analytical moves (quantitative claims, strategic conclusions, peer comparisons, or explicit calculations). ' +
       'A theme is "hit" only when the agent makes the SPECIFIC move — not just gestures at the theme. ' +
diff --git a/bench/tsconfig.json b/bench/tsconfig.json
index eff42166..7501a77c 100644
--- a/bench/tsconfig.json
+++ b/bench/tsconfig.json
@@ -10,7 +10,11 @@
     "skipLibCheck": true,
     "types": ["node"],
     "esModuleInterop": true,
-    "resolveJsonModule": true
+    "resolveJsonModule": true,
+    "paths": {
+      "@tangle-network/agent-runtime/loops": ["../src/runtime/index.ts"],
+      "@tangle-network/sandbox": ["../node_modules/@tangle-network/sandbox"]
+    }
   },
   "include": ["src/**/*.ts", "src/**/*.mts"],
   "exclude": ["src/authored"]
diff --git a/docs/api/primitive-catalog.md b/docs/api/primitive-catalog.md
index 84e59f22..6ff0e4bc 100644
--- a/docs/api/primitive-catalog.md
+++ b/docs/api/primitive-catalog.md
@@ -337,7 +337,7 @@ Import from `@tangle-network/agent-runtime/intelligence` — 60 exports.
 
 ### Recursive atom + loop kernel (alias of ./runtime)
 
-Import from `@tangle-network/agent-runtime/loops` — 383 exports.
+Import from `@tangle-network/agent-runtime/loops` — 386 exports.
 
 | Symbol | Kind | Summary |
 |---|---|---|
@@ -547,6 +547,7 @@ Import from `@tangle-network/agent-runtime/loops` — 383 exports.
 | `ExecResult` | interface | _(no summary — add a TSDoc line at the declaration)_ |
 | `Executor` | interface | The leaf runtime — ONE open interface, not a closed union. `execute` returns a |
 | `ExecutorContext` | interface | Construction context handed to a `ExecutorFactory` — the seams a built-in needs |
+| `ExecutorRegistry` | interface | The OPEN resolver: maps an `AgentSpec` to a `ExecutorFactory`. The default |
 | `ExecutorResult` | interface | Terminal artifact of a one-shot `Executor.execute`. |
 | `FanoutOptions` | interface | `fanout(items, { synthesize? })` — N children spawned in one round (one per item, bounded by |
 | `FanoutSynthesis` | interface | How a fanout's synthesis child is built + read. `synthesisTask` projects the drained child |
@@ -695,6 +696,7 @@ Import from `@tangle-network/agent-runtime/loops` — 383 exports.
 | `Environment` | type | A checkable task domain — implement these 5 hooks and the suite does the rest. The |
 | `EqualKOnCost` | type | `equalKOnCost(arms, opts)` — the cross-arm equal-compute check on conserved cost. |
 | `ExecutorConfig` | type | Config for {@link createExecutor}: the backend is DATA — the cost dial a profile, |
+| `ExecutorFactory` | type | Builds a fresh `Executor` for one spawn from the resolved spec. Per-spawn (not |
 | `Fanout` | type | `fanout(items, opts)` — build the fanout combinator over a static item list. |
 | `FanoutWinnerSelector` | type | A winner-selection strategy: argmax/sort over the gathered child iterations (each output is the |
 | `FlatWidenGate` | type | The flat default `ScopeWidenGate` factory contract — never widens, keeping the R2 firewall |
@@ -710,6 +712,7 @@ Import from `@tangle-network/agent-runtime/loops` — 383 exports.
 | `Pipeline` | type | `pipeline(stages)` — build the sequential combinator from an ordered stage list. The first |
 | `RenderCorpusToInstructions` | type | `renderCorpusToInstructions(opts)` — the flywheel read-back projection. Async (queries the |
 | `RunPersonified` | type | The composed run signature. |
+| `Runtime` | type | The runtime tag of an `Executor` impl. Open by intent: custom runtimes use their own string name. |
 | `Settled` | type | A settled child, delivered by `scope.next()`. `seq` is the monotonic cursor order |
 | `Shell` | type | Command runner seam. Host code can use `localShell`; sandbox code can wrap `box.exec`. |
 | `SteeringDecision` | type | Terminal-or-continue decision shared by all three steering drivers. The |
diff --git a/docs/api/runtime.md b/docs/api/runtime.md
index d3114ed4..930a2af8 100644
--- a/docs/api/runtime.md
+++ b/docs/api/runtime.md
@@ -1748,7 +1748,7 @@ thread the seams onto each spawn. Exactly one is required — fail loud if neith
 
 ##### registry?
 
-> `readonly` `optional` **registry?**: `ExecutorRegistry`
+> `readonly` `optional` **registry?**: [`ExecutorRegistry`](#executorregistry)
 
 Defined in: [runtime/personify/types.ts:120](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/personify/types.ts#L120)
 
@@ -8132,7 +8132,7 @@ Defined in: [runtime/supervise/run-context.ts:48](https://github.com/tangle-netw
 
 ##### executors
 
-> `readonly` **executors**: `ExecutorRegistry`
+> `readonly` **executors**: [`ExecutorRegistry`](#executorregistry)
 
 Defined in: [runtime/supervise/run-context.ts:49](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/run-context.ts#L49)
 
@@ -8166,7 +8166,7 @@ Defined in: [runtime/environment-provider.ts:269](https://github.com/tangle-netw
 
 ##### runtime?
 
-> `optional` **runtime?**: `Runtime`
+> `optional` **runtime?**: [`Runtime`](#runtime-3)
 
 Defined in: [runtime/environment-provider.ts:270](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/environment-provider.ts#L270)
 
@@ -8786,7 +8786,7 @@ own agent (mastra/agno/raw HTTP/anything) is first-class by implementing this in
 
 ##### runtime
 
-> `readonly` **runtime**: `Runtime`
+> `readonly` **runtime**: [`Runtime`](#runtime-3)
 
 Defined in: [runtime/supervise/types.ts:72](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/types.ts#L72)
 
@@ -9023,6 +9023,74 @@ Opaque seams the registry threads through; a built-in narrows what it needs.
 
 ***
 
+### ExecutorRegistry
+
+Defined in: [runtime/supervise/types.ts:182](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/types.ts#L182)
+
+The OPEN resolver: maps an `AgentSpec` to a `ExecutorFactory`. The default
+registry resolves the three built-ins AND accepts a BYO `executor`/factory; callers
+register more runtimes by name. NOT a closed switch — registration is the extension
+point, mirroring the open `Executor` interface.
+
+#### Methods
+
+##### register()
+
+> **register**\<`Out`\>(`runtime`, `factory`): `void`
+
+Defined in: [runtime/supervise/types.ts:184](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/types.ts#L184)
+
+Register a factory for a named runtime. Throws on a duplicate name (fail loud).
+
+###### Type Parameters
+
+###### Out
+
+`Out`
+
+###### Parameters
+
+###### runtime
+
+[`Runtime`](#runtime-3)
+
+###### factory
+
+[`ExecutorFactory`](#executorfactory)\<`Out`\>
+
+###### Returns
+
+`void`
+
+##### resolve()
+
+> **resolve**\<`Out`\>(`spec`): \{ `succeeded`: `true`; `value`: [`ExecutorFactory`](#executorfactory)\<`Out`\>; \} \| \{ `succeeded`: `false`; `error`: `string`; \}
+
+Defined in: [runtime/supervise/types.ts:191](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/types.ts#L191)
+
+Resolve a spec to a factory. Precedence: a BYO `spec.executor` → a trivial factory
+returning it; else `harness === null` → the `'router'` factory; else a registered
+factory for the harness-derived runtime. Returns a typed outcome — the caller
+inspects `succeeded` before `value` (no silent fallback).
+
+###### Type Parameters
+
+###### Out
+
+`Out`
+
+###### Parameters
+
+###### spec
+
+[`AgentSpec`](#agentspec)
+
+###### Returns
+
+\{ `succeeded`: `true`; `value`: [`ExecutorFactory`](#executorfactory)\<`Out`\>; \} \| \{ `succeeded`: `false`; `error`: `string`; \}
+
+***
+
 ### Budget
 
 Defined in: [runtime/supervise/types.ts:199](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/types.ts#L199)
@@ -9424,7 +9492,7 @@ Result payload store backing `outRef` rehydration.
 
 ##### executors
 
-> `readonly` **executors**: `ExecutorRegistry`
+> `readonly` **executors**: [`ExecutorRegistry`](#executorregistry)
 
 Defined in: [runtime/supervise/types.ts:442](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/types.ts#L442)
 
@@ -12967,6 +13035,49 @@ conserved pool meters all runtimes identically. `tokens` carries `LoopTokenUsage
 
 ***
 
+### Runtime
+
+> **Runtime** = `"router"` \| `"inline"` \| `"sandbox"` \| `"cli"` \| `string` & `object`
+
+Defined in: [runtime/supervise/types.ts:137](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/types.ts#L137)
+
+The runtime tag of an `Executor` impl. Open by intent: custom runtimes use their own string name.
+External executors can register additional runtime strings without widening this type.
+
+***
+
+### ExecutorFactory
+
+> **ExecutorFactory**\<`Out`\> = (`spec`, `ctx`) => [`Executor`](#executor)\<`Out`\>
+
+Defined in: [runtime/supervise/types.ts:165](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/types.ts#L165)
+
+Builds a fresh `Executor` for one spawn from the resolved spec. Per-spawn (not
+shared) so each child owns its own box/abort/teardown lifecycle. A BYO factory lets a
+user supply construction args without pre-instantiating.
+
+#### Type Parameters
+
+##### Out
+
+`Out`
+
+#### Parameters
+
+##### spec
+
+[`AgentSpec`](#agentspec)
+
+##### ctx
+
+[`ExecutorContext`](#executorcontext)
+
+#### Returns
+
+[`Executor`](#executor)\<`Out`\>
+
+***
+
 ### Settled
 
 > **Settled**\<`Out`\> = \{ `kind`: `"done"`; `handle`: `Handle`\<`Out`\>; `out`: `Out`; `outRef`: `string`; `verdict?`: `DefaultVerdict`; `spent`: [`Spend`](#spend); `seq`: `number`; \} \| \{ `kind`: `"down"`; `handle`: `Handle`\<`Out`\>; `reason`: `string`; `infra`: `boolean`; `restartCount`: `number`; `seq`: `number`; \}
@@ -13345,7 +13456,7 @@ The conserved pool a `delegate()` call applies when the caller does not pass its
 
 ### cliWorktreeExecutor
 
-> `const` **cliWorktreeExecutor**: `ExecutorFactory`\<`unknown`\>
+> `const` **cliWorktreeExecutor**: [`ExecutorFactory`](#executorfactory)\<`unknown`\>
 
 Defined in: [runtime/supervise/runtime.ts:1360](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/runtime.ts#L1360)
 
@@ -13622,7 +13733,7 @@ run once on the prompt, emit the terminal result event, tear down.
 
 ##### factory
 
-`ExecutorFactory`\<`unknown`\>
+[`ExecutorFactory`](#executorfactory)\<`unknown`\>
 
 #### Returns
 
@@ -15819,7 +15930,7 @@ state between runs), so two runs never cross-contaminate their journals/blobs.
 
 ### createExecutor()
 
-> **createExecutor**(`config`): `ExecutorFactory`\<`unknown`\>
+> **createExecutor**(`config`): [`ExecutorFactory`](#executorfactory)\<`unknown`\>
 
 Defined in: [runtime/supervise/runtime.ts:1413](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/runtime.ts#L1413)
 
@@ -15838,13 +15949,13 @@ per-vendor adapter or a closed `inline|sandbox|cli` switch — those bypass the
 
 #### Returns
 
-`ExecutorFactory`\<`unknown`\>
+[`ExecutorFactory`](#executorfactory)\<`unknown`\>
 
 ***
 
 ### createExecutorRegistry()
 
-> **createExecutorRegistry**(): `ExecutorRegistry`
+> **createExecutorRegistry**(): [`ExecutorRegistry`](#executorregistry)
 
 Defined in: [runtime/supervise/runtime.ts:1459](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/runtime.ts#L1459)
 
@@ -15860,7 +15971,7 @@ harness-derived runtime (`'sandbox'` for any `BackendType`); else fail loud.
 
 #### Returns
 
-`ExecutorRegistry`
+[`ExecutorRegistry`](#executorregistry)
 
 ***
 
diff --git a/docs/api/runtime/environment-provider.md b/docs/api/runtime/environment-provider.md
index 6a1b9c40..23b1761b 100644
--- a/docs/api/runtime/environment-provider.md
+++ b/docs/api/runtime/environment-provider.md
@@ -282,7 +282,7 @@ Defined in: [runtime/environment-provider.ts:269](https://github.com/tangle-netw
 
 ##### runtime?
 
-> `optional` **runtime?**: `Runtime`
+> `optional` **runtime?**: [`Runtime`](../runtime.md#runtime-3)
 
 Defined in: [runtime/environment-provider.ts:270](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/environment-provider.ts#L270)
 
@@ -442,7 +442,7 @@ Adapt a `SandboxClient` into the shared `AgentEnvironmentProvider` contract.
 
 ### providerAsExecutor()
 
-> **providerAsExecutor**(`provider`, `options?`): `ExecutorFactory`\<`unknown`\>
+> **providerAsExecutor**(`provider`, `options?`): [`ExecutorFactory`](../runtime.md#executorfactory)\<`unknown`\>
 
 Defined in: [runtime/environment-provider.ts:278](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/environment-provider.ts#L278)
 
@@ -462,4 +462,4 @@ Adapt an environment provider into an `ExecutorFactory` for `createExecutor`.
 
 #### Returns
 
-`ExecutorFactory`\<`unknown`\>
+[`ExecutorFactory`](../runtime.md#executorfactory)\<`unknown`\>
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index da847b6b..b9f061ff 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -19,7 +19,7 @@ importers:
         version: 0.14.0
       '@tangle-network/sandbox':
         specifier: '>=0.8.0 <1.0.0'
-        version: 0.8.2(viem@2.52.2(typescript@5.9.3)(zod@4.4.3))
+        version: 0.9.5(viem@2.52.2(typescript@5.9.3)(zod@4.4.3))
       '@types/node':
         specifier: ^25.9.3
         version: 25.9.3
@@ -633,6 +633,9 @@ packages:
   '@shikijs/vscode-textmate@10.0.2':
     resolution: {integrity: sha512-83yeghZ2xxin3Nj8z1NMd/NCuca+gsYXswywDy5bHvwlWL8tpTQmzGeUuHd9FC3E/SBEMvzJRwWEOz5gGes9Qg==}
 
+  '@tangle-network/agent-core@0.3.4':
+    resolution: {integrity: sha512-Hvz3ABRouNtBmRvGqPxifAO2yuILneJMylWH5jW/jeS2F03RvqkGYuXyGXWWLqosYbb3hVAvSEe4Ykm2FMGEDQ==}
+
   '@tangle-network/agent-eval@0.100.0':
     resolution: {integrity: sha512-yBupVJJAqHozhe1BL5xBuDObjvNsoY+XmJo7qfpw/w7rehAXbKliBb4k3XS1G55+GaYPjFA+xwPzlEDQISpMRw==}
     engines: {node: '>=20'}
@@ -646,12 +649,12 @@ packages:
   '@tangle-network/agent-interface@0.10.0':
     resolution: {integrity: sha512-oiREgihkeX/xcGEtFfi9AkAfU2VzuF7SSla2s0iliXPUXyHCIIx6jwzHiYdwb1ZGCfvC+T+0SWOIa6fN5u195g==}
 
+  '@tangle-network/agent-interface@0.13.0':
+    resolution: {integrity: sha512-CeTPGRLoXqpt0h+BCyFgZPkfU1zyRpWmqfD+85i/uk+uvbqxkfI+JprfKVf3tBsQuCgJPSjPt5qjdW8n3h2BVg==}
+
   '@tangle-network/agent-interface@0.14.0':
     resolution: {integrity: sha512-9CyGhIpl90E7v4MTm3b1ti3Bp7BfPigk2Nafgi21Lg0U+QxlNB656F2JmVpUuSbOo9aGZPtg5nXu5EBTlV5a1g==}
 
-  '@tangle-network/agent-interface@0.8.0':
-    resolution: {integrity: sha512-okz9LGKwPNKODNyT9Y7+T+sQsJ4g6oTy/hpWpxR6r2BI7pS6WqIdgCOQcx98+WtlPoibkY3ewRRAb8YJMrPHog==}
-
   '@tangle-network/sandbox@0.3.0':
     resolution: {integrity: sha512-KfgvKhsUaOpkJe3AD19w7s4hdQekBlXQGoNx0xS4u6vuQk5YnFzBgv+EQeHCkkgETpYOWS2AN+6u/JhSyWStMw==}
     peerDependencies:
@@ -672,8 +675,8 @@ packages:
       viem:
         optional: true
 
-  '@tangle-network/sandbox@0.8.2':
-    resolution: {integrity: sha512-MG3dj7SnF7vI8CagW1OwpkJSUq3IREpADBWp6knOukKxSYYCMGwJ0nPZz+O2eotI+Nl2A2LIGiHqPB82jgOvjw==}
+  '@tangle-network/sandbox@0.9.5':
+    resolution: {integrity: sha512-yvX2OX6uISBVnMQ+v6Upkesa3u8yj6BHxsfcS6p8Vze+M4WBpyhkwA+onzFHuo9rti557ItZn8yDu4a/klljvQ==}
     peerDependencies:
       '@mastra/core': ^1.36.0
       '@modelcontextprotocol/sdk': ^1.29.0
@@ -1621,6 +1624,11 @@ snapshots:
 
   '@shikijs/vscode-textmate@10.0.2': {}
 
+  '@tangle-network/agent-core@0.3.4':
+    dependencies:
+      '@tangle-network/agent-interface': 0.14.0
+      zod: 4.4.3
+
   '@tangle-network/agent-eval@0.100.0(typescript@5.9.3)':
     dependencies:
       '@asteasolutions/zod-to-openapi': 8.5.0(zod@4.4.3)
@@ -1645,11 +1653,11 @@ snapshots:
     dependencies:
       zod: 4.4.3
 
-  '@tangle-network/agent-interface@0.14.0':
+  '@tangle-network/agent-interface@0.13.0':
     dependencies:
       zod: 4.4.3
 
-  '@tangle-network/agent-interface@0.8.0':
+  '@tangle-network/agent-interface@0.14.0':
     dependencies:
       zod: 4.4.3
 
@@ -1659,9 +1667,10 @@ snapshots:
     optionalDependencies:
       viem: 2.52.2(typescript@5.9.3)(zod@4.4.3)
 
-  '@tangle-network/sandbox@0.8.2(viem@2.52.2(typescript@5.9.3)(zod@4.4.3))':
+  '@tangle-network/sandbox@0.9.5(viem@2.52.2(typescript@5.9.3)(zod@4.4.3))':
     dependencies:
-      '@tangle-network/agent-interface': 0.8.0
+      '@tangle-network/agent-core': 0.3.4
+      '@tangle-network/agent-interface': 0.13.0
     optionalDependencies:
       viem: 2.52.2(typescript@5.9.3)(zod@4.4.3)
 
diff --git a/src/runtime/environment-provider.ts b/src/runtime/environment-provider.ts
index 66bf6a5d..0832c0ee 100644
--- a/src/runtime/environment-provider.ts
+++ b/src/runtime/environment-provider.ts
@@ -506,6 +506,7 @@ function environmentAsSandboxInstance(
       return {
         response: resultFromEvents(events, text).content,
         success: true,
+        status: 'success',
         durationMs: 0,
         ...(usage ? { usage } : {}),
       }
@@ -908,6 +909,7 @@ function promptResultFromAgentTurnResult(result: AgentTurnResult): PromptResult
   return {
     response: result.text,
     success: result.success,
+    status: result.success ? 'success' : 'failed',
     durationMs: 0,
     ...(result.error ? { error: result.error } : {}),
     ...(result.usage ? { usage: result.usage } : {}),
diff --git a/src/runtime/index.ts b/src/runtime/index.ts
index 8a889a78..0e1b78b8 100644
--- a/src/runtime/index.ts
+++ b/src/runtime/index.ts
@@ -459,8 +459,11 @@ export type {
   Budget,
   Executor,
   ExecutorContext,
+  ExecutorFactory,
+  ExecutorRegistry,
   ExecutorResult,
   ResultBlobStore,
+  Runtime,
   Scope,
   Settled,
   Spend,
diff --git a/src/runtime/supervise/types.ts b/src/runtime/supervise/types.ts
index 8b45a98f..f2c7d4dd 100644
--- a/src/runtime/supervise/types.ts
+++ b/src/runtime/supervise/types.ts
@@ -132,8 +132,8 @@ export type UsageEvent =
   | { kind: 'cost'; usd: number }
   | { kind: 'iteration' }
 
-/** The runtime tag of a `Executor` impl. Open by intent — `string` so a BYO executor
- *  names its own runtime; the built-ins use these literals. */
+/** The runtime tag of an `Executor` impl. Open by intent: custom runtimes use their own string name.
+ * External executors can register additional runtime strings without widening this type. */
 export type Runtime = 'router' | 'inline' | 'sandbox' | 'cli' | (string & {})
 
 // ── Executor resolution (OPEN registry, not a switch) ─────────────────────────