diff --git a/.env.example b/.env.example
index 89978fc..1aa5cc9 100644
--- a/.env.example
+++ b/.env.example
@@ -32,3 +32,29 @@ PUBLIC_BASE_URL=http://127.0.0.1:8080/v1
 RATE_PER_MIN=600
 BURST=200
 LOG_LEVEL=INFO
+
+# ============================================================================
+# AntSeed marketplace (OPTIONAL — only with `docker compose --profile antseed`)
+# ============================================================================
+# AntSeed lets the router buy inference from a decentralized marketplace, paid in
+# REAL USDC on Base mainnet from a hot wallet you control. Off by default.
+#
+# ⚠️  ALWAYS use a DEDICATED DEV WALLET here with a tiny balance — NEVER your
+#     production wallet key. ANTSEED_IDENTITY_HEX IS a private key: treat it like
+#     a password and never commit it (.env / .env.secrets are gitignored).
+#
+# Setup (3 steps):
+#   1. Generate a dev wallet:   ./scripts/gen-dev-wallet.sh   (prints the
+#      ANTSEED_IDENTITY_HEX and ANTSEED_CONTROL_TOKEN lines; paste them into .env)
+#   2. Bring it up:             docker compose --profile antseed up -d --build
+#   3. Get the address to fund: docker compose exec antseed antseed buyer balance --json
+#      then send a little USDC + ETH (gas) on **Base mainnet** to that address,
+#      and deposit the USDC into escrow from the dashboard Catalog (wallet cell).
+ANTSEED_IDENTITY_HEX=
+# Shared secret enabling the dashboard's wallet self-service (deposit/withdraw).
+# Same value on the router and the antseed sidecar. Unset => those endpoints return 503.
+ANTSEED_CONTROL_TOKEN=
+# Wide outer spend ceilings (USD per million tokens). The real per-call price gate
+# is the caller's Σ_pol policy; these are just rails.
+ANTSEED_MAX_INPUT=1000
+ANTSEED_MAX_OUTPUT=1000
diff --git a/behave.ini b/behave.ini
new file mode 100644
index 0000000..8c2595d
--- /dev/null
+++ b/behave.ini
@@ -0,0 +1,4 @@
+[behave]
+paths = features
+tags = -manual
+show_timings = true
diff --git a/docs/PROVIDERS.md b/docs/PROVIDERS.md
index 2087f76..cb8c962 100644
--- a/docs/PROVIDERS.md
+++ b/docs/PROVIDERS.md
@@ -46,6 +46,20 @@ The host pins the policy-selected peer per request via `x-antseed-pin-peer`
 (the browse-mode buyer disables auto-selection), keeping peer choice inside
 Σ_pol rather than an opaque buyer-side router.
 
+### Local dev wallet (testing)
+
+For local testing use a **dedicated dev wallet**, never your production key.
+`./scripts/gen-dev-wallet.sh` prints a fresh `ANTSEED_IDENTITY_HEX` +
+`ANTSEED_CONTROL_TOKEN` to paste into `.env`; bring the sidecar up
+(`docker compose --profile antseed up -d`), read the derived address with
+`docker compose exec antseed antseed buyer balance --json`, fund it with a little
+USDC + ETH (gas) on Base mainnet, then **Deposit** into escrow from the dashboard
+Catalog (wallet cell). Keep dev and prod wallet secrets separate. See `.env.example`.
+
+> Note: the AntSeed deposits contract **locks** deposited funds — an immediate
+> `withdraw` after a `deposit` reverts. Funds are safe in escrow and become
+> withdrawable later, or are spent as the buyer routes paid calls.
+
 ### Running the node (vendored sidecar)
 
 Built from `Dockerfile.antseed` (pinned `@antseed/cli`, `socat`) and run by
diff --git a/features/01_onboarding.feature b/features/01_onboarding.feature
new file mode 100644
index 0000000..8fffa94
--- /dev/null
+++ b/features/01_onboarding.feature
@@ -0,0 +1,34 @@
+Feature: Onboarding & setup — a new user gets a running, healthy stack
+  The clone/compose steps themselves are environment-level (@manual: cannot be
+  re-run inside the suite); here we assert their OUTCOME on the running stack.
+
+  @p0 @onboarding
+  Scenario: The core engine submodule is populated (recursive clone outcome)
+    Then the file "core/router.lua" exists
+    And the file "core/llm_policy.lua" exists
+
+  @p0 @onboarding
+  Scenario: The stack is up and healthy (compose up outcome)
+    Given the stack is healthy
+    When I GET "/healthz" as none
+    Then the status is 200
+    And the field "ok" equals "True"
+
+  @p0 @onboarding
+  Scenario: The router loaded its catalog (engine embedded + config.live.lua)
+    Given I have a caller token
+    When I GET "/v1/models" as consumer
+    Then the status is 200
+    And the array "data" has at least 5 items
+
+  @manual @onboarding
+  Scenario: Recursive clone (manual — run once on a fresh machine)
+    # git clone --recursive https://github.com/genlayerlabs/unhardcoded.git
+    # -> core/ submodule populated; covered by the 'submodule populated' outcome above.
+    Given the stack is healthy
+
+  @manual @onboarding
+  Scenario: docker compose up --build (manual — environment setup)
+    # cp .env.example .env.secrets; fill secrets; docker compose up -d --build
+    # -> router + ingress healthy; covered by the 'stack up and healthy' outcome above.
+    Given the stack is healthy
diff --git a/features/02_auth.feature b/features/02_auth.feature
new file mode 100644
index 0000000..c4295d2
--- /dev/null
+++ b/features/02_auth.feature
@@ -0,0 +1,39 @@
+Feature: Authentication — dashboard sessions and the caller bearer contract
+
+  Background:
+    Given the stack is healthy
+
+  @p0 @auth
+  Scenario: DASHBOARD_NO_AUTH grants local admin to the console API
+    When I GET "/dashboard/api/stats" as admin
+    Then the status is 200
+    And the field "viewer_role" equals "admin"
+
+  @p0 @auth
+  Scenario: A valid caller bearer token is accepted on /v1
+    Given I have a caller token
+    When I GET "/v1/models" as consumer
+    Then the status is 200
+
+  @p0 @auth
+  Scenario: A missing caller token is rejected on /v1
+    When I GET "/v1/models" as none
+    Then the status is 401
+    And the field "error.code" equals "caller_auth"
+
+  @p1 @auth
+  Scenario: A consumer can log into the dashboard with their API key (scoped session)
+    Given I have a caller token
+    When I log into the dashboard with my caller key
+    Then the status is 200
+    And the field "role" equals "consumer"
+
+  @manual @auth
+  Scenario: Admin password login (manual — needs DASHBOARD_PASSWORD_SHA256 set and NO_AUTH off)
+    # POST /dashboard/login {password} -> sets an admin session cookie.
+    # Not auto-tested: the local dev stack runs with DASHBOARD_NO_AUTH=1.
+    Given the stack is healthy
+
+  @manual @auth
+  Scenario: Trusted-header SSO admin (manual — needs a reverse proxy injecting the header+secret)
+    Given the stack is healthy
diff --git a/features/03_consumer_api.feature b/features/03_consumer_api.feature
new file mode 100644
index 0000000..326ba65
--- /dev/null
+++ b/features/03_consumer_api.feature
@@ -0,0 +1,94 @@
+Feature: Consumer API flows (/v1) — the calling service's surface
+  As a consuming service I call /v1 with my bearer token and the router
+  decides/falls-back over the operator's provider keys. All end-to-end chats
+  here route to codex ($0) so the suite is free.
+
+  Background:
+    Given the stack is healthy
+    And I have a caller token
+
+  @p0 @api
+  Scenario: List the routable model catalog
+    When I GET "/v1/models" as consumer
+    Then the status is 200
+    And the field "object" equals "list"
+    And the array "data" has at least 5 items
+    And the array "data" includes an item where "id" equals "profile:default"
+
+  @p0 @api
+  Scenario: Chat completion runs a policy and returns a real answer + trace
+    When I POST a free chat as consumer
+    Then the status is 200
+    And the field "object" equals "chat.completion"
+    And the field "choices[0].message.content" is non-empty
+    And the field "usage.total_tokens" is a number
+    And the field "x_router.provider" is non-empty
+    And the field "x_router.served_model_id" is non-empty
+    And the field "x_router.decision_trace" is present
+
+  @p0 @api
+  Scenario: Per-call policy_ir is admitted and executed
+    When I POST "/v1/chat/completions" as consumer with json
+      """
+      {"model":"","max_tokens":16,"messages":[{"role":"user","content":"hi"}],
+       "policy_ir":["policy",
+         ["and",["meets_req"],["not",["is","disabled"]],["family_eq","gpt-5.5"]],
+         ["neg",["normalize",["field","price_in"]]],
+         ["argmax"],["id"],["always",{"action":"next_candidate"}]]}
+      """
+    Then the status is 200
+    And the field "x_router.policy_fingerprint" is present
+    And the field "choices[0].message.content" is non-empty
+
+  @p1 @api
+  Scenario: Malformed policy_ir is rejected cleanly at admission (no spend)
+    When I POST "/v1/chat/completions" as consumer with json
+      """
+      {"model":"","messages":[{"role":"user","content":"hi"}],
+       "policy_ir":["policy","not-a-valid-term"]}
+      """
+    Then the status is 400
+    And the field "error.type" equals "invalid_request_error"
+    And the field "error.message" contains "policy_ir"
+
+  @p0 @api
+  Scenario: Sigma_flow DAG runs and returns the sink answer with a per-node trace
+    When I POST a free flow as consumer
+    Then the status is 200
+    And the field "x_router.provider" equals "flow"
+    And the field "choices[0].message.content" is non-empty
+    And the array "x_router.decision_trace.flow_nodes" has at least 2 items
+    And every item in "x_router.decision_trace.flow_nodes" has a "provider"
+    And every item in "x_router.decision_trace.flow_nodes" has a "served_model_id"
+
+  @p1 @api
+  Scenario: Malformed flow_ir is rejected at admission
+    When I POST "/v1/chat/completions" as consumer with json
+      """
+      {"model":"","messages":[{"role":"user","content":"hi"}],
+       "flow_ir":["flow",{"out":{"kind":"output","inputs":["missing"]}}]}
+      """
+    Then the status is 400
+    And the field "error.message" contains "flow_ir"
+
+  @p1 @api
+  Scenario: Per-key usage self-service is scoped and sanitized
+    When I POST a free chat as consumer
+    And I GET "/v1/usage?window=24h" as consumer
+    Then the status is 200
+    And the field "kind" equals "router_key_usage"
+    And the field "key_sha256_prefix" is non-empty
+    And the field "totals.requests" is at least 1
+    And the field "consumer_settings.status" is present
+
+  @p0 @api
+  Scenario: Missing bearer token is rejected
+    When I GET "/v1/models" as none
+    Then the status is 401
+    And the field "error.code" equals "caller_auth"
+
+  @p0 @api
+  Scenario: Unknown bearer token is rejected
+    When I GET "/v1/models" as bad
+    Then the status is 401
+    And the field "error.code" equals "caller_auth"
diff --git a/features/04_dashboard.feature b/features/04_dashboard.feature
new file mode 100644
index 0000000..78cf094
--- /dev/null
+++ b/features/04_dashboard.feature
@@ -0,0 +1,109 @@
+Feature: Dashboard data — what the operator console renders MUST be present and correct
+  The dashboard is a thin renderer of /dashboard/api/*. These scenarios assert the
+  backing data is complete and correct (so the frontend shows real, correct values
+  in Analytics, Activity, Catalog, Config, Consumers, Provider keys). Seeded
+  activity (one chat + one flow) is created in before_all.
+
+  Background:
+    Given the stack is healthy
+
+  @p0 @dashboard
+  Scenario: The dashboard HTML page loads with all its tabs and renderers
+    When I GET "/dashboard" as admin
+    Then the status is 200
+    And the response text contains "Analytics"
+    And the response text contains "Builder"
+    And the response text contains "Activity"
+    And the response text contains "Catalog"
+    And the response text contains "Config"
+    And the response text contains "renderActivity"
+    And the response text contains "renderAnalytics"
+
+  @p0 @dashboard
+  Scenario: Analytics — totals, breakdowns and health are populated
+    When I GET "/dashboard/api/stats" as admin
+    Then the status is 200
+    And the field "viewer_role" equals "admin"
+    And the field "totals.requests" is at least 1
+    And the field "totals.tokens_total" is a number
+    And the field "totals.cost_usd" is a number
+    And the field "by_provider" is non-empty
+    And the field "by_status" is non-empty
+    And the field "health_summary" is present
+    And the array "daily_totals" has at least 1 items
+
+  @p0 @dashboard
+  Scenario: Activity — recent requests carry a full, correct per-request trace
+    When I POST a free chat as consumer
+    And I POST a free flow as consumer
+    And I GET "/dashboard/api/stats" as admin
+    Then the status is 200
+    And the array "recent" has at least 2 items
+    And every item in "recent" has a "status"
+    And every item in "recent" has a "ts"
+    And the array "recent" includes an item where "provider" equals "flow"
+    And the array "recent" includes an item where "provider" equals "openai"
+
+  @p0 @dashboard
+  Scenario: Catalog (Market) — families list with prices and per-seller perf
+    When I GET "/dashboard/api/market" as admin
+    Then the status is 200
+    And the array "families" has at least 3 items
+    And every item in "families" has a "family"
+    And every item in "families" has a "quality"
+    And every item in "families" has a "rows"
+    And the array "families" includes an item where "family" equals "gpt-5.5"
+
+  @p0 @dashboard
+  Scenario: Policies — the default profile and live providers with health
+    When I GET "/dashboard/api/policies" as admin
+    Then the status is 200
+    And the array "profiles" includes an item where "name" equals "default"
+    And the field "providers" is non-empty
+    And every item in "providers" has a "health"
+
+  @p1 @dashboard
+  Scenario: Builder field vocabulary is available
+    When I GET "/dashboard/api/fields" as admin
+    Then the status is 200
+    And the array "fields" includes an item where "name" equals "price_in"
+    And the array "fields" includes an item where "name" equals "latency_ms"
+    And the array "fields" includes an item where "name" equals "success_rate"
+
+  @p1 @dashboard
+  Scenario: Config — per-provider tunable knobs are present
+    When I GET "/dashboard/api/config" as admin
+    Then the status is 200
+    And the field "knobs" is non-empty
+
+  @p1 @dashboard
+  Scenario: Consumers — the test consumer is listed with stats
+    When I GET "/dashboard/api/keys" as admin
+    Then the status is 200
+    And the array "keys" includes an item where "consumer" equals "bdd-test"
+
+  @p1 @dashboard
+  Scenario: Provider keys — credentials snapshot is privatized but present
+    When I GET "/dashboard/api/provider-keys" as admin
+    Then the status is 200
+    And the field "rows" is non-empty
+
+  @p1 @dashboard
+  Scenario: Codex accounts — an active account is configured
+    When I GET "/dashboard/api/codex/accounts" as admin
+    Then the status is 200
+    And the field "accounts" is non-empty
+    And the field "active" is non-empty
+    And the field "activity" is present
+
+  @p1 @dashboard
+  Scenario: Builder dry-run ranking (policy preview) returns an ordering (no spend)
+    When I POST "/dashboard/api/policy/preview" as admin with json
+      """
+      {"policy_ir":["policy",
+        ["and",["meets_req"],["not",["is","disabled"]],["family_eq","gpt-5.5"]],
+        ["neg",["normalize",["field","price_in"]]],
+        ["argmax"],["id"],["always",{"action":"next_candidate"}]]}
+      """
+    Then the status is 200
+    And the field "ranked" is non-empty
diff --git a/features/05_providers.feature b/features/05_providers.feature
new file mode 100644
index 0000000..2613d65
--- /dev/null
+++ b/features/05_providers.feature
@@ -0,0 +1,42 @@
+Feature: Providers — OpenRouter, Codex, discovery and registered model traits
+  Asserts the configured providers are live and the registered benchmark/modality
+  fields (model_meta) are part of the field vocabulary the builder/policies use.
+
+  Background:
+    Given the stack is healthy
+
+  @p0 @providers
+  Scenario: OpenRouter and Codex providers are present with health
+    When I GET "/dashboard/api/policies" as admin
+    Then the status is 200
+    And the array "providers" includes an item where "name" equals "openrouter"
+    And the array "providers" includes an item where "name" equals "openai"
+    And every item in "providers" has a "health"
+
+  @p1 @providers
+  Scenario: Codex is configured as a ChatGPT-subscription (openai_codex) provider
+    When I GET "/dashboard/api/policies" as admin
+    Then the status is 200
+    And the array "providers" includes an item where "name" equals "openai"
+    And the matched item field "api_kind" equals "openai_codex"
+
+  @p1 @providers
+  Scenario: A Codex account is active (auth wired through)
+    When I GET "/dashboard/api/codex/accounts" as admin
+    Then the status is 200
+    And the field "accounts" is non-empty
+    And the field "active" is non-empty
+
+  @p1 @providers
+  Scenario: Registered model traits (model_meta benchmarks) are in the field vocabulary
+    When I GET "/dashboard/api/fields" as admin
+    Then the status is 200
+    And the array "fields" includes an item where "name" equals "bench_intelligence"
+    And the array "fields" includes an item where "name" equals "bench_coding"
+
+  @p1 @providers
+  Scenario: The discovered catalog exposes routable families
+    Given I have a caller token
+    When I GET "/v1/models" as consumer
+    Then the status is 200
+    And the array "data" includes an item where "id" equals "family:gpt-5.5"
diff --git a/features/06_consumer_keys.feature b/features/06_consumer_keys.feature
new file mode 100644
index 0000000..bc59eaf
--- /dev/null
+++ b/features/06_consumer_keys.feature
@@ -0,0 +1,73 @@
+Feature: Consumer key lifecycle (operator issues + governs ingress tokens)
+  Each scenario mints its own throwaway consumer so they stay isolated. All
+  rejections happen at the ingress BEFORE any LLM call, so these are free.
+
+  Background:
+    Given the stack is healthy
+
+  @p0 @consumer-keys
+  Scenario: A freshly issued key authenticates against /v1 immediately
+    When I create a consumer key for "bdd-new"
+    Then the status is 200
+    And the field "api_key" is non-empty
+    And the field "sha256_prefix" is non-empty
+    When I GET "/v1/models" as consumer
+    Then the status is 200
+    And the field "object" equals "list"
+
+  @p1 @consumer-keys
+  Scenario: allowed_routes restricts which routes a key may call
+    When I create a consumer key for "bdd-route"
+    Then the status is 200
+    When I POST "/dashboard/api/consumers/bdd-route" as admin with json
+      """
+      {"allowed_routes":["family:does-not-exist"]}
+      """
+    Then the status is 200
+    When I POST "/v1/chat/completions" as consumer with json
+      """
+      {"model":"family:gpt-5.5","messages":[{"role":"user","content":"hi"}]}
+      """
+    Then the status is 403
+    And the field "error.code" equals "caller_route_not_allowed"
+
+  @p1 @consumer-keys
+  Scenario: rate_per_min / burst throttle a key
+    When I create a consumer key for "bdd-rate"
+    Then the status is 200
+    When I POST "/dashboard/api/consumers/bdd-rate" as admin with json
+      """
+      {"allowed_routes":[],"rate_per_min":1,"burst":1}
+      """
+    Then the status is 200
+    When I POST a free chat as consumer
+    Then the status is 200
+    When I POST a free chat as consumer
+    Then the status is 429
+    And the field "error.code" equals "caller_rate_limit"
+
+  @p1 @consumer-keys
+  Scenario: A revoked key is rejected immediately
+    # Revoke drops the key's hash, so the token becomes unknown -> 401 caller_auth
+    # (not 403 caller_key_revoked, which only applies while the hash still maps).
+    When I create a consumer key for "bdd-revoke"
+    Then the status is 200
+    When I revoke the created key
+    Then the status is 200
+    And the field "removed_hashes" equals 1
+    When I GET "/v1/models" as consumer
+    Then the status is 401
+    And the field "error.code" equals "caller_auth"
+
+  @p2 @consumer-keys
+  Scenario: An inactive consumer's keys are all rejected
+    When I create a consumer key for "bdd-inactive"
+    Then the status is 200
+    When I POST "/dashboard/api/consumers/bdd-inactive" as admin with json
+      """
+      {"status":"inactive"}
+      """
+    Then the status is 200
+    When I GET "/v1/models" as consumer
+    Then the status is 403
+    And the field "error.code" equals "caller_inactive"
diff --git a/features/07_money_antseed.feature b/features/07_money_antseed.feature
new file mode 100644
index 0000000..9049687
--- /dev/null
+++ b/features/07_money_antseed.feature
@@ -0,0 +1,79 @@
+Feature: AntSeed marketplace — wallet, escrow and on-chain money
+  Split into two: @antseed READ-ONLY data checks (free — verify the dashboard
+  shows the real wallet/escrow correctly; needs the antseed sidecar up + funded,
+  so excluded from the default run), and @manual on-chain EXECUTION (real USDC on
+  Base mainnet — deposit/withdraw/spend, run by hand).
+
+  Run the read-only ones (sidecar up + funded; the @spend scenario stays gated behind RUN_ANTSEED_SPEND=1):  behave --tags=antseed
+
+  # ---- READ-ONLY: the money DATA the dashboard renders is real and correct ----
+
+  @antseed @money
+  Scenario: The dashboard Catalog shows the AntSeed wallet with real escrow data
+    Given the stack is healthy
+    When I GET "/dashboard/api/market" as admin
+    Then the status is 200
+    And the field "wallet.provider" equals "antseed"
+    And the field "wallet.address" contains "0x"
+    And the field "wallet.deposits_available" is a number
+    And the field "wallet.deposits_reserved" is present
+    And the field "wallet.connection" equals "connected"
+
+  @antseed @money
+  Scenario: AntSeed appears in the catalog as a marketplace provider
+    Given the stack is healthy
+    When I GET "/dashboard/api/policies" as admin
+    Then the status is 200
+    And the array "providers" includes an item where "name" equals "antseed"
+    And the matched item field "tier" equals "marketplace"
+
+  @antseed @spend @money
+  Scenario: AntSeed serves a request (routes to a peer) — proves the marketplace works
+    # NB: REAL MONEY. Costs a few cents of escrow + reserves ~1 USDC in a channel
+    # (returns on settle). Gated behind RUN_ANTSEED_SPEND=1 so it never runs by
+    # accident:  RUN_ANTSEED_SPEND=1 behave --tags=spend
+    Given the stack is healthy
+    And I have a caller token
+    When I POST "/v1/chat/completions" as consumer with json
+      """
+      {"model":"","max_tokens":16,"messages":[{"role":"user","content":"Reply: pong"}],
+       "policy_ir":["policy",
+         ["and",["meets_req"],["not",["is","disabled"]],["family_eq","glm-5.2"]],
+         ["neg",["normalize",["field","price_in"]]],
+         ["argmax"],["id"],["always",{"action":"next_candidate"}]]}
+      """
+    Then the status is 200
+    And the field "x_router.provider" equals "antseed"
+    And the field "x_router.served_model_id" equals "glm-5.2"
+    And the field "x_router.cost_usd" is a number
+
+  # ---- MANUAL: real on-chain transactions (spend / move funds) ----
+
+  @manual @money
+  Scenario: Set up a local AntSeed dev wallet (manual — done once per machine)
+    # The full local user flow (see .env.example + scripts/gen-dev-wallet.sh):
+    #   1. ./scripts/gen-dev-wallet.sh        -> prints ANTSEED_IDENTITY_HEX +
+    #      ANTSEED_CONTROL_TOKEN (a fresh secp256k1/EVM key — a DEV wallet, never prod)
+    #   2. paste both into .env
+    #   3. docker compose --profile antseed up -d --build
+    #   4. docker compose exec antseed antseed buyer balance --json   # -> the address
+    #   5. fund that address with a little USDC + ETH (gas) on Base mainnet
+    #   6. Deposit into escrow from the dashboard Catalog (wallet cell)
+    # The OUTCOME (wallet connected + escrow visible) is verified by the @antseed
+    # read-only scenarios above.
+    Given the stack is healthy
+
+  @manual @money
+  Scenario: Deposit USDC wallet -> escrow via the dashboard (real on-chain tx)
+    # POST /dashboard/api/wallet/deposit {amount} -> /x/wallet/deposit -> sidecar
+    # control :8379 -> antseed buyer deposit. Verified live: walletUSDC drops,
+    # depositsAvailable rises, and the Catalog wallet cell shows it.
+    Given the stack is healthy
+
+  @manual @money
+  Scenario: Withdraw escrow -> wallet (real on-chain tx)
+    # POST /dashboard/api/wallet/withdraw {amount}. NOTE: the AntSeed deposits
+    # contract LOCKS funds — an immediate withdraw after deposit reverts (custom
+    # error 0xea8e4eb5). Funds are safe in escrow; withdrawable after the lock or
+    # spendable by routing calls to antseed.
+    Given the stack is healthy
diff --git a/features/08_dashboard_ui.feature b/features/08_dashboard_ui.feature
new file mode 100644
index 0000000..cf86877
--- /dev/null
+++ b/features/08_dashboard_ui.feature
@@ -0,0 +1,46 @@
+Feature: Dashboard UI rendered in a real headless browser
+  Proves the operator actually SEES the data in the dashboard (real DOM render
+  via headless chromium), not just that the API returns it. Relies on the seeded
+  activity (one chat via codex + one flow) created in before_all.
+
+  @browser @p0
+  Scenario: The dashboard loads its shell and Activity shows the real flow run
+    Given I open the dashboard in a browser
+    Then I see "Analytics" rendered
+    When I click the "Activity" tab
+    Then I see "PROVIDER" rendered
+    And I see "flow" rendered
+
+  @browser @p0
+  Scenario: Catalog renders model families with prices
+    Given I open the dashboard in a browser
+    When I click the "Catalog" tab
+    Then I see "gpt-5.5" rendered
+
+  @browser @p0
+  Scenario: Analytics renders totals (requests / spend / tokens)
+    Given I open the dashboard in a browser
+    Then I see "Requests" rendered
+    And I see "Spend" rendered
+    And I see "Tokens" rendered
+
+  @browser @p1
+  Scenario: Config renders per-provider tunable knobs
+    Given I open the dashboard in a browser
+    When I click the "Config" tab
+    Then I see "codex" rendered
+
+  @browser @p1
+  Scenario: Provider keys tab renders the credentials view
+    Given I open the dashboard in a browser
+    When I click the "Provider keys" tab
+    Then I see "openrouter" rendered
+
+  @browser @p1 @regression
+  Scenario: An expanded Activity row survives the 15s auto-refresh (no auto-close bug)
+    Given I open the dashboard in a browser
+    When I click the "Activity" tab
+    And I expand the first Activity row
+    And I wait 17 seconds
+    Then an Activity row is still expanded
+
diff --git a/features/09_flow1.feature b/features/09_flow1.feature
new file mode 100644
index 0000000..8d664e1
--- /dev/null
+++ b/features/09_flow1.feature
@@ -0,0 +1,38 @@
+Feature: Flow 1 — the GLM ∥ GPT → merge ensemble produces the expected output
+  The concrete ensemble we ship in opencode: GPT-5.5 (served by Codex, $0) in
+  parallel with GLM-5.2 (OpenRouter), merged by GLM-5.2. Asserts that when run it
+  yields exactly the expected shape — a 3-node DAG with Codex + OpenRouter — and
+  that the run shows up correctly in the dashboard Activity.
+
+  Background:
+    Given the stack is healthy
+    And I have a caller token
+
+  @p0 @flow @flow1
+  Scenario: Running flow1 returns the merged answer with the expected per-node routing
+    When I run the flow1 ensemble (retry on flake)
+    Then the status is 200
+    And the field "object" equals "chat.completion"
+    And the field "x_router.provider" equals "flow"
+    And the field "choices[0].message.content" is non-empty
+    And the array "x_router.decision_trace.flow_nodes" has at least 3 items
+    And every item in "x_router.decision_trace.flow_nodes" has a "provider"
+    And every item in "x_router.decision_trace.flow_nodes" has a "served_model_id"
+    And the array "x_router.decision_trace.flow_nodes" includes an item where "provider" equals "openai"
+    And the array "x_router.decision_trace.flow_nodes" includes an item where "served_model_id" equals "z-ai/glm-5.2"
+
+  @p0 @flow @flow1
+  Scenario: The Codex (gpt) node really served via the subscription at $0
+    When I run the flow1 ensemble (retry on flake)
+    Then the status is 200
+    And the array "x_router.decision_trace.flow_nodes" includes an item where "served_model_id" equals "gpt-5.5"
+    And the matched item field "provider" equals "openai"
+    And the matched item field "price_out" equals "0.0"
+
+  @p1 @flow @flow1
+  Scenario: The flow1 run is recorded correctly in the dashboard Activity
+    When I run the flow1 ensemble (retry on flake)
+    Then the status is 200
+    When I GET "/dashboard/api/stats" as admin
+    Then the status is 200
+    And the array "recent" includes an item where "provider" equals "flow"
diff --git a/features/environment.py b/features/environment.py
new file mode 100644
index 0000000..be39775
--- /dev/null
+++ b/features/environment.py
@@ -0,0 +1,174 @@
+"""
+Behave environment for the unhardcoded user-flow BDD suite.
+
+Drives the LIVE local stack (ingress :8080 + router) — the same endpoints the
+dashboard frontend consumes — so the assertions prove the data the UI renders is
+present AND correct, not just that endpoints return 200.
+
+Assumptions (local-dev):
+  * stack up at BASE_URL (default http://127.0.0.1:8080)
+  * DASHBOARD_NO_AUTH=1 so /dashboard/api/* is reachable as admin
+  * a working $0 route exists for family gpt-5.5 (codex) so chat tests are FREE
+
+Run: nix-shell -p "python3.withPackages(ps: with ps; [behave requests])" \
+        --run 'behave features'
+"""
+import os
+import json
+import requests
+
+BASE_URL = os.environ.get("BASE_URL", "http://127.0.0.1:8080")
+
+# A $0, price-first policy pinned to gpt-5.5 -> resolves to codex (subscription,
+# cost 0). Keeps every end-to-end chat test free.
+FREE_POLICY_IR = [
+    "policy",
+    ["and", ["meets_req"], ["not", ["is", "disabled"]], ["family_eq", "gpt-5.5"]],
+    ["neg", ["normalize", ["field", "price_in"]]],
+    ["argmax"], ["id"], ["always", {"action": "next_candidate"}],
+]
+
+
+def _mint_caller_token(consumer="bdd-test"):
+    """Mint a consumer API key through the NO_AUTH dashboard (no session needed)."""
+    resp = requests.post(
+        f"{BASE_URL}/dashboard/api/keys", json={"consumer": consumer}, timeout=30)
+    resp.raise_for_status()
+    return resp.json()["api_key"]
+
+
+def before_all(context):
+    """Check the stack and NO_AUTH dashboard are up, mint a token, seed activity."""
+    context.base_url = BASE_URL
+    context.session = requests.Session()
+    # Sanity: stack is up.
+    h = requests.get(f"{BASE_URL}/healthz", timeout=10)
+    assert h.status_code == 200, f"stack not healthy: {h.status_code}"
+
+    # Sanity: dashboard is reachable without a login (NO_AUTH expected locally).
+    d = requests.get(f"{BASE_URL}/dashboard/api/full", timeout=10)
+    assert d.status_code == 200, (
+        "dashboard /api/full needs DASHBOARD_NO_AUTH=1 for the BDD suite "
+        f"(got {d.status_code}). Set it in .env.secrets and restart ingress."
+    )
+
+    context.caller_token = _mint_caller_token()
+
+    # Seed REAL activity so Activity / usage / stats / catalog have data to show:
+    # one chat + one 2-node flow, both $0 via codex.
+    _seed_activity(context)
+
+
+def before_scenario(context, scenario):
+    """Skip scenarios whose prerequisites are missing; start a browser for @browser."""
+    tags = scenario.effective_tags
+    if "antseed" in tags:
+        # Skip AntSeed scenarios unless the funded sidecar is up (keeps the suite green).
+        try:
+            w = (requests.get(f"{BASE_URL}/dashboard/api/market", timeout=10)
+                 .json().get("wallet") or {})
+        except Exception:
+            w = {}
+        if w.get("connection") != "connected":
+            scenario.skip("antseed sidecar not up/funded (wallet not connected)")
+            return
+        # Real-money spend is gated behind an explicit opt-in env var.
+        if "spend" in tags and os.environ.get("RUN_ANTSEED_SPEND") != "1":
+            scenario.skip("real-money antseed spend — set RUN_ANTSEED_SPEND=1 to run")
+            return
+    if "browser" in tags:
+        import shutil
+        from selenium import webdriver
+        from selenium.webdriver.chrome.options import Options
+        from selenium.webdriver.chrome.service import Service
+        opts = Options()
+        for flag in ("--headless=new", "--no-sandbox", "--disable-dev-shm-usage",
+                     "--disable-gpu", "--window-size=1400,2000"):
+            opts.add_argument(flag)
+        chromium = shutil.which("chromium") or shutil.which("chromium-browser")
+        if chromium:  # never assign None: recent selenium rejects it with TypeError
+            opts.binary_location = chromium
+        service = Service(executable_path=shutil.which("chromedriver"))
+        context.driver = webdriver.Chrome(service=service, options=opts)
+        context.driver.set_page_load_timeout(60)
+
+
+def after_scenario(context, scenario):
+    d = getattr(context, "driver", None)  # only @browser scenarios create a driver
+    if d is not None:
+        try:
+            d.quit()
+        except Exception:
+            pass  # best-effort teardown: a failed quit must not fail the scenario
+        context.driver = None
+
+
+def _seed_activity(context):
+    """POST one free chat and one free 2-node flow; context.seeded records success."""
+    hdr = auth_headers(context)
+    # one routed chat (codex, $0)
+    chat = requests.post(f"{context.base_url}/v1/chat/completions", headers=hdr, json={
+        "model": "", "max_tokens": 16,
+        "messages": [{"role": "user", "content": "Reply with one short sentence."}],
+        "policy_ir": FREE_POLICY_IR,
+    }, timeout=120)
+    # one flow (two codex nodes -> merge), $0
+    flow = ["flow", {
+        "q": {"kind": "input"},
+        "a": {"kind": "llm", "system": "Be concise.",
+              "policy": FREE_POLICY_IR, "inputs": ["q"]},
+        "b": {"kind": "llm", "system": "Synthesize.",
+              "policy": FREE_POLICY_IR, "inputs": ["a"],
+              "template": "Refine: $1"},
+        "out": {"kind": "output", "inputs": ["b"]},
+    }]
+    ran = requests.post(f"{context.base_url}/v1/chat/completions", headers=hdr, json={
+        "model": "", "max_tokens": 120,
+        "messages": [{"role": "user", "content": "Say hello."}],
+        "flow_ir": flow,
+    }, timeout=180)
+    context.seeded = chat.ok and ran.ok  # True only if both seed requests succeeded
+
+
+# ---- helpers used by steps ------------------------------------------------
+
+def auth_headers(context):
+    token = context.caller_token  # whichever caller token is current on the context
+    return {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
+
+
+def jpath(obj, path):
+    """Tiny dotted/indexed JSON getter: 'a.b[0].c'. Returns SENTINEL if missing."""
+    cur = obj
+    for part in _tokenize(path):
+        # int tokens index lists only, str tokens key dicts only; else it is missing
+        if not isinstance(cur, list if isinstance(part, int) else dict):
+            return _MISSING
+        try:
+            cur = cur[part]
+        except (KeyError, IndexError):
+            return _MISSING
+    return cur
+
+
+class _Missing:  # sentinel type: lets jpath report "absent" distinctly from JSON null
+    def __repr__(self):
+        return "<MISSING>"  # shows up verbatim in assertion failure messages
+
+
+_MISSING = _Missing()
+SENTINEL = _MISSING
+
+
+def _tokenize(path):
+    """Split 'a.b[0].c' into ['a', 'b', 0, 'c']: names stay str, indexes become int."""
+    tokens = []
+    for chunk in path.split("."):
+        while "[" in chunk:
+            # peel off one "name[idx]" (name may be empty, as in "[0][1]")
+            head, _, tail = chunk.partition("[")
+            index, chunk = tail.split("]", 1)
+            tokens += ([head] if head else []) + [int(index)]
+        if chunk:
+            tokens.append(chunk)
+    return tokens
diff --git a/features/fixtures/flow1.json b/features/fixtures/flow1.json
new file mode 100644
index 0000000..6dae779
--- /dev/null
+++ b/features/fixtures/flow1.json
@@ -0,0 +1,37 @@
+{
+  "model": "",
+  "max_tokens": 1200,
+  "messages": [{"role": "user", "content": "Propose the next step to implement an in-memory LRU cache in Python."}],
+  "flow_ir": ["flow", {
+    "q": { "kind": "input" },
+    "gpt": {
+      "kind": "llm",
+      "system": "You are a coding agent. Given the task and context, propose the single best NEXT STEP — concrete and short.",
+      "policy": ["policy",
+        ["and", ["meets_req"], ["not", ["is", "disabled"]], ["family_eq", "gpt-5.5"]],
+        ["neg", ["normalize", ["field", "price_in"]]],
+        ["argmax"], ["id"], ["always", { "action": "next_candidate" }]],
+      "inputs": ["q"]
+    },
+    "glm": {
+      "kind": "llm",
+      "system": "You are a coding agent. Given the task and context, propose the single best NEXT STEP — concrete and short.",
+      "policy": ["policy",
+        ["and", ["meets_req"], ["not", ["is", "disabled"]], ["family_eq", "z-ai/glm-5.2"]],
+        ["add", ["scale", 3, ["neg", ["normalize", ["field", "latency_ms"]]]], ["neg", ["normalize", ["field", "price_in"]]]],
+        ["argmax"], ["id"], ["always", { "action": "next_candidate" }]],
+      "inputs": ["q"]
+    },
+    "merge": {
+      "kind": "llm",
+      "system": "You are a coding agent. Two approaches for the next step are below (A and B). Pick the strongest, merge the best of each, and give the single best next step. Be concise.",
+      "policy": ["policy",
+        ["and", ["meets_req"], ["not", ["is", "disabled"]], ["family_eq", "z-ai/glm-5.2"]],
+        ["add", ["scale", 3, ["neg", ["normalize", ["field", "latency_ms"]]]], ["neg", ["normalize", ["field", "price_in"]]]],
+        ["argmax"], ["id"], ["always", { "action": "next_candidate" }]],
+      "inputs": ["gpt", "glm"],
+      "template": "Approach A (GPT-5.5):\n$1\n\nApproach B (GLM-5.2):\n$2\n\nDecide and give the single best next step now."
+    },
+    "out": { "kind": "output", "inputs": ["merge"] }
+  }]
+}
diff --git a/features/steps/browser_steps.py b/features/steps/browser_steps.py
new file mode 100644
index 0000000..5be37d7
--- /dev/null
+++ b/features/steps/browser_steps.py
@@ -0,0 +1,84 @@
+"""Real-browser (Selenium + headless chromium) steps: prove the dashboard
+actually RENDERS the data, not just that the API returns it."""
+import time
+from behave import given, when, then
+
+# Optional: only the @browser scenarios need selenium (run under the chromium
+# nix-shell). Keep the module importable so the non-browser suite runs without it.
+try:
+    from selenium.webdriver.common.by import By
+    from selenium.webdriver.support.ui import WebDriverWait
+except ImportError:
+    By = WebDriverWait = None
+
+
+def _body_text(driver):
+    return driver.find_element(By.TAG_NAME, "body").text
+
+
+def _wait_text(context, text, timeout=40):
+    needle = text.lower()  # case-insensitive match against the whole page body
+    WebDriverWait(context.driver, timeout).until(
+        lambda d: needle in _body_text(d).lower(), message=f"timed out waiting for {text!r}")
+
+
+@given('I open the dashboard in a browser')
+def step_open(context):
+    context.driver.get(context.base_url + "/dashboard")
+    # NO_AUTH -> goes straight to the app shell (sidebar nav with the tabs).
+    _wait_text(context, "Analytics", timeout=40)
+    _wait_text(context, "Activity", timeout=40)
+
+
+@when('I click the "{tab}" tab')
+def step_click_tab(context, tab):
+    # Tab buttons are <button class='tab'><span class='navIcon'>X</span>Label</button>,
+    # so the visible text is "<icon> Label" -> match by substring.
+    wanted, give_up_at = tab.lower(), time.time() + 20
+    while time.time() < give_up_at:
+        for button in context.driver.find_elements(By.CSS_SELECTOR, ".tab"):
+            try:
+                if wanted in button.text.strip().lower() and button.is_displayed():
+                    context.driver.execute_script("arguments[0].click();", button)
+                    time.sleep(0.8)  # let the tab's loader fetch + render
+                    return
+            except Exception:
+                continue
+        time.sleep(0.3)
+    raise AssertionError(f'tab {tab!r} not found/clickable')
+
+
+@then('I see "{text}" rendered')
+def step_see(context, text):
+    _wait_text(context, text)
+
+
+@when('I expand the first Activity row')
+def step_expand_row(context):
+    driver = context.driver
+    rows = driver.find_elements(By.CSS_SELECTOR, "#recent .actRow")
+    assert rows, "no Activity rows to expand"
+    driver.execute_script("arguments[0].click();", rows[0])
+    time.sleep(0.5)  # give the click handler time to un-hide the detail panel
+    expanded = driver.find_elements(By.CSS_SELECTOR, "#recent .actDetail:not(.hidden)")
+    assert expanded, "row did not expand on click"
+
+
+@when('I wait {seconds:d} seconds')
+def step_wait(context, seconds):
+    time.sleep(seconds)
+
+
+@then('an Activity row is still expanded')
+def step_still_expanded(context):
+    open_details = context.driver.find_elements(
+        By.CSS_SELECTOR, "#recent .actDetail:not(.hidden)")
+    assert open_details, ("an expanded Activity row collapsed by itself "
+                          "(auto-refresh bug regressed)")
+
+
+@then('I do NOT see "{text}" rendered')
+def step_not_see(context, text):
+    time.sleep(1.0)  # fixed settle delay so late-rendering text has a chance to appear
+    assert text.lower() not in _body_text(context.driver).lower(), \
+        f'unexpectedly saw {text!r} on screen'
diff --git a/features/steps/steps.py b/features/steps/steps.py
new file mode 100644
index 0000000..29c5cae
--- /dev/null
+++ b/features/steps/steps.py
@@ -0,0 +1,216 @@
+"""Generic + specific step definitions for the unhardcoded user-flow suite."""
+import json
+import requests
+from behave import given, when, then
+from environment import auth_headers, jpath, SENTINEL, FREE_POLICY_IR
+
+
+# ---- request steps --------------------------------------------------------
+
+def _do(context, method, path, *, auth, body=None, stream=False):
+    """Send one HTTP request and stash the outcome on the behave context.
+
+    Sets context.resp, context.resp_text and context.json (None if not JSON).
+    """
+    if auth == "consumer":
+        hdrs = auth_headers(context)
+    elif auth == "bad":
+        hdrs = {"Authorization": "Bearer not-a-real-token",
+                "Content-Type": "application/json"}
+    else:
+        # "none" sends a bare JSON header; "admin" (dashboard NO_AUTH) sends nothing
+        hdrs = {"Content-Type": "application/json"} if auth == "none" else {}
+    context.resp = requests.request(method, context.base_url + path, headers=hdrs,
+                                    json=body, timeout=200, stream=stream)
+    context.resp_text = context.resp.text  # NB: context.text is reserved by behave
+    try:
+        context.json = context.resp.json()
+    except Exception:
+        context.json = None
+
+
+@given('the stack is healthy')
+def step_healthy(context):
+    probe = requests.get(f"{context.base_url}/healthz", timeout=10)
+    assert probe.status_code == 200, probe.status_code
+    assert probe.json().get("ok") is True  # body must report ok: true, not just 200
+
+
+@given('I have a caller token')
+def step_have_token(context):
+    assert getattr(context, "caller_token", None), "no caller token minted"
+
+
+@when('I GET "{path}" as {auth}')
+def step_get(context, path, auth):
+    _do(context, "GET", path, auth=auth)
+
+
+# NOTE: behave puts a step's docstring (the body) in context.text BEFORE the step
+# runs; we read it here. _do() stores the response body in context.resp_text instead.
+@when('I POST "{path}" as {auth} with json')
+def step_post_json(context, path, auth):
+    body = json.loads(context.text)
+    _do(context, "POST", path, auth=auth, body=body)
+
+
+@when('I create a consumer key for "{consumer}"')
+def step_create_key(context, consumer):
+    _do(context, "POST", "/dashboard/api/keys", auth="admin", body={"consumer": consumer})
+    assert context.resp.status_code == 200, context.resp_text[:200]
+    context.created_consumer = consumer
+    context.created_key = context.json["api_key"]
+    context.created_prefix = context.json["sha256_prefix"]
+    context.caller_token = context.created_key  # subsequent "as consumer" uses it (scenario-scoped)
+
+
+@when('I revoke the created key')
+def step_revoke_created(context):
+    _do(context, "POST", "/dashboard/api/keys/revoke", auth="admin",
+        body={"consumer": context.created_consumer, "sha256_prefix": context.created_prefix})
+
+
+@when('I run the flow1 ensemble (retry on flake)')
+def step_flow1(context):
+    """POST fixtures/flow1.json up to 5 times; stop at the first chat.completion."""
+    import os
+    here = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    # The fixture holds non-ASCII text, so decode as UTF-8 whatever the locale is.
+    with open(os.path.join(here, "fixtures", "flow1.json"), encoding="utf-8") as f:
+        body = json.load(f)
+    for _ in range(5):
+        _do(context, "POST", "/v1/chat/completions", auth="consumer", body=body)
+        if context.resp.status_code == 200 and isinstance(context.json, dict) \
+                and context.json.get("object") == "chat.completion":
+            return
+    raise AssertionError(
+        f"flow1 never succeeded in 5 tries; last status {context.resp.status_code}: "
+        f"{context.resp_text[:200]}")
+
+
+@when('I log into the dashboard with my caller key')
+def step_dash_login(context):
+    _do(context, "POST", "/dashboard/login", auth="none", body={"api_key": context.caller_token})
+
+
+@then('the file "{path}" exists')
+def step_file_exists(context, path):
+    import os
+    assert os.path.exists(path), f"missing: {path}"
+
+
+@when('I POST a free chat as consumer')
+def step_free_chat(context):
+    _do(context, "POST", "/v1/chat/completions", auth="consumer", body={
+        "model": "", "max_tokens": 16,
+        "messages": [{"role": "user", "content": "Reply with one short sentence."}],
+        "policy_ir": FREE_POLICY_IR,
+    })
+
+
+@when('I POST a free flow as consumer')
+def step_free_flow(context):
+    flow = ["flow", {
+        "q": {"kind": "input"},
+        "a": {"kind": "llm", "system": "Be concise.", "policy": FREE_POLICY_IR, "inputs": ["q"]},
+        "b": {"kind": "llm", "system": "Refine.", "policy": FREE_POLICY_IR, "inputs": ["a"],
+              "template": "Refine: $1"},
+        "out": {"kind": "output", "inputs": ["b"]},
+    }]
+    _do(context, "POST", "/v1/chat/completions", auth="consumer", body={
+        "model": "", "max_tokens": 120,
+        "messages": [{"role": "user", "content": "Say hello."}],
+        "flow_ir": flow,
+    })
+
+
+# ---- assertion steps ------------------------------------------------------
+
+@then('the status is {code:d}')
+def step_status(context, code):
+    assert context.resp.status_code == code, \
+        f"expected {code}, got {context.resp.status_code}: {context.resp_text[:300]}"
+
+
+@then('the field "{path}" is present')
+def step_present(context, path):
+    v = jpath(context.json, path)
+    assert v is not SENTINEL, f'"{path}" missing in {str(context.json)[:300]}'
+
+
+@then('the field "{path}" is non-empty')
+def step_nonempty(context, path):
+    v = jpath(context.json, path)
+    assert v is not SENTINEL, f'"{path}" missing'
+    assert v not in (None, "", [], {}), f'"{path}" is empty: {v!r}'
+
+
+@then('the field "{path}" equals "{value}"')
+def step_equals_str(context, path, value):
+    v = jpath(context.json, path)
+    assert str(v) == value, f'"{path}" = {v!r}, expected {value!r}'
+
+
+@then('the field "{path}" equals {value:d}')
+def step_equals_int(context, path, value):
+    v = jpath(context.json, path)
+    assert v == value, f'"{path}" = {v!r}, expected {value}'
+
+
+@then('the field "{path}" is a number')
+def step_is_number(context, path):
+    v = jpath(context.json, path)
+    assert isinstance(v, (int, float)) and not isinstance(v, bool), f'"{path}" = {v!r}'
+
+
+@then('the field "{path}" is at least {value:d}')
+def step_at_least(context, path, value):  # bools are rejected, as in "is a number"
+    v = jpath(context.json, path)
+    assert isinstance(v, (int, float)) and not isinstance(v, bool), f'"{path}" not numeric: {v!r}'
+    assert v >= value, f'"{path}" = {v}, expected >= {value}'
+
+
+@then('the field "{path}" contains "{sub}"')
+def step_contains(context, path, sub):
+    v = jpath(context.json, path)
+    assert v is not SENTINEL, f'"{path}" missing'
+    assert sub in str(v), f'"{path}" = {v!r} does not contain {sub!r}'
+
+
+@then('the array "{path}" has at least {n:d} items')
+def step_array_len(context, path, n):
+    v = jpath(context.json, path)
+    assert isinstance(v, list), f'"{path}" is not a list: {type(v)}'
+    assert len(v) >= n, f'"{path}" has {len(v)} items, expected >= {n}'
+
+
+@then('the array "{path}" includes an item where "{key}" equals "{value}"')
+def step_array_item(context, path, key, value):
+    arr = jpath(context.json, path)
+    assert isinstance(arr, list), f'"{path}" not a list'
+    # first match wins and is remembered for the "matched item field" steps
+    hit = next((it for it in arr if str(jpath(it, key)) == value), SENTINEL)
+    if hit is SENTINEL:
+        raise AssertionError(f'no item in "{path}" with {key}=={value!r}; '
+                             f'saw {[jpath(i, key) for i in arr][:10]}')
+    context.matched_item = hit
+
+
+@then('the matched item field "{path}" equals "{value}"')
+def step_matched_equals(context, path, value):
+    v = jpath(context.matched_item, path)
+    assert str(v) == value, f'matched item "{path}" = {v!r}, expected {value!r}'
+
+
+@then('every item in "{path}" has a "{key}"')
+def step_every_has(context, path, key):
+    arr = jpath(context.json, path)
+    assert isinstance(arr, list), f'"{path}" not a list'
+    assert arr, f'"{path}" empty'
+    lacking = next((it for it in arr if jpath(it, key) is SENTINEL), SENTINEL)
+    assert lacking is SENTINEL, f'item missing {key}: {str(lacking)[:200]}'
+
+
+@then('the response text contains "{sub}"')
+def step_text_contains(context, sub):
+    assert sub in context.resp_text, f'response text missing {sub!r}'
diff --git a/scripts/gen-dev-wallet.sh b/scripts/gen-dev-wallet.sh
new file mode 100755
index 0000000..30ba00b
--- /dev/null
+++ b/scripts/gen-dev-wallet.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env sh
+# Generate a fresh AntSeed *dev* wallet identity (a secp256k1 / EVM private key)
+# plus a control token, and print the two .env lines to paste in.
+#
+# The sidecar derives the Base-mainnet address from the key. After adding these
+# to .env and running `docker compose --profile antseed up -d`, get the address
+# to fund with:  docker compose exec antseed antseed buyer balance --json
+#
+# ⚠️  This is a real private key. Use a DEDICATED DEV WALLET with a tiny balance,
+#     never your production wallet. Never commit it.
+set -eu
+
+rand_hex() {
+  # Print $1 random bytes as hex, using the first of openssl / node / python3 found.
+  if command -v openssl >/dev/null 2>&1; then
+    openssl rand -hex "$1"
+  elif command -v node >/dev/null 2>&1; then
+    node -e "console.log(require('crypto').randomBytes($1).toString('hex'))"
+  elif command -v python3 >/dev/null 2>&1; then
+    python3 -c "import secrets;print(secrets.token_hex($1))"
+  else
+    echo "need one of: openssl, node, python3" >&2
+    exit 1
+  fi
+}
+
+KEY=$(rand_hex 32)
+TOKEN=$(rand_hex 16)
+
+cat <<EOF
+# --- AntSeed dev wallet (paste into .env) ---
+ANTSEED_IDENTITY_HEX=${KEY}
+ANTSEED_CONTROL_TOKEN=${TOKEN}
+
+# Next:
+#   docker compose --profile antseed up -d --build
+#   docker compose exec antseed antseed buyer balance --json   # -> the address to fund
+#   send a little USDC + ETH (gas) on Base mainnet to that address,
+#   then Deposit it into escrow from the dashboard Catalog (wallet cell).
+EOF
diff --git a/user_flows.json b/user_flows.json
new file mode 100644
index 0000000..449cd00
--- /dev/null
+++ b/user_flows.json
@@ -0,0 +1,726 @@
+{
+  "meta": {
+    "product": "unhardcoded",
+    "description": "OpenAI-compatible LLM policy router. Data-plane 'router' (serve.py/shim.py) behind an 'ingress' auth-proxy (auth_proxy.py) that also serves the operator dashboard. Sigma_pol/Sigma_flow engine vendored at core/ (unhardcoded-engine).",
+    "purpose": "Exhaustive catalogue of user flows in entry order, to be lowered into Gherkin .feature files.",
+    "architecture_note": "Two HTTP layers. Consumers talk to the INGRESS (:8080): it does Bearer caller-auth, route allow-lists (403 caller_route_not_allowed), rate limits (429), usage recording, then proxies to the SHIM (router:18080) which runs the policy and owns /v1/* and operator /x/* (proxy 404s /x/* for consumers). Provider keys live in the host env; callers only ever send their own caller token.",
+    "test_flag_legend": {
+      "real_money": "executing for real spends on-chain funds / provider credits",
+      "real_credentials": "touches a real provider credential or private key",
+      "tos_risk": "uses an unofficial/ToS-risky path (codex)",
+      "mockable": "can be exercised hermetically against the local router with set_mock_response / a stub backend",
+      "priority": "P0 critical happy path, P1 important, P2 edge/advanced"
+    },
+    "phases": [
+      "1-onboarding", "2-auth", "3-providers", "4-consumer-keys",
+      "5-consumer-api", "6-dashboard-ops", "7-money-antseed"
+    ]
+  },
+  "flows": [
+    {
+      "id": "clone-with-submodule", "order": 1, "phase": "1-onboarding",
+      "name": "Clone the repo recursively (vendored Sigma_pol core submodule)",
+      "actor": "new-user", "category": "setup",
+      "entry_point": "git clone --recursive https://github.com/genlayerlabs/unhardcoded.git",
+      "preconditions": ["git installed; network to github.com", "core/ is a submodule -> genlayerlabs/unhardcoded-engine.git (.gitmodules)"],
+      "steps": [
+        {"action": "git clone --recursive <repo>", "expected": "repo + core/ populated; core/router.lua and core/llm_policy.lua exist"}
+      ],
+      "notes": "Without --recursive, core/ is empty and the Docker build fails at Dockerfile:19 (test -f core/router.lua && test -f core/llm_policy.lua) -- surfaces the missing submodule early.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"}
+    },
+    {
+      "id": "submodule-update-after-plain-clone", "order": 2, "phase": "1-onboarding",
+      "name": "Populate the core submodule after a plain clone",
+      "actor": "new-user", "category": "setup",
+      "entry_point": "git submodule update --init",
+      "preconditions": ["did a plain git clone (no --recursive); core/ empty"],
+      "steps": [
+        {"action": "git submodule update --init", "expected": "core/ checked out at the pinned commit; core/router.lua + core/llm_policy.lua present"}
+      ],
+      "notes": "Documented in README.md and CONTRIBUTING.md.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"}
+    },
+    {
+      "id": "create-env-secrets", "order": 3, "phase": "1-onboarding",
+      "name": "Create .env.secrets from .env.example",
+      "actor": "operator", "category": "setup",
+      "entry_point": "cp .env.example .env.secrets && chmod 600 .env.secrets",
+      "preconditions": [".env, .env.secrets, secrets/ are gitignored -- never commit them"],
+      "steps": [
+        {"action": "cp .env.example .env.secrets && chmod 600 .env.secrets", "expected": "template created, owner-only"},
+        {"action": "fill provider keys + dashboard auth values", "expected": "ready for router+ingress containers (both env_file .env.secrets)"}
+      ],
+      "notes": "Env vars: OPENAI_API_KEY(opt), OPENROUTER_API_KEY, CODEX_AUTH_PATH, CALLER_KEYS_JSON, CALLER_KEYS_SHA256_JSON, DASHBOARD_PASSWORD_SHA256, DASHBOARD_SESSION_SECRET, DASHBOARD_TRUSTED_USER_HEADER/SECRET, DASHBOARD_NO_AUTH, LLM_ROUTER_HOST_PORT, PUBLIC_BASE_URL, RATE_PER_MIN, BURST, LOG_LEVEL.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"}
+    },
+    {
+      "id": "generate-dashboard-admin-secrets", "order": 4, "phase": "1-onboarding",
+      "name": "Generate DASHBOARD_PASSWORD_SHA256 and DASHBOARD_SESSION_SECRET",
+      "actor": "operator", "category": "auth",
+      "entry_point": ".env.secrets: DASHBOARD_PASSWORD_SHA256=, DASHBOARD_SESSION_SECRET=",
+      "preconditions": [".env.secrets exists; Python available"],
+      "steps": [
+        {"action": "python - (sha256 of getpass) -> DASHBOARD_PASSWORD_SHA256", "expected": "64-hex digest"},
+        {"action": "python -c 'import secrets;print(secrets.token_urlsafe(32))' -> DASHBOARD_SESSION_SECRET", "expected": "urlsafe secret set"}
+      ],
+      "notes": "_dashboard_password_ok compares sha256(submitted) via hmac.compare_digest. If SESSION_SECRET unset, no session can be minted -> only NO_AUTH or trusted-header SSO work.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"}
+    },
+    {
+      "id": "run-via-nix-shell", "order": 5, "phase": "1-onboarding",
+      "name": "Run the data-plane shim directly via nix-shell (dev, no Docker, no auth/dashboard)",
+      "actor": "new-user", "category": "setup",
+      "entry_point": "nix-shell -p 'python3.withPackages(ps: with ps;[lupa httpx fastapi uvicorn pydantic])' --run 'python serve.py --config config.live.lua --default-profile default --host 127.0.0.1 --port 8080'",
+      "preconditions": ["submodule populated; Nix installed; provider keys exported in the shell env"],
+      "steps": [
+        {"action": "enter nix-shell with runtime deps (or shell.nix)", "expected": "lupa/httpx/fastapi/uvicorn/pydantic available"},
+        {"action": "python serve.py ...", "expected": "loads env_secrets, optional model_meta refresh, inits LLMRouterHost, uvicorn serves the RAW shim (no auth_proxy, no dashboard)"}
+      ],
+      "notes": "serve.py = data-plane only. The bearer contract + dashboard exist ONLY in auth_proxy/ingress. Flags: --metrics, --default-max-tokens (0=strict), --timeout-s, --codex-auth. MODEL_META_REFRESH=0 skips the boot refresh.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"}
+    },
+    {
+      "id": "docker-compose-up-default", "order": 6, "phase": "1-onboarding",
+      "name": "Start the default 2-service stack (router + ingress)",
+      "actor": "operator", "category": "setup",
+      "entry_point": "docker compose -f compose.yml up -d --build",
+      "preconditions": [".env.secrets filled; submodule populated; external docker network 'genlayer-web' exists"],
+      "steps": [
+        {"action": "docker compose up -d --build", "expected": "builds unhardcoded:local; router (serve.py :18080) + ingress (auth_proxy :8080). ingress depends_on router service_healthy"},
+        {"action": "ingress binds 127.0.0.1:${LLM_ROUTER_HOST_PORT:-8080}", "expected": "only loopback exposed; router never published"}
+      ],
+      "notes": "ingress env: ROUTER_UPSTREAM=http://router:18080, DASHBOARD_KEY_ENV_PATH, DASHBOARD_ISSUED_KEYS_PATH. .env.secrets bind-mounted into ingress at /run/llm-router/.env.secrets. genlayer-web is external:true -> must pre-exist.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"}
+    },
+    {
+      "id": "docker-compose-up-antseed", "order": 7, "phase": "1-onboarding",
+      "name": "Start the stack with the AntSeed buyer sidecar (--profile antseed)",
+      "actor": "operator", "category": "setup",
+      "entry_point": "docker compose -f compose.yml --profile antseed up -d --build",
+      "preconditions": ["default stack ok; AntSeed participation desired; a funded identity"],
+      "steps": [
+        {"action": "compose --profile antseed up", "expected": "also starts antseed (browse mode); writes /market/market.json + status-antseed.json into the antseed-market volume"},
+        {"action": "compose exec antseed antseed buyer status; ... network browse --services --top 5", "expected": "sidecar up; sellers discovered"}
+      ],
+      "notes": "antseed gated behind the compose profile (omitted from default up). Wallet self-service needs ANTSEED_CONTROL_TOKEN on both router+sidecar. Needs a FUNDED wallet to transact (real money).",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": true, "mockable": false, "priority": "P2"}
+    },
+    {
+      "id": "healthz-and-smoke", "order": 8, "phase": "1-onboarding",
+      "name": "Health check + smoke test the running stack",
+      "actor": "operator", "category": "setup",
+      "entry_point": "curl -fsS http://127.0.0.1:8080/healthz",
+      "preconditions": ["stack started"],
+      "steps": [
+        {"action": "GET /healthz (no auth)", "expected": "ingress proxies to router /healthz -> {ok:true,initialized:true}"},
+        {"action": "GET /v1/models with Bearer <token>", "expected": "200 model list"},
+        {"action": "POST /v1/chat/completions {model:'',messages:[pong],max_tokens:8}", "expected": "200; x_router shows chosen provider/model"},
+        {"action": "GET /v1/models with NO bearer", "expected": "401 Unauthorized"}
+      ],
+      "notes": "Both services define /healthz healthchecks; ingress depends_on router service_healthy.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"}
+    },
+
+    {
+      "id": "dashboard-no-auth-bypass", "order": 9, "phase": "2-auth",
+      "name": "Local-dev: bypass dashboard auth with DASHBOARD_NO_AUTH",
+      "actor": "operator", "category": "auth",
+      "entry_point": ".env.secrets: DASHBOARD_NO_AUTH=1 ; GET http://127.0.0.1:8080/dashboard",
+      "preconditions": ["local-only deployment nobody else can reach"],
+      "steps": [
+        {"action": "set DASHBOARD_NO_AUTH=1 (1/true/yes/on) and restart ingress", "expected": "every dashboard request treated as local admin"},
+        {"action": "open /dashboard", "expected": "loads as admin without login; _require_dashboard_context returns {role:admin,user:'local-dev'}"}
+      ],
+      "notes": "DANGER: bypasses ALL dashboard auth. Checked FIRST, before trusted-header and session cookie. Never on a reachable deployment.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"}
+    },
+    {
+      "id": "dashboard-password-login", "order": 10, "phase": "2-auth",
+      "name": "Admin login to the dashboard with the password",
+      "actor": "operator", "category": "auth",
+      "entry_point": "POST /dashboard/login {password}",
+      "preconditions": ["DASHBOARD_PASSWORD_SHA256 + DASHBOARD_SESSION_SECRET set; NO_AUTH unset"],
+      "steps": [
+        {"action": "POST /dashboard/login {password}", "expected": "sha256(password) compared to hash; on ok sets httponly signed cookie router_dashboard_session (role=admin,user='admin'), records login"},
+        {"action": "subsequent /dashboard/api/* carry the cookie", "expected": "admin role -> full access"}
+      ],
+      "notes": "Cookie is secure=True + path=/dashboard -> remote use needs TLS. 401 on bad password.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"}
+    },
+    {
+      "id": "dashboard-consumer-key-login", "order": 11, "phase": "2-auth",
+      "name": "Consumer logs into the dashboard with their API key (scoped view)",
+      "actor": "consuming-service", "category": "auth",
+      "entry_point": "POST /dashboard/login {api_key}",
+      "preconditions": ["valid active consumer token; DASHBOARD_SESSION_SECRET set"],
+      "steps": [
+        {"action": "POST /dashboard/login {api_key}", "expected": "_caller_auth validates; mints consumer-role session {role:consumer,consumer,key_sha256}"},
+        {"action": "dashboard renders consumer-scoped view", "expected": "Catalog/Provider-keys/Key-usage hidden; admin ops -> 403 dashboard_admin_required"}
+      ],
+      "notes": "Same endpoint as password login; api_key vs password selects the branch.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"}
+    },
+    {
+      "id": "trusted-header-sso-admin", "order": 12, "phase": "2-auth",
+      "name": "Trusted-header SSO admin auth (behind a reverse proxy)",
+      "actor": "operator", "category": "auth",
+      "entry_point": "headers: <DASHBOARD_TRUSTED_USER_HEADER>: <user> + x-dashboard-trusted-secret: <secret>",
+      "preconditions": ["both DASHBOARD_TRUSTED_USER_HEADER and DASHBOARD_TRUSTED_USER_SECRET set; a proxy that strips client copies"],
+      "steps": [
+        {"action": "proxy injects trusted user header + x-dashboard-trusted-secret", "expected": "if secret matches via hmac.compare_digest -> admin session {user:trusted_user}"}
+      ],
+      "notes": "Disabled by default. Evaluated AFTER NO_AUTH, BEFORE the cookie. Secret header name is fixed; only the user header name is configurable.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P2"}
+    },
+    {
+      "id": "dashboard-logout", "order": 13, "phase": "2-auth",
+      "name": "Log out of the dashboard",
+      "actor": "operator", "category": "auth",
+      "entry_point": "POST /dashboard/logout",
+      "preconditions": ["any session"],
+      "steps": [{"action": "POST /dashboard/logout", "expected": "session cookie deleted; login card shown"}],
+      "notes": "",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P2"}
+    },
+    {
+      "id": "caller-bearer-contract", "order": 14, "phase": "2-auth",
+      "name": "Consuming service authenticates to /v1 with its bearer token",
+      "actor": "consuming-service", "category": "auth",
+      "entry_point": "Authorization: Bearer <per-service-token> on any /v1 path (point at ingress :8080, not router)",
+      "preconditions": ["stack running; valid active consumer token"],
+      "steps": [
+        {"action": "client sets LLM_BASE_URL=http://127.0.0.1:8080/v1, LLM_API_KEY=<token>", "expected": "standard OpenAI SDK usage"},
+        {"action": "ingress validates token (_caller_auth)", "expected": "unknown/missing -> 401; inactive/revoked/expired -> 403"},
+        {"action": "ingress strips Authorization, adds x-llm-router-caller, proxies to router", "expected": "router runs policy over the host's keys; caller never sends provider keys"}
+      ],
+      "notes": "Token maps to a caller name for audit. /x/* paths are 404'd for consumers. PUBLIC_BASE_URL shown in the key-handoff blurb.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"}
+    },
+
+    {
+      "id": "openrouter-key-config", "order": 15, "phase": "3-providers",
+      "name": "Configure the OpenRouter API key",
+      "actor": "operator", "category": "providers",
+      "entry_point": "env OPENROUTER_API_KEY in .env.secrets (provider openrouter/openrouter_market, auth_env=OPENROUTER_API_KEY); or dashboard provider-keys/update",
+      "preconditions": ["an OpenRouter account+key; .env.secrets mounted into router+ingress"],
+      "steps": [
+        {"action": "set OPENROUTER_API_KEY in .env.secrets, restart router", "expected": "host resolves Authorization: Bearer $OPENROUTER_API_KEY for openrouter calls"},
+        {"action": "optional: read /credits", "expected": "credits_usd balance + runway shown in dashboard"}
+      ],
+      "notes": "/models pricing discovery is keyless; the key is only needed to call and to read /credits. Tier=fallback.",
+      "test_flags": {"real_money": false, "real_credentials": true, "tos_risk": false, "mockable": true, "priority": "P0"}
+    },
+    {
+      "id": "openrouter-catalog-discovery", "order": 16, "phase": "3-providers",
+      "name": "OpenRouter live whole-catalog discovery",
+      "actor": "operator", "category": "providers",
+      "entry_point": "provider openrouter_market (discovery=marketplace); sources/openrouter.py",
+      "preconditions": ["openrouter_market enabled (default)"],
+      "steps": [
+        {"action": "source polls GET /models every 3600s", "expected": "long-tail exposed as marketplace offers; curated families skipped"},
+        {"action": "each model gets traits inline (benchmarks, modalities, cap_*, ranks)", "expected": "discovered families rank on real benchmarks, not just price"}
+      ],
+      "notes": "Negative per-token prices (OpenRouter -1 sentinel) dropped so they can't win cost-led policies; $0 free models stay routable.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"}
+    },
+    {
+      "id": "refresh-model-meta", "order": 17, "phase": "3-providers",
+      "name": "Regenerate model_meta.lua (registered benchmark/modality/cap traits)",
+      "actor": "operator", "category": "providers",
+      "entry_point": "python scripts/refresh_model_meta.py [--config config.live.lua] [--out model_meta.lua]",
+      "preconditions": ["repo checkout; network to OpenRouter /models (keyless)"],
+      "steps": [
+        {"action": "run script (startup job + periodically)", "expected": "fetches /models; writes per-curated-family traits + ranks to model_meta.lua (GENERATED, do not hand-edit)"}
+      ],
+      "notes": "Only curated families land here (on-chain/deterministic path); discovered families carry traits inline. No money/credentials.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P2"}
+    },
+    {
+      "id": "provider-add", "order": 18, "phase": "3-providers",
+      "name": "Dashboard 'Add provider' (BYO OpenAI-compatible gateway)",
+      "actor": "operator", "category": "providers",
+      "entry_point": "POST /dashboard/api/provider-keys/add -> router /x/providers",
+      "preconditions": ["admin session; secrets/ mounted RW for ingress; .env.secrets writable"],
+      "steps": [
+        {"action": "POST {id, base_url, auth_env, tier, served_models[], key}", "expected": "validate_entry (id regex, http(s) base_url, api_kind=openai_compatible only, UPPER_SNAKE auth_env, known families)"},
+        {"action": "persist + hot-apply", "expected": "key -> .env.secrets under auth_env; provider def -> secrets/providers.local.json (key never stored there); /x/providers re-inits core preserving breaker/EMA; on failure loads next restart"}
+      ],
+      "notes": "Only openai_compatible providers can be added at runtime. Overlay never overwrites a config.live.lua provider. No per-tenant provider keys -- provider keys are global to the router.",
+      "test_flags": {"real_money": false, "real_credentials": true, "tos_risk": false, "mockable": true, "priority": "P1"}
+    },
+    {
+      "id": "provider-key-update-reveal", "order": 19, "phase": "3-providers",
+      "name": "Update / reveal an existing provider credential",
+      "actor": "operator", "category": "providers",
+      "entry_point": "POST /dashboard/api/provider-keys/update ; GET /dashboard/api/provider-keys/reveal?provider=<id>",
+      "preconditions": ["admin session; provider has an auth_env (NOT the oauth/codex provider)"],
+      "steps": [
+        {"action": "update POST {provider,key}", "expected": "writes .env.secrets, hot-applies via /x/provider-key"},
+        {"action": "reveal GET", "expected": "raw env-key value + 12-char fingerprint; logged dashboard_provider_key_revealed"}
+      ],
+      "notes": "Update rejects providers with no auth_env (codex -> use the Codex flow). Reveal exposes the real key in plaintext to an admin session.",
+      "test_flags": {"real_money": false, "real_credentials": true, "tos_risk": false, "mockable": true, "priority": "P1"}
+    },
+    {
+      "id": "codex-login-setup", "order": 20, "phase": "3-providers",
+      "name": "Codex (ChatGPT-subscription) provider setup (codex login + mount auth.json)",
+      "actor": "operator", "category": "providers",
+      "entry_point": "codex login; env CODEX_AUTH_PATH (compose mounts host file -> /codex/auth.json); router --codex-auth; provider openai (api_kind=openai_codex, oauth)",
+      "preconditions": ["a paid ChatGPT subscription; codex CLI installed"],
+      "steps": [
+        {"action": "codex login (browser sign-in)", "expected": "writes ~/.codex/auth.json with access/refresh/id tokens + account_id"},
+        {"action": "set CODEX_AUTH_PATH; compose bind-mounts to /codex/auth.json", "expected": "router picks it up lazily on first codex call"},
+        {"action": "a codex-routed request", "expected": "codex_backend POSTs chatgpt.com/backend-api/codex/responses (SSE) with Bearer token + chatgpt-account-id"}
+      ],
+      "notes": "UNOFFICIAL / ToS-risky. access_token JWT expires in hours -> see codex-token-refresh; an expired token + dead refresh_token -> auth_error(401) -> provider disabled. Mount must be writable (token is rewritten on refresh). temperature/max_tokens NOT forwarded.",
+      "test_flags": {"real_money": false, "real_credentials": true, "tos_risk": true, "mockable": true, "priority": "P1"}
+    },
+    {
+      "id": "codex-token-refresh", "order": 21, "phase": "3-providers",
+      "name": "Codex token auto-refresh (refresh_token -> access_token, written back)",
+      "actor": "operator", "category": "providers",
+      "entry_point": "codex_auth.CodexAuth.access_token() (per codex call)",
+      "preconditions": ["a valid refresh_token in auth.json"],
+      "steps": [
+        {"action": "read JWT exp; if within 300s margin -> refresh", "expected": "_needs_refresh() true"},
+        {"action": "POST auth.openai.com/oauth/token grant_type=refresh_token (public client app_EMoa...)", "expected": "new tokens; _write_back to auth.json preserving layout"}
+      ],
+      "notes": "Refresh failures are swallowed (keep old token; 401 surfaces later). Thread-safe. If refresh_token is dead -> 401 -> provider disabled (the failure we hit). The mounted host file gets rewritten -> bind mount must be writable.",
+      "test_flags": {"real_money": false, "real_credentials": true, "tos_risk": true, "mockable": true, "priority": "P2"}
+    },
+    {
+      "id": "codex-multi-account", "order": 22, "phase": "3-providers",
+      "name": "Manage multiple Codex accounts from the dashboard",
+      "actor": "operator", "category": "providers",
+      "entry_point": "GET/POST /dashboard/api/codex/accounts ; DELETE /dashboard/api/codex/accounts/{name}; CodexAuthStore over CODEX_ACCOUNTS_DIR (default /codex/accounts on the PVC)",
+      "preconditions": ["admin session; auth.json blob(s) to paste"],
+      "steps": [
+        {"action": "POST {name, auth_json}", "expected": "validated (must contain access_token), saved <slug>.json chmod 600, store reloaded, router /x/codex/reload"},
+        {"action": "GET list", "expected": "per-account {name,account_id,fingerprint} (never raw token) + active=sorted(names)[0] + activity (used_percent, recent_429, scarcity_price_in)"},
+        {"action": "DELETE {name}", "expected": "file unlinked, reloaded; 404 if missing"}
+      ],
+      "notes": "GOTCHA: active account = first alphabetically by name (per-call selection is a follow-up). Legacy single /codex/auth.json auto-adopted as account 'default' (which sorts before most names). In compose, /codex/accounts is NOT shared with the router by default -- the router reads only the mounted /codex/auth.json (a real wiring gap).",
+      "test_flags": {"real_money": false, "real_credentials": true, "tos_risk": true, "mockable": true, "priority": "P2"}
+    },
+    {
+      "id": "codex-scarcity-ramp", "order": 23, "phase": "3-providers",
+      "name": "Codex scarcity price ramp (ranking-only; bills $0)",
+      "actor": "operator", "category": "providers",
+      "entry_point": "sources/codex.py ingest()/poll; Config tab knobs (codex.quota_demote_start, imputed_price_in/out, quota_429_window_s, quota_429_shed)",
+      "preconditions": ["codex live and receiving traffic"],
+      "steps": [
+        {"action": "every codex call pushes an observation (status + quota headers) -- never probes", "expected": "_demote_frac() = max(quota used fraction, recent-429 fraction)"},
+        {"action": "imputed ranking price = imputed_price_* x frac via update_metrics", "expected": "paid routes take over before the 429 wall; price decays back as pressure ages out"}
+      ],
+      "notes": "Codex seeded at price ~0 in metrics.live.lua so cost-led policies rank it first; executed/billing cost stays $0. Ranking-only.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": true, "mockable": true, "priority": "P2"}
+    },
+
+    {
+      "id": "register-consumer-key-cli", "order": 24, "phase": "4-consumer-keys",
+      "name": "Register a per-consumer ingress token via CLI (sha256 storage)",
+      "actor": "operator", "category": "consumer-keys",
+      "entry_point": "python scripts/register_consumer_key.py <consumer> [--env .env.secrets] [--token ...] [--plaintext]",
+      "preconditions": ["writable .env.secrets"],
+      "steps": [
+        {"action": "run with a consumer name", "expected": "generates llmr_<token> (tokens shorter than 16 chars are rejected)"},
+        {"action": "default mode", "expected": "stores sha256(token)->consumer in CALLER_KEYS_SHA256_JSON; raw token printed once, never stored"},
+        {"action": "--plaintext", "expected": "legacy CALLER_KEYS_JSON (raw token, discouraged)"}
+      ],
+      "notes": ".env.secrets rewritten chmod 600. Restart/reload for keys to take effect (caller maps read at module import).",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"}
+    },
+    {
+      "id": "issue-consumer-key-dashboard", "order": 25, "phase": "4-consumer-keys",
+      "name": "Issue a consumer key from the dashboard (+ setup blurb)",
+      "actor": "operator", "category": "consumer-keys",
+      "entry_point": "POST /dashboard/api/keys {consumer}",
+      "preconditions": ["admin session; DASHBOARD_ISSUED_KEYS_PATH + .env.secrets writable"],
+      "steps": [
+        {"action": "POST {consumer}", "expected": "mints <prefix>_<token>; sha256->consumer in CALLER_KEYS_SHA256_JSON; appends {sha256_prefix,status:active,created_at} record"},
+        {"action": "response + copy blurb (buildKeyHandoff)", "expected": "raw api_key shown once; blurb has PUBLIC_BASE_URL, profile:default, curl + Python OpenAI-SDK examples, /usage, 403 route hint"}
+      ],
+      "notes": "Only hashed metadata persisted. The new key works immediately on the ingress (in-memory map updated). This is the path we used to mint the local test key.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"}
+    },
+    {
+      "id": "rotate-consumer-key", "order": 26, "phase": "4-consumer-keys",
+      "name": "Rotate a consumer key with a grace period",
+      "actor": "operator", "category": "consumer-keys",
+      "entry_point": "POST /dashboard/api/keys {consumer, rotate:true, grace_period_s?}",
+      "preconditions": ["admin session; existing active key(s)"],
+      "steps": [
+        {"action": "POST rotate:true", "expected": "new key minted"},
+        {"action": "existing active non-expiring keys get expires_at=now+grace (clamped 0..90d) + replaced_at", "expected": "old keys work until expiry, then rejected caller_key_expired"}
+      ],
+      "notes": "Lets clients swap keys without downtime; expiry enforced at request time.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"}
+    },
+    {
+      "id": "revoke-consumer-key", "order": 27, "phase": "4-consumer-keys",
+      "name": "Revoke a consumer key immediately",
+      "actor": "operator", "category": "consumer-keys",
+      "entry_point": "POST /dashboard/api/keys/revoke {consumer, sha256_prefix}",
+      "preconditions": ["admin session; prefix 8-64 hex"],
+      "steps": [
+        {"action": "POST", "expected": "404 if prefix not found; otherwise record status:revoked + revoked_at; hash dropped from CALLER_KEYS_SHA256_JSON (and from the legacy plaintext CALLER_KEYS_JSON); persisted to .env.secrets"},
+        {"action": "subsequent calls with the revoked key", "expected": "403 caller_key_revoked; takes effect immediately"}
+      ],
+      "notes": "",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"}
+    },
+    {
+      "id": "configure-consumer-limits", "order": 28, "phase": "4-consumer-keys",
+      "name": "Configure consumer policy (status / allowed_routes / rate / burst)",
+      "actor": "operator", "category": "consumer-keys",
+      "entry_point": "POST /dashboard/api/consumers/{consumer} {status?, allowed_routes?, rate_per_min?, burst?}",
+      "preconditions": ["admin session"],
+      "steps": [
+        {"action": "set status active/inactive", "expected": "inactive -> all that consumer's keys rejected (caller_inactive)"},
+        {"action": "set allowed_routes (exact, * / all, or prefix*)", "expected": "_route_allowed gates each request by model / profile:<name>; empty=all"},
+        {"action": "set rate_per_min / burst", "expected": "per-caller 60s window; effective = max(rate_per_min, burst); defaults to the global RATE_PER_MIN/BURST"}
+      ],
+      "notes": "Dashboard UI = Consumers tab -> Settings drawer (saveConsumerSettings).",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"}
+    },
+    {
+      "id": "reveal-consumer-key", "order": 29, "phase": "4-consumer-keys",
+      "name": "Reveal a recoverable consumer key (legacy plaintext only)",
+      "actor": "operator", "category": "consumer-keys",
+      "entry_point": "GET /dashboard/api/keys/reveal?consumer=<name>",
+      "preconditions": ["admin session"],
+      "steps": [
+        {"action": "GET", "expected": "raw rows only for legacy CALLER_KEYS_JSON entries + hash_only_count"},
+        {"action": "hash-only consumer", "expected": "message: cannot be revealed; generate a replacement"}
+      ],
+      "notes": "Hash-only (default) keys are unrecoverable by design.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P2"}
+    },
+
+    {
+      "id": "chat-default-policy", "order": 30, "phase": "5-consumer-api",
+      "name": "Chat completion with the default policy (empty model)",
+      "actor": "consuming-service", "category": "api",
+      "entry_point": "POST /v1/chat/completions",
+      "preconditions": ["Bearer caller token (active, route allowed, under rate limit)"],
+      "steps": [
+        {"action": "POST {model:'', messages:[...]}", "expected": "contract.profile=default_profile; the single declarative default Sigma_pol policy runs; max_tokens defaulted to 4096 if omitted"},
+        {"action": "router filters->scores->picks over the host keys", "expected": "200 chat.completion + x_router{provider, model_family, served_model_id, price_in/out, cost_usd, policy_fingerprint, decision_trace (ranked trimmed to top 10)}"}
+      ],
+      "notes": "Primary 'let the router decide' path. cost_usd = tokens x chosen price (0 for codex subs, clamped >=0).",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"}
+    },
+    {
+      "id": "chat-family-pin", "order": 31, "phase": "5-consumer-api",
+      "name": "Chat completion pinned to a model family (model=family:NAME)",
+      "actor": "consuming-service", "category": "api",
+      "entry_point": "POST /v1/chat/completions",
+      "preconditions": ["Bearer caller token"],
+      "steps": [
+        {"action": "POST model:'family:deepseek-v3'", "expected": "default policy + requirements.model_family; only that family qualifies"},
+        {"action": "router ranks the family's sellers", "expected": "200; x_router.model_family = the pin; route key for allow-lists = 'family:deepseek-v3'"}
+      ],
+      "notes": "Sugar over the default policy.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"}
+    },
+    {
+      "id": "chat-provider-pin", "order": 32, "phase": "5-consumer-api",
+      "name": "Chat completion pinned to one (provider, family) (model=pin:PROVIDER/FAMILY)",
+      "actor": "consuming-service", "category": "api",
+      "entry_point": "POST /v1/chat/completions",
+      "preconditions": ["Bearer caller token"],
+      "steps": [
+        {"action": "POST model:'pin:openrouter/deepseek-v3'", "expected": "requirements.pin={provider,model}; only that exact seller qualifies; no cross-provider fallback"},
+        {"action": "router calls the pinned seller", "expected": "200 with x_router.provider==pin; if it errors -> exhausted/no_candidates (nothing to fall back to)"}
+      ],
+      "notes": "Malformed pin (no '/') silently degrades to plain default policy.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"}
+    },
+    {
+      "id": "chat-named-profile", "order": 33, "phase": "5-consumer-api",
+      "name": "Chat completion via a named profile (model=profile:NAME or path /NAME/v1/...)",
+      "actor": "consuming-service", "category": "api",
+      "entry_point": "POST /v1/chat/completions  OR  POST /{profile}/v1/chat/completions",
+      "preconditions": ["Bearer caller token"],
+      "steps": [
+        {"action": "POST model:'profile:edge' (or path-addressed)", "expected": "contract.profile='edge'; for the path form the path wins and the model string is ignored"},
+        {"action": "router runs that profile's term", "expected": "200; x_router.policy_fingerprint identifies the profile"}
+      ],
+      "notes": "Only 'default' ships out of the box. Route 'profile:edge' is derived from the path too, so route restriction works for both forms.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"}
+    },
+    {
+      "id": "chat-per-call-policy-ir", "order": 34, "phase": "5-consumer-api",
+      "name": "Chat completion with a per-call Sigma_pol policy_ir term",
+      "actor": "consuming-service", "category": "api",
+      "entry_point": "POST /v1/chat/completions",
+      "preconditions": ["Bearer caller token"],
+      "steps": [
+        {"action": "POST body with a policy_ir array term", "expected": "shim forwards it verbatim; the CORE is the single admission boundary (check sorts/arity/depth<=64/nodes<=4096) then AND-composes the host policy_envelope (caller can only narrow)"},
+        {"action": "admission + execution", "expected": "200 with x_router.policy_fingerprint = term identity; admission failure -> 400 invalid_request_error code invalid_policy 'policy_ir rejected at admission: <reason>'"}
+      ],
+      "notes": "The PRIMARY routing path. 5-slot term: filter/score/pick/xform/failplan (core/docs/SIGMA-POL.md).",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"}
+    },
+    {
+      "id": "chat-flow-ir", "order": 35, "phase": "5-consumer-api",
+      "name": "Chat completion driving a Sigma_flow DAG (flow_ir term)",
+      "actor": "consuming-service", "category": "api",
+      "entry_point": "POST /v1/chat/completions",
+      "preconditions": ["Bearer caller token"],
+      "steps": [
+        {"action": "POST body with flow_ir = ['flow', {input, llm nodes (each with own policy+system+inputs), output}]", "expected": "flow_ir takes precedence over policy_ir/model; core admits the whole DAG (acyclic, one source/one sink, each node's policy admitted)"},
+        {"action": "execute_flow_async schedules the DAG; each llm node is a normal routed call", "expected": "answer = sink output; x_router.provider='flow', model_family='flow:<fp>', decision_trace.flow_nodes[] per-node trace (provider/model/price/tokens/latency/nested trace)"},
+        {"action": "admission/node failure", "expected": "admission -> 400 invalid_flow; a node failing surfaces the node's REAL error kind (e.g. no_candidates->503), not a blanket 502; failed flow still records provider:'flow' + failed_node"}
+      ],
+      "notes": "Sigma_flow = declarative composition over Sigma_pol (no loops/effects). Tokens ~ Nx input. This is the ensemble/review-MoA path we screenshotted.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"}
+    },
+    {
+      "id": "chat-streaming", "order": 36, "phase": "5-consumer-api",
+      "name": "Streaming chat completion (stream:true, SSE)",
+      "actor": "consuming-service", "category": "api",
+      "entry_point": "POST /v1/chat/completions {stream:true}",
+      "preconditions": ["Bearer caller token"],
+      "steps": [
+        {"action": "POST any routing form with stream:true", "expected": "Content-Type text/event-stream; fallback BEFORE the first content delta is still a clean JSON error (not SSE)"},
+        {"action": "receive SSE", "expected": "role chunk, text deltas, final chunk with usage+x_router (cost+trimmed trace), then [DONE]; heartbeat comments keep the line warm under the 60s ALB idle timeout"},
+        {"action": "proxy records AFTER stream ends", "expected": "ingress tees the SSE tail to extract final usage/cost -> real tokens in the usage row"}
+      ],
+      "notes": "A streaming Sigma_flow has no token stream -> first byte + heartbeats while it runs, then the assembled result (or trace+error on failure).",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"}
+    },
+    {
+      "id": "chat-tool-calls", "order": 37, "phase": "5-consumer-api",
+      "name": "Chat completion with tool calls",
+      "actor": "consuming-service", "category": "api",
+      "entry_point": "POST /v1/chat/completions {tools:[...]}",
+      "preconditions": ["Bearer caller token"],
+      "steps": [
+        {"action": "POST with tools[]/tool_choice (+ any routing form)", "expected": "shim forwards tools/tool_choice/response_format/temperature/seed; a policy can gate on cap_tools"},
+        {"action": "model returns tool calls", "expected": "200 with choices[0].message.tool_calls, finish_reason 'tool_calls'; in a flow the sink node's tool_calls are emitted"}
+      ],
+      "notes": "flow+tools with no tool-capable candidate fails clean with no_candidates (503), not 502.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"}
+    },
+    {
+      "id": "list-models", "order": 38, "phase": "5-consumer-api",
+      "name": "List the routable model catalog",
+      "actor": "consuming-service", "category": "api",
+      "entry_point": "GET /v1/models",
+      "preconditions": ["Bearer caller token"],
+      "steps": [
+        {"action": "GET /v1/models", "expected": "{object:list, data:[{id:'profile:<name>'},{id:'family:<fam>'}...]} = profile names + curated + discovered families; exactly the model-field values a caller can send"}
+      ],
+      "notes": "Auth required (goes through the proxy).",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"}
+    },
+    {
+      "id": "consumer-key-usage", "order": 39, "phase": "5-consumer-api",
+      "name": "Per-key usage self-service (consumer)",
+      "actor": "consuming-service", "category": "api",
+      "entry_point": "GET /v1/usage (alias GET /api/usage)",
+      "preconditions": ["Bearer caller token (the exact key whose usage is reported)"],
+      "steps": [
+        {"action": "GET /v1/usage?since&until&limit&offset OR /api/usage?window=24h", "expected": "usage for ONLY that key; window 15m/24h/7d/4w overrides since; limit clamped 1-500"},
+        {"action": "receive snapshot", "expected": "schema_version 3, key_sha256_prefix (first 12 hex only), consumer_settings, totals, cost_estimate, daily/monthly, by_provider/model_family/route/served_model/status, route_health, recent[]"}
+      ],
+      "notes": "Bad window -> 400 invalid_usage_window. Operator equivalent: POST /dashboard/api/key-usage (admin session).",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"}
+    },
+    {
+      "id": "route-and-rate-rejections", "order": 40, "phase": "5-consumer-api",
+      "name": "Auth / route-restriction / rate-limit rejections (ingress)",
+      "actor": "consuming-service", "category": "api",
+      "entry_point": "any /v1 or /api path",
+      "preconditions": ["Bearer caller token"],
+      "steps": [
+        {"action": "no/unknown token", "expected": "401 caller_auth"},
+        {"action": "inactive/revoked/expired key", "expected": "403 caller_inactive | caller_key_revoked | caller_key_expired"},
+        {"action": "route not in allowed_routes (route = body.model or profile:<seg> from path)", "expected": "403 caller_route_not_allowed; empty/['all']/['*'] = all; 'prefix*' wildcard"},
+        {"action": "exceed rate_per_min/burst", "expected": "429 caller_rate_limit"}
+      ],
+      "notes": "All enforced in the ingress BEFORE forwarding; rejects recorded (_record_reject).",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"}
+    },
+    {
+      "id": "router-cascade-errors", "order": 41, "phase": "5-consumer-api",
+      "name": "Fallback/cascade behavior + router error kinds (in the trace)",
+      "actor": "consuming-service", "category": "api",
+      "entry_point": "POST /v1/chat/completions",
+      "preconditions": ["Bearer caller token"],
+      "steps": [
+        {"action": "a chosen provider errors mid-cascade", "expected": "router tries ranked candidates in order; decision_path records each attempt (provider/family, error_kind, http_status, message). 402->payment_required, 401/403->auth_error, 429->rate_limit, 408/504->timeout, 404->model_unavailable, 400->bad_request/context_overflow, 5xx->server_error, empty content->bad_response"},
+        {"action": "all candidates exhausted / none qualify", "expected": "error body {error{message,type:router_error,code}, x_router{decision_trace}}; mapped: no_candidates->503, auth_error->401, rate_limit->429, bad_request/context_overflow->400, timeout->504, else->502; message appends per-attempt summary"}
+      ],
+      "notes": "Covers the failures actually debugged in practice (antseed payment_required/network_error, z-ai/glm-5.2 bad_response, opus bad_request). The same translation applies on the streaming path.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"}
+    },
+
+    {
+      "id": "analytics-view", "order": 42, "phase": "6-dashboard-ops",
+      "name": "View Analytics (spend / traffic / errors with filters)",
+      "actor": "operator", "category": "dashboard",
+      "entry_point": "Analytics tab -> GET /dashboard/api/stats?timeframe&consumer&provider&model",
+      "preconditions": ["session"],
+      "steps": [
+        {"action": "open Analytics", "expected": "metric cards Requests/Spend/Tokens/Success-rate; over-time bars; By provider/model/consumer/status tables; live chat-health banner"},
+        {"action": "change timeframe (all/runtime/1h/24h/7d/30d), consumer, provider, model filters; Refresh", "expected": "snapshot rescoped/refetched; 15s auto-poll"}
+      ],
+      "notes": "",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"}
+    },
+    {
+      "id": "activity-trace", "order": 43, "phase": "6-dashboard-ops",
+      "name": "Activity -- per-request trace (policy term, fallback chain, cost, Sigma_flow DAG)",
+      "actor": "operator", "category": "dashboard",
+      "entry_point": "Activity tab -> rows from /dashboard/api/stats recent[]; filter All/Requests/Rejects/Probes",
+      "preconditions": ["session"],
+      "steps": [
+        {"action": "open Activity, filter", "expected": "table Time/Event/Caller/Status/Route/Provider/Cost/Error"},
+        {"action": "expand a row", "expected": "meta pills (policy/flow fingerprint, cost, tokens, latency, served model); Attempts fallback order with per-provider ok/error/skip; copyable Sigma_pol term"},
+        {"action": "expand a Sigma_flow request", "expected": "node DAG view (level-laid nodes, per-node provider/model/latency/tokens + inner attempts)"}
+      ],
+      "notes": "This is the view we used for the flow screenshots.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"}
+    },
+    {
+      "id": "builder-policy", "order": 44, "phase": "6-dashboard-ops",
+      "name": "Builder -- compose / review / download / test a Sigma_pol policy",
+      "actor": "operator", "category": "dashboard",
+      "entry_point": "Builder tab (Policy mode); /dashboard/api/policy/{build,preview,normalize,test}, /dashboard/api/fields, /dashboard/api/policies",
+      "preconditions": ["admin session"],
+      "steps": [
+        {"action": "compose filter rows / score terms / selector (rows<->raw term)", "expected": "bBuildTerm assembles ['policy',filter,score,selector,...]; fields data-driven from /x/fields"},
+        {"action": "Review ranking", "expected": "normalize -> fingerprint; preview (/x/rank) -> ranked Provider/Model/Tier/$in/$out/Score; empty survivors -> 'No survivors'"},
+        {"action": "Download policy", "expected": "sigma-pol-<fp>.json {version,fingerprint,policy_ir}"},
+        {"action": "Test call (prompt)", "expected": "runs /v1/chat/completions with policy_ir; result chips + output; recorded in Activity (caller=dashboard-test, not billed)"}
+      ],
+      "notes": "400s render verbatim in the builder error box.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"}
+    },
+    {
+      "id": "builder-flow", "order": 45, "phase": "6-dashboard-ops",
+      "name": "Flow Builder -- compose / review / download / test a Sigma_flow DAG",
+      "actor": "operator", "category": "dashboard",
+      "entry_point": "Builder tab (Flow mode); /dashboard/api/flow/{normalize,test}",
+      "preconditions": ["admin session"],
+      "steps": [
+        {"action": "add nodes, per-node system prompt, choose inputs (multi-input=fusion), pick answer node; '↧ use Policy-builder term' or edit raw", "expected": "fBuildIR assembles ['flow',{u:input,n*:llm,out:output}]"},
+        {"action": "Review flow", "expected": "/dashboard/api/flow/normalize -> fingerprint, 'Flow admitted'"},
+        {"action": "Download flow", "expected": "sigma-flow-<fp>.json"},
+        {"action": "Test call", "expected": "/dashboard/api/flow/test runs live; per-node trace chips + output; recorded in Activity"}
+      ],
+      "notes": "Default example = Mixture-of-agents (2 drafts -> synthesize), same shape as our ensemble.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"}
+    },
+    {
+      "id": "catalog-view", "order": 46, "phase": "6-dashboard-ops",
+      "name": "Catalog (Market) -- price book + per-seller perf, + SKILL.md download",
+      "actor": "operator", "category": "dashboard",
+      "entry_point": "Catalog tab -> GET /dashboard/api/market (-> /x/market); /dashboard/api/skill",
+      "preconditions": ["admin session"],
+      "steps": [
+        {"action": "open Catalog", "expected": "per-family rows: quality, benchmark badges, modalities, cheapest $in/$out, seller count"},
+        {"action": "expand a family", "expected": "seller table: wire model, $in/$out, status (live/disabled/pinned/over-cap), perf (success%/ms/calls), refreshed time"},
+        {"action": "filter / Tradable-only / Copy / download SKILL.md", "expected": "filtered list; catalog JSON to clipboard; SKILL.md with live catalog + field vocab baked in"}
+      ],
+      "notes": "Wallet cell appears on the AntSeed provider. 502 if router /x/market unavailable.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"}
+    },
+    {
+      "id": "consumers-manage", "order": 47, "phase": "6-dashboard-ops",
+      "name": "Consumers tab -- browse / scope / settings / create-rotate-revoke-reveal keys",
+      "actor": "operator", "category": "dashboard",
+      "entry_point": "Consumers tab + Settings group; /dashboard/api/{stats,keys,keys/revoke,keys/reveal,consumers/{c}}",
+      "preconditions": ["admin session"],
+      "steps": [
+        {"action": "browse/search/filter; click a row", "expected": "rows (status, requests/errors/tokens/spend/last-seen/key-status); scope to one consumer -> detail + routes/providers breakdown + recent activity"},
+        {"action": "drawer actions", "expected": "Save settings (configure-consumer-limits); Generate/rotate key (+ handoff blurb); Revoke key; Reveal keys -- see flows 25-29"}
+      ],
+      "notes": "Consumer-role sessions get admin actions hidden.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"}
+    },
+    {
+      "id": "key-usage-lookup", "order": 48, "phase": "6-dashboard-ops",
+      "name": "Key usage -- operator per-key lookup",
+      "actor": "operator", "category": "dashboard",
+      "entry_point": "Key usage tab -> POST /dashboard/api/key-usage {api_key, window?, limit?, offset?}",
+      "preconditions": ["admin session"],
+      "steps": [
+        {"action": "paste api_key + window/limit/offset", "expected": "same sanitized router_key_usage snapshot as /v1/usage for that key; 404 key_usage_not_found if unseen; 401 if not admin"}
+      ],
+      "notes": "Consumer bearer does NOT satisfy this -- admin session only.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P2"}
+    },
+    {
+      "id": "provider-keys-tab", "order": 49, "phase": "6-dashboard-ops",
+      "name": "Provider keys tab -- view / reveal / edit / add / manage Codex accounts",
+      "actor": "operator", "category": "dashboard",
+      "entry_point": "Provider keys tab; /dashboard/api/provider-keys[/add|/update|/reveal], /dashboard/api/codex/accounts",
+      "preconditions": ["admin session"],
+      "steps": [
+        {"action": "view + reveal/copy", "expected": "rows: provider, credential (env name+fingerprint) or AntSeed wallet cell, usage stats; reveal -> raw key inline"},
+        {"action": "edit / add provider / manage codex accounts", "expected": "see flows provider-add, provider-key-update-reveal, codex-multi-account"}
+      ],
+      "notes": "Privatized by default (env names + fingerprints).",
+      "test_flags": {"real_money": false, "real_credentials": true, "tos_risk": false, "mockable": true, "priority": "P1"}
+    },
+    {
+      "id": "config-knobs", "order": 50, "phase": "6-dashboard-ops",
+      "name": "Config tab -- per-provider runtime knobs (PVC-backed, hot-apply)",
+      "actor": "operator", "category": "dashboard",
+      "entry_point": "Config tab -> GET /dashboard/api/config ; POST /dashboard/api/config (-> /x/config/reload)",
+      "preconditions": ["admin session"],
+      "steps": [
+        {"action": "open Config", "expected": "knobs grouped by provider (antseed/codex/openrouter): value, default, [min,max], override pill"},
+        {"action": "edit + Save", "expected": "validated vs schema, persisted to PVC, hot-applied; 'Saved · live'; 400 on schema failure"},
+        {"action": "Reset an overridden knob", "expected": "override cleared to default"}
+      ],
+      "notes": "Knobs: antseed top-N/runway, codex scarcity ramp/runway, openrouter credit runway.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P2"}
+    },
+
+    {
+      "id": "antseed-identity-and-fund", "order": 51, "phase": "7-money-antseed",
+      "name": "AntSeed buyer identity + fund the hot-wallet (USDC + ETH gas on Base mainnet)",
+      "actor": "operator", "category": "billing",
+      "entry_point": "env ANTSEED_IDENTITY_HEX (private key) ; on-chain transfer to the wallet address",
+      "preconditions": ["a secp256k1 private key controlling a Base wallet; real USDC + ETH"],
+      "steps": [
+        {"action": "set ANTSEED_IDENTITY_HEX to the funded wallet's private key", "expected": "durable identity across volume loss (unset -> ephemeral key in the volume, lost with it)"},
+        {"action": "send USDC + ETH(gas) to the wallet address", "expected": "raw wallet USDC appears (but is NOT yet spendable -- see escrow)"}
+      ],
+      "notes": "REAL MONEY. Treat ANTSEED_IDENTITY_HEX as a private key -- never commit it. Wallet USDC alone does nothing; the buyer spends from escrow.",
+      "test_flags": {"real_money": true, "real_credentials": true, "tos_risk": false, "mockable": false, "priority": "P2"}
+    },
+    {
+      "id": "antseed-deposit-withdraw", "order": 52, "phase": "7-money-antseed",
+      "name": "AntSeed deposit / withdraw / refresh (wallet <-> escrow)",
+      "actor": "operator", "category": "billing",
+      "entry_point": "dashboard Deposit/Withdraw/Refresh -> POST /dashboard/api/wallet/{deposit,withdraw,refresh} -> /x/wallet/* -> sidecar control :8379 -> antseed buyer <cmd>",
+      "preconditions": ["funded wallet; ANTSEED_CONTROL_TOKEN set on router+sidecar (else 503); in k8s ANTSEED_CONTROL_URL=http://127.0.0.1:8379"],
+      "steps": [
+        {"action": "Deposit {amount}", "expected": "amount validated ^\\d+(\\.\\d{1,6})?$ >0; antseed buyer deposit <amt> on-chain (120s tx timeout); mutations serialized"},
+        {"action": "control runs buyer status --json", "expected": "status-antseed.json rewritten; router reads new depositsAvailable on next poll"},
+        {"action": "dashboard refresh", "expected": "new escrow + runway (computed from the available+reserved total; the reserved portion returns as channels settle)"}
+      ],
+      "notes": "REAL on-chain spend. wallet-vs-escrow gotcha: dashboard shows depositsAvailable (escrow), what the buyer spends -- NOT raw wallet USDC. Prod runbook = the antseed-prod-deposit skill (VPN+SSO+kubectl). Propagation lag: sidecar 60s, router poll 300s.",
+      "test_flags": {"real_money": true, "real_credentials": false, "tos_risk": false, "mockable": false, "priority": "P2"}
+    },
+    {
+      "id": "antseed-market-dump", "order": 53, "phase": "7-money-antseed",
+      "name": "AntSeed browse market / regenerate price seeds (no spend)",
+      "actor": "operator", "category": "billing",
+      "entry_point": "antseed network browse --services --json --top 50 ; auto /market/market.json ; dashboard Market via /x/market",
+      "preconditions": ["AntSeed node running"],
+      "steps": [
+        {"action": "browse the network", "expected": "per-peer servicePricing[<model>] input/output USD-per-million, queryable WITHOUT spending"},
+        {"action": "seed metrics.live.lua with the cheapest peer per model", "expected": "a seed only; live EMA becomes source of truth once proxies run"}
+      ],
+      "notes": "Stale dump (>900s) -> degraded (no antseed candidates). Writer validates JSON before writing. Caps enforced by the buyer proxy so a stale seed can't overpay.",
+      "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P2"}
+    }
+  ]
+}