diff --git a/.env.example b/.env.example index 89978fc..1aa5cc9 100644 --- a/.env.example +++ b/.env.example @@ -32,3 +32,29 @@ PUBLIC_BASE_URL=http://127.0.0.1:8080/v1 RATE_PER_MIN=600 BURST=200 LOG_LEVEL=INFO + +# ============================================================================ +# AntSeed marketplace (OPTIONAL — only with `docker compose --profile antseed`) +# ============================================================================ +# AntSeed lets the router buy inference from a decentralized marketplace, paid in +# REAL USDC on Base mainnet from a hot wallet you control. Off by default. +# +# ⚠️ ALWAYS use a DEDICATED DEV WALLET here with a tiny balance — NEVER your +# production wallet key. This var IS a private key: treat it like a password, +# never commit it (.env / .env.secrets are gitignored). +# +# Setup (3 steps): +# 1. Generate a dev wallet: ./scripts/gen-dev-wallet.sh (prints the two +# lines below; paste them into .env) +# 2. Bring it up: docker compose --profile antseed up -d --build +# 3. Get the address to fund: docker compose exec antseed antseed buyer balance --json +# then send a little USDC + ETH (gas) on **Base mainnet** to that address, +# and `deposit` it into escrow from the dashboard Catalog (wallet cell). +ANTSEED_IDENTITY_HEX= +# Shared secret enabling the dashboard's wallet self-service (deposit/withdraw). +# Same value on the router and the antseed sidecar. Unset => those endpoints 503. +ANTSEED_CONTROL_TOKEN= +# Wide outer spend ceilings (USD per million tokens). The real per-call price gate +# is the caller's Σ_pol policy; these are just rails. 
+ANTSEED_MAX_INPUT=1000 +ANTSEED_MAX_OUTPUT=1000 diff --git a/behave.ini b/behave.ini new file mode 100644 index 0000000..8c2595d --- /dev/null +++ b/behave.ini @@ -0,0 +1,4 @@ +[behave] +paths = features +tags = -manual +show_timings = true diff --git a/docs/PROVIDERS.md b/docs/PROVIDERS.md index 2087f76..cb8c962 100644 --- a/docs/PROVIDERS.md +++ b/docs/PROVIDERS.md @@ -46,6 +46,20 @@ The host pins the policy-selected peer per request via `x-antseed-pin-peer` (the browse-mode buyer disables auto-selection), keeping peer choice inside Σ_pol rather than an opaque buyer-side router. +### Local dev wallet (testing) + +For local testing use a **dedicated dev wallet**, never your production key. +`./scripts/gen-dev-wallet.sh` prints a fresh `ANTSEED_IDENTITY_HEX` + +`ANTSEED_CONTROL_TOKEN` to paste into `.env`; bring the sidecar up +(`docker compose --profile antseed up -d`), read the derived address with +`docker compose exec antseed antseed buyer balance --json`, fund it with a little +USDC + ETH (gas) on Base, then **Deposit** into escrow from the dashboard Catalog +(wallet cell). Keep dev and prod wallet secrets separate. See `.env.example`. + +> Note: the AntSeed deposits contract **locks** deposited funds — an immediate +> `withdraw` after a `deposit` reverts. Funds are safe in escrow and become +> withdrawable later, or are spent as the buyer routes paid calls. + ### Running the node (vendored sidecar) Built from `Dockerfile.antseed` (pinned `@antseed/cli`, `socat`) and run by diff --git a/features/01_onboarding.feature b/features/01_onboarding.feature new file mode 100644 index 0000000..8fffa94 --- /dev/null +++ b/features/01_onboarding.feature @@ -0,0 +1,34 @@ +Feature: Onboarding & setup — a new user gets a running, healthy stack + The clone/compose steps themselves are environment-level (@manual: cannot be + re-run inside the suite); here we assert their OUTCOME on the running stack. 
+ + @p0 @onboarding + Scenario: The core engine submodule is populated (recursive clone outcome) + Then the file "core/router.lua" exists + And the file "core/llm_policy.lua" exists + + @p0 @onboarding + Scenario: The stack is up and healthy (compose up outcome) + Given the stack is healthy + When I GET "/healthz" as none + Then the status is 200 + And the field "ok" equals "True" + + @p0 @onboarding + Scenario: The router loaded its catalog (engine embedded + config.live.lua) + Given I have a caller token + When I GET "/v1/models" as consumer + Then the status is 200 + And the array "data" has at least 5 items + + @manual @onboarding + Scenario: Recursive clone (manual — run once on a fresh machine) + # git clone --recursive https://github.com/genlayerlabs/unhardcoded.git + # -> core/ submodule populated; covered by the 'submodule populated' outcome above. + Given the stack is healthy + + @manual @onboarding + Scenario: docker compose up --build (manual — environment setup) + # cp .env.example .env.secrets; fill secrets; docker compose up -d --build + # -> router + ingress healthy; covered by the 'stack up and healthy' outcome above. 
+ Given the stack is healthy diff --git a/features/02_auth.feature b/features/02_auth.feature new file mode 100644 index 0000000..c4295d2 --- /dev/null +++ b/features/02_auth.feature @@ -0,0 +1,39 @@ +Feature: Authentication — dashboard sessions and the caller bearer contract + + Background: + Given the stack is healthy + + @p0 @auth + Scenario: DASHBOARD_NO_AUTH grants local admin to the console API + When I GET "/dashboard/api/stats" as admin + Then the status is 200 + And the field "viewer_role" equals "admin" + + @p0 @auth + Scenario: A valid caller bearer token is accepted on /v1 + Given I have a caller token + When I GET "/v1/models" as consumer + Then the status is 200 + + @p0 @auth + Scenario: A missing caller token is rejected on /v1 + When I GET "/v1/models" as none + Then the status is 401 + And the field "error.code" equals "caller_auth" + + @p1 @auth + Scenario: A consumer can log into the dashboard with their API key (scoped session) + Given I have a caller token + When I log into the dashboard with my caller key + Then the status is 200 + And the field "role" equals "consumer" + + @manual @auth + Scenario: Admin password login (manual — needs DASHBOARD_PASSWORD_SHA256 set and NO_AUTH off) + # POST /dashboard/login {password} -> sets an admin session cookie. + # Not auto-tested: the local dev stack runs with DASHBOARD_NO_AUTH=1. + Given the stack is healthy + + @manual @auth + Scenario: Trusted-header SSO admin (manual — needs a reverse proxy injecting the header+secret) + Given the stack is healthy diff --git a/features/03_consumer_api.feature b/features/03_consumer_api.feature new file mode 100644 index 0000000..326ba65 --- /dev/null +++ b/features/03_consumer_api.feature @@ -0,0 +1,94 @@ +Feature: Consumer API flows (/v1) — the calling service's surface + As a consuming service I call /v1 with my bearer token and the router + decides/falls-back over the operator's provider keys. All end-to-end chats + here route to codex ($0) so the suite is free. 
+ + Background: + Given the stack is healthy + And I have a caller token + + @p0 @api + Scenario: List the routable model catalog + When I GET "/v1/models" as consumer + Then the status is 200 + And the field "object" equals "list" + And the array "data" has at least 5 items + And the array "data" includes an item where "id" equals "profile:default" + + @p0 @api + Scenario: Chat completion runs a policy and returns a real answer + trace + When I POST a free chat as consumer + Then the status is 200 + And the field "object" equals "chat.completion" + And the field "choices[0].message.content" is non-empty + And the field "usage.total_tokens" is a number + And the field "x_router.provider" is non-empty + And the field "x_router.served_model_id" is non-empty + And the field "x_router.decision_trace" is present + + @p0 @api + Scenario: Per-call policy_ir is admitted and executed + When I POST "/v1/chat/completions" as consumer with json + """ + {"model":"","max_tokens":16,"messages":[{"role":"user","content":"hi"}], + "policy_ir":["policy", + ["and",["meets_req"],["not",["is","disabled"]],["family_eq","gpt-5.5"]], + ["neg",["normalize",["field","price_in"]]], + ["argmax"],["id"],["always",{"action":"next_candidate"}]]} + """ + Then the status is 200 + And the field "x_router.policy_fingerprint" is present + And the field "choices[0].message.content" is non-empty + + @p1 @api + Scenario: Malformed policy_ir is rejected cleanly at admission (no spend) + When I POST "/v1/chat/completions" as consumer with json + """ + {"model":"","messages":[{"role":"user","content":"hi"}], + "policy_ir":["policy","not-a-valid-term"]} + """ + Then the status is 400 + And the field "error.type" equals "invalid_request_error" + And the field "error.message" contains "policy_ir" + + @p0 @api + Scenario: Sigma_flow DAG runs and returns the sink answer with a per-node trace + When I POST a free flow as consumer + Then the status is 200 + And the field "x_router.provider" equals "flow" + And 
the field "choices[0].message.content" is non-empty + And the array "x_router.decision_trace.flow_nodes" has at least 2 items + And every item in "x_router.decision_trace.flow_nodes" has a "provider" + And every item in "x_router.decision_trace.flow_nodes" has a "served_model_id" + + @p1 @api + Scenario: Malformed flow_ir is rejected at admission + When I POST "/v1/chat/completions" as consumer with json + """ + {"model":"","messages":[{"role":"user","content":"hi"}], + "flow_ir":["flow",{"out":{"kind":"output","inputs":["missing"]}}]} + """ + Then the status is 400 + And the field "error.message" contains "flow_ir" + + @p1 @api + Scenario: Per-key usage self-service is scoped and sanitized + When I POST a free chat as consumer + And I GET "/v1/usage?window=24h" as consumer + Then the status is 200 + And the field "kind" equals "router_key_usage" + And the field "key_sha256_prefix" is non-empty + And the field "totals.requests" is at least 1 + And the field "consumer_settings.status" is present + + @p0 @api + Scenario: Missing bearer token is rejected + When I GET "/v1/models" as none + Then the status is 401 + And the field "error.code" equals "caller_auth" + + @p0 @api + Scenario: Unknown bearer token is rejected + When I GET "/v1/models" as bad + Then the status is 401 + And the field "error.code" equals "caller_auth" diff --git a/features/04_dashboard.feature b/features/04_dashboard.feature new file mode 100644 index 0000000..78cf094 --- /dev/null +++ b/features/04_dashboard.feature @@ -0,0 +1,109 @@ +Feature: Dashboard data — what the operator console renders MUST be present and correct + The dashboard is a thin renderer of /dashboard/api/*. These scenarios assert the + backing data is complete and correct (so the frontend shows real, correct values + in Analytics, Activity, Catalog, Config, Consumers, Provider keys). Seeded + activity (one chat + one flow) is created in before_all. 
+ + Background: + Given the stack is healthy + + @p0 @dashboard + Scenario: The dashboard HTML page loads with all its tabs and renderers + When I GET "/dashboard" as admin + Then the status is 200 + And the response text contains "Analytics" + And the response text contains "Builder" + And the response text contains "Activity" + And the response text contains "Catalog" + And the response text contains "Config" + And the response text contains "renderActivity" + And the response text contains "renderAnalytics" + + @p0 @dashboard + Scenario: Analytics — totals, breakdowns and health are populated + When I GET "/dashboard/api/stats" as admin + Then the status is 200 + And the field "viewer_role" equals "admin" + And the field "totals.requests" is at least 1 + And the field "totals.tokens_total" is a number + And the field "totals.cost_usd" is a number + And the field "by_provider" is non-empty + And the field "by_status" is non-empty + And the field "health_summary" is present + And the array "daily_totals" has at least 1 items + + @p0 @dashboard + Scenario: Activity — recent requests carry a full, correct per-request trace + When I POST a free chat as consumer + And I POST a free flow as consumer + And I GET "/dashboard/api/stats" as admin + Then the status is 200 + And the array "recent" has at least 2 items + And every item in "recent" has a "status" + And every item in "recent" has a "ts" + And the array "recent" includes an item where "provider" equals "flow" + And the array "recent" includes an item where "provider" equals "openai" + + @p0 @dashboard + Scenario: Catalog (Market) — families list with prices and per-seller perf + When I GET "/dashboard/api/market" as admin + Then the status is 200 + And the array "families" has at least 3 items + And every item in "families" has a "family" + And every item in "families" has a "quality" + And every item in "families" has a "rows" + And the array "families" includes an item where "family" equals "gpt-5.5" + + @p0 
@dashboard + Scenario: Policies — the default profile and live providers with health + When I GET "/dashboard/api/policies" as admin + Then the status is 200 + And the array "profiles" includes an item where "name" equals "default" + And the field "providers" is non-empty + And every item in "providers" has a "health" + + @p1 @dashboard + Scenario: Builder field vocabulary is available + When I GET "/dashboard/api/fields" as admin + Then the status is 200 + And the array "fields" includes an item where "name" equals "price_in" + And the array "fields" includes an item where "name" equals "latency_ms" + And the array "fields" includes an item where "name" equals "success_rate" + + @p1 @dashboard + Scenario: Config — per-provider tunable knobs are present + When I GET "/dashboard/api/config" as admin + Then the status is 200 + And the field "knobs" is non-empty + + @p1 @dashboard + Scenario: Consumers — the test consumer is listed with stats + When I GET "/dashboard/api/keys" as admin + Then the status is 200 + And the array "keys" includes an item where "consumer" equals "bdd-test" + + @p1 @dashboard + Scenario: Provider keys — credentials snapshot is privatized but present + When I GET "/dashboard/api/provider-keys" as admin + Then the status is 200 + And the field "rows" is non-empty + + @p1 @dashboard + Scenario: Codex accounts — an active account is configured + When I GET "/dashboard/api/codex/accounts" as admin + Then the status is 200 + And the field "accounts" is non-empty + And the field "active" is non-empty + And the field "activity" is present + + @p1 @dashboard + Scenario: Builder dry-run ranking (policy preview) returns an ordering (no spend) + When I POST "/dashboard/api/policy/preview" as admin with json + """ + {"policy_ir":["policy", + ["and",["meets_req"],["not",["is","disabled"]],["family_eq","gpt-5.5"]], + ["neg",["normalize",["field","price_in"]]], + ["argmax"],["id"],["always",{"action":"next_candidate"}]]} + """ + Then the status is 200 + And 
the field "ranked" is non-empty diff --git a/features/05_providers.feature b/features/05_providers.feature new file mode 100644 index 0000000..2613d65 --- /dev/null +++ b/features/05_providers.feature @@ -0,0 +1,42 @@ +Feature: Providers — OpenRouter, Codex, discovery and registered model traits + Asserts the configured providers are live and the registered benchmark/modality + fields (model_meta) are part of the field vocabulary the builder/policies use. + + Background: + Given the stack is healthy + + @p0 @providers + Scenario: OpenRouter and Codex providers are present with health + When I GET "/dashboard/api/policies" as admin + Then the status is 200 + And the array "providers" includes an item where "name" equals "openrouter" + And the array "providers" includes an item where "name" equals "openai" + And every item in "providers" has a "health" + + @p1 @providers + Scenario: Codex is configured as a ChatGPT-subscription (openai_codex) provider + When I GET "/dashboard/api/policies" as admin + Then the status is 200 + And the array "providers" includes an item where "name" equals "openai" + And the matched item field "api_kind" equals "openai_codex" + + @p1 @providers + Scenario: A Codex account is active (auth wired through) + When I GET "/dashboard/api/codex/accounts" as admin + Then the status is 200 + And the field "accounts" is non-empty + And the field "active" is non-empty + + @p1 @providers + Scenario: Registered model traits (model_meta benchmarks) are in the field vocabulary + When I GET "/dashboard/api/fields" as admin + Then the status is 200 + And the array "fields" includes an item where "name" equals "bench_intelligence" + And the array "fields" includes an item where "name" equals "bench_coding" + + @p1 @providers + Scenario: The discovered catalog exposes routable families + Given I have a caller token + When I GET "/v1/models" as consumer + Then the status is 200 + And the array "data" includes an item where "id" equals "family:gpt-5.5" diff 
--git a/features/06_consumer_keys.feature b/features/06_consumer_keys.feature new file mode 100644 index 0000000..bc59eaf --- /dev/null +++ b/features/06_consumer_keys.feature @@ -0,0 +1,73 @@ +Feature: Consumer key lifecycle (operator issues + governs ingress tokens) + Each scenario mints its own throwaway consumer so they stay isolated. All + rejections happen at the ingress BEFORE any LLM call, so these are free. + + Background: + Given the stack is healthy + + @p0 @consumer-keys + Scenario: A freshly issued key authenticates against /v1 immediately + When I create a consumer key for "bdd-new" + Then the status is 200 + And the field "api_key" is non-empty + And the field "sha256_prefix" is non-empty + When I GET "/v1/models" as consumer + Then the status is 200 + And the field "object" equals "list" + + @p1 @consumer-keys + Scenario: allowed_routes restricts which routes a key may call + When I create a consumer key for "bdd-route" + Then the status is 200 + When I POST "/dashboard/api/consumers/bdd-route" as admin with json + """ + {"allowed_routes":["family:does-not-exist"]} + """ + Then the status is 200 + When I POST "/v1/chat/completions" as consumer with json + """ + {"model":"family:gpt-5.5","messages":[{"role":"user","content":"hi"}]} + """ + Then the status is 403 + And the field "error.code" equals "caller_route_not_allowed" + + @p1 @consumer-keys + Scenario: rate_per_min / burst throttle a key + When I create a consumer key for "bdd-rate" + Then the status is 200 + When I POST "/dashboard/api/consumers/bdd-rate" as admin with json + """ + {"allowed_routes":[],"rate_per_min":1,"burst":1} + """ + Then the status is 200 + When I POST a free chat as consumer + Then the status is 200 + When I POST a free chat as consumer + Then the status is 429 + And the field "error.code" equals "caller_rate_limit" + + @p1 @consumer-keys + Scenario: A revoked key is rejected immediately + # Revoke drops the key's hash, so the token becomes unknown -> 401 caller_auth + # 
(not 403 caller_key_revoked, which only applies while the hash still maps). + When I create a consumer key for "bdd-revoke" + Then the status is 200 + When I revoke the created key + Then the status is 200 + And the field "removed_hashes" equals 1 + When I GET "/v1/models" as consumer + Then the status is 401 + And the field "error.code" equals "caller_auth" + + @p2 @consumer-keys + Scenario: An inactive consumer's keys are all rejected + When I create a consumer key for "bdd-inactive" + Then the status is 200 + When I POST "/dashboard/api/consumers/bdd-inactive" as admin with json + """ + {"status":"inactive"} + """ + Then the status is 200 + When I GET "/v1/models" as consumer + Then the status is 403 + And the field "error.code" equals "caller_inactive" diff --git a/features/07_money_antseed.feature b/features/07_money_antseed.feature new file mode 100644 index 0000000..9049687 --- /dev/null +++ b/features/07_money_antseed.feature @@ -0,0 +1,79 @@ +Feature: AntSeed marketplace — wallet, escrow and on-chain money + Split into two: @antseed READ-ONLY data checks (free — verify the dashboard + shows the real wallet/escrow correctly; needs the antseed sidecar up + funded, + so excluded from the default run), and @manual on-chain EXECUTION (real USDC on + Base mainnet — deposit/withdraw/spend, run by hand). 
+ + Run the read-only ones (with the sidecar up + funded): behave --tags=antseed + + # ---- READ-ONLY: the money DATA the dashboard renders is real and correct ---- + + @antseed @money + Scenario: The dashboard Catalog shows the AntSeed wallet with real escrow data + Given the stack is healthy + When I GET "/dashboard/api/market" as admin + Then the status is 200 + And the field "wallet.provider" equals "antseed" + And the field "wallet.address" contains "0x" + And the field "wallet.deposits_available" is a number + And the field "wallet.deposits_reserved" is present + And the field "wallet.connection" equals "connected" + + @antseed @money + Scenario: AntSeed appears in the catalog as a marketplace provider + Given the stack is healthy + When I GET "/dashboard/api/policies" as admin + Then the status is 200 + And the array "providers" includes an item where "name" equals "antseed" + And the matched item field "tier" equals "marketplace" + + @antseed @spend @money + Scenario: AntSeed serves a request (routes to a peer) — proves the marketplace works + # NB: REAL MONEY. Costs a few cents of escrow + reserves ~1 USDC in a channel + # (returns on settle). 
Gated behind RUN_ANTSEED_SPEND=1 so it never runs by + # accident: RUN_ANTSEED_SPEND=1 behave --tags=spend + Given the stack is healthy + And I have a caller token + When I POST "/v1/chat/completions" as consumer with json + """ + {"model":"","max_tokens":16,"messages":[{"role":"user","content":"Reply: pong"}], + "policy_ir":["policy", + ["and",["meets_req"],["not",["is","disabled"]],["family_eq","glm-5.2"]], + ["neg",["normalize",["field","price_in"]]], + ["argmax"],["id"],["always",{"action":"next_candidate"}]]} + """ + Then the status is 200 + And the field "x_router.provider" equals "antseed" + And the field "x_router.served_model_id" equals "glm-5.2" + And the field "x_router.cost_usd" is a number + + # ---- MANUAL: real on-chain transactions (spend / move funds) ---- + + @manual @money + Scenario: Set up a local AntSeed dev wallet (manual — done once per machine) + # The full local user flow (see .env.example + scripts/gen-dev-wallet.sh): + # 1. ./scripts/gen-dev-wallet.sh -> prints ANTSEED_IDENTITY_HEX + + # ANTSEED_CONTROL_TOKEN (a fresh secp256k1/EVM key — a DEV wallet, never prod) + # 2. paste both into .env + # 3. docker compose --profile antseed up -d --build + # 4. docker compose exec antseed antseed buyer balance --json # -> the address + # 5. fund that address with a little USDC + ETH (gas) on Base mainnet + # 6. Deposit into escrow from the dashboard Catalog (wallet cell) + # The OUTCOME (wallet connected + escrow visible) is verified by the @antseed + # read-only scenarios above. + Given the stack is healthy + + @manual @money + Scenario: Deposit USDC wallet -> escrow via the dashboard (real on-chain tx) + # POST /dashboard/api/wallet/deposit {amount} -> /x/wallet/deposit -> sidecar + # control :8379 -> antseed buyer deposit. Verified live: walletUSDC drops, + # depositsAvailable rises, and the Catalog wallet cell shows it. 
+ Given the stack is healthy + + @manual @money + Scenario: Withdraw escrow -> wallet (real on-chain tx) + # POST /dashboard/api/wallet/withdraw {amount}. NOTE: the AntSeed deposits + # contract LOCKS funds — an immediate withdraw after deposit reverts (custom + # error 0xea8e4eb5). Funds are safe in escrow; withdrawable after the lock or + # spendable by routing calls to antseed. + Given the stack is healthy diff --git a/features/08_dashboard_ui.feature b/features/08_dashboard_ui.feature new file mode 100644 index 0000000..cf86877 --- /dev/null +++ b/features/08_dashboard_ui.feature @@ -0,0 +1,46 @@ +Feature: Dashboard UI rendered in a real headless browser + Proves the operator actually SEES the data in the dashboard (real DOM render + via headless chromium), not just that the API returns it. Relies on the seeded + activity (one chat via codex + one flow) created in before_all. + + @browser @p0 + Scenario: The dashboard loads its shell and Activity shows the real flow run + Given I open the dashboard in a browser + Then I see "Analytics" rendered + When I click the "Activity" tab + Then I see "PROVIDER" rendered + And I see "flow" rendered + + @browser @p0 + Scenario: Catalog renders model families with prices + Given I open the dashboard in a browser + When I click the "Catalog" tab + Then I see "gpt-5.5" rendered + + @browser @p0 + Scenario: Analytics renders totals (requests / spend / tokens) + Given I open the dashboard in a browser + Then I see "Requests" rendered + And I see "Spend" rendered + And I see "Tokens" rendered + + @browser @p1 + Scenario: Config renders per-provider tunable knobs + Given I open the dashboard in a browser + When I click the "Config" tab + Then I see "codex" rendered + + @browser @p1 + Scenario: Provider keys tab renders the credentials view + Given I open the dashboard in a browser + When I click the "Provider keys" tab + Then I see "openrouter" rendered + + @browser @p1 @regression + Scenario: An expanded Activity row survives 
the 15s auto-refresh (no auto-close bug) + Given I open the dashboard in a browser + When I click the "Activity" tab + And I expand the first Activity row + And I wait 17 seconds + Then an Activity row is still expanded + diff --git a/features/09_flow1.feature b/features/09_flow1.feature new file mode 100644 index 0000000..8d664e1 --- /dev/null +++ b/features/09_flow1.feature @@ -0,0 +1,38 @@ +Feature: Flow 1 — the GLM ∥ GPT → merge ensemble produces the expected output + The concrete ensemble we ship in opencode: GPT-5.5 (served by Codex, $0) in + parallel with GLM-5.2 (OpenRouter), merged by GLM-5.2. Asserts that when run it + yields exactly the expected shape — a 3-node DAG with Codex + OpenRouter — and + that the run shows up correctly in the dashboard Activity. + + Background: + Given the stack is healthy + And I have a caller token + + @p0 @flow @flow1 + Scenario: Running flow1 returns the merged answer with the expected per-node routing + When I run the flow1 ensemble (retry on flake) + Then the status is 200 + And the field "object" equals "chat.completion" + And the field "x_router.provider" equals "flow" + And the field "choices[0].message.content" is non-empty + And the array "x_router.decision_trace.flow_nodes" has at least 3 items + And every item in "x_router.decision_trace.flow_nodes" has a "provider" + And every item in "x_router.decision_trace.flow_nodes" has a "served_model_id" + And the array "x_router.decision_trace.flow_nodes" includes an item where "provider" equals "openai" + And the array "x_router.decision_trace.flow_nodes" includes an item where "served_model_id" equals "z-ai/glm-5.2" + + @p0 @flow @flow1 + Scenario: The Codex (gpt) node really served via the subscription at $0 + When I run the flow1 ensemble (retry on flake) + Then the status is 200 + And the array "x_router.decision_trace.flow_nodes" includes an item where "served_model_id" equals "gpt-5.5" + And the matched item field "provider" equals "openai" + And the matched item 
"""
Behave environment for the unhardcoded user-flow BDD suite.

Drives the LIVE local stack (ingress :8080 + router) — the same endpoints the
dashboard frontend consumes — so the assertions prove the data the UI renders is
present AND correct, not just that endpoints return 200.

Assumptions (local-dev):
  * stack up at BASE_URL (default http://127.0.0.1:8080)
  * DASHBOARD_NO_AUTH=1 so /dashboard/api/* is reachable as admin
  * a working $0 route exists for family gpt-5.5 (codex) so chat tests are FREE

Run:  nix-shell -p "python3.withPackages(ps: with ps; [behave requests])" \
        --run 'behave features'
"""
import os
import json
import requests

BASE_URL = os.environ.get("BASE_URL", "http://127.0.0.1:8080")

# A $0, price-first policy pinned to gpt-5.5 -> resolves to codex (subscription,
# cost 0). Keeps every end-to-end chat test free.
FREE_POLICY_IR = [
    "policy",
    ["and", ["meets_req"], ["not", ["is", "disabled"]], ["family_eq", "gpt-5.5"]],
    ["neg", ["normalize", ["field", "price_in"]]],
    ["argmax"], ["id"], ["always", {"action": "next_candidate"}],
]


def _mint_caller_token(consumer="bdd-test"):
    """Create a consumer key through the dashboard API and return its bearer token.

    Relies on the dashboard running with DASHBOARD_NO_AUTH, so no session is
    needed. Raises ``requests.HTTPError`` on a non-2xx response and ``KeyError``
    if the reply carries no ``api_key`` field.
    """
    r = requests.post(f"{BASE_URL}/dashboard/api/keys",
                      json={"consumer": consumer}, timeout=30)
    r.raise_for_status()
    return r.json()["api_key"]


def before_all(context):
    """Suite-wide setup: verify the stack, mint a caller token, seed activity.

    Populates ``context.base_url``, ``context.session`` and
    ``context.caller_token``. Fails fast (AssertionError) when the stack is
    unhealthy or the dashboard API is not reachable without a login.
    """
    context.base_url = BASE_URL
    context.session = requests.Session()

    # Sanity: stack is up.
    h = requests.get(f"{BASE_URL}/healthz", timeout=10)
    assert h.status_code == 200, f"stack not healthy: {h.status_code}"

    # Sanity: dashboard is reachable without a login (NO_AUTH expected locally).
    d = requests.get(f"{BASE_URL}/dashboard/api/full", timeout=10)
    assert d.status_code == 200, (
        "dashboard /api/full needs DASHBOARD_NO_AUTH=1 for the BDD suite "
        f"(got {d.status_code}). Set it in .env.secrets and restart ingress."
    )

    context.caller_token = _mint_caller_token()

    # Seed REAL activity so Activity / usage / stats / catalog have data to show:
    # one chat + one 2-node flow, both $0 via codex.
    _seed_activity(context)


def before_scenario(context, scenario):
    """Per-scenario gating (AntSeed / real-money spend) and browser start-up.

    * ``@antseed``: skipped unless the dashboard reports a connected wallet.
    * ``@spend``:   skipped unless RUN_ANTSEED_SPEND=1 is set in the environment.
    * ``@browser``: starts a headless Chrome/Chromium driver on ``context.driver``.
    """
    tags = scenario.effective_tags

    if "antseed" in tags:
        # Only run AntSeed scenarios when the funded sidecar is actually up;
        # otherwise skip (keeps the default suite green without a wallet).
        # The probe is deliberately best-effort: any failure means "not connected".
        try:
            w = (requests.get(f"{BASE_URL}/dashboard/api/market", timeout=10)
                 .json().get("wallet") or {})
        except Exception:
            w = {}
        if w.get("connection") != "connected":
            scenario.skip("antseed sidecar not up/funded (wallet not connected)")
            return

    # Real-money spend is gated behind an explicit opt-in env var. Checked for
    # every @spend scenario so the gate cannot be bypassed by a missing @antseed tag.
    if "spend" in tags and os.environ.get("RUN_ANTSEED_SPEND") != "1":
        scenario.skip("real-money antseed spend — set RUN_ANTSEED_SPEND=1 to run")
        return

    if "browser" in tags:
        # Imported lazily so the non-browser suite runs without selenium installed.
        import shutil
        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options
        from selenium.webdriver.chrome.service import Service
        opts = Options()
        opts.add_argument("--headless=new")
        opts.add_argument("--no-sandbox")
        opts.add_argument("--disable-dev-shm-usage")
        opts.add_argument("--disable-gpu")
        opts.add_argument("--window-size=1400,2000")
        # shutil.which() returns None when the binary is not on PATH. Only pin
        # the browser binary when one was found; otherwise leave Selenium's own
        # lookup in charge instead of handing it None.
        # NOTE(review): recent Selenium releases reject a non-str binary_location
        # with TypeError — confirm against the pinned selenium version.
        chromium = shutil.which("chromium") or shutil.which("chromium-browser")
        if chromium:
            opts.binary_location = chromium
        # executable_path=None is Service's default (driver resolved by Selenium).
        service = Service(executable_path=shutil.which("chromedriver"))
        context.driver = webdriver.Chrome(service=service, options=opts)
        context.driver.set_page_load_timeout(60)


def after_scenario(context, scenario):
    """Shut down the browser started for a @browser scenario, if there is one."""
    d = getattr(context, "driver", None)
    if d is not None:
        try:
            d.quit()
        except Exception:
            # Best-effort teardown: a dead driver must not fail the scenario.
            pass
        context.driver = None


def _seed_activity(context):
    """Create one routed chat and one 2-node flow so the dashboard has data.

    Both requests use FREE_POLICY_IR. A non-200 seed response is reported on
    stdout (it does not abort the suite); ``context.seeded`` is set to True
    once both requests have been issued.
    """
    hdr = auth_headers(context)
    chat_url = f"{context.base_url}/v1/chat/completions"

    # one routed chat (codex, $0)
    chat_resp = requests.post(chat_url, headers=hdr, json={
        "model": "", "max_tokens": 16,
        "messages": [{"role": "user", "content": "Reply with one short sentence."}],
        "policy_ir": FREE_POLICY_IR,
    }, timeout=120)

    # one flow (two codex nodes -> merge), $0
    flow = ["flow", {
        "q": {"kind": "input"},
        "a": {"kind": "llm", "system": "Be concise.",
              "policy": FREE_POLICY_IR, "inputs": ["q"]},
        "b": {"kind": "llm", "system": "Synthesize.",
              "policy": FREE_POLICY_IR, "inputs": ["a"],
              "template": "Refine: $1"},
        "out": {"kind": "output", "inputs": ["b"]},
    }]
    flow_resp = requests.post(chat_url, headers=hdr, json={
        "model": "", "max_tokens": 120,
        "messages": [{"role": "user", "content": "Say hello."}],
        "flow_ir": flow,
    }, timeout=180)

    # Surface a failed seed right away: otherwise it only shows up later as
    # confusing "recent/totals" assertion failures in the dashboard scenarios.
    for label, resp in (("chat", chat_resp), ("flow", flow_resp)):
        if resp.status_code != 200:
            print(f"[bdd] WARNING: seed {label} request returned HTTP "
                  f"{resp.status_code}; scenarios relying on seeded activity may fail")

    context.seeded = True
context.seeded = True + + +# ---- helpers used by steps ------------------------------------------------ + +def auth_headers(context): + return {"Authorization": f"Bearer {context.caller_token}", + "Content-Type": "application/json"} + + +def jpath(obj, path): + """Tiny dotted/indexed JSON getter: 'a.b[0].c'. Returns SENTINEL if missing.""" + cur = obj + for part in _tokenize(path): + try: + if isinstance(part, int): + cur = cur[part] + else: + cur = cur[part] + except (KeyError, IndexError, TypeError): + return _MISSING + return cur + + +class _Missing: + def __repr__(self): + return "" + + +_MISSING = _Missing() +SENTINEL = _MISSING + + +def _tokenize(path): + out = [] + for seg in path.split("."): + while "[" in seg: + name, rest = seg.split("[", 1) + if name: + out.append(name) + idx, seg = rest.split("]", 1) + out.append(int(idx)) + if seg: + out.append(seg) + return out diff --git a/features/fixtures/flow1.json b/features/fixtures/flow1.json new file mode 100644 index 0000000..6dae779 --- /dev/null +++ b/features/fixtures/flow1.json @@ -0,0 +1,37 @@ +{ + "model": "", + "max_tokens": 1200, + "messages": [{"role": "user", "content": "Propose the next step to implement an in-memory LRU cache in Python."}], + "flow_ir": ["flow", { + "q": { "kind": "input" }, + "gpt": { + "kind": "llm", + "system": "You are a coding agent. Given the task and context, propose the single best NEXT STEP — concrete and short.", + "policy": ["policy", + ["and", ["meets_req"], ["not", ["is", "disabled"]], ["family_eq", "gpt-5.5"]], + ["neg", ["normalize", ["field", "price_in"]]], + ["argmax"], ["id"], ["always", { "action": "next_candidate" }]], + "inputs": ["q"] + }, + "glm": { + "kind": "llm", + "system": "You are a coding agent. 
Given the task and context, propose the single best NEXT STEP — concrete and short.", + "policy": ["policy", + ["and", ["meets_req"], ["not", ["is", "disabled"]], ["family_eq", "z-ai/glm-5.2"]], + ["add", ["scale", 3, ["neg", ["normalize", ["field", "latency_ms"]]]], ["neg", ["normalize", ["field", "price_in"]]]], + ["argmax"], ["id"], ["always", { "action": "next_candidate" }]], + "inputs": ["q"] + }, + "merge": { + "kind": "llm", + "system": "You are a coding agent. Two approaches for the next step are below (A and B). Pick the strongest, merge the best of each, and give the single best next step. Be concise.", + "policy": ["policy", + ["and", ["meets_req"], ["not", ["is", "disabled"]], ["family_eq", "z-ai/glm-5.2"]], + ["add", ["scale", 3, ["neg", ["normalize", ["field", "latency_ms"]]]], ["neg", ["normalize", ["field", "price_in"]]]], + ["argmax"], ["id"], ["always", { "action": "next_candidate" }]], + "inputs": ["gpt", "glm"], + "template": "Approach A (GPT-5.5):\n$1\n\nApproach B (GLM-5.2):\n$2\n\nDecide and give the single best next step now." + }, + "out": { "kind": "output", "inputs": ["merge"] } + }] +} diff --git a/features/steps/browser_steps.py b/features/steps/browser_steps.py new file mode 100644 index 0000000..5be37d7 --- /dev/null +++ b/features/steps/browser_steps.py @@ -0,0 +1,84 @@ +"""Real-browser (Selenium + headless chromium) steps: prove the dashboard +actually RENDERS the data, not just that the API returns it.""" +import time +from behave import given, when, then + +# Optional: only the @browser scenarios need selenium (run under the chromium +# nix-shell). Keep the module importable so the non-browser suite runs without it. 
# selenium is optional; without it the two names are None placeholders so this
# module still imports for the non-browser suite.
try:
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
except ImportError:
    By = None
    WebDriverWait = None


def _body_text(driver):
    """Return the text of the page's ``body`` element."""
    body = driver.find_element(By.TAG_NAME, "body")
    return body.text


def _wait_text(context, text, timeout=40):
    """Poll the page until ``text`` appears in the body, ignoring case."""
    def _shown(drv):
        return text.lower() in _body_text(drv).lower()

    waiter = WebDriverWait(context.driver, timeout)
    waiter.until(_shown, message=f"timed out waiting for {text!r}")


@given('I open the dashboard in a browser')
def step_open(context):
    """Load /dashboard and wait for the app shell's tab labels."""
    context.driver.get(context.base_url + "/dashboard")
    # NO_AUTH -> goes straight to the app shell (sidebar nav with the tabs).
    for label in ("Analytics", "Activity"):
        _wait_text(context, label, timeout=40)


@when('I click the "{tab}" tab')
def step_click_tab(context, tab):
    """Click the first displayed ``.tab`` element whose text contains ``tab``.

    Retries for up to 20 seconds; raises AssertionError if nothing matched.
    """
    # The visible text of a tab is not exactly the label, so match by substring.
    wanted = tab.lower()
    give_up_at = time.time() + 20
    while time.time() < give_up_at:
        for candidate in context.driver.find_elements(By.CSS_SELECTOR, ".tab"):
            try:
                label = candidate.text.strip().lower()
                if wanted not in label or not candidate.is_displayed():
                    continue
                context.driver.execute_script("arguments[0].click();", candidate)
                time.sleep(0.8)  # let the tab's loader fetch + render
                return
            except Exception:
                # Any per-element error just moves on to the next candidate.
                continue
        time.sleep(0.3)
    raise AssertionError(f'tab {tab!r} not found/clickable')


@then('I see "{text}" rendered')
def step_see(context, text):
    """Wait (default timeout) for ``text`` to show up on the page."""
    _wait_text(context, text)


@when('I expand the first Activity row')
def step_expand_row(context):
    """Click the first ``#recent .actRow`` and assert a detail panel opened."""
    activity_rows = context.driver.find_elements(By.CSS_SELECTOR, "#recent .actRow")
    assert activity_rows, "no Activity rows to expand"
    first_row = activity_rows[0]
    context.driver.execute_script("arguments[0].click();", first_row)
    time.sleep(0.5)
    visible_details = context.driver.find_elements(
        By.CSS_SELECTOR, "#recent .actDetail:not(.hidden)")
    assert visible_details, "row did not expand on click"


@when('I wait {seconds:d} seconds')
def step_wait(context, seconds):
    """Sleep for the given whole number of seconds."""
    time.sleep(seconds)


@then('an Activity row is still expanded')
def step_still_expanded(context):
    """Assert at least one non-hidden ``.actDetail`` remains under ``#recent``."""
    visible_details = context.driver.find_elements(
        By.CSS_SELECTOR, "#recent .actDetail:not(.hidden)")
    assert visible_details, ("an expanded Activity row collapsed by itself "
                             "(auto-refresh bug regressed)")


@then('I do NOT see "{text}" rendered')
def step_not_see(context, text):
    """After a one-second pause, assert ``text`` is absent from the page body."""
    time.sleep(1.0)
    page = _body_text(context.driver).lower()
    assert text.lower() not in page, \
        f'unexpectedly saw {text!r} on screen'


# ---- features/steps/steps.py (separate file in the diff) -------------------
"""Generic + specific step definitions for the unhardcoded user-flow suite."""
import json
import requests
from behave import given, when, then
from environment import auth_headers, jpath, SENTINEL, FREE_POLICY_IR


# ---- request steps --------------------------------------------------------

def _do(context, method, path, *, auth, body=None, stream=False):
    """Send one HTTP request and stash the outcome on ``context``.

    ``auth`` selects the headers: "consumer" uses ``auth_headers(context)``,
    "bad" sends a bogus bearer token, "none" sends only a JSON content type,
    and anything else (e.g. "admin") sends no headers at all.
    Sets ``context.resp``, ``context.resp_text`` and ``context.json`` (None
    when the body is not JSON).
    """
    if auth == "consumer":
        headers = auth_headers(context)
    elif auth == "bad":
        headers = {"Authorization": "Bearer not-a-real-token",
                   "Content-Type": "application/json"}
    elif auth == "none":
        headers = {"Content-Type": "application/json"}
    else:
        # auth == "admin" -> dashboard NO_AUTH, no headers needed
        headers = {}
    response = requests.request(
        method, context.base_url + path, headers=headers,
        json=body, timeout=200, stream=stream)
    context.resp = response
    context.resp_text = response.text  # NB: context.text is reserved by behave
    try:
        parsed = response.json()
    except Exception:
        parsed = None
    context.json = parsed


@given('the stack is healthy')
def step_healthy(context):
    """Assert /healthz answers 200 with ``ok`` set to true."""
    health = requests.get(context.base_url + "/healthz", timeout=10)
    assert health.status_code == 200, health.status_code
    assert health.json().get("ok") is True


@given('I have a caller token')
def step_have_token(context):
    """Assert a non-empty ``context.caller_token`` exists."""
    token = getattr(context, "caller_token", None)
    assert token, "no caller token minted"
@when('I GET "{path}" as {auth}')
def step_get(context, path, auth):
    """GET ``path`` with the named auth mode; the result lands on ``context``."""
    _do(context, "GET", path, auth=auth)


# NOTE: behave puts a step's docstring (the body) in context.text BEFORE the step
# runs; we read it here. _do() stores the response body in context.resp_text,
# not context.text, so the request body and the response never collide.
@when('I POST "{path}" as {auth} with json')
def step_post_json(context, path, auth):
    """POST the step's docstring, parsed as JSON, to ``path``."""
    assert context.text, "this step needs a JSON docstring body"
    body = json.loads(context.text)
    _do(context, "POST", path, auth=auth, body=body)


@when('I create a consumer key for "{consumer}"')
def step_create_key(context, consumer):
    """Mint a key via the dashboard API and make it the active caller token."""
    _do(context, "POST", "/dashboard/api/keys", auth="admin", body={"consumer": consumer})
    assert context.resp.status_code == 200, context.resp_text[:200]
    context.created_consumer = consumer
    context.created_key = context.json["api_key"]
    context.created_prefix = context.json["sha256_prefix"]
    context.caller_token = context.created_key  # subsequent "as consumer" uses it (scenario-scoped)


@when('I revoke the created key')
def step_revoke_created(context):
    """Revoke the key recorded by the 'create a consumer key' step."""
    _do(context, "POST", "/dashboard/api/keys/revoke", auth="admin",
        body={"consumer": context.created_consumer, "sha256_prefix": context.created_prefix})


@when('I run the flow1 ensemble (retry on flake)')
def step_flow1(context):
    """POST fixtures/flow1.json up to five times until a chat.completion returns.

    Raises AssertionError with the last status and body prefix if no attempt
    succeeds.
    """
    import os
    here = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    # The fixture contains non-ASCII text, so do not rely on the locale encoding.
    with open(os.path.join(here, "fixtures", "flow1.json"), encoding="utf-8") as f:
        body = json.load(f)
    last = None
    for _ in range(5):
        _do(context, "POST", "/v1/chat/completions", auth="consumer", body=body)
        last = context.resp
        if context.resp.status_code == 200 and isinstance(context.json, dict) \
                and context.json.get("object") == "chat.completion":
            return
    raise AssertionError(
        f"flow1 never succeeded in 5 tries; last status {last.status_code}: "
        f"{context.resp_text[:200]}")


@when('I log into the dashboard with my caller key')
def step_dash_login(context):
    """POST the current caller token to /dashboard/login as ``api_key``."""
    _do(context, "POST", "/dashboard/login", auth="none", body={"api_key": context.caller_token})


@then('the file "{path}" exists')
def step_file_exists(context, path):
    """Assert ``path`` exists, as resolved by ``os.path.exists``."""
    import os
    assert os.path.exists(path), f"missing: {path}"


@when('I POST a free chat as consumer')
def step_free_chat(context):
    """Send one short chat completion routed by ``FREE_POLICY_IR``."""
    _do(context, "POST", "/v1/chat/completions", auth="consumer", body={
        "model": "", "max_tokens": 16,
        "messages": [{"role": "user", "content": "Reply with one short sentence."}],
        "policy_ir": FREE_POLICY_IR,
    })


@when('I POST a free flow as consumer')
def step_free_flow(context):
    """Send a two-node flow (a -> b -> out) whose nodes use ``FREE_POLICY_IR``."""
    flow = ["flow", {
        "q": {"kind": "input"},
        "a": {"kind": "llm", "system": "Be concise.", "policy": FREE_POLICY_IR, "inputs": ["q"]},
        "b": {"kind": "llm", "system": "Refine.", "policy": FREE_POLICY_IR, "inputs": ["a"],
              "template": "Refine: $1"},
        "out": {"kind": "output", "inputs": ["b"]},
    }]
    _do(context, "POST", "/v1/chat/completions", auth="consumer", body={
        "model": "", "max_tokens": 120,
        "messages": [{"role": "user", "content": "Say hello."}],
        "flow_ir": flow,
    })


# ---- assertion steps ------------------------------------------------------

@then('the status is {code:d}')
def step_status(context, code):
    """Assert the last response's HTTP status equals ``code``."""
    assert context.resp.status_code == code, \
        f"expected {code}, got {context.resp.status_code}: {context.resp_text[:300]}"


@then('the field "{path}" is present')
def step_present(context, path):
    """Assert ``path`` resolves in the last JSON response."""
    v = jpath(context.json, path)
    assert v is not SENTINEL, f'"{path}" missing in {str(context.json)[:300]}'


@then('the field "{path}" is non-empty')
def step_nonempty(context, path):
    """Assert ``path`` resolves and is not None, "", [] or {}."""
    v = jpath(context.json, path)
    assert v is not SENTINEL, f'"{path}" missing'
    assert v not in (None, "", [], {}), f'"{path}" is empty: {v!r}'


@then('the field "{path}" equals "{value}"')
def step_equals_str(context, path, value):
    """Assert ``str(field) == value``; a missing field never matches."""
    v = jpath(context.json, path)
    # Without this check the sentinel would be stringified and compared.
    assert v is not SENTINEL, f'"{path}" missing'
    assert str(v) == value, f'"{path}" = {v!r}, expected {value!r}'


@then('the field "{path}" equals {value:d}')
def step_equals_int(context, path, value):
    """Assert the field equals the integer ``value``; booleans do not count."""
    v = jpath(context.json, path)
    # True == 1 in Python; a JSON boolean must not satisfy a numeric check.
    assert not isinstance(v, bool) and v == value, \
        f'"{path}" = {v!r}, expected {value}'


@then('the field "{path}" is a number')
def step_is_number(context, path):
    """Assert the field is an int or float (not a bool)."""
    v = jpath(context.json, path)
    assert isinstance(v, (int, float)) and not isinstance(v, bool), f'"{path}" = {v!r}'


@then('the field "{path}" is at least {value:d}')
def step_at_least(context, path, value):
    """Assert the field is numeric (not a bool) and >= ``value``."""
    v = jpath(context.json, path)
    assert isinstance(v, (int, float)) and not isinstance(v, bool), \
        f'"{path}" not numeric: {v!r}'
    assert v >= value, f'"{path}" = {v}, expected >= {value}'


@then('the field "{path}" contains "{sub}"')
def step_contains(context, path, sub):
    """Assert ``sub`` occurs in ``str(field)``."""
    v = jpath(context.json, path)
    assert v is not SENTINEL, f'"{path}" missing'
    assert sub in str(v), f'"{path}" = {v!r} does not contain {sub!r}'


@then('the array "{path}" has at least {n:d} items')
def step_array_len(context, path, n):
    """Assert the field is a list with at least ``n`` items."""
    v = jpath(context.json, path)
    assert isinstance(v, list), f'"{path}" is not a list: {type(v)}'
    assert len(v) >= n, f'"{path}" has {len(v)} items, expected >= {n}'


@then('the array "{path}" includes an item where "{key}" equals "{value}"')
def step_array_item(context, path, key, value):
    """Find the first item whose ``key`` stringifies to ``value``.

    The match is stored in ``context.matched_item``. Items lacking ``key`` are
    skipped rather than compared.
    """
    arr = jpath(context.json, path)
    assert isinstance(arr, list), f'"{path}" not a list'
    for it in arr:
        found = jpath(it, key)
        if found is not SENTINEL and str(found) == value:
            context.matched_item = it
            return
    raise AssertionError(f'no item in "{path}" with {key}=={value!r}; '
                         f'saw {[jpath(i, key) for i in arr][:10]}')


@then('the matched item field "{path}" equals "{value}"')
def step_matched_equals(context, path, value):
    """Assert a field of the item stored by the 'includes an item' step."""
    assert hasattr(context, "matched_item"), "no matched item: run an 'includes an item' step first"
    v = jpath(context.matched_item, path)
    assert v is not SENTINEL, f'matched item "{path}" missing'
    assert str(v) == value, f'matched item "{path}" = {v!r}, expected {value!r}'


@then('every item in "{path}" has a "{key}"')
def step_every_has(context, path, key):
    """Assert the field is a non-empty list and every item resolves ``key``."""
    arr = jpath(context.json, path)
    assert isinstance(arr, list), f'"{path}" not a list'
    assert arr, f'"{path}" empty'
    for it in arr:
        assert jpath(it, key) is not SENTINEL, f'item missing {key}: {str(it)[:200]}'


@then('the response text contains "{sub}"')
def step_text_contains(context, sub):
    """Assert ``sub`` occurs in the raw response body."""
    assert sub in context.resp_text, f'response text missing {sub!r}'
Sigma_pol/Sigma_flow engine vendored at core/ (unhardcoded-engine).", + "purpose": "Exhaustive catalogue of user flows in entry order, to be lowered into Gherkin .feature files.", + "architecture_note": "Two HTTP layers. Consumers talk to the INGRESS (:8080): it does Bearer caller-auth, route allow-lists (403 caller_route_not_allowed), rate limits (429), usage recording, then proxies to the SHIM (router:18080) which runs the policy and owns /v1/* and operator /x/* (proxy 404s /x/* for consumers). Provider keys live in the host env; callers only ever send their own caller token.", + "test_flag_legend": { + "real_money": "executing for real spends on-chain funds / provider credits", + "real_credentials": "touches a real provider credential or private key", + "tos_risk": "uses an unofficial/ToS-risky path (codex)", + "mockable": "can be exercised hermetically against the local router with set_mock_response / a stub backend", + "priority": "P0 critical happy path, P1 important, P2 edge/advanced" + }, + "phases": [ + "1-onboarding", "2-auth", "3-providers", "4-consumer-keys", + "5-consumer-api", "6-dashboard-ops", "7-money-antseed" + ] + }, + "flows": [ + { + "id": "clone-with-submodule", "order": 1, "phase": "1-onboarding", + "name": "Clone the repo recursively (vendored Sigma_pol core submodule)", + "actor": "new-user", "category": "setup", + "entry_point": "git clone --recursive https://github.com/genlayerlabs/unhardcoded.git", + "preconditions": ["git installed; network to github.com", "core/ is a submodule -> genlayerlabs/unhardcoded-engine.git (.gitmodules)"], + "steps": [ + {"action": "git clone --recursive ", "expected": "repo + core/ populated; core/router.lua and core/llm_policy.lua exist"} + ], + "notes": "Without --recursive, core/ is empty and the Docker build fails at Dockerfile:19 (test -f core/router.lua && test -f core/llm_policy.lua) -- surfaces the missing submodule early.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": 
false, "mockable": true, "priority": "P0"} + }, + { + "id": "submodule-update-after-plain-clone", "order": 2, "phase": "1-onboarding", + "name": "Populate the core submodule after a plain clone", + "actor": "new-user", "category": "setup", + "entry_point": "git submodule update --init", + "preconditions": ["did a plain git clone (no --recursive); core/ empty"], + "steps": [ + {"action": "git submodule update --init", "expected": "core/ checked out at the pinned commit; core/router.lua + core/llm_policy.lua present"} + ], + "notes": "Documented in README.md and CONTRIBUTING.md.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"} + }, + { + "id": "create-env-secrets", "order": 3, "phase": "1-onboarding", + "name": "Create .env.secrets from .env.example", + "actor": "operator", "category": "setup", + "entry_point": "cp .env.example .env.secrets && chmod 600 .env.secrets", + "preconditions": [".env, .env.secrets, secrets/ are gitignored -- never commit them"], + "steps": [ + {"action": "cp .env.example .env.secrets && chmod 600 .env.secrets", "expected": "template created, owner-only"}, + {"action": "fill provider keys + dashboard auth values", "expected": "ready for router+ingress containers (both env_file .env.secrets)"} + ], + "notes": "Env vars: OPENAI_API_KEY(opt), OPENROUTER_API_KEY, CODEX_AUTH_PATH, CALLER_KEYS_JSON, CALLER_KEYS_SHA256_JSON, DASHBOARD_PASSWORD_SHA256, DASHBOARD_SESSION_SECRET, DASHBOARD_TRUSTED_USER_HEADER/SECRET, DASHBOARD_NO_AUTH, LLM_ROUTER_HOST_PORT, PUBLIC_BASE_URL, RATE_PER_MIN, BURST, LOG_LEVEL.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"} + }, + { + "id": "generate-dashboard-admin-secrets", "order": 4, "phase": "1-onboarding", + "name": "Generate DASHBOARD_PASSWORD_SHA256 and DASHBOARD_SESSION_SECRET", + "actor": "operator", "category": "auth", + "entry_point": ".env.secrets: 
DASHBOARD_PASSWORD_SHA256=, DASHBOARD_SESSION_SECRET=", + "preconditions": [".env.secrets exists; Python available"], + "steps": [ + {"action": "python - (sha256 of getpass) -> DASHBOARD_PASSWORD_SHA256", "expected": "64-hex digest"}, + {"action": "python -c 'import secrets;print(secrets.token_urlsafe(32))' -> DASHBOARD_SESSION_SECRET", "expected": "urlsafe secret set"} + ], + "notes": "_dashboard_password_ok compares sha256(submitted) via hmac.compare_digest. If SESSION_SECRET unset, no session can be minted -> only NO_AUTH or trusted-header SSO work.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"} + }, + { + "id": "run-via-nix-shell", "order": 5, "phase": "1-onboarding", + "name": "Run the data-plane shim directly via nix-shell (dev, no Docker, no auth/dashboard)", + "actor": "new-user", "category": "setup", + "entry_point": "nix-shell -p 'python3.withPackages(ps: with ps;[lupa httpx fastapi uvicorn pydantic])' --run 'python serve.py --config config.live.lua --default-profile default --host 127.0.0.1 --port 8080'", + "preconditions": ["submodule populated; Nix installed; provider keys exported in the shell env"], + "steps": [ + {"action": "enter nix-shell with runtime deps (or shell.nix)", "expected": "lupa/httpx/fastapi/uvicorn/pydantic available"}, + {"action": "python serve.py ...", "expected": "loads env_secrets, optional model_meta refresh, inits LLMRouterHost, uvicorn serves the RAW shim (no auth_proxy, no dashboard)"} + ], + "notes": "serve.py = data-plane only. The bearer contract + dashboard exist ONLY in auth_proxy/ingress. Flags: --metrics, --default-max-tokens (0=strict), --timeout-s, --codex-auth. 
MODEL_META_REFRESH=0 skips the boot refresh.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"} + }, + { + "id": "docker-compose-up-default", "order": 6, "phase": "1-onboarding", + "name": "Start the default 2-service stack (router + ingress)", + "actor": "operator", "category": "setup", + "entry_point": "docker compose -f compose.yml up -d --build", + "preconditions": [".env.secrets filled; submodule populated; external docker network 'genlayer-web' exists"], + "steps": [ + {"action": "docker compose up -d --build", "expected": "builds unhardcoded:local; router (serve.py :18080) + ingress (auth_proxy :8080). ingress depends_on router service_healthy"}, + {"action": "ingress binds 127.0.0.1:${LLM_ROUTER_HOST_PORT:-8080}", "expected": "only loopback exposed; router never published"} + ], + "notes": "ingress env: ROUTER_UPSTREAM=http://router:18080, DASHBOARD_KEY_ENV_PATH, DASHBOARD_ISSUED_KEYS_PATH. .env.secrets bind-mounted into ingress at /run/llm-router/.env.secrets. genlayer-web is external:true -> must pre-exist.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"} + }, + { + "id": "docker-compose-up-antseed", "order": 7, "phase": "1-onboarding", + "name": "Start the stack with the AntSeed buyer sidecar (--profile antseed)", + "actor": "operator", "category": "setup", + "entry_point": "docker compose -f compose.yml --profile antseed up -d --build", + "preconditions": ["default stack ok; AntSeed participation desired; a funded identity"], + "steps": [ + {"action": "compose --profile antseed up", "expected": "also starts antseed (browse mode); writes /market/market.json + status-antseed.json into the antseed-market volume"}, + {"action": "compose exec antseed antseed buyer status; ... 
network browse --services --top 5", "expected": "sidecar up; sellers discovered"} + ], + "notes": "antseed gated behind the compose profile (omitted from default up). Wallet self-service needs ANTSEED_CONTROL_TOKEN on both router+sidecar. Needs a FUNDED wallet to transact (real money).", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": true, "mockable": false, "priority": "P2"} + }, + { + "id": "healthz-and-smoke", "order": 8, "phase": "1-onboarding", + "name": "Health check + smoke test the running stack", + "actor": "operator", "category": "setup", + "entry_point": "curl -fsS http://127.0.0.1:8080/healthz", + "preconditions": ["stack started"], + "steps": [ + {"action": "GET /healthz (no auth)", "expected": "ingress proxies to router /healthz -> {ok:true,initialized:true}"}, + {"action": "GET /v1/models with Bearer ", "expected": "200 model list"}, + {"action": "POST /v1/chat/completions {model:'',messages:[pong],max_tokens:8}", "expected": "200; x_router shows chosen provider/model"}, + {"action": "GET /v1/models with NO bearer", "expected": "401 Unauthorized"} + ], + "notes": "Both services define /healthz healthchecks; ingress depends_on router service_healthy.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"} + }, + + { + "id": "dashboard-no-auth-bypass", "order": 9, "phase": "2-auth", + "name": "Local-dev: bypass dashboard auth with DASHBOARD_NO_AUTH", + "actor": "operator", "category": "auth", + "entry_point": ".env.secrets: DASHBOARD_NO_AUTH=1 ; GET http://127.0.0.1:8080/dashboard", + "preconditions": ["local-only deployment nobody else can reach"], + "steps": [ + {"action": "set DASHBOARD_NO_AUTH=1 (1/true/yes/on) and restart ingress", "expected": "every dashboard request treated as local admin"}, + {"action": "open /dashboard", "expected": "loads as admin without login; _require_dashboard_context returns {role:admin,user:'local-dev'}"} + ], + "notes": 
"DANGER: bypasses ALL dashboard auth. Checked FIRST, before trusted-header and session cookie. Never on a reachable deployment.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"} + }, + { + "id": "dashboard-password-login", "order": 10, "phase": "2-auth", + "name": "Admin login to the dashboard with the password", + "actor": "operator", "category": "auth", + "entry_point": "POST /dashboard/login {password}", + "preconditions": ["DASHBOARD_PASSWORD_SHA256 + DASHBOARD_SESSION_SECRET set; NO_AUTH unset"], + "steps": [ + {"action": "POST /dashboard/login {password}", "expected": "sha256(password) compared to hash; on ok sets httponly signed cookie router_dashboard_session (role=admin,user='admin'), records login"}, + {"action": "subsequent /dashboard/api/* carry the cookie", "expected": "admin role -> full access"} + ], + "notes": "Cookie is secure=True + path=/dashboard -> remote use needs TLS. 401 on bad password.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"} + }, + { + "id": "dashboard-consumer-key-login", "order": 11, "phase": "2-auth", + "name": "Consumer logs into the dashboard with their API key (scoped view)", + "actor": "consuming-service", "category": "auth", + "entry_point": "POST /dashboard/login {api_key}", + "preconditions": ["valid active consumer token; DASHBOARD_SESSION_SECRET set"], + "steps": [ + {"action": "POST /dashboard/login {api_key}", "expected": "_caller_auth validates; mints consumer-role session {role:consumer,consumer,key_sha256}"}, + {"action": "dashboard renders consumer-scoped view", "expected": "Catalog/Provider-keys/Key-usage hidden; admin ops -> 403 dashboard_admin_required"} + ], + "notes": "Same endpoint as password login; api_key vs password selects the branch.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"} + }, 
+ { + "id": "trusted-header-sso-admin", "order": 12, "phase": "2-auth", + "name": "Trusted-header SSO admin auth (behind a reverse proxy)", + "actor": "operator", "category": "auth", + "entry_point": "headers: : + x-dashboard-trusted-secret: ", + "preconditions": ["both DASHBOARD_TRUSTED_USER_HEADER and DASHBOARD_TRUSTED_USER_SECRET set; a proxy that strips client copies"], + "steps": [ + {"action": "proxy injects trusted user header + x-dashboard-trusted-secret", "expected": "if secret matches via hmac.compare_digest -> admin session {user:trusted_user}"} + ], + "notes": "Disabled by default. Evaluated AFTER NO_AUTH, BEFORE the cookie. Secret header name is fixed; only the user header name is configurable.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P2"} + }, + { + "id": "dashboard-logout", "order": 13, "phase": "2-auth", + "name": "Log out of the dashboard", + "actor": "operator", "category": "auth", + "entry_point": "POST /dashboard/logout", + "preconditions": ["any session"], + "steps": [{"action": "POST /dashboard/logout", "expected": "session cookie deleted; login card shown"}], + "notes": "", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P2"} + }, + { + "id": "caller-bearer-contract", "order": 14, "phase": "2-auth", + "name": "Consuming service authenticates to /v1 with its bearer token", + "actor": "consuming-service", "category": "auth", + "entry_point": "Authorization: Bearer on any /v1 path (point at ingress :8080, not router)", + "preconditions": ["stack running; valid active consumer token"], + "steps": [ + {"action": "client sets LLM_BASE_URL=http://127.0.0.1:8080/v1, LLM_API_KEY=", "expected": "standard OpenAI SDK usage"}, + {"action": "ingress validates token (_caller_auth)", "expected": "unknown/missing -> 401; inactive/revoked/expired -> 403"}, + {"action": "ingress strips Authorization, adds 
x-llm-router-caller, proxies to router", "expected": "router runs policy over the host's keys; caller never sends provider keys"} + ], + "notes": "Token maps to a caller name for audit. /x/* paths are 404'd for consumers. PUBLIC_BASE_URL shown in the key-handoff blurb.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"} + }, + + { + "id": "openrouter-key-config", "order": 15, "phase": "3-providers", + "name": "Configure the OpenRouter API key", + "actor": "operator", "category": "providers", + "entry_point": "env OPENROUTER_API_KEY in .env.secrets (provider openrouter/openrouter_market, auth_env=OPENROUTER_API_KEY); or dashboard provider-keys/update", + "preconditions": ["an OpenRouter account+key; .env.secrets mounted into router+ingress"], + "steps": [ + {"action": "set OPENROUTER_API_KEY in .env.secrets, restart router", "expected": "host resolves Authorization: Bearer $OPENROUTER_API_KEY for openrouter calls"}, + {"action": "optional: read /credits", "expected": "credits_usd balance + runway shown in dashboard"} + ], + "notes": "/models pricing discovery is keyless; the key is only needed to call and to read /credits. 
Tier=fallback.", + "test_flags": {"real_money": false, "real_credentials": true, "tos_risk": false, "mockable": true, "priority": "P0"} + }, + { + "id": "openrouter-catalog-discovery", "order": 16, "phase": "3-providers", + "name": "OpenRouter live whole-catalog discovery", + "actor": "operator", "category": "providers", + "entry_point": "provider openrouter_market (discovery=marketplace); sources/openrouter.py", + "preconditions": ["openrouter_market enabled (default)"], + "steps": [ + {"action": "source polls GET /models every 3600s", "expected": "long-tail exposed as marketplace offers; curated families skipped"}, + {"action": "each model gets traits inline (benchmarks, modalities, cap_*, ranks)", "expected": "discovered families rank on real benchmark not just price"} + ], + "notes": "Negative per-token prices (OpenRouter -1 sentinel) dropped so they can't win cost-led policies; $0 free models stay routable.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"} + }, + { + "id": "refresh-model-meta", "order": 17, "phase": "3-providers", + "name": "Regenerate model_meta.lua (registered benchmark/modality/cap traits)", + "actor": "operator", "category": "providers", + "entry_point": "python scripts/refresh_model_meta.py [--config config.live.lua] [--out model_meta.lua]", + "preconditions": ["repo checkout; network to OpenRouter /models (keyless)"], + "steps": [ + {"action": "run script (startup job + periodically)", "expected": "fetches /models; writes per-curated-family traits + ranks to model_meta.lua (GENERATED, do not hand-edit)"} + ], + "notes": "Only curated families land here (on-chain/deterministic path); discovered families carry traits inline. 
No money/credentials.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P2"} + }, + { + "id": "provider-add", "order": 18, "phase": "3-providers", + "name": "Dashboard 'Add provider' (BYO OpenAI-compatible gateway)", + "actor": "operator", "category": "providers", + "entry_point": "POST /dashboard/api/provider-keys/add -> router /x/providers", + "preconditions": ["admin session; secrets/ mounted RW for ingress; .env.secrets writable"], + "steps": [ + {"action": "POST {id, base_url, auth_env, tier, served_models[], key}", "expected": "validate_entry (id regex, http(s) base_url, api_kind=openai_compatible only, UPPER_SNAKE auth_env, known families)"}, + {"action": "persist + hot-apply", "expected": "key -> .env.secrets under auth_env; provider def -> secrets/providers.local.json (key never stored there); /x/providers re-inits core preserving breaker/EMA; on failure loads next restart"} + ], + "notes": "Only openai_compatible providers can be added at runtime. Overlay never overwrites a config.live.lua provider. 
No per-tenant provider keys -- provider keys are global to the router.", + "test_flags": {"real_money": false, "real_credentials": true, "tos_risk": false, "mockable": true, "priority": "P1"} + }, + { + "id": "provider-key-update-reveal", "order": 19, "phase": "3-providers", + "name": "Update / reveal an existing provider credential", + "actor": "operator", "category": "providers", + "entry_point": "POST /dashboard/api/provider-keys/update ; GET /dashboard/api/provider-keys/reveal?provider=", + "preconditions": ["admin session; provider has an auth_env (NOT the oauth/codex provider)"], + "steps": [ + {"action": "update POST {provider,key}", "expected": "writes .env.secrets, hot-applies via /x/provider-key"}, + {"action": "reveal GET", "expected": "raw env-key value + 12-char fingerprint; logged dashboard_provider_key_revealed"} + ], + "notes": "Update rejects providers with no auth_env (codex -> use the Codex flow). Reveal exposes the real key in plaintext to an admin session.", + "test_flags": {"real_money": false, "real_credentials": true, "tos_risk": false, "mockable": true, "priority": "P1"} + }, + { + "id": "codex-login-setup", "order": 20, "phase": "3-providers", + "name": "Codex (ChatGPT-subscription) provider setup (codex login + mount auth.json)", + "actor": "operator", "category": "providers", + "entry_point": "codex login; env CODEX_AUTH_PATH (compose mounts host file -> /codex/auth.json); router --codex-auth; provider openai (api_kind=openai_codex, oauth)", + "preconditions": ["a paid ChatGPT subscription; codex CLI installed"], + "steps": [ + {"action": "codex login (browser sign-in)", "expected": "writes ~/.codex/auth.json with access/refresh/id tokens + account_id"}, + {"action": "set CODEX_AUTH_PATH; compose bind-mounts to /codex/auth.json", "expected": "router picks it up lazily on first codex call"}, + {"action": "a codex-routed request", "expected": "codex_backend POSTs chatgpt.com/backend-api/codex/responses (SSE) with Bearer token + 
chatgpt-account-id"} + ], + "notes": "UNOFFICIAL / ToS-risky. access_token JWT expires in hours -> see codex-token-refresh; an expired token + dead refresh_token -> auth_error(401) -> provider disabled. Mount must be writable (token is rewritten on refresh). temperature/max_tokens NOT forwarded.", + "test_flags": {"real_money": false, "real_credentials": true, "tos_risk": true, "mockable": true, "priority": "P1"} + }, + { + "id": "codex-token-refresh", "order": 21, "phase": "3-providers", + "name": "Codex token auto-refresh (refresh_token -> access_token, written back)", + "actor": "operator", "category": "providers", + "entry_point": "codex_auth.CodexAuth.access_token() (per codex call)", + "preconditions": ["a valid refresh_token in auth.json"], + "steps": [ + {"action": "read JWT exp; if within 300s margin -> refresh", "expected": "_needs_refresh() true"}, + {"action": "POST auth.openai.com/oauth/token grant_type=refresh_token (public client app_EMoa...)", "expected": "new tokens; _write_back to auth.json preserving layout"} + ], + "notes": "Refresh failures are swallowed (keep old token; 401 surfaces later). Thread-safe. If refresh_token is dead -> 401 -> provider disabled (the failure we hit). 
The mounted host file gets rewritten -> bind mount must be writable.", + "test_flags": {"real_money": false, "real_credentials": true, "tos_risk": true, "mockable": true, "priority": "P2"} + }, + { + "id": "codex-multi-account", "order": 22, "phase": "3-providers", + "name": "Manage multiple Codex accounts from the dashboard", + "actor": "operator", "category": "providers", + "entry_point": "GET/POST /dashboard/api/codex/accounts ; DELETE /dashboard/api/codex/accounts/{name}; CodexAuthStore over CODEX_ACCOUNTS_DIR (default /codex/accounts on the PVC)", + "preconditions": ["admin session; auth.json blob(s) to paste"], + "steps": [ + {"action": "POST {name, auth_json}", "expected": "validated (must contain access_token), saved .json chmod 600, store reloaded, router /x/codex/reload"}, + {"action": "GET list", "expected": "per-account {name,account_id,fingerprint} (never raw token) + active=sorted(names)[0] + activity (used_percent, recent_429, scarcity_price_in)"}, + {"action": "DELETE {name}", "expected": "file unlinked, reloaded; 404 if missing"} + ], + "notes": "GOTCHA: active account = first alphabetically by name (per-call selection is a follow-up). Legacy single /codex/auth.json auto-adopted as account 'default' (which sorts before most names). 
In compose, /codex/accounts is NOT shared with the router by default -- the router reads only the mounted /codex/auth.json (a real wiring gap).", + "test_flags": {"real_money": false, "real_credentials": true, "tos_risk": true, "mockable": true, "priority": "P2"} + }, + { + "id": "codex-scarcity-ramp", "order": 23, "phase": "3-providers", + "name": "Codex scarcity price ramp (ranking-only; bills $0)", + "actor": "operator", "category": "providers", + "entry_point": "sources/codex.py ingest()/poll; Config tab knobs (codex.quota_demote_start, imputed_price_in/out, quota_429_window_s, quota_429_shed)", + "preconditions": ["codex live and receiving traffic"], + "steps": [ + {"action": "every codex call pushes an observation (status + quota headers) -- never probes", "expected": "_demote_frac() = max(quota used fraction, recent-429 fraction)"}, + {"action": "imputed ranking price = imputed_price_* x frac via update_metrics", "expected": "paid routes take over before the 429 wall; price decays back as pressure ages out"} + ], + "notes": "Codex seeded at price ~0 in metrics.live.lua so cost-led policies rank it first; executed/billing cost stays $0. Ranking-only.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": true, "mockable": true, "priority": "P2"} + }, + + { + "id": "register-consumer-key-cli", "order": 24, "phase": "4-consumer-keys", + "name": "Register a per-consumer ingress token via CLI (sha256 storage)", + "actor": "operator", "category": "consumer-keys", + "entry_point": "python scripts/register_consumer_key.py [--env .env.secrets] [--token ...] 
[--plaintext]", + "preconditions": ["writable .env.secrets"], + "steps": [ + {"action": "run with a consumer name", "expected": "generates an llmr_-prefixed token (rejects tokens <16 chars)"}, + {"action": "default mode", "expected": "stores sha256(token)->consumer in CALLER_KEYS_SHA256_JSON; raw token printed once, never stored"}, + {"action": "--plaintext", "expected": "legacy CALLER_KEYS_JSON (raw token, discouraged)"} + ], + "notes": ".env.secrets rewritten chmod 600. Restart/reload for keys to take effect (caller maps read at module import).", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"} + }, + { + "id": "issue-consumer-key-dashboard", "order": 25, "phase": "4-consumer-keys", + "name": "Issue a consumer key from the dashboard (+ setup blurb)", + "actor": "operator", "category": "consumer-keys", + "entry_point": "POST /dashboard/api/keys {consumer}", + "preconditions": ["admin session; DASHBOARD_ISSUED_KEYS_PATH + .env.secrets writable"], + "steps": [ + {"action": "POST {consumer}", "expected": "mints a new key; sha256->consumer in CALLER_KEYS_SHA256_JSON; appends {sha256_prefix,status:active,created_at} record"}, + {"action": "response + copy blurb (buildKeyHandoff)", "expected": "raw api_key shown once; blurb has PUBLIC_BASE_URL, profile:default, curl + Python OpenAI-SDK examples, /usage, 403 route hint"} + ], + "notes": "Only hashed metadata persisted. The new key works immediately on the ingress (in-memory map updated). 
This is the path we used to mint the local test key.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"} + }, + { + "id": "rotate-consumer-key", "order": 26, "phase": "4-consumer-keys", + "name": "Rotate a consumer key with a grace period", + "actor": "operator", "category": "consumer-keys", + "entry_point": "POST /dashboard/api/keys {consumer, rotate:true, grace_period_s?}", + "preconditions": ["admin session; existing active key(s)"], + "steps": [ + {"action": "POST rotate:true", "expected": "new key minted"}, + {"action": "existing active non-expiring keys get expires_at=now+grace (clamped 0..90d) + replaced_at", "expected": "old keys work until expiry, then rejected caller_key_expired"} + ], + "notes": "Lets clients swap keys without downtime; expiry enforced at request time.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"} + }, + { + "id": "revoke-consumer-key", "order": 27, "phase": "4-consumer-keys", + "name": "Revoke a consumer key immediately", + "actor": "operator", "category": "consumer-keys", + "entry_point": "POST /dashboard/api/keys/revoke {consumer, sha256_prefix}", + "preconditions": ["admin session; prefix 8-64 hex"], + "steps": [ + {"action": "POST", "expected": "record status:revoked + revoked_at; hash dropped from CALLER_KEYS_SHA256_JSON (and plaintext); persisted to .env.secrets; 404 if prefix not found"}, + {"action": "subsequent calls with the revoked key", "expected": "revoked key -> caller_key_revoked; takes effect immediately"} + ], + "notes": "", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"} + }, + { + "id": "configure-consumer-limits", "order": 28, "phase": "4-consumer-keys", + "name": "Configure consumer policy (status / allowed_routes / rate / burst)", + "actor": "operator", "category": "consumer-keys", + "entry_point": "POST 
/dashboard/api/consumers/{consumer} {status?, allowed_routes?, rate_per_min?, burst?}", + "preconditions": ["admin session"], + "steps": [ + {"action": "set status active/inactive", "expected": "inactive -> all that consumer's keys rejected (caller_inactive)"}, + {"action": "set allowed_routes (exact, * / all, or prefix*)", "expected": "_route_allowed gates each request by model / profile:NAME; empty=all"}, + {"action": "set rate_per_min / burst", "expected": "per-caller 60s window; effective = max(rate_per_min, burst), defaulting to global RATE_PER_MIN/BURST"} + ], + "notes": "Dashboard UI = Consumers tab -> Settings drawer (saveConsumerSettings).", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"} + }, + { + "id": "reveal-consumer-key", "order": 29, "phase": "4-consumer-keys", + "name": "Reveal a recoverable consumer key (legacy plaintext only)", + "actor": "operator", "category": "consumer-keys", + "entry_point": "GET /dashboard/api/keys/reveal?consumer=", + "preconditions": ["admin session"], + "steps": [ + {"action": "GET", "expected": "raw rows only for legacy CALLER_KEYS_JSON entries + hash_only_count"}, + {"action": "hash-only consumer", "expected": "message: cannot be revealed; generate a replacement"} + ], + "notes": "Hash-only (default) keys are unrecoverable by design.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P2"} + }, + + { + "id": "chat-default-policy", "order": 30, "phase": "5-consumer-api", + "name": "Chat completion with the default policy (empty model)", + "actor": "consuming-service", "category": "api", + "entry_point": "POST /v1/chat/completions", + "preconditions": ["Bearer caller token (active, route allowed, under rate limit)"], + "steps": [ + {"action": "POST {model:'', messages:[...]}", "expected": "contract.profile=default_profile; the single declarative default Sigma_pol policy runs; max_tokens 
defaulted to 4096 if omitted"}, + {"action": "router filters->scores->picks over the host keys", "expected": "200 chat.completion + x_router{provider, model_family, served_model_id, price_in/out, cost_usd, policy_fingerprint, decision_trace (ranked trimmed to top 10)}"} + ], + "notes": "Primary 'let the router decide' path. cost_usd = tokens x chosen price (0 for codex subs, clamped >=0).", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"} + }, + { + "id": "chat-family-pin", "order": 31, "phase": "5-consumer-api", + "name": "Chat completion pinned to a model family (model=family:NAME)", + "actor": "consuming-service", "category": "api", + "entry_point": "POST /v1/chat/completions", + "preconditions": ["Bearer caller token"], + "steps": [ + {"action": "POST model:'family:deepseek-v3'", "expected": "default policy + requirements.model_family; only that family qualifies"}, + {"action": "router ranks the family's sellers", "expected": "200; x_router.model_family = the pin; route key for allow-lists = 'family:deepseek-v3'"} + ], + "notes": "Sugar over the default policy.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"} + }, + { + "id": "chat-provider-pin", "order": 32, "phase": "5-consumer-api", + "name": "Chat completion pinned to one (provider, family) (model=pin:PROVIDER/FAMILY)", + "actor": "consuming-service", "category": "api", + "entry_point": "POST /v1/chat/completions", + "preconditions": ["Bearer caller token"], + "steps": [ + {"action": "POST model:'pin:openrouter/deepseek-v3'", "expected": "requirements.pin={provider,model}; only that exact seller qualifies; no cross-provider fallback"}, + {"action": "router calls the pinned seller", "expected": "200 with x_router.provider==pin; if it errors -> exhausted/no_candidates (nothing to fall back to)"} + ], + "notes": "Malformed pin (no '/') silently degrades to plain 
default policy.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"} + }, + { + "id": "chat-named-profile", "order": 33, "phase": "5-consumer-api", + "name": "Chat completion via a named profile (model=profile:NAME or path /NAME/v1/...)", + "actor": "consuming-service", "category": "api", + "entry_point": "POST /v1/chat/completions OR POST /{profile}/v1/chat/completions", + "preconditions": ["Bearer caller token"], + "steps": [ + {"action": "POST model:'profile:edge' (or path-addressed)", "expected": "contract.profile='edge'; for the path form the path wins and the model string is ignored"}, + {"action": "router runs that profile's term", "expected": "200; x_router.policy_fingerprint identifies the profile"} + ], + "notes": "Only 'default' ships out of the box. Route 'profile:edge' is derived from the path too, so route restriction works for both forms.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"} + }, + { + "id": "chat-per-call-policy-ir", "order": 34, "phase": "5-consumer-api", + "name": "Chat completion with a per-call Sigma_pol policy_ir term", + "actor": "consuming-service", "category": "api", + "entry_point": "POST /v1/chat/completions", + "preconditions": ["Bearer caller token"], + "steps": [ + {"action": "POST body with a policy_ir array term", "expected": "shim forwards it verbatim; the CORE is the single admission boundary (check sorts/arity/depth<=64/nodes<=4096) then AND-composes the host policy_envelope (caller can only narrow)"}, + {"action": "admission + execution", "expected": "200 with x_router.policy_fingerprint = term identity; admission failure -> 400 invalid_request_error code invalid_policy 'policy_ir rejected at admission: '"} + ], + "notes": "The PRIMARY routing path. 
5-slot term: filter/score/pick/xform/failplan (core/docs/SIGMA-POL.md).", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"} + }, + { + "id": "chat-flow-ir", "order": 35, "phase": "5-consumer-api", + "name": "Chat completion driving a Sigma_flow DAG (flow_ir term)", + "actor": "consuming-service", "category": "api", + "entry_point": "POST /v1/chat/completions", + "preconditions": ["Bearer caller token"], + "steps": [ + {"action": "POST body with flow_ir = ['flow', {input, llm nodes (each with own policy+system+inputs), output}]", "expected": "flow_ir takes precedence over policy_ir/model; core admits the whole DAG (acyclic, one source/one sink, each node's policy admitted)"}, + {"action": "execute_flow_async schedules the DAG; each llm node is a normal routed call", "expected": "answer = sink output; x_router.provider='flow', model_family='flow:', decision_trace.flow_nodes[] per-node trace (provider/model/price/tokens/latency/nested trace)"}, + {"action": "admission/node failure", "expected": "admission -> 400 invalid_flow; a node failing surfaces the node's REAL error kind (e.g. no_candidates->503), not a blanket 502; failed flow still records provider:'flow' + failed_node"} + ], + "notes": "Sigma_flow = declarative composition over Sigma_pol (no loops/effects). Tokens ~ Nx input. 
This is the ensemble/review-MoA path we screenshotted.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"} + }, + { + "id": "chat-streaming", "order": 36, "phase": "5-consumer-api", + "name": "Streaming chat completion (stream:true, SSE)", + "actor": "consuming-service", "category": "api", + "entry_point": "POST /v1/chat/completions {stream:true}", + "preconditions": ["Bearer caller token"], + "steps": [ + {"action": "POST any routing form with stream:true", "expected": "Content-Type text/event-stream; fallback BEFORE the first content delta is still a clean JSON error (not SSE)"}, + {"action": "receive SSE", "expected": "role chunk, text deltas, final chunk with usage+x_router (cost+trimmed trace), then [DONE]; heartbeat comments keep the line warm under the 60s ALB idle timeout"}, + {"action": "proxy records AFTER stream ends", "expected": "ingress tees the SSE tail to extract final usage/cost -> real tokens in the usage row"} + ], + "notes": "A streaming Sigma_flow has no token stream -> first byte + heartbeats while it runs, then the assembled result (or trace+error on failure).", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"} + }, + { + "id": "chat-tool-calls", "order": 37, "phase": "5-consumer-api", + "name": "Chat completion with tool calls", + "actor": "consuming-service", "category": "api", + "entry_point": "POST /v1/chat/completions {tools:[...]}", + "preconditions": ["Bearer caller token"], + "steps": [ + {"action": "POST with tools[]/tool_choice (+ any routing form)", "expected": "shim forwards tools/tool_choice/response_format/temperature/seed; a policy can gate on cap_tools"}, + {"action": "model returns tool calls", "expected": "200 with choices[0].message.tool_calls, finish_reason 'tool_calls'; in a flow the sink node's tool_calls are emitted"} + ], + "notes": "flow+tools with no tool-capable candidate fails 
clean with no_candidates (503), not 502.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"} + }, + { + "id": "list-models", "order": 38, "phase": "5-consumer-api", + "name": "List the routable model catalog", + "actor": "consuming-service", "category": "api", + "entry_point": "GET /v1/models", + "preconditions": ["Bearer caller token"], + "steps": [ + {"action": "GET /v1/models", "expected": "{object:list, data:[{id:'profile:NAME'},{id:'family:NAME'}...]} = profile names + curated + discovered families; exactly the model-field values a caller can send"} + ], + "notes": "Auth required (goes through the proxy).", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"} + }, + { + "id": "consumer-key-usage", "order": 39, "phase": "5-consumer-api", + "name": "Per-key usage self-service (consumer)", + "actor": "consuming-service", "category": "api", + "entry_point": "GET /v1/usage (alias GET /api/usage)", + "preconditions": ["Bearer caller token (the exact key whose usage is reported)"], + "steps": [ + {"action": "GET /v1/usage?since&until&limit&offset OR /api/usage?window=24h", "expected": "usage for ONLY that key; window 15m/24h/7d/4w overrides since; limit clamped 1-500"}, + {"action": "receive snapshot", "expected": "schema_version 3, key_sha256_prefix (first 12 hex only), consumer_settings, totals, cost_estimate, daily/monthly, by_provider/model_family/route/served_model/status, route_health, recent[]"} + ], + "notes": "Bad window -> 400 invalid_usage_window. 
Operator equivalent: POST /dashboard/api/key-usage (admin session).", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"} + }, + { + "id": "route-and-rate-rejections", "order": 40, "phase": "5-consumer-api", + "name": "Auth / route-restriction / rate-limit rejections (ingress)", + "actor": "consuming-service", "category": "api", + "entry_point": "any /v1 or /api path", + "preconditions": ["Bearer caller token"], + "steps": [ + {"action": "no/unknown token", "expected": "401 caller_auth"}, + {"action": "inactive/revoked/expired key", "expected": "403 caller_inactive | caller_key_revoked | caller_key_expired"}, + {"action": "route not in allowed_routes (route = body.model or profile:NAME from path)", "expected": "403 caller_route_not_allowed; empty/['all']/['*'] = all; 'prefix*' wildcard"}, + {"action": "exceed rate_per_min/burst", "expected": "429 caller_rate_limit"} + ], + "notes": "All enforced in the ingress BEFORE forwarding; rejects recorded (_record_reject).", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"} + }, + { + "id": "router-cascade-errors", "order": 41, "phase": "5-consumer-api", + "name": "Fallback/cascade behavior + router error kinds (in the trace)", + "actor": "consuming-service", "category": "api", + "entry_point": "POST /v1/chat/completions", + "preconditions": ["Bearer caller token"], + "steps": [ + {"action": "a chosen provider errors mid-cascade", "expected": "router tries ranked candidates in order; decision_path records each attempt (provider/family, error_kind, http_status, message). 
402->payment_required, 401/403->auth_error, 429->rate_limit, 408/504->timeout, 404->model_unavailable, 400->bad_request/context_overflow, 5xx->server_error, empty content->bad_response"}, + {"action": "all candidates exhausted / none qualify", "expected": "error body {error{message,type:router_error,code}, x_router{decision_trace}}; mapped: no_candidates->503, auth_error->401, rate_limit->429, bad_request/context_overflow->400, timeout->504, else->502; message appends per-attempt summary"} + ], + "notes": "This is exactly what we debugged (antseed payment_required/network_error, z-ai/glm-5.2 bad_response, opus bad_request). Same translation on the streaming path.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"} + }, + + { + "id": "analytics-view", "order": 42, "phase": "6-dashboard-ops", + "name": "View Analytics (spend / traffic / errors with filters)", + "actor": "operator", "category": "dashboard", + "entry_point": "Analytics tab -> GET /dashboard/api/stats?timeframe&consumer&provider&model", + "preconditions": ["session"], + "steps": [ + {"action": "open Analytics", "expected": "metric cards Requests/Spend/Tokens/Success-rate; over-time bars; By provider/model/consumer/status tables; live chat-health banner"}, + {"action": "change timeframe (all/runtime/1h/24h/7d/30d), consumer, provider, model filters; Refresh", "expected": "snapshot rescoped/refetched; 15s auto-poll"} + ], + "notes": "", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"} + }, + { + "id": "activity-trace", "order": 43, "phase": "6-dashboard-ops", + "name": "Activity -- per-request trace (policy term, fallback chain, cost, Sigma_flow DAG)", + "actor": "operator", "category": "dashboard", + "entry_point": "Activity tab -> rows from /dashboard/api/stats recent[]; filter All/Requests/Rejects/Probes", + "preconditions": ["session"], + "steps": [ + {"action": 
"open Activity, filter", "expected": "table Time/Event/Caller/Status/Route/Provider/Cost/Error"}, + {"action": "expand a row", "expected": "meta pills (policy/flow fingerprint, cost, tokens, latency, served model); Attempts fallback order with per-provider ok/error/skip; copyable Sigma_pol term"}, + {"action": "expand a Sigma_flow request", "expected": "node DAG view (level-laid nodes, per-node provider/model/latency/tokens + inner attempts)"} + ], + "notes": "This is the view we used for the flow screenshots.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P0"} + }, + { + "id": "builder-policy", "order": 44, "phase": "6-dashboard-ops", + "name": "Builder -- compose / review / download / test a Sigma_pol policy", + "actor": "operator", "category": "dashboard", + "entry_point": "Builder tab (Policy mode); /dashboard/api/policy/{build,preview,normalize,test}, /dashboard/api/fields, /dashboard/api/policies", + "preconditions": ["admin session"], + "steps": [ + {"action": "compose filter rows / score terms / selector (rows<->raw term)", "expected": "bBuildTerm assembles ['policy',filter,score,selector,...]; fields data-driven from /x/fields"}, + {"action": "Review ranking", "expected": "normalize -> fingerprint; preview (/x/rank) -> ranked Provider/Model/Tier/$in/$out/Score; empty survivors -> 'No survivors'"}, + {"action": "Download policy", "expected": "sigma-pol-.json {version,fingerprint,policy_ir}"}, + {"action": "Test call (prompt)", "expected": "runs /v1/chat/completions with policy_ir; result chips + output; recorded in Activity (caller=dashboard-test, not billed)"} + ], + "notes": "400s render verbatim in the builder error box.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"} + }, + { + "id": "builder-flow", "order": 45, "phase": "6-dashboard-ops", + "name": "Flow Builder -- compose / review / download / test a 
Sigma_flow DAG", + "actor": "operator", "category": "dashboard", + "entry_point": "Builder tab (Flow mode); /dashboard/api/flow/{normalize,test}", + "preconditions": ["admin session"], + "steps": [ + {"action": "add nodes, per-node system prompt, choose inputs (multi-input=fusion), pick answer node; '↧ use Policy-builder term' or edit raw", "expected": "fBuildIR assembles ['flow',{u:input,n*:llm,out:output}]"}, + {"action": "Review flow", "expected": "/dashboard/api/flow/normalize -> fingerprint, 'Flow admitted'"}, + {"action": "Download flow", "expected": "sigma-flow-.json"}, + {"action": "Test call", "expected": "/dashboard/api/flow/test runs live; per-node trace chips + output; recorded in Activity"} + ], + "notes": "Default example = Mixture-of-agents (2 drafts -> synthesize), same shape as our ensemble.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"} + }, + { + "id": "catalog-view", "order": 46, "phase": "6-dashboard-ops", + "name": "Catalog (Market) -- price book + per-seller perf, + SKILL.md download", + "actor": "operator", "category": "dashboard", + "entry_point": "Catalog tab -> GET /dashboard/api/market (-> /x/market); /dashboard/api/skill", + "preconditions": ["admin session"], + "steps": [ + {"action": "open Catalog", "expected": "per-family rows: quality, benchmark badges, modalities, cheapest $in/$out, seller count"}, + {"action": "expand a family", "expected": "seller table: wire model, $in/$out, status (live/disabled/pinned/over-cap), perf (success%/ms/calls), refreshed time"}, + {"action": "filter / Tradable-only / Copy / download SKILL.md", "expected": "filtered list; catalog JSON to clipboard; SKILL.md with live catalog + field vocab baked in"} + ], + "notes": "Wallet cell appears on the AntSeed provider. 
502 if router /x/market unavailable.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"} + }, + { + "id": "consumers-manage", "order": 47, "phase": "6-dashboard-ops", + "name": "Consumers tab -- browse / scope / settings / create-rotate-revoke-reveal keys", + "actor": "operator", "category": "dashboard", + "entry_point": "Consumers tab + Settings group; /dashboard/api/{stats,keys,keys/revoke,keys/reveal,consumers/{c}}", + "preconditions": ["admin session"], + "steps": [ + {"action": "browse/search/filter; click a row", "expected": "rows (status, requests/errors/tokens/spend/last-seen/key-status); scope to one consumer -> detail + routes/providers breakdown + recent activity"}, + {"action": "drawer actions", "expected": "Save settings (configure-consumer-limits); Generate/rotate key (+ handoff blurb); Revoke key; Reveal keys -- see flows 25-29"} + ], + "notes": "Consumer-role sessions get admin actions hidden.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P1"} + }, + { + "id": "key-usage-lookup", "order": 48, "phase": "6-dashboard-ops", + "name": "Key usage -- operator per-key lookup", + "actor": "operator", "category": "dashboard", + "entry_point": "Key usage tab -> POST /dashboard/api/key-usage {api_key, window?, limit?, offset?}", + "preconditions": ["admin session"], + "steps": [ + {"action": "paste api_key + window/limit/offset", "expected": "same sanitized router_key_usage snapshot as /v1/usage for that key; 404 key_usage_not_found if unseen; 401 if not admin"} + ], + "notes": "Consumer bearer does NOT satisfy this -- admin session only.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P2"} + }, + { + "id": "provider-keys-tab", "order": 49, "phase": "6-dashboard-ops", + "name": "Provider keys tab -- view / reveal / edit / add / manage Codex accounts", + 
"actor": "operator", "category": "dashboard", + "entry_point": "Provider keys tab; /dashboard/api/provider-keys[/add|/update|/reveal], /dashboard/api/codex/accounts", + "preconditions": ["admin session"], + "steps": [ + {"action": "view + reveal/copy", "expected": "rows: provider, credential (env name+fingerprint) or AntSeed wallet cell, usage stats; reveal -> raw key inline"}, + {"action": "edit / add provider / manage codex accounts", "expected": "see flows provider-add, provider-key-update-reveal, codex-multi-account"} + ], + "notes": "Privatized by default (env names + fingerprints).", + "test_flags": {"real_money": false, "real_credentials": true, "tos_risk": false, "mockable": true, "priority": "P1"} + }, + { + "id": "config-knobs", "order": 50, "phase": "6-dashboard-ops", + "name": "Config tab -- per-provider runtime knobs (PVC-backed, hot-apply)", + "actor": "operator", "category": "dashboard", + "entry_point": "Config tab -> GET /dashboard/api/config ; POST /dashboard/api/config (-> /x/config/reload)", + "preconditions": ["admin session"], + "steps": [ + {"action": "open Config", "expected": "knobs grouped by provider (antseed/codex/openrouter): value, default, [min,max], override pill"}, + {"action": "edit + Save", "expected": "validated vs schema, persisted to PVC, hot-applied; 'Saved · live'; 400 on schema failure"}, + {"action": "Reset an overridden knob", "expected": "override cleared to default"} + ], + "notes": "Knobs: antseed top-N/runway, codex scarcity ramp/runway, openrouter credit runway.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P2"} + }, + + { + "id": "antseed-identity-and-fund", "order": 51, "phase": "7-money-antseed", + "name": "AntSeed buyer identity + fund the hot-wallet (USDC + ETH gas on Base mainnet)", + "actor": "operator", "category": "billing", + "entry_point": "env ANTSEED_IDENTITY_HEX (private key) ; on-chain transfer to the wallet address", + 
"preconditions": ["a secp256k1 private key controlling a Base wallet; real USDC + ETH"], + "steps": [ + {"action": "set ANTSEED_IDENTITY_HEX to the funded wallet's private key", "expected": "durable identity across volume loss (unset -> ephemeral key in the volume, lost with it)"}, + {"action": "send USDC + ETH(gas) to the wallet address", "expected": "raw wallet USDC appears (but is NOT yet spendable -- see escrow)"} + ], + "notes": "REAL MONEY. Treat ANTSEED_IDENTITY_HEX as a private key -- never commit it. wallet USDC alone does nothing; the buyer spends from escrow.", + "test_flags": {"real_money": true, "real_credentials": true, "tos_risk": false, "mockable": false, "priority": "P2"} + }, + { + "id": "antseed-deposit-withdraw", "order": 52, "phase": "7-money-antseed", + "name": "AntSeed deposit / withdraw / refresh (wallet <-> escrow)", + "actor": "operator", "category": "billing", + "entry_point": "dashboard Deposit/Withdraw/Refresh -> POST /dashboard/api/wallet/{deposit,withdraw,refresh} -> /x/wallet/* -> sidecar control :8379 -> antseed buyer ", + "preconditions": ["funded wallet; ANTSEED_CONTROL_TOKEN set on router+sidecar (else 503); in k8s ANTSEED_CONTROL_URL=http://127.0.0.1:8379"], + "steps": [ + {"action": "Deposit {amount}", "expected": "amount validated ^\\d+(\\.\\d{1,6})?$ >0; antseed buyer deposit on-chain (120s tx timeout); mutations serialized"}, + {"action": "control runs buyer status --json", "expected": "status-antseed.json rewritten; router reads new depositsAvailable on next poll"}, + {"action": "dashboard refresh", "expected": "new escrow + runway (off available+reserved total; reserved returns as channels settle)"} + ], + "notes": "REAL on-chain spend. wallet-vs-escrow gotcha: dashboard shows depositsAvailable (escrow), what the buyer spends -- NOT raw wallet USDC. Prod runbook = the antseed-prod-deposit skill (VPN+SSO+kubectl). 
Propagation lag: sidecar 60s, router poll 300s.", + "test_flags": {"real_money": true, "real_credentials": false, "tos_risk": false, "mockable": false, "priority": "P2"} + }, + { + "id": "antseed-market-dump", "order": 53, "phase": "7-money-antseed", + "name": "AntSeed browse market / regenerate price seeds (no spend)", + "actor": "operator", "category": "billing", + "entry_point": "antseed network browse --services --json --top 50 ; auto /market/market.json ; dashboard Market via /x/market", + "preconditions": ["AntSeed node running"], + "steps": [ + {"action": "browse the network", "expected": "per-peer servicePricing[] input/output USD-per-million, queryable WITHOUT spending"}, + {"action": "seed metrics.live.lua with the cheapest peer per model", "expected": "a seed only; live EMA becomes source of truth once proxies run"} + ], + "notes": "Stale dump (>900s) -> degraded (no antseed candidates). Writer validates JSON before writing. Caps enforced by the buyer proxy so a stale seed can't overpay.", + "test_flags": {"real_money": false, "real_credentials": false, "tos_risk": false, "mockable": true, "priority": "P2"} + } + ] +}