From 319d2e669f3e931fe4b60b919d811c2a7632210f Mon Sep 17 00:00:00 2001
From: Sweets Sweetman <sweetmantech@gmail.com>
Date: Mon, 25 May 2026 14:40:44 -0500
Subject: [PATCH 1/7] feat(credits): port computeCreditsDeductedCents +
 estimateModelUsageCost from open-agents
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

First piece of the chat-workflow billing path. Ports the per-turn cost
math from open-agents' `apps/web/lib/credits/compute-credits-deducted-cents.ts`
and `apps/web/lib/models.ts:estimateModelUsageCost` so the same billing
logic runs on both sides during the cutover.

Resolution order matches open-agents exactly:
  1. gateway-reported cost on responseMessage.metadata.totalMessageCost
     (the same number the chat UI shows next to the response)
  2. token-based estimate against the model catalog's cost entry
  3. 1c floor when no pricing is available — so a successful turn
     never lands as a free run

Three new files (per api's one-exported-function-per-file SRP):
  - AvailableModelCost.ts — shape mirroring open-agents' richer cost
    type (input, output, cache_read, context_over_200k) so the same
    estimator runs against either catalog
  - estimateModelUsageCost.ts — token-based USD estimator including
    the 200k+ context tier swap and cache_read pricing
  - computeCreditsDeductedCents.ts — top-level orchestrator (gateway
    cost → token estimate → 1c floor) using api's getAvailableModels
    directly (no HTTP self-fetch like open-agents does)

Test coverage: 27 new unit tests across the two test files. All pricing
edge cases covered (NaN/Infinity/negative gateway cost, cached-tokens-
exceeding-input clamping, context_over_200k tier swap with partial
overrides, catalog miss / fetch failure fallbacks).

Unblocks step 3 (deductCreditsWithAudit TS wrapper) of the chat credits
gap in recoupable/api#605.

Full suite: 3191 → 3205 passing.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 lib/credits/AvailableModelCost.ts             |  25 +++
 .../computeCreditsDeductedCents.test.ts       | 183 ++++++++++++++++++
 .../__tests__/estimateModelUsageCost.test.ts  | 165 ++++++++++++++++
 lib/credits/computeCreditsDeductedCents.ts    |  57 ++++++
 lib/credits/estimateModelUsageCost.ts         |  79 ++++++++
 5 files changed, 509 insertions(+)
 create mode 100644 lib/credits/AvailableModelCost.ts
 create mode 100644 lib/credits/__tests__/computeCreditsDeductedCents.test.ts
 create mode 100644 lib/credits/__tests__/estimateModelUsageCost.test.ts
 create mode 100644 lib/credits/computeCreditsDeductedCents.ts
 create mode 100644 lib/credits/estimateModelUsageCost.ts

diff --git a/lib/credits/AvailableModelCost.ts b/lib/credits/AvailableModelCost.ts
new file mode 100644
index 000000000..72d3026f9
--- /dev/null
+++ b/lib/credits/AvailableModelCost.ts
@@ -0,0 +1,25 @@
+/**
+ * Shape of the per-model cost catalog used for token-based credit estimation.
+ *
+ * Mirrors open-agents'
+ * `apps/web/lib/models.ts:AvailableModelCost` so the same
+ * `estimateModelUsageCost` math runs against either catalog without
+ * shape conversion. api's current gateway/models.dev pipeline emits
+ * only `{ input, output }` (see
+ * `lib/ai/parseModelsDevMetadata.ts:ModelsDevMetadata`); the richer
+ * `cache_read` and `context_over_200k` fields are kept in the type so
+ * a future catalog expansion (or a hand-edited override) gets picked
+ * up automatically by the estimator.
+ *
+ * All token-cost units are USD per million tokens, matching
+ * models.dev.
+ */
+export interface AvailableModelCostTier {
+  input?: number;
+  output?: number;
+  cache_read?: number;
+}
+
+export interface AvailableModelCost extends AvailableModelCostTier {
+  context_over_200k?: AvailableModelCostTier;
+}
diff --git a/lib/credits/__tests__/computeCreditsDeductedCents.test.ts b/lib/credits/__tests__/computeCreditsDeductedCents.test.ts
new file mode 100644
index 000000000..a05c79df7
--- /dev/null
+++ b/lib/credits/__tests__/computeCreditsDeductedCents.test.ts
@@ -0,0 +1,183 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { computeCreditsDeductedCents } from "@/lib/credits/computeCreditsDeductedCents";
+import { getAvailableModels } from "@/lib/ai/getAvailableModels";
+
+vi.mock("@/lib/ai/getAvailableModels", () => ({
+  getAvailableModels: vi.fn(),
+}));
+
+beforeEach(() => {
+  vi.clearAllMocks();
+  // Default catalog: empty (forces token-estimate path to fall through to 1c).
+  vi.mocked(getAvailableModels).mockResolvedValue([]);
+});
+
+const ZERO_USAGE = { inputTokens: 0, cachedInputTokens: 0, outputTokens: 0 };
+
+describe("computeCreditsDeductedCents", () => {
+  describe("gateway cost path (preferred)", () => {
+    it("returns gateway cost in cents when gatewayCostUsd is a positive number", async () => {
+      // $0.0074 → 0.74c → ceil to 1c minimum is not needed; round to 1c
+      expect(
+        await computeCreditsDeductedCents(ZERO_USAGE, "anthropic/claude-haiku-4.5", 0.0074),
+      ).toBe(1);
+      // $0.42 → 42c
+      expect(
+        await computeCreditsDeductedCents(ZERO_USAGE, "anthropic/claude-haiku-4.5", 0.42),
+      ).toBe(42);
+    });
+
+    it("rounds the gateway cost to the nearest cent", async () => {
+      // $0.123 → 12.3c → 12c
+      expect(await computeCreditsDeductedCents(ZERO_USAGE, "model", 0.123)).toBe(12);
+      // $0.126 → 12.6c → 13c
+      expect(await computeCreditsDeductedCents(ZERO_USAGE, "model", 0.126)).toBe(13);
+    });
+
+    it("returns at least 1 when gateway cost rounds to 0", async () => {
+      // $0.0001 → 0.01c → would round to 0, must bump to 1
+      expect(await computeCreditsDeductedCents(ZERO_USAGE, "model", 0.0001)).toBe(1);
+    });
+
+    it("does NOT call the catalog when gateway cost is usable", async () => {
+      await computeCreditsDeductedCents(ZERO_USAGE, "model", 0.05);
+      expect(getAvailableModels).not.toHaveBeenCalled();
+    });
+  });
+
+  describe("falls back to token-based estimate when gateway cost is unusable", () => {
+    it("when gatewayCostUsd is undefined", async () => {
+      vi.mocked(getAvailableModels).mockResolvedValue([
+        { id: "model-x", cost: { input: 1, output: 4 } } as never,
+      ]);
+      // 1M in + 1M out → $5 → 500c
+      expect(
+        await computeCreditsDeductedCents(
+          { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 },
+          "model-x",
+          undefined,
+        ),
+      ).toBe(500);
+    });
+
+    it("when gatewayCostUsd is 0", async () => {
+      vi.mocked(getAvailableModels).mockResolvedValue([
+        { id: "model-x", cost: { input: 1, output: 4 } } as never,
+      ]);
+      expect(
+        await computeCreditsDeductedCents(
+          { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 },
+          "model-x",
+          0,
+        ),
+      ).toBe(500);
+    });
+
+    it("when gatewayCostUsd is negative (corrupted/upstream bug)", async () => {
+      vi.mocked(getAvailableModels).mockResolvedValue([
+        { id: "model-x", cost: { input: 1, output: 4 } } as never,
+      ]);
+      expect(
+        await computeCreditsDeductedCents(
+          { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 },
+          "model-x",
+          -1,
+        ),
+      ).toBe(500);
+    });
+
+    it("when gatewayCostUsd is NaN (not Number.isFinite)", async () => {
+      vi.mocked(getAvailableModels).mockResolvedValue([
+        { id: "model-x", cost: { input: 1, output: 4 } } as never,
+      ]);
+      expect(
+        await computeCreditsDeductedCents(
+          { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 },
+          "model-x",
+          Number.NaN,
+        ),
+      ).toBe(500);
+    });
+
+    it("when gatewayCostUsd is Infinity", async () => {
+      vi.mocked(getAvailableModels).mockResolvedValue([
+        { id: "model-x", cost: { input: 1, output: 4 } } as never,
+      ]);
+      expect(
+        await computeCreditsDeductedCents(
+          { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 },
+          "model-x",
+          Number.POSITIVE_INFINITY,
+        ),
+      ).toBe(500);
+    });
+  });
+
+  describe("estimate fallbacks (also: never charge zero on success)", () => {
+    it("returns 1 when modelId is not in the catalog", async () => {
+      vi.mocked(getAvailableModels).mockResolvedValue([
+        { id: "other-model", cost: { input: 1, output: 4 } } as never,
+      ]);
+      expect(
+        await computeCreditsDeductedCents(
+          { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 1000 },
+          "model-x",
+          undefined,
+        ),
+      ).toBe(1);
+    });
+
+    it("returns 1 when the catalog has no cost for the model", async () => {
+      vi.mocked(getAvailableModels).mockResolvedValue([{ id: "model-x" } as never]);
+      expect(
+        await computeCreditsDeductedCents(
+          { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 1000 },
+          "model-x",
+          undefined,
+        ),
+      ).toBe(1);
+    });
+
+    it("returns 1 when getAvailableModels rejects", async () => {
+      vi.mocked(getAvailableModels).mockRejectedValue(new Error("gateway down"));
+      expect(
+        await computeCreditsDeductedCents(
+          { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 1000 },
+          "model-x",
+          undefined,
+        ),
+      ).toBe(1);
+    });
+
+    it("returns 1 when token estimate rounds to 0 (very tiny usage)", async () => {
+      vi.mocked(getAvailableModels).mockResolvedValue([
+        { id: "model-x", cost: { input: 0.0001, output: 0.0001 } } as never,
+      ]);
+      // ~$0.0000002 → 0.00002c → bumps to 1c minimum
+      expect(
+        await computeCreditsDeductedCents(
+          { inputTokens: 1, cachedInputTokens: 0, outputTokens: 1 },
+          "model-x",
+          undefined,
+        ),
+      ).toBe(1);
+    });
+  });
+
+  describe("model lookup", () => {
+    it("matches modelId exactly (provider/model form)", async () => {
+      vi.mocked(getAvailableModels).mockResolvedValue([
+        { id: "anthropic/claude-haiku-4.5", cost: { input: 1, output: 4 } } as never,
+        { id: "openai/gpt-5", cost: { input: 10, output: 40 } } as never,
+      ]);
+      // Pick haiku: 1M in + 1M out @ haiku rates → $5 → 500c
+      expect(
+        await computeCreditsDeductedCents(
+          { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 },
+          "anthropic/claude-haiku-4.5",
+          undefined,
+        ),
+      ).toBe(500);
+    });
+  });
+});
diff --git a/lib/credits/__tests__/estimateModelUsageCost.test.ts b/lib/credits/__tests__/estimateModelUsageCost.test.ts
new file mode 100644
index 000000000..e41d9454e
--- /dev/null
+++ b/lib/credits/__tests__/estimateModelUsageCost.test.ts
@@ -0,0 +1,165 @@
+import { describe, it, expect } from "vitest";
+import { estimateModelUsageCost } from "@/lib/credits/estimateModelUsageCost";
+
+const baseCost = { input: 1, output: 4 }; // $1/M in, $4/M out
+
+describe("estimateModelUsageCost", () => {
+  describe("guard rails", () => {
+    it("returns undefined when cost catalog entry is missing", () => {
+      expect(
+        estimateModelUsageCost(
+          { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 500 },
+          undefined,
+        ),
+      ).toBeUndefined();
+    });
+
+    it("returns undefined when input price is missing", () => {
+      expect(
+        estimateModelUsageCost(
+          { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 500 },
+          { output: 4 },
+        ),
+      ).toBeUndefined();
+    });
+
+    it("returns undefined when output price is missing", () => {
+      expect(
+        estimateModelUsageCost(
+          { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 500 },
+          { input: 1 },
+        ),
+      ).toBeUndefined();
+    });
+  });
+
+  describe("base tier (≤200k input tokens)", () => {
+    it("computes uncached input + output cost in USD", () => {
+      // 1_000_000 in @ $1/M + 1_000_000 out @ $4/M = $5
+      expect(
+        estimateModelUsageCost(
+          {
+            inputTokens: 1_000_000,
+            cachedInputTokens: 0,
+            outputTokens: 1_000_000,
+          },
+          baseCost,
+        ),
+      ).toBe(5);
+    });
+
+    it("applies cache_read price for cachedInputTokens portion when present", () => {
+      // 100k cached @ $0.10/M + 100k uncached @ $1/M + 100k out @ $4/M
+      // = 0.01 + 0.10 + 0.40 = $0.51
+      const cost = { input: 1, output: 4, cache_read: 0.1 };
+      expect(
+        estimateModelUsageCost(
+          { inputTokens: 200_000, cachedInputTokens: 100_000, outputTokens: 100_000 },
+          cost,
+        ),
+      ).toBeCloseTo(0.51, 6);
+    });
+
+    it("falls back to input price when cache_read is undefined (cached tokens billed at full price)", () => {
+      // 100k cached @ $1/M + 100k uncached @ $1/M + 100k out @ $4/M
+      // = 0.10 + 0.10 + 0.40 = $0.60
+      expect(
+        estimateModelUsageCost(
+          { inputTokens: 200_000, cachedInputTokens: 100_000, outputTokens: 100_000 },
+          baseCost,
+        ),
+      ).toBeCloseTo(0.6, 6);
+    });
+
+    it("clamps negative cachedInputTokens to 0", () => {
+      expect(
+        estimateModelUsageCost(
+          { inputTokens: 1_000_000, cachedInputTokens: -50_000, outputTokens: 0 },
+          baseCost,
+        ),
+      ).toBe(1);
+    });
+
+    it("clamps cachedInputTokens > inputTokens so uncached doesn't go negative", () => {
+      // cached=200_000 but input=100_000 — uncached must clamp to 0 (not -100_000).
+      // 200_000 cached @ $1/M (no cache_read, falls back to input) + 0 out = $0.20.
+      // Without the Math.max guard, this would underbill: a negative uncached count
+      // times the input price would subtract from the cached charge.
+      expect(
+        estimateModelUsageCost(
+          { inputTokens: 100_000, cachedInputTokens: 200_000, outputTokens: 0 },
+          baseCost,
+        ),
+      ).toBeCloseTo(0.2, 6);
+    });
+
+    it("clamps negative outputTokens to 0", () => {
+      expect(
+        estimateModelUsageCost(
+          { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: -1000 },
+          baseCost,
+        ),
+      ).toBe(1);
+    });
+  });
+
+  describe("context_over_200k tier", () => {
+    const tieredCost = {
+      input: 1,
+      output: 4,
+      context_over_200k: { input: 2, output: 8 },
+    };
+
+    it("uses context_over_200k tier when inputTokens exceeds 200k", () => {
+      // 300_000 in @ $2/M + 100_000 out @ $8/M = 0.60 + 0.80 = $1.40
+      expect(
+        estimateModelUsageCost(
+          { inputTokens: 300_000, cachedInputTokens: 0, outputTokens: 100_000 },
+          tieredCost,
+        ),
+      ).toBeCloseTo(1.4, 6);
+    });
+
+    it("does NOT use context_over_200k tier when inputTokens is exactly 200k", () => {
+      // boundary check — must be strictly > 200k
+      // 200_000 @ $1/M + 0 out = $0.20
+      expect(
+        estimateModelUsageCost(
+          { inputTokens: 200_000, cachedInputTokens: 0, outputTokens: 0 },
+          tieredCost,
+        ),
+      ).toBeCloseTo(0.2, 6);
+    });
+
+    it("ignores context_over_200k when both input and output overrides are missing", () => {
+      // only cache_read is set in the override — should NOT trigger the tier swap
+      const cost = {
+        input: 1,
+        output: 4,
+        context_over_200k: { cache_read: 0.5 },
+      };
+      // Treated as base tier — 300k @ $1/M + 0 out = $0.30
+      expect(
+        estimateModelUsageCost(
+          { inputTokens: 300_000, cachedInputTokens: 0, outputTokens: 0 },
+          cost,
+        ),
+      ).toBeCloseTo(0.3, 6);
+    });
+
+    it("falls back to base tier input when context_over_200k.input is missing", () => {
+      const cost = {
+        input: 1,
+        output: 4,
+        context_over_200k: { output: 8 }, // only output overridden
+      };
+      // 300k in @ $1/M (fallback) + 100k out @ $8/M = 0.30 + 0.80 = $1.10
+      expect(
+        estimateModelUsageCost(
+          { inputTokens: 300_000, cachedInputTokens: 0, outputTokens: 100_000 },
+          cost,
+        ),
+      ).toBeCloseTo(1.1, 6);
+    });
+  });
+});
diff --git a/lib/credits/computeCreditsDeductedCents.ts b/lib/credits/computeCreditsDeductedCents.ts
new file mode 100644
index 000000000..7887801ad
--- /dev/null
+++ b/lib/credits/computeCreditsDeductedCents.ts
@@ -0,0 +1,57 @@
+import { getAvailableModels } from "@/lib/ai/getAvailableModels";
+import { estimateModelUsageCost } from "@/lib/credits/estimateModelUsageCost";
+import type { AvailableModelCost } from "@/lib/credits/AvailableModelCost";
+
+/**
+ * Per-turn credit charge in cents (minimum 1).
+ *
+ * Mirrors open-agents'
+ * `apps/web/lib/credits/compute-credits-deducted-cents.ts` so the same
+ * billing math runs on both sides of the chat cutover. Resolution order:
+ *
+ *   1. Gateway-reported cost on `responseMessage.metadata.totalMessageCost`
+ *      — the exact number the chat UI shows next to the assistant
+ *      response. Used directly so the wallet debit converges with the
+ *      cost label.
+ *   2. Token-based estimate against the model catalog's `cost` entry.
+ *      Catalog is the same gateway / models.dev pipeline that backs
+ *      `GET /api/ai/models`.
+ *   3. 1c floor when no pricing is available — every successful turn
+ *      moves the wallet by at least 1c so a transient catalog outage
+ *      can't make a turn free.
+ *
+ * Errors in the catalog fetch are swallowed and treated as path #3 —
+ * the caller (recordChatUsage) must not fail the workflow on a credit
+ * accounting hiccup.
+ *
+ * @param usage Token counts for the turn (matches AI SDK's `LanguageModelUsage`).
+ * @param modelId Fully qualified gateway id (e.g. `anthropic/claude-haiku-4.5`).
+ * @param gatewayCostUsd Gateway-reported total USD cost for the turn,
+ *   when available. Subagent steps (collectTaskToolUsageEvents) won't
+ *   have one and fall through to the token estimate.
+ * @returns Integer cent amount, ≥ 1.
+ */
+export async function computeCreditsDeductedCents(
+  usage: {
+    inputTokens: number;
+    cachedInputTokens: number;
+    outputTokens: number;
+  },
+  modelId: string,
+  gatewayCostUsd?: number,
+): Promise<number> {
+  if (typeof gatewayCostUsd === "number" && Number.isFinite(gatewayCostUsd) && gatewayCostUsd > 0) {
+    return Math.max(1, Math.round(gatewayCostUsd * 100));
+  }
+
+  try {
+    const models = await getAvailableModels();
+    const model = models.find(m => m.id === modelId) as { cost?: AvailableModelCost } | undefined;
+    const usd = estimateModelUsageCost(usage, model?.cost);
+    if (typeof usd !== "number" || usd <= 0) return 1;
+    return Math.max(1, Math.round(usd * 100));
+  } catch (error) {
+    console.error("Failed to compute credits from usage:", error);
+    return 1;
+  }
+}
diff --git a/lib/credits/estimateModelUsageCost.ts b/lib/credits/estimateModelUsageCost.ts
new file mode 100644
index 000000000..d675486a0
--- /dev/null
+++ b/lib/credits/estimateModelUsageCost.ts
@@ -0,0 +1,79 @@
+import type { AvailableModelCost, AvailableModelCostTier } from "@/lib/credits/AvailableModelCost";
+
+const TOKENS_PER_MILLION = 1_000_000;
+
+/**
+ * Picks the right cost tier for `usage`. Above 200k input tokens many
+ * providers charge more, so the cost catalog exposes an override on
+ * `cost.context_over_200k`. Missing fields fall back to the base tier
+ * so a partial override is still valid.
+ *
+ * The trigger is "input > 200k AND at least one of input/output is
+ * overridden" — a tier that only overrides `cache_read` is treated as
+ * the base tier (it's not a real tier swap, just a cache discount).
+ */
+function resolveCostTier(
+  usage: { inputTokens: number },
+  cost: AvailableModelCost | undefined,
+): AvailableModelCostTier | undefined {
+  if (!cost) return undefined;
+
+  if (
+    usage.inputTokens > 200_000 &&
+    (typeof cost.context_over_200k?.input === "number" ||
+      typeof cost.context_over_200k?.output === "number")
+  ) {
+    return {
+      input: cost.context_over_200k.input ?? cost.input,
+      output: cost.context_over_200k.output ?? cost.output,
+      cache_read: cost.context_over_200k.cache_read ?? cost.cache_read,
+    };
+  }
+
+  return cost;
+}
+
+/**
+ * Token-based estimate of a turn's USD cost, used as a fallback when
+ * the gateway hasn't reported an actual cost on the
+ * `assistantMessage.metadata.totalMessageCost` field.
+ *
+ * Ports `apps/web/lib/models.ts:estimateModelUsageCost` from
+ * open-agents so the same per-turn math runs on both sides during the
+ * cutover. Cached input tokens are billed at `cache_read` when the
+ * catalog exposes it and fall back to the base input price otherwise.
+ *
+ * Returns `undefined` when the catalog can't price this model (missing
+ * cost entry, missing input price, or missing output price) so the
+ * caller knows to use a different fallback (open-agents' rule: never
+ * charge 0 — bill the 1c minimum instead).
+ *
+ * @param usage Token counts for the turn (mirrors AI SDK's `LanguageModelUsage`).
+ * @param cost Per-model catalog entry. `undefined` short-circuits to `undefined`.
+ * @returns USD cost, or `undefined` if not priceable.
+ */
+export function estimateModelUsageCost(
+  usage: {
+    inputTokens: number;
+    cachedInputTokens: number;
+    outputTokens: number;
+  },
+  cost: AvailableModelCost | undefined,
+): number | undefined {
+  const costTier = resolveCostTier(usage, cost);
+  const inputPrice = costTier?.input;
+  const outputPrice = costTier?.output;
+  if (typeof inputPrice !== "number" || typeof outputPrice !== "number") {
+    return undefined;
+  }
+
+  const cachedInputTokens = Math.max(0, usage.cachedInputTokens);
+  const uncachedInputTokens = Math.max(0, usage.inputTokens - cachedInputTokens);
+  const cacheReadPrice = costTier?.cache_read ?? inputPrice;
+
+  return (
+    (uncachedInputTokens * inputPrice) / TOKENS_PER_MILLION +
+    (cachedInputTokens * cacheReadPrice) / TOKENS_PER_MILLION +
+    (Math.max(0, usage.outputTokens) * outputPrice) / TOKENS_PER_MILLION
+  );
+}

From 51cfe3c777595cd2e92d9cb68395dfa07deba42a Mon Sep 17 00:00:00 2001
From: Sweets Sweetman <sweetmantech@gmail.com>
Date: Mon, 25 May 2026 15:46:58 -0500
Subject: [PATCH 2/7] feat(credits): charge credits per chat turn (atomic
 wallet debit + audit)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes the silent revenue-loss gap tracked in #605: every successful
chat workflow turn now debits the account's wallet AND records a
usage_events audit row, in a single atomic transaction.

End-to-end flow:
  1. runAgentStep's onFinish captures responseMessage.metadata
     ({totalMessageCost, totalMessageUsage}) — same number the chat UI
     shows next to the response.
  2. runAgentWorkflow calls recordChatUsage(accountId, modelId, message)
     after persistAssistantMessage.
  3. recordChatUsage → computeCreditsDeductedCents (gateway cost OR
     token estimate OR 1c floor) → deductCreditsWithAudit
     (supabase.rpc'deduct_credits_with_audit').
  4. The Postgres function (recoupable/database#26) runs the wallet
     UPDATE and the usage_events INSERT in one implicit transaction
     — either both land or neither does. Matches open-agents'
     db.transaction(...) atomicity guarantee.

Threads accountId through RunAgentWorkflowInput from
validateChatWorkflow (auth-derived; never trusted from the request
body).

New files:
  - lib/supabase/credits_usage/deductCreditsWithAudit.ts (+ tests)
    Thin supabase.rpc wrapper; fire-and-forget (returns ok/error
    instead of throwing). Lives in lib/supabase/ per CLAUDE.md SRP.
  - app/lib/workflows/recordChatUsage.ts (+ tests)
    "use step" function that ties the two together with entry/skip/
    success/error logs and graceful handling of missing metadata,
    catalog failures, and RPC errors.

Updated:
  - app/lib/workflows/runAgentWorkflow.ts
    + accountId field on RunAgentWorkflowInput
    + recordChatUsage call after successful persistAssistantMessage
  - lib/chat/handleChatWorkflowStream.ts
    + passes validated.accountId into start(runAgentWorkflow, ...)
  - app/lib/workflows/__tests__/runAgentWorkflow.test.ts
    + 3 new tests (records on success, skips when no responseMessage,
      skips when runAgentStep throws)

TDD: each new file went red → minimal impl → green.
Suite: 3205 → 3220 passing.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../__tests__/recordChatUsage.test.ts         | 201 ++++++++++++++++++
 .../__tests__/runAgentWorkflow.test.ts        |  53 +++++
 app/lib/workflows/recordChatUsage.ts          | 101 +++++++++
 app/lib/workflows/runAgentWorkflow.ts         |  20 ++
 lib/chat/handleChatWorkflowStream.ts          |   1 +
 .../__tests__/deductCreditsWithAudit.test.ts  |  89 ++++++++
 .../credits_usage/deductCreditsWithAudit.ts   |  61 ++++++
 7 files changed, 526 insertions(+)
 create mode 100644 app/lib/workflows/__tests__/recordChatUsage.test.ts
 create mode 100644 app/lib/workflows/recordChatUsage.ts
 create mode 100644 lib/supabase/credits_usage/__tests__/deductCreditsWithAudit.test.ts
 create mode 100644 lib/supabase/credits_usage/deductCreditsWithAudit.ts

diff --git a/app/lib/workflows/__tests__/recordChatUsage.test.ts b/app/lib/workflows/__tests__/recordChatUsage.test.ts
new file mode 100644
index 000000000..2588f496f
--- /dev/null
+++ b/app/lib/workflows/__tests__/recordChatUsage.test.ts
@@ -0,0 +1,201 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { recordChatUsage } from "@/app/lib/workflows/recordChatUsage";
+import { computeCreditsDeductedCents } from "@/lib/credits/computeCreditsDeductedCents";
+import { deductCreditsWithAudit } from "@/lib/supabase/credits_usage/deductCreditsWithAudit";
+
+vi.mock("@/lib/credits/computeCreditsDeductedCents", () => ({
+  computeCreditsDeductedCents: vi.fn(),
+}));
+vi.mock("@/lib/supabase/credits_usage/deductCreditsWithAudit", () => ({
+  deductCreditsWithAudit: vi.fn(),
+}));
+
+beforeEach(() => {
+  vi.clearAllMocks();
+});
+
+const ACCOUNT = "11111111-1111-1111-1111-111111111111";
+
+function buildAssistantMessage(metadata: Record<string, unknown>) {
+  return {
+    id: "msg_abc",
+    role: "assistant" as const,
+    parts: [{ type: "text", text: "ok" }],
+    metadata,
+  };
+}
+
+describe("recordChatUsage", () => {
+  it("computes cents from the gateway cost on responseMessage.metadata.totalMessageCost", async () => {
+    vi.mocked(computeCreditsDeductedCents).mockResolvedValue(7);
+    vi.mocked(deductCreditsWithAudit).mockResolvedValue({ ok: true });
+
+    const message = buildAssistantMessage({
+      totalMessageCost: 0.07,
+      totalMessageUsage: {
+        inputTokens: 100,
+        cachedInputTokens: 10,
+        outputTokens: 20,
+      },
+    });
+
+    await recordChatUsage({
+      accountId: ACCOUNT,
+      modelId: "anthropic/claude-haiku-4.5",
+      responseMessage: message as never,
+    });
+
+    expect(computeCreditsDeductedCents).toHaveBeenCalledWith(
+      { inputTokens: 100, cachedInputTokens: 10, outputTokens: 20 },
+      "anthropic/claude-haiku-4.5",
+      0.07,
+    );
+  });
+
+  it("calls deductCreditsWithAudit with the computed cents and a fresh event id", async () => {
+    vi.mocked(computeCreditsDeductedCents).mockResolvedValue(42);
+    vi.mocked(deductCreditsWithAudit).mockResolvedValue({ ok: true });
+
+    const message = buildAssistantMessage({
+      totalMessageCost: 0.42,
+      totalMessageUsage: {
+        inputTokens: 100,
+        cachedInputTokens: 0,
+        outputTokens: 200,
+      },
+    });
+
+    await recordChatUsage({
+      accountId: ACCOUNT,
+      modelId: "anthropic/claude-haiku-4.5",
+      responseMessage: message as never,
+    });
+
+    expect(deductCreditsWithAudit).toHaveBeenCalledTimes(1);
+    const args = vi.mocked(deductCreditsWithAudit).mock.calls[0]?.[0];
+    expect(args?.accountId).toBe(ACCOUNT);
+    expect(args?.cents).toBe(42);
+    expect(typeof args?.eventId).toBe("string");
+    expect(args?.eventId.length).toBeGreaterThan(0);
+    expect(args?.event).toMatchObject({
+      source: "api",
+      agent_type: "main",
+      provider: "anthropic",
+      model_id: "anthropic/claude-haiku-4.5",
+      input_tokens: 100,
+      cached_input_tokens: 0,
+      output_tokens: 200,
+    });
+  });
+
+  it("derives provider from modelId (everything before the first slash)", async () => {
+    vi.mocked(computeCreditsDeductedCents).mockResolvedValue(5);
+    vi.mocked(deductCreditsWithAudit).mockResolvedValue({ ok: true });
+
+    const message = buildAssistantMessage({
+      totalMessageCost: 0.05,
+      totalMessageUsage: { inputTokens: 1, cachedInputTokens: 0, outputTokens: 1 },
+    });
+
+    await recordChatUsage({
+      accountId: ACCOUNT,
+      modelId: "openai/gpt-5.4",
+      responseMessage: message as never,
+    });
+
+    const args = vi.mocked(deductCreditsWithAudit).mock.calls[0]?.[0];
+    expect(args?.event.provider).toBe("openai");
+    expect(args?.event.model_id).toBe("openai/gpt-5.4");
+  });
+
+  it("skips entirely when responseMessage is undefined (no usage to charge)", async () => {
+    await recordChatUsage({
+      accountId: ACCOUNT,
+      modelId: "anthropic/claude-haiku-4.5",
+      responseMessage: undefined,
+    });
+
+    expect(computeCreditsDeductedCents).not.toHaveBeenCalled();
+    expect(deductCreditsWithAudit).not.toHaveBeenCalled();
+  });
+
+  it("skips entirely when responseMessage has no metadata at all", async () => {
+    await recordChatUsage({
+      accountId: ACCOUNT,
+      modelId: "anthropic/claude-haiku-4.5",
+      responseMessage: {
+        id: "m",
+        role: "assistant",
+        parts: [{ type: "text", text: "x" }],
+      } as never,
+    });
+
+    expect(computeCreditsDeductedCents).not.toHaveBeenCalled();
+    expect(deductCreditsWithAudit).not.toHaveBeenCalled();
+  });
+
+  it("falls back to zero token counts when totalMessageUsage is missing (still charges the 1c floor)", async () => {
+    vi.mocked(computeCreditsDeductedCents).mockResolvedValue(1);
+    vi.mocked(deductCreditsWithAudit).mockResolvedValue({ ok: true });
+
+    const message = buildAssistantMessage({
+      totalMessageCost: 0.0001,
+      // No totalMessageUsage on metadata
+    });
+
+    await recordChatUsage({
+      accountId: ACCOUNT,
+      modelId: "anthropic/claude-haiku-4.5",
+      responseMessage: message as never,
+    });
+
+    expect(computeCreditsDeductedCents).toHaveBeenCalledWith(
+      { inputTokens: 0, cachedInputTokens: 0, outputTokens: 0 },
+      "anthropic/claude-haiku-4.5",
+      0.0001,
+    );
+    const args = vi.mocked(deductCreditsWithAudit).mock.calls[0]?.[0];
+    expect(args?.event.input_tokens).toBe(0);
+    expect(args?.event.output_tokens).toBe(0);
+  });
+
+  it("does NOT throw when deductCreditsWithAudit returns ok:false (fire-and-forget contract)", async () => {
+    vi.mocked(computeCreditsDeductedCents).mockResolvedValue(7);
+    vi.mocked(deductCreditsWithAudit).mockResolvedValue({
+      ok: false,
+      error: "credits_usage row not found",
+    });
+
+    const message = buildAssistantMessage({
+      totalMessageCost: 0.07,
+      totalMessageUsage: { inputTokens: 1, cachedInputTokens: 0, outputTokens: 1 },
+    });
+
+    await expect(
+      recordChatUsage({
+        accountId: ACCOUNT,
+        modelId: "anthropic/claude-haiku-4.5",
+        responseMessage: message as never,
+      }),
+    ).resolves.toBeUndefined();
+  });
+
+  it("does NOT throw when computeCreditsDeductedCents rejects (fire-and-forget contract)", async () => {
+    vi.mocked(computeCreditsDeductedCents).mockRejectedValue(new Error("catalog down"));
+
+    const message = buildAssistantMessage({
+      totalMessageCost: 0.07,
+      totalMessageUsage: { inputTokens: 1, cachedInputTokens: 0, outputTokens: 1 },
+    });
+
+    await expect(
+      recordChatUsage({
+        accountId: ACCOUNT,
+        modelId: "anthropic/claude-haiku-4.5",
+        responseMessage: message as never,
+      }),
+    ).resolves.toBeUndefined();
+
+    expect(deductCreditsWithAudit).not.toHaveBeenCalled();
+  });
+});
diff --git a/app/lib/workflows/__tests__/runAgentWorkflow.test.ts b/app/lib/workflows/__tests__/runAgentWorkflow.test.ts
index 3e59ffc2d..b08b601fc 100644
--- a/app/lib/workflows/__tests__/runAgentWorkflow.test.ts
+++ b/app/lib/workflows/__tests__/runAgentWorkflow.test.ts
@@ -5,6 +5,7 @@ import { clearChatActiveStream } from "@/lib/chat/clearChatActiveStream";
 import { closeChatStream } from "@/app/lib/workflows/closeChatStream";
 import { generateAssistantMessageId } from "@/app/lib/workflows/generateAssistantMessageId";
 import { persistAssistantMessage } from "@/lib/chat/persistAssistantMessage";
+import { recordChatUsage } from "@/app/lib/workflows/recordChatUsage";
 
 vi.mock("@/app/lib/workflows/runAgentStep", () => ({
   runAgentStep: vi.fn(),
@@ -21,6 +22,9 @@ vi.mock("@/app/lib/workflows/generateAssistantMessageId", () => ({
 vi.mock("@/lib/chat/persistAssistantMessage", () => ({
   persistAssistantMessage: vi.fn(),
 }));
+vi.mock("@/app/lib/workflows/recordChatUsage", () => ({
+  recordChatUsage: vi.fn(),
+}));
 // Captured writable stub so tests can assert closeChatStream got the
 // same instance the workflow body holds.
 const writableStub = new WritableStream();
@@ -43,6 +47,7 @@ const baseInput = {
   messages: [{ id: "m1", role: "user", parts: [{ type: "text", text: "hi" }] } as never],
   chatId: "chat-1",
   sessionId: "session-1",
+  accountId: "acc-1",
   modelId: "anthropic/claude-haiku-4.5",
   agentContext: {
     sandbox: { state: { type: "vercel" }, workingDirectory: "/sandbox/mono" },
@@ -170,4 +175,52 @@ describe("runAgentWorkflow", () => {
       expect.objectContaining({ assistantMessageId: "asst-in-progress" }),
     );
   });
+
+  it("calls recordChatUsage with accountId/modelId/responseMessage when runAgentStep returns a message", async () => {
+    const responseMessage = {
+      id: "assistant-msg-xyz",
+      role: "assistant",
+      parts: [{ type: "text", text: "Hello!" }],
+      metadata: {
+        totalMessageCost: 0.07,
+        totalMessageUsage: {
+          inputTokens: 100,
+          cachedInputTokens: 10,
+          outputTokens: 20,
+        },
+      },
+    };
+    vi.mocked(runAgentStep).mockResolvedValue({
+      finishReason: "stop",
+      responseMessage: responseMessage as never,
+    });
+
+    await runAgentWorkflow(baseInput);
+
+    expect(recordChatUsage).toHaveBeenCalledTimes(1);
+    expect(recordChatUsage).toHaveBeenCalledWith({
+      accountId: "acc-1",
+      modelId: "anthropic/claude-haiku-4.5",
+      responseMessage,
+    });
+  });
+
+  it("does NOT call recordChatUsage when runAgentStep returns no responseMessage", async () => {
+    vi.mocked(runAgentStep).mockResolvedValue({
+      finishReason: "stop",
+      responseMessage: undefined,
+    });
+
+    await runAgentWorkflow(baseInput);
+
+    expect(recordChatUsage).not.toHaveBeenCalled();
+  });
+
+  it("does NOT call recordChatUsage when runAgentStep throws (no message to bill)", async () => {
+    vi.mocked(runAgentStep).mockRejectedValue(new Error("model exploded"));
+
+    await expect(runAgentWorkflow(baseInput)).rejects.toThrow("model exploded");
+
+    expect(recordChatUsage).not.toHaveBeenCalled();
+  });
 });
diff --git a/app/lib/workflows/recordChatUsage.ts b/app/lib/workflows/recordChatUsage.ts
new file mode 100644
index 000000000..d18d1543e
--- /dev/null
+++ b/app/lib/workflows/recordChatUsage.ts
@@ -0,0 +1,101 @@
+import { nanoid } from "nanoid";
+import { computeCreditsDeductedCents } from "@/lib/credits/computeCreditsDeductedCents";
+import { deductCreditsWithAudit } from "@/lib/supabase/credits_usage/deductCreditsWithAudit";
+
+/**
+ * Duck-typed shape of the assembled assistant message we read off the
+ * workflow result. Intentionally loose — both AI SDK's `UIMessage` and
+ * the in-test fixtures satisfy it. The actual bill comes from
+ * `metadata.totalMessageCost` (gateway-reported USD, the same number
+ * the chat UI displays) and `metadata.totalMessageUsage` (token
+ * counts). When metadata is missing we silently no-op.
+ */
+type AssistantMessageForUsage = {
+  metadata?: {
+    totalMessageCost?: number;
+    totalMessageUsage?: {
+      inputTokens?: number;
+      cachedInputTokens?: number;
+      outputTokens?: number;
+    };
+  };
+};
+
+/**
+ * Fire-and-forget billing step run at the end of a chat workflow.
+ * Mirrors open-agents' `recordWorkflowUsage` main-agent path
+ * (apps/web/app/workflows/chat-post-finish.ts) — without this step,
+ * every chat turn on `/api/chat/workflow` is free (recoupable/api#605).
+ *
+ * Wallet debit + audit row are atomic via the
+ * `deduct_credits_with_audit` Postgres function (one transaction).
+ * Errors are caught + logged so a transient credits-table outage
+ * never aborts the chat workflow.
+ *
+ * NOTE: Sub-agent (task tool) attribution is a separate follow-up —
+ * this step only charges the main-agent turn. The audit row is
+ * tagged `agent_type='main'`.
+ */
+export async function recordChatUsage(input: {
+  accountId: string;
+  modelId: string;
+  responseMessage: AssistantMessageForUsage | undefined;
+}): Promise<void> {
+  "use step";
+  console.log("[recordChatUsage] enter", {
+    accountId: input.accountId,
+    modelId: input.modelId,
+    hasResponseMessage: input.responseMessage !== undefined,
+  });
+  try {
+    const metadata = input.responseMessage?.metadata;
+    if (!metadata) {
+      console.log("[recordChatUsage] skip: no metadata on responseMessage", {
+        accountId: input.accountId,
+        modelId: input.modelId,
+      });
+      return;
+    }
+
+    const usage = {
+      inputTokens: metadata.totalMessageUsage?.inputTokens ?? 0,
+      cachedInputTokens: metadata.totalMessageUsage?.cachedInputTokens ?? 0,
+      outputTokens: metadata.totalMessageUsage?.outputTokens ?? 0,
+    };
+
+    const cents = await computeCreditsDeductedCents(
+      usage,
+      input.modelId,
+      metadata.totalMessageCost,
+    );
+
+    const provider = input.modelId.includes("/") ? input.modelId.split("/")[0] : undefined;
+
+    const result = await deductCreditsWithAudit({
+      accountId: input.accountId,
+      cents,
+      eventId: nanoid(),
+      event: {
+        source: "api",
+        agent_type: "main",
+        provider,
+        model_id: input.modelId,
+        input_tokens: usage.inputTokens,
+        cached_input_tokens: usage.cachedInputTokens,
+        output_tokens: usage.outputTokens,
+      },
+    });
+
+    if (!result.ok) {
+      console.error("[recordChatUsage] debit failed:", result.error);
+      return;
+    }
+    console.log("[recordChatUsage] success", {
+      accountId: input.accountId,
+      modelId: input.modelId,
+      cents,
+    });
+  } catch (error) {
+    console.error("[recordChatUsage] unexpected error:", error);
+  }
+}
diff --git a/app/lib/workflows/runAgentWorkflow.ts b/app/lib/workflows/runAgentWorkflow.ts
index e4e628e96..70094a843 100644
--- a/app/lib/workflows/runAgentWorkflow.ts
+++ b/app/lib/workflows/runAgentWorkflow.ts
@@ -5,12 +5,20 @@ import { generateAssistantMessageId } from "@/app/lib/workflows/generateAssistan
 import { runAgentStep } from "@/app/lib/workflows/runAgentStep";
 import { clearChatActiveStream } from "@/lib/chat/clearChatActiveStream";
 import { persistAssistantMessage } from "@/lib/chat/persistAssistantMessage";
+import { recordChatUsage } from "@/app/lib/workflows/recordChatUsage";
 import type { DurableAgentContext } from "@/lib/agent/tools/AgentContext";
 
 export type RunAgentWorkflowInput = {
   messages: UIMessage[];
   chatId: string;
   sessionId: string;
+  /**
+   * Authenticated account whose wallet absorbs the turn's cost. Resolved by
+   * the route handler via `validateChatWorkflow` so we never trust a
+   * caller-supplied id. Threaded into `recordChatUsage` after the assistant
+   * message is persisted.
+   */
+  accountId: string;
   modelId: string;
   /**
    * JSON-serializable subset of AgentContext that survives the durable
@@ -82,6 +90,18 @@ export async function runAgentWorkflow(input: RunAgentWorkflowInput): Promise<vo
     // mark the workflow run failed.
     if (result.responseMessage) {
       await persistAssistantMessage(input.chatId, result.responseMessage);
+
+      // Charge the account for this turn. Atomic wallet debit + audit
+      // row insert via the `deduct_credits_with_audit` Postgres function
+      // — see `recordChatUsage`. Fire-and-forget by contract; transient
+      // credits-table failures must not abort the chat workflow.
+      // Mirrors open-agents' `recordWorkflowUsage` main-agent path
+      // (apps/web/app/workflows/chat-post-finish.ts).
+      await recordChatUsage({
+        accountId: input.accountId,
+        modelId: input.modelId,
+        responseMessage: result.responseMessage,
+      });
     }
   } finally {
     // Run two cleanup steps in parallel:
diff --git a/lib/chat/handleChatWorkflowStream.ts b/lib/chat/handleChatWorkflowStream.ts
index 20f048a5c..27961b126 100644
--- a/lib/chat/handleChatWorkflowStream.ts
+++ b/lib/chat/handleChatWorkflowStream.ts
@@ -122,6 +122,7 @@ export async function handleChatWorkflowStream(request: NextRequest): Promise<Re
       messages: validated.messages,
       chatId: validated.chatId,
       sessionId: validated.sessionId,
+      accountId: validated.accountId,
       modelId,
       agentContext: {
         sandbox: {
diff --git a/lib/supabase/credits_usage/__tests__/deductCreditsWithAudit.test.ts b/lib/supabase/credits_usage/__tests__/deductCreditsWithAudit.test.ts
new file mode 100644
index 000000000..c2157bd88
--- /dev/null
+++ b/lib/supabase/credits_usage/__tests__/deductCreditsWithAudit.test.ts
@@ -0,0 +1,89 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { deductCreditsWithAudit } from "@/lib/supabase/credits_usage/deductCreditsWithAudit";
+import supabase from "@/lib/supabase/serverClient";
+
+vi.mock("@/lib/supabase/serverClient", () => ({
+  default: { rpc: vi.fn() },
+}));
+
+beforeEach(() => {
+  vi.clearAllMocks();
+});
+
+const ACCOUNT = "11111111-1111-1111-1111-111111111111";
+const validEvent = {
+  source: "api" as const,
+  agent_type: "main" as const,
+  provider: "anthropic",
+  model_id: "anthropic/claude-haiku-4.5",
+  input_tokens: 42,
+  cached_input_tokens: 10,
+  output_tokens: 13,
+  tool_call_count: 1,
+};
+
+describe("deductCreditsWithAudit", () => {
+  it("calls the deduct_credits_with_audit RPC with the right param names", async () => {
+    vi.mocked(supabase.rpc).mockResolvedValue({ data: null, error: null } as never);
+
+    await deductCreditsWithAudit({
+      accountId: ACCOUNT,
+      cents: 7,
+      eventId: "evt_abc",
+      event: validEvent,
+    });
+
+    expect(supabase.rpc).toHaveBeenCalledTimes(1);
+    expect(supabase.rpc).toHaveBeenCalledWith("deduct_credits_with_audit", {
+      p_account_id: ACCOUNT,
+      p_amount: 7,
+      p_event_id: "evt_abc",
+      p_event: validEvent,
+    });
+  });
+
+  it("returns { ok: true } when the RPC succeeds", async () => {
+    vi.mocked(supabase.rpc).mockResolvedValue({ data: null, error: null } as never);
+
+    const result = await deductCreditsWithAudit({
+      accountId: ACCOUNT,
+      cents: 7,
+      eventId: "evt_abc",
+      event: validEvent,
+    });
+
+    expect(result).toEqual({ ok: true });
+  });
+
+  it("returns { ok: false, error } when the RPC returns an error (does NOT throw)", async () => {
+    vi.mocked(supabase.rpc).mockResolvedValue({
+      data: null,
+      error: { message: "credits_usage row not found" },
+    } as never);
+
+    const result = await deductCreditsWithAudit({
+      accountId: ACCOUNT,
+      cents: 7,
+      eventId: "evt_abc",
+      event: validEvent,
+    });
+
+    expect(result).toEqual({ ok: false, error: "credits_usage row not found" });
+  });
+
+  it("returns { ok: false, error } when the rpc call throws (network failure)", async () => {
+    vi.mocked(supabase.rpc).mockRejectedValue(new Error("network blip"));
+
+    const result = await deductCreditsWithAudit({
+      accountId: ACCOUNT,
+      cents: 7,
+      eventId: "evt_abc",
+      event: validEvent,
+    });
+
+    expect(result.ok).toBe(false);
+    if (!result.ok) {
+      expect(result.error).toContain("network blip");
+    }
+  });
+});
diff --git a/lib/supabase/credits_usage/deductCreditsWithAudit.ts b/lib/supabase/credits_usage/deductCreditsWithAudit.ts
new file mode 100644
index 000000000..1bafef1e0
--- /dev/null
+++ b/lib/supabase/credits_usage/deductCreditsWithAudit.ts
@@ -0,0 +1,61 @@
+import supabase from "@/lib/supabase/serverClient";
+
+/**
+ * JSON payload populating the `usage_events` audit row. Optional
+ * fields fall back to column defaults (`source='api'`,
+ * `agent_type='main'`, provider/model_id NULL, counts 0) inside the
+ * SQL function — see `database/supabase/migrations/20260525000000_deduct_credits_with_audit.sql`.
+ */
+export interface DeductCreditsAuditEvent {
+  source?: "api" | "web";
+  agent_type?: "main" | "subagent";
+  provider?: string;
+  model_id?: string;
+  input_tokens?: number;
+  cached_input_tokens?: number;
+  output_tokens?: number;
+  tool_call_count?: number;
+}
+
+export type DeductCreditsWithAuditResult = { ok: true } | { ok: false; error: string };
+
+/**
+ * Atomically debits `credits_usage.remaining_credits` and inserts the
+ * corresponding `usage_events` audit row via the
+ * `deduct_credits_with_audit` Postgres function. The SQL function
+ * runs both writes inside an implicit transaction so wallet/meter
+ * can never drift on partial failure — matching open-agents'
+ * `recordUsage` `db.transaction(...)` guarantee.
+ *
+ * Caller convention: `cents` is the amount to debit (integer ≥ 1).
+ * `eventId` should match `lib/supabase/usage_events/insertUsageEvent.ts`'s
+ * nanoid convention so the audit trail is consistent.
+ *
+ * Errors are never thrown — returns `{ ok: false, error }` instead so
+ * the caller (`recordChatUsage`) can swallow without aborting the
+ * chat workflow on a credit accounting hiccup.
+ */
+export async function deductCreditsWithAudit(params: {
+  accountId: string;
+  cents: number;
+  eventId: string;
+  event: DeductCreditsAuditEvent;
+}): Promise<DeductCreditsWithAuditResult> {
+  try {
+    const { error } = await supabase.rpc("deduct_credits_with_audit", {
+      p_account_id: params.accountId,
+      p_amount: params.cents,
+      p_event_id: params.eventId,
+      p_event: params.event,
+    });
+    if (error) {
+      return { ok: false, error: error.message };
+    }
+    return { ok: true };
+  } catch (error) {
+    return {
+      ok: false,
+      error: error instanceof Error ? error.message : String(error),
+    };
+  }
+}

From 456b5b321535193aeb055dbcc8bae44cb72ec342 Mon Sep 17 00:00:00 2001
From: Sweets Sweetman <sweetmantech@gmail.com>
Date: Mon, 25 May 2026 15:56:19 -0500
Subject: [PATCH 3/7] fix(credits): use flat interface for
 DeductCreditsWithAuditResult
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Next.js 16's type checker wasn't narrowing the discriminated union
`{ ok: true } | { ok: false; error: string }` through `if (!result.ok)`,
breaking the production build at `recordChatUsage.ts:90`. Vitest's own
type config tolerated it, so this only surfaced on the preview deploy.

Flat interface with optional `error?: string` avoids the narrowing
requirement entirely — caller can read `result.error` directly when
`result.ok` is false. Slight type-safety loss (compiler doesn't enforce
that `error` is present when ok is false) is worth the build stability.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 lib/supabase/credits_usage/deductCreditsWithAudit.ts | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/lib/supabase/credits_usage/deductCreditsWithAudit.ts b/lib/supabase/credits_usage/deductCreditsWithAudit.ts
index 1bafef1e0..b4f6b442d 100644
--- a/lib/supabase/credits_usage/deductCreditsWithAudit.ts
+++ b/lib/supabase/credits_usage/deductCreditsWithAudit.ts
@@ -17,7 +17,15 @@ export interface DeductCreditsAuditEvent {
   tool_call_count?: number;
 }
 
-export type DeductCreditsWithAuditResult = { ok: true } | { ok: false; error: string };
+export interface DeductCreditsWithAuditResult {
+  ok: boolean;
+  /**
+   * Present only when `ok === false`. Kept optional rather than as a
+   * discriminated union so callers can read `result.error` without
+   * tripping a narrowing edge case in the Next.js 16 type checker.
+   */
+  error?: string;
+}
 
 /**
  * Atomically debits `credits_usage.remaining_credits` and inserts the

From 62e102b1a3cd1f5663aa29023b344fe48b1f7af0 Mon Sep 17 00:00:00 2001
From: Sweets Sweetman <sweetmantech@gmail.com>
Date: Mon, 25 May 2026 16:01:15 -0500
Subject: [PATCH 4/7] fix(credits): regenerate supabase RPC type for
 deduct_credits_with_audit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous deploy failed because:

1. `types/database.types.ts` was stale — it didn't include the
   `deduct_credits_with_audit` RPC that landed in
   recoupable/database#26 (and was manually applied via the MCP
   after Supabase's GitHub App 502'd post-merge). Without that entry,
   `supabase.rpc("deduct_credits_with_audit", ...)` failed Next.js's
   stricter type check.

2. Even with the entry, the typed `Args.p_event: Json` couldn't
   accept our `DeductCreditsAuditEvent` interface directly — TS
   doesn't infer interface → index-signature assignment.

Fixes:
  - Added the `deduct_credits_with_audit` entry to the Functions
    block of types/database.types.ts (matches the upstream regen
    via mcp__plugin_supabase_supabase__generate_typescript_types).
  - Cast `params.event as unknown as Json` at the supabase boundary
    in deductCreditsWithAudit.ts. The runtime payload is unchanged
    and the interface keeps its strong typing for callers.

Verified locally: `pnpm exec tsc --noEmit` shows no errors in any
file this PR touches.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 lib/supabase/credits_usage/deductCreditsWithAudit.ts | 6 +++++-
 types/database.types.ts                              | 9 +++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/lib/supabase/credits_usage/deductCreditsWithAudit.ts b/lib/supabase/credits_usage/deductCreditsWithAudit.ts
index b4f6b442d..871800f73 100644
--- a/lib/supabase/credits_usage/deductCreditsWithAudit.ts
+++ b/lib/supabase/credits_usage/deductCreditsWithAudit.ts
@@ -1,4 +1,5 @@
 import supabase from "@/lib/supabase/serverClient";
+import type { Json } from "@/types/database.types";
 
 /**
  * JSON payload populating the `usage_events` audit row. Optional
@@ -54,7 +55,10 @@ export async function deductCreditsWithAudit(params: {
       p_account_id: params.accountId,
       p_amount: params.cents,
       p_event_id: params.eventId,
-      p_event: params.event,
+      // DeductCreditsAuditEvent is structurally JSON-safe, but TS can't
+      // infer an interface → index-signature assignment automatically.
+      // Cast once at this boundary; the runtime payload is unchanged.
+      p_event: params.event as unknown as Json,
     });
     if (error) {
       return { ok: false, error: error.message };
diff --git a/types/database.types.ts b/types/database.types.ts
index 20287a6a9..cc7d03e1e 100644
--- a/types/database.types.ts
+++ b/types/database.types.ts
@@ -3961,6 +3961,15 @@ export type Database = {
         Args: { account_id: string; amount: number };
         Returns: undefined;
       };
+      deduct_credits_with_audit: {
+        Args: {
+          p_account_id: string;
+          p_amount: number;
+          p_event: Json;
+          p_event_id: string;
+        };
+        Returns: undefined;
+      };
       extract_domain: { Args: { email: string }; Returns: string };
       get_account_invitations: {
         Args: { account_slug: string };

From 193b6429de6138ab642bd4a698944f2d4bd0596c Mon Sep 17 00:00:00 2001
From: Sweets Sweetman <sweetmantech@gmail.com>
Date: Mon, 25 May 2026 16:41:19 -0500
Subject: [PATCH 5/7] refactor(credits): consolidate chat-workflow billing into
 handleChatCredits (DRY)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Addresses the user's PR review: my new files duplicated existing
infrastructure. Consolidates everything into the existing pattern
(handleChatCredits → getCreditUsage + recordCreditDeduction) so chat
workflow billing uses the SAME orchestrator that the streaming chat
path (handleChatStream) already uses.

Changes:

1. lib/credits/getCreditUsage.ts
   - Added optional `gatewayCostUsd?: number` parameter
   - When positive, returns it directly (skips catalog lookup)
   - Otherwise existing token-math path is unchanged (backwards compat)

2. lib/credits/handleChatCredits.ts
   - Added `gatewayCostUsd?: number` (threaded to getCreditUsage)
   - Added `source?: "web" | "api"` (defaults to "web" for backwards
     compat; chat workflow passes "api" so admin dashboards can
     distinguish surface in spend rollups)

3. lib/credits/recordCreditDeduction.ts
   - Switched from `deductCredits + insertUsageEvent` (two separate
     Supabase calls, non-atomic — could leave wallet/meter drifted on
     partial failure) to the single `deduct_credits_with_audit` RPC.
   - Now atomic for ALL callers (chat workflow + research handlers),
     not just the new chat-workflow path.
   - Return shape simplified: `{ success: boolean }` instead of
     `{ success, newBalance }` (no caller was reading newBalance).

4. app/lib/workflows/runAgentWorkflow.ts
   - Imports handleChatCredits instead of recordChatUsage.
   - Reads gatewayCostUsd + token counts from
     responseMessage.metadata.{totalMessageCost, totalMessageUsage}.

5. Deleted (consolidated into existing infrastructure):
   - app/lib/workflows/recordChatUsage.ts
   - lib/credits/computeCreditsDeductedCents.ts
   - lib/credits/estimateModelUsageCost.ts
   - lib/credits/AvailableModelCost.ts
   - lib/credits/resolveCostTier.ts
   - All their test files

Net delta: -7 files, +0 new orchestrator function. Plus the atomicity
guarantee now applies to research handlers too.

TDD: each change went RED → minimum impl → GREEN, with all 3195 tests
passing at the end.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../__tests__/recordChatUsage.test.ts         | 201 ------------------
 .../__tests__/runAgentWorkflow.test.ts        |  54 +++--
 app/lib/workflows/recordChatUsage.ts          | 101 ---------
 app/lib/workflows/runAgentWorkflow.ts         |  43 +++-
 .../integration/chatEndToEnd.test.ts          |   1 +
 lib/credits/AvailableModelCost.ts             |  25 ---
 .../computeCreditsDeductedCents.test.ts       | 183 ----------------
 .../__tests__/estimateModelUsageCost.test.ts  | 165 --------------
 lib/credits/__tests__/getCreditUsage.test.ts  |  76 +++++++
 .../__tests__/handleChatCredits.test.ts       |  49 ++++-
 .../__tests__/recordCreditDeduction.test.ts   |  82 ++++---
 lib/credits/computeCreditsDeductedCents.ts    |  57 -----
 lib/credits/estimateModelUsageCost.ts         |  79 -------
 lib/credits/getCreditUsage.ts                 |  17 ++
 lib/credits/handleChatCredits.ts              |  33 ++-
 lib/credits/recordCreditDeduction.ts          |  63 +++---
 16 files changed, 319 insertions(+), 910 deletions(-)
 delete mode 100644 app/lib/workflows/__tests__/recordChatUsage.test.ts
 delete mode 100644 app/lib/workflows/recordChatUsage.ts
 delete mode 100644 lib/credits/AvailableModelCost.ts
 delete mode 100644 lib/credits/__tests__/computeCreditsDeductedCents.test.ts
 delete mode 100644 lib/credits/__tests__/estimateModelUsageCost.test.ts
 delete mode 100644 lib/credits/computeCreditsDeductedCents.ts
 delete mode 100644 lib/credits/estimateModelUsageCost.ts

diff --git a/app/lib/workflows/__tests__/recordChatUsage.test.ts b/app/lib/workflows/__tests__/recordChatUsage.test.ts
deleted file mode 100644
index 2588f496f..000000000
--- a/app/lib/workflows/__tests__/recordChatUsage.test.ts
+++ /dev/null
@@ -1,201 +0,0 @@
-import { describe, it, expect, vi, beforeEach } from "vitest";
-import { recordChatUsage } from "@/app/lib/workflows/recordChatUsage";
-import { computeCreditsDeductedCents } from "@/lib/credits/computeCreditsDeductedCents";
-import { deductCreditsWithAudit } from "@/lib/supabase/credits_usage/deductCreditsWithAudit";
-
-vi.mock("@/lib/credits/computeCreditsDeductedCents", () => ({
-  computeCreditsDeductedCents: vi.fn(),
-}));
-vi.mock("@/lib/supabase/credits_usage/deductCreditsWithAudit", () => ({
-  deductCreditsWithAudit: vi.fn(),
-}));
-
-beforeEach(() => {
-  vi.clearAllMocks();
-});
-
-const ACCOUNT = "11111111-1111-1111-1111-111111111111";
-
-function buildAssistantMessage(metadata: Record<string, unknown>) {
-  return {
-    id: "msg_abc",
-    role: "assistant" as const,
-    parts: [{ type: "text", text: "ok" }],
-    metadata,
-  };
-}
-
-describe("recordChatUsage", () => {
-  it("computes cents from the gateway cost on responseMessage.metadata.totalMessageCost", async () => {
-    vi.mocked(computeCreditsDeductedCents).mockResolvedValue(7);
-    vi.mocked(deductCreditsWithAudit).mockResolvedValue({ ok: true });
-
-    const message = buildAssistantMessage({
-      totalMessageCost: 0.07,
-      totalMessageUsage: {
-        inputTokens: 100,
-        cachedInputTokens: 10,
-        outputTokens: 20,
-      },
-    });
-
-    await recordChatUsage({
-      accountId: ACCOUNT,
-      modelId: "anthropic/claude-haiku-4.5",
-      responseMessage: message as never,
-    });
-
-    expect(computeCreditsDeductedCents).toHaveBeenCalledWith(
-      { inputTokens: 100, cachedInputTokens: 10, outputTokens: 20 },
-      "anthropic/claude-haiku-4.5",
-      0.07,
-    );
-  });
-
-  it("calls deductCreditsWithAudit with the computed cents and a fresh event id", async () => {
-    vi.mocked(computeCreditsDeductedCents).mockResolvedValue(42);
-    vi.mocked(deductCreditsWithAudit).mockResolvedValue({ ok: true });
-
-    const message = buildAssistantMessage({
-      totalMessageCost: 0.42,
-      totalMessageUsage: {
-        inputTokens: 100,
-        cachedInputTokens: 0,
-        outputTokens: 200,
-      },
-    });
-
-    await recordChatUsage({
-      accountId: ACCOUNT,
-      modelId: "anthropic/claude-haiku-4.5",
-      responseMessage: message as never,
-    });
-
-    expect(deductCreditsWithAudit).toHaveBeenCalledTimes(1);
-    const args = vi.mocked(deductCreditsWithAudit).mock.calls[0]?.[0];
-    expect(args?.accountId).toBe(ACCOUNT);
-    expect(args?.cents).toBe(42);
-    expect(typeof args?.eventId).toBe("string");
-    expect(args?.eventId.length).toBeGreaterThan(0);
-    expect(args?.event).toMatchObject({
-      source: "api",
-      agent_type: "main",
-      provider: "anthropic",
-      model_id: "anthropic/claude-haiku-4.5",
-      input_tokens: 100,
-      cached_input_tokens: 0,
-      output_tokens: 200,
-    });
-  });
-
-  it("derives provider from modelId (everything before the first slash)", async () => {
-    vi.mocked(computeCreditsDeductedCents).mockResolvedValue(5);
-    vi.mocked(deductCreditsWithAudit).mockResolvedValue({ ok: true });
-
-    const message = buildAssistantMessage({
-      totalMessageCost: 0.05,
-      totalMessageUsage: { inputTokens: 1, cachedInputTokens: 0, outputTokens: 1 },
-    });
-
-    await recordChatUsage({
-      accountId: ACCOUNT,
-      modelId: "openai/gpt-5.4",
-      responseMessage: message as never,
-    });
-
-    const args = vi.mocked(deductCreditsWithAudit).mock.calls[0]?.[0];
-    expect(args?.event.provider).toBe("openai");
-    expect(args?.event.model_id).toBe("openai/gpt-5.4");
-  });
-
-  it("skips entirely when responseMessage is undefined (no usage to charge)", async () => {
-    await recordChatUsage({
-      accountId: ACCOUNT,
-      modelId: "anthropic/claude-haiku-4.5",
-      responseMessage: undefined,
-    });
-
-    expect(computeCreditsDeductedCents).not.toHaveBeenCalled();
-    expect(deductCreditsWithAudit).not.toHaveBeenCalled();
-  });
-
-  it("skips entirely when responseMessage has no metadata at all", async () => {
-    await recordChatUsage({
-      accountId: ACCOUNT,
-      modelId: "anthropic/claude-haiku-4.5",
-      responseMessage: {
-        id: "m",
-        role: "assistant",
-        parts: [{ type: "text", text: "x" }],
-      } as never,
-    });
-
-    expect(computeCreditsDeductedCents).not.toHaveBeenCalled();
-    expect(deductCreditsWithAudit).not.toHaveBeenCalled();
-  });
-
-  it("falls back to zero token counts when totalMessageUsage is missing (still charges the 1c floor)", async () => {
-    vi.mocked(computeCreditsDeductedCents).mockResolvedValue(1);
-    vi.mocked(deductCreditsWithAudit).mockResolvedValue({ ok: true });
-
-    const message = buildAssistantMessage({
-      totalMessageCost: 0.0001,
-      // No totalMessageUsage on metadata
-    });
-
-    await recordChatUsage({
-      accountId: ACCOUNT,
-      modelId: "anthropic/claude-haiku-4.5",
-      responseMessage: message as never,
-    });
-
-    expect(computeCreditsDeductedCents).toHaveBeenCalledWith(
-      { inputTokens: 0, cachedInputTokens: 0, outputTokens: 0 },
-      "anthropic/claude-haiku-4.5",
-      0.0001,
-    );
-    const args = vi.mocked(deductCreditsWithAudit).mock.calls[0]?.[0];
-    expect(args?.event.input_tokens).toBe(0);
-    expect(args?.event.output_tokens).toBe(0);
-  });
-
-  it("does NOT throw when deductCreditsWithAudit returns ok:false (fire-and-forget contract)", async () => {
-    vi.mocked(computeCreditsDeductedCents).mockResolvedValue(7);
-    vi.mocked(deductCreditsWithAudit).mockResolvedValue({
-      ok: false,
-      error: "credits_usage row not found",
-    });
-
-    const message = buildAssistantMessage({
-      totalMessageCost: 0.07,
-      totalMessageUsage: { inputTokens: 1, cachedInputTokens: 0, outputTokens: 1 },
-    });
-
-    await expect(
-      recordChatUsage({
-        accountId: ACCOUNT,
-        modelId: "anthropic/claude-haiku-4.5",
-        responseMessage: message as never,
-      }),
-    ).resolves.toBeUndefined();
-  });
-
-  it("does NOT throw when computeCreditsDeductedCents rejects (fire-and-forget contract)", async () => {
-    vi.mocked(computeCreditsDeductedCents).mockRejectedValue(new Error("catalog down"));
-
-    const message = buildAssistantMessage({
-      totalMessageCost: 0.07,
-      totalMessageUsage: { inputTokens: 1, cachedInputTokens: 0, outputTokens: 1 },
-    });
-
-    await expect(
-      recordChatUsage({
-        accountId: ACCOUNT,
-        modelId: "anthropic/claude-haiku-4.5",
-        responseMessage: message as never,
-      }),
-    ).resolves.toBeUndefined();
-
-    expect(deductCreditsWithAudit).not.toHaveBeenCalled();
-  });
-});
diff --git a/app/lib/workflows/__tests__/runAgentWorkflow.test.ts b/app/lib/workflows/__tests__/runAgentWorkflow.test.ts
index b08b601fc..19cec3b78 100644
--- a/app/lib/workflows/__tests__/runAgentWorkflow.test.ts
+++ b/app/lib/workflows/__tests__/runAgentWorkflow.test.ts
@@ -5,7 +5,7 @@ import { clearChatActiveStream } from "@/lib/chat/clearChatActiveStream";
 import { closeChatStream } from "@/app/lib/workflows/closeChatStream";
 import { generateAssistantMessageId } from "@/app/lib/workflows/generateAssistantMessageId";
 import { persistAssistantMessage } from "@/lib/chat/persistAssistantMessage";
-import { recordChatUsage } from "@/app/lib/workflows/recordChatUsage";
+import { handleChatCredits } from "@/lib/credits/handleChatCredits";
 
 vi.mock("@/app/lib/workflows/runAgentStep", () => ({
   runAgentStep: vi.fn(),
@@ -22,8 +22,8 @@ vi.mock("@/app/lib/workflows/generateAssistantMessageId", () => ({
 vi.mock("@/lib/chat/persistAssistantMessage", () => ({
   persistAssistantMessage: vi.fn(),
 }));
-vi.mock("@/app/lib/workflows/recordChatUsage", () => ({
-  recordChatUsage: vi.fn(),
+vi.mock("@/lib/credits/handleChatCredits", () => ({
+  handleChatCredits: vi.fn(),
 }));
 // Captured writable stub so tests can assert closeChatStream got the
 // same instance the workflow body holds.
@@ -176,7 +176,7 @@ describe("runAgentWorkflow", () => {
     );
   });
 
-  it("calls recordChatUsage with accountId/modelId/responseMessage when runAgentStep returns a message", async () => {
+  it("calls handleChatCredits with the gateway cost + token usage from responseMessage.metadata", async () => {
     const responseMessage = {
       id: "assistant-msg-xyz",
       role: "assistant",
@@ -197,15 +197,45 @@ describe("runAgentWorkflow", () => {
 
     await runAgentWorkflow(baseInput);
 
-    expect(recordChatUsage).toHaveBeenCalledTimes(1);
-    expect(recordChatUsage).toHaveBeenCalledWith({
+    expect(handleChatCredits).toHaveBeenCalledTimes(1);
+    expect(handleChatCredits).toHaveBeenCalledWith({
       accountId: "acc-1",
-      modelId: "anthropic/claude-haiku-4.5",
-      responseMessage,
+      model: "anthropic/claude-haiku-4.5",
+      source: "api",
+      gatewayCostUsd: 0.07,
+      usage: {
+        inputTokens: 100,
+        cachedInputTokens: 10,
+        outputTokens: 20,
+      },
+    });
+  });
+
+  it("calls handleChatCredits with zero usage when metadata is missing (lets the 1c floor apply)", async () => {
+    const responseMessage = {
+      id: "assistant-msg-xyz",
+      role: "assistant",
+      parts: [{ type: "text", text: "Hello!" }],
+      // no metadata
+    };
+    vi.mocked(runAgentStep).mockResolvedValue({
+      finishReason: "stop",
+      responseMessage: responseMessage as never,
+    });
+
+    await runAgentWorkflow(baseInput);
+
+    expect(handleChatCredits).toHaveBeenCalledTimes(1);
+    expect(handleChatCredits).toHaveBeenCalledWith({
+      accountId: "acc-1",
+      model: "anthropic/claude-haiku-4.5",
+      source: "api",
+      gatewayCostUsd: undefined,
+      usage: { inputTokens: 0, cachedInputTokens: 0, outputTokens: 0 },
     });
   });
 
-  it("does NOT call recordChatUsage when runAgentStep returns no responseMessage", async () => {
+  it("does NOT call handleChatCredits when runAgentStep returns no responseMessage", async () => {
     vi.mocked(runAgentStep).mockResolvedValue({
       finishReason: "stop",
       responseMessage: undefined,
@@ -213,14 +243,14 @@ describe("runAgentWorkflow", () => {
 
     await runAgentWorkflow(baseInput);
 
-    expect(recordChatUsage).not.toHaveBeenCalled();
+    expect(handleChatCredits).not.toHaveBeenCalled();
   });
 
-  it("does NOT call recordChatUsage when runAgentStep throws (no message to bill)", async () => {
+  it("does NOT call handleChatCredits when runAgentStep throws (no message to bill)", async () => {
     vi.mocked(runAgentStep).mockRejectedValue(new Error("model exploded"));
 
     await expect(runAgentWorkflow(baseInput)).rejects.toThrow("model exploded");
 
-    expect(recordChatUsage).not.toHaveBeenCalled();
+    expect(handleChatCredits).not.toHaveBeenCalled();
   });
 });
diff --git a/app/lib/workflows/recordChatUsage.ts b/app/lib/workflows/recordChatUsage.ts
deleted file mode 100644
index d18d1543e..000000000
--- a/app/lib/workflows/recordChatUsage.ts
+++ /dev/null
@@ -1,101 +0,0 @@
-import { nanoid } from "nanoid";
-import { computeCreditsDeductedCents } from "@/lib/credits/computeCreditsDeductedCents";
-import { deductCreditsWithAudit } from "@/lib/supabase/credits_usage/deductCreditsWithAudit";
-
-/**
- * Duck-typed shape of the assembled assistant message we read off the
- * workflow result. Intentionally loose — both AI SDK's `UIMessage` and
- * the in-test fixtures satisfy it. The actual bill comes from
- * `metadata.totalMessageCost` (gateway-reported USD, the same number
- * the chat UI displays) and `metadata.totalMessageUsage` (token
- * counts). When metadata is missing we silently no-op.
- */
-type AssistantMessageForUsage = {
-  metadata?: {
-    totalMessageCost?: number;
-    totalMessageUsage?: {
-      inputTokens?: number;
-      cachedInputTokens?: number;
-      outputTokens?: number;
-    };
-  };
-};
-
-/**
- * Fire-and-forget billing step run at the end of a chat workflow.
- * Mirrors open-agents' `recordWorkflowUsage` main-agent path
- * (apps/web/app/workflows/chat-post-finish.ts) — without this step,
- * every chat turn on `/api/chat/workflow` is free (recoupable/api#605).
- *
- * Wallet debit + audit row are atomic via the
- * `deduct_credits_with_audit` Postgres function (one transaction).
- * Errors are caught + logged so a transient credits-table outage
- * never aborts the chat workflow.
- *
- * NOTE: Sub-agent (task tool) attribution is a separate follow-up —
- * this step only charges the main-agent turn. The audit row is
- * tagged `agent_type='main'`.
- */
-export async function recordChatUsage(input: {
-  accountId: string;
-  modelId: string;
-  responseMessage: AssistantMessageForUsage | undefined;
-}): Promise<void> {
-  "use step";
-  console.log("[recordChatUsage] enter", {
-    accountId: input.accountId,
-    modelId: input.modelId,
-    hasResponseMessage: input.responseMessage !== undefined,
-  });
-  try {
-    const metadata = input.responseMessage?.metadata;
-    if (!metadata) {
-      console.log("[recordChatUsage] skip: no metadata on responseMessage", {
-        accountId: input.accountId,
-        modelId: input.modelId,
-      });
-      return;
-    }
-
-    const usage = {
-      inputTokens: metadata.totalMessageUsage?.inputTokens ?? 0,
-      cachedInputTokens: metadata.totalMessageUsage?.cachedInputTokens ?? 0,
-      outputTokens: metadata.totalMessageUsage?.outputTokens ?? 0,
-    };
-
-    const cents = await computeCreditsDeductedCents(
-      usage,
-      input.modelId,
-      metadata.totalMessageCost,
-    );
-
-    const provider = input.modelId.includes("/") ? input.modelId.split("/")[0] : undefined;
-
-    const result = await deductCreditsWithAudit({
-      accountId: input.accountId,
-      cents,
-      eventId: nanoid(),
-      event: {
-        source: "api",
-        agent_type: "main",
-        provider,
-        model_id: input.modelId,
-        input_tokens: usage.inputTokens,
-        cached_input_tokens: usage.cachedInputTokens,
-        output_tokens: usage.outputTokens,
-      },
-    });
-
-    if (!result.ok) {
-      console.error("[recordChatUsage] debit failed:", result.error);
-      return;
-    }
-    console.log("[recordChatUsage] success", {
-      accountId: input.accountId,
-      modelId: input.modelId,
-      cents,
-    });
-  } catch (error) {
-    console.error("[recordChatUsage] unexpected error:", error);
-  }
-}
diff --git a/app/lib/workflows/runAgentWorkflow.ts b/app/lib/workflows/runAgentWorkflow.ts
index 70094a843..9afcdeb79 100644
--- a/app/lib/workflows/runAgentWorkflow.ts
+++ b/app/lib/workflows/runAgentWorkflow.ts
@@ -1,11 +1,11 @@
 import { getWorkflowMetadata, getWritable } from "workflow";
-import type { UIMessage, UIMessageChunk } from "ai";
+import type { LanguageModelUsage, UIMessage, UIMessageChunk } from "ai";
 import { closeChatStream } from "@/app/lib/workflows/closeChatStream";
 import { generateAssistantMessageId } from "@/app/lib/workflows/generateAssistantMessageId";
 import { runAgentStep } from "@/app/lib/workflows/runAgentStep";
 import { clearChatActiveStream } from "@/lib/chat/clearChatActiveStream";
 import { persistAssistantMessage } from "@/lib/chat/persistAssistantMessage";
-import { recordChatUsage } from "@/app/lib/workflows/recordChatUsage";
+import { handleChatCredits } from "@/lib/credits/handleChatCredits";
 import type { DurableAgentContext } from "@/lib/agent/tools/AgentContext";
 
 export type RunAgentWorkflowInput = {
@@ -93,14 +93,39 @@ export async function runAgentWorkflow(input: RunAgentWorkflowInput): Promise<vo
 
       // Charge the account for this turn. Atomic wallet debit + audit
       // row insert via the `deduct_credits_with_audit` Postgres function
-      // — see `recordChatUsage`. Fire-and-forget by contract; transient
-      // credits-table failures must not abort the chat workflow.
-      // Mirrors open-agents' `recordWorkflowUsage` main-agent path
-      // (apps/web/app/workflows/chat-post-finish.ts).
-      await recordChatUsage({
+      // (wired into `handleChatCredits` → `recordCreditDeduction`).
+      // Fire-and-forget by contract; transient credits-table failures
+      // must not abort the chat workflow. Mirrors open-agents'
+      // `recordWorkflowUsage` main-agent path
+      // (apps/web/app/workflows/chat-post-finish.ts) and reuses the
+      // same `handleChatCredits` orchestrator that `handleChatStream`
+      // already uses for the non-workflow chat path.
+      const metadata = (
+        result.responseMessage as {
+          metadata?: {
+            totalMessageCost?: number;
+            totalMessageUsage?: {
+              inputTokens?: number;
+              cachedInputTokens?: number;
+              outputTokens?: number;
+            };
+          };
+        }
+      ).metadata;
+      await handleChatCredits({
         accountId: input.accountId,
-        modelId: input.modelId,
-        responseMessage: result.responseMessage,
+        model: input.modelId,
+        source: "api",
+        gatewayCostUsd: metadata?.totalMessageCost,
+        // Cast at the boundary: AI SDK's LanguageModelUsage type
+        // requires additional optional fields (inputTokenDetails,
+        // outputTokenDetails, totalTokens). `handleChatCredits` only
+        // reads the three core fields, so the cast is safe.
+        usage: {
+          inputTokens: metadata?.totalMessageUsage?.inputTokens ?? 0,
+          cachedInputTokens: metadata?.totalMessageUsage?.cachedInputTokens ?? 0,
+          outputTokens: metadata?.totalMessageUsage?.outputTokens ?? 0,
+        } as LanguageModelUsage,
       });
     }
   } finally {
diff --git a/lib/chat/__tests__/integration/chatEndToEnd.test.ts b/lib/chat/__tests__/integration/chatEndToEnd.test.ts
index cf330a142..23d771abc 100644
--- a/lib/chat/__tests__/integration/chatEndToEnd.test.ts
+++ b/lib/chat/__tests__/integration/chatEndToEnd.test.ts
@@ -535,6 +535,7 @@ describe("Chat Integration Tests", () => {
       expect(mockGetCreditUsage).toHaveBeenCalledWith(
         { promptTokens: 1000, completionTokens: 500 },
         "gpt-4",
+        undefined,
       );
       expect(mockRecordCreditDeduction).toHaveBeenCalledWith(
         expect.objectContaining({
diff --git a/lib/credits/AvailableModelCost.ts b/lib/credits/AvailableModelCost.ts
deleted file mode 100644
index 72d3026f9..000000000
--- a/lib/credits/AvailableModelCost.ts
+++ /dev/null
@@ -1,25 +0,0 @@
-/**
- * Shape of the per-model cost catalog used for token-based credit estimation.
- *
- * Mirrors open-agents'
- * `apps/web/lib/models.ts:AvailableModelCost` so the same
- * `estimateModelUsageCost` math runs against either catalog without
- * shape conversion. api's current gateway/models.dev pipeline emits
- * only `{ input, output }` (see
- * `lib/ai/parseModelsDevMetadata.ts:ModelsDevMetadata`); the richer
- * `cache_read` and `context_over_200k` fields are kept in the type so
- * a future catalog expansion (or a hand-edited override) gets picked
- * up automatically by the estimator.
- *
- * All token-cost units are USD per million tokens, matching
- * models.dev.
- */
-export interface AvailableModelCostTier {
-  input?: number;
-  output?: number;
-  cache_read?: number;
-}
-
-export interface AvailableModelCost extends AvailableModelCostTier {
-  context_over_200k?: AvailableModelCostTier;
-}
diff --git a/lib/credits/__tests__/computeCreditsDeductedCents.test.ts b/lib/credits/__tests__/computeCreditsDeductedCents.test.ts
deleted file mode 100644
index a05c79df7..000000000
--- a/lib/credits/__tests__/computeCreditsDeductedCents.test.ts
+++ /dev/null
@@ -1,183 +0,0 @@
-import { describe, it, expect, vi, beforeEach } from "vitest";
-import { computeCreditsDeductedCents } from "@/lib/credits/computeCreditsDeductedCents";
-import { getAvailableModels } from "@/lib/ai/getAvailableModels";
-
-vi.mock("@/lib/ai/getAvailableModels", () => ({
-  getAvailableModels: vi.fn(),
-}));
-
-beforeEach(() => {
-  vi.clearAllMocks();
-  // Default catalog: empty (forces token-estimate path to fall through to 1c).
-  vi.mocked(getAvailableModels).mockResolvedValue([]);
-});
-
-const ZERO_USAGE = { inputTokens: 0, cachedInputTokens: 0, outputTokens: 0 };
-
-describe("computeCreditsDeductedCents", () => {
-  describe("gateway cost path (preferred)", () => {
-    it("returns gateway cost in cents when gatewayCostUsd is a positive number", async () => {
-      // $0.0074 → 0.74c → ceil to 1c minimum is not needed; round to 1c
-      expect(
-        await computeCreditsDeductedCents(ZERO_USAGE, "anthropic/claude-haiku-4.5", 0.0074),
-      ).toBe(1);
-      // $0.42 → 42c
-      expect(
-        await computeCreditsDeductedCents(ZERO_USAGE, "anthropic/claude-haiku-4.5", 0.42),
-      ).toBe(42);
-    });
-
-    it("rounds the gateway cost to the nearest cent", async () => {
-      // $0.123 → 12.3c → 12c
-      expect(await computeCreditsDeductedCents(ZERO_USAGE, "model", 0.123)).toBe(12);
-      // $0.126 → 12.6c → 13c
-      expect(await computeCreditsDeductedCents(ZERO_USAGE, "model", 0.126)).toBe(13);
-    });
-
-    it("returns at least 1 when gateway cost rounds to 0", async () => {
-      // $0.0001 → 0.01c → would round to 0, must bump to 1
-      expect(await computeCreditsDeductedCents(ZERO_USAGE, "model", 0.0001)).toBe(1);
-    });
-
-    it("does NOT call the catalog when gateway cost is usable", async () => {
-      await computeCreditsDeductedCents(ZERO_USAGE, "model", 0.05);
-      expect(getAvailableModels).not.toHaveBeenCalled();
-    });
-  });
-
-  describe("falls back to token-based estimate when gateway cost is unusable", () => {
-    it("when gatewayCostUsd is undefined", async () => {
-      vi.mocked(getAvailableModels).mockResolvedValue([
-        { id: "model-x", cost: { input: 1, output: 4 } } as never,
-      ]);
-      // 1M in + 1M out → $5 → 500c
-      expect(
-        await computeCreditsDeductedCents(
-          { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 },
-          "model-x",
-          undefined,
-        ),
-      ).toBe(500);
-    });
-
-    it("when gatewayCostUsd is 0", async () => {
-      vi.mocked(getAvailableModels).mockResolvedValue([
-        { id: "model-x", cost: { input: 1, output: 4 } } as never,
-      ]);
-      expect(
-        await computeCreditsDeductedCents(
-          { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 },
-          "model-x",
-          0,
-        ),
-      ).toBe(500);
-    });
-
-    it("when gatewayCostUsd is negative (corrupted/upstream bug)", async () => {
-      vi.mocked(getAvailableModels).mockResolvedValue([
-        { id: "model-x", cost: { input: 1, output: 4 } } as never,
-      ]);
-      expect(
-        await computeCreditsDeductedCents(
-          { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 },
-          "model-x",
-          -1,
-        ),
-      ).toBe(500);
-    });
-
-    it("when gatewayCostUsd is NaN (not Number.isFinite)", async () => {
-      vi.mocked(getAvailableModels).mockResolvedValue([
-        { id: "model-x", cost: { input: 1, output: 4 } } as never,
-      ]);
-      expect(
-        await computeCreditsDeductedCents(
-          { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 },
-          "model-x",
-          Number.NaN,
-        ),
-      ).toBe(500);
-    });
-
-    it("when gatewayCostUsd is Infinity", async () => {
-      vi.mocked(getAvailableModels).mockResolvedValue([
-        { id: "model-x", cost: { input: 1, output: 4 } } as never,
-      ]);
-      expect(
-        await computeCreditsDeductedCents(
-          { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 },
-          "model-x",
-          Number.POSITIVE_INFINITY,
-        ),
-      ).toBe(500);
-    });
-  });
-
-  describe("estimate fallbacks (also: never charge zero on success)", () => {
-    it("returns 1 when modelId is not in the catalog", async () => {
-      vi.mocked(getAvailableModels).mockResolvedValue([
-        { id: "other-model", cost: { input: 1, output: 4 } } as never,
-      ]);
-      expect(
-        await computeCreditsDeductedCents(
-          { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 1000 },
-          "model-x",
-          undefined,
-        ),
-      ).toBe(1);
-    });
-
-    it("returns 1 when the catalog has no cost for the model", async () => {
-      vi.mocked(getAvailableModels).mockResolvedValue([{ id: "model-x" } as never]);
-      expect(
-        await computeCreditsDeductedCents(
-          { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 1000 },
-          "model-x",
-          undefined,
-        ),
-      ).toBe(1);
-    });
-
-    it("returns 1 when getAvailableModels rejects", async () => {
-      vi.mocked(getAvailableModels).mockRejectedValue(new Error("gateway down"));
-      expect(
-        await computeCreditsDeductedCents(
-          { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 1000 },
-          "model-x",
-          undefined,
-        ),
-      ).toBe(1);
-    });
-
-    it("returns 1 when token estimate rounds to 0 (very tiny usage)", async () => {
-      vi.mocked(getAvailableModels).mockResolvedValue([
-        { id: "model-x", cost: { input: 0.0001, output: 0.0001 } } as never,
-      ]);
-      // ~$0.0000002 → 0.00002c → bumps to 1c minimum
-      expect(
-        await computeCreditsDeductedCents(
-          { inputTokens: 1, cachedInputTokens: 0, outputTokens: 1 },
-          "model-x",
-          undefined,
-        ),
-      ).toBe(1);
-    });
-  });
-
-  describe("model lookup", () => {
-    it("matches modelId exactly (provider/model form)", async () => {
-      vi.mocked(getAvailableModels).mockResolvedValue([
-        { id: "anthropic/claude-haiku-4.5", cost: { input: 1, output: 4 } } as never,
-        { id: "openai/gpt-5", cost: { input: 10, output: 40 } } as never,
-      ]);
-      // Pick haiku: 1M in + 1M out @ haiku rates → $5 → 500c
-      expect(
-        await computeCreditsDeductedCents(
-          { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 },
-          "anthropic/claude-haiku-4.5",
-          undefined,
-        ),
-      ).toBe(500);
-    });
-  });
-});
diff --git a/lib/credits/__tests__/estimateModelUsageCost.test.ts b/lib/credits/__tests__/estimateModelUsageCost.test.ts
deleted file mode 100644
index e41d9454e..000000000
--- a/lib/credits/__tests__/estimateModelUsageCost.test.ts
+++ /dev/null
@@ -1,165 +0,0 @@
-import { describe, it, expect } from "vitest";
-import { estimateModelUsageCost } from "@/lib/credits/estimateModelUsageCost";
-
-const baseCost = { input: 1, output: 4 }; // $1/M in, $4/M out
-
-describe("estimateModelUsageCost", () => {
-  describe("guard rails", () => {
-    it("returns undefined when cost catalog entry is missing", () => {
-      expect(
-        estimateModelUsageCost(
-          { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 500 },
-          undefined,
-        ),
-      ).toBeUndefined();
-    });
-
-    it("returns undefined when input price is missing", () => {
-      expect(
-        estimateModelUsageCost(
-          { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 500 },
-          { output: 4 },
-        ),
-      ).toBeUndefined();
-    });
-
-    it("returns undefined when output price is missing", () => {
-      expect(
-        estimateModelUsageCost(
-          { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 500 },
-          { input: 1 },
-        ),
-      ).toBeUndefined();
-    });
-  });
-
-  describe("base tier (≤200k input tokens)", () => {
-    it("computes uncached input + output cost in USD", () => {
-      // 1_000_000 in @ $1/M + 1_000_000 out @ $4/M = $5
-      expect(
-        estimateModelUsageCost(
-          {
-            inputTokens: 1_000_000,
-            cachedInputTokens: 0,
-            outputTokens: 1_000_000,
-          },
-          baseCost,
-        ),
-      ).toBe(5);
-    });
-
-    it("applies cache_read price for cachedInputTokens portion when present", () => {
-      // 100k cached @ $0.10/M + 100k uncached @ $1/M + 100k out @ $4/M
-      // = 0.01 + 0.10 + 0.40 = $0.51
-      const cost = { input: 1, output: 4, cache_read: 0.1 };
-      expect(
-        estimateModelUsageCost(
-          { inputTokens: 200_000, cachedInputTokens: 100_000, outputTokens: 100_000 },
-          cost,
-        ),
-      ).toBeCloseTo(0.51, 6);
-    });
-
-    it("falls back to input price when cache_read is undefined (cached tokens billed at full price)", () => {
-      // 100k cached @ $1/M + 100k uncached @ $1/M + 100k out @ $4/M
-      // = 0.10 + 0.10 + 0.40 = $0.60
-      expect(
-        estimateModelUsageCost(
-          { inputTokens: 200_000, cachedInputTokens: 100_000, outputTokens: 100_000 },
-          baseCost,
-        ),
-      ).toBeCloseTo(0.6, 6);
-    });
-
-    it("clamps negative cachedInputTokens to 0", () => {
-      expect(
-        estimateModelUsageCost(
-          { inputTokens: 1_000_000, cachedInputTokens: -50_000, outputTokens: 0 },
-          baseCost,
-        ),
-      ).toBe(1);
-    });
-
-    it("clamps cachedInputTokens > inputTokens so uncached doesn't go negative", () => {
-      // cached=200_000 but input=100_000 — uncached must clamp to 0 (not -100_000).
-      // 200_000 cached @ $1/M (no cache_read, falls back to input) + 0 out = $0.20.
-      // Without the Math.max guard, this would underbill: a negative uncached count
-      // times the input price would subtract from the cached charge.
-      expect(
-        estimateModelUsageCost(
-          { inputTokens: 100_000, cachedInputTokens: 200_000, outputTokens: 0 },
-          baseCost,
-        ),
-      ).toBeCloseTo(0.2, 6);
-    });
-
-    it("clamps negative outputTokens to 0", () => {
-      expect(
-        estimateModelUsageCost(
-          { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: -1000 },
-          baseCost,
-        ),
-      ).toBe(1);
-    });
-  });
-
-  describe("context_over_200k tier", () => {
-    const tieredCost = {
-      input: 1,
-      output: 4,
-      context_over_200k: { input: 2, output: 8 },
-    };
-
-    it("uses context_over_200k tier when inputTokens exceeds 200k", () => {
-      // 300_000 in @ $2/M + 100_000 out @ $8/M = 0.60 + 0.80 = $1.40
-      expect(
-        estimateModelUsageCost(
-          { inputTokens: 300_000, cachedInputTokens: 0, outputTokens: 100_000 },
-          tieredCost,
-        ),
-      ).toBeCloseTo(1.4, 6);
-    });
-
-    it("does NOT use context_over_200k tier when inputTokens is exactly 200k", () => {
-      // boundary check — must be strictly > 200k
-      // 200_000 @ $1/M + 0 out = $0.20
-      expect(
-        estimateModelUsageCost(
-          { inputTokens: 200_000, cachedInputTokens: 0, outputTokens: 0 },
-          tieredCost,
-        ),
-      ).toBeCloseTo(0.2, 6);
-    });
-
-    it("ignores context_over_200k when both input and output overrides are missing", () => {
-      // only cache_read is set in the override — should NOT trigger the tier swap
-      const cost = {
-        input: 1,
-        output: 4,
-        context_over_200k: { cache_read: 0.5 },
-      };
-      // Treated as base tier — 300k @ $1/M + 0 out = $0.30
-      expect(
-        estimateModelUsageCost(
-          { inputTokens: 300_000, cachedInputTokens: 0, outputTokens: 0 },
-          cost,
-        ),
-      ).toBeCloseTo(0.3, 6);
-    });
-
-    it("falls back to base tier input when context_over_200k.input is missing", () => {
-      const cost = {
-        input: 1,
-        output: 4,
-        context_over_200k: { output: 8 }, // only output overridden
-      };
-      // 300k in @ $1/M (fallback) + 100k out @ $8/M = 0.30 + 0.80 = $1.10
-      expect(
-        estimateModelUsageCost(
-          { inputTokens: 300_000, cachedInputTokens: 0, outputTokens: 100_000 },
-          cost,
-        ),
-      ).toBeCloseTo(1.1, 6);
-    });
-  });
-});
diff --git a/lib/credits/__tests__/getCreditUsage.test.ts b/lib/credits/__tests__/getCreditUsage.test.ts
index d79edd7de..a06811ffd 100644
--- a/lib/credits/__tests__/getCreditUsage.test.ts
+++ b/lib/credits/__tests__/getCreditUsage.test.ts
@@ -139,4 +139,80 @@ describe("getCreditUsage", () => {
       expect(cost).toBe(0);
     });
   });
+
+  describe("gateway cost short-circuit", () => {
+    it("returns gatewayCostUsd directly when it is a positive number (skips catalog lookup)", async () => {
+      const cost = await getCreditUsage(
+        { inputTokens: 1000, outputTokens: 500 },
+        "anthropic/claude-haiku-4.5",
+        0.07,
+      );
+      expect(cost).toBe(0.07);
+      expect(mockGetModel).not.toHaveBeenCalled();
+    });
+
+    it("falls through to token math when gatewayCostUsd is undefined", async () => {
+      mockGetModel.mockResolvedValue({
+        id: "gpt-4",
+        pricing: { input: "0.00003", output: "0.00006" },
+      } as any);
+
+      const cost = await getCreditUsage(
+        { inputTokens: 1000, outputTokens: 500 },
+        "gpt-4",
+        undefined,
+      );
+      // 1000 * 0.00003 + 500 * 0.00006 = 0.06
+      expect(cost).toBeCloseTo(0.06);
+      expect(mockGetModel).toHaveBeenCalledWith("gpt-4");
+    });
+
+    it("falls through to token math when gatewayCostUsd is 0", async () => {
+      mockGetModel.mockResolvedValue({
+        id: "gpt-4",
+        pricing: { input: "0.00003", output: "0.00006" },
+      } as any);
+
+      const cost = await getCreditUsage({ inputTokens: 1000, outputTokens: 500 }, "gpt-4", 0);
+      expect(cost).toBeCloseTo(0.06);
+    });
+
+    it("falls through to token math when gatewayCostUsd is negative", async () => {
+      mockGetModel.mockResolvedValue({
+        id: "gpt-4",
+        pricing: { input: "0.00003", output: "0.00006" },
+      } as any);
+
+      const cost = await getCreditUsage({ inputTokens: 1000, outputTokens: 500 }, "gpt-4", -1);
+      expect(cost).toBeCloseTo(0.06);
+    });
+
+    it("falls through to token math when gatewayCostUsd is NaN", async () => {
+      mockGetModel.mockResolvedValue({
+        id: "gpt-4",
+        pricing: { input: "0.00003", output: "0.00006" },
+      } as any);
+
+      const cost = await getCreditUsage(
+        { inputTokens: 1000, outputTokens: 500 },
+        "gpt-4",
+        Number.NaN,
+      );
+      expect(cost).toBeCloseTo(0.06);
+    });
+
+    it("falls through to token math when gatewayCostUsd is Infinity", async () => {
+      mockGetModel.mockResolvedValue({
+        id: "gpt-4",
+        pricing: { input: "0.00003", output: "0.00006" },
+      } as any);
+
+      const cost = await getCreditUsage(
+        { inputTokens: 1000, outputTokens: 500 },
+        "gpt-4",
+        Number.POSITIVE_INFINITY,
+      );
+      expect(cost).toBeCloseTo(0.06);
+    });
+  });
 });
diff --git a/lib/credits/__tests__/handleChatCredits.test.ts b/lib/credits/__tests__/handleChatCredits.test.ts
index 0e92f352e..c1786fc8f 100644
--- a/lib/credits/__tests__/handleChatCredits.test.ts
+++ b/lib/credits/__tests__/handleChatCredits.test.ts
@@ -42,7 +42,7 @@ describe("handleChatCredits", () => {
         accountId: "account-123",
       });
 
-      expect(mockGetCreditUsage).toHaveBeenCalledWith(USAGE, "gpt-4");
+      expect(mockGetCreditUsage).toHaveBeenCalledWith(USAGE, "gpt-4", undefined);
       expect(mockRecordCreditDeduction).toHaveBeenCalledWith({
         accountId: "account-123",
         creditsToDeduct: 5,
@@ -154,4 +154,51 @@ describe("handleChatCredits", () => {
       expect(consoleSpy).toHaveBeenCalledWith("Failed to handle chat credits:", expect.any(Error));
     });
   });
+
+  describe("gateway cost + source extensions", () => {
+    it("forwards gatewayCostUsd to getCreditUsage when provided", async () => {
+      mockGetCreditUsage.mockResolvedValue(0.07);
+      mockRecordCreditDeduction.mockResolvedValue({ success: true, newBalance: 93 });
+
+      await handleChatCredits({
+        usage: USAGE,
+        model: "anthropic/claude-haiku-4.5",
+        accountId: "account-123",
+        gatewayCostUsd: 0.07,
+      });
+
+      expect(mockGetCreditUsage).toHaveBeenCalledWith(USAGE, "anthropic/claude-haiku-4.5", 0.07);
+    });
+
+    it("defaults source to 'web' when not provided (backwards compatible)", async () => {
+      mockGetCreditUsage.mockResolvedValue(0.05);
+      mockRecordCreditDeduction.mockResolvedValue({ success: true, newBalance: 95 });
+
+      await handleChatCredits({
+        usage: USAGE,
+        model: "gpt-4",
+        accountId: "account-123",
+      });
+
+      expect(mockRecordCreditDeduction).toHaveBeenCalledWith(
+        expect.objectContaining({ source: "web" }),
+      );
+    });
+
+    it("propagates source='api' when caller is the chat workflow", async () => {
+      mockGetCreditUsage.mockResolvedValue(0.05);
+      mockRecordCreditDeduction.mockResolvedValue({ success: true, newBalance: 95 });
+
+      await handleChatCredits({
+        usage: USAGE,
+        model: "anthropic/claude-haiku-4.5",
+        accountId: "account-123",
+        source: "api",
+      });
+
+      expect(mockRecordCreditDeduction).toHaveBeenCalledWith(
+        expect.objectContaining({ source: "api" }),
+      );
+    });
+  });
 });
diff --git a/lib/credits/__tests__/recordCreditDeduction.test.ts b/lib/credits/__tests__/recordCreditDeduction.test.ts
index 9cdfa4eba..95910af4e 100644
--- a/lib/credits/__tests__/recordCreditDeduction.test.ts
+++ b/lib/credits/__tests__/recordCreditDeduction.test.ts
@@ -1,17 +1,12 @@
 import { describe, it, expect, vi, beforeEach } from "vitest";
 import { recordCreditDeduction } from "@/lib/credits/recordCreditDeduction";
 
-const { deductCreditsMock, insertUsageEventMock } = vi.hoisted(() => ({
-  deductCreditsMock: vi.fn(),
-  insertUsageEventMock: vi.fn(),
+const { deductCreditsWithAuditMock } = vi.hoisted(() => ({
+  deductCreditsWithAuditMock: vi.fn(),
 }));
 
-vi.mock("@/lib/credits/deductCredits", () => ({
-  deductCredits: deductCreditsMock,
-}));
-
-vi.mock("@/lib/supabase/usage_events/insertUsageEvent", () => ({
-  insertUsageEvent: insertUsageEventMock,
+vi.mock("@/lib/supabase/credits_usage/deductCreditsWithAudit", () => ({
+  deductCreditsWithAudit: deductCreditsWithAuditMock,
 }));
 
 const ACCOUNT = "123e4567-e89b-12d3-a456-426614174000";
@@ -21,9 +16,8 @@ describe("recordCreditDeduction", () => {
     vi.clearAllMocks();
   });
 
-  it("deducts credits then inserts a usage_events row carrying token detail", async () => {
-    deductCreditsMock.mockResolvedValue({ success: true, newBalance: 100 });
-    insertUsageEventMock.mockResolvedValue({ id: "abc" });
+  it("calls the atomic RPC with cents + event payload carrying token detail", async () => {
+    deductCreditsWithAuditMock.mockResolvedValue({ ok: true });
 
     const result = await recordCreditDeduction({
       accountId: ACCOUNT,
@@ -37,13 +31,13 @@ describe("recordCreditDeduction", () => {
       toolCallCount: 3,
     });
 
-    expect(deductCreditsMock).toHaveBeenCalledWith({
-      accountId: ACCOUNT,
-      creditsToDeduct: 250,
-    });
-    expect(insertUsageEventMock).toHaveBeenCalledWith({
-      account_id: ACCOUNT,
-      credits_deducted_cents: 250,
+    expect(deductCreditsWithAuditMock).toHaveBeenCalledTimes(1);
+    const args = deductCreditsWithAuditMock.mock.calls[0]?.[0];
+    expect(args.accountId).toBe(ACCOUNT);
+    expect(args.cents).toBe(250);
+    expect(typeof args.eventId).toBe("string");
+    expect(args.eventId.length).toBeGreaterThan(0);
+    expect(args.event).toEqual({
       source: "web",
       agent_type: "main",
       provider: "anthropic",
@@ -53,12 +47,11 @@ describe("recordCreditDeduction", () => {
       output_tokens: 567,
       tool_call_count: 3,
     });
-    expect(result).toEqual({ success: true, newBalance: 100 });
+    expect(result).toEqual({ success: true });
   });
 
-  it("applies defaults (agent_type='main', zero tokens, null model/provider) when token detail is omitted", async () => {
-    deductCreditsMock.mockResolvedValue({ success: true, newBalance: 95 });
-    insertUsageEventMock.mockResolvedValue({ id: "abc" });
+  it("applies defaults (agent_type='main', zero tokens, undefined model/provider) when token detail is omitted", async () => {
+    deductCreditsWithAuditMock.mockResolvedValue({ ok: true });
 
     await recordCreditDeduction({
       accountId: ACCOUNT,
@@ -66,13 +59,12 @@ describe("recordCreditDeduction", () => {
       source: "api",
     });
 
-    expect(insertUsageEventMock).toHaveBeenCalledWith({
-      account_id: ACCOUNT,
-      credits_deducted_cents: 5,
+    const args = deductCreditsWithAuditMock.mock.calls[0]?.[0];
+    expect(args.event).toEqual({
       source: "api",
       agent_type: "main",
-      provider: null,
-      model_id: null,
+      provider: undefined,
+      model_id: undefined,
       input_tokens: 0,
       cached_input_tokens: 0,
       output_tokens: 0,
@@ -80,33 +72,35 @@ describe("recordCreditDeduction", () => {
     });
   });
 
-  it("does not insert the audit row when the wallet deduction fails", async () => {
-    deductCreditsMock.mockRejectedValue(new Error("Insufficient credits"));
+  it("returns { success: false } and logs when the RPC reports an error (does NOT throw)", async () => {
+    const consoleSpy = vi.spyOn(console, "error").mockImplementation(() => {});
+    deductCreditsWithAuditMock.mockResolvedValue({
+      ok: false,
+      error: "credits_usage row not found",
+    });
 
-    await expect(
-      recordCreditDeduction({
-        accountId: ACCOUNT,
-        creditsToDeduct: 50,
-        source: "web",
-      }),
-    ).rejects.toThrow(/Insufficient credits/);
+    const result = await recordCreditDeduction({
+      accountId: ACCOUNT,
+      creditsToDeduct: 50,
+      source: "web",
+    });
 
-    expect(insertUsageEventMock).not.toHaveBeenCalled();
+    expect(result).toEqual({ success: false });
+    expect(consoleSpy).toHaveBeenCalled();
+    consoleSpy.mockRestore();
   });
 
-  it("returns the deduction result even if the audit insert fails (logs and swallows)", async () => {
+  it("does not throw when the RPC wrapper itself rejects (defense in depth)", async () => {
     const consoleSpy = vi.spyOn(console, "error").mockImplementation(() => {});
-    deductCreditsMock.mockResolvedValue({ success: true, newBalance: 100 });
-    insertUsageEventMock.mockRejectedValue(new Error("db down"));
+    deductCreditsWithAuditMock.mockRejectedValue(new Error("network blip"));
 
     const result = await recordCreditDeduction({
       accountId: ACCOUNT,
       creditsToDeduct: 5,
-      source: "api",
+      source: "web",
     });
 
-    expect(result).toEqual({ success: true, newBalance: 100 });
-    expect(consoleSpy).toHaveBeenCalled();
+    expect(result).toEqual({ success: false });
     consoleSpy.mockRestore();
   });
 });
diff --git a/lib/credits/computeCreditsDeductedCents.ts b/lib/credits/computeCreditsDeductedCents.ts
deleted file mode 100644
index 7887801ad..000000000
--- a/lib/credits/computeCreditsDeductedCents.ts
+++ /dev/null
@@ -1,57 +0,0 @@
-import { getAvailableModels } from "@/lib/ai/getAvailableModels";
-import { estimateModelUsageCost } from "@/lib/credits/estimateModelUsageCost";
-import type { AvailableModelCost } from "@/lib/credits/AvailableModelCost";
-
-/**
- * Per-turn credit charge in cents (minimum 1).
- *
- * Mirrors open-agents'
- * `apps/web/lib/credits/compute-credits-deducted-cents.ts` so the same
- * billing math runs on both sides of the chat cutover. Resolution order:
- *
- *   1. Gateway-reported cost on `responseMessage.metadata.totalMessageCost`
- *      — the exact number the chat UI shows next to the assistant
- *      response. Used directly so the wallet debit converges with the
- *      cost label.
- *   2. Token-based estimate against the model catalog's `cost` entry.
- *      Catalog is the same gateway / models.dev pipeline that backs
- *      `GET /api/ai/models`.
- *   3. 1c floor when no pricing is available — every successful turn
- *      moves the wallet by at least 1c so a transient catalog outage
- *      can't make a turn free.
- *
- * Errors in the catalog fetch are swallowed and treated as path #3 —
- * the caller (recordChatUsage) must not fail the workflow on a credit
- * accounting hiccup.
- *
- * @param usage Token counts for the turn (matches AI SDK's `LanguageModelUsage`).
- * @param modelId Fully qualified gateway id (e.g. `anthropic/claude-haiku-4.5`).
- * @param gatewayCostUsd Gateway-reported total USD cost for the turn,
- *   when available. Subagent steps (collectTaskToolUsageEvents) won't
- *   have one and fall through to the token estimate.
- * @returns Integer cent amount, ≥ 1.
- */
-export async function computeCreditsDeductedCents(
-  usage: {
-    inputTokens: number;
-    cachedInputTokens: number;
-    outputTokens: number;
-  },
-  modelId: string,
-  gatewayCostUsd?: number,
-): Promise<number> {
-  if (typeof gatewayCostUsd === "number" && Number.isFinite(gatewayCostUsd) && gatewayCostUsd > 0) {
-    return Math.max(1, Math.round(gatewayCostUsd * 100));
-  }
-
-  try {
-    const models = await getAvailableModels();
-    const model = models.find(m => m.id === modelId) as { cost?: AvailableModelCost } | undefined;
-    const usd = estimateModelUsageCost(usage, model?.cost);
-    if (typeof usd !== "number" || usd <= 0) return 1;
-    return Math.max(1, Math.round(usd * 100));
-  } catch (error) {
-    console.error("Failed to compute credits from usage:", error);
-    return 1;
-  }
-}
diff --git a/lib/credits/estimateModelUsageCost.ts b/lib/credits/estimateModelUsageCost.ts
deleted file mode 100644
index d675486a0..000000000
--- a/lib/credits/estimateModelUsageCost.ts
+++ /dev/null
@@ -1,79 +0,0 @@
-import type { AvailableModelCost, AvailableModelCostTier } from "@/lib/credits/AvailableModelCost";
-
-const TOKENS_PER_MILLION = 1_000_000;
-
-/**
- * Picks the right cost tier for `usage`. Above 200k input tokens many
- * providers charge more, so the cost catalog exposes an override on
- * `cost.context_over_200k`. Missing fields fall back to the base tier
- * so a partial override is still valid.
- *
- * The trigger is "input > 200k AND at least one of input/output is
- * overridden" — a tier that only overrides `cache_read` is treated as
- * the base tier (it's not a real tier swap, just a cache discount).
- */
-function resolveCostTier(
-  usage: { inputTokens: number },
-  cost: AvailableModelCost | undefined,
-): AvailableModelCostTier | undefined {
-  if (!cost) return undefined;
-
-  if (
-    usage.inputTokens > 200_000 &&
-    (typeof cost.context_over_200k?.input === "number" ||
-      typeof cost.context_over_200k?.output === "number")
-  ) {
-    return {
-      input: cost.context_over_200k.input ?? cost.input,
-      output: cost.context_over_200k.output ?? cost.output,
-      cache_read: cost.context_over_200k.cache_read ?? cost.cache_read,
-    };
-  }
-
-  return cost;
-}
-
-/**
- * Token-based estimate of a turn's USD cost, used as a fallback when
- * the gateway hasn't reported an actual cost on the
- * `assistantMessage.metadata.totalMessageCost` field.
- *
- * Ports `apps/web/lib/models.ts:estimateModelUsageCost` from
- * open-agents so the same per-turn math runs on both sides during the
- * cutover. Cached input tokens are billed at `cache_read` when the
- * catalog exposes it and fall back to the base input price otherwise.
- *
- * Returns `undefined` when the catalog can't price this model (missing
- * cost entry, missing input price, or missing output price) so the
- * caller knows to use a different fallback (open-agents' rule: never
- * charge 0 — bill the 1c minimum instead).
- *
- * @param usage Token counts for the turn (mirrors AI SDK's `LanguageModelUsage`).
- * @param cost Per-model catalog entry. `undefined` short-circuits to `undefined`.
- * @returns USD cost, or `undefined` if not priceable.
- */
-export function estimateModelUsageCost(
-  usage: {
-    inputTokens: number;
-    cachedInputTokens: number;
-    outputTokens: number;
-  },
-  cost: AvailableModelCost | undefined,
-): number | undefined {
-  const costTier = resolveCostTier(usage, cost);
-  const inputPrice = costTier?.input;
-  const outputPrice = costTier?.output;
-  if (typeof inputPrice !== "number" || typeof outputPrice !== "number") {
-    return undefined;
-  }
-
-  const cachedInputTokens = Math.max(0, usage.cachedInputTokens);
-  const uncachedInputTokens = Math.max(0, usage.inputTokens - cachedInputTokens);
-  const cacheReadPrice = costTier?.cache_read ?? inputPrice;
-
-  return (
-    (uncachedInputTokens * inputPrice) / TOKENS_PER_MILLION +
-    (cachedInputTokens * cacheReadPrice) / TOKENS_PER_MILLION +
-    (Math.max(0, usage.outputTokens) * outputPrice) / TOKENS_PER_MILLION
-  );
-}
diff --git a/lib/credits/getCreditUsage.ts b/lib/credits/getCreditUsage.ts
index 4e1dc5b28..26eb9180b 100644
--- a/lib/credits/getCreditUsage.ts
+++ b/lib/credits/getCreditUsage.ts
@@ -3,14 +3,31 @@ import { LanguageModelUsage } from "ai";
 
 /**
  * Calculates the total spend in USD for a given language model usage.
+ *
+ * Resolution order:
+ *   1. `gatewayCostUsd` — gateway-reported actual cost from
+ *      `responseMessage.metadata.totalMessageCost`. Used directly when
+ *      present and positive so the wallet debit converges with the cost
+ *      label the chat UI shows next to the assistant response.
+ *   2. Token-based estimate using `model.pricing.input/output` from
+ *      the gateway catalog (`getModel`). Authoritative for token cost.
+ *   3. `0` when nothing prices the turn (caller floors to the 1c
+ *      minimum via `Math.max(1, Math.round(usd * 100))`).
+ *
  * @param usage - The language model usage data
  * @param modelId - The ID of the model used
+ * @param gatewayCostUsd - Optional gateway-reported USD cost (preferred over token math)
  * @returns The total spend in USD or 0 if calculation fails
  */
 export const getCreditUsage = async (
   usage: LanguageModelUsage,
   modelId: string,
+  gatewayCostUsd?: number,
 ): Promise<number> => {
+  if (typeof gatewayCostUsd === "number" && Number.isFinite(gatewayCostUsd) && gatewayCostUsd > 0) {
+    return gatewayCostUsd;
+  }
+
   try {
     const model = await getModel(modelId);
     if (!model) {
diff --git a/lib/credits/handleChatCredits.ts b/lib/credits/handleChatCredits.ts
index bc2ecff9f..061043420 100644
--- a/lib/credits/handleChatCredits.ts
+++ b/lib/credits/handleChatCredits.ts
@@ -6,19 +6,42 @@ interface HandleChatCreditsParams {
   usage: LanguageModelUsage;
   model: string;
   accountId?: string;
+  /**
+   * Gateway-reported total USD cost for this turn (from
+   * `responseMessage.metadata.totalMessageCost`). When present and
+   * positive, used directly instead of the token-based estimate so
+   * the wallet debit converges with the cost label the chat UI shows.
+   */
+  gatewayCostUsd?: number;
+  /**
+   * Which surface generated the turn — used to label the
+   * `usage_events` audit row. Defaults to `"web"` to preserve the
+   * existing `handleChatStream` contract; the chat-workflow path
+   * (`/api/chat/workflow`) passes `"api"` so admin dashboards can
+   * distinguish surface in spend rollups.
+   */
+  source?: "web" | "api";
 }
 
 /**
  * Handles credit deduction after chat completion.
  * Always deducts at least 1 credit when accountId is present (round up from usage cost).
- * @param usage - The language model usage data
- * @param model - The model ID used for the chat
- * @param accountId - The account ID to deduct credits from (optional)
+ *
+ * Resolution order for the per-turn cost:
+ *   1. `gatewayCostUsd` — gateway-reported actual USD (preferred)
+ *   2. Token-based estimate via `getCreditUsage` against the gateway catalog
+ *   3. 1c floor — every successful turn debits at least 1 cent
+ *
+ * Wallet debit + audit row insert are atomic via
+ * `recordCreditDeduction` (backed by the `deduct_credits_with_audit`
+ * Postgres function).
  */
 export const handleChatCredits = async ({
   usage,
   model,
   accountId,
+  gatewayCostUsd,
+  source = "web",
 }: HandleChatCreditsParams): Promise<void> => {
   if (!accountId) {
     console.error("No account ID provided, skipping credit deduction");
@@ -26,13 +49,13 @@ export const handleChatCredits = async ({
   }
 
   try {
-    const usageCost = await getCreditUsage(usage, model);
+    const usageCost = await getCreditUsage(usage, model, gatewayCostUsd);
     const creditsToDeduct = Math.max(1, Math.round(usageCost * 100));
 
     await recordCreditDeduction({
       accountId,
       creditsToDeduct,
-      source: "web",
+      source,
       modelId: model,
       inputTokens: usage.inputTokens,
       outputTokens: usage.outputTokens,
diff --git a/lib/credits/recordCreditDeduction.ts b/lib/credits/recordCreditDeduction.ts
index b2db46803..b71f96c55 100644
--- a/lib/credits/recordCreditDeduction.ts
+++ b/lib/credits/recordCreditDeduction.ts
@@ -1,5 +1,5 @@
-import { deductCredits } from "./deductCredits";
-import { insertUsageEvent } from "@/lib/supabase/usage_events/insertUsageEvent";
+import { nanoid } from "nanoid";
+import { deductCreditsWithAudit } from "@/lib/supabase/credits_usage/deductCreditsWithAudit";
 
 interface RecordCreditDeductionParams {
   accountId: string;
@@ -16,42 +16,49 @@ interface RecordCreditDeductionParams {
 
 interface RecordCreditDeductionResult {
   success: boolean;
-  newBalance?: number;
 }
 
 /**
- * Wallet + meter wrapper. Debits the credits_usage balance via deductCredits,
- * then writes a usage_events row recording the wallet impact alongside any
- * token detail the caller has.
+ * Wallet + meter atomic wrapper. Calls the `deduct_credits_with_audit`
+ * Postgres function (recoupable/database#26) which runs the
+ * `credits_usage` debit and the `usage_events` insert inside a single
+ * transaction — either both writes commit or neither does. Eliminates
+ * the wallet/meter drift risk the previous (two separate Supabase
+ * calls) implementation had.
  *
- * If the audit insert fails, the deduction is preserved (already committed)
- * and the error is logged but not surfaced — the wallet stays authoritative
- * and a reconciliation job can recover the missing audit row later.
+ * Errors are caught and surfaced via `{ success: false }` so the
+ * caller (the chat workflow or any research handler) never aborts on
+ * a credit-accounting hiccup. The wallet stays authoritative — if the
+ * RPC rejects, neither write happened, and the caller can decide
+ * whether to retry or move on.
  */
 export const recordCreditDeduction = async (
   params: RecordCreditDeductionParams,
 ): Promise<RecordCreditDeductionResult> => {
-  const result = await deductCredits({
-    accountId: params.accountId,
-    creditsToDeduct: params.creditsToDeduct,
-  });
-
   try {
-    await insertUsageEvent({
-      account_id: params.accountId,
-      credits_deducted_cents: params.creditsToDeduct,
-      source: params.source,
-      agent_type: params.agentType ?? "main",
-      provider: params.provider ?? null,
-      model_id: params.modelId ?? null,
-      input_tokens: params.inputTokens ?? 0,
-      cached_input_tokens: params.cachedInputTokens ?? 0,
-      output_tokens: params.outputTokens ?? 0,
-      tool_call_count: params.toolCallCount ?? 0,
+    const result = await deductCreditsWithAudit({
+      accountId: params.accountId,
+      cents: params.creditsToDeduct,
+      eventId: nanoid(),
+      event: {
+        source: params.source,
+        agent_type: params.agentType ?? "main",
+        provider: params.provider,
+        model_id: params.modelId,
+        input_tokens: params.inputTokens ?? 0,
+        cached_input_tokens: params.cachedInputTokens ?? 0,
+        output_tokens: params.outputTokens ?? 0,
+        tool_call_count: params.toolCallCount ?? 0,
+      },
     });
+
+    if (!result.ok) {
+      console.error("[recordCreditDeduction] atomic debit failed:", result.error);
+      return { success: false };
+    }
+    return { success: true };
   } catch (error) {
-    console.error("Failed to insert usage_events row (wallet was still debited):", error);
+    console.error("[recordCreditDeduction] unexpected error:", error);
+    return { success: false };
   }
-
-  return result;
 };

From 17637b8c382840a20ad4caffde1b1f59d19c0a9d Mon Sep 17 00:00:00 2001
From: Sweets Sweetman <sweetmantech@gmail.com>
Date: Mon, 25 May 2026 16:44:01 -0500
Subject: [PATCH 6/7] fix(credits): mark recordCreditDeduction as 'use step'
 for workflow runtime
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Vercel Workflow's build-time detector flagged `nanoid` as a Node.js
module that can't run inside the workflow body. Marking
recordCreditDeduction as 'use step' moves it into the step runtime
where Node modules are allowed. Backwards compatible for the existing
research-handler callers (regular API routes) — 'use step' functions
execute immediately when called from non-workflow contexts.

Also matches open-agents' pattern: their recordWorkflowUsage (which
contains the equivalent nanoid call) is a 'use step' function.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 lib/credits/recordCreditDeduction.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/credits/recordCreditDeduction.ts b/lib/credits/recordCreditDeduction.ts
index b71f96c55..3bf61ecce 100644
--- a/lib/credits/recordCreditDeduction.ts
+++ b/lib/credits/recordCreditDeduction.ts
@@ -35,6 +35,7 @@ interface RecordCreditDeductionResult {
 export const recordCreditDeduction = async (
   params: RecordCreditDeductionParams,
 ): Promise<RecordCreditDeductionResult> => {
+  "use step";
   try {
     const result = await deductCreditsWithAudit({
       accountId: params.accountId,

From b0299f41ecee8eb3607db217301d04d0e11f668f Mon Sep 17 00:00:00 2001
From: Sweets Sweetman <sweetmantech@gmail.com>
Date: Mon, 25 May 2026 16:52:12 -0500
Subject: [PATCH 7/7] refactor(workflow): collapse inline metadata duck-type
 (KISS)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR review feedback: the 14-line inline type assertion for
`result.responseMessage` was needless boilerplate. Replaced with:

1. Import the existing `AgentMessageMetadata` type (already used by
   `runAgentStep`'s `messageMetadata` callback — single source of
   truth for the shape).
2. Hoist a module-level `ZERO_USAGE` default so the fallback when
   metadata is missing is a named constant, not an inline literal.
3. Cast `result.responseMessage.metadata` once (`as AgentMessageMetadata
   | undefined`).

Net delta: 14 lines → 5 lines inside the workflow body, no behavior
change.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 app/lib/workflows/runAgentWorkflow.ts | 30 ++++++++-------------------
 1 file changed, 9 insertions(+), 21 deletions(-)

diff --git a/app/lib/workflows/runAgentWorkflow.ts b/app/lib/workflows/runAgentWorkflow.ts
index 9afcdeb79..67eb94b24 100644
--- a/app/lib/workflows/runAgentWorkflow.ts
+++ b/app/lib/workflows/runAgentWorkflow.ts
@@ -6,8 +6,15 @@ import { runAgentStep } from "@/app/lib/workflows/runAgentStep";
 import { clearChatActiveStream } from "@/lib/chat/clearChatActiveStream";
 import { persistAssistantMessage } from "@/lib/chat/persistAssistantMessage";
 import { handleChatCredits } from "@/lib/credits/handleChatCredits";
+import type { AgentMessageMetadata } from "@/lib/agent/messageMetadata/AgentMessageMetadata";
 import type { DurableAgentContext } from "@/lib/agent/tools/AgentContext";
 
+const ZERO_USAGE: LanguageModelUsage = {
+  inputTokens: 0,
+  cachedInputTokens: 0,
+  outputTokens: 0,
+} as LanguageModelUsage;
+
 export type RunAgentWorkflowInput = {
   messages: UIMessage[];
   chatId: string;
@@ -100,32 +107,13 @@ export async function runAgentWorkflow(input: RunAgentWorkflowInput): Promise<vo
       // (apps/web/app/workflows/chat-post-finish.ts) and reuses the
       // same `handleChatCredits` orchestrator that `handleChatStream`
       // already uses for the non-workflow chat path.
-      const metadata = (
-        result.responseMessage as {
-          metadata?: {
-            totalMessageCost?: number;
-            totalMessageUsage?: {
-              inputTokens?: number;
-              cachedInputTokens?: number;
-              outputTokens?: number;
-            };
-          };
-        }
-      ).metadata;
+      const metadata = result.responseMessage.metadata as AgentMessageMetadata | undefined;
       await handleChatCredits({
         accountId: input.accountId,
         model: input.modelId,
         source: "api",
         gatewayCostUsd: metadata?.totalMessageCost,
-        // Cast at the boundary: AI SDK's LanguageModelUsage type
-        // requires additional optional fields (inputTokenDetails,
-        // outputTokenDetails, totalTokens). `handleChatCredits` only
-        // reads the three core fields, so the cast is safe.
-        usage: {
-          inputTokens: metadata?.totalMessageUsage?.inputTokens ?? 0,
-          cachedInputTokens: metadata?.totalMessageUsage?.cachedInputTokens ?? 0,
-          outputTokens: metadata?.totalMessageUsage?.outputTokens ?? 0,
-        } as LanguageModelUsage,
+        usage: metadata?.totalMessageUsage ?? ZERO_USAGE,
       });
     }
   } finally {