From 319d2e669f3e931fe4b60b919d811c2a7632210f Mon Sep 17 00:00:00 2001 From: Sweets Sweetman Date: Mon, 25 May 2026 14:40:44 -0500 Subject: [PATCH 1/7] feat(credits): port computeCreditsDeductedCents + estimateModelUsageCost from open-agents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First piece of the chat-workflow billing path. Ports the per-turn cost math from open-agents' `apps/web/lib/credits/compute-credits-deducted-cents.ts` and `apps/web/lib/models.ts:estimateModelUsageCost` so the same billing logic runs on both sides during the cutover. Resolution order matches open-agents exactly: 1. gateway-reported cost on responseMessage.metadata.totalMessageCost (the same number the chat UI shows next to the response) 2. token-based estimate against the model catalog's cost entry 3. 1c floor when no pricing is available — so a successful turn never lands as a free run Three new files (per api's one-exported-function-per-file SRP): - AvailableModelCost.ts — shape mirroring open-agents' richer cost type (input, output, cache_read, context_over_200k) so the same estimator runs against either catalog - estimateModelUsageCost.ts — token-based USD estimator including the 200k+ context tier swap and cache_read pricing - computeCreditsDeductedCents.ts — top-level orchestrator (gateway cost → token estimate → 1c floor) using api's getAvailableModels directly (no HTTP self-fetch like open-agents does) Test coverage: 27 new unit tests across the two test files. All pricing edge cases covered (NaN/Infinity/negative gateway cost, cached-tokens- exceeding-input clamping, context_over_200k tier swap with partial overrides, catalog miss / fetch failure fallbacks). Unblocks step 3 (deductCreditsWithAudit TS wrapper) of the chat credits gap in recoupable/api#605. Full suite: 3191 → 3205 passing. Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/credits/AvailableModelCost.ts | 25 +++ .../computeCreditsDeductedCents.test.ts | 183 ++++++++++++++++++ .../__tests__/estimateModelUsageCost.test.ts | 165 ++++++++++++++++ lib/credits/computeCreditsDeductedCents.ts | 57 ++++++ lib/credits/estimateModelUsageCost.ts | 79 ++++++++ 5 files changed, 509 insertions(+) create mode 100644 lib/credits/AvailableModelCost.ts create mode 100644 lib/credits/__tests__/computeCreditsDeductedCents.test.ts create mode 100644 lib/credits/__tests__/estimateModelUsageCost.test.ts create mode 100644 lib/credits/computeCreditsDeductedCents.ts create mode 100644 lib/credits/estimateModelUsageCost.ts diff --git a/lib/credits/AvailableModelCost.ts b/lib/credits/AvailableModelCost.ts new file mode 100644 index 000000000..72d3026f9 --- /dev/null +++ b/lib/credits/AvailableModelCost.ts @@ -0,0 +1,25 @@ +/** + * Shape of the per-model cost catalog used for token-based credit estimation. + * + * Mirrors open-agents' + * `apps/web/lib/models.ts:AvailableModelCost` so the same + * `estimateModelUsageCost` math runs against either catalog without + * shape conversion. api's current gateway/models.dev pipeline emits + * only `{ input, output }` (see + * `lib/ai/parseModelsDevMetadata.ts:ModelsDevMetadata`); the richer + * `cache_read` and `context_over_200k` fields are kept in the type so + * a future catalog expansion (or a hand-edited override) gets picked + * up automatically by the estimator. + * + * All token-cost units are USD per million tokens, matching + * models.dev. + */ +export interface AvailableModelCostTier { + input?: number; + output?: number; + cache_read?: number; +} + +export interface AvailableModelCost extends AvailableModelCostTier { + context_over_200k?: AvailableModelCostTier; +} diff --git a/lib/credits/__tests__/computeCreditsDeductedCents.test.ts b/lib/credits/__tests__/computeCreditsDeductedCents.test.ts new file mode 100644 index 000000000..a05c79df7 --- /dev/null +++ b/lib/credits/__tests__/computeCreditsDeductedCents.test.ts @@ -0,0 +1,183 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { computeCreditsDeductedCents } from "@/lib/credits/computeCreditsDeductedCents"; +import { getAvailableModels } from "@/lib/ai/getAvailableModels"; + +vi.mock("@/lib/ai/getAvailableModels", () => ({ + getAvailableModels: vi.fn(), +})); + +beforeEach(() => { + vi.clearAllMocks(); + // Default catalog: empty (forces token-estimate path to fall through to 1c). + vi.mocked(getAvailableModels).mockResolvedValue([]); +}); + +const ZERO_USAGE = { inputTokens: 0, cachedInputTokens: 0, outputTokens: 0 }; + +describe("computeCreditsDeductedCents", () => { + describe("gateway cost path (preferred)", () => { + it("returns gateway cost in cents when gatewayCostUsd is a positive number", async () => { + // $0.0074 → 0.74c → ceil to 1c minimum is not needed; round to 1c + expect( + await computeCreditsDeductedCents(ZERO_USAGE, "anthropic/claude-haiku-4.5", 0.0074), + ).toBe(1); + // $0.42 → 42c + expect( + await computeCreditsDeductedCents(ZERO_USAGE, "anthropic/claude-haiku-4.5", 0.42), + ).toBe(42); + }); + + it("rounds the gateway cost to the nearest cent", async () => { + // $0.123 → 12.3c → 12c + expect(await computeCreditsDeductedCents(ZERO_USAGE, "model", 0.123)).toBe(12); + // $0.126 → 12.6c → 13c + expect(await computeCreditsDeductedCents(ZERO_USAGE, "model", 0.126)).toBe(13); + }); + + it("returns at least 1 when gateway cost rounds to 0", async () => { + // $0.0001 → 0.01c → would round to 0, must bump to 1 + expect(await computeCreditsDeductedCents(ZERO_USAGE, "model", 0.0001)).toBe(1); + }); + + it("does NOT call the catalog when gateway cost is usable", async () => { + await computeCreditsDeductedCents(ZERO_USAGE, "model", 0.05); + expect(getAvailableModels).not.toHaveBeenCalled(); + }); + }); + + describe("falls back to token-based estimate when gateway cost is unusable", () => { + it("when gatewayCostUsd is undefined", async () => { + vi.mocked(getAvailableModels).mockResolvedValue([ + { id: "model-x", cost: { input: 1, output: 4 } } as never, + ]); + // 1M in + 1M out → $5 → 500c + expect( + await computeCreditsDeductedCents( + { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 }, + "model-x", + undefined, + ), + ).toBe(500); + }); + + it("when gatewayCostUsd is 0", async () => { + vi.mocked(getAvailableModels).mockResolvedValue([ + { id: "model-x", cost: { input: 1, output: 4 } } as never, + ]); + expect( + await computeCreditsDeductedCents( + { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 }, + "model-x", + 0, + ), + ).toBe(500); + }); + + it("when gatewayCostUsd is negative (corrupted/upstream bug)", async () => { + vi.mocked(getAvailableModels).mockResolvedValue([ + { id: "model-x", cost: { input: 1, output: 4 } } as never, + ]); + expect( + await computeCreditsDeductedCents( + { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 }, + "model-x", + -1, + ), + ).toBe(500); + }); + + it("when gatewayCostUsd is NaN (not Number.isFinite)", async () => { + vi.mocked(getAvailableModels).mockResolvedValue([ + { id: "model-x", cost: { input: 1, output: 4 } } as never, + ]); + expect( + await computeCreditsDeductedCents( + { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 }, + "model-x", + Number.NaN, + ), + ).toBe(500); + }); + + it("when gatewayCostUsd is Infinity", async () => { + vi.mocked(getAvailableModels).mockResolvedValue([ + { id: "model-x", cost: { input: 1, output: 4 } } as never, + ]); + expect( + await computeCreditsDeductedCents( + { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 }, + "model-x", + Number.POSITIVE_INFINITY, + ), + ).toBe(500); + }); + }); + + describe("estimate fallbacks (also: never charge zero on success)", () => { + it("returns 1 when modelId is not in the catalog", async () => { + vi.mocked(getAvailableModels).mockResolvedValue([ + { id: "other-model", cost: { input: 1, output: 4 } } as never, + ]); + expect( + await computeCreditsDeductedCents( + { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 1000 }, + "model-x", + undefined, + ), + ).toBe(1); + }); + + it("returns 1 when the catalog has no cost for the model", async () => { + vi.mocked(getAvailableModels).mockResolvedValue([{ id: "model-x" } as never]); + expect( + await computeCreditsDeductedCents( + { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 1000 }, + "model-x", + undefined, + ), + ).toBe(1); + }); + + it("returns 1 when getAvailableModels rejects", async () => { + vi.mocked(getAvailableModels).mockRejectedValue(new Error("gateway down")); + expect( + await computeCreditsDeductedCents( + { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 1000 }, + "model-x", + undefined, + ), + ).toBe(1); + }); + + it("returns 1 when token estimate rounds to 0 (very tiny usage)", async () => { + vi.mocked(getAvailableModels).mockResolvedValue([ + { id: "model-x", cost: { input: 0.0001, output: 0.0001 } } as never, + ]); + // ~$0.0000002 → 0.00002c → bumps to 1c minimum + expect( + await computeCreditsDeductedCents( + { inputTokens: 1, cachedInputTokens: 0, outputTokens: 1 }, + "model-x", + undefined, + ), + ).toBe(1); + }); + }); + + describe("model lookup", () => { + it("matches modelId exactly (provider/model form)", async () => { + vi.mocked(getAvailableModels).mockResolvedValue([ + { id: "anthropic/claude-haiku-4.5", cost: { input: 1, output: 4 } } as never, + { id: "openai/gpt-5", cost: { input: 10, output: 40 } } as never, + ]); + // Pick haiku: 1M in + 1M out @ haiku rates → $5 → 500c + expect( + await computeCreditsDeductedCents( + { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 }, + "anthropic/claude-haiku-4.5", + undefined, + ), + ).toBe(500); + }); + }); +}); diff --git a/lib/credits/__tests__/estimateModelUsageCost.test.ts b/lib/credits/__tests__/estimateModelUsageCost.test.ts new file mode 100644 index 000000000..e41d9454e --- /dev/null +++ b/lib/credits/__tests__/estimateModelUsageCost.test.ts @@ -0,0 +1,165 @@ +import { describe, it, expect } from "vitest"; +import { estimateModelUsageCost } from "@/lib/credits/estimateModelUsageCost"; + +const baseCost = { input: 1, output: 4 }; // $1/M in, $4/M out + +describe("estimateModelUsageCost", () => { + describe("guard rails", () => { + it("returns undefined when cost catalog entry is missing", () => { + expect( + estimateModelUsageCost( + { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 500 }, + undefined, + ), + ).toBeUndefined(); + }); + + it("returns undefined when input price is missing", () => { + expect( + estimateModelUsageCost( + { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 500 }, + { output: 4 }, + ), + ).toBeUndefined(); + }); + + it("returns undefined when output price is missing", () => { + expect( + estimateModelUsageCost( + { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 500 }, + { input: 1 }, + ), + ).toBeUndefined(); + }); + }); + + describe("base tier (≤200k input tokens)", () => { + it("computes uncached input + output cost in USD", () => { + // 1_000_000 in @ $1/M + 1_000_000 out @ $4/M = $5 + expect( + estimateModelUsageCost( + { + inputTokens: 1_000_000, + cachedInputTokens: 0, + outputTokens: 1_000_000, + }, + baseCost, + ), + ).toBe(5); + }); + + it("applies cache_read price for cachedInputTokens portion when present", () => { + // 100k cached @ $0.10/M + 100k uncached @ $1/M + 100k out @ $4/M + // = 0.01 + 0.10 + 0.40 = $0.51 + const cost = { input: 1, output: 4, cache_read: 0.1 }; + expect( + estimateModelUsageCost( + { inputTokens: 200_000, cachedInputTokens: 100_000, outputTokens: 100_000 }, + cost, + ), + ).toBeCloseTo(0.51, 6); + }); + + it("falls back to input price when cache_read is undefined (cached tokens billed at full price)", () => { + // 100k cached @ $1/M + 100k uncached @ $1/M + 100k out @ $4/M + // = 0.10 + 0.10 + 0.40 = $0.60 + expect( + estimateModelUsageCost( + { inputTokens: 200_000, cachedInputTokens: 100_000, outputTokens: 100_000 }, + baseCost, + ), + ).toBeCloseTo(0.6, 6); + }); + + it("clamps negative cachedInputTokens to 0", () => { + expect( + estimateModelUsageCost( + { inputTokens: 1_000_000, cachedInputTokens: -50_000, outputTokens: 0 }, + baseCost, + ), + ).toBe(1); + }); + + it("clamps cachedInputTokens > inputTokens so uncached doesn't go negative", () => { + // cached=200_000 but input=100_000 — uncached must clamp to 0 (not -100_000). + // 200_000 cached @ $1/M (no cache_read, falls back to input) + 0 out = $0.20. + // Without the Math.max guard, this would underbill: a negative uncached count + // times the input price would subtract from the cached charge. + expect( + estimateModelUsageCost( + { inputTokens: 100_000, cachedInputTokens: 200_000, outputTokens: 0 }, + baseCost, + ), + ).toBeCloseTo(0.2, 6); + }); + + it("clamps negative outputTokens to 0", () => { + expect( + estimateModelUsageCost( + { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: -1000 }, + baseCost, + ), + ).toBe(1); + }); + }); + + describe("context_over_200k tier", () => { + const tieredCost = { + input: 1, + output: 4, + context_over_200k: { input: 2, output: 8 }, + }; + + it("uses context_over_200k tier when inputTokens exceeds 200k", () => { + // 300_000 in @ $2/M + 100_000 out @ $8/M = 0.60 + 0.80 = $1.40 + expect( + estimateModelUsageCost( + { inputTokens: 300_000, cachedInputTokens: 0, outputTokens: 100_000 }, + tieredCost, + ), + ).toBeCloseTo(1.4, 6); + }); + + it("does NOT use context_over_200k tier when inputTokens is exactly 200k", () => { + // boundary check — must be strictly > 200k + // 200_000 @ $1/M + 0 out = $0.20 + expect( + estimateModelUsageCost( + { inputTokens: 200_000, cachedInputTokens: 0, outputTokens: 0 }, + tieredCost, + ), + ).toBeCloseTo(0.2, 6); + }); + + it("ignores context_over_200k when both input and output overrides are missing", () => { + // only cache_read is set in the override — should NOT trigger the tier swap + const cost = { + input: 1, + output: 4, + context_over_200k: { cache_read: 0.5 }, + }; + // Treated as base tier — 300k @ $1/M + 0 out = $0.30 + expect( + estimateModelUsageCost( + { inputTokens: 300_000, cachedInputTokens: 0, outputTokens: 0 }, + cost, + ), + ).toBeCloseTo(0.3, 6); + }); + + it("falls back to base tier input when context_over_200k.input is missing", () => { + const cost = { + input: 1, + output: 4, + context_over_200k: { output: 8 }, // only output overridden + }; + // 300k in @ $1/M (fallback) + 100k out @ $8/M = 0.30 + 0.80 = $1.10 + expect( + estimateModelUsageCost( + { inputTokens: 300_000, cachedInputTokens: 0, outputTokens: 100_000 }, + cost, + ), + ).toBeCloseTo(1.1, 6); + }); + }); +}); diff --git a/lib/credits/computeCreditsDeductedCents.ts b/lib/credits/computeCreditsDeductedCents.ts new file mode 100644 index 000000000..7887801ad --- /dev/null +++ b/lib/credits/computeCreditsDeductedCents.ts @@ -0,0 +1,57 @@ +import { getAvailableModels } from "@/lib/ai/getAvailableModels"; +import { estimateModelUsageCost } from "@/lib/credits/estimateModelUsageCost"; +import type { AvailableModelCost } from "@/lib/credits/AvailableModelCost"; + +/** + * Per-turn credit charge in cents (minimum 1). + * + * Mirrors open-agents' + * `apps/web/lib/credits/compute-credits-deducted-cents.ts` so the same + * billing math runs on both sides of the chat cutover. Resolution order: + * + * 1. Gateway-reported cost on `responseMessage.metadata.totalMessageCost` + * — the exact number the chat UI shows next to the assistant + * response. Used directly so the wallet debit converges with the + * cost label. + * 2. Token-based estimate against the model catalog's `cost` entry. + * Catalog is the same gateway / models.dev pipeline that backs + * `GET /api/ai/models`. + * 3. 1c floor when no pricing is available — every successful turn + * moves the wallet by at least 1c so a transient catalog outage + * can't make a turn free. + * + * Errors in the catalog fetch are swallowed and treated as path #3 — + * the caller (recordChatUsage) must not fail the workflow on a credit + * accounting hiccup. + * + * @param usage Token counts for the turn (matches AI SDK's `LanguageModelUsage`). + * @param modelId Fully qualified gateway id (e.g. `anthropic/claude-haiku-4.5`). + * @param gatewayCostUsd Gateway-reported total USD cost for the turn, + * when available. Subagent steps (collectTaskToolUsageEvents) won't + * have one and fall through to the token estimate. + * @returns Integer cent amount, ≥ 1. + */ +export async function computeCreditsDeductedCents( + usage: { + inputTokens: number; + cachedInputTokens: number; + outputTokens: number; + }, + modelId: string, + gatewayCostUsd?: number, +): Promise { + if (typeof gatewayCostUsd === "number" && Number.isFinite(gatewayCostUsd) && gatewayCostUsd > 0) { + return Math.max(1, Math.round(gatewayCostUsd * 100)); + } + + try { + const models = await getAvailableModels(); + const model = models.find(m => m.id === modelId) as { cost?: AvailableModelCost } | undefined; + const usd = estimateModelUsageCost(usage, model?.cost); + if (typeof usd !== "number" || usd <= 0) return 1; + return Math.max(1, Math.round(usd * 100)); + } catch (error) { + console.error("Failed to compute credits from usage:", error); + return 1; + } +} diff --git a/lib/credits/estimateModelUsageCost.ts b/lib/credits/estimateModelUsageCost.ts new file mode 100644 index 000000000..d675486a0 --- /dev/null +++ b/lib/credits/estimateModelUsageCost.ts @@ -0,0 +1,79 @@ +import type { AvailableModelCost, AvailableModelCostTier } from "@/lib/credits/AvailableModelCost"; + +const TOKENS_PER_MILLION = 1_000_000; + +/** + * Picks the right cost tier for `usage`. Above 200k input tokens many + * providers charge more, so the cost catalog exposes an override on + * `cost.context_over_200k`. Missing fields fall back to the base tier + * so a partial override is still valid. + * + * The trigger is "input > 200k AND at least one of input/output is + * overridden" — a tier that only overrides `cache_read` is treated as + * the base tier (it's not a real tier swap, just a cache discount). + */ +function resolveCostTier( + usage: { inputTokens: number }, + cost: AvailableModelCost | undefined, +): AvailableModelCostTier | undefined { + if (!cost) return undefined; + + if ( + usage.inputTokens > 200_000 && + (typeof cost.context_over_200k?.input === "number" || + typeof cost.context_over_200k?.output === "number") + ) { + return { + input: cost.context_over_200k.input ?? cost.input, + output: cost.context_over_200k.output ?? cost.output, + cache_read: cost.context_over_200k.cache_read ?? cost.cache_read, + }; + } + + return cost; +} + +/** + * Token-based estimate of a turn's USD cost, used as a fallback when + * the gateway hasn't reported an actual cost on the + * `assistantMessage.metadata.totalMessageCost` field. + * + * Ports `apps/web/lib/models.ts:estimateModelUsageCost` from + * open-agents so the same per-turn math runs on both sides during the + * cutover. Cached input tokens are billed at `cache_read` when the + * catalog exposes it and fall back to the base input price otherwise. + * + * Returns `undefined` when the catalog can't price this model (missing + * cost entry, missing input price, or missing output price) so the + * caller knows to use a different fallback (open-agents' rule: never + * charge 0 — bill the 1c minimum instead). + * + * @param usage Token counts for the turn (mirrors AI SDK's `LanguageModelUsage`). + * @param cost Per-model catalog entry. `undefined` short-circuits to `undefined`. + * @returns USD cost, or `undefined` if not priceable. + */ +export function estimateModelUsageCost( + usage: { + inputTokens: number; + cachedInputTokens: number; + outputTokens: number; + }, + cost: AvailableModelCost | undefined, +): number | undefined { + const costTier = resolveCostTier(usage, cost); + const inputPrice = costTier?.input; + const outputPrice = costTier?.output; + if (typeof inputPrice !== "number" || typeof outputPrice !== "number") { + return undefined; + } + + const cachedInputTokens = Math.max(0, usage.cachedInputTokens); + const uncachedInputTokens = Math.max(0, usage.inputTokens - cachedInputTokens); + const cacheReadPrice = costTier?.cache_read ?? inputPrice; + + return ( + (uncachedInputTokens * inputPrice) / TOKENS_PER_MILLION + + (cachedInputTokens * cacheReadPrice) / TOKENS_PER_MILLION + + (Math.max(0, usage.outputTokens) * outputPrice) / TOKENS_PER_MILLION + ); +} From 51cfe3c777595cd2e92d9cb68395dfa07deba42a Mon Sep 17 00:00:00 2001 From: Sweets Sweetman Date: Mon, 25 May 2026 15:46:58 -0500 Subject: [PATCH 2/7] feat(credits): charge credits per chat turn (atomic wallet debit + audit) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the silent revenue-loss gap tracked in #605: every successful chat workflow turn now debits the account's wallet AND records a usage_events audit row, in a single atomic transaction. End-to-end flow: 1. runAgentStep's onFinish captures responseMessage.metadata ({totalMessageCost, totalMessageUsage}) — same number the chat UI shows next to the response. 2. runAgentWorkflow calls recordChatUsage(accountId, modelId, message) after persistAssistantMessage. 3. recordChatUsage → computeCreditsDeductedCents (gateway cost OR token estimate OR 1c floor) → deductCreditsWithAudit (supabase.rpc'deduct_credits_with_audit'). 4. The Postgres function (recoupable/database#26) runs the wallet UPDATE and the usage_events INSERT in one implicit transaction — either both land or neither does. Matches open-agents' db.transaction(...) atomicity guarantee. Threads accountId through RunAgentWorkflowInput from validateChatWorkflow (auth-derived; never trusted from the request body). New files: - lib/supabase/credits_usage/deductCreditsWithAudit.ts (+ tests) Thin supabase.rpc wrapper; fire-and-forget (returns ok/error instead of throwing). Lives in lib/supabase/ per CLAUDE.md SRP. - app/lib/workflows/recordChatUsage.ts (+ tests) "use step" function that ties the two together with entry/skip/ success/error logs and graceful handling of missing metadata, catalog failures, and RPC errors. Updated: - app/lib/workflows/runAgentWorkflow.ts + accountId field on RunAgentWorkflowInput + recordChatUsage call after successful persistAssistantMessage - lib/chat/handleChatWorkflowStream.ts + passes validated.accountId into start(runAgentWorkflow, ...) - app/lib/workflows/__tests__/runAgentWorkflow.test.ts + 3 new tests (records on success, skips when no responseMessage, skips when runAgentStep throws) TDD: each new file went red → minimal impl → green. Suite: 3205 → 3220 passing. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../__tests__/recordChatUsage.test.ts | 201 ++++++++++++++++++ .../__tests__/runAgentWorkflow.test.ts | 53 +++++ app/lib/workflows/recordChatUsage.ts | 101 +++++++++ app/lib/workflows/runAgentWorkflow.ts | 20 ++ lib/chat/handleChatWorkflowStream.ts | 1 + .../__tests__/deductCreditsWithAudit.test.ts | 89 ++++++++ .../credits_usage/deductCreditsWithAudit.ts | 61 ++++++ 7 files changed, 526 insertions(+) create mode 100644 app/lib/workflows/__tests__/recordChatUsage.test.ts create mode 100644 app/lib/workflows/recordChatUsage.ts create mode 100644 lib/supabase/credits_usage/__tests__/deductCreditsWithAudit.test.ts create mode 100644 lib/supabase/credits_usage/deductCreditsWithAudit.ts diff --git a/app/lib/workflows/__tests__/recordChatUsage.test.ts b/app/lib/workflows/__tests__/recordChatUsage.test.ts new file mode 100644 index 000000000..2588f496f --- /dev/null +++ b/app/lib/workflows/__tests__/recordChatUsage.test.ts @@ -0,0 +1,201 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { recordChatUsage } from "@/app/lib/workflows/recordChatUsage"; +import { computeCreditsDeductedCents } from "@/lib/credits/computeCreditsDeductedCents"; +import { deductCreditsWithAudit } from "@/lib/supabase/credits_usage/deductCreditsWithAudit"; + +vi.mock("@/lib/credits/computeCreditsDeductedCents", () => ({ + computeCreditsDeductedCents: vi.fn(), +})); +vi.mock("@/lib/supabase/credits_usage/deductCreditsWithAudit", () => ({ + deductCreditsWithAudit: vi.fn(), +})); + +beforeEach(() => { + vi.clearAllMocks(); +}); + +const ACCOUNT = "11111111-1111-1111-1111-111111111111"; + +function buildAssistantMessage(metadata: Record) { + return { + id: "msg_abc", + role: "assistant" as const, + parts: [{ type: "text", text: "ok" }], + metadata, + }; +} + +describe("recordChatUsage", () => { + it("computes cents from the gateway cost on responseMessage.metadata.totalMessageCost", async () => { + vi.mocked(computeCreditsDeductedCents).mockResolvedValue(7); + vi.mocked(deductCreditsWithAudit).mockResolvedValue({ ok: true }); + + const message = buildAssistantMessage({ + totalMessageCost: 0.07, + totalMessageUsage: { + inputTokens: 100, + cachedInputTokens: 10, + outputTokens: 20, + }, + }); + + await recordChatUsage({ + accountId: ACCOUNT, + modelId: "anthropic/claude-haiku-4.5", + responseMessage: message as never, + }); + + expect(computeCreditsDeductedCents).toHaveBeenCalledWith( + { inputTokens: 100, cachedInputTokens: 10, outputTokens: 20 }, + "anthropic/claude-haiku-4.5", + 0.07, + ); + }); + + it("calls deductCreditsWithAudit with the computed cents and a fresh event id", async () => { + vi.mocked(computeCreditsDeductedCents).mockResolvedValue(42); + vi.mocked(deductCreditsWithAudit).mockResolvedValue({ ok: true }); + + const message = buildAssistantMessage({ + totalMessageCost: 0.42, + totalMessageUsage: { + inputTokens: 100, + cachedInputTokens: 0, + outputTokens: 200, + }, + }); + + await recordChatUsage({ + accountId: ACCOUNT, + modelId: "anthropic/claude-haiku-4.5", + responseMessage: message as never, + }); + + expect(deductCreditsWithAudit).toHaveBeenCalledTimes(1); + const args = vi.mocked(deductCreditsWithAudit).mock.calls[0]?.[0]; + expect(args?.accountId).toBe(ACCOUNT); + expect(args?.cents).toBe(42); + expect(typeof args?.eventId).toBe("string"); + expect(args?.eventId.length).toBeGreaterThan(0); + expect(args?.event).toMatchObject({ + source: "api", + agent_type: "main", + provider: "anthropic", + model_id: "anthropic/claude-haiku-4.5", + input_tokens: 100, + cached_input_tokens: 0, + output_tokens: 200, + }); + }); + + it("derives provider from modelId (everything before the first slash)", async () => { + vi.mocked(computeCreditsDeductedCents).mockResolvedValue(5); + vi.mocked(deductCreditsWithAudit).mockResolvedValue({ ok: true }); + + const message = buildAssistantMessage({ + totalMessageCost: 0.05, + totalMessageUsage: { inputTokens: 1, cachedInputTokens: 0, outputTokens: 1 }, + }); + + await recordChatUsage({ + accountId: ACCOUNT, + modelId: "openai/gpt-5.4", + responseMessage: message as never, + }); + + const args = vi.mocked(deductCreditsWithAudit).mock.calls[0]?.[0]; + expect(args?.event.provider).toBe("openai"); + expect(args?.event.model_id).toBe("openai/gpt-5.4"); + }); + + it("skips entirely when responseMessage is undefined (no usage to charge)", async () => { + await recordChatUsage({ + accountId: ACCOUNT, + modelId: "anthropic/claude-haiku-4.5", + responseMessage: undefined, + }); + + expect(computeCreditsDeductedCents).not.toHaveBeenCalled(); + expect(deductCreditsWithAudit).not.toHaveBeenCalled(); + }); + + it("skips entirely when responseMessage has no metadata at all", async () => { + await recordChatUsage({ + accountId: ACCOUNT, + modelId: "anthropic/claude-haiku-4.5", + responseMessage: { + id: "m", + role: "assistant", + parts: [{ type: "text", text: "x" }], + } as never, + }); + + expect(computeCreditsDeductedCents).not.toHaveBeenCalled(); + expect(deductCreditsWithAudit).not.toHaveBeenCalled(); + }); + + it("falls back to zero token counts when totalMessageUsage is missing (still charges the 1c floor)", async () => { + vi.mocked(computeCreditsDeductedCents).mockResolvedValue(1); + vi.mocked(deductCreditsWithAudit).mockResolvedValue({ ok: true }); + + const message = buildAssistantMessage({ + totalMessageCost: 0.0001, + // No totalMessageUsage on metadata + }); + + await recordChatUsage({ + accountId: ACCOUNT, + modelId: "anthropic/claude-haiku-4.5", + responseMessage: message as never, + }); + + expect(computeCreditsDeductedCents).toHaveBeenCalledWith( + { inputTokens: 0, cachedInputTokens: 0, outputTokens: 0 }, + "anthropic/claude-haiku-4.5", + 0.0001, + ); + const args = vi.mocked(deductCreditsWithAudit).mock.calls[0]?.[0]; + expect(args?.event.input_tokens).toBe(0); + expect(args?.event.output_tokens).toBe(0); + }); + + it("does NOT throw when deductCreditsWithAudit returns ok:false (fire-and-forget contract)", async () => { + vi.mocked(computeCreditsDeductedCents).mockResolvedValue(7); + vi.mocked(deductCreditsWithAudit).mockResolvedValue({ + ok: false, + error: "credits_usage row not found", + }); + + const message = buildAssistantMessage({ + totalMessageCost: 0.07, + totalMessageUsage: { inputTokens: 1, cachedInputTokens: 0, outputTokens: 1 }, + }); + + await expect( + recordChatUsage({ + accountId: ACCOUNT, + modelId: "anthropic/claude-haiku-4.5", + responseMessage: message as never, + }), + ).resolves.toBeUndefined(); + }); + + it("does NOT throw when computeCreditsDeductedCents rejects (fire-and-forget contract)", async () => { + vi.mocked(computeCreditsDeductedCents).mockRejectedValue(new Error("catalog down")); + + const message = buildAssistantMessage({ + totalMessageCost: 0.07, + totalMessageUsage: { inputTokens: 1, cachedInputTokens: 0, outputTokens: 1 }, + }); + + await expect( + recordChatUsage({ + accountId: ACCOUNT, + modelId: "anthropic/claude-haiku-4.5", + responseMessage: message as never, + }), + ).resolves.toBeUndefined(); + + expect(deductCreditsWithAudit).not.toHaveBeenCalled(); + }); +}); diff --git a/app/lib/workflows/__tests__/runAgentWorkflow.test.ts b/app/lib/workflows/__tests__/runAgentWorkflow.test.ts index 3e59ffc2d..b08b601fc 100644 --- a/app/lib/workflows/__tests__/runAgentWorkflow.test.ts +++ b/app/lib/workflows/__tests__/runAgentWorkflow.test.ts @@ -5,6 +5,7 @@ import { clearChatActiveStream } from "@/lib/chat/clearChatActiveStream"; import { closeChatStream } from "@/app/lib/workflows/closeChatStream"; import { generateAssistantMessageId } from "@/app/lib/workflows/generateAssistantMessageId"; import { persistAssistantMessage } from "@/lib/chat/persistAssistantMessage"; +import { recordChatUsage } from "@/app/lib/workflows/recordChatUsage"; vi.mock("@/app/lib/workflows/runAgentStep", () => ({ runAgentStep: vi.fn(), @@ -21,6 +22,9 @@ vi.mock("@/app/lib/workflows/generateAssistantMessageId", () => ({ vi.mock("@/lib/chat/persistAssistantMessage", () => ({ persistAssistantMessage: vi.fn(), })); +vi.mock("@/app/lib/workflows/recordChatUsage", () => ({ + recordChatUsage: vi.fn(), +})); // Captured writable stub so tests can assert closeChatStream got the // same instance the workflow body holds. const writableStub = new WritableStream(); @@ -43,6 +47,7 @@ const baseInput = { messages: [{ id: "m1", role: "user", parts: [{ type: "text", text: "hi" }] } as never], chatId: "chat-1", sessionId: "session-1", + accountId: "acc-1", modelId: "anthropic/claude-haiku-4.5", agentContext: { sandbox: { state: { type: "vercel" }, workingDirectory: "/sandbox/mono" }, @@ -170,4 +175,52 @@ describe("runAgentWorkflow", () => { expect.objectContaining({ assistantMessageId: "asst-in-progress" }), ); }); + + it("calls recordChatUsage with accountId/modelId/responseMessage when runAgentStep returns a message", async () => { + const responseMessage = { + id: "assistant-msg-xyz", + role: "assistant", + parts: [{ type: "text", text: "Hello!" }], + metadata: { + totalMessageCost: 0.07, + totalMessageUsage: { + inputTokens: 100, + cachedInputTokens: 10, + outputTokens: 20, + }, + }, + }; + vi.mocked(runAgentStep).mockResolvedValue({ + finishReason: "stop", + responseMessage: responseMessage as never, + }); + + await runAgentWorkflow(baseInput); + + expect(recordChatUsage).toHaveBeenCalledTimes(1); + expect(recordChatUsage).toHaveBeenCalledWith({ + accountId: "acc-1", + modelId: "anthropic/claude-haiku-4.5", + responseMessage, + }); + }); + + it("does NOT call recordChatUsage when runAgentStep returns no responseMessage", async () => { + vi.mocked(runAgentStep).mockResolvedValue({ + finishReason: "stop", + responseMessage: undefined, + }); + + await runAgentWorkflow(baseInput); + + expect(recordChatUsage).not.toHaveBeenCalled(); + }); + + it("does NOT call recordChatUsage when runAgentStep throws (no message to bill)", async () => { + vi.mocked(runAgentStep).mockRejectedValue(new Error("model exploded")); + + await expect(runAgentWorkflow(baseInput)).rejects.toThrow("model exploded"); + + expect(recordChatUsage).not.toHaveBeenCalled(); + }); }); diff --git a/app/lib/workflows/recordChatUsage.ts b/app/lib/workflows/recordChatUsage.ts new file mode 100644 index 000000000..d18d1543e --- /dev/null +++ b/app/lib/workflows/recordChatUsage.ts @@ -0,0 +1,101 @@ +import { nanoid } from "nanoid"; +import { computeCreditsDeductedCents } from "@/lib/credits/computeCreditsDeductedCents"; +import { deductCreditsWithAudit } from "@/lib/supabase/credits_usage/deductCreditsWithAudit"; + +/** + * Duck-typed shape of the assembled assistant message we read off the + * workflow result. Intentionally loose — both AI SDK's `UIMessage` and + * the in-test fixtures satisfy it. The actual bill comes from + * `metadata.totalMessageCost` (gateway-reported USD, the same number + * the chat UI displays) and `metadata.totalMessageUsage` (token + * counts). When metadata is missing we silently no-op. + */ +type AssistantMessageForUsage = { + metadata?: { + totalMessageCost?: number; + totalMessageUsage?: { + inputTokens?: number; + cachedInputTokens?: number; + outputTokens?: number; + }; + }; +}; + +/** + * Fire-and-forget billing step run at the end of a chat workflow. + * Mirrors open-agents' `recordWorkflowUsage` main-agent path + * (apps/web/app/workflows/chat-post-finish.ts) — without this step, + * every chat turn on `/api/chat/workflow` is free (recoupable/api#605). + * + * Wallet debit + audit row are atomic via the + * `deduct_credits_with_audit` Postgres function (one transaction). + * Errors are caught + logged so a transient credits-table outage + * never aborts the chat workflow. + * + * NOTE: Sub-agent (task tool) attribution is a separate follow-up — + * this step only charges the main-agent turn. The audit row is + * tagged `agent_type='main'`. + */ +export async function recordChatUsage(input: { + accountId: string; + modelId: string; + responseMessage: AssistantMessageForUsage | undefined; +}): Promise { + "use step"; + console.log("[recordChatUsage] enter", { + accountId: input.accountId, + modelId: input.modelId, + hasResponseMessage: input.responseMessage !== undefined, + }); + try { + const metadata = input.responseMessage?.metadata; + if (!metadata) { + console.log("[recordChatUsage] skip: no metadata on responseMessage", { + accountId: input.accountId, + modelId: input.modelId, + }); + return; + } + + const usage = { + inputTokens: metadata.totalMessageUsage?.inputTokens ?? 0, + cachedInputTokens: metadata.totalMessageUsage?.cachedInputTokens ?? 0, + outputTokens: metadata.totalMessageUsage?.outputTokens ?? 0, + }; + + const cents = await computeCreditsDeductedCents( + usage, + input.modelId, + metadata.totalMessageCost, + ); + + const provider = input.modelId.includes("/") ? input.modelId.split("/")[0] : undefined; + + const result = await deductCreditsWithAudit({ + accountId: input.accountId, + cents, + eventId: nanoid(), + event: { + source: "api", + agent_type: "main", + provider, + model_id: input.modelId, + input_tokens: usage.inputTokens, + cached_input_tokens: usage.cachedInputTokens, + output_tokens: usage.outputTokens, + }, + }); + + if (!result.ok) { + console.error("[recordChatUsage] debit failed:", result.error); + return; + } + console.log("[recordChatUsage] success", { + accountId: input.accountId, + modelId: input.modelId, + cents, + }); + } catch (error) { + console.error("[recordChatUsage] unexpected error:", error); + } +} diff --git a/app/lib/workflows/runAgentWorkflow.ts b/app/lib/workflows/runAgentWorkflow.ts index e4e628e96..70094a843 100644 --- a/app/lib/workflows/runAgentWorkflow.ts +++ b/app/lib/workflows/runAgentWorkflow.ts @@ -5,12 +5,20 @@ import { generateAssistantMessageId } from "@/app/lib/workflows/generateAssistan import { runAgentStep } from "@/app/lib/workflows/runAgentStep"; import { clearChatActiveStream } from "@/lib/chat/clearChatActiveStream"; import { persistAssistantMessage } from "@/lib/chat/persistAssistantMessage"; +import { recordChatUsage } from "@/app/lib/workflows/recordChatUsage"; import type { DurableAgentContext } from "@/lib/agent/tools/AgentContext"; export type RunAgentWorkflowInput = { messages: UIMessage[]; chatId: string; sessionId: string; + /** + * Authenticated account whose wallet absorbs the turn's cost. Resolved by + * the route handler via `validateChatWorkflow` so we never trust a + * caller-supplied id. Threaded into `recordChatUsage` after the assistant + * message is persisted. + */ + accountId: string; modelId: string; /** * JSON-serializable subset of AgentContext that survives the durable @@ -82,6 +90,18 @@ export async function runAgentWorkflow(input: RunAgentWorkflowInput): Promise ({ + default: { rpc: vi.fn() }, +})); + +beforeEach(() => { + vi.clearAllMocks(); +}); + +const ACCOUNT = "11111111-1111-1111-1111-111111111111"; +const validEvent = { + source: "api" as const, + agent_type: "main" as const, + provider: "anthropic", + model_id: "anthropic/claude-haiku-4.5", + input_tokens: 42, + cached_input_tokens: 10, + output_tokens: 13, + tool_call_count: 1, +}; + +describe("deductCreditsWithAudit", () => { + it("calls the deduct_credits_with_audit RPC with the right param names", async () => { + vi.mocked(supabase.rpc).mockResolvedValue({ data: null, error: null } as never); + + await deductCreditsWithAudit({ + accountId: ACCOUNT, + cents: 7, + eventId: "evt_abc", + event: validEvent, + }); + + expect(supabase.rpc).toHaveBeenCalledTimes(1); + expect(supabase.rpc).toHaveBeenCalledWith("deduct_credits_with_audit", { + p_account_id: ACCOUNT, + p_amount: 7, + p_event_id: "evt_abc", + p_event: validEvent, + }); + }); + + it("returns { ok: true } when the RPC succeeds", async () => { + vi.mocked(supabase.rpc).mockResolvedValue({ data: null, error: null } as never); + + const result = await deductCreditsWithAudit({ + accountId: ACCOUNT, + cents: 7, + eventId: "evt_abc", + event: validEvent, + }); + + expect(result).toEqual({ ok: true }); + }); + + it("returns { ok: false, error } when the RPC returns an error (does NOT throw)", async () => { + vi.mocked(supabase.rpc).mockResolvedValue({ + data: null, + error: { message: "credits_usage row not found" }, + } as never); + + const result = await deductCreditsWithAudit({ + accountId: ACCOUNT, + cents: 7, + eventId: "evt_abc", + event: validEvent, + }); + + expect(result).toEqual({ ok: false, error: "credits_usage row not found" }); + }); + + it("returns { ok: false, error } when the rpc call throws (network failure)", async () => { + vi.mocked(supabase.rpc).mockRejectedValue(new Error("network blip")); + + const result = await deductCreditsWithAudit({ + accountId: ACCOUNT, + cents: 7, + eventId: "evt_abc", + event: validEvent, + }); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error).toContain("network blip"); + } + }); +}); diff --git a/lib/supabase/credits_usage/deductCreditsWithAudit.ts b/lib/supabase/credits_usage/deductCreditsWithAudit.ts new file mode 100644 index 000000000..1bafef1e0 --- /dev/null +++ b/lib/supabase/credits_usage/deductCreditsWithAudit.ts @@ -0,0 +1,61 @@ +import supabase from "@/lib/supabase/serverClient"; + +/** + * JSON payload populating the `usage_events` audit row. Optional + * fields fall back to column defaults (`source='api'`, + * `agent_type='main'`, provider/model_id NULL, counts 0) inside the + * SQL function — see `database/supabase/migrations/20260525000000_deduct_credits_with_audit.sql`. + */ +export interface DeductCreditsAuditEvent { + source?: "api" | "web"; + agent_type?: "main" | "subagent"; + provider?: string; + model_id?: string; + input_tokens?: number; + cached_input_tokens?: number; + output_tokens?: number; + tool_call_count?: number; +} + +export type DeductCreditsWithAuditResult = { ok: true } | { ok: false; error: string }; + +/** + * Atomically debits `credits_usage.remaining_credits` and inserts the + * corresponding `usage_events` audit row via the + * `deduct_credits_with_audit` Postgres function. The SQL function + * runs both writes inside an implicit transaction so wallet/meter + * can never drift on partial failure — matching open-agents' + * `recordUsage` `db.transaction(...)` guarantee. + * + * Caller convention: `cents` is the amount to debit (integer ≥ 1). + * `eventId` should match `lib/supabase/usage_events/insertUsageEvent.ts`'s + * nanoid convention so the audit trail is consistent. + * + * Errors are never thrown — returns `{ ok: false, error }` instead so + * the caller (`recordChatUsage`) can swallow without aborting the + * chat workflow on a credit accounting hiccup. + */ +export async function deductCreditsWithAudit(params: { + accountId: string; + cents: number; + eventId: string; + event: DeductCreditsAuditEvent; +}): Promise { + try { + const { error } = await supabase.rpc("deduct_credits_with_audit", { + p_account_id: params.accountId, + p_amount: params.cents, + p_event_id: params.eventId, + p_event: params.event, + }); + if (error) { + return { ok: false, error: error.message }; + } + return { ok: true }; + } catch (error) { + return { + ok: false, + error: error instanceof Error ? error.message : String(error), + }; + } +} From 456b5b321535193aeb055dbcc8bae44cb72ec342 Mon Sep 17 00:00:00 2001 From: Sweets Sweetman Date: Mon, 25 May 2026 15:56:19 -0500 Subject: [PATCH 3/7] fix(credits): use flat interface for DeductCreditsWithAuditResult MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Next.js 16's type checker wasn't narrowing the discriminated union `{ ok: true } | { ok: false; error: string }` through `if (!result.ok)`, breaking the production build at `recordChatUsage.ts:90`. Vitest's own type config tolerated it, so this only surfaced on the preview deploy. Flat interface with optional `error?: string` avoids the narrowing requirement entirely — caller can read `result.error` directly when `result.ok` is false. Slight type-safety loss (compiler doesn't enforce that `error` is present when ok is false) is worth the build stability. Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/supabase/credits_usage/deductCreditsWithAudit.ts | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/supabase/credits_usage/deductCreditsWithAudit.ts b/lib/supabase/credits_usage/deductCreditsWithAudit.ts index 1bafef1e0..b4f6b442d 100644 --- a/lib/supabase/credits_usage/deductCreditsWithAudit.ts +++ b/lib/supabase/credits_usage/deductCreditsWithAudit.ts @@ -17,7 +17,15 @@ export interface DeductCreditsAuditEvent { tool_call_count?: number; } -export type DeductCreditsWithAuditResult = { ok: true } | { ok: false; error: string }; +export interface DeductCreditsWithAuditResult { + ok: boolean; + /** + * Present only when `ok === false`. Kept optional rather than as a + * discriminated union so callers can read `result.error` without + * tripping a narrowing edge case in the Next.js 16 type checker. + */ + error?: string; +} /** * Atomically debits `credits_usage.remaining_credits` and inserts the From 62e102b1a3cd1f5663aa29023b344fe48b1f7af0 Mon Sep 17 00:00:00 2001 From: Sweets Sweetman Date: Mon, 25 May 2026 16:01:15 -0500 Subject: [PATCH 4/7] fix(credits): regenerate supabase RPC type for deduct_credits_with_audit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous deploy failed because: 1. `types/database.types.ts` was stale — it didn't include the `deduct_credits_with_audit` RPC that landed in recoupable/database#26 (and was manually applied via the MCP after Supabase's GitHub App 502'd post-merge). Without that entry, `supabase.rpc("deduct_credits_with_audit", ...)` failed Next.js's stricter type check. 2. Even with the entry, the typed `Args.p_event: Json` couldn't accept our `DeductCreditsAuditEvent` interface directly — TS doesn't infer interface → index-signature assignment. Fixes: - Added the `deduct_credits_with_audit` entry to the Functions block of types/database.types.ts (matches the upstream regen via mcp__plugin_supabase_supabase__generate_typescript_types). - Cast `params.event as unknown as Json` at the supabase boundary in deductCreditsWithAudit.ts. The runtime payload is unchanged and the interface keeps its strong typing for callers. Verified locally: `pnpm exec tsc --noEmit` shows no errors in any file this PR touches. Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/supabase/credits_usage/deductCreditsWithAudit.ts | 6 +++++- types/database.types.ts | 9 +++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/lib/supabase/credits_usage/deductCreditsWithAudit.ts b/lib/supabase/credits_usage/deductCreditsWithAudit.ts index b4f6b442d..871800f73 100644 --- a/lib/supabase/credits_usage/deductCreditsWithAudit.ts +++ b/lib/supabase/credits_usage/deductCreditsWithAudit.ts @@ -1,4 +1,5 @@ import supabase from "@/lib/supabase/serverClient"; +import type { Json } from "@/types/database.types"; /** * JSON payload populating the `usage_events` audit row. Optional @@ -54,7 +55,10 @@ export async function deductCreditsWithAudit(params: { p_account_id: params.accountId, p_amount: params.cents, p_event_id: params.eventId, - p_event: params.event, + // DeductCreditsAuditEvent is structurally JSON-safe, but TS can't + // infer an interface → index-signature assignment automatically. + // Cast once at this boundary; the runtime payload is unchanged. + p_event: params.event as unknown as Json, }); if (error) { return { ok: false, error: error.message }; diff --git a/types/database.types.ts b/types/database.types.ts index 20287a6a9..cc7d03e1e 100644 --- a/types/database.types.ts +++ b/types/database.types.ts @@ -3961,6 +3961,15 @@ export type Database = { Args: { account_id: string; amount: number }; Returns: undefined; }; + deduct_credits_with_audit: { + Args: { + p_account_id: string; + p_amount: number; + p_event: Json; + p_event_id: string; + }; + Returns: undefined; + }; extract_domain: { Args: { email: string }; Returns: string }; get_account_invitations: { Args: { account_slug: string }; From 193b6429de6138ab642bd4a698944f2d4bd0596c Mon Sep 17 00:00:00 2001 From: Sweets Sweetman Date: Mon, 25 May 2026 16:41:19 -0500 Subject: [PATCH 5/7] refactor(credits): consolidate chat-workflow billing into handleChatCredits (DRY) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses the user's PR review: my new files duplicated existing infrastructure. Consolidates everything into the existing pattern (handleChatCredits → getCreditUsage + recordCreditDeduction) so chat workflow billing uses the SAME orchestrator that the streaming chat path (handleChatStream) already uses. Changes: 1. lib/credits/getCreditUsage.ts - Added optional `gatewayCostUsd?: number` parameter - When positive, returns it directly (skips catalog lookup) - Otherwise existing token-math path is unchanged (backwards compat) 2. lib/credits/handleChatCredits.ts - Added `gatewayCostUsd?: number` (threaded to getCreditUsage) - Added `source?: "web" | "api"` (defaults to "web" for backwards compat; chat workflow passes "api" so admin dashboards can distinguish surface in spend rollups) 3. lib/credits/recordCreditDeduction.ts - Switched from `deductCredits + insertUsageEvent` (two separate Supabase calls, non-atomic — could leave wallet/meter drifted on partial failure) to the single `deduct_credits_with_audit` RPC. - Now atomic for ALL callers (chat workflow + research handlers), not just the new chat-workflow path. - Return shape simplified: `{ success: boolean }` instead of `{ success, newBalance }` (no caller was reading newBalance). 4. app/lib/workflows/runAgentWorkflow.ts - Imports handleChatCredits instead of recordChatUsage. - Reads gatewayCostUsd + token counts from responseMessage.metadata.{totalMessageCost, totalMessageUsage}. 5. Deleted (consolidated into existing infrastructure): - app/lib/workflows/recordChatUsage.ts - lib/credits/computeCreditsDeductedCents.ts - lib/credits/estimateModelUsageCost.ts - lib/credits/AvailableModelCost.ts - lib/credits/resolveCostTier.ts - All their test files Net delta: -7 files, +0 new orchestrator function. Plus the atomicity guarantee now applies to research handlers too. TDD: each change went RED → minimum impl → GREEN, with all 3195 tests passing at the end. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../__tests__/recordChatUsage.test.ts | 201 ------------------ .../__tests__/runAgentWorkflow.test.ts | 54 +++-- app/lib/workflows/recordChatUsage.ts | 101 --------- app/lib/workflows/runAgentWorkflow.ts | 43 +++- .../integration/chatEndToEnd.test.ts | 1 + lib/credits/AvailableModelCost.ts | 25 --- .../computeCreditsDeductedCents.test.ts | 183 ---------------- .../__tests__/estimateModelUsageCost.test.ts | 165 -------------- lib/credits/__tests__/getCreditUsage.test.ts | 76 +++++++ .../__tests__/handleChatCredits.test.ts | 49 ++++- .../__tests__/recordCreditDeduction.test.ts | 82 ++++--- lib/credits/computeCreditsDeductedCents.ts | 57 ----- lib/credits/estimateModelUsageCost.ts | 79 ------- lib/credits/getCreditUsage.ts | 17 ++ lib/credits/handleChatCredits.ts | 33 ++- lib/credits/recordCreditDeduction.ts | 63 +++--- 16 files changed, 319 insertions(+), 910 deletions(-) delete mode 100644 app/lib/workflows/__tests__/recordChatUsage.test.ts delete mode 100644 app/lib/workflows/recordChatUsage.ts delete mode 100644 lib/credits/AvailableModelCost.ts delete mode 100644 lib/credits/__tests__/computeCreditsDeductedCents.test.ts delete mode 100644 lib/credits/__tests__/estimateModelUsageCost.test.ts delete mode 100644 lib/credits/computeCreditsDeductedCents.ts delete mode 100644 lib/credits/estimateModelUsageCost.ts diff --git a/app/lib/workflows/__tests__/recordChatUsage.test.ts b/app/lib/workflows/__tests__/recordChatUsage.test.ts deleted file mode 100644 index 2588f496f..000000000 --- a/app/lib/workflows/__tests__/recordChatUsage.test.ts +++ /dev/null @@ -1,201 +0,0 @@ -import { describe, it, expect, vi, beforeEach } from "vitest"; -import { recordChatUsage } from "@/app/lib/workflows/recordChatUsage"; -import { computeCreditsDeductedCents } from "@/lib/credits/computeCreditsDeductedCents"; -import { deductCreditsWithAudit } from "@/lib/supabase/credits_usage/deductCreditsWithAudit"; - -vi.mock("@/lib/credits/computeCreditsDeductedCents", () => ({ - computeCreditsDeductedCents: vi.fn(), -})); -vi.mock("@/lib/supabase/credits_usage/deductCreditsWithAudit", () => ({ - deductCreditsWithAudit: vi.fn(), -})); - -beforeEach(() => { - vi.clearAllMocks(); -}); - -const ACCOUNT = "11111111-1111-1111-1111-111111111111"; - -function buildAssistantMessage(metadata: Record) { - return { - id: "msg_abc", - role: "assistant" as const, - parts: [{ type: "text", text: "ok" }], - metadata, - }; -} - -describe("recordChatUsage", () => { - it("computes cents from the gateway cost on responseMessage.metadata.totalMessageCost", async () => { - vi.mocked(computeCreditsDeductedCents).mockResolvedValue(7); - vi.mocked(deductCreditsWithAudit).mockResolvedValue({ ok: true }); - - const message = buildAssistantMessage({ - totalMessageCost: 0.07, - totalMessageUsage: { - inputTokens: 100, - cachedInputTokens: 10, - outputTokens: 20, - }, - }); - - await recordChatUsage({ - accountId: ACCOUNT, - modelId: "anthropic/claude-haiku-4.5", - responseMessage: message as never, - }); - - expect(computeCreditsDeductedCents).toHaveBeenCalledWith( - { inputTokens: 100, cachedInputTokens: 10, outputTokens: 20 }, - "anthropic/claude-haiku-4.5", - 0.07, - ); - }); - - it("calls deductCreditsWithAudit with the computed cents and a fresh event id", async () => { - vi.mocked(computeCreditsDeductedCents).mockResolvedValue(42); - vi.mocked(deductCreditsWithAudit).mockResolvedValue({ ok: true }); - - const message = buildAssistantMessage({ - totalMessageCost: 0.42, - totalMessageUsage: { - inputTokens: 100, - cachedInputTokens: 0, - outputTokens: 200, - }, - }); - - await recordChatUsage({ - accountId: ACCOUNT, - modelId: "anthropic/claude-haiku-4.5", - responseMessage: message as never, - }); - - expect(deductCreditsWithAudit).toHaveBeenCalledTimes(1); - const args = vi.mocked(deductCreditsWithAudit).mock.calls[0]?.[0]; - expect(args?.accountId).toBe(ACCOUNT); - expect(args?.cents).toBe(42); - expect(typeof args?.eventId).toBe("string"); - expect(args?.eventId.length).toBeGreaterThan(0); - expect(args?.event).toMatchObject({ - source: "api", - agent_type: "main", - provider: "anthropic", - model_id: "anthropic/claude-haiku-4.5", - input_tokens: 100, - cached_input_tokens: 0, - output_tokens: 200, - }); - }); - - it("derives provider from modelId (everything before the first slash)", async () => { - vi.mocked(computeCreditsDeductedCents).mockResolvedValue(5); - vi.mocked(deductCreditsWithAudit).mockResolvedValue({ ok: true }); - - const message = buildAssistantMessage({ - totalMessageCost: 0.05, - totalMessageUsage: { inputTokens: 1, cachedInputTokens: 0, outputTokens: 1 }, - }); - - await recordChatUsage({ - accountId: ACCOUNT, - modelId: "openai/gpt-5.4", - responseMessage: message as never, - }); - - const args = vi.mocked(deductCreditsWithAudit).mock.calls[0]?.[0]; - expect(args?.event.provider).toBe("openai"); - expect(args?.event.model_id).toBe("openai/gpt-5.4"); - }); - - it("skips entirely when responseMessage is undefined (no usage to charge)", async () => { - await recordChatUsage({ - accountId: ACCOUNT, - modelId: "anthropic/claude-haiku-4.5", - responseMessage: undefined, - }); - - expect(computeCreditsDeductedCents).not.toHaveBeenCalled(); - expect(deductCreditsWithAudit).not.toHaveBeenCalled(); - }); - - it("skips entirely when responseMessage has no metadata at all", async () => { - await recordChatUsage({ - accountId: ACCOUNT, - modelId: "anthropic/claude-haiku-4.5", - responseMessage: { - id: "m", - role: "assistant", - parts: [{ type: "text", text: "x" }], - } as never, - }); - - expect(computeCreditsDeductedCents).not.toHaveBeenCalled(); - expect(deductCreditsWithAudit).not.toHaveBeenCalled(); - }); - - it("falls back to zero token counts when totalMessageUsage is missing (still charges the 1c floor)", async () => { - vi.mocked(computeCreditsDeductedCents).mockResolvedValue(1); - vi.mocked(deductCreditsWithAudit).mockResolvedValue({ ok: true }); - - const message = buildAssistantMessage({ - totalMessageCost: 0.0001, - // No totalMessageUsage on metadata - }); - - await recordChatUsage({ - accountId: ACCOUNT, - modelId: "anthropic/claude-haiku-4.5", - responseMessage: message as never, - }); - - expect(computeCreditsDeductedCents).toHaveBeenCalledWith( - { inputTokens: 0, cachedInputTokens: 0, outputTokens: 0 }, - "anthropic/claude-haiku-4.5", - 0.0001, - ); - const args = vi.mocked(deductCreditsWithAudit).mock.calls[0]?.[0]; - expect(args?.event.input_tokens).toBe(0); - expect(args?.event.output_tokens).toBe(0); - }); - - it("does NOT throw when deductCreditsWithAudit returns ok:false (fire-and-forget contract)", async () => { - vi.mocked(computeCreditsDeductedCents).mockResolvedValue(7); - vi.mocked(deductCreditsWithAudit).mockResolvedValue({ - ok: false, - error: "credits_usage row not found", - }); - - const message = buildAssistantMessage({ - totalMessageCost: 0.07, - totalMessageUsage: { inputTokens: 1, cachedInputTokens: 0, outputTokens: 1 }, - }); - - await expect( - recordChatUsage({ - accountId: ACCOUNT, - modelId: "anthropic/claude-haiku-4.5", - responseMessage: message as never, - }), - ).resolves.toBeUndefined(); - }); - - it("does NOT throw when computeCreditsDeductedCents rejects (fire-and-forget contract)", async () => { - vi.mocked(computeCreditsDeductedCents).mockRejectedValue(new Error("catalog down")); - - const message = buildAssistantMessage({ - totalMessageCost: 0.07, - totalMessageUsage: { inputTokens: 1, cachedInputTokens: 0, outputTokens: 1 }, - }); - - await expect( - recordChatUsage({ - accountId: ACCOUNT, - modelId: "anthropic/claude-haiku-4.5", - responseMessage: message as never, - }), - ).resolves.toBeUndefined(); - - expect(deductCreditsWithAudit).not.toHaveBeenCalled(); - }); -}); diff --git a/app/lib/workflows/__tests__/runAgentWorkflow.test.ts b/app/lib/workflows/__tests__/runAgentWorkflow.test.ts index b08b601fc..19cec3b78 100644 --- a/app/lib/workflows/__tests__/runAgentWorkflow.test.ts +++ b/app/lib/workflows/__tests__/runAgentWorkflow.test.ts @@ -5,7 +5,7 @@ import { clearChatActiveStream } from "@/lib/chat/clearChatActiveStream"; import { closeChatStream } from "@/app/lib/workflows/closeChatStream"; import { generateAssistantMessageId } from "@/app/lib/workflows/generateAssistantMessageId"; import { persistAssistantMessage } from "@/lib/chat/persistAssistantMessage"; -import { recordChatUsage } from "@/app/lib/workflows/recordChatUsage"; +import { handleChatCredits } from "@/lib/credits/handleChatCredits"; vi.mock("@/app/lib/workflows/runAgentStep", () => ({ runAgentStep: vi.fn(), @@ -22,8 +22,8 @@ vi.mock("@/app/lib/workflows/generateAssistantMessageId", () => ({ vi.mock("@/lib/chat/persistAssistantMessage", () => ({ persistAssistantMessage: vi.fn(), })); -vi.mock("@/app/lib/workflows/recordChatUsage", () => ({ - recordChatUsage: vi.fn(), +vi.mock("@/lib/credits/handleChatCredits", () => ({ + handleChatCredits: vi.fn(), })); // Captured writable stub so tests can assert closeChatStream got the // same instance the workflow body holds. @@ -176,7 +176,7 @@ describe("runAgentWorkflow", () => { ); }); - it("calls recordChatUsage with accountId/modelId/responseMessage when runAgentStep returns a message", async () => { + it("calls handleChatCredits with the gateway cost + token usage from responseMessage.metadata", async () => { const responseMessage = { id: "assistant-msg-xyz", role: "assistant", @@ -197,15 +197,45 @@ describe("runAgentWorkflow", () => { await runAgentWorkflow(baseInput); - expect(recordChatUsage).toHaveBeenCalledTimes(1); - expect(recordChatUsage).toHaveBeenCalledWith({ + expect(handleChatCredits).toHaveBeenCalledTimes(1); + expect(handleChatCredits).toHaveBeenCalledWith({ accountId: "acc-1", - modelId: "anthropic/claude-haiku-4.5", - responseMessage, + model: "anthropic/claude-haiku-4.5", + source: "api", + gatewayCostUsd: 0.07, + usage: { + inputTokens: 100, + cachedInputTokens: 10, + outputTokens: 20, + }, + }); + }); + + it("calls handleChatCredits with zero usage when metadata is missing (lets the 1c floor apply)", async () => { + const responseMessage = { + id: "assistant-msg-xyz", + role: "assistant", + parts: [{ type: "text", text: "Hello!" }], + // no metadata + }; + vi.mocked(runAgentStep).mockResolvedValue({ + finishReason: "stop", + responseMessage: responseMessage as never, + }); + + await runAgentWorkflow(baseInput); + + expect(handleChatCredits).toHaveBeenCalledTimes(1); + expect(handleChatCredits).toHaveBeenCalledWith({ + accountId: "acc-1", + model: "anthropic/claude-haiku-4.5", + source: "api", + gatewayCostUsd: undefined, + usage: { inputTokens: 0, cachedInputTokens: 0, outputTokens: 0 }, }); }); - it("does NOT call recordChatUsage when runAgentStep returns no responseMessage", async () => { + it("does NOT call handleChatCredits when runAgentStep returns no responseMessage", async () => { vi.mocked(runAgentStep).mockResolvedValue({ finishReason: "stop", responseMessage: undefined, @@ -213,14 +243,14 @@ describe("runAgentWorkflow", () => { await runAgentWorkflow(baseInput); - expect(recordChatUsage).not.toHaveBeenCalled(); + expect(handleChatCredits).not.toHaveBeenCalled(); }); - it("does NOT call recordChatUsage when runAgentStep throws (no message to bill)", async () => { + it("does NOT call handleChatCredits when runAgentStep throws (no message to bill)", async () => { vi.mocked(runAgentStep).mockRejectedValue(new Error("model exploded")); await expect(runAgentWorkflow(baseInput)).rejects.toThrow("model exploded"); - expect(recordChatUsage).not.toHaveBeenCalled(); + expect(handleChatCredits).not.toHaveBeenCalled(); }); }); diff --git a/app/lib/workflows/recordChatUsage.ts b/app/lib/workflows/recordChatUsage.ts deleted file mode 100644 index d18d1543e..000000000 --- a/app/lib/workflows/recordChatUsage.ts +++ /dev/null @@ -1,101 +0,0 @@ -import { nanoid } from "nanoid"; -import { computeCreditsDeductedCents } from "@/lib/credits/computeCreditsDeductedCents"; -import { deductCreditsWithAudit } from "@/lib/supabase/credits_usage/deductCreditsWithAudit"; - -/** - * Duck-typed shape of the assembled assistant message we read off the - * workflow result. Intentionally loose — both AI SDK's `UIMessage` and - * the in-test fixtures satisfy it. The actual bill comes from - * `metadata.totalMessageCost` (gateway-reported USD, the same number - * the chat UI displays) and `metadata.totalMessageUsage` (token - * counts). When metadata is missing we silently no-op. - */ -type AssistantMessageForUsage = { - metadata?: { - totalMessageCost?: number; - totalMessageUsage?: { - inputTokens?: number; - cachedInputTokens?: number; - outputTokens?: number; - }; - }; -}; - -/** - * Fire-and-forget billing step run at the end of a chat workflow. - * Mirrors open-agents' `recordWorkflowUsage` main-agent path - * (apps/web/app/workflows/chat-post-finish.ts) — without this step, - * every chat turn on `/api/chat/workflow` is free (recoupable/api#605). - * - * Wallet debit + audit row are atomic via the - * `deduct_credits_with_audit` Postgres function (one transaction). - * Errors are caught + logged so a transient credits-table outage - * never aborts the chat workflow. - * - * NOTE: Sub-agent (task tool) attribution is a separate follow-up — - * this step only charges the main-agent turn. The audit row is - * tagged `agent_type='main'`. - */ -export async function recordChatUsage(input: { - accountId: string; - modelId: string; - responseMessage: AssistantMessageForUsage | undefined; -}): Promise { - "use step"; - console.log("[recordChatUsage] enter", { - accountId: input.accountId, - modelId: input.modelId, - hasResponseMessage: input.responseMessage !== undefined, - }); - try { - const metadata = input.responseMessage?.metadata; - if (!metadata) { - console.log("[recordChatUsage] skip: no metadata on responseMessage", { - accountId: input.accountId, - modelId: input.modelId, - }); - return; - } - - const usage = { - inputTokens: metadata.totalMessageUsage?.inputTokens ?? 0, - cachedInputTokens: metadata.totalMessageUsage?.cachedInputTokens ?? 0, - outputTokens: metadata.totalMessageUsage?.outputTokens ?? 0, - }; - - const cents = await computeCreditsDeductedCents( - usage, - input.modelId, - metadata.totalMessageCost, - ); - - const provider = input.modelId.includes("/") ? input.modelId.split("/")[0] : undefined; - - const result = await deductCreditsWithAudit({ - accountId: input.accountId, - cents, - eventId: nanoid(), - event: { - source: "api", - agent_type: "main", - provider, - model_id: input.modelId, - input_tokens: usage.inputTokens, - cached_input_tokens: usage.cachedInputTokens, - output_tokens: usage.outputTokens, - }, - }); - - if (!result.ok) { - console.error("[recordChatUsage] debit failed:", result.error); - return; - } - console.log("[recordChatUsage] success", { - accountId: input.accountId, - modelId: input.modelId, - cents, - }); - } catch (error) { - console.error("[recordChatUsage] unexpected error:", error); - } -} diff --git a/app/lib/workflows/runAgentWorkflow.ts b/app/lib/workflows/runAgentWorkflow.ts index 70094a843..9afcdeb79 100644 --- a/app/lib/workflows/runAgentWorkflow.ts +++ b/app/lib/workflows/runAgentWorkflow.ts @@ -1,11 +1,11 @@ import { getWorkflowMetadata, getWritable } from "workflow"; -import type { UIMessage, UIMessageChunk } from "ai"; +import type { LanguageModelUsage, UIMessage, UIMessageChunk } from "ai"; import { closeChatStream } from "@/app/lib/workflows/closeChatStream"; import { generateAssistantMessageId } from "@/app/lib/workflows/generateAssistantMessageId"; import { runAgentStep } from "@/app/lib/workflows/runAgentStep"; import { clearChatActiveStream } from "@/lib/chat/clearChatActiveStream"; import { persistAssistantMessage } from "@/lib/chat/persistAssistantMessage"; -import { recordChatUsage } from "@/app/lib/workflows/recordChatUsage"; +import { handleChatCredits } from "@/lib/credits/handleChatCredits"; import type { DurableAgentContext } from "@/lib/agent/tools/AgentContext"; export type RunAgentWorkflowInput = { @@ -93,14 +93,39 @@ export async function runAgentWorkflow(input: RunAgentWorkflowInput): Promise { expect(mockGetCreditUsage).toHaveBeenCalledWith( { promptTokens: 1000, completionTokens: 500 }, "gpt-4", + undefined, ); expect(mockRecordCreditDeduction).toHaveBeenCalledWith( expect.objectContaining({ diff --git a/lib/credits/AvailableModelCost.ts b/lib/credits/AvailableModelCost.ts deleted file mode 100644 index 72d3026f9..000000000 --- a/lib/credits/AvailableModelCost.ts +++ /dev/null @@ -1,25 +0,0 @@ -/** - * Shape of the per-model cost catalog used for token-based credit estimation. - * - * Mirrors open-agents' - * `apps/web/lib/models.ts:AvailableModelCost` so the same - * `estimateModelUsageCost` math runs against either catalog without - * shape conversion. api's current gateway/models.dev pipeline emits - * only `{ input, output }` (see - * `lib/ai/parseModelsDevMetadata.ts:ModelsDevMetadata`); the richer - * `cache_read` and `context_over_200k` fields are kept in the type so - * a future catalog expansion (or a hand-edited override) gets picked - * up automatically by the estimator. - * - * All token-cost units are USD per million tokens, matching - * models.dev. - */ -export interface AvailableModelCostTier { - input?: number; - output?: number; - cache_read?: number; -} - -export interface AvailableModelCost extends AvailableModelCostTier { - context_over_200k?: AvailableModelCostTier; -} diff --git a/lib/credits/__tests__/computeCreditsDeductedCents.test.ts b/lib/credits/__tests__/computeCreditsDeductedCents.test.ts deleted file mode 100644 index a05c79df7..000000000 --- a/lib/credits/__tests__/computeCreditsDeductedCents.test.ts +++ /dev/null @@ -1,183 +0,0 @@ -import { describe, it, expect, vi, beforeEach } from "vitest"; -import { computeCreditsDeductedCents } from "@/lib/credits/computeCreditsDeductedCents"; -import { getAvailableModels } from "@/lib/ai/getAvailableModels"; - -vi.mock("@/lib/ai/getAvailableModels", () => ({ - getAvailableModels: vi.fn(), -})); - -beforeEach(() => { - vi.clearAllMocks(); - // Default catalog: empty (forces token-estimate path to fall through to 1c). - vi.mocked(getAvailableModels).mockResolvedValue([]); -}); - -const ZERO_USAGE = { inputTokens: 0, cachedInputTokens: 0, outputTokens: 0 }; - -describe("computeCreditsDeductedCents", () => { - describe("gateway cost path (preferred)", () => { - it("returns gateway cost in cents when gatewayCostUsd is a positive number", async () => { - // $0.0074 → 0.74c → ceil to 1c minimum is not needed; round to 1c - expect( - await computeCreditsDeductedCents(ZERO_USAGE, "anthropic/claude-haiku-4.5", 0.0074), - ).toBe(1); - // $0.42 → 42c - expect( - await computeCreditsDeductedCents(ZERO_USAGE, "anthropic/claude-haiku-4.5", 0.42), - ).toBe(42); - }); - - it("rounds the gateway cost to the nearest cent", async () => { - // $0.123 → 12.3c → 12c - expect(await computeCreditsDeductedCents(ZERO_USAGE, "model", 0.123)).toBe(12); - // $0.126 → 12.6c → 13c - expect(await computeCreditsDeductedCents(ZERO_USAGE, "model", 0.126)).toBe(13); - }); - - it("returns at least 1 when gateway cost rounds to 0", async () => { - // $0.0001 → 0.01c → would round to 0, must bump to 1 - expect(await computeCreditsDeductedCents(ZERO_USAGE, "model", 0.0001)).toBe(1); - }); - - it("does NOT call the catalog when gateway cost is usable", async () => { - await computeCreditsDeductedCents(ZERO_USAGE, "model", 0.05); - expect(getAvailableModels).not.toHaveBeenCalled(); - }); - }); - - describe("falls back to token-based estimate when gateway cost is unusable", () => { - it("when gatewayCostUsd is undefined", async () => { - vi.mocked(getAvailableModels).mockResolvedValue([ - { id: "model-x", cost: { input: 1, output: 4 } } as never, - ]); - // 1M in + 1M out → $5 → 500c - expect( - await computeCreditsDeductedCents( - { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 }, - "model-x", - undefined, - ), - ).toBe(500); - }); - - it("when gatewayCostUsd is 0", async () => { - vi.mocked(getAvailableModels).mockResolvedValue([ - { id: "model-x", cost: { input: 1, output: 4 } } as never, - ]); - expect( - await computeCreditsDeductedCents( - { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 }, - "model-x", - 0, - ), - ).toBe(500); - }); - - it("when gatewayCostUsd is negative (corrupted/upstream bug)", async () => { - vi.mocked(getAvailableModels).mockResolvedValue([ - { id: "model-x", cost: { input: 1, output: 4 } } as never, - ]); - expect( - await computeCreditsDeductedCents( - { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 }, - "model-x", - -1, - ), - ).toBe(500); - }); - - it("when gatewayCostUsd is NaN (not Number.isFinite)", async () => { - vi.mocked(getAvailableModels).mockResolvedValue([ - { id: "model-x", cost: { input: 1, output: 4 } } as never, - ]); - expect( - await computeCreditsDeductedCents( - { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 }, - "model-x", - Number.NaN, - ), - ).toBe(500); - }); - - it("when gatewayCostUsd is Infinity", async () => { - vi.mocked(getAvailableModels).mockResolvedValue([ - { id: "model-x", cost: { input: 1, output: 4 } } as never, - ]); - expect( - await computeCreditsDeductedCents( - { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 }, - "model-x", - Number.POSITIVE_INFINITY, - ), - ).toBe(500); - }); - }); - - describe("estimate fallbacks (also: never charge zero on success)", () => { - it("returns 1 when modelId is not in the catalog", async () => { - vi.mocked(getAvailableModels).mockResolvedValue([ - { id: "other-model", cost: { input: 1, output: 4 } } as never, - ]); - expect( - await computeCreditsDeductedCents( - { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 1000 }, - "model-x", - undefined, - ), - ).toBe(1); - }); - - it("returns 1 when the catalog has no cost for the model", async () => { - vi.mocked(getAvailableModels).mockResolvedValue([{ id: "model-x" } as never]); - expect( - await computeCreditsDeductedCents( - { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 1000 }, - "model-x", - undefined, - ), - ).toBe(1); - }); - - it("returns 1 when getAvailableModels rejects", async () => { - vi.mocked(getAvailableModels).mockRejectedValue(new Error("gateway down")); - expect( - await computeCreditsDeductedCents( - { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 1000 }, - "model-x", - undefined, - ), - ).toBe(1); - }); - - it("returns 1 when token estimate rounds to 0 (very tiny usage)", async () => { - vi.mocked(getAvailableModels).mockResolvedValue([ - { id: "model-x", cost: { input: 0.0001, output: 0.0001 } } as never, - ]); - // ~$0.0000002 → 0.00002c → bumps to 1c minimum - expect( - await computeCreditsDeductedCents( - { inputTokens: 1, cachedInputTokens: 0, outputTokens: 1 }, - "model-x", - undefined, - ), - ).toBe(1); - }); - }); - - describe("model lookup", () => { - it("matches modelId exactly (provider/model form)", async () => { - vi.mocked(getAvailableModels).mockResolvedValue([ - { id: "anthropic/claude-haiku-4.5", cost: { input: 1, output: 4 } } as never, - { id: "openai/gpt-5", cost: { input: 10, output: 40 } } as never, - ]); - // Pick haiku: 1M in + 1M out @ haiku rates → $5 → 500c - expect( - await computeCreditsDeductedCents( - { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 }, - "anthropic/claude-haiku-4.5", - undefined, - ), - ).toBe(500); - }); - }); -}); diff --git a/lib/credits/__tests__/estimateModelUsageCost.test.ts b/lib/credits/__tests__/estimateModelUsageCost.test.ts deleted file mode 100644 index e41d9454e..000000000 --- a/lib/credits/__tests__/estimateModelUsageCost.test.ts +++ /dev/null @@ -1,165 +0,0 @@ -import { describe, it, expect } from "vitest"; -import { estimateModelUsageCost } from "@/lib/credits/estimateModelUsageCost"; - -const baseCost = { input: 1, output: 4 }; // $1/M in, $4/M out - -describe("estimateModelUsageCost", () => { - describe("guard rails", () => { - it("returns undefined when cost catalog entry is missing", () => { - expect( - estimateModelUsageCost( - { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 500 }, - undefined, - ), - ).toBeUndefined(); - }); - - it("returns undefined when input price is missing", () => { - expect( - estimateModelUsageCost( - { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 500 }, - { output: 4 }, - ), - ).toBeUndefined(); - }); - - it("returns undefined when output price is missing", () => { - expect( - estimateModelUsageCost( - { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 500 }, - { input: 1 }, - ), - ).toBeUndefined(); - }); - }); - - describe("base tier (≤200k input tokens)", () => { - it("computes uncached input + output cost in USD", () => { - // 1_000_000 in @ $1/M + 1_000_000 out @ $4/M = $5 - expect( - estimateModelUsageCost( - { - inputTokens: 1_000_000, - cachedInputTokens: 0, - outputTokens: 1_000_000, - }, - baseCost, - ), - ).toBe(5); - }); - - it("applies cache_read price for cachedInputTokens portion when present", () => { - // 100k cached @ $0.10/M + 100k uncached @ $1/M + 100k out @ $4/M - // = 0.01 + 0.10 + 0.40 = $0.51 - const cost = { input: 1, output: 4, cache_read: 0.1 }; - expect( - estimateModelUsageCost( - { inputTokens: 200_000, cachedInputTokens: 100_000, outputTokens: 100_000 }, - cost, - ), - ).toBeCloseTo(0.51, 6); - }); - - it("falls back to input price when cache_read is undefined (cached tokens billed at full price)", () => { - // 100k cached @ $1/M + 100k uncached @ $1/M + 100k out @ $4/M - // = 0.10 + 0.10 + 0.40 = $0.60 - expect( - estimateModelUsageCost( - { inputTokens: 200_000, cachedInputTokens: 100_000, outputTokens: 100_000 }, - baseCost, - ), - ).toBeCloseTo(0.6, 6); - }); - - it("clamps negative cachedInputTokens to 0", () => { - expect( - estimateModelUsageCost( - { inputTokens: 1_000_000, cachedInputTokens: -50_000, outputTokens: 0 }, - baseCost, - ), - ).toBe(1); - }); - - it("clamps cachedInputTokens > inputTokens so uncached doesn't go negative", () => { - // cached=200_000 but input=100_000 — uncached must clamp to 0 (not -100_000). - // 200_000 cached @ $1/M (no cache_read, falls back to input) + 0 out = $0.20. - // Without the Math.max guard, this would underbill: a negative uncached count - // times the input price would subtract from the cached charge. - expect( - estimateModelUsageCost( - { inputTokens: 100_000, cachedInputTokens: 200_000, outputTokens: 0 }, - baseCost, - ), - ).toBeCloseTo(0.2, 6); - }); - - it("clamps negative outputTokens to 0", () => { - expect( - estimateModelUsageCost( - { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: -1000 }, - baseCost, - ), - ).toBe(1); - }); - }); - - describe("context_over_200k tier", () => { - const tieredCost = { - input: 1, - output: 4, - context_over_200k: { input: 2, output: 8 }, - }; - - it("uses context_over_200k tier when inputTokens exceeds 200k", () => { - // 300_000 in @ $2/M + 100_000 out @ $8/M = 0.60 + 0.80 = $1.40 - expect( - estimateModelUsageCost( - { inputTokens: 300_000, cachedInputTokens: 0, outputTokens: 100_000 }, - tieredCost, - ), - ).toBeCloseTo(1.4, 6); - }); - - it("does NOT use context_over_200k tier when inputTokens is exactly 200k", () => { - // boundary check — must be strictly > 200k - // 200_000 @ $1/M + 0 out = $0.20 - expect( - estimateModelUsageCost( - { inputTokens: 200_000, cachedInputTokens: 0, outputTokens: 0 }, - tieredCost, - ), - ).toBeCloseTo(0.2, 6); - }); - - it("ignores context_over_200k when both input and output overrides are missing", () => { - // only cache_read is set in the override — should NOT trigger the tier swap - const cost = { - input: 1, - output: 4, - context_over_200k: { cache_read: 0.5 }, - }; - // Treated as base tier — 300k @ $1/M + 0 out = $0.30 - expect( - estimateModelUsageCost( - { inputTokens: 300_000, cachedInputTokens: 0, outputTokens: 0 }, - cost, - ), - ).toBeCloseTo(0.3, 6); - }); - - it("falls back to base tier input when context_over_200k.input is missing", () => { - const cost = { - input: 1, - output: 4, - context_over_200k: { output: 8 }, // only output overridden - }; - // 300k in @ $1/M (fallback) + 100k out @ $8/M = 0.30 + 0.80 = $1.10 - expect( - estimateModelUsageCost( - { inputTokens: 300_000, cachedInputTokens: 0, outputTokens: 100_000 }, - cost, - ), - ).toBeCloseTo(1.1, 6); - }); - }); -}); diff --git a/lib/credits/__tests__/getCreditUsage.test.ts b/lib/credits/__tests__/getCreditUsage.test.ts index d79edd7de..a06811ffd 100644 --- a/lib/credits/__tests__/getCreditUsage.test.ts +++ b/lib/credits/__tests__/getCreditUsage.test.ts @@ -139,4 +139,80 @@ describe("getCreditUsage", () => { expect(cost).toBe(0); }); }); + + describe("gateway cost short-circuit", () => { + it("returns gatewayCostUsd directly when it is a positive number (skips catalog lookup)", async () => { + const cost = await getCreditUsage( + { inputTokens: 1000, outputTokens: 500 }, + "anthropic/claude-haiku-4.5", + 0.07, + ); + expect(cost).toBe(0.07); + expect(mockGetModel).not.toHaveBeenCalled(); + }); + + it("falls through to token math when gatewayCostUsd is undefined", async () => { + mockGetModel.mockResolvedValue({ + id: "gpt-4", + pricing: { input: "0.00003", output: "0.00006" }, + } as any); + + const cost = await getCreditUsage( + { inputTokens: 1000, outputTokens: 500 }, + "gpt-4", + undefined, + ); + // 1000 * 0.00003 + 500 * 0.00006 = 0.06 + expect(cost).toBeCloseTo(0.06); + expect(mockGetModel).toHaveBeenCalledWith("gpt-4"); + }); + + it("falls through to token math when gatewayCostUsd is 0", async () => { + mockGetModel.mockResolvedValue({ + id: "gpt-4", + pricing: { input: "0.00003", output: "0.00006" }, + } as any); + + const cost = await getCreditUsage({ inputTokens: 1000, outputTokens: 500 }, "gpt-4", 0); + expect(cost).toBeCloseTo(0.06); + }); + + it("falls through to token math when gatewayCostUsd is negative", async () => { + mockGetModel.mockResolvedValue({ + id: "gpt-4", + pricing: { input: "0.00003", output: "0.00006" }, + } as any); + + const cost = await getCreditUsage({ inputTokens: 1000, outputTokens: 500 }, "gpt-4", -1); + expect(cost).toBeCloseTo(0.06); + }); + + it("falls through to token math when gatewayCostUsd is NaN", async () => { + mockGetModel.mockResolvedValue({ + id: "gpt-4", + pricing: { input: "0.00003", output: "0.00006" }, + } as any); + + const cost = await getCreditUsage( + { inputTokens: 1000, outputTokens: 500 }, + "gpt-4", + Number.NaN, + ); + expect(cost).toBeCloseTo(0.06); + }); + + it("falls through to token math when gatewayCostUsd is Infinity", async () => { + mockGetModel.mockResolvedValue({ + id: "gpt-4", + pricing: { input: "0.00003", output: "0.00006" }, + } as any); + + const cost = await getCreditUsage( + { inputTokens: 1000, outputTokens: 500 }, + "gpt-4", + Number.POSITIVE_INFINITY, + ); + expect(cost).toBeCloseTo(0.06); + }); + }); }); diff --git a/lib/credits/__tests__/handleChatCredits.test.ts b/lib/credits/__tests__/handleChatCredits.test.ts index 0e92f352e..c1786fc8f 100644 --- a/lib/credits/__tests__/handleChatCredits.test.ts +++ b/lib/credits/__tests__/handleChatCredits.test.ts @@ -42,7 +42,7 @@ describe("handleChatCredits", () => { accountId: "account-123", }); - expect(mockGetCreditUsage).toHaveBeenCalledWith(USAGE, "gpt-4"); + expect(mockGetCreditUsage).toHaveBeenCalledWith(USAGE, "gpt-4", undefined); expect(mockRecordCreditDeduction).toHaveBeenCalledWith({ accountId: "account-123", creditsToDeduct: 5, @@ -154,4 +154,51 @@ describe("handleChatCredits", () => { expect(consoleSpy).toHaveBeenCalledWith("Failed to handle chat credits:", expect.any(Error)); }); }); + + describe("gateway cost + source extensions", () => { + it("forwards gatewayCostUsd to getCreditUsage when provided", async () => { + mockGetCreditUsage.mockResolvedValue(0.07); + mockRecordCreditDeduction.mockResolvedValue({ success: true, newBalance: 93 }); + + await handleChatCredits({ + usage: USAGE, + model: "anthropic/claude-haiku-4.5", + accountId: "account-123", + gatewayCostUsd: 0.07, + }); + + expect(mockGetCreditUsage).toHaveBeenCalledWith(USAGE, "anthropic/claude-haiku-4.5", 0.07); + }); + + it("defaults source to 'web' when not provided (backwards compatible)", async () => { + mockGetCreditUsage.mockResolvedValue(0.05); + mockRecordCreditDeduction.mockResolvedValue({ success: true, newBalance: 95 }); + + await handleChatCredits({ + usage: USAGE, + model: "gpt-4", + accountId: "account-123", + }); + + expect(mockRecordCreditDeduction).toHaveBeenCalledWith( + expect.objectContaining({ source: "web" }), + ); + }); + + it("propagates source='api' when caller is the chat workflow", async () => { + mockGetCreditUsage.mockResolvedValue(0.05); + mockRecordCreditDeduction.mockResolvedValue({ success: true, newBalance: 95 }); + + await handleChatCredits({ + usage: USAGE, + model: "anthropic/claude-haiku-4.5", + accountId: "account-123", + source: "api", + }); + + expect(mockRecordCreditDeduction).toHaveBeenCalledWith( + expect.objectContaining({ source: "api" }), + ); + }); + }); }); diff --git a/lib/credits/__tests__/recordCreditDeduction.test.ts b/lib/credits/__tests__/recordCreditDeduction.test.ts index 9cdfa4eba..95910af4e 100644 --- a/lib/credits/__tests__/recordCreditDeduction.test.ts +++ b/lib/credits/__tests__/recordCreditDeduction.test.ts @@ -1,17 +1,12 @@ import { describe, it, expect, vi, beforeEach } from "vitest"; import { recordCreditDeduction } from "@/lib/credits/recordCreditDeduction"; -const { deductCreditsMock, insertUsageEventMock } = vi.hoisted(() => ({ - deductCreditsMock: vi.fn(), - insertUsageEventMock: vi.fn(), +const { deductCreditsWithAuditMock } = vi.hoisted(() => ({ + deductCreditsWithAuditMock: vi.fn(), })); -vi.mock("@/lib/credits/deductCredits", () => ({ - deductCredits: deductCreditsMock, -})); - -vi.mock("@/lib/supabase/usage_events/insertUsageEvent", () => ({ - insertUsageEvent: insertUsageEventMock, +vi.mock("@/lib/supabase/credits_usage/deductCreditsWithAudit", () => ({ + deductCreditsWithAudit: deductCreditsWithAuditMock, })); const ACCOUNT = "123e4567-e89b-12d3-a456-426614174000"; @@ -21,9 +16,8 @@ describe("recordCreditDeduction", () => { vi.clearAllMocks(); }); - it("deducts credits then inserts a usage_events row carrying token detail", async () => { - deductCreditsMock.mockResolvedValue({ success: true, newBalance: 100 }); - insertUsageEventMock.mockResolvedValue({ id: "abc" }); + it("calls the atomic RPC with cents + event payload carrying token detail", async () => { + deductCreditsWithAuditMock.mockResolvedValue({ ok: true }); const result = await recordCreditDeduction({ accountId: ACCOUNT, @@ -37,13 +31,13 @@ describe("recordCreditDeduction", () => { toolCallCount: 3, }); - expect(deductCreditsMock).toHaveBeenCalledWith({ - accountId: ACCOUNT, - creditsToDeduct: 250, - }); - expect(insertUsageEventMock).toHaveBeenCalledWith({ - account_id: ACCOUNT, - credits_deducted_cents: 250, + expect(deductCreditsWithAuditMock).toHaveBeenCalledTimes(1); + const args = deductCreditsWithAuditMock.mock.calls[0]?.[0]; + expect(args.accountId).toBe(ACCOUNT); + expect(args.cents).toBe(250); + expect(typeof args.eventId).toBe("string"); + expect(args.eventId.length).toBeGreaterThan(0); + expect(args.event).toEqual({ source: "web", agent_type: "main", provider: "anthropic", @@ -53,12 +47,11 @@ describe("recordCreditDeduction", () => { output_tokens: 567, tool_call_count: 3, }); - expect(result).toEqual({ success: true, newBalance: 100 }); + expect(result).toEqual({ success: true }); }); - it("applies defaults (agent_type='main', zero tokens, null model/provider) when token detail is omitted", async () => { - deductCreditsMock.mockResolvedValue({ success: true, newBalance: 95 }); - insertUsageEventMock.mockResolvedValue({ id: "abc" }); + it("applies defaults (agent_type='main', zero tokens, undefined model/provider) when token detail is omitted", async () => { + deductCreditsWithAuditMock.mockResolvedValue({ ok: true }); await recordCreditDeduction({ accountId: ACCOUNT, @@ -66,13 +59,12 @@ describe("recordCreditDeduction", () => { source: "api", }); - expect(insertUsageEventMock).toHaveBeenCalledWith({ - account_id: ACCOUNT, - credits_deducted_cents: 5, + const args = deductCreditsWithAuditMock.mock.calls[0]?.[0]; + expect(args.event).toEqual({ source: "api", agent_type: "main", - provider: null, - model_id: null, + provider: undefined, + model_id: undefined, input_tokens: 0, cached_input_tokens: 0, output_tokens: 0, @@ -80,33 +72,35 @@ describe("recordCreditDeduction", () => { }); }); - it("does not insert the audit row when the wallet deduction fails", async () => { - deductCreditsMock.mockRejectedValue(new Error("Insufficient credits")); + it("returns { success: false } and logs when the RPC reports an error (does NOT throw)", async () => { + const consoleSpy = vi.spyOn(console, "error").mockImplementation(() => {}); + deductCreditsWithAuditMock.mockResolvedValue({ + ok: false, + error: "credits_usage row not found", + }); - await expect( - recordCreditDeduction({ - accountId: ACCOUNT, - creditsToDeduct: 50, - source: "web", - }), - ).rejects.toThrow(/Insufficient credits/); + const result = await recordCreditDeduction({ + accountId: ACCOUNT, + creditsToDeduct: 50, + source: "web", + }); - expect(insertUsageEventMock).not.toHaveBeenCalled(); + expect(result).toEqual({ success: false }); + expect(consoleSpy).toHaveBeenCalled(); + consoleSpy.mockRestore(); }); - it("returns the deduction result even if the audit insert fails (logs and swallows)", async () => { + it("does not throw when the RPC wrapper itself rejects (defense in depth)", async () => { const consoleSpy = vi.spyOn(console, "error").mockImplementation(() => {}); - deductCreditsMock.mockResolvedValue({ success: true, newBalance: 100 }); - insertUsageEventMock.mockRejectedValue(new Error("db down")); + deductCreditsWithAuditMock.mockRejectedValue(new Error("network blip")); const result = await recordCreditDeduction({ accountId: ACCOUNT, creditsToDeduct: 5, - source: "api", + source: "web", }); - expect(result).toEqual({ success: true, newBalance: 100 }); - expect(consoleSpy).toHaveBeenCalled(); + expect(result).toEqual({ success: false }); consoleSpy.mockRestore(); }); }); diff --git a/lib/credits/computeCreditsDeductedCents.ts b/lib/credits/computeCreditsDeductedCents.ts deleted file mode 100644 index 7887801ad..000000000 --- a/lib/credits/computeCreditsDeductedCents.ts +++ /dev/null @@ -1,57 +0,0 @@ -import { getAvailableModels } from "@/lib/ai/getAvailableModels"; -import { estimateModelUsageCost } from "@/lib/credits/estimateModelUsageCost"; -import type { AvailableModelCost } from "@/lib/credits/AvailableModelCost"; - -/** - * Per-turn credit charge in cents (minimum 1). - * - * Mirrors open-agents' - * `apps/web/lib/credits/compute-credits-deducted-cents.ts` so the same - * billing math runs on both sides of the chat cutover. Resolution order: - * - * 1. Gateway-reported cost on `responseMessage.metadata.totalMessageCost` - * — the exact number the chat UI shows next to the assistant - * response. Used directly so the wallet debit converges with the - * cost label. - * 2. Token-based estimate against the model catalog's `cost` entry. - * Catalog is the same gateway / models.dev pipeline that backs - * `GET /api/ai/models`. - * 3. 1c floor when no pricing is available — every successful turn - * moves the wallet by at least 1c so a transient catalog outage - * can't make a turn free. - * - * Errors in the catalog fetch are swallowed and treated as path #3 — - * the caller (recordChatUsage) must not fail the workflow on a credit - * accounting hiccup. - * - * @param usage Token counts for the turn (matches AI SDK's `LanguageModelUsage`). - * @param modelId Fully qualified gateway id (e.g. `anthropic/claude-haiku-4.5`). - * @param gatewayCostUsd Gateway-reported total USD cost for the turn, - * when available. Subagent steps (collectTaskToolUsageEvents) won't - * have one and fall through to the token estimate. - * @returns Integer cent amount, ≥ 1. - */ -export async function computeCreditsDeductedCents( - usage: { - inputTokens: number; - cachedInputTokens: number; - outputTokens: number; - }, - modelId: string, - gatewayCostUsd?: number, -): Promise { - if (typeof gatewayCostUsd === "number" && Number.isFinite(gatewayCostUsd) && gatewayCostUsd > 0) { - return Math.max(1, Math.round(gatewayCostUsd * 100)); - } - - try { - const models = await getAvailableModels(); - const model = models.find(m => m.id === modelId) as { cost?: AvailableModelCost } | undefined; - const usd = estimateModelUsageCost(usage, model?.cost); - if (typeof usd !== "number" || usd <= 0) return 1; - return Math.max(1, Math.round(usd * 100)); - } catch (error) { - console.error("Failed to compute credits from usage:", error); - return 1; - } -} diff --git a/lib/credits/estimateModelUsageCost.ts b/lib/credits/estimateModelUsageCost.ts deleted file mode 100644 index d675486a0..000000000 --- a/lib/credits/estimateModelUsageCost.ts +++ /dev/null @@ -1,79 +0,0 @@ -import type { AvailableModelCost, AvailableModelCostTier } from "@/lib/credits/AvailableModelCost"; - -const TOKENS_PER_MILLION = 1_000_000; - -/** - * Picks the right cost tier for `usage`. Above 200k input tokens many - * providers charge more, so the cost catalog exposes an override on - * `cost.context_over_200k`. Missing fields fall back to the base tier - * so a partial override is still valid. - * - * The trigger is "input > 200k AND at least one of input/output is - * overridden" — a tier that only overrides `cache_read` is treated as - * the base tier (it's not a real tier swap, just a cache discount). - */ -function resolveCostTier( - usage: { inputTokens: number }, - cost: AvailableModelCost | undefined, -): AvailableModelCostTier | undefined { - if (!cost) return undefined; - - if ( - usage.inputTokens > 200_000 && - (typeof cost.context_over_200k?.input === "number" || - typeof cost.context_over_200k?.output === "number") - ) { - return { - input: cost.context_over_200k.input ?? cost.input, - output: cost.context_over_200k.output ?? cost.output, - cache_read: cost.context_over_200k.cache_read ?? cost.cache_read, - }; - } - - return cost; -} - -/** - * Token-based estimate of a turn's USD cost, used as a fallback when - * the gateway hasn't reported an actual cost on the - * `assistantMessage.metadata.totalMessageCost` field. - * - * Ports `apps/web/lib/models.ts:estimateModelUsageCost` from - * open-agents so the same per-turn math runs on both sides during the - * cutover. Cached input tokens are billed at `cache_read` when the - * catalog exposes it and fall back to the base input price otherwise. - * - * Returns `undefined` when the catalog can't price this model (missing - * cost entry, missing input price, or missing output price) so the - * caller knows to use a different fallback (open-agents' rule: never - * charge 0 — bill the 1c minimum instead). - * - * @param usage Token counts for the turn (mirrors AI SDK's `LanguageModelUsage`). - * @param cost Per-model catalog entry. `undefined` short-circuits to `undefined`. - * @returns USD cost, or `undefined` if not priceable. - */ -export function estimateModelUsageCost( - usage: { - inputTokens: number; - cachedInputTokens: number; - outputTokens: number; - }, - cost: AvailableModelCost | undefined, -): number | undefined { - const costTier = resolveCostTier(usage, cost); - const inputPrice = costTier?.input; - const outputPrice = costTier?.output; - if (typeof inputPrice !== "number" || typeof outputPrice !== "number") { - return undefined; - } - - const cachedInputTokens = Math.max(0, usage.cachedInputTokens); - const uncachedInputTokens = Math.max(0, usage.inputTokens - cachedInputTokens); - const cacheReadPrice = costTier?.cache_read ?? inputPrice; - - return ( - (uncachedInputTokens * inputPrice) / TOKENS_PER_MILLION + - (cachedInputTokens * cacheReadPrice) / TOKENS_PER_MILLION + - (Math.max(0, usage.outputTokens) * outputPrice) / TOKENS_PER_MILLION - ); -} diff --git a/lib/credits/getCreditUsage.ts b/lib/credits/getCreditUsage.ts index 4e1dc5b28..26eb9180b 100644 --- a/lib/credits/getCreditUsage.ts +++ b/lib/credits/getCreditUsage.ts @@ -3,14 +3,31 @@ import { LanguageModelUsage } from "ai"; /** * Calculates the total spend in USD for a given language model usage. + * + * Resolution order: + * 1. `gatewayCostUsd` — gateway-reported actual cost from + * `responseMessage.metadata.totalMessageCost`. Used directly when + * present and positive so the wallet debit converges with the cost + * label the chat UI shows next to the assistant response. + * 2. Token-based estimate using `model.pricing.input/output` from + * the gateway catalog (`getModel`). Authoritative for token cost. + * 3. `0` when nothing prices the turn (caller floors to the 1c + * minimum via `Math.max(1, Math.round(usd * 100))`). + * * @param usage - The language model usage data * @param modelId - The ID of the model used + * @param gatewayCostUsd - Optional gateway-reported USD cost (preferred over token math) * @returns The total spend in USD or 0 if calculation fails */ export const getCreditUsage = async ( usage: LanguageModelUsage, modelId: string, + gatewayCostUsd?: number, ): Promise => { + if (typeof gatewayCostUsd === "number" && Number.isFinite(gatewayCostUsd) && gatewayCostUsd > 0) { + return gatewayCostUsd; + } + try { const model = await getModel(modelId); if (!model) { diff --git a/lib/credits/handleChatCredits.ts b/lib/credits/handleChatCredits.ts index bc2ecff9f..061043420 100644 --- a/lib/credits/handleChatCredits.ts +++ b/lib/credits/handleChatCredits.ts @@ -6,19 +6,42 @@ interface HandleChatCreditsParams { usage: LanguageModelUsage; model: string; accountId?: string; + /** + * Gateway-reported total USD cost for this turn (from + * `responseMessage.metadata.totalMessageCost`). When present and + * positive, used directly instead of the token-based estimate so + * the wallet debit converges with the cost label the chat UI shows. + */ + gatewayCostUsd?: number; + /** + * Which surface generated the turn — used to label the + * `usage_events` audit row. Defaults to `"web"` to preserve the + * existing `handleChatStream` contract; the chat-workflow path + * (`/api/chat/workflow`) passes `"api"` so admin dashboards can + * distinguish surface in spend rollups. + */ + source?: "web" | "api"; } /** * Handles credit deduction after chat completion. * Always deducts at least 1 credit when accountId is present (round up from usage cost). - * @param usage - The language model usage data - * @param model - The model ID used for the chat - * @param accountId - The account ID to deduct credits from (optional) + * + * Resolution order for the per-turn cost: + * 1. `gatewayCostUsd` — gateway-reported actual USD (preferred) + * 2. Token-based estimate via `getCreditUsage` against the gateway catalog + * 3. 1c floor — every successful turn debits at least 1 cent + * + * Wallet debit + audit row insert are atomic via + * `recordCreditDeduction` (backed by the `deduct_credits_with_audit` + * Postgres function). */ export const handleChatCredits = async ({ usage, model, accountId, + gatewayCostUsd, + source = "web", }: HandleChatCreditsParams): Promise => { if (!accountId) { console.error("No account ID provided, skipping credit deduction"); @@ -26,13 +49,13 @@ export const handleChatCredits = async ({ } try { - const usageCost = await getCreditUsage(usage, model); + const usageCost = await getCreditUsage(usage, model, gatewayCostUsd); const creditsToDeduct = Math.max(1, Math.round(usageCost * 100)); await recordCreditDeduction({ accountId, creditsToDeduct, - source: "web", + source, modelId: model, inputTokens: usage.inputTokens, outputTokens: usage.outputTokens, diff --git a/lib/credits/recordCreditDeduction.ts b/lib/credits/recordCreditDeduction.ts index b2db46803..b71f96c55 100644 --- a/lib/credits/recordCreditDeduction.ts +++ b/lib/credits/recordCreditDeduction.ts @@ -1,5 +1,5 @@ -import { deductCredits } from "./deductCredits"; -import { insertUsageEvent } from "@/lib/supabase/usage_events/insertUsageEvent"; +import { nanoid } from "nanoid"; +import { deductCreditsWithAudit } from "@/lib/supabase/credits_usage/deductCreditsWithAudit"; interface RecordCreditDeductionParams { accountId: string; @@ -16,42 +16,49 @@ interface RecordCreditDeductionParams { interface RecordCreditDeductionResult { success: boolean; - newBalance?: number; } /** - * Wallet + meter wrapper. Debits the credits_usage balance via deductCredits, - * then writes a usage_events row recording the wallet impact alongside any - * token detail the caller has. + * Wallet + meter atomic wrapper. Calls the `deduct_credits_with_audit` + * Postgres function (recoupable/database#26) which runs the + * `credits_usage` debit and the `usage_events` insert inside a single + * transaction — either both writes commit or neither does. Eliminates + * the wallet/meter drift risk the previous (two separate Supabase + * calls) implementation had. * - * If the audit insert fails, the deduction is preserved (already committed) - * and the error is logged but not surfaced — the wallet stays authoritative - * and a reconciliation job can recover the missing audit row later. + * Errors are caught and surfaced via `{ success: false }` so the + * caller (the chat workflow or any research handler) never aborts on + * a credit-accounting hiccup. The wallet stays authoritative — if the + * RPC rejects, neither write happened, and the caller can decide + * whether to retry or move on. */ export const recordCreditDeduction = async ( params: RecordCreditDeductionParams, ): Promise => { - const result = await deductCredits({ - accountId: params.accountId, - creditsToDeduct: params.creditsToDeduct, - }); - try { - await insertUsageEvent({ - account_id: params.accountId, - credits_deducted_cents: params.creditsToDeduct, - source: params.source, - agent_type: params.agentType ?? "main", - provider: params.provider ?? null, - model_id: params.modelId ?? null, - input_tokens: params.inputTokens ?? 0, - cached_input_tokens: params.cachedInputTokens ?? 0, - output_tokens: params.outputTokens ?? 0, - tool_call_count: params.toolCallCount ?? 0, + const result = await deductCreditsWithAudit({ + accountId: params.accountId, + cents: params.creditsToDeduct, + eventId: nanoid(), + event: { + source: params.source, + agent_type: params.agentType ?? "main", + provider: params.provider, + model_id: params.modelId, + input_tokens: params.inputTokens ?? 0, + cached_input_tokens: params.cachedInputTokens ?? 0, + output_tokens: params.outputTokens ?? 0, + tool_call_count: params.toolCallCount ?? 0, + }, }); + + if (!result.ok) { + console.error("[recordCreditDeduction] atomic debit failed:", result.error); + return { success: false }; + } + return { success: true }; } catch (error) { - console.error("Failed to insert usage_events row (wallet was still debited):", error); + console.error("[recordCreditDeduction] unexpected error:", error); + return { success: false }; } - - return result; }; From 17637b8c382840a20ad4caffde1b1f59d19c0a9d Mon Sep 17 00:00:00 2001 From: Sweets Sweetman Date: Mon, 25 May 2026 16:44:01 -0500 Subject: [PATCH 6/7] fix(credits): mark recordCreditDeduction as 'use step' for workflow runtime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Vercel Workflow's build-time detector flagged `nanoid` as a Node.js module that can't run inside the workflow body. Marking recordCreditDeduction as 'use step' moves it into the step runtime where Node modules are allowed. Backwards compatible for the existing research-handler callers (regular API routes) — 'use step' functions execute immediately when called from non-workflow contexts. Also matches open-agents' pattern: their recordWorkflowUsage (which contains the equivalent nanoid call) is a 'use step' function. Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/credits/recordCreditDeduction.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/credits/recordCreditDeduction.ts b/lib/credits/recordCreditDeduction.ts index b71f96c55..3bf61ecce 100644 --- a/lib/credits/recordCreditDeduction.ts +++ b/lib/credits/recordCreditDeduction.ts @@ -35,6 +35,7 @@ interface RecordCreditDeductionResult { export const recordCreditDeduction = async ( params: RecordCreditDeductionParams, ): Promise => { + "use step"; try { const result = await deductCreditsWithAudit({ accountId: params.accountId, From b0299f41ecee8eb3607db217301d04d0e11f668f Mon Sep 17 00:00:00 2001 From: Sweets Sweetman Date: Mon, 25 May 2026 16:52:12 -0500 Subject: [PATCH 7/7] refactor(workflow): collapse inline metadata duck-type (KISS) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR review feedback: the 14-line inline type assertion for `result.responseMessage` was needless boilerplate. Replaced with: 1. Import the existing `AgentMessageMetadata` type (already used by `runAgentStep`'s `messageMetadata` callback — single source of truth for the shape). 2. Hoist a module-level `ZERO_USAGE` default so the fallback when metadata is missing is a named constant, not an inline literal. 3. Cast `result.responseMessage.metadata` once (`as AgentMessageMetadata | undefined`). Net delta: 14 lines → 5 lines inside the workflow body, no behavior change. Co-Authored-By: Claude Opus 4.7 (1M context) --- app/lib/workflows/runAgentWorkflow.ts | 30 ++++++++------------------- 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/app/lib/workflows/runAgentWorkflow.ts b/app/lib/workflows/runAgentWorkflow.ts index 9afcdeb79..67eb94b24 100644 --- a/app/lib/workflows/runAgentWorkflow.ts +++ b/app/lib/workflows/runAgentWorkflow.ts @@ -6,8 +6,15 @@ import { runAgentStep } from "@/app/lib/workflows/runAgentStep"; import { clearChatActiveStream } from "@/lib/chat/clearChatActiveStream"; import { persistAssistantMessage } from "@/lib/chat/persistAssistantMessage"; import { handleChatCredits } from "@/lib/credits/handleChatCredits"; +import type { AgentMessageMetadata } from "@/lib/agent/messageMetadata/AgentMessageMetadata"; import type { DurableAgentContext } from "@/lib/agent/tools/AgentContext"; +const ZERO_USAGE: LanguageModelUsage = { + inputTokens: 0, + cachedInputTokens: 0, + outputTokens: 0, +} as LanguageModelUsage; + export type RunAgentWorkflowInput = { messages: UIMessage[]; chatId: string; @@ -100,32 +107,13 @@ export async function runAgentWorkflow(input: RunAgentWorkflowInput): Promise