Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 46 additions & 7 deletions src/app/api/internal/code-review-status/[reviewId]/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,12 @@

import type { NextRequest } from 'next/server';
import { NextResponse } from 'next/server';
import { updateCodeReviewStatus, getCodeReviewById } from '@/lib/code-reviews/db/code-reviews';
import {
updateCodeReviewStatus,
updateCodeReviewUsage,
getCodeReviewById,
getSessionUsageFromBilling,
} from '@/lib/code-reviews/db/code-reviews';
import { tryDispatchPendingReviews } from '@/lib/code-reviews/dispatch/dispatch-pending-reviews';
import { getBotUserId } from '@/lib/bot-users/bot-user-service';
import { logExceptInTest, errorExceptInTest } from '@/lib/utils.server';
Expand Down Expand Up @@ -112,24 +117,58 @@ function normalizePayload(raw: StatusUpdatePayload): {

/**
 * Read a review's usage data (model and token counts).
 *
 * Resolution order:
 *  1. Read the code review record. While it exists but has no `model`, re-read
 *     it up to MAX_RETRIES times with exponential backoff. For v1 (SSE) reviews
 *     the orchestrator reports usage around the time the completion callback
 *     fires, so a short poll handles that race.
 *  2. If the record still has no `model` but has a `cli_session_id`, aggregate
 *     usage from the billing tables via `getSessionUsageFromBilling`. This
 *     covers v2 (cloud-agent-next) reviews, where the orchestrator never
 *     reports usage.
 *
 * When the billing fallback is used, the code review record is back-filled
 * (fire-and-forget) so subsequent reads (e.g. the admin panel) don't need the
 * aggregation again.
 *
 * @param reviewId - ID of the code review to look up.
 * @returns `model`, `tokensIn` and `tokensOut`; all three are `null` when no
 *   usage could be found.
 */
async function getReviewUsageData(reviewId: string) {
  const MAX_RETRIES = 3;
  const BASE_DELAY_MS = 200;

  let review = await getCodeReviewById(reviewId);

  // Short poll: usage may arrive from the orchestrator just before the callback.
  // Delays are 200ms, 400ms, 800ms (~1.4s in total when the model never appears).
  //
  // NOTE(review): a reviewer flagged that v2 reviews never receive orchestrator
  // usage, so every v2 completion pays the full backoff before the billing
  // fallback below is tried. Nothing visible in this function distinguishes v1
  // from v2, so the wait is kept as-is — TODO: skip the poll once the review
  // type can be determined here.
  for (let attempt = 0; attempt < MAX_RETRIES && review && !review.model; attempt++) {
    await new Promise(resolve => setTimeout(resolve, BASE_DELAY_MS * 2 ** attempt));
    review = await getCodeReviewById(reviewId);
  }

  // Usage already recorded on the review itself.
  if (review?.model) {
    return {
      model: review.model,
      tokensIn: review.total_tokens_in ?? null,
      tokensOut: review.total_tokens_out ?? null,
    };
  }

  // Fallback: aggregate from billing tables (covers v2 / cloud-agent-next reviews)
  if (review?.cli_session_id) {
    const billing = await getSessionUsageFromBilling(review.cli_session_id);
    if (billing) {
      // Back-fill the code_reviews record so we don't repeat this aggregation.
      // Deliberately not awaited: a failed back-fill is only logged and must not
      // delay or fail the caller.
      void updateCodeReviewUsage(reviewId, {
        model: billing.model,
        totalTokensIn: billing.totalTokensIn,
        totalTokensOut: billing.totalTokensOut,
        totalCostMusd: billing.totalCostMusd,
      }).catch(err => {
        logExceptInTest('[code-review-status] Failed to back-fill usage from billing', err);
      });

      return {
        model: billing.model,
        tokensIn: billing.totalTokensIn,
        tokensOut: billing.totalTokensOut,
      };
    }
  }

  return { model: null, tokensIn: null, tokensOut: null };
}

/**
Expand Down
81 changes: 79 additions & 2 deletions src/lib/code-reviews/db/code-reviews.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,12 @@
*/

import { db } from '@/lib/drizzle';
import { cloud_agent_code_reviews } from '@kilocode/db/schema';
import { eq, and, desc, count, ne, inArray } from 'drizzle-orm';
import {
cloud_agent_code_reviews,
microdollar_usage,
microdollar_usage_metadata,
} from '@kilocode/db/schema';
import { eq, and, desc, count, ne, inArray, sql, sum } from 'drizzle-orm';
import { captureException } from '@sentry/nextjs';
import type { CreateReviewParams, CodeReviewStatus, ListReviewsParams, Owner } from '../core';
import type { CloudAgentCodeReview } from '@kilocode/db/schema';
Expand Down Expand Up @@ -479,3 +483,76 @@ export async function userOwnsReview(reviewId: string, userId: string): Promise<
throw error;
}
}

/**
* Result of aggregating billing usage for a session.
*
* Produced by getSessionUsageFromBilling, which sums the microdollar_usage
* rows belonging to one session.
*/
export type SessionUsageSummary = {
// Name of the model that accounts for the most (input + output) tokens in the session.
model: string;
// Sum of input_tokens across all of the session's usage rows (all models).
totalTokensIn: number;
// Sum of output_tokens across all of the session's usage rows (all models).
totalTokensOut: number;
// Sum of microdollar_usage.cost across the session — presumably microdollars; confirm against the schema.
totalCostMusd: number;
};

/**
 * Aggregates LLM usage from the billing tables for a given kilo session ID.
 *
 * This is the fallback path for v2 (cloud-agent-next) reviews where the
 * orchestrator does not accumulate usage from SSE events. The billing
 * system (processUsage → microdollar_usage) already records per-request
 * usage keyed by session_id, so we aggregate here.
 *
 * Uses two queries:
 *   1. Session-wide totals (tokens + cost across all models)
 *   2. The model with the most tokens (the primary review model name)
 *
 * This avoids undercounting when a session uses more than one model.
 *
 * @param cliSessionId - Session ID matched against
 *   `microdollar_usage_metadata.session_id`.
 * @returns The aggregated usage, or `null` when the session has no summed
 *   input tokens, when no model could be determined, or when a query fails
 *   (the error is reported to Sentry rather than rethrown).
 */
export async function getSessionUsageFromBilling(
  cliSessionId: string
): Promise<SessionUsageSummary | null> {
  try {
    // NOTE(review): a reviewer flagged that microdollar_usage_metadata only has
    // a created_at index, so filtering by session_id may scan the table on every
    // completed v2 review — TODO: confirm an index on session_id exists.
    const sessionFilter = eq(microdollar_usage_metadata.session_id, cliSessionId);
    const joinCondition = eq(microdollar_usage.id, microdollar_usage_metadata.id);

    // 1. Session-wide totals (all models combined)
    const [totals] = await db
      .select({
        totalTokensIn: sum(microdollar_usage.input_tokens).mapWith(Number),
        totalTokensOut: sum(microdollar_usage.output_tokens).mapWith(Number),
        totalCostMusd: sum(microdollar_usage.cost).mapWith(Number),
      })
      .from(microdollar_usage)
      .innerJoin(microdollar_usage_metadata, joinCondition)
      .where(sessionFilter);

    // A null input-token sum is treated as "no billing data for this session".
    if (totals?.totalTokensIn == null) return null;

    // 2. Pick the model with the most tokens (the primary review model)
    const [topModel] = await db
      .select({ model: microdollar_usage.model })
      .from(microdollar_usage)
      .innerJoin(microdollar_usage_metadata, joinCondition)
      .where(sessionFilter)
      .groupBy(microdollar_usage.model)
      .orderBy(
        sql`sum(${microdollar_usage.input_tokens} + ${microdollar_usage.output_tokens}) desc`
      )
      .limit(1);

    if (!topModel?.model) return null;

    return {
      model: topModel.model,
      totalTokensIn: totals.totalTokensIn,
      // The other sums can still be null when their column is null on every row.
      totalTokensOut: totals.totalTokensOut ?? 0,
      totalCostMusd: totals.totalCostMusd ?? 0,
    };
  } catch (error) {
    // Best-effort lookup: report the failure and let the caller treat it as "no usage".
    captureException(error, {
      tags: { operation: 'getSessionUsageFromBilling' },
      extra: { cliSessionId },
    });
    return null;
  }
}