Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ CODERAG_GEMINI_API_KEY=your_api_key_here
# Compatibility alias also accepted: CODERAG_GEMINI_AI_KEY

# Optional: Override the default Gemini embedding model
# Default: models/gemini-embedding-001
CODERAG_GEMINI_MODEL=models/gemini-embedding-001
# Default: models/gemini-embedding-2
CODERAG_GEMINI_MODEL=models/gemini-embedding-2

# ============================================
# EMBEDDING CONFIGURATION
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ Supported environment overrides:
- `CODERAG_CUSTOM_HTTP_FORMAT`
- `CODERAG_LLM_HEADERS`

When `embedding.provider` is `gemini`, CodeRag defaults to `models/gemini-embedding-001` and requests 768-dimensional vectors explicitly so the stored embedding fingerprint matches the vectors written to LanceDB. It accepts either `CODERAG_GEMINI_API_KEY` or the compatibility alias `CODERAG_GEMINI_AI_KEY`.
When `embedding.provider` is `gemini`, CodeRag defaults to `models/gemini-embedding-2` and requests 768-dimensional vectors explicitly so the stored embedding fingerprint matches the vectors written to LanceDB. It accepts either `CODERAG_GEMINI_API_KEY` or the compatibility alias `CODERAG_GEMINI_AI_KEY`.

When `embedding.provider` is `onnx`, CodeRag uses `Xenova/gte-small` (384-dim, ~33MB) running locally via `@xenova/transformers`. No API key or external server needed. The model must be downloaded to `<onnxModelDir>/Xenova/gte-small/` (default `.coderag-models/models/Xenova/gte-small/`).

Expand Down
2 changes: 1 addition & 1 deletion src/cli/setup-wizard.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ export const runSetupWizard = async (cwd: string, logger?: Logger): Promise<void
};
const embeddingProviderKind = providerMap[embeddingProvider] ?? "local-hash";

let geminiModel = "models/gemini-embedding-001";
let geminiModel = "models/gemini-embedding-2";
let geminiApiKey = "";
let onnxModelDir = ".coderag-models/models";

Expand Down
10 changes: 6 additions & 4 deletions src/indexer/documents.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ const readExternalNodeDoc = async (nodeId: string, docsPath: string): Promise<st
};

const EMPTY_LIST = "- None";
const MAX_EMBEDDING_CHARS = 2048; // Embedding models cap at ~512 tokens; extra chars waste memory
/** ~4 chars per token is a safe estimate for mixed code/text content */
const CHARS_PER_TOKEN = 4;

/** Renders `items` as a "- item" bullet list, one per line, or EMPTY_LIST when there are no items. */
const formatList = (items: string[]): string => {
  if (items.length === 0) {
    return EMPTY_LIST;
  }

  const bullets = items.map((entry) => `- ${entry}`);
  return bullets.join("\n");
};

Expand Down Expand Up @@ -259,9 +260,10 @@ export const buildIndexedDocuments = async (
embeddingText = [doc, sourceText].filter(Boolean).join("\n\n");
}

// Truncate to save memory — embedding models cap at ~512 tokens anyway
if (embeddingText.length > MAX_EMBEDDING_CHARS) {
embeddingText = embeddingText.slice(0, MAX_EMBEDDING_CHARS);
// Truncate to fit the model's token limit
const maxChars = embeddingProvider.maxInputTokens * CHARS_PER_TOKEN;
if (embeddingText.length > maxChars) {
embeddingText = embeddingText.slice(0, maxChars);
}

preparedForChunk.push({
Expand Down
2 changes: 2 additions & 0 deletions src/indexer/embedder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ export class LocalHashEmbeddingProvider implements EmbeddingProvider {
readonly name = "local-hash";
readonly model = "local-hash";
readonly dimensions: number;
/** Unlimited — hash-based embedding has no token limit. */
readonly maxInputTokens = Infinity;

constructor(dimensions = 256) {
this.dimensions = dimensions;
Expand Down
3 changes: 2 additions & 1 deletion src/indexer/gemini-embedder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import type { EmbeddingProvider } from "../types.js";
import { ConfigurationError } from "../errors/index.js";

const GEMINI_API_BASE = "https://generativelanguage.googleapis.com/v1beta";
const DEFAULT_MODEL = "models/gemini-embedding-001";
const DEFAULT_MODEL = "models/gemini-embedding-2";
const DEFAULT_DIMENSIONS = 768;
const MAX_BATCH_SIZE = 100;
const GEMINI_API_KEY_ENV = "CODERAG_GEMINI_API_KEY";
Expand All @@ -23,6 +23,7 @@ export class GeminiEmbeddingProvider implements EmbeddingProvider {
readonly name = "gemini";
readonly dimensions = DEFAULT_DIMENSIONS;
readonly maxBatchSize = MAX_BATCH_SIZE;
readonly maxInputTokens = 8192;
readonly model: string;
private readonly apiKey: string;
private readonly timeoutMs: number;
Expand Down
1 change: 1 addition & 0 deletions src/indexer/onnx-embedder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ export class OnnxEmbeddingProvider implements EmbeddingProvider {
readonly model = DEFAULT_MODEL;
readonly dimensions = DEFAULT_DIMENSIONS;
readonly maxBatchSize = 1; // One at a time to minimize memory pressure
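readonly maxInputTokens = 256; // NOTE(review): 256 is the all-MiniLM-L6-v2 max sequence length, but the README says this provider runs Xenova/gte-small — confirm the limit for that model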
private readonly modelDir: string;
private readonly logger?: Logger;

Expand Down
64 changes: 54 additions & 10 deletions src/llm/context-builder.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
import type { BlueprintNode } from "@abhinav2203/codeflow-core/schema";

import type { ContextPackage, GraphSnapshot, IndexedNodeDocument, RetrievedNodeContext, RetrievalConfig } from "../types.js";
import type {
ContextPackage,
GraphSnapshot,
IndexedNodeDocument,
RetrievedNodeContext,
RetrievalConfig
} from "../types.js";
import type { SectionLimits } from "./prompt.js";
import { FileCache } from "../store/file-cache.js";
import { createRetrievedNodeContext } from "../retrieval/page-index.js";

Expand All @@ -25,6 +32,36 @@ const buildGraphSummary = (
return parts.join(" ");
};

/**
 * Computes the per-section character limits from the retrieval config.
 *
 * Every default is a fixed share of `retrieval.maxContextChars`, written as the
 * number of characters the section receives at a 16,000-character baseline:
 *
 *   primaryDoc  -> 1,200 (7.5%)
 *   primaryFile -> 4,000 (25%)
 *   relatedDoc  ->   320 (2%)
 *   relatedFile -> 1,200 (7.5%)
 *
 * The remaining ~58% is left for structural overhead (headers, warnings,
 * graph summary). Scaled defaults are rounded to the nearest integer and are
 * never smaller than 1.
 *
 * A non-nullish `primaryDocLimit`, `primaryFileLimit`, `relatedDocLimit` or
 * `relatedFileLimit` on the config always wins over the matching default.
 *
 * @param retrieval - Retrieval configuration providing the budget and optional overrides.
 * @returns The resolved character limit for each of the four sections.
 */
export const deriveSectionLimits = (retrieval: RetrievalConfig): SectionLimits => {
  // Ratio of the configured budget to the 16,000-character baseline.
  const budgetRatio = retrieval.maxContextChars / 16000;

  // Scales a baseline allotment to the configured budget, with a floor of 1 char.
  const scaled = (baselineChars: number): number => Math.max(1, Math.round(budgetRatio * baselineChars));

  return {
    primaryDoc: retrieval.primaryDocLimit ?? scaled(1200),
    primaryFile: retrieval.primaryFileLimit ?? scaled(4000),
    relatedDoc: retrieval.relatedDocLimit ?? scaled(320),
    relatedFile: retrieval.relatedFileLimit ?? scaled(1200)
  };
};

const truncateContext = (context: RetrievedNodeContext, maxChars: number, warnings: string[]): RetrievedNodeContext => {
if (context.fullFileContent.length <= maxChars) {
return context;
Expand Down Expand Up @@ -115,6 +152,8 @@ const buildRelatedContextPromises = (

/**
* Builds the final context package passed to the LLM or returned directly to the caller.
*
* The caller receives `limits` so it can pass them through to `buildMessages()`.
*/
export const buildContextPackage = async (
question: string,
Expand All @@ -127,7 +166,7 @@ export const buildContextPackage = async (
dependencies: BlueprintNode[],
dependents: BlueprintNode[],
answerMode: ContextPackage["answerMode"]
): Promise<ContextPackage> => {
): Promise<{ context: ContextPackage; limits: SectionLimits }> => {
const primaryDocument = primaryNode ? documents[primaryNode.id] : undefined;
const primaryContext = primaryDocument
? await createRetrievedNodeContext(repoPath, fileCache, snapshot, primaryDocument, "primary")
Expand All @@ -138,13 +177,18 @@ export const buildContextPackage = async (
const primaryResult = fitPrimaryContext(primaryContext, retrieval.maxContextChars);
const relatedResult = fitRelatedContexts(resolvedRelatedContexts, primaryResult.remainingBudget);

const limits = deriveSectionLimits(retrieval);

return {
question,
answerMode,
retrievalMode: "single" as const,
primaryNode: primaryResult.primaryContext,
relatedNodes: relatedResult.relatedContexts,
graphSummary: buildGraphSummary(primaryNode, dependencies, dependents),
warnings: [...primaryResult.warnings, ...relatedResult.warnings]
context: {
question,
answerMode,
retrievalMode: "single" as const,
primaryNode: primaryResult.primaryContext,
relatedNodes: relatedResult.relatedContexts,
graphSummary: buildGraphSummary(primaryNode, dependencies, dependents),
warnings: [...primaryResult.warnings, ...relatedResult.warnings]
},
limits
};
};
};
37 changes: 22 additions & 15 deletions src/llm/multi-hop-context-builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@ import type {
GraphSnapshot,
IndexedNodeDocument,
MultiHopRetrievalResult,
RetrievedNodeContext,
RetrievalConfig
} from "../types.js";
import type { RetrievedNodeContext } from "../types.js";
import type { SectionLimits } from "./prompt.js";
import { deriveSectionLimits } from "./context-builder.js";
import { FileCache } from "../store/file-cache.js";
import { createRetrievedNodeContext } from "../retrieval/page-index.js";

Expand Down Expand Up @@ -77,6 +79,8 @@ const buildRelatedNodeContexts = async (
* Unlike the single-node path, there is no single primary node.
* The first retrieved node is promoted to "primary" for display purposes,
* and all remaining nodes are listed as related.
*
* Returns both the context and the derived section limits for prompt building.
*/
export const buildMultiHopContextPackage = async (
question: string,
Expand All @@ -87,10 +91,9 @@ export const buildMultiHopContextPackage = async (
documents: Record<string, IndexedNodeDocument>,
retrieval: RetrievalConfig,
fileCache: FileCache
): Promise<ContextPackage> => {
): Promise<{ context: ContextPackage; limits: SectionLimits }> => {
const allNodes = retrievalResult.deduplicatedNodes;

// Build RetrievedNodeContext for all deduplicated nodes
const allContexts = await buildRelatedNodeContexts(
allNodes,
repoPath,
Expand All @@ -99,14 +102,12 @@ export const buildMultiHopContextPackage = async (
documents
);

// Promote the first node to "primary" for display
const firstCtx = allContexts[0];
const primaryContext: RetrievedNodeContext | null = firstCtx
? Object.assign({}, firstCtx, { relationship: "primary" as const, subQuestionIndex: undefined })
: null;
const relatedContexts: RetrievedNodeContext[] = allContexts.length > 1 ? allContexts.slice(1) : [];

// Apply context budgeting
const warnings: string[] = [];
let remainingBudget = retrieval.maxContextChars;

Expand All @@ -125,6 +126,7 @@ export const buildMultiHopContextPackage = async (
const fittedRelated: RetrievedNodeContext[] = [];
for (const ctx of relatedContexts) {
if (remainingBudget <= 0) {
warnings.push(`Dropped file content for ${ctx.filePath} because the context budget was exhausted.`);
fittedRelated.push({ ...ctx, fullFileContent: "" });
continue;
}
Expand All @@ -149,15 +151,20 @@ export const buildMultiHopContextPackage = async (
filesReferenced: meta.filesReferenced
}));

const limits = deriveSectionLimits(retrieval);

return {
question,
answerMode: "llm" as const,
retrievalMode: "multi-hop" as const,
primaryNode: fittedPrimary,
relatedNodes: fittedRelated,
graphSummary: buildMultiHopGraphSummary(subQuestions, retrievalResult, snapshot),
warnings,
subQuestions,
subQuestionResults
context: {
question,
answerMode: "llm" as const,
retrievalMode: "multi-hop" as const,
primaryNode: fittedPrimary,
relatedNodes: fittedRelated,
graphSummary: buildMultiHopGraphSummary(subQuestions, retrievalResult, snapshot),
warnings,
subQuestions,
subQuestionResults
},
limits
};
};
};
Loading
Loading