diff --git a/docs/docs/features/mcp-server.mdx b/docs/docs/features/mcp-server.mdx index a21c4cccf..3ac166f1f 100644 --- a/docs/docs/features/mcp-server.mdx +++ b/docs/docs/features/mcp-server.mdx @@ -189,6 +189,22 @@ Parameters: | `ref` | no | Commit SHA, branch or tag name to fetch the source code for. If not provided, uses the default branch. | +### `list_tree` + +Lists files and directories from a repository path. Can be used as a directory listing tool (`depth: 1`) or a repo-tree tool (`depth > 1`). + +Parameters: +| Name | Required | Description | +|:---------------------|:---------|:--------------------------------------------------------------------------------------------------------------| +| `repo` | yes | The name of the repository to list files from. | +| `path` | no | Directory path (relative to repo root). If omitted, the repo root is used. | +| `ref` | no | Commit SHA, branch or tag name to list files from. If not provided, uses the default branch. | +| `depth` | no | Number of directory levels to traverse below `path` (min 1, max 10, default: 1). | +| `includeFiles` | no | Whether to include file entries in the output (default: true). | +| `includeDirectories` | no | Whether to include directory entries in the output (default: true). | +| `maxEntries` | no | Maximum number of entries to return before truncating (min 1, max 10000, default: 1000). | + + ### `list_commits` Get a list of commits for a given repository. diff --git a/packages/mcp/CHANGELOG.md b/packages/mcp/CHANGELOG.md index 1044dcb2f..be42d2fb9 100644 --- a/packages/mcp/CHANGELOG.md +++ b/packages/mcp/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- Added `list_tree` tool for listing files/directories in a repository path with depth controls, suitable for both directory listings and repo-tree workflows. 
[#870](https://github.com/sourcebot-dev/sourcebot/pull/870) + ## [1.0.15] - 2026-02-02 ### Added @@ -94,4 +97,4 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [1.0.0] - 2025-05-07 ### Added -- Initial release \ No newline at end of file +- Initial release diff --git a/packages/mcp/README.md b/packages/mcp/README.md index 537eb92f2..c58620dae 100644 --- a/packages/mcp/README.md +++ b/packages/mcp/README.md @@ -214,6 +214,25 @@ Reads the source code for a given file. | `ref` | no | Commit SHA, branch or tag name to fetch the source code for. If not provided, uses the default branch. | +### list_tree + +Lists files and directories from a repository path. Can be used as a directory listing tool (`depth: 1`) or a repo-tree tool (`depth > 1`). + +<details>
+<summary>Parameters</summary> + +| Name | Required | Description | +|:---------------------|:---------|:--------------------------------------------------------------------------------------------------------------| +| `repo` | yes | The name of the repository to list files from. | +| `path` | no | Directory path (relative to repo root). If omitted, the repo root is used. | +| `ref` | no | Commit SHA, branch or tag name to list files from. If not provided, uses the default branch. | +| `depth` | no | Number of directory levels to traverse below `path` (min 1, max 10, default: 1). | +| `includeFiles` | no | Whether to include file entries in the output (default: true). | +| `includeDirectories` | no | Whether to include directory entries in the output (default: true). | +| `maxEntries` | no | Maximum number of entries to return before truncating (min 1, max 10000, default: 1000). | + +</details>
+ ### list_commits Get a list of commits for a given repository. diff --git a/packages/mcp/src/client.ts b/packages/mcp/src/client.ts index 896692a56..52613e2f7 100644 --- a/packages/mcp/src/client.ts +++ b/packages/mcp/src/client.ts @@ -1,6 +1,6 @@ import { env } from './env.js'; -import { listReposResponseSchema, searchResponseSchema, fileSourceResponseSchema, listCommitsResponseSchema, askCodebaseResponseSchema, listLanguageModelsResponseSchema } from './schemas.js'; -import { AskCodebaseRequest, AskCodebaseResponse, FileSourceRequest, ListReposQueryParams, SearchRequest, ListCommitsQueryParamsSchema, ListLanguageModelsResponse } from './types.js'; +import { listReposResponseSchema, searchResponseSchema, fileSourceResponseSchema, listCommitsResponseSchema, askCodebaseResponseSchema, listLanguageModelsResponseSchema, listTreeApiResponseSchema } from './schemas.js'; +import { AskCodebaseRequest, AskCodebaseResponse, FileSourceRequest, ListReposQueryParams, SearchRequest, ListCommitsQueryParamsSchema, ListLanguageModelsResponse, ListTreeApiRequest, ListTreeApiResponse } from './types.js'; import { isServiceError, ServiceErrorException } from './utils.js'; import { z } from 'zod'; @@ -108,6 +108,26 @@ export const listCommits = async (queryParams: ListCommitsQueryParamsSchema) => return { commits, totalCount }; } +/** + * Fetches a repository tree (or subtree union) from the Sourcebot tree API. + * + * @param request - Repository name, revision, and path selectors for the tree query + * @returns A tree response rooted at `tree` containing nested `tree`/`blob` nodes + */ +export const listTree = async (request: ListTreeApiRequest): Promise<ListTreeApiResponse> => { + const response = await fetch(`${env.SOURCEBOT_HOST}/api/tree`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'X-Sourcebot-Client-Source': 'mcp', + ...(env.SOURCEBOT_API_KEY ? 
{ 'X-Sourcebot-Api-Key': env.SOURCEBOT_API_KEY } : {}) + }, + body: JSON.stringify(request), + }); + + return parseResponse(response, listTreeApiResponseSchema); +} + /** * Asks a natural language question about the codebase using the Sourcebot AI agent. * This is a blocking call that runs the full agent loop and returns when complete. diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index 68e8e8e98..198bc401e 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -6,10 +6,11 @@ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js' import _dedent from "dedent"; import escapeStringRegexp from 'escape-string-regexp'; import { z } from 'zod'; -import { askCodebase, getFileSource, listCommits, listLanguageModels, listRepos, search } from './client.js'; +import { askCodebase, getFileSource, listCommits, listLanguageModels, listRepos, listTree, search } from './client.js'; import { env, numberSchema } from './env.js'; -import { askCodebaseRequestSchema, fileSourceRequestSchema, listCommitsQueryParamsSchema, listReposQueryParamsSchema } from './schemas.js'; -import { AskCodebaseRequest, FileSourceRequest, ListCommitsQueryParamsSchema, ListReposQueryParams, TextContent } from './types.js'; +import { askCodebaseRequestSchema, DEFAULT_MAX_TREE_ENTRIES, DEFAULT_TREE_DEPTH, fileSourceRequestSchema, listCommitsQueryParamsSchema, listReposQueryParamsSchema, listTreeRequestSchema, MAX_MAX_TREE_ENTRIES, MAX_TREE_DEPTH } from './schemas.js'; +import { AskCodebaseRequest, FileSourceRequest, ListCommitsQueryParamsSchema, ListReposQueryParams, ListTreeEntry, ListTreeRequest, TextContent } from './types.js'; +import { buildTreeNodeIndex, joinTreePath, normalizeTreePath, sortTreeEntries } from './utils.js'; const dedent = _dedent.withOptions({ alignValues: true }); @@ -238,6 +239,155 @@ server.tool( } ); +server.tool( + "list_tree", + dedent` + Lists files and directories from a repository path. 
This can be used as a repo tree tool or directory listing tool. + Returns a flat list of entries with path metadata and depth relative to the requested path. + `, + listTreeRequestSchema.shape, + async ({ + repo, + path = '', + ref = 'HEAD', + depth = DEFAULT_TREE_DEPTH, + includeFiles = true, + includeDirectories = true, + maxEntries = DEFAULT_MAX_TREE_ENTRIES, + }: ListTreeRequest) => { + const normalizedPath = normalizeTreePath(path); + const normalizedDepth = Math.min(depth, MAX_TREE_DEPTH); + const normalizedMaxEntries = Math.min(maxEntries, MAX_MAX_TREE_ENTRIES); + + if (!includeFiles && !includeDirectories) { + return { + content: [{ + type: "text", + text: JSON.stringify({ + repo, + ref, + path: normalizedPath, + entries: [] as ListTreeEntry[], + totalReturned: 0, + truncated: false, + }), + }], + }; + } + + // BFS frontier of directories still to expand. Each item stores a repo-relative + // directory path plus the current depth from the requested root `path`. + const queue: Array<{ path: string; depth: number }> = [{ path: normalizedPath, depth: 0 }]; + + // Tracks directory paths that have already been enqueued. + // With the current single-root traversal duplicates are uncommon, but this + // prevents duplicate expansion if we later support overlapping multi-root + // inputs (e.g. ["src", "src/lib"]) or receive overlapping tree data. + const queuedPaths = new Set([normalizedPath]); + + const seenEntries = new Set(); + const entries: ListTreeEntry[] = []; + let truncated = false; + + // Traverse breadth-first by depth, batching all directories at the same + // depth into a single /api/tree request per iteration. + while (queue.length > 0 && !truncated) { + const currentDepth = queue[0]!.depth; + const currentLevelPaths: string[] = []; + + // Drain only the current depth level so we can issue one API call + // for all sibling directories before moving deeper. 
+ while (queue.length > 0 && queue[0]!.depth === currentDepth) { + const next = queue.shift()!; + currentLevelPaths.push(next.path); + } + + // Ask Sourcebot for a tree spanning all requested paths at this level. + const treeResponse = await listTree({ + repoName: repo, + revisionName: ref, + paths: currentLevelPaths.filter(Boolean), + }); + const treeNodeIndex = buildTreeNodeIndex(treeResponse.tree); + + for (const currentPath of currentLevelPaths) { + const currentNode = currentPath === '' ? treeResponse.tree : treeNodeIndex.get(currentPath); + if (!currentNode || currentNode.type !== 'tree') { + // Skip paths that are missing from the response or resolve to a + // file node. We only iterate children of directories. + continue; + } + + for (const child of currentNode.children) { + if (child.type !== 'tree' && child.type !== 'blob') { + // Skip non-standard git object types (e.g. unexpected entries) + // since this tool only exposes directories and files. + continue; + } + + const childPath = joinTreePath(currentPath, child.name); + const childDepth = currentDepth + 1; + + // Queue child directories for the next depth level only if + // they are within the requested depth bound. + if (child.type === 'tree' && childDepth < normalizedDepth && !queuedPaths.has(childPath)) { + queue.push({ path: childPath, depth: childDepth }); + queuedPaths.add(childPath); + } + + if ((child.type === 'blob' && !includeFiles) || (child.type === 'tree' && !includeDirectories)) { + // Skip entries filtered out by caller preferences + // (`includeFiles` / `includeDirectories`). + continue; + } + + const key = `${child.type}:${childPath}`; + if (seenEntries.has(key)) { + // Skip duplicates when multiple requested paths overlap and + // surface the same child entry. + continue; + } + seenEntries.add(key); + + // Stop collecting once the entry budget is exhausted. 
+ if (entries.length >= normalizedMaxEntries) { + truncated = true; + break; + } + + entries.push({ + type: child.type, + path: childPath, + name: child.name, + parentPath: currentPath, + depth: childDepth, + }); + } + + if (truncated) { + break; + } + } + } + + const sortedEntries = sortTreeEntries(entries); + + return { + content: [{ + type: "text", + text: JSON.stringify({ + repo, + ref, + path: normalizedPath, + entries: sortedEntries, + totalReturned: sortedEntries.length, + truncated, + }), + }] + }; + } +); + server.tool( "list_language_models", dedent`Lists the available language models configured on the Sourcebot instance. Use this to discover which models can be specified when calling ask_codebase.`, diff --git a/packages/mcp/src/schemas.ts b/packages/mcp/src/schemas.ts index 41a9fd456..fe6e70087 100644 --- a/packages/mcp/src/schemas.ts +++ b/packages/mcp/src/schemas.ts @@ -216,6 +216,94 @@ export const fileSourceResponseSchema = z.object({ externalWebUrl: z.string().optional(), }); +type TreeNode = { + type: string; + path: string; + name: string; + children: TreeNode[]; +}; + +const treeNodeSchema: z.ZodType = z.lazy(() => z.object({ + type: z.string(), + path: z.string(), + name: z.string(), + children: z.array(treeNodeSchema), +})); + +export const listTreeApiRequestSchema = z.object({ + repoName: z.string(), + revisionName: z.string(), + paths: z.array(z.string()), +}); + +export const listTreeApiResponseSchema = z.object({ + tree: treeNodeSchema, +}); + +export const DEFAULT_TREE_DEPTH = 1; +export const MAX_TREE_DEPTH = 10; +export const DEFAULT_MAX_TREE_ENTRIES = 1000; +export const MAX_MAX_TREE_ENTRIES = 10000; + +export const listTreeRequestSchema = z.object({ + repo: z + .string() + .describe("The name of the repository to list files from."), + path: z + .string() + .describe("Directory path (relative to repo root). 
If omitted, the repo root is used.") + .optional() + .default(''), + ref: z + .string() + .describe("Commit SHA, branch or tag name to list files from. If not provided, uses the default branch.") + .optional() + .default('HEAD'), + depth: z + .number() + .int() + .positive() + .max(MAX_TREE_DEPTH) + .describe(`How many directory levels to traverse below \`path\` (min 1, max ${MAX_TREE_DEPTH}, default ${DEFAULT_TREE_DEPTH}).`) + .optional() + .default(DEFAULT_TREE_DEPTH), + includeFiles: z + .boolean() + .describe("Whether to include files in the output (default: true).") + .optional() + .default(true), + includeDirectories: z + .boolean() + .describe("Whether to include directories in the output (default: true).") + .optional() + .default(true), + maxEntries: z + .number() + .int() + .positive() + .max(MAX_MAX_TREE_ENTRIES) + .describe(`Maximum number of entries to return (min 1, max ${MAX_MAX_TREE_ENTRIES}, default ${DEFAULT_MAX_TREE_ENTRIES}).`) + .optional() + .default(DEFAULT_MAX_TREE_ENTRIES), +}); + +export const listTreeEntrySchema = z.object({ + type: z.enum(['tree', 'blob']), + path: z.string(), + name: z.string(), + parentPath: z.string(), + depth: z.number().int().positive(), +}); + +export const listTreeResponseSchema = z.object({ + repo: z.string(), + ref: z.string(), + path: z.string(), + entries: z.array(listTreeEntrySchema), + totalReturned: z.number().int().nonnegative(), + truncated: z.boolean(), +}); + export const serviceErrorSchema = z.object({ statusCode: z.number(), errorCode: z.string(), diff --git a/packages/mcp/src/types.ts b/packages/mcp/src/types.ts index 55266ba1e..63c050856 100644 --- a/packages/mcp/src/types.ts +++ b/packages/mcp/src/types.ts @@ -16,6 +16,11 @@ import { askCodebaseResponseSchema, languageModelInfoSchema, listLanguageModelsResponseSchema, + listTreeApiRequestSchema, + listTreeApiResponseSchema, + listTreeRequestSchema, + listTreeEntrySchema, + listTreeResponseSchema, } from "./schemas.js"; import { z } from "zod"; @@ 
-44,3 +49,11 @@ export type AskCodebaseResponse = z.infer<typeof askCodebaseResponseSchema>; export type LanguageModelInfo = z.infer<typeof languageModelInfoSchema>; export type ListLanguageModelsResponse = z.infer<typeof listLanguageModelsResponseSchema>; + +export type ListTreeApiRequest = z.infer<typeof listTreeApiRequestSchema>; +export type ListTreeApiResponse = z.infer<typeof listTreeApiResponseSchema>; +export type ListTreeApiNode = ListTreeApiResponse["tree"]; + +export type ListTreeRequest = z.input<typeof listTreeRequestSchema>; +export type ListTreeEntry = z.infer<typeof listTreeEntrySchema>; +export type ListTreeResponse = z.infer<typeof listTreeResponseSchema>; diff --git a/packages/mcp/src/utils.ts b/packages/mcp/src/utils.ts index 56e02f3e1..fc1ee56c3 100644 --- a/packages/mcp/src/utils.ts +++ b/packages/mcp/src/utils.ts @@ -1,4 +1,4 @@ -import { ServiceError } from "./types.js"; +import { ListTreeApiNode, ListTreeEntry, ServiceError } from "./types.js"; export const isServiceError = (data: unknown): data is ServiceError => { @@ -14,3 +14,55 @@ export class ServiceErrorException extends Error { super(JSON.stringify(serviceError)); } } + +export const normalizeTreePath = (path: string): string => { + const withoutLeading = path.replace(/^\/+/, ''); + return withoutLeading.replace(/\/+$/, ''); +} + +export const joinTreePath = (parentPath: string, name: string): string => { + if (!parentPath) { + return name; + } + + return `${parentPath}/${name}`; +} + +export const buildTreeNodeIndex = (root: ListTreeApiNode): Map<string, ListTreeApiNode> => { + const nodeIndex = new Map<string, ListTreeApiNode>(); + + const visit = (node: ListTreeApiNode, currentPath: string) => { + nodeIndex.set(currentPath, node); + + for (const child of node.children) { + visit(child, joinTreePath(currentPath, child.name)); + } + }; + + visit(root, ''); + + return nodeIndex; +} + +export const sortTreeEntries = (entries: ListTreeEntry[]): ListTreeEntry[] => { + const collator = new Intl.Collator(undefined, { sensitivity: 'base' }); + + return [...entries].sort((a, b) => { + const parentCompare = collator.compare(a.parentPath, b.parentPath); + if (parentCompare !== 0) { + return parentCompare; + } + + if (a.type !== b.type) { + // sort directories above files + return a.type === 'tree' ? 
-1 : 1; + } + + const nameCompare = collator.compare(a.name, b.name); + if (nameCompare !== 0) { + return nameCompare; + } + + return collator.compare(a.path, b.path); + }); +} diff --git a/packages/web/src/features/git/getTreeApi.ts b/packages/web/src/features/git/getTreeApi.ts index cd43a1659..a4af9acb7 100644 --- a/packages/web/src/features/git/getTreeApi.ts +++ b/packages/web/src/features/git/getTreeApi.ts @@ -34,7 +34,7 @@ export const getTree = async ({ repoName, revisionName, paths }: GetTreeRequest) }); if (!repo) { - return notFound(); + return notFound(`Repository "${repoName}" not found.`); } const { path: repoPath } = getRepoPath(repo);