Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
15 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions packages/cli/src/ui/hooks/atCommandProcessor.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1132,5 +1132,30 @@ describe('handleAtCommand', () => {
expect(result.toolDisplays!.length).toBeGreaterThanOrEqual(1);
expect(result.toolDisplays![0].description).toContain('file.txt');
});

it('should mark per-file failures as Error status, not Success', async () => {
// Trigger the >10MB size error in processSingleFileContent so the
// readManyFiles result carries a per-file `error` field.
const filePath = path.join(testRootDir, 'oversized.bin');
await fsPromises.mkdir(path.dirname(filePath), { recursive: true });
await fsPromises.writeFile(filePath, Buffer.alloc(10 * 1024 * 1024 + 1));
const query = `@${filePath}`;

const result = await handleAtCommand({
query,
config: mockConfig,
onDebugMessage: mockOnDebugMessage,
messageId: 504,
signal: abortController.signal,
});

expect(result.toolDisplays).toBeDefined();
expect(result.toolDisplays).toHaveLength(1);
expect(result.toolDisplays![0].status).toBe(ToolCallStatus.Error);
expect(result.toolDisplays![0].resultDisplay).toContain(
'Failed to read oversized.bin',
);
expect(result.toolDisplays![0].resultDisplay).toContain('10MB');
});
});
});
6 changes: 4 additions & 2 deletions packages/cli/src/ui/hooks/atCommandProcessor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -364,8 +364,10 @@ export async function handleAtCommand({
description: file.isDirectory
? `Read directory ${path.basename(file.filePath)}`
: `Read file ${path.basename(file.filePath)}`,
status: ToolCallStatus.Success,
resultDisplay: undefined,
status: file.error ? ToolCallStatus.Error : ToolCallStatus.Success,
resultDisplay: file.error
? `Failed to read ${path.basename(file.filePath)}: ${file.error}`
: undefined,
confirmationDetails: undefined,
}),
);
Expand Down
63 changes: 63 additions & 0 deletions packages/core/src/tools/read-file.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,69 @@ describe('ReadFileTool', () => {
expect(result.returnDisplay).toBe('');
});

it('should handle Jupyter notebook file', async () => {
const nbPath = path.join(tempRootDir, 'test.ipynb');
const notebook = {
cells: [
{
cell_type: 'code',
source: ['print("hello")'],
execution_count: 1,
outputs: [{ output_type: 'stream', text: ['hello\n'] }],
metadata: {},
},
],
metadata: { language_info: { name: 'python' } },
};
await fsp.writeFile(nbPath, JSON.stringify(notebook), 'utf-8');
const params: ReadFileToolParams = { file_path: nbPath };
const invocation = tool.build(params) as ToolInvocation<
ReadFileToolParams,
ToolResult
>;

const result = await invocation.execute(abortSignal);
expect(typeof result.llmContent).toBe('string');
expect(result.llmContent).toContain('Jupyter Notebook');
expect(result.llmContent).toContain('print("hello")');
expect(result.llmContent).toContain('hello');
expect(result.returnDisplay).toBe('Read notebook: test.ipynb');
});

it('should reject invalid pages parameter', () => {
const params: ReadFileToolParams = {
file_path: '/tmp/test.pdf',
pages: 'abc',
};
expect(() => tool.build(params)).toThrow('Invalid pages parameter');
});

it('should reject pages range exceeding 20', () => {
const params: ReadFileToolParams = {
file_path: '/tmp/test.pdf',
pages: '1-25',
};
expect(() => tool.build(params)).toThrow(
'Pages range exceeds maximum of 20',
);
});

it('should reject open-ended pages range', () => {
const params: ReadFileToolParams = {
file_path: '/tmp/test.pdf',
pages: '3-',
};
expect(() => tool.build(params)).toThrow('Open-ended page ranges');
});

it('should accept valid pages parameter', () => {
const params: ReadFileToolParams = {
file_path: path.join(tempRootDir, 'test.pdf'),
pages: '1-5',
};
expect(() => tool.build(params)).not.toThrow();
});

it('should support offset and limit for text files', async () => {
const filePath = path.join(tempRootDir, 'paginated.txt');
const lines = Array.from({ length: 20 }, (_, i) => `Line ${i + 1}`);
Expand Down
34 changes: 33 additions & 1 deletion packages/core/src/tools/read-file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import {
processSingleFileContent,
getSpecificMimeType,
} from '../utils/fileUtils.js';
import { parsePDFPageRange } from '../utils/pdf.js';
import type { Config } from '../config/config.js';
import { FileOperation } from '../telemetry/metrics.js';
import { getProgrammingLanguage } from '../telemetry/telemetry-utils.js';
Expand All @@ -43,6 +44,13 @@ export interface ReadFileToolParams {
* The number of lines to read (optional)
*/
limit?: number;

/**
* For PDF files, the page range to extract as text (e.g. "1-5", "3", "10-20").
* Pages are 1-indexed. Max 20 pages per request. Open-ended ranges like "3-"
* are not supported.
*/
pages?: string;
}

class ReadFileToolInvocation extends BaseToolInvocation<
Expand All @@ -63,6 +71,10 @@ class ReadFileToolInvocation extends BaseToolInvocation<
);
const shortPath = shortenPath(relativePath);

if (this.params.pages) {
return `${shortPath} (pages ${this.params.pages})`;
}

const { offset, limit } = this.params;
if (offset !== undefined && limit !== undefined) {
return `${shortPath} (lines ${offset + 1}-${offset + limit})`;
Expand Down Expand Up @@ -111,6 +123,7 @@ class ReadFileToolInvocation extends BaseToolInvocation<
this.config,
this.params.offset,
this.params.limit,
this.params.pages,
);

if (result.error) {
Expand Down Expand Up @@ -173,7 +186,7 @@ export class ReadFileTool extends BaseDeclarativeTool<
super(
ReadFileTool.Name,
ToolDisplayNames.READ_FILE,
`Reads and returns the content of a specified file. If the file is large, the content will be truncated. The tool's response will clearly indicate if truncation has occurred and will provide details on how to read more of the file using the 'offset' and 'limit' parameters. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), and PDF files. For text files, it can read specific line ranges.`,
`Reads and returns the content of a specified file. If the file is large, the content will be truncated. The tool's response will clearly indicate if truncation has occurred and will provide details on how to read more of the file using the 'offset' and 'limit' parameters. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), PDF files, and Jupyter notebooks (.ipynb). For text files, it can read specific line ranges. For PDF files, use the 'pages' parameter to extract specific page ranges as text (e.g. '1-5'). Max 20 pages per request. This tool can read Jupyter notebooks (.ipynb) and returns structured cell content with outputs.`,
Kind.Read,
{
properties: {
Expand All @@ -192,6 +205,11 @@ export class ReadFileTool extends BaseDeclarativeTool<
"Optional: For text files, maximum number of lines to read. Use with 'offset' to paginate through large files. If omitted, reads the entire file (if feasible, up to a default limit).",
type: 'number',
},
pages: {
description:
"Optional: For PDF files, the page range to extract as text (e.g., '1-5', '3', '10-20'). Pages are 1-indexed. Max 20 pages per request. Open-ended ranges like '3-' are not supported. When provided, PDF content is extracted as text regardless of model capabilities.",
type: 'string',
},
},
required: ['file_path'],
type: 'object',
Expand All @@ -218,6 +236,20 @@ export class ReadFileTool extends BaseDeclarativeTool<
return 'Limit must be a positive number';
}

if (params.pages !== undefined) {
const parsed = parsePDFPageRange(params.pages);
Comment thread
wenshao marked this conversation as resolved.
if (!parsed) {
return `Invalid pages parameter: '${params.pages}'. Use formats like '5' or '1-10'.`;
}
if (parsed.lastPage === Infinity) {
return `Open-ended page ranges (e.g. '3-') are not supported; specify an explicit end page within the 20-page limit (e.g. '3-22').`;
}
const maxPages = 20;
if (parsed.lastPage - parsed.firstPage + 1 > maxPages) {
return `Pages range exceeds maximum of ${maxPages} pages per request.`;
}
}

const fileService = this.config.getFileService();
if (fileService.shouldQwenIgnoreFile(params.file_path)) {
return `File path '${filePath}' is ignored by .qwenignore pattern(s).`;
Expand Down
155 changes: 148 additions & 7 deletions packages/core/src/utils/fileUtils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -802,6 +802,10 @@ describe('fileUtils', () => {
expect(await detectFileType(filePathForDetectTest)).toBe('binary');
});

it('should detect .ipynb as notebook', async () => {
expect(await detectFileType('analysis.ipynb')).toBe('notebook');
});

it('should default to text if mime type is unknown and content is not binary', async () => {
mockMimeGetType.mockReturnValueOnce(false); // Unknown mime type
// filePathForDetectTest is already a text file by default from beforeEach
Expand Down Expand Up @@ -915,7 +919,7 @@ describe('fileUtils', () => {
expect(result.returnDisplay).toContain('Skipped image file');
});

it('should reject PDF files when model does not support PDF', async () => {
it('should fall back to pdftotext when model does not support PDF', async () => {
const fakePdfData = Buffer.from('fake pdf data');
actualNodeFs.writeFileSync(testPdfFilePath, fakePdfData);
mockMimeGetType.mockReturnValue('application/pdf');
Expand All @@ -932,12 +936,84 @@ describe('fileUtils', () => {
mockConfigNoPdf,
);
expect(typeof result.llmContent).toBe('string');
expect(result.llmContent).toContain('Unsupported pdf file');
expect(result.llmContent).toContain(
'does not support PDF input directly',
);
expect(result.llmContent).toContain('/extensions install');
expect(result.returnDisplay).toContain('Skipped pdf file');
// When pdftotext is not installed, should return a helpful error
// rather than silently skipping
expect(result.llmContent).toContain('Cannot extract text from PDF');
expect(result.returnDisplay).toContain('Failed to read pdf');
});

it('should skip the 10MB size gate when extracting PDF text by pages', async () => {
// Tiny file on disk — the fs.stat spy below reports a size >10MB so
// the upstream size gate would reject if it still ran. With the
// text-extraction path we want pdftotext to handle oversized PDFs,
// since it streams the file and the output is capped downstream.
const fakePdfData = Buffer.from('fake pdf data');
actualNodeFs.writeFileSync(testPdfFilePath, fakePdfData);
mockMimeGetType.mockReturnValue('application/pdf');

const statSpy = vi.spyOn(fs.promises, 'stat').mockResolvedValueOnce({
size: 15 * 1024 * 1024,
isDirectory: () => false,
isFile: () => true,
} as fs.Stats);

try {
const mockConfigNoPdf = {
...mockConfig,
getContentGeneratorConfig: () => ({
modalities: { image: true },
}),
} as unknown as Config;

const result = await processSingleFileContent(
testPdfFilePath,
mockConfigNoPdf,
undefined,
undefined,
'1-5',
);

// Must not be rejected by the generic 10MB gate.
expect(result.error ?? '').not.toContain('10MB limit');
expect(result.llmContent).not.toMatch(/exceeds the 10MB limit/i);
// Routed into the pdftotext path — either success or the
// install-guidance error, never "File size exceeds the 10MB limit".
expect(result.returnDisplay ?? '').toMatch(/pdf/i);
} finally {
statSpy.mockRestore();
}
});

it('should still reject oversized PDFs when routing to the native base64 path', async () => {
// When the model supports PDF modality and no pages arg is provided,
// the base64 path applies and the 10MB inline-data cap still matters.
const fakePdfData = Buffer.from('fake pdf data');
actualNodeFs.writeFileSync(testPdfFilePath, fakePdfData);
mockMimeGetType.mockReturnValue('application/pdf');

const statSpy = vi.spyOn(fs.promises, 'stat').mockResolvedValueOnce({
size: 15 * 1024 * 1024,
isDirectory: () => false,
isFile: () => true,
} as fs.Stats);

try {
const mockConfigWithPdf = {
...mockConfig,
getContentGeneratorConfig: () => ({
modalities: { image: true, pdf: true },
}),
} as unknown as Config;

const result = await processSingleFileContent(
testPdfFilePath,
mockConfigWithPdf,
);

expect(result.error).toContain('10MB limit');
} finally {
statSpy.mockRestore();
}
});

it('should accept PDF files when model supports PDF', async () => {
Expand Down Expand Up @@ -1157,6 +1233,7 @@ describe('fileUtils', () => {
const statSpy = vi.spyOn(fs.promises, 'stat').mockResolvedValueOnce({
size: 11 * 1024 * 1024,
isDirectory: () => false,
isFile: () => true,
} as fs.Stats);

try {
Expand All @@ -1174,5 +1251,69 @@ describe('fileUtils', () => {
statSpy.mockRestore();
}
});

it('should reject PDFs that exceed the text-extraction size cap (100MB)', async () => {
const fakePdfData = Buffer.from('fake pdf data');
actualNodeFs.writeFileSync(testPdfFilePath, fakePdfData);
mockMimeGetType.mockReturnValue('application/pdf');

// 200MB PDF — text-extraction path skips the 10MB gate but still
// needs a sane ceiling so pdftotext can't be asked to stream GBs
// until the 30s timeout fires.
const statSpy = vi.spyOn(fs.promises, 'stat').mockResolvedValueOnce({
size: 200 * 1024 * 1024,
isDirectory: () => false,
isFile: () => true,
} as fs.Stats);

try {
const mockConfigNoPdf = {
...mockConfig,
getContentGeneratorConfig: () => ({
modalities: { image: true },
}),
} as unknown as Config;

const result = await processSingleFileContent(
testPdfFilePath,
mockConfigNoPdf,
undefined,
undefined,
'1-5',
);

expect(result.error).toMatch(/exceeds extraction size limit/i);
expect(result.returnDisplay).toMatch(/PDF file too large/i);
expect(result.errorType).toBeDefined();
} finally {
statSpy.mockRestore();
}
});

it('should reject non-regular files (FIFOs, devices, sockets)', async () => {
actualNodeFs.writeFileSync(testTextFilePath, 'placeholder');

// A FIFO / socket / /dev/zero shows up as a non-file, non-directory
// stat entry. stats.size is typically 0 or meaningless, so without
// this guard a caller could accidentally stream /dev/zero through
// pdftotext until the timeout fires.
const statSpy = vi.spyOn(fs.promises, 'stat').mockResolvedValueOnce({
size: 0,
isDirectory: () => false,
isFile: () => false,
} as fs.Stats);

try {
const result = await processSingleFileContent(
testTextFilePath,
mockConfig,
);

expect(result.error).toMatch(/not a regular file/i);
expect(result.returnDisplay).toMatch(/not a regular file/i);
} finally {
statSpy.mockRestore();
}
});
});
});
Loading
Loading