QwenLM · wenshao · Apr 20, 2026 · Apr 12, 2026 · Apr 12, 2026 · Apr 12, 2026
diff --git a/packages/cli/src/ui/hooks/atCommandProcessor.test.ts b/packages/cli/src/ui/hooks/atCommandProcessor.test.ts
@@ -1132,5 +1132,30 @@ describe('handleAtCommand', () => {
       expect(result.toolDisplays!.length).toBeGreaterThanOrEqual(1);
       expect(result.toolDisplays![0].description).toContain('file.txt');
     });
+
+    it('should mark per-file failures as Error status, not Success', async () => {
+      // Trigger the >10MB size error in processSingleFileContent so the
+      // readManyFiles result carries a per-file `error` field.
+      const filePath = path.join(testRootDir, 'oversized.bin');
+      await fsPromises.mkdir(path.dirname(filePath), { recursive: true });
+      await fsPromises.writeFile(filePath, Buffer.alloc(10 * 1024 * 1024 + 1));
+      const query = `@${filePath}`;
+
+      const result = await handleAtCommand({
+        query,
+        config: mockConfig,
+        onDebugMessage: mockOnDebugMessage,
+        messageId: 504,
+        signal: abortController.signal,
+      });
+
+      expect(result.toolDisplays).toBeDefined();
+      expect(result.toolDisplays).toHaveLength(1);
+      expect(result.toolDisplays![0].status).toBe(ToolCallStatus.Error);
+      expect(result.toolDisplays![0].resultDisplay).toContain(
+        'Failed to read oversized.bin',
+      );
+      expect(result.toolDisplays![0].resultDisplay).toContain('10MB');
+    });
   });
 });
diff --git a/packages/cli/src/ui/hooks/atCommandProcessor.ts b/packages/cli/src/ui/hooks/atCommandProcessor.ts
@@ -364,8 +364,10 @@ export async function handleAtCommand({
         description: file.isDirectory
           ? `Read directory ${path.basename(file.filePath)}`
           : `Read file ${path.basename(file.filePath)}`,
-        status: ToolCallStatus.Success,
-        resultDisplay: undefined,
+        status: file.error ? ToolCallStatus.Error : ToolCallStatus.Success,
+        resultDisplay: file.error
+          ? `Failed to read ${path.basename(file.filePath)}: ${file.error}`
+          : undefined,
         confirmationDetails: undefined,
       }),
     );

diff --git a/packages/core/src/tools/read-file.test.ts b/packages/core/src/tools/read-file.test.ts
@@ -417,6 +417,69 @@ describe('ReadFileTool', () => {
       expect(result.returnDisplay).toBe('');
     });
 
+    it('should handle Jupyter notebook file', async () => {
+      const nbPath = path.join(tempRootDir, 'test.ipynb');
+      const notebook = {
+        cells: [
+          {
+            cell_type: 'code',
+            source: ['print("hello")'],
+            execution_count: 1,
+            outputs: [{ output_type: 'stream', text: ['hello\n'] }],
+            metadata: {},
+          },
+        ],
+        metadata: { language_info: { name: 'python' } },
+      };
+      await fsp.writeFile(nbPath, JSON.stringify(notebook), 'utf-8');
+      const params: ReadFileToolParams = { file_path: nbPath };
+      const invocation = tool.build(params) as ToolInvocation<
+        ReadFileToolParams,
+        ToolResult
+      >;
+
+      const result = await invocation.execute(abortSignal);
+      expect(typeof result.llmContent).toBe('string');
+      expect(result.llmContent).toContain('Jupyter Notebook');
+      expect(result.llmContent).toContain('print("hello")');
+      expect(result.llmContent).toContain('hello');
+      expect(result.returnDisplay).toBe('Read notebook: test.ipynb');
+    });
+
+    it('should reject invalid pages parameter', () => {
+      const params: ReadFileToolParams = {
+        file_path: '/tmp/test.pdf',
+        pages: 'abc',
+      };
+      expect(() => tool.build(params)).toThrow('Invalid pages parameter');
+    });
+
+    it('should reject pages range exceeding 20', () => {
+      const params: ReadFileToolParams = {
+        file_path: '/tmp/test.pdf',
+        pages: '1-25',
+      };
+      expect(() => tool.build(params)).toThrow(
+        'Pages range exceeds maximum of 20',
+      );
+    });
+
+    it('should reject open-ended pages range', () => {
+      const params: ReadFileToolParams = {
+        file_path: '/tmp/test.pdf',
+        pages: '3-',
+      };
+      expect(() => tool.build(params)).toThrow('Open-ended page ranges');
+    });
+
+    it('should accept valid pages parameter', () => {
+      const params: ReadFileToolParams = {
+        file_path: path.join(tempRootDir, 'test.pdf'),
+        pages: '1-5',
+      };
+      expect(() => tool.build(params)).not.toThrow();
+    });
+
     it('should support offset and limit for text files', async () => {
       const filePath = path.join(tempRootDir, 'paginated.txt');
       const lines = Array.from({ length: 20 }, (_, i) => `Line ${i + 1}`);

diff --git a/packages/core/src/tools/read-file.ts b/packages/core/src/tools/read-file.ts
@@ -17,6 +17,7 @@ import {
   processSingleFileContent,
   getSpecificMimeType,
 } from '../utils/fileUtils.js';
+import { parsePDFPageRange } from '../utils/pdf.js';
 import type { Config } from '../config/config.js';
 import { FileOperation } from '../telemetry/metrics.js';
 import { getProgrammingLanguage } from '../telemetry/telemetry-utils.js';
@@ -43,6 +44,13 @@ export interface ReadFileToolParams {
    * The number of lines to read (optional)
    */
   limit?: number;
+
+  /**
+   * For PDF files, the page range to extract as text (e.g. "1-5", "3", "10-20").
+   * Pages are 1-indexed. Max 20 pages per request. Open-ended ranges like "3-"
+   * are not supported.
+   */
+  pages?: string;
 }
 
 class ReadFileToolInvocation extends BaseToolInvocation<
@@ -63,6 +71,10 @@ class ReadFileToolInvocation extends BaseToolInvocation<
     );
     const shortPath = shortenPath(relativePath);
 
+    if (this.params.pages) {
+      return `${shortPath} (pages ${this.params.pages})`;
+    }
+
     const { offset, limit } = this.params;
     if (offset !== undefined && limit !== undefined) {
       return `${shortPath} (lines ${offset + 1}-${offset + limit})`;
@@ -111,6 +123,7 @@ class ReadFileToolInvocation extends BaseToolInvocation<
       this.config,
       this.params.offset,
       this.params.limit,
+      this.params.pages,
     );
 
     if (result.error) {
@@ -173,7 +186,7 @@ export class ReadFileTool extends BaseDeclarativeTool<
     super(
       ReadFileTool.Name,
       ToolDisplayNames.READ_FILE,
-      `Reads and returns the content of a specified file. If the file is large, the content will be truncated. The tool's response will clearly indicate if truncation has occurred and will provide details on how to read more of the file using the 'offset' and 'limit' parameters. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), and PDF files. For text files, it can read specific line ranges.`,
+      `Reads and returns the content of a specified file. If the file is large, the content will be truncated. The tool's response will clearly indicate if truncation has occurred and will provide details on how to read more of the file using the 'offset' and 'limit' parameters. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), PDF files, and Jupyter notebooks (.ipynb). For text files, it can read specific line ranges. For PDF files, use the 'pages' parameter to extract specific page ranges as text (e.g. '1-5'). Max 20 pages per request. This tool can read Jupyter notebooks (.ipynb) and returns structured cell content with outputs.`,
       Kind.Read,
       {
         properties: {
@@ -192,6 +205,11 @@ export class ReadFileTool extends BaseDeclarativeTool<
               "Optional: For text files, maximum number of lines to read. Use with 'offset' to paginate through large files. If omitted, reads the entire file (if feasible, up to a default limit).",
             type: 'number',
           },
+          pages: {
+            description:
+              "Optional: For PDF files, the page range to extract as text (e.g., '1-5', '3', '10-20'). Pages are 1-indexed. Max 20 pages per request. Open-ended ranges like '3-' are not supported. When provided, PDF content is extracted as text regardless of model capabilities.",
+            type: 'string',
+          },
         },
         required: ['file_path'],
         type: 'object',
@@ -218,6 +236,20 @@ export class ReadFileTool extends BaseDeclarativeTool<
       return 'Limit must be a positive number';
     }
 
+    if (params.pages !== undefined) {
+      const parsed = parsePDFPageRange(params.pages);
+      if (!parsed) {
+        return `Invalid pages parameter: '${params.pages}'. Use formats like '5' or '1-10'.`;
+      }
+      if (parsed.lastPage === Infinity) {
+        return `Open-ended page ranges (e.g. '3-') are not supported; specify an explicit end page within the 20-page limit (e.g. '3-22').`;
+      }
+      const maxPages = 20;
+      if (parsed.lastPage - parsed.firstPage + 1 > maxPages) {
+        return `Pages range exceeds maximum of ${maxPages} pages per request.`;
+      }
+    }
+
     const fileService = this.config.getFileService();
     if (fileService.shouldQwenIgnoreFile(params.file_path)) {
       return `File path '${filePath}' is ignored by .qwenignore pattern(s).`;

diff --git a/packages/core/src/utils/fileUtils.test.ts b/packages/core/src/utils/fileUtils.test.ts
@@ -802,6 +802,10 @@ describe('fileUtils', () => {
       expect(await detectFileType(filePathForDetectTest)).toBe('binary');
     });
 
+    it('should detect .ipynb as notebook', async () => {
+      expect(await detectFileType('analysis.ipynb')).toBe('notebook');
+    });
+
     it('should default to text if mime type is unknown and content is not binary', async () => {
       mockMimeGetType.mockReturnValueOnce(false); // Unknown mime type
       // filePathForDetectTest is already a text file by default from beforeEach
@@ -915,7 +919,7 @@ describe('fileUtils', () => {
       expect(result.returnDisplay).toContain('Skipped image file');
     });
 
-    it('should reject PDF files when model does not support PDF', async () => {
+    it('should fall back to pdftotext when model does not support PDF', async () => {
       const fakePdfData = Buffer.from('fake pdf data');
       actualNodeFs.writeFileSync(testPdfFilePath, fakePdfData);
       mockMimeGetType.mockReturnValue('application/pdf');
@@ -932,12 +936,84 @@ describe('fileUtils', () => {
         mockConfigNoPdf,
       );
       expect(typeof result.llmContent).toBe('string');
-      expect(result.llmContent).toContain('Unsupported pdf file');
-      expect(result.llmContent).toContain(
-        'does not support PDF input directly',
-      );
-      expect(result.llmContent).toContain('/extensions install');
-      expect(result.returnDisplay).toContain('Skipped pdf file');
+      // When pdftotext is not installed, should return a helpful error
+      // rather than silently skipping
+      expect(result.llmContent).toContain('Cannot extract text from PDF');
+      expect(result.returnDisplay).toContain('Failed to read pdf');
+    });
+
+    it('should skip the 10MB size gate when extracting PDF text by pages', async () => {
+      // Tiny file on disk — the fs.stat spy below reports a size >10MB so
+      // the upstream size gate would reject if it still ran. With the
+      // text-extraction path we want pdftotext to handle oversized PDFs,
+      // since it streams the file and the output is capped downstream.
+      const fakePdfData = Buffer.from('fake pdf data');
+      actualNodeFs.writeFileSync(testPdfFilePath, fakePdfData);
+      mockMimeGetType.mockReturnValue('application/pdf');
+
+      const statSpy = vi.spyOn(fs.promises, 'stat').mockResolvedValueOnce({
+        size: 15 * 1024 * 1024,
+        isDirectory: () => false,
+        isFile: () => true,
+      } as fs.Stats);
+
+      try {
+        const mockConfigNoPdf = {
+          ...mockConfig,
+          getContentGeneratorConfig: () => ({
+            modalities: { image: true },
+          }),
+        } as unknown as Config;
+
+        const result = await processSingleFileContent(
+          testPdfFilePath,
+          mockConfigNoPdf,
+          undefined,
+          undefined,
+          '1-5',
+        );
+
+        // Must not be rejected by the generic 10MB gate.
+        expect(result.error ?? '').not.toContain('10MB limit');
+        expect(result.llmContent).not.toMatch(/exceeds the 10MB limit/i);
+        // Routed into the pdftotext path — either success or the
+        // install-guidance error, never "File size exceeds the 10MB limit".
+        expect(result.returnDisplay ?? '').toMatch(/pdf/i);
+      } finally {
+        statSpy.mockRestore();
+      }
+    });
+
+    it('should still reject oversized PDFs when routing to the native base64 path', async () => {
+      // When the model supports PDF modality and no pages arg is provided,
+      // the base64 path applies and the 10MB inline-data cap still matters.
+      const fakePdfData = Buffer.from('fake pdf data');
+      actualNodeFs.writeFileSync(testPdfFilePath, fakePdfData);
+      mockMimeGetType.mockReturnValue('application/pdf');
+
+      const statSpy = vi.spyOn(fs.promises, 'stat').mockResolvedValueOnce({
+        size: 15 * 1024 * 1024,
+        isDirectory: () => false,
+        isFile: () => true,
+      } as fs.Stats);
+
+      try {
+        const mockConfigWithPdf = {
+          ...mockConfig,
+          getContentGeneratorConfig: () => ({
+            modalities: { image: true, pdf: true },
+          }),
+        } as unknown as Config;
+
+        const result = await processSingleFileContent(
+          testPdfFilePath,
+          mockConfigWithPdf,
+        );
+
+        expect(result.error).toContain('10MB limit');
+      } finally {
+        statSpy.mockRestore();
+      }
     });
 
     it('should accept PDF files when model supports PDF', async () => {
@@ -1157,6 +1233,7 @@ describe('fileUtils', () => {
       const statSpy = vi.spyOn(fs.promises, 'stat').mockResolvedValueOnce({
         size: 11 * 1024 * 1024,
         isDirectory: () => false,
+        isFile: () => true,
       } as fs.Stats);
 
       try {
@@ -1174,5 +1251,69 @@ describe('fileUtils', () => {
         statSpy.mockRestore();
       }
     });
+
+    it('should reject PDFs that exceed the text-extraction size cap (100MB)', async () => {
+      const fakePdfData = Buffer.from('fake pdf data');
+      actualNodeFs.writeFileSync(testPdfFilePath, fakePdfData);
+      mockMimeGetType.mockReturnValue('application/pdf');
+
+      // 200MB PDF — text-extraction path skips the 10MB gate but still
+      // needs a sane ceiling so pdftotext can't be asked to stream GBs
+      // until the 30s timeout fires.
+      const statSpy = vi.spyOn(fs.promises, 'stat').mockResolvedValueOnce({
+        size: 200 * 1024 * 1024,
+        isDirectory: () => false,
+        isFile: () => true,
+      } as fs.Stats);
+
+      try {
+        const mockConfigNoPdf = {
+          ...mockConfig,
+          getContentGeneratorConfig: () => ({
+            modalities: { image: true },
+          }),
+        } as unknown as Config;
+
+        const result = await processSingleFileContent(
+          testPdfFilePath,
+          mockConfigNoPdf,
+          undefined,
+          undefined,
+          '1-5',
+        );
+
+        expect(result.error).toMatch(/exceeds extraction size limit/i);
+        expect(result.returnDisplay).toMatch(/PDF file too large/i);
+        expect(result.errorType).toBeDefined();
+      } finally {
+        statSpy.mockRestore();
+      }
+    });
+
+    it('should reject non-regular files (FIFOs, devices, sockets)', async () => {
+      actualNodeFs.writeFileSync(testTextFilePath, 'placeholder');
+
+      // A FIFO / socket / /dev/zero shows up as a non-file, non-directory
+      // stat entry. stats.size is typically 0 or meaningless, so without
+      // this guard a caller could accidentally stream /dev/zero through
+      // pdftotext until the timeout fires.
+      const statSpy = vi.spyOn(fs.promises, 'stat').mockResolvedValueOnce({
+        size: 0,
+        isDirectory: () => false,
+        isFile: () => false,
+      } as fs.Stats);
+
+      try {
+        const result = await processSingleFileContent(
+          testTextFilePath,
+          mockConfig,
+        );
+
+        expect(result.error).toMatch(/not a regular file/i);
+        expect(result.returnDisplay).toMatch(/not a regular file/i);
+      } finally {
+        statSpy.mockRestore();
+      }
+    });
   });
 });