QwenLM · wenshao · Jun 14, 2026 · May 26, 2026 · May 27, 2026 · May 27, 2026
diff --git a/packages/core/src/services/chatCompressionService.test.ts b/packages/core/src/services/chatCompressionService.test.ts
@@ -7,6 +7,7 @@
 import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
 import {
   ChatCompressionService,
+  COMPACT_MAX_OUTPUT_TOKENS,
   computeThresholds,
   MAX_CONSECUTIVE_FAILURES,
   MAX_HOOK_INSTRUCTIONS_CHARS,
@@ -914,23 +915,137 @@ describe('ChatCompressionService', () => {
     expect(result.newHistory).toBeNull();
   });
 
-  it('should return FAILED if usage metadata is missing', async () => {
+  it('should use estimated token count if usage metadata is missing', async () => {
+    const largeMessage = 'x'.repeat(4_000); // four turns of this give 16,000 visible chars
+    const history: Content[] = [
+      { role: 'user', parts: [{ text: largeMessage }] },
+      { role: 'model', parts: [{ text: largeMessage }] },
+      { role: 'user', parts: [{ text: largeMessage }] },
+      { role: 'model', parts: [{ text: largeMessage }] },
+    ];
+    vi.mocked(mockChat.getHistory).mockReturnValue(history);
+    vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(
+      5_000, // read back below and passed to compress() as originalTokenCount
+    );
+    vi.mocked(mockConfig.getContentGeneratorConfig).mockReturnValue({
+      model: 'gemini-pro',
+      contextWindowSize: 6_000,
+    } as unknown as ReturnType<typeof mockConfig.getContentGeneratorConfig>);
+    const debug = vi.fn(); // spy for the fallback-path debug messages asserted below
+    (
+      mockConfig as unknown as {
+        getDebugLogger: () => {
+          warn: ReturnType<typeof vi.fn>;
+          debug: typeof debug;
+        };
+      }
+    ).getDebugLogger = () => ({ // logger stub that exposes the debug spy
+      warn: vi.fn(),
+      debug,
+    });
+
+    const mockGenerateContent = vi.fn().mockResolvedValue({
+      text: 'Summary',
+      // Some OpenAI-compatible providers (for example MiniMax-2.7) may omit
+      // usage on the compression side-query even when they return a summary.
+      usage: undefined,
+    });
+    vi.mocked(mockConfig.getBaseLlmClient).mockReturnValue({
+      generateText: mockGenerateContent,
+    } as unknown as BaseLlmClient);
+
+    const result = await service.compress(mockChat, {
+      promptId: mockPromptId,
+      force: true,
+      model: mockModel,
+      config: mockConfig,
+      consecutiveFailures: 0,
+      originalTokenCount: uiTelemetryService.getLastPromptTokenCount(),
+    });
+
+    expect(result.info.compressionStatus).toBe(CompressionStatus.COMPRESSED);
+    expect(result.info.originalTokenCount).toBe(5_000);
+    expect(result.info.newTokenCount).toBeGreaterThan(1_000); // preserved remainder plus a non-zero new visible estimate
+    expect(result.info.newTokenCount).toBeLessThan(1_100); // the replacement history must be estimated as small
+    expect(result.newHistory).not.toBeNull();
+    expect(result.newHistory![0].parts![0].text).toContain('Summary');
+    expect(debug).toHaveBeenCalledWith(
+      expect.stringContaining('usage metadata missing'),
+    );
+    expect(debug).toHaveBeenCalledWith(
+      expect.stringContaining('API-reported non-visible remainder (1000)'), // 5,000 original minus the visible-history estimate
+    );
+  });
+
+  it('should reject inflated local delta if usage metadata is missing', async () => {
+    const history: Content[] = [
+      { role: 'user', parts: [{ text: 'short user message' }] },
+      { role: 'model', parts: [{ text: 'short model response' }] },
+      { role: 'user', parts: [{ text: 'another short user message' }] },
+      { role: 'model', parts: [{ text: 'another short model response' }] },
+    ];
+    vi.mocked(mockChat.getHistory).mockReturnValue(history);
+    vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(800); // small original count that the oversized summary below must exceed
+    vi.mocked(mockConfig.getContentGeneratorConfig).mockReturnValue({
+      model: 'gemini-pro',
+      contextWindowSize: 6_000,
+    } as unknown as ReturnType<typeof mockConfig.getContentGeneratorConfig>);
+
+    const mockGenerateContent = vi.fn().mockResolvedValue({
+      text: 'x'.repeat(40_000), // summary far longer than the short history it replaces
+      usage: undefined, // no usage metadata, so the service has to estimate locally
+    });
+    vi.mocked(mockConfig.getBaseLlmClient).mockReturnValue({
+      generateText: mockGenerateContent,
+    } as unknown as BaseLlmClient);
+
+    const result = await service.compress(mockChat, {
+      promptId: mockPromptId,
+      force: true,
+      model: mockModel,
+      config: mockConfig,
+      consecutiveFailures: 0,
+      originalTokenCount: uiTelemetryService.getLastPromptTokenCount(),
+    });
+
+    expect(result.info.compressionStatus).toBe(
+      CompressionStatus.COMPRESSION_FAILED_INFLATED_TOKEN_COUNT,
+    );
+    expect(result.info.originalTokenCount).toBe(800);
+    expect(result.info.newTokenCount).toBeGreaterThan(800); // the estimated post-compression count is still reported
+    expect(result.newHistory).toBeNull(); // no replacement history is returned on this failure
+  });
+
+  it('should reject cap-sized summaries even if usage metadata is missing', async () => {
     const history: Content[] = [
       { role: 'user', parts: [{ text: 'msg1' }] },
       { role: 'model', parts: [{ text: 'msg2' }] },
       { role: 'user', parts: [{ text: 'msg3' }] },
       { role: 'model', parts: [{ text: 'msg4' }] },
     ];
     vi.mocked(mockChat.getHistory).mockReturnValue(history);
-    vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(800);
+    vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(
+      180_000,
+    );
     vi.mocked(mockConfig.getContentGeneratorConfig).mockReturnValue({
       model: 'gemini-pro',
-      contextWindowSize: 1000,
+      contextWindowSize: 200_000,
     } as unknown as ReturnType<typeof mockConfig.getContentGeneratorConfig>);
 
+    const warn = vi.fn();
+    (
+      mockConfig as unknown as {
+        getDebugLogger: () => {
+          warn: typeof warn;
+          debug: ReturnType<typeof vi.fn>;
+        };
+      }
+    ).getDebugLogger = () => ({
+      warn,
+      debug: vi.fn(),
+    });
     const mockGenerateContent = vi.fn().mockResolvedValue({
-      text: 'Summary',
-      // No usage -> keep original token count
+      text: 'x'.repeat(COMPACT_MAX_OUTPUT_TOKENS * 4),
       usage: undefined,
     });
     vi.mocked(mockConfig.getBaseLlmClient).mockReturnValue({
@@ -947,11 +1062,72 @@ describe('ChatCompressionService', () => {
     });
 
     expect(result.info.compressionStatus).toBe(
-      CompressionStatus.COMPRESSION_FAILED_TOKEN_COUNT_ERROR,
+      CompressionStatus.COMPRESSION_FAILED_OUTPUT_TRUNCATED,
+    );
+    expect(result.newHistory).toBeNull();
+    expect(warn).toHaveBeenCalledWith(
+      expect.stringContaining('local estimate'),
+    );
+    expect(warn).toHaveBeenCalledWith(
+      expect.stringContaining('COMPACT_MAX_OUTPUT_TOKENS'),
+    );
+  });
+
+  it('should reject CJK cap-sized summaries when usage metadata is missing', async () => {
+    const history: Content[] = [
+      { role: 'user', parts: [{ text: 'msg1' }] },
+      { role: 'model', parts: [{ text: 'msg2' }] },
+      { role: 'user', parts: [{ text: 'msg3' }] },
+      { role: 'model', parts: [{ text: 'msg4' }] },
+    ];
+    vi.mocked(mockChat.getHistory).mockReturnValue(history);
+    vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(
+      180_000,
+    );
+    vi.mocked(mockConfig.getContentGeneratorConfig).mockReturnValue({
+      model: 'gemini-pro',
+      contextWindowSize: 200_000,
+    } as unknown as ReturnType<typeof mockConfig.getContentGeneratorConfig>);
+
+    const warn = vi.fn(); // spy for the warnings asserted at the end of the test
+    (
+      mockConfig as unknown as {
+        getDebugLogger: () => {
+          warn: typeof warn;
+          debug: ReturnType<typeof vi.fn>;
+        };
+      }
+    ).getDebugLogger = () => ({ // logger stub that exposes the warn spy
+      warn,
+      debug: vi.fn(),
+    });
+    const mockGenerateContent = vi.fn().mockResolvedValue({
+      text: '\u4e00'.repeat(Math.ceil(COMPACT_MAX_OUTPUT_TOKENS / 1.5)), // U+4E00 repeated; 1.5 mirrors the service's CJK_CHAR_TOKEN_MULTIPLIER
+      usage: undefined,
+    });
+    vi.mocked(mockConfig.getBaseLlmClient).mockReturnValue({
+      generateText: mockGenerateContent,
+    } as unknown as BaseLlmClient);
+
+    const result = await service.compress(mockChat, {
+      promptId: mockPromptId,
+      force: false, // NOTE(review): the two estimate-path tests above pass force: true — confirm false is intended here
+      model: mockModel,
+      config: mockConfig,
+      consecutiveFailures: 0,
+      originalTokenCount: uiTelemetryService.getLastPromptTokenCount(),
+    });
+
+    expect(result.info.compressionStatus).toBe(
+      CompressionStatus.COMPRESSION_FAILED_OUTPUT_TRUNCATED,
     );
-    expect(result.info.originalTokenCount).toBe(800);
-    expect(result.info.newTokenCount).toBe(800);
     expect(result.newHistory).toBeNull();
+    expect(warn).toHaveBeenCalledWith(
+      expect.stringContaining('local estimate'), // emitted when the service substitutes its own output-token estimate
+    );
+    expect(warn).toHaveBeenCalledWith(
+      expect.stringContaining('COMPACT_MAX_OUTPUT_TOKENS'), // presumably the truncation guard's warning — confirm against the service
+    );
   });
 
   it('should return FAILED if summary is empty string', async () => {

diff --git a/packages/core/src/services/chatCompressionService.ts b/packages/core/src/services/chatCompressionService.ts
@@ -25,7 +25,11 @@ import {
   resolveSlimmingConfig,
   slimCompactionInput,
 } from './compactionInputSlimming.js';
-import { CHARS_PER_TOKEN, estimatePromptTokens } from './tokenEstimation.js';
+import {
+  CHARS_PER_TOKEN,
+  estimateContentTokens,
+  estimatePromptTokens,
+} from './tokenEstimation.js';
 import {
   buildStateReminderParts,
   composePostCompactHistory,
@@ -91,6 +95,30 @@ export const HARD_BUFFER = 3_000;
  */
 export const MAX_CONSECUTIVE_FAILURES = 3;
 
+const CJK_CHAR_TOKEN_MULTIPLIER = 1.5; // tokens assumed per character matched by CJK_CHAR_PATTERN
+const CJK_CHAR_PATTERN =
+  /[\u3040-\u30ff\u3400-\u9fff\uf900-\ufaff\uac00-\ud7af]/g; // kana, CJK ideographs (incl. compatibility), Hangul syllables; BMP code units only
+
+function estimateSummaryOutputTokens( // local estimate of the token count of a summary string
+  summary: string,
+  imageTokenEstimate: number,
+): number {
+  const genericEstimate = estimateContentTokens( // baseline from the shared content estimator
+    [{ role: 'model', parts: [{ text: summary }] }], // the summary wrapped as a single model turn
+    imageTokenEstimate,
+  );
+  const cjkCharCount = summary.match(CJK_CHAR_PATTERN)?.length ?? 0; // match() returns null when nothing matches, hence ?? 0
+  if (cjkCharCount === 0) {
+    return genericEstimate; // no CJK characters: the generic estimate is used as-is
+  }
+
+  const nonCjkCharCount = Math.max(0, summary.length - cjkCharCount); // remaining UTF-16 code units, clamped at zero
+  const cjkAwareEstimate =
+    Math.ceil(nonCjkCharCount / CHARS_PER_TOKEN) + // non-CJK text at the generic chars-per-token rate
+    Math.ceil(cjkCharCount * CJK_CHAR_TOKEN_MULTIPLIER); // CJK text at the per-character multiplier
+  return Math.max(genericEstimate, cjkAwareEstimate); // the CJK adjustment can only raise the estimate, never lower it
+}
+
 /**
  * Hard cap on the PreCompact hook's `additionalContext` once it is merged
  * into the side-query system prompt. The user-supplied `/compress` text is
@@ -497,6 +525,19 @@ export class ChatCompressionService {
         compressionUsageMetadata.totalTokenCount - compressionInputTokenCount,
       );
     }
+    if (compressionOutputTokenCount === undefined && !isSummaryEmpty) {
+      compressionOutputTokenCount = estimateSummaryOutputTokens(
+        summary,
+        slimmingConfig.imageTokenEstimate,
+      );
+      config
+        .getDebugLogger()
+        .warn(
+          `[chat-compression] compression side-query omitted usage metadata; ` +
+            `using local estimate for summary output token count ` +
+            `(${compressionOutputTokenCount}).`,
+        );
+    }
 
     // Defensive guard: if the side-query hit COMPACT_MAX_OUTPUT_TOKENS, the
     // summary is likely truncated mid-content and unsafe to persist. Drop it
@@ -624,7 +665,12 @@ export class ChatCompressionService {
         ];
       }
 
-      // Best-effort token math using *only* model-reported token counts.
+      // Best-effort token math using model-reported token counts when
+      // available. Some OpenAI-compatible providers omit usage for the
+      // compression side-query; in that case, fall back to the same local
+      // content estimator used by the auto-compaction gate so a valid summary
+      // can still shrink the history instead of failing with a token-count
+      // error.
       //
       // Note: compressionInputTokenCount includes the entire compression
       // system prompt (the <state_snapshot> instructions, ~900 tokens) PLUS
@@ -654,12 +700,9 @@ export class ChatCompressionService {
         // The composer injects file-restoration blocks (up to
         // maxRecentFiles × 5K tokens) and an image-restoration block (up to
         // maxRecentImages images) that are NOT in
-        // compressionOutputTokenCount. Estimate their
-        // cost locally so the inflation guard below
-        // (newTokenCount > originalTokenCount) actually fires when
-        // attachments dominate the post-compact size, and so
-        // `lastPromptTokenCount` doesn't under-report the next auto-
-        // compaction cheap-gate input (Finding 1).
+        // compressionOutputTokenCount. Estimate their cost locally so the
+        // inflation guard below fires when attachments dominate the
+        // post-compact size.
         const restorationChars = extraHistory
           .slice(2) // skip [summary, model ack]
           .reduce(
@@ -668,6 +711,41 @@ export class ChatCompressionService {
             0,
           );
         newTokenCount += Math.ceil(restorationChars / CHARS_PER_TOKEN);
+      } else {
+        const estimatedOriginalVisibleTokenCount = estimateContentTokens(
+          curatedHistory,
+          slimmingConfig.imageTokenEstimate,
+        );
+        const estimatedNewVisibleTokenCount = estimateContentTokens(
+          extraHistory,
+          slimmingConfig.imageTokenEstimate,
+        );
+        if (
+          estimatedOriginalVisibleTokenCount > 0 &&
+          estimatedNewVisibleTokenCount > 0
+        ) {
+          const estimatedNonVisibleTokenCount = Math.max(
+            0,
+            originalTokenCount - estimatedOriginalVisibleTokenCount,
+          );
+          // Keep the API-reported system/tool/prompt remainder intact. The
+          // local estimator is only used for the visible conversation delta, so
+          // missing usage metadata cannot replace the authoritative total with
+          // a much smaller visible-history-only estimate.
+          newTokenCount =
+            estimatedNonVisibleTokenCount + estimatedNewVisibleTokenCount;
+          canCalculateNewTokenCount = true;
+          config
+            .getDebugLogger()
+            .debug(
+              `[chat-compression] usage metadata missing; estimated ` +
+                `post-compression token count by preserving the ` +
+                `API-reported non-visible remainder ` +
+                `(${estimatedNonVisibleTokenCount}) and replacing the ` +
+                `visible-history estimate (${estimatedOriginalVisibleTokenCount} -> ` +
+                `${estimatedNewVisibleTokenCount}).`,
+            );
+        }
       }
     }