Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,9 @@ integration-tests/concurrent-runner/task-*

integration-tests/terminal-capture/scenarios/screenshots/

# Test-built worker artifact (fzfWorkerHandle.test.ts builds this on-the-fly)
packages/core/src/utils/filesearch/fzfWorker.js

# storybook
*storybook.log
storybook-static
Expand Down
162 changes: 93 additions & 69 deletions esbuild.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -80,76 +80,100 @@ const external = [
// in skill-manager / ripgrepUtils / i18n / extensions/new.
const BUNDLE_CHUNK_DIR = 'chunks';

esbuild
.build({
entryPoints: { cli: 'packages/cli/index.ts' },
bundle: true,
outdir: 'dist',
entryNames: '[name]',
chunkNames: `${BUNDLE_CHUNK_DIR}/[name]-[hash]`,
splitting: true,
platform: 'node',
format: 'esm',
target: 'node22',
external,
packages: 'bundle',
inject: [path.resolve(__dirname, 'scripts/esbuild-shims.js')],
banner: {
js: `// Force strict mode and setup for ESM
const mainBuild = esbuild.build({
entryPoints: { cli: 'packages/cli/index.ts' },
bundle: true,
outdir: 'dist',
entryNames: '[name]',
chunkNames: `${BUNDLE_CHUNK_DIR}/[name]-[hash]`,
splitting: true,
platform: 'node',
format: 'esm',
target: 'node22',
external,
packages: 'bundle',
inject: [path.resolve(__dirname, 'scripts/esbuild-shims.js')],
banner: {
js: `// Force strict mode and setup for ESM
"use strict";`,
},
alias: {
'is-in-ci': path.resolve(
__dirname,
'packages/cli/src/patches/is-in-ci.ts',
),
'@qwen-code/web-templates': path.resolve(
__dirname,
'packages/web-templates/src/index.ts',
),
// Resolve to userland punycode instead of deprecated node:punycode built-in
punycode: require.resolve('punycode/'),
},
define: {
'process.env.CLI_VERSION': JSON.stringify(pkg.version),
// react-reconciler ≥0.33 (ink 7) gates its dev build behind NODE_ENV
// and calls performance.measure() on every render, leaking
// PerformanceMeasure objects into the global measureEntryBuffer.
// Setting production here tree-shakes the entire dev build (~15k lines).
'process.env.NODE_ENV': JSON.stringify('production'),
// Make global available for compatibility
global: 'globalThis',
// Redirect free __dirname/__filename references to the shim so that
// vendored libraries that emit their own `var __dirname` locals don't
// collide with our injected bindings when code-splitting is enabled.
//
// CONTRIBUTOR WARNING: this rewrite applies to *all* source files, so
// any bare `__dirname` / `__filename` in our own code resolves to the
// shim chunk's on-disk location (i.e. `dist/chunks/`), NOT the source
// file's own directory. To get a per-file path, declare a local shadow
// at the top of the module:
//
// import { fileURLToPath } from 'node:url';
// const __filename = fileURLToPath(import.meta.url);
// const __dirname = path.dirname(__filename);
//
// esbuild leaves the local binding alone (it's a declared identifier,
// not a free reference). For sibling-asset lookups in modules that may
// be hoisted into a shared chunk, prefer
// `resolveBundleDir(import.meta.url)` from
// `packages/core/src/utils/bundlePaths.ts` — it both produces a
// per-file path and strips the chunk segment when the module ends up
// under `dist/chunks/`.
__dirname: '__qwen_dirname',
__filename: '__qwen_filename',
},
loader: { '.node': 'file' },
plugins: [wasmBinaryPlugin, wasmLoader({ mode: 'embedded' })],
metafile: true,
write: true,
keepNames: true,
})
.then(({ metafile }) => {
},
alias: {
'is-in-ci': path.resolve(__dirname, 'packages/cli/src/patches/is-in-ci.ts'),
'@qwen-code/web-templates': path.resolve(
__dirname,
'packages/web-templates/src/index.ts',
),
// Resolve to userland punycode instead of deprecated node:punycode built-in
punycode: require.resolve('punycode/'),
},
define: {
'process.env.CLI_VERSION': JSON.stringify(pkg.version),
// react-reconciler ≥0.33 (ink 7) gates its dev build behind NODE_ENV
// and calls performance.measure() on every render, leaking
// PerformanceMeasure objects into the global measureEntryBuffer.
// Setting production here tree-shakes the entire dev build (~15k lines).
'process.env.NODE_ENV': JSON.stringify('production'),
// Make global available for compatibility
global: 'globalThis',
// Redirect free __dirname/__filename references to the shim so that
// vendored libraries that emit their own `var __dirname` locals don't
// collide with our injected bindings when code-splitting is enabled.
//
// CONTRIBUTOR WARNING: this rewrite applies to *all* source files, so
// any bare `__dirname` / `__filename` in our own code resolves to the
// shim chunk's on-disk location (i.e. `dist/chunks/`), NOT the source
// file's own directory. To get a per-file path, declare a local shadow
// at the top of the module:
//
// import { fileURLToPath } from 'node:url';
// const __filename = fileURLToPath(import.meta.url);
// const __dirname = path.dirname(__filename);
//
// esbuild leaves the local binding alone (it's a declared identifier,
// not a free reference). For sibling-asset lookups in modules that may
// be hoisted into a shared chunk, prefer
// `resolveBundleDir(import.meta.url)` from
// `packages/core/src/utils/bundlePaths.ts` — it both produces a
// per-file path and strips the chunk segment when the module ends up
// under `dist/chunks/`.
__dirname: '__qwen_dirname',
__filename: '__qwen_filename',
},
loader: { '.node': 'file' },
plugins: [wasmBinaryPlugin, wasmLoader({ mode: 'embedded' })],
metafile: true,
write: true,
keepNames: true,
});

// fzf index worker — runs in its own worker_threads worker that
// `fzfWorkerHandle.ts` spawns via `new Worker(new URL('./fzfWorker.js', ...))`.
// Must exist as a standalone file next to `dist/cli.js` so the URL resolves
// at runtime; we bundle it self-contained (no chunk splitting) so fzf is
// inlined and the worker doesn't need to walk back into node_modules from
// the published tarball. `prepare-package.js` whitelists `fzfWorker.js` in
// the dist `files` array.
// Standalone (non-split) ESM bundle of the fzf worker entry point, written to
// `dist/fzfWorker.js`.
const workerBuild = esbuild.build({
  // Input / output.
  entryPoints: ['packages/core/src/utils/filesearch/fzfWorker.ts'],
  outfile: 'dist/fzfWorker.js',
  write: true,

  // Bundling: reuse the shared `external` list; other packages are bundled.
  bundle: true,
  packages: 'bundle',
  external,

  // Runtime target.
  platform: 'node',
  target: 'node22',
  format: 'esm',

  // fzf is CJS, so the ESM output needs the same require() shim that the
  // main bundle injects for CJS interop.
  inject: [path.resolve(__dirname, 'scripts/esbuild-shims.js')],
  banner: { js: '"use strict";' },
  keepNames: true,
});

Promise.all([mainBuild, workerBuild])
.then(([{ metafile }]) => {
if (process.env.DEV === 'true') {
writeFileSync('./dist/esbuild.json', JSON.stringify(metafile, null, 2));
}
Expand Down
22 changes: 21 additions & 1 deletion packages/cli/src/ui/hooks/useAtCompletion.ts
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,10 @@ export function useAtCompletion(props: UseAtCompletionProps): void {

// Resets completion state whenever `cwd` or `config` changes. The cleanup
// (run by React before this effect re-runs and on unmount) releases the
// searcher currently held in `fileSearch.current`.
useEffect(() => {
  dispatch({ type: 'RESET' });
  return () => {
    // Effect cleanups cannot await, so disposal is fire-and-forget. Attach a
    // rejection handler: without one, a failing dispose() would surface as an
    // unhandled promise rejection, which terminates the Node process by
    // default. The optional chain short-circuits (including the `.catch`)
    // when there is no searcher or it has no `dispose`.
    void fileSearch.current?.dispose?.().catch(() => {
      /* best-effort teardown; nothing useful to do on failure */
    });
    fileSearch.current = null;
  };
}, [cwd, config]);

// Reacts to user input (`pattern`) ONLY.
Expand Down Expand Up @@ -153,8 +157,15 @@ export function useAtCompletion(props: UseAtCompletionProps): void {

// The "Worker" that performs async operations based on status.
useEffect(() => {
let cancelled = false;

const initialize = async () => {
try {
// Dispose previous instance to prevent worker thread leaks on
// re-initialization (cwd/config change triggers RESET → re-init).

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Suggestion] The async initialize() is fire-and-forget with no cancellation guard. If the component unmounts or cwd/config changes while await searcher.initialize() is in flight (worker spawn + ready handshake takes several ms), the cleanup effect runs fileSearch.current?.dispose?.() — but fileSearch.current is still null from line 164, so nothing is disposed. When the pending initialize() resolves, fileSearch.current = searcher writes a live worker into the ref with no owner to dispose it.

In React 18 Strict Mode (dev), the double-invocation makes this deterministic: invocation-1's async init completes after cleanup already ran, leaking a worker thread.

Consider adding a cancelled flag or AbortController that the effect cleanup signals:

const initialize = async (signal: AbortSignal) => {
  try {
    await fileSearch.current?.dispose?.();
    fileSearch.current = null;
    const searcher = FileSearchFactory.create({ ... });
    await searcher.initialize();
    if (signal.aborted) { await searcher.dispose?.(); return; }
    fileSearch.current = searcher;
    // ...
  } catch (err) { /* ... */ }
};

— qwen3.7-max via Qwen Code /review

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 0dae073. Added a cancelled flag inside the effect closure — set to true in cleanup. After await searcher.initialize(), if cancelled is true the searcher is disposed immediately instead of being written to fileSearch.current. This covers both the unmount-during-init and the Strict Mode double-invocation scenarios.

await fileSearch.current?.dispose?.();

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MEDIUM — Concurrent initialize() calls can leak worker threads

The await fileSearch.current?.dispose?.() here only disposes the previously assigned reference. When cwd/config changes rapidly, two initialize() invocations can overlap:

  1. Init A starts — disposes old searcher, creates searcherA (spawns worker A), awaits searcherA.initialize().
  2. cwd changes again — Init B starts — fileSearch.current is still null (Init A hasn't set it yet) — dispose is a no-op.
  3. Init A completes — sets fileSearch.current = searcherA.
  4. Init B completes — sets fileSearch.current = searcherB — searcherA and its worker thread are leaked.

This race pre-dates the PR, but each leaked worker_threads worker holds ~1–4 MB stack + a V8 isolate, making the leak materially more expensive than the old in-process AsyncFzf.

Suggested change
await fileSearch.current?.dispose?.();
const initialize = async () => {
const myGeneration = ++initGeneration;
try {
// Dispose previous instance to prevent worker thread leaks on
// re-initialization (cwd/config change triggers RESET → re-init).
await fileSearch.current?.dispose?.();
fileSearch.current = null;
const searcher = FileSearchFactory.create({

A module-level let initGeneration = 0; plus a stale-check after await searcher.initialize() would cancel superseded initializations:

await searcher.initialize();
if (myGeneration !== initGeneration) {
  // A newer initialize() already started — dispose ours and bail.
  await searcher.dispose?.();
  return;
}
fileSearch.current = searcher;

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 0dae073. Added a cancelled flag inside the effect — the cleanup sets it to true, and after await searcher.initialize() the code checks it before assigning to fileSearch.current. If cancelled (because a newer init already started or the component unmounted), the newly-created searcher is disposed immediately. This prevents the overlap scenario you described where Init A's searcher would be orphaned when Init B overwrites the ref.

fileSearch.current = null;

const searcher = FileSearchFactory.create({
projectRoot: cwd,
ignoreDirs: [],
Expand All @@ -171,13 +182,21 @@ export function useAtCompletion(props: UseAtCompletionProps): void {
config?.getFileFilteringEnableFuzzySearch() !== false,
});
await searcher.initialize();
// Guard against the effect being cleaned up (unmount / cwd change)
// or superseded by a newer initialize() while we were awaiting.
if (cancelled) {
await searcher.dispose?.();
return;
}
fileSearch.current = searcher;
dispatch({ type: 'INITIALIZE_SUCCESS' });
if (state.pattern !== null) {
dispatch({ type: 'SEARCH', payload: state.pattern });
}
} catch (_) {
dispatch({ type: 'ERROR' });
if (!cancelled) {
dispatch({ type: 'ERROR' });
}
}
};

Expand Down Expand Up @@ -234,6 +253,7 @@ export function useAtCompletion(props: UseAtCompletionProps): void {
}

return () => {
cancelled = true;
searchAbortController.current?.abort();
if (slowSearchTimer.current) {
clearTimeout(slowSearchTimer.current);
Expand Down
3 changes: 3 additions & 0 deletions packages/core/src/utils/environmentContext.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,9 @@ describe('formatDateForContext', () => {
const result = formatDateForContext();
expect(typeof result).toBe('string');
expect(result.length).toBeGreaterThan(0);
});
});

describe('startup reminder builders', () => {
function registry(overrides: Partial<ToolRegistry>): ToolRegistry {
return {
Expand Down
45 changes: 45 additions & 0 deletions packages/core/src/utils/filesearch/fileSearch.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -772,4 +772,49 @@ describe('FileSearch', () => {
expect(results).toEqual(['.gitignore', 'file2.ts']);
});
});

describe('dispose()', () => {
  // Builds a fuzzy-enabled, uncached searcher rooted at `projectRoot`.
  // `recursive` is forwarded as `enableRecursiveFileSearch`.
  const makeSearch = (projectRoot: string, recursive: boolean) =>
    FileSearchFactory.create({
      projectRoot,
      useGitignore: false,
      useQwenignore: false,
      ignoreDirs: [],
      cache: false,
      cacheTtl: 0,
      enableRecursiveFileSearch: recursive,
      enableFuzzySearch: true,
    });

  it('should release fzf handle on dispose', async () => {
    tmpDir = await createTmpDir({ src: ['a.ts', 'b.ts'] });

    const searcher = makeSearch(tmpDir, true);
    await searcher.initialize();

    // First call releases the handle.
    await expect(searcher.dispose?.()).resolves.toBeUndefined();
    // Second call must also resolve: dispose() is required to be idempotent.
    await expect(searcher.dispose?.()).resolves.toBeUndefined();
  });

  it('should be a no-op for DirectoryFileSearch', async () => {
    tmpDir = await createTmpDir({ src: ['a.ts'] });

    const searcher = makeSearch(tmpDir, false);
    await searcher.initialize();

    // The non-recursive searcher exposes no dispose method at all.
    expect(searcher.dispose).toBeUndefined();
  });
});
});
36 changes: 30 additions & 6 deletions packages/core/src/utils/filesearch/fileSearch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import { loadIgnoreRules } from './ignore.js';
import { ResultCache } from './result-cache.js';
import { crawl } from './crawler.js';
import type { FzfResultItem } from 'fzf';
import { AsyncFzf } from 'fzf';
import { FzfWorkerHandle } from './fzfWorkerHandle.js';
import { unescapePath } from '../paths.js';

/**
Expand Down Expand Up @@ -98,13 +98,19 @@ export interface SearchOptions {
export interface FileSearch {
initialize(): Promise<void>;
search(pattern: string, options?: SearchOptions): Promise<string[]>;
/**
* Release any worker / native resources held by this search instance.
* Optional because the directory-only path holds no such resources.
* Implementations must be safe to call multiple times.
*/
dispose?(): Promise<void>;

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Critical] Worker thread leak in the CLI's @-picker path. useAtCompletion (in packages/cli/src/ui/hooks/useAtCompletion.ts) creates a FileSearch via FileSearchFactory.create() and stores it in fileSearch.current. When cwd/config changes trigger re-initialization or the component unmounts, the old FileSearch is silently overwritten without calling dispose(). The VSCode path was correctly wired up in FileMessageHandler.clearFileSearchCache(), but the CLI hook — by far the most common consumer — was missed.

On workspaces with ≥5 000 files, each RecursiveFileSearch holds a FzfWorkerHandle that owns a worker_threads Worker (~5–10 MB + full file list). In a long-running CLI session, every directory switch or config reload leaks another worker. worker.unref() prevents blocking process.exit(), but the workers accumulate for the lifetime of the process.

Fix in useAtCompletion.ts:

Suggested change
dispose?(): Promise<void>;
// In the useEffect cleanup:
return () => {
searchAbortController.current?.abort();
if (slowSearchTimer.current) {
clearTimeout(slowSearchTimer.current);
}
void fileSearch.current?.dispose?.();
fileSearch.current = null;
};

Also dispose the previous instance before overwriting in initialize().

— qwen3.7-max via Qwen Code /review

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 81a513d. Added dispose() calls in both the initialize() path (dispose old instance before creating new one) and the effect cleanup function. Worker threads are now properly released on re-init and unmount.

}

class RecursiveFileSearch implements FileSearch {
private ignore: Ignore | undefined;
private resultCache: ResultCache | undefined;
private allFiles: string[] = [];
private fzf: AsyncFzf<string[]> | undefined;
private fzf: FzfWorkerHandle | undefined;

constructor(private readonly options: FileSearchOptions) {}

Expand All @@ -120,7 +126,13 @@ class RecursiveFileSearch implements FileSearch {
maxDepth: this.options.maxDepth,
maxFiles: MAX_CRAWL_FILES,
});
this.buildResultCache();
await this.buildResultCache();

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Suggestion] RecursiveFileSearch.dispose() is a new public method but fileSearch.test.ts has no tests for it. Callers (useAtCompletion.ts, FileMessageHandler.ts) rely on dispose() for worker thread cleanup — a regression here would silently leak workers.

Consider adding a test:

it('should release fzf handle on dispose', async () => {
  const fileSearch = FileSearchFactory.create({ ... });
  await fileSearch.initialize();
  await expect(fileSearch.dispose()).resolves.toBeUndefined();
  // Idempotent
  await expect(fileSearch.dispose()).resolves.toBeUndefined();
});

— qwen3.7-max via Qwen Code /review

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in f3b9666. Added two tests in fileSearch.test.ts: one verifying RecursiveFileSearch.dispose() resolves and is idempotent, another confirming DirectoryFileSearch has no dispose method (expected — it holds no worker resources).

}

/**
 * Releases the fzf handle held by this instance, if there is one.
 *
 * The handle is detached from `this.fzf` before its own `dispose()` is
 * awaited, so a repeated or overlapping call sees `undefined` and does
 * nothing.
 *
 * @returns A promise that settles once the handle's `dispose()` has settled
 *   (or immediately when no handle was held).
 */
async dispose(): Promise<void> {
// Snapshot the handle, then clear the field *before* awaiting.
const handle = this.fzf;
this.fzf = undefined;
// `?.` makes this a no-op when fuzzy search was never set up or the
// instance was already disposed.
await handle?.dispose();
}

async search(
Expand Down Expand Up @@ -151,8 +163,13 @@ class RecursiveFileSearch implements FileSearch {
if (pattern.includes('*') || !this.fzf) {
filteredCandidates = await filter(candidates, pattern, options.signal);
} else {
// Pass a generous limit to the worker so results are trimmed before
// IPC serialization — avoids sending 50k+ entries across postMessage
// when only ~72 are displayed. The limit is 3× `maxResults` (which
// defaults to 200 here) and never less than 200, leaving headroom for the
// downstream ignore-filter to drop entries without starving results.
const fzfLimit = Math.max(200, (options.maxResults ?? 200) * 3);
filteredCandidates = await this.fzf
.find(pattern)
.find(pattern, fzfLimit)
.then((results: Array<FzfResultItem<string>>) =>
results.map((entry: FzfResultItem<string>) => entry.item),
)
Expand Down Expand Up @@ -190,14 +207,21 @@ class RecursiveFileSearch implements FileSearch {
return results;
}

private buildResultCache(): void {
private async buildResultCache(): Promise<void> {
this.resultCache = new ResultCache(this.allFiles);
// Initialize fuzzy search if enabled (or undefined, default true).
if (this.options.enableFuzzySearch !== false) {
// The v1 algorithm is much faster since it only looks at the first
occurrence of the pattern. We use it for search spaces that have >20k
// files, because the v2 algorithm is just too slow in those cases.
this.fzf = new AsyncFzf(this.allFiles, {
//
// Construction is the actual main-thread freeze on large workspaces
// (the AsyncFzf constructor is misleadingly named — it runs sync
// during `new`). FzfWorkerHandle hosts the instance in a
// worker_threads worker once the file count crosses ~5k; below that
// it stays in-thread because worker spawn + IPC overhead exceeds the
// construction cost.
this.fzf = await FzfWorkerHandle.create(this.allFiles, {
fuzzy: this.allFiles.length > 20000 ? 'v1' : 'v2',
});
}
Expand Down
Loading
Loading