diff --git a/.gitignore b/.gitignore index 24c74ea8a7b..330abd59227 100644 --- a/.gitignore +++ b/.gitignore @@ -94,6 +94,9 @@ integration-tests/concurrent-runner/task-* integration-tests/terminal-capture/scenarios/screenshots/ +# Test-built worker artifact (fzfWorkerHandle.test.ts builds this on-the-fly) +packages/core/src/utils/filesearch/fzfWorker.js + # storybook *storybook.log storybook-static diff --git a/esbuild.config.js b/esbuild.config.js index bc50dc265ea..956720a28f1 100644 --- a/esbuild.config.js +++ b/esbuild.config.js @@ -80,76 +80,100 @@ const external = [ // in skill-manager / ripgrepUtils / i18n / extensions/new. const BUNDLE_CHUNK_DIR = 'chunks'; -esbuild - .build({ - entryPoints: { cli: 'packages/cli/index.ts' }, - bundle: true, - outdir: 'dist', - entryNames: '[name]', - chunkNames: `${BUNDLE_CHUNK_DIR}/[name]-[hash]`, - splitting: true, - platform: 'node', - format: 'esm', - target: 'node22', - external, - packages: 'bundle', - inject: [path.resolve(__dirname, 'scripts/esbuild-shims.js')], - banner: { - js: `// Force strict mode and setup for ESM +const mainBuild = esbuild.build({ + entryPoints: { cli: 'packages/cli/index.ts' }, + bundle: true, + outdir: 'dist', + entryNames: '[name]', + chunkNames: `${BUNDLE_CHUNK_DIR}/[name]-[hash]`, + splitting: true, + platform: 'node', + format: 'esm', + target: 'node22', + external, + packages: 'bundle', + inject: [path.resolve(__dirname, 'scripts/esbuild-shims.js')], + banner: { + js: `// Force strict mode and setup for ESM "use strict";`, - }, - alias: { - 'is-in-ci': path.resolve( - __dirname, - 'packages/cli/src/patches/is-in-ci.ts', - ), - '@qwen-code/web-templates': path.resolve( - __dirname, - 'packages/web-templates/src/index.ts', - ), - // Resolve to userland punycode instead of deprecated node:punycode built-in - punycode: require.resolve('punycode/'), - }, - define: { - 'process.env.CLI_VERSION': JSON.stringify(pkg.version), - // react-reconciler ≥0.33 (ink 7) gates its dev build behind NODE_ENV - // and calls performance.measure() on every render, leaking - // PerformanceMeasure objects into the global measureEntryBuffer. - // Setting production here tree-shakes the entire dev build (~15k lines). - 'process.env.NODE_ENV': JSON.stringify('production'), - // Make global available for compatibility - global: 'globalThis', - // Redirect free __dirname/__filename references to the shim so that - // vendored libraries that emit their own `var __dirname` locals don't - // collide with our injected bindings when code-splitting is enabled. - // - // CONTRIBUTOR WARNING: this rewrite applies to *all* source files, so - // any bare `__dirname` / `__filename` in our own code resolves to the - // shim chunk's on-disk location (i.e. `dist/chunks/`), NOT the source - // file's own directory. To get a per-file path, declare a local shadow - // at the top of the module: - // - // import { fileURLToPath } from 'node:url'; - // const __filename = fileURLToPath(import.meta.url); - // const __dirname = path.dirname(__filename); - // - // esbuild leaves the local binding alone (it's a declared identifier, - // not a free reference). For sibling-asset lookups in modules that may - // be hoisted into a shared chunk, prefer - // `resolveBundleDir(import.meta.url)` from - // `packages/core/src/utils/bundlePaths.ts` — it both produces a - // per-file path and strips the chunk segment when the module ends up - // under `dist/chunks/`. - __dirname: '__qwen_dirname', - __filename: '__qwen_filename', - }, - loader: { '.node': 'file' }, - plugins: [wasmBinaryPlugin, wasmLoader({ mode: 'embedded' })], - metafile: true, - write: true, - keepNames: true, - }) - .then(({ metafile }) => { + }, + alias: { + 'is-in-ci': path.resolve(__dirname, 'packages/cli/src/patches/is-in-ci.ts'), + '@qwen-code/web-templates': path.resolve( + __dirname, + 'packages/web-templates/src/index.ts', + ), + // Resolve to userland punycode instead of deprecated node:punycode built-in + punycode: require.resolve('punycode/'), + }, + define: { + 'process.env.CLI_VERSION': JSON.stringify(pkg.version), + // react-reconciler ≥0.33 (ink 7) gates its dev build behind NODE_ENV + // and calls performance.measure() on every render, leaking + // PerformanceMeasure objects into the global measureEntryBuffer. + // Setting production here tree-shakes the entire dev build (~15k lines). + 'process.env.NODE_ENV': JSON.stringify('production'), + // Make global available for compatibility + global: 'globalThis', + // Redirect free __dirname/__filename references to the shim so that + // vendored libraries that emit their own `var __dirname` locals don't + // collide with our injected bindings when code-splitting is enabled. + // + // CONTRIBUTOR WARNING: this rewrite applies to *all* source files, so + // any bare `__dirname` / `__filename` in our own code resolves to the + // shim chunk's on-disk location (i.e. `dist/chunks/`), NOT the source + // file's own directory. To get a per-file path, declare a local shadow + // at the top of the module: + // + // import { fileURLToPath } from 'node:url'; + // const __filename = fileURLToPath(import.meta.url); + // const __dirname = path.dirname(__filename); + // + // esbuild leaves the local binding alone (it's a declared identifier, + // not a free reference). For sibling-asset lookups in modules that may + // be hoisted into a shared chunk, prefer + // `resolveBundleDir(import.meta.url)` from + // `packages/core/src/utils/bundlePaths.ts` — it both produces a + // per-file path and strips the chunk segment when the module ends up + // under `dist/chunks/`. + __dirname: '__qwen_dirname', + __filename: '__qwen_filename', + }, + loader: { '.node': 'file' }, + plugins: [wasmBinaryPlugin, wasmLoader({ mode: 'embedded' })], + metafile: true, + write: true, + keepNames: true, +}); + +// fzf index worker — runs in its own worker_threads worker that +// `fzfWorkerHandle.ts` spawns via `new Worker(new URL('./fzfWorker.js', ...))`. +// Must exist as a standalone file next to `dist/cli.js` so the URL resolves +// at runtime; we bundle it self-contained (no chunk splitting) so fzf is +// inlined and the worker doesn't need to walk back into node_modules from +// the published tarball. `prepare-package.js` whitelists `fzfWorker.js` in +// the dist `files` array. +const workerBuild = esbuild.build({ + entryPoints: ['packages/core/src/utils/filesearch/fzfWorker.ts'], + bundle: true, + outfile: 'dist/fzfWorker.js', + platform: 'node', + format: 'esm', + target: 'node22', + external, + packages: 'bundle', + // fzf is CJS — needs the same require()-shim the main bundle uses for + // CJS interop in ESM output. + inject: [path.resolve(__dirname, 'scripts/esbuild-shims.js')], + banner: { + js: `"use strict";`, + }, + write: true, + keepNames: true, +}); + +Promise.all([mainBuild, workerBuild]) + .then(([{ metafile }]) => { if (process.env.DEV === 'true') { writeFileSync('./dist/esbuild.json', JSON.stringify(metafile, null, 2)); } diff --git a/packages/cli/src/ui/hooks/useAtCompletion.ts b/packages/cli/src/ui/hooks/useAtCompletion.ts index d1793139b7d..c6a571ce769 100644 --- a/packages/cli/src/ui/hooks/useAtCompletion.ts +++ b/packages/cli/src/ui/hooks/useAtCompletion.ts @@ -121,6 +121,10 @@ export function useAtCompletion(props: UseAtCompletionProps): void { useEffect(() => { dispatch({ type: 'RESET' }); + return () => { + void fileSearch.current?.dispose?.(); + fileSearch.current = null; + }; }, [cwd, config]); // Reacts to user input (`pattern`) ONLY. @@ -153,8 +157,15 @@ export function useAtCompletion(props: UseAtCompletionProps): void { // The "Worker" that performs async operations based on status. useEffect(() => { + let cancelled = false; + const initialize = async () => { try { + // Dispose previous instance to prevent worker thread leaks on + // re-initialization (cwd/config change triggers RESET → re-init). + await fileSearch.current?.dispose?.(); + fileSearch.current = null; + const searcher = FileSearchFactory.create({ projectRoot: cwd, ignoreDirs: [], @@ -171,13 +182,21 @@ export function useAtCompletion(props: UseAtCompletionProps): void { config?.getFileFilteringEnableFuzzySearch() !== false, }); await searcher.initialize(); + // Guard against the effect being cleaned up (unmount / cwd change) + // or superseded by a newer initialize() while we were awaiting. + if (cancelled) { + await searcher.dispose?.(); + return; + } fileSearch.current = searcher; dispatch({ type: 'INITIALIZE_SUCCESS' }); if (state.pattern !== null) { dispatch({ type: 'SEARCH', payload: state.pattern }); } } catch (_) { - dispatch({ type: 'ERROR' }); + if (!cancelled) { + dispatch({ type: 'ERROR' }); + } } }; @@ -234,6 +253,7 @@ export function useAtCompletion(props: UseAtCompletionProps): void { } return () => { + cancelled = true; searchAbortController.current?.abort(); if (slowSearchTimer.current) { clearTimeout(slowSearchTimer.current); diff --git a/packages/core/src/utils/environmentContext.test.ts b/packages/core/src/utils/environmentContext.test.ts index b4768393ed6..92fc903f9c1 100644 --- a/packages/core/src/utils/environmentContext.test.ts +++ b/packages/core/src/utils/environmentContext.test.ts @@ -399,6 +399,9 @@ describe('formatDateForContext', () => { const result = formatDateForContext(); expect(typeof result).toBe('string'); expect(result.length).toBeGreaterThan(0); + }); +}); + describe('startup reminder builders', () => { function registry(overrides: Partial): ToolRegistry { return { diff --git a/packages/core/src/utils/filesearch/fileSearch.test.ts b/packages/core/src/utils/filesearch/fileSearch.test.ts index aa094039f94..6cfa0ff4c0e 100644 --- a/packages/core/src/utils/filesearch/fileSearch.test.ts +++ b/packages/core/src/utils/filesearch/fileSearch.test.ts @@ -772,4 +772,49 @@ describe('FileSearch', () => { expect(results).toEqual(['.gitignore', 'file2.ts']); }); }); + + describe('dispose()', () => { + it('should release fzf handle on dispose', async () => { + tmpDir = await createTmpDir({ + src: ['a.ts', 'b.ts'], + }); + + const fileSearch = FileSearchFactory.create({ + projectRoot: tmpDir, + useGitignore: false, + useQwenignore: false, + ignoreDirs: [], + cache: false, + cacheTtl: 0, + enableRecursiveFileSearch: true, + enableFuzzySearch: true, + }); + + await fileSearch.initialize(); + await expect(fileSearch.dispose?.()).resolves.toBeUndefined(); + // Idempotent + await expect(fileSearch.dispose?.()).resolves.toBeUndefined(); + }); + + it('should be a no-op for DirectoryFileSearch', async () => { + tmpDir = await createTmpDir({ + src: ['a.ts'], + }); + + const fileSearch = FileSearchFactory.create({ + projectRoot: tmpDir, + useGitignore: false, + useQwenignore: false, + ignoreDirs: [], + cache: false, + cacheTtl: 0, + enableRecursiveFileSearch: false, + enableFuzzySearch: true, + }); + + await fileSearch.initialize(); + // DirectoryFileSearch has no dispose — should be undefined. + expect(fileSearch.dispose).toBeUndefined(); + }); + }); }); diff --git a/packages/core/src/utils/filesearch/fileSearch.ts b/packages/core/src/utils/filesearch/fileSearch.ts index cd3297e6767..a25fdadcbb8 100644 --- a/packages/core/src/utils/filesearch/fileSearch.ts +++ b/packages/core/src/utils/filesearch/fileSearch.ts @@ -11,7 +11,7 @@ import { loadIgnoreRules } from './ignore.js'; import { ResultCache } from './result-cache.js'; import { crawl } from './crawler.js'; import type { FzfResultItem } from 'fzf'; -import { AsyncFzf } from 'fzf'; +import { FzfWorkerHandle } from './fzfWorkerHandle.js'; import { unescapePath } from '../paths.js'; /** @@ -98,13 +98,19 @@ export interface SearchOptions { export interface FileSearch { initialize(): Promise; search(pattern: string, options?: SearchOptions): Promise; + /** + * Release any worker / native resources held by this search instance. + * Optional because the directory-only path holds no such resources. + * Implementations must be safe to call multiple times. + */ + dispose?(): Promise; } class RecursiveFileSearch implements FileSearch { private ignore: Ignore | undefined; private resultCache: ResultCache | undefined; private allFiles: string[] = []; - private fzf: AsyncFzf | undefined; + private fzf: FzfWorkerHandle | undefined; constructor(private readonly options: FileSearchOptions) {} @@ -120,7 +126,13 @@ class RecursiveFileSearch implements FileSearch { maxDepth: this.options.maxDepth, maxFiles: MAX_CRAWL_FILES, }); - this.buildResultCache(); + await this.buildResultCache(); + } + + async dispose(): Promise { + const handle = this.fzf; + this.fzf = undefined; + await handle?.dispose(); } async search( @@ -151,8 +163,13 @@ class RecursiveFileSearch implements FileSearch { if (pattern.includes('*') || !this.fzf) { filteredCandidates = await filter(candidates, pattern, options.signal); } else { + // Pass a generous limit to the worker so results are trimmed before + // IPC serialization — avoids sending 50k+ entries across postMessage + // when only ~72 are displayed. The 200 cap leaves headroom for + // downstream ignore-filter to drop entries without starving results. + const fzfLimit = Math.max(200, (options.maxResults ?? 200) * 3); filteredCandidates = await this.fzf - .find(pattern) + .find(pattern, fzfLimit) .then((results: Array>) => results.map((entry: FzfResultItem) => entry.item), ) @@ -190,14 +207,21 @@ class RecursiveFileSearch implements FileSearch { return results; } - private buildResultCache(): void { + private async buildResultCache(): Promise { this.resultCache = new ResultCache(this.allFiles); // Initialize fuzzy search if enabled (or undefined, default true). if (this.options.enableFuzzySearch !== false) { // The v1 algorithm is much faster since it only looks at the first // occurence of the pattern. We use it for search spaces that have >20k // files, because the v2 algorithm is just too slow in those cases. - this.fzf = new AsyncFzf(this.allFiles, { + // + // Construction is the actual main-thread freeze on large workspaces + // (the AsyncFzf constructor is misleadingly named — it runs sync + // during `new`). FzfWorkerHandle hosts the instance in a + // worker_threads worker once the file count crosses ~5k; below that + // it stays in-thread because worker spawn + IPC overhead exceeds the + // construction cost. + this.fzf = await FzfWorkerHandle.create(this.allFiles, { fuzzy: this.allFiles.length > 20000 ? 'v1' : 'v2', }); } diff --git a/packages/core/src/utils/filesearch/fzfWorker.ts b/packages/core/src/utils/filesearch/fzfWorker.ts new file mode 100644 index 00000000000..39555c881a5 --- /dev/null +++ b/packages/core/src/utils/filesearch/fzfWorker.ts @@ -0,0 +1,109 @@ +/** + * @license + * Copyright 2025 Qwen team + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * Worker entry that owns an AsyncFzf instance. + * + * `new AsyncFzf(allFiles, ...)` is misleadingly named — its constructor is + * synchronous and dominates the main-thread cost on large workspaces (>20k + * files), freezing the Ink render loop while the @-picker initializes. By + * hosting the instance in a worker thread the construction work happens off + * the main thread; only completed find() results cross the message channel. + * + * Protocol (one round per message; no streaming or backpressure): + * + * main → worker: { type: 'init', files, options } (once) + * worker → main: { type: 'ready' } (after init OK) + * worker → main: { type: 'init-error', message } (init threw) + * main → worker: { type: 'find', reqId, pattern } + * worker → main: { type: 'result', reqId, items } + * worker → main: { type: 'find-error', reqId, message } + * main → worker: { type: 'dispose' } (closes port) + */ + +import { parentPort } from 'node:worker_threads'; +import { AsyncFzf, type FzfResultItem } from 'fzf'; + +interface InitMessage { + type: 'init'; + files: string[]; + options: { fuzzy: 'v1' | 'v2' | false }; +} + +interface FindMessage { + type: 'find'; + reqId: number; + pattern: string; + limit?: number; +} + +interface DisposeMessage { + type: 'dispose'; +} + +type IncomingMessage = InitMessage | FindMessage | DisposeMessage; + +if (!parentPort) { + throw new Error('fzfWorker.ts must be loaded as a worker_threads worker'); +} + +const port = parentPort; + +let fzf: AsyncFzf | null = null; + +port.on('message', (msg: IncomingMessage) => { + if (!msg || typeof msg !== 'object') return; + + if (msg.type === 'init') { + try { + fzf = new AsyncFzf(msg.files, msg.options); + port.postMessage({ type: 'ready' }); + } catch (err) { + port.postMessage({ + type: 'init-error', + message: err instanceof Error ? err.message : String(err), + }); + } + return; + } + + if (msg.type === 'find') { + if (!fzf) { + port.postMessage({ + type: 'find-error', + reqId: msg.reqId, + message: 'fzf not initialized', + }); + return; + } + fzf + .find(msg.pattern) + .then((items: Array>) => { + // Strip the heavy `positions` Set from each item before sending — + // structuredClone serialises Sets but the @-picker only needs the + // ranked `item` strings. Keeps IPC payloads small on big result sets. + const limit = msg.limit ?? items.length; + const trimmed = items + .slice(0, limit) + .map((entry) => ({ item: entry.item })); + port.postMessage({ type: 'result', reqId: msg.reqId, items: trimmed }); + }) + .catch((err: unknown) => { + port.postMessage({ + type: 'find-error', + reqId: msg.reqId, + message: err instanceof Error ? err.message : String(err), + }); + }); + return; + } + + if (msg.type === 'dispose') { + fzf = null; + port.close(); + return; + } +}); diff --git a/packages/core/src/utils/filesearch/fzfWorkerHandle.test.ts b/packages/core/src/utils/filesearch/fzfWorkerHandle.test.ts new file mode 100644 index 00000000000..ba5e0e75f8f --- /dev/null +++ b/packages/core/src/utils/filesearch/fzfWorkerHandle.test.ts @@ -0,0 +1,280 @@ +/** + * @license + * Copyright 2025 Qwen team + * SPDX-License-Identifier: Apache-2.0 + */ + +import fs from 'node:fs'; +import path from 'node:path'; +import { afterAll, afterEach, beforeAll, describe, expect, it } from 'vitest'; +import { + __resetWorkerScriptResolutionForTests, + __setWorkerThresholdForTests, + FzfWorkerHandle, + installInProcessFzfTransport, +} from './fzfWorkerHandle.js'; + +describe('FzfWorkerHandle', () => { + const restorers: Array<() => void> = []; + + afterEach(async () => { + while (restorers.length > 0) { + restorers.pop()!(); + } + }); + + describe('in-process fallback (small inputs)', () => { + it('returns ranked find() results matching AsyncFzf semantics', async () => { + const files = [ + 'src/utils/filesearch/fileSearch.ts', + 'src/utils/filesearch/fzfWorker.ts', + 'src/utils/filesearch/fzfWorkerHandle.ts', + 'src/utils/paths.ts', + ]; + const handle = await FzfWorkerHandle.create(files, { fuzzy: 'v2' }); + try { + const results = await handle.find('handle'); + expect(results.length).toBeGreaterThan(0); + expect(results[0].item).toBe('src/utils/filesearch/fzfWorkerHandle.ts'); + } finally { + await handle.dispose(); + } + }); + + it('dispose() is idempotent', async () => { + const handle = await FzfWorkerHandle.create(['a.ts', 'b.ts'], { + fuzzy: 'v2', + }); + await handle.dispose(); + await expect(handle.dispose()).resolves.toBeUndefined(); + }); + + it('returns empty array when no candidates match', async () => { + const handle = await FzfWorkerHandle.create(['a.ts', 'b.ts', 'c.ts'], { + fuzzy: 'v2', + }); + const results = await handle.find('xxxxxxxx-no-match'); + expect(results).toEqual([]); + await handle.dispose(); + }); + + it('respects the limit parameter', async () => { + const files = Array.from({ length: 100 }, (_, i) => `file${i}.ts`); + const handle = await FzfWorkerHandle.create(files, { fuzzy: 'v2' }); + const results = await handle.find('file', 5); + expect(results.length).toBeLessThanOrEqual(5); + await handle.dispose(); + }); + }); + + describe('installInProcessFzfTransport()', () => { + it('forces the in-thread path even when file count exceeds the worker threshold', async () => { + // Lower threshold so a small input would normally trip the worker path. + restorers.push(__setWorkerThresholdForTests(1)); + restorers.push(installInProcessFzfTransport()); + + // If the override leaked we'd be spawning a real worker_threads worker + // here. Confirm the call returns synchronously enough to be a no-op + // wrapper around AsyncFzf — no spawn, no postMessage round-trip. + const before = Date.now(); + const handle = await FzfWorkerHandle.create(['x.ts', 'y.ts'], { + fuzzy: 'v2', + }); + const setupMs = Date.now() - before; + // Worker spawn is at least ~10 ms even on a fast machine. The in-thread + // path is tens of microseconds. Generous bound to avoid CI flake. + expect(setupMs).toBeLessThan(50); + + const results = await handle.find('y'); + expect(results.map((r) => r.item)).toContain('y.ts'); + await handle.dispose(); + }); + + it('restorer reverts the override', async () => { + const restore = installInProcessFzfTransport(); + restore(); + // After restoring, threshold-based selection is back. With a tiny + // input we still expect the in-thread path (below default threshold), + // so this just verifies create() still works without a leaked override. + const handle = await FzfWorkerHandle.create(['z.ts'], { fuzzy: 'v2' }); + const results = await handle.find('z'); + expect(results.map((r) => r.item)).toContain('z.ts'); + await handle.dispose(); + }); + }); + + describe('worker transport (real worker_threads)', () => { + let workerExists = false; + const outfile = path.resolve(import.meta.dirname, 'fzfWorker.js'); + + beforeAll(async () => { + // Build the worker script on-the-fly so the test exercises the real + // worker_threads code path. Uses esbuild to produce a self-contained + // .js next to the source file (matching getWorkerScriptPath() resolution). + const { build } = await import('esbuild'); + const workerSrc = path.resolve(import.meta.dirname, 'fzfWorker.ts'); + + if (!fs.existsSync(outfile)) { + await build({ + entryPoints: [workerSrc], + outfile, + bundle: true, + platform: 'node', + format: 'esm', + target: 'node20', + banner: { + js: "import{createRequire}from'module';const require=createRequire(import.meta.url);", + }, + }); + } + workerExists = fs.existsSync(outfile); + }); + + afterAll(() => { + // Clean up the built worker to avoid polluting the source tree. + try { + fs.unlinkSync(outfile); + } catch { + /* already gone */ + } + }); + + afterEach(() => { + __resetWorkerScriptResolutionForTests(); + }); + + it('spawn → init → find → dispose lifecycle', async () => { + if (!workerExists) return; + __resetWorkerScriptResolutionForTests(); + restorers.push(__setWorkerThresholdForTests(1)); + + const files = [ + 'src/main.ts', + 'src/utils/helper.ts', + 'src/utils/worker.ts', + 'README.md', + ]; + const handle = await FzfWorkerHandle.create(files, { fuzzy: 'v2' }); + const results = await handle.find('worker'); + expect(results.length).toBeGreaterThan(0); + expect(results[0].item).toBe('src/utils/worker.ts'); + await handle.dispose(); + }); + + it('find() rejects after dispose()', async () => { + if (!workerExists) return; + __resetWorkerScriptResolutionForTests(); + restorers.push(__setWorkerThresholdForTests(1)); + + const handle = await FzfWorkerHandle.create(['a.ts', 'b.ts'], { + fuzzy: 'v2', + }); + await handle.dispose(); + await expect(handle.find('a')).rejects.toThrow(); + }); + + it('respects the limit parameter across IPC', async () => { + if (!workerExists) return; + __resetWorkerScriptResolutionForTests(); + restorers.push(__setWorkerThresholdForTests(1)); + + const files = Array.from({ length: 200 }, (_, i) => `module${i}.ts`); + const handle = await FzfWorkerHandle.create(files, { fuzzy: 'v2' }); + const results = await handle.find('module', 10); + expect(results.length).toBeLessThanOrEqual(10); + expect(results.length).toBeGreaterThan(0); + await handle.dispose(); + }); + + it('create() rejects and disposes on init failure', async () => { + if (!workerExists) return; + __resetWorkerScriptResolutionForTests(); + restorers.push(__setWorkerThresholdForTests(1)); + + // Pass invalid options to trigger an init error in the worker. + // AsyncFzf constructor throws on non-array input. + await expect( + FzfWorkerHandle.create(null as unknown as string[], { + fuzzy: 'v2', + }), + ).rejects.toThrow(); + }); + + it('rejects pending find() when worker crashes unexpectedly', async () => { + if (!workerExists) return; + + // Overwrite fzfWorker.js with a script that crashes on 'find'. + const goodWorker = fs.readFileSync(outfile, 'utf8'); + fs.writeFileSync( + outfile, + `import { parentPort } from 'node:worker_threads'; + parentPort.on('message', (msg) => { + if (msg.type === 'init') { parentPort.postMessage({ type: 'ready' }); return; } + if (msg.type === 'find') { process.exit(1); } + });`, + ); + __resetWorkerScriptResolutionForTests(); + restorers.push(__setWorkerThresholdForTests(1)); + + try { + const handle = await FzfWorkerHandle.create(['a.ts'], { fuzzy: 'v2' }); + await expect(handle.find('a')).rejects.toThrow(/exited unexpectedly/); + } finally { + // Restore the real worker for subsequent tests. + fs.writeFileSync(outfile, goodWorker); + __resetWorkerScriptResolutionForTests(); + } + }); + + it('rejects subsequent find() calls after worker crash', async () => { + if (!workerExists) return; + + const goodWorker = fs.readFileSync(outfile, 'utf8'); + fs.writeFileSync( + outfile, + `import { parentPort } from 'node:worker_threads'; + parentPort.on('message', (msg) => { + if (msg.type === 'init') { parentPort.postMessage({ type: 'ready' }); return; } + if (msg.type === 'find') { process.exit(1); } + });`, + ); + __resetWorkerScriptResolutionForTests(); + restorers.push(__setWorkerThresholdForTests(1)); + + try { + const handle = await FzfWorkerHandle.create(['a.ts'], { fuzzy: 'v2' }); + // First find triggers the crash + await expect(handle.find('a')).rejects.toThrow(/exited unexpectedly/); + // Subsequent find should reject immediately with 'failed' state + await expect(handle.find('b')).rejects.toThrow(/not available.*failed/); + } finally { + fs.writeFileSync(outfile, goodWorker); + __resetWorkerScriptResolutionForTests(); + } + }); + + it('handles concurrent find() calls', async () => { + if (!workerExists) return; + __resetWorkerScriptResolutionForTests(); + restorers.push(__setWorkerThresholdForTests(1)); + + const files = [ + 'alpha.ts', + 'beta.ts', + 'gamma.ts', + 'delta.ts', + 'epsilon.ts', + ]; + const handle = await FzfWorkerHandle.create(files, { fuzzy: 'v2' }); + const [r1, r2, r3] = await Promise.all([ + handle.find('alpha'), + handle.find('beta'), + handle.find('gamma'), + ]); + expect(r1[0].item).toBe('alpha.ts'); + expect(r2[0].item).toBe('beta.ts'); + expect(r3[0].item).toBe('gamma.ts'); + await handle.dispose(); + }); + }); +}); diff --git a/packages/core/src/utils/filesearch/fzfWorkerHandle.ts b/packages/core/src/utils/filesearch/fzfWorkerHandle.ts new file mode 100644 index 00000000000..30227868355 --- /dev/null +++ b/packages/core/src/utils/filesearch/fzfWorkerHandle.ts @@ -0,0 +1,355 @@ +/** + * @license + * Copyright 2025 Qwen team + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * Main-thread proxy for an AsyncFzf instance hosted in a worker_threads worker. + * + * Same interface as `AsyncFzf` for the subset RecursiveFileSearch + * uses (`find()` only) so the call site in `fileSearch.ts` is a one-line + * swap. The constructor cost — which is the actual main-thread freeze on + * large workspaces — moves into the worker. + * + * Below ~5k files the constructor cost is dominated by worker spawn + IPC + * overhead, so we keep an in-thread fallback that just instantiates AsyncFzf + * directly. Tests pin the in-thread path explicitly via + * `installInProcessFzfTransport()` so they don't pay worker spawn cost or + * have to ship `dist/fzfWorker.js` to test fixtures. + */ + +import fs from 'node:fs'; +import path from 'node:path'; +import { Worker } from 'node:worker_threads'; +import { AsyncFzf, type FzfResultItem } from 'fzf'; +import { resolveBundleDir } from '../bundlePaths.js'; + +export type FzfFuzzyMode = 'v1' | 'v2' | false; + +export interface FzfWorkerOptions { + fuzzy: FzfFuzzyMode; +} + +/** + * Tunable: file count at which we cut over from the in-thread fallback to + * the worker. Below this AsyncFzf constructor finishes before worker spawn + * + initial postMessage IPC would. The 5_000 floor was picked from the + * fzf-bench numbers in the parent PR — at ~5k files AsyncFzf takes ~30 ms + * on macOS Node 22, which is the same order as the worker_threads spawn + * latency on Linux/macOS (Windows spawn is a bit slower, but still in the + * same 30–80 ms band, and the breakeven file count there isn't very + * different). + */ +const DEFAULT_WORKER_THRESHOLD = 5_000; + +let workerThresholdOverride: number | null = null; + +/** For tests: force the worker path even on tiny inputs. Returns a restorer. */ +export function __setWorkerThresholdForTests(n: number): () => void { + const prev = workerThresholdOverride; + workerThresholdOverride = n; + return () => { + workerThresholdOverride = prev; + }; +} + +interface Transport { + spawn(files: string[], options: FzfWorkerOptions): TransportInstance; +} + +interface TransportInstance { + ready(): Promise; + find(pattern: string, limit?: number): Promise>>; + dispose(): Promise; +} + +const inProcessTransport: Transport = { + spawn(files, options) { + // AsyncFzf constructor is synchronous. We capture any throw eagerly so + // ready() / find() observe it identically to the worker-error path. + let constructError: Error | null = null; + let fzf: AsyncFzf | null = null; + try { + fzf = new AsyncFzf(files, options); + } catch (err) { + constructError = err instanceof Error ? err : new Error(String(err)); + } + + return { + async ready() { + if (constructError) throw constructError; + }, + async find(pattern, limit?) { + if (constructError) throw constructError; + if (!fzf) throw new Error('fzf not initialized'); + const results = await fzf.find(pattern); + return limit != null ? results.slice(0, limit) : results; + }, + async dispose() { + fzf = null; + }, + }; + }, +}; + +/** + * Locate `fzfWorker.js` on disk. Three layouts to handle: + * - Bundled CLI: handle hoisted into `dist/chunks/.js`; worker at + * `dist/fzfWorker.js`. `resolveBundleDir()` strips the + * chunk segment for us. + * - tsc output: consumed as a library; handle at + * `/dist/utils/filesearch/fzfWorkerHandle.js`, + * worker sits next to it. + * - tsx / source: running TS files directly (`npm run dev`). The + * sibling `.js` doesn't exist on disk, so the worker + * transport is unavailable and we fall back to the + * in-process path. Devs hitting the @-picker on huge + * workspaces will see the same brief freeze the bundled + * build avoids — acceptable for a `dev` script. + * + * Cached after first lookup so we don't `existsSync` on every handle + * creation. + */ +let resolvedWorkerPath: string | null | undefined; +function getWorkerScriptPath(): string | null { + if (resolvedWorkerPath !== undefined) return resolvedWorkerPath; + const candidate = path.join( + resolveBundleDir(import.meta.url), + 'fzfWorker.js', + ); + resolvedWorkerPath = fs.existsSync(candidate) ? candidate : null; + return resolvedWorkerPath; +} + +/** For tests: clear the cached worker-path lookup. */ +export function __resetWorkerScriptResolutionForTests(): void { + resolvedWorkerPath = undefined; +} + +const workerTransport: Transport = { + spawn(files, options) { + const workerPath = getWorkerScriptPath(); + if (!workerPath) { + // Should never reach this — `FzfWorkerHandle.create()` checks + // availability before picking the worker transport. Keep the throw + // so a future caller bypassing that gate fails loudly instead of + // hanging waiting for a non-existent worker to send `ready`. + throw new Error( + 'fzf worker transport requested but fzfWorker.js was not found', + ); + } + const worker = new Worker(workerPath); + // Don't pin the Node main loop on this worker — the parent CLI process + // should be free to exit even if the worker is mid-find. Ref'd workers + // would otherwise block process.exit() until terminate() resolves. + worker.unref(); + + let nextReqId = 1; + const pending = new Map< + number, + { + resolve: (items: Array>) => void; + reject: (err: Error) => void; + } + >(); + + let readyResolve: () => void = () => {}; + let readyReject: (err: Error) => void = () => {}; + const readyPromise = new Promise((res, rej) => { + readyResolve = res; + readyReject = rej; + }); + let readyState: 'pending' | 'ready' | 'failed' | 'disposed' = 'pending'; + + worker.on( + 'message', + ( + msg: + | { type: 'ready' } + | { type: 'init-error'; message: string } + | { + type: 'result'; + reqId: number; + items: Array<{ item: string }>; + } + | { type: 'find-error'; reqId: number; message: string }, + ) => { + if (msg.type === 'ready') { + readyState = 'ready'; + readyResolve(); + return; + } + if (msg.type === 'init-error') { + readyState = 'failed'; + readyReject(new Error(`fzf worker init failed: ${msg.message}`)); + return; + } + if (msg.type === 'result') { + const slot = pending.get(msg.reqId); + if (slot) { + pending.delete(msg.reqId); + // Worker stripped `positions` to keep IPC small. Reconstruct the + // FzfResultItem shape with an empty positions Set so callers that + // type the return as Array> stay happy — + // RecursiveFileSearch.search only reads `entry.item`. + slot.resolve( + msg.items.map( + (entry) => + ({ + item: entry.item, + positions: new Set(), + start: 0, + end: 0, + score: 0, + }) as FzfResultItem, + ), + ); + } + return; + } + if (msg.type === 'find-error') { + const slot = pending.get(msg.reqId); + if (slot) { + pending.delete(msg.reqId); + slot.reject(new Error(msg.message)); + } + return; + } + }, + ); + + const failAll = (err: Error) => { + if (readyState === 'pending') { + readyReject(err); + } + if (readyState !== 'disposed') { + readyState = 'failed'; + } + for (const slot of pending.values()) { + slot.reject(err); + } + pending.clear(); + }; + + worker.on('error', (err) => { + failAll(err instanceof Error ? err : new Error(String(err))); + }); + + worker.on('exit', (code) => { + if (readyState === 'disposed') return; // expected exit + failAll(new Error(`fzf worker exited unexpectedly (code=${code})`)); + }); + + // Kick off init. The first `find()` will await readyPromise. + worker.postMessage({ type: 'init', files, options }); + + return { + ready() { + return readyPromise; + }, + async find(pattern, limit?) { + if (readyState === 'disposed' || readyState === 'failed') { + throw new Error(`fzf worker not available (${readyState})`); + } + await readyPromise; + const reqId = nextReqId++; + return new Promise>>((resolve, reject) => { + if (readyState === 'disposed' || readyState === 'failed') { + reject(new Error(`fzf worker not available (${readyState})`)); + return; + } + pending.set(reqId, { resolve, reject }); + try { + worker.postMessage({ type: 'find', reqId, pattern, limit }); + } catch (err) { + pending.delete(reqId); + reject(err instanceof Error ? err : new Error(String(err))); + } + }); + }, + async dispose() { + readyState = 'disposed'; + try { + worker.postMessage({ type: 'dispose' }); + } catch { + // Worker may already be gone; terminate() below covers it. + } + await worker.terminate(); + for (const slot of pending.values()) { + slot.reject(new Error('fzf worker disposed')); + } + pending.clear(); + }, + }; + }, +}; + +let transportOverride: Transport | null = null; + +/** + * Test/sandbox helper: route all FzfWorkerHandle.create() calls through the + * in-thread fallback regardless of file count. Returns a restorer function + * the caller MUST run in afterAll/afterEach to avoid leaking the override + * into other test files (the very pitfall the parent PR's test-setup.ts + * tripped on — see wenshao 04-21 review on PR #3455). + */ +export function installInProcessFzfTransport(): () => void { + const prev = transportOverride; + transportOverride = inProcessTransport; + return () => { + transportOverride = prev; + }; +} + +export class FzfWorkerHandle { + private constructor(private readonly inst: TransportInstance) {} + + static async create( + files: string[], + options: FzfWorkerOptions, + ): Promise { + const threshold = workerThresholdOverride ?? DEFAULT_WORKER_THRESHOLD; + let transport: Transport; + if (transportOverride) { + transport = transportOverride; + } else if (files.length >= threshold && getWorkerScriptPath() !== null) { + transport = workerTransport; + } else { + transport = inProcessTransport; + } + const inst = transport.spawn(files, options); + const timeoutMs = Math.max(10_000, files.length / 10); + let timerId: ReturnType | undefined; + try { + await Promise.race([ + inst.ready(), + new Promise((_, rej) => { + timerId = setTimeout( + () => + rej( + new Error( + `fzf worker init timed out after ${timeoutMs}ms (${files.length} files)`, + ), + ), + timeoutMs, + ); + }), + ]); + } catch (err) { + await inst.dispose(); + throw err; + } finally { + if (timerId !== undefined) clearTimeout(timerId); + } + return new FzfWorkerHandle(inst); + } + + find(pattern: string, limit?: number): Promise>> { + return this.inst.find(pattern, limit); + } + + dispose(): Promise { + return this.inst.dispose(); + } +} diff --git a/packages/vscode-ide-companion/src/webview/handlers/FileMessageHandler.ts b/packages/vscode-ide-companion/src/webview/handlers/FileMessageHandler.ts index eaf527a1479..cca6b19eb58 100644 --- a/packages/vscode-ide-companion/src/webview/handlers/FileMessageHandler.ts +++ b/packages/vscode-ide-companion/src/webview/handlers/FileMessageHandler.ts @@ -104,8 +104,17 @@ export class FileMessageHandler extends BaseMessageHandler { } private clearFileSearchCache(rootPath: string): void { + // Drop the instance from the Map first so any concurrent reader sees a + // miss, then dispose() the prior holder so its worker_threads worker + // (if RecursiveFileSearch promoted past the in-thread threshold) + // doesn't accumulate inside the long-running extension host. Fire-and- + // forget — dispose is best-effort. + const previous = this.fileSearchInstances.get(rootPath); this.fileSearchInstances.delete(rootPath); this.fileSearchInitializing.delete(rootPath); + void previous?.dispose?.().catch(() => { + // Already gone or never had a worker; nothing actionable here. + }); crawlCache.clear(); console.log( '[FileMessageHandler] Cleared file search cache, trigger:', @@ -171,6 +180,11 @@ export class FileMessageHandler extends BaseMessageHandler { } this.fileWatchers.clear(); foldersChangeListener.dispose(); + for (const instance of this.fileSearchInstances.values()) { + void instance.dispose?.().catch(() => {}); + } + this.fileSearchInstances.clear(); + this.fileSearchInitializing.clear(); }, }; } diff --git a/scripts/prepare-package.js b/scripts/prepare-package.js index d4e5d711abe..0a05bebcf0a 100644 --- a/scripts/prepare-package.js +++ b/scripts/prepare-package.js @@ -140,6 +140,11 @@ function writeDistPackageJson(rootDir, distDir) { }, files: [ 'cli.js', + // Worker thread entry loaded by FzfWorkerHandle at runtime via + // `resolveBundleDir(import.meta.url)` + `path.join(dir, 'fzfWorker.js')`. + // Must ship in the tarball or the @-picker silently falls back to the + // in-thread AsyncFzf path on big workspaces in npm-installed CLIs. + 'fzfWorker.js', 'chunks', 'vendor', '*.sb',