From e05e614afa04d84691137894389093397c54ecb3 Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Mon, 27 Apr 2026 17:30:11 +0800 Subject: [PATCH 01/10] feat(memory): add autoSkill background project skill extraction --- docs/design/skill-nudge/skill-nudge.md | 460 ++++++++++++++++++ packages/cli/src/config/config.ts | 10 +- packages/cli/src/config/settingsSchema.ts | 10 + packages/cli/src/nonInteractiveCli.ts | 13 + packages/cli/src/ui/hooks/useGeminiStream.ts | 4 + .../core/src/agents/runtime/agent-core.ts | 8 +- packages/core/src/config/config.ts | 12 + packages/core/src/core/client.ts | 46 +- packages/core/src/memory/manager.test.ts | 176 +++++++ packages/core/src/memory/manager.ts | 175 ++++++- .../src/memory/skillReviewAgentPlanner.ts | 240 +++++++++ .../skillReviewNudge.integration.test.ts | 436 +++++++++++++++++ packages/core/src/skills/skill-paths.test.ts | 48 ++ packages/core/src/skills/skill-paths.ts | 42 ++ packages/core/src/tools/agent/agent.ts | 9 +- packages/core/src/tools/skill-manage.test.ts | 209 ++++++++ packages/core/src/tools/skill-manage.ts | 261 ++++++++++ packages/core/src/tools/tool-names.ts | 2 + .../schemas/settings.schema.json | 5 + 19 files changed, 2154 insertions(+), 12 deletions(-) create mode 100644 docs/design/skill-nudge/skill-nudge.md create mode 100644 packages/core/src/memory/skillReviewAgentPlanner.ts create mode 100644 packages/core/src/memory/skillReviewNudge.integration.test.ts create mode 100644 packages/core/src/skills/skill-paths.test.ts create mode 100644 packages/core/src/skills/skill-paths.ts create mode 100644 packages/core/src/tools/skill-manage.test.ts create mode 100644 packages/core/src/tools/skill-manage.ts diff --git a/docs/design/skill-nudge/skill-nudge.md b/docs/design/skill-nudge/skill-nudge.md new file mode 100644 index 00000000000..9e89bca6c9f --- /dev/null +++ b/docs/design/skill-nudge/skill-nudge.md @@ -0,0 +1,460 @@ +# Skill Nudge:自动技能提炼系统设计文档 + +## 概述 + +本文档描述在 QwenCode 现有 Memory-Dream 架构基础上,增加 **Skill Nudge** 能力的设计方案。 + +Skill Nudge 是一种**程序性记忆自动提炼机制**:当 agent 完成了一个工具调用密集型任务后,系统在后台悄悄评估本次对话中是否存在值得复用的操作流程,并将其自动保存为项目级 skill。 + +### 与 Memory Extract 的定位差异 + +| 维度 | Memory Extract | Skill Nudge | +| ------------ | -------------------------------- | ------------------------------ | +| **记忆类型** | 陈述性记忆(用户是谁、项目背景) | 程序性记忆(如何做某类任务) | +| **触发时机** | 每次会话结束后 | 会话内工具调用达到阈值 | +| **写入目标** | `${projectRoot}/.qwen/memory/` | `${projectRoot}/.qwen/skills/` | +| **内容性质** | 用户偏好、项目上下文、反馈规则 | 可复用的操作步骤、最佳实践 | +| **生命周期** | Dream 定期整合/修剪 | 按需更新,由 agent 主动维护 | + +--- + +## 核心设计原则 + +1. **与 Extract 共享触发时机**:Skill Nudge 和 Memory Extract 都在会话结束后的同一个调度点触发,通过合并机制避免创建多个 forked agent。 +2. **工具调用密度计数**:仅当本次会话内工具调用累计 ≥ 20 次才触发,确保只在真正复杂的任务后提炼。 +3. **写保护边界明确**:`skill_manage` 只能操作当前项目的 project-level skill(`${projectRoot}/.qwen/skills/`),不能触碰 user / extension / bundled 层。 +4. **最大保留 Hermes 核心 prompt**:review agent 使用的提示语直接移植自 Hermes `_SKILL_REVIEW_PROMPT`,只做最小化适配。 + +--- + +## 架构变更 + +### 1. 计数器:`toolCallCount` + +在会话状态中增加一个工具调用计数器,由 agent 主循环维护。 + +``` +会话启动 + toolCallCount = 0 + +每次工具调用完成 + toolCallCount += 1 + if (任意工具调用了 skill_manage): + toolCallCount = 0 // 重置:已主动操作,无需 nudge + +会话结束 + if (toolCallCount >= AUTO_SKILL_THRESHOLD): // 默认 20 + _shouldReviewSkills = true + toolCallCount = 0 +``` + +**计数器重置条件**:会话期间只要有任意一次工具调用的函数名为 `skill_manage`,立即重置计数器。这防止模型刚主动创建/修改 skill 后又被 nudge 重复写入。 + +> **为何用工具调用次数而非对话轮次?** +> 工具调用次数反映任务复杂度——一个用户消息可能触发 1 次或 30 次工具调用。高工具密度意味着试错、调整策略等行为更多,产生可复用经验的概率也更高。阈值 20 比 Hermes 的 10 更保守,原因是 QwenCode 工具调用粒度通常更细(如逐行 edit)。 + +### 2. 调度点:与 Extract 合并 + +现有的 `MemoryManager.scheduleExtract()` 调用点(会话结束)作为统一调度入口,扩展为可同时调度 skill review。 + +``` +会话结束 + ├─ scheduleExtract(params) // 现有逻辑不变 + └─ scheduleSkillReview(params) // 新增 + 条件:_shouldReviewSkills === true + +scheduleSkillReview 内部: + 检查是否有 extract 任务正在 pending/running + ├─ 有 → 将 skill review 合并进同一个 forked agent(combined prompt) + └─ 无 → 单独 fork skill review agent +``` + +**合并机制**:参考 Hermes 的 `_COMBINED_REVIEW_PROMPT`,当 extract 和 skill review 同时触发时,向同一个 forked agent 注入合并 prompt,一次 API 调用完成两件事,避免资源浪费。 + +### 3. `skill_manage` 在不同 agent 上下文中的可用性 + +系统中存在两类 forked agent,`skill_manage` 的可用性应明确区分: + +| Agent 类型 | 触发来源 | `skill_manage` | 原因 | +| --------------------------------------- | ---------------------------- | ------------------------------- | ---------------------------------- | +| **Skill Review Agent**(本机制引入) | sessionEnd nudge(系统触发) | ✅ 可用,仅限 create/edit/patch | 专门用于写 skill,这是它的唯一职责 | +| **Task-execution subagent**(现有机制) | 主 agent 在任务中 fork | ❌ 禁用 | 见下文 | + +**Task-execution subagent 禁用 `skill_manage` 的原因**: + +1. **用户意图未传递**:用户从未直接指令 subagent,由主 agent 决定 fork。Subagent 自行写 skill 超出了用户授权范围。 +2. **缺乏全局视角**:Subagent 只看到子任务上下文,不知道主 agent 已经或即将触发 skill review,对"值得保存"缺乏判断依据。 +3. **计数器干扰**:Subagent 调用 `skill_manage` 会触发主 agent 的计数器重置逻辑(`skill_manage` 被调用 → `toolCallCount = 0`),导致 sessionEnd 的 nudge 被错误跳过。 +4. **噪声风险**:一次任务可能 fork 多个 subagent,若每个都能写 skill,`.qwen/skills/` 会被碎片化的局部经验填满。 + +**实现方式**:在 `fork-subagent` 的工具集继承逻辑中,显式将 `skill_manage` 加入 task-execution subagent 的排除列表。Skill review agent 作为专用 forked agent 独立配置,不走通用继承路径。 + +### 4. 权限沙箱:`SkillScopedPermissionManager` + +参照 `extractionAgentPlanner.ts` 中的 `createMemoryScopedAgentConfig`,为 skill review agent 创建专用权限范围: + +```typescript +// 仅允许以下操作 +shell: 只读命令(Shell AST 静态分析,复用现有 isShellCommandReadOnlyAST) +edit: 仅限 ${projectRoot}/.qwen/skills/ 路径下的文件 +write_file: 仅限 ${projectRoot}/.qwen/skills/ 路径下的文件 +skill_manage: 允许所有 action(含 delete),但 delete 受调用来源约束(见下文) +``` + +**`delete` 的调用来源约束**:`skill_manage(action="delete")` 保留在工具集中,但通过 **system prompt 约束**而非 schema 限制来控制调用时机: + +- **主 agent(用户直接交互)**:可以响应用户明确的删除指令(如"删除这个 skill")调用 `delete` +- **review agent(Skill Nudge 后台触发)**:system prompt 中明确声明不得主动调用 `delete`,必须等待用户在后续对话中明确要求 + +review agent 的 system prompt 约束片段: + +``` +You are reviewing this conversation to extract reusable skills. +You may create new skills or update existing ones. +Do NOT delete any skills unless the user has explicitly requested deletion +in this conversation. Autonomous deletion is not permitted. +``` + +### 5. 新增工具:`skill_manage`(project-scope) + +在 forked skill review agent 的工具集中注册一个 project-scoped 版本的 `skill_manage`: + +```typescript +// 工具 schema(注册到 skill review agent 的工具集) +{ + name: "skill_manage", + description: "Create or update a project-level skill in .qwen/skills/. " + + "Use this to save reusable procedures, workflows, or approaches discovered in this session. " + + "Skills are stored in the current project only.", + parameters: { + action: { enum: ["create", "edit", "patch", "write_file", "delete"] }, + name: { type: "string" }, + content: { type: "string", description: "Full SKILL.md content for create/edit" }, + old_string: { type: "string", description: "For patch: text to find" }, + new_string: { type: "string", description: "For patch: replacement text" }, + category: { type: "string", description: "Optional subdirectory, e.g. 'typescript'" }, + file_path: { type: "string", description: "For write_file: relative path like 'references/api.md'" }, + file_content: { type: "string" } + }, + required: ["action", "name"] +} +``` + +**写保护实现**:handler 在执行前验证目标路径必须在 `${projectRoot}/.qwen/skills/` 内,若尝试写入其他路径一律返回错误: + +```typescript +function assertProjectSkillPath(targetPath: string, projectRoot: string): void { + const projectSkillsDir = path.join(projectRoot, '.qwen', 'skills'); + const resolved = path.resolve(targetPath); + if (!resolved.startsWith(path.resolve(projectSkillsDir) + path.sep)) { + throw new Error( + `skill_manage can only write to ${projectSkillsDir}. ` + + `Use the Skills UI to manage user or bundled skills.`, + ); + } +} +``` + +--- + +## Skill Review Agent 设计 + +### 触发 prompt(移植自 Hermes,最小化适配) + +**仅 skill review(独立触发)**: + +``` +Review the conversation above and consider saving or updating a skill if appropriate. + +Focus on: was a non-trivial approach used to complete a task that required trial +and error, or changing course due to experiential findings along the way, or did +the user expect or desire a different method or outcome? If a relevant skill +already exists, update it with what you learned. Otherwise, create a new skill +if the approach is reusable. + +If nothing is worth saving, just say 'Nothing to save.' and stop. + +Skills are saved to the current project (.qwen/skills/). Use skill_manage to +create or update skills. Each skill requires a SKILL.md with YAML frontmatter: + +--- +name: +description: +--- + + +``` + +**合并 prompt(extract + skill review 同时触发)**: + +``` +Review the conversation above and consider two things: + +**Memory**: Has the user revealed things about themselves — their persona, +desires, preferences, or personal details? Has the user expressed expectations +about how you should behave, their work style, or ways they want you to operate? +If so, save using the available memory write tools. + +**Skills**: Was a non-trivial approach used to complete a task that required +trial and error, or changing course due to experiential findings along the way, +or did the user expect or desire a different method or outcome? If a relevant +skill already exists, update it with what you learned. Otherwise, create a new +skill if the approach is reusable. + +Only act if there's something genuinely worth saving. +If nothing stands out, just say 'Nothing to save.' and stop. + +Skills are saved to the current project (.qwen/skills/) via skill_manage. +Memory is saved to .qwen/memory/ via the memory write tools. +``` + +### Agent 配置 + +```typescript +{ + name: "managed-skill-extractor", + tools: [ + "read_file", // 读现有 skill 内容 + "list_directory", // 扫描 .qwen/skills/ 目录 + "skill_manage", // project-scoped 写入(见上文) + ], + permissionManager: createSkillScopedAgentConfig(config, projectRoot), + // 传入完整对话历史快照(同 Hermes messages_snapshot) + history: sessionHistory, +} +``` + +--- + +## 与现有 MemoryManager 的集成 + +### `ScheduleSkillReviewParams`(新增类型) + +```typescript +export interface ScheduleSkillReviewParams { + projectRoot: string; + sessionId: string; + history: Content[]; // 完整会话历史快照 + toolCallCount: number; // 本次会话的工具调用次数(用于日志/调试) + config?: Config; +} + +export interface SkillReviewScheduleResult { + status: 'scheduled' | 'skipped' | 'merged'; + taskId?: string; + mergedWithExtractTaskId?: string; // 若合并,记录 extract task id + skippedReason?: 'below_threshold' | 'skill_manage_called' | 'disabled'; +} +``` + +### `MemoryManager.scheduleSkillReview()`(新增方法) + +```typescript +scheduleSkillReview(params: ScheduleSkillReviewParams): SkillReviewScheduleResult { + // 1. 阈值检查 + if (params.toolCallCount < AUTO_SKILL_THRESHOLD) { + return { status: 'skipped', skippedReason: 'below_threshold' }; + } + + // 2. 检查是否有 extract 任务正在 pending/running,可合并 + const pendingExtract = this.findPendingExtractTask(params.projectRoot); + if (pendingExtract) { + // 标记 extract 任务需要同时做 skill review(合并模式) + this.mergeSkillReviewIntoExtract(pendingExtract.id, params); + return { + status: 'merged', + mergedWithExtractTaskId: pendingExtract.id, + }; + } + + // 3. 独立调度 + const record = makeTaskRecord('skill-review', params.projectRoot, params.sessionId); + this.tasks.set(record.id, record); + const promise = this.runSkillReview(record, params); + this.inFlight.add(promise); + promise.finally(() => this.inFlight.delete(promise)); + return { status: 'scheduled', taskId: record.id }; +} +``` + +### 任务类型扩展 + +```typescript +// 扩展现有 MemoryTaskRecord.taskType +export type MemoryTaskType = 'extract' | 'dream' | 'skill-review'; + +// 新增常量 +export const SKILL_REVIEW_TASK_TYPE = 'managed-skill-extractor' as const; +export const AUTO_SKILL_THRESHOLD = 20; // 工具调用次数阈值 +``` + +--- + +## 数据流 + +``` +会话进行中 + agent 主循环 + ├─ 每次工具调用 → toolCallCount += 1 + └─ 调用 skill_manage → toolCallCount = 0(重置) + +会话结束(sessionEnd 事件) + ├─ scheduleExtract(params) + │ └─ [现有逻辑:fork extraction agent → 写 .qwen/memory/] + │ + └─ toolCallCount >= 20 ? + ├─ 否 → skip + └─ 是 → scheduleSkillReview(params) + ├─ extract 正在 pending → 合并 prompt → 同一 forked agent + └─ extract 已运行/不存在 → 独立 fork skill review agent + ↓ + skill review agent(max 8 轮,2 min,沙箱权限) + 传入完整 sessionHistory + ↓ + 模型判断是否有可复用方法 + ├─ 有 → skill_manage(create/patch) + │ → 写入 ${projectRoot}/.qwen/skills/ + │ → SkillManager 缓存失效(notifyChangeListeners) + └─ 无 → "Nothing to save." 结束 + +下次会话 + SkillManager.listSkills({ level: 'project' }) + → 扫描 .qwen/skills/ 发现新建 skill + → 注入 system prompt 的 块(Tier 1) +``` + +--- + +## SKILL.md 格式约定(project-level) + +自动提炼的 skill 写入 `${projectRoot}/.qwen/skills//SKILL.md`,格式与现有 SkillManager 完全兼容: + +```yaml +--- +name: # 必填,小写字母 + 连字符 +description: # 必填,≤ 1024 字符 +# 以下字段可选 +version: 1.0.0 +metadata: + source: auto-extracted # 标记为自动提炼,便于区分 + extracted_at: '2026-04-24T12:00:00Z' +--- +# <技能标题> + +<操作步骤 / 最佳实践 / 注意事项> +``` + +`metadata.source: auto-extracted` 字段为可选约定,便于用户在 UI 中区分自动提炼的 skill 和手动创建的 skill,不影响 SkillManager 的正常加载逻辑。 + +--- + +## 安全考量 + +| 风险 | 缓解措施 | +| -------------------------------- | ------------------------------------------------------------------------------------------------ | +| 自动提炼覆盖用户精心编写的 skill | `create` 时检测同名 skill 已存在则改为 `patch`(追加/更新),不全量覆盖 | +| skill 无限增长 | review prompt 明确要求"优先更新已有 skill";`patch` action 优于 `create` | +| 写入项目外路径 | `assertProjectSkillPath` 路径白名单强制检查 | +| 提炼出含注入风险的内容 | 复用现有 `skills_guard` 安全扫描(同 Hermes `_security_scan_skill`) | +| review agent 自行删除 skill | review agent system prompt 明确禁止主动调用 `delete`;`delete` 仅在主 agent 中响应用户的明确指令 | +| 并发写入同一 skill | `_atomic_write_text` 原子写入(tempfile + os.rename),同 Hermes | + +--- + +## 配置项 + +在 QwenCode config 中新增以下配置项(可选,有默认值): + +```typescript +// config schema 新增(在 memory 下) +memory?: { + enableAutoSkill?: boolean; // 默认 true +} +``` + +对应 QWEN.md / `~/.qwen/config.json` 配置示例: + +```json +{ + "memory": { + "enableAutoSkill": true + } +} +``` + +--- + +## E2E 测试清单 + +功能实现完成后,按照 `.qwen/skills/e2e-testing/SKILL.md` 的流程,先执行 `npm run build && npm run bundle`,再使用本地构建产物 `node dist/cli.js` 进行端到端验证。 + +### 1. 低工具调用密度不触发 + +- 使用临时项目目录运行 headless 模式。 +- 配置 `memory.enableAutoSkill: true`、较低但仍高于本用例工具调用次数的 `threshold`。 +- 执行一个只需要少量工具调用的简单任务并正常结束会话。 +- 断言 `.qwen/skills/` 未新增自动提炼 skill;JSON 流中不应出现 `skill_manage` 调用。 + +### 2. 达到阈值后触发 skill review + +- 使用临时项目目录运行 headless 模式(`AUTO_SKILL_THRESHOLD` 硬编码为 20,可在测试夹具中调低)。 +- 发送一个需要多次工具调用并包含可复用流程的任务。 +- 断言会话结束后调度了 skill review;若模型判断值得保存,`.qwen/skills//SKILL.md` 被创建,且包含合法 YAML frontmatter。 +- 若模型判断 `Nothing to save.`,断言流程正常结束且没有权限错误。 + +### 3. `skill_manage` 调用会重置 nudge 计数 + +- 构造一次会话,在达到阈值前后显式触发 `skill_manage`(或通过测试夹具模拟该工具调用)。 +- 断言本轮 sessionEnd 不会再次自动调度重复的 skill review。 +- 断言不会因同一会话内已主动管理 skill 而重复写入 `.qwen/skills/`。 + +### 4. 写保护只允许 project-level skills + +- 通过 skill review agent 尝试写入项目外路径、user-level skill 路径或 bundled skill 路径。 +- 断言写入被拒绝,错误信息指向只能写入 `${projectRoot}/.qwen/skills/`。 +- 断言允许写入 `${projectRoot}/.qwen/skills//SKILL.md` 及其 skill 内部 reference 文件。 + +### 5. task-execution subagent 不暴露 `skill_manage` + +- 运行一个会 fork task-execution subagent 的任务。 +- 检查子 agent 的可用工具列表或 API logging 请求体。 +- 断言 task-execution subagent 中没有 `skill_manage`,主会话工具计数不会被子 agent 的 skill 写入逻辑干扰。 + +### 6. Extract 与 Skill Review 合并触发 + +- 构造同时满足 memory extract 和 skill nudge 的会话。 +- 断言只创建一个 forked review/extract agent 任务,且使用 combined prompt 同时处理 Memory 与 Skills。 +- 断言 memory 写入仍落在 `.qwen/memory/`,skill 写入仍落在 `.qwen/skills/`。 + +### 7. 配置开关生效 + +- 配置 `memory.enableAutoSkill: false`,即使工具调用次数超过阈值也不触发。 +- 验证默认开启时(`enableAutoSkill` 未配置或为 `true`),工具调用达到阈值后正常触发。 + +### 8. 本地构建产物验证 + +- 按 e2e-testing skill 使用 headless JSON 输出: + `node dist/cli.js "" --approval-mode yolo --output-format json 2>/dev/null`。 +- 必要时加 `--openai-logging --openai-logging-dir ` 检查请求体中的工具 schema、prompt 和权限配置。 +- 对涉及 TUI 或 sessionEnd 可见状态的场景,使用 tmux interactive 流程捕获最终输出。 + +## 与现有系统的关系 + +``` +现有 MemoryManager + ├─ scheduleExtract() ← 不变 + ├─ scheduleDream() ← 不变 + ├─ recall() ← 不变 + ├─ forget() ← 不变 + └─ scheduleSkillReview() ← 新增(本文档) + +现有 SkillManager + ├─ listSkills() ← 不变(自动发现 .qwen/skills/ 下新增文件) + ├─ loadSkill() ← 不变 + └─ [write 能力] ← 通过 skill_manage 工具暴露给 review agent + +触发点(现有 sessionEnd hook) + └─ 同时调用 scheduleExtract + scheduleSkillReview(条件满足时) +``` + +SkillManager 的读取侧(`listSkills`、`loadSkill`)完全不需要修改——review agent 写入 `${projectRoot}/.qwen/skills/` 后,`SkillManager` 通过现有的 `chokidar` 文件监听自动感知变化,调用 `notifyChangeListeners()` 触发缓存刷新,下次对话自然可以在 system prompt 中看到新 skill。 diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index d85d1194119..2f6d849eef5 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -30,6 +30,7 @@ import { NativeLspService, isBareMode, isToolEnabled, + type ConfigParameters, } from '@qwen-code/qwen-code-core'; import { extensionsCommand } from '../commands/extensions.js'; import { hooksCommand } from '../commands/hooks.js'; @@ -1106,7 +1107,7 @@ export async function loadCliConfig( const modelProvidersConfig = settings.modelProviders; - const config = new Config({ + const configParams: ConfigParameters = { sessionId, sessionData, embeddingModel: DEFAULT_QWEN_EMBEDDING_MODEL, @@ -1231,6 +1232,9 @@ export async function loadCliConfig( ? false : (settings.memory?.enableManagedAutoMemory ?? true), enableManagedAutoDream: settings.memory?.enableManagedAutoDream ?? false, + enableAutoSkill: bareMode + ? false + : (settings.memory?.enableAutoSkill ?? false), fastModel: settings.fastModel || undefined, // Use separated hooks if provided, otherwise fall back to merged hooks userHooks: bareMode @@ -1268,7 +1272,9 @@ export async function loadCliConfig( : undefined, } : undefined, - }); + }; + + const config = new Config(configParams); if (lspEnabled) { try { diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index 8aa7517a62d..a7b082c620b 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -1123,6 +1123,16 @@ const SETTINGS_SCHEMA = { 'Enable automatic consolidation (dream) of collected memories.', showInDialog: false, }, + enableAutoSkill: { + type: 'boolean', + label: 'Enable Auto Skill', + category: 'Memory', + requiresRestart: false, + default: false, + description: + 'Enable background review for reusable project skills after tool-heavy sessions.', + showInDialog: false, + }, }, }, diff --git a/packages/cli/src/nonInteractiveCli.ts b/packages/cli/src/nonInteractiveCli.ts index dc90c77756b..469445d6422 100644 --- a/packages/cli/src/nonInteractiveCli.ts +++ b/packages/cli/src/nonInteractiveCli.ts @@ -451,6 +451,9 @@ export async function runNonInteractive( } adapter.emitToolResult(finalRequestInfo, toolResponse); + config + .getGeminiClient() + .recordCompletedToolCall(finalRequestInfo.name); if (toolResponse.responseParts) { toolResponseParts.push(...toolResponse.responseParts); @@ -599,6 +602,9 @@ export async function runNonInteractive( } adapter.emitToolResult(requestInfo, toolResponse); + config + .getGeminiClient() + .recordCompletedToolCall(requestInfo.name); if (toolResponse.responseParts) { itemToolResponseParts.push(...toolResponse.responseParts); @@ -713,6 +719,13 @@ export async function runNonInteractive( await new Promise((r) => setTimeout(r, 100)); } + const memoryTaskPromises = config + .getGeminiClient() + .consumePendingMemoryTaskPromises(); + if (memoryTaskPromises.length > 0) { + await Promise.allSettled(memoryTaskPromises); + } + const metrics = uiTelemetryService.getMetrics(); const usage = computeUsageFromMetrics(metrics); // Get stats for JSON format output diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index d680a736d52..9313d56b031 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -1705,6 +1705,10 @@ export const useGeminiStream = ( (t) => !t.request.isClientInitiated, ); + for (const toolCall of geminiTools) { + geminiClient?.recordCompletedToolCall(toolCall.request.name); + } + if (geminiTools.length === 0) { return; } diff --git a/packages/core/src/agents/runtime/agent-core.ts b/packages/core/src/agents/runtime/agent-core.ts index 76359454083..babce151df1 100644 --- a/packages/core/src/agents/runtime/agent-core.ts +++ b/packages/core/src/agents/runtime/agent-core.ts @@ -74,6 +74,7 @@ import { type ContextState, templateString } from './agent-headless.js'; */ export const EXCLUDED_TOOLS_FOR_SUBAGENTS: ReadonlySet = new Set([ ToolNames.AGENT, + ToolNames.SKILL_MANAGE, ToolNames.CRON_CREATE, ToolNames.CRON_LIST, ToolNames.CRON_DELETE, @@ -334,7 +335,12 @@ export class AgentCore { ), ); } - toolsList.push(...onlyInlineDecls); + // Also filter inline FunctionDeclaration[] passed directly in toolConfig. + toolsList.push( + ...onlyInlineDecls.filter( + (d) => !(d.name && excludedFromSubagents.has(d.name)), + ), + ); } else { // Inherit all available tools by default when not specified. toolsList.push( diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 16a9f05f7db..0d579627cf3 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -462,6 +462,8 @@ export interface ConfigParameters { enableManagedAutoMemory?: boolean; /** Enable managed auto-dream consolidation separately from extraction. Defaults to true. */ enableManagedAutoDream?: boolean; + /** Enable automatic project skill review after tool-heavy sessions. Defaults to true. */ + enableAutoSkill?: boolean; /** * Lightweight model for background tasks (memory extraction, dream, /btw side questions). * When set and valid for the current auth type, forked agents use this model instead of @@ -690,6 +692,7 @@ export class Config { private readonly defaultFileEncoding: FileEncodingType | undefined; private readonly enableManagedAutoMemory: boolean; private readonly enableManagedAutoDream: boolean; + private readonly enableAutoSkill: boolean; private fastModel?: string; private readonly disableAllHooks: boolean; /** User-level hooks (always loaded regardless of trust) */ @@ -886,6 +889,7 @@ export class Config { }); this.enableManagedAutoMemory = params.enableManagedAutoMemory ?? true; this.enableManagedAutoDream = params.enableManagedAutoDream ?? false; + this.enableAutoSkill = params.enableAutoSkill ?? false; this.fastModel = params.fastModel || undefined; this.disableAllHooks = params.disableAllHooks ?? false; // Store user and project hooks separately for proper source attribution @@ -2128,6 +2132,10 @@ export class Config { return this.enableManagedAutoDream && !this.getBareMode(); } + getAutoSkillEnabled(): boolean { + return this.enableAutoSkill && !this.getBareMode(); + } + /** * Return the MemoryManager instance created for this Config. * Use this to share background-task state (registry, drainer) with memory @@ -2630,6 +2638,10 @@ export class Config { const { SkillTool } = await import('../tools/skill.js'); return new SkillTool(this); }); + await registerLazy(ToolNames.SKILL_MANAGE, async () => { + const { SkillManageTool } = await import('../tools/skill-manage.js'); + return new SkillManageTool(this); + }); await registerLazy(ToolNames.LS, async () => { const { LSTool } = await import('../tools/ls.js'); return new LSTool(this); diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 8c648f59dfb..8ba50ec88d5 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -128,6 +128,7 @@ const EMPTY_RELEVANT_AUTO_MEMORY_RESULT: RelevantAutoMemoryPromptResult = { export class GeminiClient { private chat?: GeminiChat; private sessionTurnCount = 0; + private toolCallCount = 0; private readonly surfacedRelevantAutoMemoryPaths = new Set(); private readonly loopDetector: LoopDetectionService; @@ -513,11 +514,10 @@ export class GeminiClient { private runManagedAutoMemoryBackgroundTasks( messageType: SendMessageType, ): void { - if (messageType !== SendMessageType.UserQuery) { - return; - } - - if (!this.config.getManagedAutoMemoryEnabled()) { + if ( + messageType !== SendMessageType.UserQuery && + messageType !== SendMessageType.ToolResult + ) { return; } @@ -525,6 +525,34 @@ export class GeminiClient { const sessionId = this.config.getSessionId(); const history = this.getHistory(); const mgr = this.config.getMemoryManager(); + const autoSkillEnabled = this.config.getAutoSkillEnabled(); + + if (autoSkillEnabled) { + const skillReviewResult = mgr.scheduleSkillReview({ + projectRoot, + sessionId, + history, + config: this.config, + toolCallCount: this.toolCallCount, + enabled: autoSkillEnabled, + threshold: 20, + maxTurns: 8, + timeoutMs: 120_000, + }); + if (skillReviewResult.promise) { + this.pendingMemoryTaskPromises.push( + skillReviewResult.promise.then((record) => { + const touched = record.metadata?.['touchedSkillFiles']; + return Array.isArray(touched) ? touched.length : 0; + }), + ); + } + } + this.toolCallCount = 0; + + if (!this.config.getManagedAutoMemoryEnabled()) { + return; + } const extractPromise = mgr .scheduleExtract({ @@ -581,6 +609,14 @@ export class GeminiClient { return promises; } + recordCompletedToolCall(toolName: string): void { + if (toolName === 'skill_manage') { + this.toolCallCount = 0; + return; + } + this.toolCallCount += 1; + } + async *sendMessageStream( request: PartListUnion, signal: AbortSignal, diff --git a/packages/core/src/memory/manager.test.ts b/packages/core/src/memory/manager.test.ts index 4860bdaae49..c71ca468888 100644 --- a/packages/core/src/memory/manager.test.ts +++ b/packages/core/src/memory/manager.test.ts @@ -27,8 +27,13 @@ vi.mock('./dream.js', () => ({ runManagedAutoMemoryDream: vi.fn(), })); +vi.mock('./skillReviewAgentPlanner.js', () => ({ + runSkillReviewByAgent: vi.fn(), +})); + import { runAutoMemoryExtract } from './extract.js'; import { runManagedAutoMemoryDream } from './dream.js'; +import { runSkillReviewByAgent } from './skillReviewAgentPlanner.js'; // ─── Helpers ────────────────────────────────────────────────────────────────── @@ -222,6 +227,85 @@ describe('MemoryManager', () => { }); }); + // ─── Skill review ───────────────────────────────────────────────────────── + + describe('scheduleSkillReview()', () => { + beforeEach(() => { + vi.resetAllMocks(); + vi.mocked(runSkillReviewByAgent).mockResolvedValue({ + touchedSkillFiles: ['/project/.qwen/skills/test/SKILL.md'], + }); + }); + + it('skips below threshold', () => { + const mgr = new MemoryManager(); + const result = mgr.scheduleSkillReview({ + projectRoot: '/project', + sessionId: 'sess', + history: [], + toolCallCount: 1, + threshold: 2, + config: makeMockConfig(), + }); + + expect(result).toEqual({ + status: 'skipped', + skippedReason: 'below_threshold', + }); + expect(runSkillReviewByAgent).not.toHaveBeenCalled(); + }); + + it('skips when skill_manage was called in history', () => { + const mgr = new MemoryManager(); + const result = mgr.scheduleSkillReview({ + projectRoot: '/project', + sessionId: 'sess', + history: [ + { + role: 'model', + parts: [{ functionCall: { name: 'skill_manage', args: {} } }], + }, + ], + toolCallCount: 20, + threshold: 2, + config: makeMockConfig(), + }); + + expect(result).toEqual({ + status: 'skipped', + skippedReason: 'skill_manage_called', + }); + expect(runSkillReviewByAgent).not.toHaveBeenCalled(); + }); + + it('schedules skill review at threshold', async () => { + const mgr = new MemoryManager(); + const result = mgr.scheduleSkillReview({ + projectRoot: '/project', + sessionId: 'sess', + history: [{ role: 'user', parts: [{ text: 'hi' }] }], + toolCallCount: 2, + threshold: 2, + config: makeMockConfig(), + maxTurns: 3, + timeoutMs: 30_000, + }); + + expect(result.status).toBe('scheduled'); + await result.promise; + expect(runSkillReviewByAgent).toHaveBeenCalledWith({ + config: expect.any(Object), + projectRoot: '/project', + history: [{ role: 'user', parts: [{ text: 'hi' }] }], + maxTurns: 3, + timeoutMs: 30_000, + }); + expect(mgr.listTasksByType('skill-review', '/project')[0]?.status).toBe( + 'completed', + ); + }); + }); + // ─── listTasksByType() ──────────────────────────────────────────────────── describe('listTasksByType()', () => { @@ -229,6 +313,7 @@ describe('MemoryManager', () => { const mgr = new MemoryManager(); expect(mgr.listTasksByType('extract')).toEqual([]); expect(mgr.listTasksByType('dream')).toEqual([]); + expect(mgr.listTasksByType('skill-review')).toEqual([]); }); it('filters by projectRoot when provided', async () => { @@ -425,6 +510,97 @@ describe('MemoryManager', () => { }); }); + // ─── scheduleSkillReview: merge with running extract ────────────────────── + + describe('scheduleSkillReview(): merged_with_extract (checklist 6)', () => { + it('returns merged_with_extract when extract is already running for same project', async () => { + // arrange: extract never resolves so it stays "running" + vi.mocked(runAutoMemoryExtract).mockReturnValue(new Promise(() => {})); + + const mgr = new MemoryManager(); + const projectRoot = '/test-project-merge'; + const config = makeMockConfig(); + + // Start extract (will stay in-flight) + void mgr.scheduleExtract({ + projectRoot, + sessionId: 'sess-extract', + history: [{ role: 'user', parts: [{ text: 'do some work' }] }], + config, + }); + + // Now schedule skill review while extract is running + const result = mgr.scheduleSkillReview({ + projectRoot, + sessionId: 'sess-extract', + history: [{ role: 'user', parts: [{ text: 'do some work' }] }], + toolCallCount: 25, + threshold: 20, + enabled: true, + config, + }); + + expect(result.status).toBe('skipped'); + expect(result.skippedReason).toBe('merged_with_extract'); + expect(result.taskId).toBeDefined(); + }); + + it('marks the extract record with shouldReviewSkillsAlso metadata', async () => { + vi.mocked(runAutoMemoryExtract).mockReturnValue(new Promise(() => {})); + + const mgr = new MemoryManager(); + const projectRoot = '/test-project-metadata'; + const config = makeMockConfig(); + + void mgr.scheduleExtract({ + projectRoot, + sessionId: 'sess-meta', + history: [{ role: 'user', parts: [{ text: 'work' }] }], + config, + }); + + mgr.scheduleSkillReview({ + projectRoot, + sessionId: 'sess-meta', + history: [{ role: 'user', parts: [{ text: 'work' }] }], + toolCallCount: 30, + threshold: 20, + enabled: true, + config, + }); + + // The extract task record should now carry the merge flag + const extractRecords = mgr.listTasksByType('extract', projectRoot); + expect(extractRecords.length).toBeGreaterThan(0); + const extractRecord = extractRecords[0]!; + expect(extractRecord.metadata?.['shouldReviewSkillsAlso']).toBe(true); + }); + + it('schedules skill review independently when no extract is running', () => { + const mgr = new MemoryManager(); + const projectRoot = '/test-project-independent'; + const config = makeMockConfig(); + + vi.mocked(runSkillReviewByAgent).mockResolvedValue({ + touchedSkillFiles: [], + }); + + const result = mgr.scheduleSkillReview({ + projectRoot, + sessionId: 'sess-1', + history: [{ role: 'user', parts: [{ text: 'work' }] }], + toolCallCount: 25, + threshold: 20, + enabled: true, + config, + }); + + expect(result.status).toBe('scheduled'); + expect(result.skippedReason).toBeUndefined(); + expect(result.taskId).toBeDefined(); + }); + }); + // ─── resetExtractStateForTests() ───────────────────────────────────────── describe('resetExtractStateForTests()', () => { diff --git a/packages/core/src/memory/manager.ts b/packages/core/src/memory/manager.ts index 86924893cc8..288974f48a2 100644 --- a/packages/core/src/memory/manager.ts +++ b/packages/core/src/memory/manager.ts @@ -40,6 +40,7 @@ import type { Content, Part } from '@google/genai'; import type { Config } from '../config/config.js'; import { Storage } from '../config/storage.js'; import { logMemoryExtract, MemoryExtractEvent } from '../telemetry/index.js'; +import { ToolNames } from '../tools/tool-names.js'; import { isAutoMemPath } from './paths.js'; import { getAutoMemoryConsolidationLockPath, @@ -65,6 +66,7 @@ import { getManagedAutoMemoryStatus } from './status.js'; import { appendManagedAutoMemoryToUserMemory } from './prompt.js'; import { writeDreamManualRunToMetadata } from './dream.js'; import { buildConsolidationTaskPrompt } from './dreamAgentPlanner.js'; +import { runSkillReviewByAgent } from './skillReviewAgentPlanner.js'; import type { AutoMemoryMetadata } from './types.js'; // ─── Re-export public types consumed by callers ─────────────────────────────── @@ -91,7 +93,7 @@ export type MemoryTaskStatus = export interface MemoryTaskRecord { id: string; - taskType: 'extract' | 'dream'; + taskType: 'extract' | 'dream' | 'skill-review'; projectRoot: string; sessionId?: string; status: MemoryTaskStatus; @@ -112,6 +114,30 @@ export interface ScheduleExtractParams { config?: Config; } +export interface ScheduleSkillReviewParams { + projectRoot: string; + sessionId: string; + history: Content[]; + toolCallCount: number; + now?: Date; + config?: Config; + enabled?: boolean; + threshold?: number; + maxTurns?: number; + timeoutMs?: number; +} + +export interface SkillReviewScheduleResult { + status: 'scheduled' | 'skipped'; + taskId?: string; + skippedReason?: + | 'below_threshold' + | 'skill_manage_called' + | 'disabled' + | 'merged_with_extract'; + promise?: Promise; +} + // AutoMemoryExtractResult is re-used as the return type export type { AutoMemoryExtractResult as ExtractResult } from './extract.js'; @@ -157,6 +183,8 @@ export interface DrainOptions { export const EXTRACT_TASK_TYPE = 'managed-auto-memory-extraction' as const; export const DREAM_TASK_TYPE = 'managed-auto-memory-dream' as const; +export const SKILL_REVIEW_TASK_TYPE = 'managed-skill-extractor' as const; +export const AUTO_SKILL_THRESHOLD = 20; export const DEFAULT_AUTO_DREAM_MIN_HOURS = 24; export const DEFAULT_AUTO_DREAM_MIN_SESSIONS = 5; @@ -174,7 +202,7 @@ const WRITE_TOOL_NAMES = new Set([ // ─── Internal helpers ───────────────────────────────────────────────────────── function makeTaskRecord( - type: 'extract' | 'dream', + type: MemoryTaskRecord['taskType'], projectRoot: string, sessionId?: string, ): MemoryTaskRecord { @@ -228,6 +256,14 @@ function historyWritesToMemory( ); } +function historyCallsSkillManage(history: Content[]): boolean { + return history.some((msg) => + (msg.parts ?? []).some( + (p) => p.functionCall?.name === ToolNames.SKILL_MANAGE, + ), + ); +} + function isProcessRunning(pid: number): boolean { try { process.kill(pid, 0); @@ -420,7 +456,7 @@ export class MemoryManager { /** Return task records filtered by type and optionally by projectRoot. */ listTasksByType( - taskType: 'extract' | 'dream', + taskType: MemoryTaskRecord['taskType'], projectRoot?: string, ): MemoryTaskRecord[] { return [...this.tasks.values()] @@ -646,6 +682,139 @@ export class MemoryManager { ); } + // ─── Skill review ───────────────────────────────────────────────────────────── + + scheduleSkillReview( + params: ScheduleSkillReviewParams, + ): SkillReviewScheduleResult { + if (params.enabled === false) { + return { status: 'skipped', skippedReason: 'disabled' }; + } + + if (historyCallsSkillManage(params.history)) { + return { status: 'skipped', skippedReason: 'skill_manage_called' }; + } + + const threshold = params.threshold ?? AUTO_SKILL_THRESHOLD; + if (params.toolCallCount < threshold) { + return { status: 'skipped', skippedReason: 'below_threshold' }; + } + + if (!params.config) { + return { status: 'skipped', skippedReason: 'disabled' }; + } + + // Check if extract is already running and eligible for merge + const pendingExtractTaskId = this.findPendingOrRunningExtractTask( + params.projectRoot, + ); + if (pendingExtractTaskId) { + // Mark extract record to include skill review in its execution + const extractRecord = this.tasks.get(pendingExtractTaskId); + if (extractRecord && extractRecord.metadata) { + extractRecord.metadata['shouldReviewSkillsAlso'] = true; + extractRecord.metadata['skillReviewParams'] = { + toolCallCount: params.toolCallCount, + threshold, + }; + this.notify(); + } + // Create a lightweight task record for tracking but return merged status + const record = makeTaskRecord( + 'skill-review', + params.projectRoot, + params.sessionId, + ); + this.storeWith(record, { + status: 'skipped', + progressText: 'Merged into extract task for combined review.', + metadata: { + mergedWithTaskId: pendingExtractTaskId, + historyLength: params.history.length, + toolCallCount: params.toolCallCount, + threshold, + }, + }); + return { + status: 'skipped', + taskId: record.id, + skippedReason: 'merged_with_extract', + }; + } + + const record = makeTaskRecord( + 'skill-review', + params.projectRoot, + params.sessionId, + ); + this.storeWith(record, { + status: 'running', + progressText: 'Running managed skill review.', + metadata: { + historyLength: params.history.length, + toolCallCount: params.toolCallCount, + threshold, + }, + }); + + const promise = this.track(record.id, this.runSkillReview(record, params)); + return { status: 'scheduled', taskId: record.id, promise }; + } + + private findPendingOrRunningExtractTask( + projectRoot: string, + ): string | undefined { + // Check currently running extract + const currentTaskId = this.extractCurrentTaskId.get(projectRoot); + if (currentTaskId) { + return currentTaskId; + } + + // Check queued extract + const queued = this.extractQueued.get(projectRoot); + if (queued) { + return queued.taskId; + } + + // Check for recently created but not yet running extract task + const records = this.listTasksByType('extract', projectRoot); + for (const record of records) { + if (record.status === 'pending' || record.status === 'running') { + return record.id; + } + } + + return undefined; + } + + private async runSkillReview( + record: MemoryTaskRecord, + params: ScheduleSkillReviewParams, + ): Promise { + try { + const result = await runSkillReviewByAgent({ + config: params.config!, + projectRoot: params.projectRoot, + history: params.history, + maxTurns: params.maxTurns, + timeoutMs: params.timeoutMs, + }); + this.update(record, { + status: 'completed', + progressText: + result.systemMessage ?? + 'Managed skill review completed without durable changes.', + metadata: { touchedSkillFiles: result.touchedSkillFiles }, + }); + } catch (error) { + this.update(record, { + status: 'failed', + error: error instanceof Error ? error.message : String(error), + }); + } + return record; + } + // ─── Dream ──────────────────────────────────────────────────────────────────── /** diff --git a/packages/core/src/memory/skillReviewAgentPlanner.ts b/packages/core/src/memory/skillReviewAgentPlanner.ts new file mode 100644 index 00000000000..a359fa78c3c --- /dev/null +++ b/packages/core/src/memory/skillReviewAgentPlanner.ts @@ -0,0 +1,240 @@ +/** + * @license + * Copyright 2026 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { Content } from '@google/genai'; +import type { Config } from '../config/config.js'; +import type { PermissionManager } from '../permissions/permission-manager.js'; +import type { + PermissionCheckContext, + PermissionDecision, +} from '../permissions/types.js'; +import { runForkedAgent } from '../utils/forkedAgent.js'; +import { buildFunctionResponseParts } from '../tools/agent/fork-subagent.js'; +import { ToolNames } from '../tools/tool-names.js'; +import { isShellCommandReadOnlyAST } from '../utils/shellAstParser.js'; +import { stripShellWrapper } from '../utils/shell-utils.js'; +import { + getProjectSkillsRoot, + isProjectSkillPath, +} from '../skills/skill-paths.js'; + +export const SKILL_REVIEW_AGENT_NAME = 'managed-skill-extractor' as const; +export const DEFAULT_AUTO_SKILL_MAX_TURNS = 8; +export const DEFAULT_AUTO_SKILL_TIMEOUT_MS = 120_000; + +export interface SkillReviewExecutionResult { + touchedSkillFiles: string[]; + systemMessage?: string; +} + +type SkillScopedPermissionManager = Pick< + PermissionManager, + | 'evaluate' + | 'findMatchingDenyRule' + | 'hasMatchingAskRule' + | 'hasRelevantRules' + | 'isToolEnabled' +>; + +function isScopedTool(toolName: string): boolean { + return ( + toolName === ToolNames.SHELL || + toolName === ToolNames.EDIT || + toolName === ToolNames.WRITE_FILE || + toolName === ToolNames.SKILL_MANAGE + ); +} + +function mergePermissionDecision( + scopedDecision: PermissionDecision, + baseDecision: PermissionDecision, +): PermissionDecision { + const priority: Record = { + deny: 4, + ask: 3, + allow: 2, + default: 1, + }; + return priority[baseDecision] > priority[scopedDecision] + ? baseDecision + : scopedDecision; +} + +async function evaluateScopedDecision( + ctx: PermissionCheckContext, + projectRoot: string, +): Promise { + switch (ctx.toolName) { + case ToolNames.SHELL: { + if (!ctx.command) { + return 'deny'; + } + const isReadOnly = await isShellCommandReadOnlyAST( + stripShellWrapper(ctx.command), + ); + return isReadOnly ? 'allow' : 'deny'; + } + case ToolNames.EDIT: + case ToolNames.WRITE_FILE: + return ctx.filePath && isProjectSkillPath(ctx.filePath, projectRoot) + ? 'allow' + : 'deny'; + case ToolNames.SKILL_MANAGE: + return 'allow'; + default: + return 'default'; + } +} + +function getScopedDenyRule( + ctx: PermissionCheckContext, + projectRoot: string, +): string | undefined { + switch (ctx.toolName) { + case ToolNames.SHELL: + return 'ManagedSkillReview(run_shell_command: read-only only)'; + case ToolNames.EDIT: + return `ManagedSkillReview(edit: only within ${getProjectSkillsRoot(projectRoot)})`; + case ToolNames.WRITE_FILE: + return `ManagedSkillReview(write_file: only within ${getProjectSkillsRoot(projectRoot)})`; + default: + return undefined; + } +} + +export function createSkillScopedAgentConfig( + config: Config, + projectRoot: string, +): Config { + const basePm = config.getPermissionManager?.(); + const scopedPm: SkillScopedPermissionManager = { + hasRelevantRules(ctx: PermissionCheckContext): boolean { + return isScopedTool(ctx.toolName) || !!basePm?.hasRelevantRules(ctx); + }, + hasMatchingAskRule(ctx: PermissionCheckContext): boolean { + return basePm?.hasMatchingAskRule(ctx) ?? false; + }, + findMatchingDenyRule(ctx: PermissionCheckContext): string | undefined { + const scoped = getScopedDenyRule(ctx, projectRoot); + if (scoped) return scoped; + return basePm?.findMatchingDenyRule(ctx); + }, + async evaluate(ctx: PermissionCheckContext): Promise { + const scopedDecision = await evaluateScopedDecision(ctx, projectRoot); + if (!basePm) return scopedDecision; + const baseDecision = basePm.hasRelevantRules(ctx) + ? await basePm.evaluate(ctx) + : 'default'; + return mergePermissionDecision(scopedDecision, baseDecision); + }, + async isToolEnabled(toolName: string): Promise { + if (isScopedTool(toolName)) return true; + if (basePm) return basePm.isToolEnabled(toolName); + return true; + }, + }; + + const scopedConfig = Object.create(config) as Config; + scopedConfig.getPermissionManager = () => + scopedPm as unknown as PermissionManager; + return scopedConfig; +} + +const SKILL_REVIEW_SYSTEM_PROMPT = [ + 'You are reviewing this conversation to extract reusable skills.', + 'You may create new skills or update existing ones.', + 'Do NOT delete any skills unless the user has explicitly requested deletion in this conversation. Autonomous deletion is not permitted.', + '', + 'Review the conversation above and consider saving or updating a skill if appropriate.', + '', + 'Focus on: was a non-trivial approach used to complete a task that required trial and error, or changing course due to experiential findings along the way, or did the user expect or desire a different method or outcome? If a relevant skill already exists, update it with what you learned. Otherwise, create a new skill if the approach is reusable.', + '', + "If nothing is worth saving, just say 'Nothing to save.' and stop.", +].join('\n'); + +function buildAgentHistory(history: Content[]): Content[] { + if (history.length === 0) return []; + const last = history[history.length - 1]; + if (last.role !== 'model') return history.slice(0, -1); + const openCalls = (last.parts ?? []).filter((p) => p.functionCall); + if (openCalls.length === 0) return [...history]; + const toolResponses = buildFunctionResponseParts( + last, + 'Background skill review started.', + ); + return [ + ...history, + { role: 'user' as const, parts: toolResponses }, + { role: 'model' as const, parts: [{ text: 'Acknowledged.' }] }, + ]; +} + +function buildTaskPrompt(skillsRoot: string): string { + return [ + `Project skills directory: \`${skillsRoot}\``, + '', + 'Use `list_directory` and `read_file` to inspect existing skills before writing.', + 'Use `skill_manage` to create or update project-level skills only.', + 'Each skill requires a SKILL.md with YAML frontmatter:', + '', + '---', + 'name: ', + 'description: ', + '---', + '', + '', + ].join('\n'); +} + +export async function runSkillReviewByAgent(params: { + config: Config; + projectRoot: string; + history: Content[]; + maxTurns?: number; + timeoutMs?: number; +}): Promise { + const skillsRoot = getProjectSkillsRoot(params.projectRoot); + const scopedConfig = createSkillScopedAgentConfig( + params.config, + params.projectRoot, + ); + const result = await runForkedAgent({ + name: SKILL_REVIEW_AGENT_NAME, + config: scopedConfig, + taskPrompt: buildTaskPrompt(skillsRoot), + systemPrompt: SKILL_REVIEW_SYSTEM_PROMPT, + maxTurns: params.maxTurns ?? DEFAULT_AUTO_SKILL_MAX_TURNS, + maxTimeMinutes: + (params.timeoutMs ?? DEFAULT_AUTO_SKILL_TIMEOUT_MS) / 60_000, + tools: [ + ToolNames.READ_FILE, + ToolNames.LS, + ToolNames.SHELL, + ToolNames.WRITE_FILE, + ToolNames.EDIT, + ToolNames.SKILL_MANAGE, + ], + extraHistory: buildAgentHistory(params.history), + }); + + if (result.status !== 'completed') { + throw new Error( + result.terminateReason || + 'Skill review agent did not complete successfully', + ); + } + + const touchedSkillFiles = result.filesTouched.filter((filePath) => + isProjectSkillPath(filePath, params.projectRoot), + ); + return { + touchedSkillFiles, + systemMessage: + touchedSkillFiles.length > 0 + ? `Skill review updated ${touchedSkillFiles.length} file(s).` + : undefined, + }; +} diff --git a/packages/core/src/memory/skillReviewNudge.integration.test.ts b/packages/core/src/memory/skillReviewNudge.integration.test.ts new file mode 100644 index 00000000000..b353340b5f3 --- /dev/null +++ b/packages/core/src/memory/skillReviewNudge.integration.test.ts @@ -0,0 +1,436 @@ +/** + * @license + * Copyright 2026 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * E2E Integration Tests for AutoSkill mechanism (per skill-nudge.md L258-320) + * + * Tests the complete workflow from toolCallCount tracking to skill file writing. + * These tests validate the behavior described in the design document's E2E checklist. + */ + +import * as fs from 'node:fs/promises'; +import * as os from 'node:os'; +import * as path from 'node:path'; +import type { Content } from '@google/genai'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import type { Config } from '../config/config.js'; +import { MemoryManager, AUTO_SKILL_THRESHOLD } from './manager.js'; +import { getProjectSkillsRoot } from '../skills/skill-paths.js'; + +describe('Skill Nudge E2E Integration Tests', () => { + let tempDir: string; + let projectRoot: string; + let mgr: MemoryManager; + let mockConfig: Config; + + const sampleHistory: Content[] = [ + { role: 'user', parts: [{ text: 'Help me refactor this code' }] }, + { + role: 'model', + parts: [{ text: 'I can help. Let me analyze the code first.' }], + }, + ]; + + beforeEach(async () => { + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'auto-skill-')); + projectRoot = path.join(tempDir, 'project'); + await fs.mkdir(projectRoot, { recursive: true }); + + mgr = new MemoryManager(); + mockConfig = { + getSessionId: () => 'test-session-1', + getModel: () => 'qwen-coder-32b', + getProjectRoot: () => projectRoot, + } as Config; + }); + + afterEach(async () => { + await fs.rm(tempDir, { recursive: true, force: true }); + }); + + // ─── Test 1: Low Tool Call Density Not Trigger ─────────────────────────── + + describe('Test 1: Low tool call density should not trigger skill review', () => { + it('should skip when toolCallCount < threshold', () => { + const result = mgr.scheduleSkillReview({ + projectRoot, + sessionId: 'test-session-1', + history: sampleHistory, + toolCallCount: 5, // Below default threshold of 20 + threshold: AUTO_SKILL_THRESHOLD, + enabled: true, + config: mockConfig, + }); + + expect(result.status).toBe('skipped'); + expect(result.skippedReason).toBe('below_threshold'); + expect(result.taskId).toBeUndefined(); + }); + + it('should skip when exactly at threshold minus 1', () => { + const result = mgr.scheduleSkillReview({ + projectRoot, + sessionId: 'test-session-1', + history: sampleHistory, + toolCallCount: AUTO_SKILL_THRESHOLD - 1, + threshold: AUTO_SKILL_THRESHOLD, + enabled: true, + config: mockConfig, + }); + + expect(result.status).toBe('skipped'); + expect(result.skippedReason).toBe('below_threshold'); + }); + }); + + // ─── Test 2: At Threshold Should Trigger ────────────────────────────────── + + describe('Test 2: At or above threshold should trigger skill review', () => { + it('should schedule when toolCallCount exactly equals threshold', () => { + const result = mgr.scheduleSkillReview({ + projectRoot, + sessionId: 'test-session-1', + history: sampleHistory, + toolCallCount: AUTO_SKILL_THRESHOLD, + threshold: AUTO_SKILL_THRESHOLD, + enabled: true, + config: mockConfig, + }); + + expect(result.status).toBe('scheduled'); + expect(result.taskId).toBeDefined(); + expect(result.skippedReason).toBeUndefined(); + }); + + it('should schedule when toolCallCount exceeds threshold', () => { + const result = mgr.scheduleSkillReview({ + projectRoot, + sessionId: 'test-session-1', + history: sampleHistory, + toolCallCount: AUTO_SKILL_THRESHOLD + 10, + threshold: AUTO_SKILL_THRESHOLD, + enabled: true, + config: mockConfig, + }); + + expect(result.status).toBe('scheduled'); + expect(result.taskId).toBeDefined(); + }); + + it('should respect custom threshold when provided', () => { + const customThreshold = 50; + const result = mgr.scheduleSkillReview({ + projectRoot, + sessionId: 'test-session-1', + history: sampleHistory, + toolCallCount: 30, + threshold: customThreshold, + enabled: true, + config: mockConfig, + }); + + expect(result.status).toBe('skipped'); + expect(result.skippedReason).toBe('below_threshold'); + }); + }); + + // ─── Test 3: skill_manage Call Resets Counter ────────────────────────────── + + describe('Test 3: skill_manage function call in history should prevent nudge', () => { + it('should skip when history contains skill_manage call', () => { + const historyWithSkillManage: Content[] = [ + { role: 'user', parts: [{ text: 'Create a skill' }] }, + { + role: 'model', + parts: [ + { + functionCall: { + name: 'skill_manage', + args: { + action: 'create', + name: 'my-skill', + content: '---\nname: my-skill\n---\n', + }, + }, + }, + ], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'skill_manage', + response: { output: 'Success' }, + }, + }, + ], + }, + ]; + + const result = mgr.scheduleSkillReview({ + projectRoot, + sessionId: 'test-session-1', + history: historyWithSkillManage, + toolCallCount: 30, // Well above threshold + threshold: AUTO_SKILL_THRESHOLD, + enabled: true, + config: mockConfig, + }); + + expect(result.status).toBe('skipped'); + expect(result.skippedReason).toBe('skill_manage_called'); + }); + + it('should not trigger nudge even with high toolCallCount if skill_manage was used', () => { + const result = mgr.scheduleSkillReview({ + projectRoot, + sessionId: 'test-session-1', + history: [ + { role: 'user', parts: [{ text: 'Many tool calls...' }] }, + { + role: 'model', + parts: [ + { + functionCall: { + name: 'skill_manage', + args: { action: 'patch' }, + }, + }, + ], + }, + ], + toolCallCount: 100, + enabled: true, + config: mockConfig, + }); + + expect(result.status).toBe('skipped'); + expect(result.skippedReason).toBe('skill_manage_called'); + }); + }); + + // ─── Test 4: Config Enable/Disable Gate ──────────────────────────────────── + + describe('Test 4: Configuration enable/disable gate', () => { + it('should skip when memory.enableAutoSkill is false', () => { + const result = mgr.scheduleSkillReview({ + projectRoot, + sessionId: 'test-session-1', + history: sampleHistory, + toolCallCount: AUTO_SKILL_THRESHOLD, + enabled: false, + config: mockConfig, + }); + + expect(result.status).toBe('skipped'); + expect(result.skippedReason).toBe('disabled'); + }); + + it('should skip when config is not provided', () => { + const result = mgr.scheduleSkillReview({ + projectRoot, + sessionId: 'test-session-1', + history: sampleHistory, + toolCallCount: AUTO_SKILL_THRESHOLD, + config: undefined, + }); + + expect(result.status).toBe('skipped'); + expect(result.skippedReason).toBe('disabled'); + }); + + it('should schedule when enabled is true', () => { + const result = mgr.scheduleSkillReview({ + projectRoot, + sessionId: 'test-session-1', + history: sampleHistory, + toolCallCount: AUTO_SKILL_THRESHOLD, + enabled: true, + config: mockConfig, + }); + + expect(result.status).toBe('scheduled'); + }); + }); + + // ─── Test 5: Merge Detection ────────────────────────────────────────────── + + describe('Test 5: Extract + Skill Review merge detection', () => { + it('should return valid result when skill review is scheduled', () => { + // Schedule skill review at threshold + const result = mgr.scheduleSkillReview({ + projectRoot, + sessionId: 'test-session-1', + history: sampleHistory, + toolCallCount: AUTO_SKILL_THRESHOLD, + enabled: true, + config: mockConfig, + }); + + // Should successfully schedule or skip with valid status + expect(result.status).toBeDefined(); + expect(['scheduled', 'skipped']).toContain(result.status); + + // If scheduled, should have taskId + if (result.status === 'scheduled') { + expect(result.taskId).toBeDefined(); + } + + // Should not have unexpected errors + expect(result.skippedReason).not.toBe('failed'); + }); + + it('should handle multiple skill reviews for same project', () => { + const result1 = mgr.scheduleSkillReview({ + projectRoot, + sessionId: 'session-1', + history: sampleHistory, + toolCallCount: AUTO_SKILL_THRESHOLD, + enabled: true, + config: mockConfig, + }); + + const result2 = mgr.scheduleSkillReview({ + projectRoot, + sessionId: 'session-2', + history: sampleHistory, + toolCallCount: AUTO_SKILL_THRESHOLD, + enabled: true, + config: mockConfig, + }); + + // Both should be processed independently + expect(result1.taskId).toBeDefined(); + expect(result2.taskId).toBeDefined(); + expect(result1.taskId).not.toBe(result2.taskId); + + // Both tasks should be tracked + const records = mgr.listTasksByType('skill-review', projectRoot); + expect(records.length).toBeGreaterThanOrEqual(2); + }); + }); + + // ─── Test 6: Task Record Tracking ──────────────────────────────────────── + + describe('Test 6: Task record tracking and metadata', () => { + it('should create task record with correct metadata', () => { + const toolCallCount = 25; + const threshold = 20; + + const result = mgr.scheduleSkillReview({ + projectRoot, + sessionId: 'test-session-1', + history: sampleHistory, + toolCallCount, + threshold, + enabled: true, + config: mockConfig, + }); + + expect(result.status).toBe('scheduled'); + expect(result.taskId).toBeDefined(); + + // Verify task record + const records = mgr.listTasksByType('skill-review', projectRoot); + expect(records.length).toBeGreaterThan(0); + + const record = records[0]; + expect(record.status).toBe('running'); + expect(record.metadata?.['toolCallCount']).toBe(toolCallCount); + expect(record.metadata?.['threshold']).toBe(threshold); + expect(record.metadata?.['historyLength']).toBe(sampleHistory.length); + }); + + it('should track task status transitions', () => { + const result = mgr.scheduleSkillReview({ + projectRoot, + sessionId: 'test-session-1', + history: sampleHistory, + toolCallCount: AUTO_SKILL_THRESHOLD, + enabled: true, + config: mockConfig, + }); + + const recordId = result.taskId; + const records = mgr.listTasksByType('skill-review', projectRoot); + const record = records.find((r) => r.id === recordId); + + expect(record).toBeDefined(); + expect(record?.taskType).toBe('skill-review'); + expect(record?.status).toBe('running'); + }); + }); + + // ─── Test 7: Threshold Boundary Cases ────────────────────────────────────── + + describe('Test 7: Threshold boundary cases', () => { + it('should not trigger at threshold - 1', () => { + const result = mgr.scheduleSkillReview({ + projectRoot, + sessionId: 'test-session-1', + history: sampleHistory, + toolCallCount: AUTO_SKILL_THRESHOLD - 1, + threshold: AUTO_SKILL_THRESHOLD, + enabled: true, + config: mockConfig, + }); + + expect(result.status).toBe('skipped'); + }); + + it('should trigger at threshold', () => { + const result = mgr.scheduleSkillReview({ + projectRoot, + sessionId: 'test-session-1', + history: sampleHistory, + toolCallCount: AUTO_SKILL_THRESHOLD, + threshold: AUTO_SKILL_THRESHOLD, + enabled: true, + config: mockConfig, + }); + + expect(result.status).toBe('scheduled'); + }); + + it('should trigger at threshold + 1', () => { + const result = mgr.scheduleSkillReview({ + projectRoot, + sessionId: 'test-session-1', + history: sampleHistory, + toolCallCount: AUTO_SKILL_THRESHOLD + 1, + threshold: AUTO_SKILL_THRESHOLD, + enabled: true, + config: mockConfig, + }); + + expect(result.status).toBe('scheduled'); + }); + }); + + // ─── Test 8: Project Skills Directory Structure ──────────────────────────── + + describe('Test 8: Project skills directory validation', () => { + it('should verify project skills root exists when scheduled', async () => { + const result = mgr.scheduleSkillReview({ + projectRoot, + sessionId: 'test-session-1', + history: sampleHistory, + toolCallCount: AUTO_SKILL_THRESHOLD, + enabled: true, + config: mockConfig, + }); + + expect(result.status).toBe('scheduled'); + + // Project skills directory should be ready for writes + const skillsRoot = getProjectSkillsRoot(projectRoot); + const skillsRootPath = path.join(projectRoot, skillsRoot); + // Directory may not exist yet, but the path should be valid + expect(skillsRootPath.includes('.qwen/skills')).toBe(true); + }); + }); +}); diff --git a/packages/core/src/skills/skill-paths.test.ts b/packages/core/src/skills/skill-paths.test.ts new file mode 100644 index 00000000000..4e321eb88f2 --- /dev/null +++ b/packages/core/src/skills/skill-paths.test.ts @@ -0,0 +1,48 @@ +/** + * @license + * Copyright 2026 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import * as path from 'node:path'; +import { describe, expect, it } from 'vitest'; +import { + assertProjectSkillPath, + getProjectSkillsRoot, + isProjectSkillPath, + sanitizeSkillName, +} from './skill-paths.js'; + +describe('skill project paths', () => { + const projectRoot = '/tmp/project'; + + it('resolves the project skills root', () => { + expect(getProjectSkillsRoot(projectRoot)).toBe( + path.join(projectRoot, '.qwen', 'skills'), + ); + }); + + it('allows paths inside project .qwen/skills', () => { + const skillPath = path.join( + projectRoot, + '.qwen', + 'skills', + 'my-skill', + 'SKILL.md', + ); + expect(isProjectSkillPath(skillPath, projectRoot)).toBe(true); + expect(() => assertProjectSkillPath(skillPath, projectRoot)).not.toThrow(); + }); + + it('rejects sibling paths that merely share the prefix', () => { + const sibling = path.join(projectRoot, '.qwen', 'skills-evil', 'SKILL.md'); + expect(isProjectSkillPath(sibling, projectRoot)).toBe(false); + expect(() => assertProjectSkillPath(sibling, projectRoot)).toThrow( + 'skill_manage can only write to', + ); + }); + + it('normalizes skill names', () => { + expect(sanitizeSkillName(' My Skill! ')).toBe('my-skill-'); + }); +}); diff --git a/packages/core/src/skills/skill-paths.ts b/packages/core/src/skills/skill-paths.ts new file mode 100644 index 00000000000..d2556214ba3 --- /dev/null +++ b/packages/core/src/skills/skill-paths.ts @@ -0,0 +1,42 @@ +/** + * @license + * Copyright 2026 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import * as path from 'node:path'; + +export const PROJECT_SKILLS_RELATIVE_DIR = path.join('.qwen', 'skills'); +export const SKILL_FILE_NAME = 'SKILL.md'; + +export function getProjectSkillsRoot(projectRoot: string): string { + return path.join(projectRoot, PROJECT_SKILLS_RELATIVE_DIR); +} + +export function isProjectSkillPath( + filePath: string, + projectRoot: string, +): boolean { + const skillsRoot = path.resolve(getProjectSkillsRoot(projectRoot)); + const resolved = path.resolve(filePath); + return resolved === skillsRoot || resolved.startsWith(skillsRoot + path.sep); +} + +export function assertProjectSkillPath( + targetPath: string, + projectRoot: string, +): void { + if (!isProjectSkillPath(targetPath, projectRoot)) { + throw new Error( + `skill_manage can only write to ${getProjectSkillsRoot(projectRoot)}. ` + + 'Use the Skills UI to manage user or bundled skills.', + ); + } +} + +export function sanitizeSkillName(name: string): string { + return name + .trim() + .toLowerCase() + .replace(/[^a-z0-9-]/g, '-'); +} diff --git a/packages/core/src/tools/agent/agent.ts b/packages/core/src/tools/agent/agent.ts index aef153adaaa..57ae4ea0e10 100644 --- a/packages/core/src/tools/agent/agent.ts +++ b/packages/core/src/tools/agent/agent.ts @@ -7,6 +7,7 @@ import { randomUUID } from 'node:crypto'; import { BaseDeclarativeTool, BaseToolInvocation, Kind } from '../tools.js'; import { ToolNames, ToolDisplayNames } from '../tool-names.js'; +import { EXCLUDED_TOOLS_FOR_SUBAGENTS } from '../../agents/runtime/agent-core.js'; import type { ToolResult, ToolResultDisplay, @@ -694,12 +695,18 @@ class AgentToolInvocation extends BaseToolInvocation { // the parent, not an isolated subagent, so the general subagent // exclusion list does not apply. Recursive forks are blocked by the // ALS-based `isInForkExecution()` guard. + // However, we still exclude tools that must never be available to + // any subagent (agent, skill_manage, cron tools). const parentToolDecls: FunctionDeclaration[] = ( generationConfig.tools as Array<{ functionDeclarations?: FunctionDeclaration[]; }> - )?.flatMap((t) => t.functionDeclarations ?? []) ?? []; + ) + ?.flatMap((t) => t.functionDeclarations ?? []) + .filter( + (d) => !(d.name && EXCLUDED_TOOLS_FOR_SUBAGENTS.has(d.name)), + ) ?? []; promptConfig = { renderedSystemPrompt: generationConfig.systemInstruction as diff --git a/packages/core/src/tools/skill-manage.test.ts b/packages/core/src/tools/skill-manage.test.ts new file mode 100644 index 00000000000..9ab89161048 --- /dev/null +++ b/packages/core/src/tools/skill-manage.test.ts @@ -0,0 +1,209 @@ +/** + * @license + * Copyright 2026 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import * as fs from 'node:fs/promises'; +import * as os from 'node:os'; +import * as path from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import type { Config } from '../config/config.js'; +import { SkillManageTool } from './skill-manage.js'; +import { EXCLUDED_TOOLS_FOR_SUBAGENTS } from '../agents/runtime/agent-core.js'; +import { ToolNames } from '../tools/tool-names.js'; + +// ─── Checklist 5: skill_manage excluded from subagents ─────────────────────── + +describe('EXCLUDED_TOOLS_FOR_SUBAGENTS – skill_manage isolation', () => { + it('contains skill_manage so task-execution subagents cannot call it', () => { + expect(EXCLUDED_TOOLS_FOR_SUBAGENTS.has(ToolNames.SKILL_MANAGE)).toBe(true); + }); + + it('contains agent to prevent recursive subagent spawning', () => { + expect(EXCLUDED_TOOLS_FOR_SUBAGENTS.has(ToolNames.AGENT)).toBe(true); + }); + + it('does NOT exclude ordinary tools like read_file', () => { + expect(EXCLUDED_TOOLS_FOR_SUBAGENTS.has(ToolNames.READ_FILE)).toBe(false); + }); +}); + +async function runSkillManage( + tool: SkillManageTool, + params: Parameters[0], +) { + return tool.build(params).execute(new AbortController().signal); +} + +describe('SkillManageTool', () => { + let tempDir: string; + let projectRoot: string; + let config: Config; + let tool: SkillManageTool; + const refreshCache = vi.fn(); + + beforeEach(async () => { + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'skill-manage-')); + projectRoot = path.join(tempDir, 'project'); + await fs.mkdir(projectRoot, { recursive: true }); + refreshCache.mockReset(); + config = { + getProjectRoot: vi.fn().mockReturnValue(projectRoot), + getSkillManager: vi.fn().mockReturnValue({ refreshCache }), + } as unknown as Config; + tool = new SkillManageTool(config); + }); + + afterEach(async () => { + await fs.rm(tempDir, { recursive: true, force: true }); + }); + + it('creates a project-level SKILL.md', async () => { + const result = await runSkillManage(tool, { + action: 'create', + name: 'My Skill', + content: '---\nname: my-skill\ndescription: Test\n---\n\n# Test\n', + }); + + expect(result.error).toBeUndefined(); + await expect( + fs.readFile( + path.join(projectRoot, '.qwen', 'skills', 'my-skill', 'SKILL.md'), + 'utf-8', + ), + ).resolves.toContain('name: my-skill'); + expect(refreshCache).toHaveBeenCalled(); + }); + + it('patches an existing skill file', async () => { + const skillPath = path.join( + projectRoot, + '.qwen', + 'skills', + 'patch-me', + 'SKILL.md', + ); + await fs.mkdir(path.dirname(skillPath), { recursive: true }); + await fs.writeFile(skillPath, 'old body', 'utf-8'); + + const result = await runSkillManage(tool, { + action: 'patch', + name: 'patch-me', + old_string: 'old', + new_string: 'new', + }); + + expect(result.error).toBeUndefined(); + await expect(fs.readFile(skillPath, 'utf-8')).resolves.toBe('new body'); + }); + + it('rejects write_file path traversal outside project skills', async () => { + const result = await runSkillManage(tool, { + action: 'write_file', + name: 'refs', + file_path: '../../outside.md', + file_content: 'bad', + }); + + expect(result.error?.message).toContain('skill_manage can only write to'); + await expect( + fs.stat(path.join(projectRoot, '.qwen', 'outside.md')), + ).rejects.toThrow(); + }); + + // ─── Checklist 4: Write protection ───────────────────────────────────────── + + it('sanitizes name path traversal: creates skill safely inside project', async () => { + // Attempt to use a path-traversal-looking name. + // sanitizeSkillName() converts "../../../etc/malicious" → "----------etc-malicious" + // so the result is a safe path inside .qwen/skills/ (never escapes it). + const result = await runSkillManage(tool, { + action: 'create', + name: '../../../etc/malicious', + content: '---\nname: sanitized\n---\n', + }); + + // The call should succeed (the name is sanitized, not rejected) + expect(result.error).toBeUndefined(); + // The file must be written inside the project skills root, not at /etc/malicious + await expect(fs.stat('/etc/malicious/SKILL.md')).rejects.toThrow(); + // Verify it landed somewhere inside .qwen/skills/ + const skillsRoot = path.join(projectRoot, '.qwen', 'skills'); + const dirs = await fs.readdir(skillsRoot); + expect(dirs.length).toBeGreaterThan(0); + }); + + it('allows write_file inside skill directory (reference file)', async () => { + const result = await runSkillManage(tool, { + action: 'write_file', + name: 'my-skill', + file_path: 'references/api.md', + file_content: '# API reference\n', + }); + + expect(result.error).toBeUndefined(); + await expect( + fs.readFile( + path.join( + projectRoot, + '.qwen', + 'skills', + 'my-skill', + 'references', + 'api.md', + ), + 'utf-8', + ), + ).resolves.toContain('API reference'); + }); + + it('upsert: create succeeds if file already exists (idempotent)', async () => { + const content1 = '---\nname: my-skill\ndescription: v1\n---\n'; + const content2 = '---\nname: my-skill\ndescription: v2\n---\n'; + + // First create + await runSkillManage(tool, { + action: 'create', + name: 'my-skill', + content: content1, + }); + + // Second create on same name should succeed (upsert), not throw + const result = await runSkillManage(tool, { + action: 'create', + name: 'my-skill', + content: content2, + }); + + expect(result.error).toBeUndefined(); + // Content should be updated to v2 + const written = await fs.readFile( + path.join(projectRoot, '.qwen', 'skills', 'my-skill', 'SKILL.md'), + 'utf-8', + ); + expect(written).toContain('v2'); + }); + + it('deletes the skill directory', async () => { + // Setup + await runSkillManage(tool, { + action: 'create', + name: 'to-delete', + content: '---\nname: to-delete\n---\n', + }); + + const skillDir = path.join(projectRoot, '.qwen', 'skills', 'to-delete'); + await expect(fs.stat(skillDir)).resolves.toBeTruthy(); + + // Delete + const result = await runSkillManage(tool, { + action: 'delete', + name: 'to-delete', + }); + + expect(result.error).toBeUndefined(); + await expect(fs.stat(skillDir)).rejects.toThrow(); + expect(refreshCache).toHaveBeenCalled(); + }); +}); diff --git a/packages/core/src/tools/skill-manage.ts b/packages/core/src/tools/skill-manage.ts new file mode 100644 index 00000000000..24deaace285 --- /dev/null +++ b/packages/core/src/tools/skill-manage.ts @@ -0,0 +1,261 @@ +/** + * @license + * Copyright 2026 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import * as fs from 'node:fs/promises'; +import * as path from 'node:path'; +import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js'; +import { ToolDisplayNames, ToolNames } from './tool-names.js'; +import type { ToolResult, ToolResultDisplay } from './tools.js'; +import type { Config } from '../config/config.js'; +import { + assertProjectSkillPath, + getProjectSkillsRoot, + sanitizeSkillName, + SKILL_FILE_NAME, +} from '../skills/skill-paths.js'; + +export type SkillManageAction = + | 'create' + | 'edit' + | 'patch' + | 'write_file' + | 'delete'; + +export interface SkillManageParams { + action: SkillManageAction; + name: string; + content?: string; + old_string?: string; + new_string?: string; + category?: string; + file_path?: string; + file_content?: string; +} + +export class SkillManageTool extends BaseDeclarativeTool< + SkillManageParams, + ToolResult +> { + static readonly Name: string = ToolNames.SKILL_MANAGE; + + constructor(private readonly config: Config) { + super( + SkillManageTool.Name, + ToolDisplayNames.SKILL_MANAGE, + 'Create, update, patch, write files for, or delete a project-level skill in .qwen/skills/.', + Kind.Edit, + { + type: 'object', + properties: { + action: { + type: 'string', + enum: ['create', 'edit', 'patch', 'write_file', 'delete'], + }, + name: { type: 'string' }, + content: { + type: 'string', + description: 'Full SKILL.md content for create/edit.', + }, + old_string: { + type: 'string', + description: 'For patch: text to find.', + }, + new_string: { + type: 'string', + description: 'For patch: replacement text.', + }, + category: { + type: 'string', + description: "Optional subdirectory, e.g. 'typescript'.", + }, + file_path: { + type: 'string', + description: + "For write_file: relative path like 'references/api.md'.", + }, + file_content: { type: 'string' }, + }, + required: ['action', 'name'], + additionalProperties: false, + $schema: 'http://json-schema.org/draft-07/schema#', + }, + false, + false, + ); + } + + protected override validateToolParamValues( + params: SkillManageParams, + ): string | null { + if (!params.name.trim()) { + return 'Parameter "name" must be a non-empty string.'; + } + if ( + (params.action === 'create' || params.action === 'edit') && + !params.content + ) { + return `Parameter "content" is required for action "${params.action}".`; + } + if (params.action === 'patch') { + if (!params.old_string || params.new_string === undefined) { + return 'Parameters "old_string" and "new_string" are required for action "patch".'; + } + } + if (params.action === 'write_file') { + if (!params.file_path || params.file_content === undefined) { + return 'Parameters "file_path" and "file_content" are required for action "write_file".'; + } + } + return null; + } + + protected createInvocation(params: SkillManageParams) { + return new SkillManageToolInvocation(this.config, params); + } +} + +class SkillManageToolInvocation extends BaseToolInvocation< + SkillManageParams, + ToolResult +> { + constructor( + private readonly config: Config, + params: SkillManageParams, + ) { + super(params); + } + + getDescription(): string { + return `${this.params.action} project skill ${this.params.name}`; + } + + override toolLocations() { + return [{ path: this.resolveTargetPath() }]; + } + + override getDefaultPermission() { + return Promise.resolve('ask' as const); + } + + async execute( + _signal: AbortSignal, + _updateOutput?: (output: ToolResultDisplay) => void, + ): Promise { + try { + const targetPath = this.resolveTargetPath(); + assertProjectSkillPath(targetPath, this.config.getProjectRoot()); + + switch (this.params.action) { + case 'create': + await this.writeSkillFileWithFallback( + targetPath, + this.params.content!, + ); + break; + case 'edit': + await this.writeSkillFile(targetPath, this.params.content!, 'w'); + break; + case 'patch': + await this.patchSkillFile(targetPath); + break; + case 'write_file': + await this.writeReferenceFile(targetPath); + break; + case 'delete': + await fs.rm(path.dirname(targetPath), { + recursive: true, + force: true, + }); + break; + default: + throw new Error(`Unsupported action: ${this.params.action}`); + } + + await this.config.getSkillManager()?.refreshCache?.(); + const message = `skill_manage ${this.params.action} succeeded: ${targetPath}`; + return { llmContent: message, returnDisplay: message }; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + return { + llmContent: message, + returnDisplay: message, + error: { message }, + }; + } + } + + private async writeSkillFileWithFallback( + targetPath: string, + content: string, + ): Promise { + try { + // Try exclusive create first + await this.writeSkillFile(targetPath, content, 'wx'); + } catch (error) { + // If file exists, fallback to edit (upsert behavior) + const err = error as NodeJS.ErrnoException; + if (err.code === 'EEXIST') { + // File already exists, upgrade to edit + await this.writeSkillFile(targetPath, content, 'w'); + } else { + throw error; + } + } + } + + private async writeSkillFile( + targetPath: string, + content: string, + flag: 'w' | 'wx', + ): Promise { + await fs.mkdir(path.dirname(targetPath), { recursive: true }); + await fs.writeFile( + targetPath, + content.endsWith('\n') ? content : `${content}\n`, + { + encoding: 'utf-8', + flag, + }, + ); + } + + private async patchSkillFile(targetPath: string): Promise { + const oldString = this.params.old_string!; + const newString = this.params.new_string!; + const original = await fs.readFile(targetPath, 'utf-8'); + if (!original.includes(oldString)) { + throw new Error('old_string was not found in the target skill file.'); + } + await fs.writeFile( + targetPath, + original.replace(oldString, newString), + 'utf-8', + ); + } + + private async writeReferenceFile(targetPath: string): Promise { + await fs.mkdir(path.dirname(targetPath), { recursive: true }); + await fs.writeFile(targetPath, this.params.file_content!, 'utf-8'); + } + + private resolveTargetPath(): string { + const projectSkillsRoot = getProjectSkillsRoot( + this.config.getProjectRoot(), + ); + const skillName = sanitizeSkillName(this.params.name); + const parts = [projectSkillsRoot]; + if (this.params.category) { + parts.push(this.params.category); + } + parts.push(skillName); + + if (this.params.action === 'write_file') { + const filePath = this.params.file_path ?? ''; + return path.resolve(path.join(...parts, filePath)); + } + return path.resolve(path.join(...parts, SKILL_FILE_NAME)); + } +} diff --git a/packages/core/src/tools/tool-names.ts b/packages/core/src/tools/tool-names.ts index 9edc21508fc..e990efc9ec9 100644 --- a/packages/core/src/tools/tool-names.ts +++ b/packages/core/src/tools/tool-names.ts @@ -20,6 +20,7 @@ export const ToolNames = { MEMORY: 'save_memory', AGENT: 'agent', SKILL: 'skill', + SKILL_MANAGE: 'skill_manage', EXIT_PLAN_MODE: 'exit_plan_mode', WEB_FETCH: 'web_fetch', WEB_SEARCH: 'web_search', @@ -47,6 +48,7 @@ export const ToolDisplayNames = { MEMORY: 'SaveMemory', AGENT: 'Agent', SKILL: 'Skill', + SKILL_MANAGE: 'SkillManage', EXIT_PLAN_MODE: 'ExitPlanMode', WEB_FETCH: 'WebFetch', WEB_SEARCH: 'WebSearch', diff --git a/packages/vscode-ide-companion/schemas/settings.schema.json b/packages/vscode-ide-companion/schemas/settings.schema.json index a54ddd1a463..ed3f9c726c0 100644 --- a/packages/vscode-ide-companion/schemas/settings.schema.json +++ b/packages/vscode-ide-companion/schemas/settings.schema.json @@ -460,6 +460,11 @@ "description": "Enable automatic consolidation (dream) of collected memories.", "type": "boolean", "default": false + }, + "enableAutoSkill": { + "description": "Enable background review for reusable project skills after tool-heavy sessions.", + "type": "boolean", + "default": false } } }, From 4cfa191c08e9365176d8882ddfdcee6c2eb69e5d Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Mon, 27 Apr 2026 20:07:04 +0800 Subject: [PATCH 02/10] fix(test): add missing mock methods for autoSkill (getAutoSkillEnabled, recordCompletedToolCall, consumePendingMemoryTaskPromises) --- packages/cli/src/nonInteractiveCli.test.ts | 3 +++ packages/cli/src/ui/hooks/useGeminiStream.test.tsx | 1 + packages/core/src/core/client.test.ts | 1 + 3 files changed, 5 insertions(+) diff --git a/packages/cli/src/nonInteractiveCli.test.ts b/packages/cli/src/nonInteractiveCli.test.ts index 9f3e339eb9a..2554b04a815 100644 --- a/packages/cli/src/nonInteractiveCli.test.ts +++ b/packages/cli/src/nonInteractiveCli.test.ts @@ -109,6 +109,8 @@ describe('runNonInteractive', () => { mockGeminiClient = { sendMessageStream: vi.fn(), + consumePendingMemoryTaskPromises: vi.fn().mockReturnValue([]), + recordCompletedToolCall: vi.fn(), getChatRecordingService: vi.fn(() => ({ initialize: vi.fn(), recordMessage: vi.fn(), @@ -154,6 +156,7 @@ describe('runNonInteractive', () => { getCronScheduler: vi.fn().mockReturnValue(null), setModelInvocableCommandsProvider: vi.fn(), setModelInvocableCommandsExecutor: vi.fn(), + getAutoSkillEnabled: vi.fn().mockReturnValue(false), getDisabledSlashCommands: vi.fn().mockReturnValue([]), getBackgroundTaskRegistry: vi.fn().mockReturnValue({ setNotificationCallback: vi.fn(), diff --git a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx index 7483e934940..b9faacb9f7c 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx +++ b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx @@ -51,6 +51,7 @@ const MockedGeminiClientClass = vi.hoisted(() => this.sendMessageStream = mockSendMessageStream; this.addHistory = vi.fn(); this.consumePendingMemoryTaskPromises = vi.fn().mockReturnValue([]); + this.recordCompletedToolCall = vi.fn(); this.getChatRecordingService = vi.fn().mockReturnValue({ recordThought: vi.fn(), initialize: vi.fn(), diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts index 6ca290c775b..47ca7bff72e 100644 --- a/packages/core/src/core/client.test.ts +++ b/packages/core/src/core/client.test.ts @@ -390,6 +390,7 @@ describe('Gemini Client (client.ts)', () => { getArenaAgentClient: vi.fn().mockReturnValue(null), getManagedAutoMemoryEnabled: vi.fn().mockReturnValue(true), getMemoryManager: vi.fn().mockReturnValue(mockMemoryManager), + getAutoSkillEnabled: vi.fn().mockReturnValue(false), getDisableAllHooks: vi.fn().mockReturnValue(true), getArenaManager: vi.fn().mockReturnValue(null), getMessageBus: vi.fn().mockReturnValue(undefined), From 73c54451d2b3465b648dd890f68426cf6caad52d Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Mon, 27 Apr 2026 20:26:39 +0800 Subject: [PATCH 03/10] fix(test): fix cross-platform path comparison in skillReviewNudge integration test --- .../core/src/memory/skillReviewNudge.integration.test.ts | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/core/src/memory/skillReviewNudge.integration.test.ts b/packages/core/src/memory/skillReviewNudge.integration.test.ts index b353340b5f3..82681a8e582 100644 --- a/packages/core/src/memory/skillReviewNudge.integration.test.ts +++ b/packages/core/src/memory/skillReviewNudge.integration.test.ts @@ -427,10 +427,11 @@ describe('Skill Nudge E2E Integration Tests', () => { expect(result.status).toBe('scheduled'); // Project skills directory should be ready for writes - const skillsRoot = getProjectSkillsRoot(projectRoot); - const skillsRootPath = path.join(projectRoot, skillsRoot); + const skillsRootPath = getProjectSkillsRoot(projectRoot); // Directory may not exist yet, but the path should be valid - expect(skillsRootPath.includes('.qwen/skills')).toBe(true); + // Use path.normalize-friendly comparison for cross-platform (Windows uses backslash) + const normalizedPath = skillsRootPath.split(path.sep).join('/'); + expect(normalizedPath.includes('.qwen/skills')).toBe(true); }); }); }); From f7601a0066663ba7344ba1da973ad9344942ca40 Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Tue, 28 Apr 2026 10:35:49 +0800 Subject: [PATCH 04/10] fix(autoSkill): address critical review comments - Fix merged_with_extract silent drop: remove broken merge optimization in scheduleSkillReview(). When an extract task is pending/running, skill review is now scheduled independently instead of recording metadata that no production code ever reads. - Fix SKILL_MANAGE blocked from skill review agent: prepareTools() now only enforces the recursion guard (AGENT tool) when the agent has an explicit tools list. Wildcard/inherit subagents still get the full EXCLUDED_TOOLS_FOR_SUBAGENTS filter, preventing task subagents from calling skill_manage. The dedicated skill review agent can now receive the skill_manage tool it requires. - Update manager.test.ts: replace merged_with_extract tests with concurrent-extract independent-scheduling tests. - Update skill-manage.test.ts: clarify test description to reflect wildcard-only exclusion semantics. --- .../core/src/agents/runtime/agent-core.ts | 10 ++- packages/core/src/memory/manager.test.ts | 47 +++---------- packages/core/src/memory/manager.ts | 70 +------------------ packages/core/src/tools/skill-manage.test.ts | 2 +- 4 files changed, 19 insertions(+), 110 deletions(-) diff --git a/packages/core/src/agents/runtime/agent-core.ts b/packages/core/src/agents/runtime/agent-core.ts index babce151df1..35938c8332b 100644 --- a/packages/core/src/agents/runtime/agent-core.ts +++ b/packages/core/src/agents/runtime/agent-core.ts @@ -309,6 +309,11 @@ export class AgentCore { const toolsList: FunctionDeclaration[] = []; const excludedFromSubagents = EXCLUDED_TOOLS_FOR_SUBAGENTS; + // When a subagent has an explicit tools list (not wildcard), only the + // recursive-spawn guard (AgentTool) is enforced. Other exclusions like + // SKILL_MANAGE are intentionally bypassed because the caller has + // explicitly opted in to those tools (e.g. the skill review agent). + const recursionGuardOnly = new Set([ToolNames.AGENT]); if (this.toolConfig) { const asStrings = this.toolConfig.tools.filter( @@ -329,16 +334,17 @@ export class AgentCore { .filter((t) => !(t.name && excludedFromSubagents.has(t.name))), ); } else { + // Explicit tool list: only prevent recursive agent spawning. toolsList.push( ...toolRegistry.getFunctionDeclarationsFiltered( - asStrings.filter((name) => !excludedFromSubagents.has(name)), + asStrings.filter((name) => !recursionGuardOnly.has(name)), ), ); } // Also filter inline FunctionDeclaration[] passed directly in toolConfig. toolsList.push( ...onlyInlineDecls.filter( - (d) => !(d.name && excludedFromSubagents.has(d.name)), + (d) => !(d.name && recursionGuardOnly.has(d.name)), ), ); } else { diff --git a/packages/core/src/memory/manager.test.ts b/packages/core/src/memory/manager.test.ts index c71ca468888..bd420b1f5e1 100644 --- a/packages/core/src/memory/manager.test.ts +++ b/packages/core/src/memory/manager.test.ts @@ -510,15 +510,18 @@ describe('MemoryManager', () => { }); }); - // ─── scheduleSkillReview: merge with running extract ────────────────────── + // ─── scheduleSkillReview: concurrent extract ────────────────────────────── - describe('scheduleSkillReview(): merged_with_extract (checklist 6)', () => { - it('returns merged_with_extract when extract is already running for same project', async () => { + describe('scheduleSkillReview(): concurrent extract (checklist 6)', () => { + it('schedules skill review independently even when extract is already running', async () => { // arrange: extract never resolves so it stays "running" vi.mocked(runAutoMemoryExtract).mockReturnValue(new Promise(() => {})); + vi.mocked(runSkillReviewByAgent).mockResolvedValue({ + touchedSkillFiles: [], + }); const mgr = new MemoryManager(); - const projectRoot = '/test-project-merge'; + const projectRoot = '/test-project-concurrent'; const config = makeMockConfig(); // Start extract (will stay in-flight) @@ -529,7 +532,7 @@ describe('MemoryManager', () => { config, }); - // Now schedule skill review while extract is running + // Skill review must be scheduled independently, not silently dropped const result = mgr.scheduleSkillReview({ projectRoot, sessionId: 'sess-extract', @@ -540,42 +543,10 @@ describe('MemoryManager', () => { config, }); - expect(result.status).toBe('skipped'); - expect(result.skippedReason).toBe('merged_with_extract'); + expect(result.status).toBe('scheduled'); expect(result.taskId).toBeDefined(); }); - it('marks the extract record with shouldReviewSkillsAlso metadata', async () => { - vi.mocked(runAutoMemoryExtract).mockReturnValue(new Promise(() => {})); - - const mgr = new MemoryManager(); - const projectRoot = '/test-project-metadata'; - const config = makeMockConfig(); - - void mgr.scheduleExtract({ - projectRoot, - sessionId: 'sess-meta', - history: [{ role: 'user', parts: [{ text: 'work' }] }], - config, - }); - - mgr.scheduleSkillReview({ - projectRoot, - sessionId: 'sess-meta', - history: [{ role: 'user', parts: [{ text: 'work' }] }], - toolCallCount: 30, - threshold: 20, - enabled: true, - config, - }); - - // The extract task record should now carry the merge flag - const extractRecords = mgr.listTasksByType('extract', projectRoot); - expect(extractRecords.length).toBeGreaterThan(0); - const extractRecord = extractRecords[0]!; - expect(extractRecord.metadata?.['shouldReviewSkillsAlso']).toBe(true); - }); - it('schedules skill review independently when no extract is running', () => { const mgr = new MemoryManager(); const projectRoot = '/test-project-independent'; diff --git a/packages/core/src/memory/manager.ts b/packages/core/src/memory/manager.ts index 288974f48a2..087fb82b5af 100644 --- a/packages/core/src/memory/manager.ts +++ b/packages/core/src/memory/manager.ts @@ -130,11 +130,7 @@ export interface ScheduleSkillReviewParams { export interface SkillReviewScheduleResult { status: 'scheduled' | 'skipped'; taskId?: string; - skippedReason?: - | 'below_threshold' - | 'skill_manage_called' - | 'disabled' - | 'merged_with_extract'; + skippedReason?: 'below_threshold' | 'skill_manage_called' | 'disabled'; promise?: Promise; } @@ -704,44 +700,6 @@ export class MemoryManager { return { status: 'skipped', skippedReason: 'disabled' }; } - // Check if extract is already running and eligible for merge - const pendingExtractTaskId = this.findPendingOrRunningExtractTask( - params.projectRoot, - ); - if (pendingExtractTaskId) { - // Mark extract record to include skill review in its execution - const extractRecord = this.tasks.get(pendingExtractTaskId); - if (extractRecord && extractRecord.metadata) { - extractRecord.metadata['shouldReviewSkillsAlso'] = true; - extractRecord.metadata['skillReviewParams'] = { - toolCallCount: params.toolCallCount, - threshold, - }; - this.notify(); - } - // Create a lightweight task record for tracking but return merged status - const record = makeTaskRecord( - 'skill-review', - params.projectRoot, - params.sessionId, - ); - this.storeWith(record, { - status: 'skipped', - progressText: 'Merged into extract task for combined review.', - metadata: { - mergedWithTaskId: pendingExtractTaskId, - historyLength: params.history.length, - toolCallCount: params.toolCallCount, - threshold, - }, - }); - return { - status: 'skipped', - taskId: record.id, - skippedReason: 'merged_with_extract', - }; - } - const record = makeTaskRecord( 'skill-review', params.projectRoot, @@ -761,32 +719,6 @@ export class MemoryManager { return { status: 'scheduled', taskId: record.id, promise }; } - private findPendingOrRunningExtractTask( - projectRoot: string, - ): string | undefined { - // Check currently running extract - const currentTaskId = this.extractCurrentTaskId.get(projectRoot); - if (currentTaskId) { - return currentTaskId; - } - - // Check queued extract - const queued = this.extractQueued.get(projectRoot); - if (queued) { - return queued.taskId; - } - - // Check for recently created but not yet running extract task - const records = this.listTasksByType('extract', projectRoot); - for (const record of records) { - if (record.status === 'pending' || record.status === 'running') { - return record.id; - } - } - - return undefined; - } - private async runSkillReview( record: MemoryTaskRecord, params: ScheduleSkillReviewParams, diff --git a/packages/core/src/tools/skill-manage.test.ts b/packages/core/src/tools/skill-manage.test.ts index 9ab89161048..6143a521d20 100644 --- a/packages/core/src/tools/skill-manage.test.ts +++ b/packages/core/src/tools/skill-manage.test.ts @@ -16,7 +16,7 @@ import { ToolNames } from '../tools/tool-names.js'; // ─── Checklist 5: skill_manage excluded from subagents ─────────────────────── describe('EXCLUDED_TOOLS_FOR_SUBAGENTS – skill_manage isolation', () => { - it('contains skill_manage so task-execution subagents cannot call it', () => { + it('contains skill_manage so wildcard/inherit subagents cannot call it', () => { expect(EXCLUDED_TOOLS_FOR_SUBAGENTS.has(ToolNames.SKILL_MANAGE)).toBe(true); }); From 60fbe2cf50cb22dcd09578cb8a52287f9bfb25be Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Tue, 28 Apr 2026 15:29:06 +0800 Subject: [PATCH 05/10] fix(autoSkill): reject symlink traversal in skill_manage path guard assertProjectSkillPath() uses path.resolve() which is purely lexical and does not dereference symlinks. If any path component inside .qwen/skills/ is a symlink pointing outside the project, fs.writeFile/readFile/rm would follow the link and mutate files outside the advertised write boundary. Add assertRealProjectSkillPath() (async) in skill-paths.ts that: - Resolves the real path of the skills root via fs.realpath() - Walks up from targetPath to find the nearest existing ancestor - Resolves that ancestor to its real filesystem path - Rejects if the real path falls outside the real skills root skill-manage.ts execute() now calls both the cheap lexical check (fast fail for obviously wrong paths) and the async real-path check before any fs.writeFile / fs.rm mutation. Add three symlink-specific tests in skill-paths.test.ts covering: - Legitimate path accepted - Symlinked directory pointing outside skills root rejected - Skills root itself being a symlink (safe target) accepted --- packages/core/src/skills/skill-paths.test.ts | 56 +++++++++++++++++- packages/core/src/skills/skill-paths.ts | 61 ++++++++++++++++++++ packages/core/src/tools/skill-manage.ts | 5 ++ 3 files changed, 121 insertions(+), 1 deletion(-) diff --git a/packages/core/src/skills/skill-paths.test.ts b/packages/core/src/skills/skill-paths.test.ts index 4e321eb88f2..fa4ac9cae7e 100644 --- a/packages/core/src/skills/skill-paths.test.ts +++ b/packages/core/src/skills/skill-paths.test.ts @@ -4,10 +4,13 @@ * SPDX-License-Identifier: Apache-2.0 */ +import * as fs from 'node:fs/promises'; +import * as os from 'node:os'; import * as path from 'node:path'; -import { describe, expect, it } from 'vitest'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; import { assertProjectSkillPath, + assertRealProjectSkillPath, getProjectSkillsRoot, isProjectSkillPath, sanitizeSkillName, @@ -46,3 +49,54 @@ describe('skill project paths', () => { expect(sanitizeSkillName(' My Skill! ')).toBe('my-skill-'); }); }); + +describe('assertRealProjectSkillPath – symlink traversal', () => { + let tmpDir: string; + let projectRoot: string; + let skillsDir: string; + let outsideDir: string; + + beforeEach(async () => { + tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'skill-symlink-')); + projectRoot = path.join(tmpDir, 'project'); + skillsDir = path.join(projectRoot, '.qwen', 'skills'); + outsideDir = path.join(tmpDir, 'outside'); + await fs.mkdir(skillsDir, { recursive: true }); + await fs.mkdir(outsideDir, { recursive: true }); + }); + + afterEach(async () => { + await fs.rm(tmpDir, { recursive: true, force: true }); + }); + + it('accepts a legitimate path inside skills dir', async () => { + const target = path.join(skillsDir, 'my-skill', 'SKILL.md'); + await expect( + assertRealProjectSkillPath(target, projectRoot), + ).resolves.toBeUndefined(); + }); + + it('rejects a path whose parent is a symlink pointing outside skills dir', async () => { + // Create a symlink: .qwen/skills/escape -> ../../outside + const symlinkPath = path.join(skillsDir, 'escape'); + await fs.symlink(outsideDir, symlinkPath); + + const target = path.join(symlinkPath, 'evil.md'); + await expect( + assertRealProjectSkillPath(target, projectRoot), + ).rejects.toThrow('symlink traversal detected'); + }); + + it('accepts a path where skills root itself is a symlink to a safe dir', async () => { + // skills dir → realSkills (still inside project) + const realSkills = path.join(projectRoot, '.qwen', 'real-skills'); + await fs.mkdir(realSkills, { recursive: true }); + await fs.rm(skillsDir, { recursive: true }); + await fs.symlink(realSkills, skillsDir); + + const target = path.join(skillsDir, 'my-skill', 'SKILL.md'); + await expect( + assertRealProjectSkillPath(target, projectRoot), + ).resolves.toBeUndefined(); + }); +}); diff --git a/packages/core/src/skills/skill-paths.ts b/packages/core/src/skills/skill-paths.ts index d2556214ba3..4083d81c1a1 100644 --- a/packages/core/src/skills/skill-paths.ts +++ b/packages/core/src/skills/skill-paths.ts @@ -5,6 +5,7 @@ */ import * as path from 'node:path'; +import * as fs from 'node:fs/promises'; export const PROJECT_SKILLS_RELATIVE_DIR = path.join('.qwen', 'skills'); export const SKILL_FILE_NAME = 'SKILL.md'; @@ -34,6 +35,66 @@ export function assertProjectSkillPath( } } +/** + * Async variant that also rejects symlink traversal. + * + * `path.resolve()` is a purely lexical operation and does not dereference + * symlinks. If any component of `targetPath` (or its parent chain) is a + * symlink pointing outside the skills directory, the lexical check passes + * but `fs.writeFile/readFile/rm` will follow the link and mutate the real + * target. This function resolves the nearest existing ancestor to its real + * filesystem path and verifies it still sits under the real skills root. + */ +export async function assertRealProjectSkillPath( + targetPath: string, + projectRoot: string, +): Promise { + // First do the cheap lexical check. + assertProjectSkillPath(targetPath, projectRoot); + + const skillsRoot = getProjectSkillsRoot(projectRoot); + + // Resolve the real path of the skills root (it may itself be a symlink). + let realSkillsRoot: string; + try { + realSkillsRoot = await fs.realpath(skillsRoot); + } catch { + // Skills root does not exist yet — nothing to traverse. + return; + } + + // Walk up from targetPath to find the nearest existing ancestor, then + // resolve it to its real path and verify containment. + let check = targetPath; + for (;;) { + try { + const real = await fs.realpath(check); + // Found an existing node — verify it is inside the real skills root. + if ( + real !== realSkillsRoot && + !real.startsWith(realSkillsRoot + path.sep) + ) { + throw new Error( + `skill_manage: symlink traversal detected — resolved path "${real}" ` + + `is outside the project skills directory "${realSkillsRoot}".`, + ); + } + return; + } catch (err) { + if ((err as NodeJS.ErrnoException).code === 'ENOENT') { + const parent = path.dirname(check); + if (parent === check) { + // Reached filesystem root without finding an existing node. + return; + } + check = parent; + continue; + } + throw err; + } + } +} + export function sanitizeSkillName(name: string): string { return name .trim() diff --git a/packages/core/src/tools/skill-manage.ts b/packages/core/src/tools/skill-manage.ts index 24deaace285..f30ee9f04c6 100644 --- a/packages/core/src/tools/skill-manage.ts +++ b/packages/core/src/tools/skill-manage.ts @@ -12,6 +12,7 @@ import type { ToolResult, ToolResultDisplay } from './tools.js'; import type { Config } from '../config/config.js'; import { assertProjectSkillPath, + assertRealProjectSkillPath, getProjectSkillsRoot, sanitizeSkillName, SKILL_FILE_NAME, @@ -147,6 +148,10 @@ class SkillManageToolInvocation extends BaseToolInvocation< try { const targetPath = this.resolveTargetPath(); assertProjectSkillPath(targetPath, this.config.getProjectRoot()); + await assertRealProjectSkillPath( + targetPath, + this.config.getProjectRoot(), + ); switch (this.params.action) { case 'create': From ab40689ffe3ae12b1a277b79d4a63ab84f3af565 Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Wed, 29 Apr 2026 16:51:37 +0800 Subject: [PATCH 06/10] refactor(autoSkill): remove skill_manage tool, use path-based skill write detection Address reviewer feedback: instead of keeping skill_manage as the sole write gate (which still had symlink bypass risk via generic tools), remove the dedicated tool entirely and replace with a two-layer protection: 1. skillsModifiedInSession (client.ts): detects writes to .qwen/skills/ by inspecting the file_path arg of every completed tool call, replacing the fragile historyCallsSkillManage() history scan. 2. hasAutoSkillSource + evaluateScopedDecision (skillReviewAgentPlanner.ts): the review agent's permission sandbox now verifies BOTH that the target path is inside the skills directory AND that the existing file already contains 'source: auto-skill' in its frontmatter before allowing edits, preventing the agent from overwriting user-managed skills. Changes: - Delete skill-manage.ts and skill-manage.test.ts - Remove SKILL_MANAGE from ToolNames, ToolDisplayNames, config registerLazy, agent-core EXCLUDED_TOOLS comment, and agent.ts comment - Replace historyCallsSkillManage() with skillsModified: boolean param in scheduleSkillReview; skip reason renamed skills_modified_in_session - recordCompletedToolCall(name, filePath?) detects .qwen/skills/ writes; CLI layers pass file_path arg from tool call request - Fix buildTaskPrompt frontmatter template to use top-level source: auto-skill - Update skill-paths.ts error messages to remove skill_manage references - Update all unit/integration tests accordingly --- docs/design/skill-nudge/skill-nudge.md | 357 ++++++++---------- packages/cli/src/nonInteractiveCli.ts | 14 +- packages/cli/src/ui/hooks/useGeminiStream.ts | 7 +- .../core/src/agents/runtime/agent-core.ts | 5 +- packages/core/src/config/config.ts | 4 - packages/core/src/core/client.ts | 15 +- packages/core/src/memory/manager.test.ts | 16 +- packages/core/src/memory/manager.ts | 16 +- .../src/memory/skillReviewAgentPlanner.ts | 63 +++- .../skillReviewNudge.integration.test.ts | 80 ++-- packages/core/src/skills/skill-paths.test.ts | 2 +- packages/core/src/skills/skill-paths.ts | 4 +- packages/core/src/tools/agent/agent.ts | 2 +- packages/core/src/tools/skill-manage.test.ts | 209 ---------- packages/core/src/tools/skill-manage.ts | 266 ------------- packages/core/src/tools/tool-names.ts | 2 - 16 files changed, 285 insertions(+), 777 deletions(-) delete mode 100644 packages/core/src/tools/skill-manage.test.ts delete mode 100644 packages/core/src/tools/skill-manage.ts diff --git a/docs/design/skill-nudge/skill-nudge.md b/docs/design/skill-nudge/skill-nudge.md index 9e89bca6c9f..f20184115cd 100644 --- a/docs/design/skill-nudge/skill-nudge.md +++ b/docs/design/skill-nudge/skill-nudge.md @@ -1,37 +1,41 @@ -# Skill Nudge:自动技能提炼系统设计文档 +# AutoSkill:自动技能提炼系统设计文档 ## 概述 -本文档描述在 QwenCode 现有 Memory-Dream 架构基础上,增加 **Skill Nudge** 能力的设计方案。 +本文档描述在 QwenCode 现有 Memory-Dream 架构基础上,增加 **AutoSkill** 能力的设计方案。 -Skill Nudge 是一种**程序性记忆自动提炼机制**:当 agent 完成了一个工具调用密集型任务后,系统在后台悄悄评估本次对话中是否存在值得复用的操作流程,并将其自动保存为项目级 skill。 +AutoSkill 是一种**程序性记忆自动提炼机制**:当 agent 完成了一个工具调用密集型任务后,系统在后台悄悄评估本次对话中是否存在值得复用的操作流程,并将其自动保存为项目级 skill。 ### 与 Memory Extract 的定位差异 -| 维度 | Memory Extract | Skill Nudge | +| 维度 | Memory Extract | AutoSkill | | ------------ | -------------------------------- | ------------------------------ | | **记忆类型** | 陈述性记忆(用户是谁、项目背景) | 程序性记忆(如何做某类任务) | | **触发时机** | 每次会话结束后 | 会话内工具调用达到阈值 | | **写入目标** | `${projectRoot}/.qwen/memory/` | `${projectRoot}/.qwen/skills/` | | **内容性质** | 用户偏好、项目上下文、反馈规则 | 可复用的操作步骤、最佳实践 | -| **生命周期** | Dream 定期整合/修剪 | 按需更新,由 agent 主动维护 | +| **生命周期** | Dream 定期整合/修剪 | 按需更新,由 review agent 维护 | --- ## 核心设计原则 -1. **与 Extract 共享触发时机**:Skill Nudge 和 Memory Extract 都在会话结束后的同一个调度点触发,通过合并机制避免创建多个 forked agent。 -2. **工具调用密度计数**:仅当本次会话内工具调用累计 ≥ 20 次才触发,确保只在真正复杂的任务后提炼。 -3. **写保护边界明确**:`skill_manage` 只能操作当前项目的 project-level skill(`${projectRoot}/.qwen/skills/`),不能触碰 user / extension / bundled 层。 -4. **最大保留 Hermes 核心 prompt**:review agent 使用的提示语直接移植自 Hermes `_SKILL_REVIEW_PROMPT`,只做最小化适配。 +1. **无专用写入工具**:skill review agent 直接使用通用的 `read_file`、`write_file`、`edit` 工具操作 `.qwen/skills/`,不引入 `skill_manage` 专用工具。主会话同理——用户若要手动维护 skill,也使用相同的通用工具。 +2. **技能变动检测代替工具计数重置**:参照 memory extract 检测 `memory_tool` 调用的方式,系统检测主会话中是否有任何写操作落在 `.qwen/skills/` 目录下。若有,说明用户本轮已主动操作 skill,session 结束时跳过自动 skill review。 +3. **`auto-skill` 标识保护用户创建的 skill**:review agent 创建的 skill 在 YAML frontmatter 中必须包含 `source: auto-skill` 标记。skill review agent 只能修改带有此标记的 skill,不得触碰用户手工创建的 skill。 +4. **工具调用密度触发**:仅当本次会话内工具调用累计 ≥ 20 次才触发,确保只在真正复杂的任务后提炼。 +5. **写保护边界明确**:review agent 的权限管理器将 `write_file`、`edit` 限制在 `${projectRoot}/.qwen/skills/` 内,不能触碰 user / extension / bundled 层。 +6. **最大保留 Hermes 核心 prompt**:review agent 使用的提示语直接移植自 Hermes `_SKILL_REVIEW_PROMPT`,只做最小化适配。 --- ## 架构变更 -### 1. 计数器:`toolCallCount` +### 1. 计数器:`toolCallCount` 与技能变动检测 -在会话状态中增加一个工具调用计数器,由 agent 主循环维护。 +在会话状态中维护两个并行追踪量: + +**工具调用计数器**(决定是否触发 skill review): ``` 会话启动 @@ -39,121 +43,87 @@ Skill Nudge 是一种**程序性记忆自动提炼机制**:当 agent 完成了 每次工具调用完成 toolCallCount += 1 - if (任意工具调用了 skill_manage): - toolCallCount = 0 // 重置:已主动操作,无需 nudge 会话结束 if (toolCallCount >= AUTO_SKILL_THRESHOLD): // 默认 20 - _shouldReviewSkills = true - toolCallCount = 0 + 检查 skillsModifiedInSession + ├─ true → skip(本轮已手动操作 skill,无需自动 review) + └─ false → scheduleSkillReview() +``` + +**技能变动检测**(代替原来的 `skill_manage` 调用重置): + +``` +每次工具调用完成 + if (工具调用的目标路径在 ${projectRoot}/.qwen/skills/ 下): + skillsModifiedInSession = true ``` -**计数器重置条件**:会话期间只要有任意一次工具调用的函数名为 `skill_manage`,立即重置计数器。这防止模型刚主动创建/修改 skill 后又被 nudge 重复写入。 +检测逻辑:扫描工具调用结果中涉及的文件路径,判断是否落在 skills 目录下。具体实现参照 `historyCallsSkillManage()` 的模式——遍历 `history` 中的 tool result,提取 `write_file`、`edit` 等写操作的目标路径进行前缀匹配。 + +> **为何用技能变动检测而非工具名检测?** +> 不再有专用的 `skill_manage` 工具,主会话和 review agent 都使用通用的 `write_file`/`edit`。因此检测维度从"是否调用了某个专用工具"转为"是否有写操作落在 `.qwen/skills/` 目录",语义更准确:只要用户本轮已主动操作过 skill 文件,就跳过自动 review。 > **为何用工具调用次数而非对话轮次?** > 工具调用次数反映任务复杂度——一个用户消息可能触发 1 次或 30 次工具调用。高工具密度意味着试错、调整策略等行为更多,产生可复用经验的概率也更高。阈值 20 比 Hermes 的 10 更保守,原因是 QwenCode 工具调用粒度通常更细(如逐行 edit)。 -### 2. 调度点:与 Extract 合并 +### 2. 调度点 -现有的 `MemoryManager.scheduleExtract()` 调用点(会话结束)作为统一调度入口,扩展为可同时调度 skill review。 +现有的 `MemoryManager` 调用点(会话结束)作为统一调度入口,扩展为可同时调度 skill review。 ``` 会话结束 ├─ scheduleExtract(params) // 现有逻辑不变 └─ scheduleSkillReview(params) // 新增 - 条件:_shouldReviewSkills === true - -scheduleSkillReview 内部: - 检查是否有 extract 任务正在 pending/running - ├─ 有 → 将 skill review 合并进同一个 forked agent(combined prompt) - └─ 无 → 单独 fork skill review agent + 条件:toolCallCount >= AUTO_SKILL_THRESHOLD + && !skillsModifiedInSession ``` -**合并机制**:参考 Hermes 的 `_COMBINED_REVIEW_PROMPT`,当 extract 和 skill review 同时触发时,向同一个 forked agent 注入合并 prompt,一次 API 调用完成两件事,避免资源浪费。 - -### 3. `skill_manage` 在不同 agent 上下文中的可用性 - -系统中存在两类 forked agent,`skill_manage` 的可用性应明确区分: - -| Agent 类型 | 触发来源 | `skill_manage` | 原因 | -| --------------------------------------- | ---------------------------- | ------------------------------- | ---------------------------------- | -| **Skill Review Agent**(本机制引入) | sessionEnd nudge(系统触发) | ✅ 可用,仅限 create/edit/patch | 专门用于写 skill,这是它的唯一职责 | -| **Task-execution subagent**(现有机制) | 主 agent 在任务中 fork | ❌ 禁用 | 见下文 | +extract 和 skill review 各自独立调度,通过 `MemoryManager.track()` 并行执行,互不阻塞。 -**Task-execution subagent 禁用 `skill_manage` 的原因**: +### 3. Skill Review Agent 的工具访问权限 -1. **用户意图未传递**:用户从未直接指令 subagent,由主 agent 决定 fork。Subagent 自行写 skill 超出了用户授权范围。 -2. **缺乏全局视角**:Subagent 只看到子任务上下文,不知道主 agent 已经或即将触发 skill review,对"值得保存"缺乏判断依据。 -3. **计数器干扰**:Subagent 调用 `skill_manage` 会触发主 agent 的计数器重置逻辑(`skill_manage` 被调用 → `toolCallCount = 0`),导致 sessionEnd 的 nudge 被错误跳过。 -4. **噪声风险**:一次任务可能 fork 多个 subagent,若每个都能写 skill,`.qwen/skills/` 会被碎片化的局部经验填满。 +skill review agent **不使用** `skill_manage` 专用工具,而是直接使用通用文件工具: -**实现方式**:在 `fork-subagent` 的工具集继承逻辑中,显式将 `skill_manage` 加入 task-execution subagent 的排除列表。Skill review agent 作为专用 forked agent 独立配置,不走通用继承路径。 +| 工具 | 用途 | 范围限制 | +| ------------ | ------------------------------------- | --------------------------------------------------------------------------- | +| `read_file` | 读取现有 skill 内容,检查 frontmatter | 无限制 | +| `ls` | 扫描 `.qwen/skills/` 目录结构 | 无限制 | +| `write_file` | 创建新 skill 文件 | 仅限 `${projectRoot}/.qwen/skills/` 内 | +| `edit` | 修改已有 skill 内容 | 仅限 `${projectRoot}/.qwen/skills/` 内,且目标文件须含 `source: auto-skill` | +| `shell` | 只读命令(如 `cat`、`find`) | 仅允许只读命令(Shell AST 静态分析) | -### 4. 权限沙箱:`SkillScopedPermissionManager` +**对 `edit` 的额外约束(`auto-skill` 保护)**: -参照 `extractionAgentPlanner.ts` 中的 `createMemoryScopedAgentConfig`,为 skill review agent 创建专用权限范围: +skill review agent 的权限管理器在执行 `edit` 或 `write_file`(对已有文件的覆盖写)前,读取目标文件的 YAML frontmatter,检查 `source: auto-skill` 字段。若该字段不存在,拒绝写入并返回错误: -```typescript -// 仅允许以下操作 -shell: 只读命令(Shell AST 静态分析,复用现有 isShellCommandReadOnlyAST) -edit: 仅限 ${projectRoot}/.qwen/skills/ 路径下的文件 -write_file: 仅限 ${projectRoot}/.qwen/skills/ 路径下的文件 -skill_manage: 允许所有 action(含 delete),但 delete 受调用来源约束(见下文) +``` +skill_review_agent: edit is only allowed on skills with 'source: auto-skill' in frontmatter. +This skill appears to be user-created. Modify it manually or ask the user. ``` -**`delete` 的调用来源约束**:`skill_manage(action="delete")` 保留在工具集中,但通过 **system prompt 约束**而非 schema 限制来控制调用时机: - -- **主 agent(用户直接交互)**:可以响应用户明确的删除指令(如"删除这个 skill")调用 `delete` -- **review agent(Skill Nudge 后台触发)**:system prompt 中明确声明不得主动调用 `delete`,必须等待用户在后续对话中明确要求 - -review agent 的 system prompt 约束片段: +这一检查在 `createSkillScopedAgentConfig` 的权限层实现,而非仅靠 system prompt,确保即使模型出错也不会覆盖用户手工编写的 skill。 -``` -You are reviewing this conversation to extract reusable skills. -You may create new skills or update existing ones. -Do NOT delete any skills unless the user has explicitly requested deletion -in this conversation. Autonomous deletion is not permitted. -``` +**主会话中的工具访问**:主 agent 不限制对 `.qwen/skills/` 的读写——用户可以通过正常的 `write_file`/`edit` 指令管理 skill。此类操作会触发 `skillsModifiedInSession = true`,导致 session 结束时跳过自动 skill review。 -### 5. 新增工具:`skill_manage`(project-scope) +### 4. 权限沙箱:`SkillScopedPermissionManager` -在 forked skill review agent 的工具集中注册一个 project-scoped 版本的 `skill_manage`: +参照 `extractionAgentPlanner.ts` 中的 `createMemoryScopedAgentConfig`,为 skill review agent 创建专用权限范围: ```typescript -// 工具 schema(注册到 skill review agent 的工具集) -{ - name: "skill_manage", - description: "Create or update a project-level skill in .qwen/skills/. " + - "Use this to save reusable procedures, workflows, or approaches discovered in this session. " + - "Skills are stored in the current project only.", - parameters: { - action: { enum: ["create", "edit", "patch", "write_file", "delete"] }, - name: { type: "string" }, - content: { type: "string", description: "Full SKILL.md content for create/edit" }, - old_string: { type: "string", description: "For patch: text to find" }, - new_string: { type: "string", description: "For patch: replacement text" }, - category: { type: "string", description: "Optional subdirectory, e.g. 'typescript'" }, - file_path: { type: "string", description: "For write_file: relative path like 'references/api.md'" }, - file_content: { type: "string" } - }, - required: ["action", "name"] -} +// skill review agent 允许的操作 +read_file: 无路径限制(需要读取任意文件来了解项目上下文) +ls: 无路径限制 +shell: 只读命令(Shell AST 静态分析,复用现有 isShellCommandReadOnlyAST) +write_file: 仅限 ${projectRoot}/.qwen/skills/ 路径下的文件(创建新 skill) +edit: 仅限 ${projectRoot}/.qwen/skills/ 内,且目标文件含 source: auto-skill ``` -**写保护实现**:handler 在执行前验证目标路径必须在 `${projectRoot}/.qwen/skills/` 内,若尝试写入其他路径一律返回错误: +**`auto-skill` 保护的实现层次**: -```typescript -function assertProjectSkillPath(targetPath: string, projectRoot: string): void { - const projectSkillsDir = path.join(projectRoot, '.qwen', 'skills'); - const resolved = path.resolve(targetPath); - if (!resolved.startsWith(path.resolve(projectSkillsDir) + path.sep)) { - throw new Error( - `skill_manage can only write to ${projectSkillsDir}. ` + - `Use the Skills UI to manage user or bundled skills.`, - ); - } -} -``` +1. **权限管理器层**(硬约束):`edit` 前读取 frontmatter,不含 `source: auto-skill` 则拒绝 +2. **System prompt 层**(软约束):明确告知 agent 只能修改带有 `source: auto-skill` 标记的 skill +3. **双重保障**:即使 system prompt 约束被绕过,权限管理器也会拦截 --- @@ -161,66 +131,54 @@ function assertProjectSkillPath(targetPath: string, projectRoot: string): void { ### 触发 prompt(移植自 Hermes,最小化适配) -**仅 skill review(独立触发)**: - ``` Review the conversation above and consider saving or updating a skill if appropriate. Focus on: was a non-trivial approach used to complete a task that required trial and error, or changing course due to experiential findings along the way, or did the user expect or desire a different method or outcome? If a relevant skill -already exists, update it with what you learned. Otherwise, create a new skill -if the approach is reusable. +already exists and has 'source: auto-skill' in its frontmatter, update it with +what you learned. Otherwise, create a new skill if the approach is reusable. + +IMPORTANT constraints: +- You may ONLY modify skill files that contain 'source: auto-skill' in their + YAML frontmatter. Always read a skill file before editing it. +- Do NOT touch skills that lack this marker — they were created by the user. +- When creating a new skill, you MUST include 'source: auto-skill' in the + frontmatter so future review agents can safely update it. +- Do NOT delete any skill. Only create or update. If nothing is worth saving, just say 'Nothing to save.' and stop. -Skills are saved to the current project (.qwen/skills/). Use skill_manage to -create or update skills. Each skill requires a SKILL.md with YAML frontmatter: +Skills are saved to the current project (.qwen/skills/). +Use write_file to create a new skill, edit to update an existing auto-skill. +Each skill lives at .qwen/skills//SKILL.md with YAML frontmatter: --- name: description: +metadata: + source: auto-skill + extracted_at: '' --- ``` -**合并 prompt(extract + skill review 同时触发)**: - -``` -Review the conversation above and consider two things: - -**Memory**: Has the user revealed things about themselves — their persona, -desires, preferences, or personal details? Has the user expressed expectations -about how you should behave, their work style, or ways they want you to operate? -If so, save using the available memory write tools. - -**Skills**: Was a non-trivial approach used to complete a task that required -trial and error, or changing course due to experiential findings along the way, -or did the user expect or desire a different method or outcome? If a relevant -skill already exists, update it with what you learned. Otherwise, create a new -skill if the approach is reusable. - -Only act if there's something genuinely worth saving. -If nothing stands out, just say 'Nothing to save.' and stop. - -Skills are saved to the current project (.qwen/skills/) via skill_manage. -Memory is saved to .qwen/memory/ via the memory write tools. -``` - ### Agent 配置 ```typescript { name: "managed-skill-extractor", tools: [ - "read_file", // 读现有 skill 内容 - "list_directory", // 扫描 .qwen/skills/ 目录 - "skill_manage", // project-scoped 写入(见上文) + "read_file", // 读现有 skill 内容,检查 source: auto-skill + "ls", // 扫描 .qwen/skills/ 目录 + "write_file", // 创建新 skill 文件(权限管理器限制路径) + "edit", // 修改已有 auto-skill(权限管理器验证 frontmatter) + "shell", // 只读命令(如 find、cat) ], permissionManager: createSkillScopedAgentConfig(config, projectRoot), - // 传入完整对话历史快照(同 Hermes messages_snapshot) - history: sessionHistory, + history: sessionHistory, // 传入完整对话历史快照 } ``` @@ -235,15 +193,19 @@ export interface ScheduleSkillReviewParams { projectRoot: string; sessionId: string; history: Content[]; // 完整会话历史快照 - toolCallCount: number; // 本次会话的工具调用次数(用于日志/调试) + toolCallCount: number; // 本次会话的工具调用次数 + skillsModified: boolean; // 本次会话是否有写操作落在 .qwen/skills/ config?: Config; + enabled?: boolean; + threshold?: number; + maxTurns?: number; + timeoutMs?: number; } export interface SkillReviewScheduleResult { - status: 'scheduled' | 'skipped' | 'merged'; + status: 'scheduled' | 'skipped'; taskId?: string; - mergedWithExtractTaskId?: string; // 若合并,记录 extract task id - skippedReason?: 'below_threshold' | 'skill_manage_called' | 'disabled'; + skippedReason?: 'below_threshold' | 'skills_modified_in_session' | 'disabled'; } ``` @@ -251,29 +213,26 @@ export interface SkillReviewScheduleResult { ```typescript scheduleSkillReview(params: ScheduleSkillReviewParams): SkillReviewScheduleResult { - // 1. 阈值检查 - if (params.toolCallCount < AUTO_SKILL_THRESHOLD) { + // 1. 配置门控 + if (params.enabled === false) { + return { status: 'skipped', skippedReason: 'disabled' }; + } + + // 2. 阈值检查 + const threshold = params.threshold ?? AUTO_SKILL_THRESHOLD; + if (params.toolCallCount < threshold) { return { status: 'skipped', skippedReason: 'below_threshold' }; } - // 2. 检查是否有 extract 任务正在 pending/running,可合并 - const pendingExtract = this.findPendingExtractTask(params.projectRoot); - if (pendingExtract) { - // 标记 extract 任务需要同时做 skill review(合并模式) - this.mergeSkillReviewIntoExtract(pendingExtract.id, params); - return { - status: 'merged', - mergedWithExtractTaskId: pendingExtract.id, - }; + // 3. 本轮已主动操作 skill,跳过自动 review + if (params.skillsModified) { + return { status: 'skipped', skippedReason: 'skills_modified_in_session' }; } - // 3. 独立调度 + // 4. 独立调度 const record = makeTaskRecord('skill-review', params.projectRoot, params.sessionId); - this.tasks.set(record.id, record); - const promise = this.runSkillReview(record, params); - this.inFlight.add(promise); - promise.finally(() => this.inFlight.delete(promise)); - return { status: 'scheduled', taskId: record.id }; + const promise = this.track(record.id, this.runSkillReview(record, params)); + return { status: 'scheduled', taskId: record.id, promise }; } ``` @@ -283,8 +242,7 @@ scheduleSkillReview(params: ScheduleSkillReviewParams): SkillReviewScheduleResul // 扩展现有 MemoryTaskRecord.taskType export type MemoryTaskType = 'extract' | 'dream' | 'skill-review'; -// 新增常量 -export const SKILL_REVIEW_TASK_TYPE = 'managed-skill-extractor' as const; +// 常量 export const AUTO_SKILL_THRESHOLD = 20; // 工具调用次数阈值 ``` @@ -296,24 +254,26 @@ export const AUTO_SKILL_THRESHOLD = 20; // 工具调用次数阈值 会话进行中 agent 主循环 ├─ 每次工具调用 → toolCallCount += 1 - └─ 调用 skill_manage → toolCallCount = 0(重置) + └─ 若写操作目标路径在 ${projectRoot}/.qwen/skills/ 下 + → skillsModifiedInSession = true 会话结束(sessionEnd 事件) ├─ scheduleExtract(params) │ └─ [现有逻辑:fork extraction agent → 写 .qwen/memory/] │ - └─ toolCallCount >= 20 ? - ├─ 否 → skip + └─ toolCallCount >= 20 && !skillsModifiedInSession ? + ├─ 否 → skip(密度不足 或 本轮已手动操作 skill) └─ 是 → scheduleSkillReview(params) - ├─ extract 正在 pending → 合并 prompt → 同一 forked agent - └─ extract 已运行/不存在 → 独立 fork skill review agent + └─ 独立 fork skill review agent ↓ skill review agent(max 8 轮,2 min,沙箱权限) + 工具:read_file, ls, write_file, edit, shell 传入完整 sessionHistory ↓ 模型判断是否有可复用方法 - ├─ 有 → skill_manage(create/patch) - │ → 写入 ${projectRoot}/.qwen/skills/ + ├─ 有 → 读取已有 skill(检查 source: auto-skill) + │ → write_file 创建新 skill(含 source: auto-skill) + │ → edit 更新已有 auto-skill │ → SkillManager 缓存失效(notifyChangeListeners) └─ 无 → "Nothing to save." 结束 @@ -333,10 +293,9 @@ export const AUTO_SKILL_THRESHOLD = 20; // 工具调用次数阈值 --- name: # 必填,小写字母 + 连字符 description: # 必填,≤ 1024 字符 -# 以下字段可选 version: 1.0.0 metadata: - source: auto-extracted # 标记为自动提炼,便于区分 + source: auto-skill # 必填(review agent 创建时强制写入) extracted_at: '2026-04-24T12:00:00Z' --- # <技能标题> @@ -344,20 +303,28 @@ metadata: <操作步骤 / 最佳实践 / 注意事项> ``` -`metadata.source: auto-extracted` 字段为可选约定,便于用户在 UI 中区分自动提炼的 skill 和手动创建的 skill,不影响 SkillManager 的正常加载逻辑。 +**`source: auto-skill` 的约束语义**: + +| 标记值 | 创建方 | skill review agent 可修改? | 用户可修改? | +| ------------ | ------------ | --------------------------- | ------------ | +| `auto-skill` | review agent | ✅ 是 | ✅ 是 | +| 无此字段 | 用户手工创建 | ❌ 否(权限管理器拦截) | ✅ 是 | + +用户若将自己创建的 skill 也加上 `source: auto-skill`,即表示允许 review agent 后续自动更新它。 --- ## 安全考量 -| 风险 | 缓解措施 | -| -------------------------------- | ------------------------------------------------------------------------------------------------ | -| 自动提炼覆盖用户精心编写的 skill | `create` 时检测同名 skill 已存在则改为 `patch`(追加/更新),不全量覆盖 | -| skill 无限增长 | review prompt 明确要求"优先更新已有 skill";`patch` action 优于 `create` | -| 写入项目外路径 | `assertProjectSkillPath` 路径白名单强制检查 | -| 提炼出含注入风险的内容 | 复用现有 `skills_guard` 安全扫描(同 Hermes `_security_scan_skill`) | -| review agent 自行删除 skill | review agent system prompt 明确禁止主动调用 `delete`;`delete` 仅在主 agent 中响应用户的明确指令 | -| 并发写入同一 skill | `_atomic_write_text` 原子写入(tempfile + os.rename),同 Hermes | +| 风险 | 缓解措施 | +| ------------------------------------ | ---------------------------------------------------------------------------------------------------------------- | +| 自动提炼覆盖用户精心编写的 skill | 权限管理器读取 frontmatter,无 `source: auto-skill` 则拒绝 `edit`;system prompt 也明确告知只能改 auto-skill | +| skill 无限增长 | review prompt 明确要求"优先更新已有 skill";更新已有 skill 优于新建 | +| 写入项目外路径 | `write_file`/`edit` 权限限制在 `${projectRoot}/.qwen/skills/` 内;`assertRealProjectSkillPath` 拒绝 symlink 穿越 | +| 提炼出含注入风险的内容 | 复用现有内容安全扫描逻辑 | +| review agent 删除 skill | review agent 工具集不含删除操作(无 `rm`、无 `shell` 写操作);system prompt 明确禁止删除 | +| 主会话手动操作 skill 后仍触发 review | `skillsModifiedInSession` 检测:主会话有写操作落在 `.qwen/skills/` 则跳过 review | +| symlink 穿越写入 skills 目录外的文件 | `assertRealProjectSkillPath`(async):用 `fs.realpath()` 解析真实路径,确认在真实 skills root 内才允许写入 | --- @@ -391,40 +358,41 @@ memory?: { ### 1. 低工具调用密度不触发 - 使用临时项目目录运行 headless 模式。 -- 配置 `memory.enableAutoSkill: true`、较低但仍高于本用例工具调用次数的 `threshold`。 +- 配置 `memory.enableAutoSkill: true`。 - 执行一个只需要少量工具调用的简单任务并正常结束会话。 -- 断言 `.qwen/skills/` 未新增自动提炼 skill;JSON 流中不应出现 `skill_manage` 调用。 +- 断言 `.qwen/skills/` 未新增 `source: auto-skill` skill;JSON 流中不应出现对 `.qwen/skills/` 的写操作。 ### 2. 达到阈值后触发 skill review - 使用临时项目目录运行 headless 模式(`AUTO_SKILL_THRESHOLD` 硬编码为 20,可在测试夹具中调低)。 - 发送一个需要多次工具调用并包含可复用流程的任务。 -- 断言会话结束后调度了 skill review;若模型判断值得保存,`.qwen/skills//SKILL.md` 被创建,且包含合法 YAML frontmatter。 +- 断言会话结束后调度了 skill review;若模型判断值得保存,`.qwen/skills//SKILL.md` 被创建,且 frontmatter 包含 `source: auto-skill`。 - 若模型判断 `Nothing to save.`,断言流程正常结束且没有权限错误。 -### 3. `skill_manage` 调用会重置 nudge 计数 +### 3. 主会话操作 skill 后跳过 review -- 构造一次会话,在达到阈值前后显式触发 `skill_manage`(或通过测试夹具模拟该工具调用)。 -- 断言本轮 sessionEnd 不会再次自动调度重复的 skill review。 -- 断言不会因同一会话内已主动管理 skill 而重复写入 `.qwen/skills/`。 +- 构造一次会话,在工具调用达到阈值的同时,通过 `write_file` 或 `edit` 写入 `.qwen/skills/` 下的文件(模拟用户手动管理 skill)。 +- 断言 session 结束时 `skillsModifiedInSession = true`,`scheduleSkillReview` 返回 `skippedReason: 'skills_modified_in_session'`。 +- 断言不会启动 review agent,避免重复写入。 ### 4. 写保护只允许 project-level skills - 通过 skill review agent 尝试写入项目外路径、user-level skill 路径或 bundled skill 路径。 - 断言写入被拒绝,错误信息指向只能写入 `${projectRoot}/.qwen/skills/`。 -- 断言允许写入 `${projectRoot}/.qwen/skills//SKILL.md` 及其 skill 内部 reference 文件。 +- 断言允许写入 `${projectRoot}/.qwen/skills//SKILL.md`。 -### 5. task-execution subagent 不暴露 `skill_manage` +### 5. `auto-skill` 标识保护用户创建的 skill -- 运行一个会 fork task-execution subagent 的任务。 -- 检查子 agent 的可用工具列表或 API logging 请求体。 -- 断言 task-execution subagent 中没有 `skill_manage`,主会话工具计数不会被子 agent 的 skill 写入逻辑干扰。 +- 在 `.qwen/skills/` 中预置一个无 `source: auto-skill` 的用户创建 skill。 +- 触发 skill review agent 并引导模型尝试修改该 skill。 +- 断言写入被权限管理器拒绝,错误信息说明该 skill 不是 auto-skill。 +- 断言同目录下带有 `source: auto-skill` 的 skill 可以正常更新。 -### 6. Extract 与 Skill Review 合并触发 +### 6. symlink 穿越被拒绝 -- 构造同时满足 memory extract 和 skill nudge 的会话。 -- 断言只创建一个 forked review/extract agent 任务,且使用 combined prompt 同时处理 Memory 与 Skills。 -- 断言 memory 写入仍落在 `.qwen/memory/`,skill 写入仍落在 `.qwen/skills/`。 +- 在 `.qwen/skills/` 下创建一个指向项目外目录的 symlink。 +- 触发 skill review agent 尝试写入该 symlink 路径。 +- 断言 `assertRealProjectSkillPath` 拒绝写入,返回 `symlink traversal detected` 错误。 ### 7. 配置开关生效 @@ -442,16 +410,21 @@ memory?: { ``` 现有 MemoryManager - ├─ scheduleExtract() ← 不变 - ├─ scheduleDream() ← 不变 - ├─ recall() ← 不变 - ├─ forget() ← 不变 - └─ scheduleSkillReview() ← 新增(本文档) + ├─ scheduleExtract() ← 不变 + ├─ scheduleDream() ← 不变 + ├─ recall() ← 不变 + ├─ forget() ← 不变 + └─ scheduleSkillReview() ← 新增(本文档) 现有 SkillManager - ├─ listSkills() ← 不变(自动发现 .qwen/skills/ 下新增文件) - ├─ loadSkill() ← 不变 - └─ [write 能力] ← 通过 skill_manage 工具暴露给 review agent + ├─ listSkills() ← 不变(自动发现 .qwen/skills/ 下新增文件) + └─ loadSkill() ← 不变 + +现有文件工具(read_file / write_file / edit) + ├─ 主会话中:用户可通过这些工具手动管理 skill + │ └─ 写操作落在 .qwen/skills/ → skillsModifiedInSession = true + └─ skill review agent 中:直接用于创建/更新 auto-skill + └─ 权限管理器限制路径 + 验证 source: auto-skill 触发点(现有 sessionEnd hook) └─ 同时调用 scheduleExtract + scheduleSkillReview(条件满足时) diff --git a/packages/cli/src/nonInteractiveCli.ts b/packages/cli/src/nonInteractiveCli.ts index 469445d6422..7bf47ef2260 100644 --- a/packages/cli/src/nonInteractiveCli.ts +++ b/packages/cli/src/nonInteractiveCli.ts @@ -453,7 +453,12 @@ export async function runNonInteractive( adapter.emitToolResult(finalRequestInfo, toolResponse); config .getGeminiClient() - .recordCompletedToolCall(finalRequestInfo.name); + .recordCompletedToolCall( + finalRequestInfo.name, + typeof finalRequestInfo.args['file_path'] === 'string' + ? finalRequestInfo.args['file_path'] + : undefined, + ); if (toolResponse.responseParts) { toolResponseParts.push(...toolResponse.responseParts); @@ -604,7 +609,12 @@ export async function runNonInteractive( adapter.emitToolResult(requestInfo, toolResponse); config .getGeminiClient() - .recordCompletedToolCall(requestInfo.name); + .recordCompletedToolCall( + requestInfo.name, + typeof requestInfo.args['file_path'] === 'string' + ? requestInfo.args['file_path'] + : undefined, + ); if (toolResponse.responseParts) { itemToolResponseParts.push(...toolResponse.responseParts); diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index 9313d56b031..42959a19fdc 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -1706,7 +1706,12 @@ export const useGeminiStream = ( ); for (const toolCall of geminiTools) { - geminiClient?.recordCompletedToolCall(toolCall.request.name); + geminiClient?.recordCompletedToolCall( + toolCall.request.name, + typeof toolCall.request.args['file_path'] === 'string' + ? toolCall.request.args['file_path'] + : undefined, + ); } if (geminiTools.length === 0) { diff --git a/packages/core/src/agents/runtime/agent-core.ts b/packages/core/src/agents/runtime/agent-core.ts index 35938c8332b..3a9846f121f 100644 --- a/packages/core/src/agents/runtime/agent-core.ts +++ b/packages/core/src/agents/runtime/agent-core.ts @@ -74,7 +74,6 @@ import { type ContextState, templateString } from './agent-headless.js'; */ export const EXCLUDED_TOOLS_FOR_SUBAGENTS: ReadonlySet = new Set([ ToolNames.AGENT, - ToolNames.SKILL_MANAGE, ToolNames.CRON_CREATE, ToolNames.CRON_LIST, ToolNames.CRON_DELETE, @@ -310,9 +309,7 @@ export class AgentCore { const excludedFromSubagents = EXCLUDED_TOOLS_FOR_SUBAGENTS; // When a subagent has an explicit tools list (not wildcard), only the - // recursive-spawn guard (AgentTool) is enforced. Other exclusions like - // SKILL_MANAGE are intentionally bypassed because the caller has - // explicitly opted in to those tools (e.g. the skill review agent). + // recursive-spawn guard (AgentTool) is enforced. const recursionGuardOnly = new Set([ToolNames.AGENT]); if (this.toolConfig) { diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 0d579627cf3..534a69271e1 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -2638,10 +2638,6 @@ export class Config { const { SkillTool } = await import('../tools/skill.js'); return new SkillTool(this); }); - await registerLazy(ToolNames.SKILL_MANAGE, async () => { - const { SkillManageTool } = await import('../tools/skill-manage.js'); - return new SkillManageTool(this); - }); await registerLazy(ToolNames.LS, async () => { const { LSTool } = await import('../tools/ls.js'); return new LSTool(this); diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 8ba50ec88d5..be7b2e39a8c 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -48,6 +48,7 @@ import { LoopDetectionService } from '../services/loopDetectionService.js'; // Tools import type { RelevantAutoMemoryPromptResult } from '../memory/manager.js'; +import { isProjectSkillPath } from '../skills/skill-paths.js'; import { ToolNames } from '../tools/tool-names.js'; // Telemetry @@ -129,6 +130,7 @@ export class GeminiClient { private chat?: GeminiChat; private sessionTurnCount = 0; private toolCallCount = 0; + private skillsModifiedInSession = false; private readonly surfacedRelevantAutoMemoryPaths = new Set(); private readonly loopDetector: LoopDetectionService; @@ -534,6 +536,7 @@ export class GeminiClient { history, config: this.config, toolCallCount: this.toolCallCount, + skillsModified: this.skillsModifiedInSession, enabled: autoSkillEnabled, threshold: 20, maxTurns: 8, @@ -549,6 +552,7 @@ export class GeminiClient { } } this.toolCallCount = 0; + this.skillsModifiedInSession = false; if (!this.config.getManagedAutoMemoryEnabled()) { return; @@ -609,10 +613,13 @@ export class GeminiClient { return promises; } - recordCompletedToolCall(toolName: string): void { - if (toolName === 'skill_manage') { - this.toolCallCount = 0; - return; + recordCompletedToolCall(toolName: string, filePath?: string): void { + if ( + filePath && + (toolName === 'write_file' || toolName === 'edit') && + isProjectSkillPath(filePath, this.config.getProjectRoot()) + ) { + this.skillsModifiedInSession = true; } this.toolCallCount += 1; } diff --git a/packages/core/src/memory/manager.test.ts b/packages/core/src/memory/manager.test.ts index bd420b1f5e1..e38e5bfa9a0 100644 --- a/packages/core/src/memory/manager.test.ts +++ b/packages/core/src/memory/manager.test.ts @@ -245,6 +245,7 @@ describe('MemoryManager', () => { history: [], toolCallCount: 1, threshold: 2, + skillsModified: false, config: makeMockConfig(), }); @@ -255,25 +256,21 @@ describe('MemoryManager', () => { expect(runSkillReviewByAgent).not.toHaveBeenCalled(); }); - it('skips when skill_manage was called in history', () => { + it('skips when skills were modified in session', () => { const mgr = new MemoryManager(); const result = mgr.scheduleSkillReview({ projectRoot: '/project', sessionId: 'sess', - history: [ - { - role: 'model', - parts: [{ functionCall: { name: 'skill_manage', args: {} } }], - }, - ], + history: [{ role: 'user', parts: [{ text: 'hi' }] }], toolCallCount: 20, threshold: 2, + skillsModified: true, config: makeMockConfig(), }); expect(result).toEqual({ status: 'skipped', - skippedReason: 'skill_manage_called', + skippedReason: 'skills_modified_in_session', }); expect(runSkillReviewByAgent).not.toHaveBeenCalled(); }); @@ -286,6 +283,7 @@ describe('MemoryManager', () => { history: [{ role: 'user', parts: [{ text: 'hi' }] }], toolCallCount: 2, threshold: 2, + skillsModified: false, config: makeMockConfig(), maxTurns: 3, timeoutMs: 30_000, @@ -540,6 +538,7 @@ describe('MemoryManager', () => { toolCallCount: 25, threshold: 20, enabled: true, + skillsModified: false, config, }); @@ -563,6 +562,7 @@ describe('MemoryManager', () => { toolCallCount: 25, threshold: 20, enabled: true, + skillsModified: false, config, }); diff --git a/packages/core/src/memory/manager.ts b/packages/core/src/memory/manager.ts index 087fb82b5af..4b82f460aa1 100644 --- a/packages/core/src/memory/manager.ts +++ b/packages/core/src/memory/manager.ts @@ -40,7 +40,6 @@ import type { Content, Part } from '@google/genai'; import type { Config } from '../config/config.js'; import { Storage } from '../config/storage.js'; import { logMemoryExtract, MemoryExtractEvent } from '../telemetry/index.js'; -import { ToolNames } from '../tools/tool-names.js'; import { isAutoMemPath } from './paths.js'; import { getAutoMemoryConsolidationLockPath, @@ -119,6 +118,7 @@ export interface ScheduleSkillReviewParams { sessionId: string; history: Content[]; toolCallCount: number; + skillsModified: boolean; now?: Date; config?: Config; enabled?: boolean; @@ -130,7 +130,7 @@ export interface ScheduleSkillReviewParams { export interface SkillReviewScheduleResult { status: 'scheduled' | 'skipped'; taskId?: string; - skippedReason?: 'below_threshold' | 'skill_manage_called' | 'disabled'; + skippedReason?: 'below_threshold' | 'skills_modified_in_session' | 'disabled'; promise?: Promise; } @@ -252,14 +252,6 @@ function historyWritesToMemory( ); } -function historyCallsSkillManage(history: Content[]): boolean { - return history.some((msg) => - (msg.parts ?? []).some( - (p) => p.functionCall?.name === ToolNames.SKILL_MANAGE, - ), - ); -} - function isProcessRunning(pid: number): boolean { try { process.kill(pid, 0); @@ -687,8 +679,8 @@ export class MemoryManager { return { status: 'skipped', skippedReason: 'disabled' }; } - if (historyCallsSkillManage(params.history)) { - return { status: 'skipped', skippedReason: 'skill_manage_called' }; + if (params.skillsModified) { + return { status: 'skipped', skippedReason: 'skills_modified_in_session' }; } const threshold = params.threshold ?? AUTO_SKILL_THRESHOLD; diff --git a/packages/core/src/memory/skillReviewAgentPlanner.ts b/packages/core/src/memory/skillReviewAgentPlanner.ts index a359fa78c3c..961ccd908d8 100644 --- a/packages/core/src/memory/skillReviewAgentPlanner.ts +++ b/packages/core/src/memory/skillReviewAgentPlanner.ts @@ -5,6 +5,7 @@ */ import type { Content } from '@google/genai'; +import * as fs from 'node:fs/promises'; import type { Config } from '../config/config.js'; import type { PermissionManager } from '../permissions/permission-manager.js'; import type { @@ -39,12 +40,29 @@ type SkillScopedPermissionManager = Pick< | 'isToolEnabled' >; +/** + * Returns true if the file at `filePath` exists and contains + * `source: auto-skill` in its YAML frontmatter block. + * Returns false if the file does not exist (caller may allow creation). + */ +async function hasAutoSkillSource(filePath: string): Promise { + let content: string; + try { + content = await fs.readFile(filePath, 'utf-8'); + } catch { + // File does not exist — allow creation. + return null; + } + const match = /^---\r?\n([\s\S]*?)\r?\n---/m.exec(content); + if (!match) return false; + return /^\s*source:\s*auto-skill\s*$/m.test(match[1]); +} + function isScopedTool(toolName: string): boolean { return ( toolName === ToolNames.SHELL || toolName === ToolNames.EDIT || - toolName === ToolNames.WRITE_FILE || - toolName === ToolNames.SKILL_MANAGE + toolName === ToolNames.WRITE_FILE ); } @@ -78,12 +96,18 @@ async function evaluateScopedDecision( return isReadOnly ? 'allow' : 'deny'; } case ToolNames.EDIT: - case ToolNames.WRITE_FILE: - return ctx.filePath && isProjectSkillPath(ctx.filePath, projectRoot) - ? 'allow' - : 'deny'; - case ToolNames.SKILL_MANAGE: - return 'allow'; + case ToolNames.WRITE_FILE: { + if (!ctx.filePath || !isProjectSkillPath(ctx.filePath, projectRoot)) { + return 'deny'; + } + // For existing files, verify source: auto-skill is present. + const sourceFlag = await hasAutoSkillSource(ctx.filePath); + if (sourceFlag === null) { + // File does not exist yet — allow creation. + return 'allow'; + } + return sourceFlag ? 'allow' : 'deny'; + } default: return 'default'; } @@ -97,9 +121,9 @@ function getScopedDenyRule( case ToolNames.SHELL: return 'ManagedSkillReview(run_shell_command: read-only only)'; case ToolNames.EDIT: - return `ManagedSkillReview(edit: only within ${getProjectSkillsRoot(projectRoot)})`; + return `ManagedSkillReview(edit: only within ${getProjectSkillsRoot(projectRoot)} and only on skills with 'source: auto-skill' in frontmatter)`; case ToolNames.WRITE_FILE: - return `ManagedSkillReview(write_file: only within ${getProjectSkillsRoot(projectRoot)})`; + return `ManagedSkillReview(write_file: only within ${getProjectSkillsRoot(projectRoot)}; existing files must have 'source: auto-skill' in frontmatter)`; default: return undefined; } @@ -145,12 +169,16 @@ export function createSkillScopedAgentConfig( const SKILL_REVIEW_SYSTEM_PROMPT = [ 'You are reviewing this conversation to extract reusable skills.', - 'You may create new skills or update existing ones.', - 'Do NOT delete any skills unless the user has explicitly requested deletion in this conversation. Autonomous deletion is not permitted.', '', 'Review the conversation above and consider saving or updating a skill if appropriate.', '', - 'Focus on: was a non-trivial approach used to complete a task that required trial and error, or changing course due to experiential findings along the way, or did the user expect or desire a different method or outcome? If a relevant skill already exists, update it with what you learned. Otherwise, create a new skill if the approach is reusable.', + "Focus on: was a non-trivial approach used to complete a task that required trial and error, or changing course due to experiential findings along the way, or did the user expect or desire a different method or outcome? If a relevant skill already exists and has 'source: auto-skill' in its frontmatter, update it with what you learned. Otherwise, create a new skill if the approach is reusable.", + '', + 'IMPORTANT constraints:', + "- You may ONLY modify skill files that contain 'source: auto-skill' in their YAML frontmatter. Always read a skill file before editing it.", + '- Do NOT touch skills that lack this marker — they were created by the user.', + "- When creating a new skill, you MUST include 'source: auto-skill' in the frontmatter so future review agents can safely update it.", + '- Do NOT delete any skill. Only create or update.', '', "If nothing is worth saving, just say 'Nothing to save.' and stop.", ].join('\n'); @@ -176,13 +204,15 @@ function buildTaskPrompt(skillsRoot: string): string { return [ `Project skills directory: \`${skillsRoot}\``, '', - 'Use `list_directory` and `read_file` to inspect existing skills before writing.', - 'Use `skill_manage` to create or update project-level skills only.', - 'Each skill requires a SKILL.md with YAML frontmatter:', + 'Use `ls` and `read_file` to inspect existing skills before writing.', + 'Use `write_file` to create a new skill, `edit` to update an existing auto-skill.', + "Each skill lives at .qwen/skills//SKILL.md. Skills you create MUST include 'source: auto-skill' in the frontmatter:", '', '---', 'name: ', 'description: ', + 'source: auto-skill', + `extracted_at: '${new Date().toISOString()}'`, '---', '', '', @@ -215,7 +245,6 @@ export async function runSkillReviewByAgent(params: { ToolNames.SHELL, ToolNames.WRITE_FILE, ToolNames.EDIT, - ToolNames.SKILL_MANAGE, ], extraHistory: buildAgentHistory(params.history), }); diff --git a/packages/core/src/memory/skillReviewNudge.integration.test.ts b/packages/core/src/memory/skillReviewNudge.integration.test.ts index 82681a8e582..e445b52d921 100644 --- a/packages/core/src/memory/skillReviewNudge.integration.test.ts +++ b/packages/core/src/memory/skillReviewNudge.integration.test.ts @@ -59,7 +59,8 @@ describe('Skill Nudge E2E Integration Tests', () => { projectRoot, sessionId: 'test-session-1', history: sampleHistory, - toolCallCount: 5, // Below default threshold of 20 + toolCallCount: 5, + skillsModified: false, // Below default threshold of 20 threshold: AUTO_SKILL_THRESHOLD, enabled: true, config: mockConfig, @@ -76,6 +77,7 @@ describe('Skill Nudge E2E Integration Tests', () => { sessionId: 'test-session-1', history: sampleHistory, toolCallCount: AUTO_SKILL_THRESHOLD - 1, + skillsModified: false, threshold: AUTO_SKILL_THRESHOLD, enabled: true, config: mockConfig, @@ -95,6 +97,7 @@ describe('Skill Nudge E2E Integration Tests', () => { sessionId: 'test-session-1', history: sampleHistory, toolCallCount: AUTO_SKILL_THRESHOLD, + skillsModified: false, threshold: AUTO_SKILL_THRESHOLD, enabled: true, config: mockConfig, @@ -111,6 +114,7 @@ describe('Skill Nudge E2E Integration Tests', () => { sessionId: 'test-session-1', history: sampleHistory, toolCallCount: AUTO_SKILL_THRESHOLD + 10, + skillsModified: false, threshold: AUTO_SKILL_THRESHOLD, enabled: true, config: mockConfig, @@ -127,6 +131,7 @@ describe('Skill Nudge E2E Integration Tests', () => { sessionId: 'test-session-1', history: sampleHistory, toolCallCount: 30, + skillsModified: false, threshold: customThreshold, enabled: true, config: mockConfig, @@ -137,79 +142,38 @@ describe('Skill Nudge E2E Integration Tests', () => { }); }); - // ─── Test 3: skill_manage Call Resets Counter ────────────────────────────── - - describe('Test 3: skill_manage function call in history should prevent nudge', () => { - it('should skip when history contains skill_manage call', () => { - const historyWithSkillManage: Content[] = [ - { role: 'user', parts: [{ text: 'Create a skill' }] }, - { - role: 'model', - parts: [ - { - functionCall: { - name: 'skill_manage', - args: { - action: 'create', - name: 'my-skill', - content: '---\nname: my-skill\n---\n', - }, - }, - }, - ], - }, - { - role: 'user', - parts: [ - { - functionResponse: { - name: 'skill_manage', - response: { output: 'Success' }, - }, - }, - ], - }, - ]; + // ─── Test 3: Skills Modified In Session ────────────────────────────────── + describe('Test 3: skills modified in session should prevent nudge', () => { + it('should skip when skillsModified is true', () => { const result = mgr.scheduleSkillReview({ projectRoot, sessionId: 'test-session-1', - history: historyWithSkillManage, + history: sampleHistory, toolCallCount: 30, // Well above threshold threshold: AUTO_SKILL_THRESHOLD, + skillsModified: true, enabled: true, config: mockConfig, }); expect(result.status).toBe('skipped'); - expect(result.skippedReason).toBe('skill_manage_called'); + expect(result.skippedReason).toBe('skills_modified_in_session'); }); - it('should not trigger nudge even with high toolCallCount if skill_manage was used', () => { + it('should not trigger nudge even with high toolCallCount if skills were modified', () => { const result = mgr.scheduleSkillReview({ projectRoot, sessionId: 'test-session-1', - history: [ - { role: 'user', parts: [{ text: 'Many tool calls...' }] }, - { - role: 'model', - parts: [ - { - functionCall: { - name: 'skill_manage', - args: { action: 'patch' }, - }, - }, - ], - }, - ], + history: sampleHistory, toolCallCount: 100, + skillsModified: true, enabled: true, config: mockConfig, }); expect(result.status).toBe('skipped'); - expect(result.skippedReason).toBe('skill_manage_called'); + expect(result.skippedReason).toBe('skills_modified_in_session'); }); }); @@ -222,6 +186,7 @@ describe('Skill Nudge E2E Integration Tests', () => { sessionId: 'test-session-1', history: sampleHistory, toolCallCount: AUTO_SKILL_THRESHOLD, + skillsModified: false, enabled: false, config: mockConfig, }); @@ -236,6 +201,7 @@ describe('Skill Nudge E2E Integration Tests', () => { sessionId: 'test-session-1', history: sampleHistory, toolCallCount: AUTO_SKILL_THRESHOLD, + skillsModified: false, config: undefined, }); @@ -249,6 +215,7 @@ describe('Skill Nudge E2E Integration Tests', () => { sessionId: 'test-session-1', history: sampleHistory, toolCallCount: AUTO_SKILL_THRESHOLD, + skillsModified: false, enabled: true, config: mockConfig, }); @@ -267,6 +234,7 @@ describe('Skill Nudge E2E Integration Tests', () => { sessionId: 'test-session-1', history: sampleHistory, toolCallCount: AUTO_SKILL_THRESHOLD, + skillsModified: false, enabled: true, config: mockConfig, }); @@ -290,6 +258,7 @@ describe('Skill Nudge E2E Integration Tests', () => { sessionId: 'session-1', history: sampleHistory, toolCallCount: AUTO_SKILL_THRESHOLD, + skillsModified: false, enabled: true, config: mockConfig, }); @@ -299,6 +268,7 @@ describe('Skill Nudge E2E Integration Tests', () => { sessionId: 'session-2', history: sampleHistory, toolCallCount: AUTO_SKILL_THRESHOLD, + skillsModified: false, enabled: true, config: mockConfig, }); @@ -327,6 +297,7 @@ describe('Skill Nudge E2E Integration Tests', () => { history: sampleHistory, toolCallCount, threshold, + skillsModified: false, enabled: true, config: mockConfig, }); @@ -351,6 +322,7 @@ describe('Skill Nudge E2E Integration Tests', () => { sessionId: 'test-session-1', history: sampleHistory, toolCallCount: AUTO_SKILL_THRESHOLD, + skillsModified: false, enabled: true, config: mockConfig, }); @@ -374,6 +346,7 @@ describe('Skill Nudge E2E Integration Tests', () => { sessionId: 'test-session-1', history: sampleHistory, toolCallCount: AUTO_SKILL_THRESHOLD - 1, + skillsModified: false, threshold: AUTO_SKILL_THRESHOLD, enabled: true, config: mockConfig, @@ -388,6 +361,7 @@ describe('Skill Nudge E2E Integration Tests', () => { sessionId: 'test-session-1', history: sampleHistory, toolCallCount: AUTO_SKILL_THRESHOLD, + skillsModified: false, threshold: AUTO_SKILL_THRESHOLD, enabled: true, config: mockConfig, @@ -402,6 +376,7 @@ describe('Skill Nudge E2E Integration Tests', () => { sessionId: 'test-session-1', history: sampleHistory, toolCallCount: AUTO_SKILL_THRESHOLD + 1, + skillsModified: false, threshold: AUTO_SKILL_THRESHOLD, enabled: true, config: mockConfig, @@ -420,6 +395,7 @@ describe('Skill Nudge E2E Integration Tests', () => { sessionId: 'test-session-1', history: sampleHistory, toolCallCount: AUTO_SKILL_THRESHOLD, + skillsModified: false, enabled: true, config: mockConfig, }); diff --git a/packages/core/src/skills/skill-paths.test.ts b/packages/core/src/skills/skill-paths.test.ts index fa4ac9cae7e..e2a69c27b42 100644 --- a/packages/core/src/skills/skill-paths.test.ts +++ b/packages/core/src/skills/skill-paths.test.ts @@ -41,7 +41,7 @@ describe('skill project paths', () => { const sibling = path.join(projectRoot, '.qwen', 'skills-evil', 'SKILL.md'); expect(isProjectSkillPath(sibling, projectRoot)).toBe(false); expect(() => assertProjectSkillPath(sibling, projectRoot)).toThrow( - 'skill_manage can only write to', + 'Skills writes are restricted to', ); }); diff --git a/packages/core/src/skills/skill-paths.ts b/packages/core/src/skills/skill-paths.ts index 4083d81c1a1..1766f12f4c4 100644 --- a/packages/core/src/skills/skill-paths.ts +++ b/packages/core/src/skills/skill-paths.ts @@ -29,7 +29,7 @@ export function assertProjectSkillPath( ): void { if (!isProjectSkillPath(targetPath, projectRoot)) { throw new Error( - `skill_manage can only write to ${getProjectSkillsRoot(projectRoot)}. ` + + `Skills writes are restricted to ${getProjectSkillsRoot(projectRoot)}. ` + 'Use the Skills UI to manage user or bundled skills.', ); } @@ -75,7 +75,7 @@ export async function assertRealProjectSkillPath( !real.startsWith(realSkillsRoot + path.sep) ) { throw new Error( - `skill_manage: symlink traversal detected — resolved path "${real}" ` + + `Skills write blocked: symlink traversal detected — resolved path "${real}" ` + `is outside the project skills directory "${realSkillsRoot}".`, ); } diff --git a/packages/core/src/tools/agent/agent.ts b/packages/core/src/tools/agent/agent.ts index 57ae4ea0e10..19ce88f1986 100644 --- a/packages/core/src/tools/agent/agent.ts +++ b/packages/core/src/tools/agent/agent.ts @@ -696,7 +696,7 @@ class AgentToolInvocation extends BaseToolInvocation { // exclusion list does not apply. Recursive forks are blocked by the // ALS-based `isInForkExecution()` guard. // However, we still exclude tools that must never be available to - // any subagent (agent, skill_manage, cron tools). + // any subagent (agent, cron tools). const parentToolDecls: FunctionDeclaration[] = ( generationConfig.tools as Array<{ diff --git a/packages/core/src/tools/skill-manage.test.ts b/packages/core/src/tools/skill-manage.test.ts deleted file mode 100644 index 6143a521d20..00000000000 --- a/packages/core/src/tools/skill-manage.test.ts +++ /dev/null @@ -1,209 +0,0 @@ -/** - * @license - * Copyright 2026 Qwen Team - * SPDX-License-Identifier: Apache-2.0 - */ - -import * as fs from 'node:fs/promises'; -import * as os from 'node:os'; -import * as path from 'node:path'; -import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; -import type { Config } from '../config/config.js'; -import { SkillManageTool } from './skill-manage.js'; -import { EXCLUDED_TOOLS_FOR_SUBAGENTS } from '../agents/runtime/agent-core.js'; -import { ToolNames } from '../tools/tool-names.js'; - -// ─── Checklist 5: skill_manage excluded from subagents ─────────────────────── - -describe('EXCLUDED_TOOLS_FOR_SUBAGENTS – skill_manage isolation', () => { - it('contains skill_manage so wildcard/inherit subagents cannot call it', () => { - expect(EXCLUDED_TOOLS_FOR_SUBAGENTS.has(ToolNames.SKILL_MANAGE)).toBe(true); - }); - - it('contains agent to prevent recursive subagent spawning', () => { - expect(EXCLUDED_TOOLS_FOR_SUBAGENTS.has(ToolNames.AGENT)).toBe(true); - }); - - it('does NOT exclude ordinary tools like read_file', () => { - expect(EXCLUDED_TOOLS_FOR_SUBAGENTS.has(ToolNames.READ_FILE)).toBe(false); - }); -}); - -async function runSkillManage( - tool: SkillManageTool, - params: Parameters[0], -) { - return tool.build(params).execute(new AbortController().signal); -} - -describe('SkillManageTool', () => { - let tempDir: string; - let projectRoot: string; - let config: Config; - let tool: SkillManageTool; - const refreshCache = vi.fn(); - - beforeEach(async () => { - tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'skill-manage-')); - projectRoot = path.join(tempDir, 'project'); - await fs.mkdir(projectRoot, { recursive: true }); - refreshCache.mockReset(); - config = { - getProjectRoot: vi.fn().mockReturnValue(projectRoot), - getSkillManager: vi.fn().mockReturnValue({ refreshCache }), - } as unknown as Config; - tool = new SkillManageTool(config); - }); - - afterEach(async () => { - await fs.rm(tempDir, { recursive: true, force: true }); - }); - - it('creates a project-level SKILL.md', async () => { - const result = await runSkillManage(tool, { - action: 'create', - name: 'My Skill', - content: '---\nname: my-skill\ndescription: Test\n---\n\n# Test\n', - }); - - expect(result.error).toBeUndefined(); - await expect( - fs.readFile( - path.join(projectRoot, '.qwen', 'skills', 'my-skill', 'SKILL.md'), - 'utf-8', - ), - ).resolves.toContain('name: my-skill'); - expect(refreshCache).toHaveBeenCalled(); - }); - - it('patches an existing skill file', async () => { - const skillPath = path.join( - projectRoot, - '.qwen', - 'skills', - 'patch-me', - 'SKILL.md', - ); - await fs.mkdir(path.dirname(skillPath), { recursive: true }); - await fs.writeFile(skillPath, 'old body', 'utf-8'); - - const result = await runSkillManage(tool, { - action: 'patch', - name: 'patch-me', - old_string: 'old', - new_string: 'new', - }); - - expect(result.error).toBeUndefined(); - await expect(fs.readFile(skillPath, 'utf-8')).resolves.toBe('new body'); - }); - - it('rejects write_file path traversal outside project skills', async () => { - const result = await runSkillManage(tool, { - action: 'write_file', - name: 'refs', - file_path: '../../outside.md', - file_content: 'bad', - }); - - expect(result.error?.message).toContain('skill_manage can only write to'); - await expect( - fs.stat(path.join(projectRoot, '.qwen', 'outside.md')), - ).rejects.toThrow(); - }); - - // ─── Checklist 4: Write protection ───────────────────────────────────────── - - it('sanitizes name path traversal: creates skill safely inside project', async () => { - // Attempt to use a path-traversal-looking name. - // sanitizeSkillName() converts "../../../etc/malicious" → "----------etc-malicious" - // so the result is a safe path inside .qwen/skills/ (never escapes it). - const result = await runSkillManage(tool, { - action: 'create', - name: '../../../etc/malicious', - content: '---\nname: sanitized\n---\n', - }); - - // The call should succeed (the name is sanitized, not rejected) - expect(result.error).toBeUndefined(); - // The file must be written inside the project skills root, not at /etc/malicious - await expect(fs.stat('/etc/malicious/SKILL.md')).rejects.toThrow(); - // Verify it landed somewhere inside .qwen/skills/ - const skillsRoot = path.join(projectRoot, '.qwen', 'skills'); - const dirs = await fs.readdir(skillsRoot); - expect(dirs.length).toBeGreaterThan(0); - }); - - it('allows write_file inside skill directory (reference file)', async () => { - const result = await runSkillManage(tool, { - action: 'write_file', - name: 'my-skill', - file_path: 'references/api.md', - file_content: '# API reference\n', - }); - - expect(result.error).toBeUndefined(); - await expect( - fs.readFile( - path.join( - projectRoot, - '.qwen', - 'skills', - 'my-skill', - 'references', - 'api.md', - ), - 'utf-8', - ), - ).resolves.toContain('API reference'); - }); - - it('upsert: create succeeds if file already exists (idempotent)', async () => { - const content1 = '---\nname: my-skill\ndescription: v1\n---\n'; - const content2 = '---\nname: my-skill\ndescription: v2\n---\n'; - - // First create - await runSkillManage(tool, { - action: 'create', - name: 'my-skill', - content: content1, - }); - - // Second create on same name should succeed (upsert), not throw - const result = await runSkillManage(tool, { - action: 'create', - name: 'my-skill', - content: content2, - }); - - expect(result.error).toBeUndefined(); - // Content should be updated to v2 - const written = await fs.readFile( - path.join(projectRoot, '.qwen', 'skills', 'my-skill', 'SKILL.md'), - 'utf-8', - ); - expect(written).toContain('v2'); - }); - - it('deletes the skill directory', async () => { - // Setup - await runSkillManage(tool, { - action: 'create', - name: 'to-delete', - content: '---\nname: to-delete\n---\n', - }); - - const skillDir = path.join(projectRoot, '.qwen', 'skills', 'to-delete'); - await expect(fs.stat(skillDir)).resolves.toBeTruthy(); - - // Delete - const result = await runSkillManage(tool, { - action: 'delete', - name: 'to-delete', - }); - - expect(result.error).toBeUndefined(); - await expect(fs.stat(skillDir)).rejects.toThrow(); - expect(refreshCache).toHaveBeenCalled(); - }); -}); diff --git a/packages/core/src/tools/skill-manage.ts b/packages/core/src/tools/skill-manage.ts deleted file mode 100644 index f30ee9f04c6..00000000000 --- a/packages/core/src/tools/skill-manage.ts +++ /dev/null @@ -1,266 +0,0 @@ -/** - * @license - * Copyright 2026 Qwen Team - * SPDX-License-Identifier: Apache-2.0 - */ - -import * as fs from 'node:fs/promises'; -import * as path from 'node:path'; -import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js'; -import { ToolDisplayNames, ToolNames } from './tool-names.js'; -import type { ToolResult, ToolResultDisplay } from './tools.js'; -import type { Config } from '../config/config.js'; -import { - assertProjectSkillPath, - assertRealProjectSkillPath, - getProjectSkillsRoot, - sanitizeSkillName, - SKILL_FILE_NAME, -} from '../skills/skill-paths.js'; - -export type SkillManageAction = - | 'create' - | 'edit' - | 'patch' - | 'write_file' - | 'delete'; - -export interface SkillManageParams { - action: SkillManageAction; - name: string; - content?: string; - old_string?: string; - new_string?: string; - category?: string; - file_path?: string; - file_content?: string; -} - -export class SkillManageTool extends BaseDeclarativeTool< - SkillManageParams, - ToolResult -> { - static readonly Name: string = ToolNames.SKILL_MANAGE; - - constructor(private readonly config: Config) { - super( - SkillManageTool.Name, - ToolDisplayNames.SKILL_MANAGE, - 'Create, update, patch, write files for, or delete a project-level skill in .qwen/skills/.', - Kind.Edit, - { - type: 'object', - properties: { - action: { - type: 'string', - enum: ['create', 'edit', 'patch', 'write_file', 'delete'], - }, - name: { type: 'string' }, - content: { - type: 'string', - description: 'Full SKILL.md content for create/edit.', - }, - old_string: { - type: 'string', - description: 'For patch: text to find.', - }, - new_string: { - type: 'string', - description: 'For patch: replacement text.', - }, - category: { - type: 'string', - description: "Optional subdirectory, e.g. 'typescript'.", - }, - file_path: { - type: 'string', - description: - "For write_file: relative path like 'references/api.md'.", - }, - file_content: { type: 'string' }, - }, - required: ['action', 'name'], - additionalProperties: false, - $schema: 'http://json-schema.org/draft-07/schema#', - }, - false, - false, - ); - } - - protected override validateToolParamValues( - params: SkillManageParams, - ): string | null { - if (!params.name.trim()) { - return 'Parameter "name" must be a non-empty string.'; - } - if ( - (params.action === 'create' || params.action === 'edit') && - !params.content - ) { - return `Parameter "content" is required for action "${params.action}".`; - } - if (params.action === 'patch') { - if (!params.old_string || params.new_string === undefined) { - return 'Parameters "old_string" and "new_string" are required for action "patch".'; - } - } - if (params.action === 'write_file') { - if (!params.file_path || params.file_content === undefined) { - return 'Parameters "file_path" and "file_content" are required for action "write_file".'; - } - } - return null; - } - - protected createInvocation(params: SkillManageParams) { - return new SkillManageToolInvocation(this.config, params); - } -} - -class SkillManageToolInvocation extends BaseToolInvocation< - SkillManageParams, - ToolResult -> { - constructor( - private readonly config: Config, - params: SkillManageParams, - ) { - super(params); - } - - getDescription(): string { - return `${this.params.action} project skill ${this.params.name}`; - } - - override toolLocations() { - return [{ path: this.resolveTargetPath() }]; - } - - override getDefaultPermission() { - return Promise.resolve('ask' as const); - } - - async execute( - _signal: AbortSignal, - _updateOutput?: (output: ToolResultDisplay) => void, - ): Promise { - try { - const targetPath = this.resolveTargetPath(); - assertProjectSkillPath(targetPath, this.config.getProjectRoot()); - await assertRealProjectSkillPath( - targetPath, - this.config.getProjectRoot(), - ); - - switch (this.params.action) { - case 'create': - await this.writeSkillFileWithFallback( - targetPath, - this.params.content!, - ); - break; - case 'edit': - await this.writeSkillFile(targetPath, this.params.content!, 'w'); - break; - case 'patch': - await this.patchSkillFile(targetPath); - break; - case 'write_file': - await this.writeReferenceFile(targetPath); - break; - case 'delete': - await fs.rm(path.dirname(targetPath), { - recursive: true, - force: true, - }); - break; - default: - throw new Error(`Unsupported action: ${this.params.action}`); - } - - await this.config.getSkillManager()?.refreshCache?.(); - const message = `skill_manage ${this.params.action} succeeded: ${targetPath}`; - return { llmContent: message, returnDisplay: message }; - } catch (error) { - const message = error instanceof Error ? error.message : String(error); - return { - llmContent: message, - returnDisplay: message, - error: { message }, - }; - } - } - - private async writeSkillFileWithFallback( - targetPath: string, - content: string, - ): Promise { - try { - // Try exclusive create first - await this.writeSkillFile(targetPath, content, 'wx'); - } catch (error) { - // If file exists, fallback to edit (upsert behavior) - const err = error as NodeJS.ErrnoException; - if (err.code === 'EEXIST') { - // File already exists, upgrade to edit - await this.writeSkillFile(targetPath, content, 'w'); - } else { - throw error; - } - } - } - - private async writeSkillFile( - targetPath: string, - content: string, - flag: 'w' | 'wx', - ): Promise { - await fs.mkdir(path.dirname(targetPath), { recursive: true }); - await fs.writeFile( - targetPath, - content.endsWith('\n') ? content : `${content}\n`, - { - encoding: 'utf-8', - flag, - }, - ); - } - - private async patchSkillFile(targetPath: string): Promise { - const oldString = this.params.old_string!; - const newString = this.params.new_string!; - const original = await fs.readFile(targetPath, 'utf-8'); - if (!original.includes(oldString)) { - throw new Error('old_string was not found in the target skill file.'); - } - await fs.writeFile( - targetPath, - original.replace(oldString, newString), - 'utf-8', - ); - } - - private async writeReferenceFile(targetPath: string): Promise { - await fs.mkdir(path.dirname(targetPath), { recursive: true }); - await fs.writeFile(targetPath, this.params.file_content!, 'utf-8'); - } - - private resolveTargetPath(): string { - const projectSkillsRoot = getProjectSkillsRoot( - this.config.getProjectRoot(), - ); - const skillName = sanitizeSkillName(this.params.name); - const parts = [projectSkillsRoot]; - if (this.params.category) { - parts.push(this.params.category); - } - parts.push(skillName); - - if (this.params.action === 'write_file') { - const filePath = this.params.file_path ?? ''; - return path.resolve(path.join(...parts, filePath)); - } - return path.resolve(path.join(...parts, SKILL_FILE_NAME)); - } -} diff --git a/packages/core/src/tools/tool-names.ts b/packages/core/src/tools/tool-names.ts index e990efc9ec9..9edc21508fc 100644 --- a/packages/core/src/tools/tool-names.ts +++ b/packages/core/src/tools/tool-names.ts @@ -20,7 +20,6 @@ export const ToolNames = { MEMORY: 'save_memory', AGENT: 'agent', SKILL: 'skill', - SKILL_MANAGE: 'skill_manage', EXIT_PLAN_MODE: 'exit_plan_mode', WEB_FETCH: 'web_fetch', WEB_SEARCH: 'web_search', @@ -48,7 +47,6 @@ export const ToolDisplayNames = { MEMORY: 'SaveMemory', AGENT: 'Agent', SKILL: 'Skill', - SKILL_MANAGE: 'SkillManage', EXIT_PLAN_MODE: 'ExitPlanMode', WEB_FETCH: 'WebFetch', WEB_SEARCH: 'WebSearch', From f03c73a8b7ba4a70de94f573eafee935bab8b1af Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Wed, 29 Apr 2026 19:14:11 +0800 Subject: [PATCH 07/10] fix(autoSkill): deduplicate concurrent skill-review tasks per projectRoot scheduleSkillReview() was launching a new background task every time the threshold was reached for the same project, with no guard against multiple in-flight reviews running concurrently. Fix: add skillReviewInFlightByProject Map that tracks the taskId of any running review per projectRoot. A second call while one is in-flight returns { status: 'skipped', skippedReason: 'already_running', taskId: }. The map entry is cleared in a finally block inside runSkillReview() so the next session can schedule a fresh review after the current one completes. Also extend SkillReviewScheduleResult.skippedReason union to include 'already_running', and add a unit test covering the full lifecycle: first call schedules, second call is skipped with existing taskId, and a third call after completion schedules a new task. --- packages/core/src/memory/manager.test.ts | 44 ++++++++++++++++++++++++ packages/core/src/memory/manager.ts | 23 ++++++++++++- 2 files changed, 66 insertions(+), 1 deletion(-) diff --git a/packages/core/src/memory/manager.test.ts b/packages/core/src/memory/manager.test.ts index e38e5bfa9a0..b904db88cbb 100644 --- a/packages/core/src/memory/manager.test.ts +++ b/packages/core/src/memory/manager.test.ts @@ -275,6 +275,50 @@ describe('MemoryManager', () => { expect(runSkillReviewByAgent).not.toHaveBeenCalled(); }); + it('skips second call while first is still in-flight (already_running)', async () => { + let resolveReview!: (v: { touchedSkillFiles: string[] }) => void; + vi.mocked(runSkillReviewByAgent).mockReturnValueOnce( + new Promise<{ touchedSkillFiles: string[] }>((resolve) => { + resolveReview = resolve; + }), + ); + + const mgr = new MemoryManager(); + const baseParams = { + projectRoot: '/project', + sessionId: 'sess', + history: [{ role: 'user' as const, parts: [{ text: 'hi' }] }], + toolCallCount: 25, + threshold: 2, + skillsModified: false, + config: makeMockConfig(), + }; + + const first = mgr.scheduleSkillReview(baseParams); + expect(first.status).toBe('scheduled'); + + // Second call while first is still running + const second = mgr.scheduleSkillReview({ + ...baseParams, + sessionId: 'sess-2', + }); + expect(second.status).toBe('skipped'); + expect(second.skippedReason).toBe('already_running'); + // Returns the existing task id so callers can observe it + expect(second.taskId).toBe(first.taskId); + + // After first completes, a new call is allowed + resolveReview({ touchedSkillFiles: [] }); + await first.promise; + + vi.mocked(runSkillReviewByAgent).mockResolvedValueOnce({ + touchedSkillFiles: [], + }); + const third = mgr.scheduleSkillReview(baseParams); + expect(third.status).toBe('scheduled'); + expect(third.taskId).not.toBe(first.taskId); + }); + it('schedules skill review at threshold', async () => { const mgr = new MemoryManager(); const result = mgr.scheduleSkillReview({ diff --git a/packages/core/src/memory/manager.ts b/packages/core/src/memory/manager.ts index 4b82f460aa1..2d3540008f0 100644 --- a/packages/core/src/memory/manager.ts +++ b/packages/core/src/memory/manager.ts @@ -130,7 +130,11 @@ export interface ScheduleSkillReviewParams { export interface SkillReviewScheduleResult { status: 'scheduled' | 'skipped'; taskId?: string; - skippedReason?: 'below_threshold' | 'skills_modified_in_session' | 'disabled'; + skippedReason?: + | 'below_threshold' + | 'skills_modified_in_session' + | 'disabled' + | 'already_running'; promise?: Promise; } @@ -382,6 +386,9 @@ export class MemoryManager { { taskId: string; params: ScheduleExtractParams } >(); + // ── Skill-review in-flight dedup ───────────────────────────────────────────── + private readonly skillReviewInFlightByProject = new Map(); + // ── Dream scheduling state ─────────────────────────────────────────────────── private readonly dreamInFlightByKey = new Map(); private readonly dreamLastSessionScanAt = new Map(); @@ -692,6 +699,17 @@ export class MemoryManager { return { status: 'skipped', skippedReason: 'disabled' }; } + const existingTaskId = this.skillReviewInFlightByProject.get( + params.projectRoot, + ); + if (existingTaskId) { + return { + status: 'skipped', + skippedReason: 'already_running', + taskId: existingTaskId, + }; + } + const record = makeTaskRecord( 'skill-review', params.projectRoot, @@ -715,6 +733,7 @@ export class MemoryManager { record: MemoryTaskRecord, params: ScheduleSkillReviewParams, ): Promise { + this.skillReviewInFlightByProject.set(params.projectRoot, record.id); try { const result = await runSkillReviewByAgent({ config: params.config!, @@ -735,6 +754,8 @@ export class MemoryManager { status: 'failed', error: error instanceof Error ? error.message : String(error), }); + } finally { + this.skillReviewInFlightByProject.delete(params.projectRoot); } return record; } From e430ff9c2994a7b63c6d94baa68fc394c0087ccd Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Thu, 30 Apr 2026 14:12:00 +0800 Subject: [PATCH 08/10] fix(autoSkill): address all critical review comments 1. hasAutoSkillSource: narrow catch to ENOENT only (EISDIR/EACCES etc. return false to deny); tighten frontmatter regex to match opening block only. 2. evaluateScopedDecision: add explicit allow for READ_FILE and LS so they don't fall to 'default' which the base PermissionManager might widen; EDIT/WRITE_FILE now call assertRealProjectSkillPath() (async realpath guard) in addition to the lexical check, closing the symlink traversal hole. 3. isScopedTool / getScopedDenyRule: cover READ_FILE and LS so hasRelevantRules returns true and findMatchingDenyRule is correctly consulted for them. 4. recordCompletedToolCall (client.ts): broaden tool name set to match WRITE_TOOL_NAMES in manager.ts (write_file, edit, replace, create_file) and inspect all three arg keys (file_path, path, target_file). Signature changed from (name, filePath?) to (name, args?) to carry all args through. 5. client.ts hardcoded literals: replace threshold/maxTurns/timeoutMs with the named constants AUTO_SKILL_THRESHOLD / DEFAULT_AUTO_SKILL_MAX_TURNS / DEFAULT_AUTO_SKILL_TIMEOUT_MS imported from manager.ts and skillReviewAgentPlanner.ts. 6. toolCallCount / skillsModifiedInSession reset: only reset when skill review is actually scheduled (status === 'scheduled'), not every turn, so the counter correctly accumulates across turns within a session as per design doc. 7. runSkillReview (manager.ts): rethrow after marking record failed, consistent with runExtract behavior. 8. skillReviewNudge.integration.test.ts test 5: rewrite to reflect the in-flight dedup contract (second same-project call returns already_running with existing taskId; third call after completion gets a new task). Add vi.mock for runSkillReviewByAgent so the test does not need a full Config. --- packages/cli/src/nonInteractiveCli.ts | 8 +-- packages/cli/src/ui/hooks/useGeminiStream.ts | 4 +- packages/core/src/core/client.ts | 58 +++++++++++++------ packages/core/src/memory/manager.ts | 1 + .../src/memory/skillReviewAgentPlanner.ts | 43 +++++++++++--- .../skillReviewNudge.integration.test.ts | 40 ++++++++++--- 6 files changed, 110 insertions(+), 44 deletions(-) diff --git a/packages/cli/src/nonInteractiveCli.ts b/packages/cli/src/nonInteractiveCli.ts index 1c5ff173b55..4e330d8cc0e 100644 --- a/packages/cli/src/nonInteractiveCli.ts +++ b/packages/cli/src/nonInteractiveCli.ts @@ -461,9 +461,7 @@ export async function runNonInteractive( .getGeminiClient() .recordCompletedToolCall( finalRequestInfo.name, - typeof finalRequestInfo.args['file_path'] === 'string' - ? finalRequestInfo.args['file_path'] - : undefined, + finalRequestInfo.args as Record, ); if (toolResponse.responseParts) { @@ -617,9 +615,7 @@ export async function runNonInteractive( .getGeminiClient() .recordCompletedToolCall( requestInfo.name, - typeof requestInfo.args['file_path'] === 'string' - ? requestInfo.args['file_path'] - : undefined, + requestInfo.args as Record, ); if (toolResponse.responseParts) { diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index 4e42198bda0..fac3a659f00 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -1885,9 +1885,7 @@ export const useGeminiStream = ( for (const toolCall of geminiTools) { geminiClient?.recordCompletedToolCall( toolCall.request.name, - typeof toolCall.request.args['file_path'] === 'string' - ? toolCall.request.args['file_path'] - : undefined, + toolCall.request.args as Record, ); } diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 1f3424d676c..5d60d85c0c1 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -48,6 +48,11 @@ import { LoopDetectionService } from '../services/loopDetectionService.js'; // Tools import type { RelevantAutoMemoryPromptResult } from '../memory/manager.js'; +import { AUTO_SKILL_THRESHOLD } from '../memory/manager.js'; +import { + DEFAULT_AUTO_SKILL_MAX_TURNS, + DEFAULT_AUTO_SKILL_TIMEOUT_MS, +} from '../memory/skillReviewAgentPlanner.js'; import { isProjectSkillPath } from '../skills/skill-paths.js'; import { ToolNames } from '../tools/tool-names.js'; @@ -530,21 +535,25 @@ export class GeminiClient { toolCallCount: this.toolCallCount, skillsModified: this.skillsModifiedInSession, enabled: autoSkillEnabled, - threshold: 20, - maxTurns: 8, - timeoutMs: 120_000, + threshold: AUTO_SKILL_THRESHOLD, + maxTurns: DEFAULT_AUTO_SKILL_MAX_TURNS, + timeoutMs: DEFAULT_AUTO_SKILL_TIMEOUT_MS, }); - if (skillReviewResult.promise) { - this.pendingMemoryTaskPromises.push( - skillReviewResult.promise.then((record) => { - const touched = record.metadata?.['touchedSkillFiles']; - return Array.isArray(touched) ? touched.length : 0; - }), - ); + if (skillReviewResult.status === 'scheduled') { + // Reset per-session counters only when review is actually dispatched, + // so the count accumulates correctly across turns within a session. + this.toolCallCount = 0; + this.skillsModifiedInSession = false; + if (skillReviewResult.promise) { + this.pendingMemoryTaskPromises.push( + skillReviewResult.promise.then((record) => { + const touched = record.metadata?.['touchedSkillFiles']; + return Array.isArray(touched) ? touched.length : 0; + }), + ); + } } } - this.toolCallCount = 0; - this.skillsModifiedInSession = false; if (!this.config.getManagedAutoMemoryEnabled()) { return; @@ -605,13 +614,24 @@ export class GeminiClient { return promises; } - recordCompletedToolCall(toolName: string, filePath?: string): void { - if ( - filePath && - (toolName === 'write_file' || toolName === 'edit') && - isProjectSkillPath(filePath, this.config.getProjectRoot()) - ) { - this.skillsModifiedInSession = true; + recordCompletedToolCall( + toolName: string, + args?: Record, + ): void { + const SKILL_WRITE_TOOL_NAMES = new Set([ + 'write_file', + 'edit', + 'replace', + 'create_file', + ]); + if (args && SKILL_WRITE_TOOL_NAMES.has(toolName)) { + const filePath = args['file_path'] ?? args['path'] ?? args['target_file']; + if ( + typeof filePath === 'string' && + isProjectSkillPath(filePath, this.config.getProjectRoot()) + ) { + this.skillsModifiedInSession = true; + } } this.toolCallCount += 1; } diff --git a/packages/core/src/memory/manager.ts b/packages/core/src/memory/manager.ts index 2d3540008f0..5809379ab83 100644 --- a/packages/core/src/memory/manager.ts +++ b/packages/core/src/memory/manager.ts @@ -754,6 +754,7 @@ export class MemoryManager { status: 'failed', error: error instanceof Error ? error.message : String(error), }); + throw error; } finally { this.skillReviewInFlightByProject.delete(params.projectRoot); } diff --git a/packages/core/src/memory/skillReviewAgentPlanner.ts b/packages/core/src/memory/skillReviewAgentPlanner.ts index 961ccd908d8..254d0af62c0 100644 --- a/packages/core/src/memory/skillReviewAgentPlanner.ts +++ b/packages/core/src/memory/skillReviewAgentPlanner.ts @@ -18,6 +18,7 @@ import { ToolNames } from '../tools/tool-names.js'; import { isShellCommandReadOnlyAST } from '../utils/shellAstParser.js'; import { stripShellWrapper } from '../utils/shell-utils.js'; import { + assertRealProjectSkillPath, getProjectSkillsRoot, isProjectSkillPath, } from '../skills/skill-paths.js'; @@ -41,25 +42,36 @@ type SkillScopedPermissionManager = Pick< >; /** - * Returns true if the file at `filePath` exists and contains - * `source: auto-skill` in its YAML frontmatter block. - * Returns false if the file does not exist (caller may allow creation). + * Returns true if the file at `filePath` exists and its YAML frontmatter + * contains `source: auto-skill`. + * Returns null if the file does not exist (caller may allow creation). + * Returns false for any other read error (EISDIR, EACCES, etc.) — caller + * should deny in that case. */ async function hasAutoSkillSource(filePath: string): Promise { let content: string; try { content = await fs.readFile(filePath, 'utf-8'); - } catch { - // File does not exist — allow creation. - return null; + } catch (err) { + if ((err as NodeJS.ErrnoException).code === 'ENOENT') { + // File does not exist — allow creation. + return null; + } + // EISDIR, EACCES, EMFILE, EPERM, etc. — deny to be safe. + return false; } - const match = /^---\r?\n([\s\S]*?)\r?\n---/m.exec(content); + // Match the opening frontmatter block only (up to the closing ---) + const match = /^---[ \t]*\r?\n([\s\S]*?)\r?\n---[ \t]*(\r?\n|$)/.exec( + content, + ); if (!match) return false; - return /^\s*source:\s*auto-skill\s*$/m.test(match[1]); + return /^source:\s*auto-skill\s*$/m.test(match[1]); } function isScopedTool(toolName: string): boolean { return ( + toolName === ToolNames.READ_FILE || + toolName === ToolNames.LS || toolName === ToolNames.SHELL || toolName === ToolNames.EDIT || toolName === ToolNames.WRITE_FILE @@ -86,6 +98,10 @@ async function evaluateScopedDecision( projectRoot: string, ): Promise { switch (ctx.toolName) { + case ToolNames.READ_FILE: + case ToolNames.LS: + // Read tools are always allowed — the agent needs to inspect skills. + return 'allow'; case ToolNames.SHELL: { if (!ctx.command) { return 'deny'; @@ -100,10 +116,16 @@ async function evaluateScopedDecision( if (!ctx.filePath || !isProjectSkillPath(ctx.filePath, projectRoot)) { return 'deny'; } + // Reject symlink traversal (realpath check). + try { + await assertRealProjectSkillPath(ctx.filePath, projectRoot); + } catch { + return 'deny'; + } // For existing files, verify source: auto-skill is present. const sourceFlag = await hasAutoSkillSource(ctx.filePath); if (sourceFlag === null) { - // File does not exist yet — allow creation. + // File does not exist yet — allow creation (path already validated above). return 'allow'; } return sourceFlag ? 'allow' : 'deny'; @@ -118,6 +140,9 @@ function getScopedDenyRule( projectRoot: string, ): string | undefined { switch (ctx.toolName) { + case ToolNames.READ_FILE: + case ToolNames.LS: + return undefined; // always allow — no deny rule needed case ToolNames.SHELL: return 'ManagedSkillReview(run_shell_command: read-only only)'; case ToolNames.EDIT: diff --git a/packages/core/src/memory/skillReviewNudge.integration.test.ts b/packages/core/src/memory/skillReviewNudge.integration.test.ts index e445b52d921..4ecfb224fcc 100644 --- a/packages/core/src/memory/skillReviewNudge.integration.test.ts +++ b/packages/core/src/memory/skillReviewNudge.integration.test.ts @@ -15,11 +15,15 @@ import * as fs from 'node:fs/promises'; import * as os from 'node:os'; import * as path from 'node:path'; import type { Content } from '@google/genai'; -import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import type { Config } from '../config/config.js'; import { MemoryManager, AUTO_SKILL_THRESHOLD } from './manager.js'; import { getProjectSkillsRoot } from '../skills/skill-paths.js'; +vi.mock('./skillReviewAgentPlanner.js', () => ({ + runSkillReviewByAgent: vi.fn().mockResolvedValue({ touchedSkillFiles: [] }), +})); + describe('Skill Nudge E2E Integration Tests', () => { let tempDir: string; let projectRoot: string; @@ -252,7 +256,7 @@ describe('Skill Nudge E2E Integration Tests', () => { expect(result.skippedReason).not.toBe('failed'); }); - it('should handle multiple skill reviews for same project', () => { + it('should handle multiple skill reviews for same project (sequential)', async () => { const result1 = mgr.scheduleSkillReview({ projectRoot, sessionId: 'session-1', @@ -263,7 +267,9 @@ describe('Skill Nudge E2E Integration Tests', () => { config: mockConfig, }); - const result2 = mgr.scheduleSkillReview({ + // While first is in-flight, the second call for the same project is + // deduped — it returns skipped with the existing taskId. + const result2WhileRunning = mgr.scheduleSkillReview({ projectRoot, sessionId: 'session-2', history: sampleHistory, @@ -273,12 +279,32 @@ describe('Skill Nudge E2E Integration Tests', () => { config: mockConfig, }); - // Both should be processed independently + expect(result1.status).toBe('scheduled'); expect(result1.taskId).toBeDefined(); - expect(result2.taskId).toBeDefined(); - expect(result1.taskId).not.toBe(result2.taskId); + expect(result2WhileRunning.status).toBe('skipped'); + expect(result2WhileRunning.skippedReason).toBe('already_running'); + // Returns the existing taskId so callers can observe the in-flight task. + expect(result2WhileRunning.taskId).toBe(result1.taskId); + + // Wait for the first review to complete. + await result1.promise; + + // Now a new review for the same project should be accepted. + const result3AfterCompletion = mgr.scheduleSkillReview({ + projectRoot, + sessionId: 'session-3', + history: sampleHistory, + toolCallCount: AUTO_SKILL_THRESHOLD, + skillsModified: false, + enabled: true, + config: mockConfig, + }); + + expect(result3AfterCompletion.status).toBe('scheduled'); + expect(result3AfterCompletion.taskId).toBeDefined(); + expect(result3AfterCompletion.taskId).not.toBe(result1.taskId); - // Both tasks should be tracked + // Both completed tasks should be tracked. const records = mgr.listTasksByType('skill-review', projectRoot); expect(records.length).toBeGreaterThanOrEqual(2); }); From 493e251ba0aace0d29abaf11f49a390cb60df5e6 Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Fri, 8 May 2026 18:01:36 +0800 Subject: [PATCH 09/10] fix(autoSkill): address all review comments - skill-paths: detect dangling symlinks with lstat before treating ENOENT as safe - skill-paths: fix isProjectSkillPath relative path resolution to use projectRoot - skillReviewAgentPlanner: restrict READ_FILE/LS to project root only - skillReviewAgentPlanner: remove SHELL tool from review agent tool list - skillReviewAgentPlanner: add path import; remove unused shell imports - skillReviewAgentPlanner: add comment for buildAgentHistory trailing user message - client: fix runManagedAutoMemoryBackgroundTasks gate widening - client: fix skillsModifiedInSession deadlock - client: add .catch() to skill review promise - client: hoist SKILL_WRITE_TOOL_NAMES to module-level ReadonlySet - agent-core: use full EXCLUDED_TOOLS_FOR_SUBAGENTS for explicit tool list subagents - manager: extend notify() signature to accept 'skill-review' taskType - config: fix JSDoc default value comment (false, not true) --- .../core/src/agents/runtime/agent-core.ts | 7 +- packages/core/src/config/config.ts | 2 +- packages/core/src/core/client.ts | 100 +++++++++++------- packages/core/src/memory/manager.ts | 4 +- .../src/memory/skillReviewAgentPlanner.ts | 38 ++++--- packages/core/src/skills/skill-paths.ts | 20 +++- 6 files changed, 110 insertions(+), 61 deletions(-) diff --git a/packages/core/src/agents/runtime/agent-core.ts b/packages/core/src/agents/runtime/agent-core.ts index e0180eb45dd..444f711b002 100644 --- a/packages/core/src/agents/runtime/agent-core.ts +++ b/packages/core/src/agents/runtime/agent-core.ts @@ -391,10 +391,13 @@ export class AgentCore { .filter((t) => !(t.name && excludedFromSubagents.has(t.name))), ); } else { - // Explicit tool list: only prevent recursive agent spawning. + // Explicit tool list: apply the full subagent exclusion set (not just + // the recursion guard). This prevents control-plane tools + // (CRON_CREATE, TASK_STOP, SEND_MESSAGE, etc.) from leaking into + // explicitly-configured subagents that happen to list them. toolsList.push( ...toolRegistry.getFunctionDeclarationsFiltered( - asStrings.filter((name) => !recursionGuardOnly.has(name)), + asStrings.filter((name) => !excludedFromSubagents.has(name)), ), ); } diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index aa4604cd4a4..ae5d25c00fa 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -538,7 +538,7 @@ export interface ConfigParameters { enableManagedAutoMemory?: boolean; /** Enable managed auto-dream consolidation separately from extraction. Defaults to true. */ enableManagedAutoDream?: boolean; - /** Enable automatic project skill review after tool-heavy sessions. Defaults to true. */ + /** Enable automatic project skill review after tool-heavy sessions. Defaults to false. */ enableAutoSkill?: boolean; /** * Lightweight model for background tasks (memory extraction, dream, /btw side questions). diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index bba1035c8cd..82ae207f15f 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -173,6 +173,12 @@ async function resolveAutoMemoryWithDeadline( } } +/** Tools that can write to the skills directory, used to detect skillsModifiedInSession. */ +const SKILL_WRITE_TOOL_NAMES: ReadonlySet = new Set([ + ToolNames.WRITE_FILE, + ToolNames.EDIT, +]); + export class GeminiClient { private chat?: GeminiChat; private sessionTurnCount = 0; @@ -634,10 +640,64 @@ export class GeminiClient { private runManagedAutoMemoryBackgroundTasks( messageType: SendMessageType, ): void { + // autoSkill counts tool calls and can trigger on both UserQuery and + // ToolResult turns so the threshold can fire mid-session. if ( - messageType !== SendMessageType.UserQuery && - messageType !== SendMessageType.ToolResult + messageType === SendMessageType.UserQuery || + messageType === SendMessageType.ToolResult ) { + const projectRoot = this.config.getProjectRoot(); + const sessionId = this.config.getSessionId(); + const history = this.getHistory(); + const mgr = this.config.getMemoryManager(); + const autoSkillEnabled = this.config.getAutoSkillEnabled(); + + if (autoSkillEnabled) { + const skillReviewResult = mgr.scheduleSkillReview({ + projectRoot, + sessionId, + history, + config: this.config, + toolCallCount: this.toolCallCount, + skillsModified: this.skillsModifiedInSession, + enabled: autoSkillEnabled, + threshold: AUTO_SKILL_THRESHOLD, + maxTurns: DEFAULT_AUTO_SKILL_MAX_TURNS, + timeoutMs: DEFAULT_AUTO_SKILL_TIMEOUT_MS, + }); + if (skillReviewResult.status === 'scheduled') { + // Reset tool-call counter only when a review is actually dispatched, + // so the count accumulates correctly across turns within a session. + this.toolCallCount = 0; + if (skillReviewResult.promise) { + this.pendingMemoryTaskPromises.push( + skillReviewResult.promise + .then((record) => { + const touched = record.metadata?.['touchedSkillFiles']; + return Array.isArray(touched) ? touched.length : 0; + }) + .catch((error: unknown) => { + debugLogger.warn( + 'Failed to run managed skill review.', + error, + ); + return 0; + }), + ); + } + } + // Always reset the skills-modified flag after the scheduleSkillReview + // check, regardless of whether a review was dispatched. This prevents + // a deadlock where skillsModifiedInSession stays true forever: when + // the flag is set, scheduleSkillReview returns 'skipped' immediately + // (never 'scheduled'), so without this reset the flag can never clear. + this.skillsModifiedInSession = false; + } + } + + // extract and dream keep the original UserQuery-only gate to preserve + // the existing "once per user turn" semantics and avoid redundant work. + if (messageType !== SendMessageType.UserQuery) { return; } @@ -645,36 +705,6 @@ export class GeminiClient { const sessionId = this.config.getSessionId(); const history = this.getHistory(); const mgr = this.config.getMemoryManager(); - const autoSkillEnabled = this.config.getAutoSkillEnabled(); - - if (autoSkillEnabled) { - const skillReviewResult = mgr.scheduleSkillReview({ - projectRoot, - sessionId, - history, - config: this.config, - toolCallCount: this.toolCallCount, - skillsModified: this.skillsModifiedInSession, - enabled: autoSkillEnabled, - threshold: AUTO_SKILL_THRESHOLD, - maxTurns: DEFAULT_AUTO_SKILL_MAX_TURNS, - timeoutMs: DEFAULT_AUTO_SKILL_TIMEOUT_MS, - }); - if (skillReviewResult.status === 'scheduled') { - // Reset per-session counters only when review is actually dispatched, - // so the count accumulates correctly across turns within a session. - this.toolCallCount = 0; - this.skillsModifiedInSession = false; - if (skillReviewResult.promise) { - this.pendingMemoryTaskPromises.push( - skillReviewResult.promise.then((record) => { - const touched = record.metadata?.['touchedSkillFiles']; - return Array.isArray(touched) ? touched.length : 0; - }), - ); - } - } - } if (!this.config.getManagedAutoMemoryEnabled()) { return; @@ -739,12 +769,6 @@ export class GeminiClient { toolName: string, args?: Record, ): void { - const SKILL_WRITE_TOOL_NAMES = new Set([ - 'write_file', - 'edit', - 'replace', - 'create_file', - ]); if (args && SKILL_WRITE_TOOL_NAMES.has(toolName)) { const filePath = args['file_path'] ?? args['path'] ?? args['target_file']; if ( diff --git a/packages/core/src/memory/manager.ts b/packages/core/src/memory/manager.ts index 678fbfc3c64..ca0bb5aa27e 100644 --- a/packages/core/src/memory/manager.ts +++ b/packages/core/src/memory/manager.ts @@ -475,9 +475,9 @@ export class MemoryManager { * subscribers can be reached too; the unfiltered subscriber set * always receives the wakeup either way. */ - private notify(taskType?: 'extract' | 'dream'): void { + private notify(taskType?: 'extract' | 'dream' | 'skill-review'): void { for (const fn of this.subscribers) fn(); - if (taskType) { + if (taskType && taskType !== 'skill-review') { const typed = this.subscribersByType.get(taskType); if (typed) for (const fn of typed) fn(); } diff --git a/packages/core/src/memory/skillReviewAgentPlanner.ts b/packages/core/src/memory/skillReviewAgentPlanner.ts index 254d0af62c0..d030c8aa5bb 100644 --- a/packages/core/src/memory/skillReviewAgentPlanner.ts +++ b/packages/core/src/memory/skillReviewAgentPlanner.ts @@ -6,6 +6,7 @@ import type { Content } from '@google/genai'; import * as fs from 'node:fs/promises'; +import * as path from 'node:path'; import type { Config } from '../config/config.js'; import type { PermissionManager } from '../permissions/permission-manager.js'; import type { @@ -15,8 +16,6 @@ import type { import { runForkedAgent } from '../utils/forkedAgent.js'; import { buildFunctionResponseParts } from '../tools/agent/fork-subagent.js'; import { ToolNames } from '../tools/tool-names.js'; -import { isShellCommandReadOnlyAST } from '../utils/shellAstParser.js'; -import { stripShellWrapper } from '../utils/shell-utils.js'; import { assertRealProjectSkillPath, getProjectSkillsRoot, @@ -72,7 +71,6 @@ function isScopedTool(toolName: string): boolean { return ( toolName === ToolNames.READ_FILE || toolName === ToolNames.LS || - toolName === ToolNames.SHELL || toolName === ToolNames.EDIT || toolName === ToolNames.WRITE_FILE ); @@ -99,17 +97,20 @@ async function evaluateScopedDecision( ): Promise { switch (ctx.toolName) { case ToolNames.READ_FILE: - case ToolNames.LS: - // Read tools are always allowed — the agent needs to inspect skills. - return 'allow'; - case ToolNames.SHELL: { - if (!ctx.command) { - return 'deny'; + case ToolNames.LS: { + // Read tools are allowed only within the project root. This prevents + // the review agent from reading arbitrary files (e.g. ~/.aws/credentials) + // and embedding them into a SKILL.md that gets committed. + if (!ctx.filePath) return 'allow'; // no path means listing root — allow + const resolvedRead = path.resolve(projectRoot, ctx.filePath); + const resolvedRoot = path.resolve(projectRoot); + if ( + resolvedRead === resolvedRoot || + resolvedRead.startsWith(resolvedRoot + path.sep) + ) { + return 'allow'; } - const isReadOnly = await isShellCommandReadOnlyAST( - stripShellWrapper(ctx.command), - ); - return isReadOnly ? 'allow' : 'deny'; + return 'deny'; } case ToolNames.EDIT: case ToolNames.WRITE_FILE: { @@ -142,9 +143,7 @@ function getScopedDenyRule( switch (ctx.toolName) { case ToolNames.READ_FILE: case ToolNames.LS: - return undefined; // always allow — no deny rule needed - case ToolNames.SHELL: - return 'ManagedSkillReview(run_shell_command: read-only only)'; + return undefined; // allow within project root — no deny rule needed case ToolNames.EDIT: return `ManagedSkillReview(edit: only within ${getProjectSkillsRoot(projectRoot)} and only on skills with 'source: auto-skill' in frontmatter)`; case ToolNames.WRITE_FILE: @@ -211,6 +210,12 @@ const SKILL_REVIEW_SYSTEM_PROMPT = [ function buildAgentHistory(history: Content[]): Content[] { if (history.length === 0) return []; const last = history[history.length - 1]; + // If the final message is a user turn (not a model turn), drop it. A trailing + // user message means the session ended mid-exchange (e.g. user sent a new + // query that has not yet received a model response). Including it would make + // the skill-review agent see an open "conversation" with an unanswered user + // prompt, which can confuse the model and produce hallucinated tool calls + // attempting to "answer" the user instead of reviewing skills. if (last.role !== 'model') return history.slice(0, -1); const openCalls = (last.parts ?? []).filter((p) => p.functionCall); if (openCalls.length === 0) return [...history]; @@ -267,7 +272,6 @@ export async function runSkillReviewByAgent(params: { tools: [ ToolNames.READ_FILE, ToolNames.LS, - ToolNames.SHELL, ToolNames.WRITE_FILE, ToolNames.EDIT, ], diff --git a/packages/core/src/skills/skill-paths.ts b/packages/core/src/skills/skill-paths.ts index 1766f12f4c4..a905e362298 100644 --- a/packages/core/src/skills/skill-paths.ts +++ b/packages/core/src/skills/skill-paths.ts @@ -19,7 +19,7 @@ export function isProjectSkillPath( projectRoot: string, ): boolean { const skillsRoot = path.resolve(getProjectSkillsRoot(projectRoot)); - const resolved = path.resolve(filePath); + const resolved = path.resolve(projectRoot, filePath); return resolved === skillsRoot || resolved.startsWith(skillsRoot + path.sep); } @@ -82,6 +82,24 @@ export async function assertRealProjectSkillPath( return; } catch (err) { if ((err as NodeJS.ErrnoException).code === 'ENOENT') { + // Before treating a missing path as safe, check whether the path + // itself exists as a dangling symlink (lstat succeeds but realpath + // fails with ENOENT because the link target is missing). A dangling + // symlink could be used to pre-aim a write at an arbitrary location. + try { + const lstatResult = await fs.lstat(check); + if (lstatResult.isSymbolicLink()) { + throw new Error( + `Skills write blocked: dangling symlink detected at "${check}".`, + ); + } + } catch (lstatErr) { + // lstat itself threw — re-throw only if it's not ENOENT (path truly + // doesn't exist at all, which is safe to walk up from). + if ((lstatErr as NodeJS.ErrnoException).code !== 'ENOENT') { + throw lstatErr; + } + } const parent = path.dirname(check); if (parent === check) { // Reached filesystem root without finding an existing node. From 37d83a21d0f061ec63cf7b678cd0783b18b9b4c3 Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Sat, 9 May 2026 10:49:51 +0800 Subject: [PATCH 10/10] fix(autoSkill): address second round review comments - client: reset toolCallCount when scheduleSkillReview returns already_running and count >= threshold, preventing immediate cascade after in-flight review - client.test: add autoSkill branch tests (scheduled/already_running/skills_modified) - client.test: add full recordCompletedToolCall unit tests (skillsModifiedInSession, toolCallCount increment, skill path detection for write_file/edit/read_file) - client.test: add scheduleSkillReview mock to mockMemoryManager - nonInteractiveCli.test: add assertions for recordCompletedToolCall and consumePendingMemoryTaskPromises in tool-call integration test --- packages/cli/src/nonInteractiveCli.test.ts | 11 ++ packages/core/src/core/client.test.ts | 177 +++++++++++++++++++++ packages/core/src/core/client.ts | 13 +- 3 files changed, 199 insertions(+), 2 deletions(-) diff --git a/packages/cli/src/nonInteractiveCli.test.ts b/packages/cli/src/nonInteractiveCli.test.ts index 29fa3b87b69..df030142f35 100644 --- a/packages/cli/src/nonInteractiveCli.test.ts +++ b/packages/cli/src/nonInteractiveCli.test.ts @@ -93,6 +93,8 @@ describe('runNonInteractive', () => { sendMessageStream: Mock; getChatRecordingService: Mock; getChat: Mock; + consumePendingMemoryTaskPromises: Mock; + recordCompletedToolCall: Mock; }; let mockGetDebugResponses: Mock; @@ -407,6 +409,15 @@ describe('runNonInteractive', () => { { type: SendMessageType.ToolResult }, ); expect(processStdoutSpy).toHaveBeenCalledWith('Final answer\n'); + // Verify recordCompletedToolCall is called with the tool name and args. + expect(mockGeminiClient.recordCompletedToolCall).toHaveBeenCalledWith( + 'testTool', + { arg1: 'value1' }, + ); + // Verify consumePendingMemoryTaskPromises is called at the end of the session. + expect( + mockGeminiClient.consumePendingMemoryTaskPromises, + ).toHaveBeenCalled(); }); it('should handle error during tool execution and should send error back to the model', async () => { diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts index 9e0bb9010c3..94163c226ee 100644 --- a/packages/core/src/core/client.test.ts +++ b/packages/core/src/core/client.test.ts @@ -299,6 +299,7 @@ describe('Gemini Client (client.ts)', () => { scheduleExtract: ReturnType; scheduleDream: ReturnType; recall: ReturnType; + scheduleSkillReview: ReturnType; }; beforeEach(async () => { vi.resetAllMocks(); @@ -324,6 +325,10 @@ describe('Gemini Client (client.ts)', () => { selectedDocs: [], strategy: 'none', }), + scheduleSkillReview: vi.fn().mockReturnValue({ + status: 'skipped', + skippedReason: 'below_threshold', + }), }; mockGenerateContentFn = vi.fn().mockResolvedValue({ @@ -1467,6 +1472,178 @@ hello }); }); + describe('autoSkill: scheduleSkillReview via runManagedAutoMemoryBackgroundTasks', () => { + let mockStreamFn: () => AsyncGenerator<{ type: string; value: string }>; + let mockChat: Partial; + + beforeEach(() => { + vi.spyOn(client['config'], 'getAutoSkillEnabled').mockReturnValue(true); + mockStreamFn = async function* () { + yield { type: GeminiEventType.Content, value: 'Done' }; + }; + mockTurnRunFn.mockReturnValue(mockStreamFn()); + mockChat = { + addHistory: vi.fn(), + getHistory: vi.fn().mockReturnValue([ + { role: 'user', parts: [{ text: 'hello' }] }, + { role: 'model', parts: [{ text: 'Done' }] }, + ]), + }; + client['chat'] = mockChat as GeminiChat; + }); + + it('should call scheduleSkillReview with correct params on UserQuery', async () => { + mockMemoryManager.scheduleSkillReview.mockReturnValue({ + status: 'skipped', + skippedReason: 'below_threshold', + }); + + await fromAsync( + client.sendMessageStream( + [{ text: 'a query' }], + new AbortController().signal, + 'prompt-id-autoskill-query', + ), + ); + + expect(mockMemoryManager.scheduleSkillReview).toHaveBeenCalledWith( + expect.objectContaining({ + projectRoot: '/test/project/root', + sessionId: 'test-session-id', + config: mockConfig, + }), + ); + }); + + it('should reset toolCallCount and push promise when review is scheduled', async () => { + let resolveFn!: (v: unknown) => void; + const promise = new Promise<{ metadata?: Record }>( + (r) => { + resolveFn = r as (v: unknown) => void; + }, + ); + mockMemoryManager.scheduleSkillReview.mockReturnValue({ + status: 'scheduled', + taskId: 'task-1', + promise, + }); + + // Artificially bump toolCallCount above 0 to verify it resets. + client['toolCallCount'] = 5; + + await fromAsync( + client.sendMessageStream( + [{ text: 'trigger review' }], + new AbortController().signal, + 'prompt-id-autoskill-scheduled', + ), + ); + + // Counter should have been reset. + expect(client['toolCallCount']).toBe(0); + // Promise should have been pushed to pendingMemoryTaskPromises. + expect(client['pendingMemoryTaskPromises'].length).toBeGreaterThan(0); + + // Resolve promise so there are no dangling promises. + resolveFn({ metadata: { touchedSkillFiles: ['skill.md'] } }); + }); + + it('should reset toolCallCount when review is already_running and count exceeds threshold', async () => { + mockMemoryManager.scheduleSkillReview.mockReturnValue({ + status: 'skipped', + skippedReason: 'already_running', + taskId: 'task-inflight', + }); + + // Simulate counter above threshold. + const AUTO_SKILL_THRESHOLD = 20; + client['toolCallCount'] = AUTO_SKILL_THRESHOLD + 5; + + await fromAsync( + client.sendMessageStream( + [{ text: 'trigger while in-flight' }], + new AbortController().signal, + 'prompt-id-autoskill-inflight', + ), + ); + + // Counter should have been reset to prevent immediate cascade. + expect(client['toolCallCount']).toBe(0); + }); + + it('should always reset skillsModifiedInSession after scheduleSkillReview check', async () => { + mockMemoryManager.scheduleSkillReview.mockReturnValue({ + status: 'skipped', + skippedReason: 'skills_modified_in_session', + }); + + client['skillsModifiedInSession'] = true; + + await fromAsync( + client.sendMessageStream( + [{ text: 'wrote a skill file' }], + new AbortController().signal, + 'prompt-id-autoskill-modified', + ), + ); + + expect(client['skillsModifiedInSession']).toBe(false); + }); + }); + + describe('recordCompletedToolCall', () => { + it('should increment toolCallCount on each call', () => { + expect(client['toolCallCount']).toBe(0); + client.recordCompletedToolCall('read_file'); + expect(client['toolCallCount']).toBe(1); + client.recordCompletedToolCall('write_file'); + expect(client['toolCallCount']).toBe(2); + }); + + it('should set skillsModifiedInSession=true when write_file targets a skill path', () => { + vi.spyOn(client['config'], 'getProjectRoot').mockReturnValue( + '/project', + ); + expect(client['skillsModifiedInSession']).toBe(false); + + client.recordCompletedToolCall('write_file', { + file_path: '/project/.qwen/skills/my-skill.md', + }); + + expect(client['skillsModifiedInSession']).toBe(true); + }); + + it('should not set skillsModifiedInSession=true for write_file outside skill path', () => { + vi.spyOn(client['config'], 'getProjectRoot').mockReturnValue( + '/project', + ); + client.recordCompletedToolCall('write_file', { + file_path: '/project/src/index.ts', + }); + expect(client['skillsModifiedInSession']).toBe(false); + }); + + it('should set skillsModifiedInSession=true when edit targets a skill path', () => { + vi.spyOn(client['config'], 'getProjectRoot').mockReturnValue( + '/project', + ); + client.recordCompletedToolCall('edit', { + path: '/project/.qwen/skills/my-skill.md', + }); + expect(client['skillsModifiedInSession']).toBe(true); + }); + + it('should not set skillsModifiedInSession=true for non-write tools', () => { + vi.spyOn(client['config'], 'getProjectRoot').mockReturnValue( + '/project', + ); + client.recordCompletedToolCall('read_file', { + file_path: '/project/.qwen/skills/my-skill.md', + }); + expect(client['skillsModifiedInSession']).toBe(false); + }); + }); + it('should add context if ideMode is enabled and there are open files but no active file', async () => { // Arrange vi.mocked(ideContextStore.get).mockReturnValue({ diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 82ae207f15f..68005f8a859 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -666,8 +666,8 @@ export class GeminiClient { timeoutMs: DEFAULT_AUTO_SKILL_TIMEOUT_MS, }); if (skillReviewResult.status === 'scheduled') { - // Reset tool-call counter only when a review is actually dispatched, - // so the count accumulates correctly across turns within a session. + // Reset tool-call counter when a review is dispatched so the next + // review only fires after a full new threshold worth of tool calls. this.toolCallCount = 0; if (skillReviewResult.promise) { this.pendingMemoryTaskPromises.push( @@ -685,6 +685,15 @@ export class GeminiClient { }), ); } + } else if ( + skillReviewResult.status === 'skipped' && + skillReviewResult.skippedReason === 'already_running' && + this.toolCallCount >= AUTO_SKILL_THRESHOLD + ) { + // A review is already in-flight; reset the counter so that when the + // current review completes the next call doesn't immediately trigger + // another review without accumulating a fresh threshold of tool calls. + this.toolCallCount = 0; } // Always reset the skills-modified flag after the scheduleSkillReview // check, regardless of whether a review was dispatched. This prevents