diff --git a/CHANGELOG.md b/CHANGELOG.md index 7c476a71f..924feb1ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,15 +10,25 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- `includes:` manifest field (auto | list) for explicit governance of local `.apm/` content. Closes audit-blindness gap (#887). +- `apm audit --ci` now verifies hash integrity of locally deployed files, detecting hand-edits and config drift. (#887) +- `policy.manifest.require_explicit_includes` policy field enforces explicit `includes` lists (rejects `auto` + undeclared). (#887) +- `includes-consent` advisory appears in `apm audit` CLI/JSON output when local content is deployed without an explicit `includes:` declaration (#887) - `apm-primitives-architect` agent: reusable persona for designing and critiquing `.apm/` skill bundles. (#882) - CI: add `APM Self-Check` to `ci.yml` for `apm audit --ci`, regeneration-drift validation, and `merge-gate.yml` `EXPECTED_CHECKS` coverage. (#885) ### Changed +- Lockfile in-memory shape: a synthesized self-entry now appears in `LockFile.dependencies` for local content. The on-disk YAML format is unchanged (data still serialized as flat `local_deployed_files`/`local_deployed_file_hashes` fields). (#887) - Hardened `apm-review-panel` skill: one-comment output contract, pre-arbitration completeness gate, Hybrid E Auth Expert routing, verdict template extracted to `assets/`, and `python-architect` mandatory three-artifact PR review contract (classDiagram + flowchart + Design patterns). (#882) - CI: smoke tests in `build-release.yml`'s `build-and-test` job (Linux x86_64, Linux arm64, Windows) are now gated to promotion boundaries (tag/schedule/dispatch) instead of running on every push to main. Push-time smoke duplicated the merge-time smoke gate in `ci-integration.yml` and burned ~15 redundant codex-binary downloads/day. Tag-cut releases still run smoke as a pre-ship gate; nightly catches upstream codex URL drift; merge-time still gates merges into main. (#878) - CI docs: clarify that branch-protection ruleset must store the check-run name (`gate`), not the workflow display string (`Merge Gate / gate`); document the merge-gate aggregator in `cicd.instructions.md` and mark the legacy stub workflow as deprecated. +### Fixed + +- Audit blindness for local `.apm/` content -- `apm audit --ci` now detects drift, missing files, and content tampering on locally-authored files (not just installed packages). (#887) +- Packer leak risk: local-content fields (`local_deployed_files`, `local_deployed_file_hashes`) are now stripped from bundled lockfiles, preventing phantom self-entries on unpack. (#887) + ### Removed - CI: deleted `ci-integration-pr-stub.yml`. The four stubs were a holdover from the pre-merge-gate model where branch protection required each Tier 2 check name directly. After #867, branch protection requires only `gate`, so the stubs are dead weight. Reduced `EXPECTED_CHECKS` in `merge-gate.yml` to just `Build & Test (Linux)`. diff --git a/apm.lock.yaml b/apm.lock.yaml index ebc5b1c48..3ceb90fca 100644 --- a/apm.lock.yaml +++ b/apm.lock.yaml @@ -33,16 +33,16 @@ local_deployed_file_hashes: .github/agents/agentic-workflows.agent.md: sha256:d1ea2d038e2af8be11d6c95b3213b03b9777fae46f0438efa95d5a803e6c3765 .github/agents/apm-ceo.agent.md: sha256:dfc436e6eeffc7ec1c2f556edb78e4a5166ac36d162ea720d08b4b79af0a9938 .github/agents/apm-primitives-architect.agent.md: sha256:6c01eab74ba18d70f21d45010d636cc6535d63cee81da12e61898d8036e0b028 - .github/agents/auth-expert.agent.md: sha256:efdc8c7fd046409f4467ecf14da9f0d5f0e4a86372e5885c3763e89ff6f9ea69 + .github/agents/auth-expert.agent.md: sha256:85409aab097cf239e5aa7ad61db6c4586be9884ef64a45fa9c894017b046b56b .github/agents/cli-logging-expert.agent.md: sha256:24bf6c4b420c42292700ad0eb80b53d275be5c9cb186d471d706211f8419e3b9 .github/agents/devx-ux-expert.agent.md: sha256:3472680f43b2b4411b9437ec31529216afd4e576e1874c14430935e7f1ded1f2 .github/agents/doc-analyser.agent.md: sha256:47b1d0204904b786c19d4fe84343e86cdab6f92f862f676ba741ffe6e1385679 .github/agents/doc-writer.agent.md: sha256:328a5b9ea079869b8ccd914a6e2135c204225a5eedb42f59a1ec73058f7f0b47 .github/agents/oss-growth-hacker.agent.md: sha256:8d18f5be46913c40ad3aa66fb984575a88988cfac402d39353cdfb09f7e582c5 - .github/agents/python-architect.agent.md: sha256:80443a15945e39c56ae9d45983c2671eccc29b6dcb65bf328ca5dc8ecc87f48d + .github/agents/python-architect.agent.md: sha256:32ed3390cb0e41fea28b3fd95b00124cd097ea0db51f992d2349e6837742723c .github/agents/supply-chain-security-expert.agent.md: sha256:9a4e731b12e7658f71d54c22e90f80ce0c45e3eacbb069b8505ed96ec9e79ba5 .github/instructions/changelog.instructions.md: sha256:1e51ec4c74e847967962bd279dc4c6e582c5d3578490b3c28d5f3acd3e05f73e - .github/instructions/cicd.instructions.md: sha256:170e6fa09bcf4064d33420ffca6b3125bf7011982c4c7a00320af71f2f6c6bf9 + .github/instructions/cicd.instructions.md: sha256:9c0fafc74f743aa97e5adba2168d66c9e3a327b135065e3b804bdbb5f04cda5d .github/instructions/cli.instructions.md: sha256:8e39e8d5047ce88575cb02f87c2bcede584dfef258bd86f7466c7badf136541a .github/instructions/doc-sync.instructions.md: sha256:bb3816254f8df6bffc6faacd556871f36903e9d7f348982f1e2de0339384c696 .github/instructions/encoding.instructions.md: sha256:93db7377dc896f6efecf2c5d8c5d89255a555562f468d034d64c42edd5cf46d5 diff --git a/apm.yml b/apm.yml index 69a008238..dcfe211d0 100644 --- a/apm.yml +++ b/apm.yml @@ -1,6 +1,7 @@ name: apm -version: 0.9.0 +version: 0.10.0 description: APM (Agent Package Manager) -- ship and govern AI agent context +includes: auto author: Microsoft license: MIT diff --git a/docs/src/content/docs/enterprise/apm-policy.md b/docs/src/content/docs/enterprise/apm-policy.md index a3a5bf4c0..bb41048b1 100644 --- a/docs/src/content/docs/enterprise/apm-policy.md +++ b/docs/src/content/docs/enterprise/apm-policy.md @@ -92,7 +92,7 @@ Policy is evaluated at two points. Both use the same policy file and the same me ### CI time (audit gate) -`apm audit --ci --policy org` runs the same checks (plus 6 baseline lockfile checks) and is intended as a required status check on pull requests. It produces SARIF output that GitHub Code Scanning renders inline on the PR diff. +`apm audit --ci --policy org` runs the same checks (plus 7 baseline lockfile checks) and is intended as a required status check on pull requests. It produces SARIF output that GitHub Code Scanning renders inline on the PR diff. For setup, see [CI Policy Enforcement](../../guides/ci-policy-setup/). diff --git a/docs/src/content/docs/enterprise/governance-guide.md b/docs/src/content/docs/enterprise/governance-guide.md index 84636bd96..9002353d6 100644 --- a/docs/src/content/docs/enterprise/governance-guide.md +++ b/docs/src/content/docs/enterprise/governance-guide.md @@ -85,12 +85,15 @@ The scope matrix below is the contract. Every row maps a security or operational | Source attribution | `compilation.source_attribution` | `source-attribution` | `[i] audit-only` | Yes | | Required manifest fields | `manifest.required_fields` | `required-manifest-fields` | `[i] audit-only` | Yes | | Manifest scripts policy | `manifest.scripts` | `scripts-policy` | `[i] audit-only` | Yes | +| Explicit manifest includes | `manifest.require_explicit_includes` | `explicit-includes` | `[i] audit-only` | Yes | | Unmanaged files in governed dirs | `unmanaged_files.action`, `.directories` | `unmanaged-files` | `[i] audit-only` | Yes | | Cache TTL override | `policy.cache.ttl` | -- | `[!] parsed but not enforced` (cache reader uses hardcoded 1h) | -- | | Transitive MCP trust (policy field) | `mcp.trust_transitive` | -- | `[!] parsed but not enforced` (gate is the `--trust-transitive-mcp` CLI flag) | -- | | Manifest content types | `manifest.content_types` | -- | `[!] parsed but not enforced` | -- | -The full schema and the canonical 6+16 check enumeration live in the [Policy Reference](../policy-reference/). The 6 baseline lockfile checks (lockfile presence, ref consistency, deployed files present, no orphaned packages, MCP config consistency, content integrity) run on every `apm audit --ci` regardless of policy and are non-bypassable -- they are covered in section 7. +The full schema and the canonical 7+17 check enumeration live in the [Policy Reference](../policy-reference/). The 7 baseline lockfile checks (lockfile presence, ref consistency, deployed files present, no orphaned packages, MCP config consistency, content integrity, includes consent) run on every `apm audit --ci` regardless of policy and are non-bypassable -- they are covered in section 7. + +`manifest.require_explicit_includes` (`bool`, default `false`) deserves a callout: when set to `true`, the `explicit-includes` check rejects any `apm.yml` that omits `includes:` or sets `includes: auto`. Use this when every published local file must be enumerated in the manifest and reviewable in PR diffs. See the [`includes` field](../../reference/manifest-schema/#39-includes) for the three accepted forms. --- @@ -104,7 +107,6 @@ Be clear with stakeholders about what is out of scope today: - **File integration paths.** Where files land on disk inside the repo is decided by the integrators in each APM package. Policy cannot rewrite a package's file layout. - **Custom agent tools beyond MCP.** If an agent stack ships its own non-MCP tool plugins, they sit outside policy scope. Govern them indirectly through `dependencies.allow`/`deny` and `unmanaged_files`. - **Token scopes and OAuth scopes.** APM does not audit the scope of the GitHub PAT or app token used to fetch policies and packages. Manage that through the standard GitHub controls on the token issuer. -- **Drift on deployed files after install.** `deployed_file_hashes` are recorded in the lockfile but never re-verified. A developer who hand-edits a deployed instruction file post-install will not be detected by `apm install` or `apm audit --ci`. See section 14. - **Anything `apm compile` or `apm run` does.** Those commands trust whatever `apm install` placed on disk. They do not re-check policy. For the underlying threat model (what the content scanner protects against, MCP trust boundary, dependency provenance), see the [Security Model](../security/). @@ -134,7 +136,7 @@ graph TD DryRun["apm install --dry-run"] --> DryPre["[*] Preview only
install_preflight dry_run=True
'Would be blocked' lines, no raise"] - Audit["apm audit --ci
--policy <scope>"] --> AuditRun["[*] Enforcement point 4
6 baseline + 16 policy checks
(only enforcer of audit-only fields)"] + Audit["apm audit --ci
--policy <scope>"] --> AuditRun["[*] Enforcement point 4
7 baseline + 17 policy checks
(only enforcer of audit-only fields)"] style Start fill:#e3f2fd,stroke:#1976d2,stroke-width:2px,color:#000 style Gate fill:#f3e5f5,stroke:#7b1fa2,stroke-width:3px,color:#000 @@ -163,7 +165,7 @@ When you install an APM package that itself declares MCP dependencies, those MCP ### 5d. `apm audit --ci --policy ` -The only enforcer of the audit-only checks (`compilation-strategy`, `source-attribution`, `required-manifest-fields`, `scripts-policy`, `unmanaged-files`). Runs the 6 baseline lockfile checks unconditionally, then -- if a policy is discovered or supplied -- runs the 16 policy checks. This is the check you wire into branch protection. +The only enforcer of the audit-only checks (`compilation-strategy`, `source-attribution`, `required-manifest-fields`, `scripts-policy`, `unmanaged-files`). Runs the 7 baseline lockfile checks unconditionally, then -- if a policy is discovered or supplied -- runs the 17 policy checks. This is the check you wire into branch protection. --- @@ -272,25 +274,25 @@ This is the certitude section. Read it twice if you are deciding whether `apm au | Surface | What it bypasses LOCALLY | What it CANNOT bypass | Reviewable in | |---|---|---|---| -| `apm install --no-policy` | All 16 policy checks at install (incl. transitive MCP, hash pin) | The 6 baseline checks in `apm audit --ci` | git diff of `apm.lock.yaml` in PR | -| `APM_POLICY_DISABLE=1` env | Same as `--no-policy` plus the 16 audit policy checks | The 6 baseline checks in `apm audit --ci` | PR diff; CI env vars in Actions logs | +| `apm install --no-policy` | All 17 policy checks at install (incl. transitive MCP, hash pin) | The 7 baseline checks in `apm audit --ci` | git diff of `apm.lock.yaml` in PR | +| `APM_POLICY_DISABLE=1` env | Same as `--no-policy` plus the 17 audit policy checks | The 7 baseline checks in `apm audit --ci` | PR diff; CI env vars in Actions logs | | Manual edit to `apm.lock.yaml` | Nothing; install regenerates the file each run | Audit baseline `ref-consistency` and `deployed-files-present` | git diff | -| Manual edit to deployed file post-install | Content-equality drift (no re-hash) | Hidden-Unicode scan in `apm audit` content mode | git diff of the deployed file in PR | +| Manual edit to deployed file post-install | Local file content until next audit | Audit baseline `content-integrity` (re-hashes deployed files); hidden-Unicode scan in `apm audit` content mode | git diff of the deployed file in PR | | Direct `git clone` of an APM package, bypassing install | Everything; nothing detects out-of-band file drops | Audit baseline `no-orphaned-packages` and audit-only `unmanaged-files` | git diff | | Fork repo to a personal org | Org policy auto-discovery (resolves to fork's `.github`) | Whatever your CI requires on the canonical repo | branch protection on canonical repo | | `--trust-transitive-mcp` CLI flag | The transitive MCP preflight (second pass) | Direct MCP preflight; baseline content scan; audit MCP checks | CI command lines and Actions logs | | `--allow-insecure` CLI flag | The HTTP-MCP refusal (lets a `http://` MCP through) | All `mcp.*` policy rules; audit MCP checks | CI command lines and Actions logs | -| `apm install --force` | On-disk collision detection AND content-scan blocks | The 16 policy checks; baseline checks at next audit | CI command lines; PR diff of overwritten files | +| `apm install --force` | On-disk collision detection AND content-scan blocks | The 17 policy checks; baseline checks at next audit | CI command lines; PR diff of overwritten files | Notes on specific rows: - **`apm install --no-policy`** also bypasses the `apm install --mcp` preflight, the transitive-MCP preflight, and any project-side `policy.hash` pin. -- **`APM_POLICY_DISABLE=1`** short-circuits discovery to `outcome="disabled"` everywhere -- including `apm audit --ci`, where the 16 policy checks are skipped (the 6 baseline checks still run). +- **`APM_POLICY_DISABLE=1`** short-circuits discovery to `outcome="disabled"` everywhere -- including `apm audit --ci`, where the 17 policy checks are skipped (the 7 baseline checks still run). - **Manual lockfile edits**: `content_hash` mismatch on registry-proxy deps is caught at the next install when downloads resume. - **Direct `git clone`**: `unmanaged-files` only flags governed dirs and only when configured to `warn` / `deny`. - **Fork-to-personal-org**: discovery resolves via `git remote get-url origin`; branch protection on the upstream repo is the trust boundary. -**The non-bypass contract.** The 6 baseline lockfile checks (run by `apm audit --ci` *without* `--no-policy` or `APM_POLICY_DISABLE=1`) are unconditional. They do not consult the policy file, do not depend on org discovery, and are not affected by either escape hatch. Combined with branch protection that requires `apm audit --ci` to pass, no developer override is invisible: a `--no-policy` install leaves a lockfile that audit will reject if the result is inconsistent, and an `APM_POLICY_DISABLE=1` audit run cannot itself bypass the baseline checks. Every override appears in the PR diff, in the workflow file, or in the Actions environment configuration -- all of which are reviewable in code review. +**The non-bypass contract.** The 7 baseline lockfile checks (run by `apm audit --ci` *without* `--no-policy` or `APM_POLICY_DISABLE=1`) are unconditional. They do not consult the policy file, do not depend on org discovery, and are not affected by either escape hatch. Combined with branch protection that requires `apm audit --ci` to pass, no developer override is invisible: a `--no-policy` install leaves a lockfile that audit will reject if the result is inconsistent, and an `APM_POLICY_DISABLE=1` audit run cannot itself bypass the baseline checks. Every override appears in the PR diff, in the workflow file, or in the Actions environment configuration -- all of which are reviewable in code review. **Workstation blast radius.** Because "file presence is execution" for agent files (an instruction or chat-mode file on disk is consumed by the agent runtime as soon as it is opened), the fork-to-personal-org bypass mitigates the *org's* trust gate but not the *individual workstation's*: between fork-clone-install and PR creation, the developer's machine already has the tainted files. Compensating control: an MDM-deployed mirror of `/.github/apm-policy.yml` consulted by a wrapper script around `apm install`, or a workstation-level allowlist of permitted git remotes for APM-managed repos. @@ -312,7 +314,7 @@ When `apm install` returns exit code 0 and the effective org policy is in `enfor You are NOT guaranteed: -- That files on disk are still what install wrote. There is no drift detection on `deployed_file_hashes`. +- That files on disk are still what install wrote. `apm install` itself does not re-verify deployed files; drift is caught by `apm audit --ci` baseline `content-integrity` (re-hashes against `deployed_file_hashes`). - That prompt or instruction content is semantically safe. Only the hidden-Unicode scan runs. - That the audit-only checks (`compilation-strategy`, `source-attribution`, `required-manifest-fields`, `scripts-policy`, `unmanaged-files`) passed. Run `apm audit --ci --policy ` in CI for those. - That non-APM files in the repo conform to anything. APM only governs files it placed. @@ -487,7 +489,6 @@ We publish this list because silent gaps are worse than known ones. Every item b These are the sharp edges. Plan around them; do not assume they are solved. -- **No drift detection on deployed files.** `deployed_file_hashes` are recorded in the lockfile but never re-verified by `apm install` or `apm audit --ci`. A developer who hand-edits a deployed instruction file post-install will not be detected. Operational mitigation: rely on git diff in PR review for files in `.github/`, `.apm/`, and other governed directories. - **`policy.cache.ttl` field is parsed but not honored.** The cache reader uses a hardcoded 1-hour TTL. Setting `policy.cache.ttl: 86400` in your policy will be silently ignored. Operational mitigation: do not rely on this field; assume 1-hour cache TTL universally. - **`mcp.trust_transitive` policy field is parsed but not enforced.** The transitive-MCP gate is the `--trust-transitive-mcp` CLI flag, NOT the policy field. Operational mitigation: govern transitive MCP trust through CI command lines and code review of workflow files, not through policy YAML. - **`manifest.content_types` field is parsed but no check enforces it.** Operational mitigation: do not advertise this field as a control to stakeholders. @@ -498,7 +499,7 @@ These are the sharp edges. Plan around them; do not assume they are solved. - **Non-GitHub remotes are not auto-discovered.** If your project's `git remote get-url origin` points to ADO, GitLab, or a plain git host, policy auto-discovery falls through with no policy applied. Operational mitigation: pass `apm audit --ci --policy ` explicitly in those CI environments. - **Trust anchor is `git remote get-url origin`.** A developer who pushes the project to a personal org will have policy discovery resolve `/.github/apm-policy.yml` -- which they control. Operational mitigation: branch protection on the canonical repo is the trust boundary; nothing about a personal fork can bypass what your CI requires before merge. - **`apm install --dry-run` silently downgrades hash-mismatch.** In dry-run, `raise_blocking_errors=False` (outcome_routing.py:104-119) causes the mismatch to surface as `discovery_miss` with no "Would be blocked" line and exit 0. Operational mitigation: rely on `apm audit --ci` in CI for hash-pin verification, not on `apm install --dry-run`. -- **`apm install --no-policy` help text is misleading.** It claims "Does NOT bypass apm audit --ci" -- this is only true for the 6 baseline lockfile checks; the 16 policy checks ARE bypassed in audit when this flag (or `APM_POLICY_DISABLE=1`) is set. Operational mitigation: do not rely on the help text; the bypass contract in section 7 is authoritative. +- **`apm install --no-policy` help text is misleading.** It claims "Does NOT bypass apm audit --ci" -- this is only true for the 7 baseline lockfile checks; the 17 policy checks ARE bypassed in audit when this flag (or `APM_POLICY_DISABLE=1`) is set. Operational mitigation: do not rely on the help text; the bypass contract in section 7 is authoritative. - **Gate + transitive-MCP preflight may double-emit the same MCP violation.** A single bad transitive MCP can produce two SARIF alerts with the same rule and different code paths. Operational mitigation: dedupe by `(rule_id, server_name)` when aggregating alerts in your SIEM or dashboard. - **No signed attestation that the gate ran.** APM does not currently produce a signed (e.g. SLSA / sigstore) attestation for the install gate or the audit run. Non-repudiation depends on the GitHub Actions audit log plus branch-protection enforcement of the required check. Operational mitigation: pair APM with branch protection requiring `apm audit --ci` as a status check; rely on GitHub's audit log for auditor evidence. @@ -598,7 +599,7 @@ policy: - [`apm-policy.yml`](../apm-policy/) -- the file's mental model. - [CI Policy Enforcement](../../guides/ci-policy-setup/) -- step-by-step CI wiring with YAML. -- [Policy Reference](../policy-reference/) -- complete schema, the canonical 6+16 check enumeration, the 12-row merge rule table, exit codes. +- [Policy Reference](../policy-reference/) -- complete schema, the canonical 7+17 check enumeration, the 12-row merge rule table, exit codes. - [Security Model](../security/) -- threat model, MCP trust boundary, content scanning, token handling. - [Adoption Playbook](../adoption-playbook/) -- broader APM rollout (governance is one phase). - [Lockfile Spec](../../reference/lockfile-spec/) -- lockfile schema for forensic queries. diff --git a/docs/src/content/docs/guides/ci-policy-setup.md b/docs/src/content/docs/guides/ci-policy-setup.md index 0dc828fd3..18f7b841d 100644 --- a/docs/src/content/docs/guides/ci-policy-setup.md +++ b/docs/src/content/docs/guides/ci-policy-setup.md @@ -13,8 +13,8 @@ Set up automated policy enforcement so every pull request is checked against you ## Prerequisites - An organization on GitHub with repositories using APM -- `apm audit --ci` runs 6 baseline consistency checks with no configuration -- `apm audit --ci --policy org` adds 16 policy checks defined in `apm-policy.yml` +- `apm audit --ci` runs 7 baseline consistency checks with no configuration +- `apm audit --ci --policy org` adds 17 policy checks defined in `apm-policy.yml` For the full policy schema, see the [Policy Reference](../../enterprise/policy-reference/). @@ -84,7 +84,7 @@ This catches lockfile/manifest drift, missing files, and hidden Unicode — with ## Step 3: Enable policy enforcement -Add `--policy org` to run the full 16 policy checks on top of baseline: +Add `--policy org` to run the full 17 policy checks on top of baseline: :::note Since this release, `apm audit --ci` auto-discovers the org policy. `--policy org` remains valid as an explicit override; use `--no-policy` to skip discovery. diff --git a/docs/src/content/docs/integrations/ci-cd.md b/docs/src/content/docs/integrations/ci-cd.md index 021305b51..5c2221e44 100644 --- a/docs/src/content/docs/integrations/ci-cd.md +++ b/docs/src/content/docs/integrations/ci-cd.md @@ -147,7 +147,7 @@ apm install ## Governance with `apm audit` -`apm audit --ci` verifies lockfile consistency in CI (6 baseline checks, no configuration). Add `--policy org` to enforce organizational rules (16 additional checks). For full setup including SARIF integration and GitHub Code Scanning, see the [CI Policy Enforcement guide](../../guides/ci-policy-setup/). +`apm audit --ci` verifies lockfile consistency in CI (7 baseline checks, no configuration). Add `--policy org` to enforce organizational rules (17 additional checks). For full setup including SARIF integration and GitHub Code Scanning, see the [CI Policy Enforcement guide](../../guides/ci-policy-setup/). For content scanning and hidden Unicode detection, `apm install` automatically blocks critical findings. Run `apm audit` for on-demand reporting. See [Governance](../../enterprise/governance-guide/) for the full governance model. diff --git a/docs/src/content/docs/reference/cli-commands.md b/docs/src/content/docs/reference/cli-commands.md index 01b4691f5..cd1981167 100644 --- a/docs/src/content/docs/reference/cli-commands.md +++ b/docs/src/content/docs/reference/cli-commands.md @@ -424,7 +424,7 @@ apm audit [PACKAGE] [OPTIONS] - `-v, --verbose` - Show info-level findings and file details - `-f, --format [text|json|sarif|markdown]` - Output format: `text` (default), `json` (machine-readable), `sarif` (GitHub Code Scanning), `markdown` (step summaries). Cannot be combined with `--strip` or `--dry-run`. - `-o, --output PATH` - Write report to file. Auto-detects format from extension (`.sarif`, `.sarif.json` → SARIF; `.json` → JSON; `.md` → Markdown) when `--format` is not specified. -- `--ci` - Run lockfile consistency checks for CI/CD gates. Exit 0 if clean, 1 if violations found. Auto-discovers org policy from the org `.github` repo unless `--no-policy` is set. +- `--ci` - Run lockfile consistency checks for CI/CD gates. Exit 0 if clean, 1 if violations found. Auto-discovers org policy from the org `.github` repo unless `--no-policy` is set. Runs the 7 baseline checks: lockfile presence, ref consistency, deployed files present, no orphaned packages, MCP config consistency, content integrity (Unicode + hash drift on every deployed file including local content), includes consent (advisory). - `--policy SOURCE` - *(Experimental)* Override discovery: `org` (auto-discover from org), file path, or URL. Without this flag, `--ci` auto-discovers. - `--no-policy` - Skip policy discovery and enforcement entirely. Equivalent to `APM_POLICY_DISABLE=1`. - `--no-cache` - Force fresh policy fetch (skip cache). Only relevant with policy discovery active. @@ -498,6 +498,7 @@ apm audit --ci --policy org --no-fail-fast - **Critical**: Tag characters (U+E0001–E007F), bidi overrides (U+202A–E, U+2066–9), variation selectors 17–256 (U+E0100–E01EF, Glassworm attack vector) - **Warning**: Zero-width spaces/joiners (U+200B–D), variation selectors 1–15 (U+FE00–FE0E), bidi marks (U+200E–F, U+061C), invisible operators (U+2061–4), annotation markers (U+FFF9–B), deprecated formatting (U+206A–F), soft hyphen (U+00AD), mid-file BOM - **Info**: Non-breaking spaces, unusual whitespace, emoji presentation selector (U+FE0F). ZWJ between emoji characters is context-downgraded to info. +- **Hash drift (`--ci` only)**: Files deployed by `apm install` whose on-disk SHA-256 no longer matches the value recorded in the lockfile (`deployed_file_hashes`). Covers content from package dependencies AND local `.apm/` content via the synthesized self-entry. ### `apm policy` - Inspect organization policy diff --git a/docs/src/content/docs/reference/lockfile-spec.md b/docs/src/content/docs/reference/lockfile-spec.md index 1c42bc202..8955d20b1 100644 --- a/docs/src/content/docs/reference/lockfile-spec.md +++ b/docs/src/content/docs/reference/lockfile-spec.md @@ -162,6 +162,41 @@ dependencies: Lock files generated before this feature omit `content_hash`. APM handles this gracefully — verification is skipped and the hash is populated on the next install. +### 4.5 Self-Entry Convention + +For uniform traversal, the in-memory `dependencies` map includes a synthesized +entry representing the host project's own local `.apm/` content. This entry is +materialized on read and stripped on write -- it is **never serialized** to disk. + +The on-disk YAML format is unchanged: the host project's local content lives in +the flat top-level fields `local_deployed_files` and `local_deployed_file_hashes` +(see [section 4.4](#44-content-integrity) for the hashing scheme used for +verification). `LockFile.from_yaml()` synthesizes the self-entry from those +fields; `LockFile.to_yaml()` removes it before serialization. Round-trip is +byte-stable. + +The synthesized entry MUST follow this convention: + +| Field | Value | +|-------|-------| +| Map key | `"."` (single dot) | +| `repo_url` | `""` | +| `local_path` | `"."` | +| `source` | `"local"` | +| `is_dev` | `true` | +| `depth` | `0` | +| `deployed_files` | populated from `local_deployed_files` | +| `deployed_file_hashes` | populated from `local_deployed_file_hashes` | + +`is_dev: true` is non-negotiable. Plugin bundle exporters (`apm pack --format +plugin`) skip dev dependencies; this flag ensures the host project's own content +is excluded from distributable bundles via the existing dev-dependency filter, +without requiring exporters to special-case the self-entry. + +Consumers iterating `dependencies` SHOULD treat the `"."` key as the host +project. Consumers reading the on-disk YAML directly will not see this entry -- +they MUST read `local_deployed_files` and `local_deployed_file_hashes` instead. + ## 5. Path Conventions All paths in `deployed_files` MUST use forward slashes (POSIX format), diff --git a/docs/src/content/docs/reference/manifest-schema.md b/docs/src/content/docs/reference/manifest-schema.md index dde0a3922..573925e9b 100644 --- a/docs/src/content/docs/reference/manifest-schema.md +++ b/docs/src/content/docs/reference/manifest-schema.md @@ -48,6 +48,7 @@ license: target: type: scripts: > +includes: > dependencies: apm: > mcp: > @@ -164,7 +165,37 @@ Declares how the package's content is processed during install and compile. Curr | **Value** | Shell command string | | **Description** | Named commands executed via `apm run `. MUST support `--param key=value` substitution. | -### 3.9. `policy` +### 3.9. `includes` + +| | | +|---|---| +| **Type** | `string` (literal `auto`) `\| list` | +| **Required** | OPTIONAL | +| **Default** | Undeclared (legacy implicit auto-publish; flagged by `apm audit`) | +| **Allowed values** | `auto` or a list of paths relative to the project root | + +Declares which local `.apm/` content the project consents to publish when packing or deploying. Three forms are supported: + +1. **Undeclared** -- field omitted. Legacy behaviour: all local `.apm/` content is published as if `auto` were set. `apm audit` emits an `includes-consent` advisory (the check itself passes; the message recommends declaring `includes: auto`) whenever local content is deployed under this form. +2. **`includes: auto`** -- explicit consent to publish all local `.apm/` content via the file scanner. No path enumeration required. Default for newly initialised projects. +3. **`includes: [, ...]`** -- explicit allow-list of paths the project consents to publish. Strongest governance form; changes are reviewable in PR diffs. + +```yaml +# Form 1: undeclared (legacy; audit advisory) +# includes: + +# Form 2: explicit auto-publish (default for new projects) +includes: auto + +# Form 3: explicit path list (strongest governance) +# includes: +# - .apm/instructions/ +# - .apm/skills/my-skill/ +``` + +When `policy.manifest.require_explicit_includes` is `true` (see [Governance guide](../../enterprise/governance-guide/)), only form 3 passes the policy check; `auto` and undeclared are rejected at install/audit time by the `explicit-includes` policy check (not at YAML parse time). + +### 3.10. `policy` | | | |---|---| diff --git a/packages/apm-guide/.apm/skills/apm-usage/governance.md b/packages/apm-guide/.apm/skills/apm-usage/governance.md index c784c1289..961f47c8b 100644 --- a/packages/apm-guide/.apm/skills/apm-usage/governance.md +++ b/packages/apm-guide/.apm/skills/apm-usage/governance.md @@ -50,12 +50,25 @@ manifest: scripts: allow # allow | deny content_types: allow: [] # instructions | skill | hybrid | prompts + require_explicit_includes: false # mandate explicit `includes:` list in apm.yml (rejects `auto` and undeclared) unmanaged_files: action: ignore # ignore | warn | deny directories: [] # directories to scan ``` +## Local content governance + +The `includes:` field in `apm.yml` controls which local `.apm/` content the +package publishes: + +- `includes: auto` -- publish all local `.apm/` content (default, convenient). +- `includes: [path/to/file, ...]` -- explicit list of paths (governance-friendly). + +For compliance, prefer the explicit list and pair it with +`policy.manifest.require_explicit_includes: true`, which rejects `auto` and +undeclared local content at install / audit time. + ## Enforcement modes | Value | Behavior | diff --git a/pyproject.toml b/pyproject.toml index f99a0391b..81d374a9d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "apm-cli" -version = "0.9.2" +version = "0.10.0" description = "MCP configuration tool" readme = "README.md" requires-python = ">=3.10" diff --git a/src/apm_cli/bundle/lockfile_enrichment.py b/src/apm_cli/bundle/lockfile_enrichment.py index 4a8b2d55a..e2e730ef4 100644 --- a/src/apm_cli/bundle/lockfile_enrichment.py +++ b/src/apm_cli/bundle/lockfile_enrichment.py @@ -153,6 +153,17 @@ def enrich_lockfile_for_pack( dep["deployed_files"] = filtered all_mappings.update(mappings) + # Issue #887: strip packaging-time local-content fields from the bundle + # lockfile. ``local_deployed_files`` / ``local_deployed_file_hashes`` + # describe the packager's own repo content, which is intentionally NOT + # shipped in the bundle (see packer.py source-local guard). Leaving them + # in the bundle lockfile would cause ``LockFile.from_yaml()`` on the + # consumer side to synthesize a self-entry whose ``deployed_files`` do + # not exist under the bundle source dir, breaking unpacker verification. + if isinstance(data, dict): + data.pop("local_deployed_files", None) + data.pop("local_deployed_file_hashes", None) + # Build the pack: metadata section (after filtering so we know if mapping # occurred). # Serialize target as a comma-joined string for backward compatibility diff --git a/src/apm_cli/bundle/packer.py b/src/apm_cli/bundle/packer.py index 19253dc96..9f279d993 100644 --- a/src/apm_cli/bundle/packer.py +++ b/src/apm_cli/bundle/packer.py @@ -119,9 +119,15 @@ def pack_bundle( if effective_target == "minimal": effective_target = "all" - # 4. Collect deployed_files from all dependencies, filtered by target + # 4. Collect deployed_files from all dependencies, filtered by target. + # Skip local-source entries: these include the synthesized root self-entry + # (local_path == ".") and any local-path manifest deps. Local content is + # not portable and is bundled separately via the project's own files + # (or rejected outright at L89-97 for manifest-declared local deps). all_deployed: List[str] = [] for dep in lockfile.get_all_dependencies(): + if dep.source == "local": + continue all_deployed.extend(dep.deployed_files) filtered_files, path_mappings = _filter_files_by_target(all_deployed, effective_target) diff --git a/src/apm_cli/commands/_helpers.py b/src/apm_cli/commands/_helpers.py index fcb857ebd..136cd0ae7 100644 --- a/src/apm_cli/commands/_helpers.py +++ b/src/apm_cli/commands/_helpers.py @@ -132,7 +132,7 @@ def _build_expected_install_paths(declared_deps, lockfile, apm_modules_dir: Path expected.add(str(install_path)) if lockfile: - for dep in lockfile.get_all_dependencies(): + for dep in lockfile.get_package_dependencies(): if dep.depth is not None and dep.depth > 1: dep_ref = dep.to_dependency_ref() install_path = dep_ref.get_install_path(apm_modules_dir) @@ -481,6 +481,12 @@ def _create_minimal_apm_yml(config, plugin=False, target_path=None): "description": config["description"], "author": config["author"], "dependencies": {"apm": [], "mcp": []}, + # Issue #887: scaffold with explicit consent for local content + # deployment so day-2 audit doesn't surprise the maintainer with + # an "includes not declared" advisory the moment they drop a + # primitive in .apm/. Override with an explicit path list to + # gate what gets deployed. + "includes": "auto", } if plugin: diff --git a/src/apm_cli/commands/deps/cli.py b/src/apm_cli/commands/deps/cli.py index fc937e3aa..5e7d3f5da 100644 --- a/src/apm_cli/commands/deps/cli.py +++ b/src/apm_cli/commands/deps/cli.py @@ -336,7 +336,7 @@ def tree(global_): if lockfile_path.exists(): lockfile = LockFile.read(lockfile_path) if lockfile: - lockfile_deps = lockfile.get_all_dependencies() + lockfile_deps = lockfile.get_package_dependencies() except Exception: pass diff --git a/src/apm_cli/commands/uninstall/engine.py b/src/apm_cli/commands/uninstall/engine.py index 42025b910..13e264127 100644 --- a/src/apm_cli/commands/uninstall/engine.py +++ b/src/apm_cli/commands/uninstall/engine.py @@ -92,7 +92,7 @@ def _dry_run_uninstall(packages_to_remove, apm_modules_dir, logger): potential_orphans = builtins.set() while queue: parent_url = queue.pop() - for dep in lockfile.get_all_dependencies(): + for dep in lockfile.get_package_dependencies(): key = dep.get_unique_key() if key in potential_orphans: continue @@ -165,7 +165,7 @@ def _cleanup_transitive_orphans(lockfile, packages_to_remove, apm_modules_dir, a queue = builtins.list(removed_repo_urls) while queue: parent_url = queue.pop() - for dep in lockfile.get_all_dependencies(): + for dep in lockfile.get_package_dependencies(): key = dep.get_unique_key() if key in orphans: continue @@ -190,7 +190,7 @@ def _cleanup_transitive_orphans(lockfile, packages_to_remove, apm_modules_dir, a except Exception: pass - for dep in lockfile.get_all_dependencies(): + for dep in lockfile.get_package_dependencies(): key = dep.get_unique_key() if key not in orphans and dep.repo_url not in removed_repo_urls: remaining_deps.add(key) diff --git a/src/apm_cli/deps/lockfile.py b/src/apm_cli/deps/lockfile.py index f8b518eb9..38b839077 100644 --- a/src/apm_cli/deps/lockfile.py +++ b/src/apm_cli/deps/lockfile.py @@ -15,6 +15,8 @@ logger = logging.getLogger(__name__) +_SELF_KEY = "." + @dataclass class LockedDependency: @@ -251,27 +253,40 @@ def get_all_dependencies(self) -> List[LockedDependency]: self.dependencies.values(), key=lambda d: (d.depth, d.repo_url) ) + def get_package_dependencies(self) -> List[LockedDependency]: + """Get all dependencies excluding the virtual self-entry.""" + return [d for d in self.get_all_dependencies() if d.local_path != "."] + def to_yaml(self) -> str: """Serialize to YAML string.""" - data: Dict[str, Any] = { - "lockfile_version": self.lockfile_version, - "generated_at": self.generated_at, - } - if self.apm_version: - data["apm_version"] = self.apm_version - data["dependencies"] = [dep.to_dict() for dep in self.get_all_dependencies()] - if self.mcp_servers: - data["mcp_servers"] = sorted(self.mcp_servers) - if self.mcp_configs: - data["mcp_configs"] = dict(sorted(self.mcp_configs.items())) - if self.local_deployed_files: - data["local_deployed_files"] = sorted(self.local_deployed_files) - if self.local_deployed_file_hashes: - data["local_deployed_file_hashes"] = dict( - sorted(self.local_deployed_file_hashes.items()) - ) - from ..utils.yaml_io import yaml_to_str - return yaml_to_str(data) + # The synthesized self-entry (key ".") is an in-memory normalization + # of the flat local_deployed_files / local_deployed_file_hashes + # fields. It must not be written back into the dependencies list, + # since the flat fields remain the source of truth in YAML. + _self_dep = self.dependencies.pop(_SELF_KEY, None) + try: + data: Dict[str, Any] = { + "lockfile_version": self.lockfile_version, + "generated_at": self.generated_at, + } + if self.apm_version: + data["apm_version"] = self.apm_version + data["dependencies"] = [dep.to_dict() for dep in self.get_all_dependencies()] + if self.mcp_servers: + data["mcp_servers"] = sorted(self.mcp_servers) + if self.mcp_configs: + data["mcp_configs"] = dict(sorted(self.mcp_configs.items())) + if self.local_deployed_files: + data["local_deployed_files"] = sorted(self.local_deployed_files) + if self.local_deployed_file_hashes: + data["local_deployed_file_hashes"] = dict( + sorted(self.local_deployed_file_hashes.items()) + ) + from ..utils.yaml_io import yaml_to_str + return yaml_to_str(data) + finally: + if _self_dep is not None: + self.dependencies[_SELF_KEY] = _self_dep @classmethod def from_yaml(cls, yaml_str: str) -> "LockFile": @@ -294,6 +309,19 @@ def from_yaml(cls, yaml_str: str) -> "LockFile": lock.local_deployed_file_hashes = dict( data.get("local_deployed_file_hashes") or {} ) + # Synthesize a virtual self-entry representing the project's own + # local content. This unifies traversal across "real" dependencies + # and the local package, without changing the on-disk YAML shape. + if lock.local_deployed_files: + lock.dependencies[_SELF_KEY] = LockedDependency( + repo_url="", + source="local", + local_path=".", + is_dev=True, + depth=0, + deployed_files=list(lock.local_deployed_files), + deployed_file_hashes=dict(lock.local_deployed_file_hashes), + ) return lock def write(self, path: Path) -> None: @@ -388,6 +416,8 @@ def get_installed_paths(self, apm_modules_dir: Path) -> List[str]: seen: set = set() paths: List[str] = [] for dep in self.get_all_dependencies(): + if dep.local_path == _SELF_KEY: + continue dep_ref = dep.to_dependency_ref() install_path = dep_ref.get_install_path(apm_modules_dir) try: @@ -423,6 +453,10 @@ def is_semantically_equivalent(self, other: "LockFile") -> bool: return False if sorted(self.local_deployed_files) != sorted(other.local_deployed_files): return False + # Issue #887: include hash dict in equivalence so post-install + # hash updates persist even when the file list is unchanged. + if dict(self.local_deployed_file_hashes) != dict(other.local_deployed_file_hashes): + return False return True @classmethod diff --git a/src/apm_cli/install/phases/cleanup.py b/src/apm_cli/install/phases/cleanup.py index 67436ddb2..351bf7a22 100644 --- a/src/apm_cli/install/phases/cleanup.py +++ b/src/apm_cli/install/phases/cleanup.py @@ -67,9 +67,16 @@ def run(ctx: InstallContext) -> None: # absent from package_deployed_files; deriving orphans from the outcome # set would then misclassify it as removed and delete its previously # deployed files even though it is still in apm.yml. + from apm_cli.deps.lockfile import _SELF_KEY _orphan_total_deleted = 0 _orphan_deleted_targets: list = [] for _orphan_key, _orphan_dep in existing_lockfile.dependencies.items(): + # Issue #887: skip the synthesized self-entry (local .apm/ + # content). Local content stale cleanup happens in the + # post_deps_local phase, not here. Treating self-entry as + # an orphan would delete the project's own .github/ files. + if _orphan_key == _SELF_KEY: + continue if _orphan_key in intended_dep_keys: continue # still in manifest -- handled by stale-cleanup below if not _orphan_dep.deployed_files: diff --git a/src/apm_cli/install/phases/policy_gate.py b/src/apm_cli/install/phases/policy_gate.py index 1942df26b..2a060fdda 100644 --- a/src/apm_cli/install/phases/policy_gate.py +++ b/src/apm_cli/install/phases/policy_gate.py @@ -95,6 +95,17 @@ def run(ctx: "InstallContext") -> None: mcp_deps = getattr(ctx, "direct_mcp_deps", None) + # Pass manifest.includes only when we actually have an APMPackage -- + # leaving the kwarg unset preserves the legacy behaviour for callers + # that have no manifest context (the seam treats "unset" as "skip + # explicit-includes check"). + extra_kwargs = {} + apm_package = getattr(ctx, "apm_package", None) + if apm_package is not None: + extra_kwargs["manifest_includes"] = getattr( + apm_package, "includes", None + ) + audit_result = run_dependency_policy_checks( ctx.deps_to_install, lockfile=ctx.existing_lockfile, @@ -103,6 +114,7 @@ def run(ctx: "InstallContext") -> None: effective_target=None, # target-aware checks after targets phase fetch_outcome=fetch_result.outcome, fail_fast=(enforcement == "block"), + **extra_kwargs, ) # ------------------------------------------------------------------ diff --git a/src/apm_cli/integration/mcp_integrator.py b/src/apm_cli/integration/mcp_integrator.py index 862707f79..85e54c1fa 100644 --- a/src/apm_cli/integration/mcp_integrator.py +++ b/src/apm_cli/integration/mcp_integrator.py @@ -87,7 +87,7 @@ def collect_transitive( lockfile = LockFile.read(lock_path) if lockfile is not None: locked_paths = builtins.set() - for dep in lockfile.get_all_dependencies(): + for dep in lockfile.get_package_dependencies(): if dep.repo_url: yml = ( apm_modules_dir / dep.repo_url / dep.virtual_path / "apm.yml" diff --git a/src/apm_cli/integration/skill_integrator.py b/src/apm_cli/integration/skill_integrator.py index 883ae854a..98c517fea 100644 --- a/src/apm_cli/integration/skill_integrator.py +++ b/src/apm_cli/integration/skill_integrator.py @@ -587,7 +587,7 @@ def _build_ownership_maps(project_root: Path) -> tuple[dict[str, str], dict[str, lockfile = LockFile.read(get_lockfile_path(project_root)) if not lockfile: return owned_by, native_owners - for dep in lockfile.get_all_dependencies(): + for dep in lockfile.get_package_dependencies(): short_owner = (dep.virtual_path or dep.repo_url).rsplit("/", 1)[-1] unique_key = dep.get_unique_key() for deployed_path in dep.deployed_files: diff --git a/src/apm_cli/models/apm_package.py b/src/apm_cli/models/apm_package.py index 7e84c6069..48e1ea4d7 100644 --- a/src/apm_cli/models/apm_package.py +++ b/src/apm_cli/models/apm_package.py @@ -75,7 +75,8 @@ class APMPackage: package_path: Optional[Path] = None # Local path to package target: Optional[Union[str, List[str]]] = None # Target agent(s): single string or list (applies to compile and install) type: Optional[PackageContentType] = None # Package content type: instructions, skill, hybrid, or prompts - + includes: Optional[Union[str, List[str]]] = None # Include-only manifest: 'auto' or list of repo paths + @classmethod def from_apm_yml(cls, apm_yml_path: Path) -> "APMPackage": """Load APM package from apm.yml file. @@ -196,6 +197,21 @@ def from_apm_yml(cls, apm_yml_path: Path) -> "APMPackage": except ValueError as e: raise ValueError(f"Invalid 'type' field in apm.yml: {e}") + # Parse includes (auto-publish opt-in): either the literal "auto" or a list of repo paths + includes = None + if 'includes' in data and data['includes'] is not None: + includes_value = data['includes'] + if isinstance(includes_value, str): + if includes_value != 'auto': + raise ValueError("'includes' must be 'auto' or a list of strings") + includes = 'auto' + elif isinstance(includes_value, list): + if not all(isinstance(item, str) for item in includes_value): + raise ValueError("'includes' must be 'auto' or a list of strings") + includes = list(includes_value) + else: + raise ValueError("'includes' must be 'auto' or a list of strings") + result = cls( name=data['name'], version=data['version'], @@ -208,6 +224,7 @@ def from_apm_yml(cls, apm_yml_path: Path) -> "APMPackage": package_path=apm_yml_path.parent, target=data.get('target'), type=pkg_type, + includes=includes, ) _apm_yml_cache[resolved] = result return result diff --git a/src/apm_cli/policy/ci_checks.py b/src/apm_cli/policy/ci_checks.py index 0b1e94d16..5188af0ec 100644 --- a/src/apm_cli/policy/ci_checks.py +++ b/src/apm_cli/policy/ci_checks.py @@ -16,14 +16,21 @@ from typing import List from .models import CIAuditResult, CheckResult +from ..deps.lockfile import _SELF_KEY # -- Individual checks --------------------------------------------- def _check_lockfile_exists(project_root: Path) -> CheckResult: - """Check that ``apm.lock.yaml`` is present when ``apm.yml`` has deps.""" - from ..deps.lockfile import get_lockfile_path + """Check that ``apm.lock.yaml`` is present when relevant. + + Relevance is determined by either: + * the manifest declaring APM/MCP dependencies, or + * a lockfile already on disk recording local-only content + (``local_deployed_files``) for this project. + """ + from ..deps.lockfile import LockFile, get_lockfile_path apm_yml_path = project_root / "apm.yml" if not apm_yml_path.exists(): @@ -45,6 +52,20 @@ def _check_lockfile_exists(project_root: Path) -> CheckResult: ) has_deps = manifest.has_apm_dependencies() or bool(manifest.get_mcp_dependencies()) + lockfile_path = get_lockfile_path(project_root) + + # Local-only repos may declare no remote/MCP deps but still have a + # lockfile recording the project's own local content (synthesized as + # the "." self-entry). Treat that as having deps so downstream audit + # checks (deployed-files-present, content-integrity) still run. + if not has_deps and lockfile_path.exists(): + try: + lock_for_gating = LockFile.read(lockfile_path) + if lock_for_gating is not None and lock_for_gating.local_deployed_files: + has_deps = True + except Exception: + pass # fall through; if unreadable the missing-lockfile branch warns + if not has_deps: return CheckResult( name="lockfile-exists", @@ -52,7 +73,6 @@ def _check_lockfile_exists(project_root: Path) -> CheckResult: message="No dependencies declared -- lockfile not required", ) - lockfile_path = get_lockfile_path(project_root) if lockfile_path.exists(): return CheckResult( name="lockfile-exists", @@ -146,7 +166,7 @@ def _check_no_orphans( orphaned = [ dep_key for dep_key in lock.dependencies - if dep_key not in manifest_keys + if dep_key not in manifest_keys and dep_key != _SELF_KEY ] if not orphaned: return CheckResult( @@ -223,8 +243,22 @@ def _check_content_integrity( project_root: Path, lock: "LockFile", ) -> CheckResult: - """Check deployed files for critical hidden Unicode characters.""" + """Check deployed files for critical hidden Unicode and hash drift. + + Two signals are evaluated: + * Critical hidden Unicode (steganographic markers) via the file + scanner. + * SHA-256 drift between the on-disk content and the hash recorded + in ``deployed_file_hashes`` at install time. + + Missing files are deliberately skipped here -- ``_check_deployed_files_present`` + already reports those, and double-reporting muddies the audit output. + Symlinks are skipped because they may legitimately point elsewhere, + and lockfile entries without a recorded hash (e.g. directories) are + skipped silently. + """ from ..security.file_scanner import scan_lockfile_packages + from ..utils.content_hash import compute_file_hash findings_by_file, _files_scanned = scan_lockfile_packages(project_root) @@ -234,20 +268,109 @@ def _check_content_integrity( if any(f.severity == "critical" for f in findings): critical_files.append(rel_path) - if not critical_files: + # Per-file hash verification across all dependencies (the synthesized + # self-entry is included in ``lock.dependencies`` so local content is + # covered through the same iteration). + hash_mismatches: List[tuple] = [] # (dep_key, rel_path, expected, actual) + # Local import: matches the scoping pattern used in + # _check_deployed_files_present (line 131); avoids cycles. + from ..integration.base_integrator import BaseIntegrator as _BaseIntegrator + for dep_key, dep in lock.dependencies.items(): + if not dep.deployed_file_hashes: + continue + for rel_path, expected_hash in dep.deployed_file_hashes.items(): + # Path safety: silently skip any rel_path that escapes + # project_root or targets a non-allowlisted prefix. Mirrors + # the guard in _check_deployed_files_present so a forged + # lockfile cannot induce reads outside managed locations. + safe_rel = rel_path.rstrip("/") + if not _BaseIntegrator.validate_deploy_path(safe_rel, project_root): + continue + file_path = project_root / safe_rel + if not file_path.exists(): + continue # _check_deployed_files_present owns this signal + if file_path.is_symlink(): + continue + if not file_path.is_file(): + continue + actual_hash = compute_file_hash(file_path) + if actual_hash != expected_hash: + hash_mismatches.append((dep_key, rel_path, expected_hash, actual_hash)) + + if not critical_files and not hash_mismatches: return CheckResult( name="content-integrity", passed=True, - message="No critical hidden Unicode characters detected", + message="No critical hidden Unicode or hash drift detected", ) + + details: List[str] = [] + for rel_path in critical_files: + details.append(f"unicode: {rel_path}") + for dep_key, rel_path, expected, actual in hash_mismatches: + # Truncate hashes for terminal width; full hashes available via JSON output. + exp_short = expected.split(":", 1)[-1][:12] if ":" in expected else expected[:12] + act_short = actual.split(":", 1)[-1][:12] if ":" in actual else actual[:12] + # Render the synthesized self-entry with a friendly label rather + # than the internal _SELF_KEY constant ("." is opaque to users). + dep_label = "" if dep_key == _SELF_KEY else dep_key + details.append( + f"hash-drift: {rel_path} (dep={dep_label}, expected={exp_short}..., actual={act_short}...)" + ) + + parts: List[str] = [] + remedies: List[str] = [] + if critical_files: + parts.append(f"{len(critical_files)} file(s) with critical hidden Unicode") + remedies.append("'apm audit --strip' to clean Unicode") + if hash_mismatches: + parts.append(f"{len(hash_mismatches)} file(s) with hash drift") + remedies.append("'apm install' to restore drifted files") + summary = "; ".join(parts) + remedy = " and ".join(remedies) return CheckResult( name="content-integrity", passed=False, - message=( - f"{len(critical_files)} file(s) contain critical hidden Unicode -- " - "run 'apm audit --strip' to clean" - ), - details=critical_files, + message=f"{summary} -- run {remedy}", + details=details, + ) + + +def _check_includes_consent( + manifest: "APMPackage", + lock: "LockFile", +) -> CheckResult: + """Advisory check: nudge toward declaring 'includes:' when local content is deployed. + + This check never hard-fails -- it always returns ``passed=True``. When + the lockfile records local content but the manifest does not declare an + ``includes:`` field, the result message advises the maintainer to add + ``includes: auto`` (or an explicit list) for governance clarity. The + ``[+]`` rendered by the CI table is intentional: this is informational, + not a violation. Use ``manifest.require_explicit_includes`` policy to + promote this to a hard block. + """ + if not lock.local_deployed_files: + return CheckResult( + name="includes-consent", + passed=True, + message="No local content deployed -- includes consent check skipped", + ) + + if manifest.includes is None: + return CheckResult( + name="includes-consent", + passed=True, + message=( + "Local content deployed but 'includes:' not declared in " + "apm.yml -- consider adding 'includes: auto' for explicit consent" + ), + ) + + return CheckResult( + name="includes-consent", + passed=True, + message="'includes:' declared -- local content deployment is explicitly consented", ) @@ -317,6 +440,10 @@ def _run(check: CheckResult) -> bool: return result # Check 6: Content integrity - _run(_check_content_integrity(project_root, lock)) + if _run(_check_content_integrity(project_root, lock)): + return result + + # Check 7: Includes consent (advisory; never hard-fails) + _run(_check_includes_consent(manifest, lock)) return result diff --git a/src/apm_cli/policy/parser.py b/src/apm_cli/policy/parser.py index 16a26bc02..95ca9a73d 100644 --- a/src/apm_cli/policy/parser.py +++ b/src/apm_cli/policy/parser.py @@ -140,6 +140,11 @@ def validate_policy(data: dict) -> Tuple[List[str], List[str]]: errors.append( f"manifest.scripts must be one of {sorted(_VALID_SCRIPTS)}, got '{scripts}'" ) + rei = manifest.get("require_explicit_includes") + if rei is not None and not isinstance(rei, bool): + errors.append( + f"manifest.require_explicit_includes must be a boolean, got '{rei}'" + ) # unmanaged_files.action uf = data.get("unmanaged_files") @@ -210,6 +215,9 @@ def _build_policy(data: dict) -> ApmPolicy: required_fields=_parse_tuple(manifest_data.get("required_fields")), scripts=manifest_data.get("scripts", ManifestPolicy.scripts), content_types=manifest_data.get("content_types"), + require_explicit_includes=bool( + manifest_data.get("require_explicit_includes", False) + ), ) uf_data = data.get("unmanaged_files") or {} diff --git a/src/apm_cli/policy/policy_checks.py b/src/apm_cli/policy/policy_checks.py index d04aa5511..39dbee595 100644 --- a/src/apm_cli/policy/policy_checks.py +++ b/src/apm_cli/policy/policy_checks.py @@ -569,6 +569,65 @@ def _check_required_manifest_fields( ) +_INCLUDES_NOT_PROVIDED = object() + + +def _check_includes_explicit( + manifest_includes, + policy: "ManifestPolicy", +) -> CheckResult: + """Check: manifest declares an explicit ``includes:`` list when policy requires it. + + ``manifest_includes`` is the parsed value of the manifest's ``includes:`` + field as exposed by :class:`APMPackage` -- one of ``None`` (field + absent), the literal string ``"auto"``, or a list of repo-relative + path strings. + + Violation when ``policy.require_explicit_includes`` is True and + ``manifest_includes`` is ``None`` or ``"auto"``. + """ + if not policy.require_explicit_includes: + return CheckResult( + name="explicit-includes", + passed=True, + message="Explicit includes not required by policy", + ) + + if manifest_includes is None: + return CheckResult( + name="explicit-includes", + passed=False, + message=( + "Policy requires explicit 'includes:' paths but none are " + "declared. Add 'includes: [, ...]' to apm.yml with " + "the paths you intend to publish." + ), + details=[ + "includes: , require_explicit_includes: true", + ], + ) + + if manifest_includes == "auto": + return CheckResult( + name="explicit-includes", + passed=False, + message=( + "Policy requires explicit 'includes:' paths but manifest " + "uses 'includes: auto'. Replace with an explicit list of " + "paths." + ), + details=[ + "includes: 'auto', require_explicit_includes: true", + ], + ) + + return CheckResult( + name="explicit-includes", + passed=True, + message="Manifest declares explicit includes paths", + ) + + def _check_scripts_policy( raw_yml: Optional[dict], policy: "ManifestPolicy", @@ -708,6 +767,7 @@ def run_dependency_policy_checks( effective_target: Optional[str] = None, fetch_outcome: Optional[str] = None, fail_fast: bool = True, + manifest_includes=_INCLUDES_NOT_PROVIDED, ) -> CIAuditResult: """Evaluate :class:`ApmPolicy` against an already-resolved dependency set. @@ -739,6 +799,12 @@ def run_dependency_policy_checks( ``"cached"``, ``"fetched"``). Currently informational only. fail_fast: Stop after the first failing check (default ``True``). + manifest_includes: + The parsed value of the manifest's ``includes:`` field + (``None``, ``"auto"``, or a list of paths). When omitted, + the ``explicit-includes`` check is skipped -- callers that + do not have manifest information available (e.g. dep-only + seams) can leave it unset. Returns ------- @@ -816,6 +882,15 @@ def _run(check: CheckResult) -> bool: ): return result + # -- Manifest-level explicit-includes check -------------------- + # Only run when the caller supplied the manifest includes value. + # Dep-only seams that lack manifest context (legacy callers) skip + # this check; the install pipeline and ``apm audit`` wrappers both + # supply it. + if manifest_includes is not _INCLUDES_NOT_PROVIDED: + if _run(_check_includes_explicit(manifest_includes, policy.manifest)): + return result + # NOTE: compilation strategy, source attribution, manifest fields, # scripts policy, and unmanaged files are disk-level / manifest-level # concerns. They are NOT included in the resolved-dep seam because @@ -886,6 +961,7 @@ def run_policy_checks( mcp_deps=mcp_deps, # effective_target=None: target checks handled below from raw_yml fail_fast=fail_fast, + manifest_includes=manifest.includes, ) result.checks.extend(dep_result.checks) diff --git a/src/apm_cli/policy/schema.py b/src/apm_cli/policy/schema.py index 98e78d7cc..be444d3c6 100644 --- a/src/apm_cli/policy/schema.py +++ b/src/apm_cli/policy/schema.py @@ -84,6 +84,7 @@ class ManifestPolicy: required_fields: Tuple[str, ...] = () scripts: str = "allow" # allow | deny content_types: Optional[Dict] = None # {"allow": [...]} + require_explicit_includes: bool = False @dataclass(frozen=True) diff --git a/tests/integration/test_local_content_audit.py b/tests/integration/test_local_content_audit.py new file mode 100644 index 000000000..83c3e4799 --- /dev/null +++ b/tests/integration/test_local_content_audit.py @@ -0,0 +1,301 @@ +"""End-to-end integration tests for local .apm/ content audit (issue #887). + +Exercises the full install + audit round-trip using a real fake APM +project on disk and ``apm`` invoked via subprocess. Verifies: + +* ``apm install`` records local content into the lockfile + (``local_deployed_files`` / ``local_deployed_file_hashes``). +* ``apm audit --ci`` passes on a clean install and surfaces an + ``[!]`` advisory for missing ``includes:`` declaration. +* Hash drift on a deployed file is detected and reported as + ``hash-drift`` with a non-zero exit code. +* Declaring ``includes:`` (auto or explicit list) silences the + consent advisory. +* ``policy.manifest.require_explicit_includes`` blocks + ``includes: auto`` via ``apm audit --ci --policy ``. +""" + +import json +import shutil +import subprocess +from pathlib import Path + +import pytest +import yaml + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def apm_command(): + """Resolve the ``apm`` CLI executable for subprocess invocation.""" + apm_on_path = shutil.which("apm") + if apm_on_path: + return apm_on_path + venv_apm = Path(__file__).parent.parent.parent / ".venv" / "bin" / "apm" + if venv_apm.exists(): + return str(venv_apm) + return "apm" + + +def _write_manifest(project: Path, *, includes=None) -> None: + """Write a minimal apm.yml. ``includes`` may be None, 'auto', or a list.""" + data = { + "name": "audit-fixture", + "version": "0.1.0", + "description": "Fixture project for local-content audit integration tests", + } + if includes is not None: + data["includes"] = includes + (project / "apm.yml").write_text(yaml.dump(data, sort_keys=False)) + + +def _seed_local_content(project: Path) -> None: + """Create a SKILL.md and an instruction file under .apm/.""" + skill_dir = project / ".apm" / "skills" / "foo" + skill_dir.mkdir(parents=True) + (skill_dir / "SKILL.md").write_text( + "---\ndescription: foo skill for tests\n---\n# Foo\nbody\n" + ) + + instr_dir = project / ".apm" / "instructions" + instr_dir.mkdir(parents=True) + (instr_dir / "bar.instructions.md").write_text( + "---\napplyTo: '**'\n---\n# Bar\nbody\n" + ) + + # .github/ already-exists triggers copilot target detection (and the + # default fallback is also copilot, so this is belt-and-suspenders). + (project / ".github").mkdir() + + +def _make_project(tmp_path: Path, *, includes=None) -> Path: + """Build a fake APM project rooted at ``tmp_path/proj``.""" + project = tmp_path / "proj" + project.mkdir() + _write_manifest(project, includes=includes) + _seed_local_content(project) + return project + + +def _run_apm(apm_command: str, args, cwd: Path): + """Invoke the apm CLI and return CompletedProcess.""" + return subprocess.run( + [apm_command, *args], + cwd=cwd, + capture_output=True, + text=True, + timeout=120, + ) + + +def _install(apm_command: str, project: Path): + result = _run_apm(apm_command, ["install"], project) + assert result.returncode == 0, ( + f"apm install failed (exit {result.returncode}):\n" + f"STDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}" + ) + return result + + +def _audit_json(apm_command: str, project: Path, extra_args=()): + """Run ``apm audit --ci -f json`` and return (exit_code, parsed_json).""" + args = ["audit", "--ci", "--no-policy", "-f", "json", *extra_args] + if "--policy" in extra_args: + # When policy is provided, drop --no-policy so the override wins. + args = ["audit", "--ci", "-f", "json", *extra_args] + result = _run_apm(apm_command, args, project) + # JSON output is on stdout; tolerate trailing log lines. + payload = None + try: + payload = json.loads(result.stdout) + except json.JSONDecodeError: + # Some checks may print warnings on stdout before the JSON body. + # Try to locate the first '{' and parse from there. + idx = result.stdout.find("{") + if idx >= 0: + payload = json.loads(result.stdout[idx:]) + assert payload is not None, ( + f"audit JSON parse failed:\nSTDOUT:\n{result.stdout}\n" + f"STDERR:\n{result.stderr}" + ) + return result.returncode, payload, result + + +def _check(payload: dict, name: str) -> dict: + for c in payload["checks"]: + if c["name"] == name: + return c + raise AssertionError( + f"check '{name}' not found in payload (have: " + f"{[c['name'] for c in payload['checks']]})" + ) + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +class TestLocalContentAudit: + """End-to-end coverage for issue #887 close-the-gap behavior.""" + + def test_install_records_self_entry(self, tmp_path, apm_command): + """Test A: ``apm install`` records local files + hashes in lockfile.""" + project = _make_project(tmp_path) + _install(apm_command, project) + + lock_path = project / "apm.lock.yaml" + assert lock_path.exists(), "apm.lock.yaml not created by install" + + with open(lock_path) as f: + lock = yaml.safe_load(f) + + deployed = lock.get("local_deployed_files") or [] + hashes = lock.get("local_deployed_file_hashes") or {} + + assert deployed, f"local_deployed_files empty: {lock!r}" + assert hashes, f"local_deployed_file_hashes empty: {lock!r}" + + # Both seeded primitives should be deployed under .github/ (copilot + # target). Skill goes to .github/skills/foo/, instruction to + # .github/instructions/bar.instructions.md. + assert any( + "instructions/bar.instructions.md" in p for p in deployed + ), f"instruction not deployed: {deployed}" + assert any("skills/foo" in p for p in deployed), ( + f"skill not deployed: {deployed}" + ) + + # The instruction file (a regular file) must have a hash entry. + instr_keys = [ + k for k in hashes if k.endswith("bar.instructions.md") + ] + assert instr_keys, ( + f"no hash recorded for bar.instructions.md: {list(hashes)}" + ) + assert hashes[instr_keys[0]].startswith("sha256:"), ( + f"hash not sha256-prefixed: {hashes[instr_keys[0]]!r}" + ) + + def test_audit_passes_clean_install(self, tmp_path, apm_command): + """Test B: clean install passes audit; consent advisory present.""" + project = _make_project(tmp_path) # no includes: declared + _install(apm_command, project) + + exit_code, payload, _ = _audit_json(apm_command, project) + assert exit_code == 0, ( + f"audit --ci failed on clean install: {payload}" + ) + assert payload["passed"] is True + + ci = _check(payload, "content-integrity") + assert ci["passed"] is True, f"content-integrity not passing: {ci}" + + consent = _check(payload, "includes-consent") + # Advisory is encoded in the raw message text; the JSON payload does + # not guarantee CLI-style status markers such as [!]. + assert consent["passed"] is True + assert "includes:" in consent["message"], ( + f"expected includes guidance in includes-consent message, got: " + f"{consent['message']!r}" + ) + assert "includes: auto" in consent["message"], ( + f"expected auto-includes advisory in includes-consent message, " + f"got: {consent['message']!r}" + ) + + def test_audit_detects_drift(self, tmp_path, apm_command): + """Test C: hand-edit a deployed file -> hash-drift detected.""" + project = _make_project(tmp_path) + _install(apm_command, project) + + # Tamper with the deployed copy under .github/. + deployed_instr = ( + project / ".github" / "instructions" / "bar.instructions.md" + ) + assert deployed_instr.exists(), ( + f"target file missing post-install: {deployed_instr}" + ) + with open(deployed_instr, "a") as f: + f.write("\nTAMPERED\n") + + exit_code, payload, result = _audit_json(apm_command, project) + assert exit_code != 0, ( + f"audit --ci should fail on drift but passed: {payload}\n" + f"STDERR: {result.stderr}" + ) + ci = _check(payload, "content-integrity") + assert ci["passed"] is False, f"content-integrity should fail: {ci}" + # Either the message or details should reference hash-drift and the + # path of the modified file. + haystack = ci["message"] + " " + " ".join(ci.get("details") or []) + assert "hash-drift" in haystack, ( + f"'hash-drift' not in failure output: {haystack!r}" + ) + assert "bar.instructions.md" in haystack, ( + f"path of modified file not surfaced: {haystack!r}" + ) + + def test_audit_passes_with_explicit_includes(self, tmp_path, apm_command): + """Test D: declaring ``includes:`` removes the consent advisory.""" + # Use 'auto' first. + project = _make_project(tmp_path, includes="auto") + _install(apm_command, project) + + exit_code, payload, _ = _audit_json(apm_command, project) + assert exit_code == 0, f"audit failed unexpectedly: {payload}" + + consent = _check(payload, "includes-consent") + assert consent["passed"] is True + assert "[!]" not in consent["message"], ( + f"unexpected '[!]' advisory when includes is declared: " + f"{consent['message']!r}" + ) + + # Also verify the explicit-list form by rewriting and re-auditing. + _write_manifest( + project, + includes=[ + ".apm/skills/foo/SKILL.md", + ".apm/instructions/bar.instructions.md", + ], + ) + exit_code2, payload2, _ = _audit_json(apm_command, project) + assert exit_code2 == 0, f"audit failed with explicit list: {payload2}" + consent2 = _check(payload2, "includes-consent") + assert "[!]" not in consent2["message"] + + def test_policy_blocks_undeclared_includes(self, tmp_path, apm_command): + """Test E: ``require_explicit_includes`` blocks ``includes: auto``.""" + project = _make_project(tmp_path, includes="auto") + _install(apm_command, project) + + # Local policy file -- pass via --policy . + policy_path = project / "apm-policy.yml" + policy_path.write_text( + yaml.dump( + { + "name": "test-explicit-includes", + "enforcement": "block", + "manifest": {"require_explicit_includes": True}, + }, + sort_keys=False, + ) + ) + + exit_code, payload, result = _audit_json( + apm_command, project, extra_args=["--policy", str(policy_path)] + ) + assert exit_code != 0, ( + f"policy should have blocked includes: auto but passed:\n" + f"{payload}\nSTDERR:\n{result.stderr}" + ) + check = _check(payload, "explicit-includes") + assert check["passed"] is False + # Message should mention the policy requirement. + assert "explicit" in check["message"].lower() + assert "includes" in check["message"].lower() diff --git a/tests/unit/install/test_file_scanner.py b/tests/unit/install/test_file_scanner.py new file mode 100644 index 000000000..5ae75f385 --- /dev/null +++ b/tests/unit/install/test_file_scanner.py @@ -0,0 +1,201 @@ +"""Coverage tests for ``compute_deployed_hashes`` (issue #887, Wave 3). + +The lockfile self-entry synthesis (Wave 1) made it visible that +``local_deployed_files`` and ``local_deployed_file_hashes`` can legitimately +have different cardinalities: directory entries are tracked in the file list +(for cleanup/audit) but never hashed (only regular file contents have +meaningful provenance). + +These tests pin down that contract so future changes don't accidentally: + +* Drop hashes for regular files (silent audit blindness). +* Add hashes for directories or symlinks (false provenance claims). + +The architect's section 1.6 edge cases (directories, symlinks, empty files, +hidden files) are exercised against a synthesized fixture project that +mirrors the real ``.apm/`` shape this repo emits. +""" + +from __future__ import annotations + +import os +import re +import sys +from pathlib import Path + +import pytest + +from apm_cli.install.phases.lockfile import compute_deployed_hashes + + +_SHA256_PREFIXED = re.compile(r"^sha256:[0-9a-f]{64}$") + + +def _build_fixture_project(root: Path) -> tuple[list[str], set[str], set[str]]: + """Create a fixture project with a representative .apm/-shaped layout. + + Returns ``(deployed_files, expected_dirs, expected_files)`` where + ``deployed_files`` is the list a real integrator would emit (mix of + regular files and a few directory entries, mirroring the live + behavior observed in this repo: skill subtrees are tracked as a + single directory entry). + + All paths are POSIX-relative to ``root``. + """ + apm_dir = root / ".apm" + skills_dir = root / ".github" / "skills" + prompts_dir = root / ".github" / "prompts" + apm_dir.mkdir(parents=True) + skills_dir.mkdir(parents=True) + prompts_dir.mkdir(parents=True) + + files: list[tuple[str, bytes]] = [ + (".apm/agents/sample.agent.md", b"# sample agent\n"), + (".apm/skills/devx/SKILL.md", b"# devx skill\n"), + (".github/prompts/build.prompt.md", b"prompt body\n"), + (".github/prompts/empty.prompt.md", b""), + (".mcp.json", b"{}\n"), + ] + skill_a = skills_dir / "skill-a" + skill_b = skills_dir / "skill-b" + skill_a.mkdir() + skill_b.mkdir() + (skill_a / "SKILL.md").write_bytes(b"# skill a\n") + (skill_b / "SKILL.md").write_bytes(b"# skill b\n") + + expected_files: set[str] = set() + for rel, payload in files: + full = root / rel + full.parent.mkdir(parents=True, exist_ok=True) + full.write_bytes(payload) + expected_files.add(rel) + expected_files.update({ + ".github/skills/skill-a/SKILL.md", + ".github/skills/skill-b/SKILL.md", + }) + + expected_dirs: set[str] = { + ".github/skills/skill-a", + ".github/skills/skill-b", + } + + deployed_files = sorted(expected_files | expected_dirs) + return deployed_files, expected_dirs, expected_files + + +class TestComputeDeployedHashesCoverage: + """Verify the file/hash coverage invariants for a synthesized project.""" + + def test_every_regular_file_has_hash_entry(self, tmp_path: Path) -> None: + deployed_files, _expected_dirs, expected_files = _build_fixture_project(tmp_path) + hashes = compute_deployed_hashes(deployed_files, tmp_path) + missing = expected_files - set(hashes.keys()) + assert missing == set(), ( + f"regular files missing from hashes: {sorted(missing)}" + ) + + def test_directories_excluded_from_hashes(self, tmp_path: Path) -> None: + deployed_files, expected_dirs, _expected_files = _build_fixture_project(tmp_path) + hashes = compute_deployed_hashes(deployed_files, tmp_path) + leaked = expected_dirs & set(hashes.keys()) + assert leaked == set(), ( + f"directory entries leaked into hashes: {sorted(leaked)}" + ) + + def test_set_difference_equals_directory_entries(self, tmp_path: Path) -> None: + """The canonical invariant: ``files - hashes.keys() == dirs``.""" + deployed_files, expected_dirs, _expected_files = _build_fixture_project(tmp_path) + hashes = compute_deployed_hashes(deployed_files, tmp_path) + diff = set(deployed_files) - set(hashes.keys()) + assert diff == expected_dirs, ( + f"unexpected coverage gap: diff={sorted(diff)} expected_dirs={sorted(expected_dirs)}" + ) + + def test_hash_values_are_sha256_hex_64chars(self, tmp_path: Path) -> None: + deployed_files, _expected_dirs, _expected_files = _build_fixture_project(tmp_path) + hashes = compute_deployed_hashes(deployed_files, tmp_path) + assert hashes, "fixture must produce at least one hash" + for rel, value in hashes.items(): + assert _SHA256_PREFIXED.match(value), ( + f"hash for {rel!r} not in sha256:<64hex> form: {value!r}" + ) + + def test_empty_files_are_hashed(self, tmp_path: Path) -> None: + """Zero-byte files are still regular files and must be hashed.""" + deployed_files, _expected_dirs, _expected_files = _build_fixture_project(tmp_path) + hashes = compute_deployed_hashes(deployed_files, tmp_path) + rel = ".github/prompts/empty.prompt.md" + assert rel in hashes, ( + f"empty file {rel!r} missing from hashes (regular file coverage gap)" + ) + # SHA-256 of empty bytes is well-known. + assert hashes[rel] == ( + "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + ) + + def test_hidden_files_are_hashed(self, tmp_path: Path) -> None: + """Dotfiles like ``.mcp.json`` must not be silently skipped.""" + deployed_files, _expected_dirs, _expected_files = _build_fixture_project(tmp_path) + hashes = compute_deployed_hashes(deployed_files, tmp_path) + assert ".mcp.json" in hashes, ( + "hidden dotfile coverage gap: .mcp.json not hashed" + ) + + @pytest.mark.skipif( + sys.platform.startswith("win"), + reason="symlink creation requires elevated privileges on Windows", + ) + def test_symlinks_excluded_from_hashes(self, tmp_path: Path) -> None: + """Documented decision: symlinks are NEVER hashed. + + ``compute_deployed_hashes`` filters via ``is_file() and not + is_symlink()`` (see ``src/apm_cli/install/phases/lockfile.py:41``) + and ``compute_file_hash`` itself short-circuits symlinks to the + empty-content sentinel (see ``src/apm_cli/utils/content_hash.py:77``). + Provenance over a symlink target would be misleading because the + target may live outside the project root. + + If a future change starts hashing symlinks, the audit chain + (``_check_content_integrity``) would silently begin reporting + target-content provenance for paths that look like local files + -- a safety regression. Update this test together with that + decision and document it in lockfile-spec.md. + """ + _build_fixture_project(tmp_path) + target = tmp_path / ".apm" / "agents" / "sample.agent.md" + link = tmp_path / ".apm" / "agents" / "alias.agent.md" + try: + os.symlink(target, link) + except (OSError, NotImplementedError) as exc: + pytest.skip(f"symlink creation not supported here: {exc}") + + rel_link = ".apm/agents/alias.agent.md" + hashes = compute_deployed_hashes([rel_link], tmp_path) + assert rel_link not in hashes, ( + "symlink leaked into hashes -- contract violated" + ) + + def test_no_extra_hashes_beyond_deployed_files(self, tmp_path: Path) -> None: + """Hash dict must never contain keys absent from ``deployed_files``.""" + deployed_files, _expected_dirs, _expected_files = _build_fixture_project(tmp_path) + hashes = compute_deployed_hashes(deployed_files, tmp_path) + extras = set(hashes.keys()) - set(deployed_files) + assert extras == set(), ( + f"hashes contain entries not in deployed_files: {sorted(extras)}" + ) + + def test_missing_files_are_skipped_silently(self, tmp_path: Path) -> None: + """Paths in ``deployed_files`` that don't exist on disk produce no hash. + + This mirrors the ``compute_deployed_hashes`` contract: files that + cannot be read contribute no provenance, but they are NOT an + error here -- the audit layer (``_check_content_integrity``) is + responsible for surfacing the missing-file diagnostic. + """ + (tmp_path / ".apm").mkdir() + present = tmp_path / ".apm" / "present.md" + present.write_bytes(b"hi\n") + rels = [".apm/present.md", ".apm/missing.md"] + hashes = compute_deployed_hashes(rels, tmp_path) + assert ".apm/present.md" in hashes + assert ".apm/missing.md" not in hashes diff --git a/tests/unit/install/test_policy_gate_phase.py b/tests/unit/install/test_policy_gate_phase.py index 22e57ce99..bd825f821 100644 --- a/tests/unit/install/test_policy_gate_phase.py +++ b/tests/unit/install/test_policy_gate_phase.py @@ -47,14 +47,16 @@ class _FakeContext: policy_fetch: Any = None policy_enforcement_active: bool = False no_policy: bool = False + apm_package: Any = None -def _make_ctx(*, logger=None, no_policy=False, deps=None): +def _make_ctx(*, logger=None, no_policy=False, deps=None, apm_package=None): """Build a _FakeContext with defaults.""" return _FakeContext( logger=logger or MagicMock(), no_policy=no_policy, deps_to_install=deps or [], + apm_package=apm_package, ) @@ -790,3 +792,59 @@ def test_no_direct_mcp_deps_passes_none(self, mock_discover, mock_checks): mock_checks.assert_called_once() assert mock_checks.call_args[1]["mcp_deps"] is None + + +class TestExplicitIncludesWiring: + """policy_gate threads ctx.apm_package.includes into the dep seam.""" + + @patch(_PATCH_CHECKS) + @patch(_PATCH_DISCOVER) + def test_no_apm_package_omits_kwarg(self, mock_discover, mock_checks): + # When ctx.apm_package is None the seam kwarg must NOT be set, + # so legacy "skip" behaviour is preserved. + mock_discover.return_value = _make_fetch_result("found", enforcement="warn") + mock_checks.return_value = _passing_audit() + ctx = _make_ctx(apm_package=None) + + run(ctx) + + kwargs = mock_checks.call_args[1] + assert "manifest_includes" not in kwargs + + @patch(_PATCH_CHECKS) + @patch(_PATCH_DISCOVER) + def test_apm_package_threads_includes(self, mock_discover, mock_checks): + mock_discover.return_value = _make_fetch_result("found", enforcement="block") + mock_checks.return_value = _passing_audit() + fake_pkg = MagicMock() + fake_pkg.includes = "auto" + ctx = _make_ctx(apm_package=fake_pkg) + + run(ctx) + + kwargs = mock_checks.call_args[1] + assert kwargs.get("manifest_includes") == "auto" + + @patch(_PATCH_CHECKS) + @patch(_PATCH_DISCOVER) + def test_explicit_includes_violation_raises(self, mock_discover, mock_checks): + # Simulate the seam returning an explicit-includes violation; + # under enforcement=block, run() must raise PolicyViolationError. + mock_discover.return_value = _make_fetch_result("found", enforcement="block") + mock_checks.return_value = CIAuditResult(checks=[ + CheckResult( + name="explicit-includes", + passed=False, + message=( + "Policy requires explicit 'includes:' paths but manifest has " + "'includes: auto' or no includes field." + ), + details=["includes: 'auto', require_explicit_includes: true"], + ), + ]) + fake_pkg = MagicMock() + fake_pkg.includes = "auto" + ctx = _make_ctx(apm_package=fake_pkg) + + with pytest.raises(PolicyViolationError): + run(ctx) diff --git a/tests/unit/policy/test_ci_checks.py b/tests/unit/policy/test_ci_checks.py index c3175611b..fdf3d5fca 100644 --- a/tests/unit/policy/test_ci_checks.py +++ b/tests/unit/policy/test_ci_checks.py @@ -385,6 +385,176 @@ def test_fail_critical_unicode(self, tmp_path): assert not result.passed assert any("evil.md" in d for d in result.details) + # -- Hash verification ---------------------------------------------- + + def test_hash_pass_when_all_match(self, tmp_path): + from apm_cli.utils.content_hash import compute_file_hash + + _make_deployed_file(tmp_path, ".github/prompts/clean.md", "Clean content\n") + actual_hash = compute_file_hash(tmp_path / ".github/prompts/clean.md") + _write_lockfile( + tmp_path, + textwrap.dedent(f"""\ + lockfile_version: '1' + generated_at: '2025-01-01T00:00:00Z' + dependencies: + - repo_url: owner/repo + deployed_files: + - .github/prompts/clean.md + deployed_file_hashes: + .github/prompts/clean.md: '{actual_hash}' + """), + ) + from apm_cli.deps.lockfile import LockFile, get_lockfile_path + + lock = LockFile.read(get_lockfile_path(tmp_path)) + result = _check_content_integrity(tmp_path, lock) + assert result.passed, result.details + + def test_hash_fail_on_hand_edit(self, tmp_path): + from apm_cli.utils.content_hash import compute_file_hash + + _make_deployed_file( + tmp_path, ".github/prompts/installed.md", "Original content\n" + ) + recorded_hash = compute_file_hash(tmp_path / ".github/prompts/installed.md") + _write_lockfile( + tmp_path, + textwrap.dedent(f"""\ + lockfile_version: '1' + generated_at: '2025-01-01T00:00:00Z' + dependencies: + - repo_url: owner/repo + deployed_files: + - .github/prompts/installed.md + deployed_file_hashes: + .github/prompts/installed.md: '{recorded_hash}' + """), + ) + # Simulate hand edit after install + (tmp_path / ".github/prompts/installed.md").write_text( + "Tampered content\n", encoding="utf-8" + ) + + from apm_cli.deps.lockfile import LockFile, get_lockfile_path + + lock = LockFile.read(get_lockfile_path(tmp_path)) + result = _check_content_integrity(tmp_path, lock) + assert not result.passed + assert any( + "hash-drift" in d and "installed.md" in d for d in result.details + ), result.details + + def test_hash_skips_missing_file(self, tmp_path): + # Lockfile records a file with a hash, but the file is missing on + # disk -- _check_deployed_files_present owns that signal, so + # content-integrity must not double-report it. + _write_lockfile( + tmp_path, + textwrap.dedent("""\ + lockfile_version: '1' + generated_at: '2025-01-01T00:00:00Z' + dependencies: + - repo_url: owner/repo + deployed_files: + - .github/prompts/missing.md + deployed_file_hashes: + .github/prompts/missing.md: 'sha256:deadbeef' + """), + ) + from apm_cli.deps.lockfile import LockFile, get_lockfile_path + + lock = LockFile.read(get_lockfile_path(tmp_path)) + result = _check_content_integrity(tmp_path, lock) + assert result.passed, result.details + + def test_hash_skips_entry_without_hash(self, tmp_path): + # File listed in deployed_files but with no entry in + # deployed_file_hashes (e.g. directories) must not raise. + _make_deployed_file(tmp_path, ".github/prompts/no-hash.md", "stuff\n") + _write_lockfile( + tmp_path, + textwrap.dedent("""\ + lockfile_version: '1' + generated_at: '2025-01-01T00:00:00Z' + dependencies: + - repo_url: owner/repo + deployed_files: + - .github/prompts/no-hash.md + """), + ) + from apm_cli.deps.lockfile import LockFile, get_lockfile_path + + lock = LockFile.read(get_lockfile_path(tmp_path)) + result = _check_content_integrity(tmp_path, lock) + assert result.passed, result.details + + def test_hash_skips_symlink(self, tmp_path): + import os + from apm_cli.utils.content_hash import compute_file_hash + + # Create a real target file outside the deployed path + target = tmp_path / "target.md" + target.write_text("target content\n", encoding="utf-8") + # Place a symlink at the deployed path + link_path = tmp_path / ".github/prompts/link.md" + link_path.parent.mkdir(parents=True, exist_ok=True) + os.symlink(target, link_path) + + # Record an obviously wrong hash -- symlinks must be skipped, not + # flagged. + _write_lockfile( + tmp_path, + textwrap.dedent("""\ + lockfile_version: '1' + generated_at: '2025-01-01T00:00:00Z' + dependencies: + - repo_url: owner/repo + deployed_files: + - .github/prompts/link.md + deployed_file_hashes: + .github/prompts/link.md: 'sha256:deadbeef' + """), + ) + from apm_cli.deps.lockfile import LockFile, get_lockfile_path + + lock = LockFile.read(get_lockfile_path(tmp_path)) + result = _check_content_integrity(tmp_path, lock) + assert result.passed, result.details + + def test_hash_covers_self_entry_local_files(self, tmp_path): + # Local content lives in lock.local_deployed_files / hashes; the + # LockFile loader synthesizes a self-entry into lock.dependencies, + # so iterating dependencies.items() must catch drift here too. + from apm_cli.utils.content_hash import compute_file_hash + + _make_deployed_file(tmp_path, ".github/prompts/local.md", "local v1\n") + recorded_hash = compute_file_hash(tmp_path / ".github/prompts/local.md") + _write_lockfile( + tmp_path, + textwrap.dedent(f"""\ + lockfile_version: '1' + generated_at: '2025-01-01T00:00:00Z' + local_deployed_files: + - .github/prompts/local.md + local_deployed_file_hashes: + .github/prompts/local.md: '{recorded_hash}' + """), + ) + # Mutate local file + (tmp_path / ".github/prompts/local.md").write_text( + "local v2 tampered\n", encoding="utf-8" + ) + + from apm_cli.deps.lockfile import LockFile, get_lockfile_path + + lock = LockFile.read(get_lockfile_path(tmp_path)) + result = _check_content_integrity(tmp_path, lock) + assert not result.passed + assert any( + "hash-drift" in d and "local.md" in d for d in result.details + ), result.details + # -- Aggregate runner ---------------------------------------------- @@ -407,7 +577,7 @@ def test_all_pass(self, tmp_path): ) result = run_baseline_checks(tmp_path) assert result.passed - assert len(result.checks) == 6 # all 6 checks ran + assert len(result.checks) == 7 # all 7 checks ran (incl. includes-consent) def test_mixed_pass_fail(self, tmp_path): # Ref mismatch (fail) + missing file (fail) + clean otherwise @@ -554,3 +724,300 @@ def test_sarif_uses_message_when_no_details(self): ) s = result.to_sarif() assert s["runs"][0]["results"][0]["message"]["text"] == "the message" + + +# -- Local-only repo support (issue #887) -------------------------- + + +class TestLocalOnlyRepoSupport: + """Audit must support repos with only local content (no remote deps). + + A local-only repo declares no APM/MCP deps in apm.yml but the lockfile + records the project's own local content via ``local_deployed_files``, + which the LockFile loader synthesizes as a "." self-entry in + ``lock.dependencies``. + """ + + def test_lockfile_exists_passes_when_local_content_recorded(self, tmp_path): + """(a) Empty manifest deps + non-empty local_deployed_files in + lockfile must not short-circuit with 'no dependencies declared'. + It must require/accept the lockfile so downstream checks run.""" + _make_deployed_file(tmp_path, ".github/prompts/local.prompt.md", "# main\n") + _write_apm_yml(tmp_path) # no deps + _write_lockfile( + tmp_path, + textwrap.dedent("""\ + lockfile_version: '1' + generated_at: '2025-01-01T00:00:00Z' + dependencies: [] + local_deployed_files: + - .github/prompts/local.prompt.md + """), + ) + result = _check_lockfile_exists(tmp_path) + assert result.passed + assert "lockfile present" in result.message.lower() + # Must NOT have been short-circuited as "no dependencies declared" + assert "not required" not in result.message.lower() + + def test_lockfile_exists_still_passes_when_no_deps_and_no_local(self, tmp_path): + """Regression guard: empty manifest + empty/absent lockfile still + returns the 'no dependencies declared' fast-path (no false fail).""" + _write_apm_yml(tmp_path) # no deps + # No lockfile on disk at all. + result = _check_lockfile_exists(tmp_path) + assert result.passed + assert "not required" in result.message.lower() + + def test_aggregate_runs_deployed_files_check_for_local_only_repo(self, tmp_path): + """(c) Aggregate must NOT short-circuit before deployed-files-present + runs against the synthesized self-entry.""" + # File declared in lockfile but missing on disk -> deployed check fails. + _write_apm_yml(tmp_path) # no deps + _write_lockfile( + tmp_path, + textwrap.dedent("""\ + lockfile_version: '1' + generated_at: '2025-01-01T00:00:00Z' + dependencies: [] + local_deployed_files: + - .github/prompts/missing.prompt.md + """), + ) + result = run_baseline_checks(tmp_path, fail_fast=False) + check_names = {c.name for c in result.checks} + # Aggregate must execute deployed-files-present, not stop after + # lockfile-exists. + assert "deployed-files-present" in check_names + # And it must FAIL because the self-entry's file is missing. + deployed = next(c for c in result.checks if c.name == "deployed-files-present") + assert not deployed.passed + assert ".github/prompts/missing.prompt.md" in deployed.details + + def test_aggregate_passes_for_clean_local_only_repo(self, tmp_path): + """End-to-end happy path: local-only repo with file on disk passes.""" + _make_deployed_file(tmp_path, ".github/prompts/local.prompt.md", "# main\n") + _write_apm_yml(tmp_path) # no deps + _write_lockfile( + tmp_path, + textwrap.dedent("""\ + lockfile_version: '1' + generated_at: '2025-01-01T00:00:00Z' + dependencies: [] + local_deployed_files: + - .github/prompts/local.prompt.md + """), + ) + result = run_baseline_checks(tmp_path, fail_fast=False) + assert result.passed, [ + (c.name, c.message, c.details) for c in result.failed_checks + ] + check_names = {c.name for c in result.checks} + assert "deployed-files-present" in check_names + assert "no-orphaned-packages" in check_names + + def test_no_orphans_self_entry_alone_not_flagged(self, tmp_path): + """(b) Lockfile with only the '.' self-entry + manifest with no deps + must not flag the self-entry as orphaned.""" + _write_apm_yml(tmp_path) # no deps + _write_lockfile( + tmp_path, + textwrap.dedent("""\ + lockfile_version: '1' + generated_at: '2025-01-01T00:00:00Z' + dependencies: [] + local_deployed_files: + - .github/prompts/local.prompt.md + """), + ) + from apm_cli.models.apm_package import APMPackage + from apm_cli.deps.lockfile import LockFile, get_lockfile_path, _SELF_KEY + + manifest = APMPackage.from_apm_yml(tmp_path / "apm.yml") + lock = LockFile.read(get_lockfile_path(tmp_path)) + # Sanity: the loader synthesized the self-entry. + assert _SELF_KEY in lock.dependencies + result = _check_no_orphans(manifest, lock) + assert result.passed, result.details + + def test_no_orphans_self_entry_with_declared_local_dep(self, tmp_path): + """(b) Self-entry + a declared local-path dep both present -> no orphans.""" + # Create the local package directory so manifest parsing accepts it. + local_pkg = tmp_path / "packages" / "shared" + local_pkg.mkdir(parents=True) + (local_pkg / "apm.yml").write_text( + "name: shared\nversion: '1.0.0'\n", encoding="utf-8" + ) + + _write_apm_yml(tmp_path, deps=["./packages/shared"]) + _write_lockfile( + tmp_path, + textwrap.dedent("""\ + lockfile_version: '1' + generated_at: '2025-01-01T00:00:00Z' + dependencies: + - repo_url: + source: local + local_path: ./packages/shared + deployed_files: [] + local_deployed_files: + - .github/prompts/local.prompt.md + """), + ) + from apm_cli.models.apm_package import APMPackage + from apm_cli.deps.lockfile import LockFile, get_lockfile_path + + manifest = APMPackage.from_apm_yml(tmp_path / "apm.yml") + lock = LockFile.read(get_lockfile_path(tmp_path)) + result = _check_no_orphans(manifest, lock) + assert result.passed, result.details + + def test_no_orphans_still_detects_real_orphan_with_self_entry(self, tmp_path): + """(b) Negative: self-entry must not mask a genuine remote orphan.""" + _write_apm_yml(tmp_path) # manifest declares nothing + _write_lockfile( + tmp_path, + textwrap.dedent("""\ + lockfile_version: '1' + generated_at: '2025-01-01T00:00:00Z' + dependencies: + - repo_url: extra/orphan + deployed_files: [] + local_deployed_files: + - .github/prompts/local.prompt.md + """), + ) + from apm_cli.models.apm_package import APMPackage + from apm_cli.deps.lockfile import LockFile, get_lockfile_path, _SELF_KEY + + manifest = APMPackage.from_apm_yml(tmp_path / "apm.yml") + lock = LockFile.read(get_lockfile_path(tmp_path)) + result = _check_no_orphans(manifest, lock) + assert not result.passed + assert "extra/orphan" in result.details + # Self-entry must NOT appear in the orphan list. + assert _SELF_KEY not in result.details + + +# -- Includes consent advisory (issue #887) ------------------------ + + +class TestIncludesConsent: + """Advisory check that nudges maintainers to declare 'includes:' when + the lockfile records local content. Never hard-fails.""" + + def _write_manifest(self, project: Path, includes_line: str | None) -> None: + lines = ["name: test-project", "version: '1.0.0'"] + if includes_line is not None: + lines.append(includes_line) + (project / "apm.yml").write_text("\n".join(lines) + "\n", encoding="utf-8") + + def _write_local_lock(self, project: Path, files: list[str]) -> None: + if files: + file_lines = "\n".join(f" - {f}" for f in files) + body = textwrap.dedent("""\ + lockfile_version: '1' + generated_at: '2025-01-01T00:00:00Z' + dependencies: [] + local_deployed_files: + """) + file_lines + "\n" + else: + body = textwrap.dedent("""\ + lockfile_version: '1' + generated_at: '2025-01-01T00:00:00Z' + dependencies: [] + """) + _write_lockfile(project, body) + + def _load(self, project: Path): + from apm_cli.models.apm_package import APMPackage + from apm_cli.deps.lockfile import LockFile, get_lockfile_path + + manifest = APMPackage.from_apm_yml(project / "apm.yml") + lock = LockFile.read(get_lockfile_path(project)) + return manifest, lock + + def test_auto_with_local_content_passes_silently(self, tmp_path): + from apm_cli.policy.ci_checks import _check_includes_consent + + _make_deployed_file(tmp_path, ".github/prompts/local.prompt.md") + self._write_manifest(tmp_path, "includes: auto") + self._write_local_lock(tmp_path, [".github/prompts/local.prompt.md"]) + + manifest, lock = self._load(tmp_path) + result = _check_includes_consent(manifest, lock) + assert result.passed + assert "[!]" not in result.message + + def test_absent_with_local_content_passes_with_advisory(self, tmp_path): + from apm_cli.policy.ci_checks import _check_includes_consent + + _make_deployed_file(tmp_path, ".github/prompts/local.prompt.md") + self._write_manifest(tmp_path, includes_line=None) + self._write_local_lock(tmp_path, [".github/prompts/local.prompt.md"]) + + manifest, lock = self._load(tmp_path) + result = _check_includes_consent(manifest, lock) + assert result.passed # advisory, not a hard failure + assert "consider adding 'includes: auto'" in result.message + assert "includes:" in result.message + assert "consent" in result.message.lower() + # ASCII-only convention: no unicode warning glyphs. + assert "\u26a0" not in result.message # warning sign + assert "\ufe0f" not in result.message # variation selector + + def test_absent_with_no_local_content_skipped(self, tmp_path): + from apm_cli.policy.ci_checks import _check_includes_consent + + self._write_manifest(tmp_path, includes_line=None) + self._write_local_lock(tmp_path, files=[]) + + manifest, lock = self._load(tmp_path) + result = _check_includes_consent(manifest, lock) + assert result.passed + assert "[!]" not in result.message + assert "skipped" in result.message.lower() + + def test_list_with_local_content_passes_silently(self, tmp_path): + from apm_cli.policy.ci_checks import _check_includes_consent + + _make_deployed_file(tmp_path, ".github/prompts/local.prompt.md") + self._write_manifest( + tmp_path, + "includes:\n - .github/prompts/local.prompt.md\n - .github/prompts/other.md", + ) + self._write_local_lock(tmp_path, [".github/prompts/local.prompt.md"]) + + manifest, lock = self._load(tmp_path) + result = _check_includes_consent(manifest, lock) + assert result.passed + assert "[!]" not in result.message + + def test_auto_with_no_local_content_passes_silently(self, tmp_path): + from apm_cli.policy.ci_checks import _check_includes_consent + + self._write_manifest(tmp_path, "includes: auto") + self._write_local_lock(tmp_path, files=[]) + + manifest, lock = self._load(tmp_path) + result = _check_includes_consent(manifest, lock) + assert result.passed + assert "[!]" not in result.message + + def test_aggregate_runner_includes_consent_check_last(self, tmp_path): + """Aggregate must run the consent check after content-integrity, + producing a 7th check entry with the [!] advisory when applicable.""" + _make_deployed_file(tmp_path, ".github/prompts/local.prompt.md") + self._write_manifest(tmp_path, includes_line=None) # no includes: + self._write_local_lock(tmp_path, [".github/prompts/local.prompt.md"]) + + result = run_baseline_checks(tmp_path, fail_fast=False) + assert result.passed, [ + (c.name, c.message, c.details) for c in result.failed_checks + ] + names = [c.name for c in result.checks] + assert "includes-consent" in names + assert names[-1] == "includes-consent" # appears last + consent = next(c for c in result.checks if c.name == "includes-consent") + assert consent.passed + assert "consider adding 'includes: auto'" in consent.message diff --git a/tests/unit/policy/test_parser.py b/tests/unit/policy/test_parser.py index cf834bfb3..2b5e61d3d 100644 --- a/tests/unit/policy/test_parser.py +++ b/tests/unit/policy/test_parser.py @@ -56,6 +56,28 @@ def test_invalid_scripts(self): self.assertEqual(len(errors), 1) self.assertIn("scripts", errors[0]) + def test_valid_require_explicit_includes(self): + for val in (True, False): + errors, warnings = validate_policy( + {"manifest": {"require_explicit_includes": val}} + ) + self.assertEqual(errors, []) + self.assertEqual(warnings, []) + + def test_invalid_require_explicit_includes_string(self): + errors, warnings = validate_policy( + {"manifest": {"require_explicit_includes": "true"}} + ) + self.assertEqual(len(errors), 1) + self.assertIn("require_explicit_includes", errors[0]) + + def test_invalid_require_explicit_includes_int(self): + errors, warnings = validate_policy( + {"manifest": {"require_explicit_includes": 1}} + ) + self.assertEqual(len(errors), 1) + self.assertIn("require_explicit_includes", errors[0]) + def test_invalid_unmanaged_action(self): errors, warnings = validate_policy({"unmanaged_files": {"action": "block"}}) self.assertEqual(len(errors), 1) @@ -211,6 +233,25 @@ def test_minimal_policy(self): self.assertEqual(policy.cache.ttl, 3600) self.assertIsNone(policy.dependencies.allow) self.assertEqual(policy.dependencies.max_depth, 50) + self.assertFalse(policy.manifest.require_explicit_includes) + + def test_require_explicit_includes_true(self): + yaml_str = textwrap.dedent(""" + manifest: + require_explicit_includes: true + """) + policy, warnings = load_policy(yaml_str) + self.assertEqual(warnings, []) + self.assertTrue(policy.manifest.require_explicit_includes) + + def test_require_explicit_includes_no_unknown_warning(self): + yaml_str = textwrap.dedent(""" + manifest: + require_explicit_includes: false + """) + policy, warnings = load_policy(yaml_str) + self.assertEqual(warnings, []) + self.assertFalse(policy.manifest.require_explicit_includes) def test_empty_yaml(self): policy, warnings = load_policy("") diff --git a/tests/unit/policy/test_policy_checks.py b/tests/unit/policy/test_policy_checks.py index 34aaf6189..57dc81515 100644 --- a/tests/unit/policy/test_policy_checks.py +++ b/tests/unit/policy/test_policy_checks.py @@ -15,6 +15,7 @@ _check_compilation_target, _check_dependency_allowlist, _check_dependency_denylist, + _check_includes_explicit, _check_mcp_allowlist, _check_mcp_denylist, _check_mcp_self_defined, @@ -753,8 +754,8 @@ def test_rglob_cap_skips_check(self, tmp_path, monkeypatch): class TestRunPolicyChecks: - def test_returns_all_16_checks(self, tmp_path): - """Full run should produce exactly 16 checks.""" + def test_returns_all_17_checks(self, tmp_path): + """Full run should produce exactly 17 checks.""" _write_apm_yml( tmp_path, { @@ -778,7 +779,7 @@ def test_returns_all_16_checks(self, tmp_path): policy = ApmPolicy() result = run_policy_checks(tmp_path, policy) - assert len(result.checks) == 16 + assert len(result.checks) == 17 # Default policy = all checks pass assert result.passed diff --git a/tests/unit/policy/test_run_dependency_policy_checks.py b/tests/unit/policy/test_run_dependency_policy_checks.py index ad75c222f..9658d60bf 100644 --- a/tests/unit/policy/test_run_dependency_policy_checks.py +++ b/tests/unit/policy/test_run_dependency_policy_checks.py @@ -568,3 +568,66 @@ def test_project_wins_full_pass(self): c for c in result.checks if c.name == "required-package-version" ] assert ver_check and ver_check[0].details + + +class TestExplicitIncludesSeam: + """Wiring of the explicit-includes check into run_dependency_policy_checks. + + Covers the sentinel behaviour: when the caller does not supply + ``manifest_includes`` the check is skipped (preserves legacy + callers that lack manifest context); when the caller supplies it + the check runs against ``policy.manifest.require_explicit_includes``. + """ + + def _policy(self, *, require: bool): + from apm_cli.policy.schema import ManifestPolicy + + return ApmPolicy(manifest=ManifestPolicy(require_explicit_includes=require)) + + def test_skipped_when_manifest_includes_not_provided(self): + # Default: no manifest_includes kwarg -> no explicit-includes + # CheckResult appears in the result. + result = run_dependency_policy_checks( + [], policy=self._policy(require=True) + ) + assert "explicit-includes" not in _check_names(result) + + def test_violation_when_required_and_includes_none(self): + result = run_dependency_policy_checks( + [], + policy=self._policy(require=True), + manifest_includes=None, + fail_fast=False, + ) + assert "explicit-includes" in _failed_names(result) + + def test_violation_when_required_and_includes_auto(self): + result = run_dependency_policy_checks( + [], + policy=self._policy(require=True), + manifest_includes="auto", + fail_fast=False, + ) + assert "explicit-includes" in _failed_names(result) + + def test_pass_when_required_and_includes_explicit_list(self): + result = run_dependency_policy_checks( + [], + policy=self._policy(require=True), + manifest_includes=["a.md", "b.md"], + fail_fast=False, + ) + assert "explicit-includes" in _check_names(result) + assert "explicit-includes" not in _failed_names(result) + + def test_pass_when_not_required_regardless_of_includes(self): + for value in (None, "auto", ["a.md"]): + result = run_dependency_policy_checks( + [], + policy=self._policy(require=False), + manifest_includes=value, + fail_fast=False, + ) + assert "explicit-includes" not in _failed_names(result), ( + f"unexpected violation for includes={value!r}" + ) diff --git a/tests/unit/test_apm_package.py b/tests/unit/test_apm_package.py index 12a55a14c..f7cc19fd5 100644 --- a/tests/unit/test_apm_package.py +++ b/tests/unit/test_apm_package.py @@ -295,3 +295,70 @@ def test_clear_forces_reparse(self, tmp_path): assert pkg1.name == "test-pkg" assert pkg2.name == "changed-pkg" + + +class TestIncludesField: + """Tests for the 'includes' field on APMPackage (auto-publish opt-in).""" + + def test_includes_auto_parses_to_string(self, tmp_path): + yml = _write_apm_yml(tmp_path, { + "name": "test-pkg", + "version": "1.0.0", + "includes": "auto", + }) + pkg = APMPackage.from_apm_yml(yml) + assert pkg.includes == "auto" + + def test_includes_list_parses_to_list(self, tmp_path): + yml = _write_apm_yml(tmp_path, { + "name": "test-pkg", + "version": "1.0.0", + "includes": ["path/a", "path/b"], + }) + pkg = APMPackage.from_apm_yml(yml) + assert pkg.includes == ["path/a", "path/b"] + + def test_includes_missing_is_none(self, tmp_path): + yml = _write_apm_yml(tmp_path, { + "name": "test-pkg", + "version": "1.0.0", + }) + pkg = APMPackage.from_apm_yml(yml) + assert pkg.includes is None + + def test_includes_invalid_int_raises(self, tmp_path): + yml = _write_apm_yml(tmp_path, { + "name": "test-pkg", + "version": "1.0.0", + "includes": 42, + }) + with pytest.raises(ValueError, match="'includes' must be 'auto' or a list of strings"): + APMPackage.from_apm_yml(yml) + + def test_includes_list_with_non_strings_raises(self, tmp_path): + yml = _write_apm_yml(tmp_path, { + "name": "test-pkg", + "version": "1.0.0", + "includes": [1, 2], + }) + with pytest.raises(ValueError, match="'includes' must be 'auto' or a list of strings"): + APMPackage.from_apm_yml(yml) + + def test_includes_other_string_raises(self, tmp_path): + yml = _write_apm_yml(tmp_path, { + "name": "test-pkg", + "version": "1.0.0", + "includes": "explicit", + }) + with pytest.raises(ValueError, match="'includes' must be 'auto' or a list of strings"): + APMPackage.from_apm_yml(yml) + + def test_has_apm_dependencies_false_for_include_only_manifest(self, tmp_path): + """Include-only manifests (no apm: deps) still report no APM dependencies.""" + yml = _write_apm_yml(tmp_path, { + "name": "test-pkg", + "version": "1.0.0", + "includes": "auto", + }) + pkg = APMPackage.from_apm_yml(yml) + assert pkg.has_apm_dependencies() is False diff --git a/tests/unit/test_audit_policy_command.py b/tests/unit/test_audit_policy_command.py index 61e1e531d..f334c5e89 100644 --- a/tests/unit/test_audit_policy_command.py +++ b/tests/unit/test_audit_policy_command.py @@ -101,8 +101,10 @@ def test_ci_with_policy_json_output(self, runner, tmp_path, monkeypatch): ) assert result.exit_code == 0 data = json.loads(result.output) - # Baseline: up to 6 checks, Policy: 16 checks -> total > 6 - assert data["summary"]["total"] > 6 + # Baseline: up to 7 checks, Policy: 17 checks -> total > 7 when + # policy evaluation actually ran. Asserting > 7 (not > 6) catches + # the regression where only baseline checks are returned. + assert data["summary"]["total"] > 7 def test_ci_with_policy_deny_fails(self, runner, tmp_path, monkeypatch): """Policy deny list causing failure -> exit 1.""" @@ -236,5 +238,5 @@ def test_baseline_only(self, runner, tmp_path, monkeypatch): ) assert result.exit_code == 0 data = json.loads(result.output) - # Only baseline checks (max 6) - assert data["summary"]["total"] <= 6 + # Only baseline checks (max 7 incl. includes-consent) + assert data["summary"]["total"] <= 7 diff --git a/tests/unit/test_deps_list_tree_info.py b/tests/unit/test_deps_list_tree_info.py index 2cbc0d235..d48e127b9 100644 --- a/tests/unit/test_deps_list_tree_info.py +++ b/tests/unit/test_deps_list_tree_info.py @@ -346,6 +346,7 @@ def test_tree_with_lockfile_shows_dep(self): mock_lockfile = MagicMock() mock_lockfile.get_all_dependencies.return_value = [mock_dep] + mock_lockfile.get_package_dependencies.return_value = [mock_dep] mock_lf_path = MagicMock() mock_lf_path.exists.return_value = True @@ -370,6 +371,47 @@ def test_tree_project_name_from_apm_yml(self): assert result.exit_code == 0 assert "awesomeproject" in result.output + def test_tree_does_not_show_self_entry(self): + """Lockfile self-entry must not surface as a dependency in the tree. + + Regression for #887: the synthesized '.' entry (repo_url='') is + an internal representation of the project's own local content, not a + dependency. apm deps tree must skip it. + """ + from apm_cli.deps.lockfile import LockFile, LockedDependency + + with self._chdir_tmp() as tmp: + self._make_package(tmp, "realorg", "realrepo") + (tmp / "apm.yml").write_text("name: myproj\n") + + lock = LockFile( + lockfile_version="1", + generated_at="2025-01-01T00:00:00+00:00", + apm_version="0.0.0-test", + ) + lock.add_dependency( + LockedDependency( + repo_url="realorg/realrepo", + resolved_commit="b" * 40, + depth=1, + version="1.0.0", + ) + ) + lock.local_deployed_files = [".github/skills/local/SKILL.md"] + lock.local_deployed_file_hashes = { + ".github/skills/local/SKILL.md": "abc", + } + lockfile_path = tmp / "apm.lock.yaml" + lockfile_path.write_text(lock.to_yaml()) + + with patch("apm_cli.core.scope.get_apm_dir", return_value=tmp), _force_rich_fallback(): + result = self.runner.invoke(cli, ["deps", "tree"]) + + assert result.exit_code == 0 + assert "realorg/realrepo" in result.output + # The self-entry must not appear under any of its representations. + assert "" not in result.output + class TestDepsInfoCommand(_DepsCmdBase): """Tests for apm deps info.""" diff --git a/tests/unit/test_lockfile_self_entry.py b/tests/unit/test_lockfile_self_entry.py new file mode 100644 index 000000000..b876710dd --- /dev/null +++ b/tests/unit/test_lockfile_self_entry.py @@ -0,0 +1,275 @@ +"""Unit tests for the virtual self-entry synthesis in LockFile. + +Covers: +- _SELF_KEY constant +- LockFile.from_yaml() synthesizes a "." entry from local_deployed_files +- LockFile.to_yaml() does NOT emit the "." entry into the dependencies array +- Round-trip byte-stability of the YAML output +- get_package_dependencies() excludes the self-entry +- get_all_dependencies() includes the self-entry, sorted first by depth=0 +- get_unique_key() returns "." for the synthesized entry +- is_semantically_equivalent honors the synthesized entry +""" + +import yaml + +from apm_cli.deps.lockfile import ( + LockFile, + LockedDependency, + _SELF_KEY, +) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_lockfile_with_local_content() -> LockFile: + """Build a LockFile with realistic local content + one remote dep.""" + lock = LockFile( + lockfile_version="1", + generated_at="2025-01-01T00:00:00+00:00", + apm_version="0.0.0-test", + ) + # A real remote dependency at depth=1 + lock.add_dependency( + LockedDependency( + repo_url="https://github.com/owner/repo", + resolved_commit="a" * 40, + depth=1, + deployed_files=[".github/skills/foo/SKILL.md"], + deployed_file_hashes={".github/skills/foo/SKILL.md": "deadbeef"}, + ) + ) + # Local content (the "self" payload) + lock.local_deployed_files = [ + ".github/instructions/python.instructions.md", + ".github/skills/local/SKILL.md", + ".apm/agents/main.agent.md", + ] + lock.local_deployed_file_hashes = { + ".github/instructions/python.instructions.md": "1111111111111111", + ".github/skills/local/SKILL.md": "2222222222222222", + ".apm/agents/main.agent.md": "3333333333333333", + } + return lock + + +# --------------------------------------------------------------------------- +# Constant +# --------------------------------------------------------------------------- + + +def test_self_key_constant_is_dot(): + assert _SELF_KEY == "." + + +# --------------------------------------------------------------------------- +# Synthesis behavior +# --------------------------------------------------------------------------- + + +class TestSelfEntrySynthesis: + def test_synthesized_entry_present_when_local_content(self): + lock = _make_lockfile_with_local_content() + roundtripped = LockFile.from_yaml(lock.to_yaml()) + assert _SELF_KEY in roundtripped.dependencies + + def test_synthesized_entry_fields(self): + lock = _make_lockfile_with_local_content() + roundtripped = LockFile.from_yaml(lock.to_yaml()) + self_dep = roundtripped.dependencies[_SELF_KEY] + assert self_dep.is_dev is True + assert self_dep.source == "local" + assert self_dep.local_path == "." + assert self_dep.repo_url == "" + assert self_dep.depth == 0 + + def test_synthesized_entry_carries_deployed_files_and_hashes(self): + lock = _make_lockfile_with_local_content() + roundtripped = LockFile.from_yaml(lock.to_yaml()) + self_dep = roundtripped.dependencies[_SELF_KEY] + assert sorted(self_dep.deployed_files) == sorted(lock.local_deployed_files) + assert self_dep.deployed_file_hashes == lock.local_deployed_file_hashes + + def test_no_self_entry_when_local_deployed_files_empty(self): + lock = LockFile( + lockfile_version="1", + generated_at="2025-01-01T00:00:00+00:00", + ) + # No local content, no remote deps + roundtripped = LockFile.from_yaml(lock.to_yaml()) + assert _SELF_KEY not in roundtripped.dependencies + + def test_no_self_entry_when_only_remote_deps(self): + lock = LockFile( + lockfile_version="1", + generated_at="2025-01-01T00:00:00+00:00", + ) + lock.add_dependency( + LockedDependency( + repo_url="https://github.com/owner/repo", + resolved_commit="b" * 40, + depth=1, + ) + ) + roundtripped = LockFile.from_yaml(lock.to_yaml()) + assert _SELF_KEY not in roundtripped.dependencies + + def test_synthesized_entry_unique_key_is_dot(self): + lock = _make_lockfile_with_local_content() + roundtripped = LockFile.from_yaml(lock.to_yaml()) + self_dep = roundtripped.dependencies[_SELF_KEY] + assert self_dep.get_unique_key() == "." + + +# --------------------------------------------------------------------------- +# YAML serialization: self-entry must NOT appear in the dependencies array +# --------------------------------------------------------------------------- + + +class TestSelfEntryNotSerialized: + def test_self_entry_absent_from_dependencies_array(self): + lock = _make_lockfile_with_local_content() + # Force the in-memory presence of the synthesized self-entry first. + lock = LockFile.from_yaml(lock.to_yaml()) + assert _SELF_KEY in lock.dependencies # precondition + + out = lock.to_yaml() + parsed = yaml.safe_load(out) + repo_urls = [d["repo_url"] for d in parsed.get("dependencies", [])] + assert "" not in repo_urls + # Local content is still preserved via the flat fields + assert parsed["local_deployed_files"] == sorted(lock.local_deployed_files) + + def test_to_yaml_restores_self_entry_in_memory(self): + """to_yaml must not mutate the in-memory dependencies dict (try/finally).""" + lock = LockFile.from_yaml(_make_lockfile_with_local_content().to_yaml()) + assert _SELF_KEY in lock.dependencies + _ = lock.to_yaml() + assert _SELF_KEY in lock.dependencies, ( + "to_yaml() must restore the popped self-entry" + ) + + def test_to_yaml_restores_self_entry_even_on_exception(self, monkeypatch): + """If serialization raises, the self-entry must still be restored.""" + from apm_cli.utils import yaml_io + + lock = LockFile.from_yaml(_make_lockfile_with_local_content().to_yaml()) + assert _SELF_KEY in lock.dependencies + + def _boom(*args, **kwargs): + raise RuntimeError("simulated dump failure") + + monkeypatch.setattr(yaml_io, "yaml_to_str", _boom) + try: + lock.to_yaml() + except RuntimeError: + pass + assert _SELF_KEY in lock.dependencies, ( + "to_yaml() must restore self-entry even on exception" + ) + + +# --------------------------------------------------------------------------- +# Round-trip byte stability (the critical correctness invariant) +# --------------------------------------------------------------------------- + + +class TestRoundTripByteStability: + def test_round_trip_bytes_stable_with_local_content(self): + lock = _make_lockfile_with_local_content() + # First dump establishes a canonical YAML form. + canonical = lock.to_yaml() + # Reload + redump should produce a byte-identical string. + reloaded = LockFile.from_yaml(canonical) + redumped = reloaded.to_yaml() + assert canonical == redumped + + def test_round_trip_bytes_stable_no_local_content(self): + lock = LockFile( + lockfile_version="1", + generated_at="2025-01-01T00:00:00+00:00", + apm_version="0.0.0-test", + ) + lock.add_dependency( + LockedDependency( + repo_url="https://github.com/owner/repo", + resolved_commit="c" * 40, + depth=1, + ) + ) + canonical = lock.to_yaml() + redumped = LockFile.from_yaml(canonical).to_yaml() + assert canonical == redumped + + def test_multiple_round_trips_remain_stable(self): + lock = _make_lockfile_with_local_content() + y1 = lock.to_yaml() + y2 = LockFile.from_yaml(y1).to_yaml() + y3 = LockFile.from_yaml(y2).to_yaml() + assert y1 == y2 == y3 + + +# --------------------------------------------------------------------------- +# get_all_dependencies / get_package_dependencies +# --------------------------------------------------------------------------- + + +class TestDependencyAccessors: + def test_get_all_dependencies_includes_self_entry(self): + lock = LockFile.from_yaml(_make_lockfile_with_local_content().to_yaml()) + all_deps = lock.get_all_dependencies() + assert any(d.local_path == "." for d in all_deps) + + def test_get_all_dependencies_self_entry_sorted_first(self): + lock = LockFile.from_yaml(_make_lockfile_with_local_content().to_yaml()) + all_deps = lock.get_all_dependencies() + # depth=0 sorts first by (depth, repo_url) + assert all_deps[0].local_path == "." + assert all_deps[0].depth == 0 + + def test_get_package_dependencies_excludes_self_entry(self): + lock = LockFile.from_yaml(_make_lockfile_with_local_content().to_yaml()) + pkg_deps = lock.get_package_dependencies() + assert all(d.local_path != "." for d in pkg_deps) + # Should still contain the remote dep + assert any(d.repo_url == "https://github.com/owner/repo" for d in pkg_deps) + + def test_get_package_dependencies_empty_when_only_self(self): + lock = LockFile( + lockfile_version="1", + generated_at="2025-01-01T00:00:00+00:00", + ) + lock.local_deployed_files = [".github/skills/local/SKILL.md"] + lock.local_deployed_file_hashes = {".github/skills/local/SKILL.md": "abc"} + roundtripped = LockFile.from_yaml(lock.to_yaml()) + assert _SELF_KEY in roundtripped.dependencies + assert roundtripped.get_package_dependencies() == [] + + +# --------------------------------------------------------------------------- +# is_semantically_equivalent +# --------------------------------------------------------------------------- + + +class TestSemanticEquivalenceWithSelfEntry: + def test_two_lockfiles_with_same_local_content_equivalent(self): + lock_a = LockFile.from_yaml(_make_lockfile_with_local_content().to_yaml()) + lock_b = LockFile.from_yaml(_make_lockfile_with_local_content().to_yaml()) + assert lock_a.is_semantically_equivalent(lock_b) + assert lock_b.is_semantically_equivalent(lock_a) + + def test_different_local_content_not_equivalent(self): + lock_a = LockFile.from_yaml(_make_lockfile_with_local_content().to_yaml()) + + other = _make_lockfile_with_local_content() + other.local_deployed_files = list(other.local_deployed_files) + [ + ".github/skills/extra/SKILL.md" + ] + other.local_deployed_file_hashes = dict(other.local_deployed_file_hashes) + other.local_deployed_file_hashes[".github/skills/extra/SKILL.md"] = "ffff" + lock_b = LockFile.from_yaml(other.to_yaml()) + + assert not lock_a.is_semantically_equivalent(lock_b) diff --git a/tests/unit/test_packer.py b/tests/unit/test_packer.py index a1cc350f6..920fb2b02 100644 --- a/tests/unit/test_packer.py +++ b/tests/unit/test_packer.py @@ -562,3 +562,378 @@ def test_pack_list_config_target_when_no_explicit(self, tmp_path): # Should include files from both .github/ (copilot) and .claude/ (claude) assert ".github/agents/a.md" in result.files assert ".claude/commands/b.md" in result.files + + +class TestPackSourceLocalGuard: + """Source-local entries (self-entry, local-path deps) must NOT be bundled. + + Issue #887 / task packer-source-guard: Local content is not portable. + The synthesized root self-entry (source='local', local_path='.') and + any manifest local-path deps must be excluded from the apm-format + bundle's collected deployed_files. + """ + + def _setup_with_self_entry( + self, + tmp_path: Path, + remote_files: list[str], + local_files: list[str], + *, + local_path: str = ".", + is_dev: bool = True, + ) -> Path: + """Create a project where the lockfile has a remote dep AND a local entry.""" + project = tmp_path / "project" + project.mkdir() + + (project / "apm.yml").write_text( + yaml.dump({"name": "test-pkg", "version": "1.0.0"}), encoding="utf-8" + ) + + # Materialize all referenced files on disk (covers both groups) + for fpath in remote_files + local_files: + full = project / fpath + if fpath.endswith("/"): + full.mkdir(parents=True, exist_ok=True) + else: + full.parent.mkdir(parents=True, exist_ok=True) + full.write_text(f"content of {fpath}", encoding="utf-8") + + lockfile = LockFile() + lockfile.add_dependency( + LockedDependency( + repo_url="owner/remote-repo", + resolved_commit="abc123", + deployed_files=remote_files, + ) + ) + lockfile.add_dependency( + LockedDependency( + repo_url="" if local_path == "." else "local-pkg", + source="local", + local_path=local_path, + is_dev=is_dev, + deployed_files=local_files, + ) + ) + lockfile.write(project / "apm.lock.yaml") + return project + + def test_apm_format_excludes_self_entry_files(self, tmp_path): + """Self-entry (source=local, local_path='.') files are excluded from apm bundle.""" + remote = [".github/agents/remote.md"] + local = [".github/agents/local-self.md"] + project = self._setup_with_self_entry(tmp_path, remote, local) + out = tmp_path / "build" + + result = pack_bundle(project, out, fmt="apm") + + assert ".github/agents/remote.md" in result.files + assert ".github/agents/local-self.md" not in result.files + + def test_apm_format_excludes_local_path_dep_files(self, tmp_path): + """Manifest local-path deps (source=local, local_path='./pkg/...') excluded. + + L89-97 already rejects local manifest deps when apm.yml is parsed; this + test guards the lockfile-side path where the lockfile already contains + such a dep (e.g. produced by a prior install) but apm.yml has been + cleaned up. Behavior must remain consistent: do not bundle. + """ + remote = [".github/agents/remote.md"] + local = [".github/agents/from-local-dep.md"] + project = self._setup_with_self_entry( + tmp_path, + remote, + local, + local_path="./packages/foo", + is_dev=False, + ) + out = tmp_path / "build" + + result = pack_bundle(project, out, fmt="apm") + + assert ".github/agents/remote.md" in result.files + assert ".github/agents/from-local-dep.md" not in result.files + + def test_apm_format_all_remote_unchanged(self, tmp_path): + """No local deps -> all remote files included (no regression).""" + remote = [".github/agents/a.md", ".github/agents/b.md"] + project = _setup_project(tmp_path, remote) + out = tmp_path / "build" + + result = pack_bundle(project, out, fmt="apm") + + assert set(result.files) == set(remote) + + def test_apm_format_dry_run_excludes_local(self, tmp_path): + """Dry-run path also honours the source-local guard.""" + remote = [".github/agents/remote.md"] + local = [".github/agents/leaked.md"] + project = self._setup_with_self_entry(tmp_path, remote, local) + out = tmp_path / "build" + + result = pack_bundle(project, out, fmt="apm", dry_run=True) + + assert ".github/agents/remote.md" in result.files + assert ".github/agents/leaked.md" not in result.files + + def test_plugin_format_self_entry_not_processed_via_deps_loop(self, tmp_path): + """Regression: plugin exporter's deps loop must skip the self-entry. + + Plugin format does not use ``dep.deployed_files`` directly; instead it + resolves each dep's install path on disk and scans it. If the self-entry + (source='local', local_path='.', is_dev=True) were not filtered, the + deps loop would resolve its install path to the project root and + re-collect every own-component, generating spurious collisions against + step 6's own-component collection. + + The existing ``is_dev`` filter at plugin_exporter.py:473 prevents this. + This test guards that behavior by asserting no collision-from-deps is + reported when only the self-entry would have collided. + """ + from apm_cli.bundle.plugin_exporter import export_plugin_bundle + + project = tmp_path / "project" + project.mkdir() + (project / "apm.yml").write_text( + yaml.dump({"name": "test-pkg", "version": "1.0.0"}), encoding="utf-8" + ) + # Own component on disk under .apm/agents/ + (project / ".apm" / "agents").mkdir(parents=True) + (project / ".apm" / "agents" / "own.md").write_text("own", encoding="utf-8") + + # Lockfile contains ONLY a self-entry (is_dev=True). No remote deps. + lockfile = LockFile() + lockfile.add_dependency( + LockedDependency( + repo_url="", + source="local", + local_path=".", + is_dev=True, + deployed_files=[".apm/agents/own.md"], + ) + ) + lockfile.write(project / "apm.lock.yaml") + + result = export_plugin_bundle( + project_root=project, + output_dir=tmp_path / "plugin-out", + dry_run=True, + ) + + # Own component must appear exactly once -- if the self-entry leaked + # through the deps loop, we'd see a collision (deps run before own). + own_matches = [f for f in result.files if f.endswith("own.md")] + assert len(own_matches) == 1, ( + f"Self-entry should not be re-processed via deps loop; " + f"files={result.files}" + ) + + def test_plugin_format_self_entry_with_is_dev_false_would_leak(self, tmp_path): + """Confirm the is_dev filter is the active gate. + + If a self-entry slipped in WITHOUT is_dev=True (older lockfile, bug), + the plugin deps loop would attempt to process it. This negative test + documents the current behavior and guards the contract that the + synthesizer in lockfile-self-entry MUST set is_dev=True. + """ + from apm_cli.bundle.plugin_exporter import export_plugin_bundle + + project = tmp_path / "project" + project.mkdir() + (project / "apm.yml").write_text( + yaml.dump({"name": "test-pkg", "version": "1.0.0"}), encoding="utf-8" + ) + (project / ".apm" / "agents").mkdir(parents=True) + (project / ".apm" / "agents" / "own.md").write_text("own", encoding="utf-8") + + # Self-entry WITHOUT is_dev -- simulates a buggy synthesizer + lockfile = LockFile() + lockfile.add_dependency( + LockedDependency( + repo_url="", + source="local", + local_path=".", + is_dev=False, + deployed_files=[".apm/agents/own.md"], + ) + ) + lockfile.write(project / "apm.lock.yaml") + + # The deps loop will try _dep_install_path on the self-entry. Whether + # this raises, double-collects, or silently no-ops is implementation + # detail; the contract we assert is just that no exception escapes + # AND the own component still appears at least once. + result = export_plugin_bundle( + project_root=project, + output_dir=tmp_path / "plugin-out2", + dry_run=True, + ) + assert any(f.endswith("own.md") for f in result.files) + + +class TestPackBundleStripsLocalFields: + """Issue #887 N1: enriched bundle lockfile must not carry the packager's + own ``local_deployed_files`` / ``local_deployed_file_hashes``. + + Those fields describe packaging-time repo content, which is intentionally + excluded from the bundle by the source-local guard in pack_bundle(). + Persisting them would cause LockFile.from_yaml() on the consumer side to + synthesize a self-entry whose deployed_files do not exist in the bundle + source dir -- breaking unpacker verification (unpacker.py:129). + """ + + def _setup_with_local_content( + self, + tmp_path: Path, + remote_files: list[str], + local_files: list[str], + ) -> Path: + project = tmp_path / "project" + project.mkdir() + (project / "apm.yml").write_text( + yaml.dump({"name": "test-pkg", "version": "1.0.0"}), encoding="utf-8" + ) + for fpath in remote_files + local_files: + full = project / fpath + full.parent.mkdir(parents=True, exist_ok=True) + full.write_text(f"content of {fpath}", encoding="utf-8") + + lockfile = LockFile() + lockfile.add_dependency( + LockedDependency( + repo_url="owner/remote-repo", + resolved_commit="abc123", + deployed_files=remote_files, + ) + ) + # Persist packager's own local content via the flat fields. + lockfile.local_deployed_files = list(local_files) + lockfile.local_deployed_file_hashes = {f: "deadbeef" for f in local_files} + lockfile.write(project / "apm.lock.yaml") + return project + + def test_pack_apm_bundle_excludes_local_self_entry(self, tmp_path): + """Bundle lockfile has no self-entry and no flat local-content fields.""" + remote = [".github/agents/remote.md"] + local = [".github/agents/own.md", ".github/instructions/own.md"] + project = self._setup_with_local_content(tmp_path, remote, local) + out = tmp_path / "build" + + result = pack_bundle(project, out, fmt="apm") + + # File payload: no local content shipped (existing source-local guard) + assert ".github/agents/remote.md" in result.files + for f in local: + assert f not in result.files + + # Bundle lockfile: must not carry packager-side local-content metadata + bundle_lock_text = (result.bundle_path / "apm.lock.yaml").read_text( + encoding="utf-8" + ) + bundle_lock = yaml.safe_load(bundle_lock_text) + assert "local_deployed_files" not in bundle_lock + assert "local_deployed_file_hashes" not in bundle_lock + + # Round-trip through LockFile.from_yaml() must NOT synthesize a self-entry + # (since the flat fields are absent, the synthesizer guard short-circuits). + # Strip the pack: section first since LockFile parser ignores it. + loaded = LockFile.from_yaml(bundle_lock_text) + assert loaded.local_deployed_files == [] + assert loaded.local_deployed_file_hashes == {} + for dep in loaded.get_all_dependencies(): + assert dep.source != "local", ( + f"Unexpected local-source dep in bundle lockfile: {dep.repo_url}" + ) + assert dep.repo_url != "" + + def test_pack_plugin_bundle_excludes_local_self_entry(self, tmp_path): + """Plugin format does not write a lockfile, but the file payload must + still exclude packager-side local content. Confirms the plugin path + (plugin_exporter.py is_dev filter at L470-476) keeps local content out. + """ + from apm_cli.bundle.plugin_exporter import export_plugin_bundle + + project = tmp_path / "project" + project.mkdir() + (project / "apm.yml").write_text( + yaml.dump({"name": "test-pkg", "version": "1.0.0"}), encoding="utf-8" + ) + # Packager's own component on disk + (project / ".apm" / "agents").mkdir(parents=True) + (project / ".apm" / "agents" / "own.md").write_text("own", encoding="utf-8") + + # Lockfile carries flat local-content fields (synthesizes a self-entry + # via from_yaml). The plugin exporter must still treat the project's + # own components as the canonical source (collected once in step 6), + # not double-process them via the deps loop. + lockfile = LockFile() + lockfile.local_deployed_files = [".apm/agents/own.md"] + lockfile.local_deployed_file_hashes = {".apm/agents/own.md": "deadbeef"} + lockfile.write(project / "apm.lock.yaml") + + result = export_plugin_bundle( + project_root=project, + output_dir=tmp_path / "plugin-out", + dry_run=True, + ) + + # Own component appears exactly once -- no self-entry leakage from deps loop + own_matches = [f for f in result.files if f.endswith("own.md")] + assert len(own_matches) == 1, ( + f"Self-entry leaked into plugin deps loop; files={result.files}" + ) + + def test_unpack_bundle_with_no_local_content(self, tmp_path): + """Round-trip: pack a project with packager local content, then unpack. + + Without the N1 fix the unpacker would fail verification because the + bundle lockfile would synthesize a self-entry whose deployed_files + (the packager's own .github/.. paths) do not exist under the bundle + source dir. + """ + from apm_cli.bundle.unpacker import unpack_bundle + + remote = [".github/agents/remote.md"] + local = [".github/agents/own.md"] + project = self._setup_with_local_content(tmp_path, remote, local) + out = tmp_path / "build" + + result = pack_bundle(project, out, fmt="apm") + + # Unpack into a fresh dest -- must NOT raise about missing local files. + dest = tmp_path / "unpacked" + dest.mkdir() + unpack_result = unpack_bundle(result.bundle_path, dest) + assert unpack_result is not None + + def test_enrich_lockfile_strips_local_fields(self): + """Direct unit test of enrich_lockfile_for_pack(): the returned YAML + must not contain local_deployed_files / local_deployed_file_hashes, + regardless of whether the input lockfile carries them. + """ + from apm_cli.bundle.lockfile_enrichment import enrich_lockfile_for_pack + + lock = LockFile() + lock.add_dependency( + LockedDependency( + repo_url="owner/r", + resolved_commit="abc", + deployed_files=[".github/agents/r.md"], + ) + ) + lock.local_deployed_files = [".github/agents/own.md"] + lock.local_deployed_file_hashes = {".github/agents/own.md": "h"} + + enriched_yaml = enrich_lockfile_for_pack(lock, fmt="apm", target="copilot") + + # Strings should not even appear as keys + parsed = yaml.safe_load(enriched_yaml) + assert "local_deployed_files" not in parsed + assert "local_deployed_file_hashes" not in parsed + # Pack metadata still present + assert "pack" in parsed + assert parsed["pack"]["format"] == "apm" + # Original lockfile object is not mutated + assert lock.local_deployed_files == [".github/agents/own.md"] + assert lock.local_deployed_file_hashes == {".github/agents/own.md": "h"} diff --git a/tests/unit/test_self_entry_caller_guards.py b/tests/unit/test_self_entry_caller_guards.py new file mode 100644 index 000000000..0c967370f --- /dev/null +++ b/tests/unit/test_self_entry_caller_guards.py @@ -0,0 +1,124 @@ +"""Regression tests ensuring callers do not crash on the virtual self-entry. + +Issue #887 introduced a synthesized "." self-entry in the lockfile representing +the project's own local content. Callers that interpret entries as installable +remote packages (paths via to_dependency_ref(), apm_modules/, etc.) +must skip it. These tests pin the contract for representative call sites. +""" + +from pathlib import Path +from unittest.mock import patch + +from apm_cli.commands.uninstall.engine import _validate_uninstall_packages +from apm_cli.core.command_logger import CommandLogger +from apm_cli.deps.lockfile import LockFile, LockedDependency, _SELF_KEY +from apm_cli.integration.skill_integrator import SkillIntegrator + + +def _lockfile_with_self_and_remote() -> LockFile: + lock = LockFile( + lockfile_version="1", + generated_at="2025-01-01T00:00:00+00:00", + apm_version="0.0.0-test", + ) + lock.add_dependency( + LockedDependency( + repo_url="owner/repo", + resolved_commit="a" * 40, + depth=1, + deployed_files=[".github/skills/remote-skill/SKILL.md"], + deployed_file_hashes={".github/skills/remote-skill/SKILL.md": "deadbeef"}, + ) + ) + lock.local_deployed_files = [".github/skills/local-skill/SKILL.md"] + lock.local_deployed_file_hashes = { + ".github/skills/local-skill/SKILL.md": "1111111111111111", + } + # Round-trip so the synthesized "." entry is materialized. + return LockFile.from_yaml(lock.to_yaml()) + + +# --------------------------------------------------------------------------- +# get_installed_paths() must skip the self-entry +# --------------------------------------------------------------------------- + + +class TestGetInstalledPathsSkipsSelfEntry: + def test_self_entry_not_in_installed_paths(self, tmp_path): + lock = _lockfile_with_self_and_remote() + # Sanity: the self-entry must be present in raw dependencies. + assert _SELF_KEY in lock.dependencies + + paths = lock.get_installed_paths(tmp_path / "apm_modules") + + # The self-entry's local_path is "." which would resolve to the + # apm_modules dir itself; ensure it is filtered out. + assert "." not in paths + assert "owner/repo" in paths + + def test_only_self_entry_returns_empty(self, tmp_path): + lock = LockFile( + lockfile_version="1", + generated_at="2025-01-01T00:00:00+00:00", + ) + lock.local_deployed_files = [".github/skills/local/SKILL.md"] + lock.local_deployed_file_hashes = { + ".github/skills/local/SKILL.md": "abc", + } + lock = LockFile.from_yaml(lock.to_yaml()) + + # Must not crash trying to parse "" as a repo URL. + paths = lock.get_installed_paths(tmp_path / "apm_modules") + assert paths == [] + + +# --------------------------------------------------------------------------- +# apm uninstall must not crash +# --------------------------------------------------------------------------- + + +class TestUninstallRejectsSelfKey: + def test_uninstall_dot_does_not_crash(self): + """Trying to uninstall the literal '.' returns 'not found', no crash.""" + logger = CommandLogger(command="uninstall", verbose=False) + # current_deps are real apm.yml dependency entries; "." is not one. + to_remove, not_found = _validate_uninstall_packages( + packages=["."], + current_deps=["owner/repo"], + logger=logger, + ) + # "." has no "/", so the validator rejects it as invalid format and + # skips it entirely - never reaches the not-found list. + assert to_remove == [] + assert "." not in to_remove + + +# --------------------------------------------------------------------------- +# SkillIntegrator ownership-map iteration must skip the self-entry +# --------------------------------------------------------------------------- + + +class TestSkillIntegratorSkipsSelfEntry: + def test_ownership_map_excludes_self_entry(self, tmp_path): + """The self-entry's local skills must not pollute the package-owner map. + + The self-entry has repo_url='' / virtual_path=None which would + give a bogus short_owner of ''. It must be filtered out. + """ + lock = _lockfile_with_self_and_remote() + # The self-entry has a deployed skill file that would otherwise land + # in owned_by under owner ''. + + with patch( + "apm_cli.deps.lockfile.LockFile.read", return_value=lock + ), patch( + "apm_cli.deps.lockfile.get_lockfile_path", + return_value=tmp_path / "apm.lock", + ): + owned_by, native_owners = SkillIntegrator._build_ownership_maps(tmp_path) + + # Self-entry's owner string would have been '' if not skipped. + assert "" not in owned_by.values() + assert "" not in native_owners.values() + # The remote dep's skill leaf-name is 'SKILL.md' (last path segment). + assert owned_by.get("SKILL.md") == "repo" diff --git a/uv.lock b/uv.lock index b28996d6e..e9eca01d9 100644 --- a/uv.lock +++ b/uv.lock @@ -179,7 +179,7 @@ wheels = [ [[package]] name = "apm-cli" -version = "0.9.1" +version = "0.10.0" source = { editable = "." } dependencies = [ { name = "click" },