diff --git a/Cargo.lock b/Cargo.lock index 30ad3866a4..68ded5c385 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5413,6 +5413,8 @@ dependencies = [ "perry-diagnostics", "perry-parser", "perry-types", + "serde", + "serde_json", "swc_common", "swc_ecma_ast", "thiserror 1.0.69", diff --git a/crates/perry-hir/Cargo.toml b/crates/perry-hir/Cargo.toml index 46392b8984..ea00f0c50c 100644 --- a/crates/perry-hir/Cargo.toml +++ b/crates/perry-hir/Cargo.toml @@ -14,6 +14,8 @@ swc_common.workspace = true thiserror.workspace = true anyhow.workspace = true +serde = { workspace = true } +serde_json = { workspace = true } [dev-dependencies] perry-parser.workspace = true diff --git a/crates/perry-hir/src/audit.rs b/crates/perry-hir/src/audit.rs new file mode 100644 index 0000000000..c25ddb06a6 --- /dev/null +++ b/crates/perry-hir/src/audit.rs @@ -0,0 +1,410 @@ +//! #495 — behavioral SBOM emitted at compile time. +//! +//! Walks the HIR for each source module and collects a per-module +//! manifest of stdlib symbols actually called. The manifest is the +//! foundation for the rest of the supply-chain hardening series: +//! +//! - `#501` consumes it to enforce host-controlled per-package +//! capabilities (e.g. "this dep must not call `child_process.*`"). +//! - `#496` (`--lockdown`) flags violations from the same data. +//! - Reviewers can diff a `package.json` change's effect on the +//! binary's behavioral surface without re-running the build. +//! +//! Scope of this first cut (MVP): stdlib symbol calls only. Literal +//! hosts/URLs (#502) and native-library symbol references (FFI +//! registry) are tracked separately and will graft onto the same +//! manifest in follow-up PRs. The JSON shape is versioned so future +//! additions don't break consumers. + +use crate::ir::{Expr, Module, Stmt}; +use crate::walker::walk_expr_children; +use std::collections::BTreeMap; + +/// Per-module audit record. 
Keys are sorted (BTreeMap) so the +/// serialized JSON is byte-deterministic across builds — critical +/// for the `perry audit --diff` workflow. +#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize, PartialEq, Eq)] +pub struct ModuleAudit { + /// Source path the module was lowered from. Absolute / canonical + /// when known; matches `LoweringContext::source_file_path`. + pub source: String, + /// npm package name when `source` resolves through + /// `node_modules//...`. `None` for host source. + pub package: Option, + /// stdlib namespace → sorted unique method names called by this + /// module. Method names match the `NativeMethodCall::method` + /// field — i.e. the symbol as it appears in user source after + /// alias resolution. + pub stdlib: BTreeMap>, +} + +/// Top-level audit manifest. Version is bumped if the JSON shape +/// changes incompatibly (`stdlib` is open for extension within v1). +#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize, PartialEq, Eq)] +pub struct AuditManifest { + pub version: u32, + pub modules: Vec, +} + +impl AuditManifest { + pub fn new() -> Self { + Self { + version: 1, + modules: Vec::new(), + } + } +} + +/// Walk a single HIR `Module` and return its `ModuleAudit`. The walk +/// visits `init` (top-level statements), every function body, and +/// every method on every class. `NativeMethodCall::module` / +/// `::method` pairs are folded into the `stdlib` map. +pub fn audit_module(hir_module: &Module, source: &str) -> ModuleAudit { + let mut record = ModuleAudit { + source: source.to_string(), + package: package_name_for_source_path(source).map(|s| s.to_string()), + stdlib: BTreeMap::new(), + }; + + // The HIR may carry a `Stmt::Expr(expr)` shape where the expr + // itself contains nested calls; the walker recurses through + // all Expr children, so we only need to visit each top-level + // Expr once. 
+ for stmt in &hir_module.init { + visit_stmt(stmt, &mut record); + } + for func in &hir_module.functions { + for stmt in &func.body { + visit_stmt(stmt, &mut record); + } + } + for class in &hir_module.classes { + for method in &class.methods { + for stmt in &method.body { + visit_stmt(stmt, &mut record); + } + } + } + + // Deduplicate + sort within each namespace bucket so the + // serialized JSON is stable across builds. + for methods in record.stdlib.values_mut() { + methods.sort(); + methods.dedup(); + } + + record +} + +fn visit_stmt(stmt: &Stmt, out: &mut ModuleAudit) { + match stmt { + Stmt::Expr(e) => visit_expr(e, out), + Stmt::Let { init, .. } => { + if let Some(v) = init { + visit_expr(v, out); + } + } + Stmt::Return(Some(e)) => visit_expr(e, out), + Stmt::Return(None) | Stmt::Break | Stmt::Continue => {} + Stmt::LabeledBreak(_) | Stmt::LabeledContinue(_) => {} + Stmt::Labeled { body, .. } => visit_stmt(body, out), + Stmt::If { + condition, + then_branch, + else_branch, + } => { + visit_expr(condition, out); + for s in then_branch { + visit_stmt(s, out); + } + if let Some(else_b) = else_branch { + for s in else_b { + visit_stmt(s, out); + } + } + } + Stmt::While { condition, body } | Stmt::DoWhile { body, condition } => { + visit_expr(condition, out); + for s in body { + visit_stmt(s, out); + } + } + Stmt::For { + init, + condition, + update, + body, + } => { + if let Some(init) = init { + visit_stmt(init, out); + } + if let Some(c) = condition { + visit_expr(c, out); + } + if let Some(u) = update { + visit_expr(u, out); + } + for s in body { + visit_stmt(s, out); + } + } + Stmt::Throw(e) => visit_expr(e, out), + Stmt::Try { + body, + catch, + finally, + } => { + for s in body { + visit_stmt(s, out); + } + if let Some(catch_clause) = catch { + for s in &catch_clause.body { + visit_stmt(s, out); + } + } + if let Some(finally_b) = finally { + for s in finally_b { + visit_stmt(s, out); + } + } + } + Stmt::Switch { + discriminant, + cases, + } => { + 
visit_expr(discriminant, out); + for case in cases { + if let Some(test) = &case.test { + visit_expr(test, out); + } + for s in &case.body { + visit_stmt(s, out); + } + } + } + // PreallocateBoxes carries only LocalIds, no Expr / Stmt children. + Stmt::PreallocateBoxes(_) => {} + } +} + +fn visit_expr(expr: &Expr, out: &mut ModuleAudit) { + // General-shape native calls (`mysql2.createConnection`, + // `child_process.execSync`, `crypto.randomUUID`, …) carry the + // namespace and method by name on the variant. + if let Expr::NativeMethodCall { module, method, .. } = expr { + record_call(out, module, method); + } + // The HIR also has dedicated variants for hot stdlib symbols + // (folded at lowering time for codegen specialization). The + // audit needs to see those too — otherwise a host that only + // calls `fs.readFileSync` would appear to make zero stdlib + // calls, defeating the SBOM. Mapping is mechanical: + // `Expr::Fs` → ("fs", ""). Keep this exhaustive + // for the namespaces that matter to supply-chain review (`fs`, + // `path`, `process`); extend opportunistically for others. + if let Some((module, method)) = specialized_stdlib_call(expr) { + record_call(out, module, method); + } + walk_expr_children(expr, &mut |child| visit_expr(child, out)); +} + +/// Map specialized `Expr::Fs*` / `Expr::Path*` / `Expr::Process*` / +/// `Expr::Tty*` variants to the `(namespace, method)` pair that the +/// equivalent un-specialized call would have produced. Returning +/// `None` is the catch-all — the walker still descends into children +/// for those variants so nested calls aren't missed. +fn specialized_stdlib_call(expr: &Expr) -> Option<(&'static str, &'static str)> { + Some(match expr { + // fs — paths involving the filesystem are the highest-signal + // capability check for supply-chain review. 
+ Expr::FsReadFileSync(_) => ("fs", "readFileSync"), + Expr::FsWriteFileSync(_, _) => ("fs", "writeFileSync"), + Expr::FsExistsSync(_) => ("fs", "existsSync"), + Expr::FsMkdirSync(_) => ("fs", "mkdirSync"), + Expr::FsUnlinkSync(_) => ("fs", "unlinkSync"), + Expr::FsAppendFileSync(_, _) => ("fs", "appendFileSync"), + Expr::FsReadFileBinary(_) => ("fs", "readFile"), + Expr::FsRmRecursive(_) => ("fs", "rm"), + // path — pure-string transforms, lower security-signal but + // included so `perry audit --sbom` shows the full surface. + Expr::PathJoin(_, _) | Expr::PathResolveJoin(_, _) | Expr::PathWin32Join(_, _) => { + ("path", "join") + } + Expr::PathDirname(_) => ("path", "dirname"), + Expr::PathBasename(_) | Expr::PathBasenameExt(_, _) => ("path", "basename"), + Expr::PathExtname(_) => ("path", "extname"), + Expr::PathResolve(_) => ("path", "resolve"), + Expr::PathIsAbsolute(_) => ("path", "isAbsolute"), + Expr::PathRelative(_, _) => ("path", "relative"), + Expr::PathNormalize(_) => ("path", "normalize"), + Expr::PathParse(_) => ("path", "parse"), + Expr::PathFormat(_) => ("path", "format"), + Expr::PathSep => ("path", "sep"), + Expr::PathDelimiter => ("path", "delimiter"), + Expr::PathToNamespacedPath(_) => ("path", "toNamespacedPath"), + Expr::PathMatchesGlob(_, _) => ("path", "matchesGlob"), + // process — `process.env` etc. are accessed via dedicated + // HIR variants. The SBOM should reflect that the binary + // touches them. + Expr::ProcessEnv => ("process", "env"), + Expr::ProcessUptime => ("process", "uptime"), + Expr::ProcessCwd => ("process", "cwd"), + Expr::ProcessArgv => ("process", "argv"), + Expr::ProcessStdinIsTTY => ("process", "stdin.isTTY"), + Expr::ProcessStdoutIsTTY => ("process", "stdout.isTTY"), + Expr::ProcessStderrIsTTY => ("process", "stderr.isTTY"), + Expr::ProcessStdoutColumns => ("process", "stdout.columns"), + Expr::ProcessStdoutRows => ("process", "stdout.rows"), + // tty — TTY tests for terminal detection. 
/// Extract the owning npm package name from a source-file path.
///
/// The name is taken from the path segment(s) that follow the rightmost
/// `node_modules` directory, so for nested installs the innermost
/// package wins. Scoped packages are returned as `@scope/pkg`.
///
/// Returns `None` when the path contains no `node_modules` directory, or
/// when nothing usable follows it (empty segment, a bare `@scope` with no
/// package, or an empty scope).
///
/// `node_modules` must be a complete path segment: a directory such as
/// `my_node_modules/` is not treated as a package root. Only `/` is
/// recognised as a separator.
///
/// The returned slice borrows from `source_path`; no allocation happens.
fn package_name_for_source_path(source_path: &str) -> Option<&str> {
    const MARKER: &str = "node_modules/";

    // Scan right-to-left and take the first occurrence that starts a path
    // segment (start of string or preceded by '/'). A plain `rfind` would
    // also match the tail of look-alike names such as `old_node_modules/`.
    let (marker_at, _) = source_path
        .rmatch_indices(MARKER)
        .find(|&(at, _)| at == 0 || source_path.as_bytes()[at - 1] == b'/')?;

    let after = &source_path[marker_at + MARKER.len()..];
    let mut segments = after.split('/');
    let first = segments.next().filter(|segment| !segment.is_empty())?;

    if !first.starts_with('@') {
        return Some(first);
    }

    // Scoped package: needs a non-empty scope after '@' and a non-empty
    // package segment after the scope.
    let pkg = segments.next().filter(|segment| !segment.is_empty())?;
    if first.len() == 1 {
        return None;
    }
    // `first` + '/' + `pkg`, sliced out of the original string.
    Some(&after[..first.len() + 1 + pkg.len()])
}
"readFileSync"))); + let rec = audit_module(&m, "/repo/src/test.ts"); + assert_eq!(rec.stdlib.get("fs"), Some(&vec!["readFileSync".into()])); + } + + #[test] + fn duplicate_calls_dedupe() { + let mut m = empty_module("test"); + m.init.push(Stmt::Expr(native_call("fs", "readFileSync"))); + m.init.push(Stmt::Expr(native_call("fs", "readFileSync"))); + m.init.push(Stmt::Expr(native_call("fs", "writeFileSync"))); + let rec = audit_module(&m, "/repo/src/test.ts"); + // Sorted + deduped: ["readFileSync", "writeFileSync"]. + assert_eq!( + rec.stdlib.get("fs"), + Some(&vec!["readFileSync".into(), "writeFileSync".into()]) + ); + } + + #[test] + fn package_name_extracted_from_node_modules_path() { + let m = empty_module("test"); + let rec = audit_module(&m, "/repo/node_modules/lodash/lib/x.ts"); + assert_eq!(rec.package.as_deref(), Some("lodash")); + } + + #[test] + fn scoped_package_name_extracted() { + let m = empty_module("test"); + let rec = audit_module(&m, "/repo/node_modules/@scope/pkg/src/x.ts"); + assert_eq!(rec.package.as_deref(), Some("@scope/pkg")); + } + + #[test] + fn nested_node_modules_returns_innermost() { + let m = empty_module("test"); + let rec = audit_module(&m, "/repo/node_modules/outer/node_modules/inner/lib/x.ts"); + assert_eq!(rec.package.as_deref(), Some("inner")); + } + + #[test] + fn user_source_has_no_package() { + let m = empty_module("test"); + let rec = audit_module(&m, "/repo/src/main.ts"); + assert!(rec.package.is_none()); + } + + #[test] + fn nested_call_recorded() { + // The walker recurses through Expr children — a NativeMethodCall + // buried under e.g. a Stmt::If condition still surfaces. 
+ let mut m = empty_module("test"); + m.init.push(Stmt::If { + condition: native_call("process", "uptime"), + then_branch: vec![Stmt::Expr(native_call("fs", "readFileSync"))], + else_branch: None, + }); + let rec = audit_module(&m, "/repo/src/test.ts"); + assert_eq!(rec.stdlib.get("process"), Some(&vec!["uptime".into()])); + assert_eq!(rec.stdlib.get("fs"), Some(&vec!["readFileSync".into()])); + } + + #[test] + fn serializes_to_stable_json() { + let mut m = empty_module("test"); + m.init.push(Stmt::Expr(native_call("fs", "writeFileSync"))); + m.init.push(Stmt::Expr(native_call("fs", "readFileSync"))); + let rec = audit_module(&m, "/repo/src/test.ts"); + let manifest = AuditManifest { + version: 1, + modules: vec![rec], + }; + let json = serde_json::to_string(&manifest).unwrap(); + // BTreeMap + sort_unstable on the method vec means the + // output ordering is independent of insertion order. + assert!( + json.contains("\"fs\":[\"readFileSync\",\"writeFileSync\"]"), + "unexpected: {json}" + ); + } +} diff --git a/crates/perry-hir/src/lib.rs b/crates/perry-hir/src/lib.rs index afa9b86004..b84e181459 100644 --- a/crates/perry-hir/src/lib.rs +++ b/crates/perry-hir/src/lib.rs @@ -5,6 +5,7 @@ pub mod analysis; pub mod capability; +pub mod audit; pub(crate) mod destructuring; pub mod dynamic_import; pub mod egress; @@ -24,6 +25,7 @@ pub mod walker; pub use analysis::{collect_local_refs_expr, collect_local_refs_stmt}; pub use capability::{audit_module_capabilities, CapabilityPolicy, CapabilityViolation}; +pub use audit::{audit_module, AuditManifest, ModuleAudit}; pub use dynamic_import::{ collect_module_const_locals, detect_top_level_await, flatten_exports, for_each_dynamic_import_mut, resolve_import_path, resolve_import_path_with_consts, FlatExport, diff --git a/crates/perry/src/commands/audit.rs b/crates/perry/src/commands/audit.rs index c992b26de0..cd563631b2 100644 --- a/crates/perry/src/commands/audit.rs +++ b/crates/perry/src/commands/audit.rs @@ -41,6 +41,15 @@ pub 
struct AuditArgs { /// Verify service URL #[arg(long, default_value = "https://verify.perryts.com")] pub verify_url: String, + + /// #495: print the local behavioral SBOM produced at the last + /// compile (`.perry-cache/audit.json` under the current project + /// root). Per-module list of stdlib symbols actually called, + /// keyed by source file with the owning npm package name when + /// the source lives under `node_modules//...`. When this + /// flag is set, the remote security scan is *not* invoked. + #[arg(long)] + pub sbom: bool, } // --- Response types matching perry-verify/src/audit/types.ts --- @@ -340,7 +349,99 @@ fn display_audit_results(audit: &AuditResponse, fail_on: &str) { } /// Entry point for `perry audit` command +/// #495: print the local behavioral SBOM emitted by the last +/// `perry compile`/`perry run` into `/.perry-cache/audit.json`. +/// In Text mode, formats a per-module breakdown grouped by owning npm +/// package; in JSON mode, dumps the raw manifest. Returns a clear +/// error if the manifest doesn't exist yet (build first). +fn print_local_sbom(path_arg: &str, format: OutputFormat) -> Result<()> { + let root = std::path::PathBuf::from(path_arg); + let root = root.canonicalize().unwrap_or(root); + // Walk up to find a directory containing `.perry-cache/audit.json` + // — same shape `perry compile` walks up to find `package.json`, + // so `perry audit --sbom` works from anywhere in the project tree. 
+ let manifest_path = { + let mut dir = root.clone(); + loop { + let candidate = dir.join(".perry-cache").join("audit.json"); + if candidate.exists() { + break Some(candidate); + } + if !dir.pop() { + break None; + } + } + }; + let Some(manifest_path) = manifest_path else { + bail!( + "no .perry-cache/audit.json found under `{}` — run `perry compile` or `perry run` first; the manifest is written on every successful build.", + root.display() + ); + }; + let raw = fs::read_to_string(&manifest_path) + .with_context(|| format!("Failed to read {}", manifest_path.display()))?; + let manifest: perry_hir::AuditManifest = serde_json::from_str(&raw) + .with_context(|| format!("Failed to parse {}", manifest_path.display()))?; + + match format { + OutputFormat::Json => { + // Pretty-print so `perry audit --sbom --format json | jq` is + // pleasant; downstream tools that need compact output can + // re-serialize. + println!("{}", serde_json::to_string_pretty(&manifest)?); + } + OutputFormat::Text => { + print_sbom_text(&manifest); + } + } + Ok(()) +} + +/// Human-readable SBOM summary. Groups modules by owning package +/// (host source is reported under ``), then lists each module's +/// stdlib calls as `: method1, method2, ...`. +fn print_sbom_text(manifest: &perry_hir::AuditManifest) { + use std::collections::BTreeMap; + + println!("Behavioral SBOM (perry audit --sbom)"); + println!(" manifest version: {}", manifest.version); + println!(" modules: {}", manifest.modules.len()); + println!(); + + // Group by package; None → host source bucket. 
+ let mut by_pkg: BTreeMap, Vec<&perry_hir::ModuleAudit>> = BTreeMap::new(); + for module in &manifest.modules { + by_pkg + .entry(module.package.clone()) + .or_default() + .push(module); + } + for (pkg, modules) in &by_pkg { + match pkg { + None => println!("== =="), + Some(name) => println!("== {} ==", name), + } + for module in modules { + if module.stdlib.is_empty() { + println!(" {} (no stdlib calls)", module.source); + continue; + } + println!(" {}", module.source); + for (ns, methods) in &module.stdlib { + println!(" {}: {}", ns, methods.join(", ")); + } + } + println!(); + } +} + pub fn run(args: AuditArgs, format: OutputFormat, _use_color: bool) -> Result<()> { + // #495: `--sbom` switches to the local behavioral-SBOM viewer. + // No tokio runtime needed — we just read a JSON file and print + // it. Short-circuits before the remote-scan path below. + if args.sbom { + return print_local_sbom(&args.path, format); + } let rt = tokio::runtime::Runtime::new()?; rt.block_on(async { let path = std::path::PathBuf::from(&args.path); diff --git a/crates/perry/src/commands/compile.rs b/crates/perry/src/commands/compile.rs index 8df668d118..a601070efc 100644 --- a/crates/perry/src/commands/compile.rs +++ b/crates/perry/src/commands/compile.rs @@ -130,6 +130,29 @@ fn package_name_for_path(source_path: &str) -> Option { } } +/// #495: serialize the per-module behavioral SBOM to +/// `.perry-cache/audit.json` under the current project root. Walks +/// every collected native HIR module (skips JS-runtime modules — they +/// don't have HIR), groups records by stable canonical-path order +/// so the JSON is byte-deterministic across builds. +fn write_audit_manifest(ctx: &CompilationContext) -> std::io::Result<()> { + let mut manifest = perry_hir::AuditManifest::new(); + // BTreeMap iteration is sorted by key; native_modules is keyed by + // PathBuf so the resulting Vec is in stable filesystem order. 
+ for (path, hir_module) in &ctx.native_modules { + let source = path.to_string_lossy().into_owned(); + let record = perry_hir::audit_module(hir_module, &source); + manifest.modules.push(record); + } + let dir = ctx.project_root.join(".perry-cache"); + fs::create_dir_all(&dir)?; + let path = dir.join("audit.json"); + let json = serde_json::to_string_pretty(&manifest) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?; + fs::write(&path, json)?; + Ok(()) +} + fn package_bundle_id_from_input(input: &Path) -> Option { let mut dir = input.canonicalize().ok()?; if dir.is_file() { @@ -2022,6 +2045,23 @@ pub fn run_with_parse_cache( } } + // #495: emit a behavioral SBOM at `.perry-cache/audit.json`. The + // manifest captures, per source module, the stdlib symbols + // actually called from the lowered HIR. Foundation for the + // host-controlled per-package capability enforcement issue (#501) + // and for `perry audit --diff` change review. Best-effort write + // — a missing directory or filesystem error is logged but + // doesn't fail the build, since the SBOM is observational + // metadata, not a correctness gate. 
+ if let Err(e) = write_audit_manifest(&ctx) { + match format { + OutputFormat::Text => { + eprintln!("warning: failed to write .perry-cache/audit.json: {}", e); + } + OutputFormat::Json => {} + } + } + if args.enable_geisterhand || args.geisterhand_port.is_some() { ctx.needs_geisterhand = true; if let Some(port) = args.geisterhand_port { diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md index f2e218abc6..7e339bf578 100644 --- a/docs/src/SUMMARY.md +++ b/docs/src/SUMMARY.md @@ -145,6 +145,7 @@ - [`--lockdown`](cli/lockdown.md) - [Egress Allowlist (`allowedHosts`)](cli/allowed-hosts.md) - [Per-Package Capabilities (`perry.permissions`)](cli/capabilities.md) +- [`perry audit --sbom`](cli/perry-audit-sbom.md) - [perry.toml Reference](cli/perry-toml.md) --- diff --git a/docs/src/cli/perry-audit-sbom.md b/docs/src/cli/perry-audit-sbom.md new file mode 100644 index 0000000000..efc91bab31 --- /dev/null +++ b/docs/src/cli/perry-audit-sbom.md @@ -0,0 +1,103 @@ +# Behavioral SBOM (`perry audit --sbom`) + +Every Perry compile writes a behavioral SBOM to +`<project-root>/.perry-cache/audit.json` — a per-module manifest of the +stdlib symbols the build actually calls. The manifest is the +foundation for the rest of the supply-chain hardening series and gives +reviewers a way to see exactly what surface a dependency touches +without rebuilding the binary. + +**Zero runtime cost.** The walk runs at compile time over the lowered +HIR; the file is written observationally and a missing-directory +error never fails the build. + +## What's recorded + +For each source module: + +- **`source`** — canonical path the module was lowered from. +- **`package`** — owning npm package name when the source lives + under `node_modules/<pkg>/...` (scope-aware: `@scope/pkg`). + `null` for host source. +- **`stdlib`** — map of `<namespace>` → sorted unique method names. 
+ Captures both the general-shape `NativeMethodCall` lowering + (`mysql2.createConnection`, `child_process.execSync`, …) and the + dedicated specialized variants Perry uses for hot paths + (`fs.readFileSync`, `path.join`, `process.env`, `tty.isatty`, + `url.fileURLToPath`, …). + +## Example + +A `main.ts` like: + +```typescript,no-test +import * as fs from "fs"; +import * as path from "path"; + +const data = fs.readFileSync("/etc/hostname", "utf8"); +const p = path.join("/tmp", "x"); +console.log(data, p); +``` + +produces: + +```json +{ + "version": 1, + "modules": [ + { + "source": "/repo/main.ts", + "package": null, + "stdlib": { + "fs": ["readFileSync"], + "path": ["join"] + } + } + ] +} +``` + +The JSON output is byte-deterministic across builds (BTreeMap keys and +sorted method lists), so `perry audit --sbom > before.txt`, a +`package.json` change, a re-build, `perry audit --sbom > after.txt`, +and `diff before.txt after.txt` together make a meaningful review tool — any new +capability a dependency reaches surfaces as added lines. + +## CLI + +`perry audit --sbom [PATH]` + +- Reads the manifest from `<PATH>/.perry-cache/audit.json`, walking + up the directory tree if needed (same shape `perry compile` walks + up to find `package.json`). +- Default `PATH`: current directory. +- In `--format json` mode dumps the raw manifest pretty-printed. +- In text mode groups modules by owning npm package; host source is + reported under `<host>`. +- Returns a clear error if the manifest doesn't exist yet — `perry + compile` or `perry run` writes it on every successful build. + +## What's NOT yet recorded + +Scope of this first cut (MVP): + +- **Literal `fetch` / `http.get` URLs** — covered separately by + [`#502`](https://github.com/PerryTS/perry/issues/502) which the + manifest will graft onto under a `literal_hosts` key. +- **Native-library symbol references** (FFI registry) — tracked in + the perry-codegen FFI registry and will graft onto the manifest + under a `native_symbols` key. 
+- **`perry audit --sbom --diff`** — the byte-deterministic JSON + shape already enables the diff workflow via plain `diff` / + `git diff`; a built-in `--diff` is a follow-up that picks a + baseline (`.perry-cache/audit.last.json`) and pretty-prints the + change set. + +The manifest shape is versioned (`version: 1`) so consumers can +detect when new top-level keys land. + +## See also + +- [`#495`](https://github.com/PerryTS/perry/issues/495) — design discussion. +- The wider supply-chain hardening series + ([`#495`–`#506`](https://github.com/PerryTS/perry/issues?q=is%3Aissue+label%3Aenhancement+security)).