Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 48 additions & 5 deletions src/pipeline/pass_definitions.c
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,21 @@ int cbm_pipeline_pass_definitions(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t
int total_imports = 0;
int errors = 0;

/* Sequential pass must extract all defs (which create Module/Function/...
* nodes) BEFORE resolving imports — otherwise a workspace import in the
* first file processed can't find the target Module node, because the
* target file's defs haven't been extracted yet. Result cache is
* required for this two-phase ordering. */
CBMFileResult **local_cache = ctx->result_cache;
bool owns_local_cache = false;
if (!local_cache) {
local_cache = (CBMFileResult **)calloc((size_t)file_count, sizeof(CBMFileResult *));
owns_local_cache = (local_cache != NULL);
}

/* Phase 1: extract every file and create def-derived nodes (Modules,
* Functions, ...) so any file's IMPORTS can resolve against the
* complete in-memory graph in Phase 2. */
for (int i = 0; i < file_count; i++) {
if (cbm_pipeline_check_cancel(ctx)) {
return CBM_NOT_FOUND;
Expand Down Expand Up @@ -371,17 +386,45 @@ int cbm_pipeline_pass_definitions(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t
/* Store calls for pass_calls (we save them in the extraction results
* for now — a future optimization would batch these) */
total_calls += result->calls.count;
total_imports += create_import_edges_for_file(ctx, result, rel);
create_channel_edges_for_file(ctx, result, rel);

/* Cache or free the extraction result */
if (ctx->result_cache) {
ctx->result_cache[i] = result;
if (local_cache) {
local_cache[i] = result;
} else {
/* Cache unavailable: imports for this file can still only
* resolve to defs already in the graph, but the file's
* own defs are now persisted before the lookup. */
total_imports += create_import_edges_for_file(ctx, result, rel);
create_channel_edges_for_file(ctx, result, rel);
cbm_free_result(result);
}
}

/* Phase 2: now that all extraction results are cached and Module
* nodes for every file are in the graph, walk the cache again to
* create IMPORTS / channel edges. Imports resolve against the full
* project graph. */
if (local_cache) {
for (int i = 0; i < file_count; i++) {
if (cbm_pipeline_check_cancel(ctx)) {
break;
}
CBMFileResult *result = local_cache[i];
if (!result) {
continue;
}
total_imports += create_import_edges_for_file(ctx, result, files[i].rel_path);
create_channel_edges_for_file(ctx, result, files[i].rel_path);
}
if (owns_local_cache) {
for (int i = 0; i < file_count; i++) {
if (local_cache[i]) {
cbm_free_result(local_cache[i]);
}
}
free(local_cache);
}
}

cbm_log_info("pass.done", "pass", "definitions", "defs", itoa_log(total_defs), "calls",
itoa_log(total_calls), "imports", itoa_log(total_imports), "errors",
itoa_log(errors));
Expand Down
5 changes: 5 additions & 0 deletions src/pipeline/pass_parallel.c
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,11 @@ static void merge_pkg_entries(cbm_pipeline_ctx_t *ctx, cbm_pkg_entries_t *pkg_en
if (!pkg_entries) {
return;
}
/* Supplement with a repo-wide filesystem walk so manifests filtered
* by the main discoverer (package.json, composer.json — in
* IGNORED_JSON_FILES) still feed pkgmap. Append into worker 0's
* array so the existing merge below sees them. */
cbm_pkgmap_scan_repo(ctx->repo_path, &pkg_entries[0]);
cbm_pipeline_set_pkgmap(cbm_pkgmap_build(pkg_entries, worker_count, ctx->project_name));
for (int i = 0; i < worker_count; i++) {
cbm_pkg_entries_free(&pkg_entries[i]);
Expand Down
145 changes: 136 additions & 9 deletions src/pipeline/pass_pkgmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
*/
#include "pipeline/pipeline.h"
#include "pipeline/pipeline_internal.h"
#include "discover/discover.h"
#include "foundation/compat.h"
#include "foundation/constants.h"
#include "foundation/hash_table.h"
Expand All @@ -22,10 +23,12 @@

#include <yyjson/yyjson.h>

#include <dirent.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>

/* Read an entire file into a malloc'd buffer. Returns NULL on failure. */
static char *pkgmap_read_file(const char *path, int *out_len) {
Expand Down Expand Up @@ -747,6 +750,100 @@ CBMHashTable *cbm_pkgmap_build(cbm_pkg_entries_t *worker_entries, int worker_cou
return map;
}

/* Reports whether `basename` names a package manifest the filesystem walker
 * should hand to the parser: either one of a fixed set of exact file names
 * or any name accepted by ends_with(basename, ".gemspec"). A NULL basename
 * is never a manifest. cbm_pkgmap_try_parse remains the source of truth for
 * which basenames actually produce entries. */
static bool is_pkgmap_manifest_basename(const char *basename) {
    /* Manifest file names that must match the basename exactly. */
    static const char *const exact_names[] = {
        "package.json", "go.mod",  "Cargo.toml",   "pyproject.toml",   "composer.json",
        "pubspec.yaml", "pom.xml", "build.gradle", "build.gradle.kts", "mix.exs",
    };
    const size_t exact_count = sizeof(exact_names) / sizeof(exact_names[0]);

    if (basename == NULL) {
        return false;
    }
    for (size_t idx = 0; idx < exact_count; idx++) {
        if (strcmp(basename, exact_names[idx]) == 0) {
            return true;
        }
    }
    /* Not an exact name: the only remaining match is the .gemspec suffix. */
    return ends_with(basename, ".gemspec");
}

/* Recursive filesystem walker that finds and parses package manifest
 * files independently of the main discovery filter. The main discovery
 * filter intentionally hides package.json / composer.json etc. from
 * code indexing (they're config, not source), but pass_pkgmap still
 * needs to read them to resolve workspace imports.
 *
 * Parameters:
 *   abs_dir - directory to open and scan.
 *   rel_dir - the same directory relative to the walk root; NULL or ""
 *             means the root itself, so children get a bare-name rel_path.
 *   entries - accumulator handed through to cbm_pkgmap_try_parse.
 *
 * Behaviour:
 *   - "." and ".." are skipped.
 *   - Symlinks are never followed (lstat + S_ISLNK), neither for
 *     directories nor for files.
 *   - Sub-directories rejected by cbm_should_skip_dir(name, CBM_MODE_FULL)
 *     are not descended into (node_modules, .git, build, etc.).
 *   - Entries whose absolute or relative path does not fit in
 *     PKGMAP_PATH_BUF are skipped rather than processed under a
 *     truncated path, which could name a different file or directory.
 *   - Unreadable directories and files are skipped silently (best effort).
 *
 * Returns the number of manifests for which cbm_pkgmap_try_parse reported
 * success, accumulated across the whole walk. */
static int pkgmap_walk_dir(const char *abs_dir, const char *rel_dir,
                           cbm_pkg_entries_t *entries) {
    DIR *dir = opendir(abs_dir);
    if (!dir) {
        return 0;
    }
    int parsed = 0;
    struct dirent *entry;
    while ((entry = readdir(dir)) != NULL) {
        const char *name = entry->d_name;
        if (name[0] == '.' && (name[1] == '\0' || (name[1] == '.' && name[2] == '\0'))) {
            continue;
        }
        char abs_path[PKGMAP_PATH_BUF];
        char rel_path[PKGMAP_PATH_BUF];
        int abs_len = snprintf(abs_path, sizeof(abs_path), "%s/%s", abs_dir, name);
        int rel_len;
        if (rel_dir && rel_dir[0]) {
            rel_len = snprintf(rel_path, sizeof(rel_path), "%s/%s", rel_dir, name);
        } else {
            rel_len = snprintf(rel_path, sizeof(rel_path), "%s", name);
        }
        /* snprintf returns the length it WOULD have written; a value at or
         * above the buffer size means the path was cut short. Never stat,
         * recurse into, or parse a truncated path. */
        if (abs_len < 0 || (size_t)abs_len >= sizeof(abs_path) || rel_len < 0 ||
            (size_t)rel_len >= sizeof(rel_path)) {
            continue;
        }
        struct stat st;
        if (lstat(abs_path, &st) != 0) {
            continue;
        }
        /* lstat reports the link itself, so symlinked dirs/files stop here
         * and the walk cannot loop through a link cycle. */
        if (S_ISLNK(st.st_mode)) {
            continue;
        }
        if (S_ISDIR(st.st_mode)) {
            if (cbm_should_skip_dir(name, CBM_MODE_FULL)) {
                continue;
            }
            parsed += pkgmap_walk_dir(abs_path, rel_path, entries);
            continue;
        }
        if (!S_ISREG(st.st_mode)) {
            continue;
        }
        if (!is_pkgmap_manifest_basename(name)) {
            continue;
        }
        int source_len = 0;
        char *source = pkgmap_read_file(abs_path, &source_len);
        if (!source) {
            continue;
        }
        if (cbm_pkgmap_try_parse(name, rel_path, source, source_len, entries)) {
            parsed++;
        }
        free(source);
    }
    closedir(dir);
    return parsed;
}

/* Walks the repository rooted at `repo_path` with pkgmap_walk_dir and
 * appends every manifest it can parse to `entries`. This complements the
 * parallel path's per-worker manifest parsing, which only sees the
 * `files[]` produced by the discoverer and therefore misses ignored
 * manifests such as package.json.
 *
 * Returns the count reported by pkgmap_walk_dir and logs it under
 * "pkgmap.scan_repo". When `repo_path` or `entries` is NULL nothing is
 * walked or logged and the result is 0. */
int cbm_pkgmap_scan_repo(const char *repo_path, cbm_pkg_entries_t *entries) {
    int manifest_count = 0;
    if (repo_path != NULL && entries != NULL) {
        /* An empty rel_dir marks the repo root for the walker. */
        manifest_count = pkgmap_walk_dir(repo_path, "", entries);
        cbm_log_info("pkgmap.scan_repo", "manifests", pkgmap_itoa(manifest_count));
    }
    return manifest_count;
}

/* Build pkgmap for sequential path (reads manifest files directly) */
CBMHashTable *cbm_pkgmap_build_from_files(const cbm_file_info_t *files, int file_count,
const char *project_name) {
Expand All @@ -755,15 +852,7 @@ CBMHashTable *cbm_pkgmap_build_from_files(const cbm_file_info_t *files, int file

for (int i = 0; i < file_count; i++) {
const char *basename = path_basename(files[i].rel_path);
/* Quick check: is this a manifest file? */
bool is_manifest =
(strcmp(basename, "package.json") == 0 || strcmp(basename, "go.mod") == 0 ||
strcmp(basename, "Cargo.toml") == 0 || strcmp(basename, "pyproject.toml") == 0 ||
strcmp(basename, "composer.json") == 0 || strcmp(basename, "pubspec.yaml") == 0 ||
strcmp(basename, "pom.xml") == 0 || strcmp(basename, "build.gradle") == 0 ||
strcmp(basename, "build.gradle.kts") == 0 || strcmp(basename, "mix.exs") == 0 ||
ends_with(basename, ".gemspec"));
if (!is_manifest) {
if (!is_pkgmap_manifest_basename(basename)) {
continue;
}

Expand All @@ -782,6 +871,44 @@ CBMHashTable *cbm_pkgmap_build_from_files(const cbm_file_info_t *files, int file
return map;
}

/* Builds the package map from two sources: the manifests present in the
 * discovered `files[]`, plus a filesystem walk of `repo_path` that picks up
 * manifests the main discoverer filtered out (the canonical case being
 * package.json, which is in IGNORED_JSON_FILES).
 *
 * When `repo_path` is NULL the walk contributes nothing
 * (cbm_pkgmap_scan_repo returns 0), leaving the files[]-only behaviour.
 *
 * Returns the table produced by cbm_pkgmap_build; the temporary entries
 * list is released before returning.
 *
 * NOTE(review): a manifest that is visible in files[] is presumably also
 * reached by the repo walk and would then be parsed twice — confirm that
 * cbm_pkgmap_build tolerates duplicate entries. */
CBMHashTable *cbm_pkgmap_build_from_repo(const char *repo_path, const cbm_file_info_t *files,
                                         int file_count, const char *project_name) {
    cbm_pkg_entries_t entries;
    cbm_pkg_entries_init(&entries);

    /* Pass 1: harvest whatever manifests the discovery filter exposed
     * (Cargo.toml, go.mod, pyproject.toml, ...). from_files counts manifest
     * candidates by name, whether or not they could be read or parsed. */
    int from_files = 0;
    for (int idx = 0; idx < file_count; idx++) {
        const cbm_file_info_t *file = &files[idx];
        const char *name = path_basename(file->rel_path);
        if (is_pkgmap_manifest_basename(name)) {
            from_files++;
            int text_len = 0;
            char *text = pkgmap_read_file(file->path, &text_len);
            if (text) {
                cbm_pkgmap_try_parse(name, file->rel_path, text, text_len, &entries);
                free(text);
            }
        }
    }

    /* Pass 2: supplement with the repo-wide walk. */
    const int from_walk = cbm_pkgmap_scan_repo(repo_path, &entries);
    cbm_log_info("pkgmap.scan", "manifests_from_files", pkgmap_itoa(from_files),
                 "manifests_from_walk", pkgmap_itoa(from_walk),
                 "entries", pkgmap_itoa(entries.count));

    /* SKIP_ONE sits in cbm_pkgmap_build's worker-count position: there is
     * a single entries list here rather than one per worker. */
    CBMHashTable *pkgmap = cbm_pkgmap_build(&entries, SKIP_ONE, project_name);
    cbm_pkg_entries_free(&entries);
    return pkgmap;
}

static void pkgmap_free_entry(const char *key, void *value, void *userdata) {
(void)userdata;
free((void *)key);
Expand Down
Loading