diff --git a/CHANGELOG.md b/CHANGELOG.md index 8915cf869..e2e349d20 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - `apm update` and `apm install` no longer issue redundant `git clone --depth=1` calls when multiple dependencies share the same `(repo, ref)` tuple. A new four-tier resolver (per-run cache, commits API, bare `rev-parse`, legacy clone) collapses duplicate ref-to-SHA resolutions; on the reporter's 9-dep manifest with 3 unique `(repo, ref)` tuples this cuts the integrate phase from 1583s (Windows + ADO) to seconds. Reused by `install`, `update`, `outdated`, and `publish`. Emergency rollback: `APM_TIERED_RESOLVER=0`. (#1369) +- `apm install` now accepts Bitbucket Data Center / Server personal-repository URLs containing `~` (e.g. `https://example.com/scm/~myuser/my-apm-repo.git`). The path-component whitelist on non-Azure-DevOps hosts now includes `~`, which is an RFC 3986 unreserved character used by Bitbucket DC to denote personal-user projects (Sourcehut `~user` paths are incidentally unblocked too). (#1375) - Fixed `shared/apm.md` matrix fan-out being silently stripped by the GitHub Actions cross-job secret masker when the GitHub App `private-key` was carried in the `apm-prep` matrix output; credentials are now resolved per row via `$GITHUB_ENV` in the `apm` job. (#1373) - `apm compile --watch` now honors `targets: [claude, cursor]` (and every other multi-target / single-target configuration) on every recompile. Previously the watch path bypassed the resolver the one-shot path uses and let `CompilationConfig.from_apm_yml` fall back to the all-families default, silently regenerating `GEMINI.md` after every file edit. The watch path now resolves the effective target via the same helper the one-shot path uses and forwards it as `target=` into both the initial compile and every debounced recompile. (#1345) - Fixed hook commands using relative paths that break for Claude target (#1310) diff --git a/docs/src/content/docs/consumer/private-and-org-packages.md b/docs/src/content/docs/consumer/private-and-org-packages.md index 48db8de63..045af37a6 100644 --- a/docs/src/content/docs/consumer/private-and-org-packages.md +++ b/docs/src/content/docs/consumer/private-and-org-packages.md @@ -146,6 +146,24 @@ dependencies: APM falls back across protocols on the same port: `ssh://host:7999` will retry as `https://host:7999/...` if SSH is unreachable. +## Bitbucket Data Center personal repos + +Bitbucket Data Center / Server exposes personal repositories under +`/scm/~username/`. The `~` is part of the path segment and is preserved +as-is in `apm.yml`: + +```yaml +dependencies: + apm: + - git: https://bitbucket.example.com/scm/~jdoe/ml-utils.git + ref: v1.0.0 + - git: ssh://git@bitbucket.example.com:7999/~jdoe/ml-utils.git + ref: v1.0.0 +``` + +Token: `BITBUCKET_APM_PAT` (HTTPS) or your SSH key (SSH form). Sourcehut +(`~user` path convention) works the same way. + ## Pre-fetched bundles (offline / air-gapped) Install a packed bundle from disk: diff --git a/docs/src/content/docs/reference/manifest-schema.md b/docs/src/content/docs/reference/manifest-schema.md index 6c1507783..53af8ee3f 100644 --- a/docs/src/content/docs/reference/manifest-schema.md +++ b/docs/src/content/docs/reference/manifest-schema.md @@ -282,7 +282,7 @@ local_path_form = ("./" / "../" / "/" / "~/" / ".\\" / "..\\" / "~\\") path |---|---|---|---| | `host` | OPTIONAL | FQDN (e.g. `gitlab.com`) | Git host. Defaults to `github.com`. | | `port` | OPTIONAL | `1`-`65535` | Non-default port on `ssh://`, `https://`, `http://` clone URLs. Not expressible in SCP shorthand. | -| `owner/repo` | REQUIRED | 2+ path segments of `[a-zA-Z0-9._-]+` | Repository path. GitHub uses exactly 2 segments. Non-GitHub hosts MAY use nested groups (e.g. `gitlab.com/group/sub/repo`). | +| `owner/repo` | REQUIRED | 2+ path segments of `[a-zA-Z0-9._~-]+` on non-Azure-DevOps hosts; `[a-zA-Z0-9._\- ]+` (allows spaces, not tilde) on Azure DevOps | Repository path. GitHub uses exactly 2 segments. Non-GitHub hosts MAY use nested groups (e.g. `gitlab.com/group/sub/repo`). Tilde supports Bitbucket Data Center personal-repo segments (`/scm/~user/repo.git`) and Sourcehut `~user` paths. | | `virtual_path` | OPTIONAL | Path segments after repo | Subdirectory or file within the repo. See Section 4.1.3. | | `ref` | OPTIONAL | Branch, tag, or commit SHA | Git reference. Commit SHAs matched by `^[a-f0-9]{7,40}$`. Semver tags matched by `^v?\d+\.\d+\.\d+`. | diff --git a/packages/apm-guide/.apm/skills/apm-usage/dependencies.md b/packages/apm-guide/.apm/skills/apm-usage/dependencies.md index d6903997e..92b33c816 100644 --- a/packages/apm-guide/.apm/skills/apm-usage/dependencies.md +++ b/packages/apm-guide/.apm/skills/apm-usage/dependencies.md @@ -23,6 +23,10 @@ dependencies: - ssh://git@bitbucket.example.com:7999/project/repo.git - https://git.internal:8443/team/repo.git + # Bitbucket Data Center personal repos (~user) and Sourcehut + - https://bitbucket.example.com/scm/~jdoe/ml-utils.git + - https://git.sr.ht/~jdoe/dotfiles + # FQDN shorthand (non-GitHub hosts keep the domain) - gitlab.com/acme/coding-standards - gitlab.com/group/subgroup/repo diff --git a/src/apm_cli/models/dependency/reference.py b/src/apm_cli/models/dependency/reference.py index 52c65a27a..0bc238d08 100644 --- a/src/apm_cli/models/dependency/reference.py +++ b/src/apm_cli/models/dependency/reference.py @@ -32,6 +32,25 @@ # spelled the URL. _DEFAULT_SCHEME_PORTS: dict[str, int] = {"https": 443, "http": 80, "ssh": 22} +# Allowed character set for a single repository path segment. +# +# ADO accepts spaces (project / repo names can contain them) but NOT tilde -- +# tilde has no meaning on Azure DevOps URLs and keeping it out preserves the +# asymmetry that protects the ADO surface from inadvertent regressions. +# +# Non-ADO hosts accept tilde because Bitbucket Data Center / Server (and +# Sourcehut) use ``~username`` path segments for personal repositories +# (e.g. ``/scm/~jdoe/repo.git``). ``~`` is RFC 3986 unreserved, has no +# POSIX path-traversal meaning, and all subprocess calls in APM use +# list-form ``argv`` so there is no shell-expansion vector. +_ADO_PATH_SEGMENT_RE = r"^[a-zA-Z0-9._\- ]+$" +_NON_ADO_PATH_SEGMENT_RE = r"^[a-zA-Z0-9._~-]+$" + + +def _path_segment_pattern(is_ado_host: bool) -> str: + """Return the allowed-character regex for a single repo path segment.""" + return _ADO_PATH_SEGMENT_RE if is_ado_host else _NON_ADO_PATH_SEGMENT_RE + @dataclass class DependencyReference: @@ -1089,7 +1108,7 @@ def _resolve_shorthand_to_parsed_url(cls, repo_url, host): elif len(uparts) < 2: raise ValueError(f"Invalid repository format: {repo_url}. Expected 'user/repo'") - allowed_pattern = r"^[a-zA-Z0-9._\- ]+$" if is_ado_host else r"^[a-zA-Z0-9._-]+$" + allowed_pattern = _path_segment_pattern(is_ado_host) validate_path_segments("/".join(uparts), context="repository path") for part in uparts: if not re.match(allowed_pattern, part.rstrip(".git")): @@ -1202,7 +1221,7 @@ def _validate_url_repo_path(cls, parsed_url) -> tuple[str, str | None]: f"Use the dict format with 'path:' for virtual packages in HTTPS URLs" ) - allowed_pattern = r"^[a-zA-Z0-9._\- ]+$" if is_ado_host else r"^[a-zA-Z0-9._-]+$" + allowed_pattern = _path_segment_pattern(is_ado_host) validate_path_segments( "/".join(path_parts), context="repository URL path", @@ -1309,7 +1328,7 @@ def _validate_final_repo_fields(cls, host, repo_url): segments = repo_url.split("/") if len(segments) < 2: raise ValueError(f"Invalid repository format: {repo_url}. Expected 'user/repo'") - if not all(re.match(r"^[a-zA-Z0-9._-]+$", s) for s in segments): + if not all(re.match(_NON_ADO_PATH_SEGMENT_RE, s) for s in segments): raise ValueError(f"Invalid repository format: {repo_url}. Contains invalid characters") validate_path_segments(repo_url, context="repository path") for seg in segments: diff --git a/tests/unit/test_generic_git_urls.py b/tests/unit/test_generic_git_urls.py index f74f9abf9..a26512bc4 100644 --- a/tests/unit/test_generic_git_urls.py +++ b/tests/unit/test_generic_git_urls.py @@ -463,6 +463,43 @@ def test_invalid_characters_rejected(self): with pytest.raises(ValueError, match="Invalid repository path component"): DependencyReference.parse("https://gitlab.com/user/repo$bad") + def test_bitbucket_personal_repo_tilde_url(self): + """Bitbucket Data Center personal repos use ``~username`` path segments.""" + dep = DependencyReference.parse("https://example.com/scm/~myuser/my-apm-repo.git") + assert dep.host == "example.com" + assert dep.repo_url == "scm/~myuser/my-apm-repo" + assert dep.is_virtual is False + + def test_bitbucket_personal_repo_tilde_shorthand(self): + """Tilde-prefixed user segment is also valid in FQDN shorthand form.""" + dep = DependencyReference.parse("example.com/scm/~myuser/my-apm-repo") + assert dep.host == "example.com" + assert dep.repo_url == "scm/~myuser/my-apm-repo" + + def test_ado_rejects_tilde_in_repo_path(self): + """ADO URLs MUST reject ``~`` in path segments. + + Regression trap for the secure_by_default asymmetry between the ADO + and non-ADO path-component whitelists. Tilde has no meaning on + Azure DevOps URLs; keeping it out preserves the strict ADO surface + even though Bitbucket DC accepts it. + """ + with pytest.raises(ValueError, match="Invalid repository path component"): + DependencyReference.parse("https://dev.azure.com/myorg/myproj/_git/~bad") + + def test_bitbucket_personal_repo_tilde_scp_form(self): + """SCP shorthand (``git@host:path``) carries Bitbucket DC personal repos too.""" + dep = DependencyReference.parse("git@bitbucket.example.com:~jdoe/ml-utils.git") + assert dep.host == "bitbucket.example.com" + assert dep.repo_url == "~jdoe/ml-utils" + + def test_bitbucket_personal_repo_tilde_ssh_url(self): + """``ssh://`` URL form with custom port carries Bitbucket DC personal repos.""" + dep = DependencyReference.parse("ssh://git@bitbucket.example.com:7999/~jdoe/ml-utils.git") + assert dep.host == "bitbucket.example.com" + assert dep.port == 7999 + assert dep.repo_url == "~jdoe/ml-utils" + class TestFQDNVirtualPaths: """Test FQDN shorthand with virtual paths on generic hosts.