Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- `apm install` now deploys `.instructions.md` files to `.claude/rules/*.md` for Claude Code, converting `applyTo:` frontmatter to Claude's `paths:` format (#516)

### Changed

- Artifactory virtual file downloads now use the Archive Entry Download API to fetch individual files without downloading the full archive, falling back to full-archive extraction when the entry API is unavailable (#525)

## [0.8.9] - 2026-03-31

### Fixed
Expand Down
183 changes: 183 additions & 0 deletions src/apm_cli/deps/artifactory_entry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
"""Artifactory registry backend -- Archive Entry Download.

JFrog Artifactory supports downloading individual entries from an archived
artifact without fetching the entire archive. The URL pattern appends
``!/{path}`` to the archive URL::

GET {archive_url}!/{root_prefix}/{file_path}

Both GitHub and GitLab archives use a root directory prefix of
``{repo}-{ref}/``, though hosting platforms may normalize the ref
(e.g. ``feature/foo`` becomes ``feature-foo`` in the directory name).
This module tries both the raw and normalized forms.

:class:`ArtifactoryRegistryClient` implements the :class:`RegistryClient`
protocol defined in :mod:`~apm_cli.deps.registry_proxy` so the download
pipeline can fetch files without knowing which registry type is in use.
"""

from __future__ import annotations

import logging
from typing import TYPE_CHECKING, Callable, List, Optional
from urllib.parse import quote

import requests as _requests

if TYPE_CHECKING:
from .registry_proxy import RegistryConfig

logger = logging.getLogger(__name__)


class ArtifactoryRegistryClient:
    """:class:`RegistryClient` implementation backed by Artifactory.

    Instances are normally obtained from :meth:`RegistryConfig.get_client`.
    The client keeps a reference to the configuration and reads the host,
    prefix, scheme and request headers from it on every call.
    """

    def __init__(self, config: "RegistryConfig") -> None:
        # Kept private: callers are expected to go through ``fetch_file``.
        self._config = config

    # -- RegistryClient protocol ---------------------------------------------

    def fetch_file(
        self,
        owner: str,
        repo: str,
        file_path: str,
        ref: str = "main",
        resilient_get: Optional[Callable] = None,
    ) -> Optional[bytes]:
        """Download one file through the Archive Entry Download API.

        Connection details (host, prefix, scheme, headers) come from the
        stored configuration; everything else is forwarded unchanged to
        the shared ``_fetch_entry`` helper.

        Args:
            owner: Repository owner.
            repo: Repository name.
            file_path: Path of the file inside the repository.
            ref: Git ref to fetch from (defaults to ``"main"``).
            resilient_get: Optional replacement for ``requests.get``.

        Returns:
            The file bytes, or ``None`` when the entry could not be
            fetched -- in which case the caller should fall back to
            downloading the full archive.
        """
        cfg = self._config
        # Attribute reads stay in the original order: host, prefix,
        # scheme, then the header lookup.
        host = cfg.host
        prefix = cfg.prefix
        scheme = cfg.scheme
        headers = cfg.get_headers()
        return _fetch_entry(
            host=host,
            prefix=prefix,
            owner=owner,
            repo=repo,
            file_path=file_path,
            ref=ref,
            scheme=scheme,
            headers=headers,
            resilient_get=resilient_get,
        )


# ---------------------------------------------------------------------------
# Standalone helper (for callers without a RegistryConfig)
# ---------------------------------------------------------------------------


def fetch_entry_from_archive(
    host: str,
    prefix: str,
    owner: str,
    repo: str,
    file_path: str,
    ref: str = "main",
    scheme: str = "https",
    headers: Optional[dict] = None,
    resilient_get: Optional[Callable] = None,
) -> Optional[bytes]:
    """Download one file from an Artifactory-proxied archive.

    Public entry point for callers that hold plain connection parameters
    rather than a :class:`RegistryConfig` (e.g. the marketplace client
    in #506). All arguments are forwarded by keyword to the shared
    ``_fetch_entry`` implementation.

    Args:
        host: Artifactory host.
        prefix: Repository prefix on that host.
        owner: Repository owner.
        repo: Repository name.
        file_path: Path of the file inside the repository.
        ref: Git ref to fetch from (defaults to ``"main"``).
        scheme: URL scheme (defaults to ``"https"``).
        headers: Optional request headers.
        resilient_get: Optional replacement for ``requests.get``.

    Returns:
        The file bytes on success, otherwise ``None``.
    """
    forwarded = dict(
        host=host,
        prefix=prefix,
        owner=owner,
        repo=repo,
        file_path=file_path,
        ref=ref,
        scheme=scheme,
        headers=headers,
        resilient_get=resilient_get,
    )
    return _fetch_entry(**forwarded)


# ---------------------------------------------------------------------------
# Core implementation (shared by class and standalone function)
# ---------------------------------------------------------------------------


def _fetch_entry(
    host: str,
    prefix: str,
    owner: str,
    repo: str,
    file_path: str,
    ref: str,
    scheme: str,
    headers: Optional[dict],
    resilient_get: Optional[Callable],
) -> Optional[bytes]:
    """Request ``{archive_url}!/{root}/{file_path}`` until one attempt succeeds.

    ``file_path`` is validated first; an invalid path returns ``None``
    without issuing any request. Every archive URL produced by
    ``build_artifactory_archive_url`` is then combined with each possible
    archive root directory: ``{repo}-{ref}`` and, when ``ref`` contains a
    ``/``, the same name with slashes replaced by ``-``.

    Args:
        host, prefix, owner, repo, ref, scheme: Passed to
            ``build_artifactory_archive_url`` to obtain candidate URLs.
        file_path: Entry path inside the archive, relative to its root.
        headers: Request headers; ``None`` or empty means no headers.
        resilient_get: Callable used instead of ``requests.get`` when
            given; invoked as ``resilient_get(url, headers=..., timeout=30)``.

    Returns:
        The body of the first HTTP 200 response, or ``None`` when no
        attempt succeeds. ``requests`` exceptions are logged at debug
        level and count as a miss for that URL only.
    """
    from ..utils.github_host import build_artifactory_archive_url
    from ..utils.path_security import PathTraversalError, validate_path_segments

    # Traversal sequences are rejected by the centralized path validator.
    try:
        validate_path_segments(
            file_path,
            context="artifactory archive entry path",
            reject_empty=True,
        )
    except PathTraversalError:
        logger.debug("Refusing invalid file_path: %s", file_path)
        return None

    candidate_urls = build_artifactory_archive_url(
        host, prefix, owner, repo, ref, scheme=scheme,
    )

    # The raw form is tried first; the slash-free form is added only when
    # it actually differs (e.g. "feature/foo" -> "feature-foo").
    roots: List[str] = [f"{repo}-{ref}"]
    flattened_ref = ref.replace("/", "-")
    if flattened_ref != ref:
        roots.append(f"{repo}-{flattened_ref}")

    # Percent-encode once per root (spaces, special chars); '/' is kept.
    entry_paths = [quote(f"{root}/{file_path}", safe="/") for root in roots]

    send = _requests.get if resilient_get is None else resilient_get
    request_headers = headers or {}

    for base_url in candidate_urls:
        for entry_path in entry_paths:
            target = f"{base_url}!/{entry_path}"
            try:
                response = send(target, headers=request_headers, timeout=30)
                status = response.status_code
                if status != 200:
                    logger.debug(
                        "Archive entry download HTTP %d: %s", status, target,
                    )
                else:
                    logger.debug("Archive entry download OK: %s", target)
                    # Reading the body stays inside the try so transport
                    # errors raised while reading are handled below.
                    return response.content
            except _requests.RequestException:
                logger.debug(
                    "Archive entry download failed: %s", target, exc_info=True,
                )

    return None
31 changes: 30 additions & 1 deletion src/apm_cli/deps/github_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,36 @@ def _download_artifactory_archive(self, host: str, prefix: str, owner: str, repo

def _download_file_from_artifactory(self, host: str, prefix: str, owner: str,
repo: str, file_path: str, ref: str, scheme: str = "https") -> bytes:
"""Download a single file from Artifactory by fetching the full archive and extracting it."""
"""Download a single file from Artifactory.

Tries the Archive Entry Download API first (fetches one file
without downloading the full archive). Falls back to the full
archive approach when the entry API is unavailable or returns an
error.
"""
# Fast path: use the RegistryClient interface for entry download
cfg = self.registry_config
if cfg is not None and cfg.host == host:
client = cfg.get_client()
content = client.fetch_file(
owner, repo, file_path, ref,
resilient_get=self._resilient_get,
)
else:
# No RegistryConfig or host mismatch (explicit FQDN mode) --
# fall back to the standalone helper.
from .artifactory_entry import fetch_entry_from_archive

content = fetch_entry_from_archive(
host, prefix, owner, repo, file_path, ref,
scheme=scheme,
headers=self._get_artifactory_headers(),
resilient_get=self._resilient_get,
)
if content is not None:
return content

# Fallback: download full archive and extract the file
import io
import zipfile

Expand Down
39 changes: 38 additions & 1 deletion src/apm_cli/deps/registry_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import os
import warnings
from dataclasses import dataclass
from typing import TYPE_CHECKING, List, Optional
from typing import TYPE_CHECKING, Callable, List, Optional, Protocol, runtime_checkable
from urllib.parse import urlparse

if TYPE_CHECKING:
Expand All @@ -36,6 +36,32 @@
# ---------------------------------------------------------------------------


@runtime_checkable
class RegistryClient(Protocol):
    """Structural interface implemented by every registry proxy backend.

    The download pipeline depends only on this protocol, so a backend
    (Artifactory, Nexus, etc.) can be swapped in without the pipeline
    knowing which registry type it is talking to. The protocol is
    runtime-checkable, so ``isinstance(obj, RegistryClient)`` is allowed.
    """

    def fetch_file(
        self,
        owner: str,
        repo: str,
        file_path: str,
        ref: str = "main",
        resilient_get: Optional[Callable] = None,
    ) -> Optional[bytes]:
        """Retrieve a single file from the registry.

        Args:
            owner: Repository owner.
            repo: Repository name.
            file_path: Path of the file inside the repository.
            ref: Git ref to fetch from (defaults to ``"main"``).
            resilient_get: Optional callable a backend may use to issue
                its HTTP GET requests.

        Returns:
            The raw file bytes, or ``None`` when the file cannot be
            fetched -- the caller is then expected to fall back to a
            full-archive download.
        """
        ...


@dataclass(frozen=True)
class RegistryConfig:
"""Immutable registry proxy configuration parsed from environment variables.
Expand Down Expand Up @@ -129,6 +155,17 @@ def get_headers(self) -> dict:
return {"Authorization": f"Bearer {self.token}"}
return {}

def get_client(self) -> "RegistryClient":
    """Build the :class:`RegistryClient` backend for this configuration.

    Only the Artifactory backend exists today, so it is always returned.
    If more registry types are added, this is the place to choose a
    backend from the URL or from a configuration hint.
    """
    # Imported here rather than at module level, as in the original.
    from .artifactory_entry import ArtifactoryRegistryClient

    backend = ArtifactoryRegistryClient(config=self)
    return backend

def validate_lockfile_deps(
self, locked_deps: "List[LockedDependency]"
) -> "List[LockedDependency]":
Expand Down
Loading
Loading