diff --git a/README.md b/README.md index 0c576965..0a9484f5 100644 --- a/README.md +++ b/README.md @@ -256,6 +256,8 @@ code-review-graph visualize --format graphml # Export as GraphML code-review-graph visualize --format svg # Export as SVG code-review-graph visualize --format obsidian # Export as Obsidian vault code-review-graph visualize --format cypher # Export as Neo4j Cypher +code-review-graph visualize --format json # Export as understand-quickly-compatible JSON +code-review-graph visualize --publish-to-uq # Publish to looptech-ai/understand-quickly registry code-review-graph wiki # Generate markdown wiki from communities code-review-graph detect-changes # Risk-scored change impact analysis code-review-graph register # Register repo in multi-repo registry diff --git a/code_review_graph/cli.py b/code_review_graph/cli.py index 9573d168..3f10201f 100644 --- a/code_review_graph/cli.py +++ b/code_review_graph/cli.py @@ -528,10 +528,20 @@ def main() -> None: ) vis_cmd.add_argument( "--format", - choices=["html", "graphml", "cypher", "obsidian", "svg"], + choices=["html", "graphml", "cypher", "obsidian", "svg", "json"], default="html", help="Export format (default: html)", ) + vis_cmd.add_argument( + "--publish-to-uq", + action="store_true", + help=( + "After writing the graph, fire a repository_dispatch at " + "looptech-ai/understand-quickly so the registry resyncs this entry. " + "Implies --format json. Requires UNDERSTAND_QUICKLY_TOKEN; without " + "the token the JSON is still written and the dispatch is skipped." + ), + ) vis_cmd.add_argument( "--data-dir", default=None, @@ -996,8 +1006,26 @@ def main() -> None: data_dir = get_data_dir(repo_root) fmt = getattr(args, "format", "html") or "html" + publish_to_uq = getattr(args, "publish_to_uq", False) + # --publish-to-uq implies --format json, since the registry only + # consumes the JSON shape. 
Force the override for any non-json + # format and warn when the user explicitly asked for something + # else (html is the default, so don't warn for it). + if publish_to_uq and fmt != "json": + if fmt != "html": + print( + f"warning: --publish-to-uq requires --format json; " + f"ignoring --format {fmt}", + file=sys.stderr, + ) + fmt = "json" + + if fmt == "json" or publish_to_uq: + from .publish import publish as _publish_to_uq - if fmt == "graphml": + out = data_dir / "graph.json" + _publish_to_uq(store, repo_root, out, publish_to_uq=publish_to_uq) + elif fmt == "graphml": from .exports import export_graphml out = data_dir / "graph.graphml" diff --git a/code_review_graph/publish.py b/code_review_graph/publish.py new file mode 100644 index 00000000..20ac9f22 --- /dev/null +++ b/code_review_graph/publish.py @@ -0,0 +1,167 @@ +"""Opt-in JSON export + understand-quickly registry publish. + +Wraps :func:`code_review_graph.visualization.export_graph_data` with the +metadata block looptech-ai/understand-quickly expects, writes the result to +``/graph.json``, and optionally fires a ``repository_dispatch`` so +the registry resyncs the entry. Gated on ``UNDERSTAND_QUICKLY_TOKEN`` — without +it, the JSON is still written and the dispatch is skipped (an informational +message is printed to stdout pointing at the nightly sync fallback). 
def _git_head_sha(repo_root: Path) -> Optional[str]:
    """Return the commit SHA that ``HEAD`` points at in *repo_root*.

    Runs ``git rev-parse HEAD`` with *repo_root* as the working directory.

    Returns:
        The 40-character lowercase hex SHA, or ``None`` when ``git`` cannot be
        executed, exits non-zero, or prints anything that is not such a SHA.
    """
    try:
        out = subprocess.check_output(  # nosec B603 B607
            ["git", "rev-parse", "HEAD"], cwd=str(repo_root), stderr=subprocess.DEVNULL,
        )
    except (OSError, subprocess.CalledProcessError) as exc:
        logger.debug("git rev-parse HEAD failed: %s", exc)
        return None
    # Undecodable bytes turn into U+FFFD, which the hex check below rejects,
    # so odd output yields None instead of an uncaught UnicodeDecodeError.
    sha = out.decode(errors="replace").strip()
    if re.fullmatch(r"[0-9a-f]{40}", sha):
        return sha
    return None


def _parse_owner_repo(url: str) -> Optional[Tuple[str, str]]:
    """Extract ``(owner, repo)`` from a git remote URL, or return ``None``.

    Recognised shapes (an optional ``.git`` suffix and one optional trailing
    slash are ignored in both):

    * ``scheme://[user@]host[:port]/owner/repo`` for the ``http``, ``https``,
      ``ssh``, ``git`` and ``git+ssh`` schemes;
    * scp-like ``user@host:owner/repo``.

    Anything else (local paths, URLs with more or fewer than two path
    segments) yields ``None``.
    """
    candidate = url.strip()
    for pattern in (
        r"(?:https?|ssh|git|git\+ssh)://[^/]+/([^/]+)/([^/]+?)(?:\.git)?/?$",
        r"[^@]+@[^:]+:([^/]+)/([^/]+?)(?:\.git)?/?$",
    ):
        match = re.match(pattern, candidate)
        if match:
            return match.group(1), match.group(2)
    return None


def _git_origin_owner_repo(repo_root: Path) -> Optional[Tuple[str, str]]:
    """Parse ``owner/repo`` from ``git remote get-url origin`` (https or ssh).

    Returns:
        ``(owner, repo)``, or ``None`` when ``git`` cannot be executed, exits
        non-zero (for example when there is no ``origin`` remote), or prints a
        URL that :func:`_parse_owner_repo` does not recognise.
    """
    try:
        out = subprocess.check_output(  # nosec B603 B607
            ["git", "remote", "get-url", "origin"],
            cwd=str(repo_root), stderr=subprocess.DEVNULL,
        )
    except (OSError, subprocess.CalledProcessError) as exc:
        logger.debug("git remote get-url failed: %s", exc)
        return None
    return _parse_owner_repo(out.decode(errors="replace"))


def _tool_version() -> str:
    """Return the installed ``code-review-graph`` version, or ``"dev"``.

    ``"dev"`` is the fallback both when the distribution is not installed and
    when the metadata lookup fails for any other reason (the latter is logged
    at debug level).
    """
    try:
        from importlib.metadata import PackageNotFoundError
        from importlib.metadata import version as pkg_version
        return pkg_version("code-review-graph")
    except PackageNotFoundError:
        return "dev"
    except Exception as exc:  # noqa: BLE001
        logger.debug("tool_version lookup failed: %s", exc)
        return "dev"


def build_publish_payload(store: GraphStore, repo_root: Path) -> dict:
    """Return ``export_graph_data(store)`` with a registry-shaped metadata block.

    The ``"metadata"`` key of the exported dict is set (replacing any existing
    value) to a dict holding:

    * ``tool`` - :data:`TOOL_NAME`;
    * ``tool_version`` - the result of :func:`_tool_version`;
    * ``generated_at`` - the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ``;
    * ``commit`` - the ``HEAD`` SHA of *repo_root*, only when
      :func:`_git_head_sha` can determine one.
    """
    data = export_graph_data(store)
    metadata = {
        "tool": TOOL_NAME,
        "tool_version": _tool_version(),
        "generated_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
    }
    sha = _git_head_sha(repo_root)
    if sha:
        metadata["commit"] = sha
    data["metadata"] = metadata
    return data


def write_publish_json(store: GraphStore, repo_root: Path, output_path: Path) -> Path:
    """Serialise the publish payload to *output_path* and return that path.

    Missing parent directories are created. The file is UTF-8 JSON indented by
    two spaces; values ``json`` cannot encode natively are written via ``str``.
    """
    payload = build_publish_payload(store, repo_root)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(payload, indent=2, default=str), encoding="utf-8")
    return output_path


def fire_dispatch(owner: str, repo: str, token: str) -> Tuple[bool, str]:
    """POST a ``sync-entry`` repository_dispatch. Returns ``(ok, message)``.

    The request goes to :data:`DISPATCH_URL` with *token* as a bearer
    credential, ``{"id": "<owner>/<repo>"}`` as the client payload and a
    15-second timeout.

    Returns:
        ``(True, message)`` for a 2xx response, otherwise ``(False, message)``.
        Transport and protocol failures are reported through the return value
        rather than raised, so a failed ping never aborts the caller.
    """
    # Function-scope import: nothing else in this module needs http.client.
    from http.client import HTTPException

    body = json.dumps(
        {"event_type": "sync-entry", "client_payload": {"id": f"{owner}/{repo}"}}
    ).encode()
    req = urlrequest.Request(  # nosec B310 - fixed https URL
        DISPATCH_URL, data=body, method="POST",
        headers={
            "Accept": "application/vnd.github+json",
            "Authorization": f"Bearer {token}",
            "X-GitHub-Api-Version": "2022-11-28",
            "Content-Type": "application/json",
            "User-Agent": f"{TOOL_NAME}-publish",
        },
    )
    try:
        with urlrequest.urlopen(req, timeout=15) as resp:  # nosec B310
            status = getattr(resp, "status", 0) or resp.getcode()
    except urlerror.HTTPError as exc:
        return False, f"HTTP {exc.code}"
    except urlerror.URLError as exc:
        return False, f"network error: {exc.reason}"
    except (OSError, HTTPException) as exc:
        # urlopen only wraps errors raised while *sending*; a timeout or a
        # dropped connection while waiting for the response arrives unwrapped.
        return False, f"network error: {str(exc) or type(exc).__name__}"
    except ValueError:
        # http.client rejects header values containing line breaks. Its message
        # echoes the header (and therefore the token), so it is not repeated.
        return False, (
            "request rejected before sending (the token may contain line "
            "breaks or other characters not allowed in an HTTP header)"
        )
    if 200 <= status < 300:
        return True, f"dispatch sent (HTTP {status})"
    return False, f"unexpected HTTP {status}"


def publish(
    store: GraphStore, repo_root: Path, output_path: Path, publish_to_uq: bool = False,
) -> Path:
    """Write JSON and optionally ping the registry. Always writes the JSON.

    Args:
        store: Graph store to export.
        repo_root: Repository used for the commit SHA and the ``origin`` remote.
        output_path: Destination of the JSON file.
        publish_to_uq: When true, also attempt the ``repository_dispatch``.

    Returns:
        The path the JSON was written to. The dispatch is skipped when
        ``UNDERSTAND_QUICKLY_TOKEN`` is unset or blank, or when ``owner/repo``
        cannot be derived from the ``origin`` remote. Every outcome is
        reported on stdout and none of them raises.
    """
    written = write_publish_json(store, repo_root, output_path)
    print(f"JSON exported: {written}")

    if not publish_to_uq:
        return written

    # Strip so a trailing newline from a secrets file cannot end up in the
    # Authorization header; a blank value is treated like an unset one.
    token = os.environ.get("UNDERSTAND_QUICKLY_TOKEN", "").strip()
    if not token:
        print(
            "[understand-quickly] UNDERSTAND_QUICKLY_TOKEN not set; "
            "skipping repository_dispatch (nightly sync will pick this up)."
        )
        return written

    owner_repo = _git_origin_owner_repo(repo_root)
    if owner_repo is None:
        print(
            "[understand-quickly] could not derive owner/repo from "
            "`git remote get-url origin`; skipping dispatch."
        )
        return written

    owner, repo = owner_repo
    ok, message = fire_dispatch(owner, repo, token)
    if ok:
        print(f"[understand-quickly] {message} for {owner}/{repo}")
    else:
        print(
            f"[understand-quickly] dispatch failed for {owner}/{repo}: {message}. "
            "If this repo is not yet registered, register it once with: "
            "npx @understand-quickly/cli add"
        )
    return written
@pytest.fixture
def store(tmp_path: Path) -> GraphStore:
    """Provide a ``GraphStore`` at ``tmp_path / "test.db"`` holding one File node."""
    graph_store = GraphStore(tmp_path / "test.db")
    file_node = NodeInfo(
        kind="File",
        name="auth.py",
        file_path="src/auth.py",
        line_start=1,
        line_end=10,
        language="python",
        parent_name=None,
        params=None,
        return_type=None,
        modifiers=None,
        is_test=False,
        extra={},
    )
    graph_store.upsert_node(file_node)
    return graph_store


def test_build_publish_payload_embeds_metadata(store: GraphStore, tmp_path: Path):
    """The payload keeps the graph keys and gains a well-formed metadata block."""
    result = publish_mod.build_publish_payload(store, tmp_path)
    for key in ("nodes", "edges"):
        assert key in result
    meta = result["metadata"]
    assert meta["tool"] == "code-review-graph"
    assert meta["tool_version"]  # non-empty
    assert re.fullmatch(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z", meta["generated_at"])
    # The commit key is optional; when it is present it must be a 40-hex SHA.
    if "commit" in meta:
        assert re.fullmatch(r"[0-9a-f]{40}", meta["commit"])


def test_write_publish_json_writes_to_disk(store: GraphStore, tmp_path: Path):
    """Writing creates the missing parent directory and a loadable JSON file."""
    target = tmp_path / ".code-review-graph" / "graph.json"
    publish_mod.write_publish_json(store, tmp_path, target)
    assert target.exists()
    loaded = json.loads(target.read_text())
    assert loaded["metadata"]["tool"] == "code-review-graph"


def test_publish_without_token_skips_dispatch(
    store: GraphStore, tmp_path: Path, monkeypatch, capsys
):
    """Without the token no HTTP call is made and the skip is announced."""
    monkeypatch.delenv("UNDERSTAND_QUICKLY_TOKEN", raising=False)
    target = tmp_path / "graph.json"
    urlopen_patch = mock.patch.object(publish_mod.urlrequest, "urlopen")
    with urlopen_patch as fake_urlopen:
        publish_mod.publish(store, tmp_path, target, publish_to_uq=True)
    fake_urlopen.assert_not_called()
    stdout = capsys.readouterr().out
    assert "UNDERSTAND_QUICKLY_TOKEN not set" in stdout


def test_publish_with_token_fires_dispatch(
    store: GraphStore, tmp_path: Path, monkeypatch, capsys
):
    """With a token, exactly one correctly shaped dispatch request is sent."""
    monkeypatch.setenv("UNDERSTAND_QUICKLY_TOKEN", "ghp_fake")

    # Stand-in response usable as a context manager, reporting HTTP 204.
    response = mock.MagicMock()
    response.__enter__.return_value = response
    response.status = 204
    response.getcode.return_value = 204

    origin_patch = mock.patch.object(
        publish_mod, "_git_origin_owner_repo", return_value=("looptech-ai", "demo")
    )
    urlopen_patch = mock.patch.object(
        publish_mod.urlrequest, "urlopen", return_value=response
    )
    with origin_patch:
        with urlopen_patch as fake_urlopen:
            target = tmp_path / "graph.json"
            publish_mod.publish(store, tmp_path, target, publish_to_uq=True)

    fake_urlopen.assert_called_once()
    request = fake_urlopen.call_args[0][0]
    assert request.full_url == publish_mod.DISPATCH_URL
    sent_body = json.loads(request.data.decode())
    assert sent_body == {
        "event_type": "sync-entry",
        "client_payload": {"id": "looptech-ai/demo"},
    }
    assert request.headers["Authorization"] == "Bearer ghp_fake"
    stdout = capsys.readouterr().out
    assert "dispatch sent" in stdout


def test_publish_dispatch_failure_is_soft(
    store: GraphStore, tmp_path: Path, monkeypatch, capsys
):
    """An HTTP error from the dispatch is reported, not raised; JSON still exists."""
    monkeypatch.setenv("UNDERSTAND_QUICKLY_TOKEN", "ghp_fake")
    from urllib import error as urlerror

    http_422 = urlerror.HTTPError(
        publish_mod.DISPATCH_URL, 422, "Unprocessable", {}, io.BytesIO(b"")
    )
    target = tmp_path / "graph.json"
    origin_patch = mock.patch.object(
        publish_mod, "_git_origin_owner_repo", return_value=("looptech-ai", "demo")
    )
    urlopen_patch = mock.patch.object(
        publish_mod.urlrequest, "urlopen", side_effect=http_422
    )
    with origin_patch:
        with urlopen_patch:
            # Must not raise: the failure is soft and the user is informed.
            publish_mod.publish(store, tmp_path, target, publish_to_uq=True)
    stdout = capsys.readouterr().out
    assert "dispatch failed" in stdout
    assert "npx @understand-quickly/cli add" in stdout
    assert target.exists()


@pytest.mark.parametrize(
    "url,expected",
    [
        ("https://github.com/looptech-ai/demo.git", ("looptech-ai", "demo")),
        ("https://github.com/looptech-ai/demo", ("looptech-ai", "demo")),
        ("git@github.com:looptech-ai/demo.git", ("looptech-ai", "demo")),
        ("git@github.com:looptech-ai/demo", ("looptech-ai", "demo")),
    ],
)
def test_origin_owner_repo_parsing(url: str, expected: tuple[str, str], tmp_path: Path):
    """https and scp-style origins, with or without ``.git``, parse to owner/repo."""
    git_output = (url + "\n").encode()
    check_output_patch = mock.patch.object(
        publish_mod.subprocess, "check_output", return_value=git_output
    )
    with check_output_patch:
        assert publish_mod._git_origin_owner_repo(tmp_path) == expected