diff --git a/code_review_graph/flows.py b/code_review_graph/flows.py index fd12ade6..61af55db 100644 --- a/code_review_graph/flows.py +++ b/code_review_graph/flows.py @@ -46,9 +46,16 @@ re.compile(r"(override_settings|modify_settings)", re.IGNORECASE), # SQLAlchemy / event systems re.compile(r"(event\.)?listens_for", re.IGNORECASE), - # Java Spring + # Java Spring MVC / WebFlux annotation-based re.compile(r"(Get|Post|Put|Delete|Patch|RequestMapping)Mapping", re.IGNORECASE), re.compile(r"(Scheduled|EventListener|Bean|Configuration)", re.IGNORECASE), + re.compile(r"RestController", re.IGNORECASE), + # Spring Kafka / Temporal entry points + re.compile(r"KafkaListener", re.IGNORECASE), + re.compile(r"(WorkflowMethod|ActivityMethod)", re.IGNORECASE), + # Spring WebFlux functional routing (RouterFunction / route().GET()) + re.compile(r"RouterFunction", re.IGNORECASE), + re.compile(r"HandlerFunction", re.IGNORECASE), # JS/TS frameworks re.compile(r"(Component|Injectable|Controller|Module|Guard|Pipe)", re.IGNORECASE), re.compile(r"(Subscribe|Mutation|Query|Resolver)", re.IGNORECASE), @@ -115,17 +122,22 @@ # --------------------------------------------------------------------------- +_WEBFLUX_RETURN_TYPE_RE = re.compile(r"RouterFunction", re.IGNORECASE) + + def _has_framework_decorator(node: GraphNode) -> bool: """Return True if *node* has a decorator matching a framework pattern.""" decorators = node.extra.get("decorators") - if not decorators: - return False - if isinstance(decorators, str): - decorators = [decorators] - for dec in decorators: - for pat in _FRAMEWORK_DECORATOR_PATTERNS: - if pat.search(dec): - return True + if decorators: + if isinstance(decorators, str): + decorators = [decorators] + for dec in decorators: + for pat in _FRAMEWORK_DECORATOR_PATTERNS: + if pat.search(dec): + return True + # WebFlux functional routing: @Bean methods returning RouterFunction + if node.return_type and _WEBFLUX_RETURN_TYPE_RE.search(node.return_type): + return True return 
False diff --git a/code_review_graph/graph.py b/code_review_graph/graph.py index 6457aef8..e311dea6 100644 --- a/code_review_graph/graph.py +++ b/code_review_graph/graph.py @@ -350,6 +350,33 @@ def get_edges_by_target(self, qualified_name: str) -> list[GraphEdge]: ).fetchall() return [self._row_to_edge(r) for r in rows] + def get_edges_by_endpoint_key(self, key: str) -> list[GraphEdge]: + """Find HANDLES edges whose target is 'http:METHOD:path'. + + Endpoint nodes are stored with qualified names like + 'file.java::OrderController.GET /orders' while HANDLES edges store + targets as 'http:GET:/orders'. This method bridges that gap so BFS + traversal from an Endpoint node can reach the handler that serves it. + """ + rows = self._conn.execute( + "SELECT * FROM edges WHERE target_qualified = ? AND kind = 'HANDLES'", + (key,), + ).fetchall() + return [self._row_to_edge(r) for r in rows] + + def get_edges_by_config_key(self, key: str) -> list[GraphEdge]: + """Find DEPENDS_ON_CONFIG edges whose target is 'config:{key}'. + + Config edges store targets as 'config:some.key' while config nodes + are stored with qualified names like 'app.yml::some.key'. This method + bridges that gap so callers can find who reads a given config property. + """ + rows = self._conn.execute( + "SELECT * FROM edges WHERE target_qualified = ? AND kind = 'DEPENDS_ON_CONFIG'", + (f"config:{key}",), + ).fetchall() + return [self._row_to_edge(r) for r in rows] + def search_edges_by_target_name(self, name: str, kind: str = "CALLS") -> list[GraphEdge]: """Search for edges where target_qualified matches an unqualified name. 
diff --git a/code_review_graph/main.py b/code_review_graph/main.py index 8f4fdbe8..5655dbb5 100644 --- a/code_review_graph/main.py +++ b/code_review_graph/main.py @@ -228,6 +228,7 @@ def query_graph_tool( - children_of: Find nodes contained in a file or class - tests_for: Find tests for the target - inheritors_of: Find classes inheriting from the target + - consumers_of: Find classes/methods that read a config property - file_summary: Get all nodes in a file Args: diff --git a/code_review_graph/parser.py b/code_review_graph/parser.py index d82d69b5..702d5610 100644 --- a/code_review_graph/parser.py +++ b/code_review_graph/parser.py @@ -142,6 +142,10 @@ class EdgeInfo: ".v": "verilog", ".vh": "verilog", ".sql": "sql", + # Config files — parsed with custom regex-based extractors (no tree-sitter grammar) + ".yml": "yaml", + ".yaml": "yaml", + ".properties": "properties", } # Shebang interpreter → language mapping for extension-less Unix scripts. @@ -357,7 +361,7 @@ class EdgeInfo: "tsx": ["call_expression", "new_expression"], "go": ["call_expression"], "rust": ["call_expression", "macro_invocation"], - "java": ["method_invocation", "object_creation_expression"], + "java": ["method_invocation", "object_creation_expression", "method_reference"], "c": ["call_expression"], "cpp": ["call_expression"], "csharp": ["invocation_expression", "object_creation_expression"], @@ -481,6 +485,39 @@ class EdgeInfo: # Kafka consumer annotations (annotation-based pattern) _KAFKA_LISTENER_ANNOTATIONS = frozenset({"KafkaListener", "KafkaHandler"}) +# WebFlux fluent router HTTP verb method names +_WEBFLUX_HTTP_VERBS: frozenset[str] = frozenset({"GET", "POST", "PUT", "DELETE", "PATCH"}) + + +def _normalize_config_key(key: str) -> str: + """Normalize a Spring config key to camelCase for relaxed-binding matching. + + Spring accepts kebab-case (task-queue), camelCase (taskQueue), and + SCREAMING_SNAKE (TASK_QUEUE) interchangeably. 
Normalise to camelCase + so that YAML keys and @Value keys resolve to the same node name. + """ + parts = key.split(".") + normalized = [] + for part in parts: + # Convert kebab-case and SCREAMING_SNAKE segments to camelCase + if "-" in part or "_" in part: + tokens = re.split(r"[-_]", part) + part = tokens[0].lower() + "".join(t.capitalize() for t in tokens[1:]) + else: + part = part[0].lower() + part[1:] if part else part + normalized.append(part) + return ".".join(normalized) + +# Spring MVC / WebFlux annotation → HTTP method mapping +_HTTP_MAPPING_ANNOTATIONS: dict[str, str] = { + "GetMapping": "GET", + "PostMapping": "POST", + "PutMapping": "PUT", + "DeleteMapping": "DELETE", + "PatchMapping": "PATCH", + "RequestMapping": "ANY", +} + # Kafka consumer field types (reactive / imperative) _KAFKA_CONSUMER_TYPES = frozenset({ "KafkaReceiver", @@ -935,6 +972,12 @@ def parse_bytes(self, path: Path, source: bytes) -> tuple[list[NodeInfo], list[E if language == "sql": return self._parse_sql(path, source) + # YAML / .properties: regex-based config key extraction. + if language == "yaml": + return self._parse_yaml_config(path, source) + if language == "properties": + return self._parse_properties_config(path, source) + parser = self._get_parser(language) if not parser: return [], [] @@ -2027,6 +2070,136 @@ def _parse_sql( return nodes, edges + def _parse_yaml_config( + self, path: Path, source: bytes, + ) -> tuple[list[NodeInfo], list[EdgeInfo]]: + """Extract leaf config keys from a YAML file as config_property nodes. + + Flattens nested YAML into dotted paths (e.g. ``app.kafka.topic``). + Only scalar leaf values are indexed; list items use a numeric suffix. + Skips anchor/alias lines and comment-only lines. 
+ """ + nodes: list[NodeInfo] = [] + file_path_str = str(path) + + try: + text = source.decode("utf-8", errors="replace") + except Exception: + return [], [] + + # Simple line-based flattening — avoids a PyYAML dependency and handles + # the common Spring Boot application.yml structure accurately. + stack: list[tuple[int, str]] = [] # (indent, key_prefix) + list_counters: dict[str, int] = {} + + for lineno, raw_line in enumerate(text.splitlines(), start=1): + stripped = raw_line.lstrip() + if not stripped or stripped.startswith("#"): + continue + indent = len(raw_line) - len(stripped) + + # Pop stack entries whose indent >= current + while stack and stack[-1][0] >= indent: + stack.pop() + + if stripped.startswith("- "): + # List item: use parent key with numeric index + parent_prefix = stack[-1][1] if stack else "" + counter = list_counters.get(parent_prefix, 0) + list_counters[parent_prefix] = counter + 1 + value_part = stripped[2:].strip() + if value_part and not value_part.endswith(":"): + key = f"{parent_prefix}[{counter}]" if parent_prefix else f"[{counter}]" + _qn = f"config:{key}" + nodes.append(NodeInfo( + kind="ConfigProperty", + name=key, + file_path=file_path_str, + line_start=lineno, + line_end=lineno, + language="yaml", + parent_name=None, + extra={"config_value": value_part, "source_file": path.name}, + )) + continue + + if ":" in stripped: + colon_pos = stripped.index(":") + key_part = stripped[:colon_pos].strip() + value_part = stripped[colon_pos + 1:].strip() + + parent_prefix = stack[-1][1] if stack else "" + full_key = f"{parent_prefix}.{key_part}" if parent_prefix else key_part + full_key = full_key.lstrip(".") + + if value_part and not value_part.startswith("#"): + # Scalar leaf — store both raw and normalised key + norm_key = _normalize_config_key(full_key) + nodes.append(NodeInfo( + kind="ConfigProperty", + name=norm_key, + file_path=file_path_str, + line_start=lineno, + line_end=lineno, + language="yaml", + parent_name=None, + extra={ + 
def _parse_properties_config(
    self, path: Path, source: bytes,
) -> tuple[list[NodeInfo], list[EdgeInfo]]:
    """Extract key/value pairs from a Java ``.properties`` file.

    Each ``key=value`` or ``key:value`` entry becomes one ``ConfigProperty``
    node. The node name is the key passed through ``_normalize_config_key``,
    the same normalisation used for YAML nodes and for ``config:`` edge
    targets in this file, so kebab-case keys stay reachable. The key as
    written is kept in ``extra["raw_key"]``.

    Args:
        path: Location of the file; used for ``file_path`` and ``source_file``.
        source: Raw file contents, decoded as UTF-8 with replacement.

    Returns:
        ``(nodes, [])`` -- this extractor never emits edges.
    """
    nodes: list[NodeInfo] = []
    file_path_str = str(path)

    try:
        text = source.decode("utf-8", errors="replace")
    except Exception:
        # Best-effort parser: an undecodable/invalid source yields nothing.
        return [], []

    pending = ""  # logical line accumulated across backslash continuations
    for lineno, raw_line in enumerate(text.splitlines(), start=1):
        line = raw_line.strip()
        if not line or line.startswith(("#", "!")):
            pending = ""
            continue
        if pending:
            line = pending + line
            pending = ""
        # Only an odd number of trailing backslashes continues the line; an
        # even count is a run of escaped literal backslashes.
        trailing_backslashes = len(line) - len(line.rstrip("\\"))
        if trailing_backslashes % 2:
            pending = line[:-1].rstrip()
            continue

        # The key ends at whichever of '=' or ':' comes first, so values that
        # contain the other character (URLs, "a=b") are left intact.
        separator = re.search(r"[=:]", line)
        if separator is None:
            continue
        raw_key = line[:separator.start()].strip()
        if not raw_key:
            continue
        value = line[separator.end():].strip()
        nodes.append(NodeInfo(
            kind="ConfigProperty",
            name=_normalize_config_key(raw_key),
            file_path=file_path_str,
            line_start=lineno,
            line_end=lineno,
            language="properties",
            parent_name=None,
            extra={
                "config_value": value,
                "source_file": path.name,
                "raw_key": raw_key,
            },
        ))

    return nodes, []
+ + Handles two patterns: + - @Value("${property.key}") on fields — one edge per property key + - @ConfigurationProperties(prefix="...") on the class — one edge per prefix + """ + qualified_source = self._qualify(class_name, file_path, None) + + # Class-level @ConfigurationProperties + if "ConfigurationProperties" in class_annotations: + for node in class_node.children: + if node.type != "modifiers": + continue + for mod in node.children: + if mod.type != "annotation": + continue + ann_name = "" + for sub in mod.children: + if sub.type == "identifier": + ann_name = sub.text.decode("utf-8", errors="replace") + break + if ann_name != "ConfigurationProperties": + continue + prefix = "" + for sub in mod.children: + if sub.type == "annotation_argument_list": + raw = sub.text.decode("utf-8", errors="replace") + m = re.search(r'prefix\s*=\s*"([^"]+)"', raw) + if m: + prefix = m.group(1) + else: + m2 = re.search(r'"([^"]+)"', raw) + if m2: + prefix = m2.group(1) + if prefix: + edges.append(EdgeInfo( + kind="DEPENDS_ON_CONFIG", + source=qualified_source, + target=f"config:{_normalize_config_key(prefix)}.*", + file_path=file_path, + line=class_node.start_point[0] + 1, + extra={"resolution": "configuration_properties", "confidence": 1.0}, + )) + + # Field-level @Value("${property.key}") + for node in class_node.children: + if node.type != "class_body": + continue + for member in node.children: + if member.type != "field_declaration": + continue + for child in member.children: + if child.type != "modifiers": + continue + for mod in child.children: + if mod.type != "annotation": + continue + ann_name = "" + ann_value = "" + for sub in mod.children: + if sub.type == "identifier": + ann_name = sub.text.decode("utf-8", errors="replace") + elif sub.type == "annotation_argument_list": + ann_value = sub.text.decode("utf-8", errors="replace") + if ann_name != "Value": + continue + m = re.search(r'\$\{([^}]+)\}', ann_value) + if not m: + continue + prop_key = 
m.group(1).split(":")[0].strip() + edges.append(EdgeInfo( + kind="DEPENDS_ON_CONFIG", + source=qualified_source, + target=f"config:{_normalize_config_key(prop_key)}", + file_path=file_path, + line=member.start_point[0] + 1, + extra={"resolution": "value_annotation", "confidence": 1.0}, + )) + def _emit_temporal_stub_fields( self, class_node, @@ -4057,6 +4317,130 @@ def _get_kafka_annotation_topics(annotation_node) -> list[str]: topics.append(raw) return topics + @staticmethod + def _get_http_annotation_path(annotation_node) -> Optional[str]: + """Extract the path string from a Spring HTTP mapping annotation argument. + + Handles @GetMapping("/path"), @RequestMapping(value="/path"), + and @RequestMapping(path="/path") variants. + """ + for child in annotation_node.children: + if child.type != "annotation_argument_list": + continue + # Single string argument: @GetMapping("/path") + for item in child.children: + if item.type == "string_literal": + raw = item.text.decode("utf-8", errors="replace").strip('"').strip("'") + if raw: + return raw + # Named argument: @RequestMapping(value = "/path") or path = "/path" + for pair in child.children: + if pair.type != "element_value_pair": + continue + key_node = next((c for c in pair.children if c.type == "identifier"), None) + if key_node is None: + continue + if key_node.text.decode("utf-8", errors="replace") not in ("value", "path"): + continue + for val in pair.children: + if val.type == "string_literal": + raw = val.text.decode("utf-8", errors="replace").strip('"').strip("'") + if raw: + return raw + return None + + def _emit_http_endpoint_nodes_and_edges( + self, + method_node, + method_name: str, + class_name: Optional[str], + file_path: str, + nodes: list[NodeInfo], + edges: list[EdgeInfo], + ) -> None: + """Emit Endpoint nodes and HANDLES edges for Spring HTTP mapping annotations.""" + qualified_source = self._qualify(method_name, file_path, class_name) + + for child in method_node.children: + if child.type != 
def _emit_bean_parameter_injections(
    self,
    method_node,
    method_name: str,
    class_name: Optional[str],
    file_path: str,
    edges: list[EdgeInfo],
) -> None:
    """Append one INJECTS edge per typed formal parameter of *method_node*.

    Intended for ``@Bean`` factory methods, whose parameters are supplied by
    the container and therefore behave like injected dependencies.

    The edge source is the qualified enclosing class, or *file_path* when
    there is no class. The target is the parameter's first
    ``type_identifier``; the parameter name, when present, is recorded as
    ``extra["field_name"]``. Parameters without a plain ``type_identifier``
    child produce no edge. *method_name* is accepted for signature parity
    and is not used.
    """
    if class_name:
        owner = self._qualify(class_name, file_path, None)
    else:
        owner = file_path

    # Flatten method -> formal_parameters -> formal_parameter lazily.
    parameters = (
        candidate
        for group in method_node.children
        if group.type == "formal_parameters"
        for candidate in group.children
        if candidate.type == "formal_parameter"
    )

    for param in parameters:
        declared_type: Optional[str] = None
        declared_name: Optional[str] = None
        for part in param.children:
            if part.type == "type_identifier" and declared_type is None:
                declared_type = part.text.decode("utf-8", errors="replace")
            elif part.type == "identifier":
                declared_name = part.text.decode("utf-8", errors="replace")

        if not declared_type:
            continue

        extra: dict = {"injection_type": "bean_parameter"}
        if declared_name:
            extra["field_name"] = declared_name
        edges.append(EdgeInfo(
            kind="INJECTS",
            source=owner,
            target=declared_type,
            file_path=file_path,
            line=param.start_point[0] + 1,
            extra=extra,
        ))
self._emit_http_endpoint_nodes_and_edges( + child, name, enclosing_class, file_path, nodes, edges, + ) + if any(a.split("(")[0] == "Bean" for a in deco_list): + self._emit_bean_parameter_injections( + child, name, enclosing_class, file_path, edges, + ) node = NodeInfo( kind=kind, @@ -4615,8 +5013,8 @@ def _extract_calls( else: caller = file_path - # Java method_invocation: extract actual method name and receiver - # separately so the Spring DI resolver can rewrite the target. + # Java method_invocation / method_reference: extract method name and + # receiver separately so the Spring DI resolver can rewrite the target. call_extra: dict = {} if language == "java" and child.type == "method_invocation": method_name, receiver = self._get_java_method_and_receiver(child) @@ -4624,6 +5022,41 @@ def _extract_calls( call_name = method_name if receiver: call_extra["receiver"] = receiver + elif language == "java" and child.type == "method_reference": + identifiers = [c for c in child.children if c.type == "identifier"] + if len(identifiers) >= 2: + call_extra["receiver"] = identifiers[0].text.decode("utf-8", errors="replace") + + # WebFlux functional routing: route().GET("/path", handler::method) + # Emit an Endpoint node + HANDLES edge when a Java method_invocation + # named GET/POST/etc. has a string-literal first argument starting "/". 
+ if ( + language == "java" + and child.type == "method_invocation" + and call_name in _WEBFLUX_HTTP_VERBS + ): + path = self._get_webflux_route_path(child) + if path and enclosing_func: + http_method = call_name + endpoint_qn = f"http:{http_method}:{path}" + nodes.append(NodeInfo( + kind="Endpoint", + name=f"{http_method} {path}", + file_path=file_path, + line_start=child.start_point[0] + 1, + line_end=child.start_point[0] + 1, + language="java", + parent_name=enclosing_class, + extra={"http_method": http_method, "path": path}, + )) + edges.append(EdgeInfo( + kind="HANDLES", + source=caller, + target=endpoint_qn, + file_path=file_path, + line=child.start_point[0] + 1, + extra={"http_method": http_method, "path": path}, + )) # When a receiver is present, skip scope-based resolution: the method # lives on the receiver's type, not in the current file's scope. @@ -4646,6 +5079,26 @@ def _extract_calls( return False + @staticmethod + def _get_webflux_route_path(node) -> Optional[str]: + """Extract the path string from a WebFlux fluent .GET("/path", ...) call. + + Returns the path string if the first argument in the argument_list is a + string literal starting with "/", otherwise None. + """ + for child in node.children: + if child.type != "argument_list": + continue + for item in child.children: + if item.type == "string_literal": + raw = item.text.decode("utf-8", errors="replace").strip('"').strip("'") + if raw.startswith("/"): + return raw + # Skip non-string tokens like "(" and "," + if item.type not in ("(", ")", ","): + break + return None + @staticmethod def _get_java_method_and_receiver(node) -> tuple[Optional[str], Optional[str]]: """For a Java method_invocation node, return (method_name, receiver_name). @@ -6384,6 +6837,41 @@ def _normalize_php_name(text: str) -> str: return child.text.decode("utf-8", errors="replace") return None + # Java: method_reference (handler::process) — return method name. 
+ # The receiver (first identifier) is captured in _extract_calls via + # call_extra["receiver"] so the Spring DI resolver can rewrite the target. + if language == "java" and node.type == "method_reference": + identifiers = [c for c in node.children if c.type == "identifier"] + if len(identifiers) >= 2: + return identifiers[-1].text.decode("utf-8", errors="replace") + if identifiers: + return identifiers[0].text.decode("utf-8", errors="replace") + return None + + # Java: chained method_invocation (route().GET("/path", ...)) — the first + # child is the receiver expression (another method_invocation), so the actual + # method name is the identifier child that follows it. + if language == "java" and node.type == "method_invocation": + if first.type not in ("identifier", "simple_identifier"): + # Skip past the receiver and find the method name identifier + found_receiver = False + for child in node.children: + if not found_receiver: + if child.type not in (".", ): + found_receiver = True + continue + if child.type == "identifier": + return child.text.decode("utf-8", errors="replace") + return None + + # Java: object_creation_expression (new TrialAutomation(...)) — the + # first child is the `new` keyword; the type name is the type_identifier. + if language == "java" and node.type == "object_creation_expression": + for child in node.children: + if child.type == "type_identifier": + return child.text.decode("utf-8", errors="replace") + return None + # Objective-C: [receiver method:arg] — the method name is the # SECOND identifier-like child (the first is the receiver). 
For # multi-part selectors like `[obj add:a to:b]` we keep the first diff --git a/code_review_graph/tools/query.py b/code_review_graph/tools/query.py index e3d8c3e1..6531ca5c 100644 --- a/code_review_graph/tools/query.py +++ b/code_review_graph/tools/query.py @@ -27,6 +27,7 @@ "children_of": "Find all nodes contained in a file or class", "tests_for": "Find all tests for a given function or class", "inheritors_of": "Find all classes that inherit from a given class", + "consumers_of": "Find all classes/methods that read a given config property", "file_summary": "Get a summary of all nodes in a file", } @@ -207,7 +208,8 @@ def query_graph( "candidates": [node_to_dict(c) for c in candidates], } - if not node and pattern != "file_summary": + # consumers_of can resolve via config edge keys even without a node + if not node and pattern not in ("file_summary", "consumers_of"): return { "status": "not_found", "summary": f"No node found matching '{target}'.", @@ -270,19 +272,20 @@ def query_graph( results.append(node_to_dict(child)) elif pattern == "tests_for": - for e in store.get_edges_by_target(qn): - if e.kind == "TESTED_BY": - test = store.get_node(e.source_qualified) - if test: - results.append(node_to_dict(test)) - # Also search by naming convention + transitive = store.get_transitive_tests(qn, max_depth=3) + seen: set[str] = set() + for t in transitive: + qn_t = t.get("qualified_name", "") + if qn_t not in seen: + seen.add(qn_t) + results.append(t) + # Naming-convention fallback for tests not linked by TESTED_BY edges name = node.name if node else target - test_nodes = store.search_nodes(f"test_{name}", limit=10) - test_nodes += store.search_nodes(f"Test{name}", limit=10) - seen = {r.get("qualified_name") for r in results} - for t in test_nodes: - if t.qualified_name not in seen and t.is_test: - results.append(node_to_dict(t)) + for prefix in (f"test_{name}", f"Test{name}"): + for candidate in store.search_nodes(prefix, limit=10): + if candidate.qualified_name not in 
seen and candidate.is_test: + seen.add(candidate.qualified_name) + results.append(node_to_dict(candidate)) elif pattern == "inheritors_of": for e in store.get_edges_by_target(qn): @@ -302,6 +305,24 @@ def query_graph( results.append(node_to_dict(child)) edges_out.append(edge_to_dict(e)) + elif pattern == "consumers_of": + # Find classes/methods that read this config property. + # DEPENDS_ON_CONFIG edges use 'config:{key}' as target_qualified + # while config nodes are stored as 'file.yml::{key}'. + # Use the node's plain name (the config key) to bridge that gap. + key = node.name if node else target.removeprefix("config:") + for e in store.get_edges_by_config_key(key): + consumer = store.get_node(e.source_qualified) + if consumer: + results.append(node_to_dict(consumer)) + edges_out.append(edge_to_dict(e)) + # Prefix-wildcard: @ConfigurationProperties emits 'config:prefix.*' + for e in store.get_edges_by_config_key(f"{key}.*"): + consumer = store.get_node(e.source_qualified) + if consumer: + results.append(node_to_dict(consumer)) + edges_out.append(edge_to_dict(e)) + elif pattern == "file_summary": abs_path = str(root / target) file_nodes = store.get_nodes_by_file(abs_path) @@ -636,14 +657,44 @@ def traverse_graph_func( traversal.append(entry) # Get neighbours - out_edges = store.get_edges_by_source( - current_qn - ) - in_edges = store.get_edges_by_target( - current_qn - ) + out_edges = store.get_edges_by_source(current_qn) + in_edges = store.get_edges_by_target(current_qn) + + # ConfigProperty nodes: DEPENDS_ON_CONFIG edges store targets as + # 'config:{key}' but nodes are stored as 'file.yml::{key}'. + # Bridge this gap so "who reads this property?" works in BFS. + if node.kind == "ConfigProperty": + in_edges = list(in_edges) + store.get_edges_by_config_key(node.name) + + # Endpoint nodes: HANDLES edges store targets as 'http:METHOD:path' + # but nodes are stored as 'file.java::Class.METHOD path'. 
+ # Bridge this so BFS from an Endpoint reaches the handler method. + if node.kind == "Endpoint": + http_key = "http:" + node.name.replace(" ", ":", 1) + in_edges = list(in_edges) + store.get_edges_by_endpoint_key(http_key) + for e in out_edges: tgt = e.target_qualified + # Resolve 'config:{key}' edge targets to the actual node qn + if tgt.startswith("config:") and not tgt.endswith(".*"): + key = tgt[len("config:"):] + candidates = store.search_nodes(key, limit=3) + config_nodes = [c for c in candidates if c.kind == "ConfigProperty"] + for cn in config_nodes: + if cn.qualified_name not in visited: + queue.append((cn.qualified_name, cur_depth + 1)) + continue + # Resolve 'http:METHOD:path' edge targets to the actual Endpoint node qn + if tgt.startswith("http:"): + parts = tgt.split(":", 2) + if len(parts) == 3: + ep_name = f"{parts[1]} {parts[2]}" + candidates = store.search_nodes(ep_name, limit=3) + ep_nodes = [c for c in candidates if c.kind == "Endpoint"] + for ep in ep_nodes: + if ep.qualified_name not in visited: + queue.append((ep.qualified_name, cur_depth + 1)) + continue if tgt not in visited: queue.append((tgt, cur_depth + 1)) for e in in_edges: @@ -659,10 +710,9 @@ def traverse_graph_func( "traversal": traversal, "truncated": approx_tokens > token_budget, "next_tool_suggestions": [ - "query_graph callers_of" - " -- focused relationship query", - "get_impact_radius" - " -- blast radius analysis", + "query_graph callers_of -- focused call relationship query", + "query_graph consumers_of -- who reads a config property", + "get_impact_radius -- blast radius analysis", ], } finally: diff --git a/tests/fixtures/SampleJava.java b/tests/fixtures/SampleJava.java index d8ecd727..4f5ea690 100644 --- a/tests/fixtures/SampleJava.java +++ b/tests/fixtures/SampleJava.java @@ -64,3 +64,67 @@ public void save(User user) { super.save(user); } } + +// --- Method reference and constructor call fixtures --- + +interface Processor { + void process(User user); +} + +class 
AuditProcessor implements Processor { + public void process(User user) {} +} + +class BatchProcessor implements Processor { + public void process(User user) {} +} + +class ProcessorFactory { + public static Processor create(String type) { + switch (type) { + case "audit": return new AuditProcessor(); + case "batch": return new BatchProcessor(); + default: return new AuditProcessor(); + } + } +} + +class WebFluxRouter { + private final AuditProcessor auditProcessor; + + public WebFluxRouter(AuditProcessor auditProcessor) { + this.auditProcessor = auditProcessor; + } + + public void setupRoutes() { + // method reference: auditProcessor::process + Runnable r = auditProcessor::process; + } +} + +// WebFlux functional router — path literals as Endpoint nodes +class ItemRouter { + private final AuditProcessor handler; + + public ItemRouter(AuditProcessor handler) { + this.handler = handler; + } + + public RouterFunction routes() { + return route() + .GET("/items", handler::process) + .POST("/items", handler::process) + .build(); + } +} + +// --- HTTP endpoint fixture --- + +@RestController +class OrderController { + @GetMapping("/orders") + public String listOrders() { return "ok"; } + + @PostMapping("/orders") + public String createOrder() { return "ok"; } +} diff --git a/tests/fixtures/SpringDI.java b/tests/fixtures/SpringDI.java index 402c213f..bb5c6814 100644 --- a/tests/fixtures/SpringDI.java +++ b/tests/fixtures/SpringDI.java @@ -70,9 +70,33 @@ class AppConfig { public OrderRepository orderRepository() { return new JpaOrderRepository(); } + + @Bean + public NotificationService notificationService(OrderRepository orderRepository) { + return new NotificationService(); + } } class Order { private Long id; public Long getId() { return id; } } + +// @Value field injection — two distinct property keys +@Service +class PaymentService { + @Value("${payment.gateway.url}") + private String gatewayUrl; + + @Value("${payment.timeout.seconds:30}") + private int timeoutSeconds; 
+ + public void process() {} +} + +// @ConfigurationProperties class +@ConfigurationProperties(prefix = "app.kafka") +class KafkaConfigProperties { + private String bootstrapServers; + private String topic; +} diff --git a/tests/fixtures/app.properties b/tests/fixtures/app.properties new file mode 100644 index 00000000..bb413455 --- /dev/null +++ b/tests/fixtures/app.properties @@ -0,0 +1,4 @@ +payment.gateway.url=https://pay.example.com +payment.timeout.seconds=30 +spring.datasource.url=jdbc:postgresql://localhost:5432/mydb +app.kafka.topic=order.created diff --git a/tests/fixtures/application.yml b/tests/fixtures/application.yml new file mode 100644 index 00000000..b01cc225 --- /dev/null +++ b/tests/fixtures/application.yml @@ -0,0 +1,14 @@ +spring: + datasource: + url: jdbc:postgresql://localhost:5432/mydb + username: app_user + kafka: + bootstrap-servers: localhost:9092 + topic: order.created + +app: + payment: + gateway-url: https://pay.example.com + timeout-seconds: 30 + feature-flags: + bulk-enabled: true diff --git a/tests/test_flows.py b/tests/test_flows.py index 8c0536fa..7e9faa03 100644 --- a/tests/test_flows.py +++ b/tests/test_flows.py @@ -141,6 +141,40 @@ def test_detect_entry_points_celery_task(self): ep_names = {ep.name for ep in eps} assert "process_data" in ep_names + def test_detect_entry_points_kafka_listener(self): + """Java @KafkaListener marks function as entry point.""" + self._add_func("handleOrder", extra={"decorators": ['KafkaListener(topics = "order.created")']}) + eps = detect_entry_points(self.store) + ep_names = {ep.name for ep in eps} + assert "handleOrder" in ep_names + + def test_detect_entry_points_workflow_method(self): + """Temporal @WorkflowMethod marks function as entry point.""" + self._add_func("startWorkflow", extra={"decorators": ["WorkflowMethod"]}) + eps = detect_entry_points(self.store) + ep_names = {ep.name for ep in eps} + assert "startWorkflow" in ep_names + + def 
test_detect_entry_points_webflux_router_function(self): + """WebFlux @Bean method returning RouterFunction is an entry point.""" + from code_review_graph.parser import NodeInfo + node = NodeInfo( + kind="Function", + name="routes", + file_path="app.py", + line_start=1, + line_end=10, + language="java", + parent_name=None, + return_type="RouterFunction", + extra={"decorators": ["Bean"]}, + ) + self.store.upsert_node(node, file_hash="abc") + self.store.commit() + eps = detect_entry_points(self.store) + ep_names = {ep.name for ep in eps} + assert "routes" in ep_names + def test_detect_entry_points_agent_tool(self): """@agent.tool decorator marks function as entry point.""" self._add_func("query_health", extra={"decorators": ["health_agent.tool"]}) diff --git a/tests/test_multilang.py b/tests/test_multilang.py index b8cddf23..a8dc4ef3 100644 --- a/tests/test_multilang.py +++ b/tests/test_multilang.py @@ -2127,6 +2127,29 @@ def test_java_method_call_target_is_method_not_receiver(self): assert any("save" in t for t in targets), f"expected 'save' in targets, got {targets}" # receiver variable names must NOT appear as CALLS targets assert "orderRepository" not in targets assert "notificationService" not in targets + + def test_value_annotation_emits_depends_on_config_edges(self): + """@Value fields emit DEPENDS_ON_CONFIG edges with the property key as target.""" + config_edges = [e for e in self.edges if e.kind == "DEPENDS_ON_CONFIG"] + targets = {e.target for e in config_edges} + assert "config:payment.gateway.url" in targets + # Default-value syntax ${key:default} — only the key part is used + assert "config:payment.timeout.seconds" in targets + + def test_value_annotation_resolution_metadata(self): + """DEPENDS_ON_CONFIG edges from @Value carry value_annotation resolution.""" + config_edges = [e for e in self.edges if e.kind == "DEPENDS_ON_CONFIG" + and e.extra.get("resolution") == "value_annotation"] + assert len(config_edges) >= 2 + for e in config_edges: + assert e.extra.get("confidence") == 1.0 + + def
test_configuration_properties_emits_depends_on_config_edge(self): + """@ConfigurationProperties(prefix=...) emits a DEPENDS_ON_CONFIG edge for the prefix.""" + config_edges = [e for e in self.edges if e.kind == "DEPENDS_ON_CONFIG" + and e.extra.get("resolution") == "configuration_properties"] + targets = {e.target for e in config_edges} + assert "config:app.kafka.*" in targets def test_java_receiver_stored_in_calls_extra(self): @@ -2528,3 +2551,314 @@ def test_table_reference_edges(self): targets = {e.target for e in imports} # active_orders view and archive procedure both reference orders/users assert "orders" in targets or "users" in targets + + +class TestJavaMethodReferenceAndConstructorCalls: + """Tests for method_reference and object_creation_expression CALLS edges.""" + + def setup_method(self): + self.parser = CodeParser() + self.nodes, self.edges = self.parser.parse_file(FIXTURES / "SampleJava.java") + + def test_constructor_call_in_switch_emits_calls_edge(self): + """new AuditProcessor() inside a switch arm must emit a CALLS edge.""" + calls = [e for e in self.edges if e.kind == "CALLS"] + targets = {e.target for e in calls} + assert "AuditProcessor" in targets or any("AuditProcessor" in t for t in targets) + + def test_multiple_constructors_in_switch_all_emitted(self): + """Each concrete type returned in switch arms must produce a CALLS edge.""" + calls = [e for e in self.edges if e.kind == "CALLS"] + targets = {e.target for e in calls} + assert any("AuditProcessor" in t for t in targets) + assert any("BatchProcessor" in t for t in targets) + + def test_method_reference_emits_calls_edge(self): + """auditProcessor::process must emit a CALLS edge with target 'process'.""" + calls = [e for e in self.edges if e.kind == "CALLS"] + targets = {e.target for e in calls} + assert "process" in targets or any("process" in t for t in targets) + + def test_method_reference_stores_receiver(self): + """CALLS edge from a
method_reference must carry extra.receiver.""" + ref_calls = [ + e for e in self.edges + if e.kind == "CALLS" and e.extra.get("receiver") == "auditProcessor" + ] + assert len(ref_calls) >= 1 + + +class TestJavaHttpEndpointExtraction: + """Tests for HTTP Endpoint node and HANDLES edge extraction.""" + + def setup_method(self): + self.parser = CodeParser() + self.nodes, self.edges = self.parser.parse_file(FIXTURES / "SampleJava.java") + + def test_get_mapping_creates_endpoint_node(self): + """@GetMapping('/orders') must produce a GET Endpoint node for '/orders'.""" + endpoints = [n for n in self.nodes if n.kind == "Endpoint"] + routes = {(n.extra.get("http_method"), n.extra.get("path")) for n in endpoints} + assert ("GET", "/orders") in routes + + def test_post_mapping_creates_endpoint_node(self): + """@PostMapping('/orders') must produce a POST Endpoint node for '/orders'.""" + endpoints = [n for n in self.nodes if n.kind == "Endpoint"] + routes = {(n.extra.get("http_method"), n.extra.get("path")) for n in endpoints} + assert ("POST", "/orders") in routes + + def test_endpoint_node_carries_http_method(self): + """Endpoint nodes must have http_method in extra.""" + endpoints = [n for n in self.nodes if n.kind == "Endpoint"] + http_methods = {n.extra.get("http_method") for n in endpoints} + assert "GET" in http_methods + assert "POST" in http_methods + + def test_handles_edge_links_handler_to_endpoint(self): + """A HANDLES edge must connect the Java method to the Endpoint node.""" + handles = [e for e in self.edges if e.kind == "HANDLES"] + assert len(handles) >= 2 + targets = {e.target for e in handles} + assert any("/orders" in t for t in targets) + + +class TestYamlConfigParsing: + """Tests for YAML config file parsing into ConfigProperty nodes.""" + + def setup_method(self): + self.parser = CodeParser() + self.nodes, self.edges = self.parser.parse_file(FIXTURES / "application.yml") + + def test_yaml_nodes_are_config_property_kind(self): + assert all(n.kind == "ConfigProperty" for n in self.nodes) + + def test_nested_keys_are_flattened(self): + names = {n.name for n in self.nodes}
+ assert "spring.datasource.url" in names + assert "spring.kafka.bootstrapServers" in names + + def test_deep_nested_key(self): + names = {n.name for n in self.nodes} + assert "app.payment.gatewayUrl" in names + + def test_config_value_stored_in_extra(self): + url_node = next( + (n for n in self.nodes if n.name == "spring.datasource.url"), None + ) + assert url_node is not None + assert "jdbc:postgresql" in url_node.extra.get("config_value", "") + + def test_no_edges_emitted(self): + assert self.edges == [] + + +class TestPropertiesConfigParsing: + """Tests for .properties file parsing into ConfigProperty nodes.""" + + def setup_method(self): + self.parser = CodeParser() + self.nodes, self.edges = self.parser.parse_file(FIXTURES / "app.properties") + + def test_properties_nodes_are_config_property_kind(self): + assert all(n.kind == "ConfigProperty" for n in self.nodes) + + def test_all_keys_parsed(self): + names = {n.name for n in self.nodes} + assert "payment.gateway.url" in names + assert "payment.timeout.seconds" in names + assert "spring.datasource.url" in names + assert "app.kafka.topic" in names + + def test_config_value_stored(self): + node = next((n for n in self.nodes if n.name == "app.kafka.topic"), None) + assert node is not None + assert node.extra.get("config_value") == "order.created" + + +class TestBeanParameterInjection: + """@Bean method parameters should emit INJECTS edges so the Spring DI resolver + can map field receivers to their declared types.""" + + def setup_method(self): + self.parser = CodeParser() + self.nodes, self.edges = self.parser.parse_file(FIXTURES / "SpringDI.java") + self.injects = [e for e in self.edges if e.kind == "INJECTS"] + + def test_bean_parameter_emits_injects_edge(self): + """notificationService(@Bean param OrderRepository) must create an INJECTS edge.""" + targets = {e.target for e in self.injects} + assert "OrderRepository" in targets + + def test_bean_parameter_injection_type_is_tagged(self): + edge = next( + (e for 
e in self.injects if e.extra.get("injection_type") == "bean_parameter"), + None, + ) + assert edge is not None + + def test_bean_parameter_field_name_stored(self): + edge = next( + (e for e in self.injects + if e.extra.get("injection_type") == "bean_parameter" + and e.extra.get("field_name") == "orderRepository"), + None, + ) + assert edge is not None + + +class TestWebFluxFunctionalRoutingPaths: + """route().GET("/path", handler::method) should create Endpoint nodes + HANDLES edges.""" + + def setup_method(self): + self.parser = CodeParser() + self.nodes, self.edges = self.parser.parse_file(FIXTURES / "SampleJava.java") + self.endpoints = [n for n in self.nodes if n.kind == "Endpoint"] + self.handles = [e for e in self.edges if e.kind == "HANDLES"] + + def test_get_route_creates_endpoint_node(self): + paths = {n.extra.get("path") for n in self.endpoints} + assert "/items" in paths + + def test_post_route_creates_endpoint_node(self): + routes = {(n.extra.get("http_method"), n.extra.get("path")) for n in self.endpoints} + assert ("POST", "/items") in routes + + def test_endpoint_name_includes_http_method_and_path(self): + names = {n.name for n in self.endpoints} + assert "GET /items" in names or "POST /items" in names + + def test_handles_edge_links_router_method_to_endpoint(self): + targets = {e.target for e in self.handles} + assert any(t.startswith("http:") for t in targets) + + +class TestConfigKeyNormalization: + """YAML kebab-case keys and @Value camelCase refs should resolve to the same name.""" + + def setup_method(self): + self.parser = CodeParser() + self.yaml_nodes, _ = self.parser.parse_file(FIXTURES / "application.yml") + _, self.java_edges = self.parser.parse_file(FIXTURES / "SpringDI.java") + self.config_edges = [ + e for e in self.java_edges if e.kind == "DEPENDS_ON_CONFIG" + ] + + def test_yaml_kebab_key_is_normalized_to_camel(self): + """app.payment.gateway-url in YAML should become app.payment.gatewayUrl in graph.""" + names = {n.name for n in self.yaml_nodes} + assert
"app.payment.gatewayUrl" in names + + def test_value_annotation_key_normalized(self): + """@Value("${payment.gateway.url}") target should be config:payment.gateway.url.""" + targets = {e.target for e in self.config_edges} + assert any("payment.gateway" in t for t in targets) + + def test_configuration_properties_prefix_normalized(self): + """@ConfigurationProperties(prefix="app.kafka") target should use normalized prefix.""" + targets = {e.target for e in self.config_edges} + assert any("app.kafka" in t for t in targets) + + +class TestEndpointBFSTraversal: + """traverse_graph from an Endpoint node should follow HANDLES edges to handler methods.""" + + def _build(self, tmp_path): + from code_review_graph.graph import GraphStore + from code_review_graph.incremental import full_build, get_db_path + import shutil + + (tmp_path / ".git").mkdir() + shutil.copy(FIXTURES / "SampleJava.java", tmp_path / "SampleJava.java") + db_path = get_db_path(tmp_path) + db_path.parent.mkdir(parents=True, exist_ok=True) + store = GraphStore(db_path) + full_build(tmp_path, store) + return store + + def test_endpoint_node_exists_in_graph(self, tmp_path): + store = self._build(tmp_path) + cur = store._conn.cursor() + rows = cur.execute( + "SELECT name, qualified_name FROM nodes WHERE kind='Endpoint'" + ).fetchall() + names = {r[0] for r in rows} + assert "GET /orders" in names + + def test_handles_edge_uses_http_prefix(self, tmp_path): + store = self._build(tmp_path) + cur = store._conn.cursor() + rows = cur.execute( + "SELECT target_qualified FROM edges WHERE kind='HANDLES'" + ).fetchall() + targets = {r[0] for r in rows} + assert "http:GET:/orders" in targets + + def test_get_edges_by_endpoint_key_finds_handler(self, tmp_path): + store = self._build(tmp_path) + edges = store.get_edges_by_endpoint_key("http:GET:/orders") + assert len(edges) > 0 + assert all(e.kind == "HANDLES" for e in edges) + + def test_traverse_from_endpoint_reaches_handler_method(self, tmp_path): + from 
code_review_graph.tools.query import traverse_graph_func + self._build(tmp_path) + result = traverse_graph_func( + query="GET /orders", + depth=3, + repo_root=str(tmp_path), + ) + names = {n["name"] for n in result.get("traversal", [])} + kinds = {n["kind"] for n in result.get("traversal", [])} + assert "Endpoint" in kinds + assert any("listOrders" in (n or "") for n in names) + + +class TestConsumersOfPattern: + """query_graph consumers_of and traverse_graph must resolve config→Java edges.""" + + def _build(self, tmp_path): + from code_review_graph.graph import GraphStore + from code_review_graph.incremental import full_build, get_db_path + import shutil + + (tmp_path / ".git").mkdir() + shutil.copy(FIXTURES / "SpringDI.java", tmp_path / "SpringDI.java") + shutil.copy(FIXTURES / "application.yml", tmp_path / "application.yml") + shutil.copy(FIXTURES / "app.properties", tmp_path / "app.properties") + db_path = get_db_path(tmp_path) + db_path.parent.mkdir(parents=True, exist_ok=True) + store = GraphStore(db_path) + full_build(tmp_path, store) + return store + + def test_get_edges_by_config_key_returns_depends_on_config(self, tmp_path): + store = self._build(tmp_path) + edges = store.get_edges_by_config_key("payment.gateway.url") + assert len(edges) > 0 + assert all(e.kind == "DEPENDS_ON_CONFIG" for e in edges) + + def test_consumers_of_finds_java_class_reading_config(self, tmp_path): + from code_review_graph.tools.query import query_graph + store = self._build(tmp_path) + result = query_graph( + pattern="consumers_of", + target="payment.gateway.url", + repo_root=str(tmp_path), + ) + assert result.get("status") == "ok" + names = {r.get("name") for r in result.get("results", [])} + assert any("PaymentService" in (n or "") for n in names) + + def test_traverse_graph_from_config_node_reaches_consumers(self, tmp_path): + from code_review_graph.tools.query import traverse_graph_func + self._build(tmp_path) + result = traverse_graph_func( + query="payment.gateway.url", + 
depth=3, + repo_root=str(tmp_path), + ) + kinds = {n["kind"] for n in result.get("traversal", [])} + names = {n["name"] for n in result.get("traversal", [])} + # BFS from the ConfigProperty should reach the Java class that reads it + assert "ConfigProperty" in kinds + assert any("PaymentService" in (n or "") for n in names)