diff --git a/apps/api/src/alicebot_api/vnext_store.py b/apps/api/src/alicebot_api/vnext_store.py index 2a6ce0b..694d112 100644 --- a/apps/api/src/alicebot_api/vnext_store.py +++ b/apps/api/src/alicebot_api/vnext_store.py @@ -1,5 +1,6 @@ from __future__ import annotations +import re from typing import Any, cast import psycopg @@ -13,6 +14,30 @@ JsonList = list[object] VNextRow = dict[str, object] +_SEARCH_STOPWORDS = {"about", "what", "when", "where", "which", "with", "from", "this", "that", "should", "could"} + + +def _search_patterns(query: str) -> list[str]: + normalized = " ".join(str(query).split()).strip() + if len(normalized) >= 2 and ( + (normalized[0] == normalized[-1] and normalized[0] in {"'", '"'}) + or (normalized[0], normalized[-1]) in {("\u201c", "\u201d"), ("\u2018", "\u2019")} + ): + normalized = normalized[1:-1].strip() + + patterns: list[str] = [] + if normalized: + patterns.append(f"%{normalized}%") + seen = {pattern.casefold() for pattern in patterns} + for term in re.findall(r"[A-Za-z0-9][A-Za-z0-9_-]{1,}", normalized): + folded = term.casefold() + if folded in _SEARCH_STOPWORDS: + continue + pattern = f"%{folded}%" + if pattern.casefold() not in seen: + patterns.append(pattern) + seen.add(pattern.casefold()) + return patterns or ["%%"] EVENT_LOG_COLUMNS = """ @@ -1176,7 +1201,8 @@ def search_memories( sensitivity_allowed: list[str] | None = None, limit: int = 8, ) -> list[VNextRow]: - pattern = f"%{query}%" + patterns = _search_patterns(query) + exact_pattern = patterns[0] return self._fetch_all( f""" SELECT {MEMORY_COLUMNS} @@ -1186,17 +1212,19 @@ def search_memories( AND (%s::text[] IS NULL OR domain = ANY(%s::text[]) OR domain = 'unknown') AND (%s::text[] IS NULL OR sensitivity = ANY(%s::text[])) AND ( - memory_key ILIKE %s - OR title ILIKE %s - OR canonical_text ILIKE %s - OR summary ILIKE %s - OR value::text ILIKE %s + memory_key ILIKE ANY(%s::text[]) + OR title ILIKE ANY(%s::text[]) + OR canonical_text ILIKE ANY(%s::text[]) + OR summary 
ILIKE ANY(%s::text[]) + OR value::text ILIKE ANY(%s::text[]) ) ORDER BY CASE WHEN canonical_text ILIKE %s THEN 0 WHEN title ILIKE %s THEN 1 - ELSE 2 + WHEN canonical_text ILIKE ANY(%s::text[]) THEN 2 + WHEN title ILIKE ANY(%s::text[]) THEN 3 + ELSE 4 END, updated_at DESC, created_at DESC, @@ -1208,13 +1236,15 @@ def search_memories( domains, sensitivity_allowed, sensitivity_allowed, - pattern, - pattern, - pattern, - pattern, - pattern, - pattern, - pattern, + patterns, + patterns, + patterns, + patterns, + patterns, + exact_pattern, + exact_pattern, + patterns, + patterns, limit, ), ) @@ -1458,7 +1488,8 @@ def search_sources( sensitivity_allowed: list[str] | None = None, limit: int = 8, ) -> list[VNextRow]: - pattern = f"%{query}%" + patterns = _search_patterns(query) + exact_pattern = patterns[0] return self._fetch_all( f""" SELECT {SOURCE_COLUMNS} @@ -1467,14 +1498,21 @@ def search_sources( AND (%s::text[] IS NULL OR domain = ANY(%s::text[]) OR domain = 'unknown') AND (%s::text[] IS NULL OR sensitivity = ANY(%s::text[])) AND ( - title ILIKE %s - OR author ILIKE %s - OR uri ILIKE %s - OR raw_path ILIKE %s - OR content_hash ILIKE %s - OR metadata_json::text ILIKE %s + title ILIKE ANY(%s::text[]) + OR author ILIKE ANY(%s::text[]) + OR uri ILIKE ANY(%s::text[]) + OR raw_path ILIKE ANY(%s::text[]) + OR content_hash ILIKE ANY(%s::text[]) + OR metadata_json::text ILIKE ANY(%s::text[]) ) - ORDER BY captured_at DESC, id DESC + ORDER BY + CASE + WHEN title ILIKE %s THEN 0 + WHEN title ILIKE ANY(%s::text[]) THEN 1 + ELSE 2 + END, + captured_at DESC, + id DESC LIMIT %s """, ( @@ -1482,12 +1520,14 @@ def search_sources( domains, sensitivity_allowed, sensitivity_allowed, - pattern, - pattern, - pattern, - pattern, - pattern, - pattern, + patterns, + patterns, + patterns, + patterns, + patterns, + patterns, + exact_pattern, + patterns, limit, ), ) diff --git a/tests/integration/test_vnext_agentic_memory_commit_api.py b/tests/integration/test_vnext_agentic_memory_commit_api.py 
index a04c5ad..d14a4b4 100644 --- a/tests/integration/test_vnext_agentic_memory_commit_api.py +++ b/tests/integration/test_vnext_agentic_memory_commit_api.py @@ -182,3 +182,50 @@ def test_agentic_memory_commit_confirmation_review_and_rejection_api(migrated_da assert rejected_status == 200 assert rejected_payload["status"] == "rejected" assert "read_only_agent_cannot_write" in rejected_payload["reasons"] + + +def test_unknown_domain_agentic_memory_selected_by_keyword_context_pack(migrated_database_urls, monkeypatch) -> None: + user_id = seed_user(migrated_database_urls["app"], email="agentic-memory-unknown-domain@example.com") + monkeypatch.setattr(main_module, "get_settings", lambda: Settings(database_url=migrated_database_urls["app"])) + user_id_text = str(user_id) + + commit_status, commit_payload = invoke_request( + "POST", + "/v0/vnext/memories/commit", + payload={ + "user_id": user_id_text, + "agent": _agent(), + "intent": "explicit_remember", + "title": "Agent-first vNext preference", + "canonical_text": ( + "Alice should be agent-first, with /vnext as an audit and correction cockpit " + "rather than a required manual review dashboard." 
def test_search_patterns_strip_quotes_and_add_keyword_fallbacks() -> None:
    """A quoted query is unwrapped and followed by per-keyword fallbacks."""
    built = _search_patterns('"agent-first /vnext audit correction cockpit"')

    # The leading pattern is the whole query with the wrapping quotes removed.
    assert built[0] == "%agent-first /vnext audit correction cockpit%"

    # Each keyword of the query is also present as its own substring pattern.
    for expected in (
        "%agent-first%",
        "%vnext%",
        "%audit%",
        "%correction%",
        "%cockpit%",
    ):
        assert expected in built
test_keyword_search_methods_apply_domain_sensitivity_and_limit_filters() -> assert "status IN ('active', 'accepted')" in memory_query assert "domain = ANY" in memory_query assert "sensitivity = ANY" in memory_query + assert "ILIKE ANY" in memory_query assert memory_params is not None + assert memory_params[4] == ["%Alice provenance%", "%alice%", "%provenance%"] assert memory_params[-1] == 4 assert "FROM sources" in source_query + assert "ILIKE ANY" in source_query assert source_params is not None + assert source_params[4] == ["%Alice provenance%", "%alice%", "%provenance%"] assert source_params[-1] == 3 assert "FROM open_loops" in open_loop_query assert "%s::text IS NULL OR status = %s" in open_loop_query