Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 68 additions & 28 deletions apps/api/src/alicebot_api/vnext_store.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import re
from typing import Any, cast

import psycopg
Expand All @@ -13,6 +14,30 @@

JsonList = list[object]
VNextRow = dict[str, object]
_SEARCH_STOPWORDS = {"about", "what", "when", "where", "which", "with", "from", "this", "that", "should", "could"}


def _search_patterns(query: str) -> list[str]:
normalized = " ".join(str(query).split()).strip()
if len(normalized) >= 2 and (
(normalized[0] == normalized[-1] and normalized[0] in {"'", '"'})
or (normalized[0], normalized[-1]) in {("\u201c", "\u201d"), ("\u2018", "\u2019")}
):
normalized = normalized[1:-1].strip()

patterns: list[str] = []
if normalized:
patterns.append(f"%{normalized}%")
seen = {pattern.casefold() for pattern in patterns}
for term in re.findall(r"[A-Za-z0-9][A-Za-z0-9_-]{1,}", normalized):
folded = term.casefold()
if folded in _SEARCH_STOPWORDS:
continue
pattern = f"%{folded}%"
if pattern.casefold() not in seen:
patterns.append(pattern)
seen.add(pattern.casefold())
return patterns or ["%%"]


EVENT_LOG_COLUMNS = """
Expand Down Expand Up @@ -1176,7 +1201,8 @@ def search_memories(
sensitivity_allowed: list[str] | None = None,
limit: int = 8,
) -> list[VNextRow]:
pattern = f"%{query}%"
patterns = _search_patterns(query)
exact_pattern = patterns[0]
return self._fetch_all(
f"""
SELECT {MEMORY_COLUMNS}
Expand All @@ -1186,17 +1212,19 @@ def search_memories(
AND (%s::text[] IS NULL OR domain = ANY(%s::text[]) OR domain = 'unknown')
AND (%s::text[] IS NULL OR sensitivity = ANY(%s::text[]))
AND (
memory_key ILIKE %s
OR title ILIKE %s
OR canonical_text ILIKE %s
OR summary ILIKE %s
OR value::text ILIKE %s
memory_key ILIKE ANY(%s::text[])
OR title ILIKE ANY(%s::text[])
OR canonical_text ILIKE ANY(%s::text[])
OR summary ILIKE ANY(%s::text[])
OR value::text ILIKE ANY(%s::text[])
)
ORDER BY
CASE
WHEN canonical_text ILIKE %s THEN 0
WHEN title ILIKE %s THEN 1
ELSE 2
WHEN canonical_text ILIKE ANY(%s::text[]) THEN 2
WHEN title ILIKE ANY(%s::text[]) THEN 3
ELSE 4
END,
updated_at DESC,
created_at DESC,
Expand All @@ -1208,13 +1236,15 @@ def search_memories(
domains,
sensitivity_allowed,
sensitivity_allowed,
pattern,
pattern,
pattern,
pattern,
pattern,
pattern,
pattern,
patterns,
patterns,
patterns,
patterns,
patterns,
exact_pattern,
exact_pattern,
patterns,
patterns,
limit,
),
)
Expand Down Expand Up @@ -1458,7 +1488,8 @@ def search_sources(
sensitivity_allowed: list[str] | None = None,
limit: int = 8,
) -> list[VNextRow]:
pattern = f"%{query}%"
patterns = _search_patterns(query)
exact_pattern = patterns[0]
return self._fetch_all(
f"""
SELECT {SOURCE_COLUMNS}
Expand All @@ -1467,27 +1498,36 @@ def search_sources(
AND (%s::text[] IS NULL OR domain = ANY(%s::text[]) OR domain = 'unknown')
AND (%s::text[] IS NULL OR sensitivity = ANY(%s::text[]))
AND (
title ILIKE %s
OR author ILIKE %s
OR uri ILIKE %s
OR raw_path ILIKE %s
OR content_hash ILIKE %s
OR metadata_json::text ILIKE %s
title ILIKE ANY(%s::text[])
OR author ILIKE ANY(%s::text[])
OR uri ILIKE ANY(%s::text[])
OR raw_path ILIKE ANY(%s::text[])
OR content_hash ILIKE ANY(%s::text[])
OR metadata_json::text ILIKE ANY(%s::text[])
)
ORDER BY captured_at DESC, id DESC
ORDER BY
CASE
WHEN title ILIKE %s THEN 0
WHEN title ILIKE ANY(%s::text[]) THEN 1
ELSE 2
END,
captured_at DESC,
id DESC
LIMIT %s
""",
(
domains,
domains,
sensitivity_allowed,
sensitivity_allowed,
pattern,
pattern,
pattern,
pattern,
pattern,
pattern,
patterns,
patterns,
patterns,
patterns,
patterns,
patterns,
exact_pattern,
patterns,
limit,
),
)
Expand Down
47 changes: 47 additions & 0 deletions tests/integration/test_vnext_agentic_memory_commit_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,3 +182,50 @@ def test_agentic_memory_commit_confirmation_review_and_rejection_api(migrated_da
assert rejected_status == 200
assert rejected_payload["status"] == "rejected"
assert "read_only_agent_cannot_write" in rejected_payload["reasons"]


def test_unknown_domain_agentic_memory_selected_by_keyword_context_pack(migrated_database_urls, monkeypatch) -> None:
    """A memory committed with domain ``unknown`` is returned by a keyword context-pack query.

    The context pack is scoped to the professional, project and personal
    domains; the committed memory must still appear among the relevant
    memories and no "nothing selected" warning may be raised.
    """
    user_id = seed_user(migrated_database_urls["app"], email="agentic-memory-unknown-domain@example.com")
    monkeypatch.setattr(main_module, "get_settings", lambda: Settings(database_url=migrated_database_urls["app"]))
    user_id_text = str(user_id)

    # Step 1: commit the memory through the API.
    commit_request = {
        "user_id": user_id_text,
        "agent": _agent(),
        "intent": "explicit_remember",
        "title": "Agent-first vNext preference",
        "canonical_text": (
            "Alice should be agent-first, with /vnext as an audit and correction cockpit "
            "rather than a required manual review dashboard."
        ),
        "memory_type": "semantic",
        "domain": "unknown",
        "sensitivity": "unknown",
        "confidence": 0.95,
        "source_type": "direct_user_instruction",
        "idempotency_key": "agentic-memory-unknown-domain-keyword",
    }
    commit_status, commit_payload = invoke_request(
        "POST",
        "/v0/vnext/memories/commit",
        payload=commit_request,
    )
    assert commit_status == 201
    memory_id = commit_payload["memory"]["id"]

    # Step 2: request a context pack using keywords taken from the memory text.
    context_request = {
        "user_id": user_id_text,
        "query": "agent-first /vnext audit correction cockpit",
        "scope": {"domains": ["professional", "project", "personal"]},
        "options": {
            "max_items": 20,
            "sensitivity_allowed": ["public", "internal", "private", "unknown"],
        },
    }
    context_status, context_payload = invoke_request(
        "POST",
        "/v0/vnext/context-packs",
        payload=context_request,
    )

    assert context_status == 201
    selected_ids = [item["id"] for item in context_payload["relevant_memories"]]
    assert memory_id in selected_ids
    assert "no_relevant_memories_selected" not in context_payload["warnings"]
17 changes: 16 additions & 1 deletion tests/unit/test_vnext_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from psycopg.types.json import Jsonb

from alicebot_api.vnext_event_log import build_event_log_record
from alicebot_api.vnext_store import PostgresVNextStore
from alicebot_api.vnext_store import PostgresVNextStore, _search_patterns


class RecordingCursor:
Expand Down Expand Up @@ -138,6 +138,17 @@ def test_get_source_by_content_hash_uses_dedupe_lookup() -> None:
assert params == ("sha256:abc",)


def test_search_patterns_strip_quotes_and_add_keyword_fallbacks() -> None:
    """Surrounding quotes are removed and every keyword gets its own fallback pattern."""
    quoted_query = '"agent-first /vnext audit correction cockpit"'
    result = _search_patterns(quoted_query)

    # The first pattern is the whole query without the enclosing quotes.
    assert result[0] == "%agent-first /vnext audit correction cockpit%"

    expected_fallbacks = (
        "%agent-first%",
        "%vnext%",
        "%audit%",
        "%correction%",
        "%cockpit%",
    )
    for fallback in expected_fallbacks:
        assert fallback in result


def test_keyword_search_methods_apply_domain_sensitivity_and_limit_filters() -> None:
cursor = RecordingCursor(
fetchone_results=[],
Expand Down Expand Up @@ -176,10 +187,14 @@ def test_keyword_search_methods_apply_domain_sensitivity_and_limit_filters() ->
assert "status IN ('active', 'accepted')" in memory_query
assert "domain = ANY" in memory_query
assert "sensitivity = ANY" in memory_query
assert "ILIKE ANY" in memory_query
assert memory_params is not None
assert memory_params[4] == ["%Alice provenance%", "%alice%", "%provenance%"]
assert memory_params[-1] == 4
assert "FROM sources" in source_query
assert "ILIKE ANY" in source_query
assert source_params is not None
assert source_params[4] == ["%Alice provenance%", "%alice%", "%provenance%"]
assert source_params[-1] == 3
assert "FROM open_loops" in open_loop_query
assert "%s::text IS NULL OR status = %s" in open_loop_query
Expand Down