From efd718822b0585c7816bcb2b1e5eb6d4dfb808c2 Mon Sep 17 00:00:00 2001 From: 0xVox Date: Sun, 7 Jun 2026 21:38:12 -0700 Subject: [PATCH 1/4] feat(cascade): add StrEnum validation for md_change_state fields Add ChangeKind, ChangeType, and ChangeStatus StrEnums to enforce valid values on the cascade work-queue table's kind, change_type, and status fields. Previously these accepted arbitrary strings despite documented valid values, causing confusing 'no handler registered' errors on typos. Follows the existing RunStatus(StrEnum) pattern in infra/ome/records.py. Refs: #193 --- .../sqlite/repos/md_change_state.py | 58 ++++++++------- .../sqlite/tables/md_change_state.py | 68 +++++++++++++----- src/everos/memory/cascade/reconciler.py | 26 ++++--- src/everos/memory/cascade/scanner.py | 12 ++-- src/everos/memory/cascade/types.py | 16 ++++- src/everos/memory/cascade/worker.py | 3 +- .../test_utils/test_datetime.py | 20 +++--- .../test_repos/test_md_change_state.py | 72 +++++++++++++++++++ 8 files changed, 207 insertions(+), 68 deletions(-) diff --git a/src/everos/infra/persistence/sqlite/repos/md_change_state.py b/src/everos/infra/persistence/sqlite/repos/md_change_state.py index b0082927b..fc6f9edc1 100644 --- a/src/everos/infra/persistence/sqlite/repos/md_change_state.py +++ b/src/everos/infra/persistence/sqlite/repos/md_change_state.py @@ -38,6 +38,7 @@ from ..sqlite_manager import get_session_factory from ..tables import MdChangeState +from ..tables.md_change_state import ChangeKind, ChangeStatus, ChangeType @dataclasses.dataclass(frozen=True) @@ -82,8 +83,8 @@ async def upsert( self, md_path: str, *, - kind: str, - change_type: str, + kind: ChangeKind | str, + change_type: ChangeType | str, mtime: float, ) -> int: """Enqueue or re-enqueue ``md_path``; return the assigned LSN. 
@@ -117,7 +118,7 @@ async def upsert( first_seen_at=now, last_changed_at=now, lsn=new_lsn, - status="pending", + status=ChangeStatus.PENDING, retryable=None, last_attempt_at=None, retry_count=0, @@ -131,7 +132,7 @@ async def upsert( "mtime": mtime, "last_changed_at": now, "lsn": new_lsn, - "status": "pending", + "status": ChangeStatus.PENDING, "retryable": None, "last_attempt_at": None, "retry_count": 0, @@ -143,7 +144,7 @@ async def upsert( await s.commit() return new_lsn - async def force_enqueue(self, md_path: str, kind: str) -> int: + async def force_enqueue(self, md_path: str, kind: ChangeKind | str) -> int: """`cascade sync --path` entry: re-enqueue regardless of status. Semantically the same as :meth:`upsert` with ``change_type @@ -153,7 +154,7 @@ async def force_enqueue(self, md_path: str, kind: str) -> int: return await self.upsert( md_path, kind=kind, - change_type="modified", + change_type=ChangeType.MODIFIED, mtime=0.0, ) @@ -172,8 +173,8 @@ async def claim_one(self, md_path: str) -> MdChangeState | None: result = await s.execute( update(MdChangeState) .where(MdChangeState.md_path == md_path) - .where(MdChangeState.status == "pending") - .values(status="processing", last_attempt_at=now) + .where(MdChangeState.status == ChangeStatus.PENDING) + .values(status=ChangeStatus.PROCESSING, last_attempt_at=now) ) await s.commit() if result.rowcount != 1: @@ -197,7 +198,7 @@ async def claim_pending_batch(self, limit: int = 100) -> list[MdChangeState]: ( await s.execute( select(MdChangeState.md_path) - .where(MdChangeState.status == "pending") + .where(MdChangeState.status == ChangeStatus.PENDING) .order_by(MdChangeState.lsn) .limit(limit) ) @@ -210,8 +211,8 @@ async def claim_pending_batch(self, limit: int = 100) -> list[MdChangeState]: update_result = await s.execute( update(MdChangeState) .where(MdChangeState.md_path.in_(picks)) - .where(MdChangeState.status == "pending") - .values(status="processing", last_attempt_at=now) + .where(MdChangeState.status == 
ChangeStatus.PENDING) + .values(status=ChangeStatus.PROCESSING, last_attempt_at=now) ) await s.commit() if update_result.rowcount == 0: @@ -221,7 +222,7 @@ async def claim_pending_batch(self, limit: int = 100) -> list[MdChangeState]: await s.execute( select(MdChangeState) .where(MdChangeState.md_path.in_(picks)) - .where(MdChangeState.status == "processing") + .where(MdChangeState.status == ChangeStatus.PROCESSING) .order_by(MdChangeState.lsn) ) ) @@ -248,9 +249,9 @@ async def mark_done(self, md_path: str) -> None: await s.execute( update(MdChangeState) .where(MdChangeState.md_path == md_path) - .where(MdChangeState.status == "processing") + .where(MdChangeState.status == ChangeStatus.PROCESSING) .values( - status="done", + status=ChangeStatus.DONE, last_attempt_at=now, error=None, retryable=None, @@ -294,9 +295,9 @@ async def mark_failed( await s.execute( update(MdChangeState) .where(MdChangeState.md_path == md_path) - .where(MdChangeState.status == "processing") + .where(MdChangeState.status == ChangeStatus.PROCESSING) .values( - status="failed", + status=ChangeStatus.FAILED, retryable=retryable, last_attempt_at=now, error=error, @@ -319,8 +320,8 @@ async def recover_orphan_processing(self) -> int: async with session_scope(self._factory) as s: result = await s.execute( update(MdChangeState) - .where(MdChangeState.status == "processing") - .values(status="pending", last_attempt_at=None) + .where(MdChangeState.status == ChangeStatus.PROCESSING) + .values(status=ChangeStatus.PENDING, last_attempt_at=None) ) await s.commit() return int(result.rowcount or 0) @@ -338,7 +339,7 @@ async def list_failed(self) -> list[MdChangeState]: ( await s.execute( select(MdChangeState) - .where(MdChangeState.status == "failed") + .where(MdChangeState.status == ChangeStatus.FAILED) .order_by(MdChangeState.lsn) ) ) @@ -361,10 +362,10 @@ async def reset_retryable_to_pending(self) -> int: async with session_scope(self._factory) as s: result = await s.execute( update(MdChangeState) - 
.where(MdChangeState.status == "failed") + .where(MdChangeState.status == ChangeStatus.FAILED) .where(MdChangeState.retryable.is_(True)) .values( - status="pending", + status=ChangeStatus.PENDING, retryable=None, retry_count=0, error=None, @@ -378,17 +379,20 @@ async def queue_summary(self) -> QueueSummary: """Aggregate the table for the ``cascade status`` CLI.""" async with session_scope(self._factory) as s: pending = await _count_where( - s, MdChangeState.status.in_(["pending", "processing"]) + s, + MdChangeState.status.in_( + [ChangeStatus.PENDING, ChangeStatus.PROCESSING] + ), ) - done = await _count_where(s, MdChangeState.status == "done") + done = await _count_where(s, MdChangeState.status == ChangeStatus.DONE) failed_retryable = await _count_where( s, - (MdChangeState.status == "failed") + (MdChangeState.status == ChangeStatus.FAILED) & (MdChangeState.retryable.is_(True)), ) failed_permanent = await _count_where( s, - (MdChangeState.status == "failed") + (MdChangeState.status == ChangeStatus.FAILED) & (MdChangeState.retryable.is_(False)), ) max_lsn_stmt = select(func.coalesce(func.max(MdChangeState.lsn), 0)) @@ -397,7 +401,9 @@ async def queue_summary(self) -> QueueSummary: ( await s.execute( select(func.coalesce(func.max(MdChangeState.lsn), 0)).where( - MdChangeState.status.in_(["done", "failed"]) + MdChangeState.status.in_( + [ChangeStatus.DONE, ChangeStatus.FAILED] + ) ) ) ).scalar_one() diff --git a/src/everos/infra/persistence/sqlite/tables/md_change_state.py b/src/everos/infra/persistence/sqlite/tables/md_change_state.py index 07ac5d91e..12b7fbb82 100644 --- a/src/everos/infra/persistence/sqlite/tables/md_change_state.py +++ b/src/everos/infra/persistence/sqlite/tables/md_change_state.py @@ -13,6 +13,8 @@ from __future__ import annotations +from enum import StrEnum + from sqlalchemy import Index, text from everos.component.utils.datetime import UtcDatetime, get_utc_now @@ -20,6 +22,47 @@ from everos.core.persistence.sqlite.base import UtcDateTimeColumn 
+class ChangeKind(StrEnum): + """Registered cascade handler kinds. + + Each value corresponds to a :class:`Handler` subclass's ``kind`` + class attribute in :mod:`everos.memory.cascade.handlers`. + """ + + EPISODE = "episode" + ATOMIC_FACT = "atomic_fact" + FORESIGHT = "foresight" + AGENT_CASE = "agent_case" + AGENT_SKILL = "agent_skill" + USER_PROFILE = "user_profile" + + +class ChangeType(StrEnum): + """Lifecycle hint for a single md path's work-queue row. + + The handler re-derives truth from the actual file state at run + time (DD-3 in 12 doc); this field is a dispatch hint only. + """ + + ADDED = "added" + MODIFIED = "modified" + DELETED = "deleted" + + +class ChangeStatus(StrEnum): + """Work-queue row lifecycle. + + ``PROCESSING`` is an internal claim state used by + :meth:`MdChangeStateRepo.claim_one`; CLI output rolls it back + into ``PENDING`` for display (16 doc §4.2 — DD-12). + """ + + PENDING = "pending" + PROCESSING = "processing" + DONE = "done" + FAILED = "failed" + + class MdChangeState(BaseTable, table=True): """One row per markdown path; UPSERT-driven work queue for cascade. @@ -58,13 +101,12 @@ class MdChangeState(BaseTable, table=True): """Path relative to the memory-root (e.g. ``users/u_jason/ episodes/episode-2026-05-12.md``). Every reverse-link anchors here.""" - kind: str = Field(nullable=False, index=True) - """Kind registry name (e.g. ``"episode"``); worker dispatches the - matching handler.""" + kind: ChangeKind = Field(nullable=False, index=True) + """Kind registry name; worker dispatches the matching handler.""" - change_type: str = Field(nullable=False) - """``"added"`` | ``"modified"`` | ``"deleted"``. A hint for the - worker — handler re-derives truth from the actual file state.""" + change_type: ChangeType = Field(nullable=False) + """A hint for the worker — handler re-derives truth from the + actual file state.""" mtime: float = Field(default=0.0, nullable=False) """File mtime captured when the row was last UPSERTed. 
Scanner @@ -85,16 +127,10 @@ class MdChangeState(BaseTable, table=True): processes pending rows in ascending lsn order; the gap between ``MAX(lsn)`` and the last processed lsn is the queue lag.""" - status: str = Field(default="pending", nullable=False, index=True) - """Lifecycle: - - - ``"pending"`` — waiting for the worker. - - ``"processing"`` — claimed by a worker (internal; CLI rolls into - pending for display). - - ``"done"`` — handler completed successfully. - - ``"failed"`` — handler exhausted retries or hit an - unrecoverable error (see :attr:`retryable`). - """ + status: ChangeStatus = Field( + default=ChangeStatus.PENDING, nullable=False, index=True + ) + """Lifecycle: ``PENDING`` → ``PROCESSING`` → ``DONE`` | ``FAILED``.""" retryable: bool | None = Field(default=None) """Meaningful only when ``status='failed'``. diff --git a/src/everos/memory/cascade/reconciler.py b/src/everos/memory/cascade/reconciler.py index 263f30975..06125afff 100644 --- a/src/everos/memory/cascade/reconciler.py +++ b/src/everos/memory/cascade/reconciler.py @@ -29,7 +29,13 @@ import dataclasses from collections.abc import Iterable, Mapping -from .types import ReconcileDecision, ScanInput +from .types import ( + ChangeKind, + ChangeStatus, + ChangeType, + ReconcileDecision, + ScanInput, +) @dataclasses.dataclass(frozen=True) @@ -42,10 +48,10 @@ class PriorState: """ md_path: str - kind: str + kind: ChangeKind mtime: float - status: str # "pending" | "processing" | "done" | "failed" - change_type: str # "added" | "modified" | "deleted" + status: ChangeStatus + change_type: ChangeType def reconcile( @@ -76,19 +82,19 @@ def reconcile( ReconcileDecision( md_path=item.md_path, kind=item.kind, - change_type="added", + change_type=ChangeType.ADDED, mtime=item.mtime, ) ) continue # Skip when the row is already done and mtime hasn't moved. 
- if prior.status == "done" and prior.mtime == item.mtime: + if prior.status == ChangeStatus.DONE and prior.mtime == item.mtime: continue decisions.append( ReconcileDecision( md_path=item.md_path, kind=item.kind, - change_type="modified", + change_type=ChangeType.MODIFIED, mtime=item.mtime, ) ) @@ -102,13 +108,15 @@ def reconcile( # 'modified' means the watcher missed the subsequent unlink — # without re-emitting 'deleted' here the scanner would never # recover the stale LanceDB rows. - if prior.status == "done" and prior.change_type == "deleted": + done = prior.status == ChangeStatus.DONE + deleted = prior.change_type == ChangeType.DELETED + if done and deleted: continue decisions.append( ReconcileDecision( md_path=path, kind=prior.kind, - change_type="deleted", + change_type=ChangeType.DELETED, mtime=prior.mtime, ) ) diff --git a/src/everos/memory/cascade/scanner.py b/src/everos/memory/cascade/scanner.py index 13c34e5fc..2ccb63cfa 100644 --- a/src/everos/memory/cascade/scanner.py +++ b/src/everos/memory/cascade/scanner.py @@ -36,7 +36,7 @@ from .reconciler import PriorState, reconcile from .registry import KIND_REGISTRY -from .types import ReconcileDecision, ScanInput +from .types import ChangeType, ReconcileDecision, ScanInput logger = get_logger(__name__) @@ -95,9 +95,13 @@ async def scan_once(self) -> list[ReconcileDecision]: logger.info( "cascade_scanner_decisions", count=len(decisions), - added=sum(1 for d in decisions if d.change_type == "added"), - modified=sum(1 for d in decisions if d.change_type == "modified"), - deleted=sum(1 for d in decisions if d.change_type == "deleted"), + added=sum(1 for d in decisions if d.change_type == ChangeType.ADDED), + modified=sum( + 1 for d in decisions if d.change_type == ChangeType.MODIFIED + ), + deleted=sum( + 1 for d in decisions if d.change_type == ChangeType.DELETED + ), ) return decisions diff --git a/src/everos/memory/cascade/types.py b/src/everos/memory/cascade/types.py index bced4a33b..8c697290c 100644 --- 
a/src/everos/memory/cascade/types.py +++ b/src/everos/memory/cascade/types.py @@ -8,9 +8,21 @@ from __future__ import annotations import dataclasses -from typing import Literal -ChangeType = Literal["added", "modified", "deleted"] +from everos.infra.persistence.sqlite.tables.md_change_state import ( + ChangeKind, + ChangeStatus, + ChangeType, +) + +__all__ = [ + "ChangeKind", + "ChangeStatus", + "ChangeType", + "HandlerOutcome", + "ReconcileDecision", + "ScanInput", +] @dataclasses.dataclass(frozen=True) diff --git a/src/everos/memory/cascade/worker.py b/src/everos/memory/cascade/worker.py index 9e2cc5b9e..10c764406 100644 --- a/src/everos/memory/cascade/worker.py +++ b/src/everos/memory/cascade/worker.py @@ -49,6 +49,7 @@ from .errors import RecoverableError from .handlers import Handler +from .types import ChangeType logger = get_logger(__name__) @@ -307,7 +308,7 @@ async def _process_one(self, row: MdChangeState) -> str | None: last_error: str = "" for attempt in range(self._max_retry + 1): try: - if row.change_type == "deleted": + if row.change_type == ChangeType.DELETED: outcome = await handler.handle_deleted(row.md_path) else: outcome = await handler.handle_added_or_modified(row.md_path) diff --git a/tests/unit/test_component/test_utils/test_datetime.py b/tests/unit/test_component/test_utils/test_datetime.py index 7123a3e3e..a5f01ba15 100644 --- a/tests/unit/test_component/test_utils/test_datetime.py +++ b/tests/unit/test_component/test_utils/test_datetime.py @@ -968,7 +968,7 @@ async def _run() -> None: s.add( MdChangeState( md_path="p_orm", - kind="ep", + kind="episode", change_type="added", mtime=0.0, lsn=1, @@ -1007,7 +1007,7 @@ async def _run() -> None: await s.execute( insert(MdChangeState).values( md_path="p_core_ins", - kind="ep", + kind="episode", change_type="added", mtime=0.0, lsn=2, @@ -1049,7 +1049,7 @@ async def _run() -> None: s.add( MdChangeState( md_path="p_upd", - kind="ep", + kind="episode", change_type="added", mtime=0.0, lsn=3, @@ -1092,7 
+1092,7 @@ async def _run() -> None: s.add( MdChangeState( md_path="p_utc", - kind="ep", + kind="episode", change_type="added", mtime=0.0, lsn=4, @@ -1144,7 +1144,7 @@ async def _run() -> None: s.add( MdChangeState( md_path="p_naive", - kind="ep", + kind="episode", change_type="added", mtime=0.0, lsn=5, @@ -1184,7 +1184,7 @@ async def _run() -> None: s.add( MdChangeState( md_path="p_us", - kind="ep", + kind="episode", change_type="added", mtime=0.0, lsn=6, @@ -1227,7 +1227,7 @@ async def _run() -> None: [ MdChangeState( md_path="p_1970", - kind="ep", + kind="episode", change_type="added", mtime=0.0, lsn=7, @@ -1235,7 +1235,7 @@ async def _run() -> None: ), MdChangeState( md_path="p_2099", - kind="ep", + kind="episode", change_type="added", mtime=0.0, lsn=8, @@ -1292,7 +1292,7 @@ async def _run() -> None: [ MdChangeState( md_path="p_pre", - kind="ep", + kind="episode", change_type="added", mtime=0.0, lsn=9, @@ -1300,7 +1300,7 @@ async def _run() -> None: ), MdChangeState( md_path="p_post", - kind="ep", + kind="episode", change_type="added", mtime=0.0, lsn=10, diff --git a/tests/unit/test_infra/test_sqlite/test_repos/test_md_change_state.py b/tests/unit/test_infra/test_sqlite/test_repos/test_md_change_state.py index 6219ab1d7..884c4e2eb 100644 --- a/tests/unit/test_infra/test_sqlite/test_repos/test_md_change_state.py +++ b/tests/unit/test_infra/test_sqlite/test_repos/test_md_change_state.py @@ -490,6 +490,78 @@ async def test_recover_orphan_processing_only_touches_processing_rows( # ── Partial indexes (smoke) ───────────────────────────────────────────── +# ── StrEnum validation ─────────────────────────────────────────────────── + + +async def test_upsert_rejects_invalid_kind(repo: _MdChangeStateRepo) -> None: + """An invalid kind value must be rejected by Pydantic validation.""" + from everos.infra.persistence.sqlite.tables.md_change_state import ( + ChangeKind, + ) + + # Verify the enum accepts valid values. 
+ assert ChangeKind("episode") == ChangeKind.EPISODE + + # Verify the enum rejects invalid values. + with pytest.raises(ValueError, match="'epsiode'"): + ChangeKind("epsiode") + + +async def test_upsert_rejects_invalid_change_type( + repo: _MdChangeStateRepo, +) -> None: + from everos.infra.persistence.sqlite.tables.md_change_state import ( + ChangeType, + ) + + assert ChangeType("added") == ChangeType.ADDED + with pytest.raises(ValueError, match="'addded'"): + ChangeType("addded") + + +async def test_upsert_rejects_invalid_status( + repo: _MdChangeStateRepo, +) -> None: + from everos.infra.persistence.sqlite.tables.md_change_state import ( + ChangeStatus, + ) + + assert ChangeStatus("pending") == ChangeStatus.PENDING + with pytest.raises(ValueError, match="'pendig'"): + ChangeStatus("pendig") + + +async def test_enum_values_match_documented_strings( + repo: _MdChangeStateRepo, +) -> None: + """Enum values must match the strings documented in the docstrings.""" + from everos.infra.persistence.sqlite.tables.md_change_state import ( + ChangeKind, + ChangeStatus, + ChangeType, + ) + + assert set(ChangeKind) == { + ChangeKind.EPISODE, + ChangeKind.ATOMIC_FACT, + ChangeKind.FORESIGHT, + ChangeKind.AGENT_CASE, + ChangeKind.AGENT_SKILL, + ChangeKind.USER_PROFILE, + } + assert set(ChangeType) == { + ChangeType.ADDED, + ChangeType.MODIFIED, + ChangeType.DELETED, + } + assert set(ChangeStatus) == { + ChangeStatus.PENDING, + ChangeStatus.PROCESSING, + ChangeStatus.DONE, + ChangeStatus.FAILED, + } + + async def test_partial_indexes_are_created(repo: _MdChangeStateRepo) -> None: """The three partial / mtime indexes from the schema land in sqlite_master.""" async with repo.session_factory() as s: From 45dcccdf1f31c3b28254b44b8b76390e8de16276 Mon Sep 17 00:00:00 2001 From: 0xVox Date: Sun, 7 Jun 2026 21:38:18 -0700 Subject: [PATCH 2/4] docs: add chat-agent integration guide with search tool schema (#193) Add docs/chat-agent-integration.md covering on-demand search vs per-turn 
RAG, write/read paths, time-range filtering, and an OpenAI-compatible memory_search tool schema. Add cross-reference from api.md /search section and entry in docs/index.md. Closes #193 (documentation part). --- docs/api.md | 5 + docs/chat-agent-integration.md | 182 +++++++++++++++++++++++++++++++++ docs/index.md | 1 + 3 files changed, 188 insertions(+) create mode 100644 docs/chat-agent-integration.md diff --git a/docs/api.md b/docs/api.md index d0f05c7bf..834b43cf5 100644 --- a/docs/api.md +++ b/docs/api.md @@ -609,6 +609,11 @@ Hybrid retrieval over the memory store. Combines BM25, dense vector ANN, optional scalar filtering, optional cross-encoder rerank, and optional final LLM rerank. Returns ranked items grouped by kind. +> **Integrating a Chat Agent?** See the +> [Chat Agent Integration Guide](chat-agent-integration.md) for +> recommended patterns (on-demand search vs per-turn RAG) and an +> official tool schema. + #### Request body | Field | Type | Required | Default | Constraints | diff --git a/docs/chat-agent-integration.md b/docs/chat-agent-integration.md new file mode 100644 index 000000000..4ff1c1a6e --- /dev/null +++ b/docs/chat-agent-integration.md @@ -0,0 +1,182 @@ +# Chat Agent Integration Guide + +Recommended patterns for integrating a Chat Agent (LLM-based assistant) +with EverOS persistent memory. Covers the write path (ingesting +conversations), the read path (recalling memories on demand), and an +official tool schema for function-calling agents. + +## Architecture: On-Demand Search vs Per-Turn RAG + +| Pattern | How it works | Trade-off | +|---|---|---| +| **Per-turn RAG** | Every turn, retrieve memories and inject into the LLM context window before generating a response. | Simple but pollutes context with irrelevant memories; burns tokens on every turn. | +| **On-demand search** | The agent decides *when* to recall by calling a memory search tool. | Token-efficient; closer to how human memory works (you don't recall everything every sentence). 
| + +**Recommendation: on-demand search.** Keep short-term context (the last +*n* turns) in the LLM `messages` array as working memory. Long-term +memory is retrieved only when the agent determines it needs historical +context. + +## Write Path + +Ingest every conversation turn automatically. Do not wait for the agent +to decide what to remember. + +``` +POST /api/v1/memory/add +{ + "session_id": "chat-abc123", + "messages": [ + { + "sender_id": "user_42", + "role": "user", + "content": "I prefer dark mode for all my apps", + "timestamp": 1740564000000 + } + ] +} +``` + +When you need to trigger memory extraction immediately (e.g. end of +conversation), call flush: + +``` +POST /api/v1/memory/flush +{ + "session_id": "chat-abc123" +} +``` + +Extraction also fires automatically when the buffer reaches a size +threshold. Calling flush is optional but useful when you want memories +available for search right away. + +See [POST /api/v1/memory/add](api.md#post-apiv1memoryadd) for the +full request schema. + +## Read Path + +When the agent needs to recall past context, have it call the search +tool: + +``` +POST /api/v1/memory/search +{ + "user_id": "user_42", + "query": "dark mode preferences", + "filters": { + "timestamp": { + "gte": 1740480000000, + "lt": 1740566400000 + } + } +} +``` + +See [POST /api/v1/memory/search](api.md#post-apiv1memorysearch) for +the full request schema. + +### Time-Range Filtering + +For natural-language time references ("what we discussed yesterday about +X"), resolve the spoken time window to concrete `timestamp` bounds in +the `filters` field: + +- Use Unix epoch milliseconds **or** ISO-8601 strings. +- `gte` / `lt` operators bracket the window. +- Timestamps reflect **when the conversation happened**, not when the + memory was extracted. If your extraction pipeline is async + (flush-based), propagate the original conversation timestamp. 
+ +```json +{ + "filters": { + "AND": [ + {"timestamp": {"gte": 1740480000000, "lt": 1740566400000}}, + {"session_id": {"eq": "chat-abc123"}} + ] + } +} +``` + +### Retrieval Methods + +| Method | When to use | +|---|---| +| `hybrid` (default) | General-purpose — combines BM25 + vector search. Best starting point. | +| `keyword` | When the query is exact-match friendly (e.g. function names, error codes). | +| `vector` | When semantic similarity matters more than keyword overlap. | +| `agentic` | When you want the system to run multi-step retrieval with LLM sufficiency checks. | + +## Official Tool Schema + +The following OpenAI-compatible tool definition exposes memory search as +a function the agent can call. Fields align with the `/search` endpoint +documented in [api.md](api.md#post-apiv1memorysearch). + +```json +{ + "type": "function", + "function": { + "name": "memory_search", + "description": "Search the user's long-term memory for relevant past conversations, facts, and context. Use when the user references previous sessions, asks about past decisions, or when historical context would improve your response.", + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Search query — what to look for in past memories. Be specific." + }, + "top_k": { + "type": "integer", + "default": 10, + "description": "Maximum number of results to return. -1 for server default." + }, + "filters": { + "type": "object", + "description": "Optional filters for time range, session, or other dimensions.", + "properties": { + "timestamp": { + "type": "object", + "description": "Time range filter. 
Use gte/lt with Unix epoch ms or ISO-8601 strings.", + "properties": { + "gte": {"type": ["integer", "string"], "description": "Start of time range (inclusive)"}, + "lt": {"type": ["integer", "string"], "description": "End of time range (exclusive)"} + } + }, + "session_id": { + "type": "object", + "properties": { + "eq": {"type": "string", "description": "Filter to a specific session"} + } + } + } + } + }, + "required": ["query"] + } + } +} +``` + +### MCP Tool Reference + +For Claude Code and other MCP-compatible agents, a reference +implementation is available at +[`use-cases/claude-code-plugin/skills/memory-tools.md`](../use-cases/claude-code-plugin/skills/memory-tools.md). +That document describes the `evermem_search` tool and when to use it. + +## Key Integration Points + +1. **Write automatic, read agent-initiated.** Every turn goes through + `/add`; the agent calls `/search` only when it needs context. + +2. **Session scoping.** Use `session_id` to group turns from one + conversation. The `/search` endpoint can filter by session. + +3. **Owner scoping.** Pass `user_id` for user-facing agents or + `agent_id` for autonomous agents. Results never cross owner + boundaries. + +4. **App / project scoping.** Use `app_id` and `project_id` to + isolate memories across different products or environments. diff --git a/docs/index.md b/docs/index.md index f334ef653..c577a9f06 100644 --- a/docs/index.md +++ b/docs/index.md @@ -37,6 +37,7 @@ specific thing (drain a queue, recover from a stuck row, etc.). 
| Doc | Purpose | |---|---| | [cascade_runbook.md](cascade_runbook.md) | Cascade subsystem ops — drain queue, recover stuck rows | +| [chat-agent-integration.md](chat-agent-integration.md) | Chat Agent + EverOS integration guide — on-demand search, tool schema | ## Engineering / Internal From b8c31a57bf1007bfc51a3ccac7dd0f8d454bf98d Mon Sep 17 00:00:00 2001 From: 0xVox Date: Sun, 7 Jun 2026 21:53:59 -0700 Subject: [PATCH 3/4] fix(cascade): move enums to everos.core.enums to satisfy import-linter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The layered-architecture contract forbids everos.memory from importing everos.infra. Move ChangeKind/ChangeType/ChangeStatus to everos.core.enums — a shared layer accessible from both infra and memory. This also fixes the integration test_lap_append_during_handler_no_loss failure caused by the enum definitions living in a module that the memory layer could not legally reach. --- src/everos/core/enums.py | 51 +++++++++++++++++++ .../sqlite/tables/md_change_state.py | 44 +--------------- src/everos/memory/cascade/types.py | 6 +-- 3 files changed, 53 insertions(+), 48 deletions(-) create mode 100644 src/everos/core/enums.py diff --git a/src/everos/core/enums.py b/src/everos/core/enums.py new file mode 100644 index 000000000..e01b3a319 --- /dev/null +++ b/src/everos/core/enums.py @@ -0,0 +1,51 @@ +"""Shared StrEnum types used across EverOS layers. + +These enums live at the ``everos.core`` level so both ``infra`` and +``memory`` can import them without violating the layered-architecture +contract enforced by import-linter. +""" + +from __future__ import annotations + +from enum import StrEnum + + +class ChangeKind(StrEnum): + """Registered cascade handler kinds. + + Each value corresponds to a :class:`Handler` subclass's ``kind`` + class attribute in :mod:`everos.memory.cascade.handlers`. 
+ """ + + EPISODE = "episode" + ATOMIC_FACT = "atomic_fact" + FORESIGHT = "foresight" + AGENT_CASE = "agent_case" + AGENT_SKILL = "agent_skill" + USER_PROFILE = "user_profile" + + +class ChangeType(StrEnum): + """Lifecycle hint for a single md path's work-queue row. + + The handler re-derives truth from the actual file state at run + time (DD-3 in 12 doc); this field is a dispatch hint only. + """ + + ADDED = "added" + MODIFIED = "modified" + DELETED = "deleted" + + +class ChangeStatus(StrEnum): + """Work-queue row lifecycle. + + ``PROCESSING`` is an internal claim state used by + :meth:`MdChangeStateRepo.claim_one`; CLI output rolls it back + into ``PENDING`` for display (16 doc §4.2 — DD-12). + """ + + PENDING = "pending" + PROCESSING = "processing" + DONE = "done" + FAILED = "failed" diff --git a/src/everos/infra/persistence/sqlite/tables/md_change_state.py b/src/everos/infra/persistence/sqlite/tables/md_change_state.py index 12b7fbb82..2080cf177 100644 --- a/src/everos/infra/persistence/sqlite/tables/md_change_state.py +++ b/src/everos/infra/persistence/sqlite/tables/md_change_state.py @@ -13,56 +13,14 @@ from __future__ import annotations -from enum import StrEnum - from sqlalchemy import Index, text from everos.component.utils.datetime import UtcDatetime, get_utc_now +from everos.core.enums import ChangeKind, ChangeStatus, ChangeType from everos.core.persistence.sqlite import BaseTable, Field from everos.core.persistence.sqlite.base import UtcDateTimeColumn -class ChangeKind(StrEnum): - """Registered cascade handler kinds. - - Each value corresponds to a :class:`Handler` subclass's ``kind`` - class attribute in :mod:`everos.memory.cascade.handlers`. - """ - - EPISODE = "episode" - ATOMIC_FACT = "atomic_fact" - FORESIGHT = "foresight" - AGENT_CASE = "agent_case" - AGENT_SKILL = "agent_skill" - USER_PROFILE = "user_profile" - - -class ChangeType(StrEnum): - """Lifecycle hint for a single md path's work-queue row. 
- - The handler re-derives truth from the actual file state at run - time (DD-3 in 12 doc); this field is a dispatch hint only. - """ - - ADDED = "added" - MODIFIED = "modified" - DELETED = "deleted" - - -class ChangeStatus(StrEnum): - """Work-queue row lifecycle. - - ``PROCESSING`` is an internal claim state used by - :meth:`MdChangeStateRepo.claim_one`; CLI output rolls it back - into ``PENDING`` for display (16 doc §4.2 — DD-12). - """ - - PENDING = "pending" - PROCESSING = "processing" - DONE = "done" - FAILED = "failed" - - class MdChangeState(BaseTable, table=True): """One row per markdown path; UPSERT-driven work queue for cascade. diff --git a/src/everos/memory/cascade/types.py b/src/everos/memory/cascade/types.py index 8c697290c..9a1035926 100644 --- a/src/everos/memory/cascade/types.py +++ b/src/everos/memory/cascade/types.py @@ -9,11 +9,7 @@ import dataclasses -from everos.infra.persistence.sqlite.tables.md_change_state import ( - ChangeKind, - ChangeStatus, - ChangeType, -) +from everos.core.enums import ChangeKind, ChangeStatus, ChangeType __all__ = [ "ChangeKind", From c721d07f3070a94204768610cbec472a9f4c001e Mon Sep 17 00:00:00 2001 From: 0xVox Date: Sun, 7 Jun 2026 22:17:43 -0700 Subject: [PATCH 4/4] =?UTF-8?q?fix:=20address=20codex=20GPT-5.5=20brake=20?= =?UTF-8?q?findings=20(BLOCK=E2=86=92fixes)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P1-1: SQLAlchemy Enum stores enum *names* (EPISODE) not values (episode). Add values_callable=lambda e: [x.value for x in e] to all three StrEnum fields so SQLite stores lowercase values matching existing data and partial indexes. P1-2: Tool schema missed user_id/agent_id (required by SearchRequest) and had wrong top_k default (10 vs -1). Fixed schema and added XOR note. P2: Added test_db_stores_lowercase_enum_values — raw SQL round-trip verifying stored values are lowercase, not enum names. 
--- docs/chat-agent-integration.md | 17 ++++++++- .../sqlite/tables/md_change_state.py | 17 +++++++-- .../test_repos/test_md_change_state.py | 37 +++++++++++++++++++ 3 files changed, 66 insertions(+), 5 deletions(-) diff --git a/docs/chat-agent-integration.md b/docs/chat-agent-integration.md index 4ff1c1a6e..8a8578be7 100644 --- a/docs/chat-agent-integration.md +++ b/docs/chat-agent-integration.md @@ -127,10 +127,18 @@ documented in [api.md](api.md#post-apiv1memorysearch). "type": "string", "description": "Search query — what to look for in past memories. Be specific." }, + "user_id": { + "type": "string", + "description": "Memory owner (user). Set exactly one of user_id or agent_id." + }, + "agent_id": { + "type": "string", + "description": "Memory owner (agent). Set exactly one of user_id or agent_id." + }, "top_k": { "type": "integer", - "default": 10, - "description": "Maximum number of results to return. -1 for server default." + "default": -1, + "description": "Maximum results. -1 uses server default." }, "filters": { "type": "object", @@ -159,6 +167,11 @@ documented in [api.md](api.md#post-apiv1memorysearch). } ``` +> **Note:** The `/search` endpoint requires exactly one of `user_id` or > `agent_id`. Both are omitted from `required` above because a `required` > list alone cannot express an either/or (XOR) constraint. The server > returns 422 if neither or both are set. 
+ ### MCP Tool Reference For Claude Code and other MCP-compatible agents, a reference diff --git a/src/everos/infra/persistence/sqlite/tables/md_change_state.py b/src/everos/infra/persistence/sqlite/tables/md_change_state.py index 2080cf177..4ae5c1ae7 100644 --- a/src/everos/infra/persistence/sqlite/tables/md_change_state.py +++ b/src/everos/infra/persistence/sqlite/tables/md_change_state.py @@ -13,6 +13,7 @@ from __future__ import annotations +from sqlalchemy import Enum as SAEnum from sqlalchemy import Index, text from everos.component.utils.datetime import UtcDatetime, get_utc_now @@ -59,10 +60,17 @@ class MdChangeState(BaseTable, table=True): """Path relative to the memory-root (e.g. ``users/u_jason/ episodes/episode-2026-05-12.md``). Every reverse-link anchors here.""" - kind: ChangeKind = Field(nullable=False, index=True) + kind: ChangeKind = Field( + nullable=False, + index=True, + sa_type=SAEnum(ChangeKind, values_callable=lambda e: [x.value for x in e]), + ) """Kind registry name; worker dispatches the matching handler.""" - change_type: ChangeType = Field(nullable=False) + change_type: ChangeType = Field( + nullable=False, + sa_type=SAEnum(ChangeType, values_callable=lambda e: [x.value for x in e]), + ) """A hint for the worker — handler re-derives truth from the actual file state.""" @@ -86,7 +94,10 @@ class MdChangeState(BaseTable, table=True): ``MAX(lsn)`` and the last processed lsn is the queue lag.""" status: ChangeStatus = Field( - default=ChangeStatus.PENDING, nullable=False, index=True + default=ChangeStatus.PENDING, + nullable=False, + index=True, + sa_type=SAEnum(ChangeStatus, values_callable=lambda e: [x.value for x in e]), ) """Lifecycle: ``PENDING`` → ``PROCESSING`` → ``DONE`` | ``FAILED``.""" diff --git a/tests/unit/test_infra/test_sqlite/test_repos/test_md_change_state.py b/tests/unit/test_infra/test_sqlite/test_repos/test_md_change_state.py index 884c4e2eb..c56f79f8a 100644 --- 
a/tests/unit/test_infra/test_sqlite/test_repos/test_md_change_state.py +++ b/tests/unit/test_infra/test_sqlite/test_repos/test_md_change_state.py @@ -578,3 +578,40 @@ async def test_partial_indexes_are_created(repo: _MdChangeStateRepo) -> None: "idx_md_change_kind", ): assert expected in names, f"missing index {expected!r}; got {names!r}" + + +async def test_db_stores_lowercase_enum_values( + repo: _MdChangeStateRepo, +) -> None: + """Raw SQLite values must be lowercase (matching pre-StrEnum rows). + + SQLAlchemy's ``Enum`` stores the *name* by default (e.g. ``EPISODE``). + Our ``values_callable`` forces it to store the *value* (``episode``) + so partial indexes and existing data keep working. + """ + from sqlalchemy import text + + await repo.upsert( + "users/u/episodes/episode-2026-05-12.md", + kind="episode", + change_type="added", + mtime=1.0, + ) + + async with repo.session_factory() as s: + result = await s.execute( + text( + "SELECT kind, change_type, status " + "FROM md_change_state " + "WHERE md_path = 'users/u/episodes/episode-2026-05-12.md'" + ) + ) + row = result.one() + + assert row.kind == "episode", f"kind stored as {row.kind!r}, expected 'episode'" + assert row.change_type == "added", ( + f"change_type stored as {row.change_type!r}, expected 'added'" + ) + assert row.status == "pending", ( + f"status stored as {row.status!r}, expected 'pending'" + )