ai_memory/storage/
mod.rs

1// Copyright 2026 AlphaOne LLC
2// SPDX-License-Identifier: Apache-2.0
3
4// #873 — `recall_hybrid_with_telemetry` exceeds the per-function 250-
5// line budget; tracked for split as #871 (stage-helpers: param-prep /
6// fts-branch / semantic-branch / blend+rerank / touch+telemetry). The
7// allowance is module-scope so future too-big helpers in the same
8// file are caught by the lint at PR-time instead of silently growing.
9#![allow(clippy::too_many_lines)]
10
11use crate::models::field_names;
12use anyhow::{Context, Result};
13use chrono::{DateTime, Utc};
14use rusqlite::{Connection, params};
15use std::collections::HashMap;
16use std::path::Path;
17
18// ── #1558 batch 6 — file-local SQL SSOT (pm-v3.1 hardcoded-literal gate) ──
19const SQL_DELETE_MEMORY_BY_ID: &str = "DELETE FROM memories WHERE id = ?1";
20const SQL_DELETE_NAMESPACE_META_BY_STANDARD_ID: &str =
21    "DELETE FROM namespace_meta WHERE standard_id = ?1";
22const SQL_MEMORY_EXISTS_COUNT: &str = "SELECT COUNT(*) > 0 FROM memories WHERE id = ?1";
23const SQL_MEMORY_EXISTS: &str = "SELECT EXISTS(SELECT 1 FROM memories WHERE id = ?1)";
24const SQL_SELECT_MEMORY_ROW_BY_ID: &str = "SELECT * FROM memories WHERE id = ?1";
25// ── #1579 A2 — sargable `list` SQL fragments ──────────────────────────────
26// The always-present expiry guard opens the WHERE clause; every other
27// filter is appended by `build_list_query` ONLY when the caller supplied
28// it, so the planner sees bare `col = ?` / `col >= ?` predicates it can
29// drive through `idx_memories_list_order` / `idx_memories_ns_list_order`
30// instead of the formerly non-sargable `(?N IS NULL OR col = ?N)` arms.
31const SQL_LIST_BASE: &str = "SELECT * FROM memories WHERE (expires_at IS NULL OR expires_at > ?)";
32const SQL_LIST_ORDER_LIMIT: &str = " ORDER BY priority DESC, updated_at DESC LIMIT ? OFFSET ?";
33
34/// v0.7.0 H6 (round-2) — truncate a `DateTime<Utc>` to microsecond
35/// precision. Companion of the same-named helper in
36/// `store/postgres.rs:3539` (G3 fix); both ends of the link sign/verify
37/// roundtrip now collapse sub-microsecond digits BEFORE CBOR
38/// canonicalisation. PostgreSQL's `TIMESTAMPTZ` stores microseconds —
39/// the SQLite path was lossless, but a link created on SQLite and
40/// later re-verified on Postgres (or vice versa via federation) would
41/// see the canonical RFC3339 string change shape on the storage hop
42/// and break the Ed25519 signature. Truncating at write time makes the
43/// shape stable across adapters. See `store/postgres.rs:3520-3543` for
44/// the full design context.
45#[must_use]
46pub fn truncate_to_microseconds(t: DateTime<Utc>) -> DateTime<Utc> {
47    use chrono::Timelike;
48    let micros = t.nanosecond() / 1_000;
49    t.with_nanosecond(micros * 1_000).unwrap_or(t)
50}
51
52use crate::models::{
53    AGENTS_NAMESPACE, AgentRegistration, Approval, ApproverType, ConfidenceSource, DuplicateCheck,
54    DuplicateMatch, GovernanceDecision, GovernanceLevel, GovernancePolicy, GovernedAction,
55    MAX_NAMESPACE_DEPTH, Memory, MemoryKind, MemoryLink, NamespaceCount, PROMOTION_THRESHOLD,
56    PendingAction, SourceSpan, Stats, Taxonomy, TaxonomyNode, Tier, TierCount, namespace_ancestors,
57};
58
59// #962 — typed substrate-layer error envelope. Substrate code emits
60// `anyhow::Error::new(StorageError::…)` instead of the legacy
61// `anyhow::bail!("…")`; handlers downcast via
62// `MemoryError::from(anyhow::Error)` to map each variant to its
63// canonical HTTP status. The error-prefix constants live alongside the
64// typed enum so the Display impl and the prefix tokens stay in lockstep.
65mod error;
66pub use error::{LINK_CYCLE_ERR_PREFIX, LINK_PERMISSION_DENIED_ERR_PREFIX, LinkEnd, StorageError};
67
68// ---------------------------------------------------------------------------
69// v0.7.0 L1-6 Deliverable E — governance pre-write hook (issue #691)
70// ---------------------------------------------------------------------------
71//
72// Substrate-internal: layering-preserving insertion point for the
73// agent-action rules engine. The hook is a process-wide `OnceLock`
74// holding an optional closure of the shape
75//
76//     Fn(&Memory) -> Result<(), String> + Send + Sync
77//
78// installed exactly once at daemon `serve` boot (BEFORE binding the
79// listener) and consulted by every substrate write path
80// (`storage::insert`, `storage::insert_with_conflict`,
81// `storage::insert_if_newer`) immediately BEFORE the SQL `INSERT`.
82//
83// Why a `OnceLock` and not a thread-local or `RwLock<Option<_>>`:
84//
85//   1. Operator standing directive: "rules and standards can NEVER be
86//      bypassed by AI/AI Agents — 100% of the time". A `OnceLock`
87//      enforces installation-is-one-shot at the type level — no
88//      reset, no override, no test-only escape hatch reachable from
89//      production code paths.
90//   2. The hook closure is read on every write; an `RwLock` would add
91//      contention on the hot path. `OnceLock::get()` is lock-free.
92//   3. CLI one-shot mode (`ai-memory store …`, `ai-memory mine …`,
93//      etc.) MUST NOT install the hook — the operator's direct
94//      substrate ops stay unimpeded by design. `OnceLock` defaults to
95//      empty, so the CLI path is the no-op default; only the daemon's
96//      `serve` boot reaches the `.set` callsite.
97//
98// Refusal contract: when the hook fires it returns `Err(reason)`.
99// The caller wraps `reason` in a typed [`GovernanceRefusal`] (which
100// implements [`std::error::Error`]) and propagates via `anyhow::Error`.
101// The handler layer's `MemoryError::from(anyhow::Error)` impl
102// downcasts and promotes it to [`crate::errors::MemoryError::RefusedByGovernance`]
103// — see `src/errors.rs` for the 403 / `GOVERNANCE_REFUSED` mapping.
104
105/// Optional governance pre-write hook. When `Some`, every substrate
106/// `INSERT` path consults the closure BEFORE the SQL write; an
107/// `Err(reason)` short-circuits the write with no row touched.
108///
109/// Installation is one-shot (`OnceLock::set`); the daemon `serve`
110/// bootstrap is the only caller in production. CLI one-shot binaries
111/// must leave this empty.
112///
113/// See module-level comment for the full layering rationale.
114pub static GOVERNANCE_PRE_WRITE: std::sync::OnceLock<
115    Box<dyn Fn(&Memory) -> std::result::Result<(), String> + Send + Sync>,
116> = std::sync::OnceLock::new();
117
/// Marker error raised when the governance pre-write hook refuses a
/// write.
///
/// It travels inside an `anyhow::Error`, so the `anyhow::Result<String>`
/// return shape of `storage::insert*` does not change. The handler layer
/// downcasts it through `MemoryError::from(anyhow::Error)` (see
/// `src/errors.rs`) and maps the refusal to HTTP `403 FORBIDDEN` with
/// code `GOVERNANCE_REFUSED`.
///
/// `reason` is the operator-authored text, carried verbatim. The MCP
/// layer surfaces the same string (audit log + tool error data field).
#[derive(Debug, Clone)]
pub struct GovernanceRefusal {
    pub reason: String,
}

impl std::fmt::Display for GovernanceRefusal {
    /// Renders as `governance-refused: <reason>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("governance-refused: ")?;
        f.write_str(&self.reason)
    }
}

// No underlying cause to expose, so the default `Error` methods suffice.
impl std::error::Error for GovernanceRefusal {}
139
140/// Internal helper consulted by every substrate write path BEFORE
141/// the SQL write. When the [`GOVERNANCE_PRE_WRITE`] hook is unset
142/// (CLI mode or pre-hook-install daemon path), this is a zero-cost
143/// no-op `Ok(())`. When the hook is set, the closure runs and an
144/// `Err(reason)` wraps into a [`GovernanceRefusal`] propagated up the
145/// `anyhow` chain.
146///
147/// Visibility: `pub(crate)` so the `PostgresStore` SAL adapter
148/// (`src/store/postgres.rs`) can consult the same hook on its write
149/// paths — fixing ARCH-1 (substrate governance pre-write parity
150/// between the SQLite and Postgres backends). The hook itself is
151/// process-wide and installed once by the daemon `serve` bootstrap;
152/// every substrate write path on EVERY backend MUST consult it before
153/// touching SQL.
154///
155/// The function is hot-path; avoid heap allocation on the Allow leg.
156#[inline]
157pub(crate) fn consult_governance_pre_write(mem: &Memory) -> Result<()> {
158    if let Some(hook) = GOVERNANCE_PRE_WRITE.get() {
159        if let Err(reason) = hook(mem) {
160            return Err(anyhow::Error::new(GovernanceRefusal { reason }));
161        }
162    }
163    Ok(())
164}
165
166/// Computed 4-tuple of visibility prefixes for an agent position (Task 1.5).
167/// Index 0 = agent's own namespace (private), 1 = parent (team),
168/// 2 = grandparent (unit), 3 = great-grandparent (org). Missing = `None`.
169type VisibilityPrefixes = (
170    Option<String>,
171    Option<String>,
172    Option<String>,
173    Option<String>,
174);
175
176fn compute_visibility_prefixes(as_agent: Option<&str>) -> VisibilityPrefixes {
177    let Some(ns) = as_agent else {
178        return (None, None, None, None);
179    };
180    let ancestors = namespace_ancestors(ns);
181    let p = ancestors.first().cloned();
182    let t = ancestors.get(1).cloned();
183    let u = ancestors.get(2).cloned();
184    let o = ancestors.get(3).cloned();
185    (p, t, u, o)
186}
187
188/// Rust-side visibility check for paths that can't easily attach SQL
189/// visibility (the HNSW branch of `recall_hybrid` iterates memories loaded
190/// via `get()`). Returns `true` when `as_agent` is unset (no filter) or
191/// when the memory's scope + namespace grant visibility to the caller.
192fn is_visible(mem: &Memory, prefixes: &VisibilityPrefixes) -> bool {
193    // v0.7.0 multi-agent literal-sweep (scanner B finding F-B8.x):
194    // typed-enum exhaustive match via `MemoryScope` + `META_KEY_SCOPE`
195    // SSOT. Adding a new scope variant from here forward is a
196    // compile-time error in this match (was a silent `_ => false`
197    // fall-through pre-refactor — masked drift). Unknown-scope
198    // strings still degrade to `false` via the `from_str` → `None`
199    // arm, preserving pre-refactor semantics byte-for-byte.
200    use crate::models::namespace::MemoryScope;
201    let (p, t, u, o) = prefixes;
202    if p.is_none() {
203        return true;
204    }
205    let Some(scope) = mem
206        .metadata
207        .get(crate::META_KEY_SCOPE)
208        .and_then(|v| v.as_str())
209        .map_or(Some(MemoryScope::default()), MemoryScope::from_str)
210    else {
211        return false;
212    };
213    match scope {
214        MemoryScope::Collective => true,
215        MemoryScope::Private => p.as_ref().is_some_and(|ns| &mem.namespace == ns),
216        MemoryScope::Team => matches_subtree(&mem.namespace, t.as_deref()),
217        MemoryScope::Unit => matches_subtree(&mem.namespace, u.as_deref()),
218        MemoryScope::Org => matches_subtree(&mem.namespace, o.as_deref()),
219    }
220}
221
/// Reports whether `namespace` is `prefix` itself or lies beneath it.
///
/// "Beneath" means `namespace` starts with `prefix` immediately followed
/// by a `/` separator, so `a/bc` is NOT inside the sub-tree rooted at
/// `a/b`. A `None` prefix never matches.
///
/// The check slices `namespace` instead of building a `"{prefix}/"`
/// string, so it allocates nothing.
fn matches_subtree(namespace: &str, prefix: Option<&str>) -> bool {
    prefix.is_some_and(|root| {
        namespace
            .strip_prefix(root)
            // Empty tail: exact match. Tail starting with '/': descendant.
            .is_some_and(|tail| tail.is_empty() || tail.starts_with('/'))
    })
}
228
// NOTE: the block below documents `visibility_clause` (defined further
// down in this file), not the function that immediately follows. It is a
// plain `//` comment so rustdoc does not attach it to
// `archived_source_clause`.
//
// Generate the visibility WHERE-clause fragment starting at placeholder `start`.
// Uses placeholders `?start .. ?start+3` for private/team/unit/org prefixes.
// See `compute_visibility_prefixes` for the bind order.
//
// Performance (v0.6.0 GA): each scope branch compares against the indexed
// generated column `scope_idx` (schema v10) rather than re-evaluating
// `json_extract(metadata, '$.scope')` per row. The query planner picks
// `idx_memories_scope_idx` whenever the predicate narrows by scope,
// dropping recall from "scan every namespace row and parse its JSON" to
// an index seek + per-row refinement. See `docs/ARCHITECTURAL_LIMITS.md`
// for which `SQLite` limits remain structural.
//
// Security (issue #217): the team/unit/org branches use `LIKE` to expand a
// prefix into its sub-tree. Without escaping, a caller who can influence the
// prefix could inject SQL `LIKE` meta-characters (`%`, `_`) and broaden the
// match across unrelated namespaces. We neutralise this at SQL evaluation
// time by `replace()`-escaping `%` and `_` in the bound prefix and pairing
// the LIKE with `ESCAPE '\'`. `validate_namespace` already rejects backslash,
// so `\` cannot appear in the bound prefix and the escape sentinel is safe.
// The `=` equality side is unaffected by LIKE wildcards and binds the raw
// value so that legitimate namespaces containing `_` (e.g. `under_score`)
// continue to match exactly.
/// v0.7.0 WT-1-E — SQL fragment that hides archived source rows.
///
/// Default recall surfaces atoms (the canonical post-atomisation unit)
/// in place of the archived source row they were cut from. A row counts
/// as an archived source only when it shows BOTH signals:
///
///   * `atomised_into > 0` — the count of atoms emitted by the WT-1-B
///     atomiser; and
///   * `metadata.atomisation_archived_at` is set — the RFC3339 stamp
///     WT-1-B writes alongside the column flip (see
///     `src/atomisation/mod.rs::archive_source`).
///
/// A row showing only one of the two (for example after a crash between
/// the column flip and the metadata write) is treated as partial state
/// and still surfaces. Atom rows (`atom_of IS NOT NULL`) are never
/// excluded by this fragment.
///
/// Returns the empty string when `include_archived` is true, so
/// auditors and the forensic-export path see the full chain. Otherwise
/// returns an `AND NOT (...)` predicate qualified with `table_alias`.
/// Only the aliases `"m"` and `"memories"` are recognised.
///
/// NOTE(review): any other alias also yields the empty string, which
/// silently disables the filter — confirm every caller passes one of
/// the two supported aliases.
fn archived_source_clause(include_archived: bool, table_alias: &str) -> &'static str {
    // The return type is `&'static str`, so each supported alias gets
    // its own literal instead of a formatted string.
    const FOR_ALIAS_M: &str = concat!(
        "AND NOT (",
        "m.atomised_into IS NOT NULL AND m.atomised_into > 0 ",
        "AND json_extract(m.metadata, '$.atomisation_archived_at') IS NOT NULL",
        ")",
    );
    const FOR_TABLE_MEMORIES: &str = concat!(
        "AND NOT (",
        "memories.atomised_into IS NOT NULL AND memories.atomised_into > 0 ",
        "AND json_extract(memories.metadata, '$.atomisation_archived_at') IS NOT NULL",
        ")",
    );

    match (include_archived, table_alias) {
        (false, "m") => FOR_ALIAS_M,
        (false, "memories") => FOR_TABLE_MEMORIES,
        // Either the caller asked for archived rows, or the alias is unknown.
        _ => "",
    }
}
304
305/// v0.7.0 WT-1-E — Rust-side mirror of [`archived_source_clause`].
306///
307/// Used by the HNSW retrieval branch of `recall_hybrid_with_telemetry`
308/// where the bypass-the-SQL-WHERE walk fetches each candidate via
309/// `get()` and then applies post-load filters in Rust. The check
310/// reads `metadata.atomisation_archived_at` (the WT-1-B substrate-
311/// visible read signal) and tolerates the absence of the metadata
312/// key — only rows that DO present the key are excluded.
313///
314/// Note: the SQL fragment also requires `atomised_into > 0` to be
315/// set. The HNSW branch deliberately only checks the metadata key
316/// because the loaded `Memory` struct does not carry the
317/// `atomised_into` column. The two signals are written in the same
318/// `archive_source` transaction (see `src/atomisation/mod.rs`), so
319/// in steady-state every row presents both signals together; the
320/// pathological partial-state row that exists only momentarily
321/// during a crash window still surfaces through HNSW until the next
322/// recall — accepted as a tolerable looseness on the cold-fallback
323/// path.
324fn is_archived_source(mem: &Memory) -> bool {
325    mem.metadata
326        .get(field_names::ATOMISATION_ARCHIVED_AT)
327        .is_some_and(|v| !v.is_null())
328}
329
/// Builds the visibility `AND (...)` fragment for a WHERE clause.
///
/// Four consecutive numbered placeholders are used, beginning at
/// `start`: `?start` (caller's own namespace), `?start+1` (team prefix),
/// `?start+2` (unit prefix) and `?start+3` (org prefix). Columns are
/// qualified with `table_alias`.
///
/// The generated predicate admits a row when any of these holds:
///
/// * the own-namespace placeholder is NULL (no filtering requested);
/// * `scope_idx = 'collective'`;
/// * `scope_idx = 'private'` and the namespace equals the own namespace;
/// * `scope_idx` is `team` / `unit` / `org`, the matching prefix is not
///   NULL, and the namespace equals that prefix or sits beneath it.
///
/// The "beneath" test is a `LIKE` with `ESCAPE '\'`. The bound prefix
/// has `%` and `_` backslash-escaped inside SQL via nested `replace()`
/// calls, so LIKE wildcards in a prefix cannot widen the match. The
/// equality side binds the raw value.
fn visibility_clause(start: usize, table_alias: &str) -> String {
    let ta = table_alias;
    let (own, team, unit, org) = (start, start + 1, start + 2, start + 3);

    // Sub-tree arm shared by the three hierarchical scopes. Raw string:
    // the backslashes are literal SQL text, not Rust escapes.
    let subtree_like = |ph: usize| {
        format!(
            r"{ta}.namespace LIKE replace(replace(?{ph}, '%', '\%'), '_', '\_') || '/%' ESCAPE '\'"
        )
    };

    // NOTE: the doubled spaces in the `org` arm are part of the emitted
    // text and are reproduced exactly.
    [
        String::from("AND ("),
        format!("?{own} IS NULL "),
        format!("OR {ta}.scope_idx = 'collective' "),
        format!("OR ({ta}.scope_idx = 'private' AND {ta}.namespace = ?{own}) "),
        format!(
            "OR ({ta}.scope_idx = 'team' AND ?{team} IS NOT NULL AND ({ta}.namespace = ?{team} OR {like})) ",
            like = subtree_like(team)
        ),
        format!(
            "OR ({ta}.scope_idx = 'unit' AND ?{unit} IS NOT NULL AND ({ta}.namespace = ?{unit} OR {like})) ",
            like = subtree_like(unit)
        ),
        format!(
            "OR ({ta}.scope_idx = 'org'  AND ?{org}  IS NOT NULL AND ({ta}.namespace = ?{org}  OR {like}))",
            like = subtree_like(org)
        ),
        String::from(")"),
    ]
    .concat()
}
347
/// v0.7.0 Form 4 / Cluster-A PERF-3 — make a user-supplied substring
/// safe to embed in a `LIKE ... ESCAPE '\\'` pattern.
///
/// Each of the three LIKE-significant characters (`%`, `_` and the
/// escape character `\` itself) is prefixed with a backslash so it
/// matches literally; all other characters are copied through
/// unchanged. Used by the `source_uri LIKE 'prefix%'` filter in
/// [`recall`] and [`recall_hybrid_with_telemetry`] to push the
/// `--source-uri-prefix` filter into SQL.
fn escape_like_pattern(s: &str) -> String {
    s.chars()
        .fold(String::with_capacity(s.len()), |mut escaped, ch| {
            if matches!(ch, '\\' | '%' | '_') {
                escaped.push('\\');
            }
            escaped.push(ch);
            escaped
        })
}
367
368// v0.7.0 L0.5-3 — flat `src/db.rs` decomposed into `src/storage/`.
369// Sub-modules stay private to this module per the L0.5-1 pattern;
370// only the re-exports below form the public surface. The
371// `pub use storage as db;` shim in `src/lib.rs` preserves the
372// historical `crate::db::*` paths used elsewhere.
373pub(crate) mod connection;
374// `pub` (rather than `pub(crate)`) so the V-4 closeout
375// integration test suite (`tests/signed_events_chain_v34.rs`) can
376// invoke `migrate_v34_backfill_chain` directly to exercise the
377// idempotent-replay property without going through a full daemon
378// boot cycle.
379pub mod migration_meta;
380pub mod migrations;
381pub(crate) mod reflect;
382
383// Re-exports — every `pub` item that previously lived in `src/db.rs`
384// is re-published at `crate::storage::*` (and therefore `crate::db::*`
385// via the lib.rs shim) so callsites keep resolving without churn.
386pub use connection::open;
387// #1579 B7 — mmap_size knob. `set_db_mmap_size` is the boot-time
388// seeding hook (`daemon_runtime::run`); the DEFAULT const is the
389// compiled fallback the `AppConfig::resolve_storage()` ladder bottoms
390// out on (also consumed by the config-precedence tests).
391pub use connection::{DEFAULT_DB_MMAP_SIZE_BYTES, set_db_mmap_size};
392// v0.7.0 refactor PR-1 (#793) — schema-pins SSOT. Re-export the
393// test-facing helper so callers can use either
394// `ai_memory::storage::current_schema_version_for_tests()` or the
395// existing `ai_memory::db::current_schema_version_for_tests()` shim
396// (via `pub use storage as db;` in `src/lib.rs`).
397pub use migrations::current_schema_version_for_tests;
398// Pre-migration safety-snapshot infix accessor — lets coverage tests
399// locate / name-assert the snapshot file without restamping the literal.
400pub use migrations::pre_migration_backup_infix_for_tests;
401pub use reflect::{
402    ReflectError, ReflectHookDecision, ReflectHooks, ReflectInput, ReflectOutcome,
403    canonical_cbor_reflection_depth_exceeded, reflect, reflect_with_hooks,
404};
405// `emit_reflection_depth_exceeded_audit` is `pub(crate)` — preserve
406// the same visibility on the re-export so it remains reachable from
407// `crate::db::emit_reflection_depth_exceeded_audit` (the original
408// path) without widening the public surface. The current crate has
409// no external callers (the path is only used internally by
410// `reflect_with_hooks`); the re-export is retained for surface
411// parity with pre-L0.5-3.
412#[allow(unused_imports)]
413pub(crate) use reflect::emit_reflection_depth_exceeded_audit;
414
415pub(crate) fn row_to_memory(row: &rusqlite::Row) -> rusqlite::Result<Memory> {
416    let row_id: String = row.get("id")?;
417    let tags_json: String = row.get("tags")?;
418    let tags: Vec<String> = serde_json::from_str(&tags_json).unwrap_or_default();
419    let tier_str: String = row.get("tier")?;
420    let tier = Tier::from_str(&tier_str).unwrap_or(Tier::Mid);
421    let metadata_str: String = row
422        .get::<_, String>("metadata")
423        .unwrap_or_else(|_| "{}".to_string());
424    let metadata: serde_json::Value = serde_json::from_str(&metadata_str).unwrap_or_else(|e| {
425        tracing::warn!(
426            row_id = %row_id,
427            column = "metadata",
428            error = %e,
429            "corrupt metadata in DB row, defaulting to {{}}"
430        );
431        crate::metrics::record_corrupt_provenance("metadata");
432        serde_json::json!({})
433    });
434    // v0.7.0 Form 4 / Cluster-A COR-3 — citations JSON. Pre-fix used a
435    // bare `.ok()` chain that silently turned corrupt JSON into an empty
436    // vec with no operator signal. Now: log via `tracing::warn!` with the
437    // row id + column + parse error, bump the
438    // `corrupt_provenance_rows_total{column=...}` counter, then return
439    // the safe default.
440    let citations = match row.get::<_, String>("citations").ok() {
441        Some(s) => match serde_json::from_str::<Vec<crate::models::Citation>>(&s) {
442            Ok(v) => v,
443            Err(e) => {
444                tracing::warn!(
445                    row_id = %row_id,
446                    column = "citations",
447                    error = %e,
448                    "corrupt citations JSON in DB row, defaulting to []"
449                );
450                crate::metrics::record_corrupt_provenance("citations");
451                Vec::new()
452            }
453        },
454        None => Vec::new(),
455    };
456    let source_span: Option<SourceSpan> = row
457        .get::<_, Option<String>>(field_names::SOURCE_SPAN)
458        .unwrap_or(None)
459        .and_then(|s| match serde_json::from_str::<SourceSpan>(&s) {
460            Ok(span) => Some(span),
461            Err(e) => {
462                tracing::warn!(
463                    row_id = %row_id,
464                    column = field_names::SOURCE_SPAN,
465                    error = %e,
466                    "corrupt source_span JSON in DB row, defaulting to None"
467                );
468                crate::metrics::record_corrupt_provenance(field_names::SOURCE_SPAN);
469                None
470            }
471        });
472    let confidence_signals = row
473        .get::<_, Option<String>>(field_names::CONFIDENCE_SIGNALS)
474        .unwrap_or(None)
475        .and_then(
476            |s| match serde_json::from_str::<crate::models::ConfidenceSignals>(&s) {
477                Ok(v) => Some(v),
478                Err(e) => {
479                    tracing::warn!(
480                        row_id = %row_id,
481                        column = field_names::CONFIDENCE_SIGNALS,
482                        error = %e,
483                        "corrupt confidence_signals JSON in DB row, defaulting to None"
484                    );
485                    crate::metrics::record_corrupt_provenance(field_names::CONFIDENCE_SIGNALS);
486                    None
487                }
488            },
489        );
490    Ok(Memory {
491        id: row_id,
492        tier,
493        namespace: row.get("namespace")?,
494        title: row.get("title")?,
495        content: row.get("content")?,
496        tags,
497        priority: row.get("priority")?,
498        confidence: row.get(field_names::CONFIDENCE).unwrap_or(1.0),
499        source: row.get("source").unwrap_or_else(|_| "api".to_string()),
500        access_count: row.get(field_names::ACCESS_COUNT)?,
501        created_at: row.get(field_names::CREATED_AT)?,
502        updated_at: row.get(field_names::UPDATED_AT)?,
503        last_accessed_at: row.get(field_names::LAST_ACCESSED_AT)?,
504        expires_at: row.get(field_names::EXPIRES_AT)?,
505        metadata,
506        // v0.7.0 Task 1/8 — schema v29 column. `.unwrap_or(0)` keeps the
507        // reader tolerant of pre-v29 row reads (no panic if the migration
508        // ladder hasn't reached this DB yet) and is consistent with the
509        // SQL-side `DEFAULT 0`.
510        reflection_depth: row.get(field_names::REFLECTION_DEPTH).unwrap_or(0_i32),
511        // v0.7.0 L1-1 — schema v30 column. Falls back to `Observation` on
512        // pre-v30 rows (column absent) and on any unrecognised value from a
513        // future schema (forward-compat).
514        memory_kind: row
515            .get::<_, String>(field_names::MEMORY_KIND)
516            .ok()
517            .and_then(|s| crate::models::MemoryKind::from_str(&s))
518            .unwrap_or_default(),
519        // v0.7.0 QW-2 — Persona-as-artifact discriminator columns.
520        // Populated only for `memory_kind = 'persona'` rows. NULL on
521        // every observation/reflection row. Pre-v36 rows lack the
522        // column entirely — the `.ok()` fallthrough yields None.
523        entity_id: row.get::<_, Option<String>>("entity_id").unwrap_or(None),
524        persona_version: row
525            .get::<_, Option<i32>>(field_names::PERSONA_VERSION)
526            .unwrap_or(None),
527        // v0.7.0 Form 4 — schema v38 fact-provenance columns. `citations`
528        // / `source_span` corruption now logs WARN + bumps the
529        // `corrupt_provenance_rows_total` counter above so silent JSON
530        // drops surface in operator observability (Cluster-A COR-3 fix).
531        // `source_uri` is a plain TEXT column (NULL on legacy rows).
532        citations,
533        source_uri: row
534            .get::<_, Option<String>>(field_names::SOURCE_URI)
535            .unwrap_or(None),
536        source_span,
537        // v0.7.0 Form 5 — schema v39 columns. Legacy rows resolve
538        // to `CallerProvided` (SQL DEFAULT), NULL signals, NULL
539        // decayed_at. `.ok()` fallthrough keeps the reader tolerant
540        // of pre-v39 row reads (no panic when migrate hasn't fired
541        // yet).
542        confidence_source: row
543            .get::<_, String>(field_names::CONFIDENCE_SOURCE)
544            .ok()
545            .and_then(|s| crate::models::ConfidenceSource::from_str(&s))
546            .unwrap_or_default(),
547        confidence_signals,
548        confidence_decayed_at: row
549            .get::<_, Option<String>>(field_names::CONFIDENCE_DECAYED_AT)
550            .unwrap_or(None),
551        // v0.7.0 Provenance Gap 1 (#884) — schema v45 optimistic-
552        // concurrency column. Pre-v45 rows lack the column entirely
553        // — the `.ok()` fallthrough yields the SQL DEFAULT 1 (same
554        // value a pre-v45 row would land at the moment the ALTER
555        // fires in the migrate ladder).
556        version: row.get::<_, i64>("version").unwrap_or(1),
557    })
558}
559
560/// v0.7.0 polish PERF-8 (issue #781) — extract the canonical
561/// `mentioned_entity_id` from a memory at write time.
562///
563/// The auto-persona matcher (`hooks::post_reflect::auto_persona`) and
564/// the persona source-pool loader (`persona::load_reflections_for_entity`)
565/// previously scanned `(title|content|metadata) LIKE '%<entity>%'` to
566/// find candidate reflections — a full-table scan against three TEXT
567/// columns for every reflection in the namespace. PERF-8 denormalises
568/// the entity descriptor onto a dedicated indexed column so the matcher
569/// resolves with `WHERE mentioned_entity_id = ?` instead.
570///
571/// Resolution order mirrors the runtime extractor in
572/// `auto_persona::resolve_entity_id`:
573///
574/// 1. `metadata.entity_id` (the structured tag the curator + most
575///    operators supply when minting a reflection about a known entity).
576/// 2. `[entity:X]` marker in the title (operator-supplied fallback
577///    when no structured tag exists yet).
578///
579/// Returns `None` when neither yields a non-empty string — the row
580/// stays NULL on the column and contributes zero index pages (matches
581/// the partial index predicate `WHERE mentioned_entity_id IS NOT NULL`).
582///
583/// Restricted to `memory_kind = 'reflection'` rows: the matcher only
584/// scans reflections, so populating the column on observations would
585/// inflate the index footprint without speeding any query. (Persona
586/// rows already use the orthogonal QW-2 `entity_id` column for their
587/// own attribution.)
588pub(crate) fn extract_mentioned_entity_id(mem: &Memory) -> Option<String> {
589    if mem.memory_kind != MemoryKind::Reflection {
590        return None;
591    }
592    // Step 1: structured metadata.entity_id tag.
593    if let Some(eid) = mem
594        .metadata
595        .get(field_names::ENTITY_ID)
596        .and_then(|v| v.as_str())
597        .map(str::trim)
598        .filter(|s| !s.is_empty())
599    {
600        return Some(eid.to_string());
601    }
602    // Step 2: `[entity:X]` title marker. Mirrors the runtime extractor
603    // in `auto_persona::resolve_entity_id` so cadence accounting and
604    // matcher selection agree on the same descriptor for a given row.
605    if let Some(start) = mem.title.find("[entity:") {
606        let rest = &mem.title[start + "[entity:".len()..];
607        if let Some(end) = rest.find(']') {
608            let extracted = rest[..end].trim();
609            if !extracted.is_empty() {
610                return Some(extracted.to_string());
611            }
612        }
613    }
614    None
615}
616
617/// Insert with upsert on title+namespace. Returns the ID (existing or new).
618///
619/// Ultrareview #352: collapses the previous `INSERT`/`ON CONFLICT` +
620/// separate `SELECT` into a single `INSERT ... RETURNING id`. Another
621/// concurrent writer could otherwise slot in between the two statements
622/// and the `SELECT` would return the wrong row id. `SQLite` 3.35+
623/// supports `RETURNING`; it executes atomically within the `INSERT`.
624pub fn insert(conn: &Connection, mem: &Memory) -> Result<String> {
625    // v0.7.0 L1-6 Deliverable E — substrate governance pre-write
626    // gate. Consults the (optional) `GOVERNANCE_PRE_WRITE` hook
627    // BEFORE any SQL touches the DB; a refusal returns cleanly with
628    // no row written. See module-level comment for layering details.
629    consult_governance_pre_write(mem)?;
630
631    let tags_json = serde_json::to_string(&mem.tags)?;
632    let metadata_json = serde_json::to_string(&mem.metadata)?;
633    // v0.7.0 Form 4 — encode citations/source_span to JSON for the
634    // schema v38 TEXT columns. citations always lands as a JSON array
635    // (default `[]` when caller supplied nothing); source_span lands as
636    // `{start,end}` or NULL.
637    let citations_json = serde_json::to_string(&mem.citations)?;
638    let source_span_json = match mem.source_span {
639        Some(span) => Some(serde_json::to_string(&span)?),
640        None => None,
641    };
642    // v0.7.0 Form 5 — encode confidence-provenance fields for the
643    // schema v39 TEXT columns. The `confidence_source` column has a
644    // SQL DEFAULT of 'caller_provided' so legacy/default rows land
645    // there; `confidence_signals` is a JSON envelope (or NULL); and
646    // `confidence_decayed_at` is RFC3339 (or NULL).
647    let confidence_signals_json = match &mem.confidence_signals {
648        Some(s) => Some(serde_json::to_string(s)?),
649        None => None,
650    };
651    // v0.7.0 polish PERF-8 (#781) — denormalised `mentioned_entity_id`
652    // column, populated at write time from `metadata.entity_id` (or a
653    // `[entity:X]` title-marker fallback) on reflection rows. See
654    // `extract_mentioned_entity_id` for the resolution order.
655    let mentioned_entity_id = extract_mentioned_entity_id(mem);
656    // #1579 B6 — `insert` is the hottest write statement in the
657    // substrate (every store / upsert / capture-turn / federation push
658    // lands here). `prepare_cached` skips the re-parse of this ~60-line
659    // upsert on every call after the first.
660    let mut insert_stmt = conn.prepare_cached(
661        "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, confidence, source, access_count, created_at, updated_at, last_accessed_at, expires_at, metadata, reflection_depth, memory_kind, entity_id, persona_version, citations, source_uri, source_span, confidence_source, confidence_signals, confidence_decayed_at, mentioned_entity_id)
662         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, ?20, ?21, ?22, ?23, ?24, ?25, ?26)
663         ON CONFLICT(title, namespace) DO UPDATE SET
664            content = excluded.content,
665            tags = excluded.tags,
666            priority = MAX(memories.priority, excluded.priority),
667            confidence = MAX(memories.confidence, excluded.confidence),
668            source = excluded.source,
669            tier = CASE WHEN excluded.tier = 'long' THEN 'long'
670                        WHEN memories.tier = 'long' THEN 'long'
671                        WHEN excluded.tier = 'mid' THEN 'mid'
672                        ELSE memories.tier END,
673            updated_at = excluded.updated_at,
674            expires_at = CASE WHEN excluded.tier = 'long' OR memories.tier = 'long' THEN NULL
675                              ELSE COALESCE(excluded.expires_at, memories.expires_at) END,
676            -- Preserve metadata.agent_id across upsert (NHI provenance is immutable).
677            metadata = CASE
678                WHEN json_extract(memories.metadata, '$.agent_id') IS NOT NULL
679                THEN json_set(
680                    excluded.metadata,
681                    '$.agent_id',
682                    json_extract(memories.metadata, '$.agent_id')
683                )
684                ELSE excluded.metadata
685            END,
686            -- v0.7.0 Task 1/8 — recursion depth takes the max across upsert
687            -- so a subsequent reflection at higher depth doesn't lose its
688            -- provenance signal when re-stored at the same (title, namespace).
689            reflection_depth = MAX(memories.reflection_depth, excluded.reflection_depth),
690            -- v0.7.0 L1-1 — kind is sticky: once Reflection, always Reflection.
691            -- An upsert of an observation onto an existing reflection row must
692            -- not downgrade the kind (reflect is not reversible by re-store).
693            -- v0.7.0 QW-2 — Persona is also sticky once set; the engine
694            -- writes new versions via fresh rows under a unique
695            -- `__persona_<entity>_v<n>` title rather than upsert.
696            memory_kind = CASE WHEN memories.memory_kind = 'reflection' THEN 'reflection'
697                               WHEN memories.memory_kind = 'persona' THEN 'persona'
698                               ELSE excluded.memory_kind END,
699            -- v0.7.0 QW-2 — entity_id + persona_version stay attached to
700            -- the row they were minted with (Persona-kind upserts use
701            -- versioned titles so the conflict path is exercised only
702            -- on accidental same-title collisions).
703            entity_id = COALESCE(memories.entity_id, excluded.entity_id),
704            persona_version = COALESCE(memories.persona_version, excluded.persona_version),
705            -- v0.7.0 Form 4 — fact-provenance: when the incoming row
706            -- carries a non-empty citations array, replace the stored
707            -- value (caller re-asserted provenance); otherwise keep
708            -- the existing value (silent merge would lose freshly-cited
709            -- evidence). source_uri / source_span follow COALESCE
710            -- semantics so a new write that omits them does not blank
711            -- out existing provenance pointers.
712            citations = CASE WHEN excluded.citations = '[]'
713                             THEN memories.citations
714                             ELSE excluded.citations END,
715            source_uri = COALESCE(excluded.source_uri, memories.source_uri),
716            source_span = COALESCE(excluded.source_span, memories.source_span),
717            -- v0.7.0 Form 5 — confidence-provenance follows the same
718            -- shape as Form 4 columns: explicit non-default replaces;
719            -- caller_provided + NULL signals keep the existing
720            -- provenance signal so a re-store doesn't blank out an
721            -- auto-derived or calibrated value.
722            confidence_source = CASE WHEN excluded.confidence_source != 'caller_provided'
723                                     THEN excluded.confidence_source
724                                     ELSE memories.confidence_source END,
725            confidence_signals = COALESCE(excluded.confidence_signals, memories.confidence_signals),
726            confidence_decayed_at = COALESCE(excluded.confidence_decayed_at, memories.confidence_decayed_at),
727            -- v0.7.0 polish PERF-8 (#781) — denormalised mention tag.
728            -- COALESCE keeps any pre-existing tag (re-write that
729            -- omits the structured entity_id metadata should NOT
730            -- blank out the indexed column) while letting a fresh
731            -- extraction populate previously-NULL rows.
732            mentioned_entity_id = COALESCE(excluded.mentioned_entity_id, memories.mentioned_entity_id),
733            -- #1632 — upsert-merge IS a mutation (content/tags/priority
734            -- can change), so the Gap-1 optimistic-concurrency counter
735            -- bumps here exactly like db::update. Pre-#1632 a re-store
736            -- rewrote content while version stood still, so a stale
737            -- If-Match could overwrite the merge invisibly. The decay
738            -- sweep remains the only documented non-bumping mutator
739            -- (tests/non_version_bumping_sites_1036.rs).
740            version = memories.version + 1
741         RETURNING id",
742    )?;
743    let actual_id: String = insert_stmt.query_row(
744        params![
745            mem.id,
746            mem.tier.as_str(),
747            mem.namespace,
748            mem.title,
749            mem.content,
750            tags_json,
751            mem.priority,
752            mem.confidence,
753            mem.source,
754            mem.access_count,
755            mem.created_at,
756            mem.updated_at,
757            mem.last_accessed_at,
758            mem.effective_expires_at(),
759            metadata_json,
760            mem.reflection_depth,
761            mem.memory_kind.as_str(),
762            mem.entity_id,
763            mem.persona_version,
764            citations_json,
765            mem.source_uri,
766            source_span_json,
767            mem.confidence_source.as_str(),
768            confidence_signals_json,
769            mem.confidence_decayed_at,
770            mentioned_entity_id,
771        ],
772        |r| r.get(0),
773    )?;
774    Ok(actual_id)
775}
776
/// v0.7.0 fix campaign R1-M3 (#690) — substrate-side `on_conflict`
/// policy for [`insert_with_conflict`].
///
/// Before this enum existed, every call into [`insert`] silently
/// merged on `(title, namespace)` collision. The G6 work in v0.6.3.1
/// closed the silent-merge gap at the MCP / HTTP **handler** layer
/// (see `mcp::tools::store` and `handlers::http::create_link`), but
/// substrate-internal writers — `storage::reflect`, the curator
/// consolidation surface, and the federation `sync_push` link loop —
/// kept calling [`insert`] directly and inheriting the silent-merge
/// behaviour. R1-M3 surfaces the same three policies the handler
/// layer already exposes on a typed enum so substrate callers can
/// opt into the right semantics explicitly.
///
/// Policies:
///
/// * [`ConflictMode::Error`] — refuse the write when a `(title,
///   namespace)` row already exists, returning a typed error. Used
///   by `storage::reflect` so a duplicate reflection cannot silently
///   replace an earlier one.
///
/// * [`ConflictMode::Merge`] — current silent-merge behaviour (the
///   v0.6.3 default). [`insert`] continues to call into the merge
///   path verbatim for backward compatibility.
///
/// * [`ConflictMode::Version`] — append a monotonic suffix to the
///   title until a free `(title, namespace)` slot is found, then
///   insert a new row. Mirrors the `on_conflict='version'` handler
///   policy.
///
/// The type is a plain `Copy` tag with no payload; the behaviour of
/// each variant is implemented by the `match` in
/// [`insert_with_conflict`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConflictMode {
    /// Refuse the write with a typed `(title, namespace)` collision
    /// error. The existing row is left untouched.
    ///
    /// [`insert_with_conflict`] implements this with an up-front
    /// lookup followed by a plain `INSERT` (no upsert tail), and
    /// surfaces the collision as a [`ConflictError`].
    Error,
    /// Silently merge on `(title, namespace)` collision (the legacy
    /// v0.6.3 substrate default). The existing row's content / tags /
    /// metadata.agent_id / reflection_depth are merged with the
    /// incoming row per the SQL in [`insert`].
    ///
    /// [`insert_with_conflict`] delegates this variant straight to
    /// [`insert`].
    Merge,
    /// Append `(2)`, `(3)`, … to the title until a free slot is found,
    /// then insert a new row. Both old and new rows persist.
    ///
    /// [`insert_with_conflict`] resolves the title through
    /// `next_versioned_title` and then writes a clone of the memory
    /// carrying that title via [`insert`].
    Version,
}
820
/// Typed error returned by [`insert_with_conflict`] under
/// [`ConflictMode::Error`] when a `(title, namespace)` row already
/// exists. Carries the existing row's id so callers can surface a
/// well-shaped diagnostic instead of leaking a generic SQL string.
#[derive(Debug)]
pub struct ConflictError {
    /// Id of the row that already occupies the `(title, namespace)`
    /// slot. May be empty: when the collision is detected through the
    /// UNIQUE constraint rather than the up-front lookup, the id of the
    /// winning row is not necessarily known.
    pub existing_id: String,
    /// Title of the memory whose write was refused.
    pub title: String,
    /// Namespace of the memory whose write was refused.
    pub namespace: String,
}

impl std::fmt::Display for ConflictError {
    /// Renders `CONFLICT: memory with title '<t>' already exists in
    /// namespace '<ns>' (existing id: <id>)`.
    ///
    /// The trailing parenthetical is omitted when `existing_id` is
    /// empty, so the race-path error never prints a dangling
    /// `(existing id: )`. Output for a non-empty id is unchanged.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "CONFLICT: memory with title '{}' already exists in namespace '{}'",
            self.title, self.namespace
        )?;
        if !self.existing_id.is_empty() {
            write!(f, " (existing id: {})", self.existing_id)?;
        }
        Ok(())
    }
}

impl std::error::Error for ConflictError {}
844
845/// v0.7.0 #1416 / RFC-0001 — sqlite SSOT for the L4 layered-capture
846/// idempotent write. Both the MCP `memory_capture_turn` handler (which
847/// holds a raw `&rusqlite::Connection`) and `SqliteStore::
848/// capture_turn_idempotent` (the SAL trait surface) call through here,
849/// so the dedup-lookup + atomic three-row insert exists in exactly one
850/// place on the sqlite path.
851///
852/// Mirrors the original inline handler transaction verbatim:
853/// 1. dedup SELECT on `(host_session_id, host_turn_index)` (the
854///    `IS NOT NULL` predicate pins the partial index from schema v52).
855/// 2. On hit → return the existing id with `dedup_hit: true`, no write.
856/// 3. On miss → `BEGIN IMMEDIATE` → `insert` (merge upsert) →
857///    `transcript_line_dedup` INSERT → `signed_events` chain row →
858///    COMMIT; any failure rolls all three rows back atomically.
859///
860/// # Errors
861///
862/// String-stable codes per the MCP error convention: `DEDUP_QUERY_FAILED`,
863/// `TX_BEGIN_FAILED`, `MEMORY_INSERT_FAILED`, `DEDUP_INSERT_FAILED`,
864/// `SIGNED_EVENTS_APPEND_FAILED`, `TX_COMMIT_FAILED`.
865pub fn capture_turn_idempotent(
866    conn: &Connection,
867    write: &crate::models::CaptureTurnWrite,
868) -> std::result::Result<crate::models::CaptureTurnResult, String> {
869    use rusqlite::OptionalExtension;
870
871    // #1579 B6 — the dedup probe fires on EVERY captured turn before
872    // any write; `prepare_cached` keeps the per-turn cost at bind+step.
873    let existing: Option<String> = conn
874        .prepare_cached(
875            "SELECT memory_id FROM transcript_line_dedup \
876             WHERE host_session_id IS NOT NULL \
877               AND host_session_id = ?1 \
878               AND host_turn_index = ?2",
879        )
880        .and_then(|mut stmt| {
881            stmt.query_row(
882                params![&write.host_session_id, write.host_turn_index],
883                |row| row.get(0),
884            )
885            .optional()
886        })
887        .map_err(|e| format!("DEDUP_QUERY_FAILED: {e}"))?;
888
889    if let Some(memory_id) = existing {
890        return Ok(crate::models::CaptureTurnResult {
891            memory_id,
892            dedup_hit: true,
893        });
894    }
895
896    conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)
897        .map_err(|e| format!("TX_BEGIN_FAILED: {e}"))?;
898
899    let tx_result = (|| -> std::result::Result<String, String> {
900        let inserted_id =
901            insert(conn, &write.memory).map_err(|e| format!("MEMORY_INSERT_FAILED: {e}"))?;
902
903        conn.prepare_cached(
904            "INSERT INTO transcript_line_dedup \
905             (sha256, memory_id, host_kind, transcript_path, \
906              host_session_id, host_turn_index, recovered_at) \
907             VALUES (?1, ?2, ?3, NULL, ?4, ?5, ?6)",
908        )
909        .and_then(|mut stmt| {
910            stmt.execute(params![
911                write.sha256,
912                inserted_id,
913                write.host_kind,
914                write.host_session_id,
915                write.host_turn_index,
916                write.recovered_at_ms,
917            ])
918        })
919        .map_err(|e| format!("DEDUP_INSERT_FAILED: {e}"))?;
920
921        crate::signed_events::append_signed_event_no_tx(conn, &write.signed_event)
922            .map_err(|e| format!("SIGNED_EVENTS_APPEND_FAILED: {e}"))?;
923
924        Ok(inserted_id)
925    })();
926
927    match tx_result {
928        Ok(memory_id) => {
929            conn.execute_batch(connection::SQL_COMMIT)
930                .map_err(|e| format!("TX_COMMIT_FAILED: {e}"))?;
931            Ok(crate::models::CaptureTurnResult {
932                memory_id,
933                dedup_hit: false,
934            })
935        }
936        Err(e) => {
937            let _ = conn.execute_batch(connection::SQL_ROLLBACK);
938            Err(e)
939        }
940    }
941}
942
943/// v0.7.0 fix campaign R1-M3 (#690) — insert a memory under an
944/// explicit [`ConflictMode`].
945///
946/// This is the substrate primitive every direct-DB writer that cares
947/// about collision semantics should reach for. Callers that want the
948/// legacy silent-merge behaviour (most of the existing surface) keep
949/// calling [`insert`] — it is now thin glue around
950/// `insert_with_conflict(.., ConflictMode::Merge)` so backward compat
951/// is preserved without invasive churn.
952///
953/// # Errors
954///
955/// * Bubbles up rusqlite errors from the underlying INSERT.
956/// * Under [`ConflictMode::Error`], returns a typed [`ConflictError`]
957///   when `(mem.title, mem.namespace)` already exists. The existing
958///   row is left untouched.
959/// * Under [`ConflictMode::Version`], returns an error when no free
960///   `title (N)` slot is found within the safety cap (see
961///   [`next_versioned_title`]).
962pub fn insert_with_conflict(conn: &Connection, mem: &Memory, mode: ConflictMode) -> Result<String> {
963    match mode {
964        ConflictMode::Merge => insert(conn, mem),
965        ConflictMode::Error => {
966            // v0.7.0 L1-6 Deliverable E — fire the pre-write governance
967            // hook BEFORE the existence-check `SELECT`. The Merge and
968            // Version branches reach the hook via the `insert(..)`
969            // tail call below; the `Error` branch needs its own gate
970            // because it bypasses `insert` to issue the unannotated
971            // INSERT itself. Refusal here returns no row written and
972            // no SELECT performed — symmetric with the Merge path.
973            consult_governance_pre_write(mem)?;
974            // Existence check + INSERT must be atomic against
975            // concurrent writers. We rely on the (title, namespace)
976            // UNIQUE index — issue a plain INSERT WITHOUT the upsert
977            // tail, let SQLite enforce the constraint, and translate
978            // the constraint violation into a typed error.
979            //
980            // The SELECT before INSERT is intentionally kept as an
981            // up-front read so the typed error message can carry the
982            // existing row's id. Two queries open a TOCTOU window
983            // (another writer slots in between SELECT and INSERT and
984            // we return Error pointing at the *wrong* existing id) —
985            // but the constraint violation on the subsequent INSERT
986            // still fires loud, and the caller's retry sees the new
987            // state. Reading the id is best-effort context for the
988            // diagnostic.
989            if let Some(existing_id) = find_by_title_namespace(conn, &mem.title, &mem.namespace)? {
990                return Err(ConflictError {
991                    existing_id,
992                    title: mem.title.clone(),
993                    namespace: mem.namespace.clone(),
994                }
995                .into());
996            }
997            let tags_json = serde_json::to_string(&mem.tags)?;
998            let metadata_json = serde_json::to_string(&mem.metadata)?;
999            // v0.7.0 Form 4 — encode citations + source_span for the
1000            // schema v38 TEXT columns. Mirrors the encode in
1001            // `insert(...)` above; the ConflictMode::Error path lands
1002            // here on the first-write happy path and must persist the
1003            // provenance columns the caller supplied.
1004            let citations_json = serde_json::to_string(&mem.citations)?;
1005            let source_span_json = match mem.source_span {
1006                Some(span) => Some(serde_json::to_string(&span)?),
1007                None => None,
1008            };
1009            // v0.7.0 Form 5 — encode confidence-provenance fields for
1010            // the schema v39 TEXT columns. Mirrors the encode in
1011            // `insert(...)` above.
1012            let confidence_signals_json = match &mem.confidence_signals {
1013                Some(s) => Some(serde_json::to_string(s)?),
1014                None => None,
1015            };
1016            // v0.7.0 polish PERF-8 (#781) — same denormalised mention
1017            // tag wired here so the ConflictMode::Error path (used by
1018            // `storage::reflect`) populates the indexed column on the
1019            // first-write happy path; otherwise the auto-persona matcher
1020            // would miss every reflection minted via reflect.
1021            let mentioned_entity_id = extract_mentioned_entity_id(mem);
1022            // v0.7.0 L1-1 wave merge — include the `memory_kind` column.
1023            // This INSERT path was added by the fix-campaign R1-M3
1024            // (ConflictMode::Error refuses duplicates) and originally
1025            // omitted the new L1-1 column because L1-1 was authored
1026            // against the pre-fix-campaign storage layer. Without
1027            // memory_kind here, a `db::reflect` call (which uses
1028            // `insert_with_conflict(.., ConflictMode::Error)`) loses
1029            // its `MemoryKind::Reflection` typing and the stored row
1030            // falls back to the column DEFAULT 'observation'.
1031            let actual_id: String = conn.query_row(
1032                "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, confidence, source, access_count, created_at, updated_at, last_accessed_at, expires_at, metadata, reflection_depth, memory_kind, entity_id, persona_version, citations, source_uri, source_span, confidence_source, confidence_signals, confidence_decayed_at, mentioned_entity_id)
1033                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, ?20, ?21, ?22, ?23, ?24, ?25, ?26)
1034                 RETURNING id",
1035                params![
1036                    mem.id, mem.tier.as_str(), mem.namespace, mem.title, mem.content,
1037                    tags_json, mem.priority, mem.confidence, mem.source, mem.access_count,
1038                    mem.created_at, mem.updated_at, mem.last_accessed_at, mem.effective_expires_at(),
1039                    metadata_json, mem.reflection_depth, mem.memory_kind.as_str(),
1040                    mem.entity_id, mem.persona_version,
1041                    citations_json, mem.source_uri, source_span_json,
1042                    mem.confidence_source.as_str(), confidence_signals_json, mem.confidence_decayed_at,
1043                    mentioned_entity_id,
1044                ],
1045                |r| r.get(0),
1046            ).map_err(|e| {
1047                // Translate a UNIQUE constraint violation that
1048                // raced past the SELECT into the typed error so
1049                // callers see the same shape on TOCTOU as on the
1050                // happy path.
1051                let msg = e.to_string();
1052                if msg.contains("UNIQUE constraint failed") {
1053                    anyhow::Error::new(ConflictError {
1054                        existing_id: String::new(),
1055                        title: mem.title.clone(),
1056                        namespace: mem.namespace.clone(),
1057                    })
1058                } else {
1059                    e.into()
1060                }
1061            })?;
1062            Ok(actual_id)
1063        }
1064        ConflictMode::Version => {
1065            let resolved_title = next_versioned_title(conn, &mem.title, &mem.namespace)?;
1066            let mut versioned = mem.clone();
1067            versioned.title = resolved_title;
1068            // The chosen title is fresh — fall into the plain insert
1069            // path (which still calls into the upsert SQL, but the
1070            // upsert branch is unreachable for a fresh title).
1071            insert(conn, &versioned)
1072        }
1073    }
1074}
1075
1076pub fn get(conn: &Connection, id: &str) -> Result<Option<Memory>> {
1077    let mut stmt = conn.prepare_cached(SQL_SELECT_MEMORY_ROW_BY_ID)?;
1078    let mut rows = stmt.query_map(params![id], row_to_memory)?;
1079    match rows.next() {
1080        Some(Ok(m)) => Ok(Some(m)),
1081        Some(Err(e)) => Err(e.into()),
1082        None => Ok(None),
1083    }
1084}
1085
1086/// Batch-fetch memories by ID. Mirrors [`get`] but issues a single
1087/// `WHERE id IN (?, ?, ...)` SELECT instead of N per-id round-trips.
1088///
1089/// v0.7.0 #981 — used by the HNSW [`semantic_phase`] recall branch
1090/// where ANN-hit batches of 50–250 IDs need to materialise as
1091/// `Memory` rows; the per-id `get` loop was 5–10× slower on a warm
1092/// cache and extended the DB-mutex hold (which compounds the
1093/// single-connection serialization the daemon ships with on sqlite).
1094///
1095/// Returns a `HashMap<String, Memory>` keyed by id so the caller can
1096/// re-apply the original hit ordering via the HNSW hit list.
1097///
1098/// Chunks ids into batches of 500 to stay well under SQLite's default
1099/// `SQLITE_LIMIT_VARIABLE_NUMBER = 999` regardless of how the operator
1100/// has compiled their sqlite (Debian ships 999, Alpine ships 250000;
1101/// 500 is a safe middle ground that also keeps the prepared-statement
1102/// plan reusable across calls).
1103///
1104/// Empty `ids` short-circuits to an empty map without touching the
1105/// connection. Missing rows are silently skipped — the caller can
1106/// observe via `fetched.get(&id).is_none()` and fall through to
1107/// whatever default the original per-id path would have produced.
1108pub fn get_many(conn: &Connection, ids: &[String]) -> Result<HashMap<String, Memory>> {
1109    let mut out: HashMap<String, Memory> = HashMap::with_capacity(ids.len());
1110    if ids.is_empty() {
1111        return Ok(out);
1112    }
1113    const CHUNK: usize = 500;
1114    for chunk in ids.chunks(CHUNK) {
1115        let placeholders = std::iter::repeat("?")
1116            .take(chunk.len())
1117            .collect::<Vec<_>>()
1118            .join(",");
1119        let sql = format!("SELECT * FROM memories WHERE id IN ({placeholders})");
1120        let mut stmt = conn.prepare(&sql)?;
1121        let rows = stmt.query_map(rusqlite::params_from_iter(chunk.iter()), row_to_memory)?;
1122        for r in rows {
1123            let mem = r?;
1124            out.insert(mem.id.clone(), mem);
1125        }
1126    }
1127    Ok(out)
1128}
1129
1130/// Look up a memory by ID prefix. Returns the memory if exactly one match is found.
1131/// Returns `Ok(None)` if no matches. Returns an error if the prefix is ambiguous (>1 match).
1132pub fn get_by_prefix(conn: &Connection, prefix: &str) -> Result<Option<Memory>> {
1133    // Escape SQL LIKE wildcards in the prefix to prevent % and _ from matching broadly
1134    let escaped = prefix.replace('%', "\\%").replace('_', "\\_");
1135    let pattern = format!("{escaped}%");
1136    let mut stmt = conn.prepare("SELECT * FROM memories WHERE id LIKE ?1 ESCAPE '\\'")?;
1137    let rows: Vec<Memory> = stmt
1138        .query_map(params![pattern], row_to_memory)?
1139        .filter_map(Result::ok)
1140        .collect();
1141    match rows.len() {
1142        0 => Ok(None),
1143        1 => Ok(Some(rows.into_iter().next().expect("len checked"))),
1144        _ => {
1145            let ids: Vec<String> = rows.iter().map(|m| m.id.clone()).collect();
1146            // #962 — typed envelope; handler downcasts via
1147            // `MemoryError::from(anyhow::Error)` to map to 400 BAD_REQUEST.
1148            // The match-count is preserved in `candidates.len()` so the
1149            // Display format ("ambiguous ID prefix 'X': N matches\n…")
1150            // stays byte-identical to the legacy bail!() string.
1151            Err(anyhow::Error::new(StorageError::AmbiguousIdPrefix {
1152                prefix: prefix.to_string(),
1153                candidates: ids,
1154            }))
1155        }
1156    }
1157}
1158
1159/// Resolve an ID that may be a prefix. Tries exact match first, then prefix match.
1160pub fn resolve_id(conn: &Connection, id: &str) -> Result<Option<Memory>> {
1161    if let Some(mem) = get(conn, id)? {
1162        return Ok(Some(mem));
1163    }
1164    get_by_prefix(conn, id)
1165}
1166
1167/// Bump access count, extend TTL, auto-promote — atomic via transaction.
1168pub fn touch(conn: &Connection, id: &str, short_extend: i64, mid_extend: i64) -> Result<()> {
1169    let now = Utc::now();
1170    let now_str = now.to_rfc3339();
1171    let short_expires = (now + chrono::Duration::seconds(short_extend)).to_rfc3339();
1172    let mid_expires = (now + chrono::Duration::seconds(mid_extend)).to_rfc3339();
1173
1174    conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)?;
1175
1176    let result = (|| -> Result<()> {
1177        // #1596 — the per-access TTL window is an extension FLOOR, not a
1178        // replacement. `MAX(expires_at, ?N)` keeps whichever expiry is
1179        // later, so a fresh mid-tier row carrying its create-time +7d
1180        // backstop is no longer pulled IN to now+1d on first recall
1181        // (lived evidence: row 4c7e7cc1 went 2026-06-18 → 2026-06-12).
1182        // Both operands are UTC RFC3339 strings, so SQLite's scalar
1183        // MAX() lexicographic comparison is chronological. Long-tier
1184        // (NULL expiry) rows stay NULL via the first CASE arm.
1185        conn.execute(
1186            "UPDATE memories SET
1187                access_count = MIN(access_count + 1, 1000000),
1188                last_accessed_at = ?1,
1189                expires_at = CASE
1190                    WHEN tier = 'long' THEN expires_at
1191                    WHEN tier = 'short' AND expires_at IS NOT NULL THEN MAX(expires_at, ?2)
1192                    WHEN tier = 'mid' AND expires_at IS NOT NULL THEN MAX(expires_at, ?3)
1193                    ELSE expires_at
1194                END
1195             WHERE id = ?4",
1196            params![now_str, short_expires, mid_expires, id],
1197        )?;
1198
1199        conn.execute(
1200            "UPDATE memories SET tier = 'long', expires_at = NULL, updated_at = ?1
1201             WHERE id = ?2 AND tier = 'mid' AND access_count >= ?3",
1202            params![now_str, id, PROMOTION_THRESHOLD],
1203        )?;
1204
1205        conn.execute(
1206            "UPDATE memories SET priority = MIN(priority + 1, 10)
1207             WHERE id = ?1 AND access_count > 0 AND access_count % 10 = 0 AND priority < 10",
1208            params![id],
1209        )?;
1210
1211        Ok(())
1212    })();
1213
1214    match result {
1215        Ok(()) => {
1216            conn.execute_batch(connection::SQL_COMMIT)?;
1217            Ok(())
1218        }
1219        Err(e) => {
1220            if let Err(rb) = conn.execute_batch(connection::SQL_ROLLBACK) {
1221                tracing::error!("ROLLBACK failed in touch: {}", rb);
1222            }
1223            Err(e)
1224        }
1225    }
1226}
1227
1228/// Cluster-F PERF-6 — batched touch.
1229///
1230/// Equivalent to invoking [`touch`] K times in sequence, but
1231/// collapses the per-row `BEGIN IMMEDIATE` … `COMMIT` cycle into a
1232/// SINGLE outer transaction so a K-row recall pays the SQLite
1233/// write-lock + commit cost ONCE instead of K times. The three
1234/// per-row UPDATE statements still run (same semantics: access bump
1235/// + TTL extend, mid→long promotion at `PROMOTION_THRESHOLD`,
1236/// priority+1 every 10 accesses); only the transaction framing
1237/// changes.
1238///
1239/// A failure mid-batch rolls back the entire transaction (no partial
1240/// touches survive) and surfaces a single error to the caller — which
1241/// matches the existing behaviour where any failed touch surfaces
1242/// to the recall log path.
1243///
1244/// Returns the number of rows successfully touched (always equal to
1245/// `ids.len()` on success).
1246pub fn touch_many(
1247    conn: &Connection,
1248    ids: &[&str],
1249    short_extend: i64,
1250    mid_extend: i64,
1251) -> Result<usize> {
1252    if ids.is_empty() {
1253        return Ok(0);
1254    }
1255    let now = Utc::now();
1256    let now_str = now.to_rfc3339();
1257    let short_expires = (now + chrono::Duration::seconds(short_extend)).to_rfc3339();
1258    let mid_expires = (now + chrono::Duration::seconds(mid_extend)).to_rfc3339();
1259
1260    conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)?;
1261
1262    let result = (|| -> Result<()> {
1263        // Cache the three prepared statements once for the whole
1264        // batch; each `execute` reuses the cached query plan instead
1265        // of re-parsing per row.
1266        // #1596 — extension-floor semantics, mirroring [`touch`]: the
1267        // per-access window only ever EXTENDS expiry (MAX over the
1268        // existing column), never shortens it. One batched UPDATE per
1269        // row is preserved.
1270        let mut bump_stmt = conn.prepare_cached(
1271            "UPDATE memories SET
1272                access_count = MIN(access_count + 1, 1000000),
1273                last_accessed_at = ?1,
1274                expires_at = CASE
1275                    WHEN tier = 'long' THEN expires_at
1276                    WHEN tier = 'short' AND expires_at IS NOT NULL THEN MAX(expires_at, ?2)
1277                    WHEN tier = 'mid' AND expires_at IS NOT NULL THEN MAX(expires_at, ?3)
1278                    ELSE expires_at
1279                END
1280             WHERE id = ?4",
1281        )?;
1282        let mut promote_stmt = conn.prepare_cached(
1283            "UPDATE memories SET tier = 'long', expires_at = NULL, updated_at = ?1
1284             WHERE id = ?2 AND tier = 'mid' AND access_count >= ?3",
1285        )?;
1286        let mut priority_stmt = conn.prepare_cached(
1287            "UPDATE memories SET priority = MIN(priority + 1, 10)
1288             WHERE id = ?1 AND access_count > 0 AND access_count % 10 = 0 AND priority < 10",
1289        )?;
1290        for id in ids {
1291            bump_stmt.execute(params![now_str, short_expires, mid_expires, id])?;
1292            promote_stmt.execute(params![now_str, id, PROMOTION_THRESHOLD])?;
1293            priority_stmt.execute(params![id])?;
1294        }
1295        Ok(())
1296    })();
1297
1298    match result {
1299        Ok(()) => {
1300            conn.execute_batch(connection::SQL_COMMIT)?;
1301            Ok(ids.len())
1302        }
1303        Err(e) => {
1304            if let Err(rb) = conn.execute_batch(connection::SQL_ROLLBACK) {
1305                tracing::error!("ROLLBACK failed in touch_many: {}", rb);
1306            }
1307            Err(e)
1308        }
1309    }
1310}
1311
/// v0.7.0 Provenance Gap 1 (issue #884) — typed optimistic-concurrency
/// error returned by [`update_with_expected_version`] (and the
/// append-and-archive path [`update_with_archive_on_supersede`]) when
/// the caller passed `expected_version` and the stored row's current
/// `version` has drifted. Carries both expected + current so the
/// caller can surface a useful diagnostic and choose between
/// re-read+re-apply or bubbling CONFLICT upstream.
#[derive(Debug, Clone)]
pub struct VersionConflict {
    /// Id of the memory whose version gate failed.
    pub id: String,
    /// Version the caller expected the stored row to carry.
    pub expected: i64,
    /// Version actually found on the stored row.
    pub current: i64,
}

impl std::fmt::Display for VersionConflict {
    /// Renders the conflict as a single `CONFLICT: …` line naming the
    /// memory id, the expected version and the stored version.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "CONFLICT: memory {} expected_version={} but stored version={}",
            self.id, self.expected, self.current
        )
    }
}

// No `source()`: the conflict is a leaf error, not a wrapper.
impl std::error::Error for VersionConflict {}
1339
1340#[allow(clippy::too_many_arguments)]
1341pub fn update(
1342    conn: &Connection,
1343    id: &str,
1344    title: Option<&str>,
1345    content: Option<&str>,
1346    tier: Option<&Tier>,
1347    namespace: Option<&str>,
1348    tags: Option<&Vec<String>>,
1349    priority: Option<i32>,
1350    confidence: Option<f64>,
1351    expires_at: Option<&str>,
1352    metadata: Option<&serde_json::Value>,
1353) -> Result<(bool, bool)> {
1354    update_with_expected_version(
1355        conn, id, title, content, tier, namespace, tags, priority, confidence, expires_at,
1356        metadata, None, None,
1357    )
1358}
1359
1360/// v0.7.0 Provenance Gap 1 (issue #884) — optimistic-concurrency aware
1361/// variant of [`update`]. When `expected_version` is `Some(v)`, the
1362/// update fails with a typed [`VersionConflict`] error if the stored
1363/// row's `version` is not equal to `v`. When `None`, the legacy
1364/// last-write-wins behaviour is preserved (still bumps `version` on
1365/// success). On a successful mutation the row's `version` is
1366/// monotonically incremented; the new value is observable on the
1367/// subsequent read.
1368///
1369/// # Errors
1370///
1371/// * [`VersionConflict`] — when `expected_version` is `Some` and the
1372///   stored value has drifted.
1373/// * Other rusqlite errors bubble up from the prepare/execute pair.
1374#[allow(clippy::too_many_arguments, clippy::too_many_lines)]
1375pub fn update_with_expected_version(
1376    conn: &Connection,
1377    id: &str,
1378    title: Option<&str>,
1379    content: Option<&str>,
1380    tier: Option<&Tier>,
1381    namespace: Option<&str>,
1382    tags: Option<&Vec<String>>,
1383    priority: Option<i32>,
1384    confidence: Option<f64>,
1385    expires_at: Option<&str>,
1386    metadata: Option<&serde_json::Value>,
1387    source_uri: Option<&str>,
1388    expected_version: Option<i64>,
1389) -> Result<(bool, bool)> {
1390    let mut stmt = conn.prepare_cached(SQL_SELECT_MEMORY_ROW_BY_ID)?;
1391    let mut rows = stmt.query_map(params![id], row_to_memory)?;
1392    let Some(Ok(existing)) = rows.next() else {
1393        return Ok((false, false));
1394    };
1395    drop(rows);
1396    drop(stmt);
1397
1398    // v0.7.0 Provenance Gap 1 (#884) — pre-check optimistic gate.
1399    // The same predicate is also asserted atomically inside the
1400    // UPDATE statement below so a racing writer that slipped in
1401    // between the SELECT and the UPDATE still fails CONFLICT.
1402    if let Some(expected) = expected_version
1403        && existing.version != expected
1404    {
1405        return Err(VersionConflict {
1406            id: existing.id.clone(),
1407            expected,
1408            current: existing.version,
1409        }
1410        .into());
1411    }
1412
1413    let new_title = title.unwrap_or(&existing.title);
1414    let new_content = content.unwrap_or(&existing.content);
1415    let content_changed = new_title != existing.title || new_content != existing.content;
1416
1417    // Tier downgrade protection: never downgrade, consistent with insert path.
1418    let effective_tier = match (tier, &existing.tier) {
1419        (Some(requested), existing_tier) => match (existing_tier, requested) {
1420            (Tier::Long, _) => &Tier::Long,         // long never downgrades
1421            (Tier::Mid, Tier::Short) => &Tier::Mid, // mid never downgrades to short
1422            (_, requested) => requested,            // upgrades and same-tier are fine
1423        },
1424        (None, existing_tier) => existing_tier,
1425    };
1426
1427    let namespace = namespace.unwrap_or(&existing.namespace);
1428    let tags = tags.unwrap_or(&existing.tags);
1429    let priority = priority.unwrap_or(existing.priority);
1430    let confidence = confidence.unwrap_or(existing.confidence);
1431    // Treat empty string as None (clear expiry) — don't store "" in the DB
1432    let expires_at = match expires_at {
1433        Some("" | "null") => None,
1434        Some(v) => Some(v),
1435        None => existing.expires_at.as_deref(),
1436    };
1437    let metadata = metadata.unwrap_or(&existing.metadata);
1438
1439    // #1451 (SEC, HIGH) — substrate governance pre-write gate on the
1440    // optimistic-update path. The insert/supersede/consolidate/restore
1441    // paths all consult GOVERNANCE_PRE_WRITE; update was the lone gap,
1442    // so a refuse rule could be evaded by storing benign content then
1443    // updating it into the refused namespace/tier/title. Build the
1444    // post-merge row and consult BEFORE any SQL touches the DB; a
1445    // refusal returns the typed GovernanceRefusal with no row mutated.
1446    let governed = Memory {
1447        tier: effective_tier.clone(),
1448        namespace: namespace.to_string(),
1449        title: new_title.to_string(),
1450        content: new_content.to_string(),
1451        tags: tags.clone(),
1452        priority,
1453        confidence,
1454        expires_at: expires_at.map(str::to_string),
1455        metadata: metadata.clone(),
1456        source_uri: source_uri
1457            .map(str::to_string)
1458            .or_else(|| existing.source_uri.clone()),
1459        ..existing.clone()
1460    };
1461    consult_governance_pre_write(&governed)?;
1462
1463    let tags_json = serde_json::to_string(tags)?;
1464    let metadata_json = serde_json::to_string(metadata)?;
1465    let now = Utc::now().to_rfc3339();
1466
1467    // Ultrareview #354: rely on the UNIQUE INDEX on (title, namespace)
1468    // to enforce collision atomically at the DB layer. The previous
1469    // check-then-update sequence had a race — another transaction
1470    // could insert a colliding row between the SELECT and the UPDATE,
1471    // and the UPDATE would surface as a generic SQLite constraint
1472    // error to the caller. Now the collision check is inline: the
1473    // UPDATE fails with a well-scoped UniqueViolation, and we re-
1474    // query the colliding row's id only on that specific error for
1475    // the friendly message.
1476    //
1477    // v0.7.0 Provenance Gap 1 (#884) — UPDATE re-asserts
1478    // `expected_version` atomically and bumps `version + 1` on
1479    // success so a racing caller that read the SAME expected_version
1480    // sees a CONFLICT (their WHERE clause no longer matches the
1481    // bumped value). When `expected_version` is NULL the
1482    // `?12 IS NULL` predicate short-circuits the gate.
1483    // v0.7.0 Provenance Gap 2 (#906) — `source_uri` is an opt-in patch
1484    // field. When `None`, the COALESCE keeps the stored value (a
1485    // patch that doesn't touch source_uri must NOT blank it out).
1486    // When `Some(uri)`, the row's source_uri is rewritten verbatim
1487    // (rename / scheme migration / bad-data correction).
1488    let update_res = conn.execute(
1489        "UPDATE memories SET tier=?1, namespace=?2, title=?3, content=?4, tags=?5, priority=?6, confidence=?7, updated_at=?8, expires_at=?9, metadata=?10, source_uri = COALESCE(?11, source_uri), version = version + 1
1490         WHERE id=?12 AND (?13 IS NULL OR version = ?13)",
1491        params![effective_tier.as_str(), namespace, new_title, new_content, tags_json, priority, confidence, now, expires_at, metadata_json, source_uri, id, expected_version],
1492    );
1493    match update_res {
1494        Ok(0) => {
1495            // Either the row vanished between SELECT and UPDATE, or
1496            // the version drifted (racing writer slipped in). When
1497            // expected_version was supplied, re-read so the CONFLICT
1498            // envelope carries the current stored value.
1499            if let Some(expected) = expected_version {
1500                let current_version: Option<i64> = conn
1501                    .query_row(
1502                        "SELECT version FROM memories WHERE id = ?1",
1503                        params![id],
1504                        |r| r.get(0),
1505                    )
1506                    .ok();
1507                if let Some(current) = current_version {
1508                    return Err(VersionConflict {
1509                        id: id.to_string(),
1510                        expected,
1511                        current,
1512                    }
1513                    .into());
1514                }
1515            }
1516            Ok((false, false))
1517        }
1518        Ok(_) => Ok((true, content_changed)),
1519        Err(rusqlite::Error::SqliteFailure(err, _))
1520            if err.code == rusqlite::ErrorCode::ConstraintViolation =>
1521        {
1522            let other: Option<String> = conn
1523                .query_row(
1524                    "SELECT id FROM memories WHERE title = ?1 AND namespace = ?2 AND id != ?3",
1525                    params![new_title, namespace, id],
1526                    |r| r.get(0),
1527                )
1528                .ok();
1529            if let Some(other_id) = other {
1530                // #962 typed envelope — UniqueConflict surfaces as
1531                // `MemoryError::Conflict` (HTTP 409).
1532                return Err(anyhow::Error::new(StorageError::UniqueConflict {
1533                    reason: format!(
1534                        "title '{new_title}' already exists in namespace '{namespace}' (memory {other_id})"
1535                    ),
1536                }));
1537            }
1538            Err(anyhow::anyhow!("update failed with constraint violation"))
1539        }
1540        Err(e) => Err(e.into()),
1541    }
1542}
1543
/// v0.7.0 Provenance Gap 5 (issue #888) — outcome of an
/// append-and-archive write, as returned by
/// [`update_with_archive_on_supersede`].
///
/// The supersede lineage is encoded via TWO mechanisms (NOT three):
///
/// 1. `archived_memories.archive_reason='superseded'` on the OLD row;
/// 2. the `new_memory.metadata.superseded_id` forward pointer on the
///    NEW row.
///
/// A `memory_links` `supersedes` edge is NOT written because the FK
/// `target_id REFERENCES memories(id)` would reject it (the archived
/// row no longer lives in the live `memories` table). See #895 for
/// the future archive-cross-ref path that would unblock a uniform
/// link surface.
#[derive(Clone, Debug)]
pub struct SupersedeResult {
    /// Id of the OLD memory, now in `archived_memories` with
    /// `archive_reason='superseded'`.
    pub archived_id: String,
    /// Id of the freshly-minted row carrying the patched content.
    pub new_id: String,
}
1563
1564/// v0.7.0 Provenance Gap 5 (issue #888) — append-and-archive write
1565/// path. Used by the MCP `memory_update` tool when the caller passes
1566/// `edit_source` of `llm` or `hook`. Atomic: every step runs inside
1567/// a `BEGIN IMMEDIATE` / `COMMIT` pair so a failure mid-way leaves
1568/// the old row live (no partial supersede).
1569///
1570/// Sequence (mirrors mem9's split-write-path pattern):
1571///
1572/// 1. Honor the optimistic-concurrency gate (`expected_version`)
1573///    against the OLD row. Conflict surfaces as
1574///    [`VersionConflict`] before any mutation lands.
1575/// 2. Archive the OLD row with `archive_reason='superseded'` and a
1576///    `superseded_at` timestamp in the archive metadata so a
1577///    rewind via `memory_archive_list` can find it.
1578/// 3. Insert a NEW memory row carrying the patched fields. The new
1579///    row's `(title, namespace)` may collide with the archived
1580///    row's (since the archive is in a separate table); the new
1581///    row's `id` is fresh.
1582/// 4. Stamp the supersede pointer in the new row's
1583///    `metadata.superseded_id`. A `memory_links` `supersedes` row
1584///    is intentionally NOT written — the FK target would point at
1585///    the archived id which has left the live `memories` table.
1586///    See impl comment + #895 for the archive-cross-ref follow-on.
1587///
1588/// # Errors
1589///
1590/// * [`VersionConflict`] — when `expected_version` is `Some` and
1591///   the stored row's `version` has drifted.
1592/// * rusqlite / serde errors bubble up from the underlying
1593///   archive + insert + link writes.
1594#[allow(clippy::too_many_arguments, clippy::too_many_lines)]
1595pub fn update_with_archive_on_supersede(
1596    conn: &Connection,
1597    id: &str,
1598    title: Option<&str>,
1599    content: Option<&str>,
1600    tier: Option<&Tier>,
1601    namespace: Option<&str>,
1602    tags: Option<&Vec<String>>,
1603    priority: Option<i32>,
1604    confidence: Option<f64>,
1605    expires_at: Option<&str>,
1606    metadata: Option<&serde_json::Value>,
1607    source_uri: Option<&str>,
1608    expected_version: Option<i64>,
1609    edit_source: crate::models::EditSource,
1610) -> Result<SupersedeResult> {
1611    // Read the existing row so we can compose the patched NEW row.
1612    let mut stmt = conn.prepare_cached(SQL_SELECT_MEMORY_ROW_BY_ID)?;
1613    let mut rows = stmt.query_map(params![id], row_to_memory)?;
1614    let Some(Ok(existing)) = rows.next() else {
1615        // #962 typed envelope — 404 NOT_FOUND through MemoryError mapping.
1616        return Err(anyhow::Error::new(StorageError::MemoryNotFound {
1617            id: id.to_string(),
1618            role: None,
1619        }));
1620    };
1621    drop(rows);
1622    drop(stmt);
1623
1624    // v0.7.0 Provenance Gap 1 (#884) — optimistic-concurrency gate.
1625    if let Some(expected) = expected_version
1626        && existing.version != expected
1627    {
1628        return Err(VersionConflict {
1629            id: existing.id.clone(),
1630            expected,
1631            current: existing.version,
1632        }
1633        .into());
1634    }
1635
1636    // Compose the NEW memory row by overlaying the patch on the
1637    // OLD row. Mirrors the in-place `update` patch semantics:
1638    // unspecified fields inherit from the existing row.
1639    let new_id = uuid::Uuid::new_v4().to_string();
1640    let now = Utc::now().to_rfc3339();
1641    let new_title = title.unwrap_or(&existing.title).to_string();
1642    let new_content = content.unwrap_or(&existing.content).to_string();
1643    // Tier monotonicity preserved (long ≥ mid ≥ short).
1644    let new_tier = match (tier, &existing.tier) {
1645        (Some(requested), existing_tier) => match (existing_tier, requested) {
1646            (Tier::Long, _) => Tier::Long,
1647            (Tier::Mid, Tier::Short) => Tier::Mid,
1648            (_, r) => r.clone(),
1649        },
1650        (None, existing_tier) => existing_tier.clone(),
1651    };
1652    let new_namespace = namespace.unwrap_or(&existing.namespace).to_string();
1653    let new_tags = tags.cloned().unwrap_or_else(|| existing.tags.clone());
1654    let new_priority = priority.unwrap_or(existing.priority);
1655    let new_confidence = confidence.unwrap_or(existing.confidence);
1656    let new_expires = match expires_at {
1657        Some("" | "null") => None,
1658        Some(v) => Some(v.to_string()),
1659        None => existing.expires_at.clone(),
1660    };
1661    // v0.7.0 Provenance Gap 2 (#906) — caller-supplied source_uri
1662    // wins; otherwise inherit from the OLD row. Mirrors the pattern
1663    // used for title/content/tier above.
1664    let new_source_uri = match source_uri {
1665        Some(uri) => Some(uri.to_string()),
1666        None => existing.source_uri.clone(),
1667    };
1668    // Stamp the edit-source provenance into the new row's metadata so
1669    // downstream observers can tell this row came from an
1670    // append-and-archive supersede vs. a direct user write.
1671    let mut new_metadata = metadata
1672        .cloned()
1673        .unwrap_or_else(|| existing.metadata.clone());
1674    if let serde_json::Value::Object(ref mut m) = new_metadata {
1675        m.insert(
1676            "edit_source".to_string(),
1677            serde_json::Value::String(edit_source.as_str().to_string()),
1678        );
1679        m.insert(
1680            field_names::SUPERSEDED_ID.to_string(),
1681            serde_json::Value::String(existing.id.clone()),
1682        );
1683    }
1684
1685    // #1638 — archive + insert run inside ONE BEGIN IMMEDIATE (below),
1686    // honoring the documented atomicity contract: a failure mid-way
1687    // (SQLITE_BUSY from a concurrent CLI-process writer, ENOSPC, FTS
1688    // trigger I/O error on the insert) rolls back the archive too, so
1689    // the OLD row stays live instead of vanishing into the archive
1690    // with an error returned. Uses `archive_memory_no_tx` (the
1691    // `append_signed_event_no_tx` idiom) because SQLite refuses
1692    // nested transactions.
1693    let archived_id = existing.id.clone();
1694
1695    // FX-C5 — compose the NEW row up front so the substrate
1696    // pre-write governance hook (`GOVERNANCE_PRE_WRITE`) gets a
1697    // chance to refuse BEFORE the archive step destroys the live
1698    // OLD row. Pre-FX-C5 the hook was consulted transitively via
1699    // `insert(..)` at the tail of this function; archive ran first
1700    // so a refusal left the live table without the OLD row AND
1701    // without the patched NEW row. Now the hook fires on a fully-
1702    // composed candidate before any state mutation, mirroring the
1703    // FX-2 pattern on the postgres adapter (see
1704    // `consult_governance_pre_write_pg` in `src/store/postgres.rs`).
1705    let mut new_mem = existing.clone();
1706    new_mem.id = new_id.clone();
1707    new_mem.title = new_title;
1708    new_mem.content = new_content;
1709    new_mem.tier = new_tier;
1710    new_mem.namespace = new_namespace;
1711    new_mem.tags = new_tags;
1712    new_mem.priority = new_priority;
1713    new_mem.confidence = new_confidence;
1714    new_mem.expires_at = new_expires;
1715    new_mem.metadata = new_metadata;
1716    new_mem.source_uri = new_source_uri;
1717    new_mem.created_at = now.clone();
1718    new_mem.updated_at = now.clone();
1719    new_mem.access_count = 0;
1720    new_mem.last_accessed_at = None;
1721    // The NEW row starts at version=1 — it is a fresh row, not a
1722    // continuation of the OLD row's version chain (the chain is
1723    // preserved via the supersede link stamped in metadata).
1724    new_mem.version = crate::models::default_memory_version();
1725
1726    // FX-C5 — consult the substrate governance pre-write hook on
1727    // the composed NEW row BEFORE archiving the OLD row. A refusal
1728    // returns cleanly with no state change.
1729    consult_governance_pre_write(&new_mem)?;
1730
1731    // Steps 1+2 (#1638): one transaction around archive + insert.
1732    conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)?;
1733    let tx_result = (|| -> Result<()> {
1734        // Step 1: archive the OLD row with reason='superseded'.
1735        let moved = archive_memory_no_tx(conn, &archived_id, Some("superseded"))?;
1736        if !moved {
1737            // #962 typed envelope — substrate-internal fault (DB row
1738            // vanished between read and write or row count drifted).
1739            // Maps to 500.
1740            return Err(anyhow::Error::new(StorageError::ArchiveSupersedeFailed {
1741                archived_id: archived_id.clone(),
1742            }));
1743        }
1744        // Step 2: insert the NEW row carrying the patched content.
1745        insert(conn, &new_mem)?;
1746        Ok(())
1747    })();
1748    match tx_result {
1749        Ok(()) => conn.execute_batch(connection::SQL_COMMIT)?,
1750        Err(e) => {
1751            let _ = conn.execute_batch(connection::SQL_ROLLBACK);
1752            return Err(e);
1753        }
1754    }
1755
1756    // Step 3: the supersede edge from new→archived id is preserved
1757    // in the new row's `metadata.superseded_id` (see above). A
1758    // proper `memory_links` row would trip the FK CHECK on
1759    // `target_id REFERENCES memories(id)` because the OLD row no
1760    // longer lives in `memories`; the metadata pointer is the
1761    // substrate-clean way to record the lineage until archive
1762    // cross-references land (tracked separately).
1763    Ok(SupersedeResult {
1764        archived_id,
1765        new_id,
1766    })
1767}
1768
1769pub fn delete(conn: &Connection, id: &str) -> Result<bool> {
1770    // Clean up namespace_meta if this memory was a namespace standard
1771    conn.execute(SQL_DELETE_NAMESPACE_META_BY_STANDARD_ID, params![id])?;
1772    let changed = conn.execute(SQL_DELETE_MEMORY_BY_ID, params![id])?;
1773    Ok(changed > 0)
1774}
1775
1776/// Move a memory from `memories` to `archived_memories`. Used by the
1777/// HTTP `/api/v1/archive` explicit-archive endpoint (S29) and by
1778/// `sync_push` when a peer pushes an `archives: [id]` record.
1779///
1780/// Unlike `gc(archive=true)` this does not filter on `expires_at` — the
1781/// caller is explicitly asking for the row to be archived right now.
1782///
1783/// Returns `true` if a row was moved, `false` if no live memory existed
1784/// with this id (e.g. it was already archived or never written locally).
1785/// A missing-on-peer id is expected during normal fanout and callers
1786/// treat it as a no-op.
1787///
1788/// # Errors
1789///
1790/// Returns an error if the INSERT-SELECT or DELETE fails.
1791pub fn archive_memory(conn: &Connection, id: &str, reason: Option<&str>) -> Result<bool> {
1792    conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)?;
1793    let result = archive_memory_no_tx(conn, id, reason);
1794    match result {
1795        Ok(moved) => {
1796            conn.execute_batch(connection::SQL_COMMIT)?;
1797            Ok(moved)
1798        }
1799        Err(e) => {
1800            let _ = conn.execute_batch(connection::SQL_ROLLBACK);
1801            Err(e)
1802        }
1803    }
1804}
1805
1806/// #1638 — transaction-free core of [`archive_memory`], for callers
1807/// that already hold an open transaction (the supersede path wraps
1808/// archive + insert in ONE `BEGIN IMMEDIATE` so a mid-failure leaves
1809/// the OLD row live, per the function's documented atomicity
1810/// contract). Same idiom as `append_signed_event_no_tx`.
1811pub(crate) fn archive_memory_no_tx(
1812    conn: &Connection,
1813    id: &str,
1814    reason: Option<&str>,
1815) -> Result<bool> {
1816    let now = Utc::now().to_rfc3339();
1817    let reason = reason.unwrap_or("archive");
1818    let result = (|| -> Result<bool> {
1819        let exists: bool = conn
1820            .query_row(SQL_MEMORY_EXISTS_COUNT, params![id], |r| r.get(0))
1821            .unwrap_or(false);
1822        if !exists {
1823            return Ok(false);
1824        }
1825        // v0.6.3.1 P2 (G5) — copy embedding + embedding_dim into the archive
1826        // and capture original tier + expires_at so restore_archived can
1827        // round-trip the row instead of resetting to long/permanent.
1828        conn.execute(
1829            "INSERT OR REPLACE INTO archived_memories
1830             (id, tier, namespace, title, content, tags, priority, confidence,
1831              source, access_count, created_at, updated_at, last_accessed_at,
1832              expires_at, archived_at, archive_reason, metadata,
1833              embedding, embedding_dim, original_tier, original_expires_at,
1834              reflection_depth, atomised_into, atom_of, memory_kind,
1835              entity_id, persona_version, citations, source_uri, source_span,
1836              confidence_source, confidence_signals, confidence_decayed_at,
1837              mentioned_entity_id, version)
1838             SELECT id, tier, namespace, title, content, tags, priority, confidence,
1839                    source, access_count, created_at, updated_at, last_accessed_at,
1840                    expires_at, ?1, ?2, metadata,
1841                    embedding, embedding_dim, tier, expires_at,
1842                    reflection_depth, atomised_into, atom_of, memory_kind,
1843                    entity_id, persona_version, citations, source_uri, source_span,
1844                    confidence_source, confidence_signals, confidence_decayed_at,
1845                    mentioned_entity_id, version
1846             FROM memories WHERE id = ?3",
1847            params![now, reason, id],
1848        )?;
1849        // Clean up namespace_meta — mirrors `delete`'s cleanup so an archived
1850        // row is not still referenced as the namespace standard.
1851        conn.execute(SQL_DELETE_NAMESPACE_META_BY_STANDARD_ID, params![id])?;
1852        let removed = conn.execute(SQL_DELETE_MEMORY_BY_ID, params![id])?;
1853        Ok(removed > 0)
1854    })();
1855    result
1856}
1857
1858/// #940 (security-high, 2026-05-20) — caller-scoped archive variant.
1859/// Mirrors [`archive_memory`] but constrains the soft-move to rows
1860/// in the live `memories` table whose `metadata->'agent_id'` JSON
1861/// field matches `caller` (with the inbox-target carve-out:
1862/// `metadata->'target_agent_id' == caller` is also archivable by
1863/// the inbox owner, matching
1864/// [`crate::store::is_visible_to_caller`]).
1865///
1866/// Pre-#940 the HTTP handler at
1867/// `src/handlers/archive.rs::archive_by_ids` (sqlite branch) called
1868/// the owner-blind [`archive_memory`] directly; any authenticated
1869/// HTTP caller could bulk-archive any other owner's live rows
1870/// (cross-tenant denial-of-service primitive). The postgres SAL
1871/// branch was already QC-P1-fixed (2026-05-20) to pass
1872/// `CallerContext::for_agent(caller)`; the sqlite branch is closed
1873/// by this helper. Returns `Ok(false)` on a non-owner attempt so
1874/// the surface cannot be used to probe other owners' live ids.
1875///
1876/// # Errors
1877///
1878/// Returns an error if the INSERT-SELECT or DELETE fails.
1879pub fn archive_memory_for_caller(
1880    conn: &Connection,
1881    id: &str,
1882    reason: Option<&str>,
1883    caller: &str,
1884) -> Result<bool> {
1885    let now = Utc::now().to_rfc3339();
1886    let reason = reason.unwrap_or("archive");
1887    conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)?;
1888    let result = (|| -> Result<bool> {
1889        // Owner gate: row must exist AND match the caller (or be an
1890        // inbox-target row whose recipient is the caller).
1891        let owned: bool = conn
1892            .query_row(
1893                "SELECT COUNT(*) > 0 FROM memories \
1894                 WHERE id = ?1 \
1895                   AND ( \
1896                     json_extract(metadata, '$.agent_id') = ?2 OR \
1897                     json_extract(metadata, '$.target_agent_id') = ?2 OR \
1898                     json_extract(metadata, '$.agent_id') IS NULL OR \
1899                     json_extract(metadata, '$.agent_id') = '' \
1900                   )",
1901                params![id, caller],
1902                |r| r.get(0),
1903            )
1904            .unwrap_or(false);
1905        if !owned {
1906            return Ok(false);
1907        }
1908        conn.execute(
1909            "INSERT OR REPLACE INTO archived_memories
1910             (id, tier, namespace, title, content, tags, priority, confidence,
1911              source, access_count, created_at, updated_at, last_accessed_at,
1912              expires_at, archived_at, archive_reason, metadata,
1913              embedding, embedding_dim, original_tier, original_expires_at,
1914              reflection_depth, atomised_into, atom_of, memory_kind,
1915              entity_id, persona_version, citations, source_uri, source_span,
1916              confidence_source, confidence_signals, confidence_decayed_at,
1917              mentioned_entity_id, version)
1918             SELECT id, tier, namespace, title, content, tags, priority, confidence,
1919                    source, access_count, created_at, updated_at, last_accessed_at,
1920                    expires_at, ?1, ?2, metadata,
1921                    embedding, embedding_dim, tier, expires_at,
1922                    reflection_depth, atomised_into, atom_of, memory_kind,
1923                    entity_id, persona_version, citations, source_uri, source_span,
1924                    confidence_source, confidence_signals, confidence_decayed_at,
1925                    mentioned_entity_id, version
1926             FROM memories WHERE id = ?3",
1927            params![now, reason, id],
1928        )?;
1929        // Clean up namespace_meta — mirrors `delete`'s cleanup so an archived
1930        // row is not still referenced as the namespace standard.
1931        conn.execute(SQL_DELETE_NAMESPACE_META_BY_STANDARD_ID, params![id])?;
1932        let removed = conn.execute(SQL_DELETE_MEMORY_BY_ID, params![id])?;
1933        Ok(removed > 0)
1934    })();
1935    match result {
1936        Ok(moved) => {
1937            conn.execute_batch(connection::SQL_COMMIT)?;
1938            Ok(moved)
1939        }
1940        Err(e) => {
1941            let _ = conn.execute_batch(connection::SQL_ROLLBACK);
1942            Err(e)
1943        }
1944    }
1945}
1946
1947/// #1601 — build the FTS5 query for the DESTRUCTIVE forget paths.
1948///
1949/// `forget` / `forget_count` historically routed the caller's pattern
1950/// through `sanitize_fts_query(pat, /* use_or = */ true)` — the fuzzy
1951/// OR join the recall path uses for high RANKED retrieval. For a bulk
1952/// DELETE that over-matches catastrophically: pattern "D6 scratch"
1953/// matched (and would delete) every row containing EITHER token, and
1954/// "D6 nonexistentzzzword" still matched rows containing just "D6".
1955/// Destructive matching must be conservative: every
1956/// whitespace-separated token must match (FTS5 implicit AND — the
1957/// sanitized phrase-quoted tokens are space-joined). All three forget
1958/// sites (`forget_count`, the `forget` delete arm, and the
1959/// archive-before-delete arm) route through this single builder so
1960/// their match sets can never drift apart.
1961fn forget_fts_query(pat: &str) -> String {
1962    sanitize_fts_query(pat, false)
1963}
1964
1965/// Count memories that would be deleted by forget (for `dry_run`).
1966pub fn forget_count(
1967    conn: &Connection,
1968    namespace: Option<&str>,
1969    pattern: Option<&str>,
1970    tier: Option<&Tier>,
1971) -> Result<usize> {
1972    if pattern.is_none() && namespace.is_none() && tier.is_none() {
1973        // #962 typed envelope — 400 BAD_REQUEST via ValidationFailed.
1974        return Err(anyhow::Error::new(StorageError::InvalidArgument {
1975            reason: crate::errors::msg::FORGET_FILTER_REQUIRED.to_string(),
1976        }));
1977    }
1978    if let Some(pat) = pattern {
1979        let fts_query = forget_fts_query(pat);
1980        let tier_str = tier.map(|t| t.as_str().to_string());
1981        let count: i64 = conn.query_row(
1982            "SELECT COUNT(*) FROM memories WHERE rowid IN (
1983                SELECT m.rowid FROM memories_fts fts
1984                JOIN memories m ON m.rowid = fts.rowid
1985                WHERE memories_fts MATCH ?1
1986                  AND (?2 IS NULL OR m.namespace = ?2)
1987                  AND (?3 IS NULL OR m.tier = ?3)
1988            )",
1989            params![fts_query, namespace, tier_str],
1990            |r| r.get(0),
1991        )?;
1992        return Ok(usize::try_from(count).unwrap_or(0));
1993    }
1994    let tier_str = tier.map(|t| t.as_str().to_string());
1995    let count: i64 = conn.query_row(
1996        "SELECT COUNT(*) FROM memories WHERE (?1 IS NULL OR namespace = ?1) AND (?2 IS NULL OR tier = ?2)",
1997        params![namespace, tier_str],
1998        |r| r.get(0),
1999    )?;
2000    Ok(usize::try_from(count).unwrap_or(0))
2001}
2002
2003/// Forget by pattern — delete memories matching namespace + FTS pattern + tier.
2004/// If `archive` is true, archives memories before deletion.
2005pub fn forget(
2006    conn: &Connection,
2007    namespace: Option<&str>,
2008    pattern: Option<&str>,
2009    tier: Option<&Tier>,
2010    archive: bool,
2011) -> Result<usize> {
2012    if pattern.is_none() && namespace.is_none() && tier.is_none() {
2013        // #962 typed envelope — 400 BAD_REQUEST via ValidationFailed.
2014        return Err(anyhow::Error::new(StorageError::InvalidArgument {
2015            reason: crate::errors::msg::FORGET_FILTER_REQUIRED.to_string(),
2016        }));
2017    }
2018
2019    if archive {
2020        // Archive matching memories before deletion
2021        let now = Utc::now().to_rfc3339();
2022        if let Some(pat) = pattern {
2023            let fts_query = forget_fts_query(pat);
2024            let tier_str = tier.map(|t| t.as_str().to_string());
2025            // v0.6.3.1 P2 (G5) — preserve embedding + tier + expiry on forget-archive.
2026            // v0.7.0 issue #861 — also project `metadata` into the
2027            // archive row. The pre-fix INSERT omitted both the column
2028            // and the SELECT expression, so the column defaulted to
2029            // `'{}'` and `memory_archive_list` returned an empty object
2030            // for every forget-archived row (silently stripping
2031            // `agent_id`, `imported_from_*`, and every other operator-
2032            // visible attribution key). Mirrors the gc + explicit-
2033            // archive paths that already preserve metadata.
2034            conn.execute(
2035                "INSERT OR REPLACE INTO archived_memories
2036                 (id, tier, namespace, title, content, tags, priority, confidence,
2037                  source, access_count, created_at, updated_at, last_accessed_at,
2038                  expires_at, archived_at, archive_reason, metadata,
2039                  embedding, embedding_dim, original_tier, original_expires_at,
2040                  reflection_depth, atomised_into, atom_of, memory_kind,
2041                  entity_id, persona_version, citations, source_uri, source_span,
2042                  confidence_source, confidence_signals, confidence_decayed_at,
2043                  mentioned_entity_id, version)
2044                 SELECT id, tier, namespace, title, content, tags, priority, confidence,
2045                        source, access_count, created_at, updated_at, last_accessed_at,
2046                        expires_at, ?4, 'forget', metadata,
2047                        embedding, embedding_dim, tier, expires_at,
2048                        reflection_depth, atomised_into, atom_of, memory_kind,
2049                        entity_id, persona_version, citations, source_uri, source_span,
2050                        confidence_source, confidence_signals, confidence_decayed_at,
2051                        mentioned_entity_id, version
2052                 FROM memories WHERE rowid IN (
2053                    SELECT m.rowid FROM memories_fts fts
2054                    JOIN memories m ON m.rowid = fts.rowid
2055                    WHERE memories_fts MATCH ?1
2056                      AND (?2 IS NULL OR m.namespace = ?2)
2057                      AND (?3 IS NULL OR m.tier = ?3)
2058                 )",
2059                params![fts_query, namespace, tier_str, now],
2060            )?;
2061        } else {
2062            let tier_str = tier.map(|t| t.as_str().to_string());
2063            // v0.7.0 issue #861 — same metadata-projection fix as the
2064            // patterned branch above. Forget without a pattern still
2065            // archives whole namespaces/tiers, so the same bug applied.
2066            conn.execute(
2067                "INSERT OR REPLACE INTO archived_memories
2068                 (id, tier, namespace, title, content, tags, priority, confidence,
2069                  source, access_count, created_at, updated_at, last_accessed_at,
2070                  expires_at, archived_at, archive_reason, metadata,
2071                  embedding, embedding_dim, original_tier, original_expires_at,
2072                  reflection_depth, atomised_into, atom_of, memory_kind,
2073                  entity_id, persona_version, citations, source_uri, source_span,
2074                  confidence_source, confidence_signals, confidence_decayed_at,
2075                  mentioned_entity_id, version)
2076                 SELECT id, tier, namespace, title, content, tags, priority, confidence,
2077                        source, access_count, created_at, updated_at, last_accessed_at,
2078                        expires_at, ?3, 'forget', metadata,
2079                        embedding, embedding_dim, tier, expires_at,
2080                        reflection_depth, atomised_into, atom_of, memory_kind,
2081                        entity_id, persona_version, citations, source_uri, source_span,
2082                        confidence_source, confidence_signals, confidence_decayed_at,
2083                        mentioned_entity_id, version
2084                 FROM memories WHERE (?1 IS NULL OR namespace = ?1) AND (?2 IS NULL OR tier = ?2)",
2085                params![namespace, tier_str, now],
2086            )?;
2087        }
2088    }
2089
2090    // If pattern provided, use FTS to find matching IDs
2091    if let Some(pat) = pattern {
2092        let fts_query = forget_fts_query(pat);
2093        let tier_str = tier.map(|t| t.as_str().to_string());
2094        let deleted = conn.execute(
2095            "DELETE FROM memories WHERE rowid IN (
2096                SELECT m.rowid FROM memories_fts fts
2097                JOIN memories m ON m.rowid = fts.rowid
2098                WHERE memories_fts MATCH ?1
2099                  AND (?2 IS NULL OR m.namespace = ?2)
2100                  AND (?3 IS NULL OR m.tier = ?3)
2101            )",
2102            params![fts_query, namespace, tier_str],
2103        )?;
2104        return Ok(deleted);
2105    }
2106
2107    let tier_str = tier.map(|t| t.as_str().to_string());
2108    let deleted = conn.execute(
2109        "DELETE FROM memories WHERE (?1 IS NULL OR namespace = ?1) AND (?2 IS NULL OR tier = ?2)",
2110        params![namespace, tier_str],
2111    )?;
2112    Ok(deleted)
2113}
2114
/// #1602 — one row of a forget preview / deletion audit listing.
///
/// Built by [`forget_matches`] from the `id`, `title`, `namespace` and
/// `tier` columns of `memories`; serialisable so it can be returned to
/// callers as-is.
#[derive(Debug, Clone, serde::Serialize)]
pub struct ForgetMatch {
    /// `id` column of the matched memory.
    pub id: String,
    /// `title` column of the matched memory.
    pub title: String,
    /// `namespace` column of the matched memory.
    pub namespace: String,
    /// `tier` column of the matched memory, as stored (text).
    pub tier: String,
}
2123
2124/// #1602 — list the rows the forget filters currently match, capped
2125/// at `limit`.
2126///
2127/// `memory_forget {dry_run:true}` previously returned only a blind
2128/// `{would_delete: N}` count, so callers had no way to see WHAT a
2129/// destructive pattern was about to remove; the live run likewise
2130/// returned only a count, leaving recovery (archive restore) a
2131/// guessing game. This helper shares filter semantics with [`forget`]
2132/// / [`forget_count`] — including the #1601 AND pattern matching via
2133/// [`forget_fts_query`] — so the preview is exactly the set `forget`
2134/// would delete. Rows come back in stable `rowid` order; callers pass
2135/// `cap + 1` to detect truncation without a second COUNT query.
2136pub fn forget_matches(
2137    conn: &Connection,
2138    namespace: Option<&str>,
2139    pattern: Option<&str>,
2140    tier: Option<&Tier>,
2141    limit: usize,
2142) -> Result<Vec<ForgetMatch>> {
2143    if pattern.is_none() && namespace.is_none() && tier.is_none() {
2144        // #962 typed envelope — same refusal as `forget` / `forget_count`.
2145        return Err(anyhow::Error::new(StorageError::InvalidArgument {
2146            reason: crate::errors::msg::FORGET_FILTER_REQUIRED.to_string(),
2147        }));
2148    }
2149    let tier_str = tier.map(|t| t.as_str().to_string());
2150    let limit_i64 = i64::try_from(limit).unwrap_or(i64::MAX);
2151    let row_to_match = |row: &rusqlite::Row<'_>| -> rusqlite::Result<ForgetMatch> {
2152        Ok(ForgetMatch {
2153            id: row.get(0)?,
2154            title: row.get(1)?,
2155            namespace: row.get(2)?,
2156            tier: row.get(3)?,
2157        })
2158    };
2159    if let Some(pat) = pattern {
2160        let fts_query = forget_fts_query(pat);
2161        let mut stmt = conn.prepare(
2162            "SELECT m.id, m.title, m.namespace, m.tier
2163             FROM memories_fts fts
2164             JOIN memories m ON m.rowid = fts.rowid
2165             WHERE memories_fts MATCH ?1
2166               AND (?2 IS NULL OR m.namespace = ?2)
2167               AND (?3 IS NULL OR m.tier = ?3)
2168             ORDER BY m.rowid
2169             LIMIT ?4",
2170        )?;
2171        let rows = stmt
2172            .query_map(
2173                params![fts_query, namespace, tier_str, limit_i64],
2174                row_to_match,
2175            )?
2176            .collect::<rusqlite::Result<Vec<_>>>()?;
2177        return Ok(rows);
2178    }
2179    let mut stmt = conn.prepare(
2180        "SELECT id, title, namespace, tier FROM memories
2181         WHERE (?1 IS NULL OR namespace = ?1) AND (?2 IS NULL OR tier = ?2)
2182         ORDER BY rowid
2183         LIMIT ?3",
2184    )?;
2185    let rows = stmt
2186        .query_map(params![namespace, tier_str, limit_i64], row_to_match)?
2187        .collect::<rusqlite::Result<Vec<_>>>()?;
2188    Ok(rows)
2189}
2190
2191/// #1579 A2 — build the sargable `list` SQL + parameter vector.
2192///
2193/// The legacy single-shape query expressed every optional filter as a
2194/// `(?N IS NULL OR col = ?N)` arm. SQLite cannot drive such an arm
2195/// through an index (the predicate is not sargable), so the P1 perf
2196/// audit measured the 100k-row list page at ~141 ms: the plan answered
2197/// the expiry guard via `idx_memories_expires` and paid a USE TEMP
2198/// B-TREE FOR ORDER BY over the whole table. Appending each filter
2199/// ONLY when the caller supplied it gives the planner bare `col = ?` /
2200/// `col >= ?` predicates, so it walks `idx_memories_list_order
2201/// (priority DESC, updated_at DESC)` — or `idx_memories_ns_list_order
2202/// (namespace, priority DESC, updated_at DESC)` for namespace-filtered
2203/// shapes — in ORDER BY order with early-stop under the LIMIT
2204/// (~0.06 ms on the same corpus). EXPLAIN QUERY PLAN proof is pinned
2205/// by `tests/issue_1579_storage_perf.rs`.
2206///
2207/// The distinct shapes repeat across calls, so `list` prepares them
2208/// via `prepare_cached` — at most 2^7 shapes exist and real traffic
2209/// concentrates on a handful.
2210///
2211/// Public as the test-facing SSOT accessor for the EXPLAIN-pinning
2212/// regression tests (the `current_schema_version_for_tests` precedent):
2213/// the tests must plan the EXACT SQL production runs, not a restated
2214/// copy that could drift.
2215#[allow(clippy::too_many_arguments)]
2216#[must_use]
2217pub fn build_list_query(
2218    namespace: Option<&str>,
2219    tier: Option<&Tier>,
2220    min_priority: Option<i32>,
2221    now: &str,
2222    since: Option<&str>,
2223    until: Option<&str>,
2224    tags_filter: Option<&str>,
2225    agent_id: Option<&str>,
2226    limit: usize,
2227    offset: usize,
2228) -> (String, Vec<Box<dyn rusqlite::types::ToSql>>) {
2229    let mut sql = String::from(SQL_LIST_BASE);
2230    let mut params_vec: Vec<Box<dyn rusqlite::types::ToSql>> = vec![Box::new(now.to_string())];
2231    if let Some(ns) = namespace {
2232        sql.push_str(" AND namespace = ?");
2233        params_vec.push(Box::new(ns.to_string()));
2234    }
2235    if let Some(t) = tier {
2236        sql.push_str(" AND tier = ?");
2237        params_vec.push(Box::new(t.as_str().to_string()));
2238    }
2239    if let Some(p) = min_priority {
2240        sql.push_str(" AND priority >= ?");
2241        params_vec.push(Box::new(p));
2242    }
2243    if let Some(s) = since {
2244        sql.push_str(" AND created_at >= ?");
2245        params_vec.push(Box::new(s.to_string()));
2246    }
2247    if let Some(u) = until {
2248        sql.push_str(" AND created_at <= ?");
2249        params_vec.push(Box::new(u.to_string()));
2250    }
2251    if let Some(tag) = tags_filter {
2252        sql.push_str(
2253            " AND EXISTS (SELECT 1 FROM json_each(memories.tags) WHERE json_each.value = ?)",
2254        );
2255        params_vec.push(Box::new(tag.to_string()));
2256    }
2257    if let Some(a) = agent_id {
2258        sql.push_str(" AND agent_id_idx = ?");
2259        params_vec.push(Box::new(a.to_string()));
2260    }
2261    sql.push_str(SQL_LIST_ORDER_LIMIT);
2262    params_vec.push(Box::new(limit));
2263    params_vec.push(Box::new(offset));
2264    (sql, params_vec)
2265}
2266
2267#[allow(clippy::too_many_arguments)]
2268pub fn list(
2269    conn: &Connection,
2270    namespace: Option<&str>,
2271    tier: Option<&Tier>,
2272    limit: usize,
2273    offset: usize,
2274    min_priority: Option<i32>,
2275    since: Option<&str>,
2276    until: Option<&str>,
2277    tags_filter: Option<&str>,
2278    agent_id: Option<&str>,
2279) -> Result<Vec<Memory>> {
2280    let now = Utc::now().to_rfc3339();
2281    let (sql, params_vec) = build_list_query(
2282        namespace,
2283        tier,
2284        min_priority,
2285        &now,
2286        since,
2287        until,
2288        tags_filter,
2289        agent_id,
2290        limit,
2291        offset,
2292    );
2293    let params_refs: Vec<&dyn rusqlite::types::ToSql> =
2294        params_vec.iter().map(std::convert::AsRef::as_ref).collect();
2295    let mut stmt = conn.prepare_cached(&sql)?;
2296    let rows = stmt.query_map(params_refs.as_slice(), row_to_memory)?;
2297    rows.collect::<rusqlite::Result<Vec<_>>>()
2298        .map_err(Into::into)
2299}
2300
2301/// L1-1 (v0.7.0) — return all non-expired memories that match the given
2302/// [`crate::models::MemoryKind`]. Used by the L2-1 curator reflection pass to
2303/// enumerate observation-class memories as synthesis candidates.
2304///
2305/// The query is deliberately minimal: no tier filter, no priority floor, no
2306/// pagination. Callers that need subsetting should post-filter the returned
2307/// `Vec<Memory>`. The index on `memory_kind` (added in migration v30) keeps
2308/// this query O(kind-count) rather than O(table-size) on production data.
2309#[allow(dead_code)] // consumed by L2-1 curator; not yet wired in this PR
2310pub(crate) fn memories_by_kind(
2311    conn: &Connection,
2312    kind: &crate::models::MemoryKind,
2313) -> Result<Vec<Memory>> {
2314    let now = Utc::now().to_rfc3339();
2315    let mut stmt = conn.prepare(
2316        "SELECT * FROM memories
2317         WHERE memory_kind = ?1
2318           AND (expires_at IS NULL OR expires_at > ?2)
2319         ORDER BY priority DESC, updated_at DESC",
2320    )?;
2321    let rows = stmt.query_map(params![kind.as_str(), now], row_to_memory)?;
2322    rows.collect::<rusqlite::Result<Vec<_>>>()
2323        .map_err(Into::into)
2324}
2325
2326#[allow(clippy::too_many_arguments)]
2327pub fn search(
2328    conn: &Connection,
2329    query: &str,
2330    namespace: Option<&str>,
2331    tier: Option<&Tier>,
2332    limit: usize,
2333    min_priority: Option<i32>,
2334    since: Option<&str>,
2335    until: Option<&str>,
2336    tags_filter: Option<&str>,
2337    agent_id: Option<&str>,
2338    as_agent: Option<&str>,
2339    // v0.7.0 WT-1-E — when false (default), search excludes archived
2340    // sources whose atoms surface in their place. See
2341    // [`recall_with_telemetry`] for the full contract.
2342    include_archived: bool,
2343) -> Result<Vec<Memory>> {
2344    search_with_source_uri(
2345        conn,
2346        query,
2347        namespace,
2348        tier,
2349        limit,
2350        min_priority,
2351        since,
2352        until,
2353        tags_filter,
2354        agent_id,
2355        as_agent,
2356        include_archived,
2357        None,
2358    )
2359}
2360
2361/// v0.7.0 Provenance Gap 6 (issue #889) — search with optional
2362/// reciprocal `source_uri` filter. When `source_uri` is `Some(uri)`,
2363/// the FTS search is post-filtered (in SQL) to memories whose
2364/// `source_uri` column equals the supplied value verbatim. The
2365/// partial `idx_memories_source_uri` index (created at v38) covers
2366/// the lookup, keeping it O(log N) over the URI-keyed subspace.
2367///
2368/// When `source_uri` is `None`, this delegates to the legacy
2369/// [`search`] path verbatim.
2370#[allow(clippy::too_many_arguments)]
2371pub fn search_with_source_uri(
2372    conn: &Connection,
2373    query: &str,
2374    namespace: Option<&str>,
2375    tier: Option<&Tier>,
2376    limit: usize,
2377    min_priority: Option<i32>,
2378    since: Option<&str>,
2379    until: Option<&str>,
2380    tags_filter: Option<&str>,
2381    agent_id: Option<&str>,
2382    as_agent: Option<&str>,
2383    include_archived: bool,
2384    source_uri: Option<&str>,
2385) -> Result<Vec<Memory>> {
2386    let now = Utc::now().to_rfc3339();
2387    let tier_str = tier.map(|t| t.as_str().to_string());
2388    let fts_query = sanitize_fts_query(query, false);
2389    let (vis_p, vis_t, vis_u, vis_o) = compute_visibility_prefixes(as_agent);
2390    let archived_fragment = archived_source_clause(include_archived, "m");
2391    let source_uri_fragment = if source_uri.is_some() {
2392        "AND m.source_uri = ?15"
2393    } else {
2394        ""
2395    };
2396
2397    let sql = format!(
2398        "SELECT m.id, m.tier, m.namespace, m.title, m.content, m.tags, m.priority,
2399                m.confidence, m.source, m.access_count, m.created_at, m.updated_at,
2400                m.last_accessed_at, m.expires_at, m.metadata, m.reflection_depth,
2401                m.memory_kind, m.entity_id, m.persona_version,
2402                m.citations, m.source_uri, m.source_span,
2403                m.confidence_source, m.confidence_signals, m.confidence_decayed_at
2404         FROM memories_fts fts
2405         JOIN memories m ON m.rowid = fts.rowid
2406         WHERE memories_fts MATCH ?1
2407           AND (?2 IS NULL OR m.namespace = ?2)
2408           AND (?3 IS NULL OR m.tier = ?3)
2409           AND (?4 IS NULL OR m.priority >= ?4)
2410           AND (m.expires_at IS NULL OR m.expires_at > ?5)
2411           AND (?6 IS NULL OR m.created_at >= ?6)
2412           AND (?7 IS NULL OR m.created_at <= ?7)
2413           AND (?8 IS NULL OR EXISTS (SELECT 1 FROM json_each(m.tags) WHERE json_each.value = ?8))
2414           AND (?10 IS NULL OR m.agent_id_idx = ?10)
2415           {archived_fragment}
2416           {source_uri_fragment}
2417           {vis}
2418         ORDER BY (fts.rank * -1)
2419           + (m.priority * 0.5)
2420           + (MIN(m.access_count, 50) * 0.1)
2421           + (m.confidence * 2.0)
2422           + (1.0 / (1.0 + (julianday('now') - julianday(m.updated_at)) * 0.1))
2423           DESC
2424         LIMIT ?9",
2425        vis = visibility_clause(11, "m"),
2426    );
2427    let mut stmt = conn.prepare(&sql)?;
2428    let rows = if let Some(uri) = source_uri {
2429        stmt.query_map(
2430            params![
2431                fts_query,
2432                namespace,
2433                tier_str,
2434                min_priority,
2435                now,
2436                since,
2437                until,
2438                tags_filter,
2439                limit,
2440                agent_id,
2441                vis_p,
2442                vis_t,
2443                vis_u,
2444                vis_o,
2445                uri,
2446            ],
2447            row_to_memory,
2448        )?
2449        .collect::<rusqlite::Result<Vec<_>>>()
2450        .map_err(Into::into)
2451    } else {
2452        stmt.query_map(
2453            params![
2454                fts_query,
2455                namespace,
2456                tier_str,
2457                min_priority,
2458                now,
2459                since,
2460                until,
2461                tags_filter,
2462                limit,
2463                agent_id,
2464                vis_p,
2465                vis_t,
2466                vis_u,
2467                vis_o,
2468            ],
2469            row_to_memory,
2470        )?
2471        .collect::<rusqlite::Result<Vec<_>>>()
2472        .map_err(Into::into)
2473    };
2474    rows
2475}
2476
2477/// v0.7.0 Provenance Gap 6 (issue #889) — list every memory carrying
2478/// the supplied `source_uri`. Bypasses the FTS layer so callers that
2479/// want the full reciprocal set ("every memory from this document")
2480/// don't need to type a query. Hits the partial
2481/// `idx_memories_source_uri` index directly. Pure read.
2482///
2483/// `as_agent` is the visibility principal. When `Some(...)`, the
2484/// `compute_visibility_prefixes` + `visibility_clause` pair is applied
2485/// so the reciprocal source-uri endpoint respects the same
2486/// scope=private gate as `search_with_source_uri` (#942 + #975
2487/// follow-up: any query path returning Memory MUST inherit the SAL
2488/// #910 visibility filter). When `None`, the filter is bypassed —
2489/// reserved for substrate-internal callers + tests that explicitly
2490/// opt out.
2491pub fn list_by_source_uri(
2492    conn: &Connection,
2493    source_uri: &str,
2494    namespace: Option<&str>,
2495    limit: Option<usize>,
2496    as_agent: Option<&str>,
2497) -> Result<Vec<Memory>> {
2498    let cap = limit.unwrap_or(LIST_DEFAULT_CAP).min(LIST_MAX_LIMIT);
2499    let (vis_p, vis_t, vis_u, vis_o) = compute_visibility_prefixes(as_agent);
2500    // Placeholder layout: ?1 = uri, ?2 = namespace, ?3 = limit,
2501    // ?4..?7 = visibility prefixes (private/team/unit/org).
2502    let sql = format!(
2503        "SELECT m.id, m.tier, m.namespace, m.title, m.content, m.tags, m.priority,
2504                m.confidence, m.source, m.access_count, m.created_at, m.updated_at,
2505                m.last_accessed_at, m.expires_at, m.metadata, m.reflection_depth,
2506                m.memory_kind, m.entity_id, m.persona_version,
2507                m.citations, m.source_uri, m.source_span,
2508                m.confidence_source, m.confidence_signals, m.confidence_decayed_at,
2509                m.version
2510         FROM memories m
2511         WHERE m.source_uri = ?1
2512           AND (?2 IS NULL OR m.namespace = ?2)
2513           {vis}
2514         ORDER BY m.created_at ASC
2515         LIMIT ?3",
2516        vis = visibility_clause(4, "m"),
2517    );
2518    let mut stmt = conn.prepare(&sql)?;
2519    let rows = stmt.query_map(
2520        params![
2521            source_uri,
2522            namespace,
2523            i64::try_from(cap).unwrap_or(i64::MAX),
2524            vis_p,
2525            vis_t,
2526            vis_u,
2527            vis_o,
2528        ],
2529        row_to_memory,
2530    )?;
2531    rows.collect::<rusqlite::Result<Vec<_>>>()
2532        .map_err(Into::into)
2533}
2534
2535/// Task 1.12 — proximity boost applied to a memory's score based on its
2536/// depth distance from the queried agent namespace. Uses the formula
2537/// `1 / (1 + depth_distance * 0.3)` per spec. Distance 0 = full strength
2538/// (1.0), each step up the hierarchy dampens linearly.
2539#[must_use]
2540pub fn proximity_boost(agent_ns: &str, memory_ns: &str) -> f64 {
2541    let agent_depth = crate::models::namespace_depth(agent_ns);
2542    let memory_depth = crate::models::namespace_depth(memory_ns);
2543    let distance = agent_depth.saturating_sub(memory_depth);
2544    #[allow(clippy::cast_precision_loss)]
2545    let d = distance as f64;
2546    1.0 / (1.0 + d * 0.3)
2547}
2548
2549/// Task 1.12 — SQL fragment + boolean indicating whether hierarchy
2550/// expansion is in play. When active the `namespace` SQL param binds
2551/// NULL (so `?N IS NULL OR m.namespace = ?N` passes trivially) and a
2552/// separate `AND m.namespace IN (<ancestors>)` clause narrows to the
2553/// hierarchy. When inactive the returned fragment is empty.
2554///
2555/// Ancestor strings are interpolated because `SQLite` `IN` with a
2556/// variable-length positional list is awkward, and the inputs come
2557/// from `namespace_ancestors()` → `validate_namespace`-approved
2558/// strings. Single-quote doubling is applied defensively.
2559///
2560/// PERF-8 (FX-C4-batch2, 2026-05-26): the hierarchy fragment is a
2561/// pure function of `namespace`, so a bounded LRU cache amortises
2562/// the `format!` + `Vec<String>::join` cost across the recall
2563/// hot path. Cache hits return a clone of the cached `String`
2564/// (still allocates, but skips the per-call SQL string build); the
2565/// cache itself is keyed by namespace string and capped at
2566/// `HIERARCHY_CACHE_MAX` entries to bound memory in the face of
2567/// per-tenant namespace explosions.
2568fn hierarchy_in_clause(namespace: Option<&str>) -> (Option<String>, bool) {
2569    let Some(ns) = namespace else {
2570        return (None, false);
2571    };
2572    if !ns.contains('/') {
2573        return (None, false);
2574    }
2575
2576    // PERF-8 cache lookup. The cache stores the rendered SQL
2577    // fragment Option<String>; the `bool` shadow flag is always
2578    // `true` for cached entries (we only cache hierarchical
2579    // namespaces — the `!ns.contains('/')` short-circuit above
2580    // never reaches the cache).
2581    if let Some(cached) = hierarchy_cache_get(ns) {
2582        return (Some(cached), true);
2583    }
2584
2585    let ancestors = crate::models::namespace_ancestors(ns);
2586    if ancestors.is_empty() {
2587        return (None, false);
2588    }
2589    let quoted: Vec<String> = ancestors
2590        .iter()
2591        .map(|a| format!("'{}'", a.replace('\'', "''")))
2592        .collect();
2593    let fragment = format!("AND m.namespace IN ({})", quoted.join(","));
2594    hierarchy_cache_put(ns, &fragment);
2595    (Some(fragment), true)
2596}
2597
// PERF-8 (FX-C4-batch2, 2026-05-26) — bounded cache for the rendered
// `hierarchy_in_clause` SQL fragment (namespace → fragment). It is
// size-capped with arbitrary eviction, NOT a true LRU. The cap is
// large enough for the typical few-hundred-namespace deployment while
// keeping memory bounded on multi-tenant hosts.
const HIERARCHY_CACHE_MAX: usize = 256;

/// Process-wide cache instance, created lazily on first use.
fn hierarchy_cache() -> &'static std::sync::Mutex<std::collections::HashMap<String, String>> {
    static CACHE: std::sync::OnceLock<std::sync::Mutex<std::collections::HashMap<String, String>>> =
        std::sync::OnceLock::new();
    CACHE.get_or_init(|| std::sync::Mutex::new(std::collections::HashMap::new()))
}

/// Look up the cached fragment for `ns`, returning a clone.
///
/// Best-effort: a poisoned lock is treated as a miss.
fn hierarchy_cache_get(ns: &str) -> Option<String> {
    let cache = hierarchy_cache().lock().ok()?;
    cache.get(ns).cloned()
}

/// Store `fragment` for `ns`, keeping the cache at or below
/// `HIERARCHY_CACHE_MAX` entries.
///
/// Best-effort: a poisoned lock makes this a no-op.
fn hierarchy_cache_put(ns: &str, fragment: &str) {
    let Ok(mut cache) = hierarchy_cache().lock() else {
        return;
    };
    // Evict only when this insert would actually grow the map.
    // Re-putting a key that is already present (e.g. two callers that
    // both missed and both rendered the same fragment) replaces the
    // value in place and must not push out an unrelated entry.
    if cache.len() >= HIERARCHY_CACHE_MAX && !cache.contains_key(ns) {
        // Bounded eviction: drop one arbitrary entry. The cache is
        // not a true LRU because the recall hot path runs in
        // microseconds and a full LRU's bookkeeping cost would
        // dominate the cache-hit savings. Arbitrary eviction is fine
        // because the hot working set typically stays well under
        // the cap; the eviction only fires on the long tail.
        if let Some(victim) = cache.keys().next().cloned() {
            cache.remove(&victim);
        }
    }
    cache.insert(ns.to_string(), fragment.to_string());
}

/// Test-only reset so cases that inspect the cache start from empty.
#[cfg(test)]
fn hierarchy_cache_clear_for_tests() {
    if let Ok(mut cache) = hierarchy_cache().lock() {
        cache.clear();
    }
}
2639
2640/// Task 1.12 — apply proximity boost to scored memories ranked against
2641/// an agent's hierarchical namespace. Re-sorts by boosted score.
2642fn apply_proximity_boost(scored: Vec<(Memory, f64)>, agent_ns: &str) -> Vec<(Memory, f64)> {
2643    let mut boosted: Vec<(Memory, f64)> = scored
2644        .into_iter()
2645        .map(|(mem, score)| {
2646            let boost = proximity_boost(agent_ns, &mem.namespace);
2647            (mem, score * boost)
2648        })
2649        .collect();
2650    boosted.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
2651    boosted
2652}
2653
2654/// Phase P6 (R1) — count tokens in `text` using OpenAI's `cl100k_base`
2655/// BPE encoding. This is the de-facto standard for Claude / GPT context
2656/// budgeting and is shipped with `tiktoken-rs` (the BPE table is embedded
2657/// in the crate, ~1.7 MB, so the count is offline-deterministic across
2658/// all hosts). The encoder is built lazily and cached process-wide via
2659/// `OnceLock` — `cl100k_base()` itself parses the embedded table on every
2660/// call, which adds a few ms; we pay that cost once.
2661///
2662/// Returns the token count. On the (vanishingly rare) cl100k_base init
2663/// failure, falls back to the prior `len/4` byte heuristic so a budget
2664/// request never hard-errors.
2665#[must_use]
2666pub fn count_tokens_cl100k(text: &str) -> usize {
2667    use std::sync::OnceLock;
2668    static BPE: OnceLock<Option<tiktoken_rs::CoreBPE>> = OnceLock::new();
2669    let bpe = BPE.get_or_init(|| tiktoken_rs::cl100k_base().ok());
2670    if let Some(bpe) = bpe.as_ref() {
2671        bpe.encode_with_special_tokens(text).len()
2672    } else {
2673        // Defensive fallback — should never trigger in practice because
2674        // the BPE table is bundled in the crate, but we never want a
2675        // budget call to fail because of tokenizer init.
2676        text.len() / 4
2677    }
2678}
2679
2680/// Phase P6 — token cost of a memory's `content` only (not title), per
2681/// the R1 spec which budgets against the LLM context window. Title and
2682/// metadata are caller-side ornament; `content` is what gets stuffed
2683/// into the prompt.
2684#[must_use]
2685pub fn count_memory_tokens(mem: &Memory) -> usize {
2686    count_tokens_cl100k(&mem.content)
2687}
2688
2689/// Phase P6 — kept for backward compatibility with the Task 1.11 byte-
2690/// heuristic surface. New code should use `count_memory_tokens`. The
2691/// returned value is now BPE-accurate (cl100k_base) rather than the
2692/// prior `len/4` estimate, so callers reading this through the public
2693/// API get the more accurate value automatically.
2694#[must_use]
2695pub fn estimate_memory_tokens(mem: &Memory) -> usize {
2696    count_memory_tokens(mem)
2697}
2698
/// Phase P6 — result of running a ranked recall list through
/// `apply_token_budget`. Carries the four values `mcp::handle_recall`
/// surfaces in the `RecallMeta` block (`budget_tokens_used`,
/// `budget_tokens_remaining`, `memories_dropped`, `budget_overflow`).
#[derive(Clone, Debug)]
pub struct BudgetOutcome {
    /// Running token total of the returned memories' content: the
    /// cl100k_base count when a budget was supplied, the cheap `len/4`
    /// byte heuristic when it was not.
    pub tokens_used: usize,
    /// `budget - tokens_used`, clamped at 0. `None` when no budget was set.
    pub tokens_remaining: Option<usize>,
    /// Number of ranked candidates that the budget removed from the list.
    pub memories_dropped: usize,
    /// True iff the top-ranked memory by itself was larger than the budget
    /// yet was returned anyway (R1 guarantee: at least one memory if any
    /// matched). Always false when no budget is set.
    pub budget_overflow: bool,
}
2716
2717/// Phase P6 (R1) — context-budget greedy fill. Iterates over scored
2718/// candidates in rank order; stops at the first memory whose inclusion
2719/// would exceed the budget — UNLESS the output is still empty, in
2720/// which case the highest-ranked memory is returned anyway with
2721/// `budget_overflow = true`. This preserves the R1 guarantee that a
2722/// successful recall always returns at least one result when any
2723/// matched, even if the user supplied an unrealistically tight budget.
2724///
2725/// When `budget_tokens` is `None`, every candidate is returned and the
2726/// `tokens_used` tally falls back to the cheap byte-heuristic (`len/4`)
2727/// — running cl100k_base on every recall regardless of caller intent
2728/// would impose ~200 ms cold-start (BPE table parse) and several ms per
2729/// memory on the hot path. The heuristic is byte-exact-deterministic,
2730/// honoring the prior Task 1.11 contract for "observe the cost without
2731/// enforcing it". When `budget_tokens` is `Some(_)`, the BPE-accurate
2732/// cl100k count is used because the caller cares enough about the
2733/// number to enforce on it. When `budget_tokens` is `Some(0)`, **zero
2734/// memories are returned** with `budget_overflow = false` — the spec
2735/// semantics for "no budget at all, please" (R1 §6 acceptance #3).
2736#[must_use]
2737pub fn apply_token_budget(
2738    scored: Vec<(Memory, f64)>,
2739    budget_tokens: Option<usize>,
2740) -> (Vec<(Memory, f64)>, BudgetOutcome) {
2741    let total_candidates = scored.len();
2742
2743    // Phase P6 — explicit `0` budget short-circuits to an empty result.
2744    // Per the R1 acceptance test `budget_tokens_zero_returns_zero_memories`,
2745    // this is a deliberate no-op fill (overflow is *false* — the user
2746    // said "give me nothing").
2747    if budget_tokens == Some(0) {
2748        return (
2749            Vec::new(),
2750            BudgetOutcome {
2751                tokens_used: 0,
2752                tokens_remaining: Some(0),
2753                memories_dropped: total_candidates,
2754                budget_overflow: false,
2755            },
2756        );
2757    }
2758
2759    // No-budget fast path: skip cl100k entirely. The byte heuristic is
2760    // a few ns vs. the BPE encoder's couple-of-µs per memory plus the
2761    // one-shot ~200 ms init. Bench harness benchmarks recall with
2762    // `budget_tokens=None`; this keeps the hot path cl100k-free.
2763    if budget_tokens.is_none() {
2764        let mut used: usize = 0;
2765        let mut out: Vec<(Memory, f64)> = Vec::with_capacity(scored.len());
2766        for (mem, score) in scored {
2767            used = used.saturating_add(mem.content.len() / 4);
2768            out.push((mem, score));
2769        }
2770        return (
2771            out,
2772            BudgetOutcome {
2773                tokens_used: used,
2774                tokens_remaining: None,
2775                memories_dropped: 0,
2776                budget_overflow: false,
2777            },
2778        );
2779    }
2780
2781    // Budget path — caller asked for enforcement, so spend the tokens
2782    // for accurate cl100k accounting.
2783    let mut used: usize = 0;
2784    let mut out: Vec<(Memory, f64)> = Vec::with_capacity(scored.len());
2785    let mut overflow = false;
2786
2787    for (mem, score) in scored {
2788        let cost = count_memory_tokens(&mem);
2789        if let Some(budget) = budget_tokens
2790            && used.saturating_add(cost) > budget
2791        {
2792            // R1 always-return-at-least-one guarantee: if we've collected
2793            // nothing yet, take the top-ranked memory and flag overflow.
2794            if out.is_empty() {
2795                used = used.saturating_add(cost);
2796                out.push((mem, score));
2797                overflow = true;
2798            }
2799            break;
2800        }
2801        used = used.saturating_add(cost);
2802        out.push((mem, score));
2803    }
2804
2805    let dropped = total_candidates.saturating_sub(out.len());
2806    let tokens_remaining = budget_tokens.map(|b| b.saturating_sub(used));
2807    (
2808        out,
2809        BudgetOutcome {
2810            tokens_used: used,
2811            tokens_remaining,
2812            memories_dropped: dropped,
2813            budget_overflow: overflow,
2814        },
2815    )
2816}
2817
2818/// Recall — fuzzy OR search + touch + auto-promote + TTL extension.
2819/// Task 1.11: after ranking, applies optional `budget_tokens` cap.
2820/// Phase P6: returns the full `BudgetOutcome` (tokens_used,
2821/// tokens_remaining, memories_dropped, budget_overflow) instead of just
2822/// the prior bare `tokens_used`. Callers that only need `tokens_used`
2823/// read `outcome.tokens_used`.
2824#[allow(clippy::too_many_arguments)]
2825/// v0.6.3.1 (P3): keyword-only recall with retrieval-stage telemetry.
2826///
2827/// Identical to [`recall`] but additionally returns a [`crate::models::RecallTelemetry`]
2828/// describing the FTS5 candidate count (HNSW count is always 0 for this
2829/// path — no semantic stage runs). MCP `handle_recall` uses this to build
2830/// the `meta` block; [`recall`] is preserved as a thin wrapper for
2831/// existing callers (HTTP handlers, CLI, bench).
2832#[allow(clippy::too_many_arguments)]
2833pub fn recall_with_telemetry(
2834    conn: &Connection,
2835    context: &str,
2836    namespace: Option<&str>,
2837    limit: usize,
2838    tags_filter: Option<&str>,
2839    since: Option<&str>,
2840    until: Option<&str>,
2841    short_extend: i64,
2842    mid_extend: i64,
2843    as_agent: Option<&str>,
2844    budget_tokens: Option<usize>,
2845    // v0.7.0 WT-1-E — when false (default), recall excludes archived
2846    // sources whose atoms now surface in their place. When true, the
2847    // archive-filter WHERE clause is dropped so forensic-export and
2848    // explicit auditor recall returns both atoms and sources.
2849    include_archived: bool,
2850    // v0.7.0 Form 4 / Cluster-A PERF-3 — push `--source-uri-prefix`
2851    // into the SQL WHERE so the partial `idx_memories_source_uri`
2852    // index covers the lookup and excluded rows never enter the
2853    // top-K. See [`recall`] for the contract.
2854    source_uri_prefix: Option<&str>,
2855) -> Result<(
2856    Vec<(Memory, f64)>,
2857    BudgetOutcome,
2858    crate::models::RecallTelemetry,
2859)> {
2860    let (results, outcome) = recall(
2861        conn,
2862        context,
2863        namespace,
2864        limit,
2865        tags_filter,
2866        since,
2867        until,
2868        short_extend,
2869        mid_extend,
2870        as_agent,
2871        budget_tokens,
2872        include_archived,
2873        source_uri_prefix,
2874    )?;
2875    let telemetry = crate::models::RecallTelemetry {
2876        fts_candidates: results.len(),
2877        hnsw_candidates: 0,
2878        blend_weight_avg: 0.0,
2879        embedding_dim_mismatch: 0,
2880    };
2881    Ok((results, outcome, telemetry))
2882}
2883
2884pub fn recall(
2885    conn: &Connection,
2886    context: &str,
2887    namespace: Option<&str>,
2888    limit: usize,
2889    tags_filter: Option<&str>,
2890    since: Option<&str>,
2891    until: Option<&str>,
2892    short_extend: i64,
2893    mid_extend: i64,
2894    as_agent: Option<&str>,
2895    budget_tokens: Option<usize>,
2896    // v0.7.0 WT-1-E — see [`recall_with_telemetry`] for the
2897    // archived-source exclusion contract.
2898    include_archived: bool,
2899    // v0.7.0 Form 4 / Cluster-A PERF-3 — when `Some(prefix)`, restrict
2900    // results to memories whose `source_uri` starts with `prefix`. The
2901    // predicate is `source_uri LIKE 'prefix%'` so the partial
2902    // `idx_memories_source_uri` index (defined in migration
2903    // `0032_v07_form4_provenance.sql`) covers the scan. Pre-fix this
2904    // filter ran in Rust AFTER the SQL returned, which excluded valid
2905    // matches from the top-K when the substrate returned `limit` rows
2906    // that subsequently filtered to fewer. `None` preserves the legacy
2907    // no-filter behaviour for callers that filter post-hoc.
2908    source_uri_prefix: Option<&str>,
2909) -> Result<(Vec<(Memory, f64)>, BudgetOutcome)> {
2910    let now = Utc::now().to_rfc3339();
2911    let fts_query = sanitize_fts_query(context, true);
2912    let (vis_p, vis_t, vis_u, vis_o) = compute_visibility_prefixes(as_agent);
2913
2914    // Task 1.12: hierarchy expansion. If `namespace` is hierarchical (contains
2915    // `/`), broaden the filter to the full ancestor chain. Flat namespaces
2916    // keep exact-match semantics (backward compat).
2917    let (hierarchy_in, hierarchy_active) = hierarchy_in_clause(namespace);
2918    let hierarchy_fragment = hierarchy_in.unwrap_or_default();
2919    let effective_namespace = if hierarchy_active { None } else { namespace };
2920
2921    // v0.7.0 WT-1-E — archived-source exclusion (default) / pass-
2922    // through (include_archived=true). Composes with the existing
2923    // namespace, expiry, tag, time-window, and visibility filters.
2924    let archived_fragment = archived_source_clause(include_archived, "m");
2925
2926    // v0.7.0 Form 4 / Cluster-A PERF-3 — push the source-URI prefix
2927    // predicate into SQL. We escape SQL LIKE metacharacters (`%`, `_`,
2928    // `\`) in the supplied prefix so a caller passing e.g. `doc:abc_`
2929    // matches only that literal value (not `doc:abcX`). The LIKE
2930    // pattern is constructed with the bound parameter holding the
2931    // already-escaped prefix + `%`; combined with the partial index
2932    // on `source_uri WHERE source_uri IS NOT NULL`, SQLite picks the
2933    // index for the lookup. See [`escape_like_pattern`].
2934    let (source_uri_fragment, source_uri_param): (&str, Option<String>) = match source_uri_prefix {
2935        Some(prefix) if !prefix.is_empty() => (
2936            "AND m.source_uri LIKE ?12 ESCAPE '\\'",
2937            Some(format!("{}%", escape_like_pattern(prefix))),
2938        ),
2939        _ => ("", None),
2940    };
2941
2942    let sql = format!(
2943        "SELECT m.id, m.tier, m.namespace, m.title, m.content, m.tags, m.priority,
2944                m.confidence, m.source, m.access_count, m.created_at, m.updated_at,
2945                m.last_accessed_at, m.expires_at, m.metadata, m.reflection_depth,
2946                m.memory_kind, m.entity_id, m.persona_version,
2947                m.citations, m.source_uri, m.source_span,
2948                m.confidence_source, m.confidence_signals, m.confidence_decayed_at,
2949                (fts.rank * -1)
2950                + (m.priority * 0.5)
2951                + (MIN(m.access_count, 50) * 0.1)
2952                + (m.confidence * 2.0)
2953                + (CASE m.tier WHEN 'long' THEN 3.0 WHEN 'mid' THEN 1.0 ELSE 0.0 END)
2954                + (1.0 / (1.0 + (julianday('now') - julianday(m.updated_at)) * 0.1))
2955                AS score
2956         FROM memories_fts fts
2957         JOIN memories m ON m.rowid = fts.rowid
2958         WHERE memories_fts MATCH ?1
2959           AND (?2 IS NULL OR m.namespace = ?2)
2960           {hierarchy_fragment}
2961           AND (m.expires_at IS NULL OR m.expires_at > ?3)
2962           AND (?4 IS NULL OR EXISTS (SELECT 1 FROM json_each(m.tags) WHERE json_each.value = ?4))
2963           AND (?5 IS NULL OR m.created_at >= ?5)
2964           AND (?6 IS NULL OR m.created_at <= ?6)
2965           {archived_fragment}
2966           {source_uri_fragment}
2967           {vis}
2968         ORDER BY score DESC
2969         LIMIT ?7",
2970        vis = visibility_clause(8, "m"),
2971    );
2972    let mut stmt = conn.prepare(&sql)?;
2973    // Bind ?12 only when the source-URI fragment is active; SQLite
2974    // errors on parameter-count mismatch.
2975    let row_handler = |row: &rusqlite::Row<'_>| -> rusqlite::Result<(Memory, f64)> {
2976        let mem = row_to_memory(row)?;
2977        // v0.7.0 Form 4 / v0.7.x Form 6 — name-based read for the
2978        // trailing score column. Switched from positional `row.get`
2979        // after schema v38 (citations, source_uri, source_span) and
2980        // Form 6's `memory_kind`/`entity_id`/`persona_version`
2981        // shifted the trailing column's index; name-based reads
2982        // survive future column additions without further churn.
2983        let score: f64 = row.get("score")?;
2984        Ok((mem, score))
2985    };
2986    let results: Vec<(Memory, f64)> = if let Some(ref uri_param) = source_uri_param {
2987        let rows = stmt.query_map(
2988            params![
2989                fts_query,
2990                effective_namespace,
2991                now,
2992                tags_filter,
2993                since,
2994                until,
2995                limit,
2996                vis_p,
2997                vis_t,
2998                vis_u,
2999                vis_o,
3000                uri_param,
3001            ],
3002            row_handler,
3003        )?;
3004        rows.collect::<rusqlite::Result<Vec<_>>>()?
3005    } else {
3006        let rows = stmt.query_map(
3007            params![
3008                fts_query,
3009                effective_namespace,
3010                now,
3011                tags_filter,
3012                since,
3013                until,
3014                limit,
3015                vis_p,
3016                vis_t,
3017                vis_u,
3018                vis_o,
3019            ],
3020            row_handler,
3021        )?;
3022        rows.collect::<rusqlite::Result<Vec<_>>>()?
3023    };
3024
3025    // Task 1.12: proximity boost when hierarchy expansion is active.
3026    let boosted = if let (true, Some(anchor)) = (hierarchy_active, namespace) {
3027        apply_proximity_boost(results, anchor)
3028    } else {
3029        results
3030    };
3031
3032    // Task 1.11 / Phase P6: apply optional token budget in rank order
3033    // (AFTER proximity). Returns BudgetOutcome with all R1 meta fields.
3034    let (budgeted, outcome) = apply_token_budget(boosted, budget_tokens);
3035
3036    // Cluster-F PERF-6 — collapse K per-row touches into a single
3037    // `BEGIN IMMEDIATE` transaction. Same semantics (access bump,
3038    // TTL extend, promotion, priority bump every 10 accesses); the
3039    // 3K UPDATE round-trips now share one commit instead of K.
3040    let touch_ids: Vec<&str> = budgeted.iter().map(|(mem, _)| mem.id.as_str()).collect();
3041    if let Err(e) = touch_many(conn, &touch_ids, short_extend, mid_extend) {
3042        tracing::warn!("touch_many failed for recall set: {}", e);
3043    }
3044    Ok((budgeted, outcome))
3045}
3046
3047/// Task 1.7 — vertical memory promotion.
3048///
3049/// Clones `source_id` into `to_namespace`, which must be a proper `/`-derived
3050/// ancestor of the memory's current namespace. The original memory is
3051/// **untouched** (vertical promotion is a fan-out, not a move). A
3052/// `derived_from` link is created from the new clone back to the source so
3053/// the promotion trail is queryable.
3054///
3055/// Returns the clone's new ID.
3056///
3057/// Errors when:
3058/// - source doesn't exist
3059/// - `to_namespace` is empty, equal to the source namespace, or not an
3060///   ancestor of it (see `namespace_ancestors`)
3061pub fn promote_to_namespace(
3062    conn: &Connection,
3063    source_id: &str,
3064    to_namespace: &str,
3065) -> Result<String> {
3066    if to_namespace.is_empty() {
3067        // #962 typed envelope.
3068        return Err(anyhow::Error::new(StorageError::InvalidArgument {
3069            reason: "to_namespace cannot be empty".to_string(),
3070        }));
3071    }
3072    let source = get(conn, source_id)?.ok_or_else(|| {
3073        // #962 typed envelope. `Source` here labels the promotion source,
3074        // not a link end, but the user-facing message ("source memory
3075        // not found: …") is preserved via the LinkEnd::Source Display arm.
3076        anyhow::Error::new(StorageError::MemoryNotFound {
3077            id: source_id.to_string(),
3078            role: Some(LinkEnd::Source),
3079        })
3080    })?;
3081    if to_namespace == source.namespace {
3082        // #962 typed envelope.
3083        return Err(anyhow::Error::new(StorageError::InvalidArgument {
3084            reason: format!(
3085                "to_namespace must be a proper ancestor of the memory's namespace (got self: {})",
3086                source.namespace
3087            ),
3088        }));
3089    }
3090    let ancestors = namespace_ancestors(&source.namespace);
3091    if !ancestors.iter().any(|a| a == to_namespace) {
3092        // #962 typed envelope.
3093        return Err(anyhow::Error::new(StorageError::InvalidArgument {
3094            reason: format!(
3095                "to_namespace '{to_namespace}' is not an ancestor of '{}' (ancestors: {ancestors:?})",
3096                source.namespace
3097            ),
3098        }));
3099    }
3100
3101    let now = Utc::now().to_rfc3339();
3102    let clone = Memory {
3103        id: uuid::Uuid::new_v4().to_string(),
3104        tier: source.tier.clone(),
3105        namespace: to_namespace.to_string(),
3106        title: source.title.clone(),
3107        content: source.content.clone(),
3108        tags: source.tags.clone(),
3109        priority: source.priority,
3110        confidence: source.confidence,
3111        source: source.source.clone(),
3112        access_count: 0,
3113        created_at: now.clone(),
3114        updated_at: now,
3115        last_accessed_at: None,
3116        expires_at: source.expires_at.clone(),
3117        metadata: source.metadata.clone(),
3118        reflection_depth: source.reflection_depth,
3119        memory_kind: source.memory_kind.clone(),
3120        entity_id: None,
3121        persona_version: None,
3122        citations: Vec::new(),
3123        source_uri: None,
3124        source_span: None,
3125        confidence_source: ConfidenceSource::CallerProvided,
3126        confidence_signals: None,
3127        confidence_decayed_at: None,
3128        version: 1,
3129    };
3130    let actual_id = insert(conn, &clone)?;
3131    // Clone → source: derived_from. Safe to ignore if the link layer
3132    // short-circuits on self-link (impossible here — distinct IDs).
3133    create_link(
3134        conn,
3135        &actual_id,
3136        source_id,
3137        crate::models::MemoryLinkRelation::DerivedFrom.as_str(),
3138    )?;
3139    Ok(actual_id)
3140}
3141
3142/// v0.6.3.1 P2 (G6) — quick existence check for `(title, namespace)`. Used by
3143/// `on_conflict='error'` callers to short-circuit before the full upsert
3144/// machinery runs. Returns the existing row id if there is one.
3145///
3146/// # Errors
3147///
3148/// Returns the underlying SQLite error.
3149pub fn find_by_title_namespace(
3150    conn: &Connection,
3151    title: &str,
3152    namespace: &str,
3153) -> Result<Option<String>> {
3154    let id: Option<String> = conn
3155        .query_row(
3156            "SELECT id FROM memories WHERE title = ?1 AND namespace = ?2 LIMIT 1",
3157            params![title, namespace],
3158            |r| r.get(0),
3159        )
3160        .ok();
3161    Ok(id)
3162}
3163
3164/// v0.6.3.1 P2 (G6) — pick a title that does not collide with an existing
3165/// `(title, namespace)` row by appending `(2)`, `(3)`, ... up to a hard cap.
3166/// The first available suffix wins. Used by `on_conflict='version'`.
3167///
3168/// The cap (`MAX_VERSION_SUFFIX`) prevents an infinite loop in pathological
3169/// cases (e.g. an attacker spamming the same title in a loop). Once the cap
3170/// is hit, the caller falls back to error mode.
3171const MAX_VERSION_SUFFIX: u32 = 1024;
3172
3173/// # Errors
3174///
3175/// Returns the underlying SQLite error or an error if no free suffix is
3176/// found within `MAX_VERSION_SUFFIX` attempts.
3177pub fn next_versioned_title(
3178    conn: &Connection,
3179    base_title: &str,
3180    namespace: &str,
3181) -> Result<String> {
3182    if find_by_title_namespace(conn, base_title, namespace)?.is_none() {
3183        return Ok(base_title.to_string());
3184    }
3185    for n in 2..=MAX_VERSION_SUFFIX {
3186        let candidate = format!("{base_title} ({n})");
3187        if find_by_title_namespace(conn, &candidate, namespace)?.is_none() {
3188            return Ok(candidate);
3189        }
3190    }
3191    // #962 typed envelope — UniqueConflict (the substrate could not
3192    // mint a non-colliding versioned title within the cap). Caller is
3193    // expected to retry with a different base title or raise the cap.
3194    Err(anyhow::Error::new(StorageError::UniqueConflict {
3195        reason: format!(
3196            "could not find a free versioned title for '{base_title}' in namespace '{namespace}' \
3197             within {MAX_VERSION_SUFFIX} attempts"
3198        ),
3199    }))
3200}
3201
/// Words removed from a title before the title-similarity Jaccard score
/// in [`find_contradictions`] is computed.
///
/// Deliberately a tiny, closed-class English set: a larger stopword list
/// would strip too much from short or agglutinative titles and trade one
/// kind of noise for another. The contradiction surface is meant to be a
/// near-duplicate-titles signal, not a general content search. Entries
/// are lowercase, matching the lowercased tokens they are compared with.
const CONTRADICTION_TITLE_STOPWORDS: &[&str] = &[
    "a", "an", "and", "are", "as", "at", "be", "by", "for", "from", "has", "have", "in", "is",
    "it", "its", "of", "on", "or", "that", "the", "this", "to", "was", "were", "will", "with",
];
3212
/// Smallest Jaccard overlap (on lowercased, stopword-stripped title
/// tokens) between the seed title and a candidate title for the
/// candidate to count as a contradiction hit.
///
/// **Why this exists** (issue #1320). Before the floor,
/// [`find_contradictions`] simply returned the top 5 FTS5 matches for an
/// OR-joined query built from the seed title. A seed such as "Tomatoes
/// are red" becomes `"tomatoes" OR "are" OR "red"`, so every row that
/// merely contains the stopword "are" could rank near the top. Operators
/// saw unrelated memories ("Moon landing happened in 1969",
/// "Retrieval-augmented generation works by...") reported as
/// `potential_contradictions` against tomato facts — pure stopword noise.
///
/// The floor keeps the documented "similar titles" behaviour: "Database
/// is PostgreSQL" vs "Database is MySQL" share `{database}` out of three
/// distinct content tokens (Jaccard `1/3 ≈ 0.33`, above 0.30), while
/// titles on disjoint topics score 0 and are rejected.
const CONTRADICTION_TITLE_JACCARD_FLOOR: f32 = 0.30;
3231
3232/// Lowercase + stopword-strip a title for the contradiction Jaccard
3233/// comparison. Splits on non-alphanumeric so titles like
3234/// `"Database is PostgreSQL"` and `"Database/is/PostgreSQL"` produce
3235/// the same token set.
3236fn contradiction_title_tokens(title: &str) -> std::collections::HashSet<String> {
3237    title
3238        .split(|c: char| !c.is_alphanumeric())
3239        .map(str::to_ascii_lowercase)
3240        .filter(|t| !t.is_empty())
3241        .filter(|t| !CONTRADICTION_TITLE_STOPWORDS.contains(&t.as_str()))
3242        .collect()
3243}
3244
/// Jaccard similarity `|a ∩ b| / |a ∪ b|` of two pre-tokenised title
/// sets.
///
/// An empty set on either side scores `0.0`, so a seed title made up
/// entirely of stopwords (e.g. `"the"`) can never produce phantom hits.
#[allow(clippy::cast_precision_loss)]
fn contradiction_title_jaccard(
    a: &std::collections::HashSet<String>,
    b: &std::collections::HashSet<String>,
) -> f32 {
    if a.is_empty() || b.is_empty() {
        return 0.0;
    }
    let shared = a.intersection(b).count();
    // Inclusion–exclusion: |a ∪ b| = |a| + |b| − |a ∩ b|. Both sets are
    // non-empty here, so the union holds at least one element.
    let combined = a.len() + b.len() - shared;
    shared as f32 / combined as f32
}
3260
3261/// Stage-1 FTS5 recall for similar-title candidates. Returns up to
3262/// `limit` rows from `memories_fts` matching the sanitised seed
3263/// title, ordered by FTS5 rank.
3264///
3265/// This is the broader recall pool that feeds both
3266/// [`find_contradictions`] (wire-side `potential_contradictions`,
3267/// post Stage-2 Jaccard floor) and [`find_synthesis_candidates`]
3268/// (Form 1 synthesis curator, NO Jaccard floor). Two consumers,
3269/// two different relevance budgets; see #1320 + #1337 for why the
3270/// pool can't be filtered universally.
3271fn find_similar_title_candidates(
3272    conn: &Connection,
3273    title: &str,
3274    namespace: &str,
3275    limit: usize,
3276) -> Result<Vec<Memory>> {
3277    let fts_query = sanitize_fts_query(title, true);
3278    let mut stmt = conn.prepare(
3279        "SELECT m.id, m.tier, m.namespace, m.title, m.content, m.tags, m.priority,
3280                m.confidence, m.source, m.access_count, m.created_at, m.updated_at,
3281                m.last_accessed_at, m.expires_at, m.metadata, m.reflection_depth,
3282                m.memory_kind, m.entity_id, m.persona_version,
3283                m.citations, m.source_uri, m.source_span,
3284                m.confidence_source, m.confidence_signals, m.confidence_decayed_at
3285         FROM memories_fts fts
3286         JOIN memories m ON m.rowid = fts.rowid
3287         WHERE memories_fts MATCH ?1 AND m.namespace = ?2
3288         ORDER BY fts.rank
3289         LIMIT ?3",
3290    )?;
3291    let rows = stmt.query_map(
3292        params![fts_query, namespace, i64::try_from(limit).unwrap_or(20)],
3293        row_to_memory,
3294    )?;
3295    rows.collect::<rusqlite::Result<Vec<_>>>()
3296        .map_err(Into::into)
3297}
3298
3299/// Detect potential contradictions: memories in same namespace with similar titles.
3300///
3301/// Two-stage filter (#1320 calibration):
3302/// 1. FTS5 OR-match on stopword-tolerant query — fast recall over
3303///    `memories_fts`, capped at a candidate ceiling so a pathological
3304///    common-word title can't pull the entire namespace.
3305/// 2. Jaccard-token-overlap floor on the stopword-stripped title sets,
3306///    keeping only candidates whose title shares at least
3307///    [`CONTRADICTION_TITLE_JACCARD_FLOOR`] of the seed's content
3308///    tokens. Final result is capped at 5 (the pre-fix wire ceiling).
3309///
3310/// The two-stage design preserves the "similar title" semantics that
3311/// the wire-side `potential_contradictions` field documents while
3312/// removing the stopword-OR noise floor that crossed unrelated topics
3313/// at v0.6.x / pre-fix v0.7.0.
3314///
3315/// **Scope** (#1337): this function is the WIRE-output filter. The
3316/// Form 1 synthesis curator path uses [`find_synthesis_candidates`]
3317/// instead, which omits the Stage-2 Jaccard floor — the curator needs
3318/// the broader Stage-1 pool to see legitimately-similar memories
3319/// whose titles share only one strong content token (e.g.
3320/// `"kubernetes deployment notes"` vs
3321/// `"kubernetes rolling deploy strategy"`, Jaccard 1/6 ≈ 0.167)
3322/// without depending on whether 0.30 happens to be the right
3323/// stopword-noise floor for the wire surface.
3324pub fn find_contradictions(conn: &Connection, title: &str, namespace: &str) -> Result<Vec<Memory>> {
3325    // Stage 1 — FTS5 recall. Pull a wider candidate pool (20) so the
3326    // stage-2 Jaccard filter has headroom; the final cap of 5 is
3327    // applied after the filter so the wire shape is preserved.
3328    let candidates = find_similar_title_candidates(conn, title, namespace, 20)?;
3329
3330    // Stage 2 — Jaccard floor on stopword-stripped title tokens.
3331    let seed_tokens = contradiction_title_tokens(title);
3332    let mut filtered: Vec<Memory> = candidates
3333        .into_iter()
3334        .filter(|cand| {
3335            let cand_tokens = contradiction_title_tokens(&cand.title);
3336            contradiction_title_jaccard(&seed_tokens, &cand_tokens)
3337                >= CONTRADICTION_TITLE_JACCARD_FLOOR
3338        })
3339        .collect();
3340    filtered.truncate(5);
3341    Ok(filtered)
3342}
3343
3344/// Stage-1-only FTS5 candidate recall for the Form 1 synthesis
3345/// curator path.
3346///
3347/// The synthesis curator (`mcp/tools/store/synthesis.rs`) needs the
3348/// broader similar-title pool — every namespace row whose title
3349/// matches the seed under FTS5 — so the LLM can decide which
3350/// candidates legitimately overlap with the incoming write.
3351///
3352/// This intentionally OMITS the Stage-2 Jaccard floor that
3353/// [`find_contradictions`] applies to its wire output: the floor was
3354/// calibrated for "stopword-only overlap" wire-noise rejection
3355/// (#1320), but the synthesis tests exercise legitimate single-strong-
3356/// token overlaps (e.g. `"kubernetes deployment notes"` vs
3357/// `"kubernetes rolling deploy strategy"` share `{kubernetes}` =
3358/// Jaccard 1/6 ≈ 0.167 < 0.30). Applying the wire-floor here would
3359/// hide those candidates from the curator and short-circuit every
3360/// add/update/delete verb in the synthesis verdict matrix (#1337).
3361///
3362/// Returns up to 5 candidates (matches the wire ceiling for
3363/// `potential_contradictions`, the historical synthesis prompt cap).
3364pub fn find_synthesis_candidates(
3365    conn: &Connection,
3366    title: &str,
3367    namespace: &str,
3368) -> Result<Vec<Memory>> {
3369    let mut candidates = find_similar_title_candidates(conn, title, namespace, 20)?;
3370    candidates.truncate(5);
3371    Ok(candidates)
3372}
3373
3374// --- Links ---
3375//
3376// v0.7.0 fix-campaign A3 (LINK-PARITY) error prefix constants
3377// (`LINK_CYCLE_ERR_PREFIX`, `LINK_PERMISSION_DENIED_ERR_PREFIX`) moved
3378// to `super::error` under #962 so they stay co-located with the typed
3379// `StorageError` variants whose Display impl emits them. Re-exported
3380// at the module root above for `db::LINK_CYCLE_ERR_PREFIX` path
3381// stability.
3382
3383/// v0.7.0 fix-campaign A3 (LINK-PARITY) — shared pre-create validator
3384/// invoked by every link-write entry point.
3385///
3386/// Closes the S5-H2 HIGH finding (#690): before A3 the L1-2 cycle
3387/// check + K9 permission pipeline ran only in
3388/// `src/mcp/tools/link.rs::handle_link`, so the HTTP `POST /api/v1/links`
3389/// path and the federation-receive `sync_push` link loop could land
3390/// `reflects_on` edges that the MCP path would have refused. The fix
3391/// is defense-in-depth at the storage layer: every path — MCP, HTTP,
3392/// SAL, federation — calls this helper, so the gates enforce no
3393/// matter which entry point initiates the write.
3394///
3395/// Pipeline:
3396///
3397/// 1. Cycle check — invoked only when `relation == "reflects_on"`.
3398///    Calls [`crate::kg::cycle_check::would_create_reflection_cycle`]
3399///    with the namespace-scoped `effective_max_reflection_depth` cap; on
3400///    a `would_cycle` hit, returns an error prefixed with
3401///    [`LINK_CYCLE_ERR_PREFIX`] so HTTP can surface 409 CONFLICT and
3402///    signed-event emit can record the refusal. The walk fails CLOSED on
3403///    SQL errors and on depth-ceiling truncation.
3404/// 2. K9 permission eval — runs the unified
3405///    [`crate::permissions::Permissions::evaluate`] pipeline against the
3406///    source memory's namespace. On `Deny`, returns an error prefixed
3407///    with [`LINK_PERMISSION_DENIED_ERR_PREFIX`] so HTTP surfaces 403.
3408///    `Ask` is treated as `Deny` here because the storage-layer
3409///    helper has no Ask-channel back to the operator; entry points
3410///    that want interactive Ask handling (MCP) should invoke
3411///    `Permissions::evaluate` directly BEFORE calling create_link.
3412///
3413/// `skip_governance` lets federation-receive bypass the K9 gate when
3414/// the inbound link has already been cryptographically attested by an
3415/// enrolled peer (attest_level == "peer_attested"). The cycle check
3416/// always runs — even a trusted peer should not be able to extend a
3417/// reflection cycle on the receiver. See `create_link_inbound` for the
3418/// caller-side decision logic.
3419///
3420/// `agent_id` defaults to `"system"` when the caller cannot resolve a
3421/// concrete claimant (federation receive path with no claim, etc.) —
3422/// the permission rule matcher uses it for `agent_pattern` matching.
3423pub fn validate_link_pre_create(
3424    conn: &Connection,
3425    source_id: &str,
3426    target_id: &str,
3427    relation: &str,
3428    agent_id: &str,
3429    skip_governance: bool,
3430) -> Result<()> {
3431    // Pass 1: cycle check. Only `reflects_on` participates in the
3432    // DAG invariant — the other four relations are intentionally
3433    // allowed to form cycles (e.g. mutual `related_to`).
3434    if relation == crate::models::MemoryLinkRelation::ReflectsOn.as_str() {
3435        // Resolve the namespace-scoped reflection-depth cap so the cycle
3436        // walk's fail-CLOSED ceiling tracks the same governance policy the
3437        // MCP link path uses (`src/mcp/tools/link.rs`). The source memory's
3438        // namespace governs; a missing source falls back to the default
3439        // namespace (create_link's FK guard surfaces the missing row later).
3440        let link_ns = match get(conn, source_id) {
3441            Ok(Some(m)) => m.namespace,
3442            _ => crate::DEFAULT_NAMESPACE.to_string(),
3443        };
3444        let max_depth = resolve_governance_policy(conn, &link_ns)
3445            .unwrap_or_default()
3446            .effective_max_reflection_depth();
3447        if crate::kg::cycle_check::would_create_reflection_cycle(
3448            conn, source_id, target_id, max_depth,
3449        )?
3450        .would_cycle
3451        {
3452            // #962 typed envelope. Display preserves `LINK_CYCLE_ERR_PREFIX`.
3453            return Err(anyhow::Error::new(StorageError::LinkReflectionCycle {
3454                source_id: source_id.to_string(),
3455                target_id: target_id.to_string(),
3456            }));
3457        }
3458    }
3459
3460    // Pass 2: K9 permission eval. Skip when the caller has already
3461    // established external attestation (federation peer_attested).
3462    if !skip_governance {
3463        // Link evaluation is scoped to the *source* memory's
3464        // namespace — matches the MCP path's choice at
3465        // `src/mcp/tools/link.rs:31`. Missing source memory falls
3466        // back to "global"; create_link's own FK guard will surface
3467        // the missing-memory error after this returns.
3468        let link_ns = match get(conn, source_id) {
3469            Ok(Some(m)) => m.namespace,
3470            _ => crate::DEFAULT_NAMESPACE.to_string(),
3471        };
3472        evaluate_link_permission(&link_ns, source_id, target_id, relation, agent_id)
3473            .map_err(anyhow::Error::new)?;
3474    }
3475    Ok(())
3476}
3477
3478/// #1568 (H1 residual) — backend-agnostic K9 permission evaluation for
3479/// a pending link write. This is Pass 2 of [`validate_link_pre_create`]
3480/// hoisted into a shared free fn so BOTH adapters consult the same
3481/// governance gate: the sqlite path delegates from
3482/// `validate_link_pre_create`; the postgres SAL adapter's
3483/// `link_internal` (`src/store/postgres.rs`) calls it directly after
3484/// resolving the source memory's namespace via SQL. Keeping the
3485/// evaluation here means the two backends cannot drift on link
3486/// governance semantics.
3487///
3488/// # Errors
3489///
3490/// Returns [`StorageError::LinkPermissionDenied`] (Display preserves
3491/// [`LINK_PERMISSION_DENIED_ERR_PREFIX`]) on `Deny`, and on `Ask` —
3492/// the storage layer has no Ask channel; entry points that want
3493/// interactive Ask handling (MCP) run `Permissions::evaluate`
3494/// themselves BEFORE the storage write.
3495pub(crate) fn evaluate_link_permission(
3496    link_ns: &str,
3497    source_id: &str,
3498    target_id: &str,
3499    relation: &str,
3500    agent_id: &str,
3501) -> std::result::Result<(), StorageError> {
3502    use crate::permissions::{Decision, Op, PermissionContext, Permissions};
3503    let ctx = PermissionContext {
3504        op: Op::MemoryLink,
3505        namespace: link_ns.to_string(),
3506        agent_id: agent_id.to_string(),
3507        payload: serde_json::json!({
3508            "source_id": source_id,
3509            "target_id": target_id,
3510            "relation": relation,
3511        }),
3512    };
3513    match Permissions::evaluate(&ctx, &[]) {
3514        Decision::Allow | Decision::Modify(_) => Ok(()),
3515        // #962 typed envelope. Display preserves
3516        // `LINK_PERMISSION_DENIED_ERR_PREFIX`.
3517        Decision::Deny(reason) => Err(StorageError::LinkPermissionDenied { reason }),
3518        Decision::Ask(prompt) => Err(StorageError::LinkPermissionDenied {
3519            reason: format!("ask deferred to storage layer ({prompt})"),
3520        }),
3521    }
3522}
3523
3524/// Insert a directional `(source_id, target_id, relation)` link.
3525///
3526/// Backward-compat shim around [`create_link_signed`] with no active
3527/// keypair — every call here writes `signature = NULL` and
3528/// `attest_level = "unsigned"`. New code that wants signing should
3529/// route through [`create_link_signed`] directly.
3530pub fn create_link(
3531    conn: &Connection,
3532    source_id: &str,
3533    target_id: &str,
3534    relation: &str,
3535) -> Result<()> {
3536    create_link_signed(conn, source_id, target_id, relation, None).map(|_| ())
3537}
3538
3539/// v0.7 H2 — link write that optionally signs with the active agent's
3540/// Ed25519 keypair.
3541///
3542/// When `keypair` carries a private key, the six signable fields
3543/// (`src_id`, `dst_id`, `relation`, `observed_by`, `valid_from`,
3544/// `valid_until`) are encoded to deterministic CBOR per RFC 8949
3545/// §4.2.1, signed, and the 64-byte signature is persisted in the
3546/// existing `signature` BLOB column with `attest_level = "self_signed"`.
3547///
3548/// When `keypair` is `None` or carries only a public key, the row is
3549/// written with `signature = NULL` and `attest_level = "unsigned"` —
3550/// preserving v0.6.4 behaviour for callers that haven't generated a
3551/// keypair yet.
3552///
3553/// `observed_by` on the signed payload is set to the keypair's
3554/// `agent_id` when a keypair is present (the writer is, by definition,
3555/// the observer). The `observed_by` *column* itself is intentionally
3556/// left at the v0.6.3 default (NULL on this insert path) so existing
3557/// KG queries that join on `observed_by` keep their current shape; H4's
3558/// `memory_verify` will surface the signing identity from the keypair
3559/// + signature, not from this column.
3560///
3561/// Returns the chosen attest level so callers (HTTP/MCP wrappers) can
3562/// surface it in the wire response without re-querying the row.
3563pub fn create_link_signed(
3564    conn: &Connection,
3565    source_id: &str,
3566    target_id: &str,
3567    relation: &str,
3568    keypair: Option<&crate::identity::keypair::AgentKeypair>,
3569) -> Result<&'static str> {
3570    // v0.7.0 fix-campaign A3 (LINK-PARITY, #690) — gates that were
3571    // previously enforced only at `src/mcp/tools/link.rs::handle_link`
3572    // now run here so EVERY caller (MCP, HTTP, SAL, federation) hits
3573    // them. The agent_id used for the K9 evaluation is the keypair's
3574    // claim when present (the writer is by definition the actor);
3575    // when no keypair is configured we fall back to "system" — the
3576    // unified evaluator's `agent_pattern` defaults to `*`, so an
3577    // operator who has not authored agent-narrow rules sees no
3578    // behaviour change. The MCP path runs its own evaluate BEFORE
3579    // calling here (it needs Ask-channel handling we can't surface
3580    // from storage); the second evaluation here is idempotent under
3581    // the registry's deny-first semantics.
3582    let agent_id_for_eval = keypair
3583        .as_ref()
3584        .map(|kp| kp.agent_id.as_str())
3585        .unwrap_or("system");
3586    validate_link_pre_create(
3587        conn,
3588        source_id,
3589        target_id,
3590        relation,
3591        agent_id_for_eval,
3592        false,
3593    )?;
3594    // Verify both IDs exist before creating link
3595    let source_exists: bool = conn
3596        .query_row(SQL_MEMORY_EXISTS, params![source_id], |r| r.get(0))
3597        .unwrap_or(false);
3598    if !source_exists {
3599        // #962 typed envelope — MemoryNotFound{role=Source}.
3600        return Err(anyhow::Error::new(StorageError::MemoryNotFound {
3601            id: source_id.to_string(),
3602            role: Some(LinkEnd::Source),
3603        }));
3604    }
3605    let target_exists: bool = conn
3606        .query_row(SQL_MEMORY_EXISTS, params![target_id], |r| r.get(0))
3607        .unwrap_or(false);
3608    if !target_exists {
3609        // #962 typed envelope — MemoryNotFound{role=Target}.
3610        return Err(anyhow::Error::new(StorageError::MemoryNotFound {
3611            id: target_id.to_string(),
3612            role: Some(LinkEnd::Target),
3613        }));
3614    }
3615    // Schema v15 (Pillar 2 / Stream B) added `valid_from` for temporal
3616    // KG queries. Backfill on migration handled legacy rows; here we
3617    // populate it on the insert path so newly created links are
3618    // visible to `memory_kg_timeline` without a downstream backfill.
3619    //
3620    // v0.7.0 H6 (round-2): mirror the postgres G3 fix at
3621    // `store/postgres.rs:3539` — truncate the timestamp to microsecond
3622    // precision BEFORE we both sign over it and persist it. SQLite
3623    // stores RFC3339 TEXT and round-trips losslessly so this is a
3624    // no-op for SQLite reads, BUT a link created on the SQLite path
3625    // and later re-verified on the postgres path (or vice versa)
3626    // must commit to the same canonical RFC3339 string on both
3627    // sides. Postgres's `TIMESTAMPTZ` quantises at microsecond
3628    // resolution, so sub-microsecond digits silently disappear on
3629    // round-trip and break the Ed25519 signature. Truncating here
3630    // makes the sign/verify CBOR byte-stable across the storage
3631    // boundary regardless of which adapter wrote the row originally.
3632    let now = truncate_to_microseconds(Utc::now()).to_rfc3339();
3633
3634    // v0.7 H2 — sign if we have a private key. We compute the signature
3635    // BEFORE issuing INSERT so a CBOR/sign failure surfaces as an
3636    // outright write error (vs. a silent partial-write). The signed
3637    // payload includes `valid_from = now` and matching `observed_by`
3638    // so H3's verifier can re-derive the same bytes from the row.
3639    //
3640    // v0.7 H3 follow-up: the `observed_by` *column* is now populated
3641    // from the keypair's `agent_id` on signed inserts so federation
3642    // export (`export_links`) ships the same claim the signature
3643    // commits to. Receivers re-derive `SignableLink` from the wire
3644    // record (see `verify::verify`); without populating the column,
3645    // verification would always fail with `Tampered` because the
3646    // sender signed `Some(agent_id)` but exported `None`.
3647    let (signature, attest_level, observed_by_col): (Option<Vec<u8>>, &'static str, Option<&str>) =
3648        match keypair {
3649            Some(kp) if kp.can_sign() => {
3650                let link = crate::identity::sign::SignableLink {
3651                    src_id: source_id,
3652                    dst_id: target_id,
3653                    relation,
3654                    observed_by: Some(kp.agent_id.as_str()),
3655                    valid_from: Some(now.as_str()),
3656                    valid_until: None,
3657                };
3658                let sig = crate::identity::sign::sign(kp, &link)?;
3659                (
3660                    Some(sig),
3661                    crate::models::AttestLevel::SelfSigned.as_str(),
3662                    Some(kp.agent_id.as_str()),
3663                )
3664            }
3665            _ => (None, crate::models::AttestLevel::Unsigned.as_str(), None),
3666        };
3667
3668    let inserted = conn.execute(
3669        "INSERT OR IGNORE INTO memory_links \
3670            (source_id, target_id, relation, created_at, valid_from, signature, attest_level, observed_by) \
3671         VALUES (?1, ?2, ?3, ?4, ?4, ?5, ?6, ?7)",
3672        params![
3673            source_id,
3674            target_id,
3675            relation,
3676            now,
3677            signature,
3678            attest_level,
3679            observed_by_col
3680        ],
3681    )?;
3682
3683    // v0.7.0 S4-INFO2 — append a `memory_link.created` row to
3684    // `signed_events` so the audit ledger reflects every new link
3685    // (signed or unsigned). The `payload_hash` binds to the same
3686    // canonical CBOR that the H2 signer hashed (or would have, for
3687    // unsigned rows) so an auditor can re-derive the bytes and check
3688    // them against the row.
3689    //
3690    // Best-effort: a failure here logs a warn but does NOT roll back
3691    // the link insert. Cratering a legitimate write because the
3692    // append-only ledger had a transient SQLite error would punish
3693    // the caller for a substrate problem they cannot fix — same
3694    // discipline as `invalidate_link`'s `memory_link.invalidated`
3695    // emit (see also A2's pattern on `execute_pending_action`).
3696    //
3697    // We only emit when the INSERT actually wrote a row.
3698    // `INSERT OR IGNORE` returns `Ok(0)` on a uniqueness-conflict
3699    // replay of an existing `(source_id, target_id, relation)`; in
3700    // that case the audit row was already appended on the original
3701    // create call, and re-appending would generate a misleading
3702    // duplicate-create event.
3703    if inserted > 0 {
3704        let agent_for_event = observed_by_col
3705            .map(str::to_string)
3706            .unwrap_or_else(|| "unknown".to_string());
3707        let signable = crate::identity::sign::SignableLink {
3708            src_id: source_id,
3709            dst_id: target_id,
3710            relation,
3711            observed_by: observed_by_col,
3712            valid_from: Some(now.as_str()),
3713            valid_until: None,
3714        };
3715        match crate::identity::sign::canonical_cbor(&signable) {
3716            Ok(cbor) => {
3717                let event = crate::signed_events::SignedEvent {
3718                    id: uuid::Uuid::new_v4().to_string(),
3719                    agent_id: agent_for_event,
3720                    event_type: crate::signed_events::event_types::MEMORY_LINK_CREATED.to_string(),
3721                    payload_hash: crate::signed_events::payload_hash(&cbor),
3722                    signature: signature.clone(),
3723                    attest_level: attest_level.to_string(),
3724                    timestamp: Utc::now().to_rfc3339(),
3725                    ..crate::signed_events::SignedEvent::default()
3726                };
3727                if let Err(e) = crate::signed_events::append_signed_event(conn, &event) {
3728                    tracing::warn!(
3729                        target: crate::signed_events::SIGNED_EVENTS_TRACE_TARGET,
3730                        source_id, target_id, relation,
3731                        "failed to append memory_link.created audit row: {e}"
3732                    );
3733                }
3734            }
3735            Err(e) => {
3736                tracing::warn!(
3737                    target: crate::signed_events::SIGNED_EVENTS_TRACE_TARGET,
3738                    source_id, target_id, relation,
3739                    "failed to encode canonical CBOR for memory_link.created audit: {e}"
3740                );
3741            }
3742        }
3743    }
3744
3745    Ok(attest_level)
3746}
3747
3748/// v0.7.0 issue #812 / #813 — return the strongest `attest_level`
3749/// label across every outbound link rooted at `source_id`.
3750///
3751/// Strength ladder (highest first):
3752///
3753///   `peer_attested` > `self_signed` > `unsigned`
3754///
3755/// The persona-signing path (`PersonaGenerator::generate`) uses this
3756/// to stamp the Persona's own `attest_level` metadata so the
3757/// downstream `memory_persona` / `memory_persona_generate` wire
3758/// response carries the same attestation level the substrate's
3759/// `derives_from` edges actually hold — a Persona whose source
3760/// links are all signed is itself self-signed, whereas a Persona
3761/// whose source links are unsigned cannot truthfully claim
3762/// `self_signed` no matter what label the curator stamps on it.
3763///
3764/// Returns `"unsigned"` for a source with no outbound links — the
3765/// only honest default for a row whose attestation surface is
3766/// empty.
3767///
3768/// # Errors
3769///
3770/// Bubbles up `rusqlite` errors from the SELECT.
3771pub fn strongest_attest_level_for_source(conn: &Connection, source_id: &str) -> Result<String> {
3772    let mut stmt = conn.prepare(
3773        "SELECT attest_level FROM memory_links \
3774         WHERE source_id = ?1",
3775    )?;
3776    let rows = stmt.query_map(params![source_id], |r| r.get::<_, String>(0))?;
3777    let unsigned = crate::models::AttestLevel::Unsigned.as_str();
3778    let self_signed = crate::models::AttestLevel::SelfSigned.as_str();
3779    let peer_attested = crate::models::AttestLevel::PeerAttested.as_str();
3780    let mut strongest = unsigned;
3781    for row in rows {
3782        let level = row?;
3783        if level == peer_attested {
3784            return Ok(peer_attested.to_string());
3785        }
3786        if level == self_signed && strongest == unsigned {
3787            strongest = self_signed;
3788        }
3789    }
3790    Ok(strongest.to_string())
3791}
3792
3793/// v0.7 H3 — insert an inbound (federation-replicated) link with a
3794/// pre-computed signature and attest level.
3795///
3796/// Distinct from [`create_link_signed`] because the receiver is *not*
3797/// the signer: it must persist whatever bytes the peer signed
3798/// (signature + observed_by + valid_from + valid_until) verbatim, so a
3799/// later `memory_verify` (H4) can re-derive the same canonical CBOR
3800/// from the stored row and re-check against the peer's public key. We
3801/// can't re-sign on the receiver — we don't hold the peer's private
3802/// key, by design.
3803///
3804/// The caller (federation `sync_push` link loop) is responsible for:
3805/// 1. Looking up the peer's public key via
3806///    [`crate::identity::verify::lookup_peer_public_key`].
3807/// 2. Calling [`crate::identity::verify::verify`] when a public key is
3808///    known, and rejecting the link when verification fails.
3809/// 3. Choosing the `attest_level` literal:
3810///    - `"peer_attested"` — verified successfully against an enrolled key,
3811///    - `"unsigned"` — no public key enrolled for `observed_by`, or the
3812///      sender shipped no signature (legacy peer).
3813///
3814/// Idempotent on the unique `(source_id, target_id, relation)` index —
3815/// duplicate inbound replays collapse to a no-op without error.
3816///
3817/// Both `source_id` and `target_id` must already exist locally; the
3818/// receiver is expected to apply incoming `memories` *before* incoming
3819/// `links` in the same `sync_push` request, which the existing handler
3820/// already does.
3821///
3822/// `valid_from` defaults to "now" only when the inbound row carries
3823/// `None` (legacy peer that never populated the column); otherwise the
3824/// peer's value is preserved so the signature still verifies.
3825///
3826/// # Errors
3827///
3828/// Bubbles up the same DB / FK errors as `create_link_signed`. Pre-flight
3829/// existence checks mirror the outbound path so the receiver fails loud
3830/// on missing memories rather than silently dropping the link.
3831pub fn create_link_inbound(conn: &Connection, link: &MemoryLink, attest_level: &str) -> Result<()> {
3832    // v0.7.0 fix-campaign A3 (LINK-PARITY, #690) — defense-in-depth at
3833    // the receiver. The cycle check ALWAYS runs even on inbound peer
3834    // writes: a peer should not be able to extend a `reflects_on`
3835    // cycle on the receiver any more than a local caller can. The K9
3836    // permission gate is BYPASSED only when the inbound link is
3837    // `peer_attested` (the peer's signature was cryptographically
3838    // verified against an enrolled public key in
3839    // `handlers::federation_receive::sync_push` before this call). For
3840    // every other attest_level — including `"unsigned"`, which covers
3841    // legacy peers AND peers whose public key we have not enrolled —
3842    // the local K9 rules enforce. This is the design choice documented
3843    // in #690: mTLS + Ed25519 sig verification is the federation's
3844    // attestation layer; once that passes, namespace governance is the
3845    // peer's local responsibility, not the receiver's. The
3846    // `observed_by` claim becomes the `agent_id` for the K9 evaluation
3847    // when not bypassed — that's the peer's claimed writer and matches
3848    // what the rule matcher already uses for outbound links.
3849    let skip_governance = attest_level == crate::models::AttestLevel::PeerAttested.as_str();
3850    let peer_agent_id = link.observed_by.as_deref().unwrap_or("system");
3851    validate_link_pre_create(
3852        conn,
3853        &link.source_id,
3854        &link.target_id,
3855        link.relation.as_str(),
3856        peer_agent_id,
3857        skip_governance,
3858    )?;
3859    // Same FK guard as create_link_signed — a missing memory means the
3860    // peer raced ahead of us; we surface that to the caller's warn log
3861    // rather than papering over with INSERT OR IGNORE silently.
3862    let source_exists: bool = conn
3863        .query_row(SQL_MEMORY_EXISTS, params![link.source_id], |r| r.get(0))
3864        .unwrap_or(false);
3865    if !source_exists {
3866        // #962 typed envelope — MemoryNotFound{role=Source}.
3867        return Err(anyhow::Error::new(StorageError::MemoryNotFound {
3868            id: link.source_id.clone(),
3869            role: Some(LinkEnd::Source),
3870        }));
3871    }
3872    let target_exists: bool = conn
3873        .query_row(SQL_MEMORY_EXISTS, params![link.target_id], |r| r.get(0))
3874        .unwrap_or(false);
3875    if !target_exists {
3876        // #962 typed envelope — MemoryNotFound{role=Target}.
3877        return Err(anyhow::Error::new(StorageError::MemoryNotFound {
3878            id: link.target_id.clone(),
3879            role: Some(LinkEnd::Target),
3880        }));
3881    }
3882
3883    let now = Utc::now().to_rfc3339();
3884    // Preserve peer's `valid_from` byte-identical so `memory_verify`
3885    // (H4) can re-derive the signed payload from the stored row.
3886    let valid_from = link.valid_from.clone().unwrap_or_else(|| now.clone());
3887    let created_at = if link.created_at.is_empty() {
3888        now
3889    } else {
3890        link.created_at.clone()
3891    };
3892
3893    let inserted = conn.execute(
3894        "INSERT OR IGNORE INTO memory_links \
3895            (source_id, target_id, relation, created_at, valid_from, valid_until, \
3896             signature, attest_level, observed_by) \
3897         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)",
3898        params![
3899            link.source_id,
3900            link.target_id,
3901            link.relation.as_str(),
3902            created_at,
3903            valid_from,
3904            link.valid_until,
3905            link.signature,
3906            attest_level,
3907            link.observed_by,
3908        ],
3909    )?;
3910
3911    // v0.7.0 S4-INFO2 — append a `memory_link.created` row to
3912    // `signed_events` for inbound replicated links too. The audit
3913    // ledger should reflect every new link visible locally, not just
3914    // outbound writes. `payload_hash` binds to the canonical CBOR
3915    // re-derived from the wire-shape link the peer signed, so an
3916    // auditor can replay the exact bytes that were verified at
3917    // ingress.
3918    //
3919    // Best-effort: a failure logs a warn but does NOT roll back the
3920    // link insert (same discipline as the outbound path above and as
3921    // `invalidate_link`'s emit).
3922    //
3923    // Only emit when the INSERT actually wrote a row (idempotent
3924    // sync replays must not generate duplicate-create events).
3925    if inserted > 0 {
3926        let agent_for_event = link
3927            .observed_by
3928            .clone()
3929            .unwrap_or_else(|| "unknown".to_string());
3930        let signable = crate::identity::sign::SignableLink {
3931            src_id: link.source_id.as_str(),
3932            dst_id: link.target_id.as_str(),
3933            relation: link.relation.as_str(),
3934            observed_by: link.observed_by.as_deref(),
3935            valid_from: Some(valid_from.as_str()),
3936            valid_until: link.valid_until.as_deref(),
3937        };
3938        match crate::identity::sign::canonical_cbor(&signable) {
3939            Ok(cbor) => {
3940                let event = crate::signed_events::SignedEvent {
3941                    id: uuid::Uuid::new_v4().to_string(),
3942                    agent_id: agent_for_event,
3943                    event_type: crate::signed_events::event_types::MEMORY_LINK_CREATED.to_string(),
3944                    payload_hash: crate::signed_events::payload_hash(&cbor),
3945                    signature: link.signature.clone(),
3946                    attest_level: attest_level.to_string(),
3947                    timestamp: Utc::now().to_rfc3339(),
3948                    ..crate::signed_events::SignedEvent::default()
3949                };
3950                if let Err(e) = crate::signed_events::append_signed_event(conn, &event) {
3951                    tracing::warn!(
3952                        target: crate::signed_events::SIGNED_EVENTS_TRACE_TARGET,
3953                        source_id = %link.source_id,
3954                        target_id = %link.target_id,
3955                        relation = %link.relation,
3956                        "failed to append memory_link.created audit row (inbound): {e}"
3957                    );
3958                }
3959            }
3960            Err(e) => {
3961                tracing::warn!(
3962                    target: crate::signed_events::SIGNED_EVENTS_TRACE_TARGET,
3963                    source_id = %link.source_id,
3964                    target_id = %link.target_id,
3965                    relation = %link.relation,
3966                    "failed to encode canonical CBOR for inbound memory_link.created audit: {e}"
3967                );
3968            }
3969        }
3970    }
3971
3972    Ok(())
3973}
3974
3975pub fn get_links(conn: &Connection, id: &str) -> Result<Vec<MemoryLink>> {
3976    // v0.7.0 issue #860 — the `memory_get_links` MCP tool's docstring
3977    // promises attestation level + temporal-validity columns
3978    // (`valid_from`, `valid_until`, `observed_by`, `attest_level`) per
3979    // link. The pre-fix SELECT only pulled 4 columns and hard-coded the
3980    // optional fields to `None`, so the promised columns never reached
3981    // the caller. Expand the SELECT to the full row projection that
3982    // the docs commit to. `signature` is intentionally NOT surfaced —
3983    // it is the verification surface owned by the `memory_verify` tool
3984    // (`LinkVerifyRecord` below), not the read-only graph view.
3985    let mut stmt = conn.prepare(
3986        "SELECT source_id, target_id, relation, created_at, \
3987                valid_from, valid_until, observed_by, attest_level \
3988         FROM memory_links \
3989         WHERE source_id = ?1 OR target_id = ?1",
3990    )?;
3991    let rows = stmt.query_map(params![id], |row| {
3992        let relation_str: String = row.get(2)?;
3993        Ok(MemoryLink {
3994            source_id: row.get(0)?,
3995            target_id: row.get(1)?,
3996            // v0.7.0 fix campaign R1-M4 — parse the TEXT column into the
3997            // typed `MemoryLinkRelation` closed set. Unknown values (only
3998            // possible from pre-CHECK rows or a buggy direct-SQL writer)
3999            // fall back to the canonical default so the read-side never
4000            // panics; the SQL CHECK on the write side prevents new bad
4001            // rows from landing.
4002            relation: crate::models::MemoryLinkRelation::from_str(&relation_str)
4003                .unwrap_or_default(),
4004            created_at: row.get(3)?,
4005            // v0.7.0 #860 — temporal-validity + attestation columns
4006            // promised by the `memory_get_links` docstring. `signature`
4007            // stays `None`: that bytes-on-the-wire surface is the
4008            // verifier's concern (`LinkVerifyRecord`), and exposing it
4009            // here would force the JSON response to carry a base64 blob
4010            // every existing caller would have to ignore.
4011            signature: None,
4012            valid_from: row.get::<_, Option<String>>(4)?,
4013            valid_until: row.get::<_, Option<String>>(5)?,
4014            observed_by: row.get::<_, Option<String>>(6)?,
4015            attest_level: row.get::<_, Option<String>>(7)?,
4016        })
4017    })?;
4018    rows.collect::<rusqlite::Result<Vec<_>>>()
4019        .map_err(Into::into)
4020}
4021
4022#[allow(dead_code)]
4023pub fn delete_link(conn: &Connection, source_id: &str, target_id: &str) -> Result<bool> {
4024    let changed = conn.execute(
4025        "DELETE FROM memory_links WHERE source_id = ?1 AND target_id = ?2",
4026        params![source_id, target_id],
4027    )?;
4028    Ok(changed > 0)
4029}
4030
/// v0.7 H4 — full row-projection used by the `memory_verify` MCP tool.
///
/// `get_links` (above) serves the read-only graph view and always
/// leaves `signature` as `None`; H4 needs the *signed bundle* — the raw
/// signature blob, the agent_id that signed (`observed_by`), and the
/// temporal-validity columns the signature commits to. Keeping this a
/// separate record leaves the existing read path's wire shape
/// unchanged.
///
/// Built by [`get_link_for_verify`], which yields `Ok(None)` when the
/// row is absent so the caller can shape a "not found" response instead
/// of bubbling up a generic SQL error.
///
/// `PartialEq`/`Eq` are derived so callers and tests can compare whole
/// records directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LinkVerifyRecord {
    /// Id of the memory the link starts from.
    pub source_id: String,
    /// Id of the memory the link points to.
    pub target_id: String,
    /// Raw `relation` column text (not parsed into the typed enum here).
    pub relation: String,
    /// Raw signature bytes, `None` when the column is `NULL`.
    pub signature: Option<Vec<u8>>,
    /// Agent id that signed the link, when recorded.
    pub observed_by: Option<String>,
    /// Start of the temporal-validity window, when recorded.
    pub valid_from: Option<String>,
    /// End of the temporal-validity window, when recorded.
    pub valid_until: Option<String>,
    /// Raw column value as stored by H2/H3 (`"unsigned"`, `"self_signed"`,
    /// `"peer_attested"`, or rarely `NULL` for very old rows that
    /// pre-date the H2 `attest_level` column). H4's MCP handler
    /// normalises a `NULL` to the `Unsigned` enum variant.
    pub attest_level: Option<String>,
}
4057
4058/// Fetch the single link identified by the `(source_id, target_id, relation)`
4059/// composite primary key — the only unique identifier `memory_links`
4060/// exposes today.
4061///
4062/// Used by the H4 `memory_verify` MCP tool to re-derive the canonical
4063/// CBOR payload from the stored row before re-checking the signature.
4064///
4065/// # Errors
4066///
4067/// Bubbles up rusqlite errors. Returns `Ok(None)` when the row is
4068/// absent — this is the load-bearing distinction `memory_verify` needs
4069/// to surface a structured "link not found" response to its caller.
4070pub fn get_link_for_verify(
4071    conn: &Connection,
4072    source_id: &str,
4073    target_id: &str,
4074    relation: &str,
4075) -> Result<Option<LinkVerifyRecord>> {
4076    let mut stmt = conn.prepare(
4077        "SELECT source_id, target_id, relation, signature, observed_by, \
4078                valid_from, valid_until, attest_level \
4079         FROM memory_links \
4080         WHERE source_id = ?1 AND target_id = ?2 AND relation = ?3",
4081    )?;
4082    let mut rows = stmt.query(params![source_id, target_id, relation])?;
4083    if let Some(row) = rows.next()? {
4084        Ok(Some(LinkVerifyRecord {
4085            source_id: row.get(0)?,
4086            target_id: row.get(1)?,
4087            relation: row.get(2)?,
4088            signature: row.get::<_, Option<Vec<u8>>>(3)?,
4089            observed_by: row.get::<_, Option<String>>(4)?,
4090            valid_from: row.get::<_, Option<String>>(5)?,
4091            valid_until: row.get::<_, Option<String>>(6)?,
4092            attest_level: row.get::<_, Option<String>>(7)?,
4093        }))
4094    } else {
4095        Ok(None)
4096    }
4097}
4098
4099// --- Consolidation ---
4100
/// #1558 batch 5 wave 3 — canonical `source` value stamped on rows
/// minted by [`consolidate`] (MCP `memory_consolidate` + the HTTP
/// power-consolidation handler pass it verbatim). Listed in
/// `validate::VALID_SOURCES`; one spelling, hoist-only.
///
/// [`consolidate`] does not reference this constant itself: it persists
/// whatever `source` argument it is given, so stamping the canonical
/// value is the caller's responsibility.
pub const CONSOLIDATION_SOURCE: &str = "consolidation";
4106
4107/// Consolidate multiple memories into one. Returns the new memory ID.
4108/// Deletes the source memories and creates links from new → old (`derived_from`).
4109#[allow(clippy::too_many_arguments)]
4110pub fn consolidate(
4111    conn: &Connection,
4112    ids: &[String],
4113    title: &str,
4114    summary: &str,
4115    namespace: &str,
4116    tier: &Tier,
4117    source: &str,
4118    consolidator_agent_id: &str,
4119) -> Result<String> {
4120    let now = Utc::now().to_rfc3339();
4121    let new_id = uuid::Uuid::new_v4().to_string();
4122
4123    conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)?;
4124
4125    let result = (|| -> Result<String> {
4126        // Verify all IDs exist and collect metadata in one pass
4127        let mut max_priority = 5i32;
4128        let mut all_tags: Vec<String> = Vec::new();
4129        let mut total_access = 0i64;
4130        let mut merged_metadata = serde_json::Map::new();
4131        // Collect original agent_ids separately — they go into
4132        // `consolidated_from_agents` for forensic attribution.
4133        // The consolidator's own agent_id becomes `agent_id` on the result.
4134        let mut source_agent_ids: Vec<String> = Vec::new();
4135        for id in ids {
4136            match get(conn, id)? {
4137                Some(mem) => {
4138                    max_priority = max_priority.max(mem.priority);
4139                    all_tags.extend(mem.tags);
4140                    total_access = total_access.saturating_add(mem.access_count);
4141                    // Merge metadata: later values overwrite earlier ones on key conflict.
4142                    // Intentionally SKIP `agent_id` to avoid last-write-wins forgery;
4143                    // the consolidator's id is authoritative on the result.
4144                    if let serde_json::Value::Object(map) = mem.metadata {
4145                        for (k, v) in map {
4146                            if k == "agent_id" {
4147                                if let serde_json::Value::String(aid) = &v
4148                                    && !source_agent_ids.contains(aid)
4149                                {
4150                                    source_agent_ids.push(aid.clone());
4151                                }
4152                                continue;
4153                            }
4154                            if let Some(existing) = merged_metadata.get(&k)
4155                                && std::mem::discriminant(existing) != std::mem::discriminant(&v)
4156                            {
4157                                tracing::warn!(
4158                                    "consolidate: key '{}' type changed during merge",
4159                                    k
4160                                );
4161                            }
4162                            merged_metadata.insert(k, v);
4163                        }
4164                    } else {
4165                        tracing::warn!(
4166                            "memory {} has non-object metadata during consolidate, skipping",
4167                            id
4168                        );
4169                    }
4170                }
4171                None => {
4172                    // #962 typed envelope.
4173                    return Err(anyhow::Error::new(StorageError::MemoryNotFound {
4174                        id: id.to_string(),
4175                        role: None,
4176                    }));
4177                }
4178            }
4179        }
4180        all_tags.sort();
4181        all_tags.dedup();
4182        let tags_json = serde_json::to_string(&all_tags)?;
4183        // Record source IDs in metadata for provenance (links would be CASCADE-deleted)
4184        merged_metadata.insert(
4185            crate::models::MemoryLinkRelation::DerivedFrom
4186                .as_str()
4187                .to_string(),
4188            serde_json::Value::Array(
4189                ids.iter()
4190                    .map(|id| serde_json::Value::String(id.clone()))
4191                    .collect(),
4192            ),
4193        );
4194        // NHI: the consolidator owns the new memory (authoritative agent_id);
4195        // original authors are preserved as a separate array for forensics.
4196        merged_metadata.insert(
4197            "agent_id".to_string(),
4198            serde_json::Value::String(consolidator_agent_id.to_string()),
4199        );
4200        if !source_agent_ids.is_empty() {
4201            merged_metadata.insert(
4202                "consolidated_from_agents".to_string(),
4203                serde_json::Value::Array(
4204                    source_agent_ids
4205                        .into_iter()
4206                        .map(serde_json::Value::String)
4207                        .collect(),
4208                ),
4209            );
4210        }
4211        let merged_metadata_value = serde_json::Value::Object(merged_metadata);
4212        crate::validate::validate_metadata(&merged_metadata_value)
4213            .context("merged metadata exceeds size limit")?;
4214        let metadata_json = serde_json::to_string(&merged_metadata_value)?;
4215
4216        // FX-C5 — substrate governance pre-write hook parity. Consolidate
4217        // mints a fresh memory via a raw INSERT that bypasses the
4218        // `db::insert(..)` tail (which is where the SQLite path normally
4219        // consults `GOVERNANCE_PRE_WRITE`). Without this call the
4220        // operator's signed governance rules could be bypassed by
4221        // routing through the consolidate surface. Compose the candidate
4222        // memory shape the way the INSERT below would persist it and
4223        // fire the hook; a refusal short-circuits the transaction body
4224        // and the outer ROLLBACK undoes any work already done in this
4225        // closure.
4226        let candidate = Memory {
4227            id: new_id.clone(),
4228            tier: tier.clone(),
4229            namespace: namespace.to_string(),
4230            title: title.to_string(),
4231            content: summary.to_string(),
4232            tags: all_tags.clone(),
4233            priority: max_priority,
4234            confidence: 1.0,
4235            source: source.to_string(),
4236            access_count: total_access,
4237            created_at: now.clone(),
4238            updated_at: now.clone(),
4239            last_accessed_at: None,
4240            expires_at: None,
4241            metadata: merged_metadata_value.clone(),
4242            reflection_depth: 0,
4243            memory_kind: crate::models::MemoryKind::Observation,
4244            entity_id: None,
4245            persona_version: None,
4246            citations: Vec::new(),
4247            source_uri: None,
4248            source_span: None,
4249            // #1633 — the engine pins confidence=1.0, so the honest
4250            // provenance is CuratorDerived (the #1242 audit-honesty
4251            // invariant: engine-derived values must be discoverable to
4252            // the calibration sweep; 'caller_provided' rows are
4253            // excluded by idx_memories_confidence_source).
4254            confidence_source: crate::models::ConfidenceSource::CuratorDerived,
4255            confidence_signals: None,
4256            confidence_decayed_at: None,
4257            version: crate::models::default_memory_version(),
4258        };
4259        consult_governance_pre_write(&candidate)?;
4260
4261        // v0.7.0 #1466 — consolidate mints a fresh memory via this raw
4262        // INSERT, so it must carry the tier-default expiry too; otherwise a
4263        // consolidated mid/short row would be immortal (NULL expires_at) and
4264        // never reaped by GC. `candidate.created_at == now` so the backfill
4265        // here matches the `?10` bound below.
4266        conn.execute(
4267            "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, confidence, source, access_count, created_at, updated_at, expires_at, metadata, confidence_source)
4268             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, 1.0, ?8, ?9, ?10, ?10, ?11, ?12, ?13)",
4269            params![new_id, tier.as_str(), namespace, title, summary, tags_json, max_priority, source, total_access, now, candidate.effective_expires_at(), metadata_json, candidate.confidence_source.as_str()],
4270        )?;
4271
4272        // Delete source memories first. Note: we intentionally do NOT create
4273        // derived_from links before deletion because ON DELETE CASCADE would
4274        // immediately remove them. Instead, source IDs are recorded in the
4275        // consolidated memory's metadata for provenance.
4276        for id in ids {
4277            delete(conn, id)?;
4278        }
4279
4280        Ok(new_id.clone())
4281    })();
4282
4283    match result {
4284        Ok(id) => {
4285            conn.execute_batch(connection::SQL_COMMIT)?;
4286            Ok(id)
4287        }
4288        Err(e) => {
4289            if let Err(rb) = conn.execute_batch(connection::SQL_ROLLBACK) {
4290                tracing::error!("ROLLBACK failed in consolidate: {}", rb);
4291            }
4292            Err(e)
4293        }
4294    }
4295}
4296
4297// ---------------------------------------------------------------------------
4298// Reflection (v0.7.0 recursive-learning Task 4/8, issue #655).
4299// ---------------------------------------------------------------------------
4300
/// Return a copy of `s` with zero-width and otherwise invisible Unicode
/// code points removed, so they cannot be used to slip text past FTS
/// matching. All other characters are kept in their original order.
fn strip_invisible(s: &str) -> String {
    let mut visible = String::with_capacity(s.len());
    for ch in s.chars() {
        // Listed in code-point order; adjacent entries are merged into ranges.
        let hidden = matches!(
            ch,
            '\u{00AD}'                  // soft hyphen
            | '\u{034F}'                // combining grapheme joiner
            | '\u{061C}'                // Arabic letter mark
            | '\u{180E}'                // Mongolian vowel separator
            | '\u{200B}'..='\u{200F}'   // zero-width space/joiners, LRM, RLM
            | '\u{202A}'..='\u{202E}'   // bidi embeddings and overrides
            | '\u{2060}'..='\u{2064}'   // word joiner, invisible operators
            | '\u{2066}'..='\u{2069}'   // bidi isolates
            | '\u{FE00}'..='\u{FE0F}'   // variation selectors
            | '\u{FEFF}'                // zero-width no-break space / BOM
        );
        if !hidden {
            visible.push(ch);
        }
    }
    visible
}
4315
4316fn sanitize_fts_query(input: &str, use_or: bool) -> String {
4317    let joiner = if use_or { " OR " } else { " " };
4318    let cleaned = strip_invisible(input);
4319    let tokens: Vec<String> = cleaned
4320        .split_whitespace()
4321        .filter(|t| !t.is_empty())
4322        .filter(|t| {
4323            // Filter out FTS5 boolean operators as standalone tokens
4324            let upper = t.to_uppercase();
4325            upper != "AND" && upper != "OR" && upper != "NOT" && upper != "NEAR"
4326        })
4327        .map(|token| {
4328            // Strip FTS5 special characters to prevent injection.
4329            // Hyphens are allowed inside words (e.g. "well-known"): the
4330            // unicode61 tokenizer treats `-` as a separator when indexing,
4331            // so `foo-bar` indexes as `foo` + `bar`. Keeping the hyphen in
4332            // the per-token phrase (below we wrap each token in `"…"`)
4333            // produces a phrase query that FTS5 evaluates by matching the
4334            // hyphen-split component terms in order — which is exactly
4335            // what callers expect when searching for hyphenated content.
4336            // Dropping the `'-'` filter here fixes scenario S28 without
4337            // reopening the `+`/`-` exclusion-injection hole (every token
4338            // is already phrase-quoted before being joined, so `-` cannot
4339            // reach FTS5 as a prefix operator).
4340            let clean: String = token
4341                .chars()
4342                .filter(|c| {
4343                    *c != '"'
4344                        && *c != '*'
4345                        && *c != '^'
4346                        && *c != '{'
4347                        && *c != '}'
4348                        && *c != '('
4349                        && *c != ')'
4350                        && *c != ':'
4351                        && *c != '|'
4352                        && *c != '+'
4353                })
4354                .collect();
4355            if clean.is_empty() {
4356                return String::new();
4357            }
4358            format!("\"{clean}\"")
4359        })
4360        .filter(|t| !t.is_empty())
4361        .collect();
4362    if tokens.is_empty() {
4363        return "\"_empty_\"".to_string();
4364    }
4365    tokens.join(joiner)
4366}
4367
4368pub fn list_namespaces(conn: &Connection) -> Result<Vec<NamespaceCount>> {
4369    let now = Utc::now().to_rfc3339();
4370    let mut stmt = conn.prepare(
4371        "SELECT namespace, COUNT(*) FROM memories WHERE expires_at IS NULL OR expires_at > ?1 GROUP BY namespace ORDER BY COUNT(*) DESC",
4372    )?;
4373    let rows = stmt.query_map(params![now], |row| {
4374        Ok(NamespaceCount {
4375            namespace: row.get(0)?,
4376            count: row.get(1)?,
4377        })
4378    })?;
4379    rows.collect::<rusqlite::Result<Vec<_>>>()
4380        .map_err(Into::into)
4381}
4382
/// Hard cap on input groups walked when assembling a taxonomy tree.
/// Even when callers pass a wildly large `limit`, we never walk more
/// than this many `(namespace, count)` rows — bounds memory + time.
/// [`get_taxonomy`] enforces it as `limit.min(TAXONOMY_MAX_LIMIT)`
/// before the value is bound to the SQL `LIMIT`.
/// Shared by the sqlite + postgres taxonomy paths and the HTTP / MCP
/// taxonomy surfaces so all four clamp identically.
pub const TAXONOMY_MAX_LIMIT: usize = 10_000;

/// Default group budget for taxonomy listings when the caller passes
/// no explicit `limit` (HTTP `/api/v1/namespaces`, MCP
/// `memory_get_taxonomy`).
pub const TAXONOMY_DEFAULT_LIMIT: usize = 1000;
4394
4395/// Build a hierarchical namespace taxonomy (Pillar 1 / Stream A).
4396///
4397/// Groups live (non-expired) memories by `namespace`, splits each on
4398/// `/`, and folds them into a `TaxonomyNode` tree. The returned root
4399/// represents `namespace_prefix` (or the synthetic empty-string root if
4400/// no prefix is supplied); each child level descends one segment.
4401///
4402/// `max_depth` is interpreted as "show at most N levels *below the
4403/// prefix*". Memories whose namespace would have required descending
4404/// past the cutoff still contribute to the `subtree_count` of the
4405/// boundary ancestor (their counts are not lost — only the leaf
4406/// rendering is suppressed).
4407///
4408/// `limit` caps the number of input `(namespace, count)` rows we walk
4409/// — when truncated, `total_count` still reflects the full prefix
4410/// total (a separate aggregation), and `truncated` is set so callers
4411/// can warn the user. Hard ceiling: [`TAXONOMY_MAX_LIMIT`].
4412// Body is intentionally one logical pipeline (SQL aggregation → tree
4413// assembly → root materialisation); pulling helpers out hurts
4414// readability more than it helps.
4415#[allow(clippy::too_many_lines)]
4416pub fn get_taxonomy(
4417    conn: &Connection,
4418    namespace_prefix: Option<&str>,
4419    max_depth: usize,
4420    limit: usize,
4421) -> Result<Taxonomy> {
4422    let now = Utc::now().to_rfc3339();
4423    let effective_limit = limit.min(TAXONOMY_MAX_LIMIT);
4424    // Clamp depth so callers asking for "everything" can't construct a
4425    // pathological deep walk; the namespace validator already rejects
4426    // depths > MAX_NAMESPACE_DEPTH on writes.
4427    let effective_depth = max_depth.min(MAX_NAMESPACE_DEPTH);
4428
4429    let prefix = namespace_prefix.unwrap_or("");
4430    // #1531 L5 — `validate_namespace` deliberately places no per-segment
4431    // character restriction (historical flexibility), so a stored
4432    // namespace/prefix may contain the LIKE metacharacters `%` / `_`.
4433    // Escape the descendant pattern (mirroring the visibility clause at
4434    // the top of this file and the postgres `taxonomy_namespaces`
4435    // twin) so a prefix like `a%` cannot over-match `aX/...` subtrees.
4436    let descendant_pattern = format!(
4437        "{}/%",
4438        prefix
4439            .replace('\\', "\\\\")
4440            .replace('%', "\\%")
4441            .replace('_', "\\_")
4442    );
4443
4444    // Total count for the prefix is computed independently of the
4445    // truncated row walk so the caller-visible total stays honest even
4446    // when `limit` drops rows from the tree.
4447    let total_count: usize = if prefix.is_empty() {
4448        let v: i64 = conn.query_row(
4449            "SELECT COUNT(*) FROM memories WHERE expires_at IS NULL OR expires_at > ?1",
4450            params![now],
4451            |row| row.get(0),
4452        )?;
4453        usize::try_from(v).unwrap_or(0)
4454    } else {
4455        let v: i64 = conn.query_row(
4456            "SELECT COUNT(*) FROM memories
4457             WHERE (expires_at IS NULL OR expires_at > ?1)
4458               AND (namespace = ?2 OR namespace LIKE ?3 ESCAPE '\\')",
4459            params![now, prefix, descendant_pattern],
4460            |row| row.get(0),
4461        )?;
4462        usize::try_from(v).unwrap_or(0)
4463    };
4464
4465    // Group rows ordered by count DESC so a small `limit` keeps the
4466    // densest namespaces, then alphabetic for stable tie-breaking.
4467    let groups: Vec<(String, usize)> = if prefix.is_empty() {
4468        let mut stmt = conn.prepare(
4469            "SELECT namespace, COUNT(*) FROM memories
4470             WHERE expires_at IS NULL OR expires_at > ?1
4471             GROUP BY namespace
4472             ORDER BY COUNT(*) DESC, namespace ASC
4473             LIMIT ?2",
4474        )?;
4475        let rows = stmt.query_map(
4476            params![now, i64::try_from(effective_limit).unwrap_or(i64::MAX)],
4477            |row| {
4478                let ns: String = row.get(0)?;
4479                let c: i64 = row.get(1)?;
4480                Ok((ns, usize::try_from(c).unwrap_or(0)))
4481            },
4482        )?;
4483        rows.collect::<rusqlite::Result<Vec<_>>>()?
4484    } else {
4485        let mut stmt = conn.prepare(
4486            "SELECT namespace, COUNT(*) FROM memories
4487             WHERE (expires_at IS NULL OR expires_at > ?1)
4488               AND (namespace = ?2 OR namespace LIKE ?3 ESCAPE '\\')
4489             GROUP BY namespace
4490             ORDER BY COUNT(*) DESC, namespace ASC
4491             LIMIT ?4",
4492        )?;
4493        let rows = stmt.query_map(
4494            params![
4495                now,
4496                prefix,
4497                descendant_pattern,
4498                i64::try_from(effective_limit).unwrap_or(i64::MAX)
4499            ],
4500            |row| {
4501                let ns: String = row.get(0)?;
4502                let c: i64 = row.get(1)?;
4503                Ok((ns, usize::try_from(c).unwrap_or(0)))
4504            },
4505        )?;
4506        rows.collect::<rusqlite::Result<Vec<_>>>()?
4507    };
4508
4509    let walked_count: usize = groups.iter().map(|(_, c)| *c).sum();
4510    let truncated = walked_count < total_count;
4511
4512    // Synthesize the root node. `name` is the trailing segment of the
4513    // prefix (or empty for the global root) so renderers can label it.
4514    let root_name = prefix.rsplit('/').next().unwrap_or("").to_string();
4515    let mut root = TaxonomyNode {
4516        namespace: prefix.to_string(),
4517        name: root_name,
4518        count: 0,
4519        subtree_count: 0,
4520        children: Vec::new(),
4521    };
4522
4523    for (ns, c) in groups {
4524        // Compute path segments below the prefix. When prefix is empty,
4525        // the whole namespace becomes the suffix; when ns == prefix
4526        // exactly, segments is empty and the count lands on the root.
4527        let suffix: &str = if prefix.is_empty() {
4528            ns.as_str()
4529        } else if ns == prefix {
4530            ""
4531        } else if ns.len() > prefix.len() + 1
4532            && ns.starts_with(prefix)
4533            && ns.as_bytes()[prefix.len()] == b'/'
4534        {
4535            &ns[prefix.len() + 1..]
4536        } else {
4537            // Defensive: SQL filter shouldn't return this, but skip rather
4538            // than panic if it ever does (e.g. a stray match like
4539            // "alphaone-sibling" matching prefix "alphaone").
4540            continue;
4541        };
4542        let all_segments: Vec<&str> = if suffix.is_empty() {
4543            Vec::new()
4544        } else {
4545            suffix.split('/').collect()
4546        };
4547        let take = all_segments.len().min(effective_depth);
4548        let used = &all_segments[..take];
4549        let exact_match_in_view = take == all_segments.len();
4550
4551        // Walk into the tree. Every ancestor's subtree_count grows by c
4552        // — including the root — and only the deepest visible node's
4553        // `count` does, and only when it represents the exact namespace
4554        // (not a clamped boundary).
4555        root.subtree_count += c;
4556        if used.is_empty() {
4557            root.count += c;
4558            continue;
4559        }
4560
4561        let mut path_so_far = prefix.to_string();
4562        let mut node = &mut root;
4563        for (i, seg) in used.iter().enumerate() {
4564            if !path_so_far.is_empty() {
4565                path_so_far.push('/');
4566            }
4567            path_so_far.push_str(seg);
4568            let pos = node.children.iter().position(|ch| ch.name == *seg);
4569            let idx = if let Some(p) = pos {
4570                p
4571            } else {
4572                node.children.push(TaxonomyNode {
4573                    namespace: path_so_far.clone(),
4574                    name: (*seg).to_string(),
4575                    count: 0,
4576                    subtree_count: 0,
4577                    children: Vec::new(),
4578                });
4579                node.children.len() - 1
4580            };
4581            node = &mut node.children[idx];
4582            node.subtree_count += c;
4583            let is_leaf = i + 1 == used.len();
4584            if is_leaf && exact_match_in_view {
4585                node.count += c;
4586            }
4587        }
4588    }
4589
4590    sort_taxonomy(&mut root);
4591
4592    Ok(Taxonomy {
4593        tree: root,
4594        total_count,
4595        truncated,
4596    })
4597}
4598
4599fn sort_taxonomy(node: &mut TaxonomyNode) {
4600    node.children.sort_by(|a, b| a.name.cmp(&b.name));
4601    for child in &mut node.children {
4602        sort_taxonomy(child);
4603    }
4604}
4605
4606/// v0.7.0 ARCH-2 followup (FX-C2-batch3) — backend-blind taxonomy
4607/// tree-folding helper. Lifted out of `get_taxonomy` so the Postgres
4608/// SAL adapter can share the exact same fold logic with the SQLite
4609/// adapter, holding the cross-backend wire shape byte-for-byte.
4610///
4611/// Inputs:
4612/// - `prefix`: the namespace prefix the caller queried (`""` = global root).
4613/// - `effective_depth`: clamped depth, already `min(MAX_NAMESPACE_DEPTH)`.
4614/// - `total_count`: full prefix total (NOT truncated by the row walk).
4615/// - `truncated`: caller-computed truncation flag.
4616/// - `groups`: walked `(namespace, count)` rows.
4617///
4618/// Returns the assembled [`Taxonomy`] tree with sorted children.
4619#[doc(hidden)]
4620pub fn fold_taxonomy_groups(
4621    prefix: &str,
4622    effective_depth: usize,
4623    total_count: usize,
4624    truncated: bool,
4625    groups: Vec<(String, usize)>,
4626) -> Taxonomy {
4627    let root_name = prefix.rsplit('/').next().unwrap_or("").to_string();
4628    let mut root = TaxonomyNode {
4629        namespace: prefix.to_string(),
4630        name: root_name,
4631        count: 0,
4632        subtree_count: 0,
4633        children: Vec::new(),
4634    };
4635
4636    for (ns, c) in groups {
4637        let suffix: &str = if prefix.is_empty() {
4638            ns.as_str()
4639        } else if ns == prefix {
4640            ""
4641        } else if ns.len() > prefix.len() + 1
4642            && ns.starts_with(prefix)
4643            && ns.as_bytes()[prefix.len()] == b'/'
4644        {
4645            &ns[prefix.len() + 1..]
4646        } else {
4647            continue;
4648        };
4649        let all_segments: Vec<&str> = if suffix.is_empty() {
4650            Vec::new()
4651        } else {
4652            suffix.split('/').collect()
4653        };
4654        let take = all_segments.len().min(effective_depth);
4655        let used = &all_segments[..take];
4656        let exact_match_in_view = take == all_segments.len();
4657
4658        root.subtree_count += c;
4659        if used.is_empty() {
4660            root.count += c;
4661            continue;
4662        }
4663
4664        let mut path_so_far = prefix.to_string();
4665        let mut node = &mut root;
4666        for (i, seg) in used.iter().enumerate() {
4667            if !path_so_far.is_empty() {
4668                path_so_far.push('/');
4669            }
4670            path_so_far.push_str(seg);
4671            let pos = node.children.iter().position(|ch| ch.name == *seg);
4672            let idx = if let Some(p) = pos {
4673                p
4674            } else {
4675                node.children.push(TaxonomyNode {
4676                    namespace: path_so_far.clone(),
4677                    name: (*seg).to_string(),
4678                    count: 0,
4679                    subtree_count: 0,
4680                    children: Vec::new(),
4681                });
4682                node.children.len() - 1
4683            };
4684            node = &mut node.children[idx];
4685            node.subtree_count += c;
4686            let is_leaf = i + 1 == used.len();
4687            if is_leaf && exact_match_in_view {
4688                node.count += c;
4689            }
4690        }
4691    }
4692
4693    sort_taxonomy(&mut root);
4694
4695    Taxonomy {
4696        tree: root,
4697        total_count,
4698        truncated,
4699    }
4700}
4701
/// Default row cap for memory list/search surfaces when the caller
/// passes no explicit limit. Mirrored by the postgres SAL adapter
/// (`src/store/postgres.rs::list_by_source_uri`) so both backends
/// page identically.
pub const LIST_DEFAULT_CAP: usize = 200;

/// Hard ceiling on rows returned by the memory list/search surfaces.
/// One shared knob across the sqlite + postgres SAL adapters; same
/// family as `KG_TIMELINE_MAX_LIMIT` / `KG_QUERY_MAX_LIMIT`.
pub const LIST_MAX_LIMIT: usize = 1000;

/// Post-clamp `usize → i64` conversion fallback for list/query limits.
/// Unreachable in practice (values are already clamped to at most
/// `LIST_MAX_LIMIT`, which always fits `i64`); kept as a named knob so
/// the fallback page size is explicit rather than magic.
pub const LIST_FALLBACK_LIMIT: usize = 100;

/// Default page size for archive listings (HTTP `/api/v1/archive` and
/// MCP `memory_archive_list`) when the caller passes no explicit
/// `limit` — one knob so both surfaces page identically.
pub const ARCHIVE_DEFAULT_PAGE_LIMIT: usize = 50;

/// Default page size for governance pending-action listings (MCP
/// `memory_pending_list` / subscription approval feeds).
pub const PENDING_DEFAULT_PAGE_LIMIT: usize = 100;

/// Hard floor for duplicate-check threshold. Below this, anything can match
/// random unrelated content, so `check_duplicate` does not honor a lower
/// caller-supplied value: it raises the threshold to this floor
/// (`threshold.max(DUPLICATE_THRESHOLD_MIN)`) so callers don't
/// silently get garbage merge suggestions.
pub const DUPLICATE_THRESHOLD_MIN: f32 = 0.5;

/// Default cosine similarity threshold for declaring a candidate a
/// duplicate. Empirically tuned for MiniLM-L6-v2 (the local embedder):
/// near-paraphrases of the same memory tend to land at 0.88+, while
/// loosely related content sits well below 0.85. Callers can override.
pub const DUPLICATE_THRESHOLD_DEFAULT: f32 = 0.85;
4738
4739/// Find the nearest-neighbor live memory by cosine similarity (Pillar 2 /
4740/// Stream D — `memory_check_duplicate`).
4741///
4742/// Linear scan over `memories.embedding` rows that pass the live-row
4743/// (non-expired) gate and the optional namespace filter. The chosen
4744/// candidate is the highest-cosine match across the pool; the
4745/// caller-supplied `threshold` is used purely to set `is_duplicate` on
4746/// the response — the nearest neighbor is always returned (when the
4747/// pool is non-empty) so callers can show "closest existing memory was
4748/// X at similarity Y" even on a not-quite-duplicate.
4749///
4750/// Threshold is clamped at [`DUPLICATE_THRESHOLD_MIN`] so that wildly
4751/// permissive thresholds can't be used to dress unrelated content as a
4752/// merge suggestion.
4753///
4754/// Returns `(check, scanned)` where `scanned` is the count of embedded
4755/// candidates compared (useful for diagnostics).
4756pub fn check_duplicate(
4757    conn: &Connection,
4758    query_embedding: &[f32],
4759    namespace: Option<&str>,
4760    threshold: f32,
4761) -> Result<DuplicateCheck> {
4762    let effective_threshold = threshold.max(DUPLICATE_THRESHOLD_MIN);
4763    let now = Utc::now().to_rfc3339();
4764
4765    // SQL filter handles the live-row + optional namespace gate; the
4766    // cosine pass happens in Rust because SQLite has no native vector
4767    // op. We only pull rows with non-NULL embeddings — anything missing
4768    // an embedding can't be a near-duplicate by this definition.
4769    let rows: Vec<(String, String, String, Vec<u8>)> = if let Some(ns) = namespace {
4770        let mut stmt = conn.prepare(
4771            "SELECT id, title, namespace, embedding FROM memories
4772             WHERE embedding IS NOT NULL
4773               AND (expires_at IS NULL OR expires_at > ?1)
4774               AND namespace = ?2",
4775        )?;
4776        let mapped = stmt.query_map(params![now, ns], |row| {
4777            Ok((
4778                row.get::<_, String>(0)?,
4779                row.get::<_, String>(1)?,
4780                row.get::<_, String>(2)?,
4781                row.get::<_, Vec<u8>>(3)?,
4782            ))
4783        })?;
4784        mapped.collect::<rusqlite::Result<Vec<_>>>()?
4785    } else {
4786        let mut stmt = conn.prepare(
4787            "SELECT id, title, namespace, embedding FROM memories
4788             WHERE embedding IS NOT NULL
4789               AND (expires_at IS NULL OR expires_at > ?1)",
4790        )?;
4791        let mapped = stmt.query_map(params![now], |row| {
4792            Ok((
4793                row.get::<_, String>(0)?,
4794                row.get::<_, String>(1)?,
4795                row.get::<_, String>(2)?,
4796                row.get::<_, Vec<u8>>(3)?,
4797            ))
4798        })?;
4799        mapped.collect::<rusqlite::Result<Vec<_>>>()?
4800    };
4801
4802    let mut best: Option<DuplicateMatch> = None;
4803    let mut scanned: usize = 0;
4804    for (id, title, ns, bytes) in rows {
4805        if bytes.is_empty() {
4806            continue;
4807        }
4808        // v0.6.3.1 P2 — magic-byte aware decode. Malformed payloads
4809        // (anything other than headed-LE or legacy-LE) are skipped with
4810        // telemetry so a corrupted row can't poison duplicate detection.
4811        let candidate = match crate::embeddings::decode_embedding_blob(&bytes) {
4812            Ok(v) => v,
4813            Err(e) => {
4814                tracing::warn!(
4815                    memory_id = %id,
4816                    blob_len = bytes.len(),
4817                    error = %e,
4818                    "skipping duplicate-check candidate with malformed embedding"
4819                );
4820                continue;
4821            }
4822        };
4823        // Vectors of mismatched dimension would compute against a
4824        // truncated query (Embedder::cosine_similarity zips). Skip
4825        // rather than report a misleading similarity score.
4826        if candidate.len() != query_embedding.len() {
4827            tracing::warn!(
4828                memory_id = %id,
4829                expected = query_embedding.len(),
4830                got = candidate.len(),
4831                "skipping duplicate-check candidate with dimension mismatch"
4832            );
4833            continue;
4834        }
4835        let similarity =
4836            crate::embeddings::Embedder::cosine_similarity(query_embedding, &candidate);
4837        scanned += 1;
4838        let is_better = best.as_ref().is_none_or(|m| similarity > m.similarity);
4839        if is_better {
4840            best = Some(DuplicateMatch {
4841                id,
4842                title,
4843                namespace: ns,
4844                similarity,
4845            });
4846        }
4847    }
4848
4849    let is_duplicate = best
4850        .as_ref()
4851        .is_some_and(|m| m.similarity >= effective_threshold);
4852    Ok(DuplicateCheck {
4853        is_duplicate,
4854        threshold: effective_threshold,
4855        nearest: best,
4856        candidates_scanned: scanned,
4857    })
4858}
4859
4860/// Canonical hash used by [`check_duplicate_with_text`] to detect
4861/// byte-identical `title + content` pairs even when the embedding
4862/// pipeline (lower-casing, prefix tagging, etc.) prevents the cosine
4863/// similarity from saturating at 1.0.
4864///
4865/// The input is the *exact* text the MCP/HTTP layer hands to the
4866/// embedder — `crate::embeddings::embedding_document(title, content)` — and we hash its raw
4867/// UTF-8 bytes with no normalization. Lower-casing or whitespace
4868/// stripping at this layer would re-introduce the very ambiguity we
4869/// are trying to short-circuit (two semantically-identical strings
4870/// hashing to the same value but being substantively different in,
4871/// e.g., a code snippet that differs only in whitespace).
4872///
4873/// SHA-256 is the same primitive the audit/subscriptions/signed-events
4874/// layers already use, so callers don't have to reach for a new
4875/// dependency.
4876#[must_use]
4877pub fn canonical_content_hash(text: &str) -> [u8; 32] {
4878    use sha2::{Digest, Sha256};
4879    let mut hasher = Sha256::new();
4880    hasher.update(text.as_bytes());
4881    hasher.finalize().into()
4882}
4883
4884// ---------------------------------------------------------------------------
4885// v0.7.0 (issue #519) — proactive conflict detection on memory_store
4886// ---------------------------------------------------------------------------
4887
/// Cosine-similarity threshold at or above which a candidate is treated
/// as a near-duplicate for the purpose of [`proactive_conflict_check`]
/// (candidates scoring strictly below it are never reported).
///
/// Empirically tuned for the MiniLM-L6-v2 / Nomic embedder pair: rows
/// whose `(title, content)` paraphrase the query at this level are
/// already considered "the same memory" by the existing duplicate
/// machinery (`DUPLICATE_THRESHOLD_DEFAULT` sits at 0.85 for the
/// merge-suggestion surface). 0.95 is the stricter "this is the same
/// fact, restated" bar; combined with the textual contradiction signal
/// below, we surface only writes that proactively conflict with an
/// established near-duplicate.
///
/// **Known miss class (pre-existing; deliberately unchanged by the
/// #1579 A5 remediation):** genuine paraphrases can embed just BELOW
/// this bar — the P2-audit probe pair ("deadline is june 15" vs
/// "deadline is june 22" in otherwise-identical sentences) scored
/// 0.945 cosine on the release MiniLM and is therefore not detected.
/// Safe direction for an advisory gate (the write is ALLOWED; nothing
/// is wrongly refused); lowering the bar instead would re-open the
/// false-409 epidemic the
/// [`PROACTIVE_CONFLICT_CONTENT_JACCARD_FLOOR`] corroboration exists
/// to close. The deeper `detect_contradiction` tooling remains the
/// surface for sub-threshold contradictions.
pub const PROACTIVE_CONFLICT_SIM_THRESHOLD: f32 = 0.95;
4912
/// Top-K cap on how many of the highest-cosine candidates
/// [`proactive_conflict_check`] inspects for a textual conflict.
///
/// Every row in the candidate pool is still decoded and cosine-scored
/// (the pool itself is bounded by [`PROACTIVE_CONFLICT_SCAN_LIMIT`] on
/// the scan path and [`PROACTIVE_CONFLICT_INDEX_K`] on the index
/// path); this cap bounds only the content-equality / token-overlap
/// comparisons performed after the descending sort, so that step is
/// O(K) rather than O(pool size).
pub const PROACTIVE_CONFLICT_TOP_K: usize = 5;
4917
/// #1579 A5 — row cap on the bounded fallback scan in
/// [`proactive_conflict_check`] (most-recently-updated rows first);
/// bound as the `LIMIT` parameter of that function's candidate SELECT.
///
/// Pre-#1579 the check decoded + cosine-scored EVERY embedded live row
/// in the namespace per write — an O(N) scan that (under the HTTP
/// daemon's single-connection mutex) collapsed semantic-tier write
/// throughput to 0.3-1.7 rps in the P2 audit. The fallback path (used
/// when no fully-searchable HNSW index is available: keyword tier,
/// the async-boot warm window, CLI one-shots) now scans only the
/// `PROACTIVE_CONFLICT_SCAN_LIMIT` most-recently-updated candidates.
/// Recency ordering is the right prior for an advisory near-duplicate
/// gate: conflicting restatements cluster temporally (an agent
/// re-asserting a fact it just learned), and the indexed path (the
/// production semantic-tier route) covers the long tail. A miss here
/// only ALLOWS a write that deeper inspection might have refused —
/// never refuses a legitimate one — which is the safe direction for
/// an advisory check with a `force=true` bypass.
pub const PROACTIVE_CONFLICT_SCAN_LIMIT: usize = 1024;

/// #1579 A5 — `k` requested from the HNSW index by
/// [`proactive_conflict_check_with_index`]. Deliberately larger than
/// [`PROACTIVE_CONFLICT_TOP_K`] because the index is global while the
/// conflict check is namespace-scoped: the namespace filter is applied
/// AFTER the ANN search (post-filter semantics), so foreign-namespace
/// hits consume slots. 32 gives the in-namespace pool ample headroom
/// (the ≥ 0.95 cosine gate means only near-identical vectors matter,
/// and > 32 near-identical foreign-namespace rows crowding out an
/// in-namespace conflict is a pathology the bounded fallback's
/// advisory contract already tolerates — see
/// [`PROACTIVE_CONFLICT_SCAN_LIMIT`]).
pub const PROACTIVE_CONFLICT_INDEX_K: usize = 32;

/// #1579 A5 — minimum Jaccard token overlap between the incoming
/// `content` and a cosine-near-duplicate candidate's `content` for the
/// pair to be classified as a proactive conflict (the comparison is
/// inclusive: overlap `>=` this floor qualifies).
///
/// **Why this exists** (the P2 false-409 epidemic). The P2 perf audit
/// measured **81% of semantic-tier writes refused with 409** when a
/// loadtest wrote unique random-alphanumeric payloads: MiniLM-L6-v2
/// assigns ≥ 0.95 cosine to ~28% of PAIRS of unrelated 256-byte noise
/// documents (probe on the release model: pairwise min 0.44 / mean
/// 0.83 / max 0.97), so with a 1k-row namespace virtually every write
/// found SOME ≥ 0.95 "near-duplicate" — while a genuine paraphrase
/// pair ("deadline is june 15" vs "deadline is june 22" in identical
/// sentences) scored 0.945, BELOW the threshold. Embedding cosine
/// alone is therefore not sufficient evidence of "the same fact,
/// restated". The deterministic corroboration is lexical: a true
/// restatement shares vocabulary. We reuse the #1320 tokenizer
/// (lowercase, split on non-alphanumeric, stopword-strip — see
/// [`CONTRADICTION_TITLE_JACCARD_FLOOR`]) over the CONTENT bodies and
/// require this floor, which rejects the disjoint-token noise pairs
/// (Jaccard ≈ 0) while keeping real restatements (the june-15/june-22
/// pair scores 0.5).
pub const PROACTIVE_CONFLICT_CONTENT_JACCARD_FLOOR: f32 = 0.30;
4972
/// Result envelope returned by [`proactive_conflict_check`] (and its
/// siblings [`proactive_conflict_check_with_index`] /
/// [`proactive_conflict_check_candidates`]) when an existing memory
/// near-duplicates AND textually contradicts the incoming write.
#[derive(Debug, Clone)]
pub struct ProactiveConflict {
    /// `id` of the existing memory the new write conflicts with.
    pub existing_id: String,
    /// Title of the existing memory (for diagnostic surfacing).
    pub existing_title: String,
    /// Cosine similarity between the incoming embedding and the
    /// existing row's stored embedding. Candidates scoring below
    /// `PROACTIVE_CONFLICT_SIM_THRESHOLD` are never returned.
    pub similarity: f32,
    /// Reason the candidate was classified as conflicting. Currently
    /// always `"near_duplicate_with_differing_content"`; future
    /// extensions (LLM-backed detector, negation-flip heuristic) can
    /// surface a different reason string here.
    pub reason: &'static str,
}
4991
4992/// v0.7.0 (issue #519) — proactive contradiction detection on the
4993/// `memory_store` write path.
4994///
4995/// Scans the top-`PROACTIVE_CONFLICT_TOP_K` most similar live memories
4996/// in the new memory's namespace (by cosine similarity over the
4997/// existing `memories.embedding` column) and returns the first match
4998/// whose similarity meets `PROACTIVE_CONFLICT_SIM_THRESHOLD` AND whose
4999/// stored `content` differs from the incoming `mem.content` exactly.
5000///
5001/// The "differs exactly" check is the deterministic substrate-layer
5002/// contradiction signal — a row that paraphrases the same fact at
5003/// ≥ 0.95 cosine but spells out a different content body is, by
5004/// construction, asserting a near-duplicate fact with a different
5005/// substantive payload (the LLM detector would call this a soft
5006/// contradiction; the substrate check calls it a near-duplicate with
5007/// differing content). Callers that want the full LLM-backed
5008/// `detect_contradiction` round-trip can layer it on top of the
5009/// proactive-check result; the substrate path stays LLM-independent so
5010/// it runs deterministically under `AI_MEMORY_NO_CONFIG=1` and in
5011/// every CI environment.
5012///
5013/// A `force=true` switch at the handler layer (MCP / CLI / HTTP)
5014/// bypasses this check entirely — see `src/mcp/tools/store.rs` and
5015/// `src/handlers/http.rs::create_memory`.
5016///
5017/// Returns:
5018/// * `Ok(None)` — no conflict detected; the caller may proceed with
5019///   the insert.
5020/// * `Ok(Some(ProactiveConflict))` — at least one candidate triggered
5021///   the near-duplicate-with-differing-content guard; the caller
5022///   should refuse the insert (and return an error envelope naming
5023///   `existing_id`) unless `force=true` was set.
5024///
5025/// # Errors
5026///
5027/// Bubbles rusqlite errors from the candidate-pool SELECT. The cosine
5028/// pass itself is in-memory and infallible (mismatched-dim candidates
5029/// are skipped with a tracing warn, mirroring `check_duplicate`).
5030pub fn proactive_conflict_check(
5031    conn: &Connection,
5032    mem: &Memory,
5033    query_embedding: &[f32],
5034) -> Result<Option<ProactiveConflict>> {
5035    if query_embedding.is_empty() {
5036        return Ok(None);
5037    }
5038    let now = Utc::now().to_rfc3339();
5039
5040    // Pull (id, title, content, embedding) for the live, in-namespace
5041    // pool. We restrict to the same namespace as the incoming write
5042    // because cross-namespace "contradictions" are not a substrate
5043    // concept (namespaces are deliberately isolated scopes); the
5044    // namespace-scoped check matches the `find_contradictions` /
5045    // `find_by_title_namespace` semantics already used by the
5046    // `OnConflict::Error` branch of `insert_with_conflict`.
5047    //
5048    // #1579 A5 — BOUNDED: most-recently-updated rows first, capped at
5049    // `PROACTIVE_CONFLICT_SCAN_LIMIT`. See the const for the recency
5050    // rationale and the advisory-miss contract. The unbounded
5051    // full-namespace decode+scan this replaces was the P2-measured
5052    // write-throughput collapse (0.3-1.7 rps under the HTTP mutex).
5053    let mut stmt = conn.prepare(
5054        "SELECT id, title, content, embedding FROM memories
5055         WHERE embedding IS NOT NULL
5056           AND (expires_at IS NULL OR expires_at > ?1)
5057           AND namespace = ?2
5058         ORDER BY updated_at DESC
5059         LIMIT ?3",
5060    )?;
5061    let rows: Vec<(String, String, String, Vec<u8>)> = stmt
5062        .query_map(
5063            params![
5064                now,
5065                &mem.namespace,
5066                i64::try_from(PROACTIVE_CONFLICT_SCAN_LIMIT).unwrap_or(i64::MAX)
5067            ],
5068            |row| {
5069                Ok((
5070                    row.get::<_, String>(0)?,
5071                    row.get::<_, String>(1)?,
5072                    row.get::<_, String>(2)?,
5073                    row.get::<_, Vec<u8>>(3)?,
5074                ))
5075            },
5076        )?
5077        .collect::<rusqlite::Result<Vec<_>>>()?;
5078
5079    Ok(proactive_conflict_verdict(mem, query_embedding, rows))
5080}
5081
5082/// #1579 A5 — HNSW-routed entry point for the proactive conflict
5083/// check. This is the production write-path dispatcher:
5084///
5085/// * When a [`crate::hnsw::VectorIndex`] is available AND fully
5086///   searchable (its graph covers `all_entries` — see
5087///   [`crate::hnsw::VectorIndex::is_fully_searchable`]), the candidate
5088///   pool comes from an O(log N) ANN query instead of the table scan;
5089///   candidates are then re-verified against the DB (live, same
5090///   namespace, EXACT cosine recomputed from the stored blob — the
5091///   index's distance is approximate and assumes L2-normalised
5092///   vectors, so the stored-blob recompute keeps the decision function
5093///   byte-equal to the scan path).
5094/// * Otherwise (no index at keyword tier, the async-boot warm window
5095///   before the first graph swap, CLI one-shots below the build
5096///   threshold) it falls back to the BOUNDED recency scan in
5097///   [`proactive_conflict_check`]. An EMPTY index also routes to the
5098///   fallback (#1579 QC): emptiness makes `is_fully_searchable`
5099///   vacuously true, but during the async-boot LOAD phase (daemon
5100///   bound with `VectorIndex::empty()`, boot loader still reading the
5101///   stored embeddings, `seed_entries` not yet landed) it says
5102///   nothing about what the DB holds — consulting it would silently
5103///   SKIP the check instead of degrading to the documented bounded
5104///   scan. On a genuinely empty corpus the fallback scan matches zero
5105///   rows, so the routing is behaviour-neutral outside that window.
5106///
5107/// Known under-detection windows, both safe-direction (a missed
5108/// conflict ALLOWS a write; the check never wrongly refuses):
5109/// rows evicted from the index's 100k entry cap are invisible to the
5110/// ANN query, and a warm-window write beyond the bounded scan's
5111/// recency horizon is invisible to the fallback. Callers that need a
5112/// hard guarantee already have the `(title, namespace)` SQL conflict
5113/// gate; this check is the advisory #519 layer with a `force=true`
5114/// bypass.
5115///
5116/// # Errors
5117///
5118/// Bubbles rusqlite errors from the candidate SELECTs (same contract
5119/// as [`proactive_conflict_check`]).
5120pub fn proactive_conflict_check_with_index(
5121    conn: &Connection,
5122    mem: &Memory,
5123    query_embedding: &[f32],
5124    vector_index: Option<&crate::hnsw::VectorIndex>,
5125) -> Result<Option<ProactiveConflict>> {
5126    if query_embedding.is_empty() {
5127        return Ok(None);
5128    }
5129    if let Some(idx) = vector_index
5130        && idx.is_fully_searchable()
5131        // #1579 QC — an empty index is vacuously fully-searchable but
5132        // proves nothing about the DB during the async-boot LOAD
5133        // phase; see the doc comment above and
5134        // `crate::hnsw::VectorIndex::is_empty`.
5135        && !idx.is_empty()
5136    {
5137        let hits = idx.search(query_embedding, PROACTIVE_CONFLICT_INDEX_K);
5138        let ids: Vec<String> = hits.into_iter().map(|h| h.id).collect();
5139        return proactive_conflict_check_candidates(conn, mem, query_embedding, &ids);
5140    }
5141    tracing::trace!(
5142        target: "proactive_conflict",
5143        namespace = %mem.namespace,
5144        "no fully-searchable (or empty) vector index — bounded recency-scan fallback (#1579 A5)"
5145    );
5146    proactive_conflict_check(conn, mem, query_embedding)
5147}
5148
5149/// #1579 A5 — verify an ANN-derived candidate id list against the DB
5150/// and apply the conflict verdict. Fetches only the named rows (point
5151/// lookups by PK), re-applies the live/namespace filters the table
5152/// scan used, and recomputes EXACT cosine from the stored embedding
5153/// blob so the decision function is identical to the scan path.
5154///
5155/// Public so the HTTP create handler (which holds the vector index
5156/// behind an async mutex and must run the ANN search BEFORE taking
5157/// the DB lock) can split the search from the verification.
5158///
5159/// # Errors
5160///
5161/// Bubbles rusqlite errors from the `IN (...)` candidate SELECT.
5162pub fn proactive_conflict_check_candidates(
5163    conn: &Connection,
5164    mem: &Memory,
5165    query_embedding: &[f32],
5166    candidate_ids: &[String],
5167) -> Result<Option<ProactiveConflict>> {
5168    if query_embedding.is_empty() || candidate_ids.is_empty() {
5169        return Ok(None);
5170    }
5171    let now = Utc::now().to_rfc3339();
5172    let placeholders = std::iter::repeat_n("?", candidate_ids.len())
5173        .collect::<Vec<_>>()
5174        .join(",");
5175    let sql = format!(
5176        "SELECT id, title, content, embedding FROM memories
5177         WHERE id IN ({placeholders})
5178           AND embedding IS NOT NULL
5179           AND (expires_at IS NULL OR expires_at > ?{p_now})
5180           AND namespace = ?{p_ns}",
5181        p_now = candidate_ids.len() + 1,
5182        p_ns = candidate_ids.len() + 2,
5183    );
5184    let mut stmt = conn.prepare(&sql)?;
5185    let bind_iter = candidate_ids
5186        .iter()
5187        .map(String::as_str)
5188        .chain([now.as_str(), mem.namespace.as_str()]);
5189    let rows: Vec<(String, String, String, Vec<u8>)> = stmt
5190        .query_map(rusqlite::params_from_iter(bind_iter), |row| {
5191            Ok((
5192                row.get::<_, String>(0)?,
5193                row.get::<_, String>(1)?,
5194                row.get::<_, String>(2)?,
5195                row.get::<_, Vec<u8>>(3)?,
5196            ))
5197        })?
5198        .collect::<rusqlite::Result<Vec<_>>>()?;
5199
5200    Ok(proactive_conflict_verdict(mem, query_embedding, rows))
5201}
5202
5203/// #1579 A5 — shared scoring + verdict tail of the proactive conflict
5204/// check. Decodes candidate blobs, cosine-scores against the query,
5205/// sorts descending, and applies the conflict rule to the top
5206/// [`PROACTIVE_CONFLICT_TOP_K`]:
5207///
5208///   near-duplicate (≥ [`PROACTIVE_CONFLICT_SIM_THRESHOLD`] cosine)
5209///   AND content differs
5210///   AND content token-overlap ≥ [`PROACTIVE_CONFLICT_CONTENT_JACCARD_FLOOR`]
5211///
5212/// The Jaccard corroboration is the #1579 false-409 fix — see the
5213/// floor const for the P2 evidence (81% of semantic-tier loadtest
5214/// writes refused because MiniLM clusters unrelated noise documents
5215/// above 0.95 cosine).
5216fn proactive_conflict_verdict(
5217    mem: &Memory,
5218    query_embedding: &[f32],
5219    rows: Vec<(String, String, String, Vec<u8>)>,
5220) -> Option<ProactiveConflict> {
5221    // Score every candidate and keep the top-K by cosine.
5222    let mut scored: Vec<(f32, String, String, String)> = Vec::with_capacity(rows.len());
5223    for (id, title, content, blob) in rows {
5224        if blob.is_empty() {
5225            continue;
5226        }
5227        // Skip self (same id) — happens when a re-store reuses the
5228        // existing memory id (NHI replay path).
5229        if id == mem.id {
5230            continue;
5231        }
5232        let candidate = match crate::embeddings::decode_embedding_blob(&blob) {
5233            Ok(v) => v,
5234            Err(e) => {
5235                tracing::warn!(
5236                    memory_id = %id,
5237                    blob_len = blob.len(),
5238                    error = %e,
5239                    "proactive_conflict_check: skipping candidate with malformed embedding"
5240                );
5241                continue;
5242            }
5243        };
5244        if candidate.len() != query_embedding.len() {
5245            tracing::warn!(
5246                memory_id = %id,
5247                expected = query_embedding.len(),
5248                got = candidate.len(),
5249                "proactive_conflict_check: skipping candidate with dimension mismatch"
5250            );
5251            continue;
5252        }
5253        let sim = crate::embeddings::Embedder::cosine_similarity(query_embedding, &candidate);
5254        scored.push((sim, id, title, content));
5255    }
5256    // Sort descending by similarity so we visit the strongest matches
5257    // first; bail at the top-K cap.
5258    scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
5259    let incoming_tokens = contradiction_title_tokens(&mem.content);
5260    for (sim, id, title, content) in scored.into_iter().take(PROACTIVE_CONFLICT_TOP_K) {
5261        if sim < PROACTIVE_CONFLICT_SIM_THRESHOLD {
5262            // The top-K cap is sorted descending — once we drop below
5263            // the threshold we can't find any conflicts in the tail.
5264            break;
5265        }
5266        // Deterministic textual contradiction signal: the candidate
5267        // is near-duplicate (≥ 0.95 cosine) AND its content body
5268        // differs from the incoming write's content. Same-content
5269        // near-duplicates are not contradictions; they are the upsert
5270        // happy-path that the SQL `ON CONFLICT(title, namespace)`
5271        // already handles.
5272        //
5273        // #1579 A5 — lexical corroboration: a true "same fact,
5274        // restated" pair shares vocabulary. Without this floor,
5275        // unrelated documents that the embedder happens to cluster
5276        // above 0.95 cosine (P2-measured on random-alphanumeric
5277        // payloads) produced the 81% false-409 epidemic.
5278        if content != mem.content
5279            && contradiction_title_jaccard(&incoming_tokens, &contradiction_title_tokens(&content))
5280                >= PROACTIVE_CONFLICT_CONTENT_JACCARD_FLOOR
5281        {
5282            return Some(ProactiveConflict {
5283                existing_id: id,
5284                existing_title: title,
5285                similarity: sim,
5286                reason: "near_duplicate_with_differing_content",
5287            });
5288        }
5289    }
5290    None
5291}
5292
5293/// v0.7.0 F18 — exact-match-aware nearest-neighbor duplicate check.
5294///
5295/// Wraps [`check_duplicate`] with a SHA-256 short-circuit on the raw
5296/// `query_text` so byte-identical content scores `similarity = 1.0`
5297/// even when the embedding pipeline (Nomic prefixes, casing, whitespace
5298/// normalization) would otherwise cap cosine similarity at ~0.92 for
5299/// the same string. Round-2 evidence: storing content `C` and then
5300/// asking `check_duplicate` about `C` returned similarity 0.92 because
5301/// the stored embedding was prefixed with `search_document:` while the
5302/// query embedding got `search_query:` — mismatched prefixes prevent
5303/// cosine from saturating at 1.0.
5304///
5305/// Algorithm:
5306/// 1. Compute `H_query = SHA-256(query_text)`.
5307/// 2. For each live, namespace-matching candidate, compute
5308///    `H_row = SHA-256(format!("{row.title} {row.content}"))` and
5309///    compare. The first match wins and is returned with
5310///    `similarity = 1.0`, `is_duplicate = true`.
5311/// 3. If no hash match is found, fall through to embedding-based
5312///    cosine similarity (i.e. delegate to [`check_duplicate`]).
5313///
5314/// The hash compare is computed per call (no schema migration); it
5315/// scales linearly in the candidate pool, but so does the existing
5316/// embedding loop, so worst-case asymptotics are unchanged. A future
5317/// `content_hash` column on `memories` would make this O(1) per
5318/// candidate via an index — flagged for a separate migration PR.
5319///
5320/// `query_text` MUST be the exact string used to produce
5321/// `query_embedding` (typically `crate::embeddings::embedding_document(title, content)`).
5322/// Passing a different string is not a correctness bug — the function
5323/// just falls through to the embedding-similarity path — but it
5324/// defeats the point of the short-circuit.
5325pub fn check_duplicate_with_text(
5326    conn: &Connection,
5327    query_embedding: &[f32],
5328    query_text: &str,
5329    namespace: Option<&str>,
5330    threshold: f32,
5331) -> Result<DuplicateCheck> {
5332    let effective_threshold = threshold.max(DUPLICATE_THRESHOLD_MIN);
5333    let now = Utc::now().to_rfc3339();
5334    let query_hash = canonical_content_hash(query_text);
5335
5336    // Pull (id, title, namespace, content) for the live candidate pool.
5337    // We keep the same gates as `check_duplicate` (live row, optional
5338    // namespace) but do NOT require a non-NULL embedding here — an
5339    // identical row with a missing embedding is still a valid exact-
5340    // match short-circuit candidate.
5341    let rows: Vec<(String, String, String, String)> = if let Some(ns) = namespace {
5342        let mut stmt = conn.prepare(
5343            "SELECT id, title, namespace, content FROM memories
5344             WHERE (expires_at IS NULL OR expires_at > ?1)
5345               AND namespace = ?2",
5346        )?;
5347        let mapped = stmt.query_map(params![now, ns], |row| {
5348            Ok((
5349                row.get::<_, String>(0)?,
5350                row.get::<_, String>(1)?,
5351                row.get::<_, String>(2)?,
5352                row.get::<_, String>(3)?,
5353            ))
5354        })?;
5355        mapped.collect::<rusqlite::Result<Vec<_>>>()?
5356    } else {
5357        let mut stmt = conn.prepare(
5358            "SELECT id, title, namespace, content FROM memories
5359             WHERE (expires_at IS NULL OR expires_at > ?1)",
5360        )?;
5361        let mapped = stmt.query_map(params![now], |row| {
5362            Ok((
5363                row.get::<_, String>(0)?,
5364                row.get::<_, String>(1)?,
5365                row.get::<_, String>(2)?,
5366                row.get::<_, String>(3)?,
5367            ))
5368        })?;
5369        mapped.collect::<rusqlite::Result<Vec<_>>>()?
5370    };
5371
5372    // Phase 1 — SHA-256 exact-match short-circuit. We hash the same
5373    // `crate::embeddings::embedding_document(title, content)` shape the MCP/HTTP layers use to
5374    // build the embedding text so an identical store-then-check sequence
5375    // surfaces as similarity=1.0 even when the embedding pipeline would
5376    // otherwise cap at ~0.92 due to prefix asymmetry.
5377    for (id, title, ns, content) in &rows {
5378        let row_text = crate::embeddings::embedding_document(title, content);
5379        let row_hash = canonical_content_hash(&row_text);
5380        if row_hash == query_hash {
5381            return Ok(DuplicateCheck {
5382                is_duplicate: true,
5383                threshold: effective_threshold,
5384                nearest: Some(DuplicateMatch {
5385                    id: id.clone(),
5386                    title: title.clone(),
5387                    namespace: ns.clone(),
5388                    similarity: 1.0,
5389                }),
5390                // We scanned every row through the hash compare to find
5391                // the match — report that, not just the first one.
5392                candidates_scanned: rows.len(),
5393            });
5394        }
5395    }
5396
5397    // Phase 2 — no hash match; fall back to the embedding-based
5398    // nearest-neighbor scan so callers still get the "closest existing
5399    // memory was X at similarity Y" signal on near-but-not-exact hits.
5400    check_duplicate(conn, query_embedding, namespace, threshold)
5401}
5402
5403/// Register an entity (canonical name + aliases) under a namespace
5404/// (Pillar 2 / Stream B).
5405///
5406/// An entity is stored as a long-tier memory:
5407/// - `title = canonical_name`
5408/// - `namespace = namespace`
5409/// - `tags` includes [`ENTITY_TAG`]
5410/// - `metadata.kind = "entity"` (so the resolver can never confuse an
5411///   entity with a regular memory that happens to share a title)
5412///
5413/// Aliases live in the `entity_aliases` side table keyed by
5414/// `(entity_id, alias)`.
5415///
5416/// **Idempotency:** if an entity with this `(canonical_name, namespace)`
5417/// already exists, its ID is reused and `aliases` are merged with
5418/// `INSERT OR IGNORE`. The returned [`EntityRegistration::created`] is
5419/// `false` in that case.
5420///
5421/// **Collision detection:** if a non-entity memory already occupies
5422/// `(title=canonical_name, namespace=namespace)`, the call errors
5423/// rather than silently upgrading it (the upsert path on `insert`
5424/// would otherwise overwrite the existing row's content/tags). Callers
5425/// must rename the entity or its colliding memory.
5426///
5427/// `extra_metadata` is merged into the entity memory's metadata; any
5428/// caller-supplied `kind` field is overwritten with `"entity"` and
5429/// `agent_id` is stamped from the caller (NHI provenance) when
5430/// `extra_metadata` does not already specify one.
5431pub fn entity_register(
5432    conn: &Connection,
5433    canonical_name: &str,
5434    namespace: &str,
5435    aliases: &[String],
5436    extra_metadata: &serde_json::Value,
5437    agent_id: Option<&str>,
5438) -> Result<crate::models::EntityRegistration> {
5439    use crate::models::{ENTITY_KIND, ENTITY_TAG, EntityRegistration};
5440
5441    // Look up an existing entity in this namespace by canonical_name +
5442    // metadata.kind. If a non-entity memory occupies the same
5443    // (title, namespace), surface a hard error instead of upserting.
5444    let existing_id: Option<String> = match conn.query_row(
5445        "SELECT id FROM memories
5446         WHERE namespace = ?1 AND title = ?2
5447           AND COALESCE(json_extract(metadata, '$.kind'), '') = ?3",
5448        params![namespace, canonical_name, ENTITY_KIND],
5449        |r| r.get::<_, String>(0),
5450    ) {
5451        Ok(id) => Some(id),
5452        Err(rusqlite::Error::QueryReturnedNoRows) => None,
5453        Err(e) => return Err(e.into()),
5454    };
5455
5456    let (entity_id, created) = if let Some(id) = existing_id {
5457        (id, false)
5458    } else {
5459        let collision: Option<String> = match conn.query_row(
5460            "SELECT id FROM memories
5461             WHERE namespace = ?1 AND title = ?2
5462               AND COALESCE(json_extract(metadata, '$.kind'), '') != ?3",
5463            params![namespace, canonical_name, ENTITY_KIND],
5464            |r| r.get::<_, String>(0),
5465        ) {
5466            Ok(id) => Some(id),
5467            Err(rusqlite::Error::QueryReturnedNoRows) => None,
5468            Err(e) => return Err(e.into()),
5469        };
5470        if collision.is_some() {
5471            // #962 typed envelope — UniqueConflict (409).
5472            return Err(anyhow::Error::new(StorageError::UniqueConflict {
5473                reason: format!(
5474                    "entity_register: title '{canonical_name}' in namespace '{namespace}' is already used by a non-entity memory"
5475                ),
5476            }));
5477        }
5478
5479        // Build metadata: caller-supplied object merged, kind forced
5480        // to "entity", agent_id preserved from caller when not set.
5481        let mut meta_map = match extra_metadata {
5482            serde_json::Value::Object(m) => m.clone(),
5483            _ => serde_json::Map::new(),
5484        };
5485        meta_map.insert(
5486            "kind".to_string(),
5487            serde_json::Value::String(ENTITY_KIND.to_string()),
5488        );
5489        if let Some(a) = agent_id {
5490            meta_map
5491                .entry("agent_id".to_string())
5492                .or_insert(serde_json::Value::String(a.to_string()));
5493        }
5494        let metadata = serde_json::Value::Object(meta_map);
5495
5496        let now = Utc::now().to_rfc3339();
5497        let mem = Memory {
5498            id: uuid::Uuid::new_v4().to_string(),
5499            tier: Tier::Long,
5500            namespace: namespace.to_string(),
5501            title: canonical_name.to_string(),
5502            content: canonical_name.to_string(),
5503            tags: vec![ENTITY_TAG.to_string()],
5504            priority: 7,
5505            confidence: 1.0,
5506            source: "api".to_string(),
5507            access_count: 0,
5508            created_at: now.clone(),
5509            updated_at: now,
5510            last_accessed_at: None,
5511            expires_at: None,
5512            metadata,
5513            reflection_depth: 0,
5514            memory_kind: crate::models::MemoryKind::Observation,
5515            entity_id: None,
5516            persona_version: None,
5517            citations: Vec::new(),
5518            source_uri: None,
5519            source_span: None,
5520            confidence_source: ConfidenceSource::CallerProvided,
5521            confidence_signals: None,
5522            confidence_decayed_at: None,
5523            version: 1,
5524        };
5525        let id = insert(conn, &mem).context("insert entity memory")?;
5526        (id, true)
5527    };
5528
5529    let now = Utc::now().to_rfc3339();
5530    {
5531        let mut stmt = conn.prepare(
5532            "INSERT OR IGNORE INTO entity_aliases (entity_id, alias, created_at)
5533             VALUES (?1, ?2, ?3)",
5534        )?;
5535        // canonical_name is always reachable via entity_get_by_alias.
5536        // Without this row, registering an entity with no aliases makes
5537        // it unreachable by name (NHI-P3-T2).
5538        stmt.execute(params![entity_id, canonical_name, now])?;
5539        for alias in aliases {
5540            let trimmed = alias.trim();
5541            if trimmed.is_empty() || trimmed == canonical_name {
5542                continue;
5543            }
5544            stmt.execute(params![entity_id, trimmed, now])?;
5545        }
5546    }
5547
5548    let aliases_out = list_entity_aliases(conn, &entity_id)?;
5549
5550    Ok(EntityRegistration {
5551        entity_id,
5552        canonical_name: canonical_name.to_string(),
5553        namespace: namespace.to_string(),
5554        aliases: aliases_out,
5555        created,
5556    })
5557}
5558
5559/// Resolve an alias to its registered entity (Pillar 2 / Stream B).
5560///
5561/// When `namespace` is `Some`, only entities in that namespace are
5562/// considered. When `None`, all namespaces are searched and the
5563/// most-recently-created matching entity wins (deterministic
5564/// disambiguation when the same alias was registered in multiple
5565/// namespaces).
5566///
5567/// Returns `Ok(None)` if no entity claims this alias under the given
5568/// filter. Returns the full alias set for the resolved entity.
5569pub fn entity_get_by_alias(
5570    conn: &Connection,
5571    alias: &str,
5572    namespace: Option<&str>,
5573) -> Result<Option<crate::models::EntityRecord>> {
5574    use crate::models::{ENTITY_KIND, EntityRecord};
5575
5576    let trimmed = alias.trim();
5577    if trimmed.is_empty() {
5578        return Ok(None);
5579    }
5580
5581    let row: std::result::Result<(String, String, String), rusqlite::Error> =
5582        if let Some(ns) = namespace {
5583            conn.query_row(
5584                "SELECT m.id, m.title, m.namespace
5585                 FROM entity_aliases ea
5586                 JOIN memories m ON m.id = ea.entity_id
5587                 WHERE ea.alias = ?1
5588                   AND m.namespace = ?2
5589                   AND COALESCE(json_extract(m.metadata, '$.kind'), '') = ?3
5590                 ORDER BY m.created_at DESC
5591                 LIMIT 1",
5592                params![trimmed, ns, ENTITY_KIND],
5593                |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)),
5594            )
5595        } else {
5596            conn.query_row(
5597                "SELECT m.id, m.title, m.namespace
5598                 FROM entity_aliases ea
5599                 JOIN memories m ON m.id = ea.entity_id
5600                 WHERE ea.alias = ?1
5601                   AND COALESCE(json_extract(m.metadata, '$.kind'), '') = ?2
5602                 ORDER BY m.created_at DESC
5603                 LIMIT 1",
5604                params![trimmed, ENTITY_KIND],
5605                |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)),
5606            )
5607        };
5608
5609    let (entity_id, canonical_name, ns) = match row {
5610        Ok(t) => t,
5611        Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
5612        Err(e) => return Err(e.into()),
5613    };
5614
5615    let aliases = list_entity_aliases(conn, &entity_id)?;
5616    Ok(Some(EntityRecord {
5617        entity_id,
5618        canonical_name,
5619        namespace: ns,
5620        aliases,
5621    }))
5622}
5623
/// Row cap `kg_timeline` applies when the caller passes no `limit`
/// (Pillar 2 / Stream C). Chosen to fit a reasonable agent context
/// window without paging; callers that need more rows should pass an
/// explicit limit.
pub const KG_TIMELINE_DEFAULT_LIMIT: usize = 200;
5629
/// Upper bound on the number of rows `kg_timeline` will return. Kept
/// in line with the existing list/recall caps so a pathological entity
/// cannot produce an unbounded timeline.
pub const KG_TIMELINE_MAX_LIMIT: usize = 1_000;
5633
5634/// Ordered fact timeline for an entity (Pillar 2 / Stream C —
5635/// `memory_kg_timeline`). Returns outbound assertions from
5636/// `source_id`, ordered by `valid_from ASC` and tie-broken by
5637/// `created_at ASC` for deterministic display.
5638///
5639/// Filters:
5640/// - `since` (RFC3339, inclusive): drop events with `valid_from < since`
5641/// - `until` (RFC3339, inclusive): drop events with `valid_from > until`
5642/// - `limit`: row cap, clamped to [1, [`KG_TIMELINE_MAX_LIMIT`]]
5643///
5644/// Rows with NULL `valid_from` are excluded — a link without a
5645/// valid-from anchor cannot be ordered on the timeline. The schema-v15
5646/// migration backfilled legacy rows to `created_at`, and the `link()`
5647/// path stamps the column on every new insert, so this is a hard
5648/// guarantee for current code; the explicit `IS NOT NULL` guard exists
5649/// to keep external writes (`store/sqlite.rs`, custom migrations) from
5650/// silently producing invisible links.
5651///
5652/// Cross-namespace by design: timelines often span the same canonical
5653/// entity asserted by agents in different namespaces. Callers can
5654/// post-filter by `target_namespace` if they need a namespace-scoped
5655/// view.
5656///
5657/// v0.7 AGE acceleration onramp (charter §"Stream C" bullet 4). When
5658/// the v0.7 SAL ships with Apache AGE, the equivalent property-graph
5659/// query is:
5660///
5661/// ```cypher
5662/// MATCH (s {id: $source_id})-[r {valid_from IS NOT NULL,
5663///        valid_from >= $since, valid_from <= $until}]->(t)
5664/// WHERE t.id <> s.id  // exclude self-loops
5665/// RETURN t.id, r.relation, r.valid_from, r.valid_until, r.observed_by
5666/// ORDER BY r.valid_from ASC, r.created_at ASC
5667/// LIMIT $limit
5668/// ```
5669///
5670/// Stub left here per charter intent so the v0.7 migration has a 1:1
5671/// reference query.
5672pub fn kg_timeline(
5673    conn: &Connection,
5674    source_id: &str,
5675    since: Option<&str>,
5676    until: Option<&str>,
5677    limit: Option<usize>,
5678) -> Result<Vec<crate::models::KgTimelineEvent>> {
5679    use crate::models::KgTimelineEvent;
5680
5681    let cap = limit
5682        .unwrap_or(KG_TIMELINE_DEFAULT_LIMIT)
5683        .clamp(1, KG_TIMELINE_MAX_LIMIT);
5684
5685    // Compose the predicate dynamically for `since` / `until`. Bind
5686    // values are appended in the same order so the placeholders line up.
5687    let mut sql = String::from(
5688        "SELECT ml.target_id, ml.relation, ml.valid_from, ml.valid_until,
5689                ml.observed_by, m.title, m.namespace, ml.created_at
5690         FROM memory_links ml
5691         JOIN memories m ON m.id = ml.target_id
5692         WHERE ml.source_id = ?1
5693           AND ml.valid_from IS NOT NULL",
5694    );
5695    let mut binds: Vec<Box<dyn rusqlite::ToSql>> = vec![Box::new(source_id.to_string())];
5696    if let Some(s) = since {
5697        sql.push_str(" AND ml.valid_from >= ?");
5698        sql.push_str(&(binds.len() + 1).to_string());
5699        binds.push(Box::new(s.to_string()));
5700    }
5701    if let Some(u) = until {
5702        sql.push_str(" AND ml.valid_from <= ?");
5703        sql.push_str(&(binds.len() + 1).to_string());
5704        binds.push(Box::new(u.to_string()));
5705    }
5706    sql.push_str(" ORDER BY ml.valid_from ASC, ml.created_at ASC LIMIT ?");
5707    sql.push_str(&(binds.len() + 1).to_string());
5708    binds.push(Box::new(i64::try_from(cap).unwrap_or(i64::MAX)));
5709
5710    let mut stmt = conn.prepare(&sql)?;
5711    let bind_refs: Vec<&dyn rusqlite::ToSql> = binds.iter().map(AsRef::as_ref).collect();
5712    let rows = stmt.query_map(rusqlite::params_from_iter(bind_refs), |row| {
5713        Ok(KgTimelineEvent {
5714            target_id: row.get(0)?,
5715            relation: row.get(1)?,
5716            valid_from: row.get(2)?,
5717            valid_until: row.get(3)?,
5718            observed_by: row.get(4)?,
5719            title: row.get(5)?,
5720            target_namespace: row.get(6)?,
5721        })
5722    })?;
5723    rows.collect::<rusqlite::Result<Vec<_>>>()
5724        .map_err(Into::into)
5725}
5726
/// What [`invalidate_link`] reports back after a successful
/// supersession (Pillar 2 / Stream C — `memory_kg_invalidate`).
///
/// Comparing the two fields lets a caller tell a first-time
/// supersession apart from an idempotent retry.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct InvalidateResult {
    /// Timestamp that is now stored on the link.
    pub valid_until: String,
    /// Value the column held before this call; `None` when the link
    /// had never been invalidated.
    pub previous_valid_until: Option<String>,
}
5737
5738/// Mark a KG link as superseded by setting its `valid_until` column
5739/// (Pillar 2 / Stream C — `memory_kg_invalidate`). Returns `Ok(None)`
5740/// when the `(source_id, target_id, relation)` triple does not match an
5741/// existing link. The supplied `valid_until` defaults to the current
5742/// wall-clock time in RFC3339 form when omitted; callers needing
5743/// historical or future supersession can pass an explicit value.
5744///
5745/// Idempotent: calling repeatedly overwrites the prior `valid_until`
5746/// (the prior value is returned in `previous_valid_until` so callers
5747/// can detect the overwrite). The schema does not yet carry an audit
5748/// column for the supersession reason; that arrives with v0.7
5749/// attestation. Until then, callers should record the rationale in
5750/// their own logs or a paired memory.
5751///
5752/// # v0.7.0 #628 H5 — signed-row preservation
5753///
5754/// `valid_until` is one of the six fields the H2 outbound signer
5755/// commits to (see [`crate::identity::sign::SignableLink`]). Mutating
5756/// it on a previously self-signed link silently flips every future
5757/// `memory_verify` to `signature_verified=false / attest_level=unsigned`
5758/// — legitimate supersession would be indistinguishable from
5759/// tampering on the wire. To preserve the audit chain we:
5760///
5761/// 1. NULL the `signature` column (and reset `attest_level` to
5762///    `"unsigned"`) so a future verify reports an honest "no
5763///    signature on this row" rather than a misleading "signature
5764///    mismatch".
5765/// 2. Append a `memory_link.invalidated` row to `signed_events` whose
5766///    `payload_hash` binds to the post-supersession canonical CBOR —
5767///    the auditor can replay both the original `memory_link.created`
5768///    row AND the matching `memory_link.invalidated` row to prove the
5769///    supersession was an intentional act by the same agent.
5770///
5771/// The audit append is best-effort: if the `signed_events` write
5772/// fails (vanishingly unlikely outside disk-full / schema-drift
5773/// scenarios), the supersession still persists and the failure is
5774/// surfaced in `tracing::warn!`. Cratering the supersession on an
5775/// audit-write failure would punish the legitimate caller for a
5776/// substrate problem they cannot fix.
5777pub fn invalidate_link(
5778    conn: &Connection,
5779    source_id: &str,
5780    target_id: &str,
5781    relation: &str,
5782    valid_until: Option<&str>,
5783) -> Result<Option<InvalidateResult>> {
5784    let stamp = valid_until.map_or_else(|| Utc::now().to_rfc3339(), str::to_string);
5785
5786    // P2 (#628 agent-3 follow-up): wrap the SELECT-then-UPDATE-then-
5787    // audit-INSERT in a single `BEGIN IMMEDIATE` transaction. Without
5788    // this, a daemon crash between the UPDATE (which clears the
5789    // signature) and the audit INSERT leaves H5's silent-supersession
5790    // state — the exact thing H5 was added to prevent. RESERVED-lock
5791    // semantics also serialise concurrent writers across processes.
5792    conn.execute(connection::SQL_BEGIN_IMMEDIATE, [])?;
5793    // From here on, every early return MUST `ROLLBACK` first.
5794    let rollback = || {
5795        let _ = conn.execute(connection::SQL_ROLLBACK, []);
5796    };
5797
5798    // Pull the prior `valid_until` AND the signing surface so the
5799    // audit append can reflect the row's pre-mutation attest state.
5800    // A single round-trip keeps the SELECT cheap.
5801    let prior_row: (
5802        Option<String>,
5803        Option<Vec<u8>>,
5804        Option<String>,
5805        Option<String>,
5806        Option<String>,
5807    ) = match conn.query_row(
5808        "SELECT valid_until, signature, attest_level, observed_by, valid_from \
5809             FROM memory_links \
5810             WHERE source_id = ?1 AND target_id = ?2 AND relation = ?3",
5811        params![source_id, target_id, relation],
5812        |r| {
5813            Ok((
5814                r.get::<_, Option<String>>(0)?,
5815                r.get::<_, Option<Vec<u8>>>(1)?,
5816                r.get::<_, Option<String>>(2)?,
5817                r.get::<_, Option<String>>(3)?,
5818                r.get::<_, Option<String>>(4)?,
5819            ))
5820        },
5821    ) {
5822        Ok(v) => v,
5823        Err(rusqlite::Error::QueryReturnedNoRows) => {
5824            rollback();
5825            return Ok(None);
5826        }
5827        Err(e) => {
5828            rollback();
5829            return Err(e.into());
5830        }
5831    };
5832    let (prior, prior_signature, _prior_attest, observed_by, valid_from) = prior_row;
5833    let was_signed = prior_signature.is_some();
5834
5835    let update_result = if was_signed {
5836        // v0.7.0 #628 H5 — clear the signing surface so a future
5837        // `memory_verify` honestly reports "unsigned" instead of
5838        // "signature mismatch". Resetting `attest_level` keeps the
5839        // column consistent with the now-NULL signature blob.
5840        conn.execute(
5841            "UPDATE memory_links \
5842                SET valid_until = ?4, signature = NULL, attest_level = 'unsigned' \
5843              WHERE source_id = ?1 AND target_id = ?2 AND relation = ?3",
5844            params![source_id, target_id, relation, &stamp],
5845        )
5846    } else {
5847        conn.execute(
5848            "UPDATE memory_links SET valid_until = ?4 \
5849             WHERE source_id = ?1 AND target_id = ?2 AND relation = ?3",
5850            params![source_id, target_id, relation, &stamp],
5851        )
5852    };
5853    if let Err(e) = update_result {
5854        rollback();
5855        return Err(e.into());
5856    }
5857
5858    // v0.7.0 #628 H5 — append an `invalidated` audit row when we
5859    // cleared a signature. The `payload_hash` commits to the
5860    // canonical CBOR over the post-supersession SignableLink so the
5861    // auditor sees exactly what the row looks like now (`valid_until`
5862    // populated). The `signature` column on the audit row is the
5863    // *previous* signature — the auditor can compare it byte-for-byte
5864    // against the original `memory_link.created` row's signature to
5865    // confirm the same key issued both events. We deliberately do NOT
5866    // re-sign here: this writer has no guarantee that the original
5867    // signing keypair is loaded (federation may have applied an
5868    // inbound `peer_attested` row), so an honest "the signing surface
5869    // was cleared" event is the only response that doesn't risk
5870    // forgery.
5871    if was_signed {
5872        let signable = crate::identity::sign::SignableLink {
5873            src_id: source_id,
5874            dst_id: target_id,
5875            relation,
5876            observed_by: observed_by.as_deref(),
5877            valid_from: valid_from.as_deref(),
5878            valid_until: Some(stamp.as_str()),
5879        };
5880        match crate::identity::sign::canonical_cbor(&signable) {
5881            Ok(cbor) => {
5882                let event = crate::signed_events::SignedEvent {
5883                    id: uuid::Uuid::new_v4().to_string(),
5884                    // Best-effort agent_id: the `observed_by` claim
5885                    // from the original signed row (the agent that
5886                    // attested the supersession's source row). Falls
5887                    // back to "unknown" when the legacy row carried
5888                    // no observed_by — vanishingly rare for signed
5889                    // rows since H2 always populates the column on
5890                    // self-signed inserts.
5891                    agent_id: observed_by.clone().unwrap_or_else(|| "unknown".to_string()),
5892                    event_type: crate::signed_events::event_types::MEMORY_LINK_INVALIDATED
5893                        .to_string(),
5894                    payload_hash: crate::signed_events::payload_hash(&cbor),
5895                    signature: prior_signature,
5896                    attest_level: crate::models::AttestLevel::Unsigned.as_str().to_string(),
5897                    timestamp: Utc::now().to_rfc3339(),
5898                    ..crate::signed_events::SignedEvent::default()
5899                };
5900                // v0.7.0 ship-readiness: use the `_no_tx` variant — we
5901                // are already inside the BEGIN IMMEDIATE wrap (line 3560
5902                // above). The public `append_signed_event` opens its own
5903                // unchecked_transaction which would fail under nesting
5904                // (SQLite does not allow nested transactions on a single
5905                // connection).
5906                if let Err(e) = crate::signed_events::append_signed_event_no_tx(conn, &event) {
5907                    // P2 (#628 agent-3): refuse to commit the UPDATE if
5908                    // the audit row can't be appended. Otherwise the
5909                    // signature clearing happens silently and we lose
5910                    // the audit trail H5 was added to provide.
5911                    rollback();
5912                    return Err(anyhow::anyhow!(
5913                        "failed to append memory_link.invalidated audit row \
5914                         (rolled back signature clearing): {e}"
5915                    ));
5916                }
5917            }
5918            Err(e) => {
5919                rollback();
5920                return Err(anyhow::anyhow!(
5921                    "failed to encode canonical CBOR for invalidation audit \
5922                     (rolled back signature clearing): {e}"
5923                ));
5924            }
5925        }
5926    }
5927
5928    conn.execute(connection::SQL_COMMIT, [])?;
5929    Ok(Some(InvalidateResult {
5930        valid_until: stamp,
5931        previous_valid_until: prior,
5932    }))
5933}
5934
/// Row cap `kg_query` applies when the caller passes no `limit`
/// (Pillar 2 / Stream C). Deliberately the same value as
/// `kg_timeline`'s default so both traversal tools behave consistently
/// for the agents driving them.
pub const KG_QUERY_DEFAULT_LIMIT: usize = 200;
5939
/// Upper bound on the number of rows `kg_query` will return. Same
/// ceiling as `kg_timeline` and the existing list/recall caps, which
/// keeps a traversal bounded even under pathological fan-out.
pub const KG_QUERY_MAX_LIMIT: usize = 1_000;
5944
/// Deepest traversal [`kg_query`] accepts. The recursive-CTE
/// implementation enforces this explicit ceiling so a crafted call
/// cannot start an unbounded walk; the published budget in the charter
/// (`v0.6.3-grand-slam.md` § Performance Budgets) is depth ≤ 5.
pub const KG_QUERY_MAX_SUPPORTED_DEPTH: usize = 5;
5950
5951/// Outbound KG traversal from a source memory (Pillar 2 / Stream C —
5952/// `memory_kg_query`). Returns one row per link reachable within
5953/// `max_depth` hops, filtered by:
5954///
/// - `valid_at` (RFC3339, optional): only links valid at that instant —
///   `valid_from <= valid_at AND (valid_until IS NULL OR valid_until > valid_at)`.
///   When omitted, no `valid_from` constraint is applied, so rows with
///   NULL `valid_from` (legacy / un-anchored links) are also returned;
///   links whose `valid_until` is not later than the current time are
///   still dropped unless `include_invalidated` is `true`.
5959/// - `allowed_agents` (optional): when provided, only links with
5960///   `observed_by` in the set are returned. An **empty** allowlist
5961///   returns zero rows by design — callers signaling "no agents are
5962///   trusted" must get an empty traversal, not the unfiltered fallback.
5963///   When omitted entirely (`None`), the agent filter is skipped.
5964/// - `limit`: row cap, clamped to [1, [`KG_QUERY_MAX_LIMIT`]].
5965///
5966/// `max_depth` must be in `[1, KG_QUERY_MAX_SUPPORTED_DEPTH]`; passing
5967/// a larger value yields an explicit error rather than a silent
5968/// truncation, so callers learn they hit the ceiling instead of
5969/// receiving a partial graph.
5970///
5971/// Multi-hop traversal uses a recursive CTE with cycle detection on
5972/// the accumulated path, so cycles in the link graph cannot loop the
5973/// traversal indefinitely. Each hop reapplies the same temporal /
5974/// agent filters as the anchor — a chain only extends through links
5975/// that pass every filter on every hop.
5976///
5977/// Ordering is `depth ASC, COALESCE(valid_from, created_at) ASC,
5978/// created_at ASC` — shallower hops first, then time-ordered within
5979/// each level. For depth=1 callers this collapses to the original
5980/// time ordering. The `depth` field reflects the actual hop count and
5981/// `path` is the full `src->mid->target` chain.
5982pub fn kg_query(
5983    conn: &Connection,
5984    source_id: &str,
5985    max_depth: usize,
5986    valid_at: Option<&str>,
5987    allowed_agents: Option<&[String]>,
5988    limit: Option<usize>,
5989    include_invalidated: bool,
5990) -> Result<Vec<crate::models::KgQueryNode>> {
5991    use crate::models::KgQueryNode;
5992
5993    if max_depth == 0 {
5994        // #962 typed envelope.
5995        return Err(anyhow::Error::new(StorageError::InvalidArgument {
5996            reason: crate::errors::msg::MAX_DEPTH_MIN.to_string(),
5997        }));
5998    }
5999    if max_depth > KG_QUERY_MAX_SUPPORTED_DEPTH {
6000        // #962 typed envelope.
6001        return Err(anyhow::Error::new(StorageError::InvalidArgument {
6002            reason: format!(
6003                "max_depth={max_depth} exceeds supported depth={KG_QUERY_MAX_SUPPORTED_DEPTH}"
6004            ),
6005        }));
6006    }
6007
6008    // Empty allowlist == "no agents are trusted" — short-circuit so we
6009    // don't have to invent a SQL `IN ()` clause (which is invalid).
6010    if let Some(agents) = allowed_agents
6011        && agents.is_empty()
6012    {
6013        return Ok(Vec::new());
6014    }
6015
6016    let cap = limit
6017        .unwrap_or(KG_QUERY_DEFAULT_LIMIT)
6018        .clamp(1, KG_QUERY_MAX_LIMIT);
6019
6020    // Build the per-hop predicate once; the anchor and recursive members
6021    // both apply it to a row aliased `ml`. Bind values are appended in
6022    // resolution order so positional placeholders line up.
6023    let mut binds: Vec<Box<dyn rusqlite::ToSql>> = Vec::new();
6024    let mut hop_filter = String::new();
6025    if let Some(t) = valid_at {
6026        hop_filter.push_str(" AND ml.valid_from IS NOT NULL AND ml.valid_from <= ?");
6027        binds.push(Box::new(t.to_string()));
6028        hop_filter.push_str(&binds.len().to_string());
6029        hop_filter.push_str(" AND (ml.valid_until IS NULL OR ml.valid_until > ?");
6030        binds.push(Box::new(t.to_string()));
6031        hop_filter.push_str(&binds.len().to_string());
6032        hop_filter.push(')');
6033    } else if !include_invalidated {
6034        // "Current view" default — exclude edges that have been
6035        // invalidated via memory_kg_invalidate (valid_until set in the
6036        // past). NHI-P3-T7 regression: prior versions returned
6037        // invalidated edges in default kg_query results.
6038        // Caller can pass include_invalidated=true to opt in to the
6039        // full-history view.
6040        hop_filter.push_str(
6041            " AND (ml.valid_until IS NULL OR ml.valid_until > strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))",
6042        );
6043    }
6044    if let Some(agents) = allowed_agents {
6045        // Already short-circuited the empty case above.
6046        hop_filter.push_str(" AND ml.observed_by IN (");
6047        for (i, a) in agents.iter().enumerate() {
6048            binds.push(Box::new(a.clone()));
6049            if i > 0 {
6050                hop_filter.push_str(", ");
6051            }
6052            hop_filter.push('?');
6053            hop_filter.push_str(&binds.len().to_string());
6054        }
6055        hop_filter.push(')');
6056    }
6057
6058    // Anchor binds source_id, recursive member binds max_depth, final
6059    // SELECT binds the row cap. Order matters — placeholders are
6060    // resolved by the position they occupy in the assembled string.
6061    binds.push(Box::new(source_id.to_string()));
6062    let source_ph = binds.len();
6063    binds.push(Box::new(i64::try_from(max_depth).unwrap_or(i64::MAX)));
6064    let max_depth_ph = binds.len();
6065    binds.push(Box::new(i64::try_from(cap).unwrap_or(i64::MAX)));
6066    let limit_ph = binds.len();
6067
6068    // v0.7 AGE acceleration onramp (charter §"Stream C — KG Query Layer"
6069    // bullet 4). The recursive CTE below is the v0.6.3 SQLite/Postgres
6070    // implementation. When the v0.7 SAL ships with Apache AGE wired in,
6071    // the equivalent property-graph query will look like:
6072    //
6073    //   MATCH (s {id: $source_id})-[r*1..$max_depth {valid_from <= $t,
6074    //          observed_by IN $allowed_agents}]->(t)
6075    //   WHERE NONE(n IN nodes(path) WHERE n.id = t.id)  -- cycle prune
6076    //   RETURN t.id, last(r).relation, t.title, length(r) AS depth,
6077    //          [n IN nodes(path) | n.id] AS path
6078    //   ORDER BY depth, last(r).valid_from
6079    //   LIMIT $limit
6080    //
6081    // Stub left here per charter intent so the v0.7 migration to AGE
6082    // has a 1:1 reference query alongside the SQL implementation.
6083
6084    let sql = format!(
6085        "WITH RECURSIVE traversal(\
6086            target_id, relation, valid_from, valid_until, observed_by, \
6087            link_created_at, depth, path\
6088         ) AS (\
6089            SELECT ml.target_id, ml.relation, ml.valid_from, ml.valid_until, \
6090                   ml.observed_by, ml.created_at, 1, \
6091                   json_array(ml.source_id, ml.target_id) \
6092            FROM memory_links ml \
6093            WHERE ml.source_id = ?{source_ph}{hop_filter} \
6094            UNION ALL \
6095            SELECT ml.target_id, ml.relation, ml.valid_from, ml.valid_until, \
6096                   ml.observed_by, ml.created_at, t.depth + 1, \
6097                   json_insert(t.path, '$[' || json_array_length(t.path) || ']', ml.target_id) \
6098            FROM memory_links ml \
6099            JOIN traversal t ON ml.source_id = t.target_id \
6100            WHERE t.depth < ?{max_depth_ph} \
6101              AND NOT EXISTS (SELECT 1 FROM json_each(t.path) WHERE value = ml.target_id)\
6102              {hop_filter}\
6103         ) \
6104         SELECT t.target_id, t.relation, t.valid_from, t.valid_until, \
6105                t.observed_by, m.title, m.namespace, t.depth, \
6106                (SELECT group_concat(value, '->') FROM json_each(t.path)) \
6107         FROM traversal t \
6108         JOIN memories m ON m.id = t.target_id \
6109         ORDER BY t.depth ASC, COALESCE(t.valid_from, t.link_created_at) ASC, \
6110                  t.link_created_at ASC \
6111         LIMIT ?{limit_ph}",
6112    );
6113
6114    let mut stmt = conn.prepare(&sql)?;
6115    let bind_refs: Vec<&dyn rusqlite::ToSql> = binds.iter().map(AsRef::as_ref).collect();
6116    let rows = stmt.query_map(rusqlite::params_from_iter(bind_refs), |row| {
6117        let target_id: String = row.get(0)?;
6118        let depth: i64 = row.get(7)?;
6119        Ok(KgQueryNode {
6120            target_id,
6121            relation: row.get(1)?,
6122            valid_from: row.get(2)?,
6123            valid_until: row.get(3)?,
6124            observed_by: row.get(4)?,
6125            title: row.get(5)?,
6126            target_namespace: row.get(6)?,
6127            depth: usize::try_from(depth).unwrap_or(0),
6128            path: row.get(8)?,
6129        })
6130    })?;
6131    rows.collect::<rusqlite::Result<Vec<_>>>()
6132        .map_err(Into::into)
6133}
6134
6135/// Default cap on paths returned by [`find_paths`] when the caller does
6136/// not specify one. Matches the v0.7 J7 charter.
6137pub const FIND_PATHS_DEFAULT_LIMIT: usize = 10;
6138
6139/// Hard ceiling on paths returned by [`find_paths`]. A crafted call
6140/// asking for more than this many paths is clamped down. Matches the
6141/// v0.7 J7 charter.
6142pub const FIND_PATHS_MAX_LIMIT: usize = 50;
6143
6144/// Hard ceiling on traversal depth supported by [`find_paths`].
6145/// Distinct from [`KG_QUERY_MAX_SUPPORTED_DEPTH`] because path
6146/// enumeration is more expensive than reachability — we can afford a
6147/// slightly deeper budget for the BFS but not by much.
6148///
6149/// **Cap = 7.** Asking for more is rejected with an error that names
6150/// this constant explicitly so callers see exactly which knob to file
6151/// against. Contact maintainers to raise this bound *after* benchmarking
6152/// the new ceiling on a representative KG; the BFS is `O(d * |E|)` per
6153/// hop with a `json_each` cycle check, and depth-8+ has not been load-
6154/// tested as of v0.7.0.
6155pub const FIND_PATHS_MAX_DEPTH: usize = 7;
6156
6157/// Default depth used when the caller omits `max_depth`. Mirrors the
6158/// v0.7 J7 charter's "shallow by default, opt-in deep traversal" rule.
6159pub const FIND_PATHS_DEFAULT_DEPTH: usize = 4;
6160
6161/// v0.7 J7 — enumerate up to N undirected paths between two memories.
6162///
6163/// Walks `memory_links` with a recursive CTE that carries the full
6164/// visited-id chain on each row, both as the outbound `path` rendered
6165/// for callers and as the cycle-detection set so the traversal cannot
6166/// loop on a cyclic link graph. Each row of the CTE represents one
6167/// candidate prefix; rows that reach `target_id` are projected out as
6168/// completed paths.
6169///
6170/// # Directionality contract (v0.7.0)
6171///
6172/// **`find_paths` is UNDIRECTED** (UNION of forward + reverse edges at
6173/// every hop) — **`kg_query` is DIRECTED** (forward edges only, by
6174/// design). The two tools answer different questions and are not
6175/// interchangeable:
6176///
6177/// - `find_paths(a, b)` — *are these two memories connected through any
6178///   relation chain?* Symmetric closure: `find_paths(a, b)` and
6179///   `find_paths(b, a)` return the same path set (modulo reversal).
6180/// - `kg_query(start, depth)` — *what does the directed `source →
6181///   target` subgraph rooted at `start` look like at depth ≤ N?*
6182///   `kg_query(b, …)` will not surface `a → b`.
6183///
6184/// **`include_invalidated` is honored identically** by both tools: when
6185/// `false` (default), edges whose `valid_until` lies in the past are
6186/// excluded from the traversal; when `true`, the full historical link
6187/// graph is walked. The flag's semantics do not change with directionality.
6188///
6189/// The KG corpus uses directional links to model temporal ordering of an
6190/// assertion (`source → target`), so path queries — which are "are these
6191/// two memories connected via *any* relation chain?" — apply the
6192/// symmetric closure here via `UNION ALL` over the original edge and the
6193/// reverse edge at each hop.
6194///
6195/// # Limits
6196///
6197/// `max_depth` defaults to [`FIND_PATHS_DEFAULT_DEPTH`] and is hard-
6198/// capped at [`FIND_PATHS_MAX_DEPTH`] (= 7); passing a larger value
6199/// yields an explicit error rather than silent truncation. The error
6200/// message names `FIND_PATHS_MAX_DEPTH` so operators can grep the
6201/// codebase for the single tunable knob. `max_results` defaults to
6202/// [`FIND_PATHS_DEFAULT_LIMIT`] and is clamped at
6203/// [`FIND_PATHS_MAX_LIMIT`]; passing a larger value collapses to the
6204/// ceiling without error (paths beyond the cap are dropped, the
6205/// shortest paths win on the `ORDER BY`).
6206///
6207/// Returns `Vec<Vec<String>>` — one inner vector per discovered path,
6208/// each carrying the chain of memory ids from `source_id` (first) to
6209/// `target_id` (last). Self-paths (`source_id == target_id`) collapse
6210/// to a single one-element path. Disconnected pairs return an empty
6211/// outer vector.
6212pub fn find_paths(
6213    conn: &Connection,
6214    source_id: &str,
6215    target_id: &str,
6216    max_depth: Option<usize>,
6217    max_results: Option<usize>,
6218    include_invalidated: bool,
6219) -> Result<Vec<Vec<String>>> {
6220    let depth = max_depth.unwrap_or(FIND_PATHS_DEFAULT_DEPTH);
6221    if depth == 0 {
6222        // #962 typed envelope.
6223        return Err(anyhow::Error::new(StorageError::InvalidArgument {
6224            reason: crate::errors::msg::MAX_DEPTH_MIN.to_string(),
6225        }));
6226    }
6227    if depth > FIND_PATHS_MAX_DEPTH {
6228        // #962 typed envelope.
6229        return Err(anyhow::Error::new(StorageError::InvalidArgument {
6230            reason: format!(
6231                "max_depth={depth} exceeds supported depth={FIND_PATHS_MAX_DEPTH} (FIND_PATHS_MAX_DEPTH); contact maintainers to raise this bound after benchmarking"
6232            ),
6233        }));
6234    }
6235    let cap = max_results
6236        .unwrap_or(FIND_PATHS_DEFAULT_LIMIT)
6237        .clamp(1, FIND_PATHS_MAX_LIMIT);
6238
6239    // Self-path short-circuit. The recursive CTE below requires depth>=1
6240    // before it can match `target_id`; the trivial chain is just the
6241    // single-element path through the start node.
6242    if source_id == target_id {
6243        return Ok(vec![vec![source_id.to_string()]]);
6244    }
6245
6246    // "Current view" filter — exclude edges whose `valid_until` lies in
6247    // the past (invalidated via `memory_kg_invalidate`). Caller can pass
6248    // `include_invalidated=true` to traverse the full historical link
6249    // graph. NHI-P3-T7 regression: prior versions enumerated paths
6250    // through invalidated edges by default.
6251    let invalidated_filter = if include_invalidated {
6252        ""
6253    } else {
6254        " WHERE (valid_until IS NULL OR valid_until > strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))"
6255    };
6256
6257    // The CTE walks symmetric edges: for each row in `memory_links` we
6258    // also generate its reverse so the traversal is undirected. Cycle
6259    // detection uses the JSON-encoded path array (same trick as
6260    // `kg_query`) — `NOT EXISTS (... json_each ...)` short-circuits the
6261    // recursion as soon as the next hop would revisit a node already in
6262    // the prefix.
6263    //
6264    // The completed-path filter sits in the outer SELECT rather than
6265    // the recursive member because a partial prefix that lands on
6266    // `target_id` should be reported AND continue to extend (a longer
6267    // path through `target_id` might reach itself through a different
6268    // route — though for the KG that should be rare, the CTE doesn't
6269    // need to know that). `ORDER BY depth, path` keeps the shortest
6270    // paths first so the `LIMIT` cap drops the longest tail.
6271    let sql = format!(
6272        "WITH RECURSIVE traversal(current_id, depth, path) AS (
6273            SELECT ?1, 0, json_array(?1)
6274            UNION ALL
6275            SELECT next_id, t.depth + 1,
6276                   json_insert(t.path, '$[' || json_array_length(t.path) || ']', next_id)
6277            FROM traversal t
6278            JOIN (
6279                SELECT source_id AS from_id, target_id AS next_id
6280                FROM memory_links{invalidated_filter}
6281                UNION
6282                SELECT target_id AS from_id, source_id AS next_id
6283                FROM memory_links{invalidated_filter}
6284            ) edges ON edges.from_id = t.current_id
6285            WHERE t.depth < ?3
6286              AND NOT EXISTS (
6287                  SELECT 1 FROM json_each(t.path) WHERE value = next_id
6288              )
6289         )
6290         SELECT path
6291         FROM traversal
6292         WHERE current_id = ?2 AND depth >= 1
6293         ORDER BY depth ASC, path ASC
6294         LIMIT ?4"
6295    );
6296
6297    let depth_i64 = i64::try_from(depth).unwrap_or(i64::MAX);
6298    let cap_i64 = i64::try_from(cap).unwrap_or(i64::MAX);
6299
6300    let mut stmt = conn.prepare(&sql)?;
6301    let rows = stmt.query_map(params![source_id, target_id, depth_i64, cap_i64], |row| {
6302        let json_path: String = row.get(0)?;
6303        Ok(json_path)
6304    })?;
6305
6306    let mut paths: Vec<Vec<String>> = Vec::new();
6307    for row in rows {
6308        let json = row?;
6309        let parsed: Vec<String> = serde_json::from_str(&json).map_err(|e| {
6310            rusqlite::Error::FromSqlConversionFailure(0, rusqlite::types::Type::Text, Box::new(e))
6311        })?;
6312        paths.push(parsed);
6313    }
6314
6315    Ok(paths)
6316}
6317
6318/// List all aliases registered for an entity, ordered by registration
6319/// time then alphabetical for stable display.
6320fn list_entity_aliases(conn: &Connection, entity_id: &str) -> Result<Vec<String>> {
6321    let mut stmt = conn.prepare(
6322        "SELECT alias FROM entity_aliases
6323         WHERE entity_id = ?1
6324         ORDER BY created_at ASC, alias ASC",
6325    )?;
6326    let aliases: Vec<String> = stmt
6327        .query_map(params![entity_id], |r| r.get::<_, String>(0))?
6328        .collect::<rusqlite::Result<Vec<_>>>()?;
6329    Ok(aliases)
6330}
6331
6332/// Register or refresh an agent in the reserved `_agents` namespace.
6333///
6334/// Each agent is stored as a long-tier memory with `title = "agent:<agent_id>"`.
6335/// Duplicate registration for the same `agent_id` refreshes `last_seen_at` and
6336/// overwrites `agent_type` + `capabilities`, while preserving the original
6337/// `registered_at` timestamp (caller-observable provenance).
6338///
6339/// Returns the stored memory ID.
6340pub fn register_agent(
6341    conn: &Connection,
6342    agent_id: &str,
6343    agent_type: &str,
6344    capabilities: &[String],
6345) -> Result<String> {
6346    let title = crate::models::agent_registration_title(agent_id);
6347    let now = Utc::now().to_rfc3339();
6348
6349    // Preserve original registered_at across re-registration.
6350    let registered_at = conn
6351        .query_row(
6352            "SELECT json_extract(metadata, '$.registered_at') FROM memories
6353             WHERE namespace = ?1 AND title = ?2",
6354            params![AGENTS_NAMESPACE, &title],
6355            |row| row.get::<_, Option<String>>(0),
6356        )
6357        .ok()
6358        .flatten()
6359        .unwrap_or_else(|| now.clone());
6360
6361    let caps_json: Vec<serde_json::Value> = capabilities
6362        .iter()
6363        .map(|c| serde_json::Value::String(c.clone()))
6364        .collect();
6365
6366    let metadata = serde_json::json!({
6367        "agent_id": agent_id,
6368        (field_names::AGENT_TYPE): agent_type,
6369        (field_names::CAPABILITIES): caps_json,
6370        (field_names::REGISTERED_AT): registered_at,
6371        (field_names::LAST_SEEN_AT): now,
6372        // #910 (SAL-level enforcement) — agent-registration rows live
6373        // in the `_agents` namespace and are a public roster: every
6374        // agent has a legitimate need to know which other agents are
6375        // registered (consensus voting, peer attestation, etc.). Stamp
6376        // scope=collective so the SAL visibility filter doesn't drop
6377        // them on cross-agent reads.
6378        "scope": crate::models::MemoryScope::Collective.as_str(),
6379    });
6380
6381    let content = serde_json::to_string(&metadata)
6382        .context("failed to serialize agent registration content")?;
6383
6384    let mem = Memory {
6385        id: uuid::Uuid::new_v4().to_string(),
6386        tier: Tier::Long,
6387        namespace: AGENTS_NAMESPACE.to_string(),
6388        title,
6389        content,
6390        tags: vec!["agent-registration".to_string()],
6391        priority: 5,
6392        confidence: 1.0,
6393        source: "system".to_string(),
6394        access_count: 0,
6395        created_at: now.clone(),
6396        updated_at: now,
6397        last_accessed_at: None,
6398        expires_at: None,
6399        metadata,
6400        reflection_depth: 0,
6401        memory_kind: crate::models::MemoryKind::Observation,
6402        entity_id: None,
6403        persona_version: None,
6404        citations: Vec::new(),
6405        source_uri: None,
6406        source_span: None,
6407        confidence_source: ConfidenceSource::CallerProvided,
6408        confidence_signals: None,
6409        confidence_decayed_at: None,
6410        version: 1,
6411    };
6412
6413    insert(conn, &mem)
6414}
6415
6416/// List every registered agent. Rows are drawn from the `_agents` namespace
6417/// and parsed out of each memory's metadata.
6418pub fn list_agents(conn: &Connection) -> Result<Vec<AgentRegistration>> {
6419    let now = Utc::now().to_rfc3339();
6420    let mut stmt = conn.prepare(
6421        "SELECT metadata FROM memories
6422         WHERE namespace = ?1
6423           AND (expires_at IS NULL OR expires_at > ?2)
6424         ORDER BY json_extract(metadata, '$.registered_at') ASC",
6425    )?;
6426    let rows = stmt.query_map(params![AGENTS_NAMESPACE, now], |row| {
6427        row.get::<_, String>(0)
6428    })?;
6429
6430    let mut agents = Vec::new();
6431    for r in rows {
6432        let raw = r?;
6433        let meta: serde_json::Value =
6434            serde_json::from_str(&raw).context("failed to parse agent metadata as JSON")?;
6435        let agent_id = meta
6436            .get("agent_id")
6437            .and_then(serde_json::Value::as_str)
6438            .unwrap_or_default()
6439            .to_string();
6440        let agent_type = meta
6441            .get(field_names::AGENT_TYPE)
6442            .and_then(serde_json::Value::as_str)
6443            .unwrap_or_default()
6444            .to_string();
6445        let capabilities: Vec<String> = meta
6446            .get(field_names::CAPABILITIES)
6447            .and_then(serde_json::Value::as_array)
6448            .map(|arr| {
6449                arr.iter()
6450                    .filter_map(|v| v.as_str().map(String::from))
6451                    .collect()
6452            })
6453            .unwrap_or_default();
6454        let registered_at = meta
6455            .get(field_names::REGISTERED_AT)
6456            .and_then(serde_json::Value::as_str)
6457            .unwrap_or_default()
6458            .to_string();
6459        let last_seen_at = meta
6460            .get(field_names::LAST_SEEN_AT)
6461            .and_then(serde_json::Value::as_str)
6462            .unwrap_or_default()
6463            .to_string();
6464        agents.push(AgentRegistration {
6465            agent_id,
6466            agent_type,
6467            capabilities,
6468            registered_at,
6469            last_seen_at,
6470        });
6471    }
6472    Ok(agents)
6473}
6474
6475/// Bind (or rotate) an agent's Ed25519 public key into its `_agents`
6476/// registration row metadata (#626 Layer-3, Task 1.3 / C3).
6477///
6478/// The pubkey is the anchor the write-path attestation gate verifies
6479/// against: a signed write claiming `agent_id` is upgraded from *claimed*
6480/// to *attested* only when its signature verifies under the key bound
6481/// here. Stored under `metadata.agent_pubkey` (URL-safe-no-pad base64)
6482/// alongside a `pubkey_bound_at` RFC3339 timestamp for rotation
6483/// provenance.
6484///
6485/// Migration-free: the key rides in the existing registration row's
6486/// JSON metadata (no schema bump). `json_set` updates `metadata` and the
6487/// mirrored `content` column atomically so `list_agents` / the verifier
6488/// observe a consistent row.
6489///
6490/// The agent MUST already be registered (`register_agent`) — binding a
6491/// key to an unregistered id is rejected so a stray pubkey can never
6492/// shadow a future legitimate registration. Re-binding overwrites the
6493/// previous key (key rotation / revoke-then-rebind).
6494///
6495/// # Errors
6496///
6497/// - the agent is not registered (no `_agents` row for `agent_id`)
6498/// - the underlying `UPDATE` fails
6499pub fn bind_agent_pubkey(conn: &Connection, agent_id: &str, pubkey_b64: &str) -> Result<()> {
6500    let title = crate::models::agent_registration_title(agent_id);
6501    let now = Utc::now().to_rfc3339();
6502    let affected = conn.execute(
6503        "UPDATE memories SET
6504            metadata = json_set(metadata, '$.agent_pubkey', ?3, '$.pubkey_bound_at', ?4),
6505            content  = json_set(content,  '$.agent_pubkey', ?3, '$.pubkey_bound_at', ?4),
6506            updated_at = ?4
6507         WHERE namespace = ?1 AND title = ?2",
6508        params![AGENTS_NAMESPACE, &title, pubkey_b64, &now],
6509    )?;
6510    if affected == 0 {
6511        anyhow::bail!(
6512            "cannot bind pubkey: agent '{agent_id}' is not registered (register it first)"
6513        );
6514    }
6515    Ok(())
6516}
6517
6518/// Fetch the Ed25519 public key bound to `agent_id`, if any (#626
6519/// Layer-3, Task 1.3 / C3).
6520///
6521/// Returns `Ok(None)` when the agent is registered but has no bound key
6522/// (the permissive-default attestation posture: such an agent can still
6523/// write *claimed* rows), and also when the agent is not registered at
6524/// all — both collapse to "no key to verify against". The verifier
6525/// distinguishes the two only when `AI_MEMORY_REQUIRE_AGENT_ATTESTATION`
6526/// is set, where a missing key on a required write is a hard reject.
6527///
6528/// # Errors
6529///
6530/// Surfaces only underlying query failures.
6531pub fn agent_pubkey(conn: &Connection, agent_id: &str) -> Result<Option<String>> {
6532    let title = crate::models::agent_registration_title(agent_id);
6533    let pubkey = conn
6534        .query_row(
6535            "SELECT json_extract(metadata, '$.agent_pubkey') FROM memories
6536             WHERE namespace = ?1 AND title = ?2",
6537            params![AGENTS_NAMESPACE, &title],
6538            |row| row.get::<_, Option<String>>(0),
6539        )
6540        .ok()
6541        .flatten();
6542    Ok(pubkey)
6543}
6544
6545/// Clear the Ed25519 public key bound to `agent_id` (#626 Layer-3,
6546/// Task 1.3 / C5 — key revocation).
6547///
6548/// Removes the `agent_pubkey` + `pubkey_bound_at` keys from both the
6549/// metadata and the mirrored `content` JSON, stamping a
6550/// `pubkey_revoked_at` marker so the revocation is auditable. After
6551/// revocation the agent reverts to the permissive *claimed* posture
6552/// (no key to verify against) until a fresh key is bound.
6553///
6554/// Idempotent: revoking an agent with no bound key still succeeds (the
6555/// `json_remove` is a no-op) as long as the agent is registered.
6556///
6557/// # Errors
6558///
6559/// - the agent is not registered (no `_agents` row for `agent_id`)
6560/// - the underlying `UPDATE` fails
6561pub fn revoke_agent_pubkey(conn: &Connection, agent_id: &str) -> Result<()> {
6562    let title = crate::models::agent_registration_title(agent_id);
6563    let now = Utc::now().to_rfc3339();
6564    let affected = conn.execute(
6565        "UPDATE memories SET
6566            metadata = json_set(
6567                json_remove(metadata, '$.agent_pubkey', '$.pubkey_bound_at'),
6568                '$.pubkey_revoked_at', ?3),
6569            content  = json_set(
6570                json_remove(content,  '$.agent_pubkey', '$.pubkey_bound_at'),
6571                '$.pubkey_revoked_at', ?3),
6572            updated_at = ?3
6573         WHERE namespace = ?1 AND title = ?2",
6574        params![AGENTS_NAMESPACE, &title, &now],
6575    )?;
6576    if affected == 0 {
6577        anyhow::bail!(
6578            "cannot revoke pubkey: agent '{agent_id}' is not registered (register it first)"
6579        );
6580    }
6581    Ok(())
6582}
6583
6584pub fn stats(conn: &Connection, db_path: &Path) -> Result<Stats> {
6585    let total: usize = conn.query_row("SELECT COUNT(*) FROM memories", [], |r| r.get(0))?;
6586
6587    let mut stmt =
6588        conn.prepare("SELECT tier, COUNT(*) FROM memories GROUP BY tier ORDER BY COUNT(*) DESC")?;
6589    let by_tier = stmt
6590        .query_map([], |row| {
6591            Ok(TierCount {
6592                tier: row.get(0)?,
6593                count: row.get(1)?,
6594            })
6595        })?
6596        .collect::<rusqlite::Result<Vec<_>>>()?;
6597
6598    let mut stmt = conn.prepare(
6599        "SELECT namespace, COUNT(*) FROM memories GROUP BY namespace ORDER BY COUNT(*) DESC",
6600    )?;
6601    let by_namespace = stmt
6602        .query_map([], |row| {
6603            Ok(NamespaceCount {
6604                namespace: row.get(0)?,
6605                count: row.get(1)?,
6606            })
6607        })?
6608        .collect::<rusqlite::Result<Vec<_>>>()?;
6609
6610    let now = Utc::now().to_rfc3339();
6611    let one_hour = (Utc::now() + chrono::Duration::hours(1)).to_rfc3339();
6612    let expiring_soon: usize = conn.query_row(
6613        "SELECT COUNT(*) FROM memories WHERE expires_at IS NOT NULL AND expires_at > ?1 AND expires_at <= ?2",
6614        params![now, one_hour], |r| r.get(0),
6615    )?;
6616
6617    let links_count: usize = conn
6618        .query_row("SELECT COUNT(*) FROM memory_links", [], |r| r.get(0))
6619        .unwrap_or(0);
6620    let db_size_bytes = std::fs::metadata(db_path).map_or(0, |m| m.len());
6621    // v0.6.3.1 P2 (G4) — surface mixed-dim corruption to operators. Best-effort:
6622    // any error here returns 0 rather than failing the stats endpoint.
6623    let dim_violations = dim_violations(conn).unwrap_or(0);
6624
6625    // v0.6.3.1 (P3, G2): cumulative HNSW eviction count is process-local
6626    // state — read from the static counter in src/hnsw.rs. Surfacing it in
6627    // `stats` lets `memory_stats` callers and `ai-memory doctor` (P7) flag
6628    // operators who are sustaining at the index cap.
6629    let index_evictions_total = crate::hnsw::index_evictions_total();
6630
6631    Ok(Stats {
6632        total,
6633        by_tier,
6634        by_namespace,
6635        expiring_soon,
6636        links_count,
6637        db_size_bytes,
6638        dim_violations,
6639        index_evictions_total,
6640    })
6641}
6642
6643/// Run GC if there are any expired memories. Lightweight check first.
6644pub fn gc_if_needed(conn: &Connection, archive: bool) -> Result<usize> {
6645    let now = Utc::now().to_rfc3339();
6646    let has_expired: bool = conn
6647        .query_row(
6648            "SELECT EXISTS(SELECT 1 FROM memories WHERE expires_at IS NOT NULL AND expires_at < ?1)",
6649            params![now],
6650            |r| r.get(0),
6651        )
6652        .unwrap_or(false);
6653    if has_expired {
6654        gc(conn, archive)
6655    } else {
6656        Ok(0)
6657    }
6658}
6659
6660/// Purge old archives if `archive_max_days` is configured.
6661pub fn auto_purge_archive(conn: &Connection, max_days: Option<i64>) -> Result<usize> {
6662    match max_days {
6663        Some(days) if days > 0 => purge_archive(conn, Some(days)),
6664        _ => Ok(0),
6665    }
6666}
6667
6668/// #1579 B6 (F5.7) — expired rows reaped per GC transaction.
6669///
6670/// The pre-fix `gc` ran ONE `BEGIN IMMEDIATE` covering an archive
6671/// `INSERT … SELECT` + `DELETE` over the entire expired set, holding
6672/// the sqlite write lock for the whole sweep (seconds on a 100k-row
6673/// expiry backlog, during which every concurrent writer queues behind
6674/// `busy_timeout`). Chunking bounds the lock-hold per transaction to
6675/// this many rows; the loop in [`gc`] re-runs until the backlog drains.
6676/// 500 keeps each archive-copy + delete transaction in the
6677/// single-digit-millisecond band on the P1 audit corpus while still
6678/// amortising the per-transaction fsync across a useful batch.
6679const GC_CHUNK_ROWS: usize = 500;
6680
6681/// Subquery selecting one bounded chunk of expired row ids. Shared by
6682/// the archive `INSERT … SELECT` and the `DELETE` inside the same
6683/// `BEGIN IMMEDIATE` transaction; `ORDER BY rowid` makes the selection
6684/// fully deterministic, so both statements — which run against the
6685/// identical snapshot because the transaction holds the write lock —
6686/// target the exact same rows and the archive-before-delete invariant
6687/// is preserved chunk by chunk.
6688const SQL_GC_EXPIRED_CHUNK_IDS: &str = "SELECT id FROM memories \
6689     WHERE expires_at IS NOT NULL AND expires_at < ?1 \
6690     ORDER BY rowid LIMIT ?2";
6691
6692pub fn gc(conn: &Connection, archive: bool) -> Result<usize> {
6693    let now = Utc::now().to_rfc3339();
6694    // #1579 B6 (F5.7) — bounded-lock-hold chunked sweep. Each loop
6695    // iteration archives + deletes at most GC_CHUNK_ROWS expired rows
6696    // inside its own BEGIN IMMEDIATE transaction, so concurrent
6697    // writers interleave between chunks instead of stalling behind one
6698    // giant sweep transaction. Archive semantics are preserved: within
6699    // a chunk the archive INSERT and the DELETE address the same
6700    // deterministic id set (see SQL_GC_EXPIRED_CHUNK_IDS), and a
6701    // failure rolls back only the in-flight chunk (already-committed
6702    // chunks remain reaped — same observable contract as repeated
6703    // smaller gc calls).
6704    let mut total = 0usize;
6705    loop {
6706        conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)?;
6707        let result = (|| -> Result<usize> {
6708            if archive {
6709                // v0.6.3.1 P2 (G5) — preserve embedding + tier + expiry on GC archive.
6710                let mut archive_stmt = conn.prepare_cached(&format!(
6711                    "INSERT OR REPLACE INTO archived_memories
6712                     (id, tier, namespace, title, content, tags, priority, confidence,
6713                      source, access_count, created_at, updated_at, last_accessed_at,
6714                      expires_at, archived_at, archive_reason, metadata,
6715                      embedding, embedding_dim, original_tier, original_expires_at,
6716                      reflection_depth, atomised_into, atom_of, memory_kind,
6717                      entity_id, persona_version, citations, source_uri, source_span,
6718                      confidence_source, confidence_signals, confidence_decayed_at,
6719                      mentioned_entity_id, version)
6720                     SELECT id, tier, namespace, title, content, tags, priority, confidence,
6721                            source, access_count, created_at, updated_at, last_accessed_at,
6722                            expires_at, ?1, 'ttl_expired', metadata,
6723                            embedding, embedding_dim, tier, expires_at,
6724                            reflection_depth, atomised_into, atom_of, memory_kind,
6725                            entity_id, persona_version, citations, source_uri, source_span,
6726                            confidence_source, confidence_signals, confidence_decayed_at,
6727                            mentioned_entity_id, version
6728                     FROM memories
6729                     WHERE id IN ({SQL_GC_EXPIRED_CHUNK_IDS})"
6730                ))?;
6731                archive_stmt.execute(params![now, GC_CHUNK_ROWS])?;
6732            }
6733            let mut delete_stmt = conn.prepare_cached(&format!(
6734                "DELETE FROM memories WHERE id IN ({SQL_GC_EXPIRED_CHUNK_IDS})"
6735            ))?;
6736            let deleted = delete_stmt.execute(params![now, GC_CHUNK_ROWS])?;
6737            Ok(deleted)
6738        })();
6739        match result {
6740            Ok(n) => {
6741                conn.execute_batch(connection::SQL_COMMIT)?;
6742                total += n;
6743                if n < GC_CHUNK_ROWS {
6744                    break;
6745                }
6746            }
6747            Err(e) => {
6748                let _ = conn.execute_batch(connection::SQL_ROLLBACK);
6749                return Err(e);
6750            }
6751        }
6752    }
6753    // Clean up namespace_meta rows pointing to deleted memories.
6754    // #1579 B6 — correlated NOT EXISTS instead of the former
6755    // `standard_id NOT IN (SELECT id FROM memories)`, which
6756    // materialised the full id set on every sweep; the rewrite is one
6757    // primary-key probe per namespace_meta row (a small table — one
6758    // row per namespace standard).
6759    let _ = conn.execute(
6760        "DELETE FROM namespace_meta WHERE NOT EXISTS \
6761         (SELECT 1 FROM memories WHERE memories.id = namespace_meta.standard_id)",
6762        [],
6763    );
6764    Ok(total)
6765}
6766
6767// ---------------------------------------------------------------------------
6768// Archive operations
6769// ---------------------------------------------------------------------------
6770
6771pub fn list_archived(
6772    conn: &Connection,
6773    namespace: Option<&str>,
6774    limit: usize,
6775    offset: usize,
6776) -> Result<Vec<serde_json::Value>> {
6777    let (sql, params_vec): (String, Vec<Box<dyn rusqlite::types::ToSql>>) = match namespace {
6778        Some(ns) => (
6779            "SELECT id, tier, namespace, title, content, tags, priority, confidence, \
6780             source, access_count, created_at, updated_at, last_accessed_at, \
6781             expires_at, archived_at, archive_reason, metadata, \
6782             reflection_depth, memory_kind, entity_id, persona_version, \
6783             citations, source_uri, source_span, confidence_source, \
6784             confidence_signals, confidence_decayed_at, version, \
6785             atomised_into, atom_of, mentioned_entity_id \
6786             FROM archived_memories WHERE namespace = ?1 \
6787             ORDER BY archived_at DESC LIMIT ?2 OFFSET ?3"
6788                .to_string(),
6789            vec![Box::new(ns.to_string()), Box::new(limit), Box::new(offset)],
6790        ),
6791        None => (
6792            "SELECT id, tier, namespace, title, content, tags, priority, confidence, \
6793             source, access_count, created_at, updated_at, last_accessed_at, \
6794             expires_at, archived_at, archive_reason, metadata, \
6795             reflection_depth, memory_kind, entity_id, persona_version, \
6796             citations, source_uri, source_span, confidence_source, \
6797             confidence_signals, confidence_decayed_at, version, \
6798             atomised_into, atom_of, mentioned_entity_id \
6799             FROM archived_memories \
6800             ORDER BY archived_at DESC LIMIT ?1 OFFSET ?2"
6801                .to_string(),
6802            vec![Box::new(limit), Box::new(offset)],
6803        ),
6804    };
6805    let params_refs: Vec<&dyn rusqlite::types::ToSql> =
6806        params_vec.iter().map(std::convert::AsRef::as_ref).collect();
6807    let mut stmt = conn.prepare(&sql)?;
6808    let rows = stmt.query_map(params_refs.as_slice(), |row| {
6809        // v0.7.0 issue #861 — `metadata` is stored as a JSON TEXT blob
6810        // in the column. Falling back to `{}` only covers a NULL/empty
6811        // read; the surrounding column projection then re-encodes it
6812        // structured so callers see a real JSON object instead of an
6813        // escaped string. Coupled with the forget-path archive INSERTs
6814        // around lines 1268 / 1289 above (now SELECTing `metadata` so
6815        // the column actually carries the source row's metadata), this
6816        // restores the round-trip `agent_id` / `imported_from_*` /
6817        // `consolidated_from_agents` keys callers rely on for
6818        // attribution + restore.
6819        let metadata_str = row
6820            .get::<_, String>(16)
6821            .unwrap_or_else(|_| "{}".to_string());
6822        let metadata: serde_json::Value =
6823            serde_json::from_str(&metadata_str).unwrap_or_else(|_| serde_json::json!({}));
6824        // v0.7.0 issue #861 — `tags` is stored as a JSON-encoded array
6825        // TEXT (`'["a","b"]'`) by every write path. Returning the raw
6826        // String forced callers to either double-parse or accept a
6827        // string where they expected a JSON array. Parse here so the
6828        // response matches the live-row shape (`memory_get`) and the
6829        // contract tests in `tests/archive_serialization.rs`. NULL /
6830        // malformed columns fall through to an empty array — the
6831        // archive table's CHECK constraint makes the malformed case a
6832        // never-in-practice path, but the fall-through keeps the read
6833        // contract noisy-input-clean rather than panic-on-corruption.
6834        let tags_str = row.get::<_, String>(5).unwrap_or_else(|_| "[]".to_string());
6835        let tags: serde_json::Value =
6836            serde_json::from_str(&tags_str).unwrap_or_else(|_| serde_json::json!([]));
6837        Ok(serde_json::json!({
6838            "id": row.get::<_, String>(0)?,
6839            "tier": row.get::<_, String>(1)?,
6840            "namespace": row.get::<_, String>(2)?,
6841            "title": row.get::<_, String>(3)?,
6842            "content": row.get::<_, String>(4)?,
6843            "tags": tags,
6844            "priority": row.get::<_, i32>(6)?,
6845            (field_names::CONFIDENCE): row.get::<_, f64>(7)?,
6846            "source": row.get::<_, String>(8)?,
6847            (field_names::ACCESS_COUNT): row.get::<_, i64>(9)?,
6848            (field_names::CREATED_AT): row.get::<_, String>(10)?,
6849            (field_names::UPDATED_AT): row.get::<_, String>(11)?,
6850            (field_names::LAST_ACCESSED_AT): row.get::<_, Option<String>>(12)?,
6851            (field_names::EXPIRES_AT): row.get::<_, Option<String>>(13)?,
6852            (field_names::ARCHIVED_AT): row.get::<_, String>(14)?,
6853            (field_names::ARCHIVE_REASON): row.get::<_, String>(15)?,
6854            "metadata": metadata,
6855            // #1637 — the v49 columns (in the table since #1025; restore
6856            // was lossless but the LISTING surface projected only the 17
6857            // legacy columns, so archived v0.7.0 fields were invisible
6858            // to memory_archive_list). Additive keys; JSON-ish columns
6859            // parse to structured like tags/metadata above.
6860            (field_names::REFLECTION_DEPTH): row.get::<_, Option<i64>>(17)?.unwrap_or(0),
6861            (field_names::MEMORY_KIND): row.get::<_, Option<String>>(18)?,
6862            "entity_id": row.get::<_, Option<String>>(19)?,
6863            (field_names::PERSONA_VERSION): row.get::<_, Option<i64>>(20)?,
6864            "citations": row
6865                .get::<_, Option<String>>(21)?
6866                .and_then(|c| serde_json::from_str::<serde_json::Value>(&c).ok())
6867                .unwrap_or_else(|| serde_json::json!([])),
6868            (field_names::SOURCE_URI): row.get::<_, Option<String>>(22)?,
6869            (field_names::SOURCE_SPAN): row
6870                .get::<_, Option<String>>(23)?
6871                .and_then(|c| serde_json::from_str::<serde_json::Value>(&c).ok()),
6872            (field_names::CONFIDENCE_SOURCE): row.get::<_, Option<String>>(24)?,
6873            (field_names::CONFIDENCE_SIGNALS): row
6874                .get::<_, Option<String>>(25)?
6875                .and_then(|c| serde_json::from_str::<serde_json::Value>(&c).ok()),
6876            (field_names::CONFIDENCE_DECAYED_AT): row.get::<_, Option<String>>(26)?,
6877            "version": row.get::<_, Option<i64>>(27)?.unwrap_or(1),
6878            (field_names::ATOMISED_INTO): row.get::<_, Option<i64>>(28)?,
6879            (field_names::ATOM_OF): row.get::<_, Option<String>>(29)?,
6880            (field_names::MENTIONED_ENTITY_ID): row.get::<_, Option<String>>(30)?,
6881        }))
6882    })?;
6883    rows.collect::<rusqlite::Result<Vec<_>>>()
6884        .map_err(Into::into)
6885}
6886
6887pub fn restore_archived(conn: &Connection, id: &str) -> Result<bool> {
6888    let now = Utc::now().to_rfc3339();
6889    conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)?;
6890    let result = (|| -> Result<bool> {
6891        let exists: bool = conn
6892            .query_row(
6893                "SELECT COUNT(*) > 0 FROM archived_memories WHERE id = ?1",
6894                params![id],
6895                |r| r.get(0),
6896            )
6897            .unwrap_or(false);
6898        if !exists {
6899            return Ok(false);
6900        }
6901        // Check if ID already exists in active memories to prevent silent overwrite
6902        let active_exists: bool = conn
6903            .query_row(SQL_MEMORY_EXISTS_COUNT, params![id], |r| r.get(0))
6904            .unwrap_or(false);
6905        if active_exists {
6906            // #962 typed envelope — ArchiveRestoreCollision (409).
6907            return Err(anyhow::Error::new(StorageError::ArchiveRestoreCollision {
6908                id: id.to_string(),
6909            }));
6910        }
6911        // Validate archived metadata before restoring
6912        let archived_metadata: String = conn
6913            .query_row(
6914                "SELECT metadata FROM archived_memories WHERE id = ?1",
6915                params![id],
6916                |r| r.get(0),
6917            )
6918            .unwrap_or_else(|_| "{}".to_string());
6919        let meta_value: serde_json::Value =
6920            serde_json::from_str(&archived_metadata).unwrap_or_else(|_| serde_json::json!({}));
6921        if let Err(e) = crate::validate::validate_metadata(&meta_value) {
6922            tracing::warn!("archived memory {id} has invalid metadata, resetting to {{}}: {e}");
6923            conn.execute(
6924                "UPDATE archived_memories SET metadata = '{}' WHERE id = ?1",
6925                params![id],
6926            )?;
6927        }
6928        // FX-C5 — substrate governance pre-write hook parity. Restoring
6929        // an archived row mints a fresh live row via a raw INSERT...SELECT
6930        // that bypasses the `db::insert(..)` tail (which is where the
6931        // SQLite path normally consults `GOVERNANCE_PRE_WRITE`). Without
6932        // this call, an operator's signed governance rule could be
6933        // bypassed by restoring a row whose `(title, namespace)` would
6934        // otherwise be refused on a direct write. Load the archived row
6935        // shaped as a `Memory` and fire the hook BEFORE the INSERT;
6936        // a refusal short-circuits the transaction (outer ROLLBACK).
6937        let candidate = load_archived_as_memory(conn, id)?;
6938        consult_governance_pre_write(&candidate)?;
6939
6940        // v0.6.3.1 P2 (G5) — preserve original tier + expires_at + embedding
6941        // on restore. Pre-v17 rows lost this metadata permanently; the
6942        // migration backfills `original_tier='long'` so they still restore
6943        // as permanent (the prior behavior — no regression for legacy data).
6944        // Live writes from v0.6.3.1 onward round-trip the original tier.
6945        // #1025 (CRITICAL, 2026-05-21) — full v0.7.0 column carry on
6946        // archive→restore. Pre-#1025 the SELECT pulled only 17 columns;
6947        // restored row landed with reflection_depth=0 (DEFAULT),
6948        // memory_kind='observation' (DEFAULT), citations=[] (DEFAULT),
6949        // version=1 (DEFAULT) — silent loss of Form-4/5 provenance.
6950        // COALESCE handles legacy already-archived rows where the
6951        // v49-added columns are NULL.
6952        conn.execute(
6953            "INSERT INTO memories
6954             (id, tier, namespace, title, content, tags, priority, confidence,
6955              source, access_count, created_at, updated_at, last_accessed_at,
6956              expires_at, metadata, embedding, embedding_dim,
6957              reflection_depth, atomised_into, atom_of, memory_kind,
6958              entity_id, persona_version, citations, source_uri, source_span,
6959              confidence_source, confidence_signals, confidence_decayed_at,
6960              mentioned_entity_id, version)
6961             SELECT id, COALESCE(original_tier, 'long'), namespace, title, content,
6962                    tags, priority, confidence, source, access_count, created_at,
6963                    ?1, last_accessed_at, original_expires_at, metadata,
6964                    embedding, embedding_dim,
6965                    COALESCE(reflection_depth, 0),
6966                    atomised_into,
6967                    atom_of,
6968                    COALESCE(memory_kind, 'observation'),
6969                    entity_id, persona_version,
6970                    COALESCE(citations, '[]'),
6971                    source_uri, source_span,
6972                    COALESCE(confidence_source, 'caller_provided'),
6973                    confidence_signals, confidence_decayed_at,
6974                    mentioned_entity_id,
6975                    COALESCE(version, 1)
6976             FROM archived_memories WHERE id = ?2",
6977            params![now, id],
6978        )?;
6979        conn.execute("DELETE FROM archived_memories WHERE id = ?1", params![id])?;
6980        Ok(true)
6981    })();
6982    match result {
6983        Ok(v) => {
6984            conn.execute_batch(connection::SQL_COMMIT)?;
6985            Ok(v)
6986        }
6987        Err(e) => {
6988            let _ = conn.execute_batch(connection::SQL_ROLLBACK);
6989            Err(e)
6990        }
6991    }
6992}
6993
6994/// #940 (security-high, 2026-05-20) — caller-scoped restore variant.
6995/// Mirrors [`restore_archived`] but constrains the INSERT-SELECT to
6996/// rows whose `metadata->'agent_id'` JSON field matches `caller`
6997/// (with the inbox-target carve-out: rows whose
6998/// `metadata->'target_agent_id'` matches `caller` are also
6999/// restorable by the inbox owner, matching the SAL
7000/// [`crate::store::is_visible_to_caller`] visibility predicate).
7001///
7002/// Pre-#940 the only restore variant was owner-blind; any
7003/// authenticated HTTP caller could restore any other owner's
7004/// archived rows back into the live working set via
7005/// `POST /api/v1/archive/{id}/restore`. The postgres SAL branch was
7006/// already QC-P1-fixed (2026-05-20) to pass
7007/// `CallerContext::for_agent(caller)`; the sqlite branch is closed
7008/// by this helper. Returns `Ok(false)` on a non-owner attempt so the
7009/// surface cannot be used to probe other owners' archived ids.
7010pub fn restore_archived_for_caller(conn: &Connection, id: &str, caller: &str) -> Result<bool> {
7011    let now = Utc::now().to_rfc3339();
7012    conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)?;
7013    let result = (|| -> Result<bool> {
7014        // Owner gate: row must exist AND match the caller (or be an
7015        // inbox-target row whose recipient is the caller, or be a
7016        // legacy unowned row — see archive_memory_for_caller for the
7017        // matching SQL + #940 carve-out rationale).
7018        let owned: bool = conn
7019            .query_row(
7020                "SELECT COUNT(*) > 0 FROM archived_memories \
7021                 WHERE id = ?1 \
7022                   AND ( \
7023                     json_extract(metadata, '$.agent_id') = ?2 OR \
7024                     json_extract(metadata, '$.target_agent_id') = ?2 OR \
7025                     json_extract(metadata, '$.agent_id') IS NULL OR \
7026                     json_extract(metadata, '$.agent_id') = '' \
7027                   )",
7028                params![id, caller],
7029                |r| r.get(0),
7030            )
7031            .unwrap_or(false);
7032        if !owned {
7033            return Ok(false);
7034        }
7035        // Check if ID already exists in active memories to prevent silent overwrite.
7036        let active_exists: bool = conn
7037            .query_row(SQL_MEMORY_EXISTS_COUNT, params![id], |r| r.get(0))
7038            .unwrap_or(false);
7039        if active_exists {
7040            // #962 typed envelope — ArchiveRestoreCollision (409).
7041            return Err(anyhow::Error::new(StorageError::ArchiveRestoreCollision {
7042                id: id.to_string(),
7043            }));
7044        }
7045        // Validate archived metadata before restoring (mirror restore_archived).
7046        let archived_metadata: String = conn
7047            .query_row(
7048                "SELECT metadata FROM archived_memories WHERE id = ?1",
7049                params![id],
7050                |r| r.get(0),
7051            )
7052            .unwrap_or_else(|_| "{}".to_string());
7053        let meta_value: serde_json::Value =
7054            serde_json::from_str(&archived_metadata).unwrap_or_else(|_| serde_json::json!({}));
7055        if let Err(e) = crate::validate::validate_metadata(&meta_value) {
7056            tracing::warn!("archived memory {id} has invalid metadata, resetting to {{}}: {e}");
7057            conn.execute(
7058                "UPDATE archived_memories SET metadata = '{}' WHERE id = ?1",
7059                params![id],
7060            )?;
7061        }
7062        // FX-C5 — substrate governance pre-write hook parity. See the
7063        // matching block in `restore_archived` above for rationale.
7064        // Caller-scoped variant uses the same hook contract — the
7065        // hook is owner-agnostic (it sees the Memory payload, not the
7066        // caller context); ownership gating already happened on the
7067        // SELECT above.
7068        let candidate = load_archived_as_memory(conn, id)?;
7069        consult_governance_pre_write(&candidate)?;
7070        // #1025 (CRITICAL, 2026-05-21) — full v0.7.0 column carry on
7071        // archive→restore. Pre-#1025 the SELECT pulled only 17 columns;
7072        // restored row landed with reflection_depth=0 (DEFAULT),
7073        // memory_kind='observation' (DEFAULT), citations=[] (DEFAULT),
7074        // version=1 (DEFAULT) — silent loss of Form-4/5 provenance.
7075        // COALESCE handles legacy already-archived rows where the
7076        // v49-added columns are NULL.
7077        conn.execute(
7078            "INSERT INTO memories
7079             (id, tier, namespace, title, content, tags, priority, confidence,
7080              source, access_count, created_at, updated_at, last_accessed_at,
7081              expires_at, metadata, embedding, embedding_dim,
7082              reflection_depth, atomised_into, atom_of, memory_kind,
7083              entity_id, persona_version, citations, source_uri, source_span,
7084              confidence_source, confidence_signals, confidence_decayed_at,
7085              mentioned_entity_id, version)
7086             SELECT id, COALESCE(original_tier, 'long'), namespace, title, content,
7087                    tags, priority, confidence, source, access_count, created_at,
7088                    ?1, last_accessed_at, original_expires_at, metadata,
7089                    embedding, embedding_dim,
7090                    COALESCE(reflection_depth, 0),
7091                    atomised_into,
7092                    atom_of,
7093                    COALESCE(memory_kind, 'observation'),
7094                    entity_id, persona_version,
7095                    COALESCE(citations, '[]'),
7096                    source_uri, source_span,
7097                    COALESCE(confidence_source, 'caller_provided'),
7098                    confidence_signals, confidence_decayed_at,
7099                    mentioned_entity_id,
7100                    COALESCE(version, 1)
7101             FROM archived_memories WHERE id = ?2",
7102            params![now, id],
7103        )?;
7104        conn.execute("DELETE FROM archived_memories WHERE id = ?1", params![id])?;
7105        Ok(true)
7106    })();
7107    match result {
7108        Ok(v) => {
7109            conn.execute_batch(connection::SQL_COMMIT)?;
7110            Ok(v)
7111        }
7112        Err(e) => {
7113            let _ = conn.execute_batch(connection::SQL_ROLLBACK);
7114            Err(e)
7115        }
7116    }
7117}
7118
7119/// FX-C5 — load a row from `archived_memories` shaped as a [`Memory`]
7120/// so the substrate `GOVERNANCE_PRE_WRITE` hook can inspect the
7121/// restore candidate BEFORE the live INSERT lands. The archived
7122/// table shares the v0.7.0 column shape with `memories` (#1025) so
7123/// the same `row_to_memory` helper applies; columns absent on legacy
7124/// pre-#1025 archived rows fall through to the same defaults
7125/// `row_to_memory` already applies. The `original_tier` column wins
7126/// over the archive-time `tier` so the candidate hook sees the row
7127/// at the tier it will land at post-restore (matches the SQL the
7128/// caller is about to execute).
7129fn load_archived_as_memory(conn: &Connection, id: &str) -> Result<Memory> {
7130    let mut stmt = conn.prepare(
7131        "SELECT id, COALESCE(original_tier, tier) AS tier, namespace, title, content,
7132                tags, priority, confidence, source, access_count, created_at,
7133                updated_at, last_accessed_at,
7134                COALESCE(original_expires_at, expires_at) AS expires_at, metadata,
7135                COALESCE(reflection_depth, 0) AS reflection_depth,
7136                COALESCE(memory_kind, 'observation') AS memory_kind,
7137                entity_id, persona_version,
7138                COALESCE(citations, '[]') AS citations,
7139                source_uri, source_span,
7140                COALESCE(confidence_source, 'caller_provided') AS confidence_source,
7141                confidence_signals, confidence_decayed_at,
7142                COALESCE(version, 1) AS version
7143         FROM archived_memories WHERE id = ?1",
7144    )?;
7145    let mem = stmt.query_row(params![id], row_to_memory)?;
7146    Ok(mem)
7147}
7148
7149pub fn purge_archive(conn: &Connection, older_than_days: Option<i64>) -> Result<usize> {
7150    match older_than_days {
7151        Some(days) if days < 0 => {
7152            // #962 typed envelope.
7153            return Err(anyhow::Error::new(StorageError::InvalidArgument {
7154                reason: crate::errors::msg::older_than_days_negative(days),
7155            }));
7156        }
7157        Some(days) => {
7158            let cutoff = (Utc::now() - chrono::Duration::days(days)).to_rfc3339();
7159            let deleted = conn.execute(
7160                "DELETE FROM archived_memories WHERE archived_at < ?1",
7161                params![cutoff],
7162            )?;
7163            Ok(deleted)
7164        }
7165        None => {
7166            let deleted = conn.execute("DELETE FROM archived_memories", [])?;
7167            Ok(deleted)
7168        }
7169    }
7170}
7171
7172/// #936 (security-critical, 2026-05-20) — caller-scoped purge variant.
7173/// Mirrors [`purge_archive`] but constrains the DELETE to rows whose
7174/// `metadata->'agent_id'` JSON field matches `caller` (with the
7175/// inbox-target carve-out: rows whose `metadata->'target_agent_id'`
7176/// matches `caller` are also purgeable by the inbox owner, matching
7177/// the SAL [`crate::store::is_visible_to_caller`] visibility
7178/// predicate).
7179///
7180/// Pre-#936 the only purge variant was owner-blind; any authenticated
7181/// HTTP caller could destroy every owner's archive corpus via
7182/// `DELETE /api/v1/archive`. The handler at
7183/// `src/handlers/archive.rs::purge_archive` now resolves the caller
7184/// from `X-Agent-Id` and routes through this owner-scoped variant by
7185/// default; the admin/operator path (full owner-blind wipe) is
7186/// reserved for callers whose `agent_id` appears in the
7187/// `[admin].agent_ids` allowlist and is reached via the SAL trait
7188/// path with `CallerContext::bypass_visibility = true`.
7189///
7190/// Returns the count of rows actually deleted; a non-admin call with
7191/// no matching rows returns `Ok(0)` so the caller cannot enumerate
7192/// other owners' archive corpus via this surface.
7193pub fn purge_archive_for_caller(
7194    conn: &Connection,
7195    caller: &str,
7196    older_than_days: Option<i64>,
7197) -> Result<usize> {
7198    match older_than_days {
7199        Some(days) if days < 0 => {
7200            // #962 typed envelope.
7201            return Err(anyhow::Error::new(StorageError::InvalidArgument {
7202                reason: crate::errors::msg::older_than_days_negative(days),
7203            }));
7204        }
7205        Some(days) => {
7206            let cutoff = (Utc::now() - chrono::Duration::days(days)).to_rfc3339();
7207            let deleted = conn.execute(
7208                "DELETE FROM archived_memories \
7209                 WHERE archived_at < ?1 \
7210                   AND ( \
7211                     json_extract(metadata, '$.agent_id') = ?2 OR \
7212                     json_extract(metadata, '$.target_agent_id') = ?2 \
7213                   )",
7214                params![cutoff, caller],
7215            )?;
7216            Ok(deleted)
7217        }
7218        None => {
7219            let deleted = conn.execute(
7220                "DELETE FROM archived_memories \
7221                 WHERE \
7222                   json_extract(metadata, '$.agent_id') = ?1 OR \
7223                   json_extract(metadata, '$.target_agent_id') = ?1",
7224                params![caller],
7225            )?;
7226            Ok(deleted)
7227        }
7228    }
7229}
7230
7231pub fn archive_stats(conn: &Connection) -> Result<serde_json::Value> {
7232    let total: i64 = conn.query_row("SELECT COUNT(*) FROM archived_memories", [], |r| r.get(0))?;
7233    let mut stmt = conn.prepare(
7234        "SELECT namespace, COUNT(*) FROM archived_memories GROUP BY namespace ORDER BY COUNT(*) DESC",
7235    )?;
7236    let by_ns: Vec<serde_json::Value> = stmt
7237        .query_map([], |row| {
7238            Ok(serde_json::json!({
7239                "namespace": row.get::<_, String>(0)?,
7240                "count": row.get::<_, i64>(1)?,
7241            }))
7242        })?
7243        .collect::<rusqlite::Result<Vec<_>>>()?;
7244    Ok(serde_json::json!({
7245        "archived_total": total,
7246        (field_names::BY_NAMESPACE): by_ns,
7247    }))
7248}
7249
7250pub fn export_all(conn: &Connection) -> Result<Vec<Memory>> {
7251    let now = Utc::now().to_rfc3339();
7252    let mut stmt = conn.prepare(
7253        "SELECT * FROM memories WHERE expires_at IS NULL OR expires_at > ?1 ORDER BY created_at ASC",
7254    )?;
7255    let rows = stmt.query_map(params![now], row_to_memory)?;
7256    rows.collect::<rusqlite::Result<Vec<_>>>()
7257        .map_err(Into::into)
7258}
7259
7260pub fn export_links(conn: &Connection) -> Result<Vec<MemoryLink>> {
7261    let now = Utc::now().to_rfc3339();
7262    // v0.7 H3 — also pull the signature blob, the `observed_by` claim,
7263    // and the temporal-validity columns. Federation peers consume these
7264    // through `verify::verify` to gate inbound replication; legacy
7265    // unsigned rows surface NULL for `signature` / `observed_by` and
7266    // the inbound path falls back to `attest_level = "unsigned"`.
7267    let mut stmt = conn.prepare(
7268        "SELECT ml.source_id, ml.target_id, ml.relation, ml.created_at,
7269                ml.signature, ml.observed_by, ml.valid_from, ml.valid_until
7270         FROM memory_links ml
7271         JOIN memories ms ON ms.id = ml.source_id AND (ms.expires_at IS NULL OR ms.expires_at > ?1)
7272         JOIN memories mt ON mt.id = ml.target_id AND (mt.expires_at IS NULL OR mt.expires_at > ?1)",
7273    )?;
7274    let rows = stmt.query_map(params![now], |row| {
7275        let relation_str: String = row.get(2)?;
7276        Ok(MemoryLink {
7277            source_id: row.get(0)?,
7278            target_id: row.get(1)?,
7279            // v0.7.0 fix campaign R1-M4 — see `get_links` for rationale.
7280            relation: crate::models::MemoryLinkRelation::from_str(&relation_str)
7281                .unwrap_or_default(),
7282            created_at: row.get(3)?,
7283            signature: row.get::<_, Option<Vec<u8>>>(4)?,
7284            observed_by: row.get::<_, Option<String>>(5)?,
7285            valid_from: row.get::<_, Option<String>>(6)?,
7286            valid_until: row.get::<_, Option<String>>(7)?,
7287            // v0.7.0 #860 — `export_links` is the federation outbound
7288            // path; the wire shape stays without `attest_level` so
7289            // pre-v0.7 receivers do not see an unknown field. Leaving
7290            // this `None` keeps `skip_serializing_if` from emitting it.
7291            attest_level: None,
7292        })
7293    })?;
7294    rows.collect::<rusqlite::Result<Vec<_>>>()
7295        .map_err(Into::into)
7296}
7297
7298/// Insert with timestamp-aware conflict resolution for sync.
7299/// Only overwrites if the incoming memory is newer (by `updated_at`,
7300/// tiebroken by memory.id for a total order across peers —
7301/// ultrareview #344, #345).
7302///
7303/// Rationale: ISO 8601 / RFC 3339 strings compare lexicographically
7304/// as long as all timestamps carry consistent precision + Z suffix.
7305/// Equal timestamps (common when two nodes edit in the same ms, or
7306/// when NTP aligns clocks) previously produced non-deterministic
7307/// winners per peer, causing permanent mesh divergence. Adding the
7308/// memory.id tiebreaker yields a total order every peer agrees on.
7309pub fn insert_if_newer(conn: &Connection, mem: &Memory) -> Result<String> {
7310    // v0.7.0 L1-6 Deliverable E — substrate governance pre-write
7311    // gate. Federation `sync_push` / catchup-loop peer pushes flow
7312    // through this entry point; treating them identically to direct
7313    // writes is the load-bearing property — an agent that bypasses
7314    // a local rule by routing through a peer would otherwise slip
7315    // past the gate. The hook fires on every newer-wins merge attempt.
7316    consult_governance_pre_write(mem)?;
7317
7318    let tags_json = serde_json::to_string(&mem.tags)?;
7319    let metadata_json = serde_json::to_string(&mem.metadata)?;
7320    // v0.7.0 Form 4 — encode citations + source_span for the schema
7321    // v38 TEXT columns on the federation merge path. The newer-wins
7322    // CASE clauses below pick `excluded.citations` only when the
7323    // incoming row is the winner; otherwise the existing row's
7324    // citations are preserved.
7325    let citations_json = serde_json::to_string(&mem.citations)?;
7326    let source_span_json = match mem.source_span {
7327        Some(span) => Some(serde_json::to_string(&span)?),
7328        None => None,
7329    };
7330    // v0.7.0 Form 5 — encode confidence-provenance fields for the
7331    // schema v39 TEXT columns on the federation merge path. The
7332    // newer-wins CASE clauses pick `excluded.confidence_source` only
7333    // when the incoming row wins the tiebreak; otherwise the local
7334    // row's confidence provenance is preserved.
7335    let confidence_signals_json = match &mem.confidence_signals {
7336        Some(s) => Some(serde_json::to_string(s)?),
7337        None => None,
7338    };
7339    // v0.7.0 polish PERF-8 (#781) — denormalised mention tag for the
7340    // federation `insert_if_newer` merge path. The newer-wins CASE
7341    // clause picks the winner's mentioned_entity_id when the incoming
7342    // row wins the tiebreak; otherwise the local row's tag is preserved
7343    // so a stale peer cannot blank out a value the matcher's index
7344    // depends on.
7345    let mentioned_entity_id = extract_mentioned_entity_id(mem);
7346    // #1579 B6 — federation catch-up replays this newer-wins upsert
7347    // once per pulled row; `prepare_cached` amortises the parse of the
7348    // largest SQL statement in the file across the whole batch.
7349    let mut newer_wins_stmt = conn.prepare_cached(
7350        "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, confidence, source, access_count, created_at, updated_at, last_accessed_at, expires_at, metadata, reflection_depth, memory_kind, entity_id, persona_version, citations, source_uri, source_span, confidence_source, confidence_signals, confidence_decayed_at, mentioned_entity_id, version)
7351         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, ?20, ?21, ?22, ?23, ?24, ?25, ?26, ?27)
7352         ON CONFLICT(title, namespace) DO UPDATE SET
7353            content = CASE WHEN excluded.updated_at > memories.updated_at
7354                             OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
7355                           THEN excluded.content ELSE memories.content END,
7356            tags = CASE WHEN excluded.updated_at > memories.updated_at
7357                          OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
7358                        THEN excluded.tags ELSE memories.tags END,
7359            priority = MAX(memories.priority, excluded.priority),
7360            confidence = MAX(memories.confidence, excluded.confidence),
7361            source = CASE WHEN excluded.updated_at > memories.updated_at
7362                            OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
7363                          THEN excluded.source ELSE memories.source END,
7364            tier = CASE WHEN excluded.tier = 'long' THEN 'long'
7365                        WHEN memories.tier = 'long' THEN 'long'
7366                        WHEN excluded.tier = 'mid' THEN 'mid'
7367                        ELSE memories.tier END,
7368            updated_at = MAX(memories.updated_at, excluded.updated_at),
7369            access_count = MAX(memories.access_count, excluded.access_count),
7370            expires_at = CASE WHEN excluded.tier = 'long' OR memories.tier = 'long' THEN NULL
7371                              ELSE COALESCE(excluded.expires_at, memories.expires_at) END,
7372            -- Preserve metadata.agent_id across newer-wins merge (NHI provenance immutable).
7373            metadata = CASE
7374                WHEN json_extract(memories.metadata, '$.agent_id') IS NOT NULL
7375                THEN json_set(
7376                    CASE WHEN excluded.updated_at > memories.updated_at
7377                              OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
7378                         THEN excluded.metadata
7379                         ELSE memories.metadata END,
7380                    '$.agent_id',
7381                    json_extract(memories.metadata, '$.agent_id')
7382                )
7383                ELSE CASE WHEN excluded.updated_at > memories.updated_at
7384                               OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
7385                          THEN excluded.metadata
7386                          ELSE memories.metadata END
7387            END,
7388            -- v0.7.0 Task 1/8 — recursion depth takes max so the reflection
7389            -- signal isn't lost on newer-wins federation merges.
7390            reflection_depth = MAX(memories.reflection_depth, excluded.reflection_depth),
7391            -- v0.7.0 L1-1 — kind is sticky across federation merges: a
7392            -- reflection row must not be downgraded to observation by a
7393            -- newer-wins merge from a peer that doesn't know about the kind.
7394            -- v0.7.0 QW-2 — Persona is similarly sticky.
7395            memory_kind = CASE WHEN memories.memory_kind = 'reflection' THEN 'reflection'
7396                               WHEN memories.memory_kind = 'persona' THEN 'persona'
7397                               ELSE excluded.memory_kind END,
7398            -- v0.7.0 QW-2 — entity_id + persona_version are immutable
7399            -- once set so a federation merge can't drop the persona
7400            -- discriminator off a `memory_kind = 'persona'` row.
7401            entity_id = COALESCE(memories.entity_id, excluded.entity_id),
7402            persona_version = COALESCE(memories.persona_version, excluded.persona_version),
7403            -- v0.7.0 Form 4 — fact-provenance: replace the stored
7404            -- citations array only when the incoming row wins the
7405            -- newer-wins tiebreak; source_uri / source_span follow
7406            -- COALESCE semantics so a federation merge that lacks
7407            -- provenance does not blank out a value the local row
7408            -- already had.
7409            citations = CASE WHEN excluded.updated_at > memories.updated_at
7410                                  OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
7411                             THEN excluded.citations ELSE memories.citations END,
7412            source_uri = COALESCE(excluded.source_uri, memories.source_uri),
7413            source_span = COALESCE(excluded.source_span, memories.source_span),
7414            -- v0.7.0 Form 5 — confidence-provenance follows the newer-
7415            -- wins shape established for the other Form 4 columns.
7416            -- A peer pushing an auto-derived/calibrated value wins on
7417            -- the timestamp tiebreak; otherwise the local row's
7418            -- provenance is preserved so a stale peer cannot blank out
7419            -- a fresher local calibration.
7420            confidence_source = CASE WHEN excluded.updated_at > memories.updated_at
7421                                          OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
7422                                     THEN excluded.confidence_source ELSE memories.confidence_source END,
7423            confidence_signals = CASE WHEN excluded.updated_at > memories.updated_at
7424                                           OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
7425                                      THEN excluded.confidence_signals ELSE memories.confidence_signals END,
7426            confidence_decayed_at = CASE WHEN excluded.updated_at > memories.updated_at
7427                                              OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
7428                                         THEN excluded.confidence_decayed_at ELSE memories.confidence_decayed_at END,
7429            -- v0.7.0 polish PERF-8 (#781) — newer-wins on the mention
7430            -- tag (the winning row's content is the one a future matcher
7431            -- query expects to find); otherwise preserve the local tag
7432            -- so a stale peer that lacks the structured entity_id
7433            -- metadata cannot blank out a value the index serves.
7434            mentioned_entity_id = CASE WHEN excluded.updated_at > memories.updated_at
7435                                            OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
7436                                       THEN COALESCE(excluded.mentioned_entity_id, memories.mentioned_entity_id)
7437                                       ELSE memories.mentioned_entity_id END,
7438            -- #1631 (decide-once, #1029 contract) — `version` IS
7439            -- replicated state on the federation merge path: merge via
7440            -- MAX(local, remote) so an out-of-order peer push can't
7441            -- roll the Gap-1 optimistic-concurrency counter backwards.
7442            -- Matches the pg `apply_remote_memory` GREATEST arm.
7443            version = MAX(memories.version, excluded.version)
7444         RETURNING id",
7445    )?;
7446    let actual_id: String = newer_wins_stmt.query_row(
7447        params![
7448            mem.id,
7449            mem.tier.as_str(),
7450            mem.namespace,
7451            mem.title,
7452            mem.content,
7453            tags_json,
7454            mem.priority,
7455            mem.confidence,
7456            mem.source,
7457            mem.access_count,
7458            mem.created_at,
7459            mem.updated_at,
7460            mem.last_accessed_at,
7461            mem.effective_expires_at(),
7462            metadata_json,
7463            mem.reflection_depth,
7464            mem.memory_kind.as_str(),
7465            mem.entity_id,
7466            mem.persona_version,
7467            citations_json,
7468            mem.source_uri,
7469            source_span_json,
7470            mem.confidence_source.as_str(),
7471            confidence_signals_json,
7472            mem.confidence_decayed_at,
7473            mentioned_entity_id,
7474            mem.version,
7475        ],
7476        |r| r.get(0),
7477    )?;
7478    Ok(actual_id)
7479}
7480
7481// --- Embedding support ---
7482
/// v0.6.3.1 P2 (G4): typed failure raised by `set_embedding` when a write
/// would bring a second embedding dimensionality into a namespace that
/// already established one through an earlier write. Being a concrete type
/// (rather than a formatted string) lets the MCP/HTTP handlers translate it
/// into a 409 Conflict instead of letting cosine silently score 0.0 on
/// every later recall.
#[derive(Debug)]
pub struct EmbeddingDimMismatch {
    /// Namespace the rejected write targeted.
    pub namespace: String,
    /// Dimensionality the namespace already holds.
    pub established: usize,
    /// Dimensionality of the embedding that was refused.
    pub attempted: usize,
}

impl std::fmt::Display for EmbeddingDimMismatch {
    /// Renders the namespace plus the established / refused dims as one
    /// human-readable line.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!(
            "embedding dim mismatch in namespace '{}': established {}-dim, refused {}-dim write",
            self.namespace, self.established, self.attempted
        ))
    }
}

// Marker impl: the type carries no underlying `source()` error.
impl std::error::Error for EmbeddingDimMismatch {}
7506
7507/// Lookup the embedding dimensionality already established for `namespace`.
7508/// Returns `Ok(None)` when no row in that namespace has an embedding yet.
7509///
7510/// # Errors
7511///
7512/// Returns the underlying SQLite error.
7513pub fn namespace_embedding_dim(conn: &Connection, namespace: &str) -> Result<Option<usize>> {
7514    // Use the v17 idx_memories_ns_dim partial index.
7515    let dim: Option<i64> = conn
7516        .query_row(
7517            "SELECT embedding_dim FROM memories \
7518             WHERE namespace = ?1 AND embedding_dim IS NOT NULL \
7519             LIMIT 1",
7520            params![namespace],
7521            |r| r.get(0),
7522        )
7523        .ok();
7524    Ok(dim.and_then(|d| usize::try_from(d).ok()))
7525}
7526
7527/// Count rows whose stored `embedding_dim` does not match what the BLOB
7528/// contains (or where the column is missing while a BLOB exists). Surfaced
7529/// in `Stats::dim_violations` and consumed by P7 doctor.
7530///
7531/// # Errors
7532///
7533/// Returns the underlying SQLite error.
7534pub fn dim_violations(conn: &Connection) -> Result<u64> {
7535    // The expression `length(embedding)` returns the BLOB length; we map
7536    // legacy (no-header) payloads to `length/4` and headed (v17+) payloads
7537    // to `(length-1)/4` because length parity tells us which form is on
7538    // disk. Both forms must match the declared `embedding_dim` column.
7539    let n: i64 = conn
7540        .query_row(
7541            "SELECT COUNT(*) FROM memories \
7542             WHERE embedding IS NOT NULL \
7543               AND length(embedding) >= 4 \
7544               AND ( \
7545                   embedding_dim IS NULL \
7546                   OR ( \
7547                       (length(embedding) % 4 = 0 AND embedding_dim != length(embedding)/4) \
7548                       OR (length(embedding) % 4 = 1 AND embedding_dim != (length(embedding)-1)/4) \
7549                       OR (length(embedding) % 4 NOT IN (0,1)) \
7550                   ) \
7551               )",
7552            [],
7553            |r| r.get(0),
7554        )
7555        .unwrap_or(0);
7556    Ok(u64::try_from(n).unwrap_or(0))
7557}
7558
7559/// #1595/#1598 — the single embedding-UPDATE statement (headed blob +
7560/// declared dim), shared by [`set_embedding`], [`set_embeddings_batch`]
7561/// and [`set_embeddings_batch_reembed`] so the write shape cannot
7562/// drift between the checked and replace-semantics writers.
7563const SQL_UPDATE_EMBEDDING_WITH_DIM: &str =
7564    "UPDATE memories SET embedding = ?1, embedding_dim = ?2 WHERE id = ?3";
7565/// Degenerate empty-vector sibling of [`SQL_UPDATE_EMBEDDING_WITH_DIM`]
7566/// (legacy parity: empty embeddings persist with `embedding_dim = NULL`).
7567const SQL_UPDATE_EMBEDDING_NULL_DIM: &str =
7568    "UPDATE memories SET embedding = ?1, embedding_dim = NULL WHERE id = ?2";
7569
7570/// Store an embedding vector for a memory.
7571///
7572/// v0.6.3.1 P2 — writes are now headed with the magic byte (`encode_embedding_blob`)
7573/// and the namespace's first established dim is enforced. A dim mismatch
7574/// returns a typed [`EmbeddingDimMismatch`] surfaced as a 409 by the handler
7575/// layer. The same call also persists `embedding_dim` so future stats /
7576/// doctor passes don't re-derive from BLOB length.
7577///
7578/// # Errors
7579///
7580/// Returns [`EmbeddingDimMismatch`] (boxed via anyhow) when the embedding's
7581/// dimensionality differs from what the namespace established, or the
7582/// underlying SQLite error on failure.
7583pub fn set_embedding(conn: &Connection, id: &str, embedding: &[f32]) -> Result<()> {
7584    // Resolve namespace + check the dim invariant before mutating.
7585    let namespace: Option<String> = conn
7586        .query_row(
7587            "SELECT namespace FROM memories WHERE id = ?1",
7588            params![id],
7589            |r| r.get(0),
7590        )
7591        .ok();
7592    let attempted = embedding.len();
7593    if attempted == 0 {
7594        // Empty embeddings are a degenerate case — earlier code accepted
7595        // them; preserve that to avoid breaking legacy tests but skip the
7596        // dim check.
7597        let bytes = crate::embeddings::encode_embedding_blob(embedding);
7598        conn.execute(SQL_UPDATE_EMBEDDING_NULL_DIM, params![bytes, id])?;
7599        return Ok(());
7600    }
7601    if let Some(ref ns) = namespace
7602        && let Some(established) = namespace_embedding_dim(conn, ns)?
7603        && established != attempted
7604    {
7605        return Err(EmbeddingDimMismatch {
7606            namespace: ns.clone(),
7607            established,
7608            attempted,
7609        }
7610        .into());
7611    }
7612    let bytes = crate::embeddings::encode_embedding_blob(embedding);
7613    let dim_i64 = i64::try_from(attempted).unwrap_or(i64::MAX);
7614    conn.execute(SQL_UPDATE_EMBEDDING_WITH_DIM, params![bytes, dim_i64, id])?;
7615    Ok(())
7616}
7617
7618/// v0.7.0 Wave-2 A5 (issue #853) — batched embedding writer.
7619///
7620/// Writes a slice of `(id, embedding)` pairs inside a single SQLite
7621/// transaction. Equivalent to calling [`set_embedding`] in a loop, but
7622/// collapses N `UPDATE` round-trips (N implicit commits in autocommit
7623/// mode) into one transaction commit, which is the dominant cost on
7624/// SQLite WAL when N grows past a handful of rows.
7625///
7626/// Dim-invariant policy matches [`set_embedding`]:
7627/// * Empty embeddings are written as `embedding_dim = NULL` (legacy
7628///   degenerate-case parity).
7629/// * Per-namespace established dim is checked once per namespace
7630///   (cached in-flight) and any pair whose embedding length conflicts
7631///   returns an `EmbeddingDimMismatch` error — the whole transaction
7632///   rolls back so callers never see a partial commit. The mismatch
7633///   carries the FIRST offending pair's namespace/established/attempted
7634///   triple (consistent with the single-row path).
7635///
7636/// Returns the number of rows updated (rows whose `id` was not found in
7637/// the `memories` table are silently skipped — same as [`set_embedding`],
7638/// where `UPDATE … WHERE id = ?` returns `Ok(0)` and the function still
7639/// returns `Ok(())`).
7640///
7641/// **Boot backfill use:** [`crate::mcp::run_mcp_server`] calls this in
7642/// fixed-size chunks (see `DEFAULT_EMBED_BACKFILL_BATCH_SIZE`) so the
7643/// embedder produces vectors in parallel-friendly bursts and the
7644/// SQLite commit cost amortises across the batch.
7645///
7646/// # Errors
7647///
7648/// * Returns [`EmbeddingDimMismatch`] (boxed via anyhow) if any pair's
7649///   embedding dim disagrees with the namespace-established dim. The
7650///   transaction is rolled back; no rows are mutated.
7651/// * Returns the underlying SQLite error on transaction/prepare/execute
7652///   failure.
7653pub fn set_embeddings_batch(
7654    conn: &mut Connection,
7655    entries: &[(String, Vec<f32>)],
7656) -> Result<usize> {
7657    if entries.is_empty() {
7658        return Ok(0);
7659    }
7660
7661    // Lookup table: id -> namespace. Needed up-front because we want
7662    // to amortise the dim-check across a batch by resolving namespaces
7663    // in a single query rather than one query per row.
7664    let mut ns_by_id: HashMap<String, Option<String>> = HashMap::with_capacity(entries.len());
7665    {
7666        let mut stmt = conn.prepare("SELECT namespace FROM memories WHERE id = ?1")?;
7667        for (id, _) in entries {
7668            if ns_by_id.contains_key(id) {
7669                continue;
7670            }
7671            let ns: Option<String> = stmt
7672                .query_row(params![id], |r| r.get::<_, Option<String>>(0))
7673                .ok()
7674                .flatten();
7675            ns_by_id.insert(id.clone(), ns);
7676        }
7677    }
7678
7679    // Per-namespace established dim, cached so we only hit the
7680    // namespace_embedding_dim path once per distinct namespace in the
7681    // batch (the cache is intra-batch — the namespace's established
7682    // dim is immutable within this call's transaction window).
7683    let mut ns_dim_cache: HashMap<String, Option<usize>> = HashMap::new();
7684
7685    let tx = conn.transaction()?;
7686    {
7687        let mut update = tx.prepare(SQL_UPDATE_EMBEDDING_WITH_DIM)?;
7688        let mut update_empty = tx.prepare(SQL_UPDATE_EMBEDDING_NULL_DIM)?;
7689
7690        let mut rows_updated = 0usize;
7691        for (id, embedding) in entries {
7692            let attempted = embedding.len();
7693            if attempted == 0 {
7694                let bytes = crate::embeddings::encode_embedding_blob(embedding);
7695                rows_updated += update_empty.execute(params![bytes, id])?;
7696                continue;
7697            }
7698            if let Some(Some(ns)) = ns_by_id.get(id) {
7699                let established = if let Some(cached) = ns_dim_cache.get(ns) {
7700                    *cached
7701                } else {
7702                    let resolved = namespace_embedding_dim(&tx, ns)?;
7703                    ns_dim_cache.insert(ns.clone(), resolved);
7704                    resolved
7705                };
7706                if let Some(established) = established
7707                    && established != attempted
7708                {
7709                    return Err(EmbeddingDimMismatch {
7710                        namespace: ns.clone(),
7711                        established,
7712                        attempted,
7713                    }
7714                    .into());
7715                }
7716                // First successful write in a namespace sets the
7717                // established dim for the rest of this batch — keep
7718                // the cache in sync so subsequent rows in the same
7719                // namespace get a fast path AND so any inconsistent
7720                // pair later in the batch trips the dim check rather
7721                // than committing.
7722                if established.is_none() {
7723                    ns_dim_cache.insert(ns.clone(), Some(attempted));
7724                }
7725            }
7726            let bytes = crate::embeddings::encode_embedding_blob(embedding);
7727            let dim_i64 = i64::try_from(attempted).unwrap_or(i64::MAX);
7728            rows_updated += update.execute(params![bytes, dim_i64, id])?;
7729        }
7730
7731        drop(update);
7732        drop(update_empty);
7733        tx.commit()?;
7734        Ok(rows_updated)
7735    }
7736}
7737
7738/// Load an embedding vector for a memory. Returns None if not set.
7739///
7740/// v0.6.3.1 P2 — tolerant of legacy unheaded payloads (raw LE f32, length
7741/// `4n`) and v17 headed payloads (`0x01` + `4n` bytes). Anything else returns
7742/// an error so the caller can surface a typed corruption signal.
7743///
7744/// # Errors
7745///
7746/// Returns [`EmbeddingFormatError`](crate::embeddings::EmbeddingFormatError)
7747/// when the on-disk BLOB is malformed.
7748pub fn get_embedding(conn: &Connection, id: &str) -> Result<Option<Vec<f32>>> {
7749    let result: Option<Vec<u8>> = conn
7750        .query_row(
7751            "SELECT embedding FROM memories WHERE id = ?1",
7752            params![id],
7753            |row| row.get(0),
7754        )
7755        .ok();
7756    match result {
7757        Some(bytes) if !bytes.is_empty() => {
7758            let floats = crate::embeddings::decode_embedding_blob(&bytes)?;
7759            Ok(Some(floats))
7760        }
7761        _ => Ok(None),
7762    }
7763}
7764
7765/// Get all memory IDs that are missing embeddings.
7766///
7767/// #1579 B6 (F5.6): unbounded — materialises every `(id, title,
7768/// content)` triple in one `Vec`, which on a large backlog is the
7769/// whole corpus in memory. Hot loops (the embed-backfill sweep) should
7770/// use [`get_unembedded_ids_batch`] and drain in bounded passes; this
7771/// variant remains for callers that need the full snapshot semantics.
7772pub fn get_unembedded_ids(conn: &Connection) -> Result<Vec<(String, String, String)>> {
7773    let mut stmt =
7774        conn.prepare("SELECT id, title, content FROM memories WHERE embedding IS NULL")?;
7775    let rows = stmt.query_map([], |row| {
7776        Ok((
7777            row.get::<_, String>(0)?,
7778            row.get::<_, String>(1)?,
7779            row.get::<_, String>(2)?,
7780        ))
7781    })?;
7782    rows.collect::<rusqlite::Result<Vec<_>>>()
7783        .map_err(Into::into)
7784}
7785
7786/// #1579 B6 (F5.6) — bounded variant of [`get_unembedded_ids`].
7787///
7788/// Returns at most `limit` `(id, title, content)` triples so the
7789/// caller's materialisation is bounded by its batch size (the
7790/// `AI_MEMORY_EMBED_BACKFILL_BATCH` resolver semantics) instead of the
7791/// whole unembedded backlog. There is deliberately NO OFFSET: rows
7792/// that gain an embedding drop out of the `embedding IS NULL`
7793/// predicate, so callers drain by re-fetching until the returned batch
7794/// is empty (or stops shrinking — rows whose embedding persistently
7795/// fails stay at the head of the scan).
7796pub fn get_unembedded_ids_batch(
7797    conn: &Connection,
7798    limit: usize,
7799) -> Result<Vec<(String, String, String)>> {
7800    let mut stmt = conn.prepare_cached(
7801        "SELECT id, title, content FROM memories WHERE embedding IS NULL LIMIT ?1",
7802    )?;
7803    let rows = stmt.query_map(params![limit], |row| {
7804        Ok((
7805            row.get::<_, String>(0)?,
7806            row.get::<_, String>(1)?,
7807            row.get::<_, String>(2)?,
7808        ))
7809    })?;
7810    rows.collect::<rusqlite::Result<Vec<_>>>()
7811        .map_err(Into::into)
7812}
7813
7814/// #1595 — keyset-paginated variant of [`get_unembedded_ids_batch`].
7815///
7816/// Returns at most `limit` `(id, title, content)` triples whose `id`
7817/// sorts strictly AFTER `after_id` (or from the start when `None`),
7818/// in `id` order. The resilient backfill sweep advances its cursor
7819/// past every processed row — embedded OR skipped — so a poison row
7820/// (over-context-length content, transient embedder fault) can no
7821/// longer pin the scan head and starve the rest of the backlog (the
7822/// pre-fix `LIMIT`-only fetch re-returned persistently-failing rows
7823/// forever, and the no-progress guard then stopped the whole sweep
7824/// with 0 rows backfilled).
7825///
7826/// Two distinct prepared shapes (with / without the cursor predicate)
7827/// rather than the non-sargable `(?1 IS NULL OR id > ?1)` form, per
7828/// the v55/v56 sargability discipline.
7829///
7830/// # Errors
7831///
7832/// Returns the underlying SQLite error.
7833pub fn get_unembedded_ids_batch_after(
7834    conn: &Connection,
7835    after_id: Option<&str>,
7836    limit: usize,
7837) -> Result<Vec<(String, String, String)>> {
7838    let map_row = |row: &rusqlite::Row<'_>| {
7839        Ok((
7840            row.get::<_, String>(0)?,
7841            row.get::<_, String>(1)?,
7842            row.get::<_, String>(2)?,
7843        ))
7844    };
7845    let rows = if let Some(after) = after_id {
7846        let mut stmt = conn.prepare_cached(
7847            "SELECT id, title, content FROM memories \
7848             WHERE embedding IS NULL AND id > ?1 ORDER BY id LIMIT ?2",
7849        )?;
7850        let rows = stmt.query_map(params![after, limit], map_row)?;
7851        rows.collect::<rusqlite::Result<Vec<_>>>()?
7852    } else {
7853        let mut stmt = conn.prepare_cached(
7854            "SELECT id, title, content FROM memories \
7855             WHERE embedding IS NULL ORDER BY id LIMIT ?1",
7856        )?;
7857        let rows = stmt.query_map(params![limit], map_row)?;
7858        rows.collect::<rusqlite::Result<Vec<_>>>()?
7859    };
7860    Ok(rows)
7861}
7862
7863/// #1598 — keyset-paginated scan over ALL live memories (embedded or
7864/// not), optionally namespace-filtered, for the `ai-memory reembed`
7865/// full-corpus sweep. Same cursor semantics as
7866/// [`get_unembedded_ids_batch_after`]: at most `limit` `(id, title,
7867/// content)` triples with `id` strictly after `after_id`, in `id`
7868/// order. Four distinct prepared shapes (namespace × cursor) keep the
7869/// scan sargable (v55/v56 discipline).
7870///
7871/// # Errors
7872///
7873/// Returns the underlying SQLite error.
7874pub fn get_memory_texts_batch(
7875    conn: &Connection,
7876    namespace: Option<&str>,
7877    after_id: Option<&str>,
7878    limit: usize,
7879) -> Result<Vec<(String, String, String)>> {
7880    let map_row = |row: &rusqlite::Row<'_>| {
7881        Ok((
7882            row.get::<_, String>(0)?,
7883            row.get::<_, String>(1)?,
7884            row.get::<_, String>(2)?,
7885        ))
7886    };
7887    let rows = match (namespace, after_id) {
7888        (Some(ns), Some(after)) => {
7889            let mut stmt = conn.prepare_cached(
7890                "SELECT id, title, content FROM memories \
7891                 WHERE namespace = ?1 AND id > ?2 ORDER BY id LIMIT ?3",
7892            )?;
7893            let rows = stmt.query_map(params![ns, after, limit], map_row)?;
7894            rows.collect::<rusqlite::Result<Vec<_>>>()?
7895        }
7896        (Some(ns), None) => {
7897            let mut stmt = conn.prepare_cached(
7898                "SELECT id, title, content FROM memories \
7899                 WHERE namespace = ?1 ORDER BY id LIMIT ?2",
7900            )?;
7901            let rows = stmt.query_map(params![ns, limit], map_row)?;
7902            rows.collect::<rusqlite::Result<Vec<_>>>()?
7903        }
7904        (None, Some(after)) => {
7905            let mut stmt = conn.prepare_cached(
7906                "SELECT id, title, content FROM memories \
7907                 WHERE id > ?1 ORDER BY id LIMIT ?2",
7908            )?;
7909            let rows = stmt.query_map(params![after, limit], map_row)?;
7910            rows.collect::<rusqlite::Result<Vec<_>>>()?
7911        }
7912        (None, None) => {
7913            let mut stmt = conn
7914                .prepare_cached("SELECT id, title, content FROM memories ORDER BY id LIMIT ?1")?;
7915            let rows = stmt.query_map(params![limit], map_row)?;
7916            rows.collect::<rusqlite::Result<Vec<_>>>()?
7917        }
7918    };
7919    Ok(rows)
7920}
7921
7922/// #1598 — REPLACE-semantics sibling of [`set_embeddings_batch`] for
7923/// the `ai-memory reembed` vector-space migration.
7924///
7925/// Identical single-transaction write shape, but it deliberately does
7926/// NOT enforce the per-namespace established-dim invariant: re-embed
7927/// is exactly the tool that migrates a namespace from one model/dim to
7928/// another, so mid-run the namespace legitimately holds mixed dims
7929/// (the H7 recall read-guards skip dim-mismatched vectors during the
7930/// transition, and the sweep converges every row to the target dim).
7931/// Every other caller MUST keep using [`set_embeddings_batch`] — the
7932/// G4 invariant is what stops a misconfigured writer from silently
7933/// zeroing cosine scores.
7934///
7935/// Returns the number of rows updated (unknown ids are skipped, same
7936/// as the checked sibling).
7937///
7938/// # Errors
7939///
7940/// Returns the underlying SQLite transaction / statement error.
7941pub fn set_embeddings_batch_reembed(
7942    conn: &mut Connection,
7943    entries: &[(String, Vec<f32>)],
7944) -> Result<usize> {
7945    if entries.is_empty() {
7946        return Ok(0);
7947    }
7948    let tx = conn.transaction()?;
7949    let mut rows_updated = 0usize;
7950    {
7951        let mut update = tx.prepare(SQL_UPDATE_EMBEDDING_WITH_DIM)?;
7952        let mut update_empty = tx.prepare(SQL_UPDATE_EMBEDDING_NULL_DIM)?;
7953        for (id, embedding) in entries {
7954            let bytes = crate::embeddings::encode_embedding_blob(embedding);
7955            if embedding.is_empty() {
7956                // Legacy degenerate-case parity with `set_embedding`.
7957                rows_updated += update_empty.execute(params![bytes, id])?;
7958            } else {
7959                let dim_i64 = i64::try_from(embedding.len()).unwrap_or(i64::MAX);
7960                rows_updated += update.execute(params![bytes, dim_i64, id])?;
7961            }
7962        }
7963    }
7964    tx.commit()?;
7965    Ok(rows_updated)
7966}
7967
7968/// #1598 — `(total_rows, rows_with_embeddings)` for the reembed
7969/// dry-run plan, optionally namespace-filtered. `COUNT(embedding)`
7970/// counts non-NULL values, so the missing count is the difference.
7971///
7972/// # Errors
7973///
7974/// Returns the underlying SQLite error.
7975pub fn embedding_coverage(conn: &Connection, namespace: Option<&str>) -> Result<(u64, u64)> {
7976    let (total, embedded): (i64, i64) = if let Some(ns) = namespace {
7977        conn.query_row(
7978            "SELECT COUNT(*), COUNT(embedding) FROM memories WHERE namespace = ?1",
7979            params![ns],
7980            |r| Ok((r.get(0)?, r.get(1)?)),
7981        )?
7982    } else {
7983        conn.query_row("SELECT COUNT(*), COUNT(embedding) FROM memories", [], |r| {
7984            Ok((r.get(0)?, r.get(1)?))
7985        })?
7986    };
7987    Ok((
7988        u64::try_from(total).unwrap_or(0),
7989        u64::try_from(embedded).unwrap_or(0),
7990    ))
7991}
7992
7993/// #1598 — distinct embedding dimensionalities currently stored,
7994/// optionally namespace-filtered, for the reembed pre-flight banner
7995/// (the loud "old dims vs target dim" disclosure before a vector-space
7996/// migration). Prefers the declared `embedding_dim` column and falls
7997/// back to deriving from the BLOB length for legacy rows — `4n+1`
7998/// bytes is the v17 headed form (`(len-1)/4` floats), `4n` the
7999/// legacy unheaded form (`len/4`), mirroring [`dim_violations`].
8000///
8001/// # Errors
8002///
8003/// Returns the underlying SQLite error.
8004pub fn distinct_embedding_dims(conn: &Connection, namespace: Option<&str>) -> Result<Vec<usize>> {
8005    const DIM_EXPR: &str = "COALESCE(embedding_dim, \
8006         CASE WHEN length(embedding) % 4 = 1 THEN (length(embedding)-1)/4 \
8007              ELSE length(embedding)/4 END)";
8008    let collect = |stmt: &mut rusqlite::Statement<'_>,
8009                   params: &[&dyn rusqlite::ToSql]|
8010     -> Result<Vec<usize>> {
8011        let rows = stmt.query_map(params, |r| r.get::<_, i64>(0))?;
8012        Ok(rows
8013            .collect::<rusqlite::Result<Vec<_>>>()?
8014            .into_iter()
8015            .filter_map(|d| usize::try_from(d).ok())
8016            .collect())
8017    };
8018    if let Some(ns) = namespace {
8019        let mut stmt = conn.prepare(&format!(
8020            "SELECT DISTINCT {DIM_EXPR} AS dim FROM memories \
8021             WHERE embedding IS NOT NULL AND namespace = ?1 ORDER BY dim"
8022        ))?;
8023        collect(&mut stmt, &[&ns])
8024    } else {
8025        let mut stmt = conn.prepare(&format!(
8026            "SELECT DISTINCT {DIM_EXPR} AS dim FROM memories \
8027             WHERE embedding IS NOT NULL ORDER BY dim"
8028        ))?;
8029        collect(&mut stmt, &[])
8030    }
8031}
8032
8033/// #1579 B3 — count of rows carrying a stored embedding. Cheap probe
8034/// (no blob decode, no row materialisation) used by the CLI recall
8035/// path to decide whether a one-shot invocation should pay the HNSW
8036/// graph-construction cost at all (see
8037/// [`crate::hnsw::CLI_HNSW_BUILD_MIN_ENTRIES`]).
8038///
8039/// # Errors
8040///
8041/// Bubbles the rusqlite error from the COUNT query.
8042pub fn count_embedded_memories(conn: &Connection) -> Result<i64> {
8043    conn.query_row(
8044        "SELECT COUNT(*) FROM memories WHERE embedding IS NOT NULL",
8045        [],
8046        |row| row.get(0),
8047    )
8048    .map_err(Into::into)
8049}
8050
8051/// Get all stored embeddings as (id, embedding) pairs for building the HNSW index.
8052///
8053/// v0.6.3.1 P2 — uses the magic-byte tolerant decoder. Rows whose BLOB is
8054/// malformed are logged and skipped (the alternative — bailing the entire
8055/// HNSW build — would take the whole semantic-search surface offline for one
8056/// corrupt row).
8057pub fn get_all_embeddings(conn: &Connection) -> Result<Vec<(String, Vec<f32>)>> {
8058    let mut stmt =
8059        conn.prepare("SELECT id, embedding FROM memories WHERE embedding IS NOT NULL")?;
8060    let rows = stmt.query_map([], |row| {
8061        let id: String = row.get(0)?;
8062        let bytes: Vec<u8> = row.get(1)?;
8063        Ok((id, bytes))
8064    })?;
8065    let mut entries = Vec::new();
8066    for row in rows {
8067        let (id, bytes) = row?;
8068        if bytes.is_empty() {
8069            continue;
8070        }
8071        match crate::embeddings::decode_embedding_blob(&bytes) {
8072            Ok(floats) => entries.push((id, floats)),
8073            Err(e) => {
8074                tracing::warn!(
8075                    memory_id = %id,
8076                    error = %e,
8077                    "skipping memory with malformed embedding BLOB during HNSW build"
8078                );
8079            }
8080        }
8081    }
8082    Ok(entries)
8083}
8084
8085/// Hybrid recall — FTS5 keyword search + semantic cosine similarity.
8086/// Returns memories ranked by a blended score of keyword and semantic relevance.
8087/// When an HNSW `vector_index` is provided, uses approximate nearest-neighbor
8088/// search instead of scanning all embeddings linearly.
8089#[allow(clippy::too_many_arguments)]
8090/// v0.6.3.1 (P3): hybrid recall preserving the existing 2-tuple return
8091/// shape for HTTP / CLI / bench callers. Delegates to
8092/// [`recall_hybrid_with_telemetry`] and discards the telemetry. Kept so
8093/// the dozen-plus call sites need no churn for a feature only MCP
8094/// `handle_recall` consumes.
8095#[allow(clippy::too_many_arguments)]
8096pub fn recall_hybrid(
8097    conn: &Connection,
8098    context: &str,
8099    query_embedding: &[f32],
8100    namespace: Option<&str>,
8101    limit: usize,
8102    tags_filter: Option<&str>,
8103    since: Option<&str>,
8104    until: Option<&str>,
8105    vector_index: Option<&crate::hnsw::VectorIndex>,
8106    short_extend: i64,
8107    mid_extend: i64,
8108    as_agent: Option<&str>,
8109    budget_tokens: Option<usize>,
8110    scoring: &crate::config::ResolvedScoring,
8111    // v0.7.0 WT-1-E — see [`recall_with_telemetry`] for the
8112    // archived-source exclusion contract.
8113    include_archived: bool,
8114    // v0.7.0 Form 4 / Cluster-A PERF-3 — push `--source-uri-prefix`
8115    // into the SQL WHERE on both the FTS and semantic branches so the
8116    // partial `idx_memories_source_uri` index covers the lookup. See
8117    // [`recall`] for the contract.
8118    source_uri_prefix: Option<&str>,
8119) -> Result<(Vec<(Memory, f64)>, BudgetOutcome)> {
8120    let (results, outcome, _telemetry) = recall_hybrid_with_telemetry(
8121        conn,
8122        context,
8123        query_embedding,
8124        namespace,
8125        limit,
8126        tags_filter,
8127        since,
8128        until,
8129        vector_index,
8130        short_extend,
8131        mid_extend,
8132        as_agent,
8133        budget_tokens,
8134        scoring,
8135        include_archived,
8136        source_uri_prefix,
8137    )?;
8138    Ok((results, outcome))
8139}
8140
8141/// FX-4 / PERF-2 (2026-05-26) — convenience wrapper for the HTTP
8142/// recall handler. Same return shape as [`recall_hybrid`] but accepts
8143/// a pre-computed HNSW hit slice (caller ran `idx.search()` outside
8144/// the DB lock) so the DB-mutex hold window does not cover the
8145/// CPU-bound ANN walk. Telemetry is dropped on this path; the HTTP
8146/// surface does not consume it today.
8147#[allow(clippy::too_many_arguments)]
8148pub fn recall_hybrid_precomputed_hnsw(
8149    conn: &Connection,
8150    context: &str,
8151    query_embedding: &[f32],
8152    namespace: Option<&str>,
8153    limit: usize,
8154    tags_filter: Option<&str>,
8155    since: Option<&str>,
8156    until: Option<&str>,
8157    precomputed_hnsw_hits: &[crate::hnsw::VectorHit],
8158    short_extend: i64,
8159    mid_extend: i64,
8160    as_agent: Option<&str>,
8161    budget_tokens: Option<usize>,
8162    scoring: &crate::config::ResolvedScoring,
8163    include_archived: bool,
8164    source_uri_prefix: Option<&str>,
8165) -> Result<(Vec<(Memory, f64)>, BudgetOutcome)> {
8166    let (results, outcome, _telemetry) = recall_hybrid_with_telemetry_precomputed_hnsw(
8167        conn,
8168        context,
8169        query_embedding,
8170        namespace,
8171        limit,
8172        tags_filter,
8173        since,
8174        until,
8175        precomputed_hnsw_hits,
8176        short_extend,
8177        mid_extend,
8178        as_agent,
8179        budget_tokens,
8180        scoring,
8181        include_archived,
8182        source_uri_prefix,
8183    )?;
8184    Ok((results, outcome))
8185}
8186
// NOTE: the paragraph below describes `recall_hybrid_with_telemetry`.
// It is kept as plain `//` comments because a `///` block here would
// be attached by rustdoc to the next item (`struct HybridPrep`).
//
// v0.6.3.1 (P3 + P6): hybrid recall reporting per-stage candidate counts,
// the average semantic blend weight, and the full budget outcome. MCP
// `handle_recall` uses the telemetry to populate the `meta` block (closes
// audit gaps G2/G8/G11) and the `BudgetOutcome` to populate R1 budget fields.
//
// The retrieval logic is unchanged — anti-goal of P3 is "do not change
// recall scoring or fusion logic." Counters are computed in place:
// `fts_candidates` is the pre-fusion FTS5 row count, `hnsw_candidates`
// is the pre-fusion HNSW (or linear-scan) hit count admitted past the
// 0.2 cosine gate, `blend_weight_avg` is the mean `semantic_weight`
// across the *returned* set (not the full candidate pool — operators
// care about what made it out).
8199// ---------------------------------------------------------------------------
8200// #871 — `recall_hybrid_with_telemetry` stage helpers.
8201//
8202// The original function was ~508 LOC carrying query preparation,
8203// FTS5 keyword retrieval, semantic (HNSW or linear-scan) retrieval,
8204// adaptive blend + decay scoring, touch ops + budget application,
8205// and telemetry assembly. Per the code-review verdict the function
8206// is split into focused stage-helpers so each phase has a clear
8207// contract and the orchestrator stays readable.
8208//
8209// The stages are kept inside `storage::mod` (rather than carved into
8210// a sub-module) because the helpers all share access to private
8211// helpers like `row_to_memory`, `sanitize_fts_query`,
8212// `archived_source_clause`, etc., and the SQL is tightly tied to
8213// the schema living in this module.
8214//
8215// Behaviour is byte-for-byte preserved: the same SQL runs, the same
8216// fusion produces the same blended scores, and `touch_many` mutates
8217// the same surviving set. Only the function-internal structure
8218// changes.
8219// ---------------------------------------------------------------------------
8220
/// Result of [`prepare_hybrid_query`] — the pre-computed SQL
/// fragments + bind params the FTS and semantic phases need.
///
/// The `fts_*` fragments are written against the `m` table alias used
/// by the FTS query; the `sem_*` fragments are written against the
/// bare `memories` table used by the linear-scan semantic query.
struct HybridPrep<'a> {
    /// Output of `sanitize_fts_query(context, true)`; bound as `?1` of
    /// the FTS statement (the `MATCH` operand).
    fts_query: String,
    /// RFC 3339 timestamp captured once in `prepare_hybrid_query`; used
    /// for every expiry comparison so all stages share one instant.
    now: String,
    /// Visibility prefixes from `compute_visibility_prefixes(as_agent)`;
    /// bound to the four visibility slots and re-checked in Rust via
    /// `is_visible` on the HNSW path.
    prefixes: VisibilityPrefixes,
    /// Hierarchy `IN (…)` clause for the FTS query as returned by
    /// `hierarchy_in_clause`; empty when that helper returns `None`.
    fts_hierarchy_fragment: String,
    /// `AND memories.namespace IN (…)` over the namespace ancestors when
    /// hierarchy expansion is active; empty otherwise.
    sem_hierarchy_fragment: String,
    /// Namespace bound to the exact-match predicate: `None` when
    /// hierarchy expansion is active (the `IN` fragment takes over),
    /// otherwise the caller's namespace unchanged.
    effective_namespace: Option<&'a str>,
    /// Second value returned by `hierarchy_in_clause(namespace)`.
    hierarchy_active: bool,
    /// `archived_source_clause(include_archived, "m")`.
    fts_archived_fragment: &'static str,
    /// `archived_source_clause(include_archived, "memories")`.
    sem_archived_fragment: &'static str,
    /// `AND m.source_uri LIKE ?12 ESCAPE '\'` when a source-URI filter
    /// is in effect; empty otherwise.
    fts_source_uri_fragment: &'static str,
    /// `AND memories.source_uri LIKE ?10 ESCAPE '\'` when a source-URI
    /// filter is in effect; empty otherwise.
    sem_source_uri_fragment: &'static str,
    /// LIKE pattern (`escape_like_pattern(prefix)` + `%`) bound to the
    /// slot named by the two fragments above; `None` when no prefix —
    /// or an empty one — was supplied.
    source_uri_like_param: Option<String>,
}
8237
8238/// #871 stage 1 — query preparation. Sanitises the FTS5 expression,
8239/// resolves namespace hierarchy expansion (`Task 1.12`), computes
8240/// visibility prefixes for the `?8..?11` (FTS) / `?6..?9` (semantic)
8241/// bind slots, and stamps the archived-source / source-URI-prefix
8242/// SQL fragments.
8243///
8244/// The `'now'` timestamp is captured here so all subsequent stages
8245/// see the same monotonic instant.
8246fn prepare_hybrid_query<'a>(
8247    context: &str,
8248    namespace: Option<&'a str>,
8249    as_agent: Option<&str>,
8250    include_archived: bool,
8251    source_uri_prefix: Option<&str>,
8252) -> HybridPrep<'a> {
8253    let now = Utc::now().to_rfc3339();
8254    let fts_query = sanitize_fts_query(context, true);
8255    let prefixes = compute_visibility_prefixes(as_agent);
8256    let (fts_hierarchy_in, hierarchy_active) = hierarchy_in_clause(namespace);
8257    let fts_hierarchy_fragment = fts_hierarchy_in.unwrap_or_default();
8258    let sem_hierarchy_fragment = if hierarchy_active {
8259        if let Some(ns) = namespace {
8260            let ancestors = crate::models::namespace_ancestors(ns);
8261            let quoted: Vec<String> = ancestors
8262                .iter()
8263                .map(|a| format!("'{}'", a.replace('\'', "''")))
8264                .collect();
8265            format!("AND memories.namespace IN ({})", quoted.join(","))
8266        } else {
8267            String::new()
8268        }
8269    } else {
8270        String::new()
8271    };
8272    let effective_namespace = if hierarchy_active { None } else { namespace };
8273    let fts_archived_fragment = archived_source_clause(include_archived, "m");
8274    let sem_archived_fragment = archived_source_clause(include_archived, "memories");
8275    let source_uri_like_param: Option<String> = match source_uri_prefix {
8276        Some(prefix) if !prefix.is_empty() => Some(format!("{}%", escape_like_pattern(prefix))),
8277        _ => None,
8278    };
8279    let fts_source_uri_fragment = if source_uri_like_param.is_some() {
8280        "AND m.source_uri LIKE ?12 ESCAPE '\\'"
8281    } else {
8282        ""
8283    };
8284    let sem_source_uri_fragment = if source_uri_like_param.is_some() {
8285        "AND memories.source_uri LIKE ?10 ESCAPE '\\'"
8286    } else {
8287        ""
8288    };
8289    HybridPrep {
8290        fts_query,
8291        now,
8292        prefixes,
8293        fts_hierarchy_fragment,
8294        sem_hierarchy_fragment,
8295        effective_namespace,
8296        hierarchy_active,
8297        fts_archived_fragment,
8298        sem_archived_fragment,
8299        fts_source_uri_fragment,
8300        sem_source_uri_fragment,
8301        source_uri_like_param,
8302    }
8303}
8304
8305/// #871 stage 2 — FTS5 keyword phase. Builds + executes the FTS SQL
8306/// with the per-row `fts_score` projection, returns the raw
8307/// `(Memory, fts_score, embedding_bytes)` tuples for the fusion
8308/// stage. The embedding bytes are pulled inline from the same
8309/// SELECT (Cluster-F PERF-2) so the fusion stage can compute cosine
8310/// without an N+1 round-trip.
8311fn fts_keyword_phase(
8312    conn: &Connection,
8313    prep: &HybridPrep<'_>,
8314    tags_filter: Option<&str>,
8315    since: Option<&str>,
8316    until: Option<&str>,
8317    limit: usize,
8318) -> Result<Vec<(Memory, f64, Option<Vec<u8>>)>> {
8319    let fts_limit = (limit * 3).max(30);
8320    let fts_sql = format!(
8321        "SELECT m.id, m.tier, m.namespace, m.title, m.content, m.tags, m.priority,
8322                m.confidence, m.source, m.access_count, m.created_at, m.updated_at,
8323                m.last_accessed_at, m.expires_at, m.metadata, m.reflection_depth,
8324                m.memory_kind, m.entity_id, m.persona_version,
8325                m.citations, m.source_uri, m.source_span,
8326                m.confidence_source, m.confidence_signals, m.confidence_decayed_at, m.embedding,
8327                (fts.rank * -1) + (m.priority * 0.5) + (MIN(m.access_count, 50) * 0.1)
8328                + (m.confidence * 2.0)
8329                + (CASE m.tier WHEN 'long' THEN 3.0 WHEN 'mid' THEN 1.0 ELSE 0.0 END)
8330                + (1.0 / (1.0 + (julianday('now') - julianday(m.updated_at)) * 0.1))
8331                AS fts_score
8332         FROM memories_fts fts
8333         JOIN memories m ON m.rowid = fts.rowid
8334         WHERE memories_fts MATCH ?1
8335           AND (?2 IS NULL OR m.namespace = ?2)
8336           {fts_hierarchy_fragment}
8337           AND (m.expires_at IS NULL OR m.expires_at > ?3)
8338           AND (?4 IS NULL OR EXISTS (SELECT 1 FROM json_each(m.tags) WHERE json_each.value = ?4))
8339           AND (?5 IS NULL OR m.created_at >= ?5)
8340           AND (?6 IS NULL OR m.created_at <= ?6)
8341           {fts_archived_fragment}
8342           {fts_source_uri_fragment}
8343           {vis}
8344         ORDER BY fts_score DESC
8345         LIMIT ?7",
8346        fts_hierarchy_fragment = prep.fts_hierarchy_fragment,
8347        fts_archived_fragment = prep.fts_archived_fragment,
8348        fts_source_uri_fragment = prep.fts_source_uri_fragment,
8349        vis = visibility_clause(8, "m"),
8350    );
8351    // #1579 B6 — recall’s FTS branch is the hottest read statement;
8352    // prepare_cached amortises re-parsing across recalls (shape cardinality
8353    // is small: the optional fragments expand to a handful of variants).
8354    let mut fts_stmt = conn.prepare_cached(&fts_sql)?;
8355    let fts_row_handler =
8356        |row: &rusqlite::Row<'_>| -> rusqlite::Result<(Memory, f64, Option<Vec<u8>>)> {
8357            let mem = row_to_memory(row)?;
8358            let fts_score: f64 = row.get("fts_score")?;
8359            // Index 25 = `m.embedding` (the SELECT list above places it
8360            // after `confidence_decayed_at`). Pull as `Option<Vec<u8>>`
8361            // so legacy rows without embeddings surface as `None`.
8362            let embedding_bytes: Option<Vec<u8>> = row.get(25)?;
8363            Ok((mem, fts_score, embedding_bytes))
8364        };
8365    let (vis_p, vis_t, vis_u, vis_o) = prep.prefixes.clone();
8366    let rows: Vec<(Memory, f64, Option<Vec<u8>>)> =
8367        if let Some(ref uri_param) = prep.source_uri_like_param {
8368            fts_stmt
8369                .query_map(
8370                    params![
8371                        prep.fts_query,
8372                        prep.effective_namespace,
8373                        prep.now,
8374                        tags_filter,
8375                        since,
8376                        until,
8377                        fts_limit,
8378                        vis_p,
8379                        vis_t,
8380                        vis_u,
8381                        vis_o,
8382                        uri_param,
8383                    ],
8384                    fts_row_handler,
8385                )?
8386                .collect::<rusqlite::Result<Vec<_>>>()?
8387        } else {
8388            fts_stmt
8389                .query_map(
8390                    params![
8391                        prep.fts_query,
8392                        prep.effective_namespace,
8393                        prep.now,
8394                        tags_filter,
8395                        since,
8396                        until,
8397                        fts_limit,
8398                        vis_p,
8399                        vis_t,
8400                        vis_u,
8401                        vis_o,
8402                    ],
8403                    fts_row_handler,
8404                )?
8405                .collect::<rusqlite::Result<Vec<_>>>()?
8406        };
8407    Ok(rows)
8408}
8409
8410/// #871 stage 3 — semantic phase. Two paths share the same `scored`
8411/// HashMap mutation contract:
8412///
8413///   - HNSW path (when a `vector_index` is supplied): runs an ANN
8414///     search bounded at `5×limit`, gates each hit at `cosine > 0.2`,
8415///     and re-applies the FTS WHERE-clause filters in Rust because
8416///     the HNSW index returns raw vector neighbours (no SQL
8417///     visibility / archived-source / source-URI-prefix filter has
8418///     run).
8419///   - Linear-scan fallback (HNSW absent): runs the semantic SQL,
8420///     decodes embedding BLOBs, applies the same `cosine > 0.2`
8421///     gate, and inserts surviving rows into `scored`.
8422///
8423/// Returns the running `hnsw_candidates_count` for telemetry. Rows
8424/// already present in `scored` (i.e. FTS-side hits) are skipped so
8425/// the FTS embedding-based cosine wins (consistent with the
8426/// pre-refactor behaviour).
8427#[allow(clippy::too_many_arguments)]
8428fn semantic_phase(
8429    conn: &Connection,
8430    prep: &HybridPrep<'_>,
8431    query_embedding: &[f32],
8432    vector_index: Option<&crate::hnsw::VectorIndex>,
8433    // FX-4 / PERF-2 (2026-05-26) — when supplied, the HNSW search
8434    // has already been executed OUTSIDE the DB lock by the caller
8435    // (HTTP recall handler) and the hits are passed in here. The
8436    // function uses these directly instead of re-running
8437    // `idx.search()`, which keeps the CPU-bound ANN walk off the
8438    // DB-mutex hold window so concurrent recalls do not serialise
8439    // behind one another. When both `vector_index` and
8440    // `precomputed_hnsw_hits` are supplied, the precomputed slice
8441    // wins — callers that already paid the search cost outside the
8442    // lock must not pay it again inside. Existing callers (MCP /
8443    // CLI / SAL) pass `None` and keep the legacy single-call
8444    // behaviour where `semantic_phase` runs the search itself.
8445    precomputed_hnsw_hits: Option<&[crate::hnsw::VectorHit]>,
8446    namespace: Option<&str>,
8447    tags_filter: Option<&str>,
8448    since: Option<&str>,
8449    until: Option<&str>,
8450    limit: usize,
8451    include_archived: bool,
8452    source_uri_prefix: Option<&str>,
8453    scored: &mut HashMap<String, (Memory, f64, f64)>,
8454    // v0.7.0 H7 — bumped once per stored embedding whose dimensionality
8455    // disagrees with `query_embedding` (embedder-model switch). Accumulated
8456    // across the whole recall and surfaced via telemetry + an aggregated warn.
8457    dim_mismatch_count: &mut usize,
8458) -> Result<usize> {
8459    let mut hnsw_candidates_count: usize = 0;
8460    let now = prep.now.as_str();
8461    // FX-4 / PERF-2 — when `precomputed_hnsw_hits` is supplied OR a
8462    // `vector_index` is supplied, run the HNSW-hit ingestion path.
8463    // The precomputed path skips the `idx.search()` call (already
8464    // paid outside the lock); the legacy path runs the search
8465    // inline.
8466    if precomputed_hnsw_hits.is_some() || vector_index.is_some() {
8467        let owned_hits;
8468        let hits: &[crate::hnsw::VectorHit] = if let Some(pre) = precomputed_hnsw_hits {
8469            pre
8470        } else {
8471            let ann_limit = (limit * 5).max(50);
8472            owned_hits = vector_index
8473                .expect("vector_index set in legacy branch")
8474                .search(query_embedding, ann_limit);
8475            owned_hits.as_slice()
8476        };
8477        // v0.7.0 #981 — pre-#981 this branch called `get(conn, &hit.id)`
8478        // per hit, producing 50-250 round-trips per recall on a warm
8479        // index. The fix collects the ids that pass the
8480        // `cosine > 0.2` + not-yet-scored cosine gate, batches the
8481        // SELECT via `get_many`, and re-applies the row-side filter
8482        // ladder against the fetched map. Net effect: one SELECT
8483        // instead of N, no behavioural drift on the per-row filters
8484        // because they're applied identically against `&mem`. See
8485        // `tests/recall_semantic_batch_fetch_981.rs` for the pin.
8486        let mut needed_ids: Vec<String> = Vec::with_capacity(hits.len());
8487        let mut hit_meta: Vec<(String, f64)> = Vec::with_capacity(hits.len());
8488        for hit in hits {
8489            if scored.contains_key(&hit.id) {
8490                continue;
8491            }
8492            let cosine = f64::from(1.0 - hit.distance);
8493            // v0.6.2 (S18 iteration): cosine gate relaxed 0.3 → 0.2 —
8494            // see the matching comment in the linear-scan branch below.
8495            if cosine > crate::RECALL_COSINE_GATE {
8496                needed_ids.push(hit.id.clone());
8497                hit_meta.push((hit.id.clone(), cosine));
8498            }
8499        }
8500        let fetched = get_many(conn, &needed_ids)?;
8501        for (id, cosine) in hit_meta {
8502            let Some(mem) = fetched.get(&id) else {
8503                continue;
8504            };
8505            // #1692 — the HNSW distance was computed by `cosine_distance` over
8506            // the stored vector, which silently TRUNCATES when the query/stored
8507            // dims differ (after an embedder swap): a wrong-but-finite cosine
8508            // that the >0.2 gate lets through, ranking garbage while
8509            // `dim_mismatch_count` stays 0. Recompute against the stored
8510            // embedding with the checked comparator (the same #1584-guarded
8511            // path the FTS + linear-scan branches use) and count + skip a
8512            // dimension mismatch instead of trusting `hit.distance`. For a
8513            // same-dim hit the checked cosine equals `1.0 - hit.distance`
8514            // (both reduce to the dot product), so valid hits are unchanged.
8515            let cosine = match get_embedding(conn, &id) {
8516                Ok(Some(stored)) => match crate::embeddings::Embedder::cosine_similarity_checked(
8517                    query_embedding,
8518                    &stored,
8519                ) {
8520                    crate::embeddings::CosineComparison::Comparable(c) => f64::from(c),
8521                    crate::embeddings::CosineComparison::DimensionMismatch { .. } => {
8522                        *dim_mismatch_count += 1;
8523                        continue;
8524                    }
8525                },
8526                // Legacy row with no stored embedding (or a fetch error): fall
8527                // back to the HNSW distance-derived cosine rather than dropping.
8528                _ => cosine,
8529            };
8530            if let Some(ns) = namespace {
8531                if prep.hierarchy_active {
8532                    let ancestors = crate::models::namespace_ancestors(ns);
8533                    if !ancestors.iter().any(|a| a == &mem.namespace) {
8534                        continue;
8535                    }
8536                } else if mem.namespace != ns {
8537                    continue;
8538                }
8539            }
8540            if let Some(exp) = &mem.expires_at
8541                && exp.as_str() <= now
8542            {
8543                continue;
8544            }
8545            if let Some(tf) = tags_filter
8546                && !mem.tags.iter().any(|t| t == tf)
8547            {
8548                continue;
8549            }
8550            if let Some(s) = since
8551                && mem.created_at.as_str() < s
8552            {
8553                continue;
8554            }
8555            if let Some(u) = until
8556                && mem.created_at.as_str() > u
8557            {
8558                continue;
8559            }
8560            if !is_visible(mem, &prep.prefixes) {
8561                continue;
8562            }
8563            if !include_archived && is_archived_source(mem) {
8564                continue;
8565            }
8566            if let Some(prefix) = source_uri_prefix
8567                && !prefix.is_empty()
8568                && !mem
8569                    .source_uri
8570                    .as_deref()
8571                    .is_some_and(|u| u.starts_with(prefix))
8572            {
8573                continue;
8574            }
8575            // Clone is unavoidable here — `scored` owns the Memory
8576            // for the final cross-phase merge, and `fetched` may be
8577            // re-read for downstream phases.
8578            scored.insert(mem.id.clone(), (mem.clone(), 0.0, cosine));
8579            hnsw_candidates_count += 1;
8580        }
8581        return Ok(hnsw_candidates_count);
8582    }
8583
8584    // Fallback: linear scan over all embeddings.
8585    let sem_sql = format!(
8586        "SELECT id, tier, namespace, title, content, tags, priority,
8587                confidence, source, access_count, created_at, updated_at,
8588                last_accessed_at, expires_at, metadata, reflection_depth, memory_kind, embedding
8589         FROM memories
8590         WHERE embedding IS NOT NULL
8591           AND (?1 IS NULL OR namespace = ?1)
8592           {sem_hierarchy_fragment}
8593           AND (expires_at IS NULL OR expires_at > ?2)
8594           AND (?3 IS NULL OR EXISTS (SELECT 1 FROM json_each(memories.tags) WHERE json_each.value = ?3))
8595           AND (?4 IS NULL OR created_at >= ?4)
8596           AND (?5 IS NULL OR created_at <= ?5)
8597           {sem_archived_fragment}
8598           {sem_source_uri_fragment}
8599           {vis}",
8600        sem_hierarchy_fragment = prep.sem_hierarchy_fragment,
8601        sem_archived_fragment = prep.sem_archived_fragment,
8602        sem_source_uri_fragment = prep.sem_source_uri_fragment,
8603        vis = visibility_clause(6, "memories"),
8604    );
8605    // #1579 B6 — same prepare_cached treatment as the FTS branch above.
8606    let mut sem_stmt = conn.prepare_cached(&sem_sql)?;
8607    let sem_row_handler = |row: &rusqlite::Row<'_>| -> rusqlite::Result<(Memory, Option<Vec<u8>>)> {
8608        let mem = row_to_memory(row)?;
8609        // v0.7.x Form 6 — `memory_kind` was inserted between
8610        // `reflection_depth` and `embedding` in the SELECT list
8611        // above; `embedding` sits at zero-based index 17.
8612        let emb_bytes: Option<Vec<u8>> = row.get(17)?;
8613        Ok((mem, emb_bytes))
8614    };
8615    let (vis_p, vis_t, vis_u, vis_o) = prep.prefixes.clone();
8616    let sem_results: Vec<(Memory, Option<Vec<u8>>)> =
8617        if let Some(ref uri_param) = prep.source_uri_like_param {
8618            sem_stmt
8619                .query_map(
8620                    params![
8621                        prep.effective_namespace,
8622                        prep.now,
8623                        tags_filter,
8624                        since,
8625                        until,
8626                        vis_p,
8627                        vis_t,
8628                        vis_u,
8629                        vis_o,
8630                        uri_param,
8631                    ],
8632                    sem_row_handler,
8633                )?
8634                .collect::<rusqlite::Result<Vec<_>>>()?
8635        } else {
8636            sem_stmt
8637                .query_map(
8638                    params![
8639                        prep.effective_namespace,
8640                        prep.now,
8641                        tags_filter,
8642                        since,
8643                        until,
8644                        vis_p,
8645                        vis_t,
8646                        vis_u,
8647                        vis_o,
8648                    ],
8649                    sem_row_handler,
8650                )?
8651                .collect::<rusqlite::Result<Vec<_>>>()?
8652        };
8653    for (mem, emb_bytes) in sem_results {
8654        if scored.contains_key(&mem.id) {
8655            continue;
8656        }
8657        if let Some(bytes) = emb_bytes
8658            && !bytes.is_empty()
8659        {
8660            // v0.6.3.1 P2 — tolerate legacy + headed payloads; skip
8661            // (with telemetry) on malformed BLOBs so a single corrupt
8662            // row can't poison the whole semantic stage.
8663            let Ok(emb) = crate::embeddings::decode_embedding_blob(&bytes) else {
8664                tracing::warn!(
8665                    memory_id = %mem.id,
8666                    "skipping malformed embedding BLOB during semantic recall"
8667                );
8668                continue;
8669            };
8670            let cosine =
8671                match crate::embeddings::Embedder::cosine_similarity_checked(query_embedding, &emb)
8672                {
8673                    crate::embeddings::CosineComparison::Comparable(c) => f64::from(c),
8674                    crate::embeddings::CosineComparison::DimensionMismatch { .. } => {
8675                        // v0.7.0 H7 — stored embedding came from a different
8676                        // embedder model; counted (not silently dropped) so the
8677                        // aggregated warn + telemetry can flag the model switch.
8678                        *dim_mismatch_count += 1;
8679                        continue;
8680                    }
8681                };
8682            if cosine > crate::RECALL_COSINE_GATE {
8683                scored.insert(mem.id.clone(), (mem, 0.0, cosine));
8684                hnsw_candidates_count += 1;
8685            }
8686        }
8687    }
8688    Ok(hnsw_candidates_count)
8689}
8690
8691/// #871 stage 4 — adaptive blend + decay.
8692///
8693/// Per-row: normalises `fts_score` by `max_fts_score`, lerp-derives
8694/// `semantic_weight` from content length (0.50 ≤500 chars → 0.15
8695/// ≥5000 chars; embeddings lose information on long text, FTS stays
8696/// precise), and multiplies by the per-tier exponential decay from
8697/// `scoring`. Returns the ranked (sort by blended score, truncated
8698/// to `limit`) result list AND the captured per-candidate
8699/// `semantic_weight` vector for telemetry.
8700fn blend_and_rank(
8701    scored: HashMap<String, (Memory, f64, f64)>,
8702    max_fts_score: f64,
8703    scoring: &crate::config::ResolvedScoring,
8704    limit: usize,
8705) -> (Vec<(Memory, f64)>, Vec<f64>) {
8706    let now_utc = Utc::now();
8707    let mut weights: Vec<f64> = Vec::new();
8708    let mut results: Vec<(Memory, f64)> = scored
8709        .into_values()
8710        .map(|(mem, fts_score, cosine)| {
8711            let norm_fts = if max_fts_score > 0.0 {
8712                fts_score / max_fts_score
8713            } else {
8714                0.0
8715            };
8716            // B4 (R2-LOW) — clamp to i32::MAX instead of panicking when
8717            // a memory's content is >2GB. The lerp below treats anything
8718            // ≥5000 chars as the long-tail bucket regardless, so the
8719            // clamp does not change scoring; it only closes a panic
8720            // window a hostile import could otherwise reach.
8721            let content_len = f64::from(i32::try_from(mem.content.len()).unwrap_or(i32::MAX));
8722            let semantic_weight = if content_len <= 500.0 {
8723                0.50
8724            } else if content_len >= 5000.0 {
8725                0.15
8726            } else {
8727                0.50 - 0.35 * ((content_len - 500.0) / 4500.0)
8728            };
8729            weights.push(semantic_weight);
8730            let blended = semantic_weight * cosine + (1.0 - semantic_weight) * norm_fts;
8731            let age_days = chrono::DateTime::parse_from_rfc3339(&mem.created_at)
8732                .ok()
8733                .map_or(0.0, |ts| {
8734                    let secs = (now_utc - ts.with_timezone(&Utc)).num_seconds();
8735                    #[allow(clippy::cast_precision_loss)]
8736                    {
8737                        secs as f64 / crate::SECS_PER_DAY as f64
8738                    }
8739                });
8740            let decay = scoring.decay_multiplier(&mem.tier, age_days);
8741            (mem, blended * decay)
8742        })
8743        .collect();
8744    results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
8745    results.truncate(limit);
8746    (results, weights)
8747}
8748
8749/// #871 stage 5 — post-fusion ops: proximity boost (when hierarchy
8750/// expansion is active), token-budget application, and the batched
8751/// `touch_many` write that bumps `access_count` + slides the per-tier
8752/// expiry on every memory in the surviving set.
8753fn apply_recall_post_ops(
8754    conn: &Connection,
8755    results: Vec<(Memory, f64)>,
8756    hierarchy_active: bool,
8757    namespace: Option<&str>,
8758    budget_tokens: Option<usize>,
8759    short_extend: i64,
8760    mid_extend: i64,
8761) -> (Vec<(Memory, f64)>, BudgetOutcome) {
8762    let boosted = if let (true, Some(anchor)) = (hierarchy_active, namespace) {
8763        apply_proximity_boost(results, anchor)
8764    } else {
8765        results
8766    };
8767    let (budgeted, outcome) = apply_token_budget(boosted, budget_tokens);
8768    let touch_ids: Vec<&str> = budgeted.iter().map(|(mem, _)| mem.id.as_str()).collect();
8769    if let Err(e) = touch_many(conn, &touch_ids, short_extend, mid_extend) {
8770        tracing::warn!("touch_many failed for hybrid recall set: {}", e);
8771    }
8772    (budgeted, outcome)
8773}
8774
8775/// #871 stage 6 — telemetry assembly. Aggregates the per-stage
8776/// candidate counters and the mean `semantic_weight` across the
8777/// returned set (NOT the full candidate pool — operators care about
8778/// what made it out).
8779fn assemble_recall_telemetry(
8780    fts_candidates: usize,
8781    hnsw_candidates: usize,
8782    blend_weights: &[f64],
8783    embedding_dim_mismatch: usize,
8784) -> crate::models::RecallTelemetry {
8785    let blend_weight_avg = if blend_weights.is_empty() {
8786        0.0
8787    } else {
8788        #[allow(clippy::cast_precision_loss)]
8789        let n = blend_weights.len() as f64;
8790        blend_weights.iter().sum::<f64>() / n
8791    };
8792    crate::models::RecallTelemetry {
8793        fts_candidates,
8794        hnsw_candidates,
8795        blend_weight_avg,
8796        embedding_dim_mismatch,
8797    }
8798}
8799
8800#[allow(clippy::too_many_arguments)]
8801pub fn recall_hybrid_with_telemetry(
8802    conn: &Connection,
8803    context: &str,
8804    query_embedding: &[f32],
8805    namespace: Option<&str>,
8806    limit: usize,
8807    tags_filter: Option<&str>,
8808    since: Option<&str>,
8809    until: Option<&str>,
8810    vector_index: Option<&crate::hnsw::VectorIndex>,
8811    short_extend: i64,
8812    mid_extend: i64,
8813    as_agent: Option<&str>,
8814    budget_tokens: Option<usize>,
8815    scoring: &crate::config::ResolvedScoring,
8816    // v0.7.0 WT-1-E — see [`recall_with_telemetry`] for the
8817    // archived-source exclusion contract.
8818    include_archived: bool,
8819    // v0.7.0 Form 4 / Cluster-A PERF-3 — see [`recall`] for the
8820    // contract. Pushed into both the FTS and semantic branch SQL so
8821    // both pools are constrained by the partial
8822    // `idx_memories_source_uri` index, not the post-fetch Rust filter.
8823    source_uri_prefix: Option<&str>,
8824) -> Result<(
8825    Vec<(Memory, f64)>,
8826    BudgetOutcome,
8827    crate::models::RecallTelemetry,
8828)> {
8829    recall_hybrid_with_telemetry_inner(
8830        conn,
8831        context,
8832        query_embedding,
8833        namespace,
8834        limit,
8835        tags_filter,
8836        since,
8837        until,
8838        vector_index,
8839        None,
8840        short_extend,
8841        mid_extend,
8842        as_agent,
8843        budget_tokens,
8844        scoring,
8845        include_archived,
8846        source_uri_prefix,
8847    )
8848}
8849
8850/// FX-4 / PERF-2 (2026-05-26) — variant of
8851/// [`recall_hybrid_with_telemetry`] that accepts a pre-computed slice
8852/// of HNSW hits in place of the in-pipeline `idx.search()` call. The
8853/// HTTP recall handler runs the ANN walk OUTSIDE the DB mutex (the
8854/// HNSW index lives behind its own `vector_index` mutex; the DB lock
8855/// is not required for the search) and passes the result here so the
8856/// DB-mutex hold window covers only the FTS5 query + the batched
8857/// `get_many` fetch + the touch ops. Concurrent recalls overlap
8858/// their CPU-bound ANN walks instead of serialising behind the
8859/// single shared connection.
8860///
8861/// Semantics-preserving by construction: the precomputed hits feed
8862/// the same per-hit `cosine > 0.2` gate + `get_many` round-trip
8863/// inside [`semantic_phase`] that the legacy single-call path uses.
8864/// Existing callers (MCP / CLI / SAL) continue to call
8865/// [`recall_hybrid_with_telemetry`] and pay the search cost inside
8866/// the lock; only the HTTP handler swaps in the new path.
8867#[allow(clippy::too_many_arguments)]
8868pub fn recall_hybrid_with_telemetry_precomputed_hnsw(
8869    conn: &Connection,
8870    context: &str,
8871    query_embedding: &[f32],
8872    namespace: Option<&str>,
8873    limit: usize,
8874    tags_filter: Option<&str>,
8875    since: Option<&str>,
8876    until: Option<&str>,
8877    precomputed_hnsw_hits: &[crate::hnsw::VectorHit],
8878    short_extend: i64,
8879    mid_extend: i64,
8880    as_agent: Option<&str>,
8881    budget_tokens: Option<usize>,
8882    scoring: &crate::config::ResolvedScoring,
8883    include_archived: bool,
8884    source_uri_prefix: Option<&str>,
8885) -> Result<(
8886    Vec<(Memory, f64)>,
8887    BudgetOutcome,
8888    crate::models::RecallTelemetry,
8889)> {
8890    recall_hybrid_with_telemetry_inner(
8891        conn,
8892        context,
8893        query_embedding,
8894        namespace,
8895        limit,
8896        tags_filter,
8897        since,
8898        until,
8899        None,
8900        Some(precomputed_hnsw_hits),
8901        short_extend,
8902        mid_extend,
8903        as_agent,
8904        budget_tokens,
8905        scoring,
8906        include_archived,
8907        source_uri_prefix,
8908    )
8909}
8910
8911/// Inner dispatch shared by [`recall_hybrid_with_telemetry`] (legacy,
8912/// runs `idx.search()` inside the DB-lock window) and
8913/// [`recall_hybrid_with_telemetry_precomputed_hnsw`] (FX-4 / PERF-2,
8914/// caller pre-ran the ANN walk outside the DB lock). Exactly one of
8915/// `vector_index` / `precomputed_hnsw_hits` is `Some` on any given
8916/// call; the inner is private so the variant choice cannot drift.
8917#[allow(clippy::too_many_arguments)]
8918fn recall_hybrid_with_telemetry_inner(
8919    conn: &Connection,
8920    context: &str,
8921    query_embedding: &[f32],
8922    namespace: Option<&str>,
8923    limit: usize,
8924    tags_filter: Option<&str>,
8925    since: Option<&str>,
8926    until: Option<&str>,
8927    vector_index: Option<&crate::hnsw::VectorIndex>,
8928    precomputed_hnsw_hits: Option<&[crate::hnsw::VectorHit]>,
8929    short_extend: i64,
8930    mid_extend: i64,
8931    as_agent: Option<&str>,
8932    budget_tokens: Option<usize>,
8933    scoring: &crate::config::ResolvedScoring,
8934    include_archived: bool,
8935    source_uri_prefix: Option<&str>,
8936) -> Result<(
8937    Vec<(Memory, f64)>,
8938    BudgetOutcome,
8939    crate::models::RecallTelemetry,
8940)> {
8941    // Stage 1 — query preparation (FTS sanitisation, namespace
8942    // hierarchy expansion, visibility prefixes, SQL fragments).
8943    let prep = prepare_hybrid_query(
8944        context,
8945        namespace,
8946        as_agent,
8947        include_archived,
8948        source_uri_prefix,
8949    );
8950
8951    // Stage 2 — FTS5 keyword phase.
8952    let fts_results = fts_keyword_phase(conn, &prep, tags_filter, since, until, limit)?;
8953
8954    // Fusion pool (id → (memory, fts_score, cosine_score)). FTS rows
8955    // land first so their inline-fetched embedding-cosine wins; the
8956    // semantic phase only inserts ids it hasn't seen.
8957    //
8958    // PERF-6 (med/low review batch) — pre-size the map so we avoid the
8959    // 4-realloc growth path (4 → 8 → 16 → 32) on every recall. Upper
8960    // bound is fts_results.len() (already in scope) + the upcoming
8961    // semantic phase's `ann_limit = max(limit*5, 50)`; the slight
8962    // over-allocation is dwarfed by the saved zeroing + rehashing cost
8963    // at default `limit=10` where the natural growth path would have
8964    // run through ~3 reallocations.
8965    let scored_cap = fts_results
8966        .len()
8967        .saturating_add(limit.saturating_mul(5).max(50));
8968    let mut scored: HashMap<String, (Memory, f64, f64)> = HashMap::with_capacity(scored_cap);
8969    let mut max_fts_score: f64 = 1.0;
8970    let mut fts_candidates_count: usize = 0;
8971    // v0.7.0 H7 — accumulates stored embeddings whose dimensionality
8972    // disagrees with the active model's `query_embedding` across BOTH the
8973    // FTS branch (here) and the semantic linear-scan branch (below).
8974    let mut dim_mismatch_count: usize = 0;
8975    for (mem, fts_score, embedding_bytes) in fts_results {
8976        if fts_score > max_fts_score {
8977            max_fts_score = fts_score;
8978        }
8979        // Cluster-F PERF-2 — cosine from the inline-fetched embedding
8980        // bytes. Malformed BLOBs degrade to cosine=0 + warn-log so a
8981        // single corrupt row does not poison the whole recall.
8982        let cosine = match embedding_bytes {
8983            Some(bytes) if !bytes.is_empty() => {
8984                match crate::embeddings::decode_embedding_blob(&bytes) {
8985                    Ok(emb) => match crate::embeddings::Embedder::cosine_similarity_checked(
8986                        query_embedding,
8987                        &emb,
8988                    ) {
8989                        crate::embeddings::CosineComparison::Comparable(c) => f64::from(c),
8990                        crate::embeddings::CosineComparison::DimensionMismatch { .. } => {
8991                            // v0.7.0 H7 — embedder-model switch: count the
8992                            // stale-dimension row instead of letting it score a
8993                            // silent 0.0 cosine. FTS keyword score still applies.
8994                            dim_mismatch_count += 1;
8995                            0.0
8996                        }
8997                    },
8998                    Err(_) => {
8999                        tracing::warn!(
9000                            memory_id = %mem.id,
9001                            "skipping malformed embedding BLOB during hybrid recall (FTS branch)"
9002                        );
9003                        0.0
9004                    }
9005                }
9006            }
9007            _ => 0.0,
9008        };
9009        scored.insert(mem.id.clone(), (mem, fts_score, cosine));
9010        fts_candidates_count += 1;
9011    }
9012
9013    // Stage 3 — semantic phase (HNSW when available, linear-scan
9014    // fallback). When `precomputed_hnsw_hits` is supplied the search
9015    // step is skipped (already paid outside the DB lock); otherwise
9016    // the in-pipeline `idx.search()` runs as before.
9017    let hnsw_candidates_count = semantic_phase(
9018        conn,
9019        &prep,
9020        query_embedding,
9021        vector_index,
9022        precomputed_hnsw_hits,
9023        namespace,
9024        tags_filter,
9025        since,
9026        until,
9027        limit,
9028        include_archived,
9029        source_uri_prefix,
9030        &mut scored,
9031        &mut dim_mismatch_count,
9032    )?;
9033
9034    // v0.7.0 H7 — de-silence embedder-model switches. A non-zero count means
9035    // stored embeddings were produced by a different model (different
9036    // dimensionality) than the active embedder, so their semantic signal was
9037    // forced to 0.0 for this query. One aggregated warn per recall (not per
9038    // row) tells the operator the affected rows need re-embedding.
9039    if dim_mismatch_count > 0 {
9040        tracing::warn!(
9041            dim_mismatch_count,
9042            active_query_dim = query_embedding.len(),
9043            "recall skipped {dim_mismatch_count} stored embedding(s) with mismatched \
9044             dimensionality — the embedder model appears to have changed; re-embed the \
9045             affected memories to restore their semantic recall signal"
9046        );
9047    }
9048
9049    // Stage 4 — adaptive blend + per-tier decay.
9050    let (results, blend_weights) = blend_and_rank(scored, max_fts_score, scoring, limit);
9051
9052    // Stage 5 — proximity boost + token budget + batched touch.
9053    let (budgeted, outcome) = apply_recall_post_ops(
9054        conn,
9055        results,
9056        prep.hierarchy_active,
9057        namespace,
9058        budget_tokens,
9059        short_extend,
9060        mid_extend,
9061    );
9062
9063    // Stage 6 — telemetry assembly.
9064    let telemetry = assemble_recall_telemetry(
9065        fts_candidates_count,
9066        hnsw_candidates_count,
9067        &blend_weights,
9068        dim_mismatch_count,
9069    );
9070
9071    Ok((budgeted, outcome, telemetry))
9072}
9073
9074/// Checkpoint WAL for clean shutdown.
9075pub fn checkpoint(conn: &Connection) -> Result<()> {
9076    conn.pragma_update(None, "wal_checkpoint", "TRUNCATE")?;
9077    Ok(())
9078}
9079
9080// ---------------------------------------------------------------------------
9081// Phase 3 foundation (issue #224) — sync_state helpers.
9082//
9083// These are additive: they do not change how the existing `ai-memory sync`
9084// command behaves in v0.6.0 GA. They exist so HTTP sync endpoints and the
9085// CRDT-lite merge follow-up can durably track "last updated_at seen from
9086// peer X" per local agent.
9087// ---------------------------------------------------------------------------
9088
9089/// Record the latest `updated_at` this local agent has observed from `peer_id`.
9090/// Monotonic by timestamp — older writes do not overwrite newer ones.
9091/// Lazily creates the row on first observation.
9092pub fn sync_state_observe(
9093    conn: &Connection,
9094    agent_id: &str,
9095    peer_id: &str,
9096    seen_at: &str,
9097) -> Result<()> {
9098    let now = Utc::now().to_rfc3339();
9099    conn.execute(
9100        "INSERT INTO sync_state (agent_id, peer_id, last_seen_at, last_pulled_at) \
9101         VALUES (?1, ?2, ?3, ?4) \
9102         ON CONFLICT(agent_id, peer_id) DO UPDATE SET \
9103            last_seen_at = CASE WHEN excluded.last_seen_at > last_seen_at \
9104                                THEN excluded.last_seen_at \
9105                                ELSE last_seen_at END, \
9106            last_pulled_at = excluded.last_pulled_at",
9107        params![agent_id, peer_id, seen_at, now],
9108    )?;
9109    Ok(())
9110}
9111
9112/// Load the full vector clock for `agent_id` — the set of
9113/// (`peer_id` -> `last_seen_at`) this local agent tracks.
9114pub fn sync_state_load(conn: &Connection, agent_id: &str) -> Result<crate::models::VectorClock> {
9115    let mut stmt =
9116        conn.prepare("SELECT peer_id, last_seen_at FROM sync_state WHERE agent_id = ?1")?;
9117    let rows = stmt.query_map(params![agent_id], |row| {
9118        Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
9119    })?;
9120    let mut clock = crate::models::VectorClock::default();
9121    for row in rows {
9122        let (peer, at) = row?;
9123        clock.entries.insert(peer, at);
9124    }
9125    Ok(clock)
9126}
9127
9128/// Look up this peer's last-push watermark for `peer_id`. Returns `None`
9129/// if we've never successfully pushed to them (foundation-era rows also
9130/// return `None` because the column was added in schema v12).
9131#[must_use]
9132#[allow(dead_code)] // called via lib crate (daemon_runtime); bin sees it as unused
9133pub fn sync_state_last_pushed(conn: &Connection, agent_id: &str, peer_id: &str) -> Option<String> {
9134    conn.query_row(
9135        "SELECT last_pushed_at FROM sync_state WHERE agent_id = ?1 AND peer_id = ?2",
9136        params![agent_id, peer_id],
9137        |r| r.get::<_, Option<String>>(0),
9138    )
9139    .ok()
9140    .flatten()
9141}
9142
9143/// Record that local memories up to `updated_at = pushed_at` have been
9144/// accepted by `peer_id`. Creates the row if it doesn't exist; monotonic.
9145#[allow(dead_code)] // called via lib crate (daemon_runtime); bin sees it as unused
9146pub fn sync_state_record_push(
9147    conn: &Connection,
9148    agent_id: &str,
9149    peer_id: &str,
9150    pushed_at: &str,
9151) -> Result<()> {
9152    let now = Utc::now().to_rfc3339();
9153    conn.execute(
9154        "INSERT INTO sync_state (agent_id, peer_id, last_seen_at, last_pulled_at, last_pushed_at) \
9155         VALUES (?1, ?2, ?3, ?3, ?4) \
9156         ON CONFLICT(agent_id, peer_id) DO UPDATE SET \
9157            last_pushed_at = CASE \
9158                WHEN excluded.last_pushed_at IS NULL THEN last_pushed_at \
9159                WHEN last_pushed_at IS NULL THEN excluded.last_pushed_at \
9160                WHEN excluded.last_pushed_at > last_pushed_at THEN excluded.last_pushed_at \
9161                ELSE last_pushed_at END",
9162        params![agent_id, peer_id, now, pushed_at],
9163    )?;
9164    Ok(())
9165}
9166
9167/// Return memories whose `updated_at > since`, ordered by `updated_at`
9168/// ascending. Used by `GET /api/v1/sync/since` to stream incremental
9169/// updates to a peer. Caps at `limit` rows (caller-chosen pagination).
9170pub fn memories_updated_since(
9171    conn: &Connection,
9172    since: Option<&str>,
9173    limit: usize,
9174) -> Result<Vec<Memory>> {
9175    // #1028 (HIGH, 2026-05-21) — REVERTED 2026-05-21 via QC pass-2.
9176    // The first-pass fix added a SAL-level
9177    // `COALESCE(scope, 'private') <> 'private'` filter here on the
9178    // grounds of "defense-in-depth". That was wrong: the federation
9179    // visibility gate (federation_legacy_row_visibility_978 + the
9180    // dispatch logic in src/federation/) is a RICHER contract than
9181    // pure scope=private — it handles owner-signed-private projection
9182    // back to the owner peer, inbox-target private projection, and
9183    // federation_share opt-in on legacy rows. The SAL-level filter
9184    // bypassed those branches and broke 5 federation tests. The
9185    // visibility gate runs DOWNSTREAM of this method and already
9186    // refuses to project rows that shouldn't federate. The proper
9187    // fix would belong in the federation handler (or the visibility
9188    // gate audit) — tracked under follow-up rather than at the SAL.
9189    // #1476 — sargable split, mirrors src/store/postgres.rs. The former
9190    // `(?1 IS NULL OR updated_at > ?1)` predicate is non-sargable: SQLite
9191    // cannot use `idx_memories_updated_at` to satisfy an OR-NULL branch,
9192    // so it falls back to a full table scan. Splitting on `since` lets
9193    // the None path read in index order (no predicate) and the Some path
9194    // use the index as a range bound (`updated_at > ?1`), each with
9195    // early-stop under the LIMIT.
9196    const COLS: &str = "SELECT id, tier, namespace, title, content, tags, priority, confidence, \
9197                source, access_count, created_at, updated_at, last_accessed_at, \
9198                expires_at, metadata \
9199         FROM memories ";
9200    let rows = match since {
9201        None => {
9202            let mut stmt = conn.prepare(&format!("{COLS} ORDER BY updated_at ASC LIMIT ?1"))?;
9203            stmt.query_map(params![limit], row_to_memory)?
9204                .collect::<rusqlite::Result<Vec<_>>>()
9205        }
9206        Some(s) => {
9207            let mut stmt = conn.prepare(&format!(
9208                "{COLS} WHERE updated_at > ?1 ORDER BY updated_at ASC LIMIT ?2"
9209            ))?;
9210            stmt.query_map(params![s, limit], row_to_memory)?
9211                .collect::<rusqlite::Result<Vec<_>>>()
9212        }
9213    };
9214    rows.map_err(Into::into)
9215}
9216
9217/// Deep health check — verifies DB is accessible and FTS is functional.
9218pub fn health_check(conn: &Connection) -> Result<bool> {
9219    let _: i64 = conn.query_row("SELECT COUNT(*) FROM memories", [], |r| r.get(0))?;
9220    conn.execute(
9221        "INSERT INTO memories_fts(memories_fts) VALUES('integrity-check')",
9222        [],
9223    )?;
9224    Ok(true)
9225}
9226
9227// ---------------------------------------------------------------------------
9228// Namespace standards
9229// ---------------------------------------------------------------------------
9230
9231/// Set the standard memory for a namespace, with optional parent for rule layering.
9232pub fn set_namespace_standard(
9233    conn: &Connection,
9234    namespace: &str,
9235    standard_id: &str,
9236    parent: Option<&str>,
9237) -> Result<()> {
9238    // Verify the memory exists (but allow cross-namespace — shared policy)
9239    let _mem = get(conn, standard_id)?.ok_or_else(|| {
9240        // #962 typed envelope — 404 NOT_FOUND.
9241        anyhow::Error::new(StorageError::MemoryNotFound {
9242            id: standard_id.to_string(),
9243            role: None,
9244        })
9245    })?;
9246    // Resolve parent: explicit > auto-detect by `-` prefix > none
9247    let resolved_parent = match parent {
9248        Some(p) => {
9249            if p == namespace {
9250                // #962 typed envelope.
9251                return Err(anyhow::Error::new(StorageError::InvalidArgument {
9252                    reason: "namespace cannot be its own parent".to_string(),
9253                }));
9254            }
9255            Some(p.to_string())
9256        }
9257        None => auto_detect_parent(conn, namespace),
9258    };
9259    let now = chrono::Utc::now().to_rfc3339();
9260    conn.execute(
9261        "INSERT INTO namespace_meta (namespace, standard_id, updated_at, parent_namespace)
9262         VALUES (?1, ?2, ?3, ?4)
9263         ON CONFLICT(namespace) DO UPDATE SET standard_id = ?2, updated_at = ?3, parent_namespace = ?4",
9264        params![namespace, standard_id, now, resolved_parent],
9265    )?;
9266    Ok(())
9267}
9268
9269/// Auto-detect parent namespace by `-` prefix.
9270/// "ai-memory-tests" → checks "ai-memory" → checks "ai" → first match wins.
9271fn auto_detect_parent(conn: &Connection, namespace: &str) -> Option<String> {
9272    let mut candidate = namespace.to_string();
9273    while let Some(pos) = candidate.rfind('-') {
9274        candidate.truncate(pos);
9275        if candidate.is_empty() {
9276            break;
9277        }
9278        // Check if this candidate has a standard set
9279        if get_namespace_standard(conn, &candidate)
9280            .ok()
9281            .flatten()
9282            .is_some()
9283        {
9284            return Some(candidate);
9285        }
9286    }
9287    None
9288}
9289
9290/// Get the standard memory ID for a namespace.
9291#[allow(clippy::unnecessary_wraps)]
9292pub fn get_namespace_standard(conn: &Connection, namespace: &str) -> Result<Option<String>> {
9293    let result = conn
9294        .query_row(
9295            "SELECT standard_id FROM namespace_meta WHERE namespace = ?1",
9296            params![namespace],
9297            |r| r.get(0),
9298        )
9299        .ok();
9300    Ok(result)
9301}
9302
9303/// Get the parent namespace for a given namespace.
9304pub fn get_namespace_parent(conn: &Connection, namespace: &str) -> Option<String> {
9305    conn.query_row(
9306        "SELECT parent_namespace FROM namespace_meta WHERE namespace = ?1 AND parent_namespace IS NOT NULL",
9307        params![namespace],
9308        |r| r.get(0),
9309    )
9310    .ok()
9311}
9312
9313/// v0.6.2 (S35): read the full `namespace_meta` row for a namespace so the
9314/// caller can fan it out to peers. Returns `None` when no standard is set.
9315/// Mirrors the (`namespace`, `standard_id`, `parent_namespace`, `updated_at`)
9316/// tuple used by `set_namespace_standard`.
9317#[allow(clippy::unnecessary_wraps)]
9318pub fn get_namespace_meta_entry(
9319    conn: &Connection,
9320    namespace: &str,
9321) -> Result<Option<crate::models::NamespaceMetaEntry>> {
9322    let row = conn
9323        .query_row(
9324            "SELECT namespace, standard_id, parent_namespace, updated_at
9325             FROM namespace_meta WHERE namespace = ?1",
9326            params![namespace],
9327            |r| {
9328                Ok(crate::models::NamespaceMetaEntry {
9329                    namespace: r.get(0)?,
9330                    standard_id: r.get(1)?,
9331                    parent_namespace: r.get(2)?,
9332                    updated_at: r.get::<_, Option<String>>(3)?.unwrap_or_default(),
9333                })
9334            },
9335        )
9336        .ok();
9337    Ok(row)
9338}
9339
9340/// Clear the standard for a namespace.
9341pub fn clear_namespace_standard(conn: &Connection, namespace: &str) -> Result<bool> {
9342    let changed = conn.execute(
9343        "DELETE FROM namespace_meta WHERE namespace = ?1",
9344        params![namespace],
9345    )?;
9346    Ok(changed > 0)
9347}
9348
9349// ---------------------------------------------------------------------------
9350// Task 1.9 — governance enforcement + pending_actions CRUD
9351// ---------------------------------------------------------------------------
9352
9353/// Build the namespace inheritance chain in **top-down** order
9354/// (`["*", root, ..., leaf]`). Mirrors and replaces the historical
9355/// `mcp::build_namespace_chain` so non-MCP call sites (db-layer
9356/// governance enforcement, HTTP handlers, future hook pipelines) can
9357/// reuse the same walk.
9358///
9359/// Properties (preserved from the prior MCP-only implementation):
9360/// - cycle-safe (visited set + bounded by `MAX_EXPLICIT_DEPTH = 8`)
9361/// - includes the global standard `*` as the most-general entry
9362/// - prepends explicit `namespace_meta.parent_namespace` ancestors
9363///   before the `/`-derived hierarchy, supporting flat→hierarchical
9364///   linking (e.g. legacy `ai-memory` → `ai-memory-mcp`)
9365///
9366/// The MCP layer's display path consumes this top-down. The governance
9367/// resolver in [`resolve_governance_policy`] reverses it for a
9368/// leaf-first walk (most-specific wins).
9369#[must_use]
9370pub fn build_namespace_chain(conn: &Connection, namespace: &str) -> Vec<String> {
9371    const MAX_EXPLICIT_DEPTH: usize = 8;
9372    let mut chain: Vec<String> = Vec::new();
9373
9374    if namespace == "*" {
9375        chain.push("*".to_string());
9376        return chain;
9377    }
9378
9379    // Always start with the global standard — most general.
9380    chain.push("*".to_string());
9381
9382    // 1. /-derived ancestors. `namespace_ancestors` returns most-specific-first;
9383    //    reverse for top-down (root ancestor first, then namespace itself last).
9384    let mut hierarchy_chain: Vec<String> = crate::models::namespace_ancestors(namespace)
9385        .into_iter()
9386        .rev()
9387        .collect();
9388
9389    // 2. If the ROOTmost of the /-chain has an explicit `namespace_meta` parent,
9390    //    prepend that chain (bounded by MAX_EXPLICIT_DEPTH + cycle-safe).
9391    //    Supports legacy flat namespaces (e.g. `ai-memory` → `ai-memory-mcp`).
9392    if let Some(root) = hierarchy_chain.first().cloned() {
9393        let mut explicit_above: Vec<String> = Vec::new();
9394        let mut current = root;
9395        for _ in 0..MAX_EXPLICIT_DEPTH {
9396            match get_namespace_parent(conn, &current) {
9397                Some(p)
9398                    if p != "*"
9399                        && !explicit_above.contains(&p)
9400                        && !hierarchy_chain.contains(&p) =>
9401                {
9402                    explicit_above.push(p.clone());
9403                    current = p;
9404                }
9405                _ => break,
9406            }
9407        }
9408        // `explicit_above` is [immediate-explicit-parent, grandparent, ...];
9409        // reverse to prepend in top-down order.
9410        for p in explicit_above.into_iter().rev() {
9411            chain.push(p);
9412        }
9413    }
9414
9415    // 3. Append the /-derived chain (top-down).
9416    for entry in hierarchy_chain.drain(..) {
9417        if !chain.contains(&entry) {
9418            chain.push(entry);
9419        }
9420    }
9421
9422    chain
9423}
9424
9425/// Read the explicit governance policy attached to a single namespace's
9426/// standard memory. Does **not** walk the inheritance chain — callers
9427/// that want hierarchical resolution should use
9428/// [`resolve_governance_policy`] instead.
9429///
9430/// **NHI-P4-T19 (v0.7.0 NHI testing):** returns `None` when the
9431/// standard carries no explicit `metadata.governance`. Operators who
9432/// want enforcement-by-default can either (a) write
9433/// `metadata.governance = {"write": "owner", ...}` into their standard
9434/// memory, or (b) use the
9435/// [`crate::models::GovernancePolicy::default_for_managed_namespace`]
9436/// helper as a starting template. Changing the implicit fallback to
9437/// Owner is deferred to v0.7.1 because it can break inheritance chains
9438/// where a parent's standard was registered under a distinct agent
9439/// identity from descendant operations.
9440fn read_namespace_policy(conn: &Connection, namespace: &str) -> Option<GovernancePolicy> {
9441    let standard_id = get_namespace_standard(conn, namespace).ok()??;
9442    let mem = get(conn, &standard_id).ok()??;
9443    match GovernancePolicy::from_metadata(&mem.metadata) {
9444        Some(Ok(p)) => Some(p),
9445        // #1384 — observability for stored-corruption. The write path
9446        // (`memory_namespace_set_standard` → typed `GovernancePolicy`
9447        // deserialise) rejects unknown enum variants and malformed
9448        // structures (verified live against alice: `write: "approval"`
9449        // returns a typed 400 error). A parse error here therefore
9450        // means the stored JSON drifted out-of-band: direct SQL update,
9451        // migration corruption, older binary writing newer schema,
9452        // etc. Pre-#1384 this arm silently returned `None` and the
9453        // inheritance walk continued to the parent — which may be
9454        // totally permissive, silently downgrading the operator's
9455        // intent. Surface the drift via tracing WARN so operators
9456        // can grep `ai_memory::governance::policy_read` for the lag.
9457        // We still return `None` (don't fail-CLOSED at the read site
9458        // — that could lock callers out of unrelated namespaces) but
9459        // operators now have a structured signal to investigate.
9460        Some(Err(parse_err)) => {
9461            tracing::warn!(
9462                target: "ai_memory::governance::policy_read",
9463                namespace = %namespace,
9464                standard_id = %standard_id,
9465                error = %parse_err,
9466                "stored metadata.governance failed typed deserialise — \
9467                 inheritance walk will continue past this namespace as \
9468                 if no policy were set. Likely cause: direct SQL update, \
9469                 older binary, or corrupted migration. Operator should \
9470                 re-run `memory_namespace_set_standard` to restore the \
9471                 typed shape."
9472            );
9473            None
9474        }
9475        None => None,
9476    }
9477}
9478
9479/// Resolve the governance policy that gates actions in `namespace`.
9480///
9481/// v0.6.3.1 (P4, audit G1): walks the inheritance chain leaf-first and
9482/// returns the most-specific policy. This closes the audit's
9483/// highest-severity finding — prior to this fix the resolver consulted
9484/// only the leaf, which left children of governed parents (e.g.
9485/// `alphaone/secure/team-a` under an `Approve` policy at
9486/// `alphaone/secure`) **completely ungoverned** despite the
9487/// architecture page T2 promising "Hierarchical policy inheritance
9488/// (default at `org/`, overridable at `org/team/`)".
9489///
9490/// **Walk semantics** (carefully — easy to get subtly wrong):
9491///   1. Build the chain via [`build_namespace_chain`] (top-down) and
9492///      reverse it so we walk leaf → root. The leaf is the namespace
9493///      we were asked about; the root is the global `*` standard.
9494///   2. At each level `k`, look up the policy attached to that
9495///      namespace's standard memory.
9496///      - If a policy **exists**, it is the most-specific match seen
9497///        so far. Return it immediately. ("Most specific wins.")
9498///      - If a policy **also says `inherit: false`**, this is already
9499///        the same return path — we never reach the parent because
9500///        we already returned.
9501///   3. If level `k` has **no policy at all**, keep walking — this is
9502///      the implicit-inherit branch (no policy means "I don't override
9503///      my parent").
9504///   4. If we walk off the top of the chain without finding a policy,
9505///      return `None` (enforcement remains opt-in for namespaces with
9506///      no governance configured anywhere in the chain).
9507///
9508/// **Where does `inherit: false` actually do work?** When the most-
9509/// specific policy we hit on the walk has `inherit: false`. That
9510/// policy is returned (same return point as the inherit=true case),
9511/// so its rules govern the action; the false flag is what
9512/// **conceptually stops** the walk above it, but the implementation
9513/// stops the walk simply by virtue of having found a policy. The flag
9514/// matters most as a documented contract surfaced to operators: "a
9515/// policy here authoritatively replaces, not extends, what's above."
9516/// The flag also flows through the queued-pending-action approver
9517/// resolution so consensus/agent rules don't accidentally re-walk to
9518/// a parent.
9519///
9520/// Cycle-safety is inherited from `build_namespace_chain`
9521/// (`MAX_EXPLICIT_DEPTH = 8` + visited set). No new cache is
9522/// introduced — profile-driven optimization is a v0.7 item.
9523pub fn resolve_governance_policy(conn: &Connection, namespace: &str) -> Option<GovernancePolicy> {
9524    // build_namespace_chain returns top-down (`["*", root, ..., leaf]`).
9525    // Governance resolution wants leaf-first (most specific first), so
9526    // we reverse before walking.
9527    let chain = build_namespace_chain(conn, namespace);
9528    for level in chain.into_iter().rev() {
9529        // Most-specific match wins. Returning immediately here means
9530        // an explicit policy at the leaf (or any descendant level
9531        // with a policy) authoritatively overrides anything above —
9532        // which is precisely the inherit=false semantic, applied
9533        // implicitly. The inherit=false flag is preserved on the
9534        // returned policy so callers (e.g. the pending_action
9535        // approver resolver) don't accidentally re-walk to a parent.
9536        if let Some(policy) = read_namespace_policy(conn, &level) {
9537            return Some(policy);
9538        }
9539        // Implicit branch: no policy at this level → keep walking
9540        // toward the root. This is the "default inherit" behavior
9541        // that closes G1.
9542    }
9543    None
9544}
9545
9546/// v0.7.0 L1-8 — read `governance.require_approval_above_depth` from the
9547/// namespace's most-specific governance metadata blob, leaf-first.
9548///
9549/// This is intentionally a free function (not a field on
9550/// [`GovernancePolicy`]) to avoid introducing a new required struct field
9551/// that would need updating at every `GovernancePolicy { … }` literal
9552/// in the codebase. The existing `GovernancePolicy` struct represents
9553/// the resolved enforcement policy; this field is a pre-write interception
9554/// threshold that lives beside it, not inside it.
9555///
9556/// Returns `None` when:
9557/// - no namespace standard is configured at any level of the chain, OR
9558/// - the standard's `metadata.governance` blob is absent or null, OR
9559/// - the blob does not contain a `require_approval_above_depth` key, OR
9560/// - the key is present but `null`.
9561///
9562/// Returns `Some(threshold)` when the key is a non-null unsigned integer.
9563/// Callers in `memory_reflect` compare `proposed_depth > threshold` and
9564/// queue a `pending_actions` row when the condition is true.
9565pub fn resolve_require_approval_above_depth(conn: &Connection, namespace: &str) -> Option<u32> {
9566    let chain = build_namespace_chain(conn, namespace);
9567    for level in chain.into_iter().rev() {
9568        let standard_id = match get_namespace_standard(conn, &level) {
9569            Ok(Some(id)) => id,
9570            _ => continue,
9571        };
9572        let mem = match get(conn, &standard_id) {
9573            Ok(Some(m)) => m,
9574            _ => continue,
9575        };
9576        // Governance blob must exist and not be null.
9577        let gov = match mem.metadata.get(crate::META_KEY_GOVERNANCE) {
9578            Some(g) if !g.is_null() => g,
9579            _ => continue,
9580        };
9581        // The field is optional inside the blob — `None` means skip this
9582        // level and keep walking (inherit semantics: an ancestor that sets
9583        // the field governs if the leaf does not override it).
9584        if let Some(threshold) = gov.get("require_approval_above_depth") {
9585            if let Some(n) = threshold.as_u64() {
9586                // QUAL-3 (FX-5): operator-controlled metadata. Reject the
9587                // silent `n as u32` truncation that would let an operator
9588                // who sets `require_approval_above_depth = 2^32` (which
9589                // would silently land as 0) DISABLE the approval gate
9590                // entirely (depth > 0 was the original intent, but
9591                // `low_32(2^32) == 0` makes `depth > 0` the actual gate;
9592                // any value ≥ 2^32 whose low-32 bits are also high turns
9593                // off the gate). Fail-CLOSED on overflow: saturate to 0
9594                // so EVERY depth triggers approval — this is the
9595                // conservative posture per CLAUDE.md K3/K9 governance
9596                // discipline. The companion regression test at
9597                // `tests/governance_metadata_no_silent_truncation.rs`
9598                // pins this behaviour.
9599                return Some(u32::try_from(n).unwrap_or(0));
9600            }
9601            // Key present but null → no gate at this level; keep walking.
9602        }
9603        // Policy found at this level but no require_approval_above_depth
9604        // key → no gate; stop walking (same leaf-first-wins semantics as
9605        // the main resolve_governance_policy walker: a leaf policy that
9606        // doesn't set the field takes precedence over a parent that does).
9607        if GovernancePolicy::from_metadata(&mem.metadata).is_some() {
9608            return None;
9609        }
9610    }
9611    None
9612}
9613
9614/// v0.7.0 L2-6 — read `governance.skill_promotion_min_depth` from the
9615/// namespace's most-specific governance metadata blob, leaf-first.
9616///
9617/// Mirrors [`resolve_require_approval_above_depth`] in shape and walk
9618/// semantics: it's a free function (not a [`GovernancePolicy`] field)
9619/// so it can land without churning every `GovernancePolicy { … }`
9620/// literal in the codebase, and it's a per-namespace threshold rather
9621/// than part of the resolved enforcement policy.
9622///
9623/// Returns `None` when:
9624/// - no namespace standard is configured at any level of the chain, OR
9625/// - the standard's `metadata.governance` blob is absent or null, OR
9626/// - the blob does not contain a `skill_promotion_min_depth` key, OR
9627/// - the key is present but `null`.
9628///
9629/// Returns `Some(threshold)` when the key is a non-null unsigned integer.
9630/// The `memory_skill_promote_from_reflection` MCP tool falls back to the
9631/// compiled-in default of `1` when this returns `None` — a reflection
9632/// must have at least one level of synthesised insight (depth ≥ 1)
9633/// before it can be promoted to a reusable skill.
9634pub fn resolve_skill_promotion_min_depth(conn: &Connection, namespace: &str) -> Option<u32> {
9635    let chain = build_namespace_chain(conn, namespace);
9636    for level in chain.into_iter().rev() {
9637        let standard_id = match get_namespace_standard(conn, &level) {
9638            Ok(Some(id)) => id,
9639            _ => continue,
9640        };
9641        let mem = match get(conn, &standard_id) {
9642            Ok(Some(m)) => m,
9643            _ => continue,
9644        };
9645        let gov = match mem.metadata.get(crate::META_KEY_GOVERNANCE) {
9646            Some(g) if !g.is_null() => g,
9647            _ => continue,
9648        };
9649        if let Some(threshold) = gov.get("skill_promotion_min_depth") {
9650            if let Some(n) = threshold.as_u64() {
9651                // QUAL-3 (FX-5): operator-controlled metadata. Reject the
9652                // silent `n as u32` truncation that would let an operator
9653                // who sets `skill_promotion_min_depth = 2^32 + k` silently
9654                // land as `k` after truncation — including the
9655                // catastrophic `k == 0` case which would mean "every
9656                // reflection can be promoted to a skill regardless of
9657                // depth". Fail-CLOSED on overflow: saturate to `u32::MAX`
9658                // so NO reflection can be promoted (the
9659                // `actual_depth_u32 < min_depth` check at
9660                // `src/mcp/tools/skill_promote.rs:174` becomes
9661                // permanently true). The companion regression test at
9662                // `tests/governance_metadata_no_silent_truncation.rs`
9663                // pins this behaviour.
9664                return Some(u32::try_from(n).unwrap_or(u32::MAX));
9665            }
9666            // Key present but null → no override at this level; keep walking.
9667        }
9668        // Policy found at this level but no skill_promotion_min_depth
9669        // key → no override; stop walking (leaf-first-wins semantics).
9670        if GovernancePolicy::from_metadata(&mem.metadata).is_some() {
9671            return None;
9672        }
9673    }
9674    None
9675}
9676
9677/// Return true if `agent_id` matches a registered agent in `_agents`.
9678pub fn is_registered_agent(conn: &Connection, agent_id: &str) -> bool {
9679    let title = crate::models::agent_registration_title(agent_id);
9680    conn.query_row(
9681        "SELECT 1 FROM memories WHERE namespace = ?1 AND title = ?2",
9682        params![AGENTS_NAMESPACE, &title],
9683        |r| r.get::<_, i64>(0),
9684    )
9685    .is_ok()
9686}
9687
9688/// Evaluate a governance level against caller context.
9689/// - `action`: the [`GovernedAction`] under evaluation; threaded into the
9690///   [`crate::governance::GovernanceRefusal`] envelope so refusal Display
9691///   includes the action verb without the caller having to wrap.
9692/// - `namespace`: target namespace; attached to the refusal envelope.
9693/// - `memory_owner`: the existing memory's `metadata.agent_id` (delete/promote paths).
9694///   Pass `None` for store operations.
9695/// - `namespace_owner`: the `metadata.agent_id` of the namespace's standard memory,
9696///   used as the "owner" for store operations. Resolved once by the caller.
9697///
9698/// #963 Phase 2 — `Deny` returns a typed
9699/// [`crate::governance::GovernanceRefusal`]. The `reason` field carries
9700/// the human-readable phrase WITHOUT the `"governance: "` prefix (the
9701/// envelope's `Display` adds the `"<action> denied by governance: "`
9702/// header). Pre-#963 the same path produced
9703/// `Deny(format!("governance: ..."))` which doubled the prefix when
9704/// consumers re-wrapped via `deny_message`.
9705fn evaluate_level(
9706    conn: &Connection,
9707    action: GovernedAction,
9708    namespace: &str,
9709    level: &GovernanceLevel,
9710    agent_id: &str,
9711    memory_owner: Option<&str>,
9712    namespace_owner: Option<&str>,
9713) -> GovernanceDecision {
9714    use crate::governance::GovernanceRefusal;
9715    match level {
9716        GovernanceLevel::Any => GovernanceDecision::Allow,
9717        GovernanceLevel::Registered => {
9718            if is_registered_agent(conn, agent_id) {
9719                GovernanceDecision::Allow
9720            } else {
9721                GovernanceDecision::Deny(
9722                    GovernanceRefusal::new(
9723                        action,
9724                        GovernanceLevel::Registered,
9725                        agent_id,
9726                        format!("caller '{agent_id}' is not a registered agent"),
9727                    )
9728                    .with_namespace(namespace),
9729                )
9730            }
9731        }
9732        GovernanceLevel::Owner => {
9733            let owner = memory_owner.or(namespace_owner);
9734            match owner {
9735                Some(o) if o == agent_id => GovernanceDecision::Allow,
9736                Some(o) => GovernanceDecision::Deny(
9737                    GovernanceRefusal::new(
9738                        action,
9739                        GovernanceLevel::Owner,
9740                        agent_id,
9741                        format!("caller '{agent_id}' is not the owner ('{o}')"),
9742                    )
9743                    .with_namespace(namespace)
9744                    .with_owner(o),
9745                ),
9746                None => GovernanceDecision::Deny(
9747                    GovernanceRefusal::new(
9748                        action,
9749                        GovernanceLevel::Owner,
9750                        agent_id,
9751                        "owner-level action has no resolvable owner",
9752                    )
9753                    .with_namespace(namespace),
9754                ),
9755            }
9756        }
9757        GovernanceLevel::Approve => {
9758            // Caller translates this into a queued pending_action — the enforcement
9759            // helpers below own the queueing so the db layer is the single source
9760            // of truth for pending ids.
9761            GovernanceDecision::Pending(String::new())
9762        }
9763    }
9764}
9765
9766/// Resolve the namespace-owner (`metadata.agent_id` of the namespace's
9767/// standard memory) used for `Owner`-level store checks.
9768///
9769/// **F1 (v0.7.0 round-2-fixes):** the lookup now walks the inheritance
9770/// chain leaf-first via [`build_namespace_chain`], returning the
9771/// `agent_id` of the first standard memory found. This mirrors
9772/// [`resolve_governance_policy`]'s semantics so that when a deep child
9773/// inherits a parent's `governance.write = owner` policy, the owner
9774/// check resolves to the parent's standard owner — matching operator
9775/// intuition that the helper means "owner of the effective policy at
9776/// this namespace".
9777///
9778/// Without this walk, deep children with no standard of their own
9779/// triggered `governance: owner-level action has no resolvable owner`
9780/// despite the parent's policy being correctly inherited.
9781fn namespace_owner(conn: &Connection, namespace: &str) -> Option<String> {
9782    // build_namespace_chain returns top-down (`["*", root, ..., leaf]`).
9783    // We want leaf-first so the most-specific owner wins, matching how
9784    // resolve_governance_policy picks up the most-specific policy.
9785    let chain = build_namespace_chain(conn, namespace);
9786    for level in chain.into_iter().rev() {
9787        let Some(standard_id) = get_namespace_standard(conn, &level).ok().flatten() else {
9788            continue;
9789        };
9790        let Some(mem) = get(conn, &standard_id).ok().flatten() else {
9791            continue;
9792        };
9793        if let Some(owner) = mem
9794            .metadata
9795            .get("agent_id")
9796            .and_then(|v| v.as_str())
9797            .map(str::to_string)
9798        {
9799            return Some(owner);
9800        }
9801    }
9802    None
9803}
9804
9805/// Enforce governance for a `GovernedAction`. On [`GovernanceDecision::Pending`],
9806/// a row is inserted into `pending_actions` and the returned `pending_id` is
9807/// embedded in the decision.
9808///
9809/// v0.7.0 K3 — the gate now consults
9810/// [`crate::config::active_permissions_mode`] and branches on the
9811/// active [`crate::config::PermissionsMode`]:
9812///
9813/// - [`PermissionsMode::Off`]: skip the gate entirely. Returns `Allow`
9814///   without touching `resolve_governance_policy` or `pending_actions`.
9815/// - [`PermissionsMode::Advisory`]: resolve the policy, log any
9816///   would-be `Deny`/`Pending` outcome at `WARN`, then return `Allow`.
9817///   No `pending_actions` row is queued. This is the v0.7.0 default —
9818///   it preserves the v0.6.x posture for upgrading operators where
9819///   governance metadata was advertised but the wider permission
9820///   system was honest-disclosed as advisory.
9821/// - [`PermissionsMode::Enforce`]: the historical strict path.
9822///   `Deny`/`Pending` decisions surface verbatim and the
9823///   `pending_actions` row is queued. Audit-ready posture; opt in via
9824///   `[permissions] mode = "enforce"` in `config.toml`.
9825///
9826/// Every consult increments the per-mode counter exposed via
9827/// [`crate::config::permissions_decision_counts`] so doctor +
9828/// capabilities can surface gate activity.
9829///
9830/// [`PermissionsMode`]: crate::config::PermissionsMode
9831pub fn enforce_governance(
9832    conn: &Connection,
9833    action: GovernedAction,
9834    namespace: &str,
9835    agent_id: &str,
9836    memory_id: Option<&str>,
9837    memory_owner: Option<&str>,
9838    payload: &serde_json::Value,
9839) -> Result<GovernanceDecision> {
9840    use crate::config::{PermissionsMode, active_permissions_mode, record_permissions_decision};
9841
9842    let mode = active_permissions_mode();
9843    record_permissions_decision(mode);
9844
9845    // K3 — `Off` short-circuits before any policy lookup.
9846    if mode == PermissionsMode::Off {
9847        return Ok(GovernanceDecision::Allow);
9848    }
9849
9850    // Opt-in enforcement: namespaces without an explicit policy are unaffected.
9851    let Some(policy) = resolve_governance_policy(conn, namespace) else {
9852        return Ok(GovernanceDecision::Allow);
9853    };
9854    // #880 — `write`/`delete`/`promote` live on `policy.core` after
9855    // the governance decomposition.
9856    let level = match action {
9857        GovernedAction::Store => &policy.core.write,
9858        GovernedAction::Delete => &policy.core.delete,
9859        GovernedAction::Promote => &policy.core.promote,
9860        // v0.7.0 L1-8: Reflect is gated by the L1-8 approval mechanism
9861        // (`require_approval_above_depth`) in the MCP handler rather than
9862        // the standard `enforce_governance` pipeline. Map to `write`
9863        // as the conservative fallback so the arm compiles; in practice
9864        // no current callsite passes `GovernedAction::Reflect` here.
9865        GovernedAction::Reflect => &policy.core.write,
9866    };
9867    let ns_owner = if matches!(action, GovernedAction::Store) {
9868        namespace_owner(conn, namespace)
9869    } else {
9870        None
9871    };
9872
9873    let decision = evaluate_level(
9874        conn,
9875        action,
9876        namespace,
9877        level,
9878        agent_id,
9879        memory_owner,
9880        ns_owner.as_deref(),
9881    );
9882
9883    // K3 — `Advisory` logs the would-be outcome but does not block or
9884    // queue a pending row. The capabilities surface continues to
9885    // advertise `permissions.mode = "advisory"` so external integrators
9886    // see the consistent posture.
9887    if mode == PermissionsMode::Advisory {
9888        match &decision {
9889            GovernanceDecision::Allow => {}
9890            GovernanceDecision::Deny(refusal) => {
9891                tracing::warn!(
9892                    target: "ai_memory::governance",
9893                    namespace = %namespace,
9894                    agent_id = %agent_id,
9895                    action = ?action,
9896                    reason = %refusal.reason,
9897                    denied_level = %refusal.denied_level.as_str(),
9898                    "permissions.mode=advisory: would-deny suppressed (allowing)"
9899                );
9900            }
9901            GovernanceDecision::Pending(_) => {
9902                tracing::warn!(
9903                    target: "ai_memory::governance",
9904                    namespace = %namespace,
9905                    agent_id = %agent_id,
9906                    action = ?action,
9907                    "permissions.mode=advisory: would-queue-approval suppressed (allowing)"
9908                );
9909            }
9910        }
9911        return Ok(GovernanceDecision::Allow);
9912    }
9913
9914    // K3 — `Enforce`: the historical strict path. `Pending` queues a
9915    // `pending_actions` row and returns the canonical id.
9916    if let GovernanceDecision::Pending(_) = decision {
9917        let pending_id =
9918            queue_pending_action(conn, action, namespace, memory_id, agent_id, payload)?;
9919        return Ok(GovernanceDecision::Pending(pending_id));
9920    }
9921    Ok(decision)
9922}
9923
9924/// Insert a `pending_actions` row and return its id.
9925pub fn queue_pending_action(
9926    conn: &Connection,
9927    action: GovernedAction,
9928    namespace: &str,
9929    memory_id: Option<&str>,
9930    requested_by: &str,
9931    payload: &serde_json::Value,
9932) -> Result<String> {
9933    let id = uuid::Uuid::new_v4().to_string();
9934    let now = Utc::now().to_rfc3339();
9935    let payload_json = serde_json::to_string(payload)?;
9936    conn.execute(
9937        "INSERT INTO pending_actions (id, action_type, memory_id, namespace, payload, requested_by, requested_at, status)
9938         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, 'pending')",
9939        params![
9940            id,
9941            action.as_str(),
9942            memory_id,
9943            namespace,
9944            payload_json,
9945            requested_by,
9946            now,
9947        ],
9948    )?;
9949    Ok(id)
9950}
9951
9952/// v0.6.2 (S34): upsert a `pending_actions` row from a canonical `PendingAction`
9953/// struct — used by `sync_push` to apply a peer-originated pending row so
9954/// governance state is cluster-consistent. Preserves `approvals` and
9955/// decision fields verbatim so re-plays converge. Uses `INSERT ... ON
9956/// CONFLICT(id) DO UPDATE` because the originator's id is stable across
9957/// peers (unlike `queue_pending_action` which mints a fresh UUID per
9958/// queue call).
9959pub fn upsert_pending_action(conn: &Connection, pa: &PendingAction) -> Result<()> {
9960    let payload_json = serde_json::to_string(&pa.payload)?;
9961    let approvals_json = serde_json::to_string(&pa.approvals)?;
9962    conn.execute(
9963        "INSERT INTO pending_actions
9964         (id, action_type, memory_id, namespace, payload, requested_by,
9965          requested_at, status, decided_by, decided_at, approvals)
9966         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)
9967         ON CONFLICT(id) DO UPDATE SET
9968            action_type  = excluded.action_type,
9969            memory_id    = excluded.memory_id,
9970            namespace    = excluded.namespace,
9971            payload      = excluded.payload,
9972            requested_by = excluded.requested_by,
9973            requested_at = excluded.requested_at,
9974            status       = excluded.status,
9975            decided_by   = excluded.decided_by,
9976            decided_at   = excluded.decided_at,
9977            approvals    = excluded.approvals",
9978        params![
9979            pa.id,
9980            pa.action_type,
9981            pa.memory_id,
9982            pa.namespace,
9983            payload_json,
9984            pa.requested_by,
9985            pa.requested_at,
9986            pa.status,
9987            pa.decided_by,
9988            pa.decided_at,
9989            approvals_json,
9990        ],
9991    )?;
9992    Ok(())
9993}
9994
9995pub fn list_pending_actions(
9996    conn: &Connection,
9997    status: Option<&str>,
9998    limit: usize,
9999) -> Result<Vec<PendingAction>> {
10000    let mut stmt = conn.prepare(
10001        "SELECT id, action_type, memory_id, namespace, payload, requested_by,
10002                requested_at, status, decided_by, decided_at, approvals
10003         FROM pending_actions
10004         WHERE (?1 IS NULL OR status = ?1)
10005         ORDER BY requested_at DESC
10006         LIMIT ?2",
10007    )?;
10008    let rows = stmt.query_map(params![status, limit], |row| {
10009        let payload_str: String = row.get(4)?;
10010        let payload: serde_json::Value =
10011            serde_json::from_str(&payload_str).unwrap_or(serde_json::Value::Null);
10012        let approvals_str: String = row.get(10)?;
10013        let approvals: Vec<Approval> = serde_json::from_str(&approvals_str).unwrap_or_default();
10014        Ok(PendingAction {
10015            id: row.get(0)?,
10016            action_type: row.get(1)?,
10017            memory_id: row.get(2)?,
10018            namespace: row.get(3)?,
10019            payload,
10020            requested_by: row.get(5)?,
10021            requested_at: row.get(6)?,
10022            status: row.get(7)?,
10023            decided_by: row.get(8)?,
10024            decided_at: row.get(9)?,
10025            approvals,
10026        })
10027    })?;
10028    rows.collect::<rusqlite::Result<Vec<_>>>()
10029        .map_err(Into::into)
10030}
10031
10032pub fn get_pending_action(conn: &Connection, id: &str) -> Result<Option<PendingAction>> {
10033    let row = conn.query_row(
10034        "SELECT id, action_type, memory_id, namespace, payload, requested_by,
10035                requested_at, status, decided_by, decided_at, approvals
10036         FROM pending_actions WHERE id = ?1",
10037        params![id],
10038        |row| {
10039            let payload_str: String = row.get(4)?;
10040            let payload: serde_json::Value =
10041                serde_json::from_str(&payload_str).unwrap_or(serde_json::Value::Null);
10042            let approvals_str: String = row.get(10)?;
10043            let approvals: Vec<Approval> = serde_json::from_str(&approvals_str).unwrap_or_default();
10044            Ok(PendingAction {
10045                id: row.get(0)?,
10046                action_type: row.get(1)?,
10047                memory_id: row.get(2)?,
10048                namespace: row.get(3)?,
10049                payload,
10050                requested_by: row.get(5)?,
10051                requested_at: row.get(6)?,
10052                status: row.get(7)?,
10053                decided_by: row.get(8)?,
10054                decided_at: row.get(9)?,
10055                approvals,
10056            })
10057        },
10058    );
10059    match row {
10060        Ok(p) => Ok(Some(p)),
10061        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
10062        Err(e) => Err(e.into()),
10063    }
10064}
10065
10066/// Mark a pending action as approved or rejected. Returns true on status
10067/// transition. Does NOT execute the action itself — the caller replays
10068/// the payload on approval (the db layer doesn't know how to execute
10069/// cross-interface write semantics).
10070///
10071/// v0.7.0 S5-M2 — on a successful deny transition this function appends a
10072/// `pending_action.denied` row to `signed_events` so the audit chain
10073/// captures every governance refusal alongside the approval and timeout
10074/// events. The emit is best-effort: failure is logged but does NOT roll
10075/// back the decision write (operators inspecting the audit chain see a
10076/// gap rather than losing the underlying decision).
10077pub fn decide_pending_action(
10078    conn: &Connection,
10079    id: &str,
10080    approve: bool,
10081    decided_by: &str,
10082) -> Result<bool> {
10083    let new_status = if approve { "approved" } else { "rejected" };
10084    let now = Utc::now().to_rfc3339();
10085    let updated = conn.execute(
10086        "UPDATE pending_actions SET status = ?1, decided_by = ?2, decided_at = ?3
10087         WHERE id = ?4 AND status = 'pending'",
10088        params![new_status, decided_by, now, id],
10089    )?;
10090    // S5-M2: emit a `pending_action.denied` audit row when the transition
10091    // landed and the decision is a deny. Approve emits later (after
10092    // execution) so the audit row captures the post-execute state — see
10093    // `execute_pending_action`.
10094    if updated > 0 && !approve {
10095        if let Ok(Some(pa)) = get_pending_action(conn, id) {
10096            emit_pending_action_event(conn, &pa, "pending_action.denied", Some(decided_by));
10097        }
10098    }
10099    Ok(updated > 0)
10100}
10101
10102/// v0.7.0 S5-M1/M2 — append a `pending_action.<state>` row to
10103/// `signed_events` so the audit chain captures every governance
10104/// decision transition (approve / deny / timeout).
10105///
10106/// `event_type` is one of:
10107/// - `"pending_action.approved"` (emitted from `execute_pending_action`
10108///   after a successful execute)
10109/// - `"pending_action.denied"` (emitted from `decide_pending_action`
10110///   on a deny transition)
10111/// - `"pending_action.timed_out"` (emitted from
10112///   `sweep_pending_action_timeouts` per expired row)
10113///
10114/// The CBOR payload encodes `(pending_id, action_type, namespace,
10115/// requested_by, decided_by, status, timestamp)` so a downstream
10116/// auditor can replay decision provenance without re-reading the
10117/// (mutable) `pending_actions` table.
10118///
10119/// Best-effort: any encode / append failure is logged at WARN; the
10120/// caller's primary mutation MUST NOT roll back on audit failure.
10121/// Mirrors the same posture as `memory_link.invalidated` emit (the
10122/// audit chain is allowed to gap, the underlying write is not).
10123fn emit_pending_action_event(
10124    conn: &Connection,
10125    pa: &PendingAction,
10126    event_type: &str,
10127    decided_by_override: Option<&str>,
10128) {
10129    // Build the canonical CBOR payload. We sort keys via a BTreeMap so
10130    // the encoding is stable across releases — the SHA-256 over these
10131    // bytes is the audit chain's commitment to the decision shape.
10132    // Mirrors the encoding pattern used by `identity::sign::canonical_cbor`
10133    // (ciborium + BTreeMap-ordered keys) so the audit chain stays
10134    // canonicalized across emit sites.
10135    use std::collections::BTreeMap;
10136    let decided_by = decided_by_override
10137        .map(str::to_string)
10138        .or_else(|| pa.decided_by.clone())
10139        .unwrap_or_default();
10140    let timestamp = Utc::now().to_rfc3339();
10141    let mut map: BTreeMap<&str, ciborium::Value> = BTreeMap::new();
10142    map.insert(
10143        field_names::PENDING_ID,
10144        ciborium::Value::Text(pa.id.clone()),
10145    );
10146    map.insert(
10147        field_names::ACTION_TYPE,
10148        ciborium::Value::Text(pa.action_type.clone()),
10149    );
10150    map.insert("namespace", ciborium::Value::Text(pa.namespace.clone()));
10151    map.insert(
10152        field_names::REQUESTED_BY,
10153        ciborium::Value::Text(pa.requested_by.clone()),
10154    );
10155    map.insert(
10156        field_names::DECIDED_BY,
10157        ciborium::Value::Text(decided_by.clone()),
10158    );
10159    map.insert("status", ciborium::Value::Text(pa.status.clone()));
10160    map.insert("timestamp", ciborium::Value::Text(timestamp.clone()));
10161    let entries: Vec<(ciborium::Value, ciborium::Value)> = map
10162        .into_iter()
10163        .map(|(k, v)| (ciborium::Value::Text(k.to_string()), v))
10164        .collect();
10165    let value = ciborium::Value::Map(entries);
10166    let mut cbor: Vec<u8> = Vec::with_capacity(128);
10167    if let Err(e) = ciborium::ser::into_writer(&value, &mut cbor) {
10168        tracing::warn!(
10169            target: crate::signed_events::SIGNED_EVENTS_TRACE_TARGET,
10170            pending_id = %pa.id,
10171            event_type,
10172            "failed to encode canonical CBOR for pending_action event: {e}"
10173        );
10174        return;
10175    }
10176
10177    // Audit row's `agent_id` field: the decision actor (decider) for
10178    // approve / deny, the requester for the requester-less timeout
10179    // path (no human/agent decided — the sweeper transitioned the
10180    // row, so the "actor" is the originating requester).
10181    let agent_id = if event_type == "pending_action.timed_out" {
10182        pa.requested_by.clone()
10183    } else {
10184        decided_by
10185    };
10186
10187    // v0.7.0 #1099 (SR-1 #4, HIGH) — sign pending_action audit rows
10188    // with the daemon's installed signing key when one is available.
10189    // Pre-#1099 every pending_action.{approved,rejected,timed_out}
10190    // row landed with `signature: None, attest_level: "unsigned"`
10191    // even when the daemon had loaded a signing key — breaking the
10192    // procurement-grade tamper-evidence claim on the approval audit
10193    // trail. Falls back to (None, "unsigned") cleanly when no key
10194    // is installed (legacy posture).
10195    let event = crate::signed_events::SignedEvent::with_daemon_signature(
10196        crate::signed_events::payload_hash(&cbor),
10197        agent_id,
10198        event_type.to_string(),
10199        timestamp,
10200    );
10201    if let Err(e) = crate::signed_events::append_signed_event(conn, &event) {
10202        tracing::warn!(
10203            target: crate::signed_events::SIGNED_EVENTS_TRACE_TARGET,
10204            pending_id = %pa.id,
10205            event_type,
10206            "failed to append pending_action audit row: {e}"
10207        );
10208    }
10209}
10210
10211/// v0.7.0 S5-H4 — extract `metadata.agent_id` from a pending-action
10212/// store/reflect payload and verify it matches `pa.requested_by`.
10213///
10214/// The S5 audit caught an approver-on-behalf laundering hole: a caller
10215/// could queue a `pending_action` with `requested_by = "alice"` but
10216/// embed a payload whose `metadata.agent_id = "bob"`, and on execute
10217/// the new memory would land attributed to bob — the approver, not the
10218/// requester, was attributing the write. This helper closes the gap by
10219/// requiring the payload's claimed agent to equal the pending row's
10220/// `requested_by`. If the payload omits an agent_id, we treat that as
10221/// a match (older callers may not have populated the field; the
10222/// substrate still records `pa.requested_by` as the canonical attributor
10223/// and the memory's `metadata.agent_id` gets stamped from there).
10224///
10225/// The check fires only on payload shapes that carry an agent_id —
10226/// today: `store` (full Memory JSON) and `reflect` (the L1-8 payload
10227/// that includes `agent_id`). `delete` / `promote` payloads do not
10228/// carry an agent_id (the action is attributed to `pa.requested_by`
10229/// directly), so this function returns `Ok(())` on those.
10230fn verify_payload_agent_id(pa: &PendingAction) -> Result<()> {
10231    let payload_agent_id = pa
10232        .payload
10233        .get("agent_id")
10234        .and_then(serde_json::Value::as_str)
10235        .or_else(|| {
10236            pa.payload
10237                .get("metadata")
10238                .and_then(|m| m.get("agent_id"))
10239                .and_then(serde_json::Value::as_str)
10240        });
10241    if let Some(claimed) = payload_agent_id
10242        && claimed != pa.requested_by
10243    {
10244        // #962 typed envelope — ApproverLaundering maps to 403 FORBIDDEN
10245        // via MemoryError::RefusedByGovernance (S5-H4 contract).
10246        return Err(anyhow::Error::new(StorageError::ApproverLaundering {
10247            pending_id: pa.id.clone(),
10248            claimed: claimed.to_string(),
10249            requester: pa.requested_by.clone(),
10250        }));
10251    }
10252    Ok(())
10253}
10254
/// Task 1.10 — outcome of an approver-aware approve call
/// (`approve_with_approver_type`).
///
/// The variants separate "no such row", "refused", "vote recorded but
/// quorum not reached", and "fully approved" so each surface can map
/// them to its own status codes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ApproveOutcome {
    /// #1620 — no pending row with this id exists. Maps to 404 on
    /// every surface; pre-#1620 this collapsed into `Rejected` and
    /// surfaced as 403 on sqlite while postgres returned 404 for the
    /// same probe.
    NotFound,
    /// Approver check failed; the string carries the human-readable
    /// reason (row already decided, wrong designated approver,
    /// unregistered consensus voter, or a failed decision write).
    Rejected(String),
    /// Consensus quorum not yet met; vote recorded. Also returned,
    /// with the unchanged vote count, when the same voter votes again.
    Pending { votes: usize, quorum: u32 },
    /// Fully approved (Human single-step, matching Agent, or consensus
    /// threshold met). Caller may now replay the payload via
    /// `execute_pending_action`.
    Approved,
}
10272
10273/// Task 1.10 — approver-type aware approve. Enforces the
10274/// `metadata.governance.approver` of the pending action's namespace.
10275pub fn approve_with_approver_type(
10276    conn: &Connection,
10277    pending_id: &str,
10278    approver_agent_id: &str,
10279) -> Result<ApproveOutcome> {
10280    let Some(pa) = get_pending_action(conn, pending_id)? else {
10281        // #1620 — typed NotFound (was Rejected → 403; postgres 404'd).
10282        return Ok(ApproveOutcome::NotFound);
10283    };
10284    if pa.status != "pending" {
10285        return Ok(ApproveOutcome::Rejected(format!(
10286            "already decided: status={}",
10287            pa.status
10288        )));
10289    }
10290    // Resolve the namespace's approver type. If no policy, default to Human —
10291    // which accepts any approval (back-compat with 1.9 callers).
10292    // #880 — `approver` lives on `policy.core` after the governance
10293    // decomposition.
10294    let approver = resolve_governance_policy(conn, &pa.namespace)
10295        .map_or(ApproverType::Human, |p| p.core.approver);
10296
10297    match approver {
10298        ApproverType::Human => {
10299            let ok = decide_pending_action(conn, pending_id, true, approver_agent_id)?;
10300            if ok {
10301                Ok(ApproveOutcome::Approved)
10302            } else {
10303                Ok(ApproveOutcome::Rejected(
10304                    crate::errors::msg::DECISION_WRITE_FAILED.into(),
10305                ))
10306            }
10307        }
10308        ApproverType::Agent(required) => {
10309            if approver_agent_id != required {
10310                return Ok(ApproveOutcome::Rejected(format!(
10311                    "designated approver is '{required}'; got '{approver_agent_id}'"
10312                )));
10313            }
10314            let ok = decide_pending_action(conn, pending_id, true, approver_agent_id)?;
10315            if ok {
10316                Ok(ApproveOutcome::Approved)
10317            } else {
10318                Ok(ApproveOutcome::Rejected(
10319                    crate::errors::msg::DECISION_WRITE_FAILED.into(),
10320                ))
10321            }
10322        }
10323        ApproverType::Consensus(quorum) => {
10324            // Issue #216: a single caller could previously satisfy any
10325            // Consensus(n) quorum by varying the unauthenticated `agent_id`
10326            // (`alice`, `bob`, `Alice`/`alice` were three distinct votes).
10327            // Two changes harden the path:
10328            //   1. Require each voter to be a registered agent — raises the
10329            //      bar from "claim any string" to "operator pre-registered
10330            //      this id". Combined with auth on the approve endpoint
10331            //      (operator-deployed) this gives a real multi-party gate.
10332            //   2. Canonicalize the agent_id to lowercase for both the
10333            //      duplicate-vote check and storage so case-variants of the
10334            //      same id collapse to a single vote.
10335            if !is_registered_agent(conn, approver_agent_id) {
10336                return Ok(ApproveOutcome::Rejected(format!(
10337                    "consensus voter '{approver_agent_id}' is not a registered agent"
10338                )));
10339            }
10340            let canonical_id = approver_agent_id.to_ascii_lowercase();
10341            let mut approvals = pa.approvals.clone();
10342            if approvals
10343                .iter()
10344                .any(|a| a.agent_id.eq_ignore_ascii_case(&canonical_id))
10345            {
10346                return Ok(ApproveOutcome::Pending {
10347                    votes: approvals.len(),
10348                    quorum,
10349                });
10350            }
10351            approvals.push(Approval {
10352                agent_id: canonical_id.clone(),
10353                approved_at: Utc::now().to_rfc3339(),
10354            });
10355            let approvals_json = serde_json::to_string(&approvals)?;
10356            conn.execute(
10357                "UPDATE pending_actions SET approvals = ?1 WHERE id = ?2 AND status = 'pending'",
10358                params![approvals_json, pending_id],
10359            )?;
10360            let votes = approvals.len();
10361            if u32::try_from(votes).unwrap_or(u32::MAX) >= quorum {
10362                // Threshold met — transition status so the caller can replay.
10363                let ok = decide_pending_action(conn, pending_id, true, &canonical_id)?;
10364                if ok {
10365                    return Ok(ApproveOutcome::Approved);
10366                }
10367                return Ok(ApproveOutcome::Rejected(
10368                    "decision write failed at consensus threshold".into(),
10369                ));
10370            }
10371            Ok(ApproveOutcome::Pending { votes, quorum })
10372        }
10373    }
10374}
10375
10376/// Task 1.10 — Execute an approved pending action's payload. Callers invoke
10377/// this after `approve_with_approver_type` returns `Approved`. Returns the
10378/// affected memory id (new id for store, existing id for delete/promote).
10379///
10380/// v0.7.0 S5-H1 — adds a `"reflect"` arm so an approved deep-reflection
10381/// queued by the L1-8 MCP gate (see `mcp::tools::reflect`) actually lands
10382/// instead of erroring out as "unknown action_type". The arm reconstructs
10383/// the original [`ReflectInput`] from the queued payload and replays it
10384/// through [`reflect`], inheriting the same depth-cap / source-resolution
10385/// checks the direct write path runs.
10386///
10387/// v0.7.0 S5-H4 — every arm runs [`verify_payload_agent_id`] BEFORE the
10388/// side-effecting mutation so an approver cannot launder a payload whose
10389/// embedded `agent_id` disagrees with the original requester (the
10390/// `pending_actions.requested_by` column). The refusal is a hard
10391/// `MemoryError::Validation`-shaped anyhow bail; on refusal we emit a
10392/// `pending_action.refused_agent_id_mismatch` audit row so the laundering
10393/// attempt is captured by the signed_events chain.
10394///
10395/// v0.7.0 S5-M1 — on a successful execute the function appends a
10396/// `pending_action.approved` row to `signed_events` (the deny + timeout
10397/// emits live in `decide_pending_action` and
10398/// `sweep_pending_action_timeouts` respectively, so the three governance
10399/// transitions are audit-complete together).
10400pub fn execute_pending_action(conn: &Connection, pending_id: &str) -> Result<Option<String>> {
10401    let Some(pa) = get_pending_action(conn, pending_id)? else {
10402        // #962 typed envelope — 404 NOT_FOUND.
10403        return Err(anyhow::Error::new(StorageError::PendingActionNotFound {
10404            pending_id: pending_id.to_string(),
10405        }));
10406    };
10407    if pa.status != "approved" {
10408        // #962 typed envelope — 409 CONFLICT (action is in the wrong state).
10409        return Err(anyhow::Error::new(
10410            StorageError::PendingActionStateInvalid {
10411                pending_id: pending_id.to_string(),
10412                status: pa.status.clone(),
10413            },
10414        ));
10415    }
10416    // S5-H4: refuse approver-on-behalf laundering BEFORE the side-effecting
10417    // write. Emit an audit row on refusal so the laundering attempt is
10418    // captured by the signed_events chain even when the substrate
10419    // bails the execute.
10420    if let Err(e) = verify_payload_agent_id(&pa) {
10421        emit_pending_action_event(conn, &pa, "pending_action.refused_agent_id_mismatch", None);
10422        return Err(e);
10423    }
10424    let memory_id = match pa.action_type.as_str() {
10425        "store" => {
10426            let mut mem: Memory = serde_json::from_value(pa.payload.clone()).map_err(|e| {
10427                // #962 typed envelope.
10428                anyhow::Error::new(StorageError::InvalidArgument {
10429                    reason: format!("invalid store payload: {e}"),
10430                })
10431            })?;
10432            // Stamp fresh id + timestamps so the execution is idempotent on replay.
10433            mem.id = uuid::Uuid::new_v4().to_string();
10434            let now = Utc::now().to_rfc3339();
10435            mem.created_at.clone_from(&now);
10436            mem.updated_at = now;
10437            mem.access_count = 0;
10438            let actual_id = insert(conn, &mem)?;
10439            Some(actual_id)
10440        }
10441        "delete" => {
10442            if let Some(mid) = pa.memory_id.clone() {
10443                delete(conn, &mid)?;
10444                Some(mid)
10445            } else {
10446                None
10447            }
10448        }
10449        "promote" => {
10450            if let Some(mid) = pa.memory_id.clone() {
10451                if let Some(to_ns) = pa
10452                    .payload
10453                    .get(field_names::TO_NAMESPACE)
10454                    .and_then(|v| v.as_str())
10455                {
10456                    // Vertical promotion to ancestor.
10457                    let clone_id = promote_to_namespace(conn, &mid, to_ns)?;
10458                    Some(clone_id)
10459                } else {
10460                    // Tier bump to long + clear expiry.
10461                    let (_found, _changed) = update(
10462                        conn,
10463                        &mid,
10464                        None,
10465                        None,
10466                        Some(&Tier::Long),
10467                        None,
10468                        None,
10469                        None,
10470                        None,
10471                        Some(""),
10472                        None,
10473                    )?;
10474                    Some(mid)
10475                }
10476            } else {
10477                None
10478            }
10479        }
10480        "reflect" => execute_reflect_from_payload(conn, &pa)?,
10481        other => {
10482            // #962 typed envelope.
10483            return Err(anyhow::Error::new(StorageError::InvalidArgument {
10484                reason: format!("unknown action_type: {other}"),
10485            }));
10486        }
10487    };
10488    // S5-M1: emit the approve audit row after the side-effecting write
10489    // succeeded so the audit chain reflects the post-execute state. The
10490    // emit is best-effort (warn-only) so an audit-side failure does not
10491    // roll back the governance decision.
10492    emit_pending_action_event(
10493        conn,
10494        &pa,
10495        "pending_action.approved",
10496        pa.decided_by.as_deref(),
10497    );
10498    Ok(memory_id)
10499}
10500
10501/// v0.7.0 S5-H1 — replay an approved reflect pending action through
10502/// [`reflect`]. Factored out of [`execute_pending_action`] so the arm
10503/// stays focused on payload deserialization + the substrate call, and
10504/// so the unit test (`test_execute_reflect_arm_succeeds_round_trip`)
10505/// can exercise the helper without duplicating the wrapper logic.
10506///
10507/// Payload shape (mirrors what `mcp::tools::reflect` queued in L1-8):
10508///
10509/// ```json
10510/// {
10511///   "source_ids": ["…", "…"],
10512///   "title": "…",
10513///   "content": "…",
10514///   "namespace": "…",
10515///   "tier": "mid",
10516///   "tags": ["…"],
10517///   "priority": 5,
10518///   "confidence": 1.0,
10519///   "agent_id": "…",
10520///   "proposed_depth": 3,
10521///   "metadata": { … }
10522/// }
10523/// ```
10524///
10525/// All fields are optional except `source_ids`, `title`, and `content`
10526/// (the substrate validator rejects empty values, so missing keys
10527/// surface as a `Validation` error rather than a panic).
10528fn execute_reflect_from_payload(conn: &Connection, pa: &PendingAction) -> Result<Option<String>> {
10529    let payload = &pa.payload;
10530    let source_ids: Vec<String> = payload
10531        .get(field_names::SOURCE_IDS)
10532        .and_then(|v| v.as_array())
10533        .map(|arr| {
10534            arr.iter()
10535                .filter_map(|v| v.as_str().map(str::to_string))
10536                .collect()
10537        })
10538        .unwrap_or_default();
10539    if source_ids.is_empty() {
10540        // #962 typed envelope.
10541        return Err(anyhow::Error::new(StorageError::InvalidArgument {
10542            reason: "invalid reflect payload: source_ids missing or empty".to_string(),
10543        }));
10544    }
10545    let title = payload
10546        .get("title")
10547        .and_then(|v| v.as_str())
10548        .ok_or_else(|| {
10549            // #962 typed envelope.
10550            anyhow::Error::new(StorageError::InvalidArgument {
10551                reason: "invalid reflect payload: title missing".to_string(),
10552            })
10553        })?
10554        .to_string();
10555    let content = payload
10556        .get("content")
10557        .and_then(|v| v.as_str())
10558        .ok_or_else(|| {
10559            // #962 typed envelope.
10560            anyhow::Error::new(StorageError::InvalidArgument {
10561                reason: "invalid reflect payload: content missing".to_string(),
10562            })
10563        })?
10564        .to_string();
10565    let namespace = payload
10566        .get("namespace")
10567        .and_then(|v| v.as_str())
10568        .map(str::to_string)
10569        .or_else(|| Some(pa.namespace.clone()));
10570    let tier = payload
10571        .get("tier")
10572        .and_then(|v| v.as_str())
10573        .and_then(Tier::from_str)
10574        .unwrap_or(Tier::Mid);
10575    let tags: Vec<String> = payload
10576        .get("tags")
10577        .and_then(|v| v.as_array())
10578        .map(|arr| {
10579            arr.iter()
10580                .filter_map(|v| v.as_str().map(str::to_string))
10581                .collect()
10582        })
10583        .unwrap_or_default();
10584    let priority = i32::try_from(
10585        payload
10586            .get("priority")
10587            .and_then(|v| v.as_i64())
10588            .unwrap_or(5),
10589    )
10590    .unwrap_or(5);
10591    let confidence = payload
10592        .get(field_names::CONFIDENCE)
10593        .and_then(|v| v.as_f64())
10594        .unwrap_or(1.0);
10595    // Use the queued payload's agent_id when present (already verified
10596    // to match `pa.requested_by` by `verify_payload_agent_id`), else
10597    // fall back to `pa.requested_by` — the substrate stamps the value
10598    // onto `metadata.agent_id` so attribution stays consistent.
10599    let agent_id = payload
10600        .get("agent_id")
10601        .and_then(|v| v.as_str())
10602        .map(str::to_string)
10603        .unwrap_or_else(|| pa.requested_by.clone());
10604    let metadata = payload
10605        .get("metadata")
10606        .cloned()
10607        .unwrap_or_else(|| serde_json::json!({}));
10608
10609    let input = crate::storage::reflect::ReflectInput {
10610        source_ids,
10611        title,
10612        content,
10613        namespace,
10614        tier,
10615        tags,
10616        priority,
10617        confidence,
10618        // v0.7.x (issue #1175): vendor-neutral substrate default.
10619        // Mirrors the MCP-side default at `src/mcp/tools/reflect.rs`
10620        // — see the comment there for the heterogeneous-NHI rationale.
10621        // Vendor identity stays in `metadata.agent_id`.
10622        source: crate::validate::DEFAULT_NHI_SOURCE.to_string(),
10623        agent_id,
10624        metadata,
10625    };
10626    let outcome = crate::storage::reflect::reflect(conn, &input)
10627        .map_err(|e| anyhow::anyhow!("reflect execute failed: {e}"))?;
10628    Ok(Some(outcome.id))
10629}
10630
10631/// Check if a memory ID is a namespace standard (used by consolidate to warn).
10632pub fn is_namespace_standard(conn: &Connection, id: &str) -> bool {
10633    conn.query_row(
10634        "SELECT COUNT(*) FROM namespace_meta WHERE standard_id = ?1",
10635        params![id],
10636        |r| r.get::<_, i64>(0),
10637    )
10638    .unwrap_or(0)
10639        > 0
10640}
10641
10642/// v0.6.3 (capabilities schema v2): count namespace standards whose
10643/// `metadata.governance` is non-null. A "rule" here means a namespace
10644/// has an explicit governance policy attached to its standard memory.
10645/// The count is a transparent passthrough — the full permission system
10646/// arrives in v0.7 (arch-enhancement-spec §3).
10647pub fn count_active_governance_rules(conn: &Connection) -> Result<usize> {
10648    let count: i64 = conn
10649        .query_row(
10650            "SELECT COUNT(*) FROM memories m
10651             INNER JOIN namespace_meta nm ON nm.standard_id = m.id
10652             WHERE json_extract(m.metadata, '$.governance') IS NOT NULL",
10653            [],
10654            |r| r.get(0),
10655        )
10656        .unwrap_or(0);
10657    Ok(usize::try_from(count.max(0)).unwrap_or(0))
10658}
10659
10660/// v0.7.0 K5 — enumerate every namespace whose standard memory carries an
10661/// explicit `metadata.governance` policy and return `(namespace, policy)`
10662/// pairs sorted lexicographically by namespace.
10663///
10664/// Companion to [`count_active_governance_rules`] (which returns just the
10665/// count). Powers the `permissions.rule_summary` field surfaced by
10666/// capabilities v3 — the K5 increment closes the v0.6.3.1 honesty
10667/// disclosure that the field was previously dropped from the wire because
10668/// no per-rule serializer existed.
10669///
10670/// Rows whose `metadata.governance` payload fails to round-trip through
10671/// `GovernancePolicy::from_metadata` are silently skipped — the
10672/// capabilities surface is best-effort and a malformed policy must not
10673/// take down the entire response. The wider gate
10674/// (`enforce_governance` → `read_namespace_policy`) already swallows the
10675/// same parse failures, so the surfaces stay consistent.
10676///
10677/// # Errors
10678///
10679/// Returns `Err` only on hard SQLite failures (e.g. table missing); the
10680/// row-level parse failures noted above are handled internally.
10681pub fn list_active_governance_policies(
10682    conn: &Connection,
10683) -> Result<Vec<(String, GovernancePolicy)>> {
10684    // Pull the raw `(namespace, metadata)` tuples for every namespace
10685    // whose standard memory has a non-null `metadata.governance`. We
10686    // ORDER BY at the SQL layer so the lex sort comes free and the
10687    // caller doesn't have to re-sort.
10688    let mut stmt = conn.prepare(
10689        "SELECT nm.namespace, m.metadata
10690         FROM namespace_meta nm
10691         INNER JOIN memories m ON m.id = nm.standard_id
10692         WHERE json_extract(m.metadata, '$.governance') IS NOT NULL
10693         ORDER BY nm.namespace ASC",
10694    )?;
10695    let rows = stmt.query_map([], |r| {
10696        let ns: String = r.get(0)?;
10697        let meta_str: String = r.get(1)?;
10698        Ok((ns, meta_str))
10699    })?;
10700
10701    let mut out = Vec::new();
10702    for row in rows.flatten() {
10703        let (ns, meta_str) = row;
10704        // Parse the metadata blob; skip rows that don't deserialize.
10705        let Ok(meta) = serde_json::from_str::<serde_json::Value>(&meta_str) else {
10706            continue;
10707        };
10708        // `from_metadata` returns `None` when the field is missing/null
10709        // (the SQL filter already excludes that path) and
10710        // `Some(Err(_))` on a malformed policy payload — skip both.
10711        match GovernancePolicy::from_metadata(&meta) {
10712            Some(Ok(policy)) => out.push((ns, policy)),
10713            _ => continue,
10714        }
10715    }
10716    Ok(out)
10717}
10718
10719/// v0.6.3 (capabilities schema v2): count rows in the `subscriptions`
10720/// table. Used by `handle_capabilities` as a proxy for "registered
10721/// hooks" — the hook pipeline itself is v0.7 Bucket 0 work.
10722pub fn count_subscriptions(conn: &Connection) -> Result<usize> {
10723    let count: i64 = conn
10724        .query_row("SELECT COUNT(*) FROM subscriptions", [], |r| r.get(0))
10725        .unwrap_or(0);
10726    Ok(usize::try_from(count.max(0)).unwrap_or(0))
10727}
10728
10729/// v0.6.3 (capabilities schema v2): count `pending_actions` rows whose
10730/// `status` matches the predicate. Used by `handle_capabilities` to
10731/// surface live approval queue depth.
10732pub fn count_pending_actions_by_status(conn: &Connection, status: &str) -> Result<usize> {
10733    let count: i64 = conn
10734        .query_row(
10735            "SELECT COUNT(*) FROM pending_actions WHERE status = ?1",
10736            params![status],
10737            |r| r.get(0),
10738        )
10739        .unwrap_or(0);
10740    Ok(usize::try_from(count.max(0)).unwrap_or(0))
10741}
10742
10743/// v0.7.0 K2 — pending_actions timeout sweeper.
10744///
10745/// Scans `pending_actions` for `status='pending'` rows whose age exceeds
10746/// the per-row `default_timeout_seconds` (or `global_default_secs` when
10747/// the per-row column is NULL). Transitions matching rows to
10748/// `status='expired'` and stamps `expired_at = now`.
10749///
10750/// Returns the list of `(id, namespace)` tuples that were just expired
10751/// so the caller can fan out approval-decision events. Empty queue is a
10752/// silent no-op.
10753///
10754/// Closes the v0.6.3.1 honest-Capabilities-v2 disclosure that
10755/// `default_timeout_seconds` was previously advertised but unused (the
10756/// v2 honesty patch had dropped it from the wire shape; K2 ships the
10757/// backing sweeper so the field is meaningful again).
10758///
10759/// # Errors
10760///
10761/// Returns `Err` only on hard SQLite failures (e.g. table missing).
10762pub fn sweep_pending_action_timeouts(
10763    conn: &Connection,
10764    global_default_secs: i64,
10765) -> Result<Vec<(String, String)>> {
10766    // Step 1 — find candidates. We compute age in SQL via julianday()
10767    // arithmetic so the sweep is index-friendly and avoids parsing
10768    // every `requested_at` row in Rust. The composite index
10769    // `idx_pending_status_requested` (added in migration v21) keeps
10770    // the planner from full-scanning the table.
10771    //
10772    // The `default_timeout_seconds` column is nullable; rows with NULL
10773    // fall back to `global_default_secs`. A non-positive global default
10774    // disables the sweeper entirely (operator escape hatch).
10775    if global_default_secs <= 0 {
10776        return Ok(Vec::new());
10777    }
10778    let mut stmt = conn.prepare(
10779        "SELECT id, namespace FROM pending_actions
10780         WHERE status = 'pending'
10781           AND (julianday('now') - julianday(requested_at)) * 86400.0
10782               > COALESCE(default_timeout_seconds, ?1)",
10783    )?;
10784    let rows: Vec<(String, String)> = stmt
10785        .query_map(params![global_default_secs], |row| {
10786            Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
10787        })?
10788        .collect::<rusqlite::Result<Vec<_>>>()?;
10789    if rows.is_empty() {
10790        return Ok(Vec::new());
10791    }
10792
10793    // Step 2 — flip status='expired' + stamp expired_at. We update
10794    // row-by-row inside a single transaction so a failure mid-batch
10795    // rolls back cleanly. The WHERE clause re-checks status='pending'
10796    // so a concurrent decide_pending_action wins (its decision is
10797    // not overwritten).
10798    let now = Utc::now().to_rfc3339();
10799    let tx_savepoint = conn.unchecked_transaction()?;
10800    {
10801        let mut update = tx_savepoint.prepare(
10802            "UPDATE pending_actions
10803             SET status = 'expired', expired_at = ?1
10804             WHERE id = ?2 AND status = 'pending'",
10805        )?;
10806        for (id, _) in &rows {
10807            update.execute(params![now, id])?;
10808        }
10809    }
10810    tx_savepoint.commit()?;
10811    // v0.7.0 S5-M2 — emit a `pending_action.timed_out` audit row per
10812    // expired pending row so the audit chain captures the timeout
10813    // transition alongside approve / deny. Best-effort: a missing
10814    // pending row or audit failure is logged at WARN; the sweep
10815    // itself has already committed.
10816    for (id, _) in &rows {
10817        if let Ok(Some(pa)) = get_pending_action(conn, id) {
10818            emit_pending_action_event(conn, &pa, "pending_action.timed_out", None);
10819        }
10820    }
10821    Ok(rows)
10822}
10823
10824// ---------------------------------------------------------------------------
10825// `ai-memory doctor` (P7 / R7) — query helpers.
10826// ---------------------------------------------------------------------------
10827//
10828// These read-only helpers back the `ai-memory doctor` CLI subcommand. Each
10829// query is a single indexed `COUNT(*)` (or close to it) so the reporter can
10830// run an entire health pass without holding the DB lock long enough to
10831// block live writers.
10832//
10833// Surfaces consumed:
// - `doctor_dim_violations` reads the post-P2 `embedding_dim` column when
10835//   present and gracefully reports `Ok(None)` on pre-P2 schemas (the column
10836//   doesn't exist yet on `release/v0.6.3`).
10837// - `count_index_evictions` reads the post-P3 `index_evictions_total` global
10838//   counter when wired (there is no schema-level surface today; it returns
10839//   `Ok(None)` so the doctor can render a "not yet observed" line).
10840// - `count_oldest_pending_action_age_secs` is portable today and reports the
10841//   age of the oldest `pending` row in seconds.
10842// - `count_governance_chain_depth` walks `parent_namespace` for each
10843//   namespace_meta row to estimate the inheritance depth distribution
10844//   the P4 enforcer will eventually consume.
10845
10846/// Count rows whose `embedding_dim` (post-P2) does not match the modal
10847/// dim within their namespace. On pre-P2 schemas the `embedding_dim`
10848/// column doesn't exist; the function returns `Ok(None)` so the doctor
10849/// can render "not yet observed (pre-P2 schema)".
10850///
10851/// # Errors
10852///
10853/// Returns `Err` only on hard SQLite failures — a missing column is
10854/// reported as `Ok(None)`, not an error.
10855pub fn doctor_dim_violations(conn: &Connection) -> Result<Option<usize>> {
10856    let has_dim = conn
10857        .prepare("SELECT embedding_dim FROM memories LIMIT 0")
10858        .is_ok();
10859    if !has_dim {
10860        return Ok(None);
10861    }
10862    // For each namespace, find the modal dim (most-frequent non-null value)
10863    // and count rows whose dim differs from it. Rows with NULL dim but a
10864    // non-empty embedding count as violations too — they are mid-migration.
10865    let n: i64 = conn
10866        .query_row(
10867            "WITH per_ns_modes AS (
10868                 SELECT namespace, embedding_dim, COUNT(*) AS c
10869                 FROM memories
10870                 WHERE embedding IS NOT NULL AND embedding_dim IS NOT NULL
10871                 GROUP BY namespace, embedding_dim
10872             ),
10873             ranked AS (
10874                 SELECT namespace, embedding_dim,
10875                        ROW_NUMBER() OVER (PARTITION BY namespace ORDER BY c DESC) AS rn
10876                 FROM per_ns_modes
10877             ),
10878             modes AS (
10879                 SELECT namespace, embedding_dim AS modal_dim
10880                 FROM ranked WHERE rn = 1
10881             )
10882             SELECT COUNT(*)
10883             FROM memories m
10884             LEFT JOIN modes mo ON mo.namespace = m.namespace
10885             WHERE m.embedding IS NOT NULL
10886               AND (m.embedding_dim IS NULL
10887                    OR (mo.modal_dim IS NOT NULL AND m.embedding_dim != mo.modal_dim))",
10888            [],
10889            |r| r.get(0),
10890        )
10891        .unwrap_or(0);
10892    Ok(Some(usize::try_from(n.max(0)).unwrap_or(0)))
10893}
10894
10895/// Age in seconds of the oldest `pending` row in `pending_actions`, or
10896/// `None` if the queue is empty (or the column is unparseable). The
10897/// doctor uses this to flag a backlog older than 24h as critical.
10898///
10899/// # Errors
10900///
10901/// Returns `Err` only on hard SQLite failures (e.g. missing table).
10902pub fn doctor_oldest_pending_age_secs(conn: &Connection) -> Result<Option<i64>> {
10903    let row: Option<String> = conn
10904        .query_row(
10905            "SELECT requested_at FROM pending_actions WHERE status = 'pending'
10906             ORDER BY requested_at ASC LIMIT 1",
10907            [],
10908            |r| r.get(0),
10909        )
10910        .ok();
10911    let Some(ts) = row else {
10912        return Ok(None);
10913    };
10914    let Ok(parsed) = chrono::DateTime::parse_from_rfc3339(&ts) else {
10915        return Ok(None);
10916    };
10917    // M11 (v0.7.0 round-2) — clamp negative ages to 0. `requested_at`
10918    // is stamped by the writer's clock; on a host with skewed time
10919    // (NTP slewing back, intentional misconfiguration, or VM time
10920    // travel) `now - parsed` can land negative and downstream
10921    // consumers (the doctor surface treats this as "age in seconds")
10922    // would surface a nonsensical figure. The WARN gives operators
10923    // the signal so they can investigate the clock drift instead of
10924    // chasing a phantom backlog.
10925    let raw_age = (Utc::now() - parsed.with_timezone(&Utc)).num_seconds();
10926    let age = if raw_age < 0 {
10927        tracing::warn!(
10928            requested_at = %ts,
10929            raw_age_seconds = raw_age,
10930            "pending_actions row has future timestamp; clamping age to 0"
10931        );
10932        0
10933    } else {
10934        raw_age
10935    };
10936    Ok(Some(age))
10937}
10938
10939/// Count of namespaces that have a standard registered with a non-null
10940/// `metadata.governance` block, and the count without (just a standard
10941/// memory but no policy attached).
10942///
10943/// # Errors
10944///
10945/// Returns `Err` only on hard SQLite failures.
10946pub fn doctor_governance_coverage(conn: &Connection) -> Result<(usize, usize)> {
10947    let with_policy: i64 = conn
10948        .query_row(
10949            "SELECT COUNT(*) FROM memories m
10950             INNER JOIN namespace_meta nm ON nm.standard_id = m.id
10951             WHERE json_extract(m.metadata, '$.governance') IS NOT NULL",
10952            [],
10953            |r| r.get(0),
10954        )
10955        .unwrap_or(0);
10956    let total_meta: i64 = conn
10957        .query_row("SELECT COUNT(*) FROM namespace_meta", [], |r| r.get(0))
10958        .unwrap_or(0);
10959    let with = usize::try_from(with_policy.max(0)).unwrap_or(0);
10960    let total = usize::try_from(total_meta.max(0)).unwrap_or(0);
10961    Ok((with, total.saturating_sub(with)))
10962}
10963
10964/// Distribution of the `parent_namespace` chain depth across
10965/// `namespace_meta` rows. Returns a Vec where index `i` is the count of
10966/// namespaces with chain depth `i` (depth 0 = no parent).
10967///
10968/// Walks each row's `parent_namespace` chain up to a hard cap of 16 to
10969/// avoid runaway loops on malformed data. Rows whose chain exceeds the
10970/// cap are bucketed at the cap.
10971///
10972/// # Errors
10973///
10974/// Returns `Err` only on hard SQLite failures.
10975pub fn doctor_governance_depth_distribution(conn: &Connection) -> Result<Vec<usize>> {
10976    const MAX_DEPTH: usize = 16;
10977    let mut stmt = conn.prepare("SELECT namespace, parent_namespace FROM namespace_meta")?;
10978    let rows = stmt.query_map([], |r| {
10979        Ok((r.get::<_, String>(0)?, r.get::<_, Option<String>>(1)?))
10980    })?;
10981    let parent_map: HashMap<String, Option<String>> = rows
10982        .filter_map(rusqlite::Result::ok)
10983        .collect::<HashMap<_, _>>();
10984    let mut hist = vec![0_usize; MAX_DEPTH + 1];
10985    for ns in parent_map.keys() {
10986        let mut depth = 0_usize;
10987        let mut cur = parent_map.get(ns).cloned().flatten();
10988        while let Some(p) = cur {
10989            depth += 1;
10990            if depth >= MAX_DEPTH {
10991                break;
10992            }
10993            cur = parent_map.get(&p).cloned().flatten();
10994        }
10995        let bucket = depth.min(MAX_DEPTH);
10996        hist[bucket] += 1;
10997    }
10998    Ok(hist)
10999}
11000
11001/// Sum of `subscriptions.dispatch_count` and `subscriptions.failure_count`
11002/// across all rows. Returns `(dispatched, failed)`. Used by the doctor to
11003/// estimate webhook delivery success rate.
11004///
11005/// # Errors
11006///
11007/// Returns `Err` only on hard SQLite failures.
11008pub fn doctor_webhook_delivery_totals(conn: &Connection) -> Result<(u64, u64)> {
11009    let dispatched: i64 = conn
11010        .query_row(
11011            "SELECT COALESCE(SUM(dispatch_count), 0) FROM subscriptions",
11012            [],
11013            |r| r.get(0),
11014        )
11015        .unwrap_or(0);
11016    let failed: i64 = conn
11017        .query_row(
11018            "SELECT COALESCE(SUM(failure_count), 0) FROM subscriptions",
11019            [],
11020            |r| r.get(0),
11021        )
11022        .unwrap_or(0);
11023    Ok((
11024        u64::try_from(dispatched.max(0)).unwrap_or(0),
11025        u64::try_from(failed.max(0)).unwrap_or(0),
11026    ))
11027}
11028
// NOTE: the paragraph below describes `doctor_max_sync_skew_secs`, which
// is defined after `list_capability_expansions`. It is kept as a plain
// comment so rustdoc does not attach it to `CapabilityExpansionRow`.
//
// Maximum sync-clock skew in seconds across the `sync_state` table —
// the largest gap between `last_pulled_at` (when this peer last heard
// from a peer) and `last_seen_at` (the peer's own `updated_at` advance).
// Returns `Ok(None)` when `sync_state` is empty or the columns are
// missing on a pre-T3 schema.

// ---------------------------------------------------------------------
// v0.6.4-009 — capability-expansion audit log
// ---------------------------------------------------------------------

/// Single `audit_log` row (capability-expansion shape — extensible).
#[derive(Debug, Clone)]
pub struct CapabilityExpansionRow {
    /// Row identifier (`audit_log.id`).
    pub id: String,
    /// Agent that made the request, when known.
    pub agent_id: Option<String>,
    /// Event discriminator (`audit_log.event_type`).
    pub event_type: String,
    /// Capability family the agent asked for.
    pub requested_family: Option<String>,
    /// Whether the request was granted; decoded from the integer
    /// `granted` column (non-zero = granted).
    pub granted: bool,
    /// Attestation tier recorded with the attempt, when present.
    pub attestation_tier: Option<String>,
    /// Timestamp text as stored in `audit_log.timestamp`.
    pub timestamp: String,
}
11053
11054/// Record a capability-expansion attempt. Used by
11055/// `handle_capabilities_family` after the allowlist decision is made.
11056/// Records BOTH grant and deny outcomes so operators can see attempted
11057/// access patterns even when the gate refused.
11058///
11059/// `granted=true` means the agent received the schemas; `granted=false`
11060/// means the agent was denied or the family was unknown.
11061///
11062/// Best-effort: a failed insert (e.g., disk full) is logged via tracing
11063/// but does not propagate the error to the caller — the audit trail
11064/// must never block the actual call.
11065pub fn record_capability_expansion(
11066    conn: &Connection,
11067    agent_id: Option<&str>,
11068    family: &str,
11069    granted: bool,
11070    attestation_tier: Option<&str>,
11071) {
11072    let id = uuid::Uuid::new_v4().to_string();
11073    let now = Utc::now().to_rfc3339();
11074    let result = conn.execute(
11075        "INSERT INTO audit_log (id, agent_id, event_type, requested_family, \
11076         granted, attestation_tier, timestamp) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
11077        rusqlite::params![
11078            id,
11079            agent_id,
11080            "capability_expansion",
11081            family,
11082            i32::from(granted),
11083            attestation_tier,
11084            now,
11085        ],
11086    );
11087    if let Err(e) = result {
11088        tracing::warn!(
11089            "audit_log insert failed (capability_expansion / agent={:?} / family={}): {e}",
11090            agent_id,
11091            family,
11092        );
11093    }
11094}
11095
11096/// List recent capability-expansion rows, newest first. `limit` clamps
11097/// the row count.
11098pub fn list_capability_expansions(
11099    conn: &Connection,
11100    limit: usize,
11101    agent_filter: Option<&str>,
11102) -> Result<Vec<CapabilityExpansionRow>> {
11103    let n = (limit.min(10_000)) as i64;
11104    let map_row = |r: &rusqlite::Row<'_>| -> rusqlite::Result<CapabilityExpansionRow> {
11105        Ok(CapabilityExpansionRow {
11106            id: r.get(0)?,
11107            agent_id: r.get(1)?,
11108            event_type: r.get(2)?,
11109            requested_family: r.get(3)?,
11110            granted: r.get::<_, i64>(4)? != 0,
11111            attestation_tier: r.get(5)?,
11112            timestamp: r.get(6)?,
11113        })
11114    };
11115    if let Some(a) = agent_filter {
11116        let mut stmt = conn.prepare(
11117            "SELECT id, agent_id, event_type, requested_family, granted, \
11118             attestation_tier, timestamp FROM audit_log \
11119             WHERE event_type = 'capability_expansion' AND agent_id = ?1 \
11120             ORDER BY timestamp DESC LIMIT ?2",
11121        )?;
11122        let rows = stmt.query_map(rusqlite::params![a, n], map_row)?;
11123        rows.collect::<Result<Vec<_>, _>>().map_err(Into::into)
11124    } else {
11125        let mut stmt = conn.prepare(
11126            "SELECT id, agent_id, event_type, requested_family, granted, \
11127             attestation_tier, timestamp FROM audit_log \
11128             WHERE event_type = 'capability_expansion' \
11129             ORDER BY timestamp DESC LIMIT ?1",
11130        )?;
11131        let rows = stmt.query_map(rusqlite::params![n], map_row)?;
11132        rows.collect::<Result<Vec<_>, _>>().map_err(Into::into)
11133    }
11134}
11135
11136pub fn doctor_max_sync_skew_secs(conn: &Connection) -> Result<Option<i64>> {
11137    let mut stmt = match conn.prepare(
11138        "SELECT last_seen_at, last_pulled_at FROM sync_state WHERE last_pulled_at IS NOT NULL",
11139    ) {
11140        Ok(s) => s,
11141        Err(_) => return Ok(None),
11142    };
11143    let rows = stmt.query_map([], |r| Ok((r.get::<_, String>(0)?, r.get::<_, String>(1)?)))?;
11144    let mut max_skew: Option<i64> = None;
11145    for row in rows {
11146        let Ok((seen, pulled)) = row else { continue };
11147        let Ok(s) = chrono::DateTime::parse_from_rfc3339(&seen) else {
11148            continue;
11149        };
11150        let Ok(p) = chrono::DateTime::parse_from_rfc3339(&pulled) else {
11151            continue;
11152        };
11153        let skew = (s.with_timezone(&Utc) - p.with_timezone(&Utc))
11154            .num_seconds()
11155            .abs();
11156        max_skew = Some(max_skew.map_or(skew, |m| m.max(skew)));
11157    }
11158    Ok(max_skew)
11159}
11160
11161// ---------------------------------------------------------------------------
11162// L1-4 — Reflection-depth telemetry for `ai-memory doctor`.
11163// ---------------------------------------------------------------------------
11164
/// One namespace's reflection-depth distribution row returned by
/// [`doctor_reflection_depth_distribution`].
///
/// The four depth buckets mirror the default `max_reflection_depth=3`
/// cap: depth 0 (direct memories), depth 1, depth 2, depth 3+. Depth
/// 3+ is collapsed into a single counter because depths beyond the cap
/// are impossible to store under standard policy; the bucket exists so
/// future schemas with raised caps still produce a non-zero column.
#[derive(Debug, Clone)]
pub struct ReflectionDepthRow {
    /// Namespace the counters below were aggregated over.
    pub namespace: String,
    /// Rows with `reflection_depth = 0`.
    pub depth0: i64,
    /// Rows with `reflection_depth = 1`.
    pub depth1: i64,
    /// Rows with `reflection_depth = 2`.
    pub depth2: i64,
    /// Rows with `reflection_depth >= 3`.
    pub depth3_plus: i64,
    /// Mean `reflection_depth` over the namespace's rows.
    pub avg_depth: f64,
    /// Largest `reflection_depth` seen in the namespace.
    pub max_depth: i64,
    /// Total number of rows in the namespace.
    pub total: i64,
}
11183
11184/// Depth distribution across all namespaces that hold at least one
11185/// memory with `reflection_depth > 0`, plus the `_global_` aggregate.
11186///
11187/// Uses a single GROUP BY pass so the query is a single indexed scan
11188/// over `memories.reflection_depth`. A fresh DB (all rows at depth 0)
11189/// returns an empty `Vec` — the caller (doctor) renders that as
11190/// "no reflections observed".
11191///
11192/// # Errors
11193///
11194/// Returns `Err` only on hard SQLite failures (e.g. the `memories`
11195/// table does not exist yet — pre-migration schemas).
11196pub fn doctor_reflection_depth_distribution(conn: &Connection) -> Result<Vec<ReflectionDepthRow>> {
11197    // Aggregate per namespace, only namespaces that contain at least
11198    // one reflected memory (depth > 0). The doctor renders a global
11199    // summary from the returned rows; the SQL avoids a second pass by
11200    // letting the caller roll up the namespace rows.
11201    let mut stmt = conn.prepare(
11202        "SELECT
11203             namespace,
11204             SUM(CASE WHEN reflection_depth = 0 THEN 1 ELSE 0 END),
11205             SUM(CASE WHEN reflection_depth = 1 THEN 1 ELSE 0 END),
11206             SUM(CASE WHEN reflection_depth = 2 THEN 1 ELSE 0 END),
11207             SUM(CASE WHEN reflection_depth >= 3 THEN 1 ELSE 0 END),
11208             AVG(CAST(reflection_depth AS REAL)),
11209             MAX(reflection_depth),
11210             COUNT(*)
11211         FROM memories
11212         GROUP BY namespace
11213         HAVING MAX(reflection_depth) > 0
11214         ORDER BY namespace",
11215    )?;
11216    let rows = stmt.query_map([], |r| {
11217        Ok(ReflectionDepthRow {
11218            namespace: r.get(0)?,
11219            depth0: r.get(1)?,
11220            depth1: r.get(2)?,
11221            depth2: r.get(3)?,
11222            depth3_plus: r.get(4)?,
11223            avg_depth: r.get(5)?,
11224            max_depth: r.get(6)?,
11225            total: r.get(7)?,
11226        })
11227    })?;
11228    let mut out = Vec::new();
11229    for row in rows {
11230        out.push(row?);
11231    }
11232    Ok(out)
11233}
11234
11235/// Count of `reflection.depth_exceeded` audit events in `signed_events`
11236/// within a given look-back window.
11237///
11238/// `since_rfc3339` is an RFC 3339 timestamp; only events with
11239/// `timestamp >= since_rfc3339` are counted. Pass the epoch
11240/// (`"1970-01-01T00:00:00Z"`) to count all-time.
11241///
11242/// Returns `0` when the `signed_events` table does not exist (pre-H5
11243/// schemas) rather than propagating the error, matching the pattern
11244/// in other doctor helpers.
11245///
11246/// # Errors
11247///
11248/// Returns `Err` only on hard query failures (table exists but query
11249/// is malformed — should not happen in practice).
11250pub fn doctor_reflection_depth_exceeded_count(
11251    conn: &Connection,
11252    since_rfc3339: &str,
11253) -> Result<i64> {
11254    let n: i64 = conn
11255        .query_row(
11256            "SELECT COUNT(*) FROM signed_events
11257             WHERE event_type = 'reflection.depth_exceeded'
11258               AND timestamp >= ?1",
11259            params![since_rfc3339],
11260            |r| r.get(0),
11261        )
11262        .unwrap_or(0);
11263    Ok(n)
11264}
11265
11266/// Reflection totals per namespace: memories created in the last 24h,
11267/// 7d, and all-time that have `reflection_depth > 0`.
11268///
11269/// Returns one tuple `(ns, last_24h, last_7d, all_time)` per
11270/// namespace that has at least one reflected memory. Namespaces with
11271/// no reflections are omitted; the caller renders "no reflections" for
11272/// the global summary.
11273///
11274/// # Errors
11275///
11276/// Returns `Err` on hard SQLite failures.
11277pub fn doctor_reflection_totals_by_namespace(
11278    conn: &Connection,
11279) -> Result<Vec<(String, i64, i64, i64)>> {
11280    let now = Utc::now();
11281    let last_day_cutoff = (now - chrono::Duration::hours(24)).to_rfc3339();
11282    let cutoff_7d = (now - chrono::Duration::days(7)).to_rfc3339();
11283
11284    let mut stmt = conn.prepare(
11285        "SELECT
11286             namespace,
11287             SUM(CASE WHEN created_at >= ?1 THEN 1 ELSE 0 END),
11288             SUM(CASE WHEN created_at >= ?2 THEN 1 ELSE 0 END),
11289             COUNT(*)
11290         FROM memories
11291         WHERE reflection_depth > 0
11292         GROUP BY namespace
11293         ORDER BY namespace",
11294    )?;
11295    let rows = stmt.query_map(params![last_day_cutoff, cutoff_7d], |r| {
11296        Ok((
11297            r.get::<_, String>(0)?,
11298            r.get::<_, i64>(1)?,
11299            r.get::<_, i64>(2)?,
11300            r.get::<_, i64>(3)?,
11301        ))
11302    })?;
11303    let mut out = Vec::new();
11304    for row in rows {
11305        out.push(row?);
11306    }
11307    Ok(out)
11308}
11309
11310#[cfg(test)]
11311mod tests {
11312    use super::*;
11313    use crate::models::{MID_TTL_EXTEND_SECS, Memory, SHORT_TTL_EXTEND_SECS, Tier};
11314
11315    fn test_db() -> Connection {
11316        open(std::path::Path::new(":memory:")).unwrap()
11317    }
11318
11319    /// Insert a minimal memory row with an explicit `updated_at` so the
11320    /// federation-catchup tests can control the range boundary. Only the
11321    /// NOT-NULL/no-default columns are specified; everything else falls to
11322    /// the schema defaults (which `row_to_memory` reads cleanly).
11323    fn insert_memory_at(conn: &Connection, id: &str, updated_at: &str) {
11324        conn.execute(
11325            "INSERT INTO memories (id, tier, namespace, title, content, created_at, updated_at) \
11326             VALUES (?1, 'mid', 'ns', ?1, 'content body', ?2, ?2)",
11327            params![id, updated_at],
11328        )
11329        .expect("insert memory row");
11330    }
11331
11332    #[test]
11333    fn memories_updated_since_sargable_split_none_and_some_paths() {
11334        // #1476 — the OR-NULL predicate was split into a None path (no
11335        // predicate, ORDER BY updated_at ASC) and a Some path (strict
11336        // `updated_at > ?1`). Pin the behavioral contract of both branches
11337        // so the sargable rewrite can never silently change which rows a
11338        // peer catchup observes.
11339        let conn = test_db();
11340        let t1 = "2026-01-01T00:00:00+00:00";
11341        let t2 = "2026-01-02T00:00:00+00:00";
11342        let t3 = "2026-01-03T00:00:00+00:00";
11343        // Insert out of order to prove ORDER BY actually sorts.
11344        insert_memory_at(&conn, "b", t2);
11345        insert_memory_at(&conn, "c", t3);
11346        insert_memory_at(&conn, "a", t1);
11347
11348        // None path: every row, ascending by updated_at.
11349        let all = memories_updated_since(&conn, None, 100).expect("none path");
11350        let ids: Vec<&str> = all.iter().map(|m| m.id.as_str()).collect();
11351        assert_eq!(
11352            ids,
11353            vec!["a", "b", "c"],
11354            "None path: all rows ASC by updated_at"
11355        );
11356
11357        // Some path is STRICTLY greater — the boundary row (t1) is excluded.
11358        let after_t1 = memories_updated_since(&conn, Some(t1), 100).expect("some path");
11359        let ids: Vec<&str> = after_t1.iter().map(|m| m.id.as_str()).collect();
11360        assert_eq!(
11361            ids,
11362            vec!["b", "c"],
11363            "Some(t1): strict > excludes the boundary row"
11364        );
11365
11366        // Past the newest row → empty.
11367        let after_t3 = memories_updated_since(&conn, Some(t3), 100).expect("some path empty");
11368        assert!(
11369            after_t3.is_empty(),
11370            "Some(t3): nothing strictly newer than the max"
11371        );
11372
11373        // LIMIT caps from the low end of the range (oldest-first under ASC).
11374        let one = memories_updated_since(&conn, Some(t1), 1).expect("some path limited");
11375        let ids: Vec<&str> = one.iter().map(|m| m.id.as_str()).collect();
11376        assert_eq!(
11377            ids,
11378            vec!["b"],
11379            "Some(t1) LIMIT 1: oldest row strictly after t1"
11380        );
11381    }
11382
11383    #[test]
11384    fn memories_updated_since_uses_updated_at_index() {
11385        // #1476 — the sargable Some path must resolve through
11386        // `idx_memories_updated_at`, not a full table scan. Assert the
11387        // query plan references the index via EXPLAIN QUERY PLAN.
11388        let conn = test_db();
11389        let mut stmt = conn
11390            .prepare(
11391                "EXPLAIN QUERY PLAN \
11392                 SELECT id FROM memories WHERE updated_at > ?1 \
11393                 ORDER BY updated_at ASC LIMIT ?2",
11394            )
11395            .expect("prepare explain");
11396        let plan: String = stmt
11397            .query_map(params!["2026-01-01T00:00:00+00:00", 10_i64], |r| {
11398                r.get::<_, String>(3)
11399            })
11400            .expect("explain rows")
11401            .map(|r| r.expect("explain detail"))
11402            .collect::<Vec<_>>()
11403            .join(" | ");
11404        assert!(
11405            plan.contains("idx_memories_updated_at"),
11406            "sargable catchup query must use idx_memories_updated_at; plan was: {plan}"
11407        );
11408    }
11409
11410    #[test]
11411    fn perf_8_hierarchy_in_clause_cache_hits_on_repeat() {
11412        // PERF-8 — verify cached fragment matches the freshly-
11413        // computed value byte-equal. Cache invalidation isn't part
11414        // of the public contract (ancestors are deterministic on
11415        // the namespace input), so a cache hit must be wire-equal
11416        // to a cold compute.
11417        hierarchy_cache_clear_for_tests();
11418        let ns = Some("alphaone/team/alice");
11419        let (a, active_a) = hierarchy_in_clause(ns);
11420        let (b, active_b) = hierarchy_in_clause(ns);
11421        assert!(active_a && active_b);
11422        assert_eq!(
11423            a, b,
11424            "PERF-8: cached hierarchy_in_clause result drift on second lookup",
11425        );
11426        assert!(
11427            a.expect("non-None fragment")
11428                .contains("AND m.namespace IN ("),
11429            "PERF-8: fragment shape regressed",
11430        );
11431    }
11432
11433    #[test]
11434    fn perf_8_hierarchy_cache_handles_non_hierarchical_ns() {
11435        // Non-hierarchical namespaces (no `/`) MUST short-circuit
11436        // before touching the cache so the cache only stores the
11437        // legitimate entries.
11438        hierarchy_cache_clear_for_tests();
11439        let (frag, active) = hierarchy_in_clause(Some("global"));
11440        assert_eq!(frag, None);
11441        assert!(!active);
11442    }
11443
11444    #[test]
11445    fn perf_8_hierarchy_cache_bounded_under_pressure() {
11446        // Filling the cache past HIERARCHY_CACHE_MAX must not
11447        // unbounded-grow it; eviction kicks in beyond the cap.
11448        hierarchy_cache_clear_for_tests();
11449        for i in 0..(HIERARCHY_CACHE_MAX * 2) {
11450            let ns = format!("tenant{i}/sub");
11451            let _ = hierarchy_in_clause(Some(&ns));
11452        }
11453        let cache_len = hierarchy_cache().lock().unwrap().len();
11454        assert!(
11455            cache_len <= HIERARCHY_CACHE_MAX,
11456            "PERF-8: hierarchy cache grew unbounded: {cache_len} > {HIERARCHY_CACHE_MAX}",
11457        );
11458    }
11459
11460    /// v0.7.0 #981 — `get_many` batches the SELECTs the semantic-phase
11461    /// HNSW recall branch previously issued per-id. This test pins:
11462    ///   1. Empty `ids` short-circuits to an empty map without touching
11463    ///      the connection.
11464    ///   2. All requested + existing rows land in the result map.
11465    ///   3. Missing ids are silently dropped (no error, no panic) —
11466    ///      the caller observes via `map.get(&id).is_none()`.
11467    ///   4. Order doesn't matter — `IN (...)` is unordered; callers
11468    ///      that need original ordering re-apply via the hit list.
11469    ///   5. Chunking >500 ids still returns every row.
11470    #[test]
11471    fn get_many_batches_and_handles_empty_missing_and_chunked_inputs_981() {
11472        let conn = test_db();
11473        // Seed 3 rows.
11474        let m1 = make_memory("alpha", "ns/a", Tier::Long, 5);
11475        let m2 = make_memory("beta", "ns/b", Tier::Long, 5);
11476        let m3 = make_memory("gamma", "ns/c", Tier::Long, 5);
11477        insert(&conn, &m1).unwrap();
11478        insert(&conn, &m2).unwrap();
11479        insert(&conn, &m3).unwrap();
11480
11481        // (1) Empty input.
11482        assert!(get_many(&conn, &[]).unwrap().is_empty());
11483
11484        // (2) Existing ids.
11485        let ids = vec![m1.id.clone(), m2.id.clone()];
11486        let got = get_many(&conn, &ids).unwrap();
11487        assert_eq!(got.len(), 2);
11488        assert!(got.contains_key(&m1.id));
11489        assert!(got.contains_key(&m2.id));
11490        assert!(!got.contains_key(&m3.id));
11491
11492        // (3) Mixed existing + missing — missing silently dropped.
11493        let mixed = vec![m1.id.clone(), "nope-not-a-real-id".to_string()];
11494        let got = get_many(&conn, &mixed).unwrap();
11495        assert_eq!(got.len(), 1);
11496        assert!(got.contains_key(&m1.id));
11497
11498        // (4) Order doesn't matter — IN clause is set-like.
11499        let reversed = vec![m3.id.clone(), m2.id.clone(), m1.id.clone()];
11500        let got = get_many(&conn, &reversed).unwrap();
11501        assert_eq!(got.len(), 3);
11502        for id in &reversed {
11503            assert!(got.contains_key(id), "id {id} missing from set-fetch");
11504        }
11505
11506        // (5) Chunked >500 ids still returns every row.
11507        let mut bulk: Vec<Memory> = Vec::with_capacity(750);
11508        let mut bulk_ids: Vec<String> = Vec::with_capacity(750);
11509        for i in 0..750 {
11510            let m = make_memory(&format!("bulk-{i}"), "ns/bulk", Tier::Long, 1);
11511            insert(&conn, &m).unwrap();
11512            bulk_ids.push(m.id.clone());
11513            bulk.push(m);
11514        }
11515        let got = get_many(&conn, &bulk_ids).unwrap();
11516        assert_eq!(
11517            got.len(),
11518            750,
11519            "chunked fetch >500 must still return every row",
11520        );
11521    }
11522
11523    fn make_memory(title: &str, ns: &str, tier: Tier, priority: i32) -> Memory {
11524        let now = chrono::Utc::now().to_rfc3339();
11525        Memory {
11526            id: uuid::Uuid::new_v4().to_string(),
11527            tier: tier.clone(),
11528            namespace: ns.to_string(),
11529            title: title.to_string(),
11530            content: format!("Content for {title}"),
11531            tags: vec![],
11532            priority,
11533            confidence: 1.0,
11534            source: "test".to_string(),
11535            access_count: 0,
11536            created_at: now.clone(),
11537            updated_at: now,
11538            last_accessed_at: None,
11539            expires_at: tier
11540                .default_ttl_secs()
11541                .map(|s| (chrono::Utc::now() + chrono::Duration::seconds(s)).to_rfc3339()),
11542            metadata: serde_json::json!({}),
11543            reflection_depth: 0,
11544            memory_kind: crate::models::MemoryKind::Observation,
11545            entity_id: None,
11546            persona_version: None,
11547            citations: Vec::new(),
11548            source_uri: None,
11549            source_span: None,
11550            confidence_source: ConfidenceSource::CallerProvided,
11551            confidence_signals: None,
11552            confidence_decayed_at: None,
11553            version: 1,
11554        }
11555    }
11556
11557    fn mem_with_scope(ns: &str, scope: Option<&str>) -> Memory {
11558        let mut m = make_memory("scoped", ns, Tier::Long, 5);
11559        if let Some(s) = scope {
11560            let mut map = serde_json::Map::new();
11561            map.insert(
11562                crate::META_KEY_SCOPE.to_string(),
11563                serde_json::Value::String(s.to_string()),
11564            );
11565            m.metadata = serde_json::Value::Object(map);
11566        }
11567        m
11568    }
11569
11570    // Pins the Rust-side visibility predicate (`is_visible`) that the HNSW
11571    // recall branch uses when SQL-side visibility can't be attached. Exercises
11572    // every `MemoryScope` arm plus `matches_subtree`, which the integration
11573    // recall paths only hit for whichever scope the fixture corpus happens to
11574    // carry — leaving the other arms uncovered. Deterministic, no DB.
11575    #[test]
11576    fn is_visible_scope_matrix_covers_every_arm() {
11577        // No-agent caller (all-None prefixes) bypasses the filter entirely.
11578        let unfiltered = (None, None, None, None);
11579        assert!(super::is_visible(
11580            &mem_with_scope("acme/eng/web", Some("private")),
11581            &unfiltered
11582        ));
11583
11584        // 4-level agent ns populates every prefix slot:
11585        // p=acme/eng/web/team, t=acme/eng/web, u=acme/eng, o=acme.
11586        let prefixes = super::compute_visibility_prefixes(Some("acme/eng/web/team"));
11587        assert_eq!(
11588            prefixes,
11589            (
11590                Some("acme/eng/web/team".to_string()),
11591                Some("acme/eng/web".to_string()),
11592                Some("acme/eng".to_string()),
11593                Some("acme".to_string()),
11594            )
11595        );
11596
11597        // Collective: visible to anyone.
11598        assert!(super::is_visible(
11599            &mem_with_scope("zzz/other", Some("collective")),
11600            &prefixes
11601        ));
11602
11603        // Private: only the caller's own namespace (p) is visible.
11604        assert!(super::is_visible(
11605            &mem_with_scope("acme/eng/web/team", Some("private")),
11606            &prefixes
11607        ));
11608        assert!(!super::is_visible(
11609            &mem_with_scope("acme/eng/web", Some("private")),
11610            &prefixes
11611        ));
11612
11613        // Absent scope key → MemoryScope::default() (Private) semantics.
11614        assert!(super::is_visible(
11615            &mem_with_scope("acme/eng/web/team", None),
11616            &prefixes
11617        ));
11618        assert!(!super::is_visible(
11619            &mem_with_scope("acme/other", None),
11620            &prefixes
11621        ));
11622
11623        // Team subtree (t = acme/eng/web): exact + descendant in, sibling out.
11624        assert!(super::is_visible(
11625            &mem_with_scope("acme/eng/web", Some("team")),
11626            &prefixes
11627        ));
11628        assert!(super::is_visible(
11629            &mem_with_scope("acme/eng/web/team/v2", Some("team")),
11630            &prefixes
11631        ));
11632        assert!(!super::is_visible(
11633            &mem_with_scope("acme/eng/api", Some("team")),
11634            &prefixes
11635        ));
11636
11637        // Unit subtree (u = acme/eng).
11638        assert!(super::is_visible(
11639            &mem_with_scope("acme/eng", Some("unit")),
11640            &prefixes
11641        ));
11642        assert!(!super::is_visible(
11643            &mem_with_scope("acme/sales", Some("unit")),
11644            &prefixes
11645        ));
11646
11647        // Org subtree (o = acme).
11648        assert!(super::is_visible(
11649            &mem_with_scope("acme", Some("org")),
11650            &prefixes
11651        ));
11652        assert!(!super::is_visible(
11653            &mem_with_scope("globex", Some("org")),
11654            &prefixes
11655        ));
11656
11657        // matches_subtree None arm: a shallow agent leaves the org slot empty,
11658        // so an org-scoped memory is denied (no prefix to match against).
11659        let shallow = super::compute_visibility_prefixes(Some("acme"));
11660        assert_eq!(shallow.3, None);
11661        assert!(!super::is_visible(
11662            &mem_with_scope("acme", Some("org")),
11663            &shallow
11664        ));
11665
11666        // Unknown scope string → from_str None → caller denied.
11667        assert!(!super::is_visible(
11668            &mem_with_scope("acme/eng/web/team", Some("definitely-not-a-scope")),
11669            &prefixes
11670        ));
11671
11672        // None-agent → all-None tuple (the no-filter sentinel).
11673        assert_eq!(
11674            super::compute_visibility_prefixes(None),
11675            (None, None, None, None)
11676        );
11677    }
11678
11679    #[test]
11680    fn open_creates_schema() {
11681        let conn = test_db();
11682        let count: i64 = conn
11683            .query_row("SELECT COUNT(*) FROM memories", [], |r| r.get(0))
11684            .unwrap();
11685        assert_eq!(count, 0);
11686    }
11687
11688    #[test]
11689    fn insert_and_get() {
11690        let conn = test_db();
11691        let mem = make_memory("Test insert", "test", Tier::Long, 5);
11692        let id = insert(&conn, &mem).unwrap();
11693        let got = get(&conn, &id).unwrap().unwrap();
11694        assert_eq!(got.title, "Test insert");
11695        assert_eq!(got.namespace, "test");
11696        assert_eq!(got.priority, 5);
11697    }
11698
11699    #[test]
11700    fn get_nonexistent() {
11701        let conn = test_db();
11702        let got = get(&conn, "nonexistent-id").unwrap();
11703        assert!(got.is_none());
11704    }
11705
11706    // #1466 — write-path chokepoint regression. A non-Long memory handed
11707    // to any insert path with `expires_at: None` must land with a
11708    // tier-default expiry so GC (`expires_at IS NOT NULL AND expires_at <
11709    // now`) can eventually reap it; before the fix it landed NULL =
11710    // immortal. Long stays NULL; an explicit expiry is preserved.
11711
11712    fn ttl_gap_secs(created_at: &str, expires_at: &str) -> i64 {
11713        let base = chrono::DateTime::parse_from_rfc3339(created_at).unwrap();
11714        let exp = chrono::DateTime::parse_from_rfc3339(expires_at).unwrap();
11715        (exp - base).num_seconds()
11716    }
11717
11718    #[test]
11719    fn insert_backfills_mid_expiry_when_none() {
11720        let conn = test_db();
11721        let mut mem = make_memory("mid none", "test", Tier::Mid, 5);
11722        mem.expires_at = None;
11723        let id = insert(&conn, &mem).unwrap();
11724        let got = get(&conn, &id).unwrap().unwrap();
11725        let exp = got.expires_at.expect("mid must not land immortal");
11726        assert_eq!(ttl_gap_secs(&got.created_at, &exp), crate::SECS_PER_WEEK);
11727    }
11728
11729    #[test]
11730    fn insert_backfills_short_expiry_when_none() {
11731        let conn = test_db();
11732        let mut mem = make_memory("short none", "test", Tier::Short, 5);
11733        mem.expires_at = None;
11734        let id = insert(&conn, &mem).unwrap();
11735        let got = get(&conn, &id).unwrap().unwrap();
11736        let exp = got.expires_at.expect("short must not land immortal");
11737        assert_eq!(
11738            ttl_gap_secs(&got.created_at, &exp),
11739            6 * crate::SECS_PER_HOUR
11740        );
11741    }
11742
11743    #[test]
11744    fn insert_leaves_long_expiry_none() {
11745        let conn = test_db();
11746        let mut mem = make_memory("long none", "test", Tier::Long, 5);
11747        mem.expires_at = None;
11748        let id = insert(&conn, &mem).unwrap();
11749        let got = get(&conn, &id).unwrap().unwrap();
11750        assert!(got.expires_at.is_none(), "long has no TTL — must stay NULL");
11751    }
11752
11753    #[test]
11754    fn insert_preserves_explicit_expiry() {
11755        let conn = test_db();
11756        let explicit = "2027-06-15T12:00:00+00:00".to_string();
11757        let mut mem = make_memory("mid explicit", "test", Tier::Mid, 5);
11758        mem.expires_at = Some(explicit.clone());
11759        let id = insert(&conn, &mem).unwrap();
11760        let got = get(&conn, &id).unwrap().unwrap();
11761        assert_eq!(got.expires_at, Some(explicit));
11762    }
11763
11764    #[test]
11765    fn insert_with_conflict_backfills_mid_expiry_when_none() {
11766        let conn = test_db();
11767        let mut mem = make_memory("conflict mid", "test", Tier::Mid, 5);
11768        mem.expires_at = None;
11769        let id = insert_with_conflict(&conn, &mem, ConflictMode::Merge).unwrap();
11770        let got = get(&conn, &id).unwrap().unwrap();
11771        let exp = got.expires_at.expect("mid must not land immortal");
11772        assert_eq!(ttl_gap_secs(&got.created_at, &exp), crate::SECS_PER_WEEK);
11773    }
11774
11775    #[test]
11776    fn insert_if_newer_backfills_mid_expiry_when_none() {
11777        let conn = test_db();
11778        let mut mem = make_memory("newer mid", "test", Tier::Mid, 5);
11779        mem.expires_at = None;
11780        let id = insert_if_newer(&conn, &mem).unwrap();
11781        let got = get(&conn, &id).unwrap().unwrap();
11782        let exp = got.expires_at.expect("mid must not land immortal");
11783        assert_eq!(ttl_gap_secs(&got.created_at, &exp), crate::SECS_PER_WEEK);
11784    }
11785
11786    #[test]
11787    fn consolidate_backfills_mid_expiry() {
11788        let conn = test_db();
11789        let a = make_memory("src a", "test", Tier::Mid, 5);
11790        let b = make_memory("src b", "test", Tier::Mid, 5);
11791        let id_a = insert(&conn, &a).unwrap();
11792        let id_b = insert(&conn, &b).unwrap();
11793        let new_id = consolidate(
11794            &conn,
11795            &[id_a, id_b],
11796            "merged",
11797            "summary body",
11798            "test",
11799            &Tier::Mid,
11800            "test",
11801            "agent-x",
11802        )
11803        .unwrap();
11804        let got = get(&conn, &new_id).unwrap().unwrap();
11805        let exp = got
11806            .expires_at
11807            .expect("consolidated mid must not land immortal");
11808        assert_eq!(ttl_gap_secs(&got.created_at, &exp), crate::SECS_PER_WEEK);
11809    }
11810
11811    #[test]
11812    fn update_partial_fields() {
11813        let conn = test_db();
11814        let mem = make_memory("Original", "test", Tier::Mid, 5);
11815        let id = insert(&conn, &mem).unwrap();
11816
11817        let (found, content_changed) = update(
11818            &conn,
11819            &id,
11820            Some("Updated Title"),
11821            None,
11822            None,
11823            None,
11824            None,
11825            Some(9),
11826            None,
11827            None,
11828            None,
11829        )
11830        .unwrap();
11831        assert!(found);
11832        assert!(content_changed); // title changed
11833
11834        let got = get(&conn, &id).unwrap().unwrap();
11835        assert_eq!(got.title, "Updated Title");
11836        assert_eq!(got.priority, 9);
11837        assert_eq!(got.content, mem.content); // unchanged
11838    }
11839
11840    #[test]
11841    fn update_content_changed_flag() {
11842        let conn = test_db();
11843        let mem = make_memory("Stable", "test", Tier::Mid, 5);
11844        let id = insert(&conn, &mem).unwrap();
11845
11846        // Updating only priority — content_changed should be false
11847        let (found, content_changed) = update(
11848            &conn,
11849            &id,
11850            None,
11851            None,
11852            None,
11853            None,
11854            None,
11855            Some(8),
11856            None,
11857            None,
11858            None,
11859        )
11860        .unwrap();
11861        assert!(found);
11862        assert!(!content_changed);
11863
11864        // Updating content — content_changed should be true
11865        let (found, content_changed) = update(
11866            &conn,
11867            &id,
11868            None,
11869            Some("New content"),
11870            None,
11871            None,
11872            None,
11873            None,
11874            None,
11875            None,
11876            None,
11877        )
11878        .unwrap();
11879        assert!(found);
11880        assert!(content_changed);
11881    }
11882
11883    #[test]
11884    fn update_nonexistent_returns_false() {
11885        let conn = test_db();
11886        let (found, _) = update(
11887            &conn,
11888            "bad-id",
11889            Some("New"),
11890            None,
11891            None,
11892            None,
11893            None,
11894            None,
11895            None,
11896            None,
11897            None,
11898        )
11899        .unwrap();
11900        assert!(!found);
11901    }
11902
11903    #[test]
11904    fn update_tier_downgrade_protection() {
11905        let conn = test_db();
11906        // Long-tier memory should never be downgraded
11907        let mem = make_memory("Permanent", "test", Tier::Long, 9);
11908        let id = insert(&conn, &mem).unwrap();
11909
11910        let (found, _) = update(
11911            &conn,
11912            &id,
11913            None,
11914            None,
11915            Some(&Tier::Short),
11916            None,
11917            None,
11918            None,
11919            None,
11920            None,
11921            None,
11922        )
11923        .unwrap();
11924        assert!(found);
11925        let got = get(&conn, &id).unwrap().unwrap();
11926        assert_eq!(got.tier, Tier::Long); // still long
11927
11928        // Mid-tier should not downgrade to short
11929        let mem2 = make_memory("Working", "test", Tier::Mid, 5);
11930        let id2 = insert(&conn, &mem2).unwrap();
11931
11932        let (found, _) = update(
11933            &conn,
11934            &id2,
11935            None,
11936            None,
11937            Some(&Tier::Short),
11938            None,
11939            None,
11940            None,
11941            None,
11942            None,
11943            None,
11944        )
11945        .unwrap();
11946        assert!(found);
11947        let got2 = get(&conn, &id2).unwrap().unwrap();
11948        assert_eq!(got2.tier, Tier::Mid); // still mid
11949
11950        // Mid-tier CAN upgrade to long
11951        let (found, _) = update(
11952            &conn,
11953            &id2,
11954            None,
11955            None,
11956            Some(&Tier::Long),
11957            None,
11958            None,
11959            None,
11960            None,
11961            None,
11962            None,
11963        )
11964        .unwrap();
11965        assert!(found);
11966        let got3 = get(&conn, &id2).unwrap().unwrap();
11967        assert_eq!(got3.tier, Tier::Long); // upgraded
11968    }
11969
11970    #[test]
11971    fn update_title_collision_returns_error() {
11972        let conn = test_db();
11973        let mem_a = make_memory("Alpha", "test", Tier::Mid, 5);
11974        let mem_b = make_memory("Beta", "test", Tier::Mid, 5);
11975        let id_a = insert(&conn, &mem_a).unwrap();
11976        let _id_b = insert(&conn, &mem_b).unwrap();
11977
11978        // Updating Alpha's title to "Beta" in same namespace should fail
11979        let result = update(
11980            &conn,
11981            &id_a,
11982            Some("Beta"),
11983            None,
11984            None,
11985            None,
11986            None,
11987            None,
11988            None,
11989            None,
11990            None,
11991        );
11992        assert!(result.is_err());
11993        let err = result.unwrap_err().to_string();
11994        assert!(err.contains("already exists in namespace"));
11995    }
11996
11997    #[test]
11998    fn delete_existing() {
11999        let conn = test_db();
12000        let mem = make_memory("To delete", "test", Tier::Short, 3);
12001        let id = insert(&conn, &mem).unwrap();
12002        assert!(delete(&conn, &id).unwrap());
12003        assert!(get(&conn, &id).unwrap().is_none());
12004    }
12005
12006    #[test]
12007    fn delete_nonexistent() {
12008        let conn = test_db();
12009        assert!(!delete(&conn, "bad-id").unwrap());
12010    }
12011
12012    #[test]
12013    fn list_with_namespace_filter() {
12014        let conn = test_db();
12015        insert(&conn, &make_memory("A", "ns1", Tier::Long, 5)).unwrap();
12016        insert(&conn, &make_memory("B", "ns2", Tier::Long, 5)).unwrap();
12017        insert(&conn, &make_memory("C", "ns1", Tier::Long, 5)).unwrap();
12018
12019        let results = list(
12020            &conn,
12021            Some("ns1"),
12022            None,
12023            100,
12024            0,
12025            None,
12026            None,
12027            None,
12028            None,
12029            None,
12030        )
12031        .unwrap();
12032        assert_eq!(results.len(), 2);
12033    }
12034
12035    #[test]
12036    fn list_with_tier_filter() {
12037        let conn = test_db();
12038        insert(&conn, &make_memory("Long", "test", Tier::Long, 5)).unwrap();
12039        insert(&conn, &make_memory("Mid", "test", Tier::Mid, 5)).unwrap();
12040
12041        let results = list(
12042            &conn,
12043            None,
12044            Some(&Tier::Long),
12045            100,
12046            0,
12047            None,
12048            None,
12049            None,
12050            None,
12051            None,
12052        )
12053        .unwrap();
12054        assert_eq!(results.len(), 1);
12055        assert_eq!(results[0].title, "Long");
12056    }
12057
12058    #[test]
12059    fn list_with_limit() {
12060        let conn = test_db();
12061        for i in 0..5 {
12062            insert(
12063                &conn,
12064                &make_memory(&format!("Mem {i}"), "test", Tier::Long, 5),
12065            )
12066            .unwrap();
12067        }
12068        let results = list(&conn, None, None, 3, 0, None, None, None, None, None).unwrap();
12069        assert_eq!(results.len(), 3);
12070    }
12071
12072    #[test]
12073    fn search_keyword_match() {
12074        let conn = test_db();
12075        insert(
12076            &conn,
12077            &make_memory("PostgreSQL config", "test", Tier::Long, 5),
12078        )
12079        .unwrap();
12080        insert(&conn, &make_memory("Redis cache", "test", Tier::Long, 5)).unwrap();
12081
12082        let results = search(
12083            &conn,
12084            "PostgreSQL",
12085            None,
12086            None,
12087            10,
12088            None,
12089            None,
12090            None,
12091            None,
12092            None,
12093            None,
12094            false,
12095        )
12096        .unwrap();
12097        assert_eq!(results.len(), 1);
12098        assert!(results[0].title.contains("PostgreSQL"));
12099    }
12100
12101    #[test]
12102    fn search_no_match() {
12103        let conn = test_db();
12104        insert(&conn, &make_memory("PostgreSQL", "test", Tier::Long, 5)).unwrap();
12105        let results = search(
12106            &conn,
12107            "nonexistent_term_xyz",
12108            None,
12109            None,
12110            10,
12111            None,
12112            None,
12113            None,
12114            None,
12115            None,
12116            None,
12117            false,
12118        )
12119        .unwrap();
12120        assert_eq!(results.len(), 0);
12121    }
12122
12123    #[test]
12124    fn recall_returns_scored() {
12125        let conn = test_db();
12126        insert(
12127            &conn,
12128            &make_memory("Rust programming language", "test", Tier::Long, 8),
12129        )
12130        .unwrap();
12131        insert(
12132            &conn,
12133            &make_memory("Python scripting", "test", Tier::Long, 5),
12134        )
12135        .unwrap();
12136
12137        let (results, _tokens) = recall(
12138            &conn,
12139            "Rust programming",
12140            None,
12141            10,
12142            None,
12143            None,
12144            None,
12145            SHORT_TTL_EXTEND_SECS,
12146            MID_TTL_EXTEND_SECS,
12147            None,
12148            None,
12149            false,
12150            None,
12151        )
12152        .unwrap();
12153        assert!(!results.is_empty());
12154        // Score should be present
12155        let (mem, score) = &results[0];
12156        assert!(mem.title.contains("Rust"));
12157        assert!(*score > 0.0);
12158    }
12159
12160    #[test]
12161    fn recall_empty_context() {
12162        let conn = test_db();
12163        insert(&conn, &make_memory("Test", "test", Tier::Long, 5)).unwrap();
12164        // Empty context should not crash
12165        let results = recall(
12166            &conn,
12167            "",
12168            None,
12169            10,
12170            None,
12171            None,
12172            None,
12173            SHORT_TTL_EXTEND_SECS,
12174            MID_TTL_EXTEND_SECS,
12175            None,
12176            None,
12177            false,
12178            None,
12179        );
12180        // May return empty or error, both acceptable
12181        assert!(results.is_ok() || results.is_err());
12182    }
12183
12184    #[test]
12185    fn touch_increments_access_count() {
12186        let conn = test_db();
12187        let mem = make_memory("Touchable", "test", Tier::Mid, 5);
12188        let id = insert(&conn, &mem).unwrap();
12189        assert_eq!(get(&conn, &id).unwrap().unwrap().access_count, 0);
12190
12191        touch(&conn, &id, SHORT_TTL_EXTEND_SECS, MID_TTL_EXTEND_SECS).unwrap();
12192        assert_eq!(get(&conn, &id).unwrap().unwrap().access_count, 1);
12193
12194        touch(&conn, &id, SHORT_TTL_EXTEND_SECS, MID_TTL_EXTEND_SECS).unwrap();
12195        assert_eq!(get(&conn, &id).unwrap().unwrap().access_count, 2);
12196    }
12197
12198    #[test]
12199    fn find_contradictions_similar_titles() {
12200        let conn = test_db();
12201        insert(
12202            &conn,
12203            &make_memory("Database is PostgreSQL", "infra", Tier::Long, 8),
12204        )
12205        .unwrap();
12206        insert(
12207            &conn,
12208            &make_memory("Database is MySQL", "infra", Tier::Long, 5),
12209        )
12210        .unwrap();
12211
12212        let contradictions = find_contradictions(&conn, "Database is PostgreSQL", "infra").unwrap();
12213        assert!(!contradictions.is_empty());
12214    }
12215
12216    /// Issue #1320 regression — disjoint-topic titles that share only
12217    /// English stopwords ("are", "is", "the") MUST NOT surface as
12218    /// potential contradictions of each other. Pre-fix the FTS5
12219    /// OR-joined query matched any row containing the stopword, so a
12220    /// tomato-fact stored alongside a moon-landing fact and a
12221    /// retrieval-mechanics fact returned every cross-topic pair as
12222    /// `potential_contradictions`. Post-fix the Jaccard floor on
12223    /// stopword-stripped title tokens drops the false positives;
12224    /// `Vec::is_empty()` is the post-condition.
12225    #[test]
12226    fn find_contradictions_disjoint_topics_no_false_positives_1320() {
12227        let conn = test_db();
12228        insert(
12229            &conn,
12230            &make_memory("Tomatoes are red fruit", "v1-p5-disjoint", Tier::Long, 5),
12231        )
12232        .unwrap();
12233        insert(
12234            &conn,
12235            &make_memory(
12236                "Moon landing happened in 1969",
12237                "v1-p5-disjoint",
12238                Tier::Long,
12239                5,
12240            ),
12241        )
12242        .unwrap();
12243        insert(
12244            &conn,
12245            &make_memory(
12246                "Retrieval-augmented generation works by combining recall with synthesis",
12247                "v1-p5-disjoint",
12248                Tier::Long,
12249                5,
12250            ),
12251        )
12252        .unwrap();
12253
12254        // Tomato seed must not flag moon-landing or retrieval rows.
12255        let hits = find_contradictions(&conn, "Tomatoes are red fruit", "v1-p5-disjoint").unwrap();
12256        assert!(
12257            hits.iter().all(|m| m.title == "Tomatoes are red fruit"),
12258            "tomato seed leaked false positives: {:?}",
12259            hits.iter().map(|m| m.title.as_str()).collect::<Vec<_>>(),
12260        );
12261
12262        // Moon-landing seed must not flag tomato or retrieval rows.
12263        let hits =
12264            find_contradictions(&conn, "Moon landing happened in 1969", "v1-p5-disjoint").unwrap();
12265        assert!(
12266            hits.iter()
12267                .all(|m| m.title == "Moon landing happened in 1969"),
12268            "moon-landing seed leaked false positives: {:?}",
12269            hits.iter().map(|m| m.title.as_str()).collect::<Vec<_>>(),
12270        );
12271
12272        // Retrieval seed must not flag tomato or moon-landing rows.
12273        let hits = find_contradictions(
12274            &conn,
12275            "Retrieval-augmented generation works by combining recall with synthesis",
12276            "v1-p5-disjoint",
12277        )
12278        .unwrap();
12279        assert!(
12280            hits.iter().all(|m| m.title.starts_with("Retrieval")),
12281            "retrieval seed leaked false positives: {:?}",
12282            hits.iter().map(|m| m.title.as_str()).collect::<Vec<_>>(),
12283        );
12284    }
12285
12286    /// Issue #1320 regression — pure-stopword seed title must not pull
12287    /// any rows. Pre-fix the FTS5 OR-query expanded to a no-op against
12288    /// the stopword set; post-fix the seed tokenises to empty after
12289    /// stopword removal so the Jaccard floor returns 0 for every
12290    /// candidate.
12291    #[test]
12292    fn find_contradictions_pure_stopword_seed_returns_empty_1320() {
12293        let conn = test_db();
12294        insert(
12295            &conn,
12296            &make_memory(
12297                "The thing is the other thing",
12298                "v1-p5-stopword",
12299                Tier::Long,
12300                5,
12301            ),
12302        )
12303        .unwrap();
12304        let hits = find_contradictions(&conn, "the is a", "v1-p5-stopword").unwrap();
12305        assert!(
12306            hits.is_empty(),
12307            "pure-stopword seed pulled candidates: {:?}",
12308            hits.iter().map(|m| m.title.as_str()).collect::<Vec<_>>(),
12309        );
12310    }
12311
12312    /// Issue #1320 — stage-2 filter must not over-prune the legitimate
12313    /// near-duplicate case. "Database is PostgreSQL" and "Database is
12314    /// MySQL" share `{database}` after stopword removal — Jaccard 1/3,
12315    /// passes the 0.30 floor. Pinned alongside the false-positive test
12316    /// so a future tightening of the floor can't silently regress the
12317    /// supported "similar-title" detection.
12318    #[test]
12319    fn find_contradictions_similar_titles_still_caught_1320() {
12320        let conn = test_db();
12321        insert(
12322            &conn,
12323            &make_memory("Database is PostgreSQL", "v1-p5-positive", Tier::Long, 8),
12324        )
12325        .unwrap();
12326        insert(
12327            &conn,
12328            &make_memory("Database is MySQL", "v1-p5-positive", Tier::Long, 5),
12329        )
12330        .unwrap();
12331        let hits = find_contradictions(&conn, "Database is PostgreSQL", "v1-p5-positive").unwrap();
12332        let titles: Vec<&str> = hits.iter().map(|m| m.title.as_str()).collect();
12333        assert!(
12334            titles.contains(&"Database is MySQL"),
12335            "similar-title detection regressed: {titles:?}",
12336        );
12337    }
12338
12339    #[test]
12340    fn contradiction_title_jaccard_floor_pinned_1320() {
12341        // Pin the compiled floor at 0.30 (the v0.7.0 #1320 calibration
12342        // landing). Lowering it re-introduces stopword noise; raising
12343        // it breaks the "Database is PostgreSQL / MySQL" near-duplicate
12344        // case (Jaccard 1/3 ≈ 0.333). Either direction needs an issue
12345        // ticket and a fresh calibration sweep.
12346        assert!(
12347            (CONTRADICTION_TITLE_JACCARD_FLOOR - 0.30).abs() < f32::EPSILON,
12348            "floor drifted: {CONTRADICTION_TITLE_JACCARD_FLOOR}",
12349        );
12350    }
12351
12352    #[test]
12353    fn contradiction_title_tokens_strips_stopwords_and_lowercases_1320() {
12354        let toks = contradiction_title_tokens("The Database Is PostgreSQL");
12355        assert!(toks.contains("database"));
12356        assert!(toks.contains("postgresql"));
12357        assert!(!toks.contains("the"));
12358        assert!(!toks.contains("is"));
12359    }
12360
12361    #[test]
12362    fn create_and_get_links() {
12363        let conn = test_db();
12364        let id1 = insert(&conn, &make_memory("Memory A", "test", Tier::Long, 5)).unwrap();
12365        let id2 = insert(&conn, &make_memory("Memory B", "test", Tier::Long, 5)).unwrap();
12366
12367        create_link(&conn, &id1, &id2, "related_to").unwrap();
12368        let links = get_links(&conn, &id1).unwrap();
12369        assert_eq!(links.len(), 1);
12370        assert_eq!(
12371            links[0].relation,
12372            crate::models::MemoryLinkRelation::RelatedTo
12373        );
12374    }
12375
12376    #[test]
12377    fn consolidate_merges_memories() {
12378        let conn = test_db();
12379        let id1 = insert(&conn, &make_memory("Part 1", "test", Tier::Mid, 5)).unwrap();
12380        let id2 = insert(&conn, &make_memory("Part 2", "test", Tier::Mid, 5)).unwrap();
12381
12382        let new_id = consolidate(
12383            &conn,
12384            &[id1.clone(), id2.clone()],
12385            "Combined",
12386            "Part 1 + Part 2",
12387            "test",
12388            &Tier::Long,
12389            "test",
12390            "test-consolidator",
12391        )
12392        .unwrap();
12393        // Original memories should be deleted
12394        assert!(get(&conn, &id1).unwrap().is_none());
12395        assert!(get(&conn, &id2).unwrap().is_none());
12396        // New memory should exist
12397        let combined = get(&conn, &new_id).unwrap().unwrap();
12398        assert_eq!(combined.title, "Combined");
12399        assert_eq!(combined.tier, Tier::Long);
12400    }
12401
12402    #[test]
12403    fn stats_counts() {
12404        let conn = test_db();
12405        let path = std::path::Path::new(":memory:");
12406        insert(&conn, &make_memory("A", "ns1", Tier::Long, 5)).unwrap();
12407        insert(&conn, &make_memory("B", "ns1", Tier::Mid, 5)).unwrap();
12408        insert(&conn, &make_memory("C", "ns2", Tier::Short, 5)).unwrap();
12409
12410        let s = stats(&conn, path).unwrap();
12411        assert_eq!(s.total, 3);
12412    }
12413
12414    #[test]
12415    fn gc_removes_expired() {
12416        let conn = test_db();
12417        let mut mem = make_memory("Expired", "test", Tier::Short, 5);
12418        mem.expires_at = Some("2020-01-01T00:00:00+00:00".to_string()); // past
12419        insert(&conn, &mem).unwrap();
12420
12421        let removed = gc(&conn, false).unwrap();
12422        assert_eq!(removed, 1);
12423    }
12424
12425    #[test]
12426    fn gc_preserves_long_term() {
12427        let conn = test_db();
12428        insert(&conn, &make_memory("Permanent", "test", Tier::Long, 5)).unwrap();
12429        let removed = gc(&conn, false).unwrap();
12430        assert_eq!(removed, 0);
12431    }
12432
12433    #[test]
12434    fn gc_archives_before_delete() {
12435        let conn = test_db();
12436        let mut mem = make_memory("Archivable", "test", Tier::Short, 5);
12437        mem.expires_at = Some("2020-01-01T00:00:00+00:00".to_string());
12438        insert(&conn, &mem).unwrap();
12439
12440        let removed = gc(&conn, true).unwrap();
12441        assert_eq!(removed, 1);
12442
12443        // Should be in archive
12444        let archived = list_archived(&conn, None, 10, 0).unwrap();
12445        assert_eq!(archived.len(), 1);
12446        assert_eq!(archived[0]["title"], "Archivable");
12447        assert_eq!(archived[0]["archive_reason"], "ttl_expired");
12448    }
12449
12450    #[test]
12451    fn restore_archived_memory() {
12452        // v0.6.3.1 P2 (G5) — restore preserves the original tier and
12453        // expires_at instead of resetting to long/permanent. Pre-v17 this
12454        // test asserted `is_none()` for expires_at — that was the bug
12455        // being fixed.
12456        let conn = test_db();
12457        let mut mem = make_memory("Restorable", "test", Tier::Short, 5);
12458        let original_expiry = "2020-01-01T00:00:00+00:00".to_string();
12459        mem.expires_at = Some(original_expiry.clone());
12460        let id = insert(&conn, &mem).unwrap();
12461
12462        gc(&conn, true).unwrap();
12463        assert!(get(&conn, &id).unwrap().is_none()); // gone from active
12464
12465        let restored = restore_archived(&conn, &id).unwrap();
12466        assert!(restored);
12467
12468        let got = get(&conn, &id).unwrap().unwrap();
12469        assert_eq!(got.title, "Restorable");
12470        assert_eq!(
12471            got.tier.as_str(),
12472            Tier::Short.as_str(),
12473            "G5: restore must preserve the original tier"
12474        );
12475        assert_eq!(
12476            got.expires_at,
12477            Some(original_expiry),
12478            "G5: restore must preserve the original expires_at"
12479        );
12480    }
12481
12482    #[test]
12483    fn purge_archive_removes_all() {
12484        let conn = test_db();
12485        let mut mem = make_memory("Purgeable", "test", Tier::Short, 5);
12486        mem.expires_at = Some("2020-01-01T00:00:00+00:00".to_string());
12487        insert(&conn, &mem).unwrap();
12488        gc(&conn, true).unwrap();
12489
12490        let purged = purge_archive(&conn, None).unwrap();
12491        assert_eq!(purged, 1);
12492        assert_eq!(list_archived(&conn, None, 10, 0).unwrap().len(), 0);
12493    }
12494
12495    #[test]
12496    fn purge_archive_rejects_negative_days() {
12497        let conn = test_db();
12498        let result = purge_archive(&conn, Some(-1));
12499        assert!(result.is_err());
12500        assert!(result.unwrap_err().to_string().contains("non-negative"));
12501    }
12502
12503    #[test]
12504    fn restore_rejects_active_id_collision() {
12505        let conn = test_db();
12506        let mut mem = make_memory("Collision Test", "test", Tier::Short, 5);
12507        mem.expires_at = Some("2020-01-01T00:00:00+00:00".to_string());
12508        let id = insert(&conn, &mem).unwrap();
12509
12510        // Archive it via GC
12511        gc(&conn, true).unwrap();
12512        assert!(get(&conn, &id).unwrap().is_none());
12513
12514        // Manually insert a memory with the SAME id but different title into active table
12515        conn.execute(
12516            "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, confidence, source, access_count, created_at, updated_at)
12517             VALUES (?1, 'long', 'test', 'Blocker Title', 'blocks restore', '[]', 5, 1.0, 'test', 0, datetime('now'), datetime('now'))",
12518            rusqlite::params![id],
12519        ).unwrap();
12520
12521        // Restore should fail because id exists in active table
12522        let result = restore_archived(&conn, &id);
12523        assert!(result.is_err());
12524        assert!(
12525            result
12526                .unwrap_err()
12527                .to_string()
12528                .contains("already exists in active table")
12529        );
12530    }
12531
12532    #[test]
12533    fn archive_stats_counts() {
12534        let conn = test_db();
12535        let mut m1 = make_memory("Stats A", "ns1", Tier::Short, 5);
12536        m1.expires_at = Some("2020-01-01T00:00:00+00:00".to_string());
12537        let mut m2 = make_memory("Stats B", "ns1", Tier::Short, 5);
12538        m2.expires_at = Some("2020-01-01T00:00:00+00:00".to_string());
12539        insert(&conn, &m1).unwrap();
12540        insert(&conn, &m2).unwrap();
12541        gc(&conn, true).unwrap();
12542
12543        let stats = archive_stats(&conn).unwrap();
12544        assert_eq!(stats["archived_total"], 2);
12545    }
12546
12547    #[test]
12548    fn archive_memory_moves_live_row_to_archive() {
12549        // S29 — explicit archive endpoint must move the row out of
12550        // `memories` and into `archived_memories` with the caller-supplied
12551        // reason. Unlike gc(archive=true), this is NOT gated on
12552        // `expires_at` — the caller is asking for it right now.
12553        let conn = test_db();
12554        let mem = make_memory("Archive me", "s29", Tier::Long, 5);
12555        let id = insert(&conn, &mem).unwrap();
12556
12557        let moved = archive_memory(&conn, &id, Some("explicit")).unwrap();
12558        assert!(moved, "live row must be archived on first call");
12559        assert!(
12560            get(&conn, &id).unwrap().is_none(),
12561            "row must be removed from active table"
12562        );
12563
12564        let archived = list_archived(&conn, None, 10, 0).unwrap();
12565        assert_eq!(archived.len(), 1);
12566        assert_eq!(archived[0]["id"], id);
12567        assert_eq!(archived[0]["archive_reason"], "explicit");
12568
12569        // Second call is a no-op — row is already out of `memories`.
12570        let second = archive_memory(&conn, &id, Some("explicit")).unwrap();
12571        assert!(
12572            !second,
12573            "second archive call must report no-op (no live row)"
12574        );
12575    }
12576
12577    #[test]
12578    fn archive_memory_missing_id_returns_false() {
12579        // Peers that never saw M1 must no-op, not error, on sync_push
12580        // archives fanout.
12581        let conn = test_db();
12582        let moved = archive_memory(&conn, "nonexistent-id", None).unwrap();
12583        assert!(!moved);
12584    }
12585
12586    #[test]
12587    fn archive_memory_default_reason_is_archive() {
12588        let conn = test_db();
12589        let mem = make_memory("Default reason", "s29", Tier::Long, 5);
12590        let id = insert(&conn, &mem).unwrap();
12591        assert!(archive_memory(&conn, &id, None).unwrap());
12592        let archived = list_archived(&conn, None, 10, 0).unwrap();
12593        assert_eq!(archived[0]["archive_reason"], "archive");
12594    }
12595
12596    #[test]
12597    fn export_all_and_links() {
12598        let conn = test_db();
12599        let id1 = insert(&conn, &make_memory("Export A", "test", Tier::Long, 5)).unwrap();
12600        let id2 = insert(&conn, &make_memory("Export B", "test", Tier::Long, 5)).unwrap();
12601        create_link(&conn, &id1, &id2, "supersedes").unwrap();
12602
12603        let mems = export_all(&conn).unwrap();
12604        assert_eq!(mems.len(), 2);
12605        let links = export_links(&conn).unwrap();
12606        assert_eq!(links.len(), 1);
12607    }
12608
12609    #[test]
12610    fn list_namespaces_counts() {
12611        let conn = test_db();
12612        insert(&conn, &make_memory("A", "alpha", Tier::Long, 5)).unwrap();
12613        insert(&conn, &make_memory("B", "alpha", Tier::Long, 5)).unwrap();
12614        insert(&conn, &make_memory("C", "beta", Tier::Long, 5)).unwrap();
12615
12616        let ns = list_namespaces(&conn).unwrap();
12617        assert_eq!(ns.len(), 2);
12618    }
12619
12620    #[test]
12621    fn taxonomy_flat_namespaces_only() {
12622        // No `/` anywhere — every namespace is a direct child of the root.
12623        let conn = test_db();
12624        insert(&conn, &make_memory("A", "alpha", Tier::Long, 5)).unwrap();
12625        insert(&conn, &make_memory("B", "alpha", Tier::Long, 5)).unwrap();
12626        insert(&conn, &make_memory("C", "beta", Tier::Long, 5)).unwrap();
12627
12628        let tax = get_taxonomy(&conn, None, 8, 1000).unwrap();
12629        assert_eq!(tax.total_count, 3);
12630        assert!(!tax.truncated);
12631        assert_eq!(tax.tree.namespace, "");
12632        assert_eq!(tax.tree.subtree_count, 3);
12633        assert_eq!(tax.tree.count, 0); // no memories at the synthetic root
12634        assert_eq!(tax.tree.children.len(), 2);
12635        let alpha = tax
12636            .tree
12637            .children
12638            .iter()
12639            .find(|c| c.name == "alpha")
12640            .unwrap();
12641        assert_eq!(alpha.count, 2);
12642        assert_eq!(alpha.subtree_count, 2);
12643        assert!(alpha.children.is_empty());
12644        let beta = tax.tree.children.iter().find(|c| c.name == "beta").unwrap();
12645        assert_eq!(beta.count, 1);
12646    }
12647
12648    #[test]
12649    fn taxonomy_hierarchical_tree() {
12650        // Mixed depths: tree must aggregate counts up the spine.
12651        let conn = test_db();
12652        insert(&conn, &make_memory("a", "alphaone", Tier::Long, 5)).unwrap();
12653        insert(&conn, &make_memory("b", "alphaone/eng", Tier::Long, 5)).unwrap();
12654        insert(
12655            &conn,
12656            &make_memory("c", "alphaone/eng/platform", Tier::Long, 5),
12657        )
12658        .unwrap();
12659        insert(
12660            &conn,
12661            &make_memory("d", "alphaone/eng/platform", Tier::Long, 5),
12662        )
12663        .unwrap();
12664        insert(&conn, &make_memory("e", "alphaone/sales", Tier::Long, 5)).unwrap();
12665
12666        let tax = get_taxonomy(&conn, None, 8, 1000).unwrap();
12667        assert_eq!(tax.total_count, 5);
12668        assert_eq!(tax.tree.subtree_count, 5);
12669        assert_eq!(tax.tree.children.len(), 1);
12670
12671        let alphaone = &tax.tree.children[0];
12672        assert_eq!(alphaone.name, "alphaone");
12673        assert_eq!(alphaone.namespace, "alphaone");
12674        assert_eq!(alphaone.count, 1); // memory "a" lives at exactly "alphaone"
12675        assert_eq!(alphaone.subtree_count, 5);
12676        assert_eq!(alphaone.children.len(), 2);
12677
12678        let eng = alphaone.children.iter().find(|c| c.name == "eng").unwrap();
12679        assert_eq!(eng.namespace, "alphaone/eng");
12680        assert_eq!(eng.count, 1);
12681        assert_eq!(eng.subtree_count, 3);
12682        let platform = &eng.children[0];
12683        assert_eq!(platform.name, "platform");
12684        assert_eq!(platform.namespace, "alphaone/eng/platform");
12685        assert_eq!(platform.count, 2);
12686        assert_eq!(platform.subtree_count, 2);
12687        assert!(platform.children.is_empty());
12688    }
12689
12690    #[test]
12691    fn taxonomy_prefix_scopes_subtree() {
12692        let conn = test_db();
12693        insert(&conn, &make_memory("a", "alphaone/eng", Tier::Long, 5)).unwrap();
12694        insert(
12695            &conn,
12696            &make_memory("b", "alphaone/eng/platform", Tier::Long, 5),
12697        )
12698        .unwrap();
12699        insert(&conn, &make_memory("c", "alphaone/sales", Tier::Long, 5)).unwrap();
12700        // Sibling that happens to share a string prefix — must NOT bleed in.
12701        insert(&conn, &make_memory("d", "alphaone-sibling", Tier::Long, 5)).unwrap();
12702        insert(&conn, &make_memory("e", "other", Tier::Long, 5)).unwrap();
12703
12704        let tax = get_taxonomy(&conn, Some("alphaone/eng"), 8, 1000).unwrap();
12705        assert_eq!(tax.total_count, 2);
12706        assert_eq!(tax.tree.namespace, "alphaone/eng");
12707        assert_eq!(tax.tree.name, "eng");
12708        assert_eq!(tax.tree.count, 1);
12709        assert_eq!(tax.tree.subtree_count, 2);
12710        assert_eq!(tax.tree.children.len(), 1);
12711        assert_eq!(tax.tree.children[0].name, "platform");
12712        assert_eq!(tax.tree.children[0].count, 1);
12713    }
12714
12715    /// #1531 L5 — `validate_namespace` permits the LIKE metacharacters
12716    /// `%` / `_` in segments (historical flexibility), so the taxonomy
12717    /// prefix walk must escape its descendant pattern. Pre-fix the
12718    /// unescaped `LIKE ?2 || '/%'` let prefix `a%` aggregate the `ax/...`
12719    /// subtree.
12720    #[test]
12721    fn taxonomy_prefix_like_metacharacters_do_not_widen_match_l5() {
12722        let conn = test_db();
12723        insert(&conn, &make_memory("a", "a%/child", Tier::Long, 5)).unwrap();
12724        insert(&conn, &make_memory("b", "ax/child", Tier::Long, 5)).unwrap();
12725        insert(&conn, &make_memory("c", "a_/child", Tier::Long, 5)).unwrap();
12726
12727        // Literal `a%` prefix must scope to the `a%` subtree only.
12728        let tax = get_taxonomy(&conn, Some("a%"), 8, 1000).unwrap();
12729        assert_eq!(
12730            tax.total_count, 1,
12731            "prefix 'a%' must not aggregate 'ax/...' or 'a_/...' subtrees"
12732        );
12733
12734        // Literal `a_` prefix likewise.
12735        let tax = get_taxonomy(&conn, Some("a_"), 8, 1000).unwrap();
12736        assert_eq!(
12737            tax.total_count, 1,
12738            "prefix 'a_' must not aggregate single-char-wildcard siblings"
12739        );
12740
12741        // Plain prefixes are unchanged.
12742        let tax = get_taxonomy(&conn, Some("ax"), 8, 1000).unwrap();
12743        assert_eq!(tax.total_count, 1);
12744    }
12745
12746    #[test]
12747    fn taxonomy_depth_clamps_but_preserves_subtree_counts() {
12748        let conn = test_db();
12749        insert(
12750            &conn,
12751            &make_memory("a", "alphaone/eng/platform/db", Tier::Long, 5),
12752        )
12753        .unwrap();
12754        insert(
12755            &conn,
12756            &make_memory("b", "alphaone/eng/platform/api", Tier::Long, 5),
12757        )
12758        .unwrap();
12759
12760        let tax = get_taxonomy(&conn, None, 2, 1000).unwrap();
12761        assert_eq!(tax.total_count, 2);
12762        let alphaone = &tax.tree.children[0];
12763        let eng = &alphaone.children[0];
12764        // Depth=2 below the empty prefix means we descend exactly two
12765        // levels (alphaone → eng); deeper segments are folded into
12766        // `eng.subtree_count` without rendering child nodes.
12767        assert!(eng.children.is_empty());
12768        assert_eq!(eng.subtree_count, 2);
12769        assert_eq!(eng.count, 0); // nothing at exactly "alphaone/eng"
12770    }
12771
12772    #[test]
12773    fn taxonomy_excludes_expired_memories() {
12774        // Mirror of `list_namespaces` semantics — expired rows must not
12775        // count toward either the tree or `total_count`.
12776        let conn = test_db();
12777        let mut alive = make_memory("alive", "alpha", Tier::Long, 5);
12778        let mut dead = make_memory("dead", "alpha", Tier::Short, 5);
12779        // Force the short-tier memory's expiry into the past.
12780        dead.expires_at = Some("2000-01-01T00:00:00Z".to_string());
12781        alive.expires_at = None;
12782        insert(&conn, &alive).unwrap();
12783        insert(&conn, &dead).unwrap();
12784
12785        let tax = get_taxonomy(&conn, None, 8, 1000).unwrap();
12786        assert_eq!(tax.total_count, 1);
12787        assert_eq!(tax.tree.children.len(), 1);
12788        assert_eq!(tax.tree.children[0].count, 1);
12789    }
12790
12791    #[test]
12792    fn taxonomy_truncates_at_limit_but_total_stays_honest() {
12793        let conn = test_db();
12794        for ns in ["aa", "bb", "cc", "dd", "ee"] {
12795            insert(&conn, &make_memory("m", ns, Tier::Long, 5)).unwrap();
12796        }
12797        let tax = get_taxonomy(&conn, None, 8, 2).unwrap();
12798        // Limit drops 3 namespaces from the walk; total_count must
12799        // still see all 5 memories so renderers can warn the user.
12800        assert_eq!(tax.total_count, 5);
12801        assert!(tax.truncated);
12802        assert_eq!(tax.tree.children.len(), 2);
12803    }
12804
12805    #[test]
12806    fn forget_by_namespace() {
12807        let conn = test_db();
12808        insert(&conn, &make_memory("A", "delete-me", Tier::Long, 5)).unwrap();
12809        insert(&conn, &make_memory("B", "delete-me", Tier::Long, 5)).unwrap();
12810        insert(&conn, &make_memory("C", "keep", Tier::Long, 5)).unwrap();
12811
12812        let deleted = forget(&conn, Some("delete-me"), None, None, false).unwrap();
12813        assert_eq!(deleted, 2);
12814        let remaining = list(&conn, None, None, 100, 0, None, None, None, None, None).unwrap();
12815        assert_eq!(remaining.len(), 1);
12816    }
12817
12818    #[test]
12819    fn set_and_get_embedding() {
12820        let conn = test_db();
12821        let mem = make_memory("Embed test", "test", Tier::Long, 5);
12822        let id = insert(&conn, &mem).unwrap();
12823
12824        let emb = vec![0.1f32, 0.2, 0.3, 0.4];
12825        set_embedding(&conn, &id, &emb).unwrap();
12826
12827        let got = get_embedding(&conn, &id).unwrap().unwrap();
12828        assert_eq!(got.len(), 4);
12829        assert!((got[0] - 0.1).abs() < 1e-6);
12830    }
12831
12832    // -- #1595 / #1598 — resilient-backfill + reembed storage helpers --
12833
12834    /// #1595 — the keyset fetch pages strictly past the cursor in `id`
12835    /// order, and rows that gain an embedding drop out of the scan.
12836    #[test]
12837    fn unembedded_batch_after_cursor_paginates_1595() {
12838        let conn = test_db();
12839        let mut ids: Vec<String> = (0..5)
12840            .map(|i| {
12841                insert(
12842                    &conn,
12843                    &make_memory(&format!("row-{i}"), "bf-1595", Tier::Long, 5),
12844                )
12845                .unwrap()
12846            })
12847            .collect();
12848        ids.sort();
12849
12850        let first = get_unembedded_ids_batch_after(&conn, None, 2).unwrap();
12851        assert_eq!(first.len(), 2);
12852        assert_eq!(first[0].0, ids[0], "scan starts at the smallest id");
12853        let cursor = first.last().unwrap().0.clone();
12854
12855        let rest = get_unembedded_ids_batch_after(&conn, Some(&cursor), 10).unwrap();
12856        assert_eq!(rest.len(), 3);
12857        assert!(
12858            rest.iter().all(|(id, _, _)| id.as_str() > cursor.as_str()),
12859            "every row must sort strictly after the cursor"
12860        );
12861
12862        // Embedded rows leave the unembedded predicate.
12863        set_embedding(&conn, &ids[0], &[0.1, 0.2]).unwrap();
12864        let after = get_unembedded_ids_batch_after(&conn, None, 10).unwrap();
12865        assert_eq!(after.len(), 4);
12866        assert!(after.iter().all(|(id, _, _)| id != &ids[0]));
12867    }
12868
12869    /// #1598 — the reembed full-corpus scan returns embedded AND
12870    /// unembedded rows, honors the namespace filter, and pages by
12871    /// cursor.
12872    #[test]
12873    fn memory_texts_batch_namespace_and_cursor_1598() {
12874        let conn = test_db();
12875        let mut ns_a_ids: Vec<String> = (0..3)
12876            .map(|i| {
12877                insert(
12878                    &conn,
12879                    &make_memory(&format!("a-{i}"), "reembed-a", Tier::Long, 5),
12880                )
12881                .unwrap()
12882            })
12883            .collect();
12884        ns_a_ids.sort();
12885        for i in 0..2 {
12886            insert(
12887                &conn,
12888                &make_memory(&format!("b-{i}"), "reembed-b", Tier::Long, 5),
12889            )
12890            .unwrap();
12891        }
12892        // An already-embedded row MUST still be scanned — reembed
12893        // replaces existing vectors, it is not a backfill.
12894        set_embedding(&conn, &ns_a_ids[0], &[0.5, 0.5]).unwrap();
12895
12896        let all = get_memory_texts_batch(&conn, None, None, 100).unwrap();
12897        assert_eq!(all.len(), 5, "unfiltered scan sees every live row");
12898
12899        let ns_a = get_memory_texts_batch(&conn, Some("reembed-a"), None, 100).unwrap();
12900        assert_eq!(ns_a.len(), 3);
12901        assert_eq!(ns_a[0].0, ns_a_ids[0], "embedded row still scanned");
12902
12903        let first = get_memory_texts_batch(&conn, Some("reembed-a"), None, 1).unwrap();
12904        let cursor = first[0].0.clone();
12905        let rest = get_memory_texts_batch(&conn, Some("reembed-a"), Some(&cursor), 100).unwrap();
12906        assert_eq!(rest.len(), 2);
12907        assert!(rest.iter().all(|(id, _, _)| id.as_str() > cursor.as_str()));
12908    }
12909
12910    /// #1598 — the reembed writer REPLACES vectors across a dim change
12911    /// that the checked writer (G4 invariant) refuses, and skips
12912    /// unknown ids like its checked sibling.
12913    #[test]
12914    fn set_embeddings_batch_reembed_bypasses_dim_invariant_1598() {
12915        let mut conn = test_db();
12916        let id1 = insert(&conn, &make_memory("dim-est", "reembed-dim", Tier::Long, 5)).unwrap();
12917        let id2 = insert(&conn, &make_memory("dim-mig", "reembed-dim", Tier::Long, 5)).unwrap();
12918        // Establish a 4-dim namespace.
12919        set_embedding(&conn, &id1, &[0.1, 0.2, 0.3, 0.4]).unwrap();
12920
12921        // The checked writer enforces the established dim…
12922        let refused =
12923            set_embeddings_batch(&mut conn, &[(id2.clone(), vec![0.1_f32; 8])]).unwrap_err();
12924        assert!(
12925            refused.downcast_ref::<EmbeddingDimMismatch>().is_some(),
12926            "checked writer must refuse the dim change: {refused}"
12927        );
12928
12929        // …the migration writer replaces every row to the new dim.
12930        let entries = vec![
12931            (id1.clone(), vec![0.9_f32; 8]),
12932            (id2.clone(), vec![0.8_f32; 8]),
12933        ];
12934        let written = set_embeddings_batch_reembed(&mut conn, &entries).unwrap();
12935        assert_eq!(written, 2);
12936        assert_eq!(get_embedding(&conn, &id1).unwrap().unwrap().len(), 8);
12937        assert_eq!(get_embedding(&conn, &id2).unwrap().unwrap().len(), 8);
12938        assert_eq!(
12939            namespace_embedding_dim(&conn, "reembed-dim").unwrap(),
12940            Some(8),
12941            "namespace converges to the target dim"
12942        );
12943
12944        // Unknown ids are skipped; empty input is a no-op.
12945        let n = set_embeddings_batch_reembed(
12946            &mut conn,
12947            &[("no-such-id".to_string(), vec![0.1_f32; 8])],
12948        )
12949        .unwrap();
12950        assert_eq!(n, 0);
12951        assert_eq!(set_embeddings_batch_reembed(&mut conn, &[]).unwrap(), 0);
12952    }
12953
12954    /// #1598 — dry-run coverage counts, with and without the namespace
12955    /// filter.
12956    #[test]
12957    fn embedding_coverage_counts_1598() {
12958        let conn = test_db();
12959        let id_a = insert(&conn, &make_memory("c-a", "cov-a", Tier::Long, 5)).unwrap();
12960        insert(&conn, &make_memory("c-b", "cov-a", Tier::Long, 5)).unwrap();
12961        insert(&conn, &make_memory("c-c", "cov-b", Tier::Long, 5)).unwrap();
12962        set_embedding(&conn, &id_a, &[0.1, 0.2]).unwrap();
12963
12964        assert_eq!(embedding_coverage(&conn, None).unwrap(), (3, 1));
12965        assert_eq!(embedding_coverage(&conn, Some("cov-a")).unwrap(), (2, 1));
12966        assert_eq!(embedding_coverage(&conn, Some("cov-b")).unwrap(), (1, 0));
12967        assert_eq!(embedding_coverage(&conn, Some("cov-none")).unwrap(), (0, 0));
12968    }
12969
12970    /// #1598 — the pre-flight dim survey lists every stored dim
12971    /// (sorted) and honors the namespace filter.
12972    #[test]
12973    fn distinct_embedding_dims_lists_mixed_1598() {
12974        let mut conn = test_db();
12975        let id_a = insert(&conn, &make_memory("d-a", "dims-a", Tier::Long, 5)).unwrap();
12976        let id_b = insert(&conn, &make_memory("d-b", "dims-b", Tier::Long, 5)).unwrap();
12977        let id_c = insert(&conn, &make_memory("d-c", "dims-b", Tier::Long, 5)).unwrap();
12978        set_embedding(&conn, &id_a, &[0.1, 0.2]).unwrap();
12979        set_embedding(&conn, &id_b, &[0.1; 8]).unwrap();
12980        // Mixed dims inside ONE namespace only arise mid-migration —
12981        // land them via the reembed writer.
12982        set_embeddings_batch_reembed(&mut conn, &[(id_c, vec![0.2_f32; 4])]).unwrap();
12983
12984        assert_eq!(distinct_embedding_dims(&conn, None).unwrap(), vec![2, 4, 8]);
12985        assert_eq!(
12986            distinct_embedding_dims(&conn, Some("dims-b")).unwrap(),
12987            vec![4, 8]
12988        );
12989        assert!(
12990            distinct_embedding_dims(&conn, Some("dims-none"))
12991                .unwrap()
12992                .is_empty()
12993        );
12994    }
12995
12996    // -- Pillar 2 / Stream D — memory_check_duplicate -------------------
12997
12998    fn insert_with_embedding(
12999        conn: &Connection,
13000        title: &str,
13001        ns: &str,
13002        embedding: &[f32],
13003    ) -> String {
13004        let mem = make_memory(title, ns, Tier::Long, 5);
13005        let id = insert(conn, &mem).unwrap();
13006        set_embedding(conn, &id, embedding).unwrap();
13007        id
13008    }
13009
13010    #[test]
13011    fn check_duplicate_empty_db_returns_no_match() {
13012        let conn = test_db();
13013        let q = vec![1.0_f32, 0.0, 0.0];
13014        let r = check_duplicate(&conn, &q, None, 0.85).unwrap();
13015        assert!(!r.is_duplicate);
13016        assert!(r.nearest.is_none());
13017        assert_eq!(r.candidates_scanned, 0);
13018    }
13019
13020    #[test]
13021    fn check_duplicate_finds_highest_cosine_match() {
13022        let conn = test_db();
13023        // a = [1,0,0]; b = [0,1,0]; c = [0.99,0.01,0]. Query = [1,0,0]
13024        // expects `c` (cos ~0.9999) > `a` (cos =1.0 actually).
13025        // Use distinct vectors: a=[1,0,0] cos 1.0, b=[0.7,0.7,0] cos 0.707,
13026        // c=[0,1,0] cos 0.0. Best should be `a`.
13027        let id_a = insert_with_embedding(&conn, "alpha", "ns", &[1.0, 0.0, 0.0]);
13028        let _id_b = insert_with_embedding(&conn, "beta", "ns", &[0.7, 0.7, 0.0]);
13029        let _id_c = insert_with_embedding(&conn, "gamma", "ns", &[0.0, 1.0, 0.0]);
13030
13031        let q = vec![1.0_f32, 0.0, 0.0];
13032        let r = check_duplicate(&conn, &q, None, 0.85).unwrap();
13033        let nearest = r.nearest.expect("expected a nearest match");
13034        assert_eq!(nearest.id, id_a);
13035        assert!(nearest.similarity > 0.99);
13036        assert_eq!(r.candidates_scanned, 3);
13037        assert!(r.is_duplicate);
13038        assert!((r.threshold - 0.85).abs() < 1e-6);
13039    }
13040
13041    #[test]
13042    fn check_duplicate_below_threshold_not_flagged_but_returns_nearest() {
13043        let conn = test_db();
13044        let id_b = insert_with_embedding(&conn, "beta", "ns", &[0.7, 0.7, 0.0]);
13045
13046        // Cosine([1,0,0], [0.7,0.7,0]) ~ 0.707 — below default 0.85.
13047        let q = vec![1.0_f32, 0.0, 0.0];
13048        let r = check_duplicate(&conn, &q, None, 0.85).unwrap();
13049        let nearest = r
13050            .nearest
13051            .expect("nearest must surface even when below threshold");
13052        assert_eq!(nearest.id, id_b);
13053        assert!(!r.is_duplicate);
13054    }
13055
13056    #[test]
13057    fn check_duplicate_threshold_clamped_to_floor() {
13058        let conn = test_db();
13059        // Caller passes a permissive 0.0; the response threshold must
13060        // be clamped to DUPLICATE_THRESHOLD_MIN so unrelated content
13061        // can't be dressed as a merge candidate.
13062        let _ = insert_with_embedding(&conn, "x", "ns", &[1.0, 0.0, 0.0]);
13063        let q = vec![0.0_f32, 1.0, 0.0]; // orthogonal — cosine 0.0
13064        let r = check_duplicate(&conn, &q, None, 0.0).unwrap();
13065        assert!((r.threshold - DUPLICATE_THRESHOLD_MIN).abs() < 1e-6);
13066        assert!(!r.is_duplicate);
13067    }
13068
13069    #[test]
13070    fn check_duplicate_namespace_filter_isolates_scan() {
13071        let conn = test_db();
13072        let _hit_in_other_ns = insert_with_embedding(&conn, "x", "other", &[1.0, 0.0, 0.0]);
13073        let id_target = insert_with_embedding(&conn, "y", "ns", &[0.6, 0.8, 0.0]);
13074
13075        let q = vec![1.0_f32, 0.0, 0.0];
13076        let r = check_duplicate(&conn, &q, Some("ns"), 0.85).unwrap();
13077        assert_eq!(r.candidates_scanned, 1);
13078        assert_eq!(r.nearest.expect("namespace filter ignored").id, id_target);
13079    }
13080
13081    #[test]
13082    fn check_duplicate_skips_expired_rows() {
13083        let conn = test_db();
13084        // Short-tier memory with a backdated `expires_at` is past the
13085        // live-row gate and must not be a candidate.
13086        let mut mem = make_memory("expired", "ns", Tier::Short, 5);
13087        mem.expires_at = Some((chrono::Utc::now() - chrono::Duration::seconds(60)).to_rfc3339());
13088        let id = insert(&conn, &mem).unwrap();
13089        set_embedding(&conn, &id, &[1.0, 0.0, 0.0]).unwrap();
13090
13091        let q = vec![1.0_f32, 0.0, 0.0];
13092        let r = check_duplicate(&conn, &q, None, 0.85).unwrap();
13093        assert_eq!(r.candidates_scanned, 0);
13094        assert!(r.nearest.is_none());
13095    }
13096
13097    #[test]
13098    fn check_duplicate_skips_unembedded_rows() {
13099        let conn = test_db();
13100        // One memory with an embedding, one without — only the embedded
13101        // row should appear in `candidates_scanned`.
13102        let id_embedded = insert_with_embedding(&conn, "with-emb", "ns", &[1.0, 0.0, 0.0]);
13103        let mem = make_memory("no-emb", "ns", Tier::Long, 5);
13104        let _ = insert(&conn, &mem).unwrap();
13105
13106        let q = vec![1.0_f32, 0.0, 0.0];
13107        let r = check_duplicate(&conn, &q, None, 0.85).unwrap();
13108        assert_eq!(r.candidates_scanned, 1);
13109        assert_eq!(r.nearest.expect("embedded match").id, id_embedded);
13110    }
13111
13112    #[test]
13113    fn check_duplicate_skips_blob_with_non_multiple_of_4_length() {
13114        // Regression: pre-fix, an embedding blob whose length was not
13115        // a multiple of 4 would silently drop a trailing partial chunk
13116        // via chunks_exact and compute cosine against a shorter
13117        // candidate vector — producing a misleading score. The bounds
13118        // check now skips the row entirely.
13119        let conn = test_db();
13120        let mem = make_memory("malformed-blob", "ns", Tier::Long, 5);
13121        let id = insert(&conn, &mem).unwrap();
13122        // Write a 7-byte blob (1 short of 8 = 2 f32s) directly to
13123        // sqlite, bypassing set_embedding which only takes &[f32].
13124        conn.execute(
13125            "UPDATE memories SET embedding = ?1 WHERE id = ?2",
13126            params![&[0u8; 7][..], &id],
13127        )
13128        .unwrap();
13129
13130        let q = vec![1.0_f32, 0.0];
13131        let r = check_duplicate(&conn, &q, None, 0.85).unwrap();
13132        assert_eq!(
13133            r.candidates_scanned, 0,
13134            "malformed blob must be skipped, not silently truncated"
13135        );
13136        assert!(r.nearest.is_none());
13137    }
13138
13139    #[test]
13140    fn check_duplicate_skips_blob_with_dimension_mismatch() {
13141        // Regression: a blob with a valid length (multiple of 4) but
13142        // wrong dimension vs the query embedding must NOT be scored;
13143        // cosine_similarity zips and would silently truncate to the
13144        // shorter input, producing a wrong similarity.
13145        let conn = test_db();
13146        // Insert a memory with a 3-dim embedding via the normal path.
13147        let _id = insert_with_embedding(&conn, "different-dim", "ns", &[1.0, 0.0, 0.0]);
13148
13149        // Query with a 4-dim embedding — different from the candidate.
13150        let q = vec![1.0_f32, 0.0, 0.0, 0.0];
13151        let r = check_duplicate(&conn, &q, None, 0.85).unwrap();
13152        assert_eq!(
13153            r.candidates_scanned, 0,
13154            "dimension-mismatched candidate must be skipped"
13155        );
13156        assert!(r.nearest.is_none());
13157    }
13158
13159    #[test]
13160    fn get_unembedded_returns_memoryless() {
13161        let conn = test_db();
13162        let mem = make_memory("No embed", "test", Tier::Long, 5);
13163        insert(&conn, &mem).unwrap();
13164
13165        let unembedded = get_unembedded_ids(&conn).unwrap();
13166        assert_eq!(unembedded.len(), 1);
13167    }
13168
#[test]
fn health_check_passes() {
    // A freshly opened test database must report itself healthy.
    let db = test_db();
    let healthy = health_check(&db).unwrap();
    assert!(healthy);
}
13174
#[test]
fn sanitize_fts_strips_operators_and_quotes() {
    // Wildcard and grouping characters from the raw input must not
    // survive sanitisation.
    let wildcard_and_groups = sanitize_fts_query("test* \"injection\" (drop)", true);
    assert!(wildcard_and_groups.find('*').is_none());
    assert!(wildcard_and_groups.find('(').is_none());
    assert!(wildcard_and_groups.find(')').is_none());

    // A query littered with standalone boolean operators must still
    // keep its ordinary search terms.
    let boolean_noise = sanitize_fts_query("hello AND world OR NOT NEAR test", true);
    assert!(boolean_noise.contains("hello"));
    assert!(boolean_noise.contains("world"));
    assert!(boolean_noise.contains("test"));

    // Empty input collapses to the quoted placeholder token.
    let from_empty = sanitize_fts_query("", true);
    assert_eq!(from_empty, "\"_empty_\"");

    // The `+` prefix operator is stripped (prevents exclusion
    // injection). `-` is preserved inside phrase-quoted tokens so that
    // hyphenated content ("well-known", "foo-bar") searches correctly
    // against the unicode61 tokenizer; phrase-quoting keeps `-` from
    // reaching FTS5 as a prefix operator.
    let prefixed = sanitize_fts_query("-secret +required", true);
    assert!(prefixed.find('+').is_none());
    assert!(prefixed.contains("secret"));
    assert!(prefixed.contains("required"));

    // Hyphenated tokens pass through intact as phrase searches.
    let hyphenated = sanitize_fts_query("well-known", true);
    assert!(hyphenated.contains("well-known"));
}
13203
#[test]
fn get_by_prefix_8char() {
    // The first eight characters of an id are enough to fetch the row.
    let db = test_db();
    let full_id = insert(&db, &make_memory("Prefix test", "test", Tier::Long, 5)).unwrap();

    let found = get_by_prefix(&db, &full_id[..8]).unwrap().unwrap();

    assert_eq!(found.id, full_id);
    assert_eq!(found.title, "Prefix test");
}
13214
#[test]
fn get_by_prefix_full_uuid() {
    // Passing the complete id as the "prefix" must still resolve: a
    // LIKE 'full-uuid%' pattern also matches the exact id.
    let db = test_db();
    let full_id = insert(&db, &make_memory("Full UUID prefix", "test", Tier::Long, 5)).unwrap();

    let found = get_by_prefix(&db, &full_id).unwrap().unwrap();
    assert_eq!(found.id, full_id);
}
13224
#[test]
fn get_by_prefix_nonexistent() {
    // A prefix that matches nothing yields Ok(None), not an error.
    let db = test_db();
    let lookup = get_by_prefix(&db, "ffffffff").unwrap();
    assert!(lookup.is_none());
}
13231
#[test]
fn get_by_prefix_ambiguous() {
    // Two rows whose ids share the "aaaa" prefix: the lookup must fail
    // and the error must name both candidates.
    let db = test_db();
    let colliding = [
        ("Ambig A", "test", "aaaa1111-0000-0000-0000-000000000001"),
        ("Ambig B", "test2", "aaaa2222-0000-0000-0000-000000000002"),
    ];
    for (title, namespace, forced_id) in colliding {
        let mut mem = make_memory(title, namespace, Tier::Long, 5);
        mem.id = forced_id.to_string();
        insert(&db, &mem).unwrap();
    }

    let lookup = get_by_prefix(&db, "aaaa");
    assert!(lookup.is_err());

    let err_msg = lookup.unwrap_err().to_string();
    assert!(err_msg.contains("ambiguous"));
    assert!(err_msg.contains("2 matches"));
    // The message lists every matching full id so the user can pick one.
    assert!(
        err_msg.contains("aaaa1111-0000-0000-0000-000000000001"),
        "error should list matching IDs, got: {err_msg}"
    );
    assert!(err_msg.contains("aaaa2222-0000-0000-0000-000000000002"));
}
13254
#[test]
fn resolve_id_exact_then_prefix() {
    // resolve_id must find a row by its full id and by an 8-character
    // prefix, and must report None when nothing matches.
    let db = test_db();
    let stored_id = insert(&db, &make_memory("Resolve test", "test", Tier::Long, 5)).unwrap();

    // Full id.
    let by_exact = resolve_id(&db, &stored_id).unwrap().unwrap();
    assert_eq!(by_exact.id, stored_id);

    // Leading eight characters only.
    let by_prefix = resolve_id(&db, &stored_id[..8]).unwrap().unwrap();
    assert_eq!(by_prefix.id, stored_id);

    // Matches no row at all.
    let missing = resolve_id(&db, "zzzzzzzz").unwrap();
    assert!(missing.is_none());
}
13270
#[test]
fn insert_if_newer_updates() {
    // A copy of an existing row carrying a later `updated_at` must
    // overwrite the stored content.
    let db = test_db();
    let mut incoming = make_memory("Sync test", "test", Tier::Long, 5);
    let stored_id = insert(&db, &incoming).unwrap();

    let one_hour_ahead = chrono::Utc::now() + chrono::Duration::hours(1);
    incoming.id.clone_from(&stored_id);
    incoming.content = String::from("Updated via sync");
    incoming.updated_at = one_hour_ahead.to_rfc3339();

    let returned_id = insert_if_newer(&db, &incoming).unwrap();
    assert_eq!(returned_id, stored_id);

    let reloaded = get(&db, &stored_id).unwrap().unwrap();
    assert_eq!(reloaded.content, "Updated via sync");
}
13286
13287    // --- Metadata tests (Task 1.1) ---
13288
#[test]
fn metadata_default_empty_object() {
    // A memory from make_memory, stored with its metadata untouched,
    // must read back with an empty JSON object as metadata.
    let db = test_db();
    let stored_id = insert(&db, &make_memory("Default metadata", "test", Tier::Long, 5)).unwrap();

    let reloaded = get(&db, &stored_id).unwrap().unwrap();
    assert_eq!(reloaded.metadata, serde_json::json!({}));
}
13297
#[test]
fn metadata_store_and_retrieve() {
    // Flat metadata (one string, one integer) survives insert + get.
    let db = test_db();
    let mut mem = make_memory("With metadata", "test", Tier::Long, 5);
    mem.metadata = serde_json::json!({"agent_id": "claude-1", "session": 42});
    let stored_id = insert(&db, &mem).unwrap();

    let reloaded = get(&db, &stored_id).unwrap().unwrap();
    let meta = &reloaded.metadata;
    assert_eq!(meta["agent_id"], "claude-1");
    assert_eq!(meta["session"], 42);
}
13308
#[test]
fn metadata_roundtrip_nested_json() {
    // Nested objects, arrays and floats must all survive the round trip.
    let db = test_db();
    let nested = serde_json::json!({
        "agent": {"type": "ai:claude", "version": "4.6"},
        "tags_extra": ["experimental"],
        "score": 0.95
    });
    let mut mem = make_memory("Nested metadata", "test", Tier::Long, 5);
    mem.metadata = nested;
    let stored_id = insert(&db, &mem).unwrap();

    let reloaded = get(&db, &stored_id).unwrap().unwrap();
    assert_eq!(reloaded.metadata["agent"]["type"], "ai:claude");
    assert_eq!(reloaded.metadata["tags_extra"][0], "experimental");

    // Floats are compared with a tolerance rather than exact equality.
    let score = reloaded.metadata["score"].as_f64().unwrap();
    assert!((score - 0.95).abs() < f64::EPSILON);
}
13324
#[test]
fn metadata_preserved_on_update() {
    // `update` must leave metadata alone when none is supplied and
    // replace it wholesale when a new value is passed.
    let db = test_db();
    let mut mem = make_memory("Update metadata", "test", Tier::Long, 5);
    mem.metadata = serde_json::json!({"key": "original"});
    let stored_id = insert(&db, &mem).unwrap();

    // Phase 1: content-only update; the metadata argument is None.
    let (row_found, _) = update(
        &db,
        &stored_id,
        None,
        Some("new content"),
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    )
    .unwrap();
    assert!(row_found);
    let after_content_edit = get(&db, &stored_id).unwrap().unwrap();
    assert_eq!(after_content_edit.metadata["key"], "original");
    assert_eq!(after_content_edit.content, "new content");

    // Phase 2: metadata-only update; the stored object is replaced.
    let replacement = serde_json::json!({"key": "updated", "extra": true});
    let (row_found, _) = update(
        &db,
        &stored_id,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        Some(&replacement),
    )
    .unwrap();
    assert!(row_found);
    let after_meta_edit = get(&db, &stored_id).unwrap().unwrap();
    assert_eq!(after_meta_edit.metadata["key"], "updated");
    assert_eq!(after_meta_edit.metadata["extra"], true);
}
13373
#[test]
fn metadata_preserved_on_upsert() {
    // Inserting a second memory with the same title + namespace goes
    // through the upsert path; the row must end up carrying the
    // metadata of the later insert.
    let db = test_db();

    let mut original = make_memory("Upsert meta", "test", Tier::Long, 5);
    original.metadata = serde_json::json!({"version": 1});
    insert(&db, &original).unwrap();

    let mut replacement = make_memory("Upsert meta", "test", Tier::Long, 5);
    replacement.metadata = serde_json::json!({"version": 2});
    let upserted_id = insert(&db, &replacement).unwrap();

    let reloaded = get(&db, &upserted_id).unwrap().unwrap();
    assert_eq!(reloaded.metadata["version"], 2);
}
13388
#[test]
fn metadata_in_list_and_search() {
    // Rows returned by both `list` and `search` must carry the stored
    // metadata.
    let db = test_db();
    let mut mem = make_memory("Searchable metadata", "test", Tier::Long, 8);
    mem.metadata = serde_json::json!({"source_model": "opus"});
    insert(&db, &mem).unwrap();

    // Via list, restricted to the "test" namespace.
    let listed = list(
        &db,
        Some("test"),
        None,
        10,
        0,
        None,
        None,
        None,
        None,
        None,
    )
    .unwrap();
    assert_eq!(listed.len(), 1);
    assert_eq!(listed[0].metadata["source_model"], "opus");

    // Via full-text search on a word from the title.
    let searched = search(
        &db,
        "Searchable",
        Some("test"),
        None,
        10,
        None,
        None,
        None,
        None,
        None,
        None,
        false,
    )
    .unwrap();
    assert_eq!(searched.len(), 1);
    assert_eq!(searched[0].metadata["source_model"], "opus");
}
13430
#[test]
fn metadata_in_recall() {
    // Memories surfaced by `recall` must include their metadata.
    let db = test_db();
    let mut mem = make_memory("Recallable metadata", "test", Tier::Long, 8);
    mem.metadata = serde_json::json!({"context": "test-recall"});
    insert(&db, &mem).unwrap();

    let (hits, _tokens) = recall(
        &db,
        "Recallable",
        Some("test"),
        10,
        None,
        None,
        None,
        crate::SECS_PER_HOUR,
        crate::SECS_PER_DAY,
        None,
        None,
        false,
        None,
    )
    .unwrap();

    assert!(!hits.is_empty());
    // Each hit is a tuple whose first element is the memory itself.
    let top_memory = &hits[0].0;
    assert_eq!(top_memory.metadata["context"], "test-recall");
}
13457
#[test]
fn metadata_in_export_import() {
    // Metadata must be present in the export and survive re-insertion
    // into a second, empty database.
    let source_db = test_db();
    let mut mem = make_memory("Export metadata", "test", Tier::Long, 5);
    mem.metadata = serde_json::json!({"exported": true});
    insert(&source_db, &mem).unwrap();

    let dump = export_all(&source_db).unwrap();
    assert_eq!(dump.len(), 1);
    let exported = &dump[0];
    assert_eq!(exported.metadata["exported"], true);

    // "Import" by inserting the exported record into a fresh database.
    let target_db = test_db();
    insert(&target_db, exported).unwrap();
    let imported = get(&target_db, &exported.id).unwrap().unwrap();
    assert_eq!(imported.metadata["exported"], true);
}
13475
#[test]
fn metadata_schema_migration() {
    // On a database produced by test_db(), the `metadata` column must
    // exist and hold the textual default "{}" for a memory inserted
    // without explicit metadata. The column is read with raw SQL so the
    // stored text is checked directly rather than a parsed value.
    let db = test_db();
    let stored_id = insert(&db, &make_memory("Migration test", "test", Tier::Long, 5)).unwrap();

    let raw_metadata = db
        .query_row(
            "SELECT metadata FROM memories WHERE id = ?1",
            params![stored_id],
            |row| row.get::<_, String>(0),
        )
        .unwrap();

    assert_eq!(raw_metadata, "{}");
}
13494
#[test]
fn metadata_survives_archive_restore_cycle() {
    // Metadata must follow a memory into the archive and back out.
    let db = test_db();
    let mut mem = make_memory("Archivable", "test", Tier::Short, 5);
    mem.metadata = serde_json::json!({"origin": "archive-test"});
    // An expiry in the past makes the row eligible for GC.
    mem.expires_at = Some("2020-01-01T00:00:00+00:00".to_string());
    let stored_id = insert(&db, &mem).unwrap();

    // GC with archiving enabled removes the expired row from `memories`.
    let removed = gc(&db, true).unwrap();
    assert_eq!(removed, 1);

    // The archived copy carries the metadata.
    let archive_rows = list_archived(&db, None, 10, 0).unwrap();
    assert_eq!(archive_rows.len(), 1);
    assert_eq!(archive_rows[0]["metadata"]["origin"], "archive-test");

    // Restoring brings the row back with its metadata intact.
    let was_restored = restore_archived(&db, &stored_id).unwrap();
    assert!(was_restored);
    let reloaded = get(&db, &stored_id).unwrap().unwrap();
    assert_eq!(reloaded.metadata["origin"], "archive-test");
}
13519
#[test]
fn metadata_in_insert_if_newer() {
    // insert_if_newer must take metadata from a newer copy and ignore
    // metadata from an older one.
    let db = test_db();
    let mut copy = make_memory("Sync metadata", "test", Tier::Long, 5);
    copy.metadata = serde_json::json!({"version": 1});
    let stored_id = insert(&db, &copy).unwrap();

    // Newer copy (one hour in the future) with different metadata wins.
    let one_hour_ahead = chrono::Utc::now() + chrono::Duration::hours(1);
    copy.id.clone_from(&stored_id);
    copy.metadata = serde_json::json!({"version": 2, "synced": true});
    copy.updated_at = one_hour_ahead.to_rfc3339();
    insert_if_newer(&db, &copy).unwrap();

    let after_newer = get(&db, &stored_id).unwrap().unwrap();
    assert_eq!(after_newer.metadata["version"], 2);
    assert_eq!(after_newer.metadata["synced"], true);

    // Older copy (year 2020) must not overwrite what is stored.
    copy.metadata = serde_json::json!({"version": 0, "stale": true});
    copy.updated_at = String::from("2020-01-01T00:00:00+00:00");
    insert_if_newer(&db, &copy).unwrap();

    let after_older = get(&db, &stored_id).unwrap().unwrap();
    assert_eq!(after_older.metadata["version"], 2); // newer value retained
    assert!(after_older.metadata.get("stale").is_none());
}
13546
#[test]
fn metadata_merged_in_consolidate() {
    // Consolidating two memories must union their metadata keys; when
    // both define the same key, the later source in the id list wins.
    let db = test_db();

    let mut first = make_memory("Consolidate A", "test", Tier::Long, 5);
    first.metadata = serde_json::json!({"agent": "claude", "shared": "from_a"});
    let first_id = insert(&db, &first).unwrap();

    let mut second = make_memory("Consolidate B", "test", Tier::Long, 7);
    second.metadata = serde_json::json!({"model": "opus", "shared": "from_b"});
    let second_id = insert(&db, &second).unwrap();

    let merged_id = consolidate(
        &db,
        &[first_id, second_id],
        "Merged",
        "Combined content",
        "test",
        &Tier::Long,
        "consolidation",
        "test-consolidator",
    )
    .unwrap();

    let merged = get(&db, &merged_id).unwrap().unwrap();
    let meta = &merged.metadata;
    // Keys unique to each source are both present ...
    assert_eq!(meta["agent"], "claude");
    assert_eq!(meta["model"], "opus");
    // ... and the contested key carries the second source's value.
    assert_eq!(meta["shared"], "from_b");
}
13576
#[test]
fn metadata_consolidate_rejects_oversized_merge() {
    // Two memories whose metadata use disjoint key sets: each stores
    // fine on its own, but the union is meant to exceed the 64KB cap
    // on merged metadata, so consolidate must refuse.
    let db = test_db();

    // 500 distinct `key_a_*` entries, each a 60-character value.
    let mut bulk_a = serde_json::Map::new();
    for i in 0..500 {
        bulk_a.insert(
            format!("key_a_{i}"),
            serde_json::Value::String("x".repeat(60)),
        );
    }
    let mut first = make_memory("Big meta A", "test", Tier::Long, 5);
    first.metadata = serde_json::Value::Object(bulk_a);
    let first_id = insert(&db, &first).unwrap();

    // 500 distinct `key_b_*` entries: no overlap with the first set.
    let mut bulk_b = serde_json::Map::new();
    for i in 0..500 {
        bulk_b.insert(
            format!("key_b_{i}"),
            serde_json::Value::String("x".repeat(60)),
        );
    }
    let mut second = make_memory("Big meta B", "test", Tier::Long, 5);
    second.metadata = serde_json::Value::Object(bulk_b);
    let second_id = insert(&db, &second).unwrap();

    let outcome = consolidate(
        &db,
        &[first_id, second_id],
        "Oversized merge",
        "Should fail",
        "test",
        &Tier::Long,
        "consolidation",
        "test-consolidator",
    );

    let msg = outcome
        .expect_err("consolidate should fail for oversized merged metadata")
        .to_string();
    assert!(
        msg.contains("merged metadata exceeds size limit"),
        "expected metadata size error, got: {msg}"
    );
}
13623
#[test]
fn metadata_special_characters_roundtrip() {
    // Pipes, control characters, backslashes, non-BMP emoji, CJK text
    // and empty strings must all come back unchanged.
    let db = test_db();
    let awkward = serde_json::json!({
        "pipe": "a|b|c",
        "newline": "line1\nline2",
        "tab": "col1\tcol2",
        "backslash": "path\\to\\file",
        "unicode": "\u{1F600}\u{1F4A9}",
        "cjk": "\u{4e16}\u{754c}",
        "empty": "",
        "nested_special": {"inner|key": "val\nue"}
    });
    let mut mem = make_memory("Special chars metadata", "test", Tier::Long, 5);
    mem.metadata = awkward;
    let stored_id = insert(&db, &mem).unwrap();

    let reloaded = get(&db, &stored_id).unwrap().unwrap();
    let meta = &reloaded.metadata;
    assert_eq!(meta["pipe"], "a|b|c");
    assert_eq!(meta["newline"], "line1\nline2");
    assert_eq!(meta["unicode"], "\u{1F600}\u{1F4A9}");
    assert_eq!(meta["cjk"], "\u{4e16}\u{754c}");
    assert_eq!(meta["nested_special"]["inner|key"], "val\nue");
}
13646
#[test]
fn metadata_corrupt_column_falls_back_to_empty() {
    // If the stored metadata text is not valid JSON, reading the row
    // must yield `{}` rather than an error or a panic.
    let db = test_db();
    let stored_id = insert(&db, &make_memory("Corrupt test", "test", Tier::Long, 5)).unwrap();

    // Overwrite the column behind the API's back with garbage.
    db.execute(
        "UPDATE memories SET metadata = 'NOT VALID JSON {{{{' WHERE id = ?1",
        params![stored_id],
    )
    .unwrap();

    let reloaded = get(&db, &stored_id).unwrap().unwrap();
    assert_eq!(reloaded.metadata, serde_json::json!({}));
}
13664
#[test]
fn metadata_restore_resets_corrupt_archived_metadata() {
    // Restoring an archived row whose metadata text has been corrupted
    // must succeed and leave the restored memory with `{}` metadata.
    let db = test_db();
    let mut mem = make_memory("Corrupt archive", "test", Tier::Short, 5);
    mem.metadata = serde_json::json!({"valid": true});
    // Already expired, so the GC pass below archives it.
    mem.expires_at = Some("2020-01-01T00:00:00+00:00".to_string());
    let stored_id = insert(&db, &mem).unwrap();

    gc(&db, true).unwrap();

    // Damage the archived copy directly in its table.
    db.execute(
        "UPDATE archived_memories SET metadata = 'CORRUPT JSON' WHERE id = ?1",
        params![stored_id],
    )
    .unwrap();

    let was_restored = restore_archived(&db, &stored_id).unwrap();
    assert!(was_restored);

    let reloaded = get(&db, &stored_id).unwrap().unwrap();
    assert_eq!(reloaded.metadata, serde_json::json!({}));
}
13689
#[test]
fn scope_index_exists_after_migration() {
    // v0.6.0 GA (schema v10): after the database is opened and
    // migrated, both the `scope_idx` generated column and the B-tree
    // index over it must be present.
    let db = test_db();

    // Preparing a statement that names the column only succeeds if the
    // column exists; LIMIT 0 means no rows are ever read.
    let column_present = db.prepare("SELECT scope_idx FROM memories LIMIT 0").is_ok();
    assert!(column_present, "scope_idx generated column missing");

    let matching_indexes = db
        .query_row(
            "SELECT COUNT(*) FROM sqlite_master WHERE type='index' AND name='idx_memories_scope_idx'",
            [],
            |row| row.get::<_, i64>(0),
        )
        .unwrap();
    assert_eq!(matching_indexes, 1, "idx_memories_scope_idx missing");
}
13708
#[test]
fn scope_index_used_for_direct_scope_filter() {
    // v0.6.0 GA: a bare `WHERE scope_idx = ?` predicate must be served
    // by `idx_memories_scope_idx`. That is the shape the planner sees
    // for `scope = 'collective'` fast paths and for the branch-local
    // predicate inside `visibility_clause`.
    //
    // The full visibility_clause OR-chain is deliberately NOT checked:
    // SQLite's planner may (correctly) choose a scan when selectivity
    // varies across the OR branches. The index exists to speed up the
    // common case where a recall narrows to one scope; the multi-branch
    // clause still benefits because each branch tests a plain column
    // instead of a JSON extract.
    let db = test_db();

    // Seed enough rows, then ANALYZE, so the planner's cost model works
    // from real statistics.
    for i in 0..200 {
        let scope = match i % 3 {
            0 => "collective",
            _ => "private",
        };
        let mut mem = make_memory(&format!("row-{i}"), "test", Tier::Long, 5);
        mem.metadata = serde_json::json!({"scope": scope});
        insert(&db, &mem).unwrap();
    }
    db.execute("ANALYZE", []).unwrap();

    // Result column 3 of EXPLAIN QUERY PLAN is read as text for each step.
    let mut explain = db
        .prepare("EXPLAIN QUERY PLAN SELECT id FROM memories WHERE scope_idx = ?1")
        .unwrap();
    let step_rows = explain
        .query_map(params!["collective"], |row| row.get::<_, String>(3))
        .unwrap();
    let mut steps: Vec<String> = Vec::new();
    for step in step_rows {
        steps.push(step.unwrap());
    }

    let joined = steps.join("\n");
    assert!(
        joined.contains("idx_memories_scope_idx"),
        "direct scope filter must use idx_memories_scope_idx; got:\n{joined}"
    );
}
13745
#[test]
fn scope_idx_reflects_metadata_on_insert_and_update() {
    // v0.6.0 GA: the VIRTUAL generated column must follow
    // `metadata.scope` through insert and update with no manual upkeep.

    // Reads the raw `scope_idx` value for one memory id.
    fn scope_of(conn: &Connection, memory_id: &str) -> String {
        conn.query_row(
            "SELECT scope_idx FROM memories WHERE id = ?1",
            params![memory_id],
            |r| r.get(0),
        )
        .unwrap()
    }

    let db = test_db();

    // Insert with an explicit scope.
    let mut tracked = make_memory("scope-tracking", "test", Tier::Long, 5);
    tracked.metadata = serde_json::json!({"scope": "team"});
    let tracked_id = insert(&db, &tracked).unwrap();
    assert_eq!(scope_of(&db, &tracked_id), "team");

    // Change the scope through a metadata-only update; the generated
    // column must pick up the new value.
    let rescoped = serde_json::json!({"scope": "unit"});
    update(
        &db,
        &tracked_id,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        Some(&rescoped),
    )
    .unwrap();
    assert_eq!(scope_of(&db, &tracked_id), "unit");

    // Metadata without a `scope` key: the column reports the default.
    let mut unscoped = make_memory("no-scope-key", "test", Tier::Long, 5);
    unscoped.metadata = serde_json::json!({});
    let unscoped_id = insert(&db, &unscoped).unwrap();
    assert_eq!(scope_of(&db, &unscoped_id), "private");
}
13801
#[test]
fn auto_purge_archive_respects_max_days() {
    // auto_purge_archive must delete archived rows only when a
    // positive retention window is given and the row is older than it.
    let db = test_db();
    let mut mem = make_memory("Purge test", "test", Tier::Short, 5);
    mem.expires_at = Some("2020-01-01T00:00:00+00:00".to_string());
    insert(&db, &mem).unwrap();
    gc(&db, true).unwrap();

    // Precondition: exactly one archived row.
    let archived_before = list_archived(&db, None, 10, 0).unwrap();
    assert_eq!(archived_before.len(), 1);

    // Pretend the row was archived 30 days ago.
    let thirty_days_ago = (chrono::Utc::now() - chrono::Duration::days(30)).to_rfc3339();
    db.execute(
        "UPDATE archived_memories SET archived_at = ?1",
        params![thirty_days_ago],
    )
    .unwrap();

    // None = purge disabled: nothing removed, the row is still listed.
    let removed_when_disabled = auto_purge_archive(&db, None).unwrap();
    assert_eq!(removed_when_disabled, 0);
    assert_eq!(list_archived(&db, None, 10, 0).unwrap().len(), 1);

    // Zero days is guarded against and must not purge.
    let removed_with_zero = auto_purge_archive(&db, Some(0)).unwrap();
    assert_eq!(removed_with_zero, 0);

    // 90-day window: a 30-day-old row is still inside it.
    let removed_with_ninety = auto_purge_archive(&db, Some(90)).unwrap();
    assert_eq!(removed_with_ninety, 0);

    // 7-day window: the 30-day-old row falls outside and is purged.
    let removed_with_seven = auto_purge_archive(&db, Some(7)).unwrap();
    assert_eq!(removed_with_seven, 1);
    assert!(list_archived(&db, None, 10, 0).unwrap().is_empty());
}
13839
13840    // ─────────────────────────────────────────────────────────────────
13841    // Schema v15 (v0.6.3 Stream B) — temporal-validity KG migration.
13842    // ─────────────────────────────────────────────────────────────────
13843
13844    fn column_exists(conn: &Connection, table: &str, column: &str) -> bool {
13845        let mut stmt = conn
13846            .prepare(&format!("PRAGMA table_info({table})"))
13847            .unwrap();
13848        let cols: Vec<String> = stmt
13849            .query_map([], |row| row.get::<_, String>(1))
13850            .unwrap()
13851            .filter_map(Result::ok)
13852            .collect();
13853        cols.iter().any(|c| c == column)
13854    }
13855
13856    fn index_exists(conn: &Connection, name: &str) -> bool {
13857        conn.query_row(
13858            "SELECT 1 FROM sqlite_master WHERE type='index' AND name=?1",
13859            params![name],
13860            |r| r.get::<_, i64>(0),
13861        )
13862        .is_ok()
13863    }
13864
#[test]
fn schema_v15_memory_links_has_temporal_columns() {
    // Schema v15 adds four columns to `memory_links`; each must exist.
    let db = test_db();
    let has_column = |column: &str| column_exists(&db, "memory_links", column);

    assert!(has_column("valid_from"));
    assert!(has_column("valid_until"));
    assert!(has_column("observed_by"));
    assert!(has_column("signature"));
}
13873
#[test]
fn schema_v15_memory_links_temporal_indexes_exist() {
    // The three link indexes introduced with schema v15 must exist.
    let db = test_db();
    let has_index = |name: &str| index_exists(&db, name);

    assert!(has_index("idx_links_temporal_src"));
    assert!(has_index("idx_links_temporal_tgt"));
    assert!(has_index("idx_links_relation"));
}
13881
#[test]
fn schema_v15_entity_aliases_table_exists() {
    // The `entity_aliases` table must be queryable (and start empty),
    // and its alias lookup index must exist.
    let db = test_db();

    let row_count = db
        .query_row("SELECT COUNT(*) FROM entity_aliases", [], |row| {
            row.get::<_, i64>(0)
        })
        .unwrap();

    assert_eq!(row_count, 0);
    assert!(index_exists(&db, "idx_entity_aliases_alias"));
}
13891
#[test]
fn schema_v15_entity_aliases_primary_key_unique() {
    // Inserting the same (entity_id, alias) pair twice must be rejected
    // the second time.
    let db = test_db();
    let created_at = chrono::Utc::now().to_rfc3339();
    let insert_alias =
        "INSERT INTO entity_aliases (entity_id, alias, created_at) VALUES (?1, ?2, ?3)";

    // First insert succeeds.
    db.execute(insert_alias, params!["e1", "Alpha", &created_at])
        .unwrap();

    // An identical second insert must fail.
    let second_attempt = db.execute(insert_alias, params!["e1", "Alpha", &created_at]);
    assert!(
        second_attempt.is_err(),
        "expected PK uniqueness violation"
    );
}
13907
13908    // -- Pillar 2 / Stream B — entity_register / entity_get_by_alias ------
13909
#[test]
fn entity_register_creates_new_entity_with_aliases() {
    // First registration of a name: a new entity is created, the
    // requested aliases are attached, and a backing memory is written.
    let db = test_db();
    let requested_aliases = vec!["pa".to_string(), "Project A".to_string()];

    let registration = entity_register(
        &db,
        "Project Alpha",
        "projects/alpha",
        &requested_aliases,
        &serde_json::json!({}),
        Some("test-agent"),
    )
    .unwrap();

    assert!(
        registration.created,
        "first registration must be created=true"
    );
    assert_eq!(registration.canonical_name, "Project Alpha");
    assert_eq!(registration.namespace, "projects/alpha");

    // Aliases inserted by one call share a created_at, so ordering
    // falls to the secondary `alias ASC` key, which compares by ASCII
    // codepoint: uppercase 'P' (80) sorts before lowercase 'p' (112).
    // The canonical name is auto-inserted as an alias as well, so that
    // entity_get_by_alias can resolve it.
    let expected_aliases: Vec<String> = ["Project A", "Project Alpha", "pa"]
        .iter()
        .map(|alias| alias.to_string())
        .collect();
    assert_eq!(registration.aliases, expected_aliases);

    // The entity is backed by an ordinary memory row.
    let backing = get(&db, &registration.entity_id).unwrap().unwrap();
    assert_eq!(backing.title, "Project Alpha");
    assert_eq!(backing.tier.rank(), Tier::Long.rank());
    assert!(backing.tags.contains(&"entity".to_string()));
    assert_eq!(backing.metadata["kind"], "entity");
    assert_eq!(backing.metadata["agent_id"], "test-agent");
}
13946
13947    #[test]
13948    fn entity_register_reuses_existing_and_merges_aliases() {
13949        let conn = test_db();
13950        let first = entity_register(
13951            &conn,
13952            "Project Alpha",
13953            "projects/alpha",
13954            &["pa".to_string()],
13955            &serde_json::json!({}),
13956            Some("a1"),
13957        )
13958        .unwrap();
13959        let second = entity_register(
13960            &conn,
13961            "Project Alpha",
13962            "projects/alpha",
13963            &["pa".to_string(), "alpha".to_string()],
13964            &serde_json::json!({}),
13965            Some("a2"),
13966        )
13967        .unwrap();
13968        assert!(first.created);
13969        assert!(!second.created, "second call must reuse the entity");
13970        assert_eq!(first.entity_id, second.entity_id);
13971        // First call inserted ["Project Alpha", "pa"] at ts1; second
13972        // call inserted "alpha" at ts2 (ts1 < ts2). Sort is created_at
13973        // ASC, alias ASC.
13974        assert_eq!(
13975            second.aliases,
13976            vec![
13977                "Project Alpha".to_string(),
13978                "pa".to_string(),
13979                "alpha".to_string()
13980            ]
13981        );
13982    }
13983
13984    #[test]
13985    fn entity_register_errors_on_collision_with_non_entity_memory() {
13986        let conn = test_db();
13987        let mem = make_memory("Conflict", "projects/alpha", Tier::Long, 5);
13988        insert(&conn, &mem).unwrap();
13989        let err = entity_register(
13990            &conn,
13991            "Conflict",
13992            "projects/alpha",
13993            &[],
13994            &serde_json::json!({}),
13995            None,
13996        )
13997        .unwrap_err();
13998        let msg = format!("{err}");
13999        assert!(
14000            msg.contains("non-entity memory"),
14001            "expected collision error, got: {msg}"
14002        );
14003    }
14004
14005    #[test]
14006    fn entity_register_skips_blank_aliases() {
14007        let conn = test_db();
14008        let reg = entity_register(
14009            &conn,
14010            "Trim Test",
14011            "test",
14012            &[String::new(), "   ".to_string(), "ok".to_string()],
14013            &serde_json::json!({}),
14014            None,
14015        )
14016        .unwrap();
14017        // canonical_name "Trim Test" auto-included; "T" (84) < "o" (111).
14018        assert_eq!(reg.aliases, vec!["Trim Test".to_string(), "ok".to_string()]);
14019    }
14020
14021    #[test]
14022    fn entity_register_preserves_caller_metadata_keys() {
14023        let conn = test_db();
14024        let extra = serde_json::json!({"team": "platform", "kind": "ignored"});
14025        let reg = entity_register(&conn, "Service X", "svc", &[], &extra, None).unwrap();
14026        let m = get(&conn, &reg.entity_id).unwrap().unwrap();
14027        assert_eq!(m.metadata["team"], "platform");
14028        // Caller's `kind` is overwritten — entity records must always
14029        // carry kind=entity for the resolver to find them.
14030        assert_eq!(m.metadata["kind"], "entity");
14031    }
14032
14033    #[test]
14034    fn entity_get_by_alias_returns_record_with_full_alias_set() {
14035        let conn = test_db();
14036        let reg = entity_register(
14037            &conn,
14038            "Project Alpha",
14039            "projects/alpha",
14040            &["pa".to_string(), "alpha".to_string()],
14041            &serde_json::json!({}),
14042            None,
14043        )
14044        .unwrap();
14045        let got = entity_get_by_alias(&conn, "pa", None).unwrap().unwrap();
14046        assert_eq!(got.entity_id, reg.entity_id);
14047        assert_eq!(got.canonical_name, "Project Alpha");
14048        assert_eq!(got.namespace, "projects/alpha");
14049        // Same-batch aliases share a created_at; alphabetical
14050        // tiebreak orders by ASCII codepoint: "Project Alpha" (P=80)
14051        // < "alpha" (a=97) < "pa" (p=112). canonical_name auto-included.
14052        assert_eq!(
14053            got.aliases,
14054            vec![
14055                "Project Alpha".to_string(),
14056                "alpha".to_string(),
14057                "pa".to_string()
14058            ]
14059        );
14060    }
14061
14062    #[test]
14063    fn entity_register_canonical_name_resolves_via_get_by_alias() {
14064        // Regression test for NHI-P3-T2 (v0.7.0 NHI test playbook):
14065        // registering an entity with no aliases must still leave it
14066        // reachable via entity_get_by_alias("<canonical_name>") so the
14067        // alias-resolution pathway isn't dead-on-arrival when the
14068        // caller only knows the canonical name.
14069        let conn = test_db();
14070        let reg = entity_register(
14071            &conn,
14072            "OnlyCanonical",
14073            "test",
14074            &[],
14075            &serde_json::json!({}),
14076            None,
14077        )
14078        .unwrap();
14079        assert!(reg.created);
14080        assert_eq!(
14081            reg.aliases,
14082            vec!["OnlyCanonical".to_string()],
14083            "canonical_name must be auto-inserted as an alias"
14084        );
14085        let got = entity_get_by_alias(&conn, "OnlyCanonical", Some("test"))
14086            .unwrap()
14087            .expect("canonical_name must resolve via entity_get_by_alias");
14088        assert_eq!(got.entity_id, reg.entity_id);
14089        assert_eq!(got.canonical_name, "OnlyCanonical");
14090    }
14091
14092    #[test]
14093    fn entity_get_by_alias_returns_none_for_unknown_alias() {
14094        let conn = test_db();
14095        let got = entity_get_by_alias(&conn, "missing", None).unwrap();
14096        assert!(got.is_none());
14097    }
14098
14099    #[test]
14100    fn entity_get_by_alias_filters_by_namespace() {
14101        let conn = test_db();
14102        entity_register(
14103            &conn,
14104            "Acme",
14105            "ns_a",
14106            &["a".to_string()],
14107            &serde_json::json!({}),
14108            None,
14109        )
14110        .unwrap();
14111        entity_register(
14112            &conn,
14113            "Acme Corp",
14114            "ns_b",
14115            &["a".to_string()],
14116            &serde_json::json!({}),
14117            None,
14118        )
14119        .unwrap();
14120        let in_a = entity_get_by_alias(&conn, "a", Some("ns_a"))
14121            .unwrap()
14122            .unwrap();
14123        assert_eq!(in_a.namespace, "ns_a");
14124        assert_eq!(in_a.canonical_name, "Acme");
14125        let in_b = entity_get_by_alias(&conn, "a", Some("ns_b"))
14126            .unwrap()
14127            .unwrap();
14128        assert_eq!(in_b.namespace, "ns_b");
14129        assert_eq!(in_b.canonical_name, "Acme Corp");
14130    }
14131
14132    #[test]
14133    fn entity_get_by_alias_without_namespace_picks_most_recent() {
14134        let conn = test_db();
14135        // Older entity created first.
14136        entity_register(
14137            &conn,
14138            "Older",
14139            "ns_old",
14140            &["dup".to_string()],
14141            &serde_json::json!({}),
14142            None,
14143        )
14144        .unwrap();
14145        // Sleep just enough to guarantee a strictly later created_at.
14146        std::thread::sleep(std::time::Duration::from_millis(5));
14147        entity_register(
14148            &conn,
14149            "Newer",
14150            "ns_new",
14151            &["dup".to_string()],
14152            &serde_json::json!({}),
14153            None,
14154        )
14155        .unwrap();
14156        let got = entity_get_by_alias(&conn, "dup", None).unwrap().unwrap();
14157        assert_eq!(got.canonical_name, "Newer");
14158        assert_eq!(got.namespace, "ns_new");
14159    }
14160
14161    #[test]
14162    fn entity_get_by_alias_ignores_non_entity_memory_with_matching_alias() {
14163        let conn = test_db();
14164        // Insert a regular (non-entity) memory and a stray
14165        // entity_aliases row pointing at it. The resolver must skip
14166        // it because `kind != 'entity'`.
14167        let mut mem = make_memory("Decoy", "test", Tier::Long, 5);
14168        mem.metadata = serde_json::json!({});
14169        let mid = insert(&conn, &mem).unwrap();
14170        let now = chrono::Utc::now().to_rfc3339();
14171        conn.execute(
14172            "INSERT INTO entity_aliases (entity_id, alias, created_at) VALUES (?1, ?2, ?3)",
14173            params![&mid, "decoy", &now],
14174        )
14175        .unwrap();
14176        let got = entity_get_by_alias(&conn, "decoy", None).unwrap();
14177        assert!(got.is_none(), "non-entity memories must not resolve");
14178    }
14179
14180    #[test]
14181    fn entity_register_idempotent_aliases_are_deduped() {
14182        let conn = test_db();
14183        let reg = entity_register(
14184            &conn,
14185            "Dedup",
14186            "test",
14187            &["x".to_string(), "x".to_string(), "y".to_string()],
14188            &serde_json::json!({}),
14189            None,
14190        )
14191        .unwrap();
14192        // INSERT OR IGNORE collapses the duplicate "x"; canonical
14193        // ("Dedup") auto-inserted as well, so 3 distinct aliases.
14194        assert_eq!(reg.aliases.len(), 3);
14195        assert!(reg.aliases.contains(&"Dedup".to_string()));
14196        assert!(reg.aliases.contains(&"x".to_string()));
14197        assert!(reg.aliases.contains(&"y".to_string()));
14198    }
14199
14200    // -- Pillar 2 / Stream C — kg_timeline ---------------------------------
14201
14202    /// Insert a link with an explicit `valid_from` so timeline tests can
14203    /// pin event ordering without relying on wall-clock spread.
14204    fn insert_link_at(
14205        conn: &Connection,
14206        source_id: &str,
14207        target_id: &str,
14208        relation: &str,
14209        valid_from: &str,
14210    ) {
14211        let now = chrono::Utc::now().to_rfc3339();
14212        conn.execute(
14213            "INSERT INTO memory_links (source_id, target_id, relation, created_at, valid_from) \
14214             VALUES (?1, ?2, ?3, ?4, ?5)",
14215            params![source_id, target_id, relation, now, valid_from],
14216        )
14217        .unwrap();
14218    }
14219
14220    #[test]
14221    fn create_link_populates_valid_from_for_new_rows() {
14222        let conn = test_db();
14223        let src = make_memory("kg-src", "test", Tier::Long, 5);
14224        let tgt = make_memory("kg-tgt", "test", Tier::Long, 5);
14225        insert(&conn, &src).unwrap();
14226        insert(&conn, &tgt).unwrap();
14227        create_link(&conn, &src.id, &tgt.id, "related_to").unwrap();
14228        let valid_from: Option<String> = conn
14229            .query_row(
14230                "SELECT valid_from FROM memory_links WHERE source_id = ?1",
14231                params![&src.id],
14232                |r| r.get(0),
14233            )
14234            .unwrap();
14235        assert!(
14236            valid_from.is_some(),
14237            "create_link must populate valid_from so kg_timeline can see new links"
14238        );
14239    }
14240
14241    // v0.7 H2 — schema v23: `attest_level` column present + populated.
14242    #[test]
14243    fn schema_v23_memory_links_has_attest_level_column() {
14244        let conn = test_db();
14245        assert!(
14246            column_exists(&conn, "memory_links", "attest_level"),
14247            "v23 must add attest_level column to memory_links"
14248        );
14249    }
14250
14251    // v0.7 H2 — no-keypair path: signature stays NULL, attest_level
14252    // is recorded as "unsigned". This is the v0.6.4 backward-compat
14253    // contract — operators that haven't generated a keypair keep the
14254    // pre-H2 behaviour.
14255    #[test]
14256    fn create_link_signed_without_keypair_is_unsigned() {
14257        let conn = test_db();
14258        let src = make_memory("h2-src-unsigned", "test", Tier::Long, 5);
14259        let tgt = make_memory("h2-tgt-unsigned", "test", Tier::Long, 5);
14260        insert(&conn, &src).unwrap();
14261        insert(&conn, &tgt).unwrap();
14262
14263        let level = create_link_signed(&conn, &src.id, &tgt.id, "related_to", None).unwrap();
14264        assert_eq!(level, "unsigned");
14265
14266        let (sig, attest): (Option<Vec<u8>>, Option<String>) = conn
14267            .query_row(
14268                "SELECT signature, attest_level FROM memory_links \
14269                 WHERE source_id = ?1 AND target_id = ?2",
14270                params![&src.id, &tgt.id],
14271                |r| Ok((r.get(0)?, r.get(1)?)),
14272            )
14273            .unwrap();
14274        assert!(sig.is_none(), "no keypair → signature must be NULL");
14275        assert_eq!(attest.as_deref(), Some("unsigned"));
14276    }
14277
14278    // v0.7 H2 — happy path: with an active keypair, every link write
14279    // gets a 64-byte Ed25519 signature in the `signature` column and
14280    // attest_level = "self_signed". The signature must verify against
14281    // the keypair's public key over the canonical CBOR payload.
14282    #[test]
14283    fn create_link_signed_with_keypair_persists_valid_signature() {
14284        use crate::identity::{keypair, sign as link_sign};
14285        use ed25519_dalek::Verifier;
14286
14287        let conn = test_db();
14288        let src = make_memory("h2-src-signed", "test", Tier::Long, 5);
14289        let tgt = make_memory("h2-tgt-signed", "test", Tier::Long, 5);
14290        insert(&conn, &src).unwrap();
14291        insert(&conn, &tgt).unwrap();
14292
14293        let kp = keypair::generate("alice").unwrap();
14294        let level = create_link_signed(&conn, &src.id, &tgt.id, "supersedes", Some(&kp)).unwrap();
14295        assert_eq!(level, "self_signed");
14296
14297        // Read back the persisted row and confirm the signature shape.
14298        let (sig, attest, valid_from): (Option<Vec<u8>>, Option<String>, Option<String>) = conn
14299            .query_row(
14300                "SELECT signature, attest_level, valid_from FROM memory_links \
14301                 WHERE source_id = ?1 AND target_id = ?2",
14302                params![&src.id, &tgt.id],
14303                |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)),
14304            )
14305            .unwrap();
14306        let sig_bytes = sig.expect("signature must be present when keypair is provided");
14307        assert_eq!(sig_bytes.len(), 64, "Ed25519 signature is 64 bytes");
14308        assert_eq!(attest.as_deref(), Some("self_signed"));
14309        let valid_from = valid_from.expect("valid_from must be set on the insert path");
14310
14311        // Re-derive the canonical bytes the writer signed over and
14312        // verify with the keypair's public key. This is what H3's
14313        // inbound verifier will do on every received link.
14314        let signable = link_sign::SignableLink {
14315            src_id: &src.id,
14316            dst_id: &tgt.id,
14317            relation: "supersedes",
14318            observed_by: Some(kp.agent_id.as_str()),
14319            valid_from: Some(valid_from.as_str()),
14320            valid_until: None,
14321        };
14322        let payload = link_sign::canonical_cbor(&signable).unwrap();
14323        let mut sig_arr = [0u8; 64];
14324        sig_arr.copy_from_slice(&sig_bytes);
14325        let sig_obj = ed25519_dalek::Signature::from_bytes(&sig_arr);
14326        kp.public
14327            .verify(&payload, &sig_obj)
14328            .expect("persisted signature must verify against the writer's public key");
14329    }
14330
14331    // v0.7.0 H6 (round-2) — regression: the SQLite write path must
14332    // truncate `valid_from` to microsecond precision BEFORE signing
14333    // and persisting, so the row a federation peer receives serialises
14334    // back to the same canonical RFC3339 string regardless of the
14335    // adapter that wrote it. We assert two properties:
14336    //
14337    // 1. The `valid_from` column NEVER contains a 9-digit fractional
14338    //    second (nanoseconds), only at most 6 digits (microseconds).
14339    // 2. The persisted signature verifies against canonical CBOR
14340    //    derived from the same microsecond-truncated string the row
14341    //    holds — i.e. the round-trip is byte-stable.
14342    #[test]
14343    fn h6_create_link_signed_truncates_valid_from_to_microseconds() {
14344        use crate::identity::{keypair, sign as link_sign};
14345        use ed25519_dalek::Verifier;
14346
14347        let conn = test_db();
14348        let src = make_memory("h6-src", "test", Tier::Long, 5);
14349        let tgt = make_memory("h6-tgt", "test", Tier::Long, 5);
14350        insert(&conn, &src).unwrap();
14351        insert(&conn, &tgt).unwrap();
14352
14353        let kp = keypair::generate("alice").unwrap();
14354        let level = create_link_signed(&conn, &src.id, &tgt.id, "related_to", Some(&kp)).unwrap();
14355        assert_eq!(level, "self_signed");
14356
14357        let (sig, valid_from): (Option<Vec<u8>>, Option<String>) = conn
14358            .query_row(
14359                "SELECT signature, valid_from FROM memory_links \
14360                 WHERE source_id = ?1 AND target_id = ?2",
14361                params![&src.id, &tgt.id],
14362                |r| Ok((r.get(0)?, r.get(1)?)),
14363            )
14364            .unwrap();
14365        let valid_from = valid_from.expect("valid_from set on signed insert path");
14366
14367        // RFC3339 fractional-second precision check. The string looks
14368        // like `2026-05-10T12:34:56.123456+00:00` (microsecond) or
14369        // `...:56.123456789+00:00` (nanosecond). After H6, the maximum
14370        // length of the fractional run must be 6.
14371        if let Some(dot) = valid_from.find('.') {
14372            let after = &valid_from[dot + 1..];
14373            let frac_len = after.chars().take_while(|c| c.is_ascii_digit()).count();
14374            assert!(
14375                frac_len <= 6,
14376                "H6 regression: valid_from has {frac_len}-digit fractional second; expected ≤ 6 (microseconds). Value: {valid_from}"
14377            );
14378        }
14379
14380        // Round-trip the signature against canonical CBOR computed
14381        // from the EXACT string stored in the row. If the writer
14382        // signed over a nanosecond-precision string but the column
14383        // round-trips at microsecond precision, this verify fails —
14384        // which is exactly the postgres-G3 failure mode SQLite is now
14385        // immunised against.
14386        let sig_bytes = sig.expect("signature persisted");
14387        let signable = link_sign::SignableLink {
14388            src_id: &src.id,
14389            dst_id: &tgt.id,
14390            relation: "related_to",
14391            observed_by: Some(kp.agent_id.as_str()),
14392            valid_from: Some(valid_from.as_str()),
14393            valid_until: None,
14394        };
14395        let payload = link_sign::canonical_cbor(&signable).unwrap();
14396        let mut sig_arr = [0u8; 64];
14397        sig_arr.copy_from_slice(&sig_bytes);
14398        let sig_obj = ed25519_dalek::Signature::from_bytes(&sig_arr);
14399        kp.public.verify(&payload, &sig_obj).expect(
14400            "H6 regression: signature must verify against canonical CBOR \
14401             derived from the stored (microsecond-truncated) valid_from",
14402        );
14403    }
14404
14405    // v0.7.0 fix-campaign A3 (LINK-PARITY, #690) — the cycle check
14406    // refuses a `reflects_on` edge whose target already transitively
14407    // reflects back on the source. This is the storage-layer
14408    // invariant the HTTP / SAL / federation paths now share with the
14409    // MCP path.
14410    #[test]
14411    fn a3_validate_link_pre_create_refuses_reflection_cycle() {
14412        use crate::config::{
14413            PermissionsMode, lock_permissions_mode_for_test,
14414            override_active_permissions_mode_for_test,
14415        };
14416        // The active permissions mode is process-wide; hold the
14417        // serialisation guard so parallel lib tests cannot flip the
14418        // mode out from under us. See `pin_governance_enforce_for_test`
14419        // in handlers/mod.rs for the same pattern.
14420        let _gate = lock_permissions_mode_for_test();
14421        // Pin mode to Off so the K9 evaluator stays out of the way —
14422        // this test only exercises the cycle gate.
14423        override_active_permissions_mode_for_test(PermissionsMode::Off);
14424
14425        let conn = test_db();
14426        let a = make_memory("a3-a", "ns", Tier::Long, 5);
14427        let b = make_memory("a3-b", "ns", Tier::Long, 5);
14428        let c = make_memory("a3-c", "ns", Tier::Long, 5);
14429        insert(&conn, &a).unwrap();
14430        insert(&conn, &b).unwrap();
14431        insert(&conn, &c).unwrap();
14432
14433        // Build chain: a --reflects_on--> b --reflects_on--> c.
14434        create_link(&conn, &a.id, &b.id, "reflects_on").unwrap();
14435        create_link(&conn, &b.id, &c.id, "reflects_on").unwrap();
14436
14437        // Attempting c --reflects_on--> a would close the cycle.
14438        let err = create_link(&conn, &c.id, &a.id, "reflects_on")
14439            .expect_err("cycle-closing reflects_on must be refused");
14440        let msg = err.to_string();
14441        assert!(
14442            msg.starts_with(LINK_CYCLE_ERR_PREFIX),
14443            "expected {LINK_CYCLE_ERR_PREFIX} prefix, got: {msg}"
14444        );
14445
14446        // A `related_to` edge between the same pair is still allowed —
14447        // only `reflects_on` participates in the DAG invariant.
14448        create_link(&conn, &c.id, &a.id, "related_to")
14449            .expect("related_to is not gated by the cycle check");
14450    }
14451
14452    // v0.7.0 fix-campaign A3 (LINK-PARITY, #690) — the K9 permission
14453    // pipeline gates link writes at the storage layer (not just at
14454    // the MCP entry point). A `Deny` rule on `memory_link` refuses
14455    // the write through `create_link` / `create_link_signed`.
14456    #[test]
14457    fn a3_validate_link_pre_create_respects_governance_deny() {
14458        use crate::config::{
14459            PermissionsMode, lock_permissions_mode_for_test,
14460            override_active_permissions_mode_for_test,
14461        };
14462        use crate::permissions::{
14463            PermissionRule, RuleDecision, clear_active_permission_rules_for_test,
14464            set_active_permission_rules,
14465        };
14466        let _gate = lock_permissions_mode_for_test();
14467        override_active_permissions_mode_for_test(PermissionsMode::Enforce);
14468        clear_active_permission_rules_for_test();
14469        set_active_permission_rules(vec![PermissionRule {
14470            namespace_pattern: "a3-deny/**".to_string(),
14471            op: "memory_link".to_string(),
14472            agent_pattern: "*".to_string(),
14473            decision: RuleDecision::Deny,
14474            reason: Some("test: link denied by a3 rule".to_string()),
14475        }]);
14476
14477        let conn = test_db();
14478        let s = make_memory("a3-src", "a3-deny/scope", Tier::Long, 5);
14479        let t = make_memory("a3-tgt", "a3-deny/scope", Tier::Long, 5);
14480        insert(&conn, &s).unwrap();
14481        insert(&conn, &t).unwrap();
14482
14483        let err = create_link(&conn, &s.id, &t.id, "related_to")
14484            .expect_err("a Deny rule must refuse the link write");
14485        let msg = err.to_string();
14486        assert!(
14487            msg.starts_with(LINK_PERMISSION_DENIED_ERR_PREFIX),
14488            "expected {LINK_PERMISSION_DENIED_ERR_PREFIX} prefix, got: {msg}"
14489        );
14490
14491        // Cleanup so the global registry does not leak into other tests
14492        // running in the same process.
14493        clear_active_permission_rules_for_test();
14494        override_active_permissions_mode_for_test(PermissionsMode::Advisory);
14495    }
14496
14497    // v0.7.0 fix-campaign A3 (LINK-PARITY, #690) — federation receive
14498    // path: peer-attested inbound links bypass the K9 governance
14499    // gate (the peer is trusted by mTLS + Ed25519 attestation), but
14500    // the cycle check ALWAYS runs even on peer writes.
14501    #[test]
14502    fn a3_create_link_inbound_peer_attested_bypasses_governance() {
14503        use crate::config::{
14504            PermissionsMode, lock_permissions_mode_for_test,
14505            override_active_permissions_mode_for_test,
14506        };
14507        use crate::permissions::{
14508            PermissionRule, RuleDecision, clear_active_permission_rules_for_test,
14509            set_active_permission_rules,
14510        };
14511        let _gate = lock_permissions_mode_for_test();
14512        override_active_permissions_mode_for_test(PermissionsMode::Enforce);
14513        clear_active_permission_rules_for_test();
14514        set_active_permission_rules(vec![PermissionRule {
14515            namespace_pattern: "**".to_string(),
14516            op: "memory_link".to_string(),
14517            agent_pattern: "*".to_string(),
14518            decision: RuleDecision::Deny,
14519            reason: Some("test: every link denied".to_string()),
14520        }]);
14521
14522        let conn = test_db();
14523        let s = make_memory("inbound-src", "a3-fed", Tier::Long, 5);
14524        let t = make_memory("inbound-tgt", "a3-fed", Tier::Long, 5);
14525        insert(&conn, &s).unwrap();
14526        insert(&conn, &t).unwrap();
14527
14528        // v0.7.0 issue #810 / #813 — the CHECK trigger on memory_links
14529        // refuses any peer_attested row whose signature blob is NULL /
14530        // wrong-length. The pre-#810 test passed a NULL signature here
14531        // because the legacy invariant did not police that pairing;
14532        // now we synthesise a 64-byte fake signature blob so the row
14533        // satisfies the trigger's WHEN clause. The K9-bypass property
14534        // under test is orthogonal to whether the signature bytes
14535        // actually verify (verification is `memory_verify`'s job, not
14536        // this insertion path's).
14537        let link = MemoryLink {
14538            source_id: s.id.clone(),
14539            target_id: t.id.clone(),
14540            relation: crate::models::MemoryLinkRelation::RelatedTo,
14541            created_at: chrono::Utc::now().to_rfc3339(),
14542            valid_from: None,
14543            valid_until: None,
14544            observed_by: Some("peer:remote".to_string()),
14545            signature: Some(vec![0xAB_u8; 64]),
14546            attest_level: None,
14547        };
14548
14549        // Peer-attested inbound bypasses the K9 deny.
14550        create_link_inbound(&conn, &link, "peer_attested")
14551            .expect("peer_attested must bypass K9 governance");
14552
14553        // But an unsigned inbound link is still gated locally.
14554        let link2 = MemoryLink {
14555            source_id: t.id.clone(),
14556            target_id: s.id.clone(),
14557            relation: crate::models::MemoryLinkRelation::RelatedTo,
14558            created_at: chrono::Utc::now().to_rfc3339(),
14559            valid_from: None,
14560            valid_until: None,
14561            observed_by: Some("peer:remote".to_string()),
14562            signature: None,
14563            attest_level: None,
14564        };
14565        let err = create_link_inbound(&conn, &link2, "unsigned")
14566            .expect_err("unsigned inbound must NOT bypass governance");
14567        assert!(
14568            err.to_string()
14569                .starts_with(LINK_PERMISSION_DENIED_ERR_PREFIX)
14570        );
14571
14572        clear_active_permission_rules_for_test();
14573        override_active_permissions_mode_for_test(PermissionsMode::Advisory);
14574    }
14575
14576    // v0.7.0 fix-campaign A3 (LINK-PARITY, #690) — even a trusted
14577    // peer cannot extend a `reflects_on` cycle on the receiver. The
14578    // cycle gate runs regardless of attest_level.
14579    #[test]
14580    fn a3_create_link_inbound_peer_attested_still_refuses_cycle() {
14581        use crate::config::{
14582            PermissionsMode, lock_permissions_mode_for_test,
14583            override_active_permissions_mode_for_test,
14584        };
14585        let _gate = lock_permissions_mode_for_test();
14586        override_active_permissions_mode_for_test(PermissionsMode::Off);
14587
14588        let conn = test_db();
14589        let a = make_memory("inbound-cycle-a", "ns", Tier::Long, 5);
14590        let b = make_memory("inbound-cycle-b", "ns", Tier::Long, 5);
14591        insert(&conn, &a).unwrap();
14592        insert(&conn, &b).unwrap();
14593        create_link(&conn, &a.id, &b.id, "reflects_on").unwrap();
14594
14595        let cycle_link = MemoryLink {
14596            source_id: b.id.clone(),
14597            target_id: a.id.clone(),
14598            relation: crate::models::MemoryLinkRelation::ReflectsOn,
14599            created_at: chrono::Utc::now().to_rfc3339(),
14600            valid_from: None,
14601            valid_until: None,
14602            observed_by: Some("peer:remote".to_string()),
14603            signature: None,
14604            attest_level: None,
14605        };
14606        let err = create_link_inbound(&conn, &cycle_link, "peer_attested")
14607            .expect_err("cycle check must run even on peer_attested inbound");
14608        assert!(err.to_string().starts_with(LINK_CYCLE_ERR_PREFIX));
14609    }
14610
14611    // v0.7.0 H6 (round-2) — pure-function test: the truncation helper
14612    // itself must collapse only sub-microsecond digits and leave
14613    // microsecond-aligned inputs unchanged.
14614    #[test]
14615    fn h6_truncate_to_microseconds_drops_nanos() {
14616        use chrono::{TimeZone, Timelike};
14617        let ns = Utc.with_ymd_and_hms(2026, 5, 10, 12, 34, 56).unwrap();
14618        let ns = ns.with_nanosecond(123_456_789).unwrap();
14619        let truncated = truncate_to_microseconds(ns);
14620        // 123_456_789 ns → 123_456 µs → 123_456_000 ns.
14621        assert_eq!(truncated.nanosecond(), 123_456_000);
14622        // Round-trip through to_rfc3339 must produce a 6-digit
14623        // fractional second (the property H6 commits to).
14624        let s = truncated.to_rfc3339();
14625        let dot = s.find('.').expect("fractional second present");
14626        let frac = &s[dot + 1..];
14627        let frac_len = frac.chars().take_while(|c| c.is_ascii_digit()).count();
14628        assert_eq!(frac_len, 6, "expected exactly 6-digit fractional; got: {s}");
14629    }
14630
14631    #[test]
14632    fn kg_timeline_returns_events_ordered_by_valid_from_ascending() {
14633        let conn = test_db();
14634        let src = make_memory("alpha", "kg/projects/alpha", Tier::Long, 5);
14635        let s1 = make_memory("kickoff", "kg/projects/alpha", Tier::Long, 5);
14636        let s2 = make_memory("design phase", "kg/projects/alpha", Tier::Long, 5);
14637        let s3 = make_memory("implementation", "kg/projects/alpha", Tier::Long, 5);
14638        insert(&conn, &src).unwrap();
14639        insert(&conn, &s1).unwrap();
14640        insert(&conn, &s2).unwrap();
14641        insert(&conn, &s3).unwrap();
14642
14643        // Insert in a deliberately-shuffled order so ORDER BY isn't
14644        // a happy accident of insertion order.
14645        insert_link_at(
14646            &conn,
14647            &src.id,
14648            &s2.id,
14649            "supersedes",
14650            "2026-02-03T00:00:00+00:00",
14651        );
14652        insert_link_at(
14653            &conn,
14654            &src.id,
14655            &s1.id,
14656            "related_to",
14657            "2026-01-15T00:00:00+00:00",
14658        );
14659        insert_link_at(
14660            &conn,
14661            &src.id,
14662            &s3.id,
14663            "supersedes",
14664            "2026-03-22T00:00:00+00:00",
14665        );
14666
14667        let events = kg_timeline(&conn, &src.id, None, None, None).unwrap();
14668        assert_eq!(events.len(), 3);
14669        assert_eq!(events[0].target_id, s1.id);
14670        assert_eq!(events[1].target_id, s2.id);
14671        assert_eq!(events[2].target_id, s3.id);
14672        assert_eq!(events[0].title, "kickoff");
14673        assert_eq!(events[1].relation, "supersedes");
14674        assert_eq!(events[0].target_namespace, "kg/projects/alpha");
14675    }
14676
14677    #[test]
14678    fn kg_timeline_filters_by_since_inclusive() {
14679        let conn = test_db();
14680        let src = make_memory("e", "ns", Tier::Long, 5);
14681        let t1 = make_memory("e1", "ns", Tier::Long, 5);
14682        let t2 = make_memory("e2", "ns", Tier::Long, 5);
14683        insert(&conn, &src).unwrap();
14684        insert(&conn, &t1).unwrap();
14685        insert(&conn, &t2).unwrap();
14686        insert_link_at(
14687            &conn,
14688            &src.id,
14689            &t1.id,
14690            "related_to",
14691            "2026-01-01T00:00:00+00:00",
14692        );
14693        insert_link_at(
14694            &conn,
14695            &src.id,
14696            &t2.id,
14697            "related_to",
14698            "2026-03-01T00:00:00+00:00",
14699        );
14700
14701        let events = kg_timeline(
14702            &conn,
14703            &src.id,
14704            Some("2026-02-01T00:00:00+00:00"),
14705            None,
14706            None,
14707        )
14708        .unwrap();
14709        assert_eq!(events.len(), 1);
14710        assert_eq!(events[0].target_id, t2.id);
14711
14712        // Boundary: since == valid_from should match (inclusive).
14713        let on_boundary = kg_timeline(
14714            &conn,
14715            &src.id,
14716            Some("2026-03-01T00:00:00+00:00"),
14717            None,
14718            None,
14719        )
14720        .unwrap();
14721        assert_eq!(on_boundary.len(), 1);
14722    }
14723
14724    #[test]
14725    fn kg_timeline_filters_by_until_inclusive() {
14726        let conn = test_db();
14727        let src = make_memory("e", "ns", Tier::Long, 5);
14728        let t1 = make_memory("e1", "ns", Tier::Long, 5);
14729        let t2 = make_memory("e2", "ns", Tier::Long, 5);
14730        insert(&conn, &src).unwrap();
14731        insert(&conn, &t1).unwrap();
14732        insert(&conn, &t2).unwrap();
14733        insert_link_at(
14734            &conn,
14735            &src.id,
14736            &t1.id,
14737            "related_to",
14738            "2026-01-01T00:00:00+00:00",
14739        );
14740        insert_link_at(
14741            &conn,
14742            &src.id,
14743            &t2.id,
14744            "related_to",
14745            "2026-03-01T00:00:00+00:00",
14746        );
14747
14748        let events = kg_timeline(
14749            &conn,
14750            &src.id,
14751            None,
14752            Some("2026-02-01T00:00:00+00:00"),
14753            None,
14754        )
14755        .unwrap();
14756        assert_eq!(events.len(), 1);
14757        assert_eq!(events[0].target_id, t1.id);
14758    }
14759
14760    #[test]
14761    fn kg_timeline_skips_links_with_null_valid_from() {
14762        let conn = test_db();
14763        let src = make_memory("s", "ns", Tier::Long, 5);
14764        let t1 = make_memory("t1", "ns", Tier::Long, 5);
14765        let t2 = make_memory("t2", "ns", Tier::Long, 5);
14766        insert(&conn, &src).unwrap();
14767        insert(&conn, &t1).unwrap();
14768        insert(&conn, &t2).unwrap();
14769        // Direct insert with NULL valid_from to simulate an external
14770        // writer that bypassed `create_link`.
14771        let now = chrono::Utc::now().to_rfc3339();
14772        // v0.7.0 fix campaign R1-M2 — direct-SQL writer must use a
14773        // value in the closed-set; the trigger now refuses 'rel'.
14774        conn.execute(
14775            "INSERT INTO memory_links (source_id, target_id, relation, created_at, valid_from) \
14776             VALUES (?1, ?2, 'related_to', ?3, NULL)",
14777            params![&src.id, &t1.id, &now],
14778        )
14779        .unwrap();
14780        insert_link_at(
14781            &conn,
14782            &src.id,
14783            &t2.id,
14784            "supersedes",
14785            "2026-01-01T00:00:00+00:00",
14786        );
14787
14788        let events = kg_timeline(&conn, &src.id, None, None, None).unwrap();
14789        assert_eq!(events.len(), 1);
14790        assert_eq!(events[0].target_id, t2.id);
14791    }
14792
14793    #[test]
14794    fn kg_timeline_excludes_links_where_source_is_target() {
14795        // The query is anchored on `source_id`; inbound edges (where the
14796        // entity is the target) are intentionally NOT part of the
14797        // timeline. This guards against accidentally widening the
14798        // contract to a bidirectional view.
14799        let conn = test_db();
14800        let entity = make_memory("entity", "ns", Tier::Long, 5);
14801        let other = make_memory("other", "ns", Tier::Long, 5);
14802        insert(&conn, &entity).unwrap();
14803        insert(&conn, &other).unwrap();
14804        insert_link_at(
14805            &conn,
14806            &other.id,
14807            &entity.id,
14808            "related_to",
14809            "2026-01-01T00:00:00+00:00",
14810        );
14811        let events = kg_timeline(&conn, &entity.id, None, None, None).unwrap();
14812        assert!(events.is_empty());
14813    }
14814
14815    #[test]
14816    fn kg_timeline_limit_clamped_to_max() {
14817        let conn = test_db();
14818        let src = make_memory("s", "ns", Tier::Long, 5);
14819        insert(&conn, &src).unwrap();
14820        for i in 0..5 {
14821            let t = make_memory(&format!("t{i}"), "ns", Tier::Long, 5);
14822            insert(&conn, &t).unwrap();
14823            insert_link_at(
14824                &conn,
14825                &src.id,
14826                &t.id,
14827                "related_to",
14828                &format!("2026-01-0{}T00:00:00+00:00", i + 1),
14829            );
14830        }
14831        // Caller passes a wildly oversized limit — should be clamped
14832        // to KG_TIMELINE_MAX_LIMIT (i.e. accepted, not errored), and
14833        // since the row count is small, should return all 5.
14834        let events = kg_timeline(&conn, &src.id, None, None, Some(usize::MAX)).unwrap();
14835        assert_eq!(events.len(), 5);
14836
14837        // Caller passes 0 — clamp to 1.
14838        let one = kg_timeline(&conn, &src.id, None, None, Some(0)).unwrap();
14839        assert_eq!(one.len(), 1);
14840    }
14841
14842    #[test]
14843    fn kg_timeline_carries_observed_by_and_valid_until() {
14844        let conn = test_db();
14845        let src = make_memory("s", "ns", Tier::Long, 5);
14846        let t = make_memory("t", "ns", Tier::Long, 5);
14847        insert(&conn, &src).unwrap();
14848        insert(&conn, &t).unwrap();
14849        let now = chrono::Utc::now().to_rfc3339();
14850        conn.execute(
14851            "INSERT INTO memory_links (source_id, target_id, relation, created_at, valid_from, valid_until, observed_by) \
14852             VALUES (?1, ?2, 'supersedes', ?3, '2026-01-01T00:00:00+00:00', '2026-12-31T23:59:59+00:00', 'agent-pm-1')",
14853            params![&src.id, &t.id, &now],
14854        )
14855        .unwrap();
14856        let events = kg_timeline(&conn, &src.id, None, None, None).unwrap();
14857        assert_eq!(events.len(), 1);
14858        assert_eq!(events[0].observed_by.as_deref(), Some("agent-pm-1"));
14859        assert_eq!(
14860            events[0].valid_until.as_deref(),
14861            Some("2026-12-31T23:59:59+00:00")
14862        );
14863    }
14864
14865    #[test]
14866    fn kg_timeline_empty_for_unknown_source() {
14867        let conn = test_db();
14868        let events = kg_timeline(&conn, "nonexistent-id", None, None, None).unwrap();
14869        assert!(events.is_empty());
14870    }
14871
14872    // -- Pillar 2 / Stream C — kg_invalidate -------------------------------
14873
14874    #[test]
14875    fn invalidate_link_sets_valid_until_to_provided_timestamp() {
14876        let conn = test_db();
14877        let src = make_memory("inv-s", "test", Tier::Long, 5);
14878        let tgt = make_memory("inv-t", "test", Tier::Long, 5);
14879        insert(&conn, &src).unwrap();
14880        insert(&conn, &tgt).unwrap();
14881        create_link(&conn, &src.id, &tgt.id, "related_to").unwrap();
14882        let stamp = "2026-12-31T23:59:59+00:00";
14883        let res = invalidate_link(&conn, &src.id, &tgt.id, "related_to", Some(stamp))
14884            .unwrap()
14885            .expect("link must exist");
14886        assert_eq!(res.valid_until, stamp);
14887        assert!(res.previous_valid_until.is_none());
14888        let stored: Option<String> = conn
14889            .query_row(
14890                "SELECT valid_until FROM memory_links \
14891                 WHERE source_id = ?1 AND target_id = ?2 AND relation = ?3",
14892                params![&src.id, &tgt.id, "related_to"],
14893                |r| r.get(0),
14894            )
14895            .unwrap();
14896        assert_eq!(stored.as_deref(), Some(stamp));
14897    }
14898
14899    #[test]
14900    fn invalidate_link_defaults_to_now_when_no_timestamp_provided() {
14901        let conn = test_db();
14902        let src = make_memory("inv-s", "test", Tier::Long, 5);
14903        let tgt = make_memory("inv-t", "test", Tier::Long, 5);
14904        insert(&conn, &src).unwrap();
14905        insert(&conn, &tgt).unwrap();
14906        create_link(&conn, &src.id, &tgt.id, "related_to").unwrap();
14907        let res = invalidate_link(&conn, &src.id, &tgt.id, "related_to", None)
14908            .unwrap()
14909            .expect("link must exist");
14910        // The default is wall-clock now; assert it parses as RFC3339 and
14911        // is within a small window of the test's "now" (allow 60s skew
14912        // to accommodate slow runners).
14913        let parsed = chrono::DateTime::parse_from_rfc3339(&res.valid_until)
14914            .expect("default valid_until must be RFC3339");
14915        let now = chrono::Utc::now();
14916        let drift = now.signed_duration_since(parsed.with_timezone(&chrono::Utc));
14917        assert!(
14918            drift.num_seconds().abs() < 60,
14919            "default valid_until {} should be near now {now}",
14920            res.valid_until
14921        );
14922    }
14923
14924    #[test]
14925    fn invalidate_link_returns_none_for_unknown_triple() {
14926        let conn = test_db();
14927        // No memories or links created.
14928        let res = invalidate_link(&conn, "missing-src", "missing-tgt", "related_to", None).unwrap();
14929        assert!(res.is_none());
14930    }
14931
14932    #[test]
14933    fn invalidate_link_returns_none_when_relation_does_not_match() {
14934        // Link exists for ("related_to") but caller asks for ("supersedes").
14935        let conn = test_db();
14936        let src = make_memory("inv-s", "test", Tier::Long, 5);
14937        let tgt = make_memory("inv-t", "test", Tier::Long, 5);
14938        insert(&conn, &src).unwrap();
14939        insert(&conn, &tgt).unwrap();
14940        create_link(&conn, &src.id, &tgt.id, "related_to").unwrap();
14941        let res = invalidate_link(&conn, &src.id, &tgt.id, "supersedes", None).unwrap();
14942        assert!(res.is_none(), "must not match across relation values");
14943    }
14944
14945    #[test]
14946    fn invalidate_link_overwrites_existing_valid_until_and_reports_prior() {
14947        let conn = test_db();
14948        let src = make_memory("inv-s", "test", Tier::Long, 5);
14949        let tgt = make_memory("inv-t", "test", Tier::Long, 5);
14950        insert(&conn, &src).unwrap();
14951        insert(&conn, &tgt).unwrap();
14952        create_link(&conn, &src.id, &tgt.id, "related_to").unwrap();
14953        let first = "2026-06-01T00:00:00+00:00";
14954        let second = "2026-12-01T00:00:00+00:00";
14955        let r1 = invalidate_link(&conn, &src.id, &tgt.id, "related_to", Some(first))
14956            .unwrap()
14957            .unwrap();
14958        assert!(r1.previous_valid_until.is_none());
14959        let r2 = invalidate_link(&conn, &src.id, &tgt.id, "related_to", Some(second))
14960            .unwrap()
14961            .unwrap();
14962        assert_eq!(r2.previous_valid_until.as_deref(), Some(first));
14963        assert_eq!(r2.valid_until, second);
14964    }
14965
14966    #[test]
14967    fn invalidate_link_distinguishes_relation_when_multiple_links_share_endpoints() {
14968        // Two links between the same pair, different relations. Invalidating
14969        // one must not affect the other.
14970        let conn = test_db();
14971        let src = make_memory("inv-s", "test", Tier::Long, 5);
14972        let tgt = make_memory("inv-t", "test", Tier::Long, 5);
14973        insert(&conn, &src).unwrap();
14974        insert(&conn, &tgt).unwrap();
14975        create_link(&conn, &src.id, &tgt.id, "related_to").unwrap();
14976        create_link(&conn, &src.id, &tgt.id, "supersedes").unwrap();
14977        let stamp = "2026-07-15T12:00:00+00:00";
14978        invalidate_link(&conn, &src.id, &tgt.id, "related_to", Some(stamp))
14979            .unwrap()
14980            .unwrap();
14981        let related: Option<String> = conn
14982            .query_row(
14983                "SELECT valid_until FROM memory_links \
14984                 WHERE source_id = ?1 AND target_id = ?2 AND relation = 'related_to'",
14985                params![&src.id, &tgt.id],
14986                |r| r.get(0),
14987            )
14988            .unwrap();
14989        let supers: Option<String> = conn
14990            .query_row(
14991                "SELECT valid_until FROM memory_links \
14992                 WHERE source_id = ?1 AND target_id = ?2 AND relation = 'supersedes'",
14993                params![&src.id, &tgt.id],
14994                |r| r.get(0),
14995            )
14996            .unwrap();
14997        assert_eq!(related.as_deref(), Some(stamp));
14998        assert!(
14999            supers.is_none(),
15000            "the sibling 'supersedes' link must remain valid"
15001        );
15002    }
15003
15004    #[test]
15005    fn invalidate_link_preserves_other_columns() {
15006        // valid_from, observed_by, created_at, signature must not be
15007        // touched by the invalidate UPDATE.
15008        let conn = test_db();
15009        let src = make_memory("inv-s", "test", Tier::Long, 5);
15010        let tgt = make_memory("inv-t", "test", Tier::Long, 5);
15011        insert(&conn, &src).unwrap();
15012        insert(&conn, &tgt).unwrap();
15013        let now = chrono::Utc::now().to_rfc3339();
15014        conn.execute(
15015            "INSERT INTO memory_links \
15016             (source_id, target_id, relation, created_at, valid_from, observed_by) \
15017             VALUES (?1, ?2, 'related_to', ?3, '2026-01-01T00:00:00+00:00', 'agent-x')",
15018            params![&src.id, &tgt.id, &now],
15019        )
15020        .unwrap();
15021        invalidate_link(
15022            &conn,
15023            &src.id,
15024            &tgt.id,
15025            "related_to",
15026            Some("2026-12-31T23:59:59+00:00"),
15027        )
15028        .unwrap()
15029        .unwrap();
15030        let (vf, ob, ca): (Option<String>, Option<String>, String) = conn
15031            .query_row(
15032                "SELECT valid_from, observed_by, created_at FROM memory_links \
15033                 WHERE source_id = ?1 AND target_id = ?2 AND relation = 'related_to'",
15034                params![&src.id, &tgt.id],
15035                |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)),
15036            )
15037            .unwrap();
15038        assert_eq!(vf.as_deref(), Some("2026-01-01T00:00:00+00:00"));
15039        assert_eq!(ob.as_deref(), Some("agent-x"));
15040        assert_eq!(ca, now);
15041    }
15042
15043    #[test]
15044    fn kg_query_default_excludes_invalidated_edges() {
15045        // NHI-P3-T7 regression: prior versions returned invalidated
15046        // edges in default kg_query results. The "current view" filter
15047        // must exclude any edge whose `valid_until` lies in the past.
15048        let conn = test_db();
15049        let src = make_memory("inv-src", "ns", Tier::Long, 5);
15050        let live = make_memory("inv-live", "ns", Tier::Long, 5);
15051        let dead = make_memory("inv-dead", "ns", Tier::Long, 5);
15052        insert(&conn, &src).unwrap();
15053        insert(&conn, &live).unwrap();
15054        insert(&conn, &dead).unwrap();
15055        // Live edge — no valid_until.
15056        insert_link_full(&conn, &src.id, &live.id, "related_to", None, None, None);
15057        // Dead edge — valid_until set in the past.
15058        insert_link_full(
15059            &conn,
15060            &src.id,
15061            &dead.id,
15062            "supersedes",
15063            None,
15064            Some("2020-01-01T00:00:00+00:00"),
15065            None,
15066        );
15067
15068        // Default ("current view"): only the live edge shows up.
15069        let current = kg_query(&conn, &src.id, 1, None, None, None, false).unwrap();
15070        assert_eq!(current.len(), 1);
15071        assert_eq!(current[0].target_id, live.id);
15072
15073        // Opt-in: include_invalidated=true returns both edges.
15074        let full = kg_query(&conn, &src.id, 1, None, None, None, true).unwrap();
15075        assert_eq!(full.len(), 2);
15076    }
15077
15078    #[test]
15079    fn default_for_managed_namespace_helper_yields_write_owner() {
15080        // NHI-P4-T19 (v0.7.0 NHI testing): the
15081        // `GovernancePolicy::default_for_managed_namespace` helper
15082        // exists so operators can opt into K9 namespace-lock semantics
15083        // by writing the policy into their standard memory's metadata.
15084        // Changing the implicit fallback in `read_namespace_policy`
15085        // is deferred to v0.7.1 because it would break inheritance
15086        // chains where parent and child standards were registered
15087        // under distinct agent identities. Tests ensures the helper
15088        // returns the documented shape.
15089        let policy = crate::models::GovernancePolicy::default_for_managed_namespace();
15090        assert_eq!(policy.core.write, crate::models::GovernanceLevel::Owner);
15091        assert_eq!(policy.core.promote, crate::models::GovernanceLevel::Any);
15092        assert_eq!(policy.core.delete, crate::models::GovernanceLevel::Owner);
15093        assert!(policy.core.inherit);
15094    }
15095
15096    #[test]
15097    fn namespace_set_standard_with_explicit_owner_policy_enforces_lock() {
15098        // NHI-P4-T19 regression: when the operator explicitly writes
15099        // `governance.write=owner` into the standard memory's
15100        // metadata, the namespace lock is enforced. This is the
15101        // opt-in path the v0.7.0 verdict recommends documenting; the
15102        // helper `default_for_managed_namespace` is the canonical
15103        // shape.
15104        let conn = test_db();
15105        let mut standard = make_memory("std", "ns/locked", Tier::Long, 8);
15106        let policy =
15107            serde_json::to_value(crate::models::GovernancePolicy::default_for_managed_namespace())
15108                .unwrap();
15109        standard.metadata = serde_json::json!({"governance": policy});
15110        let standard_id = insert(&conn, &standard).unwrap();
15111        set_namespace_standard(&conn, "ns/locked", &standard_id, None).unwrap();
15112
15113        let resolved = resolve_governance_policy(&conn, "ns/locked")
15114            .expect("policy must resolve when explicitly set");
15115        assert_eq!(resolved.core.write, crate::models::GovernanceLevel::Owner);
15116    }
15117
15118    /// F1 regression (v0.7.0 round-2-fixes): when a parent namespace
15119    /// has `governance.write = owner` with `inherit: true` and a deep
15120    /// child has no standard of its own, the owner-level check must
15121    /// resolve the namespace owner by walking the same chain that
15122    /// `resolve_governance_policy` walks. Pre-fix the helper looked
15123    /// only at the leaf's standard, returning None and producing a
15124    /// "no resolvable owner" Deny even for the rightful owner.
15125    #[test]
15126    fn enforce_governance_inherits_owner_for_deep_child_owner_write() {
15127        use crate::config::{
15128            PermissionsMode, lock_permissions_mode_for_test,
15129            override_active_permissions_mode_for_test,
15130        };
15131        use crate::models::{
15132            ApproverType, CorePolicy, GovernanceDecision, GovernanceLevel, GovernancePolicy,
15133            GovernedAction, default_metadata,
15134        };
15135
15136        let _gate = lock_permissions_mode_for_test();
15137        override_active_permissions_mode_for_test(PermissionsMode::Enforce);
15138
15139        let conn = test_db();
15140
15141        // Seed a parent standard that enforces write=owner with inherit=true.
15142        let parent_ns = "f1/parent";
15143        let owner = "ai:alice";
15144        let policy = GovernancePolicy {
15145            core: CorePolicy {
15146                write: GovernanceLevel::Owner,
15147                promote: GovernanceLevel::Any,
15148                delete: GovernanceLevel::Owner,
15149                approver: ApproverType::Human,
15150                inherit: true,
15151                max_reflection_depth: None,
15152            },
15153            ..Default::default()
15154        };
15155
15156        let now = chrono::Utc::now().to_rfc3339();
15157        let mut metadata = default_metadata();
15158        if let Some(obj) = metadata.as_object_mut() {
15159            obj.insert(
15160                "agent_id".to_string(),
15161                serde_json::Value::String(owner.to_string()),
15162            );
15163            obj.insert(
15164                "governance".to_string(),
15165                serde_json::to_value(&policy).unwrap(),
15166            );
15167        }
15168        let standard = Memory {
15169            id: uuid::Uuid::new_v4().to_string(),
15170            tier: Tier::Long,
15171            namespace: format!("_standards-{parent_ns}"),
15172            title: "f1-standard".to_string(),
15173            content: "f1 policy".to_string(),
15174            tags: vec![],
15175            priority: 9,
15176            confidence: 1.0,
15177            source: "test".to_string(),
15178            access_count: 0,
15179            created_at: now.clone(),
15180            updated_at: now,
15181            last_accessed_at: None,
15182            expires_at: None,
15183            metadata,
15184            reflection_depth: 0,
15185            memory_kind: crate::models::MemoryKind::Observation,
15186            entity_id: None,
15187            persona_version: None,
15188            citations: Vec::new(),
15189            source_uri: None,
15190            source_span: None,
15191            confidence_source: ConfidenceSource::CallerProvided,
15192            confidence_signals: None,
15193            confidence_decayed_at: None,
15194            version: 1,
15195        };
15196        let standard_id = insert(&conn, &standard).unwrap();
15197        set_namespace_standard(&conn, parent_ns, &standard_id, None).unwrap();
15198
15199        // Deep child has NO standard of its own; everything must
15200        // resolve via the chain walk.
15201        let child_ns = "f1/parent/a/b/c";
15202        let payload = serde_json::json!({"title": "deep-child"});
15203
15204        // Owner-level write by the rightful owner: ALLOW.
15205        let allow = enforce_governance(
15206            &conn,
15207            GovernedAction::Store,
15208            child_ns,
15209            owner,
15210            None,
15211            None,
15212            &payload,
15213        )
15214        .expect("enforce_governance must not error on inherited owner policy");
15215        assert!(
15216            matches!(allow, GovernanceDecision::Allow),
15217            "owner write at deep child must Allow when chain walk finds the parent's owner: got {allow:?}"
15218        );
15219
15220        // Owner-level write by a non-owner: DENY.
15221        let deny = enforce_governance(
15222            &conn,
15223            GovernedAction::Store,
15224            child_ns,
15225            "ai:eve",
15226            None,
15227            None,
15228            &payload,
15229        )
15230        .expect("enforce_governance must not error");
15231        match deny {
15232            GovernanceDecision::Deny(refusal) => {
15233                assert!(
15234                    refusal.reason.contains("not the owner"),
15235                    "non-owner deny should cite ownership mismatch, got: {refusal:?}"
15236                );
15237                assert_eq!(
15238                    refusal.denied_level,
15239                    GovernanceLevel::Owner,
15240                    "owner-level refusal must carry GovernanceLevel::Owner; got {refusal:?}",
15241                );
15242            }
15243            other => panic!("expected Deny for non-owner, got {other:?}"),
15244        }
15245    }
15246
15247    /// F1 corollary: `inherit = false` on the parent must STOP the
15248    /// chain walk at the parent. The deep child has no policy of its
15249    /// own and the parent declines to share, so the action is
15250    /// ungoverned (Allow).
15251    ///
15252    /// Note: under `resolve_governance_policy` semantics, the
15253    /// `inherit` flag is documentation/contract — the leaf-first walk
15254    /// stops at the most-specific policy regardless. The flag flows
15255    /// through to consumers (e.g. pending_action approver resolution)
15256    /// to signal "do not re-walk above me." This test pins the
15257    /// observable outcome: a deep child with NO standard inherits a
15258    /// parent policy regardless of the `inherit` flag value, because
15259    /// the walk only stops at policies that exist. The flag's
15260    /// "stop" semantics apply when an intermediate policy declines to
15261    /// be inherited above itself, not below.
15262    #[test]
15263    fn enforce_governance_deep_child_with_inherit_false_still_resolves_via_walk() {
15264        use crate::config::{
15265            PermissionsMode, lock_permissions_mode_for_test,
15266            override_active_permissions_mode_for_test,
15267        };
15268        use crate::models::{
15269            ApproverType, CorePolicy, GovernanceDecision, GovernanceLevel, GovernancePolicy,
15270            GovernedAction, default_metadata,
15271        };
15272
15273        let _gate = lock_permissions_mode_for_test();
15274        override_active_permissions_mode_for_test(PermissionsMode::Enforce);
15275
15276        let conn = test_db();
15277
15278        // Parent has inherit=false: descendants without a policy of
15279        // their own should still resolve to this policy on the
15280        // leaf-first walk; inherit=false is a forward-blocker
15281        // ("nothing above me applies to namespaces I govern"), not a
15282        // backward-blocker ("namespaces below me cannot inherit").
15283        // This matches the documented semantics in
15284        // `resolve_governance_policy`'s docstring.
15285        let parent_ns = "f1nb/parent";
15286        let owner = "ai:alice";
15287        let policy = GovernancePolicy {
15288            core: CorePolicy {
15289                write: GovernanceLevel::Owner,
15290                promote: GovernanceLevel::Any,
15291                delete: GovernanceLevel::Owner,
15292                approver: ApproverType::Human,
15293                inherit: false,
15294                max_reflection_depth: None,
15295            },
15296            ..Default::default()
15297        };
15298        let now = chrono::Utc::now().to_rfc3339();
15299        let mut metadata = default_metadata();
15300        if let Some(obj) = metadata.as_object_mut() {
15301            obj.insert(
15302                "agent_id".to_string(),
15303                serde_json::Value::String(owner.to_string()),
15304            );
15305            obj.insert(
15306                "governance".to_string(),
15307                serde_json::to_value(&policy).unwrap(),
15308            );
15309        }
15310        let standard = Memory {
15311            id: uuid::Uuid::new_v4().to_string(),
15312            tier: Tier::Long,
15313            namespace: format!("_standards-{parent_ns}"),
15314            title: "f1nb-standard".to_string(),
15315            content: "policy".to_string(),
15316            tags: vec![],
15317            priority: 9,
15318            confidence: 1.0,
15319            source: "test".to_string(),
15320            access_count: 0,
15321            created_at: now.clone(),
15322            updated_at: now,
15323            last_accessed_at: None,
15324            expires_at: None,
15325            metadata,
15326            reflection_depth: 0,
15327            memory_kind: crate::models::MemoryKind::Observation,
15328            entity_id: None,
15329            persona_version: None,
15330            citations: Vec::new(),
15331            source_uri: None,
15332            source_span: None,
15333            confidence_source: ConfidenceSource::CallerProvided,
15334            confidence_signals: None,
15335            confidence_decayed_at: None,
15336            version: 1,
15337        };
15338        let standard_id = insert(&conn, &standard).unwrap();
15339        set_namespace_standard(&conn, parent_ns, &standard_id, None).unwrap();
15340
15341        // Deep child write by owner is still Allow (chain walk finds
15342        // parent owner; inherit=false on the parent does not block
15343        // descendants).
15344        let decision = enforce_governance(
15345            &conn,
15346            GovernedAction::Store,
15347            "f1nb/parent/x/y",
15348            owner,
15349            None,
15350            None,
15351            &serde_json::json!({}),
15352        )
15353        .unwrap();
15354        assert!(
15355            matches!(decision, GovernanceDecision::Allow),
15356            "owner write at deep child resolves via leaf-first walk: got {decision:?}"
15357        );
15358    }
15359
15360    #[test]
15361    fn find_paths_default_excludes_invalidated_edges() {
15362        // NHI-P3-T7 regression: find_paths must skip edges whose
15363        // valid_until lies in the past unless the caller asks for the
15364        // full historical link graph.
15365        let conn = test_db();
15366        let a = make_memory("fp-a", "ns", Tier::Long, 5);
15367        let b = make_memory("fp-b", "ns", Tier::Long, 5);
15368        let c = make_memory("fp-c", "ns", Tier::Long, 5);
15369        insert(&conn, &a).unwrap();
15370        insert(&conn, &b).unwrap();
15371        insert(&conn, &c).unwrap();
15372        // Live path A → C.
15373        insert_link_full(&conn, &a.id, &c.id, "related_to", None, None, None);
15374        // Dead path A → B → C (the A→B leg is invalidated).
15375        insert_link_full(
15376            &conn,
15377            &a.id,
15378            &b.id,
15379            "supersedes",
15380            None,
15381            Some("2020-01-01T00:00:00+00:00"),
15382            None,
15383        );
15384        insert_link_full(&conn, &b.id, &c.id, "related_to", None, None, None);
15385
15386        // Default: only the live A→C path.
15387        let current = find_paths(&conn, &a.id, &c.id, Some(3), None, false).unwrap();
15388        assert_eq!(current.len(), 1);
15389        assert_eq!(current[0], vec![a.id.clone(), c.id.clone()]);
15390
15391        // Opt-in: include_invalidated=true returns both paths.
15392        let full = find_paths(&conn, &a.id, &c.id, Some(3), None, true).unwrap();
15393        assert_eq!(full.len(), 2);
15394    }
15395
15396    // -- Pillar 2 / Stream C — kg_query (depth=1) ---------------------------
15397
15398    /// Insert a link with explicit `temporal/observed_by` columns so the
15399    /// `kg_query` filter tests can pin behavior without relying on
15400    /// wall-clock spread.
15401    fn insert_link_full(
15402        conn: &Connection,
15403        source_id: &str,
15404        target_id: &str,
15405        relation: &str,
15406        valid_from: Option<&str>,
15407        valid_until: Option<&str>,
15408        observed_by: Option<&str>,
15409    ) {
15410        let now = chrono::Utc::now().to_rfc3339();
15411        conn.execute(
15412            "INSERT INTO memory_links \
15413             (source_id, target_id, relation, created_at, valid_from, valid_until, observed_by) \
15414             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
15415            params![
15416                source_id,
15417                target_id,
15418                relation,
15419                now,
15420                valid_from,
15421                valid_until,
15422                observed_by
15423            ],
15424        )
15425        .unwrap();
15426    }
15427
15428    #[test]
15429    fn kg_query_returns_outbound_neighbors_at_depth_1() {
15430        let conn = test_db();
15431        let src = make_memory("alpha", "kg/projects/alpha", Tier::Long, 5);
15432        let n1 = make_memory("kickoff", "kg/projects/alpha", Tier::Long, 5);
15433        let n2 = make_memory("design", "kg/projects/alpha", Tier::Long, 5);
15434        insert(&conn, &src).unwrap();
15435        insert(&conn, &n1).unwrap();
15436        insert(&conn, &n2).unwrap();
15437        insert_link_full(
15438            &conn,
15439            &src.id,
15440            &n1.id,
15441            "related_to",
15442            Some("2026-01-15T00:00:00+00:00"),
15443            None,
15444            Some("agent-1"),
15445        );
15446        insert_link_full(
15447            &conn,
15448            &src.id,
15449            &n2.id,
15450            "supersedes",
15451            Some("2026-02-03T00:00:00+00:00"),
15452            None,
15453            Some("agent-2"),
15454        );
15455
15456        let nodes = kg_query(&conn, &src.id, 1, None, None, None, false).unwrap();
15457        assert_eq!(nodes.len(), 2);
15458        // Ordered by COALESCE(valid_from, created_at) ASC.
15459        assert_eq!(nodes[0].target_id, n1.id);
15460        assert_eq!(nodes[1].target_id, n2.id);
15461        assert_eq!(nodes[0].title, "kickoff");
15462        assert_eq!(nodes[0].relation, "related_to");
15463        assert_eq!(nodes[0].observed_by.as_deref(), Some("agent-1"));
15464        assert_eq!(nodes[0].depth, 1);
15465        assert_eq!(nodes[0].path, format!("{}->{}", src.id, n1.id));
15466        assert_eq!(nodes[0].target_namespace, "kg/projects/alpha");
15467    }
15468
15469    #[test]
15470    fn kg_query_filters_by_valid_at_window() {
15471        let conn = test_db();
15472        let src = make_memory("e", "ns", Tier::Long, 5);
15473        let t1 = make_memory("e1", "ns", Tier::Long, 5);
15474        let t2 = make_memory("e2", "ns", Tier::Long, 5);
15475        insert(&conn, &src).unwrap();
15476        insert(&conn, &t1).unwrap();
15477        insert(&conn, &t2).unwrap();
15478        // t1 valid 2026-01-01 → 2026-02-01; t2 valid from 2026-03-01.
15479        insert_link_full(
15480            &conn,
15481            &src.id,
15482            &t1.id,
15483            "related_to",
15484            Some("2026-01-01T00:00:00+00:00"),
15485            Some("2026-02-01T00:00:00+00:00"),
15486            None,
15487        );
15488        insert_link_full(
15489            &conn,
15490            &src.id,
15491            &t2.id,
15492            "related_to",
15493            Some("2026-03-01T00:00:00+00:00"),
15494            None,
15495            None,
15496        );
15497
15498        // At 2026-01-15 only t1 is valid.
15499        let n_jan = kg_query(
15500            &conn,
15501            &src.id,
15502            1,
15503            Some("2026-01-15T00:00:00+00:00"),
15504            None,
15505            None,
15506            false,
15507        )
15508        .unwrap();
15509        assert_eq!(n_jan.len(), 1);
15510        assert_eq!(n_jan[0].target_id, t1.id);
15511
15512        // At 2026-02-15 the first link is closed, the second hasn't
15513        // started yet — empty.
15514        let n_feb = kg_query(
15515            &conn,
15516            &src.id,
15517            1,
15518            Some("2026-02-15T00:00:00+00:00"),
15519            None,
15520            None,
15521            false,
15522        )
15523        .unwrap();
15524        assert!(n_feb.is_empty());
15525
15526        // At 2026-04-01 only t2 is valid.
15527        let n_apr = kg_query(
15528            &conn,
15529            &src.id,
15530            1,
15531            Some("2026-04-01T00:00:00+00:00"),
15532            None,
15533            None,
15534            false,
15535        )
15536        .unwrap();
15537        assert_eq!(n_apr.len(), 1);
15538        assert_eq!(n_apr[0].target_id, t2.id);
15539    }
15540
15541    #[test]
15542    fn kg_query_skips_null_valid_from_when_valid_at_filter_active() {
15543        let conn = test_db();
15544        let src = make_memory("s", "ns", Tier::Long, 5);
15545        let t = make_memory("t", "ns", Tier::Long, 5);
15546        insert(&conn, &src).unwrap();
15547        insert(&conn, &t).unwrap();
15548        // Link with NULL valid_from — must be invisible to a temporally
15549        // scoped query (we cannot tell if it was valid at any point).
15550        insert_link_full(&conn, &src.id, &t.id, "related_to", None, None, None);
15551
15552        let with_filter = kg_query(
15553            &conn,
15554            &src.id,
15555            1,
15556            Some("2026-01-15T00:00:00+00:00"),
15557            None,
15558            None,
15559            false,
15560        )
15561        .unwrap();
15562        assert!(with_filter.is_empty());
15563
15564        // Without the filter, the same link IS returned.
15565        let without = kg_query(&conn, &src.id, 1, None, None, None, false).unwrap();
15566        assert_eq!(without.len(), 1);
15567        assert_eq!(without[0].target_id, t.id);
15568    }
15569
15570    #[test]
15571    fn kg_query_filters_by_allowed_agents() {
15572        let conn = test_db();
15573        let src = make_memory("s", "ns", Tier::Long, 5);
15574        let t1 = make_memory("t1", "ns", Tier::Long, 5);
15575        let t2 = make_memory("t2", "ns", Tier::Long, 5);
15576        let t3 = make_memory("t3", "ns", Tier::Long, 5);
15577        insert(&conn, &src).unwrap();
15578        insert(&conn, &t1).unwrap();
15579        insert(&conn, &t2).unwrap();
15580        insert(&conn, &t3).unwrap();
15581        insert_link_full(
15582            &conn,
15583            &src.id,
15584            &t1.id,
15585            "related_to",
15586            Some("2026-01-01T00:00:00+00:00"),
15587            None,
15588            Some("agent-a"),
15589        );
15590        insert_link_full(
15591            &conn,
15592            &src.id,
15593            &t2.id,
15594            "related_to",
15595            Some("2026-01-02T00:00:00+00:00"),
15596            None,
15597            Some("agent-b"),
15598        );
15599        // Link with NULL observed_by must be excluded once the agent
15600        // filter is active (`NULL IN (...)` is NULL/false in SQLite).
15601        insert_link_full(
15602            &conn,
15603            &src.id,
15604            &t3.id,
15605            "related_to",
15606            Some("2026-01-03T00:00:00+00:00"),
15607            None,
15608            None,
15609        );
15610
15611        let allow_a = vec!["agent-a".to_string()];
15612        let only_a = kg_query(&conn, &src.id, 1, None, Some(&allow_a), None, false).unwrap();
15613        assert_eq!(only_a.len(), 1);
15614        assert_eq!(only_a[0].target_id, t1.id);
15615
15616        let allow_both = vec!["agent-a".to_string(), "agent-b".to_string()];
15617        let both = kg_query(&conn, &src.id, 1, None, Some(&allow_both), None, false).unwrap();
15618        assert_eq!(both.len(), 2);
15619    }
15620
15621    #[test]
15622    fn kg_query_empty_allowed_agents_returns_zero_rows() {
15623        let conn = test_db();
15624        let src = make_memory("s", "ns", Tier::Long, 5);
15625        let t = make_memory("t", "ns", Tier::Long, 5);
15626        insert(&conn, &src).unwrap();
15627        insert(&conn, &t).unwrap();
15628        insert_link_full(
15629            &conn,
15630            &src.id,
15631            &t.id,
15632            "related_to",
15633            Some("2026-01-01T00:00:00+00:00"),
15634            None,
15635            Some("agent-a"),
15636        );
15637
15638        // Sanity: no filter returns the link.
15639        let unfiltered = kg_query(&conn, &src.id, 1, None, None, None, false).unwrap();
15640        assert_eq!(unfiltered.len(), 1);
15641
15642        // Empty allowlist == "no agents trusted" — must return zero
15643        // rows, not silently fall through to the unfiltered path.
15644        let empty: Vec<String> = Vec::new();
15645        let none = kg_query(&conn, &src.id, 1, None, Some(&empty), None, false).unwrap();
15646        assert!(none.is_empty());
15647    }
15648
15649    #[test]
15650    fn kg_query_rejects_max_depth_zero() {
15651        let conn = test_db();
15652        let src = make_memory("s", "ns", Tier::Long, 5);
15653        insert(&conn, &src).unwrap();
15654        let err = kg_query(&conn, &src.id, 0, None, None, None, false).unwrap_err();
15655        assert!(err.to_string().contains("max_depth"));
15656    }
15657
15658    #[test]
15659    fn kg_query_rejects_unsupported_max_depth() {
15660        // The recursive-CTE slice supports depth 1..=5; passing 6+ must
15661        // produce an explicit error so callers learn they hit the
15662        // ceiling rather than receiving a partial graph.
15663        let conn = test_db();
15664        let src = make_memory("s", "ns", Tier::Long, 5);
15665        insert(&conn, &src).unwrap();
15666        let err = kg_query(
15667            &conn,
15668            &src.id,
15669            KG_QUERY_MAX_SUPPORTED_DEPTH + 1,
15670            None,
15671            None,
15672            None,
15673            false,
15674        )
15675        .unwrap_err();
15676        let msg = err.to_string();
15677        assert!(msg.contains(&format!("max_depth={}", KG_QUERY_MAX_SUPPORTED_DEPTH + 1)));
15678        assert!(msg.contains(&format!("supported depth={KG_QUERY_MAX_SUPPORTED_DEPTH}")));
15679    }
15680
15681    #[test]
15682    fn kg_query_traverses_multiple_hops() {
15683        // src -> mid -> leaf. depth=2 must return both hops, with
15684        // depth/path reflecting the chain.
15685        let conn = test_db();
15686        let src = make_memory("src", "ns", Tier::Long, 5);
15687        let mid = make_memory("mid", "ns", Tier::Long, 5);
15688        let leaf = make_memory("leaf", "ns", Tier::Long, 5);
15689        insert(&conn, &src).unwrap();
15690        insert(&conn, &mid).unwrap();
15691        insert(&conn, &leaf).unwrap();
15692        insert_link_full(
15693            &conn,
15694            &src.id,
15695            &mid.id,
15696            "related_to",
15697            Some("2026-01-01T00:00:00+00:00"),
15698            None,
15699            Some("agent-x"),
15700        );
15701        insert_link_full(
15702            &conn,
15703            &mid.id,
15704            &leaf.id,
15705            "supersedes",
15706            Some("2026-01-02T00:00:00+00:00"),
15707            None,
15708            Some("agent-x"),
15709        );
15710
15711        // depth=1 sees only mid.
15712        let d1 = kg_query(&conn, &src.id, 1, None, None, None, false).unwrap();
15713        assert_eq!(d1.len(), 1);
15714        assert_eq!(d1[0].target_id, mid.id);
15715        assert_eq!(d1[0].depth, 1);
15716
15717        // depth=2 sees both, ordered shallow-first.
15718        let d2 = kg_query(&conn, &src.id, 2, None, None, None, false).unwrap();
15719        assert_eq!(d2.len(), 2);
15720        assert_eq!(d2[0].target_id, mid.id);
15721        assert_eq!(d2[0].depth, 1);
15722        assert_eq!(d2[0].path, format!("{}->{}", src.id, mid.id));
15723        assert_eq!(d2[1].target_id, leaf.id);
15724        assert_eq!(d2[1].depth, 2);
15725        assert_eq!(d2[1].relation, "supersedes");
15726        assert_eq!(d2[1].path, format!("{}->{}->{}", src.id, mid.id, leaf.id));
15727    }
15728
15729    #[test]
15730    fn kg_query_multi_hop_respects_valid_at_per_hop() {
15731        // src -> mid valid 2026-01..02; mid -> leaf valid 2026-04+.
15732        // At valid_at=2026-01-15 the second hop is not yet valid, so
15733        // only mid is returned; at valid_at=2026-04-15 the first hop is
15734        // closed, so both are filtered out.
15735        let conn = test_db();
15736        let src = make_memory("s", "ns", Tier::Long, 5);
15737        let mid = make_memory("m", "ns", Tier::Long, 5);
15738        let leaf = make_memory("l", "ns", Tier::Long, 5);
15739        insert(&conn, &src).unwrap();
15740        insert(&conn, &mid).unwrap();
15741        insert(&conn, &leaf).unwrap();
15742        insert_link_full(
15743            &conn,
15744            &src.id,
15745            &mid.id,
15746            "related_to",
15747            Some("2026-01-01T00:00:00+00:00"),
15748            Some("2026-02-01T00:00:00+00:00"),
15749            None,
15750        );
15751        insert_link_full(
15752            &conn,
15753            &mid.id,
15754            &leaf.id,
15755            "related_to",
15756            Some("2026-04-01T00:00:00+00:00"),
15757            None,
15758            None,
15759        );
15760
15761        let mid_only = kg_query(
15762            &conn,
15763            &src.id,
15764            3,
15765            Some("2026-01-15T00:00:00+00:00"),
15766            None,
15767            None,
15768            false,
15769        )
15770        .unwrap();
15771        assert_eq!(mid_only.len(), 1);
15772        assert_eq!(mid_only[0].target_id, mid.id);
15773
15774        let neither = kg_query(
15775            &conn,
15776            &src.id,
15777            3,
15778            Some("2026-04-15T00:00:00+00:00"),
15779            None,
15780            None,
15781            false,
15782        )
15783        .unwrap();
15784        assert!(neither.is_empty());
15785    }
15786
15787    #[test]
15788    fn kg_query_detects_cycles() {
15789        // a -> b -> c -> a forms a cycle. Even with max_depth=5, the
15790        // traversal must stop revisiting nodes that are already on the
15791        // path; the result lists each reachable node at most once.
15792        let conn = test_db();
15793        let a = make_memory("a", "ns", Tier::Long, 5);
15794        let b = make_memory("b", "ns", Tier::Long, 5);
15795        let c = make_memory("c", "ns", Tier::Long, 5);
15796        insert(&conn, &a).unwrap();
15797        insert(&conn, &b).unwrap();
15798        insert(&conn, &c).unwrap();
15799        insert_link_full(
15800            &conn,
15801            &a.id,
15802            &b.id,
15803            "related_to",
15804            Some("2026-01-01T00:00:00+00:00"),
15805            None,
15806            None,
15807        );
15808        insert_link_full(
15809            &conn,
15810            &b.id,
15811            &c.id,
15812            "related_to",
15813            Some("2026-01-02T00:00:00+00:00"),
15814            None,
15815            None,
15816        );
15817        insert_link_full(
15818            &conn,
15819            &c.id,
15820            &a.id,
15821            "related_to",
15822            Some("2026-01-03T00:00:00+00:00"),
15823            None,
15824            None,
15825        );
15826
15827        let nodes = kg_query(&conn, &a.id, 5, None, None, None, false).unwrap();
15828        // Expect b at depth 1 and c at depth 2; the cycle back to a is
15829        // pruned. (The c->a edge could in principle surface a again at
15830        // depth 3, but only if a is not on its own path — and the
15831        // anchor seeds path with `a->b`, so a IS on every descendant
15832        // path through b/c.)
15833        assert_eq!(nodes.len(), 2);
15834        assert_eq!(nodes[0].target_id, b.id);
15835        assert_eq!(nodes[0].depth, 1);
15836        assert_eq!(nodes[1].target_id, c.id);
15837        assert_eq!(nodes[1].depth, 2);
15838    }
15839
15840    #[test]
15841    fn kg_query_multi_hop_filters_by_allowed_agents_per_hop() {
15842        // src -> mid (agent-a), mid -> leaf (agent-b). With allow=[a]
15843        // only the first hop survives; with allow=[a,b] both surface.
15844        let conn = test_db();
15845        let src = make_memory("s", "ns", Tier::Long, 5);
15846        let mid = make_memory("m", "ns", Tier::Long, 5);
15847        let leaf = make_memory("l", "ns", Tier::Long, 5);
15848        insert(&conn, &src).unwrap();
15849        insert(&conn, &mid).unwrap();
15850        insert(&conn, &leaf).unwrap();
15851        insert_link_full(
15852            &conn,
15853            &src.id,
15854            &mid.id,
15855            "related_to",
15856            Some("2026-01-01T00:00:00+00:00"),
15857            None,
15858            Some("agent-a"),
15859        );
15860        insert_link_full(
15861            &conn,
15862            &mid.id,
15863            &leaf.id,
15864            "related_to",
15865            Some("2026-01-02T00:00:00+00:00"),
15866            None,
15867            Some("agent-b"),
15868        );
15869
15870        let allow_a = vec!["agent-a".to_string()];
15871        let only_first = kg_query(&conn, &src.id, 3, None, Some(&allow_a), None, false).unwrap();
15872        assert_eq!(only_first.len(), 1);
15873        assert_eq!(only_first[0].target_id, mid.id);
15874
15875        let allow_both = vec!["agent-a".to_string(), "agent-b".to_string()];
15876        let both = kg_query(&conn, &src.id, 3, None, Some(&allow_both), None, false).unwrap();
15877        assert_eq!(both.len(), 2);
15878        assert_eq!(both[1].target_id, leaf.id);
15879        assert_eq!(both[1].depth, 2);
15880    }
15881
15882    #[test]
15883    fn kg_query_limit_clamped_to_max() {
15884        let conn = test_db();
15885        let src = make_memory("s", "ns", Tier::Long, 5);
15886        insert(&conn, &src).unwrap();
15887        for i in 0..3 {
15888            let t = make_memory(&format!("t{i}"), "ns", Tier::Long, 5);
15889            insert(&conn, &t).unwrap();
15890            insert_link_full(
15891                &conn,
15892                &src.id,
15893                &t.id,
15894                "related_to",
15895                Some(&format!("2026-01-{:02}T00:00:00+00:00", i + 1)),
15896                None,
15897                None,
15898            );
15899        }
15900
15901        // limit=usize::MAX clamps to KG_QUERY_MAX_LIMIT (1000),
15902        // which is bigger than our 3 rows — all returned.
15903        let all = kg_query(&conn, &src.id, 1, None, None, Some(usize::MAX), false).unwrap();
15904        assert_eq!(all.len(), 3);
15905
15906        // limit=0 clamps up to 1.
15907        let one = kg_query(&conn, &src.id, 1, None, None, Some(0), false).unwrap();
15908        assert_eq!(one.len(), 1);
15909    }
15910
15911    #[test]
15912    fn kg_query_empty_for_unknown_source() {
15913        let conn = test_db();
15914        let nodes = kg_query(&conn, "no-such-id", 1, None, None, None, false).unwrap();
15915        assert!(nodes.is_empty());
15916    }
15917
15918    #[test]
15919    fn schema_v15_existing_links_get_valid_from_backfilled() {
15920        // Simulate a v14 database with one link, then re-run the
15921        // v15 migration and assert valid_from was backfilled to the
15922        // source memory's created_at. We do this by opening a fresh
15923        // db (which is at v15), inserting a link with NULL valid_from,
15924        // rolling schema_version back to 14, and re-opening to force
15925        // the v15 block to re-execute the backfill UPDATE.
15926        let path = std::env::temp_dir().join(format!(
15927            "ai_memory_v15_backfill_{}.db",
15928            uuid::Uuid::new_v4()
15929        ));
15930        {
15931            let conn = open(&path).unwrap();
15932            let src = make_memory("src", "test", Tier::Long, 5);
15933            let tgt = make_memory("tgt", "test", Tier::Long, 5);
15934            insert(&conn, &src).unwrap();
15935            insert(&conn, &tgt).unwrap();
15936            // Insert a link directly with NULL valid_from to mimic
15937            // pre-migration state.
15938            conn.execute(
15939                "INSERT INTO memory_links (source_id, target_id, relation, created_at, valid_from) \
15940                 VALUES (?1, ?2, 'related_to', ?3, NULL)",
15941                params![&src.id, &tgt.id, &chrono::Utc::now().to_rfc3339()],
15942            )
15943            .unwrap();
15944            // Roll schema back to v14 and re-run migrate via re-open.
15945            conn.execute("DELETE FROM schema_version", []).unwrap();
15946            conn.execute("INSERT INTO schema_version (version) VALUES (14)", [])
15947                .unwrap();
15948        }
15949
15950        let conn2 = open(&path).unwrap();
15951        let backfilled: Option<String> = conn2
15952            .query_row("SELECT valid_from FROM memory_links LIMIT 1", [], |r| {
15953                r.get(0)
15954            })
15955            .unwrap();
15956        assert!(
15957            backfilled.is_some(),
15958            "expected valid_from to be backfilled, got NULL"
15959        );
15960        let _ = std::fs::remove_file(&path);
15961    }
15962
15963    #[test]
15964    fn namespace_prefix_query_index_available() {
15965        let conn = test_db();
15966        // SQLite's default BINARY collation supports prefix-matching LIKE queries
15967        // with the idx_memories_namespace index. Verify the index exists and a
15968        // simple prefix query can execute (EXPLAIN QUERY PLAN output varies by
15969        // SQLite version and query planner heuristics, so we just check that the
15970        // query completes without error).
15971        let result: Option<String> = conn
15972            .query_row(
15973                "SELECT name FROM sqlite_master WHERE type='index' AND name='idx_memories_namespace'",
15974                [],
15975                |r| r.get(0),
15976            )
15977            .unwrap();
15978        assert_eq!(
15979            result,
15980            Some("idx_memories_namespace".to_string()),
15981            "idx_memories_namespace index should exist"
15982        );
15983
15984        // Execute a prefix LIKE query to ensure it compiles and runs
15985        let count: i64 = conn
15986            .query_row(
15987                "SELECT COUNT(*) FROM memories WHERE namespace LIKE 'test/%'",
15988                [],
15989                |r| r.get(0),
15990            )
15991            .unwrap();
15992        assert_eq!(count, 0);
15993    }
15994
15995    // -----------------------------------------------------------------
15996    // Doctor (P7) helper unit tests.
15997    // -----------------------------------------------------------------
15998
15999    #[test]
16000    fn doctor_dim_violations_post_p2_returns_zero_on_fresh_db() {
16001        // Post-P2 (schema v18+), a fresh DB has the `embedding_dim` column
16002        // but zero rows in violation. The helper must report Some(0), not
16003        // None. (Pre-P2 it returned None to indicate "column not yet
16004        // present"; that path is now obsolete.)
16005        let conn = test_db();
16006        let result = doctor_dim_violations(&conn).unwrap();
16007        assert_eq!(result, Some(0));
16008    }
16009
16010    #[test]
16011    fn doctor_oldest_pending_age_secs_empty_queue() {
16012        let conn = test_db();
16013        let age = doctor_oldest_pending_age_secs(&conn).unwrap();
16014        assert_eq!(age, None);
16015    }
16016
16017    #[test]
16018    fn doctor_oldest_pending_age_secs_reports_age() {
16019        let conn = test_db();
16020        let one_hour_ago = (Utc::now() - chrono::Duration::hours(1)).to_rfc3339();
16021        conn.execute(
16022            "INSERT INTO pending_actions (id, action_type, namespace, payload, requested_by, requested_at, status)
16023             VALUES ('p1', 'store', 'ns', '{}', 'agent', ?1, 'pending')",
16024            params![one_hour_ago],
16025        )
16026        .unwrap();
16027        let age = doctor_oldest_pending_age_secs(&conn).unwrap().unwrap();
16028        // Allow a generous margin — the test machine clock is the source of truth.
16029        assert!((3500..=3700).contains(&age), "expected ~3600s, got {age}");
16030    }
16031
16032    #[test]
16033    fn doctor_governance_coverage_with_namespace_meta() {
16034        let conn = test_db();
16035        // No namespaces — both counts zero.
16036        let (with, without) = doctor_governance_coverage(&conn).unwrap();
16037        assert_eq!((with, without), (0, 0));
16038    }
16039
16040    #[test]
16041    fn doctor_governance_depth_distribution_chains() {
16042        let conn = test_db();
16043        // Build a small inheritance tree: root -> a -> a/b -> a/b/c
16044        let now = Utc::now().to_rfc3339();
16045        conn.execute(
16046            "INSERT INTO namespace_meta (namespace, parent_namespace, updated_at) VALUES ('root', NULL, ?1)",
16047            params![now],
16048        ).unwrap();
16049        conn.execute(
16050            "INSERT INTO namespace_meta (namespace, parent_namespace, updated_at) VALUES ('a', 'root', ?1)",
16051            params![now],
16052        ).unwrap();
16053        conn.execute(
16054            "INSERT INTO namespace_meta (namespace, parent_namespace, updated_at) VALUES ('a/b', 'a', ?1)",
16055            params![now],
16056        ).unwrap();
16057        conn.execute(
16058            "INSERT INTO namespace_meta (namespace, parent_namespace, updated_at) VALUES ('a/b/c', 'a/b', ?1)",
16059            params![now],
16060        ).unwrap();
16061        let dist = doctor_governance_depth_distribution(&conn).unwrap();
16062        assert_eq!(dist[0], 1, "root has depth 0");
16063        assert_eq!(dist[1], 1, "a has depth 1");
16064        assert_eq!(dist[2], 1, "a/b has depth 2");
16065        assert_eq!(dist[3], 1, "a/b/c has depth 3");
16066    }
16067
16068    #[test]
16069    fn doctor_webhook_delivery_totals_empty() {
16070        let conn = test_db();
16071        let (dispatched, failed) = doctor_webhook_delivery_totals(&conn).unwrap();
16072        assert_eq!((dispatched, failed), (0, 0));
16073    }
16074
16075    #[test]
16076    fn doctor_max_sync_skew_secs_empty() {
16077        let conn = test_db();
16078        let skew = doctor_max_sync_skew_secs(&conn).unwrap();
16079        assert_eq!(skew, None);
16080    }
16081
16082    // ---- v0.6.4-009 — capability-expansion audit log ----
16083
16084    #[test]
16085    fn audit_log_record_and_list_grant_and_deny() {
16086        let conn = test_db();
16087        record_capability_expansion(&conn, Some("alice"), "graph", true, None);
16088        record_capability_expansion(&conn, Some("bob"), "power", false, None);
16089        let rows = list_capability_expansions(&conn, 50, None).unwrap();
16090        assert_eq!(rows.len(), 2);
16091        // Newest first.
16092        assert!(rows[0].timestamp >= rows[1].timestamp);
16093        let grant_row = rows
16094            .iter()
16095            .find(|r| r.agent_id.as_deref() == Some("alice"))
16096            .unwrap();
16097        assert!(grant_row.granted);
16098        assert_eq!(grant_row.requested_family.as_deref(), Some("graph"));
16099        let deny_row = rows
16100            .iter()
16101            .find(|r| r.agent_id.as_deref() == Some("bob"))
16102            .unwrap();
16103        assert!(!deny_row.granted);
16104        assert_eq!(deny_row.requested_family.as_deref(), Some("power"));
16105    }
16106
16107    #[test]
16108    fn audit_log_filter_by_agent() {
16109        let conn = test_db();
16110        record_capability_expansion(&conn, Some("alice"), "graph", true, None);
16111        record_capability_expansion(&conn, Some("bob"), "power", false, None);
16112        let alice = list_capability_expansions(&conn, 50, Some("alice")).unwrap();
16113        assert_eq!(alice.len(), 1);
16114        assert_eq!(alice[0].agent_id.as_deref(), Some("alice"));
16115        let none_match = list_capability_expansions(&conn, 50, Some("nobody")).unwrap();
16116        assert!(none_match.is_empty());
16117    }
16118
16119    #[test]
16120    fn audit_log_anonymous_caller() {
16121        let conn = test_db();
16122        record_capability_expansion(&conn, None, "core", true, None);
16123        let rows = list_capability_expansions(&conn, 50, None).unwrap();
16124        assert_eq!(rows.len(), 1);
16125        assert!(rows[0].agent_id.is_none());
16126    }
16127
16128    #[test]
16129    fn audit_log_migration_idempotent_on_re_open() {
16130        // Open the DB twice in succession; the audit_log CREATE TABLE
16131        // IF NOT EXISTS path must not error.
16132        let p = tempfile::NamedTempFile::new().unwrap();
16133        let p = p.path().to_path_buf();
16134        let _ = open(&p).unwrap();
16135        let conn = open(&p).unwrap();
16136        // And the indexes are present.
16137        let cnt: i64 = conn
16138            .query_row(
16139                "SELECT count(*) FROM sqlite_master WHERE name LIKE 'idx_audit_log_%'",
16140                [],
16141                |r| r.get(0),
16142            )
16143            .unwrap();
16144        assert_eq!(
16145            cnt, 3,
16146            "expected 3 audit_log indexes (agent_id, ts, event_type)"
16147        );
16148    }
16149
16150    // ---------------------------------------------------------------
16151    // v0.7.0 K2 — pending_actions timeout sweeper.
16152    //
16153    // Closes the v0.6.3.1 honest-Capabilities-v2 disclosure that
16154    // `default_timeout_seconds` was advertised but unused.
16155    // ---------------------------------------------------------------
16156
16157    /// Insert a `pending_actions` row with a back-dated `requested_at`
16158    /// so we can drive the sweeper without `tokio::time` games.
16159    fn insert_stale_pending(
16160        conn: &Connection,
16161        id: &str,
16162        namespace: &str,
16163        age_secs: i64,
16164        per_row_timeout: Option<i64>,
16165    ) {
16166        let requested_at = (chrono::Utc::now() - chrono::Duration::seconds(age_secs)).to_rfc3339();
16167        conn.execute(
16168            "INSERT INTO pending_actions
16169             (id, action_type, namespace, payload, requested_by, requested_at,
16170              status, default_timeout_seconds)
16171             VALUES (?1, 'store', ?2, '{}', 'tester', ?3, 'pending', ?4)",
16172            params![id, namespace, requested_at, per_row_timeout],
16173        )
16174        .unwrap();
16175    }
16176
16177    #[test]
16178    fn sweep_marks_stale_pending_row_expired() {
16179        let conn = test_db();
16180        // 2-hour-old pending row; global default is 1 hour → must expire.
16181        insert_stale_pending(&conn, "stale-1", "ns/a", 7_200, None);
16182
16183        let expired = sweep_pending_action_timeouts(&conn, crate::SECS_PER_HOUR).unwrap();
16184        assert_eq!(expired.len(), 1, "expected exactly one expiry");
16185        assert_eq!(expired[0], ("stale-1".to_string(), "ns/a".to_string()));
16186
16187        // Row is now status='expired' with expired_at populated.
16188        let (status, expired_at): (String, Option<String>) = conn
16189            .query_row(
16190                "SELECT status, expired_at FROM pending_actions WHERE id = ?1",
16191                params!["stale-1"],
16192                |r| Ok((r.get(0)?, r.get(1)?)),
16193            )
16194            .unwrap();
16195        assert_eq!(status, "expired");
16196        assert!(
16197            expired_at.is_some(),
16198            "expired_at must be stamped by the sweeper"
16199        );
16200    }
16201
16202    #[test]
16203    fn sweep_leaves_fresh_pending_alone() {
16204        let conn = test_db();
16205        // 30-second-old pending row; global default is 1 hour → still pending.
16206        insert_stale_pending(&conn, "fresh-1", "ns/a", 30, None);
16207
16208        let expired = sweep_pending_action_timeouts(&conn, crate::SECS_PER_HOUR).unwrap();
16209        assert!(expired.is_empty());
16210        let status: String = conn
16211            .query_row(
16212                "SELECT status FROM pending_actions WHERE id = ?1",
16213                params!["fresh-1"],
16214                |r| r.get(0),
16215            )
16216            .unwrap();
16217        assert_eq!(status, "pending");
16218    }
16219
16220    #[test]
16221    fn sweep_per_row_timeout_overrides_global_default() {
16222        let conn = test_db();
16223        // 5-minute-old row; per-row TTL = 60s → MUST expire even
16224        // though the global default (1h) would say "still fresh".
16225        insert_stale_pending(&conn, "short-ttl", "ns/a", 300, Some(60));
16226        // Same age, no per-row override → still pending under the
16227        // 1h global default.
16228        insert_stale_pending(&conn, "no-override", "ns/a", 300, None);
16229
16230        let expired = sweep_pending_action_timeouts(&conn, crate::SECS_PER_HOUR).unwrap();
16231        let ids: Vec<&String> = expired.iter().map(|(id, _)| id).collect();
16232        assert_eq!(ids, vec![&"short-ttl".to_string()]);
16233    }
16234
16235    #[test]
16236    fn sweep_skips_already_decided_rows() {
16237        let conn = test_db();
16238        // Pre-insert an OLD row already approved — must not touch it.
16239        let approved_at = (chrono::Utc::now() - chrono::Duration::seconds(7_200)).to_rfc3339();
16240        conn.execute(
16241            "INSERT INTO pending_actions
16242             (id, action_type, namespace, payload, requested_by, requested_at,
16243              status, decided_by, decided_at)
16244             VALUES ('approved-old', 'store', 'ns/a', '{}', 'alice', ?1,
16245                     'approved', 'bob', ?1)",
16246            params![approved_at],
16247        )
16248        .unwrap();
16249
16250        let expired = sweep_pending_action_timeouts(&conn, 60).unwrap();
16251        assert!(expired.is_empty(), "non-pending rows must be ignored");
16252        let status: String = conn
16253            .query_row(
16254                "SELECT status FROM pending_actions WHERE id = 'approved-old'",
16255                [],
16256                |r| r.get(0),
16257            )
16258            .unwrap();
16259        assert_eq!(status, "approved", "decided row status preserved");
16260    }
16261
16262    #[test]
16263    fn sweep_disabled_when_global_default_non_positive() {
16264        let conn = test_db();
16265        // Stale row with no per-row TTL.
16266        insert_stale_pending(&conn, "stale-2", "ns/a", 7_200, None);
16267        // Operator escape hatch: 0 (or negative) global default
16268        // disables the sweep entirely.
16269        let expired = sweep_pending_action_timeouts(&conn, 0).unwrap();
16270        assert!(expired.is_empty());
16271        let expired_neg = sweep_pending_action_timeouts(&conn, -1).unwrap();
16272        assert!(expired_neg.is_empty());
16273    }
16274
16275    #[test]
16276    fn sweep_empty_queue_is_silent_noop() {
16277        let conn = test_db();
16278        let expired = sweep_pending_action_timeouts(&conn, 60).unwrap();
16279        assert!(expired.is_empty());
16280    }
16281
16282    // -----------------------------------------------------------------
16283    // v0.7.0 fix campaign R1-M2 / R1-M3 / R1-M4 (#690)
16284    //
16285    // Substrate-side defense-in-depth: SQL CHECK triggers + typed
16286    // `MemoryLinkRelation` + `ConflictMode`-aware insert primitive.
16287    // The tests below pin the contract the brief calls out by name so
16288    // a future regression surfaces here, not in a downstream consumer.
16289    // -----------------------------------------------------------------
16290
16291    /// R1-M2 — direct-SQL INSERT with a tier outside the closed set is
16292    /// refused by the trigger.
16293    #[test]
16294    fn test_memories_tier_check_rejects_invalid() {
16295        let conn = test_db();
16296        let now = chrono::Utc::now().to_rfc3339();
16297        let err = conn.execute(
16298            "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, confidence, source, access_count, created_at, updated_at, metadata) \
16299             VALUES (?1, 'long-term', 'ns-ck', 'bad-tier', 'x', '[]', 5, 1.0, 'test', 0, ?2, ?2, '{}')",
16300            params!["m-bad-tier", now],
16301        ).unwrap_err();
16302        let msg = err.to_string();
16303        assert!(
16304            msg.contains("memories.tier must be one of"),
16305            "expected R1-M2 tier check, got: {msg}"
16306        );
16307    }
16308
16309    /// R1-M2 — direct-SQL INSERT with priority out of `[1, 10]` is
16310    /// refused by the trigger.
16311    #[test]
16312    fn test_memories_priority_check_rejects_oob() {
16313        let conn = test_db();
16314        let now = chrono::Utc::now().to_rfc3339();
16315        let err = conn.execute(
16316            "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, confidence, source, access_count, created_at, updated_at, metadata) \
16317             VALUES (?1, 'mid', 'ns-ck', 'bad-prio', 'x', '[]', 11, 1.0, 'test', 0, ?2, ?2, '{}')",
16318            params!["m-bad-prio", now],
16319        ).unwrap_err();
16320        assert!(
16321            err.to_string()
16322                .contains("memories.priority must be between 1 and 10"),
16323            "expected R1-M2 priority check, got: {err}"
16324        );
16325        // Lower bound mirror: priority = 0 is also out-of-band.
16326        let err_low = conn.execute(
16327            "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, confidence, source, access_count, created_at, updated_at, metadata) \
16328             VALUES (?1, 'mid', 'ns-ck', 'bad-prio-low', 'x', '[]', 0, 1.0, 'test', 0, ?2, ?2, '{}')",
16329            params!["m-bad-prio-low", now],
16330        ).unwrap_err();
16331        assert!(err_low.to_string().contains("priority"));
16332    }
16333
16334    /// R1-M2 — confidence outside `[0.0, 1.0]` is refused by the trigger.
16335    #[test]
16336    fn test_memories_confidence_check_rejects_oob() {
16337        let conn = test_db();
16338        let now = chrono::Utc::now().to_rfc3339();
16339        let err = conn.execute(
16340            "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, confidence, source, access_count, created_at, updated_at, metadata) \
16341             VALUES (?1, 'mid', 'ns-ck', 'bad-conf', 'x', '[]', 5, 1.5, 'test', 0, ?2, ?2, '{}')",
16342            params!["m-bad-conf", now],
16343        ).unwrap_err();
16344        assert!(
16345            err.to_string().contains("memories.confidence"),
16346            "expected R1-M2 confidence check, got: {err}"
16347        );
16348    }
16349
16350    /// R1-M2 — direct-SQL link INSERT with an off-closed-set relation
16351    /// is refused by the trigger.
16352    #[test]
16353    fn test_memory_links_relation_check_rejects_unknown() {
16354        let conn = test_db();
16355        let src = insert(&conn, &make_memory("rel-src", "ns-ck", Tier::Mid, 5)).unwrap();
16356        let tgt = insert(&conn, &make_memory("rel-tgt", "ns-ck", Tier::Mid, 5)).unwrap();
16357        let now = chrono::Utc::now().to_rfc3339();
16358        let err = conn
16359            .execute(
16360                "INSERT INTO memory_links (source_id, target_id, relation, created_at, valid_from) \
16361             VALUES (?1, ?2, 'follows', ?3, ?3)",
16362                params![src, tgt, now],
16363            )
16364            .unwrap_err();
16365        assert!(
16366            err.to_string()
16367                .contains("memory_links.relation must be one of"),
16368            "expected R1-M2 relation check, got: {err}"
16369        );
16370    }
16371
16372    /// R1-M2 — direct-SQL link INSERT with an unknown `attest_level` is
16373    /// refused; legacy `NULL` stays allowed.
16374    #[test]
16375    fn test_memory_links_attest_level_check_rejects_unknown() {
16376        let conn = test_db();
16377        let src = insert(&conn, &make_memory("att-src", "ns-ck", Tier::Mid, 5)).unwrap();
16378        let tgt = insert(&conn, &make_memory("att-tgt", "ns-ck", Tier::Mid, 5)).unwrap();
16379        let now = chrono::Utc::now().to_rfc3339();
16380        // NULL attest_level OK (legacy).
16381        conn.execute(
16382            "INSERT INTO memory_links (source_id, target_id, relation, created_at, valid_from, attest_level) \
16383             VALUES (?1, ?2, 'related_to', ?3, ?3, NULL)",
16384            params![src, tgt, now],
16385        )
16386        .expect("NULL attest_level must remain accepted");
16387        // Bogus attest_level refused.
16388        let err = conn.execute(
16389            "INSERT INTO memory_links (source_id, target_id, relation, created_at, valid_from, attest_level) \
16390             VALUES (?1, ?2, 'supersedes', ?3, ?3, 'totally-fake')",
16391            params![src, tgt, now],
16392        ).unwrap_err();
16393        assert!(err.to_string().contains("memory_links.attest_level"));
16394    }
16395
16396    /// R1-M3 — `insert_with_conflict(.., ConflictMode::Error)` refuses
16397    /// the second write when `(title, namespace)` collides.
16398    #[test]
16399    fn test_insert_with_conflict_error_mode_refuses_duplicate() {
16400        let conn = test_db();
16401        let m1 = make_memory("dup-title", "ns-conflict", Tier::Mid, 5);
16402        let _id = insert_with_conflict(&conn, &m1, ConflictMode::Error).unwrap();
16403        let mut m2 = make_memory("dup-title", "ns-conflict", Tier::Mid, 7);
16404        m2.content = "second writer should be refused".to_string();
16405        let err = insert_with_conflict(&conn, &m2, ConflictMode::Error).unwrap_err();
16406        let conflict = err.downcast_ref::<ConflictError>();
16407        assert!(
16408            conflict.is_some(),
16409            "expected typed ConflictError, got: {err}"
16410        );
16411        // First writer's content is preserved (no silent overwrite).
16412        let row = find_by_title_namespace(&conn, "dup-title", "ns-conflict")
16413            .unwrap()
16414            .expect("first row still present");
16415        let fetched = get(&conn, &row).unwrap().unwrap();
16416        assert_ne!(
16417            fetched.content, "second writer should be refused",
16418            "Error mode must not mutate the existing row"
16419        );
16420    }
16421
16422    /// R1-M3 — `insert_with_conflict(.., ConflictMode::Merge)` is
16423    /// byte-equivalent to the legacy `insert()` silent-merge path.
16424    #[test]
16425    fn test_insert_with_conflict_merge_mode_updates() {
16426        let conn = test_db();
16427        let m1 = make_memory("merge-title", "ns-merge", Tier::Mid, 5);
16428        let id_a = insert_with_conflict(&conn, &m1, ConflictMode::Merge).unwrap();
16429        let mut m2 = make_memory("merge-title", "ns-merge", Tier::Mid, 7);
16430        m2.content = "merged-content".to_string();
16431        let id_b = insert_with_conflict(&conn, &m2, ConflictMode::Merge).unwrap();
16432        assert_eq!(id_a, id_b, "merge mode returns the existing row id");
16433        let fetched = get(&conn, &id_a).unwrap().unwrap();
16434        assert_eq!(fetched.content, "merged-content");
16435    }
16436
16437    /// R1-M3 — `insert_with_conflict(.., ConflictMode::Version)` keeps
16438    /// both rows; the second writer lands under a versioned title.
16439    #[test]
16440    fn test_insert_with_conflict_version_keeps_both() {
16441        let conn = test_db();
16442        let m1 = make_memory("versioned", "ns-v", Tier::Mid, 5);
16443        let id_a = insert_with_conflict(&conn, &m1, ConflictMode::Version).unwrap();
16444        let mut m2 = make_memory("versioned", "ns-v", Tier::Mid, 5);
16445        m2.content = "second version content".to_string();
16446        let id_b = insert_with_conflict(&conn, &m2, ConflictMode::Version).unwrap();
16447        assert_ne!(id_a, id_b, "version mode produces a distinct row");
16448        // Both titles are reachable: original + `(2)` suffix.
16449        let original_id = find_by_title_namespace(&conn, "versioned", "ns-v")
16450            .unwrap()
16451            .expect("original row");
16452        let versioned_id = find_by_title_namespace(&conn, "versioned (2)", "ns-v")
16453            .unwrap()
16454            .expect("versioned row");
16455        assert_eq!(original_id, id_a);
16456        assert_eq!(versioned_id, id_b);
16457    }
16458
16459    /// R1-M4 — `MemoryLink.relation` round-trips through the typed
16460    /// closed set across `create_link` + `get_links`.
16461    #[test]
16462    fn test_memory_link_relation_round_trips() {
16463        let conn = test_db();
16464        let src = insert(&conn, &make_memory("rt-src", "ns-rt", Tier::Mid, 5)).unwrap();
16465        let tgt = insert(&conn, &make_memory("rt-tgt", "ns-rt", Tier::Mid, 5)).unwrap();
16466        create_link(&conn, &src, &tgt, "supersedes").unwrap();
16467        let links = get_links(&conn, &src).unwrap();
16468        assert_eq!(links.len(), 1);
16469        assert_eq!(
16470            links[0].relation,
16471            crate::models::MemoryLinkRelation::Supersedes,
16472            "relation must round-trip as the typed Supersedes variant"
16473        );
16474        // Cross-check serde wire shape: enum → `"supersedes"` string.
16475        let wire = serde_json::to_string(&links[0]).unwrap();
16476        assert!(
16477            wire.contains("\"relation\":\"supersedes\""),
16478            "serde wire form must be the canonical lowercase snake_case \
16479             string; got {wire}"
16480        );
16481    }
16482
16483    // ---------------------------------------------------------------
16484    // v0.7.0 S5 verdict — approval exec fixes:
16485    //   S5-H1 reflect arm, S5-H4 agent_id verify,
16486    //   S5-M1/M2 signed_events emit on approve/deny/timeout.
16487    // ---------------------------------------------------------------
16488
16489    /// Helper — count signed_events rows matching `event_type`. Used by
16490    /// the audit-emit tests below so they don't have to scrape the table
16491    /// in raw SQL each time.
16492    fn count_signed_events(conn: &Connection, event_type: &str) -> usize {
16493        crate::signed_events::list_signed_events(conn, None, 1000, 0)
16494            .unwrap_or_default()
16495            .into_iter()
16496            .filter(|e| e.event_type == event_type)
16497            .count()
16498    }
16499
16500    /// S5-H1 — an approved `reflect` pending action MUST execute through
16501    /// `db::reflect` and persist a new reflection memory whose
16502    /// `metadata.reflection_metadata.sources` matches the queued
16503    /// `source_ids`. Pre-fix this would error with
16504    /// "unknown action_type: reflect" and the queued row would never land.
16505    #[test]
16506    fn test_execute_reflect_arm_succeeds_round_trip() {
16507        let conn = test_db();
16508        // Seed two source memories the reflection will reflect on.
16509        let src1 = make_memory("src-1", "ns/reflect", Tier::Mid, 5);
16510        let src2 = make_memory("src-2", "ns/reflect", Tier::Mid, 5);
16511        let src1_id = insert(&conn, &src1).unwrap();
16512        let src2_id = insert(&conn, &src2).unwrap();
16513
16514        // Queue an approved reflect pending action with the L1-8 payload shape.
16515        let payload = serde_json::json!({
16516            "source_ids": [src1_id, src2_id],
16517            "title": "reflective synthesis",
16518            "content": "deep observation across sources",
16519            "namespace": "ns/reflect",
16520            "tier": Tier::Mid.as_str(),
16521            "tags": ["reflective"],
16522            "priority": 6,
16523            "confidence": 0.9,
16524            "agent_id": "alice",
16525            "proposed_depth": 1,
16526        });
16527        let pending_id = queue_pending_action(
16528            &conn,
16529            crate::models::GovernedAction::Reflect,
16530            "ns/reflect",
16531            None,
16532            "alice",
16533            &payload,
16534        )
16535        .unwrap();
16536        // Approve so execute_pending_action accepts the row.
16537        assert!(decide_pending_action(&conn, &pending_id, true, "approver").unwrap());
16538
16539        let result = execute_pending_action(&conn, &pending_id).expect("reflect execute ok");
16540        let new_id = result.expect("reflect must return the new reflection id");
16541        let mem = get(&conn, &new_id)
16542            .unwrap()
16543            .expect("reflection memory landed");
16544        assert_eq!(mem.title, "reflective synthesis");
16545        assert_eq!(mem.namespace, "ns/reflect");
16546        assert_eq!(mem.reflection_depth, 1, "depth = max(source depths) + 1");
16547        // The substrate stamps `metadata.agent_id` from the input.agent_id field.
16548        assert_eq!(mem.metadata["agent_id"], "alice");
16549    }
16550
16551    /// S5-H4 — a queued payload whose `agent_id` does NOT match
16552    /// `pa.requested_by` is approver-on-behalf laundering. Execute MUST
16553    /// refuse, MUST NOT insert the memory, AND MUST emit a
16554    /// `pending_action.refused_agent_id_mismatch` audit row so the
16555    /// attempt is captured by the signed_events chain.
16556    #[test]
16557    fn test_execute_refuses_payload_agent_id_mismatch() {
16558        let conn = test_db();
16559        let mut mem = make_memory("laundered store", "ns/launder", Tier::Mid, 5);
16560        // Requester is "alice", but the payload claims agent_id "bob" —
16561        // pre-fix this would land a memory attributed to "bob" even
16562        // though the original requester was "alice".
16563        mem.metadata = serde_json::json!({"agent_id": "bob"});
16564        let payload = serde_json::to_value(&mem).unwrap();
16565        let pending_id = queue_pending_action(
16566            &conn,
16567            crate::models::GovernedAction::Store,
16568            "ns/launder",
16569            None,
16570            "alice",
16571            &payload,
16572        )
16573        .unwrap();
16574        assert!(decide_pending_action(&conn, &pending_id, true, "approver").unwrap());
16575
16576        let err = execute_pending_action(&conn, &pending_id)
16577            .expect_err("execute MUST refuse laundered agent_id");
16578        let msg = format!("{err}");
16579        assert!(
16580            msg.contains("approver-on-behalf laundering refused"),
16581            "expected laundering-refusal message, got: {msg}"
16582        );
16583        // No memory landed.
16584        let count: i64 = conn
16585            .query_row(
16586                "SELECT COUNT(*) FROM memories WHERE namespace = 'ns/launder'",
16587                [],
16588                |r| r.get(0),
16589            )
16590            .unwrap();
16591        assert_eq!(count, 0, "refused execute must not insert a memory");
16592        // Audit row captured.
16593        assert_eq!(
16594            count_signed_events(&conn, "pending_action.refused_agent_id_mismatch"),
16595            1,
16596            "refusal must append a signed_events row"
16597        );
16598        // No approve audit emitted on refused path.
16599        assert_eq!(count_signed_events(&conn, "pending_action.approved"), 0);
16600    }
16601
16602    /// S5-M1 — a successful approve+execute MUST append a
16603    /// `pending_action.approved` row to `signed_events`. Pre-fix the
16604    /// audit chain had no record of the approval transition.
16605    #[test]
16606    fn test_approve_emits_signed_event() {
16607        let conn = test_db();
16608        let mem = make_memory("approved store", "ns/approve", Tier::Mid, 5);
16609        let payload = serde_json::to_value(&mem).unwrap();
16610        let pending_id = queue_pending_action(
16611            &conn,
16612            crate::models::GovernedAction::Store,
16613            "ns/approve",
16614            None,
16615            mem.metadata["agent_id"].as_str().unwrap_or("alice"),
16616            &payload,
16617        )
16618        .unwrap();
16619        // Requester field is the same as the payload metadata.agent_id
16620        // (default fixture leaves it as `{}`), so to keep the verifier
16621        // happy we re-fetch and assert the queue happened. Then approve.
16622        assert!(decide_pending_action(&conn, &pending_id, true, "approver").unwrap());
16623        let _ = execute_pending_action(&conn, &pending_id).expect("execute ok");
16624        assert_eq!(
16625            count_signed_events(&conn, "pending_action.approved"),
16626            1,
16627            "approve+execute must append one audit row"
16628        );
16629        // Deny / timeout MUST NOT have been emitted.
16630        assert_eq!(count_signed_events(&conn, "pending_action.denied"), 0);
16631        assert_eq!(count_signed_events(&conn, "pending_action.timed_out"), 0);
16632    }
16633
16634    /// S5-M2 — a deny transition (decide_pending_action with approve=false)
16635    /// MUST append a `pending_action.denied` row to `signed_events`.
16636    /// Pre-fix the deny path was silent in the audit chain.
16637    #[test]
16638    fn test_deny_emits_signed_event() {
16639        let conn = test_db();
16640        let payload = serde_json::json!({"title": "to-deny", "content": "x"});
16641        let pending_id = queue_pending_action(
16642            &conn,
16643            crate::models::GovernedAction::Store,
16644            "ns/deny",
16645            None,
16646            "alice",
16647            &payload,
16648        )
16649        .unwrap();
16650        let transitioned = decide_pending_action(&conn, &pending_id, false, "approver").unwrap();
16651        assert!(transitioned, "deny transition must succeed on pending row");
16652        assert_eq!(
16653            count_signed_events(&conn, "pending_action.denied"),
16654            1,
16655            "deny must append one audit row"
16656        );
16657        // Approve / timeout MUST NOT have been emitted.
16658        assert_eq!(count_signed_events(&conn, "pending_action.approved"), 0);
16659        assert_eq!(count_signed_events(&conn, "pending_action.timed_out"), 0);
16660    }
16661
16662    /// S5-M2 — the timeout sweeper MUST append one
16663    /// `pending_action.timed_out` row per expired pending row.
16664    /// Pre-fix the sweep transitioned rows silently, leaving the audit
16665    /// chain blind to the auto-expiration.
16666    #[test]
16667    fn test_timeout_sweeper_emits_signed_event() {
16668        let conn = test_db();
16669        // Two stale pending rows + one fresh row. Only the stale rows
16670        // expire under a 1-hour global default; the fresh row stays.
16671        insert_stale_pending(&conn, "stale-a", "ns/x", 7_200, None);
16672        insert_stale_pending(&conn, "stale-b", "ns/y", 7_200, None);
16673        insert_stale_pending(&conn, "fresh-c", "ns/z", 30, None);
16674
16675        let expired = sweep_pending_action_timeouts(&conn, crate::SECS_PER_HOUR).unwrap();
16676        assert_eq!(expired.len(), 2, "two stale rows must expire");
16677        assert_eq!(
16678            count_signed_events(&conn, "pending_action.timed_out"),
16679            2,
16680            "one audit row per expired pending row"
16681        );
16682        // The fresh row is still pending; no audit emit for it.
16683        let fresh_status: String = conn
16684            .query_row(
16685                "SELECT status FROM pending_actions WHERE id = 'fresh-c'",
16686                [],
16687                |r| r.get(0),
16688            )
16689            .unwrap();
16690        assert_eq!(fresh_status, "pending");
16691    }
16692
16693    // -----------------------------------------------------------------
16694    // v0.7.0 S4-INFO2 — `memory_link.created` audit emit
16695    // -----------------------------------------------------------------
16696
16697    /// Count the number of `signed_events` rows for a given event_type
16698    /// and substring match on the row's `payload_hash`-bearing row.
16699    /// Used by the audit emit tests below.
16700    fn count_signed_events_of_type(conn: &Connection, event_type: &str) -> i64 {
16701        conn.query_row(
16702            "SELECT COUNT(*) FROM signed_events WHERE event_type = ?1",
16703            params![event_type],
16704            |r| r.get(0),
16705        )
16706        .unwrap()
16707    }
16708
16709    #[test]
16710    fn test_memory_link_created_emits_signed_event_unsigned_path() {
16711        // S4-INFO2 — every successful link create appends one
16712        // `memory_link.created` row, even on the unsigned path. The
16713        // emit's `attest_level` and `signature` columns must mirror
16714        // the source row.
16715        let conn = test_db();
16716        let src = make_memory("s4info2-src-u", "test", Tier::Long, 5);
16717        let tgt = make_memory("s4info2-tgt-u", "test", Tier::Long, 5);
16718        insert(&conn, &src).unwrap();
16719        insert(&conn, &tgt).unwrap();
16720
16721        let before = count_signed_events_of_type(&conn, "memory_link.created");
16722        create_link_signed(&conn, &src.id, &tgt.id, "related_to", None).unwrap();
16723        let after = count_signed_events_of_type(&conn, "memory_link.created");
16724        assert_eq!(after, before + 1, "unsigned create must emit one audit row");
16725
16726        // Inspect the emitted row's signing-surface columns.
16727        let (attest, sig): (String, Option<Vec<u8>>) = conn
16728            .query_row(
16729                "SELECT attest_level, signature FROM signed_events \
16730                 WHERE event_type = 'memory_link.created' \
16731                 ORDER BY timestamp DESC LIMIT 1",
16732                [],
16733                |r| Ok((r.get(0)?, r.get(1)?)),
16734            )
16735            .unwrap();
16736        assert_eq!(attest, "unsigned");
16737        assert!(sig.is_none(), "unsigned create must emit NULL signature");
16738    }
16739
16740    #[test]
16741    fn test_memory_link_created_emits_signed_event_signed_path() {
16742        // S4-INFO2 — signed path: the emitted row's payload_hash
16743        // must match SHA-256 over the canonical CBOR that the H2
16744        // signer just committed to, AND the `signature` must equal
16745        // the link row's signature byte-for-byte (auditor cross-check).
16746        use crate::identity::{keypair, sign as link_sign};
16747
16748        let conn = test_db();
16749        let src = make_memory("s4info2-src-s", "test", Tier::Long, 5);
16750        let tgt = make_memory("s4info2-tgt-s", "test", Tier::Long, 5);
16751        insert(&conn, &src).unwrap();
16752        insert(&conn, &tgt).unwrap();
16753
16754        let kp = keypair::generate("alice").unwrap();
16755        create_link_signed(&conn, &src.id, &tgt.id, "supersedes", Some(&kp)).unwrap();
16756
16757        // Read back the link row's signature + valid_from so we can
16758        // re-derive the canonical CBOR the audit row should commit to.
16759        let (link_sig, valid_from): (Vec<u8>, String) = conn
16760            .query_row(
16761                "SELECT signature, valid_from FROM memory_links \
16762                 WHERE source_id = ?1 AND target_id = ?2",
16763                params![&src.id, &tgt.id],
16764                |r| Ok((r.get::<_, Vec<u8>>(0)?, r.get::<_, String>(1)?)),
16765            )
16766            .unwrap();
16767        let signable = link_sign::SignableLink {
16768            src_id: &src.id,
16769            dst_id: &tgt.id,
16770            relation: "supersedes",
16771            observed_by: Some(kp.agent_id.as_str()),
16772            valid_from: Some(valid_from.as_str()),
16773            valid_until: None,
16774        };
16775        let expected_hash = crate::signed_events::payload_hash(
16776            &link_sign::canonical_cbor(&signable).expect("cbor"),
16777        );
16778
16779        let (agent, attest, sig, payload): (String, String, Option<Vec<u8>>, Vec<u8>) = conn
16780            .query_row(
16781                "SELECT agent_id, attest_level, signature, payload_hash \
16782                 FROM signed_events \
16783                 WHERE event_type = 'memory_link.created' \
16784                 ORDER BY timestamp DESC LIMIT 1",
16785                [],
16786                |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?)),
16787            )
16788            .unwrap();
16789        assert_eq!(agent, "alice");
16790        assert_eq!(attest, "self_signed");
16791        assert_eq!(
16792            sig.as_deref(),
16793            Some(link_sig.as_slice()),
16794            "audit row signature must mirror memory_links.signature byte-for-byte"
16795        );
16796        assert_eq!(
16797            payload, expected_hash,
16798            "audit row payload_hash must SHA-256 the canonical CBOR H2 signed over"
16799        );
16800    }
16801
16802    #[test]
16803    fn test_memory_link_created_emit_is_idempotent_on_replay() {
16804        // INSERT OR IGNORE collapses duplicate (src,dst,relation)
16805        // writes to a no-op at the link layer. The audit emit must
16806        // NOT fire on the replay — otherwise an idempotent retry by
16807        // a federation peer would inflate the audit row count for
16808        // the same logical event.
16809        let conn = test_db();
16810        let src = make_memory("s4info2-src-d", "test", Tier::Long, 5);
16811        let tgt = make_memory("s4info2-tgt-d", "test", Tier::Long, 5);
16812        insert(&conn, &src).unwrap();
16813        insert(&conn, &tgt).unwrap();
16814
16815        create_link_signed(&conn, &src.id, &tgt.id, "related_to", None).unwrap();
16816        let after_first = count_signed_events_of_type(&conn, "memory_link.created");
16817        create_link_signed(&conn, &src.id, &tgt.id, "related_to", None).unwrap();
16818        let after_second = count_signed_events_of_type(&conn, "memory_link.created");
16819        assert_eq!(
16820            after_second, after_first,
16821            "duplicate (src,dst,relation) replay must not emit a second audit row"
16822        );
16823    }
16824
16825    #[test]
16826    fn test_create_link_inbound_emits_signed_event() {
16827        // The federation-replicated path must emit too — the audit
16828        // ledger reflects every link visible locally.
16829        let conn = test_db();
16830        let src = make_memory("s4info2-in-src", "test", Tier::Long, 5);
16831        let tgt = make_memory("s4info2-in-tgt", "test", Tier::Long, 5);
16832        insert(&conn, &src).unwrap();
16833        insert(&conn, &tgt).unwrap();
16834
16835        let now = chrono::Utc::now().to_rfc3339();
16836        let link = MemoryLink {
16837            source_id: src.id.clone(),
16838            target_id: tgt.id.clone(),
16839            relation: crate::models::MemoryLinkRelation::RelatedTo,
16840            created_at: now.clone(),
16841            signature: None,
16842            observed_by: Some("peer-bob".to_string()),
16843            valid_from: Some(now.clone()),
16844            valid_until: None,
16845            attest_level: None,
16846        };
16847        let before = count_signed_events_of_type(&conn, "memory_link.created");
16848        create_link_inbound(&conn, &link, "unsigned").unwrap();
16849        let after = count_signed_events_of_type(&conn, "memory_link.created");
16850        assert_eq!(after, before + 1);
16851
16852        let agent: String = conn
16853            .query_row(
16854                "SELECT agent_id FROM signed_events \
16855                 WHERE event_type = 'memory_link.created' \
16856                 ORDER BY timestamp DESC LIMIT 1",
16857                [],
16858                |r| r.get(0),
16859            )
16860            .unwrap();
16861        assert_eq!(
16862            agent, "peer-bob",
16863            "inbound emit must record the peer's claimed observed_by"
16864        );
16865    }
16866
16867    #[test]
16868    fn test_create_link_signed_emit_failure_does_not_roll_back() {
16869        // Drop the signed_events table to simulate a substrate
16870        // problem (schema drift, disk error mapped to a SQL
16871        // failure). The link create must still commit and the
16872        // function must return Ok — the audit emit is best-effort.
16873        let conn = test_db();
16874        let src = make_memory("s4info2-fail-src", "test", Tier::Long, 5);
16875        let tgt = make_memory("s4info2-fail-tgt", "test", Tier::Long, 5);
16876        insert(&conn, &src).unwrap();
16877        insert(&conn, &tgt).unwrap();
16878
16879        // Knock out the audit substrate.
16880        conn.execute("DROP TABLE signed_events", []).unwrap();
16881
16882        let result = create_link_signed(&conn, &src.id, &tgt.id, "related_to", None);
16883        assert!(
16884            result.is_ok(),
16885            "audit emit failure must not crater the link create: {result:?}"
16886        );
16887
16888        // The link itself must have persisted.
16889        let count: i64 = conn
16890            .query_row(
16891                "SELECT COUNT(*) FROM memory_links \
16892                 WHERE source_id = ?1 AND target_id = ?2",
16893                params![&src.id, &tgt.id],
16894                |r| r.get(0),
16895            )
16896            .unwrap();
16897        assert_eq!(
16898            count, 1,
16899            "link row must have committed despite audit failure"
16900        );
16901    }
16902
16903    // ─────────────────────────────────────────────────────────────────────────
16904    // L1-1 (v0.7.0) — MemoryKind typed enum + migration v31 tests
16905    //
16906    // Migration v31 (memory_kind) was originally authored as v30 on
16907    // l1/typed-memorykind; renumbered during the L1 wave merge after
16908    // substrate-rules (issue #691) took v30. The backfill SQL is unchanged.
16909    // ─────────────────────────────────────────────────────────────────────────
16910
16911    /// Migration v31 backfill: a row with `memory_kind='observation'` and
16912    /// `metadata.type='reflection'` should be updated to
16913    /// `memory_kind='reflection'` by the backfill SQL in the migration.
16914    #[test]
16915    fn l1_1_migration_backfill_sets_reflection_kind() {
16916        let conn = test_db();
16917        let now = chrono::Utc::now().to_rfc3339();
16918        let id = uuid::Uuid::new_v4().to_string();
16919        // Insert a row that looks like a pre-v31 reflection: memory_kind
16920        // defaults to 'observation' (the old schema had no such column)
16921        // but metadata.type = 'reflection' signals it was produced by
16922        // memory_reflect.
16923        conn.execute(
16924            "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, \
16925             confidence, source, access_count, created_at, updated_at, metadata, \
16926             reflection_depth, memory_kind) \
16927             VALUES (?1,'mid','ns','backfill-test','content','[]',5,1.0,'test',0,?2,?2,?3,0,'observation')",
16928            rusqlite::params![id, now, r#"{"type":"reflection"}"#],
16929        )
16930        .unwrap();
16931
16932        // Confirm the row starts with memory_kind='observation'.
16933        let before: String = conn
16934            .query_row(
16935                "SELECT memory_kind FROM memories WHERE id = ?1",
16936                [&id],
16937                |r| r.get(0),
16938            )
16939            .unwrap();
16940        assert_eq!(before, "observation");
16941
16942        // Run the backfill SQL (same logic as migration v31).
16943        conn.execute(
16944            "UPDATE memories SET memory_kind = 'reflection' \
16945             WHERE memory_kind = 'observation' \
16946               AND json_valid(metadata) \
16947               AND json_extract(metadata, '$.type') = 'reflection'",
16948            [],
16949        )
16950        .unwrap();
16951
16952        let after: String = conn
16953            .query_row(
16954                "SELECT memory_kind FROM memories WHERE id = ?1",
16955                [&id],
16956                |r| r.get(0),
16957            )
16958            .unwrap();
16959        assert_eq!(
16960            after, "reflection",
16961            "backfill must upgrade metadata.type=reflection rows to memory_kind=reflection"
16962        );
16963    }
16964
16965    /// Backfill must NOT touch rows where `metadata.type` is absent or is
16966    /// something other than `'reflection'`.
16967    #[test]
16968    fn l1_1_migration_backfill_leaves_non_reflection_rows_alone() {
16969        let conn = test_db();
16970        let now = chrono::Utc::now().to_rfc3339();
16971        let id = uuid::Uuid::new_v4().to_string();
16972        conn.execute(
16973            "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, \
16974             confidence, source, access_count, created_at, updated_at, metadata, \
16975             reflection_depth, memory_kind) \
16976             VALUES (?1,'mid','ns','obs-test','content','[]',5,1.0,'test',0,?2,?2,'{}',0,'observation')",
16977            rusqlite::params![id, now],
16978        )
16979        .unwrap();
16980
16981        conn.execute(
16982            "UPDATE memories SET memory_kind = 'reflection' \
16983             WHERE memory_kind = 'observation' \
16984               AND json_valid(metadata) \
16985               AND json_extract(metadata, '$.type') = 'reflection'",
16986            [],
16987        )
16988        .unwrap();
16989
16990        let after: String = conn
16991            .query_row(
16992                "SELECT memory_kind FROM memories WHERE id = ?1",
16993                [&id],
16994                |r| r.get(0),
16995            )
16996            .unwrap();
16997        assert_eq!(
16998            after, "observation",
16999            "backfill must not change rows without metadata.type=reflection"
17000        );
17001    }
17002
17003    /// `memories_by_kind(Observation)` returns only observation memories;
17004    /// `memories_by_kind(Reflection)` returns only reflection memories.
17005    #[test]
17006    fn l1_1_memories_by_kind_returns_correct_subset() {
17007        let conn = test_db();
17008
17009        // Insert one observation and one reflection memory.
17010        let obs = Memory {
17011            id: uuid::Uuid::new_v4().to_string(),
17012            tier: Tier::Long,
17013            namespace: "kind-ns".to_string(),
17014            title: "obs-memory".to_string(),
17015            content: "observation content".to_string(),
17016            tags: vec![],
17017            priority: 5,
17018            confidence: 1.0,
17019            source: "test".to_string(),
17020            access_count: 0,
17021            created_at: chrono::Utc::now().to_rfc3339(),
17022            updated_at: chrono::Utc::now().to_rfc3339(),
17023            last_accessed_at: None,
17024            expires_at: None,
17025            metadata: serde_json::json!({}),
17026            reflection_depth: 0,
17027            memory_kind: crate::models::MemoryKind::Observation,
17028            entity_id: None,
17029            persona_version: None,
17030            citations: Vec::new(),
17031            source_uri: None,
17032            source_span: None,
17033            confidence_source: ConfidenceSource::CallerProvided,
17034            confidence_signals: None,
17035            confidence_decayed_at: None,
17036            version: 1,
17037        };
17038        let ref_mem = Memory {
17039            id: uuid::Uuid::new_v4().to_string(),
17040            tier: Tier::Long,
17041            namespace: "kind-ns".to_string(),
17042            title: "ref-memory".to_string(),
17043            content: "reflection content".to_string(),
17044            tags: vec![],
17045            priority: 5,
17046            confidence: 1.0,
17047            source: "test".to_string(),
17048            access_count: 0,
17049            created_at: chrono::Utc::now().to_rfc3339(),
17050            updated_at: chrono::Utc::now().to_rfc3339(),
17051            last_accessed_at: None,
17052            expires_at: None,
17053            metadata: serde_json::json!({}),
17054            reflection_depth: 1,
17055            memory_kind: crate::models::MemoryKind::Reflection,
17056            entity_id: None,
17057            persona_version: None,
17058            citations: Vec::new(),
17059            source_uri: None,
17060            source_span: None,
17061            confidence_source: ConfidenceSource::CallerProvided,
17062            confidence_signals: None,
17063            confidence_decayed_at: None,
17064            version: 1,
17065        };
17066
17067        insert(&conn, &obs).unwrap();
17068        insert(&conn, &ref_mem).unwrap();
17069
17070        let obs_rows = memories_by_kind(&conn, &crate::models::MemoryKind::Observation).unwrap();
17071        let ref_rows = memories_by_kind(&conn, &crate::models::MemoryKind::Reflection).unwrap();
17072
17073        assert!(
17074            obs_rows
17075                .iter()
17076                .all(|m| m.memory_kind == crate::models::MemoryKind::Observation),
17077            "memories_by_kind(Observation) must return only Observation memories"
17078        );
17079        assert!(
17080            ref_rows
17081                .iter()
17082                .all(|m| m.memory_kind == crate::models::MemoryKind::Reflection),
17083            "memories_by_kind(Reflection) must return only Reflection memories"
17084        );
17085        // The inserted observation must appear in obs_rows.
17086        assert!(
17087            obs_rows.iter().any(|m| m.title == "obs-memory"),
17088            "obs-memory must be in Observation results"
17089        );
17090        // The inserted reflection must appear in ref_rows.
17091        assert!(
17092            ref_rows.iter().any(|m| m.title == "ref-memory"),
17093            "ref-memory must be in Reflection results"
17094        );
17095        // Cross-check: obs memory must NOT be in reflection results.
17096        assert!(
17097            !ref_rows.iter().any(|m| m.title == "obs-memory"),
17098            "obs-memory must not appear in Reflection results"
17099        );
17100    }
17101
17102    /// Inserting a memory with `memory_kind=Reflection` and then reading it
17103    /// back via `get()` must preserve the `Reflection` variant.
17104    #[test]
17105    fn l1_1_memory_kind_roundtrips_through_insert_get() {
17106        let conn = test_db();
17107        let mem = Memory {
17108            id: uuid::Uuid::new_v4().to_string(),
17109            tier: Tier::Long,
17110            namespace: "roundtrip-ns".to_string(),
17111            title: "kind-roundtrip".to_string(),
17112            content: "roundtrip content".to_string(),
17113            tags: vec![],
17114            priority: 5,
17115            confidence: 1.0,
17116            source: "test".to_string(),
17117            access_count: 0,
17118            created_at: chrono::Utc::now().to_rfc3339(),
17119            updated_at: chrono::Utc::now().to_rfc3339(),
17120            last_accessed_at: None,
17121            expires_at: None,
17122            metadata: serde_json::json!({}),
17123            reflection_depth: 1,
17124            memory_kind: crate::models::MemoryKind::Reflection,
17125            entity_id: None,
17126            persona_version: None,
17127            citations: Vec::new(),
17128            source_uri: None,
17129            source_span: None,
17130            confidence_source: ConfidenceSource::CallerProvided,
17131            confidence_signals: None,
17132            confidence_decayed_at: None,
17133            version: 1,
17134        };
17135        let id = insert(&conn, &mem).unwrap();
17136        let got = get(&conn, &id)
17137            .unwrap()
17138            .expect("inserted memory must be found");
17139        assert_eq!(
17140            got.memory_kind,
17141            crate::models::MemoryKind::Reflection,
17142            "memory_kind=Reflection must roundtrip through insert→get"
17143        );
17144    }
17145
17146    /// The upsert sticky-field logic: if a row already has
17147    /// `memory_kind='reflection'`, a subsequent upsert with
17148    /// `memory_kind='observation'` must NOT overwrite it.
17149    #[test]
17150    fn l1_1_upsert_preserves_reflection_kind() {
17151        let conn = test_db();
17152        let now = chrono::Utc::now().to_rfc3339();
17153        let id = uuid::Uuid::new_v4().to_string();
17154
17155        // First insert: Reflection.
17156        let mem_reflection = Memory {
17157            id: id.clone(),
17158            tier: Tier::Long,
17159            namespace: "sticky-ns".to_string(),
17160            title: "sticky-title".to_string(),
17161            content: "original content".to_string(),
17162            tags: vec![],
17163            priority: 5,
17164            confidence: 1.0,
17165            source: "test".to_string(),
17166            access_count: 0,
17167            created_at: now.clone(),
17168            updated_at: now.clone(),
17169            last_accessed_at: None,
17170            expires_at: None,
17171            metadata: serde_json::json!({}),
17172            reflection_depth: 1,
17173            memory_kind: crate::models::MemoryKind::Reflection,
17174            entity_id: None,
17175            persona_version: None,
17176            citations: Vec::new(),
17177            source_uri: None,
17178            source_span: None,
17179            confidence_source: ConfidenceSource::CallerProvided,
17180            confidence_signals: None,
17181            confidence_decayed_at: None,
17182            version: 1,
17183        };
17184        insert(&conn, &mem_reflection).unwrap();
17185
17186        // Second upsert: Observation (same title+namespace → triggers ON CONFLICT).
17187        let mem_obs = Memory {
17188            id: uuid::Uuid::new_v4().to_string(), // different id, same title+ns
17189            tier: Tier::Long,
17190            namespace: "sticky-ns".to_string(),
17191            title: "sticky-title".to_string(),
17192            content: "updated content".to_string(),
17193            tags: vec![],
17194            priority: 6,
17195            confidence: 1.0,
17196            source: "test".to_string(),
17197            access_count: 0,
17198            created_at: now.clone(),
17199            updated_at: now,
17200            last_accessed_at: None,
17201            expires_at: None,
17202            metadata: serde_json::json!({}),
17203            reflection_depth: 0,
17204            memory_kind: crate::models::MemoryKind::Observation,
17205            entity_id: None,
17206            persona_version: None,
17207            citations: Vec::new(),
17208            source_uri: None,
17209            source_span: None,
17210            confidence_source: ConfidenceSource::CallerProvided,
17211            confidence_signals: None,
17212            confidence_decayed_at: None,
17213            version: 1,
17214        };
17215        insert(&conn, &mem_obs).unwrap();
17216
17217        // The row must still be Reflection (sticky field wins).
17218        let got = get(&conn, &id)
17219            .unwrap()
17220            .expect("original memory must still exist");
17221        assert_eq!(
17222            got.memory_kind,
17223            crate::models::MemoryKind::Reflection,
17224            "upsert with Observation must not overwrite an existing Reflection kind"
17225        );
17226    }
17227
17228    // -----------------------------------------------------------------
17229    // v0.7.0 issue #810 / #812 / #813 — CHECK trigger + strongest_attest
17230    // -----------------------------------------------------------------
17231
17232    #[test]
17233    fn strongest_attest_returns_unsigned_for_isolate_source() {
17234        // A source with no outbound links — the only honest default
17235        // is `unsigned`.
17236        let conn = test_db();
17237        let lonely = make_memory("lonely", "test", Tier::Long, 5);
17238        insert(&conn, &lonely).unwrap();
17239        let got = strongest_attest_level_for_source(&conn, &lonely.id).unwrap();
17240        assert_eq!(got, "unsigned");
17241    }
17242
17243    #[test]
17244    fn strongest_attest_picks_self_signed_over_unsigned() {
17245        use crate::identity::keypair;
17246        // Serialise against the a3 tests that flip the *global* permissions
17247        // mode to Enforce + install a deny-all link rule; without this gate
17248        // their Enforce window can race this create_link_signed call and
17249        // surface a spurious "link denied by permission rule". See the
17250        // governance-mode test-isolation tracking issue. #626 Layer-3 QC.
17251        let _gate = crate::config::lock_permissions_mode_for_test();
17252        let conn = test_db();
17253        let src = make_memory("attest-src", "test", Tier::Long, 5);
17254        let a = make_memory("attest-a", "test", Tier::Long, 5);
17255        let b = make_memory("attest-b", "test", Tier::Long, 5);
17256        insert(&conn, &src).unwrap();
17257        insert(&conn, &a).unwrap();
17258        insert(&conn, &b).unwrap();
17259        // One unsigned + one signed outbound link.
17260        create_link_signed(&conn, &src.id, &a.id, "related_to", None).unwrap();
17261        let kp = keypair::generate("alice").unwrap();
17262        create_link_signed(&conn, &src.id, &b.id, "supersedes", Some(&kp)).unwrap();
17263        let got = strongest_attest_level_for_source(&conn, &src.id).unwrap();
17264        assert_eq!(got, "self_signed", "self_signed beats unsigned");
17265    }
17266
17267    #[test]
17268    fn strongest_attest_picks_peer_attested_over_self_signed() {
17269        // Construct a peer-attested row by hand-rolling the
17270        // create_link_inbound path so we don't depend on a remote
17271        // signature. The CHECK trigger requires a 64-byte sig blob
17272        // for `peer_attested` — fabricate one.
17273        let conn = test_db();
17274        let src = make_memory("attest-pa-src", "test", Tier::Long, 5);
17275        let a = make_memory("attest-pa-a", "test", Tier::Long, 5);
17276        let b = make_memory("attest-pa-b", "test", Tier::Long, 5);
17277        insert(&conn, &src).unwrap();
17278        insert(&conn, &a).unwrap();
17279        insert(&conn, &b).unwrap();
17280        // Self-signed link.
17281        let kp = crate::identity::keypair::generate("alice").unwrap();
17282        create_link_signed(&conn, &src.id, &a.id, "related_to", Some(&kp)).unwrap();
17283        // Hand-inject a peer_attested row with a 64-byte signature so
17284        // the CHECK trigger admits it.
17285        let now = chrono::Utc::now().to_rfc3339();
17286        let sig = vec![0xAB_u8; 64];
17287        conn.execute(
17288            "INSERT INTO memory_links \
17289                (source_id, target_id, relation, created_at, valid_from, signature, attest_level, observed_by) \
17290             VALUES (?1, ?2, 'related_to', ?3, ?3, ?4, 'peer_attested', 'peer-bob')",
17291            params![&src.id, &b.id, &now, &sig],
17292        )
17293        .unwrap();
17294        let got = strongest_attest_level_for_source(&conn, &src.id).unwrap();
17295        assert_eq!(got, "peer_attested", "peer_attested beats self_signed");
17296    }
17297
17298    #[test]
17299    fn ck_trigger_refuses_self_signed_insert_without_signature() {
17300        // BUG-A regression test — a direct INSERT that claims
17301        // `self_signed` with NULL signature must fail at the SQLite
17302        // trigger layer. Closes the phantom-attest-level defect at
17303        // the substrate boundary even when a future caller (or
17304        // operator UPDATE) bypasses `create_link_signed`'s match arm.
17305        let conn = test_db();
17306        let s = make_memory("ck-src", "test", Tier::Long, 5);
17307        let t = make_memory("ck-tgt", "test", Tier::Long, 5);
17308        insert(&conn, &s).unwrap();
17309        insert(&conn, &t).unwrap();
17310        let now = chrono::Utc::now().to_rfc3339();
17311        let res = conn.execute(
17312            "INSERT INTO memory_links \
17313                (source_id, target_id, relation, created_at, valid_from, signature, attest_level) \
17314             VALUES (?1, ?2, 'related_to', ?3, ?3, NULL, 'self_signed')",
17315            params![&s.id, &t.id, &now],
17316        );
17317        let err = res.expect_err("CHECK trigger must reject self_signed + NULL signature");
17318        let msg = format!("{err}");
17319        assert!(
17320            msg.contains("CHECK constraint failed")
17321                || msg.contains("attest_level")
17322                || msg.contains("64-byte signature"),
17323            "trigger error must name the failure mode, got: {msg}"
17324        );
17325    }
17326
17327    #[test]
17328    fn ck_trigger_refuses_self_signed_insert_with_wrong_length_signature() {
17329        // Same defense for a non-NULL but wrong-length signature
17330        // (e.g. truncated by a partial wire-read or a malformed
17331        // operator INSERT).
17332        let conn = test_db();
17333        let s = make_memory("ck-src-wlen", "test", Tier::Long, 5);
17334        let t = make_memory("ck-tgt-wlen", "test", Tier::Long, 5);
17335        insert(&conn, &s).unwrap();
17336        insert(&conn, &t).unwrap();
17337        let now = chrono::Utc::now().to_rfc3339();
17338        let res = conn.execute(
17339            "INSERT INTO memory_links \
17340                (source_id, target_id, relation, created_at, valid_from, signature, attest_level) \
17341             VALUES (?1, ?2, 'related_to', ?3, ?3, ?4, 'self_signed')",
17342            params![&s.id, &t.id, &now, &[0u8; 8][..]],
17343        );
17344        assert!(
17345            res.is_err(),
17346            "CHECK trigger must reject wrong-length signature"
17347        );
17348    }
17349
17350    #[test]
17351    fn ck_trigger_refuses_update_to_self_signed_without_signature() {
17352        // The CHECK trigger fires on UPDATE as well as INSERT — a
17353        // post-hoc UPDATE that flips an unsigned row to self_signed
17354        // without supplying signature bytes must be refused.
17355        let conn = test_db();
17356        let s = make_memory("ck-upd-src", "test", Tier::Long, 5);
17357        let t = make_memory("ck-upd-tgt", "test", Tier::Long, 5);
17358        insert(&conn, &s).unwrap();
17359        insert(&conn, &t).unwrap();
17360        create_link_signed(&conn, &s.id, &t.id, "related_to", None).unwrap();
17361        let res = conn.execute(
17362            "UPDATE memory_links SET attest_level = 'self_signed' \
17363             WHERE source_id = ?1 AND target_id = ?2",
17364            params![&s.id, &t.id],
17365        );
17366        assert!(
17367            res.is_err(),
17368            "CHECK trigger must reject UPDATE to self_signed with NULL signature"
17369        );
17370    }
17371
17372    #[test]
17373    fn ck_trigger_admits_unsigned_with_null_signature() {
17374        // The trigger's `WHEN` clause is scoped to self_signed /
17375        // peer_attested — the unsigned path with NULL signature
17376        // (the v0.6.4 default) must still admit. Negative-control
17377        // test pinning the trigger's narrow scope.
17378        let conn = test_db();
17379        let s = make_memory("ck-unsigned-src", "test", Tier::Long, 5);
17380        let t = make_memory("ck-unsigned-tgt", "test", Tier::Long, 5);
17381        insert(&conn, &s).unwrap();
17382        insert(&conn, &t).unwrap();
17383        // create_link_signed's unsigned branch sets (NULL, "unsigned");
17384        // confirm it still works under the new trigger.
17385        create_link_signed(&conn, &s.id, &t.id, "related_to", None)
17386            .expect("unsigned create must still succeed under the new CHECK trigger");
17387    }
17388
17389    // -----------------------------------------------------------------
17390    // #626 Layer-3 (Task 1.3 / C3) — bind_agent_pubkey + agent_pubkey
17391    // -----------------------------------------------------------------
17392
17393    #[test]
17394    fn agent_pubkey_none_before_bind_and_some_after() {
17395        let conn = test_db();
17396        register_agent(&conn, "ai:curator", "ai:generic", &[]).expect("register");
17397        // Registered but unbound → permissive None.
17398        assert_eq!(agent_pubkey(&conn, "ai:curator").unwrap(), None);
17399
17400        let kp = crate::identity::keypair::generate("ai:curator").expect("generate");
17401        let b64 = kp.public_base64();
17402        bind_agent_pubkey(&conn, "ai:curator", &b64).expect("bind");
17403        assert_eq!(agent_pubkey(&conn, "ai:curator").unwrap(), Some(b64));
17404    }
17405
17406    #[test]
17407    fn agent_pubkey_none_for_unregistered_agent() {
17408        let conn = test_db();
17409        // Never registered → None (collapses to "no key to verify").
17410        assert_eq!(agent_pubkey(&conn, "ai:ghost").unwrap(), None);
17411    }
17412
17413    #[test]
17414    fn bind_agent_pubkey_rejects_unregistered_agent() {
17415        let conn = test_db();
17416        let err = bind_agent_pubkey(&conn, "ai:ghost", "AAAA").unwrap_err();
17417        assert!(
17418            err.to_string().contains("not registered"),
17419            "binding to an unregistered agent must be rejected; got: {err}",
17420        );
17421    }
17422
17423    #[test]
17424    fn bind_agent_pubkey_rotates_key_in_place() {
17425        let conn = test_db();
17426        register_agent(&conn, "ai:curator", "ai:generic", &[]).expect("register");
17427        let k1 = crate::identity::keypair::generate("ai:curator")
17428            .unwrap()
17429            .public_base64();
17430        let k2 = crate::identity::keypair::generate("ai:curator")
17431            .unwrap()
17432            .public_base64();
17433        assert_ne!(k1, k2, "two fresh keys differ");
17434        bind_agent_pubkey(&conn, "ai:curator", &k1).expect("bind k1");
17435        assert_eq!(agent_pubkey(&conn, "ai:curator").unwrap(), Some(k1));
17436        // Rotation overwrites in place.
17437        bind_agent_pubkey(&conn, "ai:curator", &k2).expect("rotate to k2");
17438        assert_eq!(agent_pubkey(&conn, "ai:curator").unwrap(), Some(k2));
17439    }
17440
17441    #[test]
17442    fn bind_agent_pubkey_preserves_registration_fields() {
17443        // Binding a key must not clobber agent_type / capabilities /
17444        // registered_at — list_agents must still see the full row.
17445        let conn = test_db();
17446        register_agent(
17447            &conn,
17448            "ai:curator",
17449            "ai:claude-opus",
17450            &["recall".to_string(), "write".to_string()],
17451        )
17452        .expect("register");
17453        let before = list_agents(&conn).expect("list before");
17454        let kp = crate::identity::keypair::generate("ai:curator").unwrap();
17455        bind_agent_pubkey(&conn, "ai:curator", &kp.public_base64()).expect("bind");
17456        let after = list_agents(&conn).expect("list after");
17457
17458        let a_before = before
17459            .iter()
17460            .find(|a| a.agent_id == "ai:curator")
17461            .expect("present before");
17462        let a_after = after
17463            .iter()
17464            .find(|a| a.agent_id == "ai:curator")
17465            .expect("present after");
17466        assert_eq!(a_after.agent_type, a_before.agent_type);
17467        assert_eq!(a_after.capabilities, a_before.capabilities);
17468        assert_eq!(a_after.registered_at, a_before.registered_at);
17469    }
17470
17471    // -----------------------------------------------------------------
17472    // #626 Layer-3 (Task 1.3 / C5) — revoke_agent_pubkey
17473    // -----------------------------------------------------------------
17474
17475    #[test]
17476    fn revoke_agent_pubkey_clears_bound_key() {
17477        let conn = test_db();
17478        register_agent(&conn, "ai:curator", "ai:generic", &[]).expect("register");
17479        let kp = crate::identity::keypair::generate("ai:curator").unwrap();
17480        bind_agent_pubkey(&conn, "ai:curator", &kp.public_base64()).expect("bind");
17481        assert!(agent_pubkey(&conn, "ai:curator").unwrap().is_some());
17482        revoke_agent_pubkey(&conn, "ai:curator").expect("revoke");
17483        assert_eq!(agent_pubkey(&conn, "ai:curator").unwrap(), None);
17484    }
17485
17486    #[test]
17487    fn revoke_agent_pubkey_is_idempotent_without_bound_key() {
17488        let conn = test_db();
17489        register_agent(&conn, "ai:curator", "ai:generic", &[]).expect("register");
17490        // No key ever bound — revoke still succeeds and stays None.
17491        revoke_agent_pubkey(&conn, "ai:curator").expect("revoke unbound");
17492        assert_eq!(agent_pubkey(&conn, "ai:curator").unwrap(), None);
17493    }
17494
#[test]
fn revoke_agent_pubkey_rejects_unregistered_agent() {
    let db = test_db();

    // Nothing in this test registers "ai:ghost", so the revoke is
    // expected to fail with an error that mentions "not registered".
    let err = revoke_agent_pubkey(&db, "ai:ghost").unwrap_err();
    let message = err.to_string();
    assert!(
        message.contains("not registered"),
        "revoking an unregistered agent must be rejected; got: {err}",
    );
}
17504
#[test]
fn revoke_agent_pubkey_preserves_registration_fields() {
    let db = test_db();

    // One capability list serves both as the registration input and as
    // the expectation checked after the revoke.
    let capabilities = ["recall".to_string(), "write".to_string()];
    register_agent(&db, "ai:curator", "ai:claude-opus", &capabilities).expect("register");

    let keypair = crate::identity::keypair::generate("ai:curator").unwrap();
    bind_agent_pubkey(&db, "ai:curator", &keypair.public_base64()).expect("bind");
    revoke_agent_pubkey(&db, "ai:curator").expect("revoke");

    // The agent must still be listed, with its type and capabilities
    // unchanged by the bind + revoke cycle.
    let agents = list_agents(&db).expect("list after");
    let curator = agents
        .iter()
        .find(|entry| entry.agent_id == "ai:curator")
        .expect("present after revoke");
    assert_eq!(curator.agent_type, "ai:claude-opus");
    assert_eq!(curator.capabilities, capabilities.to_vec());
}
17529
#[test]
fn revoke_then_rebind_restores_attestable_key() {
    let db = test_db();
    register_agent(&db, "ai:curator", "ai:generic", &[]).expect("register");

    // Generates a keypair for the agent and returns its base64-encoded
    // public key; invoked once per binding below.
    let new_public_key = || {
        crate::identity::keypair::generate("ai:curator")
            .unwrap()
            .public_base64()
    };

    // Bind the first key, then revoke it: the lookup must report `None`.
    let first = new_public_key();
    bind_agent_pubkey(&db, "ai:curator", &first).expect("bind k1");
    revoke_agent_pubkey(&db, "ai:curator").expect("revoke");
    assert_eq!(agent_pubkey(&db, "ai:curator").unwrap(), None);

    // Binding a second key after the revoke must succeed, and that key
    // must be the one the lookup returns.
    let second = new_public_key();
    bind_agent_pubkey(&db, "ai:curator", &second).expect("rebind k2");
    assert_eq!(agent_pubkey(&db, "ai:curator").unwrap(), Some(second));
}
17546}
ai_memory/storage/mod.rs

ai_memory/storage/
mod.rs