ai_memory/storage/
mod.rs

1// Copyright 2026 AlphaOne LLC
2// SPDX-License-Identifier: Apache-2.0
3
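// #873 — `recall_hybrid_with_telemetry` exceeds the per-function 250-
// line budget; tracked for split as #871 (stage-helpers: param-prep /
// fts-branch / semantic-branch / blend+rerank / touch+telemetry).
// NOTE: the allowance below is module-scope, so it silences the lint
// for every function in this file, not just the one named above. Once
// #871 lands it should be narrowed to the offending function (or
// removed) so future too-big helpers in the same file are caught by
// the lint at PR-time instead of silently growing.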
9#![allow(clippy::too_many_lines)]
10
11use crate::models::field_names;
12use anyhow::{Context, Result};
13use chrono::{DateTime, Utc};
14use rusqlite::{Connection, params};
15use std::collections::HashMap;
16use std::path::Path;
17
18// ── #1558 batch 6 — file-local SQL SSOT (pm-v3.1 hardcoded-literal gate) ──
/// Deletes the `memories` row whose `id` equals bind parameter `?1`.
const SQL_DELETE_MEMORY_BY_ID: &str = "DELETE FROM memories WHERE id = ?1";
/// Deletes every `namespace_meta` row whose `standard_id` equals bind
/// parameter `?1`.
const SQL_DELETE_NAMESPACE_META_BY_STANDARD_ID: &str =
    "DELETE FROM namespace_meta WHERE standard_id = ?1";
/// Existence probe phrased as `COUNT(*) > 0`: yields one column that is
/// true when a `memories` row with id `?1` exists.
const SQL_MEMORY_EXISTS_COUNT: &str = "SELECT COUNT(*) > 0 FROM memories WHERE id = ?1";
/// Existence probe phrased as an `EXISTS` subquery: answers the same
/// question as `SQL_MEMORY_EXISTS_COUNT` for id `?1`.
const SQL_MEMORY_EXISTS: &str = "SELECT EXISTS(SELECT 1 FROM memories WHERE id = ?1)";
/// Selects every column of the `memories` row whose `id` equals `?1`.
const SQL_SELECT_MEMORY_ROW_BY_ID: &str = "SELECT * FROM memories WHERE id = ?1";
25// ── #1579 A2 — sargable `list` SQL fragments ──────────────────────────────
26// The always-present expiry guard opens the WHERE clause; every other
27// filter is appended by `build_list_query` ONLY when the caller supplied
28// it, so the planner sees bare `col = ?` / `col >= ?` predicates it can
29// drive through `idx_memories_list_order` / `idx_memories_ns_list_order`
30// instead of the formerly non-sargable `(?N IS NULL OR col = ?N)` arms.
/// Head of the `list` query: selects all columns from `memories` and
/// opens the WHERE clause with the expiry guard. Carries one positional
/// `?` placeholder that `expires_at` is compared against (presumably the
/// current timestamp — confirm against `build_list_query`).
const SQL_LIST_BASE: &str = "SELECT * FROM memories WHERE (expires_at IS NULL OR expires_at > ?)";
/// Tail of the `list` query: orders by `priority` then `updated_at`
/// (both descending) and carries two positional `?` placeholders, for
/// `LIMIT` and `OFFSET` in that order. Note the leading space, which lets
/// it be appended directly after the last WHERE predicate.
const SQL_LIST_ORDER_LIMIT: &str = " ORDER BY priority DESC, updated_at DESC LIMIT ? OFFSET ?";
33
34/// v0.7.0 H6 (round-2) — truncate a `DateTime<Utc>` to microsecond
35/// precision. Companion of the same-named helper in
36/// `store/postgres.rs:3539` (G3 fix); both ends of the link sign/verify
37/// roundtrip now collapse sub-microsecond digits BEFORE CBOR
38/// canonicalisation. PostgreSQL's `TIMESTAMPTZ` stores microseconds —
39/// the SQLite path was lossless, but a link created on SQLite and
40/// later re-verified on Postgres (or vice versa via federation) would
41/// see the canonical RFC3339 string change shape on the storage hop
42/// and break the Ed25519 signature. Truncating at write time makes the
43/// shape stable across adapters. See `store/postgres.rs:3520-3543` for
44/// the full design context.
45#[must_use]
46pub fn truncate_to_microseconds(t: DateTime<Utc>) -> DateTime<Utc> {
47    use chrono::Timelike;
48    let micros = t.nanosecond() / 1_000;
49    t.with_nanosecond(micros * 1_000).unwrap_or(t)
50}
51
52use crate::models::{
53    AGENTS_NAMESPACE, AgentRegistration, Approval, ApproverType, ConfidenceSource, DuplicateCheck,
54    DuplicateMatch, GovernanceDecision, GovernanceLevel, GovernancePolicy, GovernedAction,
55    MAX_NAMESPACE_DEPTH, Memory, MemoryKind, MemoryLink, NamespaceCount, PROMOTION_THRESHOLD,
56    PendingAction, SourceSpan, Stats, Taxonomy, TaxonomyNode, Tier, TierCount, namespace_ancestors,
57};
58
59// #962 — typed substrate-layer error envelope. Substrate code emits
60// `anyhow::Error::new(StorageError::…)` instead of the legacy
61// `anyhow::bail!("…")`; handlers downcast via
62// `MemoryError::from(anyhow::Error)` to map each variant to its
63// canonical HTTP status. The error-prefix constants live alongside the
64// typed enum so the Display impl and the prefix tokens stay in lockstep.
65mod error;
66pub use error::{LINK_CYCLE_ERR_PREFIX, LINK_PERMISSION_DENIED_ERR_PREFIX, LinkEnd, StorageError};
67
68// ---------------------------------------------------------------------------
69// v0.7.0 L1-6 Deliverable E — governance pre-write hook (issue #691)
70// ---------------------------------------------------------------------------
71//
72// Substrate-internal: layering-preserving insertion point for the
73// agent-action rules engine. The hook is a process-wide `OnceLock`
74// holding an optional closure of the shape
75//
76//     Fn(&Memory) -> Result<(), String> + Send + Sync
77//
78// installed exactly once at daemon `serve` boot (BEFORE binding the
79// listener) and consulted by every substrate write path
80// (`storage::insert`, `storage::insert_with_conflict`,
81// `storage::insert_if_newer`) immediately BEFORE the SQL `INSERT`.
82//
83// Why a `OnceLock` and not a thread-local or `RwLock<Option<_>>`:
84//
85//   1. Operator standing directive: "rules and standards can NEVER be
86//      bypassed by AI/AI Agents — 100% of the time". A `OnceLock`
87//      enforces installation-is-one-shot at the type level — no
88//      reset, no override, no test-only escape hatch reachable from
89//      production code paths.
90//   2. The hook closure is read on every write; an `RwLock` would add
91//      contention on the hot path. `OnceLock::get()` is lock-free.
92//   3. CLI one-shot mode (`ai-memory store …`, `ai-memory mine …`,
93//      etc.) MUST NOT install the hook — the operator's direct
94//      substrate ops stay unimpeded by design. `OnceLock` defaults to
95//      empty, so the CLI path is the no-op default; only the daemon's
96//      `serve` boot reaches the `.set` callsite.
97//
98// Refusal contract: when the hook fires it returns `Err(reason)`.
99// The caller wraps `reason` in a typed [`GovernanceRefusal`] (which
100// implements [`std::error::Error`]) and propagates via `anyhow::Error`.
101// The handler layer's `MemoryError::from(anyhow::Error)` impl
102// downcasts and promotes it to [`crate::errors::MemoryError::RefusedByGovernance`]
103// — see `src/errors.rs` for the 403 / `GOVERNANCE_REFUSED` mapping.
104
105/// Optional governance pre-write hook. When `Some`, every substrate
106/// `INSERT` path consults the closure BEFORE the SQL write; an
107/// `Err(reason)` short-circuits the write with no row touched.
108///
109/// Installation is one-shot (`OnceLock::set`); the daemon `serve`
110/// bootstrap is the only caller in production. CLI one-shot binaries
111/// must leave this empty.
112///
113/// See module-level comment for the full layering rationale.
114pub static GOVERNANCE_PRE_WRITE: std::sync::OnceLock<
115    Box<dyn Fn(&Memory) -> std::result::Result<(), String> + Send + Sync>,
116> = std::sync::OnceLock::new();
117
/// Marker error raised by the substrate layer when the governance
/// pre-write hook refuses a write.
///
/// It travels inside an `anyhow::Error`, so the
/// `anyhow::Result<String>` return shape of `storage::insert*` is
/// untouched; the handler layer downcasts via
/// `MemoryError::from(anyhow::Error)` (see `src/errors.rs`) and maps
/// the refusal to HTTP `403 FORBIDDEN` + code `GOVERNANCE_REFUSED`.
///
/// The `Display` form is `governance-refused: <reason>`.
#[derive(Debug, Clone)]
pub struct GovernanceRefusal {
    /// Operator-authored refusal reason, carried verbatim. The MCP
    /// layer surfaces the same string (audit log + tool error data
    /// field).
    pub reason: String,
}

impl std::fmt::Display for GovernanceRefusal {
    /// Renders the fixed `governance-refused: ` prefix followed by the
    /// reason text.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { reason } = self;
        write!(f, "governance-refused: {reason}")
    }
}

// No underlying source error: the reason string is the whole payload.
impl std::error::Error for GovernanceRefusal {}
139
140/// Internal helper consulted by every substrate write path BEFORE
141/// the SQL write. When the [`GOVERNANCE_PRE_WRITE`] hook is unset
142/// (CLI mode or pre-hook-install daemon path), this is a zero-cost
143/// no-op `Ok(())`. When the hook is set, the closure runs and an
144/// `Err(reason)` wraps into a [`GovernanceRefusal`] propagated up the
145/// `anyhow` chain.
146///
147/// Visibility: `pub(crate)` so the `PostgresStore` SAL adapter
148/// (`src/store/postgres.rs`) can consult the same hook on its write
149/// paths — fixing ARCH-1 (substrate governance pre-write parity
150/// between the SQLite and Postgres backends). The hook itself is
151/// process-wide and installed once by the daemon `serve` bootstrap;
152/// every substrate write path on EVERY backend MUST consult it before
153/// touching SQL.
154///
155/// The function is hot-path; avoid heap allocation on the Allow leg.
156#[inline]
157pub(crate) fn consult_governance_pre_write(mem: &Memory) -> Result<()> {
158    if let Some(hook) = GOVERNANCE_PRE_WRITE.get() {
159        if let Err(reason) = hook(mem) {
160            return Err(anyhow::Error::new(GovernanceRefusal { reason }));
161        }
162    }
163    Ok(())
164}
165
166/// Computed 4-tuple of visibility prefixes for an agent position (Task 1.5).
167/// Index 0 = agent's own namespace (private), 1 = parent (team),
168/// 2 = grandparent (unit), 3 = great-grandparent (org). Missing = `None`.
169type VisibilityPrefixes = (
170    Option<String>,
171    Option<String>,
172    Option<String>,
173    Option<String>,
174);
175
176fn compute_visibility_prefixes(as_agent: Option<&str>) -> VisibilityPrefixes {
177    let Some(ns) = as_agent else {
178        return (None, None, None, None);
179    };
180    let ancestors = namespace_ancestors(ns);
181    let p = ancestors.first().cloned();
182    let t = ancestors.get(1).cloned();
183    let u = ancestors.get(2).cloned();
184    let o = ancestors.get(3).cloned();
185    (p, t, u, o)
186}
187
188/// Rust-side visibility check for paths that can't easily attach SQL
189/// visibility (the HNSW branch of `recall_hybrid` iterates memories loaded
190/// via `get()`). Returns `true` when `as_agent` is unset (no filter) or
191/// when the memory's scope + namespace grant visibility to the caller.
192fn is_visible(mem: &Memory, prefixes: &VisibilityPrefixes) -> bool {
193    // v0.7.0 multi-agent literal-sweep (scanner B finding F-B8.x):
194    // typed-enum exhaustive match via `MemoryScope` + `META_KEY_SCOPE`
195    // SSOT. Adding a new scope variant from here forward is a
196    // compile-time error in this match (was a silent `_ => false`
197    // fall-through pre-refactor — masked drift). Unknown-scope
198    // strings still degrade to `false` via the `from_str` → `None`
199    // arm, preserving pre-refactor semantics byte-for-byte.
200    use crate::models::namespace::MemoryScope;
201    let (p, t, u, o) = prefixes;
202    if p.is_none() {
203        return true;
204    }
205    let Some(scope) = mem
206        .metadata
207        .get(crate::META_KEY_SCOPE)
208        .and_then(|v| v.as_str())
209        .map_or(Some(MemoryScope::default()), MemoryScope::from_str)
210    else {
211        return false;
212    };
213    match scope {
214        MemoryScope::Collective => true,
215        MemoryScope::Private => p.as_ref().is_some_and(|ns| &mem.namespace == ns),
216        MemoryScope::Team => matches_subtree(&mem.namespace, t.as_deref()),
217        MemoryScope::Unit => matches_subtree(&mem.namespace, u.as_deref()),
218        MemoryScope::Org => matches_subtree(&mem.namespace, o.as_deref()),
219    }
220}
221
/// Returns `true` when `namespace` is `prefix` itself or lies beneath it
/// in the `/`-separated hierarchy (`prefix/...`). A missing prefix
/// (`None`) never matches.
///
/// The separator check keeps namespaces that merely share leading
/// characters apart: `team-ab` is not inside `team-a`.
///
/// Implemented with `strip_prefix` so no temporary `String` is built per
/// call — this runs once per candidate row on the Rust-side visibility
/// path.
fn matches_subtree(namespace: &str, prefix: Option<&str>) -> bool {
    prefix.is_some_and(|p| {
        namespace
            .strip_prefix(p)
            // Exact match leaves nothing; a descendant leaves "/child...".
            .is_some_and(|rest| rest.is_empty() || rest.starts_with('/'))
    })
}
228
// NOTE: the comment below documents `visibility_clause` (defined further
// down in this file), not the function that immediately follows. It is a
// plain `//` comment so rustdoc does not attach it to
// `archived_source_clause`.
//
// Generate the visibility WHERE-clause fragment starting at placeholder `start`.
// Uses placeholders `?start .. ?start+3` for private/team/unit/org prefixes.
// See `compute_visibility_prefixes` for the bind order.
//
// Performance (v0.6.0 GA): each scope branch compares against the indexed
// generated column `scope_idx` (schema v10) rather than re-evaluating
// `json_extract(metadata, '$.scope')` per row. The query planner picks
// `idx_memories_scope_idx` whenever the predicate narrows by scope,
// dropping recall from "scan every namespace row and parse its JSON" to
// an index seek + per-row refinement. See `docs/ARCHITECTURAL_LIMITS.md`
// for which `SQLite` limits remain structural.
//
// Security (issue #217): the team/unit/org branches use `LIKE` to expand a
// prefix into its sub-tree. Without escaping, a caller who can influence the
// prefix could inject SQL `LIKE` meta-characters (`%`, `_`) and broaden the
// match across unrelated namespaces. We neutralise this at SQL evaluation
// time by `replace()`-escaping `%` and `_` in the bound prefix and pairing
// the LIKE with `ESCAPE '\'`. `validate_namespace` already rejects backslash,
// so `\` cannot appear in the bound prefix and the escape sentinel is safe.
// The `=` equality side is unaffected by LIKE wildcards and binds the raw
// value so that legitimate namespaces containing `_` (e.g. `under_score`)
// continue to match exactly.

/// v0.7.0 WT-1-E — atom-preference WHERE fragment.
///
/// Default recall surfaces atoms (the canonical post-atomisation unit)
/// in place of the archived source row. The returned fragment is an
/// `AND NOT (...)` predicate that hides a row only when it presents
/// BOTH archived-source signals:
///
///   * `atomised_into > 0` — the substrate-visible count of atoms
///     emitted by the WT-1-B atomiser (the fast index target), and
///   * `metadata.atomisation_archived_at` is set — the RFC3339 stamp
///     WT-1-B writes alongside the column flip (see
///     `src/atomisation/mod.rs::archive_source`).
///
/// Atoms themselves (rows where `atom_of IS NOT NULL`) are not
/// "archived" by this definition and are never excluded by the
/// fragment.
///
/// When `include_archived` is true the fragment is empty (no filter),
/// so auditors and the forensic-export path see the full chain.
///
/// `table_alias` selects which pre-built fragment is returned; only
/// `"m"` and `"memories"` are recognised. Any other alias yields the
/// empty string, i.e. no filtering is applied.
fn archived_source_clause(include_archived: bool, table_alias: &str) -> &'static str {
    // Static fragments with the alias baked in, so the function can hand
    // back a `&'static str` without allocating.
    const HIDE_ARCHIVED_ALIAS_M: &str = "AND NOT (\
        m.atomised_into IS NOT NULL AND m.atomised_into > 0 \
        AND json_extract(m.metadata, '$.atomisation_archived_at') IS NOT NULL\
    )";
    const HIDE_ARCHIVED_ALIAS_MEMORIES: &str = "AND NOT (\
        memories.atomised_into IS NOT NULL AND memories.atomised_into > 0 \
        AND json_extract(memories.metadata, '$.atomisation_archived_at') IS NOT NULL\
    )";

    if include_archived {
        return "";
    }

    // Two-part predicate on purpose: either signal alone could be a
    // partial-state row (e.g. a crash between the column flip and the
    // metadata write), and such a row should still surface under
    // default recall.
    match table_alias {
        "m" => HIDE_ARCHIVED_ALIAS_M,
        "memories" => HIDE_ARCHIVED_ALIAS_MEMORIES,
        _ => "",
    }
}
304
305/// v0.7.0 WT-1-E — Rust-side mirror of [`archived_source_clause`].
306///
307/// Used by the HNSW retrieval branch of `recall_hybrid_with_telemetry`
308/// where the bypass-the-SQL-WHERE walk fetches each candidate via
309/// `get()` and then applies post-load filters in Rust. The check
310/// reads `metadata.atomisation_archived_at` (the WT-1-B substrate-
311/// visible read signal) and tolerates the absence of the metadata
312/// key — only rows that DO present the key are excluded.
313///
314/// Note: the SQL fragment also requires `atomised_into > 0` to be
315/// set. The HNSW branch deliberately only checks the metadata key
316/// because the loaded `Memory` struct does not carry the
317/// `atomised_into` column. The two signals are written in the same
318/// `archive_source` transaction (see `src/atomisation/mod.rs`), so
319/// in steady-state every row presents both signals together; the
320/// pathological partial-state row that exists only momentarily
321/// during a crash window still surfaces through HNSW until the next
322/// recall — accepted as a tolerable looseness on the cold-fallback
323/// path.
324fn is_archived_source(mem: &Memory) -> bool {
325    mem.metadata
326        .get(field_names::ATOMISATION_ARCHIVED_AT)
327        .is_some_and(|v| !v.is_null())
328}
329
/// Renders the visibility `AND (...)` WHERE-clause fragment for the
/// table aliased as `table_alias`.
///
/// Four numbered placeholders are used, `?start` through `?start + 3`,
/// for the private, team, unit and org prefixes respectively. The
/// generated predicate accepts a row when any of these hold:
///
/// * the private placeholder is NULL (no visibility filtering);
/// * `scope_idx = 'collective'`;
/// * `scope_idx = 'private'` and the namespace equals the private
///   placeholder;
/// * `scope_idx` is `'team'` / `'unit'` / `'org'`, the matching
///   placeholder is non-NULL, and the namespace either equals it or is
///   `LIKE` the prefix followed by `/%`.
///
/// In the `LIKE` arms the bound prefix has `%` and `_` escaped with
/// SQL `replace()` and the pattern is paired with `ESCAPE '\'`, so
/// wildcard characters inside a prefix match literally. The `=` arms
/// bind the raw value.
fn visibility_clause(start: usize, table_alias: &str) -> String {
    format!(
        "AND (\
            ?{own} IS NULL \
            OR {a}.scope_idx = 'collective' \
            OR ({a}.scope_idx = 'private' AND {a}.namespace = ?{own}) \
            OR ({a}.scope_idx = 'team' AND ?{team} IS NOT NULL AND ({a}.namespace = ?{team} OR {a}.namespace LIKE replace(replace(?{team}, '%', '\\%'), '_', '\\_') || '/%' ESCAPE '\\')) \
            OR ({a}.scope_idx = 'unit' AND ?{unit} IS NOT NULL AND ({a}.namespace = ?{unit} OR {a}.namespace LIKE replace(replace(?{unit}, '%', '\\%'), '_', '\\_') || '/%' ESCAPE '\\')) \
            OR ({a}.scope_idx = 'org'  AND ?{org}  IS NOT NULL AND ({a}.namespace = ?{org}  OR {a}.namespace LIKE replace(replace(?{org}, '%', '\\%'), '_', '\\_') || '/%' ESCAPE '\\'))\
        )",
        a = table_alias,
        own = start,
        team = start + 1,
        unit = start + 2,
        org = start + 3,
    )
}
347
/// v0.7.0 Form 4 / Cluster-A PERF-3 — neutralise SQL `LIKE`
/// metacharacters in a user-supplied substring.
///
/// Every `%`, `_` and `\` in `s` is prefixed with a backslash so the
/// text matches literally when the pattern is paired with
/// `LIKE ... ESCAPE '\\'`; all other characters are copied through
/// unchanged. Used by the `source_uri LIKE 'prefix%'` filter in
/// [`recall`] and [`recall_hybrid_with_telemetry`] to push the
/// `--source-uri-prefix` filter into SQL.
fn escape_like_pattern(s: &str) -> String {
    s.chars()
        .fold(String::with_capacity(s.len()), |mut escaped, ch| {
            if matches!(ch, '\\' | '%' | '_') {
                escaped.push('\\');
            }
            escaped.push(ch);
            escaped
        })
}
367
368// v0.7.0 L0.5-3 — flat `src/db.rs` decomposed into `src/storage/`.
369// Sub-modules stay private to this module per the L0.5-1 pattern;
370// only the re-exports below form the public surface. The
371// `pub use storage as db;` shim in `src/lib.rs` preserves the
372// historical `crate::db::*` paths used elsewhere.
373pub(crate) mod connection;
374// `pub` (rather than `pub(crate)`) so the V-4 closeout
375// integration test suite (`tests/signed_events_chain_v34.rs`) can
376// invoke `migrate_v34_backfill_chain` directly to exercise the
377// idempotent-replay property without going through a full daemon
378// boot cycle.
379pub mod migration_meta;
380pub mod migrations;
381pub(crate) mod reflect;
382
383// Re-exports — every `pub` item that previously lived in `src/db.rs`
384// is re-published at `crate::storage::*` (and therefore `crate::db::*`
385// via the lib.rs shim) so callsites keep resolving without churn.
386pub use connection::open;
387// #1579 B7 — mmap_size knob. `set_db_mmap_size` is the boot-time
388// seeding hook (`daemon_runtime::run`); the DEFAULT const is the
389// compiled fallback the `AppConfig::resolve_storage()` ladder bottoms
390// out on (also consumed by the config-precedence tests).
391pub use connection::{DEFAULT_DB_MMAP_SIZE_BYTES, set_db_mmap_size};
392// v0.7.0 refactor PR-1 (#793) — schema-pins SSOT. Re-export the
393// test-facing helper so callers can use either
394// `ai_memory::storage::current_schema_version_for_tests()` or the
395// existing `ai_memory::db::current_schema_version_for_tests()` shim
396// (via `pub use storage as db;` in `src/lib.rs`).
397pub use migrations::current_schema_version_for_tests;
398// Pre-migration safety-snapshot infix accessor — lets coverage tests
399// locate / name-assert the snapshot file without restamping the literal.
400pub use migrations::pre_migration_backup_infix_for_tests;
401pub use reflect::{
402    ReflectError, ReflectHookDecision, ReflectHooks, ReflectInput, ReflectOutcome,
403    canonical_cbor_reflection_depth_exceeded, reflect, reflect_with_hooks,
404};
405// `emit_reflection_depth_exceeded_audit` is `pub(crate)` — preserve
406// the same visibility on the re-export so it remains reachable from
407// `crate::db::emit_reflection_depth_exceeded_audit` (the original
408// path) without widening the public surface. The current crate has
409// no external callers (the path is only used internally by
410// `reflect_with_hooks`); the re-export is retained for surface
411// parity with pre-L0.5-3.
412#[allow(unused_imports)]
413pub(crate) use reflect::emit_reflection_depth_exceeded_audit;
414
415pub(crate) fn row_to_memory(row: &rusqlite::Row) -> rusqlite::Result<Memory> {
416    let row_id: String = row.get("id")?;
417    let tags_json: String = row.get("tags")?;
418    let tags: Vec<String> = serde_json::from_str(&tags_json).unwrap_or_default();
419    let tier_str: String = row.get("tier")?;
420    let tier = Tier::from_str(&tier_str).unwrap_or(Tier::Mid);
421    let metadata_str: String = row
422        .get::<_, String>("metadata")
423        .unwrap_or_else(|_| "{}".to_string());
424    let metadata: serde_json::Value = serde_json::from_str(&metadata_str).unwrap_or_else(|e| {
425        tracing::warn!(
426            row_id = %row_id,
427            column = "metadata",
428            error = %e,
429            "corrupt metadata in DB row, defaulting to {{}}"
430        );
431        crate::metrics::record_corrupt_provenance("metadata");
432        serde_json::json!({})
433    });
434    // v0.7.0 Form 4 / Cluster-A COR-3 — citations JSON. Pre-fix used a
435    // bare `.ok()` chain that silently turned corrupt JSON into an empty
436    // vec with no operator signal. Now: log via `tracing::warn!` with the
437    // row id + column + parse error, bump the
438    // `corrupt_provenance_rows_total{column=...}` counter, then return
439    // the safe default.
440    let citations = match row.get::<_, String>("citations").ok() {
441        Some(s) => match serde_json::from_str::<Vec<crate::models::Citation>>(&s) {
442            Ok(v) => v,
443            Err(e) => {
444                tracing::warn!(
445                    row_id = %row_id,
446                    column = "citations",
447                    error = %e,
448                    "corrupt citations JSON in DB row, defaulting to []"
449                );
450                crate::metrics::record_corrupt_provenance("citations");
451                Vec::new()
452            }
453        },
454        None => Vec::new(),
455    };
456    let source_span: Option<SourceSpan> = row
457        .get::<_, Option<String>>(field_names::SOURCE_SPAN)
458        .unwrap_or(None)
459        .and_then(|s| match serde_json::from_str::<SourceSpan>(&s) {
460            Ok(span) => Some(span),
461            Err(e) => {
462                tracing::warn!(
463                    row_id = %row_id,
464                    column = field_names::SOURCE_SPAN,
465                    error = %e,
466                    "corrupt source_span JSON in DB row, defaulting to None"
467                );
468                crate::metrics::record_corrupt_provenance(field_names::SOURCE_SPAN);
469                None
470            }
471        });
472    let confidence_signals = row
473        .get::<_, Option<String>>(field_names::CONFIDENCE_SIGNALS)
474        .unwrap_or(None)
475        .and_then(
476            |s| match serde_json::from_str::<crate::models::ConfidenceSignals>(&s) {
477                Ok(v) => Some(v),
478                Err(e) => {
479                    tracing::warn!(
480                        row_id = %row_id,
481                        column = field_names::CONFIDENCE_SIGNALS,
482                        error = %e,
483                        "corrupt confidence_signals JSON in DB row, defaulting to None"
484                    );
485                    crate::metrics::record_corrupt_provenance(field_names::CONFIDENCE_SIGNALS);
486                    None
487                }
488            },
489        );
490    Ok(Memory {
491        id: row_id,
492        tier,
493        namespace: row.get("namespace")?,
494        title: row.get("title")?,
495        content: row.get("content")?,
496        tags,
497        priority: row.get("priority")?,
498        confidence: row.get(field_names::CONFIDENCE).unwrap_or(1.0),
499        source: row.get("source").unwrap_or_else(|_| "api".to_string()),
500        access_count: row.get(field_names::ACCESS_COUNT)?,
501        created_at: row.get(field_names::CREATED_AT)?,
502        updated_at: row.get(field_names::UPDATED_AT)?,
503        last_accessed_at: row.get(field_names::LAST_ACCESSED_AT)?,
504        expires_at: row.get(field_names::EXPIRES_AT)?,
505        metadata,
506        // v0.7.0 Task 1/8 — schema v29 column. `.unwrap_or(0)` keeps the
507        // reader tolerant of pre-v29 row reads (no panic if the migration
508        // ladder hasn't reached this DB yet) and is consistent with the
509        // SQL-side `DEFAULT 0`.
510        reflection_depth: row.get(field_names::REFLECTION_DEPTH).unwrap_or(0_i32),
511        // v0.7.0 L1-1 — schema v30 column. Falls back to `Observation` on
512        // pre-v30 rows (column absent) and on any unrecognised value from a
513        // future schema (forward-compat).
514        memory_kind: row
515            .get::<_, String>(field_names::MEMORY_KIND)
516            .ok()
517            .and_then(|s| crate::models::MemoryKind::from_str(&s))
518            .unwrap_or_default(),
519        // v0.7.0 QW-2 — Persona-as-artifact discriminator columns.
520        // Populated only for `memory_kind = 'persona'` rows. NULL on
521        // every observation/reflection row. Pre-v36 rows lack the
522        // column entirely — the `.ok()` fallthrough yields None.
523        entity_id: row.get::<_, Option<String>>("entity_id").unwrap_or(None),
524        persona_version: row
525            .get::<_, Option<i32>>(field_names::PERSONA_VERSION)
526            .unwrap_or(None),
527        // v0.7.0 Form 4 — schema v38 fact-provenance columns. `citations`
528        // / `source_span` corruption now logs WARN + bumps the
529        // `corrupt_provenance_rows_total` counter above so silent JSON
530        // drops surface in operator observability (Cluster-A COR-3 fix).
531        // `source_uri` is a plain TEXT column (NULL on legacy rows).
532        citations,
533        source_uri: row
534            .get::<_, Option<String>>(field_names::SOURCE_URI)
535            .unwrap_or(None),
536        source_span,
537        // v0.7.0 Form 5 — schema v39 columns. Legacy rows resolve
538        // to `CallerProvided` (SQL DEFAULT), NULL signals, NULL
539        // decayed_at. `.ok()` fallthrough keeps the reader tolerant
540        // of pre-v39 row reads (no panic when migrate hasn't fired
541        // yet).
542        confidence_source: row
543            .get::<_, String>(field_names::CONFIDENCE_SOURCE)
544            .ok()
545            .and_then(|s| crate::models::ConfidenceSource::from_str(&s))
546            .unwrap_or_default(),
547        confidence_signals,
548        confidence_decayed_at: row
549            .get::<_, Option<String>>(field_names::CONFIDENCE_DECAYED_AT)
550            .unwrap_or(None),
551        // v0.7.0 Provenance Gap 1 (#884) — schema v45 optimistic-
552        // concurrency column. Pre-v45 rows lack the column entirely
553        // — the `.ok()` fallthrough yields the SQL DEFAULT 1 (same
554        // value a pre-v45 row would land at the moment the ALTER
555        // fires in the migrate ladder).
556        version: row.get::<_, i64>("version").unwrap_or(1),
557    })
558}
559
560/// v0.7.0 polish PERF-8 (issue #781) — extract the canonical
561/// `mentioned_entity_id` from a memory at write time.
562///
563/// The auto-persona matcher (`hooks::post_reflect::auto_persona`) and
564/// the persona source-pool loader (`persona::load_reflections_for_entity`)
565/// previously scanned `(title|content|metadata) LIKE '%<entity>%'` to
566/// find candidate reflections — a full-table scan against three TEXT
567/// columns for every reflection in the namespace. PERF-8 denormalises
568/// the entity descriptor onto a dedicated indexed column so the matcher
569/// resolves with `WHERE mentioned_entity_id = ?` instead.
570///
571/// Resolution order mirrors the runtime extractor in
572/// `auto_persona::resolve_entity_id`:
573///
574/// 1. `metadata.entity_id` (the structured tag the curator + most
575///    operators supply when minting a reflection about a known entity).
576/// 2. `[entity:X]` marker in the title (operator-supplied fallback
577///    when no structured tag exists yet).
578///
579/// Returns `None` when neither yields a non-empty string — the row
580/// stays NULL on the column and contributes zero index pages (matches
581/// the partial index predicate `WHERE mentioned_entity_id IS NOT NULL`).
582///
583/// Restricted to `memory_kind = 'reflection'` rows: the matcher only
584/// scans reflections, so populating the column on observations would
585/// inflate the index footprint without speeding any query. (Persona
586/// rows already use the orthogonal QW-2 `entity_id` column for their
587/// own attribution.)
588pub(crate) fn extract_mentioned_entity_id(mem: &Memory) -> Option<String> {
589    if mem.memory_kind != MemoryKind::Reflection {
590        return None;
591    }
592    // Step 1: structured metadata.entity_id tag.
593    if let Some(eid) = mem
594        .metadata
595        .get("entity_id")
596        .and_then(|v| v.as_str())
597        .map(str::trim)
598        .filter(|s| !s.is_empty())
599    {
600        return Some(eid.to_string());
601    }
602    // Step 2: `[entity:X]` title marker. Mirrors the runtime extractor
603    // in `auto_persona::resolve_entity_id` so cadence accounting and
604    // matcher selection agree on the same descriptor for a given row.
605    if let Some(start) = mem.title.find("[entity:") {
606        let rest = &mem.title[start + "[entity:".len()..];
607        if let Some(end) = rest.find(']') {
608            let extracted = rest[..end].trim();
609            if !extracted.is_empty() {
610                return Some(extracted.to_string());
611            }
612        }
613    }
614    None
615}
616
617/// Insert with upsert on title+namespace. Returns the ID (existing or new).
618///
619/// Ultrareview #352: collapses the previous `INSERT`/`ON CONFLICT` +
620/// separate `SELECT` into a single `INSERT ... RETURNING id`. Another
621/// concurrent writer could otherwise slot in between the two statements
622/// and the `SELECT` would return the wrong row id. `SQLite` 3.35+
623/// supports `RETURNING`; it executes atomically within the `INSERT`.
624pub fn insert(conn: &Connection, mem: &Memory) -> Result<String> {
625    // v0.7.0 L1-6 Deliverable E — substrate governance pre-write
626    // gate. Consults the (optional) `GOVERNANCE_PRE_WRITE` hook
627    // BEFORE any SQL touches the DB; a refusal returns cleanly with
628    // no row written. See module-level comment for layering details.
629    consult_governance_pre_write(mem)?;
630
631    let tags_json = serde_json::to_string(&mem.tags)?;
632    let metadata_json = serde_json::to_string(&mem.metadata)?;
633    // v0.7.0 Form 4 — encode citations/source_span to JSON for the
634    // schema v38 TEXT columns. citations always lands as a JSON array
635    // (default `[]` when caller supplied nothing); source_span lands as
636    // `{start,end}` or NULL.
637    let citations_json = serde_json::to_string(&mem.citations)?;
638    let source_span_json = match mem.source_span {
639        Some(span) => Some(serde_json::to_string(&span)?),
640        None => None,
641    };
642    // v0.7.0 Form 5 — encode confidence-provenance fields for the
643    // schema v39 TEXT columns. The `confidence_source` column has a
644    // SQL DEFAULT of 'caller_provided' so legacy/default rows land
645    // there; `confidence_signals` is a JSON envelope (or NULL); and
646    // `confidence_decayed_at` is RFC3339 (or NULL).
647    let confidence_signals_json = match &mem.confidence_signals {
648        Some(s) => Some(serde_json::to_string(s)?),
649        None => None,
650    };
651    // v0.7.0 polish PERF-8 (#781) — denormalised `mentioned_entity_id`
652    // column, populated at write time from `metadata.entity_id` (or a
653    // `[entity:X]` title-marker fallback) on reflection rows. See
654    // `extract_mentioned_entity_id` for the resolution order.
655    let mentioned_entity_id = extract_mentioned_entity_id(mem);
656    // #1579 B6 — `insert` is the hottest write statement in the
657    // substrate (every store / upsert / capture-turn / federation push
658    // lands here). `prepare_cached` skips the re-parse of this ~60-line
659    // upsert on every call after the first.
660    let mut insert_stmt = conn.prepare_cached(
661        "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, confidence, source, access_count, created_at, updated_at, last_accessed_at, expires_at, metadata, reflection_depth, memory_kind, entity_id, persona_version, citations, source_uri, source_span, confidence_source, confidence_signals, confidence_decayed_at, mentioned_entity_id)
662         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, ?20, ?21, ?22, ?23, ?24, ?25, ?26)
663         ON CONFLICT(title, namespace) DO UPDATE SET
664            content = excluded.content,
665            tags = excluded.tags,
666            priority = MAX(memories.priority, excluded.priority),
667            confidence = MAX(memories.confidence, excluded.confidence),
668            source = excluded.source,
669            tier = CASE WHEN excluded.tier = 'long' THEN 'long'
670                        WHEN memories.tier = 'long' THEN 'long'
671                        WHEN excluded.tier = 'mid' THEN 'mid'
672                        ELSE memories.tier END,
673            updated_at = excluded.updated_at,
674            expires_at = CASE WHEN excluded.tier = 'long' OR memories.tier = 'long' THEN NULL
675                              ELSE COALESCE(excluded.expires_at, memories.expires_at) END,
676            -- Preserve metadata.agent_id across upsert (NHI provenance is immutable).
677            metadata = CASE
678                WHEN json_extract(memories.metadata, '$.agent_id') IS NOT NULL
679                THEN json_set(
680                    excluded.metadata,
681                    '$.agent_id',
682                    json_extract(memories.metadata, '$.agent_id')
683                )
684                ELSE excluded.metadata
685            END,
686            -- v0.7.0 Task 1/8 — recursion depth takes the max across upsert
687            -- so a subsequent reflection at higher depth doesn't lose its
688            -- provenance signal when re-stored at the same (title, namespace).
689            reflection_depth = MAX(memories.reflection_depth, excluded.reflection_depth),
690            -- v0.7.0 L1-1 — kind is sticky: once Reflection, always Reflection.
691            -- An upsert of an observation onto an existing reflection row must
692            -- not downgrade the kind (reflect is not reversible by re-store).
693            -- v0.7.0 QW-2 — Persona is also sticky once set; the engine
694            -- writes new versions via fresh rows under a unique
695            -- `__persona_<entity>_v<n>` title rather than upsert.
696            memory_kind = CASE WHEN memories.memory_kind = 'reflection' THEN 'reflection'
697                               WHEN memories.memory_kind = 'persona' THEN 'persona'
698                               ELSE excluded.memory_kind END,
699            -- v0.7.0 QW-2 — entity_id + persona_version stay attached to
700            -- the row they were minted with (Persona-kind upserts use
701            -- versioned titles so the conflict path is exercised only
702            -- on accidental same-title collisions).
703            entity_id = COALESCE(memories.entity_id, excluded.entity_id),
704            persona_version = COALESCE(memories.persona_version, excluded.persona_version),
705            -- v0.7.0 Form 4 — fact-provenance: when the incoming row
706            -- carries a non-empty citations array, replace the stored
707            -- value (caller re-asserted provenance); otherwise keep
708            -- the existing value (silent merge would lose freshly-cited
709            -- evidence). source_uri / source_span follow COALESCE
710            -- semantics so a new write that omits them does not blank
711            -- out existing provenance pointers.
712            citations = CASE WHEN excluded.citations = '[]'
713                             THEN memories.citations
714                             ELSE excluded.citations END,
715            source_uri = COALESCE(excluded.source_uri, memories.source_uri),
716            source_span = COALESCE(excluded.source_span, memories.source_span),
717            -- v0.7.0 Form 5 — confidence-provenance follows the same
718            -- shape as Form 4 columns: explicit non-default replaces;
719            -- caller_provided + NULL signals keep the existing
720            -- provenance signal so a re-store doesn't blank out an
721            -- auto-derived or calibrated value.
722            confidence_source = CASE WHEN excluded.confidence_source != 'caller_provided'
723                                     THEN excluded.confidence_source
724                                     ELSE memories.confidence_source END,
725            confidence_signals = COALESCE(excluded.confidence_signals, memories.confidence_signals),
726            confidence_decayed_at = COALESCE(excluded.confidence_decayed_at, memories.confidence_decayed_at),
727            -- v0.7.0 polish PERF-8 (#781) — denormalised mention tag.
728            -- COALESCE keeps any pre-existing tag (re-write that
729            -- omits the structured entity_id metadata should NOT
730            -- blank out the indexed column) while letting a fresh
731            -- extraction populate previously-NULL rows.
732            mentioned_entity_id = COALESCE(excluded.mentioned_entity_id, memories.mentioned_entity_id),
733            -- #1632 — upsert-merge IS a mutation (content/tags/priority
734            -- can change), so the Gap-1 optimistic-concurrency counter
735            -- bumps here exactly like db::update. Pre-#1632 a re-store
736            -- rewrote content while version stood still, so a stale
737            -- If-Match could overwrite the merge invisibly. The decay
738            -- sweep remains the only documented non-bumping mutator
739            -- (tests/non_version_bumping_sites_1036.rs).
740            version = memories.version + 1
741         RETURNING id",
742    )?;
743    let actual_id: String = insert_stmt.query_row(
744        params![
745            mem.id,
746            mem.tier.as_str(),
747            mem.namespace,
748            mem.title,
749            mem.content,
750            tags_json,
751            mem.priority,
752            mem.confidence,
753            mem.source,
754            mem.access_count,
755            mem.created_at,
756            mem.updated_at,
757            mem.last_accessed_at,
758            mem.effective_expires_at(),
759            metadata_json,
760            mem.reflection_depth,
761            mem.memory_kind.as_str(),
762            mem.entity_id,
763            mem.persona_version,
764            citations_json,
765            mem.source_uri,
766            source_span_json,
767            mem.confidence_source.as_str(),
768            confidence_signals_json,
769            mem.confidence_decayed_at,
770            mentioned_entity_id,
771        ],
772        |r| r.get(0),
773    )?;
774    Ok(actual_id)
775}
776
/// v0.7.0 fix campaign R1-M3 (#690) — the substrate-level `on_conflict`
/// policy accepted by [`insert_with_conflict`].
///
/// Every direct call to [`insert`] merges silently when the incoming
/// `(title, namespace)` pair already exists. The G6 work in v0.6.3.1
/// closed that gap at the MCP / HTTP **handler** layer (see
/// `mcp::tools::store` and `handlers::http::create_link`), but the
/// substrate-internal writers — `storage::reflect`, the curator
/// consolidation surface, and the federation `sync_push` link loop —
/// called [`insert`] directly and so inherited the silent merge. This
/// enum exposes the same three policies the handler layer offers, as a
/// typed value, so substrate callers can pick the semantics they need.
///
/// The three policies:
///
/// * [`ConflictMode::Error`] — reject the write with a typed error when
///   the `(title, namespace)` slot is already taken. `storage::reflect`
///   uses it so a duplicate reflection cannot silently replace an
///   earlier one.
///
/// * [`ConflictMode::Merge`] — the silent-merge behaviour that was the
///   v0.6.3 default; it runs the [`insert`] merge path unchanged, which
///   keeps existing callers backward compatible.
///
/// * [`ConflictMode::Version`] — keep both rows by suffixing the title
///   with a monotonic counter until an unused `(title, namespace)` slot
///   is found. Mirrors the handler's `on_conflict='version'` policy.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ConflictMode {
    /// Fail with a typed `(title, namespace)` collision error and leave
    /// the existing row untouched.
    Error,
    /// Merge silently into the existing `(title, namespace)` row (the
    /// legacy v0.6.3 substrate default). How content / tags /
    /// metadata.agent_id / reflection_depth are combined is defined by
    /// the SQL in [`insert`].
    Merge,
    /// Try `(2)`, `(3)`, … title suffixes until one is free, then insert
    /// a new row there. The old and the new row both persist.
    Version,
}
820
/// Typed collision error produced by [`insert_with_conflict`] in
/// [`ConflictMode::Error`] when the `(title, namespace)` slot is already
/// occupied. It names the colliding slot and the occupying row's id so a
/// caller can render a well-shaped diagnostic instead of a raw SQL error
/// string.
#[derive(Debug)]
pub struct ConflictError {
    /// Id of the row already holding the `(title, namespace)` slot.
    pub existing_id: String,
    /// Title of the rejected write.
    pub title: String,
    /// Namespace of the rejected write.
    pub namespace: String,
}

impl std::fmt::Display for ConflictError {
    /// Renders the single-line `CONFLICT: …` diagnostic.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            existing_id,
            title,
            namespace,
        } = self;
        write!(
            f,
            "CONFLICT: memory with title '{}' already exists in namespace '{}' \
             (existing id: {})",
            title, namespace, existing_id
        )
    }
}

// No underlying cause to expose: the default `source()` (None) applies.
impl std::error::Error for ConflictError {}
844
845/// v0.7.0 #1416 / RFC-0001 — sqlite SSOT for the L4 layered-capture
846/// idempotent write. Both the MCP `memory_capture_turn` handler (which
847/// holds a raw `&rusqlite::Connection`) and `SqliteStore::
848/// capture_turn_idempotent` (the SAL trait surface) call through here,
849/// so the dedup-lookup + atomic three-row insert exists in exactly one
850/// place on the sqlite path.
851///
852/// Mirrors the original inline handler transaction verbatim:
853/// 1. dedup SELECT on `(host_session_id, host_turn_index)` (the
854///    `IS NOT NULL` predicate pins the partial index from schema v52).
855/// 2. On hit → return the existing id with `dedup_hit: true`, no write.
856/// 3. On miss → `BEGIN IMMEDIATE` → `insert` (merge upsert) →
857///    `transcript_line_dedup` INSERT → `signed_events` chain row →
858///    COMMIT; any failure rolls all three rows back atomically.
859///
860/// # Errors
861///
862/// String-stable codes per the MCP error convention: `DEDUP_QUERY_FAILED`,
863/// `TX_BEGIN_FAILED`, `MEMORY_INSERT_FAILED`, `DEDUP_INSERT_FAILED`,
864/// `SIGNED_EVENTS_APPEND_FAILED`, `TX_COMMIT_FAILED`.
865pub fn capture_turn_idempotent(
866    conn: &Connection,
867    write: &crate::models::CaptureTurnWrite,
868) -> std::result::Result<crate::models::CaptureTurnResult, String> {
869    use rusqlite::OptionalExtension;
870
871    // #1579 B6 — the dedup probe fires on EVERY captured turn before
872    // any write; `prepare_cached` keeps the per-turn cost at bind+step.
873    let existing: Option<String> = conn
874        .prepare_cached(
875            "SELECT memory_id FROM transcript_line_dedup \
876             WHERE host_session_id IS NOT NULL \
877               AND host_session_id = ?1 \
878               AND host_turn_index = ?2",
879        )
880        .and_then(|mut stmt| {
881            stmt.query_row(
882                params![&write.host_session_id, write.host_turn_index],
883                |row| row.get(0),
884            )
885            .optional()
886        })
887        .map_err(|e| format!("DEDUP_QUERY_FAILED: {e}"))?;
888
889    if let Some(memory_id) = existing {
890        return Ok(crate::models::CaptureTurnResult {
891            memory_id,
892            dedup_hit: true,
893        });
894    }
895
896    conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)
897        .map_err(|e| format!("TX_BEGIN_FAILED: {e}"))?;
898
899    let tx_result = (|| -> std::result::Result<String, String> {
900        let inserted_id =
901            insert(conn, &write.memory).map_err(|e| format!("MEMORY_INSERT_FAILED: {e}"))?;
902
903        conn.prepare_cached(
904            "INSERT INTO transcript_line_dedup \
905             (sha256, memory_id, host_kind, transcript_path, \
906              host_session_id, host_turn_index, recovered_at) \
907             VALUES (?1, ?2, ?3, NULL, ?4, ?5, ?6)",
908        )
909        .and_then(|mut stmt| {
910            stmt.execute(params![
911                write.sha256,
912                inserted_id,
913                write.host_kind,
914                write.host_session_id,
915                write.host_turn_index,
916                write.recovered_at_ms,
917            ])
918        })
919        .map_err(|e| format!("DEDUP_INSERT_FAILED: {e}"))?;
920
921        crate::signed_events::append_signed_event_no_tx(conn, &write.signed_event)
922            .map_err(|e| format!("SIGNED_EVENTS_APPEND_FAILED: {e}"))?;
923
924        Ok(inserted_id)
925    })();
926
927    match tx_result {
928        Ok(memory_id) => {
929            conn.execute_batch(connection::SQL_COMMIT)
930                .map_err(|e| format!("TX_COMMIT_FAILED: {e}"))?;
931            Ok(crate::models::CaptureTurnResult {
932                memory_id,
933                dedup_hit: false,
934            })
935        }
936        Err(e) => {
937            let _ = conn.execute_batch(connection::SQL_ROLLBACK);
938            Err(e)
939        }
940    }
941}
942
943/// v0.7.0 fix campaign R1-M3 (#690) — insert a memory under an
944/// explicit [`ConflictMode`].
945///
946/// This is the substrate primitive every direct-DB writer that cares
947/// about collision semantics should reach for. Callers that want the
948/// legacy silent-merge behaviour (most of the existing surface) keep
949/// calling [`insert`] — it is now thin glue around
950/// `insert_with_conflict(.., ConflictMode::Merge)` so backward compat
951/// is preserved without invasive churn.
952///
953/// # Errors
954///
955/// * Bubbles up rusqlite errors from the underlying INSERT.
956/// * Under [`ConflictMode::Error`], returns a typed [`ConflictError`]
957///   when `(mem.title, mem.namespace)` already exists. The existing
958///   row is left untouched.
959/// * Under [`ConflictMode::Version`], returns an error when no free
960///   `title (N)` slot is found within the safety cap (see
961///   [`next_versioned_title`]).
962pub fn insert_with_conflict(conn: &Connection, mem: &Memory, mode: ConflictMode) -> Result<String> {
963    match mode {
964        ConflictMode::Merge => insert(conn, mem),
965        ConflictMode::Error => {
966            // v0.7.0 L1-6 Deliverable E — fire the pre-write governance
967            // hook BEFORE the existence-check `SELECT`. The Merge and
968            // Version branches reach the hook via the `insert(..)`
969            // tail call below; the `Error` branch needs its own gate
970            // because it bypasses `insert` to issue the unannotated
971            // INSERT itself. Refusal here returns no row written and
972            // no SELECT performed — symmetric with the Merge path.
973            consult_governance_pre_write(mem)?;
974            // Existence check + INSERT must be atomic against
975            // concurrent writers. We rely on the (title, namespace)
976            // UNIQUE index — issue a plain INSERT WITHOUT the upsert
977            // tail, let SQLite enforce the constraint, and translate
978            // the constraint violation into a typed error.
979            //
980            // The SELECT before INSERT is intentionally kept as an
981            // up-front read so the typed error message can carry the
982            // existing row's id. Two queries open a TOCTOU window
983            // (another writer slots in between SELECT and INSERT and
984            // we return Error pointing at the *wrong* existing id) —
985            // but the constraint violation on the subsequent INSERT
986            // still fires loud, and the caller's retry sees the new
987            // state. Reading the id is best-effort context for the
988            // diagnostic.
989            if let Some(existing_id) = find_by_title_namespace(conn, &mem.title, &mem.namespace)? {
990                return Err(ConflictError {
991                    existing_id,
992                    title: mem.title.clone(),
993                    namespace: mem.namespace.clone(),
994                }
995                .into());
996            }
997            let tags_json = serde_json::to_string(&mem.tags)?;
998            let metadata_json = serde_json::to_string(&mem.metadata)?;
999            // v0.7.0 Form 4 — encode citations + source_span for the
1000            // schema v38 TEXT columns. Mirrors the encode in
1001            // `insert(...)` above; the ConflictMode::Error path lands
1002            // here on the first-write happy path and must persist the
1003            // provenance columns the caller supplied.
1004            let citations_json = serde_json::to_string(&mem.citations)?;
1005            let source_span_json = match mem.source_span {
1006                Some(span) => Some(serde_json::to_string(&span)?),
1007                None => None,
1008            };
1009            // v0.7.0 Form 5 — encode confidence-provenance fields for
1010            // the schema v39 TEXT columns. Mirrors the encode in
1011            // `insert(...)` above.
1012            let confidence_signals_json = match &mem.confidence_signals {
1013                Some(s) => Some(serde_json::to_string(s)?),
1014                None => None,
1015            };
1016            // v0.7.0 polish PERF-8 (#781) — same denormalised mention
1017            // tag wired here so the ConflictMode::Error path (used by
1018            // `storage::reflect`) populates the indexed column on the
1019            // first-write happy path; otherwise the auto-persona matcher
1020            // would miss every reflection minted via reflect.
1021            let mentioned_entity_id = extract_mentioned_entity_id(mem);
1022            // v0.7.0 L1-1 wave merge — include the `memory_kind` column.
1023            // This INSERT path was added by the fix-campaign R1-M3
1024            // (ConflictMode::Error refuses duplicates) and originally
1025            // omitted the new L1-1 column because L1-1 was authored
1026            // against the pre-fix-campaign storage layer. Without
1027            // memory_kind here, a `db::reflect` call (which uses
1028            // `insert_with_conflict(.., ConflictMode::Error)`) loses
1029            // its `MemoryKind::Reflection` typing and the stored row
1030            // falls back to the column DEFAULT 'observation'.
1031            let actual_id: String = conn.query_row(
1032                "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, confidence, source, access_count, created_at, updated_at, last_accessed_at, expires_at, metadata, reflection_depth, memory_kind, entity_id, persona_version, citations, source_uri, source_span, confidence_source, confidence_signals, confidence_decayed_at, mentioned_entity_id)
1033                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, ?20, ?21, ?22, ?23, ?24, ?25, ?26)
1034                 RETURNING id",
1035                params![
1036                    mem.id, mem.tier.as_str(), mem.namespace, mem.title, mem.content,
1037                    tags_json, mem.priority, mem.confidence, mem.source, mem.access_count,
1038                    mem.created_at, mem.updated_at, mem.last_accessed_at, mem.effective_expires_at(),
1039                    metadata_json, mem.reflection_depth, mem.memory_kind.as_str(),
1040                    mem.entity_id, mem.persona_version,
1041                    citations_json, mem.source_uri, source_span_json,
1042                    mem.confidence_source.as_str(), confidence_signals_json, mem.confidence_decayed_at,
1043                    mentioned_entity_id,
1044                ],
1045                |r| r.get(0),
1046            ).map_err(|e| {
1047                // Translate a UNIQUE constraint violation that
1048                // raced past the SELECT into the typed error so
1049                // callers see the same shape on TOCTOU as on the
1050                // happy path.
1051                let msg = e.to_string();
1052                if msg.contains("UNIQUE constraint failed") {
1053                    anyhow::Error::new(ConflictError {
1054                        existing_id: String::new(),
1055                        title: mem.title.clone(),
1056                        namespace: mem.namespace.clone(),
1057                    })
1058                } else {
1059                    e.into()
1060                }
1061            })?;
1062            Ok(actual_id)
1063        }
1064        ConflictMode::Version => {
1065            let resolved_title = next_versioned_title(conn, &mem.title, &mem.namespace)?;
1066            let mut versioned = mem.clone();
1067            versioned.title = resolved_title;
1068            // The chosen title is fresh — fall into the plain insert
1069            // path (which still calls into the upsert SQL, but the
1070            // upsert branch is unreachable for a fresh title).
1071            insert(conn, &versioned)
1072        }
1073    }
1074}
1075
1076pub fn get(conn: &Connection, id: &str) -> Result<Option<Memory>> {
1077    let mut stmt = conn.prepare_cached(SQL_SELECT_MEMORY_ROW_BY_ID)?;
1078    let mut rows = stmt.query_map(params![id], row_to_memory)?;
1079    match rows.next() {
1080        Some(Ok(m)) => Ok(Some(m)),
1081        Some(Err(e)) => Err(e.into()),
1082        None => Ok(None),
1083    }
1084}
1085
1086/// Batch-fetch memories by ID. Mirrors [`get`] but issues a single
1087/// `WHERE id IN (?, ?, ...)` SELECT instead of N per-id round-trips.
1088///
1089/// v0.7.0 #981 — used by the HNSW [`semantic_phase`] recall branch
1090/// where ANN-hit batches of 50–250 IDs need to materialise as
1091/// `Memory` rows; the per-id `get` loop was 5–10× slower on a warm
1092/// cache and extended the DB-mutex hold (which compounds the
1093/// single-connection serialization the daemon ships with on sqlite).
1094///
1095/// Returns a `HashMap<String, Memory>` keyed by id so the caller can
1096/// re-apply the original hit ordering via the HNSW hit list.
1097///
1098/// Chunks ids into batches of 500 to stay well under SQLite's default
1099/// `SQLITE_LIMIT_VARIABLE_NUMBER = 999` regardless of how the operator
1100/// has compiled their sqlite (Debian ships 999, Alpine ships 250000;
1101/// 500 is a safe middle ground that also keeps the prepared-statement
1102/// plan reusable across calls).
1103///
1104/// Empty `ids` short-circuits to an empty map without touching the
1105/// connection. Missing rows are silently skipped — the caller can
1106/// observe via `fetched.get(&id).is_none()` and fall through to
1107/// whatever default the original per-id path would have produced.
1108pub fn get_many(conn: &Connection, ids: &[String]) -> Result<HashMap<String, Memory>> {
1109    let mut out: HashMap<String, Memory> = HashMap::with_capacity(ids.len());
1110    if ids.is_empty() {
1111        return Ok(out);
1112    }
1113    const CHUNK: usize = 500;
1114    for chunk in ids.chunks(CHUNK) {
1115        let placeholders = std::iter::repeat("?")
1116            .take(chunk.len())
1117            .collect::<Vec<_>>()
1118            .join(",");
1119        let sql = format!("SELECT * FROM memories WHERE id IN ({placeholders})");
1120        let mut stmt = conn.prepare(&sql)?;
1121        let rows = stmt.query_map(rusqlite::params_from_iter(chunk.iter()), row_to_memory)?;
1122        for r in rows {
1123            let mem = r?;
1124            out.insert(mem.id.clone(), mem);
1125        }
1126    }
1127    Ok(out)
1128}
1129
1130/// Look up a memory by ID prefix. Returns the memory if exactly one match is found.
1131/// Returns `Ok(None)` if no matches. Returns an error if the prefix is ambiguous (>1 match).
1132pub fn get_by_prefix(conn: &Connection, prefix: &str) -> Result<Option<Memory>> {
1133    // Escape SQL LIKE wildcards in the prefix to prevent % and _ from matching broadly
1134    let escaped = prefix.replace('%', "\\%").replace('_', "\\_");
1135    let pattern = format!("{escaped}%");
1136    let mut stmt = conn.prepare("SELECT * FROM memories WHERE id LIKE ?1 ESCAPE '\\'")?;
1137    let rows: Vec<Memory> = stmt
1138        .query_map(params![pattern], row_to_memory)?
1139        .filter_map(Result::ok)
1140        .collect();
1141    match rows.len() {
1142        0 => Ok(None),
1143        1 => Ok(Some(rows.into_iter().next().expect("len checked"))),
1144        _ => {
1145            let ids: Vec<String> = rows.iter().map(|m| m.id.clone()).collect();
1146            // #962 — typed envelope; handler downcasts via
1147            // `MemoryError::from(anyhow::Error)` to map to 400 BAD_REQUEST.
1148            // The match-count is preserved in `candidates.len()` so the
1149            // Display format ("ambiguous ID prefix 'X': N matches\n…")
1150            // stays byte-identical to the legacy bail!() string.
1151            Err(anyhow::Error::new(StorageError::AmbiguousIdPrefix {
1152                prefix: prefix.to_string(),
1153                candidates: ids,
1154            }))
1155        }
1156    }
1157}
1158
1159/// Resolve an ID that may be a prefix. Tries exact match first, then prefix match.
1160pub fn resolve_id(conn: &Connection, id: &str) -> Result<Option<Memory>> {
1161    if let Some(mem) = get(conn, id)? {
1162        return Ok(Some(mem));
1163    }
1164    get_by_prefix(conn, id)
1165}
1166
1167/// Bump access count, extend TTL, auto-promote — atomic via transaction.
1168pub fn touch(conn: &Connection, id: &str, short_extend: i64, mid_extend: i64) -> Result<()> {
1169    let now = Utc::now();
1170    let now_str = now.to_rfc3339();
1171    let short_expires = (now + chrono::Duration::seconds(short_extend)).to_rfc3339();
1172    let mid_expires = (now + chrono::Duration::seconds(mid_extend)).to_rfc3339();
1173
1174    conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)?;
1175
1176    let result = (|| -> Result<()> {
1177        // #1596 — the per-access TTL window is an extension FLOOR, not a
1178        // replacement. `MAX(expires_at, ?N)` keeps whichever expiry is
1179        // later, so a fresh mid-tier row carrying its create-time +7d
1180        // backstop is no longer pulled IN to now+1d on first recall
1181        // (lived evidence: row 4c7e7cc1 went 2026-06-18 → 2026-06-12).
1182        // Both operands are UTC RFC3339 strings, so SQLite's scalar
1183        // MAX() lexicographic comparison is chronological. Long-tier
1184        // (NULL expiry) rows stay NULL via the first CASE arm.
1185        conn.execute(
1186            "UPDATE memories SET
1187                access_count = MIN(access_count + 1, 1000000),
1188                last_accessed_at = ?1,
1189                expires_at = CASE
1190                    WHEN tier = 'long' THEN expires_at
1191                    WHEN tier = 'short' AND expires_at IS NOT NULL THEN MAX(expires_at, ?2)
1192                    WHEN tier = 'mid' AND expires_at IS NOT NULL THEN MAX(expires_at, ?3)
1193                    ELSE expires_at
1194                END
1195             WHERE id = ?4",
1196            params![now_str, short_expires, mid_expires, id],
1197        )?;
1198
1199        conn.execute(
1200            "UPDATE memories SET tier = 'long', expires_at = NULL, updated_at = ?1
1201             WHERE id = ?2 AND tier = 'mid' AND access_count >= ?3",
1202            params![now_str, id, PROMOTION_THRESHOLD],
1203        )?;
1204
1205        conn.execute(
1206            "UPDATE memories SET priority = MIN(priority + 1, 10)
1207             WHERE id = ?1 AND access_count > 0 AND access_count % 10 = 0 AND priority < 10",
1208            params![id],
1209        )?;
1210
1211        Ok(())
1212    })();
1213
1214    match result {
1215        Ok(()) => {
1216            conn.execute_batch(connection::SQL_COMMIT)?;
1217            Ok(())
1218        }
1219        Err(e) => {
1220            if let Err(rb) = conn.execute_batch(connection::SQL_ROLLBACK) {
1221                tracing::error!("ROLLBACK failed in touch: {}", rb);
1222            }
1223            Err(e)
1224        }
1225    }
1226}
1227
1228/// Cluster-F PERF-6 — batched touch.
1229///
1230/// Equivalent to invoking [`touch`] K times in sequence, but
1231/// collapses the per-row `BEGIN IMMEDIATE` … `COMMIT` cycle into a
1232/// SINGLE outer transaction so a K-row recall pays the SQLite
1233/// write-lock + commit cost ONCE instead of K times. The three
1234/// per-row UPDATE statements still run (same semantics: access bump
1235/// + TTL extend, mid→long promotion at `PROMOTION_THRESHOLD`,
1236/// priority+1 every 10 accesses); only the transaction framing
1237/// changes.
1238///
1239/// A failure mid-batch rolls back the entire transaction (no partial
1240/// touches survive) and surfaces a single error to the caller — which
1241/// matches the existing behaviour where any failed touch surfaces
1242/// to the recall log path.
1243///
1244/// Returns the number of rows successfully touched (always equal to
1245/// `ids.len()` on success).
1246pub fn touch_many(
1247    conn: &Connection,
1248    ids: &[&str],
1249    short_extend: i64,
1250    mid_extend: i64,
1251) -> Result<usize> {
1252    if ids.is_empty() {
1253        return Ok(0);
1254    }
1255    let now = Utc::now();
1256    let now_str = now.to_rfc3339();
1257    let short_expires = (now + chrono::Duration::seconds(short_extend)).to_rfc3339();
1258    let mid_expires = (now + chrono::Duration::seconds(mid_extend)).to_rfc3339();
1259
1260    conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)?;
1261
1262    let result = (|| -> Result<()> {
1263        // Cache the three prepared statements once for the whole
1264        // batch; each `execute` reuses the cached query plan instead
1265        // of re-parsing per row.
1266        // #1596 — extension-floor semantics, mirroring [`touch`]: the
1267        // per-access window only ever EXTENDS expiry (MAX over the
1268        // existing column), never shortens it. One batched UPDATE per
1269        // row is preserved.
1270        let mut bump_stmt = conn.prepare_cached(
1271            "UPDATE memories SET
1272                access_count = MIN(access_count + 1, 1000000),
1273                last_accessed_at = ?1,
1274                expires_at = CASE
1275                    WHEN tier = 'long' THEN expires_at
1276                    WHEN tier = 'short' AND expires_at IS NOT NULL THEN MAX(expires_at, ?2)
1277                    WHEN tier = 'mid' AND expires_at IS NOT NULL THEN MAX(expires_at, ?3)
1278                    ELSE expires_at
1279                END
1280             WHERE id = ?4",
1281        )?;
1282        let mut promote_stmt = conn.prepare_cached(
1283            "UPDATE memories SET tier = 'long', expires_at = NULL, updated_at = ?1
1284             WHERE id = ?2 AND tier = 'mid' AND access_count >= ?3",
1285        )?;
1286        let mut priority_stmt = conn.prepare_cached(
1287            "UPDATE memories SET priority = MIN(priority + 1, 10)
1288             WHERE id = ?1 AND access_count > 0 AND access_count % 10 = 0 AND priority < 10",
1289        )?;
1290        for id in ids {
1291            bump_stmt.execute(params![now_str, short_expires, mid_expires, id])?;
1292            promote_stmt.execute(params![now_str, id, PROMOTION_THRESHOLD])?;
1293            priority_stmt.execute(params![id])?;
1294        }
1295        Ok(())
1296    })();
1297
1298    match result {
1299        Ok(()) => {
1300            conn.execute_batch(connection::SQL_COMMIT)?;
1301            Ok(ids.len())
1302        }
1303        Err(e) => {
1304            if let Err(rb) = conn.execute_batch(connection::SQL_ROLLBACK) {
1305                tracing::error!("ROLLBACK failed in touch_many: {}", rb);
1306            }
1307            Err(e)
1308        }
1309    }
1310}
1311
/// v0.7.0 Provenance Gap 1 (issue #884) — typed optimistic-concurrency
/// error returned by [`update_with_expected_version`] when the caller
/// passed `expected_version` and the stored row's current `version`
/// has drifted. Carries both expected + current so the caller can
/// surface a useful diagnostic and choose between re-read+re-apply
/// or bubbling CONFLICT upstream.
///
/// The `Display` form is
/// `CONFLICT: memory <id> expected_version=<expected> but stored version=<current>`.
#[derive(Debug, Clone)]
pub struct VersionConflict {
    /// Id of the memory row whose version check failed.
    pub id: String,
    /// Version the caller said it had read.
    pub expected: i64,
    /// Version found in the store at the time of the check.
    pub current: i64,
}

impl std::fmt::Display for VersionConflict {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            id,
            expected,
            current,
        } = self;
        write!(
            f,
            "CONFLICT: memory {id} expected_version={expected} but stored version={current}"
        )
    }
}

impl std::error::Error for VersionConflict {}
1339
1340#[allow(clippy::too_many_arguments)]
1341pub fn update(
1342    conn: &Connection,
1343    id: &str,
1344    title: Option<&str>,
1345    content: Option<&str>,
1346    tier: Option<&Tier>,
1347    namespace: Option<&str>,
1348    tags: Option<&Vec<String>>,
1349    priority: Option<i32>,
1350    confidence: Option<f64>,
1351    expires_at: Option<&str>,
1352    metadata: Option<&serde_json::Value>,
1353) -> Result<(bool, bool)> {
1354    update_with_expected_version(
1355        conn, id, title, content, tier, namespace, tags, priority, confidence, expires_at,
1356        metadata, None, None,
1357    )
1358}
1359
1360/// v0.7.0 Provenance Gap 1 (issue #884) — optimistic-concurrency aware
1361/// variant of [`update`]. When `expected_version` is `Some(v)`, the
1362/// update fails with a typed [`VersionConflict`] error if the stored
1363/// row's `version` is not equal to `v`. When `None`, the legacy
1364/// last-write-wins behaviour is preserved (still bumps `version` on
1365/// success). On a successful mutation the row's `version` is
1366/// monotonically incremented; the new value is observable on the
1367/// subsequent read.
1368///
1369/// # Errors
1370///
1371/// * [`VersionConflict`] — when `expected_version` is `Some` and the
1372///   stored value has drifted.
1373/// * Other rusqlite errors bubble up from the prepare/execute pair.
1374#[allow(clippy::too_many_arguments, clippy::too_many_lines)]
1375pub fn update_with_expected_version(
1376    conn: &Connection,
1377    id: &str,
1378    title: Option<&str>,
1379    content: Option<&str>,
1380    tier: Option<&Tier>,
1381    namespace: Option<&str>,
1382    tags: Option<&Vec<String>>,
1383    priority: Option<i32>,
1384    confidence: Option<f64>,
1385    expires_at: Option<&str>,
1386    metadata: Option<&serde_json::Value>,
1387    source_uri: Option<&str>,
1388    expected_version: Option<i64>,
1389) -> Result<(bool, bool)> {
1390    let mut stmt = conn.prepare_cached(SQL_SELECT_MEMORY_ROW_BY_ID)?;
1391    let mut rows = stmt.query_map(params![id], row_to_memory)?;
1392    let Some(Ok(existing)) = rows.next() else {
1393        return Ok((false, false));
1394    };
1395    drop(rows);
1396    drop(stmt);
1397
1398    // v0.7.0 Provenance Gap 1 (#884) — pre-check optimistic gate.
1399    // The same predicate is also asserted atomically inside the
1400    // UPDATE statement below so a racing writer that slipped in
1401    // between the SELECT and the UPDATE still fails CONFLICT.
1402    if let Some(expected) = expected_version
1403        && existing.version != expected
1404    {
1405        return Err(VersionConflict {
1406            id: existing.id.clone(),
1407            expected,
1408            current: existing.version,
1409        }
1410        .into());
1411    }
1412
1413    let new_title = title.unwrap_or(&existing.title);
1414    let new_content = content.unwrap_or(&existing.content);
1415    let content_changed = new_title != existing.title || new_content != existing.content;
1416
1417    // Tier downgrade protection: never downgrade, consistent with insert path.
1418    let effective_tier = match (tier, &existing.tier) {
1419        (Some(requested), existing_tier) => match (existing_tier, requested) {
1420            (Tier::Long, _) => &Tier::Long,         // long never downgrades
1421            (Tier::Mid, Tier::Short) => &Tier::Mid, // mid never downgrades to short
1422            (_, requested) => requested,            // upgrades and same-tier are fine
1423        },
1424        (None, existing_tier) => existing_tier,
1425    };
1426
1427    let namespace = namespace.unwrap_or(&existing.namespace);
1428    let tags = tags.unwrap_or(&existing.tags);
1429    let priority = priority.unwrap_or(existing.priority);
1430    let confidence = confidence.unwrap_or(existing.confidence);
1431    // Treat empty string as None (clear expiry) — don't store "" in the DB
1432    let expires_at = match expires_at {
1433        Some("" | "null") => None,
1434        Some(v) => Some(v),
1435        None => existing.expires_at.as_deref(),
1436    };
1437    let metadata = metadata.unwrap_or(&existing.metadata);
1438
1439    // #1451 (SEC, HIGH) — substrate governance pre-write gate on the
1440    // optimistic-update path. The insert/supersede/consolidate/restore
1441    // paths all consult GOVERNANCE_PRE_WRITE; update was the lone gap,
1442    // so a refuse rule could be evaded by storing benign content then
1443    // updating it into the refused namespace/tier/title. Build the
1444    // post-merge row and consult BEFORE any SQL touches the DB; a
1445    // refusal returns the typed GovernanceRefusal with no row mutated.
1446    let governed = Memory {
1447        tier: effective_tier.clone(),
1448        namespace: namespace.to_string(),
1449        title: new_title.to_string(),
1450        content: new_content.to_string(),
1451        tags: tags.clone(),
1452        priority,
1453        confidence,
1454        expires_at: expires_at.map(str::to_string),
1455        metadata: metadata.clone(),
1456        source_uri: source_uri
1457            .map(str::to_string)
1458            .or_else(|| existing.source_uri.clone()),
1459        ..existing.clone()
1460    };
1461    consult_governance_pre_write(&governed)?;
1462
1463    let tags_json = serde_json::to_string(tags)?;
1464    let metadata_json = serde_json::to_string(metadata)?;
1465    let now = Utc::now().to_rfc3339();
1466
1467    // Ultrareview #354: rely on the UNIQUE INDEX on (title, namespace)
1468    // to enforce collision atomically at the DB layer. The previous
1469    // check-then-update sequence had a race — another transaction
1470    // could insert a colliding row between the SELECT and the UPDATE,
1471    // and the UPDATE would surface as a generic SQLite constraint
1472    // error to the caller. Now the collision check is inline: the
1473    // UPDATE fails with a well-scoped UniqueViolation, and we re-
1474    // query the colliding row's id only on that specific error for
1475    // the friendly message.
1476    //
1477    // v0.7.0 Provenance Gap 1 (#884) — UPDATE re-asserts
1478    // `expected_version` atomically and bumps `version + 1` on
1479    // success so a racing caller that read the SAME expected_version
1480    // sees a CONFLICT (their WHERE clause no longer matches the
1481    // bumped value). When `expected_version` is NULL the
1482    // `?12 IS NULL` predicate short-circuits the gate.
1483    // v0.7.0 Provenance Gap 2 (#906) — `source_uri` is an opt-in patch
1484    // field. When `None`, the COALESCE keeps the stored value (a
1485    // patch that doesn't touch source_uri must NOT blank it out).
1486    // When `Some(uri)`, the row's source_uri is rewritten verbatim
1487    // (rename / scheme migration / bad-data correction).
1488    let update_res = conn.execute(
1489        "UPDATE memories SET tier=?1, namespace=?2, title=?3, content=?4, tags=?5, priority=?6, confidence=?7, updated_at=?8, expires_at=?9, metadata=?10, source_uri = COALESCE(?11, source_uri), version = version + 1
1490         WHERE id=?12 AND (?13 IS NULL OR version = ?13)",
1491        params![effective_tier.as_str(), namespace, new_title, new_content, tags_json, priority, confidence, now, expires_at, metadata_json, source_uri, id, expected_version],
1492    );
1493    match update_res {
1494        Ok(0) => {
1495            // Either the row vanished between SELECT and UPDATE, or
1496            // the version drifted (racing writer slipped in). When
1497            // expected_version was supplied, re-read so the CONFLICT
1498            // envelope carries the current stored value.
1499            if let Some(expected) = expected_version {
1500                let current_version: Option<i64> = conn
1501                    .query_row(
1502                        "SELECT version FROM memories WHERE id = ?1",
1503                        params![id],
1504                        |r| r.get(0),
1505                    )
1506                    .ok();
1507                if let Some(current) = current_version {
1508                    return Err(VersionConflict {
1509                        id: id.to_string(),
1510                        expected,
1511                        current,
1512                    }
1513                    .into());
1514                }
1515            }
1516            Ok((false, false))
1517        }
1518        Ok(_) => Ok((true, content_changed)),
1519        Err(rusqlite::Error::SqliteFailure(err, _))
1520            if err.code == rusqlite::ErrorCode::ConstraintViolation =>
1521        {
1522            let other: Option<String> = conn
1523                .query_row(
1524                    "SELECT id FROM memories WHERE title = ?1 AND namespace = ?2 AND id != ?3",
1525                    params![new_title, namespace, id],
1526                    |r| r.get(0),
1527                )
1528                .ok();
1529            if let Some(other_id) = other {
1530                // #962 typed envelope — UniqueConflict surfaces as
1531                // `MemoryError::Conflict` (HTTP 409).
1532                return Err(anyhow::Error::new(StorageError::UniqueConflict {
1533                    reason: format!(
1534                        "title '{new_title}' already exists in namespace '{namespace}' (memory {other_id})"
1535                    ),
1536                }));
1537            }
1538            Err(anyhow::anyhow!("update failed with constraint violation"))
1539        }
1540        Err(e) => Err(e.into()),
1541    }
1542}
1543
/// v0.7.0 Provenance Gap 5 (issue #888) — append-and-archive result
/// returned by [`update_with_archive_on_supersede`].
///
/// * `archived_id` is the OLD memory's id (now in
///   `archived_memories` with `archive_reason='superseded'`).
/// * `new_id` is the freshly-minted row carrying the patched
///   content. The supersede lineage is encoded via TWO mechanisms
///   (NOT three): (1) `archived_memories.archive_reason='superseded'`
///   on the OLD row, (2) `new_memory.metadata.superseded_id` forward
///   pointer on the NEW row. A `memory_links` `supersedes` edge is
///   NOT written because the FK `target_id REFERENCES memories(id)`
///   would reject it (the archived row no longer lives in the live
///   `memories` table). See #895 for the future archive-cross-ref
///   path that would unblock a uniform link surface.
#[derive(Debug, Clone)]
pub struct SupersedeResult {
    /// Id of the OLD row that was moved out of `memories` into the archive.
    pub archived_id: String,
    /// Id of the NEW live row (a freshly generated UUID v4 string).
    pub new_id: String,
}
1563
1564/// v0.7.0 Provenance Gap 5 (issue #888) — append-and-archive write
1565/// path. Used by the MCP `memory_update` tool when the caller passes
1566/// `edit_source` of `llm` or `hook`. Atomic: every step runs inside
1567/// a `BEGIN IMMEDIATE` / `COMMIT` pair so a failure mid-way leaves
1568/// the old row live (no partial supersede).
1569///
1570/// Sequence (mirrors mem9's split-write-path pattern):
1571///
1572/// 1. Honor the optimistic-concurrency gate (`expected_version`)
1573///    against the OLD row. Conflict surfaces as
1574///    [`VersionConflict`] before any mutation lands.
1575/// 2. Archive the OLD row with `archive_reason='superseded'` and a
1576///    `superseded_at` timestamp in the archive metadata so a
1577///    rewind via `memory_archive_list` can find it.
1578/// 3. Insert a NEW memory row carrying the patched fields. The new
1579///    row's `(title, namespace)` may collide with the archived
1580///    row's (since the archive is in a separate table); the new
1581///    row's `id` is fresh.
1582/// 4. Stamp the supersede pointer in the new row's
1583///    `metadata.superseded_id`. A `memory_links` `supersedes` row
1584///    is intentionally NOT written — the FK target would point at
1585///    the archived id which has left the live `memories` table.
1586///    See impl comment + #895 for the archive-cross-ref follow-on.
1587///
1588/// # Errors
1589///
1590/// * [`VersionConflict`] — when `expected_version` is `Some` and
1591///   the stored row's `version` has drifted.
1592/// * rusqlite / serde errors bubble up from the underlying
1593///   archive + insert + link writes.
1594#[allow(clippy::too_many_arguments, clippy::too_many_lines)]
1595pub fn update_with_archive_on_supersede(
1596    conn: &Connection,
1597    id: &str,
1598    title: Option<&str>,
1599    content: Option<&str>,
1600    tier: Option<&Tier>,
1601    namespace: Option<&str>,
1602    tags: Option<&Vec<String>>,
1603    priority: Option<i32>,
1604    confidence: Option<f64>,
1605    expires_at: Option<&str>,
1606    metadata: Option<&serde_json::Value>,
1607    source_uri: Option<&str>,
1608    expected_version: Option<i64>,
1609    edit_source: crate::models::EditSource,
1610) -> Result<SupersedeResult> {
1611    // Read the existing row so we can compose the patched NEW row.
1612    let mut stmt = conn.prepare_cached(SQL_SELECT_MEMORY_ROW_BY_ID)?;
1613    let mut rows = stmt.query_map(params![id], row_to_memory)?;
1614    let Some(Ok(existing)) = rows.next() else {
1615        // #962 typed envelope — 404 NOT_FOUND through MemoryError mapping.
1616        return Err(anyhow::Error::new(StorageError::MemoryNotFound {
1617            id: id.to_string(),
1618            role: None,
1619        }));
1620    };
1621    drop(rows);
1622    drop(stmt);
1623
1624    // v0.7.0 Provenance Gap 1 (#884) — optimistic-concurrency gate.
1625    if let Some(expected) = expected_version
1626        && existing.version != expected
1627    {
1628        return Err(VersionConflict {
1629            id: existing.id.clone(),
1630            expected,
1631            current: existing.version,
1632        }
1633        .into());
1634    }
1635
1636    // Compose the NEW memory row by overlaying the patch on the
1637    // OLD row. Mirrors the in-place `update` patch semantics:
1638    // unspecified fields inherit from the existing row.
1639    let new_id = uuid::Uuid::new_v4().to_string();
1640    let now = Utc::now().to_rfc3339();
1641    let new_title = title.unwrap_or(&existing.title).to_string();
1642    let new_content = content.unwrap_or(&existing.content).to_string();
1643    // Tier monotonicity preserved (long ≥ mid ≥ short).
1644    let new_tier = match (tier, &existing.tier) {
1645        (Some(requested), existing_tier) => match (existing_tier, requested) {
1646            (Tier::Long, _) => Tier::Long,
1647            (Tier::Mid, Tier::Short) => Tier::Mid,
1648            (_, r) => r.clone(),
1649        },
1650        (None, existing_tier) => existing_tier.clone(),
1651    };
1652    let new_namespace = namespace.unwrap_or(&existing.namespace).to_string();
1653    let new_tags = tags.cloned().unwrap_or_else(|| existing.tags.clone());
1654    let new_priority = priority.unwrap_or(existing.priority);
1655    let new_confidence = confidence.unwrap_or(existing.confidence);
1656    let new_expires = match expires_at {
1657        Some("" | "null") => None,
1658        Some(v) => Some(v.to_string()),
1659        None => existing.expires_at.clone(),
1660    };
1661    // v0.7.0 Provenance Gap 2 (#906) — caller-supplied source_uri
1662    // wins; otherwise inherit from the OLD row. Mirrors the pattern
1663    // used for title/content/tier above.
1664    let new_source_uri = match source_uri {
1665        Some(uri) => Some(uri.to_string()),
1666        None => existing.source_uri.clone(),
1667    };
1668    // Stamp the edit-source provenance into the new row's metadata so
1669    // downstream observers can tell this row came from an
1670    // append-and-archive supersede vs. a direct user write.
1671    let mut new_metadata = metadata
1672        .cloned()
1673        .unwrap_or_else(|| existing.metadata.clone());
1674    if let serde_json::Value::Object(ref mut m) = new_metadata {
1675        m.insert(
1676            "edit_source".to_string(),
1677            serde_json::Value::String(edit_source.as_str().to_string()),
1678        );
1679        m.insert(
1680            field_names::SUPERSEDED_ID.to_string(),
1681            serde_json::Value::String(existing.id.clone()),
1682        );
1683    }
1684
1685    // #1638 — archive + insert run inside ONE BEGIN IMMEDIATE (below),
1686    // honoring the documented atomicity contract: a failure mid-way
1687    // (SQLITE_BUSY from a concurrent CLI-process writer, ENOSPC, FTS
1688    // trigger I/O error on the insert) rolls back the archive too, so
1689    // the OLD row stays live instead of vanishing into the archive
1690    // with an error returned. Uses `archive_memory_no_tx` (the
1691    // `append_signed_event_no_tx` idiom) because SQLite refuses
1692    // nested transactions.
1693    let archived_id = existing.id.clone();
1694
1695    // FX-C5 — compose the NEW row up front so the substrate
1696    // pre-write governance hook (`GOVERNANCE_PRE_WRITE`) gets a
1697    // chance to refuse BEFORE the archive step destroys the live
1698    // OLD row. Pre-FX-C5 the hook was consulted transitively via
1699    // `insert(..)` at the tail of this function; archive ran first
1700    // so a refusal left the live table without the OLD row AND
1701    // without the patched NEW row. Now the hook fires on a fully-
1702    // composed candidate before any state mutation, mirroring the
1703    // FX-2 pattern on the postgres adapter (see
1704    // `consult_governance_pre_write_pg` in `src/store/postgres.rs`).
1705    let mut new_mem = existing.clone();
1706    new_mem.id = new_id.clone();
1707    new_mem.title = new_title;
1708    new_mem.content = new_content;
1709    new_mem.tier = new_tier;
1710    new_mem.namespace = new_namespace;
1711    new_mem.tags = new_tags;
1712    new_mem.priority = new_priority;
1713    new_mem.confidence = new_confidence;
1714    new_mem.expires_at = new_expires;
1715    new_mem.metadata = new_metadata;
1716    new_mem.source_uri = new_source_uri;
1717    new_mem.created_at = now.clone();
1718    new_mem.updated_at = now.clone();
1719    new_mem.access_count = 0;
1720    new_mem.last_accessed_at = None;
1721    // The NEW row starts at version=1 — it is a fresh row, not a
1722    // continuation of the OLD row's version chain (the chain is
1723    // preserved via the supersede link stamped in metadata).
1724    new_mem.version = crate::models::default_memory_version();
1725
1726    // FX-C5 — consult the substrate governance pre-write hook on
1727    // the composed NEW row BEFORE archiving the OLD row. A refusal
1728    // returns cleanly with no state change.
1729    consult_governance_pre_write(&new_mem)?;
1730
1731    // Steps 1+2 (#1638): one transaction around archive + insert.
1732    conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)?;
1733    let tx_result = (|| -> Result<()> {
1734        // Step 1: archive the OLD row with reason='superseded'.
1735        let moved = archive_memory_no_tx(conn, &archived_id, Some("superseded"))?;
1736        if !moved {
1737            // #962 typed envelope — substrate-internal fault (DB row
1738            // vanished between read and write or row count drifted).
1739            // Maps to 500.
1740            return Err(anyhow::Error::new(StorageError::ArchiveSupersedeFailed {
1741                archived_id: archived_id.clone(),
1742            }));
1743        }
1744        // Step 2: insert the NEW row carrying the patched content.
1745        insert(conn, &new_mem)?;
1746        Ok(())
1747    })();
1748    match tx_result {
1749        Ok(()) => conn.execute_batch(connection::SQL_COMMIT)?,
1750        Err(e) => {
1751            let _ = conn.execute_batch(connection::SQL_ROLLBACK);
1752            return Err(e);
1753        }
1754    }
1755
1756    // Step 3: the supersede edge from new→archived id is preserved
1757    // in the new row's `metadata.superseded_id` (see above). A
1758    // proper `memory_links` row would trip the FK CHECK on
1759    // `target_id REFERENCES memories(id)` because the OLD row no
1760    // longer lives in `memories`; the metadata pointer is the
1761    // substrate-clean way to record the lineage until archive
1762    // cross-references land (tracked separately).
1763    Ok(SupersedeResult {
1764        archived_id,
1765        new_id,
1766    })
1767}
1768
1769pub fn delete(conn: &Connection, id: &str) -> Result<bool> {
1770    // Clean up namespace_meta if this memory was a namespace standard
1771    conn.execute(SQL_DELETE_NAMESPACE_META_BY_STANDARD_ID, params![id])?;
1772    let changed = conn.execute(SQL_DELETE_MEMORY_BY_ID, params![id])?;
1773    Ok(changed > 0)
1774}
1775
1776/// Move a memory from `memories` to `archived_memories`. Used by the
1777/// HTTP `/api/v1/archive` explicit-archive endpoint (S29) and by
1778/// `sync_push` when a peer pushes an `archives: [id]` record.
1779///
1780/// Unlike `gc(archive=true)` this does not filter on `expires_at` — the
1781/// caller is explicitly asking for the row to be archived right now.
1782///
1783/// Returns `true` if a row was moved, `false` if no live memory existed
1784/// with this id (e.g. it was already archived or never written locally).
1785/// A missing-on-peer id is expected during normal fanout and callers
1786/// treat it as a no-op.
1787///
1788/// # Errors
1789///
1790/// Returns an error if the INSERT-SELECT or DELETE fails.
1791pub fn archive_memory(conn: &Connection, id: &str, reason: Option<&str>) -> Result<bool> {
1792    conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)?;
1793    let result = archive_memory_no_tx(conn, id, reason);
1794    match result {
1795        Ok(moved) => {
1796            conn.execute_batch(connection::SQL_COMMIT)?;
1797            Ok(moved)
1798        }
1799        Err(e) => {
1800            let _ = conn.execute_batch(connection::SQL_ROLLBACK);
1801            Err(e)
1802        }
1803    }
1804}
1805
1806/// #1638 — transaction-free core of [`archive_memory`], for callers
1807/// that already hold an open transaction (the supersede path wraps
1808/// archive + insert in ONE `BEGIN IMMEDIATE` so a mid-failure leaves
1809/// the OLD row live, per the function's documented atomicity
1810/// contract). Same idiom as `append_signed_event_no_tx`.
1811pub(crate) fn archive_memory_no_tx(
1812    conn: &Connection,
1813    id: &str,
1814    reason: Option<&str>,
1815) -> Result<bool> {
1816    let now = Utc::now().to_rfc3339();
1817    let reason = reason.unwrap_or("archive");
1818    let result = (|| -> Result<bool> {
1819        let exists: bool = conn
1820            .query_row(SQL_MEMORY_EXISTS_COUNT, params![id], |r| r.get(0))
1821            .unwrap_or(false);
1822        if !exists {
1823            return Ok(false);
1824        }
1825        // v0.6.3.1 P2 (G5) — copy embedding + embedding_dim into the archive
1826        // and capture original tier + expires_at so restore_archived can
1827        // round-trip the row instead of resetting to long/permanent.
1828        conn.execute(
1829            "INSERT OR REPLACE INTO archived_memories
1830             (id, tier, namespace, title, content, tags, priority, confidence,
1831              source, access_count, created_at, updated_at, last_accessed_at,
1832              expires_at, archived_at, archive_reason, metadata,
1833              embedding, embedding_dim, original_tier, original_expires_at,
1834              reflection_depth, atomised_into, atom_of, memory_kind,
1835              entity_id, persona_version, citations, source_uri, source_span,
1836              confidence_source, confidence_signals, confidence_decayed_at,
1837              mentioned_entity_id, version)
1838             SELECT id, tier, namespace, title, content, tags, priority, confidence,
1839                    source, access_count, created_at, updated_at, last_accessed_at,
1840                    expires_at, ?1, ?2, metadata,
1841                    embedding, embedding_dim, tier, expires_at,
1842                    reflection_depth, atomised_into, atom_of, memory_kind,
1843                    entity_id, persona_version, citations, source_uri, source_span,
1844                    confidence_source, confidence_signals, confidence_decayed_at,
1845                    mentioned_entity_id, version
1846             FROM memories WHERE id = ?3",
1847            params![now, reason, id],
1848        )?;
1849        // Clean up namespace_meta — mirrors `delete`'s cleanup so an archived
1850        // row is not still referenced as the namespace standard.
1851        conn.execute(SQL_DELETE_NAMESPACE_META_BY_STANDARD_ID, params![id])?;
1852        let removed = conn.execute(SQL_DELETE_MEMORY_BY_ID, params![id])?;
1853        Ok(removed > 0)
1854    })();
1855    result
1856}
1857
1858/// #940 (security-high, 2026-05-20) — caller-scoped archive variant.
1859/// Mirrors [`archive_memory`] but constrains the soft-move to rows
1860/// in the live `memories` table whose `metadata->'agent_id'` JSON
1861/// field matches `caller` (with the inbox-target carve-out:
1862/// `metadata->'target_agent_id' == caller` is also archivable by
1863/// the inbox owner, matching
1864/// [`crate::store::is_visible_to_caller`]).
1865///
1866/// Pre-#940 the HTTP handler at
1867/// `src/handlers/archive.rs::archive_by_ids` (sqlite branch) called
1868/// the owner-blind [`archive_memory`] directly; any authenticated
1869/// HTTP caller could bulk-archive any other owner's live rows
1870/// (cross-tenant denial-of-service primitive). The postgres SAL
1871/// branch was already QC-P1-fixed (2026-05-20) to pass
1872/// `CallerContext::for_agent(caller)`; the sqlite branch is closed
1873/// by this helper. Returns `Ok(false)` on a non-owner attempt so
1874/// the surface cannot be used to probe other owners' live ids.
1875///
1876/// # Errors
1877///
1878/// Returns an error if the INSERT-SELECT or DELETE fails.
1879pub fn archive_memory_for_caller(
1880    conn: &Connection,
1881    id: &str,
1882    reason: Option<&str>,
1883    caller: &str,
1884) -> Result<bool> {
1885    let now = Utc::now().to_rfc3339();
1886    let reason = reason.unwrap_or("archive");
1887    conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)?;
1888    let result = (|| -> Result<bool> {
1889        // Owner gate: row must exist AND match the caller (or be an
1890        // inbox-target row whose recipient is the caller).
1891        let owned: bool = conn
1892            .query_row(
1893                "SELECT COUNT(*) > 0 FROM memories \
1894                 WHERE id = ?1 \
1895                   AND ( \
1896                     json_extract(metadata, '$.agent_id') = ?2 OR \
1897                     json_extract(metadata, '$.target_agent_id') = ?2 OR \
1898                     json_extract(metadata, '$.agent_id') IS NULL OR \
1899                     json_extract(metadata, '$.agent_id') = '' \
1900                   )",
1901                params![id, caller],
1902                |r| r.get(0),
1903            )
1904            .unwrap_or(false);
1905        if !owned {
1906            return Ok(false);
1907        }
1908        conn.execute(
1909            "INSERT OR REPLACE INTO archived_memories
1910             (id, tier, namespace, title, content, tags, priority, confidence,
1911              source, access_count, created_at, updated_at, last_accessed_at,
1912              expires_at, archived_at, archive_reason, metadata,
1913              embedding, embedding_dim, original_tier, original_expires_at,
1914              reflection_depth, atomised_into, atom_of, memory_kind,
1915              entity_id, persona_version, citations, source_uri, source_span,
1916              confidence_source, confidence_signals, confidence_decayed_at,
1917              mentioned_entity_id, version)
1918             SELECT id, tier, namespace, title, content, tags, priority, confidence,
1919                    source, access_count, created_at, updated_at, last_accessed_at,
1920                    expires_at, ?1, ?2, metadata,
1921                    embedding, embedding_dim, tier, expires_at,
1922                    reflection_depth, atomised_into, atom_of, memory_kind,
1923                    entity_id, persona_version, citations, source_uri, source_span,
1924                    confidence_source, confidence_signals, confidence_decayed_at,
1925                    mentioned_entity_id, version
1926             FROM memories WHERE id = ?3",
1927            params![now, reason, id],
1928        )?;
1929        // Clean up namespace_meta — mirrors `delete`'s cleanup so an archived
1930        // row is not still referenced as the namespace standard.
1931        conn.execute(SQL_DELETE_NAMESPACE_META_BY_STANDARD_ID, params![id])?;
1932        let removed = conn.execute(SQL_DELETE_MEMORY_BY_ID, params![id])?;
1933        Ok(removed > 0)
1934    })();
1935    match result {
1936        Ok(moved) => {
1937            conn.execute_batch(connection::SQL_COMMIT)?;
1938            Ok(moved)
1939        }
1940        Err(e) => {
1941            let _ = conn.execute_batch(connection::SQL_ROLLBACK);
1942            Err(e)
1943        }
1944    }
1945}
1946
1947/// #1601 — build the FTS5 query for the DESTRUCTIVE forget paths.
1948///
1949/// `forget` / `forget_count` historically routed the caller's pattern
1950/// through `sanitize_fts_query(pat, /* use_or = */ true)` — the fuzzy
1951/// OR join the recall path uses for high RANKED retrieval. For a bulk
1952/// DELETE that over-matches catastrophically: pattern "D6 scratch"
1953/// matched (and would delete) every row containing EITHER token, and
1954/// "D6 nonexistentzzzword" still matched rows containing just "D6".
1955/// Destructive matching must be conservative: every
1956/// whitespace-separated token must match (FTS5 implicit AND — the
1957/// sanitized phrase-quoted tokens are space-joined). All three forget
1958/// sites (`forget_count`, the `forget` delete arm, and the
1959/// archive-before-delete arm) route through this single builder so
1960/// their match sets can never drift apart.
1961fn forget_fts_query(pat: &str) -> String {
1962    sanitize_fts_query(pat, false)
1963}
1964
1965/// Count memories that would be deleted by forget (for `dry_run`).
1966pub fn forget_count(
1967    conn: &Connection,
1968    namespace: Option<&str>,
1969    pattern: Option<&str>,
1970    tier: Option<&Tier>,
1971) -> Result<usize> {
1972    if pattern.is_none() && namespace.is_none() && tier.is_none() {
1973        // #962 typed envelope — 400 BAD_REQUEST via ValidationFailed.
1974        return Err(anyhow::Error::new(StorageError::InvalidArgument {
1975            reason: crate::errors::msg::FORGET_FILTER_REQUIRED.to_string(),
1976        }));
1977    }
1978    if let Some(pat) = pattern {
1979        let fts_query = forget_fts_query(pat);
1980        let tier_str = tier.map(|t| t.as_str().to_string());
1981        let count: i64 = conn.query_row(
1982            "SELECT COUNT(*) FROM memories WHERE rowid IN (
1983                SELECT m.rowid FROM memories_fts fts
1984                JOIN memories m ON m.rowid = fts.rowid
1985                WHERE memories_fts MATCH ?1
1986                  AND (?2 IS NULL OR m.namespace = ?2)
1987                  AND (?3 IS NULL OR m.tier = ?3)
1988            )",
1989            params![fts_query, namespace, tier_str],
1990            |r| r.get(0),
1991        )?;
1992        return Ok(usize::try_from(count).unwrap_or(0));
1993    }
1994    let tier_str = tier.map(|t| t.as_str().to_string());
1995    let count: i64 = conn.query_row(
1996        "SELECT COUNT(*) FROM memories WHERE (?1 IS NULL OR namespace = ?1) AND (?2 IS NULL OR tier = ?2)",
1997        params![namespace, tier_str],
1998        |r| r.get(0),
1999    )?;
2000    Ok(usize::try_from(count).unwrap_or(0))
2001}
2002
2003/// Forget by pattern — delete memories matching namespace + FTS pattern + tier.
2004/// If `archive` is true, archives memories before deletion.
2005pub fn forget(
2006    conn: &Connection,
2007    namespace: Option<&str>,
2008    pattern: Option<&str>,
2009    tier: Option<&Tier>,
2010    archive: bool,
2011) -> Result<usize> {
2012    if pattern.is_none() && namespace.is_none() && tier.is_none() {
2013        // #962 typed envelope — 400 BAD_REQUEST via ValidationFailed.
2014        return Err(anyhow::Error::new(StorageError::InvalidArgument {
2015            reason: crate::errors::msg::FORGET_FILTER_REQUIRED.to_string(),
2016        }));
2017    }
2018
2019    if archive {
2020        // Archive matching memories before deletion
2021        let now = Utc::now().to_rfc3339();
2022        if let Some(pat) = pattern {
2023            let fts_query = forget_fts_query(pat);
2024            let tier_str = tier.map(|t| t.as_str().to_string());
2025            // v0.6.3.1 P2 (G5) — preserve embedding + tier + expiry on forget-archive.
2026            // v0.7.0 issue #861 — also project `metadata` into the
2027            // archive row. The pre-fix INSERT omitted both the column
2028            // and the SELECT expression, so the column defaulted to
2029            // `'{}'` and `memory_archive_list` returned an empty object
2030            // for every forget-archived row (silently stripping
2031            // `agent_id`, `imported_from_*`, and every other operator-
2032            // visible attribution key). Mirrors the gc + explicit-
2033            // archive paths that already preserve metadata.
2034            conn.execute(
2035                "INSERT OR REPLACE INTO archived_memories
2036                 (id, tier, namespace, title, content, tags, priority, confidence,
2037                  source, access_count, created_at, updated_at, last_accessed_at,
2038                  expires_at, archived_at, archive_reason, metadata,
2039                  embedding, embedding_dim, original_tier, original_expires_at,
2040                  reflection_depth, atomised_into, atom_of, memory_kind,
2041                  entity_id, persona_version, citations, source_uri, source_span,
2042                  confidence_source, confidence_signals, confidence_decayed_at,
2043                  mentioned_entity_id, version)
2044                 SELECT id, tier, namespace, title, content, tags, priority, confidence,
2045                        source, access_count, created_at, updated_at, last_accessed_at,
2046                        expires_at, ?4, 'forget', metadata,
2047                        embedding, embedding_dim, tier, expires_at,
2048                        reflection_depth, atomised_into, atom_of, memory_kind,
2049                        entity_id, persona_version, citations, source_uri, source_span,
2050                        confidence_source, confidence_signals, confidence_decayed_at,
2051                        mentioned_entity_id, version
2052                 FROM memories WHERE rowid IN (
2053                    SELECT m.rowid FROM memories_fts fts
2054                    JOIN memories m ON m.rowid = fts.rowid
2055                    WHERE memories_fts MATCH ?1
2056                      AND (?2 IS NULL OR m.namespace = ?2)
2057                      AND (?3 IS NULL OR m.tier = ?3)
2058                 )",
2059                params![fts_query, namespace, tier_str, now],
2060            )?;
2061        } else {
2062            let tier_str = tier.map(|t| t.as_str().to_string());
2063            // v0.7.0 issue #861 — same metadata-projection fix as the
2064            // patterned branch above. Forget without a pattern still
2065            // archives whole namespaces/tiers, so the same bug applied.
2066            conn.execute(
2067                "INSERT OR REPLACE INTO archived_memories
2068                 (id, tier, namespace, title, content, tags, priority, confidence,
2069                  source, access_count, created_at, updated_at, last_accessed_at,
2070                  expires_at, archived_at, archive_reason, metadata,
2071                  embedding, embedding_dim, original_tier, original_expires_at,
2072                  reflection_depth, atomised_into, atom_of, memory_kind,
2073                  entity_id, persona_version, citations, source_uri, source_span,
2074                  confidence_source, confidence_signals, confidence_decayed_at,
2075                  mentioned_entity_id, version)
2076                 SELECT id, tier, namespace, title, content, tags, priority, confidence,
2077                        source, access_count, created_at, updated_at, last_accessed_at,
2078                        expires_at, ?3, 'forget', metadata,
2079                        embedding, embedding_dim, tier, expires_at,
2080                        reflection_depth, atomised_into, atom_of, memory_kind,
2081                        entity_id, persona_version, citations, source_uri, source_span,
2082                        confidence_source, confidence_signals, confidence_decayed_at,
2083                        mentioned_entity_id, version
2084                 FROM memories WHERE (?1 IS NULL OR namespace = ?1) AND (?2 IS NULL OR tier = ?2)",
2085                params![namespace, tier_str, now],
2086            )?;
2087        }
2088    }
2089
2090    // If pattern provided, use FTS to find matching IDs
2091    if let Some(pat) = pattern {
2092        let fts_query = forget_fts_query(pat);
2093        let tier_str = tier.map(|t| t.as_str().to_string());
2094        let deleted = conn.execute(
2095            "DELETE FROM memories WHERE rowid IN (
2096                SELECT m.rowid FROM memories_fts fts
2097                JOIN memories m ON m.rowid = fts.rowid
2098                WHERE memories_fts MATCH ?1
2099                  AND (?2 IS NULL OR m.namespace = ?2)
2100                  AND (?3 IS NULL OR m.tier = ?3)
2101            )",
2102            params![fts_query, namespace, tier_str],
2103        )?;
2104        return Ok(deleted);
2105    }
2106
2107    let tier_str = tier.map(|t| t.as_str().to_string());
2108    let deleted = conn.execute(
2109        "DELETE FROM memories WHERE (?1 IS NULL OR namespace = ?1) AND (?2 IS NULL OR tier = ?2)",
2110        params![namespace, tier_str],
2111    )?;
2112    Ok(deleted)
2113}
2114
/// #1602 — one row of a forget preview / deletion audit listing.
///
/// Each field is read from the identically named column of the
/// `memories` table by [`forget_matches`].
#[derive(Debug, Clone, serde::Serialize)]
pub struct ForgetMatch {
    /// `memories.id` of the matched row.
    pub id: String,
    /// `memories.title` of the matched row.
    pub title: String,
    /// `memories.namespace` of the matched row.
    pub namespace: String,
    /// `memories.tier` of the matched row, as stored in the database.
    pub tier: String,
}
2123
2124/// #1602 — list the rows the forget filters currently match, capped
2125/// at `limit`.
2126///
2127/// `memory_forget {dry_run:true}` previously returned only a blind
2128/// `{would_delete: N}` count, so callers had no way to see WHAT a
2129/// destructive pattern was about to remove; the live run likewise
2130/// returned only a count, leaving recovery (archive restore) a
2131/// guessing game. This helper shares filter semantics with [`forget`]
2132/// / [`forget_count`] — including the #1601 AND pattern matching via
2133/// [`forget_fts_query`] — so the preview is exactly the set `forget`
2134/// would delete. Rows come back in stable `rowid` order; callers pass
2135/// `cap + 1` to detect truncation without a second COUNT query.
2136pub fn forget_matches(
2137    conn: &Connection,
2138    namespace: Option<&str>,
2139    pattern: Option<&str>,
2140    tier: Option<&Tier>,
2141    limit: usize,
2142) -> Result<Vec<ForgetMatch>> {
2143    if pattern.is_none() && namespace.is_none() && tier.is_none() {
2144        // #962 typed envelope — same refusal as `forget` / `forget_count`.
2145        return Err(anyhow::Error::new(StorageError::InvalidArgument {
2146            reason: crate::errors::msg::FORGET_FILTER_REQUIRED.to_string(),
2147        }));
2148    }
2149    let tier_str = tier.map(|t| t.as_str().to_string());
2150    let limit_i64 = i64::try_from(limit).unwrap_or(i64::MAX);
2151    let row_to_match = |row: &rusqlite::Row<'_>| -> rusqlite::Result<ForgetMatch> {
2152        Ok(ForgetMatch {
2153            id: row.get(0)?,
2154            title: row.get(1)?,
2155            namespace: row.get(2)?,
2156            tier: row.get(3)?,
2157        })
2158    };
2159    if let Some(pat) = pattern {
2160        let fts_query = forget_fts_query(pat);
2161        let mut stmt = conn.prepare(
2162            "SELECT m.id, m.title, m.namespace, m.tier
2163             FROM memories_fts fts
2164             JOIN memories m ON m.rowid = fts.rowid
2165             WHERE memories_fts MATCH ?1
2166               AND (?2 IS NULL OR m.namespace = ?2)
2167               AND (?3 IS NULL OR m.tier = ?3)
2168             ORDER BY m.rowid
2169             LIMIT ?4",
2170        )?;
2171        let rows = stmt
2172            .query_map(
2173                params![fts_query, namespace, tier_str, limit_i64],
2174                row_to_match,
2175            )?
2176            .collect::<rusqlite::Result<Vec<_>>>()?;
2177        return Ok(rows);
2178    }
2179    let mut stmt = conn.prepare(
2180        "SELECT id, title, namespace, tier FROM memories
2181         WHERE (?1 IS NULL OR namespace = ?1) AND (?2 IS NULL OR tier = ?2)
2182         ORDER BY rowid
2183         LIMIT ?3",
2184    )?;
2185    let rows = stmt
2186        .query_map(params![namespace, tier_str, limit_i64], row_to_match)?
2187        .collect::<rusqlite::Result<Vec<_>>>()?;
2188    Ok(rows)
2189}
2190
2191/// #1579 A2 — build the sargable `list` SQL + parameter vector.
2192///
2193/// The legacy single-shape query expressed every optional filter as a
2194/// `(?N IS NULL OR col = ?N)` arm. SQLite cannot drive such an arm
2195/// through an index (the predicate is not sargable), so the P1 perf
2196/// audit measured the 100k-row list page at ~141 ms: the plan answered
2197/// the expiry guard via `idx_memories_expires` and paid a USE TEMP
2198/// B-TREE FOR ORDER BY over the whole table. Appending each filter
2199/// ONLY when the caller supplied it gives the planner bare `col = ?` /
2200/// `col >= ?` predicates, so it walks `idx_memories_list_order
2201/// (priority DESC, updated_at DESC)` — or `idx_memories_ns_list_order
2202/// (namespace, priority DESC, updated_at DESC)` for namespace-filtered
2203/// shapes — in ORDER BY order with early-stop under the LIMIT
2204/// (~0.06 ms on the same corpus). EXPLAIN QUERY PLAN proof is pinned
2205/// by `tests/issue_1579_storage_perf.rs`.
2206///
2207/// The distinct shapes repeat across calls, so `list` prepares them
2208/// via `prepare_cached` — at most 2^7 shapes exist and real traffic
2209/// concentrates on a handful.
2210///
2211/// Public as the test-facing SSOT accessor for the EXPLAIN-pinning
2212/// regression tests (the `current_schema_version_for_tests` precedent):
2213/// the tests must plan the EXACT SQL production runs, not a restated
2214/// copy that could drift.
2215#[allow(clippy::too_many_arguments)]
2216#[must_use]
2217pub fn build_list_query(
2218    namespace: Option<&str>,
2219    tier: Option<&Tier>,
2220    min_priority: Option<i32>,
2221    now: &str,
2222    since: Option<&str>,
2223    until: Option<&str>,
2224    tags_filter: Option<&str>,
2225    agent_id: Option<&str>,
2226    limit: usize,
2227    offset: usize,
2228) -> (String, Vec<Box<dyn rusqlite::types::ToSql>>) {
2229    let mut sql = String::from(SQL_LIST_BASE);
2230    let mut params_vec: Vec<Box<dyn rusqlite::types::ToSql>> = vec![Box::new(now.to_string())];
2231    if let Some(ns) = namespace {
2232        sql.push_str(" AND namespace = ?");
2233        params_vec.push(Box::new(ns.to_string()));
2234    }
2235    if let Some(t) = tier {
2236        sql.push_str(" AND tier = ?");
2237        params_vec.push(Box::new(t.as_str().to_string()));
2238    }
2239    if let Some(p) = min_priority {
2240        sql.push_str(" AND priority >= ?");
2241        params_vec.push(Box::new(p));
2242    }
2243    if let Some(s) = since {
2244        sql.push_str(" AND created_at >= ?");
2245        params_vec.push(Box::new(s.to_string()));
2246    }
2247    if let Some(u) = until {
2248        sql.push_str(" AND created_at <= ?");
2249        params_vec.push(Box::new(u.to_string()));
2250    }
2251    if let Some(tag) = tags_filter {
2252        sql.push_str(
2253            " AND EXISTS (SELECT 1 FROM json_each(memories.tags) WHERE json_each.value = ?)",
2254        );
2255        params_vec.push(Box::new(tag.to_string()));
2256    }
2257    if let Some(a) = agent_id {
2258        sql.push_str(" AND agent_id_idx = ?");
2259        params_vec.push(Box::new(a.to_string()));
2260    }
2261    sql.push_str(SQL_LIST_ORDER_LIMIT);
2262    params_vec.push(Box::new(limit));
2263    params_vec.push(Box::new(offset));
2264    (sql, params_vec)
2265}
2266
2267#[allow(clippy::too_many_arguments)]
2268pub fn list(
2269    conn: &Connection,
2270    namespace: Option<&str>,
2271    tier: Option<&Tier>,
2272    limit: usize,
2273    offset: usize,
2274    min_priority: Option<i32>,
2275    since: Option<&str>,
2276    until: Option<&str>,
2277    tags_filter: Option<&str>,
2278    agent_id: Option<&str>,
2279) -> Result<Vec<Memory>> {
2280    let now = Utc::now().to_rfc3339();
2281    let (sql, params_vec) = build_list_query(
2282        namespace,
2283        tier,
2284        min_priority,
2285        &now,
2286        since,
2287        until,
2288        tags_filter,
2289        agent_id,
2290        limit,
2291        offset,
2292    );
2293    let params_refs: Vec<&dyn rusqlite::types::ToSql> =
2294        params_vec.iter().map(std::convert::AsRef::as_ref).collect();
2295    let mut stmt = conn.prepare_cached(&sql)?;
2296    let rows = stmt.query_map(params_refs.as_slice(), row_to_memory)?;
2297    rows.collect::<rusqlite::Result<Vec<_>>>()
2298        .map_err(Into::into)
2299}
2300
2301/// L1-1 (v0.7.0) — return all non-expired memories that match the given
2302/// [`crate::models::MemoryKind`]. Used by the L2-1 curator reflection pass to
2303/// enumerate observation-class memories as synthesis candidates.
2304///
2305/// The query is deliberately minimal: no tier filter, no priority floor, no
2306/// pagination. Callers that need subsetting should post-filter the returned
2307/// `Vec<Memory>`. The index on `memory_kind` (added in migration v30) keeps
2308/// this query O(kind-count) rather than O(table-size) on production data.
2309#[allow(dead_code)] // consumed by L2-1 curator; not yet wired in this PR
2310pub(crate) fn memories_by_kind(
2311    conn: &Connection,
2312    kind: &crate::models::MemoryKind,
2313) -> Result<Vec<Memory>> {
2314    let now = Utc::now().to_rfc3339();
2315    let mut stmt = conn.prepare(
2316        "SELECT * FROM memories
2317         WHERE memory_kind = ?1
2318           AND (expires_at IS NULL OR expires_at > ?2)
2319         ORDER BY priority DESC, updated_at DESC",
2320    )?;
2321    let rows = stmt.query_map(params![kind.as_str(), now], row_to_memory)?;
2322    rows.collect::<rusqlite::Result<Vec<_>>>()
2323        .map_err(Into::into)
2324}
2325
2326#[allow(clippy::too_many_arguments)]
2327pub fn search(
2328    conn: &Connection,
2329    query: &str,
2330    namespace: Option<&str>,
2331    tier: Option<&Tier>,
2332    limit: usize,
2333    min_priority: Option<i32>,
2334    since: Option<&str>,
2335    until: Option<&str>,
2336    tags_filter: Option<&str>,
2337    agent_id: Option<&str>,
2338    as_agent: Option<&str>,
2339    // v0.7.0 WT-1-E — when false (default), search excludes archived
2340    // sources whose atoms surface in their place. See
2341    // [`recall_with_telemetry`] for the full contract.
2342    include_archived: bool,
2343) -> Result<Vec<Memory>> {
2344    search_with_source_uri(
2345        conn,
2346        query,
2347        namespace,
2348        tier,
2349        limit,
2350        min_priority,
2351        since,
2352        until,
2353        tags_filter,
2354        agent_id,
2355        as_agent,
2356        include_archived,
2357        None,
2358    )
2359}
2360
2361/// v0.7.0 Provenance Gap 6 (issue #889) — search with optional
2362/// reciprocal `source_uri` filter. When `source_uri` is `Some(uri)`,
2363/// the FTS search is post-filtered (in SQL) to memories whose
2364/// `source_uri` column equals the supplied value verbatim. The
2365/// partial `idx_memories_source_uri` index (created at v38) covers
2366/// the lookup, keeping it O(log N) over the URI-keyed subspace.
2367///
2368/// When `source_uri` is `None`, this delegates to the legacy
2369/// [`search`] path verbatim.
2370#[allow(clippy::too_many_arguments)]
2371pub fn search_with_source_uri(
2372    conn: &Connection,
2373    query: &str,
2374    namespace: Option<&str>,
2375    tier: Option<&Tier>,
2376    limit: usize,
2377    min_priority: Option<i32>,
2378    since: Option<&str>,
2379    until: Option<&str>,
2380    tags_filter: Option<&str>,
2381    agent_id: Option<&str>,
2382    as_agent: Option<&str>,
2383    include_archived: bool,
2384    source_uri: Option<&str>,
2385) -> Result<Vec<Memory>> {
2386    let now = Utc::now().to_rfc3339();
2387    let tier_str = tier.map(|t| t.as_str().to_string());
2388    let fts_query = sanitize_fts_query(query, false);
2389    let (vis_p, vis_t, vis_u, vis_o) = compute_visibility_prefixes(as_agent);
2390    let archived_fragment = archived_source_clause(include_archived, "m");
2391    let source_uri_fragment = if source_uri.is_some() {
2392        "AND m.source_uri = ?15"
2393    } else {
2394        ""
2395    };
2396
2397    let sql = format!(
2398        "SELECT m.id, m.tier, m.namespace, m.title, m.content, m.tags, m.priority,
2399                m.confidence, m.source, m.access_count, m.created_at, m.updated_at,
2400                m.last_accessed_at, m.expires_at, m.metadata, m.reflection_depth,
2401                m.memory_kind, m.entity_id, m.persona_version,
2402                m.citations, m.source_uri, m.source_span,
2403                m.confidence_source, m.confidence_signals, m.confidence_decayed_at
2404         FROM memories_fts fts
2405         JOIN memories m ON m.rowid = fts.rowid
2406         WHERE memories_fts MATCH ?1
2407           AND (?2 IS NULL OR m.namespace = ?2)
2408           AND (?3 IS NULL OR m.tier = ?3)
2409           AND (?4 IS NULL OR m.priority >= ?4)
2410           AND (m.expires_at IS NULL OR m.expires_at > ?5)
2411           AND (?6 IS NULL OR m.created_at >= ?6)
2412           AND (?7 IS NULL OR m.created_at <= ?7)
2413           AND (?8 IS NULL OR EXISTS (SELECT 1 FROM json_each(m.tags) WHERE json_each.value = ?8))
2414           AND (?10 IS NULL OR m.agent_id_idx = ?10)
2415           {archived_fragment}
2416           {source_uri_fragment}
2417           {vis}
2418         ORDER BY (fts.rank * -1)
2419           + (m.priority * 0.5)
2420           + (MIN(m.access_count, 50) * 0.1)
2421           + (m.confidence * 2.0)
2422           + (1.0 / (1.0 + (julianday('now') - julianday(m.updated_at)) * 0.1))
2423           DESC
2424         LIMIT ?9",
2425        vis = visibility_clause(11, "m"),
2426    );
2427    let mut stmt = conn.prepare(&sql)?;
2428    let rows = if let Some(uri) = source_uri {
2429        stmt.query_map(
2430            params![
2431                fts_query,
2432                namespace,
2433                tier_str,
2434                min_priority,
2435                now,
2436                since,
2437                until,
2438                tags_filter,
2439                limit,
2440                agent_id,
2441                vis_p,
2442                vis_t,
2443                vis_u,
2444                vis_o,
2445                uri,
2446            ],
2447            row_to_memory,
2448        )?
2449        .collect::<rusqlite::Result<Vec<_>>>()
2450        .map_err(Into::into)
2451    } else {
2452        stmt.query_map(
2453            params![
2454                fts_query,
2455                namespace,
2456                tier_str,
2457                min_priority,
2458                now,
2459                since,
2460                until,
2461                tags_filter,
2462                limit,
2463                agent_id,
2464                vis_p,
2465                vis_t,
2466                vis_u,
2467                vis_o,
2468            ],
2469            row_to_memory,
2470        )?
2471        .collect::<rusqlite::Result<Vec<_>>>()
2472        .map_err(Into::into)
2473    };
2474    rows
2475}
2476
2477/// v0.7.0 Provenance Gap 6 (issue #889) — list every memory carrying
2478/// the supplied `source_uri`. Bypasses the FTS layer so callers that
2479/// want the full reciprocal set ("every memory from this document")
2480/// don't need to type a query. Hits the partial
2481/// `idx_memories_source_uri` index directly. Pure read.
2482///
2483/// `as_agent` is the visibility principal. When `Some(...)`, the
2484/// `compute_visibility_prefixes` + `visibility_clause` pair is applied
2485/// so the reciprocal source-uri endpoint respects the same
2486/// scope=private gate as `search_with_source_uri` (#942 + #975
2487/// follow-up: any query path returning Memory MUST inherit the SAL
2488/// #910 visibility filter). When `None`, the filter is bypassed —
2489/// reserved for substrate-internal callers + tests that explicitly
2490/// opt out.
2491pub fn list_by_source_uri(
2492    conn: &Connection,
2493    source_uri: &str,
2494    namespace: Option<&str>,
2495    limit: Option<usize>,
2496    as_agent: Option<&str>,
2497) -> Result<Vec<Memory>> {
2498    let cap = limit.unwrap_or(LIST_DEFAULT_CAP).min(LIST_MAX_LIMIT);
2499    let (vis_p, vis_t, vis_u, vis_o) = compute_visibility_prefixes(as_agent);
2500    // Placeholder layout: ?1 = uri, ?2 = namespace, ?3 = limit,
2501    // ?4..?7 = visibility prefixes (private/team/unit/org).
2502    let sql = format!(
2503        "SELECT m.id, m.tier, m.namespace, m.title, m.content, m.tags, m.priority,
2504                m.confidence, m.source, m.access_count, m.created_at, m.updated_at,
2505                m.last_accessed_at, m.expires_at, m.metadata, m.reflection_depth,
2506                m.memory_kind, m.entity_id, m.persona_version,
2507                m.citations, m.source_uri, m.source_span,
2508                m.confidence_source, m.confidence_signals, m.confidence_decayed_at,
2509                m.version
2510         FROM memories m
2511         WHERE m.source_uri = ?1
2512           AND (?2 IS NULL OR m.namespace = ?2)
2513           {vis}
2514         ORDER BY m.created_at ASC
2515         LIMIT ?3",
2516        vis = visibility_clause(4, "m"),
2517    );
2518    let mut stmt = conn.prepare(&sql)?;
2519    let rows = stmt.query_map(
2520        params![
2521            source_uri,
2522            namespace,
2523            i64::try_from(cap).unwrap_or(i64::MAX),
2524            vis_p,
2525            vis_t,
2526            vis_u,
2527            vis_o,
2528        ],
2529        row_to_memory,
2530    )?;
2531    rows.collect::<rusqlite::Result<Vec<_>>>()
2532        .map_err(Into::into)
2533}
2534
2535/// Task 1.12 — proximity boost applied to a memory's score based on its
2536/// depth distance from the queried agent namespace. Uses the formula
2537/// `1 / (1 + depth_distance * 0.3)` per spec. Distance 0 = full strength
2538/// (1.0), each step up the hierarchy dampens linearly.
2539#[must_use]
2540pub fn proximity_boost(agent_ns: &str, memory_ns: &str) -> f64 {
2541    let agent_depth = crate::models::namespace_depth(agent_ns);
2542    let memory_depth = crate::models::namespace_depth(memory_ns);
2543    let distance = agent_depth.saturating_sub(memory_depth);
2544    #[allow(clippy::cast_precision_loss)]
2545    let d = distance as f64;
2546    1.0 / (1.0 + d * 0.3)
2547}
2548
2549/// Task 1.12 — SQL fragment + boolean indicating whether hierarchy
2550/// expansion is in play. When active the `namespace` SQL param binds
2551/// NULL (so `?N IS NULL OR m.namespace = ?N` passes trivially) and a
2552/// separate `AND m.namespace IN (<ancestors>)` clause narrows to the
2553/// hierarchy. When inactive the returned fragment is empty.
2554///
2555/// Ancestor strings are interpolated because `SQLite` `IN` with a
2556/// variable-length positional list is awkward, and the inputs come
2557/// from `namespace_ancestors()` → `validate_namespace`-approved
2558/// strings. Single-quote doubling is applied defensively.
2559///
2560/// PERF-8 (FX-C4-batch2, 2026-05-26): the hierarchy fragment is a
2561/// pure function of `namespace`, so a bounded LRU cache amortises
2562/// the `format!` + `Vec<String>::join` cost across the recall
2563/// hot path. Cache hits return a clone of the cached `String`
2564/// (still allocates, but skips the per-call SQL string build); the
2565/// cache itself is keyed by namespace string and capped at
2566/// `HIERARCHY_CACHE_MAX` entries to bound memory in the face of
2567/// per-tenant namespace explosions.
2568fn hierarchy_in_clause(namespace: Option<&str>) -> (Option<String>, bool) {
2569    let Some(ns) = namespace else {
2570        return (None, false);
2571    };
2572    if !ns.contains('/') {
2573        return (None, false);
2574    }
2575
2576    // PERF-8 cache lookup. The cache stores the rendered SQL
2577    // fragment Option<String>; the `bool` shadow flag is always
2578    // `true` for cached entries (we only cache hierarchical
2579    // namespaces — the `!ns.contains('/')` short-circuit above
2580    // never reaches the cache).
2581    if let Some(cached) = hierarchy_cache_get(ns) {
2582        return (Some(cached), true);
2583    }
2584
2585    let ancestors = crate::models::namespace_ancestors(ns);
2586    if ancestors.is_empty() {
2587        return (None, false);
2588    }
2589    let quoted: Vec<String> = ancestors
2590        .iter()
2591        .map(|a| format!("'{}'", a.replace('\'', "''")))
2592        .collect();
2593    let fragment = format!("AND m.namespace IN ({})", quoted.join(","));
2594    hierarchy_cache_put(ns, &fragment);
2595    (Some(fragment), true)
2596}
2597
// PERF-8 (FX-C4-batch2, 2026-05-26) — maximum number of entries kept in
// the bounded cache of rendered `hierarchy_in_clause` SQL fragments.
// The cache is NOT an LRU: once this many entries are present,
// `hierarchy_cache_put` drops one arbitrary entry to make room. Cap
// chosen to be large enough for the typical few-hundred-namespace
// deployment while keeping memory bounded on multi-tenant hosts.
const HIERARCHY_CACHE_MAX: usize = 256;
2603
/// Process-wide store backing the PERF-8 hierarchy-fragment cache: a
/// mutex-guarded map from namespace string to its rendered SQL
/// fragment. The map is created empty on first access and every call
/// returns a reference to that same instance.
fn hierarchy_cache() -> &'static std::sync::Mutex<std::collections::HashMap<String, String>> {
    type FragmentMap = std::collections::HashMap<String, String>;
    static CACHE: std::sync::OnceLock<std::sync::Mutex<FragmentMap>> = std::sync::OnceLock::new();
    CACHE.get_or_init(Default::default)
}
2609
2610fn hierarchy_cache_get(ns: &str) -> Option<String> {
2611    let cache = hierarchy_cache().lock().ok()?;
2612    cache.get(ns).cloned()
2613}
2614
2615fn hierarchy_cache_put(ns: &str, fragment: &str) {
2616    let Ok(mut cache) = hierarchy_cache().lock() else {
2617        return;
2618    };
2619    if cache.len() >= HIERARCHY_CACHE_MAX {
2620        // Bounded eviction: drop one arbitrary entry. The cache is
2621        // not a true LRU because the recall hot path runs in
2622        // microseconds and a full LRU's bookkeeping cost would
2623        // dominate the cache-hit savings. Random eviction is fine
2624        // because the hot working set typically stays well under
2625        // the cap; the eviction only fires on the long tail.
2626        if let Some(k) = cache.keys().next().cloned() {
2627            cache.remove(&k);
2628        }
2629    }
2630    cache.insert(ns.to_string(), fragment.to_string());
2631}
2632
/// Test-only helper: empty the PERF-8 hierarchy-fragment cache so a
/// test starts from a known state. Does nothing if the cache mutex is
/// poisoned.
#[cfg(test)]
fn hierarchy_cache_clear_for_tests() {
    let Ok(mut entries) = hierarchy_cache().lock() else {
        return;
    };
    entries.clear();
}
2639
2640/// Task 1.12 — apply proximity boost to scored memories ranked against
2641/// an agent's hierarchical namespace. Re-sorts by boosted score.
2642fn apply_proximity_boost(scored: Vec<(Memory, f64)>, agent_ns: &str) -> Vec<(Memory, f64)> {
2643    let mut boosted: Vec<(Memory, f64)> = scored
2644        .into_iter()
2645        .map(|(mem, score)| {
2646            let boost = proximity_boost(agent_ns, &mem.namespace);
2647            (mem, score * boost)
2648        })
2649        .collect();
2650    boosted.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
2651    boosted
2652}
2653
2654/// Phase P6 (R1) — count tokens in `text` using OpenAI's `cl100k_base`
2655/// BPE encoding. This is the de-facto standard for Claude / GPT context
2656/// budgeting and is shipped with `tiktoken-rs` (the BPE table is embedded
2657/// in the crate, ~1.7 MB, so the count is offline-deterministic across
2658/// all hosts). The encoder is built lazily and cached process-wide via
2659/// `OnceLock` — `cl100k_base()` itself parses the embedded table on every
2660/// call, which adds a few ms; we pay that cost once.
2661///
2662/// Returns the token count. On the (vanishingly rare) cl100k_base init
2663/// failure, falls back to the prior `len/4` byte heuristic so a budget
2664/// request never hard-errors.
2665#[must_use]
2666pub fn count_tokens_cl100k(text: &str) -> usize {
2667    use std::sync::OnceLock;
2668    static BPE: OnceLock<Option<tiktoken_rs::CoreBPE>> = OnceLock::new();
2669    let bpe = BPE.get_or_init(|| tiktoken_rs::cl100k_base().ok());
2670    if let Some(bpe) = bpe.as_ref() {
2671        bpe.encode_with_special_tokens(text).len()
2672    } else {
2673        // Defensive fallback — should never trigger in practice because
2674        // the BPE table is bundled in the crate, but we never want a
2675        // budget call to fail because of tokenizer init.
2676        text.len() / 4
2677    }
2678}
2679
2680/// Phase P6 — token cost of a memory's `content` only (not title), per
2681/// the R1 spec which budgets against the LLM context window. Title and
2682/// metadata are caller-side ornament; `content` is what gets stuffed
2683/// into the prompt.
2684#[must_use]
2685pub fn count_memory_tokens(mem: &Memory) -> usize {
2686    count_tokens_cl100k(&mem.content)
2687}
2688
/// Phase P6 — backward-compatibility alias kept for callers of the
/// Task 1.11 byte-heuristic surface. New code should call
/// [`count_memory_tokens`] directly.
///
/// This function simply delegates to [`count_memory_tokens`], so the
/// returned value is now BPE-accurate (cl100k_base) rather than the
/// prior `len/4` estimate; callers reading it through the public API
/// get the more accurate value automatically.
#[must_use]
pub fn estimate_memory_tokens(mem: &Memory) -> usize {
    count_memory_tokens(mem)
}
2698
/// Phase P6 — outcome of applying a token budget to a ranked recall
/// list. Carries everything `mcp::handle_recall` needs to populate the
/// new RecallMeta block (`budget_tokens_used`, `budget_tokens_remaining`,
/// `memories_dropped`, `budget_overflow`).
#[derive(Debug, Clone)]
pub struct BudgetOutcome {
    /// Cumulative token cost of the returned content. As produced by
    /// [`apply_token_budget`] this is the BPE-accurate cl100k_base count
    /// when a budget was supplied, and the cheap `len/4` byte heuristic
    /// when no budget was set.
    pub tokens_used: usize,
    /// `budget - tokens_used`, saturating at 0. `None` when no budget set.
    pub tokens_remaining: Option<usize>,
    /// How many candidates the budget cut from the ranked list.
    pub memories_dropped: usize,
    /// True iff the highest-ranked memory alone exceeded the budget and
    /// was returned anyway (R1 guarantee: at least one memory if any
    /// matched). Always false when no budget is set.
    pub budget_overflow: bool,
}
2716
2717/// Phase P6 (R1) — context-budget greedy fill. Iterates over scored
2718/// candidates in rank order; stops at the first memory whose inclusion
2719/// would exceed the budget — UNLESS the output is still empty, in
2720/// which case the highest-ranked memory is returned anyway with
2721/// `budget_overflow = true`. This preserves the R1 guarantee that a
2722/// successful recall always returns at least one result when any
2723/// matched, even if the user supplied an unrealistically tight budget.
2724///
2725/// When `budget_tokens` is `None`, every candidate is returned and the
2726/// `tokens_used` tally falls back to the cheap byte-heuristic (`len/4`)
2727/// — running cl100k_base on every recall regardless of caller intent
2728/// would impose ~200 ms cold-start (BPE table parse) and several ms per
2729/// memory on the hot path. The heuristic is byte-exact-deterministic,
2730/// honoring the prior Task 1.11 contract for "observe the cost without
2731/// enforcing it". When `budget_tokens` is `Some(_)`, the BPE-accurate
2732/// cl100k count is used because the caller cares enough about the
2733/// number to enforce on it. When `budget_tokens` is `Some(0)`, **zero
2734/// memories are returned** with `budget_overflow = false` — the spec
2735/// semantics for "no budget at all, please" (R1 §6 acceptance #3).
2736#[must_use]
2737pub fn apply_token_budget(
2738    scored: Vec<(Memory, f64)>,
2739    budget_tokens: Option<usize>,
2740) -> (Vec<(Memory, f64)>, BudgetOutcome) {
2741    let total_candidates = scored.len();
2742
2743    // Phase P6 — explicit `0` budget short-circuits to an empty result.
2744    // Per the R1 acceptance test `budget_tokens_zero_returns_zero_memories`,
2745    // this is a deliberate no-op fill (overflow is *false* — the user
2746    // said "give me nothing").
2747    if budget_tokens == Some(0) {
2748        return (
2749            Vec::new(),
2750            BudgetOutcome {
2751                tokens_used: 0,
2752                tokens_remaining: Some(0),
2753                memories_dropped: total_candidates,
2754                budget_overflow: false,
2755            },
2756        );
2757    }
2758
2759    // No-budget fast path: skip cl100k entirely. The byte heuristic is
2760    // a few ns vs. the BPE encoder's couple-of-µs per memory plus the
2761    // one-shot ~200 ms init. Bench harness benchmarks recall with
2762    // `budget_tokens=None`; this keeps the hot path cl100k-free.
2763    if budget_tokens.is_none() {
2764        let mut used: usize = 0;
2765        let mut out: Vec<(Memory, f64)> = Vec::with_capacity(scored.len());
2766        for (mem, score) in scored {
2767            used = used.saturating_add(mem.content.len() / 4);
2768            out.push((mem, score));
2769        }
2770        return (
2771            out,
2772            BudgetOutcome {
2773                tokens_used: used,
2774                tokens_remaining: None,
2775                memories_dropped: 0,
2776                budget_overflow: false,
2777            },
2778        );
2779    }
2780
2781    // Budget path — caller asked for enforcement, so spend the tokens
2782    // for accurate cl100k accounting.
2783    let mut used: usize = 0;
2784    let mut out: Vec<(Memory, f64)> = Vec::with_capacity(scored.len());
2785    let mut overflow = false;
2786
2787    for (mem, score) in scored {
2788        let cost = count_memory_tokens(&mem);
2789        if let Some(budget) = budget_tokens
2790            && used.saturating_add(cost) > budget
2791        {
2792            // R1 always-return-at-least-one guarantee: if we've collected
2793            // nothing yet, take the top-ranked memory and flag overflow.
2794            if out.is_empty() {
2795                used = used.saturating_add(cost);
2796                out.push((mem, score));
2797                overflow = true;
2798            }
2799            break;
2800        }
2801        used = used.saturating_add(cost);
2802        out.push((mem, score));
2803    }
2804
2805    let dropped = total_candidates.saturating_sub(out.len());
2806    let tokens_remaining = budget_tokens.map(|b| b.saturating_sub(used));
2807    (
2808        out,
2809        BudgetOutcome {
2810            tokens_used: used,
2811            tokens_remaining,
2812            memories_dropped: dropped,
2813            budget_overflow: overflow,
2814        },
2815    )
2816}
2817
2818/// Recall — fuzzy OR search + touch + auto-promote + TTL extension.
2819/// Task 1.11: after ranking, applies optional `budget_tokens` cap.
2820/// Phase P6: returns the full `BudgetOutcome` (tokens_used,
2821/// tokens_remaining, memories_dropped, budget_overflow) instead of just
2822/// the prior bare `tokens_used`. Callers that only need `tokens_used`
2823/// read `outcome.tokens_used`.
2824#[allow(clippy::too_many_arguments)]
2825/// v0.6.3.1 (P3): keyword-only recall with retrieval-stage telemetry.
2826///
2827/// Identical to [`recall`] but additionally returns a [`crate::models::RecallTelemetry`]
2828/// describing the FTS5 candidate count (HNSW count is always 0 for this
2829/// path — no semantic stage runs). MCP `handle_recall` uses this to build
2830/// the `meta` block; [`recall`] is preserved as a thin wrapper for
2831/// existing callers (HTTP handlers, CLI, bench).
2832#[allow(clippy::too_many_arguments)]
2833pub fn recall_with_telemetry(
2834    conn: &Connection,
2835    context: &str,
2836    namespace: Option<&str>,
2837    limit: usize,
2838    tags_filter: Option<&str>,
2839    since: Option<&str>,
2840    until: Option<&str>,
2841    short_extend: i64,
2842    mid_extend: i64,
2843    as_agent: Option<&str>,
2844    budget_tokens: Option<usize>,
2845    // v0.7.0 WT-1-E — when false (default), recall excludes archived
2846    // sources whose atoms now surface in their place. When true, the
2847    // archive-filter WHERE clause is dropped so forensic-export and
2848    // explicit auditor recall returns both atoms and sources.
2849    include_archived: bool,
2850    // v0.7.0 Form 4 / Cluster-A PERF-3 — push `--source-uri-prefix`
2851    // into the SQL WHERE so the partial `idx_memories_source_uri`
2852    // index covers the lookup and excluded rows never enter the
2853    // top-K. See [`recall`] for the contract.
2854    source_uri_prefix: Option<&str>,
2855) -> Result<(
2856    Vec<(Memory, f64)>,
2857    BudgetOutcome,
2858    crate::models::RecallTelemetry,
2859)> {
2860    let (results, outcome) = recall(
2861        conn,
2862        context,
2863        namespace,
2864        limit,
2865        tags_filter,
2866        since,
2867        until,
2868        short_extend,
2869        mid_extend,
2870        as_agent,
2871        budget_tokens,
2872        include_archived,
2873        source_uri_prefix,
2874    )?;
2875    let telemetry = crate::models::RecallTelemetry {
2876        fts_candidates: results.len(),
2877        hnsw_candidates: 0,
2878        blend_weight_avg: 0.0,
2879        embedding_dim_mismatch: 0,
2880    };
2881    Ok((results, outcome, telemetry))
2882}
2883
2884pub fn recall(
2885    conn: &Connection,
2886    context: &str,
2887    namespace: Option<&str>,
2888    limit: usize,
2889    tags_filter: Option<&str>,
2890    since: Option<&str>,
2891    until: Option<&str>,
2892    short_extend: i64,
2893    mid_extend: i64,
2894    as_agent: Option<&str>,
2895    budget_tokens: Option<usize>,
2896    // v0.7.0 WT-1-E — see [`recall_with_telemetry`] for the
2897    // archived-source exclusion contract.
2898    include_archived: bool,
2899    // v0.7.0 Form 4 / Cluster-A PERF-3 — when `Some(prefix)`, restrict
2900    // results to memories whose `source_uri` starts with `prefix`. The
2901    // predicate is `source_uri LIKE 'prefix%'` so the partial
2902    // `idx_memories_source_uri` index (defined in migration
2903    // `0032_v07_form4_provenance.sql`) covers the scan. Pre-fix this
2904    // filter ran in Rust AFTER the SQL returned, which excluded valid
2905    // matches from the top-K when the substrate returned `limit` rows
2906    // that subsequently filtered to fewer. `None` preserves the legacy
2907    // no-filter behaviour for callers that filter post-hoc.
2908    source_uri_prefix: Option<&str>,
2909) -> Result<(Vec<(Memory, f64)>, BudgetOutcome)> {
2910    let now = Utc::now().to_rfc3339();
2911    let fts_query = sanitize_fts_query(context, true);
2912    let (vis_p, vis_t, vis_u, vis_o) = compute_visibility_prefixes(as_agent);
2913
2914    // Task 1.12: hierarchy expansion. If `namespace` is hierarchical (contains
2915    // `/`), broaden the filter to the full ancestor chain. Flat namespaces
2916    // keep exact-match semantics (backward compat).
2917    let (hierarchy_in, hierarchy_active) = hierarchy_in_clause(namespace);
2918    let hierarchy_fragment = hierarchy_in.unwrap_or_default();
2919    let effective_namespace = if hierarchy_active { None } else { namespace };
2920
2921    // v0.7.0 WT-1-E — archived-source exclusion (default) / pass-
2922    // through (include_archived=true). Composes with the existing
2923    // namespace, expiry, tag, time-window, and visibility filters.
2924    let archived_fragment = archived_source_clause(include_archived, "m");
2925
2926    // v0.7.0 Form 4 / Cluster-A PERF-3 — push the source-URI prefix
2927    // predicate into SQL. We escape SQL LIKE metacharacters (`%`, `_`,
2928    // `\`) in the supplied prefix so a caller passing e.g. `doc:abc_`
2929    // matches only that literal value (not `doc:abcX`). The LIKE
2930    // pattern is constructed with the bound parameter holding the
2931    // already-escaped prefix + `%`; combined with the partial index
2932    // on `source_uri WHERE source_uri IS NOT NULL`, SQLite picks the
2933    // index for the lookup. See [`escape_like_pattern`].
2934    let (source_uri_fragment, source_uri_param): (&str, Option<String>) = match source_uri_prefix {
2935        Some(prefix) if !prefix.is_empty() => (
2936            "AND m.source_uri LIKE ?12 ESCAPE '\\'",
2937            Some(format!("{}%", escape_like_pattern(prefix))),
2938        ),
2939        _ => ("", None),
2940    };
2941
2942    let sql = format!(
2943        "SELECT m.id, m.tier, m.namespace, m.title, m.content, m.tags, m.priority,
2944                m.confidence, m.source, m.access_count, m.created_at, m.updated_at,
2945                m.last_accessed_at, m.expires_at, m.metadata, m.reflection_depth,
2946                m.memory_kind, m.entity_id, m.persona_version,
2947                m.citations, m.source_uri, m.source_span,
2948                m.confidence_source, m.confidence_signals, m.confidence_decayed_at,
2949                (fts.rank * -1)
2950                + (m.priority * 0.5)
2951                + (MIN(m.access_count, 50) * 0.1)
2952                + (m.confidence * 2.0)
2953                + (CASE m.tier WHEN 'long' THEN 3.0 WHEN 'mid' THEN 1.0 ELSE 0.0 END)
2954                + (1.0 / (1.0 + (julianday('now') - julianday(m.updated_at)) * 0.1))
2955                AS score
2956         FROM memories_fts fts
2957         JOIN memories m ON m.rowid = fts.rowid
2958         WHERE memories_fts MATCH ?1
2959           AND (?2 IS NULL OR m.namespace = ?2)
2960           {hierarchy_fragment}
2961           AND (m.expires_at IS NULL OR m.expires_at > ?3)
2962           AND (?4 IS NULL OR EXISTS (SELECT 1 FROM json_each(m.tags) WHERE json_each.value = ?4))
2963           AND (?5 IS NULL OR m.created_at >= ?5)
2964           AND (?6 IS NULL OR m.created_at <= ?6)
2965           {archived_fragment}
2966           {source_uri_fragment}
2967           {vis}
2968         ORDER BY score DESC
2969         LIMIT ?7",
2970        vis = visibility_clause(8, "m"),
2971    );
2972    let mut stmt = conn.prepare(&sql)?;
2973    // Bind ?12 only when the source-URI fragment is active; SQLite
2974    // errors on parameter-count mismatch.
2975    let row_handler = |row: &rusqlite::Row<'_>| -> rusqlite::Result<(Memory, f64)> {
2976        let mem = row_to_memory(row)?;
2977        // v0.7.0 Form 4 / v0.7.x Form 6 — name-based read for the
2978        // trailing score column. Switched from positional `row.get`
2979        // after schema v38 (citations, source_uri, source_span) and
2980        // Form 6's `memory_kind`/`entity_id`/`persona_version`
2981        // shifted the trailing column's index; name-based reads
2982        // survive future column additions without further churn.
2983        let score: f64 = row.get("score")?;
2984        Ok((mem, score))
2985    };
2986    let results: Vec<(Memory, f64)> = if let Some(ref uri_param) = source_uri_param {
2987        let rows = stmt.query_map(
2988            params![
2989                fts_query,
2990                effective_namespace,
2991                now,
2992                tags_filter,
2993                since,
2994                until,
2995                limit,
2996                vis_p,
2997                vis_t,
2998                vis_u,
2999                vis_o,
3000                uri_param,
3001            ],
3002            row_handler,
3003        )?;
3004        rows.collect::<rusqlite::Result<Vec<_>>>()?
3005    } else {
3006        let rows = stmt.query_map(
3007            params![
3008                fts_query,
3009                effective_namespace,
3010                now,
3011                tags_filter,
3012                since,
3013                until,
3014                limit,
3015                vis_p,
3016                vis_t,
3017                vis_u,
3018                vis_o,
3019            ],
3020            row_handler,
3021        )?;
3022        rows.collect::<rusqlite::Result<Vec<_>>>()?
3023    };
3024
3025    // Task 1.12: proximity boost when hierarchy expansion is active.
3026    let boosted = if let (true, Some(anchor)) = (hierarchy_active, namespace) {
3027        apply_proximity_boost(results, anchor)
3028    } else {
3029        results
3030    };
3031
3032    // Task 1.11 / Phase P6: apply optional token budget in rank order
3033    // (AFTER proximity). Returns BudgetOutcome with all R1 meta fields.
3034    let (budgeted, outcome) = apply_token_budget(boosted, budget_tokens);
3035
3036    // Cluster-F PERF-6 — collapse K per-row touches into a single
3037    // `BEGIN IMMEDIATE` transaction. Same semantics (access bump,
3038    // TTL extend, promotion, priority bump every 10 accesses); the
3039    // 3K UPDATE round-trips now share one commit instead of K.
3040    let touch_ids: Vec<&str> = budgeted.iter().map(|(mem, _)| mem.id.as_str()).collect();
3041    if let Err(e) = touch_many(conn, &touch_ids, short_extend, mid_extend) {
3042        tracing::warn!("touch_many failed for recall set: {}", e);
3043    }
3044    Ok((budgeted, outcome))
3045}
3046
3047/// Task 1.7 — vertical memory promotion.
3048///
3049/// Clones `source_id` into `to_namespace`, which must be a proper `/`-derived
3050/// ancestor of the memory's current namespace. The original memory is
3051/// **untouched** (vertical promotion is a fan-out, not a move). A
3052/// `derived_from` link is created from the new clone back to the source so
3053/// the promotion trail is queryable.
3054///
3055/// Returns the clone's new ID.
3056///
3057/// Errors when:
3058/// - source doesn't exist
3059/// - `to_namespace` is empty, equal to the source namespace, or not an
3060///   ancestor of it (see `namespace_ancestors`)
3061pub fn promote_to_namespace(
3062    conn: &Connection,
3063    source_id: &str,
3064    to_namespace: &str,
3065) -> Result<String> {
3066    if to_namespace.is_empty() {
3067        // #962 typed envelope.
3068        return Err(anyhow::Error::new(StorageError::InvalidArgument {
3069            reason: "to_namespace cannot be empty".to_string(),
3070        }));
3071    }
3072    let source = get(conn, source_id)?.ok_or_else(|| {
3073        // #962 typed envelope. `Source` here labels the promotion source,
3074        // not a link end, but the user-facing message ("source memory
3075        // not found: …") is preserved via the LinkEnd::Source Display arm.
3076        anyhow::Error::new(StorageError::MemoryNotFound {
3077            id: source_id.to_string(),
3078            role: Some(LinkEnd::Source),
3079        })
3080    })?;
3081    if to_namespace == source.namespace {
3082        // #962 typed envelope.
3083        return Err(anyhow::Error::new(StorageError::InvalidArgument {
3084            reason: format!(
3085                "to_namespace must be a proper ancestor of the memory's namespace (got self: {})",
3086                source.namespace
3087            ),
3088        }));
3089    }
3090    let ancestors = namespace_ancestors(&source.namespace);
3091    if !ancestors.iter().any(|a| a == to_namespace) {
3092        // #962 typed envelope.
3093        return Err(anyhow::Error::new(StorageError::InvalidArgument {
3094            reason: format!(
3095                "to_namespace '{to_namespace}' is not an ancestor of '{}' (ancestors: {ancestors:?})",
3096                source.namespace
3097            ),
3098        }));
3099    }
3100
3101    let now = Utc::now().to_rfc3339();
3102    let clone = Memory {
3103        id: uuid::Uuid::new_v4().to_string(),
3104        tier: source.tier.clone(),
3105        namespace: to_namespace.to_string(),
3106        title: source.title.clone(),
3107        content: source.content.clone(),
3108        tags: source.tags.clone(),
3109        priority: source.priority,
3110        confidence: source.confidence,
3111        source: source.source.clone(),
3112        access_count: 0,
3113        created_at: now.clone(),
3114        updated_at: now,
3115        last_accessed_at: None,
3116        expires_at: source.expires_at.clone(),
3117        metadata: source.metadata.clone(),
3118        reflection_depth: source.reflection_depth,
3119        memory_kind: source.memory_kind.clone(),
3120        entity_id: None,
3121        persona_version: None,
3122        citations: Vec::new(),
3123        source_uri: None,
3124        source_span: None,
3125        confidence_source: ConfidenceSource::CallerProvided,
3126        confidence_signals: None,
3127        confidence_decayed_at: None,
3128        version: 1,
3129    };
3130    let actual_id = insert(conn, &clone)?;
3131    // Clone → source: derived_from. Safe to ignore if the link layer
3132    // short-circuits on self-link (impossible here — distinct IDs).
3133    create_link(
3134        conn,
3135        &actual_id,
3136        source_id,
3137        crate::models::MemoryLinkRelation::DerivedFrom.as_str(),
3138    )?;
3139    Ok(actual_id)
3140}
3141
3142/// v0.6.3.1 P2 (G6) — quick existence check for `(title, namespace)`. Used by
3143/// `on_conflict='error'` callers to short-circuit before the full upsert
3144/// machinery runs. Returns the existing row id if there is one.
3145///
3146/// # Errors
3147///
3148/// Returns the underlying SQLite error.
3149pub fn find_by_title_namespace(
3150    conn: &Connection,
3151    title: &str,
3152    namespace: &str,
3153) -> Result<Option<String>> {
3154    let id: Option<String> = conn
3155        .query_row(
3156            "SELECT id FROM memories WHERE title = ?1 AND namespace = ?2 LIMIT 1",
3157            params![title, namespace],
3158            |r| r.get(0),
3159        )
3160        .ok();
3161    Ok(id)
3162}
3163
3164/// v0.6.3.1 P2 (G6) — pick a title that does not collide with an existing
3165/// `(title, namespace)` row by appending `(2)`, `(3)`, ... up to a hard cap.
3166/// The first available suffix wins. Used by `on_conflict='version'`.
3167///
3168/// The cap (`MAX_VERSION_SUFFIX`) prevents an infinite loop in pathological
3169/// cases (e.g. an attacker spamming the same title in a loop). Once the cap
3170/// is hit, the caller falls back to error mode.
3171const MAX_VERSION_SUFFIX: u32 = 1024;
3172
3173/// # Errors
3174///
3175/// Returns the underlying SQLite error or an error if no free suffix is
3176/// found within `MAX_VERSION_SUFFIX` attempts.
3177pub fn next_versioned_title(
3178    conn: &Connection,
3179    base_title: &str,
3180    namespace: &str,
3181) -> Result<String> {
3182    if find_by_title_namespace(conn, base_title, namespace)?.is_none() {
3183        return Ok(base_title.to_string());
3184    }
3185    for n in 2..=MAX_VERSION_SUFFIX {
3186        let candidate = format!("{base_title} ({n})");
3187        if find_by_title_namespace(conn, &candidate, namespace)?.is_none() {
3188            return Ok(candidate);
3189        }
3190    }
3191    // #962 typed envelope — UniqueConflict (the substrate could not
3192    // mint a non-colliding versioned title within the cap). Caller is
3193    // expected to retry with a different base title or raise the cap.
3194    Err(anyhow::Error::new(StorageError::UniqueConflict {
3195        reason: format!(
3196            "could not find a free versioned title for '{base_title}' in namespace '{namespace}' \
3197             within {MAX_VERSION_SUFFIX} attempts"
3198        ),
3199    }))
3200}
3201
/// Stopwords stripped before computing the title-similarity Jaccard floor
/// in [`find_contradictions`]. The list is intentionally tiny — a small
/// closed-class English set — because a maximalist stopword list would
/// over-filter agglutinative or short titles and re-introduce noise on
/// the other side. The substrate's contradiction surface is supposed to
/// be a near-duplicate-titles signal, not a generic content search.
///
/// Entries must be lowercase: tokens are lowercased before they are
/// checked against this list.
const CONTRADICTION_TITLE_STOPWORDS: &[&str] = &[
    "a", "an", "and", "are", "as", "at", "be", "by", "for", "from", "has", "have", "in", "is",
    "it", "its", "of", "on", "or", "that", "the", "this", "to", "was", "were", "will", "with",
];

/// Minimum Jaccard-of-content-tokens between the seed title and a
/// candidate title for the candidate to qualify as a contradiction
/// hit. Computed after lowercasing + stopword removal.
///
/// **Why this exists** (issue #1320). Pre-fix, [`find_contradictions`]
/// returned the top 5 FTS5 matches on an OR-joined sanitised query
/// against the title. With seed title "Tomatoes are red" the OR list
/// becomes `"tomatoes" OR "are" OR "red"`, and FTS5 happily ranked
/// every row containing the common stopword "are" near the top.
/// Operators observed unrelated memories ("Moon landing happened in
/// 1969", "Retrieval-augmented generation works by...") flagged as
/// `potential_contradictions` against tomato facts — pure stopword
/// noise. The Jaccard floor below preserves the documented "similar
/// titles" semantics (e.g. "Database is PostgreSQL" vs "Database is
/// MySQL" share `{database}` after stopword removal — Jaccard
/// `1/3 ≈ 0.33`, passes the 0.3 floor) while rejecting the
/// disjoint-topic false positives (Jaccard 0).
const CONTRADICTION_TITLE_JACCARD_FLOOR: f32 = 0.30;

/// Lowercase + stopword-strip a title for the contradiction Jaccard
/// comparison. Splits on non-alphanumeric so titles like
/// `"Database is PostgreSQL"` and `"Database/is/PostgreSQL"` produce
/// the same token set.
///
/// Case folding is Unicode-aware (`str::to_lowercase`), matching the
/// Unicode-aware `char::is_alphanumeric` split: titles that differ only
/// in the case of non-ASCII letters (`"ÜBER"` vs `"über"`) yield the
/// same tokens instead of being treated as disjoint.
fn contradiction_title_tokens(title: &str) -> std::collections::HashSet<String> {
    title
        .split(|c: char| !c.is_alphanumeric())
        // Drop the empty pieces produced by adjacent separators before
        // paying for the lowercase allocation.
        .filter(|piece| !piece.is_empty())
        .map(str::to_lowercase)
        .filter(|token| !CONTRADICTION_TITLE_STOPWORDS.contains(&token.as_str()))
        .collect()
}

/// Jaccard token overlap between two pre-tokenised title sets. Returns
/// `0.0` when either side is empty so a seed title that's pure
/// stopwords (e.g. `"the"`) cannot produce phantom hits.
#[allow(clippy::cast_precision_loss)]
fn contradiction_title_jaccard(
    a: &std::collections::HashSet<String>,
    b: &std::collections::HashSet<String>,
) -> f32 {
    if a.is_empty() || b.is_empty() {
        return 0.0;
    }
    let shared = a.intersection(b).count();
    // |A ∪ B| = |A| + |B| − |A ∩ B|; non-zero because both sets are
    // non-empty at this point.
    let combined = a.len() + b.len() - shared;
    shared as f32 / combined as f32
}
3260
3261/// Stage-1 FTS5 recall for similar-title candidates. Returns up to
3262/// `limit` rows from `memories_fts` matching the sanitised seed
3263/// title, ordered by FTS5 rank.
3264///
3265/// This is the broader recall pool that feeds both
3266/// [`find_contradictions`] (wire-side `potential_contradictions`,
3267/// post Stage-2 Jaccard floor) and [`find_synthesis_candidates`]
3268/// (Form 1 synthesis curator, NO Jaccard floor). Two consumers,
3269/// two different relevance budgets; see #1320 + #1337 for why the
3270/// pool can't be filtered universally.
3271fn find_similar_title_candidates(
3272    conn: &Connection,
3273    title: &str,
3274    namespace: &str,
3275    limit: usize,
3276) -> Result<Vec<Memory>> {
3277    let fts_query = sanitize_fts_query(title, true);
3278    let mut stmt = conn.prepare(
3279        "SELECT m.id, m.tier, m.namespace, m.title, m.content, m.tags, m.priority,
3280                m.confidence, m.source, m.access_count, m.created_at, m.updated_at,
3281                m.last_accessed_at, m.expires_at, m.metadata, m.reflection_depth,
3282                m.memory_kind, m.entity_id, m.persona_version,
3283                m.citations, m.source_uri, m.source_span,
3284                m.confidence_source, m.confidence_signals, m.confidence_decayed_at
3285         FROM memories_fts fts
3286         JOIN memories m ON m.rowid = fts.rowid
3287         WHERE memories_fts MATCH ?1 AND m.namespace = ?2
3288         ORDER BY fts.rank
3289         LIMIT ?3",
3290    )?;
3291    let rows = stmt.query_map(
3292        params![fts_query, namespace, i64::try_from(limit).unwrap_or(20)],
3293        row_to_memory,
3294    )?;
3295    rows.collect::<rusqlite::Result<Vec<_>>>()
3296        .map_err(Into::into)
3297}
3298
3299/// Detect potential contradictions: memories in same namespace with similar titles.
3300///
3301/// Two-stage filter (#1320 calibration):
3302/// 1. FTS5 OR-match on stopword-tolerant query — fast recall over
3303///    `memories_fts`, capped at a candidate ceiling so a pathological
3304///    common-word title can't pull the entire namespace.
3305/// 2. Jaccard-token-overlap floor on the stopword-stripped title sets,
3306///    keeping only candidates whose title shares at least
3307///    [`CONTRADICTION_TITLE_JACCARD_FLOOR`] of the seed's content
3308///    tokens. Final result is capped at 5 (the pre-fix wire ceiling).
3309///
3310/// The two-stage design preserves the "similar title" semantics that
3311/// the wire-side `potential_contradictions` field documents while
3312/// removing the stopword-OR noise floor that crossed unrelated topics
3313/// at v0.6.x / pre-fix v0.7.0.
3314///
3315/// **Scope** (#1337): this function is the WIRE-output filter. The
3316/// Form 1 synthesis curator path uses [`find_synthesis_candidates`]
3317/// instead, which omits the Stage-2 Jaccard floor — the curator needs
3318/// the broader Stage-1 pool to see legitimately-similar memories
3319/// whose titles share only one strong content token (e.g.
3320/// `"kubernetes deployment notes"` vs
3321/// `"kubernetes rolling deploy strategy"`, Jaccard 1/6 ≈ 0.167)
3322/// without depending on whether 0.30 happens to be the right
3323/// stopword-noise floor for the wire surface.
3324pub fn find_contradictions(conn: &Connection, title: &str, namespace: &str) -> Result<Vec<Memory>> {
3325    // Stage 1 — FTS5 recall. Pull a wider candidate pool (20) so the
3326    // stage-2 Jaccard filter has headroom; the final cap of 5 is
3327    // applied after the filter so the wire shape is preserved.
3328    let candidates = find_similar_title_candidates(conn, title, namespace, 20)?;
3329
3330    // Stage 2 — Jaccard floor on stopword-stripped title tokens.
3331    let seed_tokens = contradiction_title_tokens(title);
3332    let mut filtered: Vec<Memory> = candidates
3333        .into_iter()
3334        .filter(|cand| {
3335            let cand_tokens = contradiction_title_tokens(&cand.title);
3336            contradiction_title_jaccard(&seed_tokens, &cand_tokens)
3337                >= CONTRADICTION_TITLE_JACCARD_FLOOR
3338        })
3339        .collect();
3340    filtered.truncate(5);
3341    Ok(filtered)
3342}
3343
3344/// Stage-1-only FTS5 candidate recall for the Form 1 synthesis
3345/// curator path.
3346///
3347/// The synthesis curator (`mcp/tools/store/synthesis.rs`) needs the
3348/// broader similar-title pool — every namespace row whose title
3349/// matches the seed under FTS5 — so the LLM can decide which
3350/// candidates legitimately overlap with the incoming write.
3351///
3352/// This intentionally OMITS the Stage-2 Jaccard floor that
3353/// [`find_contradictions`] applies to its wire output: the floor was
3354/// calibrated for "stopword-only overlap" wire-noise rejection
3355/// (#1320), but the synthesis tests exercise legitimate single-strong-
3356/// token overlaps (e.g. `"kubernetes deployment notes"` vs
3357/// `"kubernetes rolling deploy strategy"` share `{kubernetes}` =
3358/// Jaccard 1/6 ≈ 0.167 < 0.30). Applying the wire-floor here would
3359/// hide those candidates from the curator and short-circuit every
3360/// add/update/delete verb in the synthesis verdict matrix (#1337).
3361///
3362/// Returns up to 5 candidates (matches the wire ceiling for
3363/// `potential_contradictions`, the historical synthesis prompt cap).
3364pub fn find_synthesis_candidates(
3365    conn: &Connection,
3366    title: &str,
3367    namespace: &str,
3368) -> Result<Vec<Memory>> {
3369    let mut candidates = find_similar_title_candidates(conn, title, namespace, 20)?;
3370    candidates.truncate(5);
3371    Ok(candidates)
3372}
3373
3374// --- Links ---
3375//
3376// v0.7.0 fix-campaign A3 (LINK-PARITY) error prefix constants
3377// (`LINK_CYCLE_ERR_PREFIX`, `LINK_PERMISSION_DENIED_ERR_PREFIX`) moved
3378// to `super::error` under #962 so they stay co-located with the typed
3379// `StorageError` variants whose Display impl emits them. Re-exported
3380// at the module root above for `db::LINK_CYCLE_ERR_PREFIX` path
3381// stability.
3382
3383/// v0.7.0 fix-campaign A3 (LINK-PARITY) — shared pre-create validator
3384/// invoked by every link-write entry point.
3385///
3386/// Closes the S5-H2 HIGH finding (#690): before A3 the L1-2 cycle
3387/// check + K9 permission pipeline ran only in
3388/// `src/mcp/tools/link.rs::handle_link`, so the HTTP `POST /api/v1/links`
3389/// path and the federation-receive `sync_push` link loop could land
3390/// `reflects_on` edges that the MCP path would have refused. The fix
3391/// is defense-in-depth at the storage layer: every path — MCP, HTTP,
3392/// SAL, federation — calls this helper, so the gates enforce no
3393/// matter which entry point initiates the write.
3394///
3395/// Pipeline:
3396///
3397/// 1. Cycle check — invoked only when `relation == "reflects_on"`.
3398///    Calls [`crate::kg::cycle_check::would_create_reflection_cycle`]
3399///    with the namespace-scoped `effective_max_reflection_depth` cap; on
3400///    a `would_cycle` hit, returns an error prefixed with
3401///    [`LINK_CYCLE_ERR_PREFIX`] so HTTP can surface 409 CONFLICT and
3402///    signed-event emit can record the refusal. The walk fails CLOSED on
3403///    SQL errors and on depth-ceiling truncation.
3404/// 2. K9 permission eval — runs the unified
3405///    [`crate::permissions::Permissions::evaluate`] pipeline against the
3406///    source memory's namespace. On `Deny`, returns an error prefixed
3407///    with [`LINK_PERMISSION_DENIED_ERR_PREFIX`] so HTTP surfaces 403.
3408///    `Ask` is treated as `Deny` here because the storage-layer
3409///    helper has no Ask-channel back to the operator; entry points
3410///    that want interactive Ask handling (MCP) should invoke
3411///    `Permissions::evaluate` directly BEFORE calling create_link.
3412///
3413/// `skip_governance` lets federation-receive bypass the K9 gate when
3414/// the inbound link has already been cryptographically attested by an
3415/// enrolled peer (attest_level == "peer_attested"). The cycle check
3416/// always runs — even a trusted peer should not be able to extend a
3417/// reflection cycle on the receiver. See `create_link_inbound` for the
3418/// caller-side decision logic.
3419///
3420/// `agent_id` defaults to `"system"` when the caller cannot resolve a
3421/// concrete claimant (federation receive path with no claim, etc.) —
3422/// the permission rule matcher uses it for `agent_pattern` matching.
3423pub fn validate_link_pre_create(
3424    conn: &Connection,
3425    source_id: &str,
3426    target_id: &str,
3427    relation: &str,
3428    agent_id: &str,
3429    skip_governance: bool,
3430) -> Result<()> {
3431    // Pass 1: cycle check. Only `reflects_on` participates in the
3432    // DAG invariant — the other four relations are intentionally
3433    // allowed to form cycles (e.g. mutual `related_to`).
3434    if relation == crate::models::MemoryLinkRelation::ReflectsOn.as_str() {
3435        // Resolve the namespace-scoped reflection-depth cap so the cycle
3436        // walk's fail-CLOSED ceiling tracks the same governance policy the
3437        // MCP link path uses (`src/mcp/tools/link.rs`). The source memory's
3438        // namespace governs; a missing source falls back to the default
3439        // namespace (create_link's FK guard surfaces the missing row later).
3440        let link_ns = match get(conn, source_id) {
3441            Ok(Some(m)) => m.namespace,
3442            _ => crate::DEFAULT_NAMESPACE.to_string(),
3443        };
3444        let max_depth = resolve_governance_policy(conn, &link_ns)
3445            .unwrap_or_default()
3446            .effective_max_reflection_depth();
3447        if crate::kg::cycle_check::would_create_reflection_cycle(
3448            conn, source_id, target_id, max_depth,
3449        )?
3450        .would_cycle
3451        {
3452            // #962 typed envelope. Display preserves `LINK_CYCLE_ERR_PREFIX`.
3453            return Err(anyhow::Error::new(StorageError::LinkReflectionCycle {
3454                source_id: source_id.to_string(),
3455                target_id: target_id.to_string(),
3456            }));
3457        }
3458    }
3459
3460    // Pass 2: K9 permission eval. Skip when the caller has already
3461    // established external attestation (federation peer_attested).
3462    if !skip_governance {
3463        // Link evaluation is scoped to the *source* memory's
3464        // namespace — matches the MCP path's choice at
3465        // `src/mcp/tools/link.rs:31`. Missing source memory falls
3466        // back to "global"; create_link's own FK guard will surface
3467        // the missing-memory error after this returns.
3468        let link_ns = match get(conn, source_id) {
3469            Ok(Some(m)) => m.namespace,
3470            _ => crate::DEFAULT_NAMESPACE.to_string(),
3471        };
3472        evaluate_link_permission(&link_ns, source_id, target_id, relation, agent_id)
3473            .map_err(anyhow::Error::new)?;
3474    }
3475    Ok(())
3476}
3477
3478/// #1568 (H1 residual) — backend-agnostic K9 permission evaluation for
3479/// a pending link write. This is Pass 2 of [`validate_link_pre_create`]
3480/// hoisted into a shared free fn so BOTH adapters consult the same
3481/// governance gate: the sqlite path delegates from
3482/// `validate_link_pre_create`; the postgres SAL adapter's
3483/// `link_internal` (`src/store/postgres.rs`) calls it directly after
3484/// resolving the source memory's namespace via SQL. Keeping the
3485/// evaluation here means the two backends cannot drift on link
3486/// governance semantics.
3487///
3488/// # Errors
3489///
3490/// Returns [`StorageError::LinkPermissionDenied`] (Display preserves
3491/// [`LINK_PERMISSION_DENIED_ERR_PREFIX`]) on `Deny`, and on `Ask` —
3492/// the storage layer has no Ask channel; entry points that want
3493/// interactive Ask handling (MCP) run `Permissions::evaluate`
3494/// themselves BEFORE the storage write.
3495pub(crate) fn evaluate_link_permission(
3496    link_ns: &str,
3497    source_id: &str,
3498    target_id: &str,
3499    relation: &str,
3500    agent_id: &str,
3501) -> std::result::Result<(), StorageError> {
3502    use crate::permissions::{Decision, Op, PermissionContext, Permissions};
3503    let ctx = PermissionContext {
3504        op: Op::MemoryLink,
3505        namespace: link_ns.to_string(),
3506        agent_id: agent_id.to_string(),
3507        payload: serde_json::json!({
3508            "source_id": source_id,
3509            "target_id": target_id,
3510            "relation": relation,
3511        }),
3512    };
3513    match Permissions::evaluate(&ctx, &[]) {
3514        Decision::Allow | Decision::Modify(_) => Ok(()),
3515        // #962 typed envelope. Display preserves
3516        // `LINK_PERMISSION_DENIED_ERR_PREFIX`.
3517        Decision::Deny(reason) => Err(StorageError::LinkPermissionDenied { reason }),
3518        Decision::Ask(prompt) => Err(StorageError::LinkPermissionDenied {
3519            reason: format!("ask deferred to storage layer ({prompt})"),
3520        }),
3521    }
3522}
3523
3524/// Insert a directional `(source_id, target_id, relation)` link.
3525///
3526/// Backward-compat shim around [`create_link_signed`] with no active
3527/// keypair — every call here writes `signature = NULL` and
3528/// `attest_level = "unsigned"`. New code that wants signing should
3529/// route through [`create_link_signed`] directly.
3530pub fn create_link(
3531    conn: &Connection,
3532    source_id: &str,
3533    target_id: &str,
3534    relation: &str,
3535) -> Result<()> {
3536    create_link_signed(conn, source_id, target_id, relation, None).map(|_| ())
3537}
3538
3539/// v0.7 H2 — link write that optionally signs with the active agent's
3540/// Ed25519 keypair.
3541///
3542/// When `keypair` carries a private key, the six signable fields
3543/// (`src_id`, `dst_id`, `relation`, `observed_by`, `valid_from`,
3544/// `valid_until`) are encoded to deterministic CBOR per RFC 8949
3545/// §4.2.1, signed, and the 64-byte signature is persisted in the
3546/// existing `signature` BLOB column with `attest_level = "self_signed"`.
3547///
3548/// When `keypair` is `None` or carries only a public key, the row is
3549/// written with `signature = NULL` and `attest_level = "unsigned"` —
3550/// preserving v0.6.4 behaviour for callers that haven't generated a
3551/// keypair yet.
3552///
3553/// `observed_by` on the signed payload is set to the keypair's
3554/// `agent_id` when a keypair is present (the writer is, by definition,
3555/// the observer). The `observed_by` *column* itself is intentionally
3556/// left at the v0.6.3 default (NULL on this insert path) so existing
3557/// KG queries that join on `observed_by` keep their current shape; H4's
3558/// `memory_verify` will surface the signing identity from the keypair
3559/// + signature, not from this column.
3560///
3561/// Returns the chosen attest level so callers (HTTP/MCP wrappers) can
3562/// surface it in the wire response without re-querying the row.
3563pub fn create_link_signed(
3564    conn: &Connection,
3565    source_id: &str,
3566    target_id: &str,
3567    relation: &str,
3568    keypair: Option<&crate::identity::keypair::AgentKeypair>,
3569) -> Result<&'static str> {
3570    // v0.7.0 fix-campaign A3 (LINK-PARITY, #690) — gates that were
3571    // previously enforced only at `src/mcp/tools/link.rs::handle_link`
3572    // now run here so EVERY caller (MCP, HTTP, SAL, federation) hits
3573    // them. The agent_id used for the K9 evaluation is the keypair's
3574    // claim when present (the writer is by definition the actor);
3575    // when no keypair is configured we fall back to "system" — the
3576    // unified evaluator's `agent_pattern` defaults to `*`, so an
3577    // operator who has not authored agent-narrow rules sees no
3578    // behaviour change. The MCP path runs its own evaluate BEFORE
3579    // calling here (it needs Ask-channel handling we can't surface
3580    // from storage); the second evaluation here is idempotent under
3581    // the registry's deny-first semantics.
3582    let agent_id_for_eval = keypair
3583        .as_ref()
3584        .map(|kp| kp.agent_id.as_str())
3585        .unwrap_or("system");
3586    validate_link_pre_create(
3587        conn,
3588        source_id,
3589        target_id,
3590        relation,
3591        agent_id_for_eval,
3592        false,
3593    )?;
3594    // Verify both IDs exist before creating link
3595    let source_exists: bool = conn
3596        .query_row(SQL_MEMORY_EXISTS, params![source_id], |r| r.get(0))
3597        .unwrap_or(false);
3598    if !source_exists {
3599        // #962 typed envelope — MemoryNotFound{role=Source}.
3600        return Err(anyhow::Error::new(StorageError::MemoryNotFound {
3601            id: source_id.to_string(),
3602            role: Some(LinkEnd::Source),
3603        }));
3604    }
3605    let target_exists: bool = conn
3606        .query_row(SQL_MEMORY_EXISTS, params![target_id], |r| r.get(0))
3607        .unwrap_or(false);
3608    if !target_exists {
3609        // #962 typed envelope — MemoryNotFound{role=Target}.
3610        return Err(anyhow::Error::new(StorageError::MemoryNotFound {
3611            id: target_id.to_string(),
3612            role: Some(LinkEnd::Target),
3613        }));
3614    }
3615    // Schema v15 (Pillar 2 / Stream B) added `valid_from` for temporal
3616    // KG queries. Backfill on migration handled legacy rows; here we
3617    // populate it on the insert path so newly created links are
3618    // visible to `memory_kg_timeline` without a downstream backfill.
3619    //
3620    // v0.7.0 H6 (round-2): mirror the postgres G3 fix at
3621    // `store/postgres.rs:3539` — truncate the timestamp to microsecond
3622    // precision BEFORE we both sign over it and persist it. SQLite
3623    // stores RFC3339 TEXT and round-trips losslessly so this is a
3624    // no-op for SQLite reads, BUT a link created on the SQLite path
3625    // and later re-verified on the postgres path (or vice versa)
3626    // must commit to the same canonical RFC3339 string on both
3627    // sides. Postgres's `TIMESTAMPTZ` quantises at microsecond
3628    // resolution, so sub-microsecond digits silently disappear on
3629    // round-trip and break the Ed25519 signature. Truncating here
3630    // makes the sign/verify CBOR byte-stable across the storage
3631    // boundary regardless of which adapter wrote the row originally.
3632    let now = truncate_to_microseconds(Utc::now()).to_rfc3339();
3633
3634    // v0.7 H2 — sign if we have a private key. We compute the signature
3635    // BEFORE issuing INSERT so a CBOR/sign failure surfaces as an
3636    // outright write error (vs. a silent partial-write). The signed
3637    // payload includes `valid_from = now` and matching `observed_by`
3638    // so H3's verifier can re-derive the same bytes from the row.
3639    //
3640    // v0.7 H3 follow-up: the `observed_by` *column* is now populated
3641    // from the keypair's `agent_id` on signed inserts so federation
3642    // export (`export_links`) ships the same claim the signature
3643    // commits to. Receivers re-derive `SignableLink` from the wire
3644    // record (see `verify::verify`); without populating the column,
3645    // verification would always fail with `Tampered` because the
3646    // sender signed `Some(agent_id)` but exported `None`.
3647    let (signature, attest_level, observed_by_col): (Option<Vec<u8>>, &'static str, Option<&str>) =
3648        match keypair {
3649            Some(kp) if kp.can_sign() => {
3650                let link = crate::identity::sign::SignableLink {
3651                    src_id: source_id,
3652                    dst_id: target_id,
3653                    relation,
3654                    observed_by: Some(kp.agent_id.as_str()),
3655                    valid_from: Some(now.as_str()),
3656                    valid_until: None,
3657                };
3658                let sig = crate::identity::sign::sign(kp, &link)?;
3659                (
3660                    Some(sig),
3661                    crate::models::AttestLevel::SelfSigned.as_str(),
3662                    Some(kp.agent_id.as_str()),
3663                )
3664            }
3665            _ => (None, crate::models::AttestLevel::Unsigned.as_str(), None),
3666        };
3667
3668    let inserted = conn.execute(
3669        "INSERT OR IGNORE INTO memory_links \
3670            (source_id, target_id, relation, created_at, valid_from, signature, attest_level, observed_by) \
3671         VALUES (?1, ?2, ?3, ?4, ?4, ?5, ?6, ?7)",
3672        params![
3673            source_id,
3674            target_id,
3675            relation,
3676            now,
3677            signature,
3678            attest_level,
3679            observed_by_col
3680        ],
3681    )?;
3682
3683    // v0.7.0 S4-INFO2 — append a `memory_link.created` row to
3684    // `signed_events` so the audit ledger reflects every new link
3685    // (signed or unsigned). The `payload_hash` binds to the same
3686    // canonical CBOR that the H2 signer hashed (or would have, for
3687    // unsigned rows) so an auditor can re-derive the bytes and check
3688    // them against the row.
3689    //
3690    // Best-effort: a failure here logs a warn but does NOT roll back
3691    // the link insert. Cratering a legitimate write because the
3692    // append-only ledger had a transient SQLite error would punish
3693    // the caller for a substrate problem they cannot fix — same
3694    // discipline as `invalidate_link`'s `memory_link.invalidated`
3695    // emit (see also A2's pattern on `execute_pending_action`).
3696    //
3697    // We only emit when the INSERT actually wrote a row.
3698    // `INSERT OR IGNORE` returns `Ok(0)` on a uniqueness-conflict
3699    // replay of an existing `(source_id, target_id, relation)`; in
3700    // that case the audit row was already appended on the original
3701    // create call, and re-appending would generate a misleading
3702    // duplicate-create event.
3703    if inserted > 0 {
3704        let agent_for_event = observed_by_col
3705            .map(str::to_string)
3706            .unwrap_or_else(|| "unknown".to_string());
3707        let signable = crate::identity::sign::SignableLink {
3708            src_id: source_id,
3709            dst_id: target_id,
3710            relation,
3711            observed_by: observed_by_col,
3712            valid_from: Some(now.as_str()),
3713            valid_until: None,
3714        };
3715        match crate::identity::sign::canonical_cbor(&signable) {
3716            Ok(cbor) => {
3717                let event = crate::signed_events::SignedEvent {
3718                    id: uuid::Uuid::new_v4().to_string(),
3719                    agent_id: agent_for_event,
3720                    event_type: crate::signed_events::event_types::MEMORY_LINK_CREATED.to_string(),
3721                    payload_hash: crate::signed_events::payload_hash(&cbor),
3722                    signature: signature.clone(),
3723                    attest_level: attest_level.to_string(),
3724                    timestamp: Utc::now().to_rfc3339(),
3725                    ..crate::signed_events::SignedEvent::default()
3726                };
3727                if let Err(e) = crate::signed_events::append_signed_event(conn, &event) {
3728                    tracing::warn!(
3729                        target: crate::signed_events::SIGNED_EVENTS_TRACE_TARGET,
3730                        source_id, target_id, relation,
3731                        "failed to append memory_link.created audit row: {e}"
3732                    );
3733                }
3734            }
3735            Err(e) => {
3736                tracing::warn!(
3737                    target: crate::signed_events::SIGNED_EVENTS_TRACE_TARGET,
3738                    source_id, target_id, relation,
3739                    "failed to encode canonical CBOR for memory_link.created audit: {e}"
3740                );
3741            }
3742        }
3743    }
3744
3745    Ok(attest_level)
3746}
3747
3748/// v0.7.0 issue #812 / #813 — return the strongest `attest_level`
3749/// label across every outbound link rooted at `source_id`.
3750///
3751/// Strength ladder (highest first):
3752///
3753///   `peer_attested` > `self_signed` > `unsigned`
3754///
3755/// The persona-signing path (`PersonaGenerator::generate`) uses this
3756/// to stamp the Persona's own `attest_level` metadata so the
3757/// downstream `memory_persona` / `memory_persona_generate` wire
3758/// response carries the same attestation level the substrate's
3759/// `derives_from` edges actually hold — a Persona whose source
3760/// links are all signed is itself self-signed, whereas a Persona
3761/// whose source links are unsigned cannot truthfully claim
3762/// `self_signed` no matter what label the curator stamps on it.
3763///
3764/// Returns `"unsigned"` for a source with no outbound links — the
3765/// only honest default for a row whose attestation surface is
3766/// empty.
3767///
3768/// # Errors
3769///
3770/// Bubbles up `rusqlite` errors from the SELECT.
3771pub fn strongest_attest_level_for_source(conn: &Connection, source_id: &str) -> Result<String> {
3772    let mut stmt = conn.prepare(
3773        "SELECT attest_level FROM memory_links \
3774         WHERE source_id = ?1",
3775    )?;
3776    let rows = stmt.query_map(params![source_id], |r| r.get::<_, String>(0))?;
3777    let unsigned = crate::models::AttestLevel::Unsigned.as_str();
3778    let self_signed = crate::models::AttestLevel::SelfSigned.as_str();
3779    let peer_attested = crate::models::AttestLevel::PeerAttested.as_str();
3780    let mut strongest = unsigned;
3781    for row in rows {
3782        let level = row?;
3783        if level == peer_attested {
3784            return Ok(peer_attested.to_string());
3785        }
3786        if level == self_signed && strongest == unsigned {
3787            strongest = self_signed;
3788        }
3789    }
3790    Ok(strongest.to_string())
3791}
3792
3793/// v0.7 H3 — insert an inbound (federation-replicated) link with a
3794/// pre-computed signature and attest level.
3795///
3796/// Distinct from [`create_link_signed`] because the receiver is *not*
3797/// the signer: it must persist whatever bytes the peer signed
3798/// (signature + observed_by + valid_from + valid_until) verbatim, so a
3799/// later `memory_verify` (H4) can re-derive the same canonical CBOR
3800/// from the stored row and re-check against the peer's public key. We
3801/// can't re-sign on the receiver — we don't hold the peer's private
3802/// key, by design.
3803///
3804/// The caller (federation `sync_push` link loop) is responsible for:
3805/// 1. Looking up the peer's public key via
3806///    [`crate::identity::verify::lookup_peer_public_key`].
3807/// 2. Calling [`crate::identity::verify::verify`] when a public key is
3808///    known, and rejecting the link when verification fails.
3809/// 3. Choosing the `attest_level` literal:
3810///    - `"peer_attested"` — verified successfully against an enrolled key,
3811///    - `"unsigned"` — no public key enrolled for `observed_by`, or the
3812///      sender shipped no signature (legacy peer).
3813///
3814/// Idempotent on the unique `(source_id, target_id, relation)` index —
3815/// duplicate inbound replays collapse to a no-op without error.
3816///
3817/// Both `source_id` and `target_id` must already exist locally; the
3818/// receiver is expected to apply incoming `memories` *before* incoming
3819/// `links` in the same `sync_push` request, which the existing handler
3820/// already does.
3821///
3822/// `valid_from` defaults to "now" only when the inbound row carries
3823/// `None` (legacy peer that never populated the column); otherwise the
3824/// peer's value is preserved so the signature still verifies.
3825///
3826/// # Errors
3827///
3828/// Bubbles up the same DB / FK errors as `create_link_signed`. Pre-flight
3829/// existence checks mirror the outbound path so the receiver fails loud
3830/// on missing memories rather than silently dropping the link.
3831pub fn create_link_inbound(conn: &Connection, link: &MemoryLink, attest_level: &str) -> Result<()> {
3832    // v0.7.0 fix-campaign A3 (LINK-PARITY, #690) — defense-in-depth at
3833    // the receiver. The cycle check ALWAYS runs even on inbound peer
3834    // writes: a peer should not be able to extend a `reflects_on`
3835    // cycle on the receiver any more than a local caller can. The K9
3836    // permission gate is BYPASSED only when the inbound link is
3837    // `peer_attested` (the peer's signature was cryptographically
3838    // verified against an enrolled public key in
3839    // `handlers::federation_receive::sync_push` before this call). For
3840    // every other attest_level — including `"unsigned"`, which covers
3841    // legacy peers AND peers whose public key we have not enrolled —
3842    // the local K9 rules enforce. This is the design choice documented
3843    // in #690: mTLS + Ed25519 sig verification is the federation's
3844    // attestation layer; once that passes, namespace governance is the
3845    // peer's local responsibility, not the receiver's. The
3846    // `observed_by` claim becomes the `agent_id` for the K9 evaluation
3847    // when not bypassed — that's the peer's claimed writer and matches
3848    // what the rule matcher already uses for outbound links.
3849    let skip_governance = attest_level == crate::models::AttestLevel::PeerAttested.as_str();
3850    let peer_agent_id = link.observed_by.as_deref().unwrap_or("system");
3851    validate_link_pre_create(
3852        conn,
3853        &link.source_id,
3854        &link.target_id,
3855        link.relation.as_str(),
3856        peer_agent_id,
3857        skip_governance,
3858    )?;
3859    // Same FK guard as create_link_signed — a missing memory means the
3860    // peer raced ahead of us; we surface that to the caller's warn log
3861    // rather than papering over with INSERT OR IGNORE silently.
3862    let source_exists: bool = conn
3863        .query_row(SQL_MEMORY_EXISTS, params![link.source_id], |r| r.get(0))
3864        .unwrap_or(false);
3865    if !source_exists {
3866        // #962 typed envelope — MemoryNotFound{role=Source}.
3867        return Err(anyhow::Error::new(StorageError::MemoryNotFound {
3868            id: link.source_id.clone(),
3869            role: Some(LinkEnd::Source),
3870        }));
3871    }
3872    let target_exists: bool = conn
3873        .query_row(SQL_MEMORY_EXISTS, params![link.target_id], |r| r.get(0))
3874        .unwrap_or(false);
3875    if !target_exists {
3876        // #962 typed envelope — MemoryNotFound{role=Target}.
3877        return Err(anyhow::Error::new(StorageError::MemoryNotFound {
3878            id: link.target_id.clone(),
3879            role: Some(LinkEnd::Target),
3880        }));
3881    }
3882
3883    let now = Utc::now().to_rfc3339();
3884    // Preserve peer's `valid_from` byte-identical so `memory_verify`
3885    // (H4) can re-derive the signed payload from the stored row.
3886    let valid_from = link.valid_from.clone().unwrap_or_else(|| now.clone());
3887    let created_at = if link.created_at.is_empty() {
3888        now
3889    } else {
3890        link.created_at.clone()
3891    };
3892
3893    let inserted = conn.execute(
3894        "INSERT OR IGNORE INTO memory_links \
3895            (source_id, target_id, relation, created_at, valid_from, valid_until, \
3896             signature, attest_level, observed_by) \
3897         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)",
3898        params![
3899            link.source_id,
3900            link.target_id,
3901            link.relation.as_str(),
3902            created_at,
3903            valid_from,
3904            link.valid_until,
3905            link.signature,
3906            attest_level,
3907            link.observed_by,
3908        ],
3909    )?;
3910
3911    // v0.7.0 S4-INFO2 — append a `memory_link.created` row to
3912    // `signed_events` for inbound replicated links too. The audit
3913    // ledger should reflect every new link visible locally, not just
3914    // outbound writes. `payload_hash` binds to the canonical CBOR
3915    // re-derived from the wire-shape link the peer signed, so an
3916    // auditor can replay the exact bytes that were verified at
3917    // ingress.
3918    //
3919    // Best-effort: a failure logs a warn but does NOT roll back the
3920    // link insert (same discipline as the outbound path above and as
3921    // `invalidate_link`'s emit).
3922    //
3923    // Only emit when the INSERT actually wrote a row (idempotent
3924    // sync replays must not generate duplicate-create events).
3925    if inserted > 0 {
3926        let agent_for_event = link
3927            .observed_by
3928            .clone()
3929            .unwrap_or_else(|| "unknown".to_string());
3930        let signable = crate::identity::sign::SignableLink {
3931            src_id: link.source_id.as_str(),
3932            dst_id: link.target_id.as_str(),
3933            relation: link.relation.as_str(),
3934            observed_by: link.observed_by.as_deref(),
3935            valid_from: Some(valid_from.as_str()),
3936            valid_until: link.valid_until.as_deref(),
3937        };
3938        match crate::identity::sign::canonical_cbor(&signable) {
3939            Ok(cbor) => {
3940                let event = crate::signed_events::SignedEvent {
3941                    id: uuid::Uuid::new_v4().to_string(),
3942                    agent_id: agent_for_event,
3943                    event_type: crate::signed_events::event_types::MEMORY_LINK_CREATED.to_string(),
3944                    payload_hash: crate::signed_events::payload_hash(&cbor),
3945                    signature: link.signature.clone(),
3946                    attest_level: attest_level.to_string(),
3947                    timestamp: Utc::now().to_rfc3339(),
3948                    ..crate::signed_events::SignedEvent::default()
3949                };
3950                if let Err(e) = crate::signed_events::append_signed_event(conn, &event) {
3951                    tracing::warn!(
3952                        target: crate::signed_events::SIGNED_EVENTS_TRACE_TARGET,
3953                        source_id = %link.source_id,
3954                        target_id = %link.target_id,
3955                        relation = %link.relation,
3956                        "failed to append memory_link.created audit row (inbound): {e}"
3957                    );
3958                }
3959            }
3960            Err(e) => {
3961                tracing::warn!(
3962                    target: crate::signed_events::SIGNED_EVENTS_TRACE_TARGET,
3963                    source_id = %link.source_id,
3964                    target_id = %link.target_id,
3965                    relation = %link.relation,
3966                    "failed to encode canonical CBOR for inbound memory_link.created audit: {e}"
3967                );
3968            }
3969        }
3970    }
3971
3972    Ok(())
3973}
3974
3975pub fn get_links(conn: &Connection, id: &str) -> Result<Vec<MemoryLink>> {
3976    // v0.7.0 issue #860 — the `memory_get_links` MCP tool's docstring
3977    // promises attestation level + temporal-validity columns
3978    // (`valid_from`, `valid_until`, `observed_by`, `attest_level`) per
3979    // link. The pre-fix SELECT only pulled 4 columns and hard-coded the
3980    // optional fields to `None`, so the promised columns never reached
3981    // the caller. Expand the SELECT to the full row projection that
3982    // the docs commit to. `signature` is intentionally NOT surfaced —
3983    // it is the verification surface owned by the `memory_verify` tool
3984    // (`LinkVerifyRecord` below), not the read-only graph view.
3985    let mut stmt = conn.prepare(
3986        "SELECT source_id, target_id, relation, created_at, \
3987                valid_from, valid_until, observed_by, attest_level \
3988         FROM memory_links \
3989         WHERE source_id = ?1 OR target_id = ?1",
3990    )?;
3991    let rows = stmt.query_map(params![id], |row| {
3992        let relation_str: String = row.get(2)?;
3993        Ok(MemoryLink {
3994            source_id: row.get(0)?,
3995            target_id: row.get(1)?,
3996            // v0.7.0 fix campaign R1-M4 — parse the TEXT column into the
3997            // typed `MemoryLinkRelation` closed set. Unknown values (only
3998            // possible from pre-CHECK rows or a buggy direct-SQL writer)
3999            // fall back to the canonical default so the read-side never
4000            // panics; the SQL CHECK on the write side prevents new bad
4001            // rows from landing.
4002            relation: crate::models::MemoryLinkRelation::from_str(&relation_str)
4003                .unwrap_or_default(),
4004            created_at: row.get(3)?,
4005            // v0.7.0 #860 — temporal-validity + attestation columns
4006            // promised by the `memory_get_links` docstring. `signature`
4007            // stays `None`: that bytes-on-the-wire surface is the
4008            // verifier's concern (`LinkVerifyRecord`), and exposing it
4009            // here would force the JSON response to carry a base64 blob
4010            // every existing caller would have to ignore.
4011            signature: None,
4012            valid_from: row.get::<_, Option<String>>(4)?,
4013            valid_until: row.get::<_, Option<String>>(5)?,
4014            observed_by: row.get::<_, Option<String>>(6)?,
4015            attest_level: row.get::<_, Option<String>>(7)?,
4016        })
4017    })?;
4018    rows.collect::<rusqlite::Result<Vec<_>>>()
4019        .map_err(Into::into)
4020}
4021
4022#[allow(dead_code)]
4023pub fn delete_link(conn: &Connection, source_id: &str, target_id: &str) -> Result<bool> {
4024    let changed = conn.execute(
4025        "DELETE FROM memory_links WHERE source_id = ?1 AND target_id = ?2",
4026        params![source_id, target_id],
4027    )?;
4028    Ok(changed > 0)
4029}
4030
/// v0.7 H4 — full row-projection used by the `memory_verify` MCP tool.
///
/// `get_links` (above) serves the read-only graph view and deliberately
/// never surfaces `signature`; H4 needs the *signed bundle* — the raw
/// signature blob, the agent_id that signed (`observed_by`), and the
/// temporal-validity columns the signature commits to. Keeping this
/// record separate from `get_links` leaves the existing read path's wire
/// shape unchanged.
///
/// Built by [`get_link_for_verify`], which yields `Ok(None)` when the row
/// is absent so the caller can shape a "not found" response instead of
/// bubbling up a generic SQL error.
#[derive(Debug, Clone)]
pub struct LinkVerifyRecord {
    /// `memory_links.source_id` of the fetched row.
    pub source_id: String,
    /// `memory_links.target_id` of the fetched row.
    pub target_id: String,
    /// Raw `relation` TEXT column, kept as the stored string.
    pub relation: String,
    /// Raw signature blob; `None` when the column is `NULL`.
    pub signature: Option<Vec<u8>>,
    /// The agent_id that signed the link, when recorded.
    pub observed_by: Option<String>,
    /// Start of the temporal-validity window, as stored.
    pub valid_from: Option<String>,
    /// End of the temporal-validity window, as stored.
    pub valid_until: Option<String>,
    /// Raw column value as stored by H2/H3 (`"unsigned"`, `"self_signed"`,
    /// `"peer_attested"`, or rarely `NULL` for very old rows that
    /// pre-date the H2 `attest_level` column). H4's MCP handler
    /// normalises a `NULL` to the `Unsigned` enum variant.
    pub attest_level: Option<String>,
}
4057
4058/// Fetch the single link identified by the `(source_id, target_id, relation)`
4059/// composite primary key — the only unique identifier `memory_links`
4060/// exposes today.
4061///
4062/// Used by the H4 `memory_verify` MCP tool to re-derive the canonical
4063/// CBOR payload from the stored row before re-checking the signature.
4064///
4065/// # Errors
4066///
4067/// Bubbles up rusqlite errors. Returns `Ok(None)` when the row is
4068/// absent — this is the load-bearing distinction `memory_verify` needs
4069/// to surface a structured "link not found" response to its caller.
4070pub fn get_link_for_verify(
4071    conn: &Connection,
4072    source_id: &str,
4073    target_id: &str,
4074    relation: &str,
4075) -> Result<Option<LinkVerifyRecord>> {
4076    let mut stmt = conn.prepare(
4077        "SELECT source_id, target_id, relation, signature, observed_by, \
4078                valid_from, valid_until, attest_level \
4079         FROM memory_links \
4080         WHERE source_id = ?1 AND target_id = ?2 AND relation = ?3",
4081    )?;
4082    let mut rows = stmt.query(params![source_id, target_id, relation])?;
4083    if let Some(row) = rows.next()? {
4084        Ok(Some(LinkVerifyRecord {
4085            source_id: row.get(0)?,
4086            target_id: row.get(1)?,
4087            relation: row.get(2)?,
4088            signature: row.get::<_, Option<Vec<u8>>>(3)?,
4089            observed_by: row.get::<_, Option<String>>(4)?,
4090            valid_from: row.get::<_, Option<String>>(5)?,
4091            valid_until: row.get::<_, Option<String>>(6)?,
4092            attest_level: row.get::<_, Option<String>>(7)?,
4093        }))
4094    } else {
4095        Ok(None)
4096    }
4097}
4098
4099// --- Consolidation ---
4100
/// #1558 batch 5 wave 3 — canonical `source` value stamped on rows
/// minted by [`consolidate`]. The MCP `memory_consolidate` tool and the
/// HTTP power-consolidation handler pass it verbatim as the `source`
/// argument. Listed in `validate::VALID_SOURCES`; one spelling,
/// hoist-only.
pub const CONSOLIDATION_SOURCE: &str = "consolidation";
4106
4107/// Consolidate multiple memories into one. Returns the new memory ID.
4108/// Deletes the source memories and creates links from new → old (`derived_from`).
4109#[allow(clippy::too_many_arguments)]
4110pub fn consolidate(
4111    conn: &Connection,
4112    ids: &[String],
4113    title: &str,
4114    summary: &str,
4115    namespace: &str,
4116    tier: &Tier,
4117    source: &str,
4118    consolidator_agent_id: &str,
4119) -> Result<String> {
4120    let now = Utc::now().to_rfc3339();
4121    let new_id = uuid::Uuid::new_v4().to_string();
4122
4123    conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)?;
4124
4125    let result = (|| -> Result<String> {
4126        // Verify all IDs exist and collect metadata in one pass
4127        let mut max_priority = 5i32;
4128        let mut all_tags: Vec<String> = Vec::new();
4129        let mut total_access = 0i64;
4130        let mut merged_metadata = serde_json::Map::new();
4131        // Collect original agent_ids separately — they go into
4132        // `consolidated_from_agents` for forensic attribution.
4133        // The consolidator's own agent_id becomes `agent_id` on the result.
4134        let mut source_agent_ids: Vec<String> = Vec::new();
4135        for id in ids {
4136            match get(conn, id)? {
4137                Some(mem) => {
4138                    max_priority = max_priority.max(mem.priority);
4139                    all_tags.extend(mem.tags);
4140                    total_access = total_access.saturating_add(mem.access_count);
4141                    // Merge metadata: later values overwrite earlier ones on key conflict.
4142                    // Intentionally SKIP `agent_id` to avoid last-write-wins forgery;
4143                    // the consolidator's id is authoritative on the result.
4144                    if let serde_json::Value::Object(map) = mem.metadata {
4145                        for (k, v) in map {
4146                            if k == "agent_id" {
4147                                if let serde_json::Value::String(aid) = &v
4148                                    && !source_agent_ids.contains(aid)
4149                                {
4150                                    source_agent_ids.push(aid.clone());
4151                                }
4152                                continue;
4153                            }
4154                            if let Some(existing) = merged_metadata.get(&k)
4155                                && std::mem::discriminant(existing) != std::mem::discriminant(&v)
4156                            {
4157                                tracing::warn!(
4158                                    "consolidate: key '{}' type changed during merge",
4159                                    k
4160                                );
4161                            }
4162                            merged_metadata.insert(k, v);
4163                        }
4164                    } else {
4165                        tracing::warn!(
4166                            "memory {} has non-object metadata during consolidate, skipping",
4167                            id
4168                        );
4169                    }
4170                }
4171                None => {
4172                    // #962 typed envelope.
4173                    return Err(anyhow::Error::new(StorageError::MemoryNotFound {
4174                        id: id.to_string(),
4175                        role: None,
4176                    }));
4177                }
4178            }
4179        }
4180        all_tags.sort();
4181        all_tags.dedup();
4182        let tags_json = serde_json::to_string(&all_tags)?;
4183        // Record source IDs in metadata for provenance (links would be CASCADE-deleted)
4184        merged_metadata.insert(
4185            crate::models::MemoryLinkRelation::DerivedFrom
4186                .as_str()
4187                .to_string(),
4188            serde_json::Value::Array(
4189                ids.iter()
4190                    .map(|id| serde_json::Value::String(id.clone()))
4191                    .collect(),
4192            ),
4193        );
4194        // NHI: the consolidator owns the new memory (authoritative agent_id);
4195        // original authors are preserved as a separate array for forensics.
4196        merged_metadata.insert(
4197            "agent_id".to_string(),
4198            serde_json::Value::String(consolidator_agent_id.to_string()),
4199        );
4200        if !source_agent_ids.is_empty() {
4201            merged_metadata.insert(
4202                "consolidated_from_agents".to_string(),
4203                serde_json::Value::Array(
4204                    source_agent_ids
4205                        .into_iter()
4206                        .map(serde_json::Value::String)
4207                        .collect(),
4208                ),
4209            );
4210        }
4211        let merged_metadata_value = serde_json::Value::Object(merged_metadata);
4212        crate::validate::validate_metadata(&merged_metadata_value)
4213            .context("merged metadata exceeds size limit")?;
4214        let metadata_json = serde_json::to_string(&merged_metadata_value)?;
4215
4216        // FX-C5 — substrate governance pre-write hook parity. Consolidate
4217        // mints a fresh memory via a raw INSERT that bypasses the
4218        // `db::insert(..)` tail (which is where the SQLite path normally
4219        // consults `GOVERNANCE_PRE_WRITE`). Without this call the
4220        // operator's signed governance rules could be bypassed by
4221        // routing through the consolidate surface. Compose the candidate
4222        // memory shape the way the INSERT below would persist it and
4223        // fire the hook; a refusal short-circuits the transaction body
4224        // and the outer ROLLBACK undoes any work already done in this
4225        // closure.
4226        let candidate = Memory {
4227            id: new_id.clone(),
4228            tier: tier.clone(),
4229            namespace: namespace.to_string(),
4230            title: title.to_string(),
4231            content: summary.to_string(),
4232            tags: all_tags.clone(),
4233            priority: max_priority,
4234            confidence: 1.0,
4235            source: source.to_string(),
4236            access_count: total_access,
4237            created_at: now.clone(),
4238            updated_at: now.clone(),
4239            last_accessed_at: None,
4240            expires_at: None,
4241            metadata: merged_metadata_value.clone(),
4242            reflection_depth: 0,
4243            memory_kind: crate::models::MemoryKind::Observation,
4244            entity_id: None,
4245            persona_version: None,
4246            citations: Vec::new(),
4247            source_uri: None,
4248            source_span: None,
4249            // #1633 — the engine pins confidence=1.0, so the honest
4250            // provenance is CuratorDerived (the #1242 audit-honesty
4251            // invariant: engine-derived values must be discoverable to
4252            // the calibration sweep; 'caller_provided' rows are
4253            // excluded by idx_memories_confidence_source).
4254            confidence_source: crate::models::ConfidenceSource::CuratorDerived,
4255            confidence_signals: None,
4256            confidence_decayed_at: None,
4257            version: crate::models::default_memory_version(),
4258        };
4259        consult_governance_pre_write(&candidate)?;
4260
4261        // v0.7.0 #1466 — consolidate mints a fresh memory via this raw
4262        // INSERT, so it must carry the tier-default expiry too; otherwise a
4263        // consolidated mid/short row would be immortal (NULL expires_at) and
4264        // never reaped by GC. `candidate.created_at == now` so the backfill
4265        // here matches the `?10` bound below.
4266        conn.execute(
4267            "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, confidence, source, access_count, created_at, updated_at, expires_at, metadata, confidence_source)
4268             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, 1.0, ?8, ?9, ?10, ?10, ?11, ?12, ?13)",
4269            params![new_id, tier.as_str(), namespace, title, summary, tags_json, max_priority, source, total_access, now, candidate.effective_expires_at(), metadata_json, candidate.confidence_source.as_str()],
4270        )?;
4271
4272        // Delete source memories first. Note: we intentionally do NOT create
4273        // derived_from links before deletion because ON DELETE CASCADE would
4274        // immediately remove them. Instead, source IDs are recorded in the
4275        // consolidated memory's metadata for provenance.
4276        for id in ids {
4277            delete(conn, id)?;
4278        }
4279
4280        Ok(new_id.clone())
4281    })();
4282
4283    match result {
4284        Ok(id) => {
4285            conn.execute_batch(connection::SQL_COMMIT)?;
4286            Ok(id)
4287        }
4288        Err(e) => {
4289            if let Err(rb) = conn.execute_batch(connection::SQL_ROLLBACK) {
4290                tracing::error!("ROLLBACK failed in consolidate: {}", rb);
4291            }
4292            Err(e)
4293        }
4294    }
4295}
4296
4297// ---------------------------------------------------------------------------
4298// Reflection (v0.7.0 recursive-learning Task 4/8, issue #655).
4299// ---------------------------------------------------------------------------
4300
/// Return `s` with zero-width and other invisible Unicode characters removed,
/// so they cannot be used to slip text past FTS search.
///
/// Dropped code points: zero-width space/joiners and LRM/RLM
/// (U+200B–U+200F), BOM (U+FEFF), soft hyphen (U+00AD), combining grapheme
/// joiner (U+034F), Arabic letter mark (U+061C), Mongolian vowel separator
/// (U+180E), word joiner and invisible operators (U+2060–U+2064), variation
/// selectors (U+FE00–U+FE0F), bidi embeddings/overrides (U+202A–U+202E) and
/// bidi isolates (U+2066–U+2069). Everything else is kept in order.
fn strip_invisible(s: &str) -> String {
    let mut visible = String::with_capacity(s.len());
    for ch in s.chars() {
        let hidden = matches!(
            ch,
            '\u{00AD}'
                | '\u{034F}'
                | '\u{061C}'
                | '\u{180E}'
                | '\u{200B}'..='\u{200F}'
                | '\u{202A}'..='\u{202E}'
                | '\u{2060}'..='\u{2064}'
                | '\u{2066}'..='\u{2069}'
                | '\u{FE00}'..='\u{FE0F}'
                | '\u{FEFF}'
        );
        if !hidden {
            visible.push(ch);
        }
    }
    visible
}
4315
4316fn sanitize_fts_query(input: &str, use_or: bool) -> String {
4317    let joiner = if use_or { " OR " } else { " " };
4318    let cleaned = strip_invisible(input);
4319    let tokens: Vec<String> = cleaned
4320        .split_whitespace()
4321        .filter(|t| !t.is_empty())
4322        .filter(|t| {
4323            // Filter out FTS5 boolean operators as standalone tokens
4324            let upper = t.to_uppercase();
4325            upper != "AND" && upper != "OR" && upper != "NOT" && upper != "NEAR"
4326        })
4327        .map(|token| {
4328            // Strip FTS5 special characters to prevent injection.
4329            // Hyphens are allowed inside words (e.g. "well-known"): the
4330            // unicode61 tokenizer treats `-` as a separator when indexing,
4331            // so `foo-bar` indexes as `foo` + `bar`. Keeping the hyphen in
4332            // the per-token phrase (below we wrap each token in `"…"`)
4333            // produces a phrase query that FTS5 evaluates by matching the
4334            // hyphen-split component terms in order — which is exactly
4335            // what callers expect when searching for hyphenated content.
4336            // Dropping the `'-'` filter here fixes scenario S28 without
4337            // reopening the `+`/`-` exclusion-injection hole (every token
4338            // is already phrase-quoted before being joined, so `-` cannot
4339            // reach FTS5 as a prefix operator).
4340            let clean: String = token
4341                .chars()
4342                .filter(|c| {
4343                    *c != '"'
4344                        && *c != '*'
4345                        && *c != '^'
4346                        && *c != '{'
4347                        && *c != '}'
4348                        && *c != '('
4349                        && *c != ')'
4350                        && *c != ':'
4351                        && *c != '|'
4352                        && *c != '+'
4353                })
4354                .collect();
4355            if clean.is_empty() {
4356                return String::new();
4357            }
4358            format!("\"{clean}\"")
4359        })
4360        .filter(|t| !t.is_empty())
4361        .collect();
4362    if tokens.is_empty() {
4363        return "\"_empty_\"".to_string();
4364    }
4365    tokens.join(joiner)
4366}
4367
4368pub fn list_namespaces(conn: &Connection) -> Result<Vec<NamespaceCount>> {
4369    let now = Utc::now().to_rfc3339();
4370    let mut stmt = conn.prepare(
4371        "SELECT namespace, COUNT(*) FROM memories WHERE expires_at IS NULL OR expires_at > ?1 GROUP BY namespace ORDER BY COUNT(*) DESC",
4372    )?;
4373    let rows = stmt.query_map(params![now], |row| {
4374        Ok(NamespaceCount {
4375            namespace: row.get(0)?,
4376            count: row.get(1)?,
4377        })
4378    })?;
4379    rows.collect::<rusqlite::Result<Vec<_>>>()
4380        .map_err(Into::into)
4381}
4382
/// Hard cap on input groups walked when assembling a taxonomy tree.
/// Even when callers pass a wildly large `limit`, we never walk more
/// than this many `(namespace, count)` rows — bounds memory + time.
/// [`get_taxonomy`] applies it as `limit.min(TAXONOMY_MAX_LIMIT)`.
/// Shared by the sqlite + postgres taxonomy paths and the HTTP / MCP
/// taxonomy surfaces so all four clamp identically.
pub const TAXONOMY_MAX_LIMIT: usize = 10_000;

/// Default group budget for taxonomy listings when the caller passes
/// no explicit `limit` (HTTP `/api/v1/namespaces`, MCP
/// `memory_get_taxonomy`). Lower than [`TAXONOMY_MAX_LIMIT`].
pub const TAXONOMY_DEFAULT_LIMIT: usize = 1000;
4394
4395/// Build a hierarchical namespace taxonomy (Pillar 1 / Stream A).
4396///
4397/// Groups live (non-expired) memories by `namespace`, splits each on
4398/// `/`, and folds them into a `TaxonomyNode` tree. The returned root
4399/// represents `namespace_prefix` (or the synthetic empty-string root if
4400/// no prefix is supplied); each child level descends one segment.
4401///
4402/// `max_depth` is interpreted as "show at most N levels *below the
4403/// prefix*". Memories whose namespace would have required descending
4404/// past the cutoff still contribute to the `subtree_count` of the
4405/// boundary ancestor (their counts are not lost — only the leaf
4406/// rendering is suppressed).
4407///
4408/// `limit` caps the number of input `(namespace, count)` rows we walk
4409/// — when truncated, `total_count` still reflects the full prefix
4410/// total (a separate aggregation), and `truncated` is set so callers
4411/// can warn the user. Hard ceiling: [`TAXONOMY_MAX_LIMIT`].
4412// Body is intentionally one logical pipeline (SQL aggregation → tree
4413// assembly → root materialisation); pulling helpers out hurts
4414// readability more than it helps.
4415#[allow(clippy::too_many_lines)]
4416pub fn get_taxonomy(
4417    conn: &Connection,
4418    namespace_prefix: Option<&str>,
4419    max_depth: usize,
4420    limit: usize,
4421) -> Result<Taxonomy> {
4422    let now = Utc::now().to_rfc3339();
4423    let effective_limit = limit.min(TAXONOMY_MAX_LIMIT);
4424    // Clamp depth so callers asking for "everything" can't construct a
4425    // pathological deep walk; the namespace validator already rejects
4426    // depths > MAX_NAMESPACE_DEPTH on writes.
4427    let effective_depth = max_depth.min(MAX_NAMESPACE_DEPTH);
4428
4429    let prefix = namespace_prefix.unwrap_or("");
4430    // #1531 L5 — `validate_namespace` deliberately places no per-segment
4431    // character restriction (historical flexibility), so a stored
4432    // namespace/prefix may contain the LIKE metacharacters `%` / `_`.
4433    // Escape the descendant pattern (mirroring the visibility clause at
4434    // the top of this file and the postgres `taxonomy_namespaces`
4435    // twin) so a prefix like `a%` cannot over-match `aX/...` subtrees.
4436    let descendant_pattern = format!(
4437        "{}/%",
4438        prefix
4439            .replace('\\', "\\\\")
4440            .replace('%', "\\%")
4441            .replace('_', "\\_")
4442    );
4443
4444    // Total count for the prefix is computed independently of the
4445    // truncated row walk so the caller-visible total stays honest even
4446    // when `limit` drops rows from the tree.
4447    let total_count: usize = if prefix.is_empty() {
4448        let v: i64 = conn.query_row(
4449            "SELECT COUNT(*) FROM memories WHERE expires_at IS NULL OR expires_at > ?1",
4450            params![now],
4451            |row| row.get(0),
4452        )?;
4453        usize::try_from(v).unwrap_or(0)
4454    } else {
4455        let v: i64 = conn.query_row(
4456            "SELECT COUNT(*) FROM memories
4457             WHERE (expires_at IS NULL OR expires_at > ?1)
4458               AND (namespace = ?2 OR namespace LIKE ?3 ESCAPE '\\')",
4459            params![now, prefix, descendant_pattern],
4460            |row| row.get(0),
4461        )?;
4462        usize::try_from(v).unwrap_or(0)
4463    };
4464
4465    // Group rows ordered by count DESC so a small `limit` keeps the
4466    // densest namespaces, then alphabetic for stable tie-breaking.
4467    let groups: Vec<(String, usize)> = if prefix.is_empty() {
4468        let mut stmt = conn.prepare(
4469            "SELECT namespace, COUNT(*) FROM memories
4470             WHERE expires_at IS NULL OR expires_at > ?1
4471             GROUP BY namespace
4472             ORDER BY COUNT(*) DESC, namespace ASC
4473             LIMIT ?2",
4474        )?;
4475        let rows = stmt.query_map(
4476            params![now, i64::try_from(effective_limit).unwrap_or(i64::MAX)],
4477            |row| {
4478                let ns: String = row.get(0)?;
4479                let c: i64 = row.get(1)?;
4480                Ok((ns, usize::try_from(c).unwrap_or(0)))
4481            },
4482        )?;
4483        rows.collect::<rusqlite::Result<Vec<_>>>()?
4484    } else {
4485        let mut stmt = conn.prepare(
4486            "SELECT namespace, COUNT(*) FROM memories
4487             WHERE (expires_at IS NULL OR expires_at > ?1)
4488               AND (namespace = ?2 OR namespace LIKE ?3 ESCAPE '\\')
4489             GROUP BY namespace
4490             ORDER BY COUNT(*) DESC, namespace ASC
4491             LIMIT ?4",
4492        )?;
4493        let rows = stmt.query_map(
4494            params![
4495                now,
4496                prefix,
4497                descendant_pattern,
4498                i64::try_from(effective_limit).unwrap_or(i64::MAX)
4499            ],
4500            |row| {
4501                let ns: String = row.get(0)?;
4502                let c: i64 = row.get(1)?;
4503                Ok((ns, usize::try_from(c).unwrap_or(0)))
4504            },
4505        )?;
4506        rows.collect::<rusqlite::Result<Vec<_>>>()?
4507    };
4508
4509    let walked_count: usize = groups.iter().map(|(_, c)| *c).sum();
4510    let truncated = walked_count < total_count;
4511
4512    // Synthesize the root node. `name` is the trailing segment of the
4513    // prefix (or empty for the global root) so renderers can label it.
4514    let root_name = prefix.rsplit('/').next().unwrap_or("").to_string();
4515    let mut root = TaxonomyNode {
4516        namespace: prefix.to_string(),
4517        name: root_name,
4518        count: 0,
4519        subtree_count: 0,
4520        children: Vec::new(),
4521    };
4522
4523    for (ns, c) in groups {
4524        // Compute path segments below the prefix. When prefix is empty,
4525        // the whole namespace becomes the suffix; when ns == prefix
4526        // exactly, segments is empty and the count lands on the root.
4527        let suffix: &str = if prefix.is_empty() {
4528            ns.as_str()
4529        } else if ns == prefix {
4530            ""
4531        } else if ns.len() > prefix.len() + 1
4532            && ns.starts_with(prefix)
4533            && ns.as_bytes()[prefix.len()] == b'/'
4534        {
4535            &ns[prefix.len() + 1..]
4536        } else {
4537            // Defensive: SQL filter shouldn't return this, but skip rather
4538            // than panic if it ever does (e.g. a stray match like
4539            // "alphaone-sibling" matching prefix "alphaone").
4540            continue;
4541        };
4542        let all_segments: Vec<&str> = if suffix.is_empty() {
4543            Vec::new()
4544        } else {
4545            suffix.split('/').collect()
4546        };
4547        let take = all_segments.len().min(effective_depth);
4548        let used = &all_segments[..take];
4549        let exact_match_in_view = take == all_segments.len();
4550
4551        // Walk into the tree. Every ancestor's subtree_count grows by c
4552        // — including the root — and only the deepest visible node's
4553        // `count` does, and only when it represents the exact namespace
4554        // (not a clamped boundary).
4555        root.subtree_count += c;
4556        if used.is_empty() {
4557            root.count += c;
4558            continue;
4559        }
4560
4561        let mut path_so_far = prefix.to_string();
4562        let mut node = &mut root;
4563        for (i, seg) in used.iter().enumerate() {
4564            if !path_so_far.is_empty() {
4565                path_so_far.push('/');
4566            }
4567            path_so_far.push_str(seg);
4568            let pos = node.children.iter().position(|ch| ch.name == *seg);
4569            let idx = if let Some(p) = pos {
4570                p
4571            } else {
4572                node.children.push(TaxonomyNode {
4573                    namespace: path_so_far.clone(),
4574                    name: (*seg).to_string(),
4575                    count: 0,
4576                    subtree_count: 0,
4577                    children: Vec::new(),
4578                });
4579                node.children.len() - 1
4580            };
4581            node = &mut node.children[idx];
4582            node.subtree_count += c;
4583            let is_leaf = i + 1 == used.len();
4584            if is_leaf && exact_match_in_view {
4585                node.count += c;
4586            }
4587        }
4588    }
4589
4590    sort_taxonomy(&mut root);
4591
4592    Ok(Taxonomy {
4593        tree: root,
4594        total_count,
4595        truncated,
4596    })
4597}
4598
4599fn sort_taxonomy(node: &mut TaxonomyNode) {
4600    node.children.sort_by(|a, b| a.name.cmp(&b.name));
4601    for child in &mut node.children {
4602        sort_taxonomy(child);
4603    }
4604}
4605
4606/// v0.7.0 ARCH-2 followup (FX-C2-batch3) — backend-blind taxonomy
4607/// tree-folding helper. Lifted out of `get_taxonomy` so the Postgres
4608/// SAL adapter can share the exact same fold logic with the SQLite
4609/// adapter, holding the cross-backend wire shape byte-for-byte.
4610///
4611/// Inputs:
4612/// - `prefix`: the namespace prefix the caller queried (`""` = global root).
4613/// - `effective_depth`: clamped depth, already `min(MAX_NAMESPACE_DEPTH)`.
4614/// - `total_count`: full prefix total (NOT truncated by the row walk).
4615/// - `truncated`: caller-computed truncation flag.
4616/// - `groups`: walked `(namespace, count)` rows.
4617///
4618/// Returns the assembled [`Taxonomy`] tree with sorted children.
4619#[doc(hidden)]
4620pub fn fold_taxonomy_groups(
4621    prefix: &str,
4622    effective_depth: usize,
4623    total_count: usize,
4624    truncated: bool,
4625    groups: Vec<(String, usize)>,
4626) -> Taxonomy {
4627    let root_name = prefix.rsplit('/').next().unwrap_or("").to_string();
4628    let mut root = TaxonomyNode {
4629        namespace: prefix.to_string(),
4630        name: root_name,
4631        count: 0,
4632        subtree_count: 0,
4633        children: Vec::new(),
4634    };
4635
4636    for (ns, c) in groups {
4637        let suffix: &str = if prefix.is_empty() {
4638            ns.as_str()
4639        } else if ns == prefix {
4640            ""
4641        } else if ns.len() > prefix.len() + 1
4642            && ns.starts_with(prefix)
4643            && ns.as_bytes()[prefix.len()] == b'/'
4644        {
4645            &ns[prefix.len() + 1..]
4646        } else {
4647            continue;
4648        };
4649        let all_segments: Vec<&str> = if suffix.is_empty() {
4650            Vec::new()
4651        } else {
4652            suffix.split('/').collect()
4653        };
4654        let take = all_segments.len().min(effective_depth);
4655        let used = &all_segments[..take];
4656        let exact_match_in_view = take == all_segments.len();
4657
4658        root.subtree_count += c;
4659        if used.is_empty() {
4660            root.count += c;
4661            continue;
4662        }
4663
4664        let mut path_so_far = prefix.to_string();
4665        let mut node = &mut root;
4666        for (i, seg) in used.iter().enumerate() {
4667            if !path_so_far.is_empty() {
4668                path_so_far.push('/');
4669            }
4670            path_so_far.push_str(seg);
4671            let pos = node.children.iter().position(|ch| ch.name == *seg);
4672            let idx = if let Some(p) = pos {
4673                p
4674            } else {
4675                node.children.push(TaxonomyNode {
4676                    namespace: path_so_far.clone(),
4677                    name: (*seg).to_string(),
4678                    count: 0,
4679                    subtree_count: 0,
4680                    children: Vec::new(),
4681                });
4682                node.children.len() - 1
4683            };
4684            node = &mut node.children[idx];
4685            node.subtree_count += c;
4686            let is_leaf = i + 1 == used.len();
4687            if is_leaf && exact_match_in_view {
4688                node.count += c;
4689            }
4690        }
4691    }
4692
4693    sort_taxonomy(&mut root);
4694
4695    Taxonomy {
4696        tree: root,
4697        total_count,
4698        truncated,
4699    }
4700}
4701
/// Default row cap for memory list/search surfaces when the caller
/// passes no explicit limit. Mirrored by the postgres SAL adapter
/// (`src/store/postgres.rs::list_by_source_uri`) so both backends
/// page identically.
pub const LIST_DEFAULT_CAP: usize = 200;

/// Hard ceiling on rows returned by the memory list/search surfaces.
/// One shared knob across the sqlite + postgres SAL adapters; same
/// family as `KG_TIMELINE_MAX_LIMIT` / `KG_QUERY_MAX_LIMIT`.
pub const LIST_MAX_LIMIT: usize = 1000;

/// Post-clamp `usize → i64` conversion fallback for list/query limits.
/// Unreachable in practice (values are already clamped to at most
/// [`LIST_MAX_LIMIT`], which always fits `i64`); kept as a named knob so
/// the fallback page size is explicit rather than magic.
pub const LIST_FALLBACK_LIMIT: usize = 100;

/// Default page size for archive listings (HTTP `/api/v1/archive` and
/// MCP `memory_archive_list`) when the caller passes no explicit
/// `limit` — one knob so both surfaces page identically.
pub const ARCHIVE_DEFAULT_PAGE_LIMIT: usize = 50;

/// Default page size for governance pending-action listings (MCP
/// `memory_pending_list` / subscription approval feeds).
pub const PENDING_DEFAULT_PAGE_LIMIT: usize = 100;

/// Hard floor for duplicate-check threshold. Below this, anything can match
/// random unrelated content — refuse to honor the lookup so callers don't
/// silently get garbage merge suggestions. [`check_duplicate`] raises any
/// lower caller-supplied threshold to this value.
pub const DUPLICATE_THRESHOLD_MIN: f32 = 0.5;

/// Default cosine similarity threshold for declaring a candidate a
/// duplicate. Empirically tuned for MiniLM-L6-v2 (the local embedder):
/// near-paraphrases of the same memory tend to land at 0.88+, while
/// loosely related content sits well below 0.85. Callers can override.
pub const DUPLICATE_THRESHOLD_DEFAULT: f32 = 0.85;
4738
4739/// Find the nearest-neighbor live memory by cosine similarity (Pillar 2 /
4740/// Stream D — `memory_check_duplicate`).
4741///
4742/// Linear scan over `memories.embedding` rows that pass the live-row
4743/// (non-expired) gate and the optional namespace filter. The chosen
4744/// candidate is the highest-cosine match across the pool; the
4745/// caller-supplied `threshold` is used purely to set `is_duplicate` on
4746/// the response — the nearest neighbor is always returned (when the
4747/// pool is non-empty) so callers can show "closest existing memory was
4748/// X at similarity Y" even on a not-quite-duplicate.
4749///
4750/// Threshold is clamped at [`DUPLICATE_THRESHOLD_MIN`] so that wildly
4751/// permissive thresholds can't be used to dress unrelated content as a
4752/// merge suggestion.
4753///
4754/// Returns `(check, scanned)` where `scanned` is the count of embedded
4755/// candidates compared (useful for diagnostics).
4756pub fn check_duplicate(
4757    conn: &Connection,
4758    query_embedding: &[f32],
4759    namespace: Option<&str>,
4760    threshold: f32,
4761) -> Result<DuplicateCheck> {
4762    let effective_threshold = threshold.max(DUPLICATE_THRESHOLD_MIN);
4763    let now = Utc::now().to_rfc3339();
4764
4765    // SQL filter handles the live-row + optional namespace gate; the
4766    // cosine pass happens in Rust because SQLite has no native vector
4767    // op. We only pull rows with non-NULL embeddings — anything missing
4768    // an embedding can't be a near-duplicate by this definition.
4769    let rows: Vec<(String, String, String, Vec<u8>)> = if let Some(ns) = namespace {
4770        let mut stmt = conn.prepare(
4771            "SELECT id, title, namespace, embedding FROM memories
4772             WHERE embedding IS NOT NULL
4773               AND (expires_at IS NULL OR expires_at > ?1)
4774               AND namespace = ?2",
4775        )?;
4776        let mapped = stmt.query_map(params![now, ns], |row| {
4777            Ok((
4778                row.get::<_, String>(0)?,
4779                row.get::<_, String>(1)?,
4780                row.get::<_, String>(2)?,
4781                row.get::<_, Vec<u8>>(3)?,
4782            ))
4783        })?;
4784        mapped.collect::<rusqlite::Result<Vec<_>>>()?
4785    } else {
4786        let mut stmt = conn.prepare(
4787            "SELECT id, title, namespace, embedding FROM memories
4788             WHERE embedding IS NOT NULL
4789               AND (expires_at IS NULL OR expires_at > ?1)",
4790        )?;
4791        let mapped = stmt.query_map(params![now], |row| {
4792            Ok((
4793                row.get::<_, String>(0)?,
4794                row.get::<_, String>(1)?,
4795                row.get::<_, String>(2)?,
4796                row.get::<_, Vec<u8>>(3)?,
4797            ))
4798        })?;
4799        mapped.collect::<rusqlite::Result<Vec<_>>>()?
4800    };
4801
4802    let mut best: Option<DuplicateMatch> = None;
4803    let mut scanned: usize = 0;
4804    for (id, title, ns, bytes) in rows {
4805        if bytes.is_empty() {
4806            continue;
4807        }
4808        // v0.6.3.1 P2 — magic-byte aware decode. Malformed payloads
4809        // (anything other than headed-LE or legacy-LE) are skipped with
4810        // telemetry so a corrupted row can't poison duplicate detection.
4811        let candidate = match crate::embeddings::decode_embedding_blob(&bytes) {
4812            Ok(v) => v,
4813            Err(e) => {
4814                tracing::warn!(
4815                    memory_id = %id,
4816                    blob_len = bytes.len(),
4817                    error = %e,
4818                    "skipping duplicate-check candidate with malformed embedding"
4819                );
4820                continue;
4821            }
4822        };
4823        // Vectors of mismatched dimension would compute against a
4824        // truncated query (Embedder::cosine_similarity zips). Skip
4825        // rather than report a misleading similarity score.
4826        if candidate.len() != query_embedding.len() {
4827            tracing::warn!(
4828                memory_id = %id,
4829                expected = query_embedding.len(),
4830                got = candidate.len(),
4831                "skipping duplicate-check candidate with dimension mismatch"
4832            );
4833            continue;
4834        }
4835        let similarity =
4836            crate::embeddings::Embedder::cosine_similarity(query_embedding, &candidate);
4837        scanned += 1;
4838        let is_better = best.as_ref().is_none_or(|m| similarity > m.similarity);
4839        if is_better {
4840            best = Some(DuplicateMatch {
4841                id,
4842                title,
4843                namespace: ns,
4844                similarity,
4845            });
4846        }
4847    }
4848
4849    let is_duplicate = best
4850        .as_ref()
4851        .is_some_and(|m| m.similarity >= effective_threshold);
4852    Ok(DuplicateCheck {
4853        is_duplicate,
4854        threshold: effective_threshold,
4855        nearest: best,
4856        candidates_scanned: scanned,
4857    })
4858}
4859
4860/// Canonical hash used by [`check_duplicate_with_text`] to detect
4861/// byte-identical `title + content` pairs even when the embedding
4862/// pipeline (lower-casing, prefix tagging, etc.) prevents the cosine
4863/// similarity from saturating at 1.0.
4864///
4865/// The input is the *exact* text the MCP/HTTP layer hands to the
4866/// embedder — `crate::embeddings::embedding_document(title, content)` — and we hash its raw
4867/// UTF-8 bytes with no normalization. Lower-casing or whitespace
4868/// stripping at this layer would re-introduce the very ambiguity we
4869/// are trying to short-circuit (two semantically-identical strings
4870/// hashing to the same value but being substantively different in,
4871/// e.g., a code snippet that differs only in whitespace).
4872///
4873/// SHA-256 is the same primitive the audit/subscriptions/signed-events
4874/// layers already use, so callers don't have to reach for a new
4875/// dependency.
4876#[must_use]
4877pub fn canonical_content_hash(text: &str) -> [u8; 32] {
4878    use sha2::{Digest, Sha256};
4879    let mut hasher = Sha256::new();
4880    hasher.update(text.as_bytes());
4881    hasher.finalize().into()
4882}
4883
4884// ---------------------------------------------------------------------------
4885// v0.7.0 (issue #519) — proactive conflict detection on memory_store
4886// ---------------------------------------------------------------------------
4887
/// Cosine-similarity threshold above which a candidate is treated as a
/// near-duplicate for the purpose of [`proactive_conflict_check`].
///
/// Empirically tuned for the MiniLM-L6-v2 / Nomic embedder pair: rows
/// whose `(title, content)` paraphrase the query at this level are
/// already considered "the same memory" by the existing duplicate
/// machinery (`DUPLICATE_THRESHOLD_DEFAULT` sits at 0.85 for the
/// merge-suggestion surface). 0.95 is the stricter "this is the same
/// fact, restated" bar; combined with the textual contradiction signal
/// below, we surface only writes that proactively conflict with an
/// established near-duplicate.
///
/// **Known miss class (pre-existing; deliberately unchanged by the
/// #1579 A5 remediation):** genuine paraphrases can embed just BELOW
/// this bar — the P2-audit probe pair ("deadline is june 15" vs
/// "deadline is june 22" in otherwise-identical sentences) scored
/// 0.945 cosine on the release MiniLM and is therefore not detected.
/// Safe direction for an advisory gate (the write is ALLOWED; nothing
/// is wrongly refused); lowering the bar instead would re-open the
/// false-409 epidemic the
/// [`PROACTIVE_CONFLICT_CONTENT_JACCARD_FLOOR`] corroboration exists
/// to close. The deeper `detect_contradiction` tooling remains the
/// surface for sub-threshold contradictions.
pub const PROACTIVE_CONFLICT_SIM_THRESHOLD: f32 = 0.95;

/// Top-K cap for the candidate pool inspected by
/// [`proactive_conflict_check`]. After every fetched candidate has
/// been cosine-scored and sorted, only the K highest-scoring ones are
/// put through the content-difference + Jaccard comparison. The cap
/// bounds that textual step at O(K); the cosine scoring pass itself
/// still runs over every row the caller fetched (bounded separately by
/// [`PROACTIVE_CONFLICT_SCAN_LIMIT`] on the scan path and by
/// [`PROACTIVE_CONFLICT_INDEX_K`] on the indexed path).
pub const PROACTIVE_CONFLICT_TOP_K: usize = 5;

/// #1579 A5 — row cap on the bounded fallback scan in
/// [`proactive_conflict_check`] (most-recently-updated rows first).
///
/// Pre-#1579 the check decoded + cosine-scored EVERY embedded live row
/// in the namespace per write — an O(N) scan that (under the HTTP
/// daemon's single-connection mutex) collapsed semantic-tier write
/// throughput to 0.3-1.7 rps in the P2 audit. The fallback path (used
/// when no fully-searchable HNSW index is available: keyword tier,
/// the async-boot warm window, CLI one-shots) now scans only the
/// `PROACTIVE_CONFLICT_SCAN_LIMIT` most-recently-updated candidates.
/// Recency ordering is the right prior for an advisory near-duplicate
/// gate: conflicting restatements cluster temporally (an agent
/// re-asserting a fact it just learned), and the indexed path (the
/// production semantic-tier route) covers the long tail. A miss here
/// only ALLOWS a write that deeper inspection might have refused —
/// never refuses a legitimate one — which is the safe direction for
/// an advisory check with a `force=true` bypass.
pub const PROACTIVE_CONFLICT_SCAN_LIMIT: usize = 1024;

/// #1579 A5 — `k` requested from the HNSW index by
/// [`proactive_conflict_check_with_index`]. Deliberately larger than
/// [`PROACTIVE_CONFLICT_TOP_K`] because the index is global while the
/// conflict check is namespace-scoped: the namespace filter is applied
/// AFTER the ANN search (post-filter semantics), so foreign-namespace
/// hits consume slots. 32 gives the in-namespace pool ample headroom
/// (the ≥ 0.95 cosine gate means only near-identical vectors matter,
/// and > 32 near-identical foreign-namespace rows crowding out an
/// in-namespace conflict is a pathology the bounded fallback's
/// advisory contract already tolerates — see
/// [`PROACTIVE_CONFLICT_SCAN_LIMIT`]).
pub const PROACTIVE_CONFLICT_INDEX_K: usize = 32;

/// #1579 A5 — minimum Jaccard token overlap between the incoming
/// `content` and a cosine-near-duplicate candidate's `content` for the
/// pair to be classified as a proactive conflict.
///
/// **Why this exists** (the P2 false-409 epidemic). The P2 perf audit
/// measured **81% of semantic-tier writes refused with 409** when a
/// loadtest wrote unique random-alphanumeric payloads: MiniLM-L6-v2
/// assigns ≥ 0.95 cosine to ~28% of PAIRS of unrelated 256-byte noise
/// documents (probe on the release model: pairwise min 0.44 / mean
/// 0.83 / max 0.97), so with a 1k-row namespace virtually every write
/// found SOME ≥ 0.95 "near-duplicate" — while a genuine paraphrase
/// pair ("deadline is june 15" vs "deadline is june 22" in identical
/// sentences) scored 0.945, BELOW the threshold. Embedding cosine
/// alone is therefore not sufficient evidence of "the same fact,
/// restated". The deterministic corroboration is lexical: a true
/// restatement shares vocabulary. We reuse the #1320 tokenizer
/// (lowercase, split on non-alphanumeric, stopword-strip — see
/// [`CONTRADICTION_TITLE_JACCARD_FLOOR`]) over the CONTENT bodies and
/// require this floor, which rejects the disjoint-token noise pairs
/// (Jaccard ≈ 0) while keeping real restatements (the june-15/june-22
/// pair scores 0.5).
pub const PROACTIVE_CONFLICT_CONTENT_JACCARD_FLOOR: f32 = 0.30;
4972
/// Result envelope returned by [`proactive_conflict_check`] (and its
/// index-routed / candidate-list siblings) when an existing memory
/// near-duplicates AND textually contradicts the incoming write:
/// cosine at or above [`PROACTIVE_CONFLICT_SIM_THRESHOLD`], a content
/// body that differs from the incoming one, and content token overlap
/// at or above [`PROACTIVE_CONFLICT_CONTENT_JACCARD_FLOOR`].
#[derive(Debug, Clone)]
pub struct ProactiveConflict {
    /// `id` of the existing memory the new write conflicts with.
    pub existing_id: String,
    /// Title of the existing memory (for diagnostic surfacing).
    pub existing_title: String,
    /// Cosine similarity between the incoming embedding and the
    /// existing row's stored embedding (always
    /// `>= PROACTIVE_CONFLICT_SIM_THRESHOLD` in returned values).
    pub similarity: f32,
    /// Reason the candidate was classified as conflicting. Currently
    /// always `"near_duplicate_with_differing_content"`; future
    /// extensions (LLM-backed detector, negation-flip heuristic) can
    /// surface a different reason string here.
    pub reason: &'static str,
}
4991
4992/// v0.7.0 (issue #519) — proactive contradiction detection on the
4993/// `memory_store` write path.
4994///
4995/// Scans the top-`PROACTIVE_CONFLICT_TOP_K` most similar live memories
4996/// in the new memory's namespace (by cosine similarity over the
4997/// existing `memories.embedding` column) and returns the first match
4998/// whose similarity meets `PROACTIVE_CONFLICT_SIM_THRESHOLD` AND whose
4999/// stored `content` differs from the incoming `mem.content` exactly.
5000///
5001/// The "differs exactly" check is the deterministic substrate-layer
5002/// contradiction signal — a row that paraphrases the same fact at
5003/// ≥ 0.95 cosine but spells out a different content body is, by
5004/// construction, asserting a near-duplicate fact with a different
5005/// substantive payload (the LLM detector would call this a soft
5006/// contradiction; the substrate check calls it a near-duplicate with
5007/// differing content). Callers that want the full LLM-backed
5008/// `detect_contradiction` round-trip can layer it on top of the
5009/// proactive-check result; the substrate path stays LLM-independent so
5010/// it runs deterministically under `AI_MEMORY_NO_CONFIG=1` and in
5011/// every CI environment.
5012///
5013/// A `force=true` switch at the handler layer (MCP / CLI / HTTP)
5014/// bypasses this check entirely — see `src/mcp/tools/store.rs` and
5015/// `src/handlers/http.rs::create_memory`.
5016///
5017/// Returns:
5018/// * `Ok(None)` — no conflict detected; the caller may proceed with
5019///   the insert.
5020/// * `Ok(Some(ProactiveConflict))` — at least one candidate triggered
5021///   the near-duplicate-with-differing-content guard; the caller
5022///   should refuse the insert (and return an error envelope naming
5023///   `existing_id`) unless `force=true` was set.
5024///
5025/// # Errors
5026///
5027/// Bubbles rusqlite errors from the candidate-pool SELECT. The cosine
5028/// pass itself is in-memory and infallible (mismatched-dim candidates
5029/// are skipped with a tracing warn, mirroring `check_duplicate`).
5030pub fn proactive_conflict_check(
5031    conn: &Connection,
5032    mem: &Memory,
5033    query_embedding: &[f32],
5034) -> Result<Option<ProactiveConflict>> {
5035    if query_embedding.is_empty() {
5036        return Ok(None);
5037    }
5038    let now = Utc::now().to_rfc3339();
5039
5040    // Pull (id, title, content, embedding) for the live, in-namespace
5041    // pool. We restrict to the same namespace as the incoming write
5042    // because cross-namespace "contradictions" are not a substrate
5043    // concept (namespaces are deliberately isolated scopes); the
5044    // namespace-scoped check matches the `find_contradictions` /
5045    // `find_by_title_namespace` semantics already used by the
5046    // `OnConflict::Error` branch of `insert_with_conflict`.
5047    //
5048    // #1579 A5 — BOUNDED: most-recently-updated rows first, capped at
5049    // `PROACTIVE_CONFLICT_SCAN_LIMIT`. See the const for the recency
5050    // rationale and the advisory-miss contract. The unbounded
5051    // full-namespace decode+scan this replaces was the P2-measured
5052    // write-throughput collapse (0.3-1.7 rps under the HTTP mutex).
5053    let mut stmt = conn.prepare(
5054        "SELECT id, title, content, embedding FROM memories
5055         WHERE embedding IS NOT NULL
5056           AND (expires_at IS NULL OR expires_at > ?1)
5057           AND namespace = ?2
5058         ORDER BY updated_at DESC
5059         LIMIT ?3",
5060    )?;
5061    let rows: Vec<(String, String, String, Vec<u8>)> = stmt
5062        .query_map(
5063            params![
5064                now,
5065                &mem.namespace,
5066                i64::try_from(PROACTIVE_CONFLICT_SCAN_LIMIT).unwrap_or(i64::MAX)
5067            ],
5068            |row| {
5069                Ok((
5070                    row.get::<_, String>(0)?,
5071                    row.get::<_, String>(1)?,
5072                    row.get::<_, String>(2)?,
5073                    row.get::<_, Vec<u8>>(3)?,
5074                ))
5075            },
5076        )?
5077        .collect::<rusqlite::Result<Vec<_>>>()?;
5078
5079    Ok(proactive_conflict_verdict(mem, query_embedding, rows))
5080}
5081
5082/// #1579 A5 — HNSW-routed entry point for the proactive conflict
5083/// check. This is the production write-path dispatcher:
5084///
5085/// * When a [`crate::hnsw::VectorIndex`] is available AND fully
5086///   searchable (its graph covers `all_entries` — see
5087///   [`crate::hnsw::VectorIndex::is_fully_searchable`]), the candidate
5088///   pool comes from an O(log N) ANN query instead of the table scan;
5089///   candidates are then re-verified against the DB (live, same
5090///   namespace, EXACT cosine recomputed from the stored blob — the
5091///   index's distance is approximate and assumes L2-normalised
5092///   vectors, so the stored-blob recompute keeps the decision function
5093///   byte-equal to the scan path).
5094/// * Otherwise (no index at keyword tier, the async-boot warm window
5095///   before the first graph swap, CLI one-shots below the build
5096///   threshold) it falls back to the BOUNDED recency scan in
5097///   [`proactive_conflict_check`]. An EMPTY index also routes to the
5098///   fallback (#1579 QC): emptiness makes `is_fully_searchable`
5099///   vacuously true, but during the async-boot LOAD phase (daemon
5100///   bound with `VectorIndex::empty()`, boot loader still reading the
5101///   stored embeddings, `seed_entries` not yet landed) it says
5102///   nothing about what the DB holds — consulting it would silently
5103///   SKIP the check instead of degrading to the documented bounded
5104///   scan. On a genuinely empty corpus the fallback scan matches zero
5105///   rows, so the routing is behaviour-neutral outside that window.
5106///
5107/// Known under-detection windows, both safe-direction (a missed
5108/// conflict ALLOWS a write; the check never wrongly refuses):
5109/// rows evicted from the index's 100k entry cap are invisible to the
5110/// ANN query, and a warm-window write beyond the bounded scan's
5111/// recency horizon is invisible to the fallback. Callers that need a
5112/// hard guarantee already have the `(title, namespace)` SQL conflict
5113/// gate; this check is the advisory #519 layer with a `force=true`
5114/// bypass.
5115///
5116/// # Errors
5117///
5118/// Bubbles rusqlite errors from the candidate SELECTs (same contract
5119/// as [`proactive_conflict_check`]).
5120pub fn proactive_conflict_check_with_index(
5121    conn: &Connection,
5122    mem: &Memory,
5123    query_embedding: &[f32],
5124    vector_index: Option<&crate::hnsw::VectorIndex>,
5125) -> Result<Option<ProactiveConflict>> {
5126    if query_embedding.is_empty() {
5127        return Ok(None);
5128    }
5129    if let Some(idx) = vector_index
5130        && idx.is_fully_searchable()
5131        // #1579 QC — an empty index is vacuously fully-searchable but
5132        // proves nothing about the DB during the async-boot LOAD
5133        // phase; see the doc comment above and
5134        // `crate::hnsw::VectorIndex::is_empty`.
5135        && !idx.is_empty()
5136    {
5137        let hits = idx.search(query_embedding, PROACTIVE_CONFLICT_INDEX_K);
5138        let ids: Vec<String> = hits.into_iter().map(|h| h.id).collect();
5139        return proactive_conflict_check_candidates(conn, mem, query_embedding, &ids);
5140    }
5141    tracing::trace!(
5142        target: "proactive_conflict",
5143        namespace = %mem.namespace,
5144        "no fully-searchable (or empty) vector index — bounded recency-scan fallback (#1579 A5)"
5145    );
5146    proactive_conflict_check(conn, mem, query_embedding)
5147}
5148
5149/// #1579 A5 — verify an ANN-derived candidate id list against the DB
5150/// and apply the conflict verdict. Fetches only the named rows (point
5151/// lookups by PK), re-applies the live/namespace filters the table
5152/// scan used, and recomputes EXACT cosine from the stored embedding
5153/// blob so the decision function is identical to the scan path.
5154///
5155/// Public so the HTTP create handler (which holds the vector index
5156/// behind an async mutex and must run the ANN search BEFORE taking
5157/// the DB lock) can split the search from the verification.
5158///
5159/// # Errors
5160///
5161/// Bubbles rusqlite errors from the `IN (...)` candidate SELECT.
5162pub fn proactive_conflict_check_candidates(
5163    conn: &Connection,
5164    mem: &Memory,
5165    query_embedding: &[f32],
5166    candidate_ids: &[String],
5167) -> Result<Option<ProactiveConflict>> {
5168    if query_embedding.is_empty() || candidate_ids.is_empty() {
5169        return Ok(None);
5170    }
5171    let now = Utc::now().to_rfc3339();
5172    let placeholders = std::iter::repeat_n("?", candidate_ids.len())
5173        .collect::<Vec<_>>()
5174        .join(",");
5175    let sql = format!(
5176        "SELECT id, title, content, embedding FROM memories
5177         WHERE id IN ({placeholders})
5178           AND embedding IS NOT NULL
5179           AND (expires_at IS NULL OR expires_at > ?{p_now})
5180           AND namespace = ?{p_ns}",
5181        p_now = candidate_ids.len() + 1,
5182        p_ns = candidate_ids.len() + 2,
5183    );
5184    let mut stmt = conn.prepare(&sql)?;
5185    let bind_iter = candidate_ids
5186        .iter()
5187        .map(String::as_str)
5188        .chain([now.as_str(), mem.namespace.as_str()]);
5189    let rows: Vec<(String, String, String, Vec<u8>)> = stmt
5190        .query_map(rusqlite::params_from_iter(bind_iter), |row| {
5191            Ok((
5192                row.get::<_, String>(0)?,
5193                row.get::<_, String>(1)?,
5194                row.get::<_, String>(2)?,
5195                row.get::<_, Vec<u8>>(3)?,
5196            ))
5197        })?
5198        .collect::<rusqlite::Result<Vec<_>>>()?;
5199
5200    Ok(proactive_conflict_verdict(mem, query_embedding, rows))
5201}
5202
5203/// #1579 A5 — shared scoring + verdict tail of the proactive conflict
5204/// check. Decodes candidate blobs, cosine-scores against the query,
5205/// sorts descending, and applies the conflict rule to the top
5206/// [`PROACTIVE_CONFLICT_TOP_K`]:
5207///
5208///   near-duplicate (≥ [`PROACTIVE_CONFLICT_SIM_THRESHOLD`] cosine)
5209///   AND content differs
5210///   AND content token-overlap ≥ [`PROACTIVE_CONFLICT_CONTENT_JACCARD_FLOOR`]
5211///
5212/// The Jaccard corroboration is the #1579 false-409 fix — see the
5213/// floor const for the P2 evidence (81% of semantic-tier loadtest
5214/// writes refused because MiniLM clusters unrelated noise documents
5215/// above 0.95 cosine).
5216fn proactive_conflict_verdict(
5217    mem: &Memory,
5218    query_embedding: &[f32],
5219    rows: Vec<(String, String, String, Vec<u8>)>,
5220) -> Option<ProactiveConflict> {
5221    // Score every candidate and keep the top-K by cosine.
5222    let mut scored: Vec<(f32, String, String, String)> = Vec::with_capacity(rows.len());
5223    for (id, title, content, blob) in rows {
5224        if blob.is_empty() {
5225            continue;
5226        }
5227        // Skip self (same id) — happens when a re-store reuses the
5228        // existing memory id (NHI replay path).
5229        if id == mem.id {
5230            continue;
5231        }
5232        let candidate = match crate::embeddings::decode_embedding_blob(&blob) {
5233            Ok(v) => v,
5234            Err(e) => {
5235                tracing::warn!(
5236                    memory_id = %id,
5237                    blob_len = blob.len(),
5238                    error = %e,
5239                    "proactive_conflict_check: skipping candidate with malformed embedding"
5240                );
5241                continue;
5242            }
5243        };
5244        if candidate.len() != query_embedding.len() {
5245            tracing::warn!(
5246                memory_id = %id,
5247                expected = query_embedding.len(),
5248                got = candidate.len(),
5249                "proactive_conflict_check: skipping candidate with dimension mismatch"
5250            );
5251            continue;
5252        }
5253        let sim = crate::embeddings::Embedder::cosine_similarity(query_embedding, &candidate);
5254        scored.push((sim, id, title, content));
5255    }
5256    // Sort descending by similarity so we visit the strongest matches
5257    // first; bail at the top-K cap.
5258    scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
5259    let incoming_tokens = contradiction_title_tokens(&mem.content);
5260    for (sim, id, title, content) in scored.into_iter().take(PROACTIVE_CONFLICT_TOP_K) {
5261        if sim < PROACTIVE_CONFLICT_SIM_THRESHOLD {
5262            // The top-K cap is sorted descending — once we drop below
5263            // the threshold we can't find any conflicts in the tail.
5264            break;
5265        }
5266        // Deterministic textual contradiction signal: the candidate
5267        // is near-duplicate (≥ 0.95 cosine) AND its content body
5268        // differs from the incoming write's content. Same-content
5269        // near-duplicates are not contradictions; they are the upsert
5270        // happy-path that the SQL `ON CONFLICT(title, namespace)`
5271        // already handles.
5272        //
5273        // #1579 A5 — lexical corroboration: a true "same fact,
5274        // restated" pair shares vocabulary. Without this floor,
5275        // unrelated documents that the embedder happens to cluster
5276        // above 0.95 cosine (P2-measured on random-alphanumeric
5277        // payloads) produced the 81% false-409 epidemic.
5278        if content != mem.content
5279            && contradiction_title_jaccard(&incoming_tokens, &contradiction_title_tokens(&content))
5280                >= PROACTIVE_CONFLICT_CONTENT_JACCARD_FLOOR
5281        {
5282            return Some(ProactiveConflict {
5283                existing_id: id,
5284                existing_title: title,
5285                similarity: sim,
5286                reason: "near_duplicate_with_differing_content",
5287            });
5288        }
5289    }
5290    None
5291}
5292
5293/// v0.7.0 F18 — exact-match-aware nearest-neighbor duplicate check.
5294///
5295/// Wraps [`check_duplicate`] with a SHA-256 short-circuit on the raw
5296/// `query_text` so byte-identical content scores `similarity = 1.0`
5297/// even when the embedding pipeline (Nomic prefixes, casing, whitespace
5298/// normalization) would otherwise cap cosine similarity at ~0.92 for
5299/// the same string. Round-2 evidence: storing content `C` and then
5300/// asking `check_duplicate` about `C` returned similarity 0.92 because
5301/// the stored embedding was prefixed with `search_document:` while the
5302/// query embedding got `search_query:` — mismatched prefixes prevent
5303/// cosine from saturating at 1.0.
5304///
5305/// Algorithm:
5306/// 1. Compute `H_query = SHA-256(query_text)`.
5307/// 2. For each live, namespace-matching candidate, compute
5308///    `H_row = SHA-256(format!("{row.title} {row.content}"))` and
5309///    compare. The first match wins and is returned with
5310///    `similarity = 1.0`, `is_duplicate = true`.
5311/// 3. If no hash match is found, fall through to embedding-based
5312///    cosine similarity (i.e. delegate to [`check_duplicate`]).
5313///
5314/// The hash compare is computed per call (no schema migration); it
5315/// scales linearly in the candidate pool, but so does the existing
5316/// embedding loop, so worst-case asymptotics are unchanged. A future
5317/// `content_hash` column on `memories` would make this O(1) per
5318/// candidate via an index — flagged for a separate migration PR.
5319///
5320/// `query_text` MUST be the exact string used to produce
5321/// `query_embedding` (typically `crate::embeddings::embedding_document(title, content)`).
5322/// Passing a different string is not a correctness bug — the function
5323/// just falls through to the embedding-similarity path — but it
5324/// defeats the point of the short-circuit.
5325pub fn check_duplicate_with_text(
5326    conn: &Connection,
5327    query_embedding: &[f32],
5328    query_text: &str,
5329    namespace: Option<&str>,
5330    threshold: f32,
5331) -> Result<DuplicateCheck> {
5332    let effective_threshold = threshold.max(DUPLICATE_THRESHOLD_MIN);
5333    let now = Utc::now().to_rfc3339();
5334    let query_hash = canonical_content_hash(query_text);
5335
5336    // Pull (id, title, namespace, content) for the live candidate pool.
5337    // We keep the same gates as `check_duplicate` (live row, optional
5338    // namespace) but do NOT require a non-NULL embedding here — an
5339    // identical row with a missing embedding is still a valid exact-
5340    // match short-circuit candidate.
5341    let rows: Vec<(String, String, String, String)> = if let Some(ns) = namespace {
5342        let mut stmt = conn.prepare(
5343            "SELECT id, title, namespace, content FROM memories
5344             WHERE (expires_at IS NULL OR expires_at > ?1)
5345               AND namespace = ?2",
5346        )?;
5347        let mapped = stmt.query_map(params![now, ns], |row| {
5348            Ok((
5349                row.get::<_, String>(0)?,
5350                row.get::<_, String>(1)?,
5351                row.get::<_, String>(2)?,
5352                row.get::<_, String>(3)?,
5353            ))
5354        })?;
5355        mapped.collect::<rusqlite::Result<Vec<_>>>()?
5356    } else {
5357        let mut stmt = conn.prepare(
5358            "SELECT id, title, namespace, content FROM memories
5359             WHERE (expires_at IS NULL OR expires_at > ?1)",
5360        )?;
5361        let mapped = stmt.query_map(params![now], |row| {
5362            Ok((
5363                row.get::<_, String>(0)?,
5364                row.get::<_, String>(1)?,
5365                row.get::<_, String>(2)?,
5366                row.get::<_, String>(3)?,
5367            ))
5368        })?;
5369        mapped.collect::<rusqlite::Result<Vec<_>>>()?
5370    };
5371
5372    // Phase 1 — SHA-256 exact-match short-circuit. We hash the same
5373    // `crate::embeddings::embedding_document(title, content)` shape the MCP/HTTP layers use to
5374    // build the embedding text so an identical store-then-check sequence
5375    // surfaces as similarity=1.0 even when the embedding pipeline would
5376    // otherwise cap at ~0.92 due to prefix asymmetry.
5377    for (id, title, ns, content) in &rows {
5378        let row_text = crate::embeddings::embedding_document(title, content);
5379        let row_hash = canonical_content_hash(&row_text);
5380        if row_hash == query_hash {
5381            return Ok(DuplicateCheck {
5382                is_duplicate: true,
5383                threshold: effective_threshold,
5384                nearest: Some(DuplicateMatch {
5385                    id: id.clone(),
5386                    title: title.clone(),
5387                    namespace: ns.clone(),
5388                    similarity: 1.0,
5389                }),
5390                // We scanned every row through the hash compare to find
5391                // the match — report that, not just the first one.
5392                candidates_scanned: rows.len(),
5393            });
5394        }
5395    }
5396
5397    // Phase 2 — no hash match; fall back to the embedding-based
5398    // nearest-neighbor scan so callers still get the "closest existing
5399    // memory was X at similarity Y" signal on near-but-not-exact hits.
5400    check_duplicate(conn, query_embedding, namespace, threshold)
5401}
5402
5403/// Register an entity (canonical name + aliases) under a namespace
5404/// (Pillar 2 / Stream B).
5405///
5406/// An entity is stored as a long-tier memory:
5407/// - `title = canonical_name`
5408/// - `namespace = namespace`
5409/// - `tags` includes [`ENTITY_TAG`]
5410/// - `metadata.kind = "entity"` (so the resolver can never confuse an
5411///   entity with a regular memory that happens to share a title)
5412///
5413/// Aliases live in the `entity_aliases` side table keyed by
5414/// `(entity_id, alias)`.
5415///
5416/// **Idempotency:** if an entity with this `(canonical_name, namespace)`
5417/// already exists, its ID is reused and `aliases` are merged with
5418/// `INSERT OR IGNORE`. The returned [`EntityRegistration::created`] is
5419/// `false` in that case.
5420///
5421/// **Collision detection:** if a non-entity memory already occupies
5422/// `(title=canonical_name, namespace=namespace)`, the call errors
5423/// rather than silently upgrading it (the upsert path on `insert`
5424/// would otherwise overwrite the existing row's content/tags). Callers
5425/// must rename the entity or its colliding memory.
5426///
5427/// `extra_metadata` is merged into the entity memory's metadata; any
5428/// caller-supplied `kind` field is overwritten with `"entity"` and
5429/// `agent_id` is stamped from the caller (NHI provenance) when
5430/// `extra_metadata` does not already specify one.
5431pub fn entity_register(
5432    conn: &Connection,
5433    canonical_name: &str,
5434    namespace: &str,
5435    aliases: &[String],
5436    extra_metadata: &serde_json::Value,
5437    agent_id: Option<&str>,
5438) -> Result<crate::models::EntityRegistration> {
5439    use crate::models::{ENTITY_KIND, ENTITY_TAG, EntityRegistration};
5440
5441    // Look up an existing entity in this namespace by canonical_name +
5442    // metadata.kind. If a non-entity memory occupies the same
5443    // (title, namespace), surface a hard error instead of upserting.
5444    let existing_id: Option<String> = match conn.query_row(
5445        "SELECT id FROM memories
5446         WHERE namespace = ?1 AND title = ?2
5447           AND COALESCE(json_extract(metadata, '$.kind'), '') = ?3",
5448        params![namespace, canonical_name, ENTITY_KIND],
5449        |r| r.get::<_, String>(0),
5450    ) {
5451        Ok(id) => Some(id),
5452        Err(rusqlite::Error::QueryReturnedNoRows) => None,
5453        Err(e) => return Err(e.into()),
5454    };
5455
5456    let (entity_id, created) = if let Some(id) = existing_id {
5457        (id, false)
5458    } else {
5459        let collision: Option<String> = match conn.query_row(
5460            "SELECT id FROM memories
5461             WHERE namespace = ?1 AND title = ?2
5462               AND COALESCE(json_extract(metadata, '$.kind'), '') != ?3",
5463            params![namespace, canonical_name, ENTITY_KIND],
5464            |r| r.get::<_, String>(0),
5465        ) {
5466            Ok(id) => Some(id),
5467            Err(rusqlite::Error::QueryReturnedNoRows) => None,
5468            Err(e) => return Err(e.into()),
5469        };
5470        if collision.is_some() {
5471            // #962 typed envelope — UniqueConflict (409).
5472            return Err(anyhow::Error::new(StorageError::UniqueConflict {
5473                reason: format!(
5474                    "entity_register: title '{canonical_name}' in namespace '{namespace}' is already used by a non-entity memory"
5475                ),
5476            }));
5477        }
5478
5479        // Build metadata: caller-supplied object merged, kind forced
5480        // to "entity", agent_id preserved from caller when not set.
5481        let mut meta_map = match extra_metadata {
5482            serde_json::Value::Object(m) => m.clone(),
5483            _ => serde_json::Map::new(),
5484        };
5485        meta_map.insert(
5486            "kind".to_string(),
5487            serde_json::Value::String(ENTITY_KIND.to_string()),
5488        );
5489        if let Some(a) = agent_id {
5490            meta_map
5491                .entry("agent_id".to_string())
5492                .or_insert(serde_json::Value::String(a.to_string()));
5493        }
5494        let metadata = serde_json::Value::Object(meta_map);
5495
5496        let now = Utc::now().to_rfc3339();
5497        let mem = Memory {
5498            id: uuid::Uuid::new_v4().to_string(),
5499            tier: Tier::Long,
5500            namespace: namespace.to_string(),
5501            title: canonical_name.to_string(),
5502            content: canonical_name.to_string(),
5503            tags: vec![ENTITY_TAG.to_string()],
5504            priority: 7,
5505            confidence: 1.0,
5506            source: "api".to_string(),
5507            access_count: 0,
5508            created_at: now.clone(),
5509            updated_at: now,
5510            last_accessed_at: None,
5511            expires_at: None,
5512            metadata,
5513            reflection_depth: 0,
5514            memory_kind: crate::models::MemoryKind::Observation,
5515            entity_id: None,
5516            persona_version: None,
5517            citations: Vec::new(),
5518            source_uri: None,
5519            source_span: None,
5520            confidence_source: ConfidenceSource::CallerProvided,
5521            confidence_signals: None,
5522            confidence_decayed_at: None,
5523            version: 1,
5524        };
5525        let id = insert(conn, &mem).context("insert entity memory")?;
5526        (id, true)
5527    };
5528
5529    let now = Utc::now().to_rfc3339();
5530    {
5531        let mut stmt = conn.prepare(
5532            "INSERT OR IGNORE INTO entity_aliases (entity_id, alias, created_at)
5533             VALUES (?1, ?2, ?3)",
5534        )?;
5535        // canonical_name is always reachable via entity_get_by_alias.
5536        // Without this row, registering an entity with no aliases makes
5537        // it unreachable by name (NHI-P3-T2).
5538        stmt.execute(params![entity_id, canonical_name, now])?;
5539        for alias in aliases {
5540            let trimmed = alias.trim();
5541            if trimmed.is_empty() || trimmed == canonical_name {
5542                continue;
5543            }
5544            stmt.execute(params![entity_id, trimmed, now])?;
5545        }
5546    }
5547
5548    let aliases_out = list_entity_aliases(conn, &entity_id)?;
5549
5550    Ok(EntityRegistration {
5551        entity_id,
5552        canonical_name: canonical_name.to_string(),
5553        namespace: namespace.to_string(),
5554        aliases: aliases_out,
5555        created,
5556    })
5557}
5558
5559/// Resolve an alias to its registered entity (Pillar 2 / Stream B).
5560///
5561/// When `namespace` is `Some`, only entities in that namespace are
5562/// considered. When `None`, all namespaces are searched and the
5563/// most-recently-created matching entity wins (deterministic
5564/// disambiguation when the same alias was registered in multiple
5565/// namespaces).
5566///
5567/// Returns `Ok(None)` if no entity claims this alias under the given
5568/// filter. Returns the full alias set for the resolved entity.
5569pub fn entity_get_by_alias(
5570    conn: &Connection,
5571    alias: &str,
5572    namespace: Option<&str>,
5573) -> Result<Option<crate::models::EntityRecord>> {
5574    use crate::models::{ENTITY_KIND, EntityRecord};
5575
5576    let trimmed = alias.trim();
5577    if trimmed.is_empty() {
5578        return Ok(None);
5579    }
5580
5581    let row: std::result::Result<(String, String, String), rusqlite::Error> =
5582        if let Some(ns) = namespace {
5583            conn.query_row(
5584                "SELECT m.id, m.title, m.namespace
5585                 FROM entity_aliases ea
5586                 JOIN memories m ON m.id = ea.entity_id
5587                 WHERE ea.alias = ?1
5588                   AND m.namespace = ?2
5589                   AND COALESCE(json_extract(m.metadata, '$.kind'), '') = ?3
5590                 ORDER BY m.created_at DESC
5591                 LIMIT 1",
5592                params![trimmed, ns, ENTITY_KIND],
5593                |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)),
5594            )
5595        } else {
5596            conn.query_row(
5597                "SELECT m.id, m.title, m.namespace
5598                 FROM entity_aliases ea
5599                 JOIN memories m ON m.id = ea.entity_id
5600                 WHERE ea.alias = ?1
5601                   AND COALESCE(json_extract(m.metadata, '$.kind'), '') = ?2
5602                 ORDER BY m.created_at DESC
5603                 LIMIT 1",
5604                params![trimmed, ENTITY_KIND],
5605                |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)),
5606            )
5607        };
5608
5609    let (entity_id, canonical_name, ns) = match row {
5610        Ok(t) => t,
5611        Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
5612        Err(e) => return Err(e.into()),
5613    };
5614
5615    let aliases = list_entity_aliases(conn, &entity_id)?;
5616    Ok(Some(EntityRecord {
5617        entity_id,
5618        canonical_name,
5619        namespace: ns,
5620        aliases,
5621    }))
5622}
5623
/// Default cap on rows returned by [`kg_timeline`] when the caller does
/// not specify one (Pillar 2 / Stream C). Sized to fit a reasonable
/// agent context window without paging — callers needing more should
/// pass an explicit limit.
///
/// `kg_timeline` substitutes this value for a `None` limit and then
/// clamps the result to `[1, KG_TIMELINE_MAX_LIMIT]`.
pub const KG_TIMELINE_DEFAULT_LIMIT: usize = 200;

/// Hard ceiling on [`kg_timeline`] rows. Matches the existing list/recall
/// caps to keep the timeline bounded against pathological entities.
///
/// Requests above this value are silently clamped down to it rather
/// than rejected.
pub const KG_TIMELINE_MAX_LIMIT: usize = 1000;
5633
5634/// Ordered fact timeline for an entity (Pillar 2 / Stream C —
5635/// `memory_kg_timeline`). Returns outbound assertions from
5636/// `source_id`, ordered by `valid_from ASC` and tie-broken by
5637/// `created_at ASC` for deterministic display.
5638///
5639/// Filters:
5640/// - `since` (RFC3339, inclusive): drop events with `valid_from < since`
5641/// - `until` (RFC3339, inclusive): drop events with `valid_from > until`
5642/// - `limit`: row cap, clamped to [1, [`KG_TIMELINE_MAX_LIMIT`]]
5643///
5644/// Rows with NULL `valid_from` are excluded — a link without a
5645/// valid-from anchor cannot be ordered on the timeline. The schema-v15
5646/// migration backfilled legacy rows to `created_at`, and the `link()`
5647/// path stamps the column on every new insert, so this is a hard
5648/// guarantee for current code; the explicit `IS NOT NULL` guard exists
5649/// to keep external writes (`store/sqlite.rs`, custom migrations) from
5650/// silently producing invisible links.
5651///
5652/// Cross-namespace by design: timelines often span the same canonical
5653/// entity asserted by agents in different namespaces. Callers can
5654/// post-filter by `target_namespace` if they need a namespace-scoped
5655/// view.
5656///
5657/// v0.7 AGE acceleration onramp (charter §"Stream C" bullet 4). When
5658/// the v0.7 SAL ships with Apache AGE, the equivalent property-graph
5659/// query is:
5660///
5661/// ```cypher
5662/// MATCH (s {id: $source_id})-[r {valid_from IS NOT NULL,
5663///        valid_from >= $since, valid_from <= $until}]->(t)
5664/// WHERE t.id <> s.id  // exclude self-loops
5665/// RETURN t.id, r.relation, r.valid_from, r.valid_until, r.observed_by
5666/// ORDER BY r.valid_from ASC, r.created_at ASC
5667/// LIMIT $limit
5668/// ```
5669///
5670/// Stub left here per charter intent so the v0.7 migration has a 1:1
5671/// reference query.
5672pub fn kg_timeline(
5673    conn: &Connection,
5674    source_id: &str,
5675    since: Option<&str>,
5676    until: Option<&str>,
5677    limit: Option<usize>,
5678) -> Result<Vec<crate::models::KgTimelineEvent>> {
5679    use crate::models::KgTimelineEvent;
5680
5681    let cap = limit
5682        .unwrap_or(KG_TIMELINE_DEFAULT_LIMIT)
5683        .clamp(1, KG_TIMELINE_MAX_LIMIT);
5684
5685    // Compose the predicate dynamically for `since` / `until`. Bind
5686    // values are appended in the same order so the placeholders line up.
5687    let mut sql = String::from(
5688        "SELECT ml.target_id, ml.relation, ml.valid_from, ml.valid_until,
5689                ml.observed_by, m.title, m.namespace, ml.created_at
5690         FROM memory_links ml
5691         JOIN memories m ON m.id = ml.target_id
5692         WHERE ml.source_id = ?1
5693           AND ml.valid_from IS NOT NULL",
5694    );
5695    let mut binds: Vec<Box<dyn rusqlite::ToSql>> = vec![Box::new(source_id.to_string())];
5696    if let Some(s) = since {
5697        sql.push_str(" AND ml.valid_from >= ?");
5698        sql.push_str(&(binds.len() + 1).to_string());
5699        binds.push(Box::new(s.to_string()));
5700    }
5701    if let Some(u) = until {
5702        sql.push_str(" AND ml.valid_from <= ?");
5703        sql.push_str(&(binds.len() + 1).to_string());
5704        binds.push(Box::new(u.to_string()));
5705    }
5706    sql.push_str(" ORDER BY ml.valid_from ASC, ml.created_at ASC LIMIT ?");
5707    sql.push_str(&(binds.len() + 1).to_string());
5708    binds.push(Box::new(i64::try_from(cap).unwrap_or(i64::MAX)));
5709
5710    let mut stmt = conn.prepare(&sql)?;
5711    let bind_refs: Vec<&dyn rusqlite::ToSql> = binds.iter().map(AsRef::as_ref).collect();
5712    let rows = stmt.query_map(rusqlite::params_from_iter(bind_refs), |row| {
5713        Ok(KgTimelineEvent {
5714            target_id: row.get(0)?,
5715            relation: row.get(1)?,
5716            valid_from: row.get(2)?,
5717            valid_until: row.get(3)?,
5718            observed_by: row.get(4)?,
5719            title: row.get(5)?,
5720            target_namespace: row.get(6)?,
5721        })
5722    })?;
5723    rows.collect::<rusqlite::Result<Vec<_>>>()
5724        .map_err(Into::into)
5725}
5726
/// Outcome of [`invalidate_link`] (Pillar 2 / Stream C —
/// `memory_kg_invalidate`). `valid_until` is the timestamp now stored on
/// the link; `previous_valid_until` is the prior value, or `None` if
/// this was the first invalidation. Callers can use the prior value to
/// distinguish a fresh supersession from an idempotent retry.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InvalidateResult {
    /// Value written to the link's `valid_until` column: the timestamp
    /// the caller supplied, or the RFC3339 wall-clock stamp
    /// [`invalidate_link`] generated when the caller passed `None`.
    pub valid_until: String,
    /// Content of the `valid_until` column as read before the update;
    /// `None` when the column was NULL.
    pub previous_valid_until: Option<String>,
}
5737
5738/// Mark a KG link as superseded by setting its `valid_until` column
5739/// (Pillar 2 / Stream C — `memory_kg_invalidate`). Returns `Ok(None)`
5740/// when the `(source_id, target_id, relation)` triple does not match an
5741/// existing link. The supplied `valid_until` defaults to the current
5742/// wall-clock time in RFC3339 form when omitted; callers needing
5743/// historical or future supersession can pass an explicit value.
5744///
5745/// Idempotent: calling repeatedly overwrites the prior `valid_until`
5746/// (the prior value is returned in `previous_valid_until` so callers
5747/// can detect the overwrite). The schema does not yet carry an audit
5748/// column for the supersession reason; that arrives with v0.7
5749/// attestation. Until then, callers should record the rationale in
5750/// their own logs or a paired memory.
5751///
5752/// # v0.7.0 #628 H5 — signed-row preservation
5753///
5754/// `valid_until` is one of the six fields the H2 outbound signer
5755/// commits to (see [`crate::identity::sign::SignableLink`]). Mutating
5756/// it on a previously self-signed link silently flips every future
5757/// `memory_verify` to `signature_verified=false / attest_level=unsigned`
5758/// — legitimate supersession would be indistinguishable from
5759/// tampering on the wire. To preserve the audit chain we:
5760///
5761/// 1. NULL the `signature` column (and reset `attest_level` to
5762///    `"unsigned"`) so a future verify reports an honest "no
5763///    signature on this row" rather than a misleading "signature
5764///    mismatch".
5765/// 2. Append a `memory_link.invalidated` row to `signed_events` whose
5766///    `payload_hash` binds to the post-supersession canonical CBOR —
5767///    the auditor can replay both the original `memory_link.created`
5768///    row AND the matching `memory_link.invalidated` row to prove the
5769///    supersession was an intentional act by the same agent.
5770///
5771/// The audit append is best-effort: if the `signed_events` write
5772/// fails (vanishingly unlikely outside disk-full / schema-drift
5773/// scenarios), the supersession still persists and the failure is
5774/// surfaced in `tracing::warn!`. Cratering the supersession on an
5775/// audit-write failure would punish the legitimate caller for a
5776/// substrate problem they cannot fix.
5777pub fn invalidate_link(
5778    conn: &Connection,
5779    source_id: &str,
5780    target_id: &str,
5781    relation: &str,
5782    valid_until: Option<&str>,
5783) -> Result<Option<InvalidateResult>> {
5784    let stamp = valid_until.map_or_else(|| Utc::now().to_rfc3339(), str::to_string);
5785
5786    // P2 (#628 agent-3 follow-up): wrap the SELECT-then-UPDATE-then-
5787    // audit-INSERT in a single `BEGIN IMMEDIATE` transaction. Without
5788    // this, a daemon crash between the UPDATE (which clears the
5789    // signature) and the audit INSERT leaves H5's silent-supersession
5790    // state — the exact thing H5 was added to prevent. RESERVED-lock
5791    // semantics also serialise concurrent writers across processes.
5792    conn.execute(connection::SQL_BEGIN_IMMEDIATE, [])?;
5793    // From here on, every early return MUST `ROLLBACK` first.
5794    let rollback = || {
5795        let _ = conn.execute(connection::SQL_ROLLBACK, []);
5796    };
5797
5798    // Pull the prior `valid_until` AND the signing surface so the
5799    // audit append can reflect the row's pre-mutation attest state.
5800    // A single round-trip keeps the SELECT cheap.
5801    let prior_row: (
5802        Option<String>,
5803        Option<Vec<u8>>,
5804        Option<String>,
5805        Option<String>,
5806        Option<String>,
5807    ) = match conn.query_row(
5808        "SELECT valid_until, signature, attest_level, observed_by, valid_from \
5809             FROM memory_links \
5810             WHERE source_id = ?1 AND target_id = ?2 AND relation = ?3",
5811        params![source_id, target_id, relation],
5812        |r| {
5813            Ok((
5814                r.get::<_, Option<String>>(0)?,
5815                r.get::<_, Option<Vec<u8>>>(1)?,
5816                r.get::<_, Option<String>>(2)?,
5817                r.get::<_, Option<String>>(3)?,
5818                r.get::<_, Option<String>>(4)?,
5819            ))
5820        },
5821    ) {
5822        Ok(v) => v,
5823        Err(rusqlite::Error::QueryReturnedNoRows) => {
5824            rollback();
5825            return Ok(None);
5826        }
5827        Err(e) => {
5828            rollback();
5829            return Err(e.into());
5830        }
5831    };
5832    let (prior, prior_signature, _prior_attest, observed_by, valid_from) = prior_row;
5833    let was_signed = prior_signature.is_some();
5834
5835    let update_result = if was_signed {
5836        // v0.7.0 #628 H5 — clear the signing surface so a future
5837        // `memory_verify` honestly reports "unsigned" instead of
5838        // "signature mismatch". Resetting `attest_level` keeps the
5839        // column consistent with the now-NULL signature blob.
5840        conn.execute(
5841            "UPDATE memory_links \
5842                SET valid_until = ?4, signature = NULL, attest_level = 'unsigned' \
5843              WHERE source_id = ?1 AND target_id = ?2 AND relation = ?3",
5844            params![source_id, target_id, relation, &stamp],
5845        )
5846    } else {
5847        conn.execute(
5848            "UPDATE memory_links SET valid_until = ?4 \
5849             WHERE source_id = ?1 AND target_id = ?2 AND relation = ?3",
5850            params![source_id, target_id, relation, &stamp],
5851        )
5852    };
5853    if let Err(e) = update_result {
5854        rollback();
5855        return Err(e.into());
5856    }
5857
5858    // v0.7.0 #628 H5 — append an `invalidated` audit row when we
5859    // cleared a signature. The `payload_hash` commits to the
5860    // canonical CBOR over the post-supersession SignableLink so the
5861    // auditor sees exactly what the row looks like now (`valid_until`
5862    // populated). The `signature` column on the audit row is the
5863    // *previous* signature — the auditor can compare it byte-for-byte
5864    // against the original `memory_link.created` row's signature to
5865    // confirm the same key issued both events. We deliberately do NOT
5866    // re-sign here: this writer has no guarantee that the original
5867    // signing keypair is loaded (federation may have applied an
5868    // inbound `peer_attested` row), so an honest "the signing surface
5869    // was cleared" event is the only response that doesn't risk
5870    // forgery.
5871    if was_signed {
5872        let signable = crate::identity::sign::SignableLink {
5873            src_id: source_id,
5874            dst_id: target_id,
5875            relation,
5876            observed_by: observed_by.as_deref(),
5877            valid_from: valid_from.as_deref(),
5878            valid_until: Some(stamp.as_str()),
5879        };
5880        match crate::identity::sign::canonical_cbor(&signable) {
5881            Ok(cbor) => {
5882                let event = crate::signed_events::SignedEvent {
5883                    id: uuid::Uuid::new_v4().to_string(),
5884                    // Best-effort agent_id: the `observed_by` claim
5885                    // from the original signed row (the agent that
5886                    // attested the supersession's source row). Falls
5887                    // back to "unknown" when the legacy row carried
5888                    // no observed_by — vanishingly rare for signed
5889                    // rows since H2 always populates the column on
5890                    // self-signed inserts.
5891                    agent_id: observed_by.clone().unwrap_or_else(|| "unknown".to_string()),
5892                    event_type: crate::signed_events::event_types::MEMORY_LINK_INVALIDATED
5893                        .to_string(),
5894                    payload_hash: crate::signed_events::payload_hash(&cbor),
5895                    signature: prior_signature,
5896                    attest_level: crate::models::AttestLevel::Unsigned.as_str().to_string(),
5897                    timestamp: Utc::now().to_rfc3339(),
5898                    ..crate::signed_events::SignedEvent::default()
5899                };
5900                // v0.7.0 ship-readiness: use the `_no_tx` variant — we
5901                // are already inside the BEGIN IMMEDIATE wrap (line 3560
5902                // above). The public `append_signed_event` opens its own
5903                // unchecked_transaction which would fail under nesting
5904                // (SQLite does not allow nested transactions on a single
5905                // connection).
5906                if let Err(e) = crate::signed_events::append_signed_event_no_tx(conn, &event) {
5907                    // P2 (#628 agent-3): refuse to commit the UPDATE if
5908                    // the audit row can't be appended. Otherwise the
5909                    // signature clearing happens silently and we lose
5910                    // the audit trail H5 was added to provide.
5911                    rollback();
5912                    return Err(anyhow::anyhow!(
5913                        "failed to append memory_link.invalidated audit row \
5914                         (rolled back signature clearing): {e}"
5915                    ));
5916                }
5917            }
5918            Err(e) => {
5919                rollback();
5920                return Err(anyhow::anyhow!(
5921                    "failed to encode canonical CBOR for invalidation audit \
5922                     (rolled back signature clearing): {e}"
5923                ));
5924            }
5925        }
5926    }
5927
5928    conn.execute(connection::SQL_COMMIT, [])?;
5929    Ok(Some(InvalidateResult {
5930        valid_until: stamp,
5931        previous_valid_until: prior,
5932    }))
5933}
5934
/// Default cap on rows returned by `kg_query` when the caller does not
/// specify one (Pillar 2 / Stream C). Mirrors `kg_timeline`'s default so
/// the two traversal tools behave consistently for agents driving them.
///
/// `kg_query` substitutes this value for a `None` limit and then clamps
/// the result to `[1, KG_QUERY_MAX_LIMIT]`.
pub const KG_QUERY_DEFAULT_LIMIT: usize = 200;

/// Hard ceiling on `kg_query` rows. Matches `kg_timeline` and the
/// existing list/recall caps to keep traversal bounded against
/// pathological fan-out.
///
/// Larger requested limits are clamped down to this value, not
/// rejected.
pub const KG_QUERY_MAX_LIMIT: usize = 1000;

/// Maximum traversal depth supported by [`kg_query`]. The recursive-CTE
/// implementation enforces an explicit ceiling so a crafted call cannot
/// run an unbounded traversal; the charter (`v0.6.3-grand-slam.md`
/// § Performance Budgets) sets the published budget at depth ≤ 5.
///
/// Unlike the row limit, exceeding this value is not clamped:
/// `kg_query` returns a `StorageError::InvalidArgument` error when
/// `max_depth` is greater than this constant.
pub const KG_QUERY_MAX_SUPPORTED_DEPTH: usize = 5;
5950
5951/// Outbound KG traversal from a source memory (Pillar 2 / Stream C —
5952/// `memory_kg_query`). Returns one row per link reachable within
5953/// `max_depth` hops, filtered by:
5954///
5955/// - `valid_at` (RFC3339, optional): only links valid at that instant —
5956///   `valid_from <= valid_at AND (valid_until IS NULL OR valid_until > valid_at)`.
5957///   When omitted, the temporal filter is skipped and rows with NULL
5958///   `valid_from` are also returned (legacy / un-anchored links).
5959/// - `allowed_agents` (optional): when provided, only links with
5960///   `observed_by` in the set are returned. An **empty** allowlist
5961///   returns zero rows by design — callers signaling "no agents are
5962///   trusted" must get an empty traversal, not the unfiltered fallback.
5963///   When omitted entirely (`None`), the agent filter is skipped.
5964/// - `limit`: row cap, clamped to [1, [`KG_QUERY_MAX_LIMIT`]].
5965///
5966/// `max_depth` must be in `[1, KG_QUERY_MAX_SUPPORTED_DEPTH]`; passing
5967/// a larger value yields an explicit error rather than a silent
5968/// truncation, so callers learn they hit the ceiling instead of
5969/// receiving a partial graph.
5970///
5971/// Multi-hop traversal uses a recursive CTE with cycle detection on
5972/// the accumulated path, so cycles in the link graph cannot loop the
5973/// traversal indefinitely. Each hop reapplies the same temporal /
5974/// agent filters as the anchor — a chain only extends through links
5975/// that pass every filter on every hop.
5976///
5977/// Ordering is `depth ASC, COALESCE(valid_from, created_at) ASC,
5978/// created_at ASC` — shallower hops first, then time-ordered within
5979/// each level. For depth=1 callers this collapses to the original
5980/// time ordering. The `depth` field reflects the actual hop count and
5981/// `path` is the full `src->mid->target` chain.
5982pub fn kg_query(
5983    conn: &Connection,
5984    source_id: &str,
5985    max_depth: usize,
5986    valid_at: Option<&str>,
5987    allowed_agents: Option<&[String]>,
5988    limit: Option<usize>,
5989    include_invalidated: bool,
5990) -> Result<Vec<crate::models::KgQueryNode>> {
5991    use crate::models::KgQueryNode;
5992
5993    if max_depth == 0 {
5994        // #962 typed envelope.
5995        return Err(anyhow::Error::new(StorageError::InvalidArgument {
5996            reason: crate::errors::msg::MAX_DEPTH_MIN.to_string(),
5997        }));
5998    }
5999    if max_depth > KG_QUERY_MAX_SUPPORTED_DEPTH {
6000        // #962 typed envelope.
6001        return Err(anyhow::Error::new(StorageError::InvalidArgument {
6002            reason: format!(
6003                "max_depth={max_depth} exceeds supported depth={KG_QUERY_MAX_SUPPORTED_DEPTH}"
6004            ),
6005        }));
6006    }
6007
6008    // Empty allowlist == "no agents are trusted" — short-circuit so we
6009    // don't have to invent a SQL `IN ()` clause (which is invalid).
6010    if let Some(agents) = allowed_agents
6011        && agents.is_empty()
6012    {
6013        return Ok(Vec::new());
6014    }
6015
6016    let cap = limit
6017        .unwrap_or(KG_QUERY_DEFAULT_LIMIT)
6018        .clamp(1, KG_QUERY_MAX_LIMIT);
6019
6020    // Build the per-hop predicate once; the anchor and recursive members
6021    // both apply it to a row aliased `ml`. Bind values are appended in
6022    // resolution order so positional placeholders line up.
6023    let mut binds: Vec<Box<dyn rusqlite::ToSql>> = Vec::new();
6024    let mut hop_filter = String::new();
6025    if let Some(t) = valid_at {
6026        hop_filter.push_str(" AND ml.valid_from IS NOT NULL AND ml.valid_from <= ?");
6027        binds.push(Box::new(t.to_string()));
6028        hop_filter.push_str(&binds.len().to_string());
6029        hop_filter.push_str(" AND (ml.valid_until IS NULL OR ml.valid_until > ?");
6030        binds.push(Box::new(t.to_string()));
6031        hop_filter.push_str(&binds.len().to_string());
6032        hop_filter.push(')');
6033    } else if !include_invalidated {
6034        // "Current view" default — exclude edges that have been
6035        // invalidated via memory_kg_invalidate (valid_until set in the
6036        // past). NHI-P3-T7 regression: prior versions returned
6037        // invalidated edges in default kg_query results.
6038        // Caller can pass include_invalidated=true to opt in to the
6039        // full-history view.
6040        hop_filter.push_str(
6041            " AND (ml.valid_until IS NULL OR ml.valid_until > strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))",
6042        );
6043    }
6044    if let Some(agents) = allowed_agents {
6045        // Already short-circuited the empty case above.
6046        hop_filter.push_str(" AND ml.observed_by IN (");
6047        for (i, a) in agents.iter().enumerate() {
6048            binds.push(Box::new(a.clone()));
6049            if i > 0 {
6050                hop_filter.push_str(", ");
6051            }
6052            hop_filter.push('?');
6053            hop_filter.push_str(&binds.len().to_string());
6054        }
6055        hop_filter.push(')');
6056    }
6057
6058    // Anchor binds source_id, recursive member binds max_depth, final
6059    // SELECT binds the row cap. Order matters — placeholders are
6060    // resolved by the position they occupy in the assembled string.
6061    binds.push(Box::new(source_id.to_string()));
6062    let source_ph = binds.len();
6063    binds.push(Box::new(i64::try_from(max_depth).unwrap_or(i64::MAX)));
6064    let max_depth_ph = binds.len();
6065    binds.push(Box::new(i64::try_from(cap).unwrap_or(i64::MAX)));
6066    let limit_ph = binds.len();
6067
6068    // v0.7 AGE acceleration onramp (charter §"Stream C — KG Query Layer"
6069    // bullet 4). The recursive CTE below is the v0.6.3 SQLite/Postgres
6070    // implementation. When the v0.7 SAL ships with Apache AGE wired in,
6071    // the equivalent property-graph query will look like:
6072    //
6073    //   MATCH (s {id: $source_id})-[r*1..$max_depth {valid_from <= $t,
6074    //          observed_by IN $allowed_agents}]->(t)
6075    //   WHERE NONE(n IN nodes(path) WHERE n.id = t.id)  -- cycle prune
6076    //   RETURN t.id, last(r).relation, t.title, length(r) AS depth,
6077    //          [n IN nodes(path) | n.id] AS path
6078    //   ORDER BY depth, last(r).valid_from
6079    //   LIMIT $limit
6080    //
6081    // Stub left here per charter intent so the v0.7 migration to AGE
6082    // has a 1:1 reference query alongside the SQL implementation.
6083
6084    let sql = format!(
6085        "WITH RECURSIVE traversal(\
6086            target_id, relation, valid_from, valid_until, observed_by, \
6087            link_created_at, depth, path\
6088         ) AS (\
6089            SELECT ml.target_id, ml.relation, ml.valid_from, ml.valid_until, \
6090                   ml.observed_by, ml.created_at, 1, \
6091                   json_array(ml.source_id, ml.target_id) \
6092            FROM memory_links ml \
6093            WHERE ml.source_id = ?{source_ph}{hop_filter} \
6094            UNION ALL \
6095            SELECT ml.target_id, ml.relation, ml.valid_from, ml.valid_until, \
6096                   ml.observed_by, ml.created_at, t.depth + 1, \
6097                   json_insert(t.path, '$[' || json_array_length(t.path) || ']', ml.target_id) \
6098            FROM memory_links ml \
6099            JOIN traversal t ON ml.source_id = t.target_id \
6100            WHERE t.depth < ?{max_depth_ph} \
6101              AND NOT EXISTS (SELECT 1 FROM json_each(t.path) WHERE value = ml.target_id)\
6102              {hop_filter}\
6103         ) \
6104         SELECT t.target_id, t.relation, t.valid_from, t.valid_until, \
6105                t.observed_by, m.title, m.namespace, t.depth, \
6106                (SELECT group_concat(value, '->') FROM json_each(t.path)) \
6107         FROM traversal t \
6108         JOIN memories m ON m.id = t.target_id \
6109         ORDER BY t.depth ASC, COALESCE(t.valid_from, t.link_created_at) ASC, \
6110                  t.link_created_at ASC \
6111         LIMIT ?{limit_ph}",
6112    );
6113
6114    let mut stmt = conn.prepare(&sql)?;
6115    let bind_refs: Vec<&dyn rusqlite::ToSql> = binds.iter().map(AsRef::as_ref).collect();
6116    let rows = stmt.query_map(rusqlite::params_from_iter(bind_refs), |row| {
6117        let target_id: String = row.get(0)?;
6118        let depth: i64 = row.get(7)?;
6119        Ok(KgQueryNode {
6120            target_id,
6121            relation: row.get(1)?,
6122            valid_from: row.get(2)?,
6123            valid_until: row.get(3)?,
6124            observed_by: row.get(4)?,
6125            title: row.get(5)?,
6126            target_namespace: row.get(6)?,
6127            depth: usize::try_from(depth).unwrap_or(0),
6128            path: row.get(8)?,
6129        })
6130    })?;
6131    rows.collect::<rusqlite::Result<Vec<_>>>()
6132        .map_err(Into::into)
6133}
6134
/// Default cap on paths returned by [`find_paths`] when the caller does
/// not specify one (`max_results` is `None`). Matches the v0.7 J7
/// charter.
pub const FIND_PATHS_DEFAULT_LIMIT: usize = 10;
6138
/// Hard ceiling on paths returned by [`find_paths`]. A crafted call
/// asking for more than this many paths is clamped down to this value
/// (no error is raised). Matches the v0.7 J7 charter.
pub const FIND_PATHS_MAX_LIMIT: usize = 50;
6143
/// Hard ceiling on traversal depth supported by [`find_paths`].
/// Distinct from [`KG_QUERY_MAX_SUPPORTED_DEPTH`] because path
/// enumeration is more expensive than reachability — we can afford a
/// slightly deeper budget for the BFS but not by much.
///
/// **Cap = 7.** Asking for more is rejected (not clamped) with an
/// `InvalidArgument` error that names this constant explicitly so
/// callers see exactly which knob to file against. Contact maintainers
/// to raise this bound *after* benchmarking the new ceiling on a
/// representative KG; the BFS is `O(d * |E|)` per hop with a
/// `json_each` cycle check, and depth-8+ has not been load-tested as of
/// v0.7.0.
pub const FIND_PATHS_MAX_DEPTH: usize = 7;
6156
/// Default depth used by [`find_paths`] when the caller omits
/// `max_depth` (passes `None`). Mirrors the v0.7 J7 charter's "shallow
/// by default, opt-in deep traversal" rule.
pub const FIND_PATHS_DEFAULT_DEPTH: usize = 4;
6160
6161/// v0.7 J7 — enumerate up to N undirected paths between two memories.
6162///
6163/// Walks `memory_links` with a recursive CTE that carries the full
6164/// visited-id chain on each row, both as the outbound `path` rendered
6165/// for callers and as the cycle-detection set so the traversal cannot
6166/// loop on a cyclic link graph. Each row of the CTE represents one
6167/// candidate prefix; rows that reach `target_id` are projected out as
6168/// completed paths.
6169///
6170/// # Directionality contract (v0.7.0)
6171///
6172/// **`find_paths` is UNDIRECTED** (UNION of forward + reverse edges at
6173/// every hop) — **`kg_query` is DIRECTED** (forward edges only, by
6174/// design). The two tools answer different questions and are not
6175/// interchangeable:
6176///
6177/// - `find_paths(a, b)` — *are these two memories connected through any
6178///   relation chain?* Symmetric closure: `find_paths(a, b)` and
6179///   `find_paths(b, a)` return the same path set (modulo reversal).
6180/// - `kg_query(start, depth)` — *what does the directed `source →
6181///   target` subgraph rooted at `start` look like at depth ≤ N?*
6182///   `kg_query(b, …)` will not surface `a → b`.
6183///
6184/// **`include_invalidated` is honored identically** by both tools: when
6185/// `false` (default), edges whose `valid_until` lies in the past are
6186/// excluded from the traversal; when `true`, the full historical link
6187/// graph is walked. The flag's semantics do not change with directionality.
6188///
6189/// The KG corpus uses directional links to model temporal ordering of an
6190/// assertion (`source → target`), so path queries — which are "are these
6191/// two memories connected via *any* relation chain?" — apply the
6192/// symmetric closure here via `UNION ALL` over the original edge and the
6193/// reverse edge at each hop.
6194///
6195/// # Limits
6196///
6197/// `max_depth` defaults to [`FIND_PATHS_DEFAULT_DEPTH`] and is hard-
6198/// capped at [`FIND_PATHS_MAX_DEPTH`] (= 7); passing a larger value
6199/// yields an explicit error rather than silent truncation. The error
6200/// message names `FIND_PATHS_MAX_DEPTH` so operators can grep the
6201/// codebase for the single tunable knob. `max_results` defaults to
6202/// [`FIND_PATHS_DEFAULT_LIMIT`] and is clamped at
6203/// [`FIND_PATHS_MAX_LIMIT`]; passing a larger value collapses to the
6204/// ceiling without error (paths beyond the cap are dropped, the
6205/// shortest paths win on the `ORDER BY`).
6206///
6207/// Returns `Vec<Vec<String>>` — one inner vector per discovered path,
6208/// each carrying the chain of memory ids from `source_id` (first) to
6209/// `target_id` (last). Self-paths (`source_id == target_id`) collapse
6210/// to a single one-element path. Disconnected pairs return an empty
6211/// outer vector.
6212pub fn find_paths(
6213    conn: &Connection,
6214    source_id: &str,
6215    target_id: &str,
6216    max_depth: Option<usize>,
6217    max_results: Option<usize>,
6218    include_invalidated: bool,
6219) -> Result<Vec<Vec<String>>> {
6220    let depth = max_depth.unwrap_or(FIND_PATHS_DEFAULT_DEPTH);
6221    if depth == 0 {
6222        // #962 typed envelope.
6223        return Err(anyhow::Error::new(StorageError::InvalidArgument {
6224            reason: crate::errors::msg::MAX_DEPTH_MIN.to_string(),
6225        }));
6226    }
6227    if depth > FIND_PATHS_MAX_DEPTH {
6228        // #962 typed envelope.
6229        return Err(anyhow::Error::new(StorageError::InvalidArgument {
6230            reason: format!(
6231                "max_depth={depth} exceeds supported depth={FIND_PATHS_MAX_DEPTH} (FIND_PATHS_MAX_DEPTH); contact maintainers to raise this bound after benchmarking"
6232            ),
6233        }));
6234    }
6235    let cap = max_results
6236        .unwrap_or(FIND_PATHS_DEFAULT_LIMIT)
6237        .clamp(1, FIND_PATHS_MAX_LIMIT);
6238
6239    // Self-path short-circuit. The recursive CTE below requires depth>=1
6240    // before it can match `target_id`; the trivial chain is just the
6241    // single-element path through the start node.
6242    if source_id == target_id {
6243        return Ok(vec![vec![source_id.to_string()]]);
6244    }
6245
6246    // "Current view" filter — exclude edges whose `valid_until` lies in
6247    // the past (invalidated via `memory_kg_invalidate`). Caller can pass
6248    // `include_invalidated=true` to traverse the full historical link
6249    // graph. NHI-P3-T7 regression: prior versions enumerated paths
6250    // through invalidated edges by default.
6251    let invalidated_filter = if include_invalidated {
6252        ""
6253    } else {
6254        " WHERE (valid_until IS NULL OR valid_until > strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))"
6255    };
6256
6257    // The CTE walks symmetric edges: for each row in `memory_links` we
6258    // also generate its reverse so the traversal is undirected. Cycle
6259    // detection uses the JSON-encoded path array (same trick as
6260    // `kg_query`) — `NOT EXISTS (... json_each ...)` short-circuits the
6261    // recursion as soon as the next hop would revisit a node already in
6262    // the prefix.
6263    //
6264    // The completed-path filter sits in the outer SELECT rather than
6265    // the recursive member because a partial prefix that lands on
6266    // `target_id` should be reported AND continue to extend (a longer
6267    // path through `target_id` might reach itself through a different
6268    // route — though for the KG that should be rare, the CTE doesn't
6269    // need to know that). `ORDER BY depth, path` keeps the shortest
6270    // paths first so the `LIMIT` cap drops the longest tail.
6271    let sql = format!(
6272        "WITH RECURSIVE traversal(current_id, depth, path) AS (
6273            SELECT ?1, 0, json_array(?1)
6274            UNION ALL
6275            SELECT next_id, t.depth + 1,
6276                   json_insert(t.path, '$[' || json_array_length(t.path) || ']', next_id)
6277            FROM traversal t
6278            JOIN (
6279                SELECT source_id AS from_id, target_id AS next_id
6280                FROM memory_links{invalidated_filter}
6281                UNION
6282                SELECT target_id AS from_id, source_id AS next_id
6283                FROM memory_links{invalidated_filter}
6284            ) edges ON edges.from_id = t.current_id
6285            WHERE t.depth < ?3
6286              AND NOT EXISTS (
6287                  SELECT 1 FROM json_each(t.path) WHERE value = next_id
6288              )
6289         )
6290         SELECT path
6291         FROM traversal
6292         WHERE current_id = ?2 AND depth >= 1
6293         ORDER BY depth ASC, path ASC
6294         LIMIT ?4"
6295    );
6296
6297    let depth_i64 = i64::try_from(depth).unwrap_or(i64::MAX);
6298    let cap_i64 = i64::try_from(cap).unwrap_or(i64::MAX);
6299
6300    let mut stmt = conn.prepare(&sql)?;
6301    let rows = stmt.query_map(params![source_id, target_id, depth_i64, cap_i64], |row| {
6302        let json_path: String = row.get(0)?;
6303        Ok(json_path)
6304    })?;
6305
6306    let mut paths: Vec<Vec<String>> = Vec::new();
6307    for row in rows {
6308        let json = row?;
6309        let parsed: Vec<String> = serde_json::from_str(&json).map_err(|e| {
6310            rusqlite::Error::FromSqlConversionFailure(0, rusqlite::types::Type::Text, Box::new(e))
6311        })?;
6312        paths.push(parsed);
6313    }
6314
6315    Ok(paths)
6316}
6317
6318/// List all aliases registered for an entity, ordered by registration
6319/// time then alphabetical for stable display.
6320fn list_entity_aliases(conn: &Connection, entity_id: &str) -> Result<Vec<String>> {
6321    let mut stmt = conn.prepare(
6322        "SELECT alias FROM entity_aliases
6323         WHERE entity_id = ?1
6324         ORDER BY created_at ASC, alias ASC",
6325    )?;
6326    let aliases: Vec<String> = stmt
6327        .query_map(params![entity_id], |r| r.get::<_, String>(0))?
6328        .collect::<rusqlite::Result<Vec<_>>>()?;
6329    Ok(aliases)
6330}
6331
6332/// Register or refresh an agent in the reserved `_agents` namespace.
6333///
6334/// Each agent is stored as a long-tier memory with `title = "agent:<agent_id>"`.
6335/// Duplicate registration for the same `agent_id` refreshes `last_seen_at` and
6336/// overwrites `agent_type` + `capabilities`, while preserving the original
6337/// `registered_at` timestamp (caller-observable provenance).
6338///
6339/// Returns the stored memory ID.
6340pub fn register_agent(
6341    conn: &Connection,
6342    agent_id: &str,
6343    agent_type: &str,
6344    capabilities: &[String],
6345) -> Result<String> {
6346    let title = crate::models::agent_registration_title(agent_id);
6347    let now = Utc::now().to_rfc3339();
6348
6349    // Preserve original registered_at across re-registration.
6350    let registered_at = conn
6351        .query_row(
6352            "SELECT json_extract(metadata, '$.registered_at') FROM memories
6353             WHERE namespace = ?1 AND title = ?2",
6354            params![AGENTS_NAMESPACE, &title],
6355            |row| row.get::<_, Option<String>>(0),
6356        )
6357        .ok()
6358        .flatten()
6359        .unwrap_or_else(|| now.clone());
6360
6361    let caps_json: Vec<serde_json::Value> = capabilities
6362        .iter()
6363        .map(|c| serde_json::Value::String(c.clone()))
6364        .collect();
6365
6366    let metadata = serde_json::json!({
6367        "agent_id": agent_id,
6368        (field_names::AGENT_TYPE): agent_type,
6369        (field_names::CAPABILITIES): caps_json,
6370        (field_names::REGISTERED_AT): registered_at,
6371        (field_names::LAST_SEEN_AT): now,
6372        // #910 (SAL-level enforcement) — agent-registration rows live
6373        // in the `_agents` namespace and are a public roster: every
6374        // agent has a legitimate need to know which other agents are
6375        // registered (consensus voting, peer attestation, etc.). Stamp
6376        // scope=collective so the SAL visibility filter doesn't drop
6377        // them on cross-agent reads.
6378        "scope": crate::models::MemoryScope::Collective.as_str(),
6379    });
6380
6381    let content = serde_json::to_string(&metadata)
6382        .context("failed to serialize agent registration content")?;
6383
6384    let mem = Memory {
6385        id: uuid::Uuid::new_v4().to_string(),
6386        tier: Tier::Long,
6387        namespace: AGENTS_NAMESPACE.to_string(),
6388        title,
6389        content,
6390        tags: vec!["agent-registration".to_string()],
6391        priority: 5,
6392        confidence: 1.0,
6393        source: "system".to_string(),
6394        access_count: 0,
6395        created_at: now.clone(),
6396        updated_at: now,
6397        last_accessed_at: None,
6398        expires_at: None,
6399        metadata,
6400        reflection_depth: 0,
6401        memory_kind: crate::models::MemoryKind::Observation,
6402        entity_id: None,
6403        persona_version: None,
6404        citations: Vec::new(),
6405        source_uri: None,
6406        source_span: None,
6407        confidence_source: ConfidenceSource::CallerProvided,
6408        confidence_signals: None,
6409        confidence_decayed_at: None,
6410        version: 1,
6411    };
6412
6413    insert(conn, &mem)
6414}
6415
6416/// List every registered agent. Rows are drawn from the `_agents` namespace
6417/// and parsed out of each memory's metadata.
6418pub fn list_agents(conn: &Connection) -> Result<Vec<AgentRegistration>> {
6419    let now = Utc::now().to_rfc3339();
6420    let mut stmt = conn.prepare(
6421        "SELECT metadata FROM memories
6422         WHERE namespace = ?1
6423           AND (expires_at IS NULL OR expires_at > ?2)
6424         ORDER BY json_extract(metadata, '$.registered_at') ASC",
6425    )?;
6426    let rows = stmt.query_map(params![AGENTS_NAMESPACE, now], |row| {
6427        row.get::<_, String>(0)
6428    })?;
6429
6430    let mut agents = Vec::new();
6431    for r in rows {
6432        let raw = r?;
6433        let meta: serde_json::Value =
6434            serde_json::from_str(&raw).context("failed to parse agent metadata as JSON")?;
6435        let agent_id = meta
6436            .get("agent_id")
6437            .and_then(serde_json::Value::as_str)
6438            .unwrap_or_default()
6439            .to_string();
6440        let agent_type = meta
6441            .get(field_names::AGENT_TYPE)
6442            .and_then(serde_json::Value::as_str)
6443            .unwrap_or_default()
6444            .to_string();
6445        let capabilities: Vec<String> = meta
6446            .get(field_names::CAPABILITIES)
6447            .and_then(serde_json::Value::as_array)
6448            .map(|arr| {
6449                arr.iter()
6450                    .filter_map(|v| v.as_str().map(String::from))
6451                    .collect()
6452            })
6453            .unwrap_or_default();
6454        let registered_at = meta
6455            .get(field_names::REGISTERED_AT)
6456            .and_then(serde_json::Value::as_str)
6457            .unwrap_or_default()
6458            .to_string();
6459        let last_seen_at = meta
6460            .get(field_names::LAST_SEEN_AT)
6461            .and_then(serde_json::Value::as_str)
6462            .unwrap_or_default()
6463            .to_string();
6464        agents.push(AgentRegistration {
6465            agent_id,
6466            agent_type,
6467            capabilities,
6468            registered_at,
6469            last_seen_at,
6470        });
6471    }
6472    Ok(agents)
6473}
6474
6475/// Bind (or rotate) an agent's Ed25519 public key into its `_agents`
6476/// registration row metadata (#626 Layer-3, Task 1.3 / C3).
6477///
6478/// The pubkey is the anchor the write-path attestation gate verifies
6479/// against: a signed write claiming `agent_id` is upgraded from *claimed*
6480/// to *attested* only when its signature verifies under the key bound
6481/// here. Stored under `metadata.agent_pubkey` (URL-safe-no-pad base64)
6482/// alongside a `pubkey_bound_at` RFC3339 timestamp for rotation
6483/// provenance.
6484///
6485/// Migration-free: the key rides in the existing registration row's
6486/// JSON metadata (no schema bump). `json_set` updates `metadata` and the
6487/// mirrored `content` column atomically so `list_agents` / the verifier
6488/// observe a consistent row.
6489///
6490/// The agent MUST already be registered (`register_agent`) — binding a
6491/// key to an unregistered id is rejected so a stray pubkey can never
6492/// shadow a future legitimate registration. Re-binding overwrites the
6493/// previous key (key rotation / revoke-then-rebind).
6494///
6495/// # Errors
6496///
6497/// - the agent is not registered (no `_agents` row for `agent_id`)
6498/// - the underlying `UPDATE` fails
6499pub fn bind_agent_pubkey(conn: &Connection, agent_id: &str, pubkey_b64: &str) -> Result<()> {
6500    let title = crate::models::agent_registration_title(agent_id);
6501    let now = Utc::now().to_rfc3339();
6502    let affected = conn.execute(
6503        "UPDATE memories SET
6504            metadata = json_set(metadata, '$.agent_pubkey', ?3, '$.pubkey_bound_at', ?4),
6505            content  = json_set(content,  '$.agent_pubkey', ?3, '$.pubkey_bound_at', ?4),
6506            updated_at = ?4
6507         WHERE namespace = ?1 AND title = ?2",
6508        params![AGENTS_NAMESPACE, &title, pubkey_b64, &now],
6509    )?;
6510    if affected == 0 {
6511        anyhow::bail!(
6512            "cannot bind pubkey: agent '{agent_id}' is not registered (register it first)"
6513        );
6514    }
6515    Ok(())
6516}
6517
6518/// Fetch the Ed25519 public key bound to `agent_id`, if any (#626
6519/// Layer-3, Task 1.3 / C3).
6520///
6521/// Returns `Ok(None)` when the agent is registered but has no bound key
6522/// (the permissive-default attestation posture: such an agent can still
6523/// write *claimed* rows), and also when the agent is not registered at
6524/// all — both collapse to "no key to verify against". The verifier
6525/// distinguishes the two only when `AI_MEMORY_REQUIRE_AGENT_ATTESTATION`
6526/// is set, where a missing key on a required write is a hard reject.
6527///
6528/// # Errors
6529///
6530/// Surfaces only underlying query failures.
6531pub fn agent_pubkey(conn: &Connection, agent_id: &str) -> Result<Option<String>> {
6532    let title = crate::models::agent_registration_title(agent_id);
6533    let pubkey = conn
6534        .query_row(
6535            "SELECT json_extract(metadata, '$.agent_pubkey') FROM memories
6536             WHERE namespace = ?1 AND title = ?2",
6537            params![AGENTS_NAMESPACE, &title],
6538            |row| row.get::<_, Option<String>>(0),
6539        )
6540        .ok()
6541        .flatten();
6542    Ok(pubkey)
6543}
6544
6545/// Clear the Ed25519 public key bound to `agent_id` (#626 Layer-3,
6546/// Task 1.3 / C5 — key revocation).
6547///
6548/// Removes the `agent_pubkey` + `pubkey_bound_at` keys from both the
6549/// metadata and the mirrored `content` JSON, stamping a
6550/// `pubkey_revoked_at` marker so the revocation is auditable. After
6551/// revocation the agent reverts to the permissive *claimed* posture
6552/// (no key to verify against) until a fresh key is bound.
6553///
6554/// Idempotent: revoking an agent with no bound key still succeeds (the
6555/// `json_remove` is a no-op) as long as the agent is registered.
6556///
6557/// # Errors
6558///
6559/// - the agent is not registered (no `_agents` row for `agent_id`)
6560/// - the underlying `UPDATE` fails
6561pub fn revoke_agent_pubkey(conn: &Connection, agent_id: &str) -> Result<()> {
6562    let title = crate::models::agent_registration_title(agent_id);
6563    let now = Utc::now().to_rfc3339();
6564    let affected = conn.execute(
6565        "UPDATE memories SET
6566            metadata = json_set(
6567                json_remove(metadata, '$.agent_pubkey', '$.pubkey_bound_at'),
6568                '$.pubkey_revoked_at', ?3),
6569            content  = json_set(
6570                json_remove(content,  '$.agent_pubkey', '$.pubkey_bound_at'),
6571                '$.pubkey_revoked_at', ?3),
6572            updated_at = ?3
6573         WHERE namespace = ?1 AND title = ?2",
6574        params![AGENTS_NAMESPACE, &title, &now],
6575    )?;
6576    if affected == 0 {
6577        anyhow::bail!(
6578            "cannot revoke pubkey: agent '{agent_id}' is not registered (register it first)"
6579        );
6580    }
6581    Ok(())
6582}
6583
6584pub fn stats(conn: &Connection, db_path: &Path) -> Result<Stats> {
6585    let total: usize = conn.query_row("SELECT COUNT(*) FROM memories", [], |r| r.get(0))?;
6586
6587    let mut stmt =
6588        conn.prepare("SELECT tier, COUNT(*) FROM memories GROUP BY tier ORDER BY COUNT(*) DESC")?;
6589    let by_tier = stmt
6590        .query_map([], |row| {
6591            Ok(TierCount {
6592                tier: row.get(0)?,
6593                count: row.get(1)?,
6594            })
6595        })?
6596        .collect::<rusqlite::Result<Vec<_>>>()?;
6597
6598    let mut stmt = conn.prepare(
6599        "SELECT namespace, COUNT(*) FROM memories GROUP BY namespace ORDER BY COUNT(*) DESC",
6600    )?;
6601    let by_namespace = stmt
6602        .query_map([], |row| {
6603            Ok(NamespaceCount {
6604                namespace: row.get(0)?,
6605                count: row.get(1)?,
6606            })
6607        })?
6608        .collect::<rusqlite::Result<Vec<_>>>()?;
6609
6610    let now = Utc::now().to_rfc3339();
6611    let one_hour = (Utc::now() + chrono::Duration::hours(1)).to_rfc3339();
6612    let expiring_soon: usize = conn.query_row(
6613        "SELECT COUNT(*) FROM memories WHERE expires_at IS NOT NULL AND expires_at > ?1 AND expires_at <= ?2",
6614        params![now, one_hour], |r| r.get(0),
6615    )?;
6616
6617    let links_count: usize = conn
6618        .query_row("SELECT COUNT(*) FROM memory_links", [], |r| r.get(0))
6619        .unwrap_or(0);
6620    let db_size_bytes = std::fs::metadata(db_path).map_or(0, |m| m.len());
6621    // v0.6.3.1 P2 (G4) — surface mixed-dim corruption to operators. Best-effort:
6622    // any error here returns 0 rather than failing the stats endpoint.
6623    let dim_violations = dim_violations(conn).unwrap_or(0);
6624
6625    // v0.6.3.1 (P3, G2): cumulative HNSW eviction count is process-local
6626    // state — read from the static counter in src/hnsw.rs. Surfacing it in
6627    // `stats` lets `memory_stats` callers and `ai-memory doctor` (P7) flag
6628    // operators who are sustaining at the index cap.
6629    let index_evictions_total = crate::hnsw::index_evictions_total();
6630
6631    Ok(Stats {
6632        total,
6633        by_tier,
6634        by_namespace,
6635        expiring_soon,
6636        links_count,
6637        db_size_bytes,
6638        dim_violations,
6639        index_evictions_total,
6640    })
6641}
6642
6643/// Run GC if there are any expired memories. Lightweight check first.
6644pub fn gc_if_needed(conn: &Connection, archive: bool) -> Result<usize> {
6645    let now = Utc::now().to_rfc3339();
6646    let has_expired: bool = conn
6647        .query_row(
6648            "SELECT EXISTS(SELECT 1 FROM memories WHERE expires_at IS NOT NULL AND expires_at < ?1)",
6649            params![now],
6650            |r| r.get(0),
6651        )
6652        .unwrap_or(false);
6653    if has_expired {
6654        gc(conn, archive)
6655    } else {
6656        Ok(0)
6657    }
6658}
6659
6660/// Purge old archives if `archive_max_days` is configured.
6661pub fn auto_purge_archive(conn: &Connection, max_days: Option<i64>) -> Result<usize> {
6662    match max_days {
6663        Some(days) if days > 0 => purge_archive(conn, Some(days)),
6664        _ => Ok(0),
6665    }
6666}
6667
/// #1579 B6 (F5.7) — expired rows reaped per GC transaction.
///
/// The pre-fix `gc` ran ONE `BEGIN IMMEDIATE` covering an archive
/// `INSERT … SELECT` + `DELETE` over the entire expired set, holding
/// the sqlite write lock for the whole sweep (seconds on a 100k-row
/// expiry backlog, during which every concurrent writer queues behind
/// `busy_timeout`). Chunking bounds the lock-hold per transaction to
/// this many rows; the loop in [`gc`] re-runs until the backlog drains.
/// 500 keeps each archive-copy + delete transaction in the
/// single-digit-millisecond band on the P1 audit corpus while still
/// amortising the per-transaction fsync across a useful batch.
///
/// Bound as the `LIMIT` (`?2`) of [`SQL_GC_EXPIRED_CHUNK_IDS`]; [`gc`]
/// stops looping as soon as a chunk deletes fewer rows than this.
const GC_CHUNK_ROWS: usize = 500;
6680
/// Subquery selecting one bounded chunk of expired row ids. Shared by
/// the archive `INSERT … SELECT` and the `DELETE` inside the same
/// `BEGIN IMMEDIATE` transaction; `ORDER BY rowid` makes the selection
/// fully deterministic, so both statements — which run against the
/// identical snapshot because the transaction holds the write lock —
/// target the exact same rows and the archive-before-delete invariant
/// is preserved chunk by chunk.
///
/// Parameters: `?1` is the expiry cutoff (an RFC3339 timestamp; rows
/// with `expires_at` strictly before it are selected) and `?2` is the
/// maximum number of ids to return.
const SQL_GC_EXPIRED_CHUNK_IDS: &str = "SELECT id FROM memories \
     WHERE expires_at IS NOT NULL AND expires_at < ?1 \
     ORDER BY rowid LIMIT ?2";
6691
6692pub fn gc(conn: &Connection, archive: bool) -> Result<usize> {
6693    let now = Utc::now().to_rfc3339();
6694    // #1579 B6 (F5.7) — bounded-lock-hold chunked sweep. Each loop
6695    // iteration archives + deletes at most GC_CHUNK_ROWS expired rows
6696    // inside its own BEGIN IMMEDIATE transaction, so concurrent
6697    // writers interleave between chunks instead of stalling behind one
6698    // giant sweep transaction. Archive semantics are preserved: within
6699    // a chunk the archive INSERT and the DELETE address the same
6700    // deterministic id set (see SQL_GC_EXPIRED_CHUNK_IDS), and a
6701    // failure rolls back only the in-flight chunk (already-committed
6702    // chunks remain reaped — same observable contract as repeated
6703    // smaller gc calls).
6704    let mut total = 0usize;
6705    loop {
6706        conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)?;
6707        let result = (|| -> Result<usize> {
6708            if archive {
6709                // v0.6.3.1 P2 (G5) — preserve embedding + tier + expiry on GC archive.
6710                let mut archive_stmt = conn.prepare_cached(&format!(
6711                    "INSERT OR REPLACE INTO archived_memories
6712                     (id, tier, namespace, title, content, tags, priority, confidence,
6713                      source, access_count, created_at, updated_at, last_accessed_at,
6714                      expires_at, archived_at, archive_reason, metadata,
6715                      embedding, embedding_dim, original_tier, original_expires_at,
6716                      reflection_depth, atomised_into, atom_of, memory_kind,
6717                      entity_id, persona_version, citations, source_uri, source_span,
6718                      confidence_source, confidence_signals, confidence_decayed_at,
6719                      mentioned_entity_id, version)
6720                     SELECT id, tier, namespace, title, content, tags, priority, confidence,
6721                            source, access_count, created_at, updated_at, last_accessed_at,
6722                            expires_at, ?1, 'ttl_expired', metadata,
6723                            embedding, embedding_dim, tier, expires_at,
6724                            reflection_depth, atomised_into, atom_of, memory_kind,
6725                            entity_id, persona_version, citations, source_uri, source_span,
6726                            confidence_source, confidence_signals, confidence_decayed_at,
6727                            mentioned_entity_id, version
6728                     FROM memories
6729                     WHERE id IN ({SQL_GC_EXPIRED_CHUNK_IDS})"
6730                ))?;
6731                archive_stmt.execute(params![now, GC_CHUNK_ROWS])?;
6732            }
6733            let mut delete_stmt = conn.prepare_cached(&format!(
6734                "DELETE FROM memories WHERE id IN ({SQL_GC_EXPIRED_CHUNK_IDS})"
6735            ))?;
6736            let deleted = delete_stmt.execute(params![now, GC_CHUNK_ROWS])?;
6737            Ok(deleted)
6738        })();
6739        match result {
6740            Ok(n) => {
6741                conn.execute_batch(connection::SQL_COMMIT)?;
6742                total += n;
6743                if n < GC_CHUNK_ROWS {
6744                    break;
6745                }
6746            }
6747            Err(e) => {
6748                let _ = conn.execute_batch(connection::SQL_ROLLBACK);
6749                return Err(e);
6750            }
6751        }
6752    }
6753    // Clean up namespace_meta rows pointing to deleted memories.
6754    // #1579 B6 — correlated NOT EXISTS instead of the former
6755    // `standard_id NOT IN (SELECT id FROM memories)`, which
6756    // materialised the full id set on every sweep; the rewrite is one
6757    // primary-key probe per namespace_meta row (a small table — one
6758    // row per namespace standard).
6759    let _ = conn.execute(
6760        "DELETE FROM namespace_meta WHERE NOT EXISTS \
6761         (SELECT 1 FROM memories WHERE memories.id = namespace_meta.standard_id)",
6762        [],
6763    );
6764    Ok(total)
6765}
6766
6767// ---------------------------------------------------------------------------
6768// Archive operations
6769// ---------------------------------------------------------------------------
6770
6771pub fn list_archived(
6772    conn: &Connection,
6773    namespace: Option<&str>,
6774    limit: usize,
6775    offset: usize,
6776) -> Result<Vec<serde_json::Value>> {
6777    let (sql, params_vec): (String, Vec<Box<dyn rusqlite::types::ToSql>>) = match namespace {
6778        Some(ns) => (
6779            "SELECT id, tier, namespace, title, content, tags, priority, confidence, \
6780             source, access_count, created_at, updated_at, last_accessed_at, \
6781             expires_at, archived_at, archive_reason, metadata, \
6782             reflection_depth, memory_kind, entity_id, persona_version, \
6783             citations, source_uri, source_span, confidence_source, \
6784             confidence_signals, confidence_decayed_at, version, \
6785             atomised_into, atom_of, mentioned_entity_id \
6786             FROM archived_memories WHERE namespace = ?1 \
6787             ORDER BY archived_at DESC LIMIT ?2 OFFSET ?3"
6788                .to_string(),
6789            vec![Box::new(ns.to_string()), Box::new(limit), Box::new(offset)],
6790        ),
6791        None => (
6792            "SELECT id, tier, namespace, title, content, tags, priority, confidence, \
6793             source, access_count, created_at, updated_at, last_accessed_at, \
6794             expires_at, archived_at, archive_reason, metadata, \
6795             reflection_depth, memory_kind, entity_id, persona_version, \
6796             citations, source_uri, source_span, confidence_source, \
6797             confidence_signals, confidence_decayed_at, version, \
6798             atomised_into, atom_of, mentioned_entity_id \
6799             FROM archived_memories \
6800             ORDER BY archived_at DESC LIMIT ?1 OFFSET ?2"
6801                .to_string(),
6802            vec![Box::new(limit), Box::new(offset)],
6803        ),
6804    };
6805    let params_refs: Vec<&dyn rusqlite::types::ToSql> =
6806        params_vec.iter().map(std::convert::AsRef::as_ref).collect();
6807    let mut stmt = conn.prepare(&sql)?;
6808    let rows = stmt.query_map(params_refs.as_slice(), |row| {
6809        // v0.7.0 issue #861 — `metadata` is stored as a JSON TEXT blob
6810        // in the column. Falling back to `{}` only covers a NULL/empty
6811        // read; the surrounding column projection then re-encodes it
6812        // structured so callers see a real JSON object instead of an
6813        // escaped string. Coupled with the forget-path archive INSERTs
6814        // around lines 1268 / 1289 above (now SELECTing `metadata` so
6815        // the column actually carries the source row's metadata), this
6816        // restores the round-trip `agent_id` / `imported_from_*` /
6817        // `consolidated_from_agents` keys callers rely on for
6818        // attribution + restore.
6819        let metadata_str = row
6820            .get::<_, String>(16)
6821            .unwrap_or_else(|_| "{}".to_string());
6822        let metadata: serde_json::Value =
6823            serde_json::from_str(&metadata_str).unwrap_or_else(|_| serde_json::json!({}));
6824        // v0.7.0 issue #861 — `tags` is stored as a JSON-encoded array
6825        // TEXT (`'["a","b"]'`) by every write path. Returning the raw
6826        // String forced callers to either double-parse or accept a
6827        // string where they expected a JSON array. Parse here so the
6828        // response matches the live-row shape (`memory_get`) and the
6829        // contract tests in `tests/archive_serialization.rs`. NULL /
6830        // malformed columns fall through to an empty array — the
6831        // archive table's CHECK constraint makes the malformed case a
6832        // never-in-practice path, but the fall-through keeps the read
6833        // contract noisy-input-clean rather than panic-on-corruption.
6834        let tags_str = row.get::<_, String>(5).unwrap_or_else(|_| "[]".to_string());
6835        let tags: serde_json::Value =
6836            serde_json::from_str(&tags_str).unwrap_or_else(|_| serde_json::json!([]));
6837        Ok(serde_json::json!({
6838            "id": row.get::<_, String>(0)?,
6839            "tier": row.get::<_, String>(1)?,
6840            "namespace": row.get::<_, String>(2)?,
6841            "title": row.get::<_, String>(3)?,
6842            "content": row.get::<_, String>(4)?,
6843            "tags": tags,
6844            "priority": row.get::<_, i32>(6)?,
6845            (field_names::CONFIDENCE): row.get::<_, f64>(7)?,
6846            "source": row.get::<_, String>(8)?,
6847            (field_names::ACCESS_COUNT): row.get::<_, i64>(9)?,
6848            (field_names::CREATED_AT): row.get::<_, String>(10)?,
6849            (field_names::UPDATED_AT): row.get::<_, String>(11)?,
6850            (field_names::LAST_ACCESSED_AT): row.get::<_, Option<String>>(12)?,
6851            (field_names::EXPIRES_AT): row.get::<_, Option<String>>(13)?,
6852            (field_names::ARCHIVED_AT): row.get::<_, String>(14)?,
6853            (field_names::ARCHIVE_REASON): row.get::<_, String>(15)?,
6854            "metadata": metadata,
6855            // #1637 — the v49 columns (in the table since #1025; restore
6856            // was lossless but the LISTING surface projected only the 17
6857            // legacy columns, so archived v0.7.0 fields were invisible
6858            // to memory_archive_list). Additive keys; JSON-ish columns
6859            // parse to structured like tags/metadata above.
6860            (field_names::REFLECTION_DEPTH): row.get::<_, Option<i64>>(17)?.unwrap_or(0),
6861            (field_names::MEMORY_KIND): row.get::<_, Option<String>>(18)?,
6862            "entity_id": row.get::<_, Option<String>>(19)?,
6863            (field_names::PERSONA_VERSION): row.get::<_, Option<i64>>(20)?,
6864            "citations": row
6865                .get::<_, Option<String>>(21)?
6866                .and_then(|c| serde_json::from_str::<serde_json::Value>(&c).ok())
6867                .unwrap_or_else(|| serde_json::json!([])),
6868            (field_names::SOURCE_URI): row.get::<_, Option<String>>(22)?,
6869            (field_names::SOURCE_SPAN): row
6870                .get::<_, Option<String>>(23)?
6871                .and_then(|c| serde_json::from_str::<serde_json::Value>(&c).ok()),
6872            (field_names::CONFIDENCE_SOURCE): row.get::<_, Option<String>>(24)?,
6873            (field_names::CONFIDENCE_SIGNALS): row
6874                .get::<_, Option<String>>(25)?
6875                .and_then(|c| serde_json::from_str::<serde_json::Value>(&c).ok()),
6876            (field_names::CONFIDENCE_DECAYED_AT): row.get::<_, Option<String>>(26)?,
6877            "version": row.get::<_, Option<i64>>(27)?.unwrap_or(1),
6878            (field_names::ATOMISED_INTO): row.get::<_, Option<i64>>(28)?,
6879            (field_names::ATOM_OF): row.get::<_, Option<String>>(29)?,
6880            (field_names::MENTIONED_ENTITY_ID): row.get::<_, Option<String>>(30)?,
6881        }))
6882    })?;
6883    rows.collect::<rusqlite::Result<Vec<_>>>()
6884        .map_err(Into::into)
6885}
6886
6887pub fn restore_archived(conn: &Connection, id: &str) -> Result<bool> {
6888    let now = Utc::now().to_rfc3339();
6889    conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)?;
6890    let result = (|| -> Result<bool> {
6891        let exists: bool = conn
6892            .query_row(
6893                "SELECT COUNT(*) > 0 FROM archived_memories WHERE id = ?1",
6894                params![id],
6895                |r| r.get(0),
6896            )
6897            .unwrap_or(false);
6898        if !exists {
6899            return Ok(false);
6900        }
6901        // Check if ID already exists in active memories to prevent silent overwrite
6902        let active_exists: bool = conn
6903            .query_row(SQL_MEMORY_EXISTS_COUNT, params![id], |r| r.get(0))
6904            .unwrap_or(false);
6905        if active_exists {
6906            // #962 typed envelope — ArchiveRestoreCollision (409).
6907            return Err(anyhow::Error::new(StorageError::ArchiveRestoreCollision {
6908                id: id.to_string(),
6909            }));
6910        }
6911        // Validate archived metadata before restoring
6912        let archived_metadata: String = conn
6913            .query_row(
6914                "SELECT metadata FROM archived_memories WHERE id = ?1",
6915                params![id],
6916                |r| r.get(0),
6917            )
6918            .unwrap_or_else(|_| "{}".to_string());
6919        let meta_value: serde_json::Value =
6920            serde_json::from_str(&archived_metadata).unwrap_or_else(|_| serde_json::json!({}));
6921        if let Err(e) = crate::validate::validate_metadata(&meta_value) {
6922            tracing::warn!("archived memory {id} has invalid metadata, resetting to {{}}: {e}");
6923            conn.execute(
6924                "UPDATE archived_memories SET metadata = '{}' WHERE id = ?1",
6925                params![id],
6926            )?;
6927        }
6928        // FX-C5 — substrate governance pre-write hook parity. Restoring
6929        // an archived row mints a fresh live row via a raw INSERT...SELECT
6930        // that bypasses the `db::insert(..)` tail (which is where the
6931        // SQLite path normally consults `GOVERNANCE_PRE_WRITE`). Without
6932        // this call, an operator's signed governance rule could be
6933        // bypassed by restoring a row whose `(title, namespace)` would
6934        // otherwise be refused on a direct write. Load the archived row
6935        // shaped as a `Memory` and fire the hook BEFORE the INSERT;
6936        // a refusal short-circuits the transaction (outer ROLLBACK).
6937        let candidate = load_archived_as_memory(conn, id)?;
6938        consult_governance_pre_write(&candidate)?;
6939
6940        // v0.6.3.1 P2 (G5) — preserve original tier + expires_at + embedding
6941        // on restore. Pre-v17 rows lost this metadata permanently; the
6942        // migration backfills `original_tier='long'` so they still restore
6943        // as permanent (the prior behavior — no regression for legacy data).
6944        // Live writes from v0.6.3.1 onward round-trip the original tier.
6945        // #1025 (CRITICAL, 2026-05-21) — full v0.7.0 column carry on
6946        // archive→restore. Pre-#1025 the SELECT pulled only 17 columns;
6947        // restored row landed with reflection_depth=0 (DEFAULT),
6948        // memory_kind='observation' (DEFAULT), citations=[] (DEFAULT),
6949        // version=1 (DEFAULT) — silent loss of Form-4/5 provenance.
6950        // COALESCE handles legacy already-archived rows where the
6951        // v49-added columns are NULL.
6952        conn.execute(
6953            "INSERT INTO memories
6954             (id, tier, namespace, title, content, tags, priority, confidence,
6955              source, access_count, created_at, updated_at, last_accessed_at,
6956              expires_at, metadata, embedding, embedding_dim,
6957              reflection_depth, atomised_into, atom_of, memory_kind,
6958              entity_id, persona_version, citations, source_uri, source_span,
6959              confidence_source, confidence_signals, confidence_decayed_at,
6960              mentioned_entity_id, version)
6961             SELECT id, COALESCE(original_tier, 'long'), namespace, title, content,
6962                    tags, priority, confidence, source, access_count, created_at,
6963                    ?1, last_accessed_at, original_expires_at, metadata,
6964                    embedding, embedding_dim,
6965                    COALESCE(reflection_depth, 0),
6966                    atomised_into,
6967                    atom_of,
6968                    COALESCE(memory_kind, 'observation'),
6969                    entity_id, persona_version,
6970                    COALESCE(citations, '[]'),
6971                    source_uri, source_span,
6972                    COALESCE(confidence_source, 'caller_provided'),
6973                    confidence_signals, confidence_decayed_at,
6974                    mentioned_entity_id,
6975                    COALESCE(version, 1)
6976             FROM archived_memories WHERE id = ?2",
6977            params![now, id],
6978        )?;
6979        conn.execute("DELETE FROM archived_memories WHERE id = ?1", params![id])?;
6980        Ok(true)
6981    })();
6982    match result {
6983        Ok(v) => {
6984            conn.execute_batch(connection::SQL_COMMIT)?;
6985            Ok(v)
6986        }
6987        Err(e) => {
6988            let _ = conn.execute_batch(connection::SQL_ROLLBACK);
6989            Err(e)
6990        }
6991    }
6992}
6993
6994/// #940 (security-high, 2026-05-20) — caller-scoped restore variant.
6995/// Mirrors [`restore_archived`] but constrains the INSERT-SELECT to
6996/// rows whose `metadata->'agent_id'` JSON field matches `caller`
6997/// (with the inbox-target carve-out: rows whose
6998/// `metadata->'target_agent_id'` matches `caller` are also
6999/// restorable by the inbox owner, matching the SAL
7000/// [`crate::store::is_visible_to_caller`] visibility predicate).
7001///
7002/// Pre-#940 the only restore variant was owner-blind; any
7003/// authenticated HTTP caller could restore any other owner's
7004/// archived rows back into the live working set via
7005/// `POST /api/v1/archive/{id}/restore`. The postgres SAL branch was
7006/// already QC-P1-fixed (2026-05-20) to pass
7007/// `CallerContext::for_agent(caller)`; the sqlite branch is closed
7008/// by this helper. Returns `Ok(false)` on a non-owner attempt so the
7009/// surface cannot be used to probe other owners' archived ids.
7010pub fn restore_archived_for_caller(conn: &Connection, id: &str, caller: &str) -> Result<bool> {
7011    let now = Utc::now().to_rfc3339();
7012    conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)?;
7013    let result = (|| -> Result<bool> {
7014        // Owner gate: row must exist AND match the caller (or be an
7015        // inbox-target row whose recipient is the caller, or be a
7016        // legacy unowned row — see archive_memory_for_caller for the
7017        // matching SQL + #940 carve-out rationale).
7018        let owned: bool = conn
7019            .query_row(
7020                "SELECT COUNT(*) > 0 FROM archived_memories \
7021                 WHERE id = ?1 \
7022                   AND ( \
7023                     json_extract(metadata, '$.agent_id') = ?2 OR \
7024                     json_extract(metadata, '$.target_agent_id') = ?2 OR \
7025                     json_extract(metadata, '$.agent_id') IS NULL OR \
7026                     json_extract(metadata, '$.agent_id') = '' \
7027                   )",
7028                params![id, caller],
7029                |r| r.get(0),
7030            )
7031            .unwrap_or(false);
7032        if !owned {
7033            return Ok(false);
7034        }
7035        // Check if ID already exists in active memories to prevent silent overwrite.
7036        let active_exists: bool = conn
7037            .query_row(SQL_MEMORY_EXISTS_COUNT, params![id], |r| r.get(0))
7038            .unwrap_or(false);
7039        if active_exists {
7040            // #962 typed envelope — ArchiveRestoreCollision (409).
7041            return Err(anyhow::Error::new(StorageError::ArchiveRestoreCollision {
7042                id: id.to_string(),
7043            }));
7044        }
7045        // Validate archived metadata before restoring (mirror restore_archived).
7046        let archived_metadata: String = conn
7047            .query_row(
7048                "SELECT metadata FROM archived_memories WHERE id = ?1",
7049                params![id],
7050                |r| r.get(0),
7051            )
7052            .unwrap_or_else(|_| "{}".to_string());
7053        let meta_value: serde_json::Value =
7054            serde_json::from_str(&archived_metadata).unwrap_or_else(|_| serde_json::json!({}));
7055        if let Err(e) = crate::validate::validate_metadata(&meta_value) {
7056            tracing::warn!("archived memory {id} has invalid metadata, resetting to {{}}: {e}");
7057            conn.execute(
7058                "UPDATE archived_memories SET metadata = '{}' WHERE id = ?1",
7059                params![id],
7060            )?;
7061        }
7062        // FX-C5 — substrate governance pre-write hook parity. See the
7063        // matching block in `restore_archived` above for rationale.
7064        // Caller-scoped variant uses the same hook contract — the
7065        // hook is owner-agnostic (it sees the Memory payload, not the
7066        // caller context); ownership gating already happened on the
7067        // SELECT above.
7068        let candidate = load_archived_as_memory(conn, id)?;
7069        consult_governance_pre_write(&candidate)?;
7070        // #1025 (CRITICAL, 2026-05-21) — full v0.7.0 column carry on
7071        // archive→restore. Pre-#1025 the SELECT pulled only 17 columns;
7072        // restored row landed with reflection_depth=0 (DEFAULT),
7073        // memory_kind='observation' (DEFAULT), citations=[] (DEFAULT),
7074        // version=1 (DEFAULT) — silent loss of Form-4/5 provenance.
7075        // COALESCE handles legacy already-archived rows where the
7076        // v49-added columns are NULL.
7077        conn.execute(
7078            "INSERT INTO memories
7079             (id, tier, namespace, title, content, tags, priority, confidence,
7080              source, access_count, created_at, updated_at, last_accessed_at,
7081              expires_at, metadata, embedding, embedding_dim,
7082              reflection_depth, atomised_into, atom_of, memory_kind,
7083              entity_id, persona_version, citations, source_uri, source_span,
7084              confidence_source, confidence_signals, confidence_decayed_at,
7085              mentioned_entity_id, version)
7086             SELECT id, COALESCE(original_tier, 'long'), namespace, title, content,
7087                    tags, priority, confidence, source, access_count, created_at,
7088                    ?1, last_accessed_at, original_expires_at, metadata,
7089                    embedding, embedding_dim,
7090                    COALESCE(reflection_depth, 0),
7091                    atomised_into,
7092                    atom_of,
7093                    COALESCE(memory_kind, 'observation'),
7094                    entity_id, persona_version,
7095                    COALESCE(citations, '[]'),
7096                    source_uri, source_span,
7097                    COALESCE(confidence_source, 'caller_provided'),
7098                    confidence_signals, confidence_decayed_at,
7099                    mentioned_entity_id,
7100                    COALESCE(version, 1)
7101             FROM archived_memories WHERE id = ?2",
7102            params![now, id],
7103        )?;
7104        conn.execute("DELETE FROM archived_memories WHERE id = ?1", params![id])?;
7105        Ok(true)
7106    })();
7107    match result {
7108        Ok(v) => {
7109            conn.execute_batch(connection::SQL_COMMIT)?;
7110            Ok(v)
7111        }
7112        Err(e) => {
7113            let _ = conn.execute_batch(connection::SQL_ROLLBACK);
7114            Err(e)
7115        }
7116    }
7117}
7118
7119/// FX-C5 — load a row from `archived_memories` shaped as a [`Memory`]
7120/// so the substrate `GOVERNANCE_PRE_WRITE` hook can inspect the
7121/// restore candidate BEFORE the live INSERT lands. The archived
7122/// table shares the v0.7.0 column shape with `memories` (#1025) so
7123/// the same `row_to_memory` helper applies; columns absent on legacy
7124/// pre-#1025 archived rows fall through to the same defaults
7125/// `row_to_memory` already applies. The `original_tier` column wins
7126/// over the archive-time `tier` so the candidate hook sees the row
7127/// at the tier it will land at post-restore (matches the SQL the
7128/// caller is about to execute).
7129fn load_archived_as_memory(conn: &Connection, id: &str) -> Result<Memory> {
7130    let mut stmt = conn.prepare(
7131        "SELECT id, COALESCE(original_tier, tier) AS tier, namespace, title, content,
7132                tags, priority, confidence, source, access_count, created_at,
7133                updated_at, last_accessed_at,
7134                COALESCE(original_expires_at, expires_at) AS expires_at, metadata,
7135                COALESCE(reflection_depth, 0) AS reflection_depth,
7136                COALESCE(memory_kind, 'observation') AS memory_kind,
7137                entity_id, persona_version,
7138                COALESCE(citations, '[]') AS citations,
7139                source_uri, source_span,
7140                COALESCE(confidence_source, 'caller_provided') AS confidence_source,
7141                confidence_signals, confidence_decayed_at,
7142                COALESCE(version, 1) AS version
7143         FROM archived_memories WHERE id = ?1",
7144    )?;
7145    let mem = stmt.query_row(params![id], row_to_memory)?;
7146    Ok(mem)
7147}
7148
7149pub fn purge_archive(conn: &Connection, older_than_days: Option<i64>) -> Result<usize> {
7150    match older_than_days {
7151        Some(days) if days < 0 => {
7152            // #962 typed envelope.
7153            return Err(anyhow::Error::new(StorageError::InvalidArgument {
7154                reason: crate::errors::msg::older_than_days_negative(days),
7155            }));
7156        }
7157        Some(days) => {
7158            let cutoff = (Utc::now() - chrono::Duration::days(days)).to_rfc3339();
7159            let deleted = conn.execute(
7160                "DELETE FROM archived_memories WHERE archived_at < ?1",
7161                params![cutoff],
7162            )?;
7163            Ok(deleted)
7164        }
7165        None => {
7166            let deleted = conn.execute("DELETE FROM archived_memories", [])?;
7167            Ok(deleted)
7168        }
7169    }
7170}
7171
7172/// #936 (security-critical, 2026-05-20) — caller-scoped purge variant.
7173/// Mirrors [`purge_archive`] but constrains the DELETE to rows whose
7174/// `metadata->'agent_id'` JSON field matches `caller` (with the
7175/// inbox-target carve-out: rows whose `metadata->'target_agent_id'`
7176/// matches `caller` are also purgeable by the inbox owner, matching
7177/// the SAL [`crate::store::is_visible_to_caller`] visibility
7178/// predicate).
7179///
7180/// Pre-#936 the only purge variant was owner-blind; any authenticated
7181/// HTTP caller could destroy every owner's archive corpus via
7182/// `DELETE /api/v1/archive`. The handler at
7183/// `src/handlers/archive.rs::purge_archive` now resolves the caller
7184/// from `X-Agent-Id` and routes through this owner-scoped variant by
7185/// default; the admin/operator path (full owner-blind wipe) is
7186/// reserved for callers whose `agent_id` appears in the
7187/// `[admin].agent_ids` allowlist and is reached via the SAL trait
7188/// path with `CallerContext::bypass_visibility = true`.
7189///
7190/// Returns the count of rows actually deleted; a non-admin call with
7191/// no matching rows returns `Ok(0)` so the caller cannot enumerate
7192/// other owners' archive corpus via this surface.
7193pub fn purge_archive_for_caller(
7194    conn: &Connection,
7195    caller: &str,
7196    older_than_days: Option<i64>,
7197) -> Result<usize> {
7198    match older_than_days {
7199        Some(days) if days < 0 => {
7200            // #962 typed envelope.
7201            return Err(anyhow::Error::new(StorageError::InvalidArgument {
7202                reason: crate::errors::msg::older_than_days_negative(days),
7203            }));
7204        }
7205        Some(days) => {
7206            let cutoff = (Utc::now() - chrono::Duration::days(days)).to_rfc3339();
7207            let deleted = conn.execute(
7208                "DELETE FROM archived_memories \
7209                 WHERE archived_at < ?1 \
7210                   AND ( \
7211                     json_extract(metadata, '$.agent_id') = ?2 OR \
7212                     json_extract(metadata, '$.target_agent_id') = ?2 \
7213                   )",
7214                params![cutoff, caller],
7215            )?;
7216            Ok(deleted)
7217        }
7218        None => {
7219            let deleted = conn.execute(
7220                "DELETE FROM archived_memories \
7221                 WHERE \
7222                   json_extract(metadata, '$.agent_id') = ?1 OR \
7223                   json_extract(metadata, '$.target_agent_id') = ?1",
7224                params![caller],
7225            )?;
7226            Ok(deleted)
7227        }
7228    }
7229}
7230
7231pub fn archive_stats(conn: &Connection) -> Result<serde_json::Value> {
7232    let total: i64 = conn.query_row("SELECT COUNT(*) FROM archived_memories", [], |r| r.get(0))?;
7233    let mut stmt = conn.prepare(
7234        "SELECT namespace, COUNT(*) FROM archived_memories GROUP BY namespace ORDER BY COUNT(*) DESC",
7235    )?;
7236    let by_ns: Vec<serde_json::Value> = stmt
7237        .query_map([], |row| {
7238            Ok(serde_json::json!({
7239                "namespace": row.get::<_, String>(0)?,
7240                "count": row.get::<_, i64>(1)?,
7241            }))
7242        })?
7243        .collect::<rusqlite::Result<Vec<_>>>()?;
7244    Ok(serde_json::json!({
7245        "archived_total": total,
7246        (field_names::BY_NAMESPACE): by_ns,
7247    }))
7248}
7249
7250pub fn export_all(conn: &Connection) -> Result<Vec<Memory>> {
7251    let now = Utc::now().to_rfc3339();
7252    let mut stmt = conn.prepare(
7253        "SELECT * FROM memories WHERE expires_at IS NULL OR expires_at > ?1 ORDER BY created_at ASC",
7254    )?;
7255    let rows = stmt.query_map(params![now], row_to_memory)?;
7256    rows.collect::<rusqlite::Result<Vec<_>>>()
7257        .map_err(Into::into)
7258}
7259
7260pub fn export_links(conn: &Connection) -> Result<Vec<MemoryLink>> {
7261    let now = Utc::now().to_rfc3339();
7262    // v0.7 H3 — also pull the signature blob, the `observed_by` claim,
7263    // and the temporal-validity columns. Federation peers consume these
7264    // through `verify::verify` to gate inbound replication; legacy
7265    // unsigned rows surface NULL for `signature` / `observed_by` and
7266    // the inbound path falls back to `attest_level = "unsigned"`.
7267    let mut stmt = conn.prepare(
7268        "SELECT ml.source_id, ml.target_id, ml.relation, ml.created_at,
7269                ml.signature, ml.observed_by, ml.valid_from, ml.valid_until
7270         FROM memory_links ml
7271         JOIN memories ms ON ms.id = ml.source_id AND (ms.expires_at IS NULL OR ms.expires_at > ?1)
7272         JOIN memories mt ON mt.id = ml.target_id AND (mt.expires_at IS NULL OR mt.expires_at > ?1)",
7273    )?;
7274    let rows = stmt.query_map(params![now], |row| {
7275        let relation_str: String = row.get(2)?;
7276        Ok(MemoryLink {
7277            source_id: row.get(0)?,
7278            target_id: row.get(1)?,
7279            // v0.7.0 fix campaign R1-M4 — see `get_links` for rationale.
7280            relation: crate::models::MemoryLinkRelation::from_str(&relation_str)
7281                .unwrap_or_default(),
7282            created_at: row.get(3)?,
7283            signature: row.get::<_, Option<Vec<u8>>>(4)?,
7284            observed_by: row.get::<_, Option<String>>(5)?,
7285            valid_from: row.get::<_, Option<String>>(6)?,
7286            valid_until: row.get::<_, Option<String>>(7)?,
7287            // v0.7.0 #860 — `export_links` is the federation outbound
7288            // path; the wire shape stays without `attest_level` so
7289            // pre-v0.7 receivers do not see an unknown field. Leaving
7290            // this `None` keeps `skip_serializing_if` from emitting it.
7291            attest_level: None,
7292        })
7293    })?;
7294    rows.collect::<rusqlite::Result<Vec<_>>>()
7295        .map_err(Into::into)
7296}
7297
7298/// Insert with timestamp-aware conflict resolution for sync.
7299/// Only overwrites if the incoming memory is newer (by `updated_at`,
7300/// tiebroken by memory.id for a total order across peers —
7301/// ultrareview #344, #345).
7302///
7303/// Rationale: ISO 8601 / RFC 3339 strings compare lexicographically
7304/// as long as all timestamps carry consistent precision + Z suffix.
7305/// Equal timestamps (common when two nodes edit in the same ms, or
7306/// when NTP aligns clocks) previously produced non-deterministic
7307/// winners per peer, causing permanent mesh divergence. Adding the
7308/// memory.id tiebreaker yields a total order every peer agrees on.
7309pub fn insert_if_newer(conn: &Connection, mem: &Memory) -> Result<String> {
7310    // v0.7.0 L1-6 Deliverable E — substrate governance pre-write
7311    // gate. Federation `sync_push` / catchup-loop peer pushes flow
7312    // through this entry point; treating them identically to direct
7313    // writes is the load-bearing property — an agent that bypasses
7314    // a local rule by routing through a peer would otherwise slip
7315    // past the gate. The hook fires on every newer-wins merge attempt.
7316    consult_governance_pre_write(mem)?;
7317
7318    let tags_json = serde_json::to_string(&mem.tags)?;
7319    let metadata_json = serde_json::to_string(&mem.metadata)?;
7320    // v0.7.0 Form 4 — encode citations + source_span for the schema
7321    // v38 TEXT columns on the federation merge path. The newer-wins
7322    // CASE clauses below pick `excluded.citations` only when the
7323    // incoming row is the winner; otherwise the existing row's
7324    // citations are preserved.
7325    let citations_json = serde_json::to_string(&mem.citations)?;
7326    let source_span_json = match mem.source_span {
7327        Some(span) => Some(serde_json::to_string(&span)?),
7328        None => None,
7329    };
7330    // v0.7.0 Form 5 — encode confidence-provenance fields for the
7331    // schema v39 TEXT columns on the federation merge path. The
7332    // newer-wins CASE clauses pick `excluded.confidence_source` only
7333    // when the incoming row wins the tiebreak; otherwise the local
7334    // row's confidence provenance is preserved.
7335    let confidence_signals_json = match &mem.confidence_signals {
7336        Some(s) => Some(serde_json::to_string(s)?),
7337        None => None,
7338    };
7339    // v0.7.0 polish PERF-8 (#781) — denormalised mention tag for the
7340    // federation `insert_if_newer` merge path. The newer-wins CASE
7341    // clause picks the winner's mentioned_entity_id when the incoming
7342    // row wins the tiebreak; otherwise the local row's tag is preserved
7343    // so a stale peer cannot blank out a value the matcher's index
7344    // depends on.
7345    let mentioned_entity_id = extract_mentioned_entity_id(mem);
7346    // #1579 B6 — federation catch-up replays this newer-wins upsert
7347    // once per pulled row; `prepare_cached` amortises the parse of the
7348    // largest SQL statement in the file across the whole batch.
7349    let mut newer_wins_stmt = conn.prepare_cached(
7350        "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, confidence, source, access_count, created_at, updated_at, last_accessed_at, expires_at, metadata, reflection_depth, memory_kind, entity_id, persona_version, citations, source_uri, source_span, confidence_source, confidence_signals, confidence_decayed_at, mentioned_entity_id, version)
7351         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, ?20, ?21, ?22, ?23, ?24, ?25, ?26, ?27)
7352         ON CONFLICT(title, namespace) DO UPDATE SET
7353            content = CASE WHEN excluded.updated_at > memories.updated_at
7354                             OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
7355                           THEN excluded.content ELSE memories.content END,
7356            tags = CASE WHEN excluded.updated_at > memories.updated_at
7357                          OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
7358                        THEN excluded.tags ELSE memories.tags END,
7359            priority = MAX(memories.priority, excluded.priority),
7360            confidence = MAX(memories.confidence, excluded.confidence),
7361            source = CASE WHEN excluded.updated_at > memories.updated_at
7362                            OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
7363                          THEN excluded.source ELSE memories.source END,
7364            tier = CASE WHEN excluded.tier = 'long' THEN 'long'
7365                        WHEN memories.tier = 'long' THEN 'long'
7366                        WHEN excluded.tier = 'mid' THEN 'mid'
7367                        ELSE memories.tier END,
7368            updated_at = MAX(memories.updated_at, excluded.updated_at),
7369            access_count = MAX(memories.access_count, excluded.access_count),
7370            expires_at = CASE WHEN excluded.tier = 'long' OR memories.tier = 'long' THEN NULL
7371                              ELSE COALESCE(excluded.expires_at, memories.expires_at) END,
7372            -- Preserve metadata.agent_id across newer-wins merge (NHI provenance immutable).
7373            metadata = CASE
7374                WHEN json_extract(memories.metadata, '$.agent_id') IS NOT NULL
7375                THEN json_set(
7376                    CASE WHEN excluded.updated_at > memories.updated_at
7377                              OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
7378                         THEN excluded.metadata
7379                         ELSE memories.metadata END,
7380                    '$.agent_id',
7381                    json_extract(memories.metadata, '$.agent_id')
7382                )
7383                ELSE CASE WHEN excluded.updated_at > memories.updated_at
7384                               OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
7385                          THEN excluded.metadata
7386                          ELSE memories.metadata END
7387            END,
7388            -- v0.7.0 Task 1/8 — recursion depth takes max so the reflection
7389            -- signal isn't lost on newer-wins federation merges.
7390            reflection_depth = MAX(memories.reflection_depth, excluded.reflection_depth),
7391            -- v0.7.0 L1-1 — kind is sticky across federation merges: a
7392            -- reflection row must not be downgraded to observation by a
7393            -- newer-wins merge from a peer that doesn't know about the kind.
7394            -- v0.7.0 QW-2 — Persona is similarly sticky.
7395            memory_kind = CASE WHEN memories.memory_kind = 'reflection' THEN 'reflection'
7396                               WHEN memories.memory_kind = 'persona' THEN 'persona'
7397                               ELSE excluded.memory_kind END,
7398            -- v0.7.0 QW-2 — entity_id + persona_version are immutable
7399            -- once set so a federation merge can't drop the persona
7400            -- discriminator off a `memory_kind = 'persona'` row.
7401            entity_id = COALESCE(memories.entity_id, excluded.entity_id),
7402            persona_version = COALESCE(memories.persona_version, excluded.persona_version),
7403            -- v0.7.0 Form 4 — fact-provenance: replace the stored
7404            -- citations array only when the incoming row wins the
7405            -- newer-wins tiebreak; source_uri / source_span follow
7406            -- COALESCE semantics so a federation merge that lacks
7407            -- provenance does not blank out a value the local row
7408            -- already had.
7409            citations = CASE WHEN excluded.updated_at > memories.updated_at
7410                                  OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
7411                             THEN excluded.citations ELSE memories.citations END,
7412            source_uri = COALESCE(excluded.source_uri, memories.source_uri),
7413            source_span = COALESCE(excluded.source_span, memories.source_span),
7414            -- v0.7.0 Form 5 — confidence-provenance follows the newer-
7415            -- wins shape established for the other Form 4 columns.
7416            -- A peer pushing an auto-derived/calibrated value wins on
7417            -- the timestamp tiebreak; otherwise the local row's
7418            -- provenance is preserved so a stale peer cannot blank out
7419            -- a fresher local calibration.
7420            confidence_source = CASE WHEN excluded.updated_at > memories.updated_at
7421                                          OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
7422                                     THEN excluded.confidence_source ELSE memories.confidence_source END,
7423            confidence_signals = CASE WHEN excluded.updated_at > memories.updated_at
7424                                           OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
7425                                      THEN excluded.confidence_signals ELSE memories.confidence_signals END,
7426            confidence_decayed_at = CASE WHEN excluded.updated_at > memories.updated_at
7427                                              OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
7428                                         THEN excluded.confidence_decayed_at ELSE memories.confidence_decayed_at END,
7429            -- v0.7.0 polish PERF-8 (#781) — newer-wins on the mention
7430            -- tag (the winning row's content is the one a future matcher
7431            -- query expects to find); otherwise preserve the local tag
7432            -- so a stale peer that lacks the structured entity_id
7433            -- metadata cannot blank out a value the index serves.
7434            mentioned_entity_id = CASE WHEN excluded.updated_at > memories.updated_at
7435                                            OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
7436                                       THEN COALESCE(excluded.mentioned_entity_id, memories.mentioned_entity_id)
7437                                       ELSE memories.mentioned_entity_id END,
7438            -- #1631 (decide-once, #1029 contract) — `version` IS
7439            -- replicated state on the federation merge path: merge via
7440            -- MAX(local, remote) so an out-of-order peer push can't
7441            -- roll the Gap-1 optimistic-concurrency counter backwards.
7442            -- Matches the pg `apply_remote_memory` GREATEST arm.
7443            version = MAX(memories.version, excluded.version)
7444         RETURNING id",
7445    )?;
7446    let actual_id: String = newer_wins_stmt.query_row(
7447        params![
7448            mem.id,
7449            mem.tier.as_str(),
7450            mem.namespace,
7451            mem.title,
7452            mem.content,
7453            tags_json,
7454            mem.priority,
7455            mem.confidence,
7456            mem.source,
7457            mem.access_count,
7458            mem.created_at,
7459            mem.updated_at,
7460            mem.last_accessed_at,
7461            mem.effective_expires_at(),
7462            metadata_json,
7463            mem.reflection_depth,
7464            mem.memory_kind.as_str(),
7465            mem.entity_id,
7466            mem.persona_version,
7467            citations_json,
7468            mem.source_uri,
7469            source_span_json,
7470            mem.confidence_source.as_str(),
7471            confidence_signals_json,
7472            mem.confidence_decayed_at,
7473            mentioned_entity_id,
7474            mem.version,
7475        ],
7476        |r| r.get(0),
7477    )?;
7478    Ok(actual_id)
7479}
7480
7481// --- Embedding support ---
7482
/// v0.6.3.1 P2 (G4): typed error produced by `set_embedding` when a write
/// would bring a new embedding dimensionality into a namespace whose
/// dimensionality was already established by an earlier write. Kept as a
/// dedicated type so the MCP/HTTP handlers can map it to a 409 Conflict
/// instead of letting cosine silently return 0.0 on every later recall.
#[derive(Debug)]
pub struct EmbeddingDimMismatch {
    /// Namespace whose established dimensionality the write conflicts with.
    pub namespace: String,
    /// Dimensionality the namespace already holds.
    pub established: usize,
    /// Dimensionality of the refused write.
    pub attempted: usize,
}

impl std::fmt::Display for EmbeddingDimMismatch {
    /// Renders the namespace, the established dim and the refused dim as a
    /// single human-readable line.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            namespace,
            established,
            attempted,
        } = self;
        write!(
            f,
            "embedding dim mismatch in namespace '{}': established {}-dim, refused {}-dim write",
            namespace, established, attempted
        )
    }
}

impl std::error::Error for EmbeddingDimMismatch {}
7506
7507/// Lookup the embedding dimensionality already established for `namespace`.
7508/// Returns `Ok(None)` when no row in that namespace has an embedding yet.
7509///
7510/// # Errors
7511///
7512/// Returns the underlying SQLite error.
7513pub fn namespace_embedding_dim(conn: &Connection, namespace: &str) -> Result<Option<usize>> {
7514    // Use the v17 idx_memories_ns_dim partial index.
7515    let dim: Option<i64> = conn
7516        .query_row(
7517            "SELECT embedding_dim FROM memories \
7518             WHERE namespace = ?1 AND embedding_dim IS NOT NULL \
7519             LIMIT 1",
7520            params![namespace],
7521            |r| r.get(0),
7522        )
7523        .ok();
7524    Ok(dim.and_then(|d| usize::try_from(d).ok()))
7525}
7526
7527/// Count rows whose stored `embedding_dim` does not match what the BLOB
7528/// contains (or where the column is missing while a BLOB exists). Surfaced
7529/// in `Stats::dim_violations` and consumed by P7 doctor.
7530///
7531/// # Errors
7532///
7533/// Returns the underlying SQLite error.
7534pub fn dim_violations(conn: &Connection) -> Result<u64> {
7535    // The expression `length(embedding)` returns the BLOB length; we map
7536    // legacy (no-header) payloads to `length/4` and headed (v17+) payloads
7537    // to `(length-1)/4` because length parity tells us which form is on
7538    // disk. Both forms must match the declared `embedding_dim` column.
7539    let n: i64 = conn
7540        .query_row(
7541            "SELECT COUNT(*) FROM memories \
7542             WHERE embedding IS NOT NULL \
7543               AND length(embedding) >= 4 \
7544               AND ( \
7545                   embedding_dim IS NULL \
7546                   OR ( \
7547                       (length(embedding) % 4 = 0 AND embedding_dim != length(embedding)/4) \
7548                       OR (length(embedding) % 4 = 1 AND embedding_dim != (length(embedding)-1)/4) \
7549                       OR (length(embedding) % 4 NOT IN (0,1)) \
7550                   ) \
7551               )",
7552            [],
7553            |r| r.get(0),
7554        )
7555        .unwrap_or(0);
7556    Ok(u64::try_from(n).unwrap_or(0))
7557}
7558
/// #1595/#1598 — the single embedding-UPDATE statement (headed blob +
/// declared dim), shared by [`set_embedding`], [`set_embeddings_batch`]
/// and [`set_embeddings_batch_reembed`] so the write shape cannot
/// drift between the checked and replace-semantics writers.
///
/// Bind order: `?1` = encoded embedding blob, `?2` = declared dim,
/// `?3` = memory id.
const SQL_UPDATE_EMBEDDING_WITH_DIM: &str =
    "UPDATE memories SET embedding = ?1, embedding_dim = ?2 WHERE id = ?3";
/// Degenerate empty-vector sibling of [`SQL_UPDATE_EMBEDDING_WITH_DIM`]
/// (legacy parity: empty embeddings persist with `embedding_dim = NULL`).
///
/// Bind order: `?1` = encoded embedding blob, `?2` = memory id.
const SQL_UPDATE_EMBEDDING_NULL_DIM: &str =
    "UPDATE memories SET embedding = ?1, embedding_dim = NULL WHERE id = ?2";
7569
7570/// Store an embedding vector for a memory.
7571///
7572/// v0.6.3.1 P2 — writes are now headed with the magic byte (`encode_embedding_blob`)
7573/// and the namespace's first established dim is enforced. A dim mismatch
7574/// returns a typed [`EmbeddingDimMismatch`] surfaced as a 409 by the handler
7575/// layer. The same call also persists `embedding_dim` so future stats /
7576/// doctor passes don't re-derive from BLOB length.
7577///
7578/// # Errors
7579///
7580/// Returns [`EmbeddingDimMismatch`] (boxed via anyhow) when the embedding's
7581/// dimensionality differs from what the namespace established, or the
7582/// underlying SQLite error on failure.
7583pub fn set_embedding(conn: &Connection, id: &str, embedding: &[f32]) -> Result<()> {
7584    // Resolve namespace + check the dim invariant before mutating.
7585    let namespace: Option<String> = conn
7586        .query_row(
7587            "SELECT namespace FROM memories WHERE id = ?1",
7588            params![id],
7589            |r| r.get(0),
7590        )
7591        .ok();
7592    let attempted = embedding.len();
7593    if attempted == 0 {
7594        // Empty embeddings are a degenerate case — earlier code accepted
7595        // them; preserve that to avoid breaking legacy tests but skip the
7596        // dim check.
7597        let bytes = crate::embeddings::encode_embedding_blob(embedding);
7598        conn.execute(SQL_UPDATE_EMBEDDING_NULL_DIM, params![bytes, id])?;
7599        return Ok(());
7600    }
7601    if let Some(ref ns) = namespace
7602        && let Some(established) = namespace_embedding_dim(conn, ns)?
7603        && established != attempted
7604    {
7605        return Err(EmbeddingDimMismatch {
7606            namespace: ns.clone(),
7607            established,
7608            attempted,
7609        }
7610        .into());
7611    }
7612    let bytes = crate::embeddings::encode_embedding_blob(embedding);
7613    let dim_i64 = i64::try_from(attempted).unwrap_or(i64::MAX);
7614    conn.execute(SQL_UPDATE_EMBEDDING_WITH_DIM, params![bytes, dim_i64, id])?;
7615    Ok(())
7616}
7617
7618/// v0.7.0 Wave-2 A5 (issue #853) — batched embedding writer.
7619///
7620/// Writes a slice of `(id, embedding)` pairs inside a single SQLite
7621/// transaction. Equivalent to calling [`set_embedding`] in a loop, but
7622/// collapses N `UPDATE` round-trips (N implicit commits in autocommit
7623/// mode) into one transaction commit, which is the dominant cost on
7624/// SQLite WAL when N grows past a handful of rows.
7625///
7626/// Dim-invariant policy matches [`set_embedding`]:
7627/// * Empty embeddings are written as `embedding_dim = NULL` (legacy
7628///   degenerate-case parity).
7629/// * Per-namespace established dim is checked once per namespace
7630///   (cached in-flight) and any pair whose embedding length conflicts
7631///   returns an `EmbeddingDimMismatch` error — the whole transaction
7632///   rolls back so callers never see a partial commit. The mismatch
7633///   carries the FIRST offending pair's namespace/established/attempted
7634///   triple (consistent with the single-row path).
7635///
7636/// Returns the number of rows updated (rows whose `id` was not found in
7637/// the `memories` table are silently skipped — same as [`set_embedding`],
7638/// where `UPDATE … WHERE id = ?` returns `Ok(0)` and the function still
7639/// returns `Ok(())`).
7640///
7641/// **Boot backfill use:** [`crate::mcp::run_mcp_server`] calls this in
7642/// fixed-size chunks (see `DEFAULT_EMBED_BACKFILL_BATCH_SIZE`) so the
7643/// embedder produces vectors in parallel-friendly bursts and the
7644/// SQLite commit cost amortises across the batch.
7645///
7646/// # Errors
7647///
7648/// * Returns [`EmbeddingDimMismatch`] (boxed via anyhow) if any pair's
7649///   embedding dim disagrees with the namespace-established dim. The
7650///   transaction is rolled back; no rows are mutated.
7651/// * Returns the underlying SQLite error on transaction/prepare/execute
7652///   failure.
7653pub fn set_embeddings_batch(
7654    conn: &mut Connection,
7655    entries: &[(String, Vec<f32>)],
7656) -> Result<usize> {
7657    if entries.is_empty() {
7658        return Ok(0);
7659    }
7660
7661    // Lookup table: id -> namespace. Needed up-front because we want
7662    // to amortise the dim-check across a batch by resolving namespaces
7663    // in a single query rather than one query per row.
7664    let mut ns_by_id: HashMap<String, Option<String>> = HashMap::with_capacity(entries.len());
7665    {
7666        let mut stmt = conn.prepare("SELECT namespace FROM memories WHERE id = ?1")?;
7667        for (id, _) in entries {
7668            if ns_by_id.contains_key(id) {
7669                continue;
7670            }
7671            let ns: Option<String> = stmt
7672                .query_row(params![id], |r| r.get::<_, Option<String>>(0))
7673                .ok()
7674                .flatten();
7675            ns_by_id.insert(id.clone(), ns);
7676        }
7677    }
7678
7679    // Per-namespace established dim, cached so we only hit the
7680    // namespace_embedding_dim path once per distinct namespace in the
7681    // batch (the cache is intra-batch — the namespace's established
7682    // dim is immutable within this call's transaction window).
7683    let mut ns_dim_cache: HashMap<String, Option<usize>> = HashMap::new();
7684
7685    let tx = conn.transaction()?;
7686    {
7687        let mut update = tx.prepare(SQL_UPDATE_EMBEDDING_WITH_DIM)?;
7688        let mut update_empty = tx.prepare(SQL_UPDATE_EMBEDDING_NULL_DIM)?;
7689
7690        let mut rows_updated = 0usize;
7691        for (id, embedding) in entries {
7692            let attempted = embedding.len();
7693            if attempted == 0 {
7694                let bytes = crate::embeddings::encode_embedding_blob(embedding);
7695                rows_updated += update_empty.execute(params![bytes, id])?;
7696                continue;
7697            }
7698            if let Some(Some(ns)) = ns_by_id.get(id) {
7699                let established = if let Some(cached) = ns_dim_cache.get(ns) {
7700                    *cached
7701                } else {
7702                    let resolved = namespace_embedding_dim(&tx, ns)?;
7703                    ns_dim_cache.insert(ns.clone(), resolved);
7704                    resolved
7705                };
7706                if let Some(established) = established
7707                    && established != attempted
7708                {
7709                    return Err(EmbeddingDimMismatch {
7710                        namespace: ns.clone(),
7711                        established,
7712                        attempted,
7713                    }
7714                    .into());
7715                }
7716                // First successful write in a namespace sets the
7717                // established dim for the rest of this batch — keep
7718                // the cache in sync so subsequent rows in the same
7719                // namespace get a fast path AND so any inconsistent
7720                // pair later in the batch trips the dim check rather
7721                // than committing.
7722                if established.is_none() {
7723                    ns_dim_cache.insert(ns.clone(), Some(attempted));
7724                }
7725            }
7726            let bytes = crate::embeddings::encode_embedding_blob(embedding);
7727            let dim_i64 = i64::try_from(attempted).unwrap_or(i64::MAX);
7728            rows_updated += update.execute(params![bytes, dim_i64, id])?;
7729        }
7730
7731        drop(update);
7732        drop(update_empty);
7733        tx.commit()?;
7734        Ok(rows_updated)
7735    }
7736}
7737
7738/// Load an embedding vector for a memory. Returns None if not set.
7739///
7740/// v0.6.3.1 P2 — tolerant of legacy unheaded payloads (raw LE f32, length
7741/// `4n`) and v17 headed payloads (`0x01` + `4n` bytes). Anything else returns
7742/// an error so the caller can surface a typed corruption signal.
7743///
7744/// # Errors
7745///
7746/// Returns [`EmbeddingFormatError`](crate::embeddings::EmbeddingFormatError)
7747/// when the on-disk BLOB is malformed.
7748pub fn get_embedding(conn: &Connection, id: &str) -> Result<Option<Vec<f32>>> {
7749    let result: Option<Vec<u8>> = conn
7750        .query_row(
7751            "SELECT embedding FROM memories WHERE id = ?1",
7752            params![id],
7753            |row| row.get(0),
7754        )
7755        .ok();
7756    match result {
7757        Some(bytes) if !bytes.is_empty() => {
7758            let floats = crate::embeddings::decode_embedding_blob(&bytes)?;
7759            Ok(Some(floats))
7760        }
7761        _ => Ok(None),
7762    }
7763}
7764
7765/// Get all memory IDs that are missing embeddings.
7766///
7767/// #1579 B6 (F5.6): unbounded — materialises every `(id, title,
7768/// content)` triple in one `Vec`, which on a large backlog is the
7769/// whole corpus in memory. Hot loops (the embed-backfill sweep) should
7770/// use [`get_unembedded_ids_batch`] and drain in bounded passes; this
7771/// variant remains for callers that need the full snapshot semantics.
7772pub fn get_unembedded_ids(conn: &Connection) -> Result<Vec<(String, String, String)>> {
7773    let mut stmt =
7774        conn.prepare("SELECT id, title, content FROM memories WHERE embedding IS NULL")?;
7775    let rows = stmt.query_map([], |row| {
7776        Ok((
7777            row.get::<_, String>(0)?,
7778            row.get::<_, String>(1)?,
7779            row.get::<_, String>(2)?,
7780        ))
7781    })?;
7782    rows.collect::<rusqlite::Result<Vec<_>>>()
7783        .map_err(Into::into)
7784}
7785
7786/// #1579 B6 (F5.6) — bounded variant of [`get_unembedded_ids`].
7787///
7788/// Returns at most `limit` `(id, title, content)` triples so the
7789/// caller's materialisation is bounded by its batch size (the
7790/// `AI_MEMORY_EMBED_BACKFILL_BATCH` resolver semantics) instead of the
7791/// whole unembedded backlog. There is deliberately NO OFFSET: rows
7792/// that gain an embedding drop out of the `embedding IS NULL`
7793/// predicate, so callers drain by re-fetching until the returned batch
7794/// is empty (or stops shrinking — rows whose embedding persistently
7795/// fails stay at the head of the scan).
7796pub fn get_unembedded_ids_batch(
7797    conn: &Connection,
7798    limit: usize,
7799) -> Result<Vec<(String, String, String)>> {
7800    let mut stmt = conn.prepare_cached(
7801        "SELECT id, title, content FROM memories WHERE embedding IS NULL LIMIT ?1",
7802    )?;
7803    let rows = stmt.query_map(params![limit], |row| {
7804        Ok((
7805            row.get::<_, String>(0)?,
7806            row.get::<_, String>(1)?,
7807            row.get::<_, String>(2)?,
7808        ))
7809    })?;
7810    rows.collect::<rusqlite::Result<Vec<_>>>()
7811        .map_err(Into::into)
7812}
7813
7814/// #1595 — keyset-paginated variant of [`get_unembedded_ids_batch`].
7815///
7816/// Returns at most `limit` `(id, title, content)` triples whose `id`
7817/// sorts strictly AFTER `after_id` (or from the start when `None`),
7818/// in `id` order. The resilient backfill sweep advances its cursor
7819/// past every processed row — embedded OR skipped — so a poison row
7820/// (over-context-length content, transient embedder fault) can no
7821/// longer pin the scan head and starve the rest of the backlog (the
7822/// pre-fix `LIMIT`-only fetch re-returned persistently-failing rows
7823/// forever, and the no-progress guard then stopped the whole sweep
7824/// with 0 rows backfilled).
7825///
7826/// Two distinct prepared shapes (with / without the cursor predicate)
7827/// rather than the non-sargable `(?1 IS NULL OR id > ?1)` form, per
7828/// the v55/v56 sargability discipline.
7829///
7830/// # Errors
7831///
7832/// Returns the underlying SQLite error.
7833pub fn get_unembedded_ids_batch_after(
7834    conn: &Connection,
7835    after_id: Option<&str>,
7836    limit: usize,
7837) -> Result<Vec<(String, String, String)>> {
7838    let map_row = |row: &rusqlite::Row<'_>| {
7839        Ok((
7840            row.get::<_, String>(0)?,
7841            row.get::<_, String>(1)?,
7842            row.get::<_, String>(2)?,
7843        ))
7844    };
7845    let rows = if let Some(after) = after_id {
7846        let mut stmt = conn.prepare_cached(
7847            "SELECT id, title, content FROM memories \
7848             WHERE embedding IS NULL AND id > ?1 ORDER BY id LIMIT ?2",
7849        )?;
7850        let rows = stmt.query_map(params![after, limit], map_row)?;
7851        rows.collect::<rusqlite::Result<Vec<_>>>()?
7852    } else {
7853        let mut stmt = conn.prepare_cached(
7854            "SELECT id, title, content FROM memories \
7855             WHERE embedding IS NULL ORDER BY id LIMIT ?1",
7856        )?;
7857        let rows = stmt.query_map(params![limit], map_row)?;
7858        rows.collect::<rusqlite::Result<Vec<_>>>()?
7859    };
7860    Ok(rows)
7861}
7862
7863/// #1598 — keyset-paginated scan over ALL live memories (embedded or
7864/// not), optionally namespace-filtered, for the `ai-memory reembed`
7865/// full-corpus sweep. Same cursor semantics as
7866/// [`get_unembedded_ids_batch_after`]: at most `limit` `(id, title,
7867/// content)` triples with `id` strictly after `after_id`, in `id`
7868/// order. Four distinct prepared shapes (namespace × cursor) keep the
7869/// scan sargable (v55/v56 discipline).
7870///
7871/// # Errors
7872///
7873/// Returns the underlying SQLite error.
7874pub fn get_memory_texts_batch(
7875    conn: &Connection,
7876    namespace: Option<&str>,
7877    after_id: Option<&str>,
7878    limit: usize,
7879) -> Result<Vec<(String, String, String)>> {
7880    let map_row = |row: &rusqlite::Row<'_>| {
7881        Ok((
7882            row.get::<_, String>(0)?,
7883            row.get::<_, String>(1)?,
7884            row.get::<_, String>(2)?,
7885        ))
7886    };
7887    let rows = match (namespace, after_id) {
7888        (Some(ns), Some(after)) => {
7889            let mut stmt = conn.prepare_cached(
7890                "SELECT id, title, content FROM memories \
7891                 WHERE namespace = ?1 AND id > ?2 ORDER BY id LIMIT ?3",
7892            )?;
7893            let rows = stmt.query_map(params![ns, after, limit], map_row)?;
7894            rows.collect::<rusqlite::Result<Vec<_>>>()?
7895        }
7896        (Some(ns), None) => {
7897            let mut stmt = conn.prepare_cached(
7898                "SELECT id, title, content FROM memories \
7899                 WHERE namespace = ?1 ORDER BY id LIMIT ?2",
7900            )?;
7901            let rows = stmt.query_map(params![ns, limit], map_row)?;
7902            rows.collect::<rusqlite::Result<Vec<_>>>()?
7903        }
7904        (None, Some(after)) => {
7905            let mut stmt = conn.prepare_cached(
7906                "SELECT id, title, content FROM memories \
7907                 WHERE id > ?1 ORDER BY id LIMIT ?2",
7908            )?;
7909            let rows = stmt.query_map(params![after, limit], map_row)?;
7910            rows.collect::<rusqlite::Result<Vec<_>>>()?
7911        }
7912        (None, None) => {
7913            let mut stmt = conn
7914                .prepare_cached("SELECT id, title, content FROM memories ORDER BY id LIMIT ?1")?;
7915            let rows = stmt.query_map(params![limit], map_row)?;
7916            rows.collect::<rusqlite::Result<Vec<_>>>()?
7917        }
7918    };
7919    Ok(rows)
7920}
7921
7922/// #1598 — REPLACE-semantics sibling of [`set_embeddings_batch`] for
7923/// the `ai-memory reembed` vector-space migration.
7924///
7925/// Identical single-transaction write shape, but it deliberately does
7926/// NOT enforce the per-namespace established-dim invariant: re-embed
7927/// is exactly the tool that migrates a namespace from one model/dim to
7928/// another, so mid-run the namespace legitimately holds mixed dims
7929/// (the H7 recall read-guards skip dim-mismatched vectors during the
7930/// transition, and the sweep converges every row to the target dim).
7931/// Every other caller MUST keep using [`set_embeddings_batch`] — the
7932/// G4 invariant is what stops a misconfigured writer from silently
7933/// zeroing cosine scores.
7934///
7935/// Returns the number of rows updated (unknown ids are skipped, same
7936/// as the checked sibling).
7937///
7938/// # Errors
7939///
7940/// Returns the underlying SQLite transaction / statement error.
7941pub fn set_embeddings_batch_reembed(
7942    conn: &mut Connection,
7943    entries: &[(String, Vec<f32>)],
7944) -> Result<usize> {
7945    if entries.is_empty() {
7946        return Ok(0);
7947    }
7948    let tx = conn.transaction()?;
7949    let mut rows_updated = 0usize;
7950    {
7951        let mut update = tx.prepare(SQL_UPDATE_EMBEDDING_WITH_DIM)?;
7952        let mut update_empty = tx.prepare(SQL_UPDATE_EMBEDDING_NULL_DIM)?;
7953        for (id, embedding) in entries {
7954            let bytes = crate::embeddings::encode_embedding_blob(embedding);
7955            if embedding.is_empty() {
7956                // Legacy degenerate-case parity with `set_embedding`.
7957                rows_updated += update_empty.execute(params![bytes, id])?;
7958            } else {
7959                let dim_i64 = i64::try_from(embedding.len()).unwrap_or(i64::MAX);
7960                rows_updated += update.execute(params![bytes, dim_i64, id])?;
7961            }
7962        }
7963    }
7964    tx.commit()?;
7965    Ok(rows_updated)
7966}
7967
7968/// #1598 — `(total_rows, rows_with_embeddings)` for the reembed
7969/// dry-run plan, optionally namespace-filtered. `COUNT(embedding)`
7970/// counts non-NULL values, so the missing count is the difference.
7971///
7972/// # Errors
7973///
7974/// Returns the underlying SQLite error.
7975pub fn embedding_coverage(conn: &Connection, namespace: Option<&str>) -> Result<(u64, u64)> {
7976    let (total, embedded): (i64, i64) = if let Some(ns) = namespace {
7977        conn.query_row(
7978            "SELECT COUNT(*), COUNT(embedding) FROM memories WHERE namespace = ?1",
7979            params![ns],
7980            |r| Ok((r.get(0)?, r.get(1)?)),
7981        )?
7982    } else {
7983        conn.query_row("SELECT COUNT(*), COUNT(embedding) FROM memories", [], |r| {
7984            Ok((r.get(0)?, r.get(1)?))
7985        })?
7986    };
7987    Ok((
7988        u64::try_from(total).unwrap_or(0),
7989        u64::try_from(embedded).unwrap_or(0),
7990    ))
7991}
7992
7993/// #1598 — distinct embedding dimensionalities currently stored,
7994/// optionally namespace-filtered, for the reembed pre-flight banner
7995/// (the loud "old dims vs target dim" disclosure before a vector-space
7996/// migration). Prefers the declared `embedding_dim` column and falls
7997/// back to deriving from the BLOB length for legacy rows — `4n+1`
7998/// bytes is the v17 headed form (`(len-1)/4` floats), `4n` the
7999/// legacy unheaded form (`len/4`), mirroring [`dim_violations`].
8000///
8001/// # Errors
8002///
8003/// Returns the underlying SQLite error.
8004pub fn distinct_embedding_dims(conn: &Connection, namespace: Option<&str>) -> Result<Vec<usize>> {
8005    const DIM_EXPR: &str = "COALESCE(embedding_dim, \
8006         CASE WHEN length(embedding) % 4 = 1 THEN (length(embedding)-1)/4 \
8007              ELSE length(embedding)/4 END)";
8008    let collect = |stmt: &mut rusqlite::Statement<'_>,
8009                   params: &[&dyn rusqlite::ToSql]|
8010     -> Result<Vec<usize>> {
8011        let rows = stmt.query_map(params, |r| r.get::<_, i64>(0))?;
8012        Ok(rows
8013            .collect::<rusqlite::Result<Vec<_>>>()?
8014            .into_iter()
8015            .filter_map(|d| usize::try_from(d).ok())
8016            .collect())
8017    };
8018    if let Some(ns) = namespace {
8019        let mut stmt = conn.prepare(&format!(
8020            "SELECT DISTINCT {DIM_EXPR} AS dim FROM memories \
8021             WHERE embedding IS NOT NULL AND namespace = ?1 ORDER BY dim"
8022        ))?;
8023        collect(&mut stmt, &[&ns])
8024    } else {
8025        let mut stmt = conn.prepare(&format!(
8026            "SELECT DISTINCT {DIM_EXPR} AS dim FROM memories \
8027             WHERE embedding IS NOT NULL ORDER BY dim"
8028        ))?;
8029        collect(&mut stmt, &[])
8030    }
8031}
8032
8033/// #1579 B3 — count of rows carrying a stored embedding. Cheap probe
8034/// (no blob decode, no row materialisation) used by the CLI recall
8035/// path to decide whether a one-shot invocation should pay the HNSW
8036/// graph-construction cost at all (see
8037/// [`crate::hnsw::CLI_HNSW_BUILD_MIN_ENTRIES`]).
8038///
8039/// # Errors
8040///
8041/// Bubbles the rusqlite error from the COUNT query.
8042pub fn count_embedded_memories(conn: &Connection) -> Result<i64> {
8043    conn.query_row(
8044        "SELECT COUNT(*) FROM memories WHERE embedding IS NOT NULL",
8045        [],
8046        |row| row.get(0),
8047    )
8048    .map_err(Into::into)
8049}
8050
8051/// Get all stored embeddings as (id, embedding) pairs for building the HNSW index.
8052///
8053/// v0.6.3.1 P2 — uses the magic-byte tolerant decoder. Rows whose BLOB is
8054/// malformed are logged and skipped (the alternative — bailing the entire
8055/// HNSW build — would take the whole semantic-search surface offline for one
8056/// corrupt row).
8057pub fn get_all_embeddings(conn: &Connection) -> Result<Vec<(String, Vec<f32>)>> {
8058    let mut stmt =
8059        conn.prepare("SELECT id, embedding FROM memories WHERE embedding IS NOT NULL")?;
8060    let rows = stmt.query_map([], |row| {
8061        let id: String = row.get(0)?;
8062        let bytes: Vec<u8> = row.get(1)?;
8063        Ok((id, bytes))
8064    })?;
8065    let mut entries = Vec::new();
8066    for row in rows {
8067        let (id, bytes) = row?;
8068        if bytes.is_empty() {
8069            continue;
8070        }
8071        match crate::embeddings::decode_embedding_blob(&bytes) {
8072            Ok(floats) => entries.push((id, floats)),
8073            Err(e) => {
8074                tracing::warn!(
8075                    memory_id = %id,
8076                    error = %e,
8077                    "skipping memory with malformed embedding BLOB during HNSW build"
8078                );
8079            }
8080        }
8081    }
8082    Ok(entries)
8083}
8084
8085/// Hybrid recall — FTS5 keyword search + semantic cosine similarity.
8086/// Returns memories ranked by a blended score of keyword and semantic relevance.
8087/// When an HNSW `vector_index` is provided, uses approximate nearest-neighbor
8088/// search instead of scanning all embeddings linearly.
8089#[allow(clippy::too_many_arguments)]
8090/// v0.6.3.1 (P3): hybrid recall preserving the existing 2-tuple return
8091/// shape for HTTP / CLI / bench callers. Delegates to
8092/// [`recall_hybrid_with_telemetry`] and discards the telemetry. Kept so
8093/// the dozen-plus call sites need no churn for a feature only MCP
8094/// `handle_recall` consumes.
8095#[allow(clippy::too_many_arguments)]
8096pub fn recall_hybrid(
8097    conn: &Connection,
8098    context: &str,
8099    query_embedding: &[f32],
8100    namespace: Option<&str>,
8101    limit: usize,
8102    tags_filter: Option<&str>,
8103    since: Option<&str>,
8104    until: Option<&str>,
8105    vector_index: Option<&crate::hnsw::VectorIndex>,
8106    short_extend: i64,
8107    mid_extend: i64,
8108    as_agent: Option<&str>,
8109    budget_tokens: Option<usize>,
8110    scoring: &crate::config::ResolvedScoring,
8111    // v0.7.0 WT-1-E — see [`recall_with_telemetry`] for the
8112    // archived-source exclusion contract.
8113    include_archived: bool,
8114    // v0.7.0 Form 4 / Cluster-A PERF-3 — push `--source-uri-prefix`
8115    // into the SQL WHERE on both the FTS and semantic branches so the
8116    // partial `idx_memories_source_uri` index covers the lookup. See
8117    // [`recall`] for the contract.
8118    source_uri_prefix: Option<&str>,
8119) -> Result<(Vec<(Memory, f64)>, BudgetOutcome)> {
8120    let (results, outcome, _telemetry) = recall_hybrid_with_telemetry(
8121        conn,
8122        context,
8123        query_embedding,
8124        namespace,
8125        limit,
8126        tags_filter,
8127        since,
8128        until,
8129        vector_index,
8130        short_extend,
8131        mid_extend,
8132        as_agent,
8133        budget_tokens,
8134        scoring,
8135        include_archived,
8136        source_uri_prefix,
8137    )?;
8138    Ok((results, outcome))
8139}
8140
8141/// FX-4 / PERF-2 (2026-05-26) — convenience wrapper for the HTTP
8142/// recall handler. Same return shape as [`recall_hybrid`] but accepts
8143/// a pre-computed HNSW hit slice (caller ran `idx.search()` outside
8144/// the DB lock) so the DB-mutex hold window does not cover the
8145/// CPU-bound ANN walk. Telemetry is dropped on this path; the HTTP
8146/// surface does not consume it today.
8147#[allow(clippy::too_many_arguments)]
8148pub fn recall_hybrid_precomputed_hnsw(
8149    conn: &Connection,
8150    context: &str,
8151    query_embedding: &[f32],
8152    namespace: Option<&str>,
8153    limit: usize,
8154    tags_filter: Option<&str>,
8155    since: Option<&str>,
8156    until: Option<&str>,
8157    precomputed_hnsw_hits: &[crate::hnsw::VectorHit],
8158    short_extend: i64,
8159    mid_extend: i64,
8160    as_agent: Option<&str>,
8161    budget_tokens: Option<usize>,
8162    scoring: &crate::config::ResolvedScoring,
8163    include_archived: bool,
8164    source_uri_prefix: Option<&str>,
8165) -> Result<(Vec<(Memory, f64)>, BudgetOutcome)> {
8166    let (results, outcome, _telemetry) = recall_hybrid_with_telemetry_precomputed_hnsw(
8167        conn,
8168        context,
8169        query_embedding,
8170        namespace,
8171        limit,
8172        tags_filter,
8173        since,
8174        until,
8175        precomputed_hnsw_hits,
8176        short_extend,
8177        mid_extend,
8178        as_agent,
8179        budget_tokens,
8180        scoring,
8181        include_archived,
8182        source_uri_prefix,
8183    )?;
8184    Ok((results, outcome))
8185}
8186
// NOTE: the paragraph below describes `recall_hybrid_with_telemetry`,
// which is defined after the stage helpers. It is a plain comment so
// rustdoc does not attach it to the next item (`HybridPrep`).
//
// v0.6.3.1 (P3 + P6): hybrid recall reporting per-stage candidate counts,
// the average semantic blend weight, and the full budget outcome. MCP
// `handle_recall` uses the telemetry to populate the `meta` block (closes
// audit gaps G2/G8/G11) and the BudgetOutcome to populate R1 budget fields.
//
// The retrieval logic is unchanged — anti-goal of P3 is "do not change
// recall scoring or fusion logic." Counters are computed in place:
// `fts_candidates` is the pre-fusion FTS5 row count, `hnsw_candidates`
// is the pre-fusion HNSW (or linear-scan) hit count admitted past the
// 0.2 cosine gate, `blend_weight_avg` is the mean `semantic_weight`
// across the *returned* set (not the full candidate pool — operators
// care about what made it out).
8199// ---------------------------------------------------------------------------
8200// #871 — `recall_hybrid_with_telemetry` stage helpers.
8201//
8202// The original function was ~508 LOC carrying query preparation,
8203// FTS5 keyword retrieval, semantic (HNSW or linear-scan) retrieval,
8204// adaptive blend + decay scoring, touch ops + budget application,
8205// and telemetry assembly. Per the code-review verdict the function
8206// is split into focused stage-helpers so each phase has a clear
8207// contract and the orchestrator stays readable.
8208//
8209// The stages are kept inside `storage::mod` (rather than carved into
8210// a sub-module) because the helpers all share access to private
8211// helpers like `row_to_memory`, `sanitize_fts_query`,
8212// `archived_source_clause`, etc., and the SQL is tightly tied to
8213// the schema living in this module.
8214//
8215// Behaviour is byte-for-byte preserved: the same SQL runs, the same
8216// fusion produces the same blended scores, and `touch_many` mutates
8217// the same surviving set. Only the function-internal structure
8218// changes.
8219// ---------------------------------------------------------------------------
8220
/// Result of [`prepare_hybrid_query`] — the pre-computed SQL
/// fragments + bind params the FTS and semantic phases need.
///
/// The `'a` lifetime ties `effective_namespace` to the caller's
/// `namespace` argument; every other field is owned or `'static`.
struct HybridPrep<'a> {
    /// Output of `sanitize_fts_query(context, true)`; bound to the FTS
    /// `MATCH ?1` slot.
    fts_query: String,
    /// RFC 3339 timestamp taken once in `prepare_hybrid_query`; used for
    /// the expiry comparisons in both SQL branches and in the HNSW
    /// row-side filter.
    now: String,
    /// Output of `compute_visibility_prefixes(as_agent)`; destructured
    /// into the four visibility bind values by each phase.
    prefixes: VisibilityPrefixes,
    /// Hierarchy predicate for the FTS query as produced by
    /// `hierarchy_in_clause`; empty when that helper returns `None`.
    fts_hierarchy_fragment: String,
    /// `AND memories.namespace IN (...)` over the quoted namespace
    /// ancestors when hierarchy expansion is active; empty otherwise.
    sem_hierarchy_fragment: String,
    /// Namespace bound to the exact-match predicate: `None` when
    /// hierarchy expansion is active (the hierarchy fragment filters
    /// instead), otherwise the caller's `namespace`.
    effective_namespace: Option<&'a str>,
    /// Flag returned by `hierarchy_in_clause(namespace)`.
    hierarchy_active: bool,
    /// `archived_source_clause(include_archived, "m")` for the FTS query.
    fts_archived_fragment: &'static str,
    /// `archived_source_clause(include_archived, "memories")` for the
    /// linear-scan semantic query.
    sem_archived_fragment: &'static str,
    /// `LIKE ?12` source-URI predicate for the FTS query, or `""` when
    /// no non-empty prefix was supplied.
    fts_source_uri_fragment: &'static str,
    /// `LIKE ?10` source-URI predicate for the semantic query, or `""`
    /// when no non-empty prefix was supplied.
    sem_source_uri_fragment: &'static str,
    /// LIKE pattern (`escape_like_pattern(prefix)` + `%`) bound to the
    /// source-URI slot; `None` when the prefix is absent or empty, in
    /// which case both source-URI fragments are empty too.
    source_uri_like_param: Option<String>,
}
8237
8238/// #871 stage 1 — query preparation. Sanitises the FTS5 expression,
8239/// resolves namespace hierarchy expansion (`Task 1.12`), computes
8240/// visibility prefixes for the `?8..?11` (FTS) / `?6..?9` (semantic)
8241/// bind slots, and stamps the archived-source / source-URI-prefix
8242/// SQL fragments.
8243///
8244/// The `'now'` timestamp is captured here so all subsequent stages
8245/// see the same monotonic instant.
8246fn prepare_hybrid_query<'a>(
8247    context: &str,
8248    namespace: Option<&'a str>,
8249    as_agent: Option<&str>,
8250    include_archived: bool,
8251    source_uri_prefix: Option<&str>,
8252) -> HybridPrep<'a> {
8253    let now = Utc::now().to_rfc3339();
8254    let fts_query = sanitize_fts_query(context, true);
8255    let prefixes = compute_visibility_prefixes(as_agent);
8256    let (fts_hierarchy_in, hierarchy_active) = hierarchy_in_clause(namespace);
8257    let fts_hierarchy_fragment = fts_hierarchy_in.unwrap_or_default();
8258    let sem_hierarchy_fragment = if hierarchy_active {
8259        if let Some(ns) = namespace {
8260            let ancestors = crate::models::namespace_ancestors(ns);
8261            let quoted: Vec<String> = ancestors
8262                .iter()
8263                .map(|a| format!("'{}'", a.replace('\'', "''")))
8264                .collect();
8265            format!("AND memories.namespace IN ({})", quoted.join(","))
8266        } else {
8267            String::new()
8268        }
8269    } else {
8270        String::new()
8271    };
8272    let effective_namespace = if hierarchy_active { None } else { namespace };
8273    let fts_archived_fragment = archived_source_clause(include_archived, "m");
8274    let sem_archived_fragment = archived_source_clause(include_archived, "memories");
8275    let source_uri_like_param: Option<String> = match source_uri_prefix {
8276        Some(prefix) if !prefix.is_empty() => Some(format!("{}%", escape_like_pattern(prefix))),
8277        _ => None,
8278    };
8279    let fts_source_uri_fragment = if source_uri_like_param.is_some() {
8280        "AND m.source_uri LIKE ?12 ESCAPE '\\'"
8281    } else {
8282        ""
8283    };
8284    let sem_source_uri_fragment = if source_uri_like_param.is_some() {
8285        "AND memories.source_uri LIKE ?10 ESCAPE '\\'"
8286    } else {
8287        ""
8288    };
8289    HybridPrep {
8290        fts_query,
8291        now,
8292        prefixes,
8293        fts_hierarchy_fragment,
8294        sem_hierarchy_fragment,
8295        effective_namespace,
8296        hierarchy_active,
8297        fts_archived_fragment,
8298        sem_archived_fragment,
8299        fts_source_uri_fragment,
8300        sem_source_uri_fragment,
8301        source_uri_like_param,
8302    }
8303}
8304
8305/// #871 stage 2 — FTS5 keyword phase. Builds + executes the FTS SQL
8306/// with the per-row `fts_score` projection, returns the raw
8307/// `(Memory, fts_score, embedding_bytes)` tuples for the fusion
8308/// stage. The embedding bytes are pulled inline from the same
8309/// SELECT (Cluster-F PERF-2) so the fusion stage can compute cosine
8310/// without an N+1 round-trip.
8311fn fts_keyword_phase(
8312    conn: &Connection,
8313    prep: &HybridPrep<'_>,
8314    tags_filter: Option<&str>,
8315    since: Option<&str>,
8316    until: Option<&str>,
8317    limit: usize,
8318) -> Result<Vec<(Memory, f64, Option<Vec<u8>>)>> {
8319    let fts_limit = (limit * 3).max(30);
8320    let fts_sql = format!(
8321        "SELECT m.id, m.tier, m.namespace, m.title, m.content, m.tags, m.priority,
8322                m.confidence, m.source, m.access_count, m.created_at, m.updated_at,
8323                m.last_accessed_at, m.expires_at, m.metadata, m.reflection_depth,
8324                m.memory_kind, m.entity_id, m.persona_version,
8325                m.citations, m.source_uri, m.source_span,
8326                m.confidence_source, m.confidence_signals, m.confidence_decayed_at, m.embedding,
8327                (fts.rank * -1) + (m.priority * 0.5) + (MIN(m.access_count, 50) * 0.1)
8328                + (m.confidence * 2.0)
8329                + (CASE m.tier WHEN 'long' THEN 3.0 WHEN 'mid' THEN 1.0 ELSE 0.0 END)
8330                + (1.0 / (1.0 + (julianday('now') - julianday(m.updated_at)) * 0.1))
8331                AS fts_score
8332         FROM memories_fts fts
8333         JOIN memories m ON m.rowid = fts.rowid
8334         WHERE memories_fts MATCH ?1
8335           AND (?2 IS NULL OR m.namespace = ?2)
8336           {fts_hierarchy_fragment}
8337           AND (m.expires_at IS NULL OR m.expires_at > ?3)
8338           AND (?4 IS NULL OR EXISTS (SELECT 1 FROM json_each(m.tags) WHERE json_each.value = ?4))
8339           AND (?5 IS NULL OR m.created_at >= ?5)
8340           AND (?6 IS NULL OR m.created_at <= ?6)
8341           {fts_archived_fragment}
8342           {fts_source_uri_fragment}
8343           {vis}
8344         ORDER BY fts_score DESC
8345         LIMIT ?7",
8346        fts_hierarchy_fragment = prep.fts_hierarchy_fragment,
8347        fts_archived_fragment = prep.fts_archived_fragment,
8348        fts_source_uri_fragment = prep.fts_source_uri_fragment,
8349        vis = visibility_clause(8, "m"),
8350    );
8351    // #1579 B6 — recall’s FTS branch is the hottest read statement;
8352    // prepare_cached amortises re-parsing across recalls (shape cardinality
8353    // is small: the optional fragments expand to a handful of variants).
8354    let mut fts_stmt = conn.prepare_cached(&fts_sql)?;
8355    let fts_row_handler =
8356        |row: &rusqlite::Row<'_>| -> rusqlite::Result<(Memory, f64, Option<Vec<u8>>)> {
8357            let mem = row_to_memory(row)?;
8358            let fts_score: f64 = row.get("fts_score")?;
8359            // Index 25 = `m.embedding` (the SELECT list above places it
8360            // after `confidence_decayed_at`). Pull as `Option<Vec<u8>>`
8361            // so legacy rows without embeddings surface as `None`.
8362            let embedding_bytes: Option<Vec<u8>> = row.get(25)?;
8363            Ok((mem, fts_score, embedding_bytes))
8364        };
8365    let (vis_p, vis_t, vis_u, vis_o) = prep.prefixes.clone();
8366    let rows: Vec<(Memory, f64, Option<Vec<u8>>)> =
8367        if let Some(ref uri_param) = prep.source_uri_like_param {
8368            fts_stmt
8369                .query_map(
8370                    params![
8371                        prep.fts_query,
8372                        prep.effective_namespace,
8373                        prep.now,
8374                        tags_filter,
8375                        since,
8376                        until,
8377                        fts_limit,
8378                        vis_p,
8379                        vis_t,
8380                        vis_u,
8381                        vis_o,
8382                        uri_param,
8383                    ],
8384                    fts_row_handler,
8385                )?
8386                .collect::<rusqlite::Result<Vec<_>>>()?
8387        } else {
8388            fts_stmt
8389                .query_map(
8390                    params![
8391                        prep.fts_query,
8392                        prep.effective_namespace,
8393                        prep.now,
8394                        tags_filter,
8395                        since,
8396                        until,
8397                        fts_limit,
8398                        vis_p,
8399                        vis_t,
8400                        vis_u,
8401                        vis_o,
8402                    ],
8403                    fts_row_handler,
8404                )?
8405                .collect::<rusqlite::Result<Vec<_>>>()?
8406        };
8407    Ok(rows)
8408}
8409
8410/// #871 stage 3 — semantic phase. Two paths share the same `scored`
8411/// HashMap mutation contract:
8412///
8413///   - HNSW path (when a `vector_index` is supplied): runs an ANN
8414///     search bounded at `5×limit`, gates each hit at `cosine > 0.2`,
8415///     and re-applies the FTS WHERE-clause filters in Rust because
8416///     the HNSW index returns raw vector neighbours (no SQL
8417///     visibility / archived-source / source-URI-prefix filter has
8418///     run).
8419///   - Linear-scan fallback (HNSW absent): runs the semantic SQL,
8420///     decodes embedding BLOBs, applies the same `cosine > 0.2`
8421///     gate, and inserts surviving rows into `scored`.
8422///
8423/// Returns the running `hnsw_candidates_count` for telemetry. Rows
8424/// already present in `scored` (i.e. FTS-side hits) are skipped so
8425/// the FTS embedding-based cosine wins (consistent with the
8426/// pre-refactor behaviour).
8427#[allow(clippy::too_many_arguments)]
8428fn semantic_phase(
8429    conn: &Connection,
8430    prep: &HybridPrep<'_>,
8431    query_embedding: &[f32],
8432    vector_index: Option<&crate::hnsw::VectorIndex>,
8433    // FX-4 / PERF-2 (2026-05-26) — when supplied, the HNSW search
8434    // has already been executed OUTSIDE the DB lock by the caller
8435    // (HTTP recall handler) and the hits are passed in here. The
8436    // function uses these directly instead of re-running
8437    // `idx.search()`, which keeps the CPU-bound ANN walk off the
8438    // DB-mutex hold window so concurrent recalls do not serialise
8439    // behind one another. When both `vector_index` and
8440    // `precomputed_hnsw_hits` are supplied, the precomputed slice
8441    // wins — callers that already paid the search cost outside the
8442    // lock must not pay it again inside. Existing callers (MCP /
8443    // CLI / SAL) pass `None` and keep the legacy single-call
8444    // behaviour where `semantic_phase` runs the search itself.
8445    precomputed_hnsw_hits: Option<&[crate::hnsw::VectorHit]>,
8446    namespace: Option<&str>,
8447    tags_filter: Option<&str>,
8448    since: Option<&str>,
8449    until: Option<&str>,
8450    limit: usize,
8451    include_archived: bool,
8452    source_uri_prefix: Option<&str>,
8453    scored: &mut HashMap<String, (Memory, f64, f64)>,
8454    // v0.7.0 H7 — bumped once per stored embedding whose dimensionality
8455    // disagrees with `query_embedding` (embedder-model switch). Accumulated
8456    // across the whole recall and surfaced via telemetry + an aggregated warn.
8457    dim_mismatch_count: &mut usize,
8458) -> Result<usize> {
8459    let mut hnsw_candidates_count: usize = 0;
8460    let now = prep.now.as_str();
8461    // FX-4 / PERF-2 — when `precomputed_hnsw_hits` is supplied OR a
8462    // `vector_index` is supplied, run the HNSW-hit ingestion path.
8463    // The precomputed path skips the `idx.search()` call (already
8464    // paid outside the lock); the legacy path runs the search
8465    // inline.
8466    if precomputed_hnsw_hits.is_some() || vector_index.is_some() {
8467        let owned_hits;
8468        let hits: &[crate::hnsw::VectorHit] = if let Some(pre) = precomputed_hnsw_hits {
8469            pre
8470        } else {
8471            let ann_limit = (limit * 5).max(50);
8472            owned_hits = vector_index
8473                .expect("vector_index set in legacy branch")
8474                .search(query_embedding, ann_limit);
8475            owned_hits.as_slice()
8476        };
8477        // v0.7.0 #981 — pre-#981 this branch called `get(conn, &hit.id)`
8478        // per hit, producing 50-250 round-trips per recall on a warm
8479        // index. The fix collects the ids that pass the
8480        // `cosine > 0.2` + not-yet-scored cosine gate, batches the
8481        // SELECT via `get_many`, and re-applies the row-side filter
8482        // ladder against the fetched map. Net effect: one SELECT
8483        // instead of N, no behavioural drift on the per-row filters
8484        // because they're applied identically against `&mem`. See
8485        // `tests/recall_semantic_batch_fetch_981.rs` for the pin.
8486        let mut needed_ids: Vec<String> = Vec::with_capacity(hits.len());
8487        let mut hit_meta: Vec<(String, f64)> = Vec::with_capacity(hits.len());
8488        for hit in hits {
8489            if scored.contains_key(&hit.id) {
8490                continue;
8491            }
8492            let cosine = f64::from(1.0 - hit.distance);
8493            // v0.6.2 (S18 iteration): cosine gate relaxed 0.3 → 0.2 —
8494            // see the matching comment in the linear-scan branch below.
8495            if cosine > crate::RECALL_COSINE_GATE {
8496                needed_ids.push(hit.id.clone());
8497                hit_meta.push((hit.id.clone(), cosine));
8498            }
8499        }
8500        let fetched = get_many(conn, &needed_ids)?;
8501        for (id, cosine) in hit_meta {
8502            let Some(mem) = fetched.get(&id) else {
8503                continue;
8504            };
8505            if let Some(ns) = namespace {
8506                if prep.hierarchy_active {
8507                    let ancestors = crate::models::namespace_ancestors(ns);
8508                    if !ancestors.iter().any(|a| a == &mem.namespace) {
8509                        continue;
8510                    }
8511                } else if mem.namespace != ns {
8512                    continue;
8513                }
8514            }
8515            if let Some(exp) = &mem.expires_at
8516                && exp.as_str() <= now
8517            {
8518                continue;
8519            }
8520            if let Some(tf) = tags_filter
8521                && !mem.tags.iter().any(|t| t == tf)
8522            {
8523                continue;
8524            }
8525            if let Some(s) = since
8526                && mem.created_at.as_str() < s
8527            {
8528                continue;
8529            }
8530            if let Some(u) = until
8531                && mem.created_at.as_str() > u
8532            {
8533                continue;
8534            }
8535            if !is_visible(mem, &prep.prefixes) {
8536                continue;
8537            }
8538            if !include_archived && is_archived_source(mem) {
8539                continue;
8540            }
8541            if let Some(prefix) = source_uri_prefix
8542                && !prefix.is_empty()
8543                && !mem
8544                    .source_uri
8545                    .as_deref()
8546                    .is_some_and(|u| u.starts_with(prefix))
8547            {
8548                continue;
8549            }
8550            // Clone is unavoidable here — `scored` owns the Memory
8551            // for the final cross-phase merge, and `fetched` may be
8552            // re-read for downstream phases.
8553            scored.insert(mem.id.clone(), (mem.clone(), 0.0, cosine));
8554            hnsw_candidates_count += 1;
8555        }
8556        return Ok(hnsw_candidates_count);
8557    }
8558
8559    // Fallback: linear scan over all embeddings.
8560    let sem_sql = format!(
8561        "SELECT id, tier, namespace, title, content, tags, priority,
8562                confidence, source, access_count, created_at, updated_at,
8563                last_accessed_at, expires_at, metadata, reflection_depth, memory_kind, embedding
8564         FROM memories
8565         WHERE embedding IS NOT NULL
8566           AND (?1 IS NULL OR namespace = ?1)
8567           {sem_hierarchy_fragment}
8568           AND (expires_at IS NULL OR expires_at > ?2)
8569           AND (?3 IS NULL OR EXISTS (SELECT 1 FROM json_each(memories.tags) WHERE json_each.value = ?3))
8570           AND (?4 IS NULL OR created_at >= ?4)
8571           AND (?5 IS NULL OR created_at <= ?5)
8572           {sem_archived_fragment}
8573           {sem_source_uri_fragment}
8574           {vis}",
8575        sem_hierarchy_fragment = prep.sem_hierarchy_fragment,
8576        sem_archived_fragment = prep.sem_archived_fragment,
8577        sem_source_uri_fragment = prep.sem_source_uri_fragment,
8578        vis = visibility_clause(6, "memories"),
8579    );
8580    // #1579 B6 — same prepare_cached treatment as the FTS branch above.
8581    let mut sem_stmt = conn.prepare_cached(&sem_sql)?;
8582    let sem_row_handler = |row: &rusqlite::Row<'_>| -> rusqlite::Result<(Memory, Option<Vec<u8>>)> {
8583        let mem = row_to_memory(row)?;
8584        // v0.7.x Form 6 — `memory_kind` was inserted between
8585        // `reflection_depth` and `embedding` in the SELECT list
8586        // above; `embedding` sits at zero-based index 17.
8587        let emb_bytes: Option<Vec<u8>> = row.get(17)?;
8588        Ok((mem, emb_bytes))
8589    };
8590    let (vis_p, vis_t, vis_u, vis_o) = prep.prefixes.clone();
8591    let sem_results: Vec<(Memory, Option<Vec<u8>>)> =
8592        if let Some(ref uri_param) = prep.source_uri_like_param {
8593            sem_stmt
8594                .query_map(
8595                    params![
8596                        prep.effective_namespace,
8597                        prep.now,
8598                        tags_filter,
8599                        since,
8600                        until,
8601                        vis_p,
8602                        vis_t,
8603                        vis_u,
8604                        vis_o,
8605                        uri_param,
8606                    ],
8607                    sem_row_handler,
8608                )?
8609                .collect::<rusqlite::Result<Vec<_>>>()?
8610        } else {
8611            sem_stmt
8612                .query_map(
8613                    params![
8614                        prep.effective_namespace,
8615                        prep.now,
8616                        tags_filter,
8617                        since,
8618                        until,
8619                        vis_p,
8620                        vis_t,
8621                        vis_u,
8622                        vis_o,
8623                    ],
8624                    sem_row_handler,
8625                )?
8626                .collect::<rusqlite::Result<Vec<_>>>()?
8627        };
8628    for (mem, emb_bytes) in sem_results {
8629        if scored.contains_key(&mem.id) {
8630            continue;
8631        }
8632        if let Some(bytes) = emb_bytes
8633            && !bytes.is_empty()
8634        {
8635            // v0.6.3.1 P2 — tolerate legacy + headed payloads; skip
8636            // (with telemetry) on malformed BLOBs so a single corrupt
8637            // row can't poison the whole semantic stage.
8638            let Ok(emb) = crate::embeddings::decode_embedding_blob(&bytes) else {
8639                tracing::warn!(
8640                    memory_id = %mem.id,
8641                    "skipping malformed embedding BLOB during semantic recall"
8642                );
8643                continue;
8644            };
8645            let cosine =
8646                match crate::embeddings::Embedder::cosine_similarity_checked(query_embedding, &emb)
8647                {
8648                    crate::embeddings::CosineComparison::Comparable(c) => f64::from(c),
8649                    crate::embeddings::CosineComparison::DimensionMismatch { .. } => {
8650                        // v0.7.0 H7 — stored embedding came from a different
8651                        // embedder model; counted (not silently dropped) so the
8652                        // aggregated warn + telemetry can flag the model switch.
8653                        *dim_mismatch_count += 1;
8654                        continue;
8655                    }
8656                };
8657            if cosine > crate::RECALL_COSINE_GATE {
8658                scored.insert(mem.id.clone(), (mem, 0.0, cosine));
8659                hnsw_candidates_count += 1;
8660            }
8661        }
8662    }
8663    Ok(hnsw_candidates_count)
8664}
8665
8666/// #871 stage 4 — adaptive blend + decay.
8667///
8668/// Per-row: normalises `fts_score` by `max_fts_score`, lerp-derives
8669/// `semantic_weight` from content length (0.50 ≤500 chars → 0.15
8670/// ≥5000 chars; embeddings lose information on long text, FTS stays
8671/// precise), and multiplies by the per-tier exponential decay from
8672/// `scoring`. Returns the ranked (sort by blended score, truncated
8673/// to `limit`) result list AND the captured per-candidate
8674/// `semantic_weight` vector for telemetry.
8675fn blend_and_rank(
8676    scored: HashMap<String, (Memory, f64, f64)>,
8677    max_fts_score: f64,
8678    scoring: &crate::config::ResolvedScoring,
8679    limit: usize,
8680) -> (Vec<(Memory, f64)>, Vec<f64>) {
8681    let now_utc = Utc::now();
8682    let mut weights: Vec<f64> = Vec::new();
8683    let mut results: Vec<(Memory, f64)> = scored
8684        .into_values()
8685        .map(|(mem, fts_score, cosine)| {
8686            let norm_fts = if max_fts_score > 0.0 {
8687                fts_score / max_fts_score
8688            } else {
8689                0.0
8690            };
8691            // B4 (R2-LOW) — clamp to i32::MAX instead of panicking when
8692            // a memory's content is >2GB. The lerp below treats anything
8693            // ≥5000 chars as the long-tail bucket regardless, so the
8694            // clamp does not change scoring; it only closes a panic
8695            // window a hostile import could otherwise reach.
8696            let content_len = f64::from(i32::try_from(mem.content.len()).unwrap_or(i32::MAX));
8697            let semantic_weight = if content_len <= 500.0 {
8698                0.50
8699            } else if content_len >= 5000.0 {
8700                0.15
8701            } else {
8702                0.50 - 0.35 * ((content_len - 500.0) / 4500.0)
8703            };
8704            weights.push(semantic_weight);
8705            let blended = semantic_weight * cosine + (1.0 - semantic_weight) * norm_fts;
8706            let age_days = chrono::DateTime::parse_from_rfc3339(&mem.created_at)
8707                .ok()
8708                .map_or(0.0, |ts| {
8709                    let secs = (now_utc - ts.with_timezone(&Utc)).num_seconds();
8710                    #[allow(clippy::cast_precision_loss)]
8711                    {
8712                        secs as f64 / crate::SECS_PER_DAY as f64
8713                    }
8714                });
8715            let decay = scoring.decay_multiplier(&mem.tier, age_days);
8716            (mem, blended * decay)
8717        })
8718        .collect();
8719    results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
8720    results.truncate(limit);
8721    (results, weights)
8722}
8723
8724/// #871 stage 5 — post-fusion ops: proximity boost (when hierarchy
8725/// expansion is active), token-budget application, and the batched
8726/// `touch_many` write that bumps `access_count` + slides the per-tier
8727/// expiry on every memory in the surviving set.
8728fn apply_recall_post_ops(
8729    conn: &Connection,
8730    results: Vec<(Memory, f64)>,
8731    hierarchy_active: bool,
8732    namespace: Option<&str>,
8733    budget_tokens: Option<usize>,
8734    short_extend: i64,
8735    mid_extend: i64,
8736) -> (Vec<(Memory, f64)>, BudgetOutcome) {
8737    let boosted = if let (true, Some(anchor)) = (hierarchy_active, namespace) {
8738        apply_proximity_boost(results, anchor)
8739    } else {
8740        results
8741    };
8742    let (budgeted, outcome) = apply_token_budget(boosted, budget_tokens);
8743    let touch_ids: Vec<&str> = budgeted.iter().map(|(mem, _)| mem.id.as_str()).collect();
8744    if let Err(e) = touch_many(conn, &touch_ids, short_extend, mid_extend) {
8745        tracing::warn!("touch_many failed for hybrid recall set: {}", e);
8746    }
8747    (budgeted, outcome)
8748}
8749
8750/// #871 stage 6 — telemetry assembly. Aggregates the per-stage
8751/// candidate counters and the mean `semantic_weight` across the
8752/// returned set (NOT the full candidate pool — operators care about
8753/// what made it out).
8754fn assemble_recall_telemetry(
8755    fts_candidates: usize,
8756    hnsw_candidates: usize,
8757    blend_weights: &[f64],
8758    embedding_dim_mismatch: usize,
8759) -> crate::models::RecallTelemetry {
8760    let blend_weight_avg = if blend_weights.is_empty() {
8761        0.0
8762    } else {
8763        #[allow(clippy::cast_precision_loss)]
8764        let n = blend_weights.len() as f64;
8765        blend_weights.iter().sum::<f64>() / n
8766    };
8767    crate::models::RecallTelemetry {
8768        fts_candidates,
8769        hnsw_candidates,
8770        blend_weight_avg,
8771        embedding_dim_mismatch,
8772    }
8773}
8774
/// Hybrid (FTS5 keyword + semantic) recall that also returns per-call
/// telemetry.
///
/// Legacy entry point: forwards every argument unchanged to
/// `recall_hybrid_with_telemetry_inner`, passing `None` for the
/// precomputed-HNSW-hits slot so the optional `vector_index` (when
/// supplied) is what the inner pipeline receives.
///
/// Returns the ranked `(Memory, score)` pairs, the token-budget outcome,
/// and the assembled [`crate::models::RecallTelemetry`].
///
/// # Errors
/// Returns whatever error the inner pipeline returns.
#[allow(clippy::too_many_arguments)]
pub fn recall_hybrid_with_telemetry(
    conn: &Connection,
    context: &str,
    query_embedding: &[f32],
    namespace: Option<&str>,
    limit: usize,
    tags_filter: Option<&str>,
    since: Option<&str>,
    until: Option<&str>,
    vector_index: Option<&crate::hnsw::VectorIndex>,
    short_extend: i64,
    mid_extend: i64,
    as_agent: Option<&str>,
    budget_tokens: Option<usize>,
    scoring: &crate::config::ResolvedScoring,
    // v0.7.0 WT-1-E — see [`recall_with_telemetry`] for the
    // archived-source exclusion contract.
    include_archived: bool,
    // v0.7.0 Form 4 / Cluster-A PERF-3 — see [`recall`] for the
    // contract. Pushed into both the FTS and semantic branch SQL so
    // both pools are constrained by the partial
    // `idx_memories_source_uri` index, not the post-fetch Rust filter.
    source_uri_prefix: Option<&str>,
) -> Result<(
    Vec<(Memory, f64)>,
    BudgetOutcome,
    crate::models::RecallTelemetry,
)> {
    recall_hybrid_with_telemetry_inner(
        conn,
        context,
        query_embedding,
        namespace,
        limit,
        tags_filter,
        since,
        until,
        vector_index,
        // `precomputed_hnsw_hits` — never supplied on the legacy path.
        None,
        short_extend,
        mid_extend,
        as_agent,
        budget_tokens,
        scoring,
        include_archived,
        source_uri_prefix,
    )
}
8824
/// FX-4 / PERF-2 (2026-05-26) — variant of
/// [`recall_hybrid_with_telemetry`] that accepts a pre-computed slice
/// of HNSW hits in place of the in-pipeline `idx.search()` call. The
/// HTTP recall handler runs the ANN walk OUTSIDE the DB mutex (the
/// HNSW index lives behind its own `vector_index` mutex; the DB lock
/// is not required for the search) and passes the result here so the
/// DB-mutex hold window covers only the FTS5 query + the batched
/// `get_many` fetch + the touch ops. Concurrent recalls overlap
/// their CPU-bound ANN walks instead of serialising behind the
/// single shared connection.
///
/// Semantics-preserving by construction: the precomputed hits feed
/// the same per-hit `cosine > 0.2` gate + `get_many` round-trip
/// inside [`semantic_phase`] that the legacy single-call path uses.
/// Existing callers (MCP / CLI / SAL) continue to call
/// [`recall_hybrid_with_telemetry`] and pay the search cost inside
/// the lock; only the HTTP handler swaps in the new path.
///
/// Implementation note: this is a thin wrapper that forwards to
/// `recall_hybrid_with_telemetry_inner` with `vector_index = None` and
/// `precomputed_hnsw_hits = Some(..)`; all other arguments pass through
/// unchanged.
///
/// # Errors
/// Returns whatever error the inner pipeline returns.
#[allow(clippy::too_many_arguments)]
pub fn recall_hybrid_with_telemetry_precomputed_hnsw(
    conn: &Connection,
    context: &str,
    query_embedding: &[f32],
    namespace: Option<&str>,
    limit: usize,
    tags_filter: Option<&str>,
    since: Option<&str>,
    until: Option<&str>,
    precomputed_hnsw_hits: &[crate::hnsw::VectorHit],
    short_extend: i64,
    mid_extend: i64,
    as_agent: Option<&str>,
    budget_tokens: Option<usize>,
    scoring: &crate::config::ResolvedScoring,
    include_archived: bool,
    source_uri_prefix: Option<&str>,
) -> Result<(
    Vec<(Memory, f64)>,
    BudgetOutcome,
    crate::models::RecallTelemetry,
)> {
    recall_hybrid_with_telemetry_inner(
        conn,
        context,
        query_embedding,
        namespace,
        limit,
        tags_filter,
        since,
        until,
        // `vector_index` — the search already ran outside the DB lock.
        None,
        Some(precomputed_hnsw_hits),
        short_extend,
        mid_extend,
        as_agent,
        budget_tokens,
        scoring,
        include_archived,
        source_uri_prefix,
    )
}
8885
/// Inner dispatch shared by [`recall_hybrid_with_telemetry`] (legacy,
/// runs `idx.search()` inside the DB-lock window) and
/// [`recall_hybrid_with_telemetry_precomputed_hnsw`] (FX-4 / PERF-2,
/// caller pre-ran the ANN walk outside the DB lock). At most one of
/// `vector_index` / `precomputed_hnsw_hits` is `Some` on any given
/// call: the legacy wrapper forwards its optional `vector_index` with
/// `precomputed_hnsw_hits = None` (so both may be `None`), and the
/// precomputed wrapper passes `vector_index = None` with `Some(hits)`.
/// The inner is private so the variant choice cannot drift.
///
/// Pipeline (see the stage comments in the body): query preparation →
/// FTS5 keyword phase → semantic phase → adaptive blend + rank →
/// post-ops (proximity boost, token budget, batched touch) →
/// telemetry assembly.
///
/// Returns the ranked `(Memory, score)` pairs after budgeting, the
/// token-budget outcome, and the per-call telemetry.
///
/// # Errors
/// Propagates errors from `fts_keyword_phase` and `semantic_phase`.
/// A failed `touch_many` in the post-ops stage is logged, not returned.
#[allow(clippy::too_many_arguments)]
fn recall_hybrid_with_telemetry_inner(
    conn: &Connection,
    context: &str,
    query_embedding: &[f32],
    namespace: Option<&str>,
    limit: usize,
    tags_filter: Option<&str>,
    since: Option<&str>,
    until: Option<&str>,
    vector_index: Option<&crate::hnsw::VectorIndex>,
    precomputed_hnsw_hits: Option<&[crate::hnsw::VectorHit]>,
    short_extend: i64,
    mid_extend: i64,
    as_agent: Option<&str>,
    budget_tokens: Option<usize>,
    scoring: &crate::config::ResolvedScoring,
    include_archived: bool,
    source_uri_prefix: Option<&str>,
) -> Result<(
    Vec<(Memory, f64)>,
    BudgetOutcome,
    crate::models::RecallTelemetry,
)> {
    // Stage 1 — query preparation (FTS sanitisation, namespace
    // hierarchy expansion, visibility prefixes, SQL fragments).
    let prep = prepare_hybrid_query(
        context,
        namespace,
        as_agent,
        include_archived,
        source_uri_prefix,
    );

    // Stage 2 — FTS5 keyword phase.
    let fts_results = fts_keyword_phase(conn, &prep, tags_filter, since, until, limit)?;

    // Fusion pool (id → (memory, fts_score, cosine_score)). FTS rows
    // land first so their inline-fetched embedding-cosine wins; the
    // semantic phase only inserts ids it hasn't seen.
    //
    // PERF-6 (med/low review batch) — pre-size the map so we avoid the
    // 4-realloc growth path (4 → 8 → 16 → 32) on every recall. Upper
    // bound is fts_results.len() (already in scope) + the upcoming
    // semantic phase's `ann_limit = max(limit*5, 50)`; the slight
    // over-allocation is dwarfed by the saved zeroing + rehashing cost
    // at default `limit=10` where the natural growth path would have
    // run through ~3 reallocations.
    let scored_cap = fts_results
        .len()
        .saturating_add(limit.saturating_mul(5).max(50));
    let mut scored: HashMap<String, (Memory, f64, f64)> = HashMap::with_capacity(scored_cap);
    // Running maximum of the FTS scores. It starts at 1.0 and is only
    // ever raised, so the value handed to `blend_and_rank` is never
    // below 1.0 — presumably a floor for FTS-score normalisation;
    // confirm against `blend_and_rank`.
    let mut max_fts_score: f64 = 1.0;
    let mut fts_candidates_count: usize = 0;
    // v0.7.0 H7 — accumulates stored embeddings whose dimensionality
    // disagrees with the active model's `query_embedding` across BOTH the
    // FTS branch (here) and the semantic linear-scan branch (below).
    let mut dim_mismatch_count: usize = 0;
    for (mem, fts_score, embedding_bytes) in fts_results {
        if fts_score > max_fts_score {
            max_fts_score = fts_score;
        }
        // Cluster-F PERF-2 — cosine from the inline-fetched embedding
        // bytes. Malformed BLOBs degrade to cosine=0 + warn-log so a
        // single corrupt row does not poison the whole recall.
        let cosine = match embedding_bytes {
            Some(bytes) if !bytes.is_empty() => {
                match crate::embeddings::decode_embedding_blob(&bytes) {
                    Ok(emb) => match crate::embeddings::Embedder::cosine_similarity_checked(
                        query_embedding,
                        &emb,
                    ) {
                        crate::embeddings::CosineComparison::Comparable(c) => f64::from(c),
                        crate::embeddings::CosineComparison::DimensionMismatch { .. } => {
                            // v0.7.0 H7 — embedder-model switch: count the
                            // stale-dimension row instead of letting it score a
                            // silent 0.0 cosine. FTS keyword score still applies.
                            dim_mismatch_count += 1;
                            0.0
                        }
                    },
                    Err(_) => {
                        tracing::warn!(
                            memory_id = %mem.id,
                            "skipping malformed embedding BLOB during hybrid recall (FTS branch)"
                        );
                        0.0
                    }
                }
            }
            // No stored embedding (absent or empty BLOB): the row keeps
            // its keyword score and contributes no semantic signal.
            _ => 0.0,
        };
        scored.insert(mem.id.clone(), (mem, fts_score, cosine));
        fts_candidates_count += 1;
    }

    // Stage 3 — semantic phase (HNSW when available, linear-scan
    // fallback). When `precomputed_hnsw_hits` is supplied the search
    // step is skipped (already paid outside the DB lock); otherwise
    // the in-pipeline `idx.search()` runs as before.
    let hnsw_candidates_count = semantic_phase(
        conn,
        &prep,
        query_embedding,
        vector_index,
        precomputed_hnsw_hits,
        namespace,
        tags_filter,
        since,
        until,
        limit,
        include_archived,
        source_uri_prefix,
        &mut scored,
        &mut dim_mismatch_count,
    )?;

    // v0.7.0 H7 — de-silence embedder-model switches. A non-zero count means
    // stored embeddings were produced by a different model (different
    // dimensionality) than the active embedder, so their semantic signal was
    // forced to 0.0 for this query. One aggregated warn per recall (not per
    // row) tells the operator the affected rows need re-embedding.
    if dim_mismatch_count > 0 {
        tracing::warn!(
            dim_mismatch_count,
            active_query_dim = query_embedding.len(),
            "recall skipped {dim_mismatch_count} stored embedding(s) with mismatched \
             dimensionality — the embedder model appears to have changed; re-embed the \
             affected memories to restore their semantic recall signal"
        );
    }

    // Stage 4 — adaptive blend + per-tier decay.
    let (results, blend_weights) = blend_and_rank(scored, max_fts_score, scoring, limit);

    // Stage 5 — proximity boost + token budget + batched touch.
    let (budgeted, outcome) = apply_recall_post_ops(
        conn,
        results,
        prep.hierarchy_active,
        namespace,
        budget_tokens,
        short_extend,
        mid_extend,
    );

    // Stage 6 — telemetry assembly.
    let telemetry = assemble_recall_telemetry(
        fts_candidates_count,
        hnsw_candidates_count,
        &blend_weights,
        dim_mismatch_count,
    );

    Ok((budgeted, outcome, telemetry))
}
9048
9049/// Checkpoint WAL for clean shutdown.
9050pub fn checkpoint(conn: &Connection) -> Result<()> {
9051    conn.pragma_update(None, "wal_checkpoint", "TRUNCATE")?;
9052    Ok(())
9053}
9054
9055// ---------------------------------------------------------------------------
9056// Phase 3 foundation (issue #224) — sync_state helpers.
9057//
9058// These are additive: they do not change how the existing `ai-memory sync`
9059// command behaves in v0.6.0 GA. They exist so HTTP sync endpoints and the
9060// CRDT-lite merge follow-up can durably track "last updated_at seen from
9061// peer X" per local agent.
9062// ---------------------------------------------------------------------------
9063
9064/// Record the latest `updated_at` this local agent has observed from `peer_id`.
9065/// Monotonic by timestamp — older writes do not overwrite newer ones.
9066/// Lazily creates the row on first observation.
9067pub fn sync_state_observe(
9068    conn: &Connection,
9069    agent_id: &str,
9070    peer_id: &str,
9071    seen_at: &str,
9072) -> Result<()> {
9073    let now = Utc::now().to_rfc3339();
9074    conn.execute(
9075        "INSERT INTO sync_state (agent_id, peer_id, last_seen_at, last_pulled_at) \
9076         VALUES (?1, ?2, ?3, ?4) \
9077         ON CONFLICT(agent_id, peer_id) DO UPDATE SET \
9078            last_seen_at = CASE WHEN excluded.last_seen_at > last_seen_at \
9079                                THEN excluded.last_seen_at \
9080                                ELSE last_seen_at END, \
9081            last_pulled_at = excluded.last_pulled_at",
9082        params![agent_id, peer_id, seen_at, now],
9083    )?;
9084    Ok(())
9085}
9086
9087/// Load the full vector clock for `agent_id` — the set of
9088/// (`peer_id` -> `last_seen_at`) this local agent tracks.
9089pub fn sync_state_load(conn: &Connection, agent_id: &str) -> Result<crate::models::VectorClock> {
9090    let mut stmt =
9091        conn.prepare("SELECT peer_id, last_seen_at FROM sync_state WHERE agent_id = ?1")?;
9092    let rows = stmt.query_map(params![agent_id], |row| {
9093        Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
9094    })?;
9095    let mut clock = crate::models::VectorClock::default();
9096    for row in rows {
9097        let (peer, at) = row?;
9098        clock.entries.insert(peer, at);
9099    }
9100    Ok(clock)
9101}
9102
9103/// Look up this peer's last-push watermark for `peer_id`. Returns `None`
9104/// if we've never successfully pushed to them (foundation-era rows also
9105/// return `None` because the column was added in schema v12).
9106#[must_use]
9107#[allow(dead_code)] // called via lib crate (daemon_runtime); bin sees it as unused
9108pub fn sync_state_last_pushed(conn: &Connection, agent_id: &str, peer_id: &str) -> Option<String> {
9109    conn.query_row(
9110        "SELECT last_pushed_at FROM sync_state WHERE agent_id = ?1 AND peer_id = ?2",
9111        params![agent_id, peer_id],
9112        |r| r.get::<_, Option<String>>(0),
9113    )
9114    .ok()
9115    .flatten()
9116}
9117
9118/// Record that local memories up to `updated_at = pushed_at` have been
9119/// accepted by `peer_id`. Creates the row if it doesn't exist; monotonic.
9120#[allow(dead_code)] // called via lib crate (daemon_runtime); bin sees it as unused
9121pub fn sync_state_record_push(
9122    conn: &Connection,
9123    agent_id: &str,
9124    peer_id: &str,
9125    pushed_at: &str,
9126) -> Result<()> {
9127    let now = Utc::now().to_rfc3339();
9128    conn.execute(
9129        "INSERT INTO sync_state (agent_id, peer_id, last_seen_at, last_pulled_at, last_pushed_at) \
9130         VALUES (?1, ?2, ?3, ?3, ?4) \
9131         ON CONFLICT(agent_id, peer_id) DO UPDATE SET \
9132            last_pushed_at = CASE \
9133                WHEN excluded.last_pushed_at IS NULL THEN last_pushed_at \
9134                WHEN last_pushed_at IS NULL THEN excluded.last_pushed_at \
9135                WHEN excluded.last_pushed_at > last_pushed_at THEN excluded.last_pushed_at \
9136                ELSE last_pushed_at END",
9137        params![agent_id, peer_id, now, pushed_at],
9138    )?;
9139    Ok(())
9140}
9141
9142/// Return memories whose `updated_at > since`, ordered by `updated_at`
9143/// ascending. Used by `GET /api/v1/sync/since` to stream incremental
9144/// updates to a peer. Caps at `limit` rows (caller-chosen pagination).
9145pub fn memories_updated_since(
9146    conn: &Connection,
9147    since: Option<&str>,
9148    limit: usize,
9149) -> Result<Vec<Memory>> {
9150    // #1028 (HIGH, 2026-05-21) — REVERTED 2026-05-21 via QC pass-2.
9151    // The first-pass fix added a SAL-level
9152    // `COALESCE(scope, 'private') <> 'private'` filter here on the
9153    // grounds of "defense-in-depth". That was wrong: the federation
9154    // visibility gate (federation_legacy_row_visibility_978 + the
9155    // dispatch logic in src/federation/) is a RICHER contract than
9156    // pure scope=private — it handles owner-signed-private projection
9157    // back to the owner peer, inbox-target private projection, and
9158    // federation_share opt-in on legacy rows. The SAL-level filter
9159    // bypassed those branches and broke 5 federation tests. The
9160    // visibility gate runs DOWNSTREAM of this method and already
9161    // refuses to project rows that shouldn't federate. The proper
9162    // fix would belong in the federation handler (or the visibility
9163    // gate audit) — tracked under follow-up rather than at the SAL.
9164    // #1476 — sargable split, mirrors src/store/postgres.rs. The former
9165    // `(?1 IS NULL OR updated_at > ?1)` predicate is non-sargable: SQLite
9166    // cannot use `idx_memories_updated_at` to satisfy an OR-NULL branch,
9167    // so it falls back to a full table scan. Splitting on `since` lets
9168    // the None path read in index order (no predicate) and the Some path
9169    // use the index as a range bound (`updated_at > ?1`), each with
9170    // early-stop under the LIMIT.
9171    const COLS: &str = "SELECT id, tier, namespace, title, content, tags, priority, confidence, \
9172                source, access_count, created_at, updated_at, last_accessed_at, \
9173                expires_at, metadata \
9174         FROM memories ";
9175    let rows = match since {
9176        None => {
9177            let mut stmt = conn.prepare(&format!("{COLS} ORDER BY updated_at ASC LIMIT ?1"))?;
9178            stmt.query_map(params![limit], row_to_memory)?
9179                .collect::<rusqlite::Result<Vec<_>>>()
9180        }
9181        Some(s) => {
9182            let mut stmt = conn.prepare(&format!(
9183                "{COLS} WHERE updated_at > ?1 ORDER BY updated_at ASC LIMIT ?2"
9184            ))?;
9185            stmt.query_map(params![s, limit], row_to_memory)?
9186                .collect::<rusqlite::Result<Vec<_>>>()
9187        }
9188    };
9189    rows.map_err(Into::into)
9190}
9191
9192/// Deep health check — verifies DB is accessible and FTS is functional.
9193pub fn health_check(conn: &Connection) -> Result<bool> {
9194    let _: i64 = conn.query_row("SELECT COUNT(*) FROM memories", [], |r| r.get(0))?;
9195    conn.execute(
9196        "INSERT INTO memories_fts(memories_fts) VALUES('integrity-check')",
9197        [],
9198    )?;
9199    Ok(true)
9200}
9201
9202// ---------------------------------------------------------------------------
9203// Namespace standards
9204// ---------------------------------------------------------------------------
9205
9206/// Set the standard memory for a namespace, with optional parent for rule layering.
9207pub fn set_namespace_standard(
9208    conn: &Connection,
9209    namespace: &str,
9210    standard_id: &str,
9211    parent: Option<&str>,
9212) -> Result<()> {
9213    // Verify the memory exists (but allow cross-namespace — shared policy)
9214    let _mem = get(conn, standard_id)?.ok_or_else(|| {
9215        // #962 typed envelope — 404 NOT_FOUND.
9216        anyhow::Error::new(StorageError::MemoryNotFound {
9217            id: standard_id.to_string(),
9218            role: None,
9219        })
9220    })?;
9221    // Resolve parent: explicit > auto-detect by `-` prefix > none
9222    let resolved_parent = match parent {
9223        Some(p) => {
9224            if p == namespace {
9225                // #962 typed envelope.
9226                return Err(anyhow::Error::new(StorageError::InvalidArgument {
9227                    reason: "namespace cannot be its own parent".to_string(),
9228                }));
9229            }
9230            Some(p.to_string())
9231        }
9232        None => auto_detect_parent(conn, namespace),
9233    };
9234    let now = chrono::Utc::now().to_rfc3339();
9235    conn.execute(
9236        "INSERT INTO namespace_meta (namespace, standard_id, updated_at, parent_namespace)
9237         VALUES (?1, ?2, ?3, ?4)
9238         ON CONFLICT(namespace) DO UPDATE SET standard_id = ?2, updated_at = ?3, parent_namespace = ?4",
9239        params![namespace, standard_id, now, resolved_parent],
9240    )?;
9241    Ok(())
9242}
9243
9244/// Auto-detect parent namespace by `-` prefix.
9245/// "ai-memory-tests" → checks "ai-memory" → checks "ai" → first match wins.
9246fn auto_detect_parent(conn: &Connection, namespace: &str) -> Option<String> {
9247    let mut candidate = namespace.to_string();
9248    while let Some(pos) = candidate.rfind('-') {
9249        candidate.truncate(pos);
9250        if candidate.is_empty() {
9251            break;
9252        }
9253        // Check if this candidate has a standard set
9254        if get_namespace_standard(conn, &candidate)
9255            .ok()
9256            .flatten()
9257            .is_some()
9258        {
9259            return Some(candidate);
9260        }
9261    }
9262    None
9263}
9264
9265/// Get the standard memory ID for a namespace.
9266#[allow(clippy::unnecessary_wraps)]
9267pub fn get_namespace_standard(conn: &Connection, namespace: &str) -> Result<Option<String>> {
9268    let result = conn
9269        .query_row(
9270            "SELECT standard_id FROM namespace_meta WHERE namespace = ?1",
9271            params![namespace],
9272            |r| r.get(0),
9273        )
9274        .ok();
9275    Ok(result)
9276}
9277
9278/// Get the parent namespace for a given namespace.
9279pub fn get_namespace_parent(conn: &Connection, namespace: &str) -> Option<String> {
9280    conn.query_row(
9281        "SELECT parent_namespace FROM namespace_meta WHERE namespace = ?1 AND parent_namespace IS NOT NULL",
9282        params![namespace],
9283        |r| r.get(0),
9284    )
9285    .ok()
9286}
9287
9288/// v0.6.2 (S35): read the full `namespace_meta` row for a namespace so the
9289/// caller can fan it out to peers. Returns `None` when no standard is set.
9290/// Mirrors the (`namespace`, `standard_id`, `parent_namespace`, `updated_at`)
9291/// tuple used by `set_namespace_standard`.
9292#[allow(clippy::unnecessary_wraps)]
9293pub fn get_namespace_meta_entry(
9294    conn: &Connection,
9295    namespace: &str,
9296) -> Result<Option<crate::models::NamespaceMetaEntry>> {
9297    let row = conn
9298        .query_row(
9299            "SELECT namespace, standard_id, parent_namespace, updated_at
9300             FROM namespace_meta WHERE namespace = ?1",
9301            params![namespace],
9302            |r| {
9303                Ok(crate::models::NamespaceMetaEntry {
9304                    namespace: r.get(0)?,
9305                    standard_id: r.get(1)?,
9306                    parent_namespace: r.get(2)?,
9307                    updated_at: r.get::<_, Option<String>>(3)?.unwrap_or_default(),
9308                })
9309            },
9310        )
9311        .ok();
9312    Ok(row)
9313}
9314
9315/// Clear the standard for a namespace.
9316pub fn clear_namespace_standard(conn: &Connection, namespace: &str) -> Result<bool> {
9317    let changed = conn.execute(
9318        "DELETE FROM namespace_meta WHERE namespace = ?1",
9319        params![namespace],
9320    )?;
9321    Ok(changed > 0)
9322}
9323
9324// ---------------------------------------------------------------------------
9325// Task 1.9 — governance enforcement + pending_actions CRUD
9326// ---------------------------------------------------------------------------
9327
9328/// Build the namespace inheritance chain in **top-down** order
9329/// (`["*", root, ..., leaf]`). Mirrors and replaces the historical
9330/// `mcp::build_namespace_chain` so non-MCP call sites (db-layer
9331/// governance enforcement, HTTP handlers, future hook pipelines) can
9332/// reuse the same walk.
9333///
9334/// Properties (preserved from the prior MCP-only implementation):
9335/// - cycle-safe (visited set + bounded by `MAX_EXPLICIT_DEPTH = 8`)
9336/// - includes the global standard `*` as the most-general entry
9337/// - prepends explicit `namespace_meta.parent_namespace` ancestors
9338///   before the `/`-derived hierarchy, supporting flat→hierarchical
9339///   linking (e.g. legacy `ai-memory` → `ai-memory-mcp`)
9340///
9341/// The MCP layer's display path consumes this top-down. The governance
9342/// resolver in [`resolve_governance_policy`] reverses it for a
9343/// leaf-first walk (most-specific wins).
9344#[must_use]
9345pub fn build_namespace_chain(conn: &Connection, namespace: &str) -> Vec<String> {
9346    const MAX_EXPLICIT_DEPTH: usize = 8;
9347    let mut chain: Vec<String> = Vec::new();
9348
9349    if namespace == "*" {
9350        chain.push("*".to_string());
9351        return chain;
9352    }
9353
9354    // Always start with the global standard — most general.
9355    chain.push("*".to_string());
9356
9357    // 1. /-derived ancestors. `namespace_ancestors` returns most-specific-first;
9358    //    reverse for top-down (root ancestor first, then namespace itself last).
9359    let mut hierarchy_chain: Vec<String> = crate::models::namespace_ancestors(namespace)
9360        .into_iter()
9361        .rev()
9362        .collect();
9363
9364    // 2. If the ROOTmost of the /-chain has an explicit `namespace_meta` parent,
9365    //    prepend that chain (bounded by MAX_EXPLICIT_DEPTH + cycle-safe).
9366    //    Supports legacy flat namespaces (e.g. `ai-memory` → `ai-memory-mcp`).
9367    if let Some(root) = hierarchy_chain.first().cloned() {
9368        let mut explicit_above: Vec<String> = Vec::new();
9369        let mut current = root;
9370        for _ in 0..MAX_EXPLICIT_DEPTH {
9371            match get_namespace_parent(conn, &current) {
9372                Some(p)
9373                    if p != "*"
9374                        && !explicit_above.contains(&p)
9375                        && !hierarchy_chain.contains(&p) =>
9376                {
9377                    explicit_above.push(p.clone());
9378                    current = p;
9379                }
9380                _ => break,
9381            }
9382        }
9383        // `explicit_above` is [immediate-explicit-parent, grandparent, ...];
9384        // reverse to prepend in top-down order.
9385        for p in explicit_above.into_iter().rev() {
9386            chain.push(p);
9387        }
9388    }
9389
9390    // 3. Append the /-derived chain (top-down).
9391    for entry in hierarchy_chain.drain(..) {
9392        if !chain.contains(&entry) {
9393            chain.push(entry);
9394        }
9395    }
9396
9397    chain
9398}
9399
9400/// Read the explicit governance policy attached to a single namespace's
9401/// standard memory. Does **not** walk the inheritance chain — callers
9402/// that want hierarchical resolution should use
9403/// [`resolve_governance_policy`] instead.
9404///
9405/// **NHI-P4-T19 (v0.7.0 NHI testing):** returns `None` when the
9406/// standard carries no explicit `metadata.governance`. Operators who
9407/// want enforcement-by-default can either (a) write
9408/// `metadata.governance = {"write": "owner", ...}` into their standard
9409/// memory, or (b) use the
9410/// [`crate::models::GovernancePolicy::default_for_managed_namespace`]
9411/// helper as a starting template. Changing the implicit fallback to
9412/// Owner is deferred to v0.7.1 because it can break inheritance chains
9413/// where a parent's standard was registered under a distinct agent
9414/// identity from descendant operations.
9415fn read_namespace_policy(conn: &Connection, namespace: &str) -> Option<GovernancePolicy> {
9416    let standard_id = get_namespace_standard(conn, namespace).ok()??;
9417    let mem = get(conn, &standard_id).ok()??;
9418    match GovernancePolicy::from_metadata(&mem.metadata) {
9419        Some(Ok(p)) => Some(p),
9420        // #1384 — observability for stored-corruption. The write path
9421        // (`memory_namespace_set_standard` → typed `GovernancePolicy`
9422        // deserialise) rejects unknown enum variants and malformed
9423        // structures (verified live against alice: `write: "approval"`
9424        // returns a typed 400 error). A parse error here therefore
9425        // means the stored JSON drifted out-of-band: direct SQL update,
9426        // migration corruption, older binary writing newer schema,
9427        // etc. Pre-#1384 this arm silently returned `None` and the
9428        // inheritance walk continued to the parent — which may be
9429        // totally permissive, silently downgrading the operator's
9430        // intent. Surface the drift via tracing WARN so operators
9431        // can grep `ai_memory::governance::policy_read` for the lag.
9432        // We still return `None` (don't fail-CLOSED at the read site
9433        // — that could lock callers out of unrelated namespaces) but
9434        // operators now have a structured signal to investigate.
9435        Some(Err(parse_err)) => {
9436            tracing::warn!(
9437                target: "ai_memory::governance::policy_read",
9438                namespace = %namespace,
9439                standard_id = %standard_id,
9440                error = %parse_err,
9441                "stored metadata.governance failed typed deserialise — \
9442                 inheritance walk will continue past this namespace as \
9443                 if no policy were set. Likely cause: direct SQL update, \
9444                 older binary, or corrupted migration. Operator should \
9445                 re-run `memory_namespace_set_standard` to restore the \
9446                 typed shape."
9447            );
9448            None
9449        }
9450        None => None,
9451    }
9452}
9453
9454/// Resolve the governance policy that gates actions in `namespace`.
9455///
9456/// v0.6.3.1 (P4, audit G1): walks the inheritance chain leaf-first and
9457/// returns the most-specific policy. This closes the audit's
9458/// highest-severity finding — prior to this fix the resolver consulted
9459/// only the leaf, which left children of governed parents (e.g.
9460/// `alphaone/secure/team-a` under an `Approve` policy at
9461/// `alphaone/secure`) **completely ungoverned** despite the
9462/// architecture page T2 promising "Hierarchical policy inheritance
9463/// (default at `org/`, overridable at `org/team/`)".
9464///
9465/// **Walk semantics** (carefully — easy to get subtly wrong):
9466///   1. Build the chain via [`build_namespace_chain`] (top-down) and
9467///      reverse it so we walk leaf → root. The leaf is the namespace
9468///      we were asked about; the root is the global `*` standard.
9469///   2. At each level `k`, look up the policy attached to that
9470///      namespace's standard memory.
9471///      - If a policy **exists**, it is the most-specific match seen
9472///        so far. Return it immediately. ("Most specific wins.")
9473///      - If a policy **also says `inherit: false`**, this is already
9474///        the same return path — we never reach the parent because
9475///        we already returned.
9476///   3. If level `k` has **no policy at all**, keep walking — this is
9477///      the implicit-inherit branch (no policy means "I don't override
9478///      my parent").
9479///   4. If we walk off the top of the chain without finding a policy,
9480///      return `None` (enforcement remains opt-in for namespaces with
9481///      no governance configured anywhere in the chain).
9482///
9483/// **Where does `inherit: false` actually do work?** When the most-
9484/// specific policy we hit on the walk has `inherit: false`. That
9485/// policy is returned (same return point as the inherit=true case),
9486/// so its rules govern the action; the false flag is what
9487/// **conceptually stops** the walk above it, but the implementation
9488/// stops the walk simply by virtue of having found a policy. The flag
9489/// matters most as a documented contract surfaced to operators: "a
9490/// policy here authoritatively replaces, not extends, what's above."
9491/// The flag also flows through the queued-pending-action approver
9492/// resolution so consensus/agent rules don't accidentally re-walk to
9493/// a parent.
9494///
9495/// Cycle-safety is inherited from `build_namespace_chain`
9496/// (`MAX_EXPLICIT_DEPTH = 8` + visited set). No new cache is
9497/// introduced — profile-driven optimization is a v0.7 item.
9498pub fn resolve_governance_policy(conn: &Connection, namespace: &str) -> Option<GovernancePolicy> {
9499    // build_namespace_chain returns top-down (`["*", root, ..., leaf]`).
9500    // Governance resolution wants leaf-first (most specific first), so
9501    // we reverse before walking.
9502    let chain = build_namespace_chain(conn, namespace);
9503    for level in chain.into_iter().rev() {
9504        // Most-specific match wins. Returning immediately here means
9505        // an explicit policy at the leaf (or any descendant level
9506        // with a policy) authoritatively overrides anything above —
9507        // which is precisely the inherit=false semantic, applied
9508        // implicitly. The inherit=false flag is preserved on the
9509        // returned policy so callers (e.g. the pending_action
9510        // approver resolver) don't accidentally re-walk to a parent.
9511        if let Some(policy) = read_namespace_policy(conn, &level) {
9512            return Some(policy);
9513        }
9514        // Implicit branch: no policy at this level → keep walking
9515        // toward the root. This is the "default inherit" behavior
9516        // that closes G1.
9517    }
9518    None
9519}
9520
9521/// v0.7.0 L1-8 — read `governance.require_approval_above_depth` from the
9522/// namespace's most-specific governance metadata blob, leaf-first.
9523///
9524/// This is intentionally a free function (not a field on
9525/// [`GovernancePolicy`]) to avoid introducing a new required struct field
9526/// that would need updating at every `GovernancePolicy { … }` literal
9527/// in the codebase. The existing `GovernancePolicy` struct represents
9528/// the resolved enforcement policy; this field is a pre-write interception
9529/// threshold that lives beside it, not inside it.
9530///
9531/// Returns `None` when:
9532/// - no namespace standard is configured at any level of the chain, OR
9533/// - the standard's `metadata.governance` blob is absent or null, OR
9534/// - the blob does not contain a `require_approval_above_depth` key, OR
9535/// - the key is present but `null`.
9536///
9537/// Returns `Some(threshold)` when the key is a non-null unsigned integer.
9538/// Callers in `memory_reflect` compare `proposed_depth > threshold` and
9539/// queue a `pending_actions` row when the condition is true.
9540pub fn resolve_require_approval_above_depth(conn: &Connection, namespace: &str) -> Option<u32> {
9541    let chain = build_namespace_chain(conn, namespace);
9542    for level in chain.into_iter().rev() {
9543        let standard_id = match get_namespace_standard(conn, &level) {
9544            Ok(Some(id)) => id,
9545            _ => continue,
9546        };
9547        let mem = match get(conn, &standard_id) {
9548            Ok(Some(m)) => m,
9549            _ => continue,
9550        };
9551        // Governance blob must exist and not be null.
9552        let gov = match mem.metadata.get(crate::META_KEY_GOVERNANCE) {
9553            Some(g) if !g.is_null() => g,
9554            _ => continue,
9555        };
9556        // The field is optional inside the blob — `None` means skip this
9557        // level and keep walking (inherit semantics: an ancestor that sets
9558        // the field governs if the leaf does not override it).
9559        if let Some(threshold) = gov.get("require_approval_above_depth") {
9560            if let Some(n) = threshold.as_u64() {
9561                // QUAL-3 (FX-5): operator-controlled metadata. Reject the
9562                // silent `n as u32` truncation that would let an operator
9563                // who sets `require_approval_above_depth = 2^32` (which
9564                // would silently land as 0) DISABLE the approval gate
9565                // entirely (depth > 0 was the original intent, but
9566                // `low_32(2^32) == 0` makes `depth > 0` the actual gate;
9567                // any value ≥ 2^32 whose low-32 bits are also high turns
9568                // off the gate). Fail-CLOSED on overflow: saturate to 0
9569                // so EVERY depth triggers approval — this is the
9570                // conservative posture per CLAUDE.md K3/K9 governance
9571                // discipline. The companion regression test at
9572                // `tests/governance_metadata_no_silent_truncation.rs`
9573                // pins this behaviour.
9574                return Some(u32::try_from(n).unwrap_or(0));
9575            }
9576            // Key present but null → no gate at this level; keep walking.
9577        }
9578        // Policy found at this level but no require_approval_above_depth
9579        // key → no gate; stop walking (same leaf-first-wins semantics as
9580        // the main resolve_governance_policy walker: a leaf policy that
9581        // doesn't set the field takes precedence over a parent that does).
9582        if GovernancePolicy::from_metadata(&mem.metadata).is_some() {
9583            return None;
9584        }
9585    }
9586    None
9587}
9588
9589/// v0.7.0 L2-6 — read `governance.skill_promotion_min_depth` from the
9590/// namespace's most-specific governance metadata blob, leaf-first.
9591///
9592/// Mirrors [`resolve_require_approval_above_depth`] in shape and walk
9593/// semantics: it's a free function (not a [`GovernancePolicy`] field)
9594/// so it can land without churning every `GovernancePolicy { … }`
9595/// literal in the codebase, and it's a per-namespace threshold rather
9596/// than part of the resolved enforcement policy.
9597///
9598/// Returns `None` when:
9599/// - no namespace standard is configured at any level of the chain, OR
9600/// - the standard's `metadata.governance` blob is absent or null, OR
9601/// - the blob does not contain a `skill_promotion_min_depth` key, OR
9602/// - the key is present but `null`.
9603///
9604/// Returns `Some(threshold)` when the key is a non-null unsigned integer.
9605/// The `memory_skill_promote_from_reflection` MCP tool falls back to the
9606/// compiled-in default of `1` when this returns `None` — a reflection
9607/// must have at least one level of synthesised insight (depth ≥ 1)
9608/// before it can be promoted to a reusable skill.
9609pub fn resolve_skill_promotion_min_depth(conn: &Connection, namespace: &str) -> Option<u32> {
9610    let chain = build_namespace_chain(conn, namespace);
9611    for level in chain.into_iter().rev() {
9612        let standard_id = match get_namespace_standard(conn, &level) {
9613            Ok(Some(id)) => id,
9614            _ => continue,
9615        };
9616        let mem = match get(conn, &standard_id) {
9617            Ok(Some(m)) => m,
9618            _ => continue,
9619        };
9620        let gov = match mem.metadata.get(crate::META_KEY_GOVERNANCE) {
9621            Some(g) if !g.is_null() => g,
9622            _ => continue,
9623        };
9624        if let Some(threshold) = gov.get("skill_promotion_min_depth") {
9625            if let Some(n) = threshold.as_u64() {
9626                // QUAL-3 (FX-5): operator-controlled metadata. Reject the
9627                // silent `n as u32` truncation that would let an operator
9628                // who sets `skill_promotion_min_depth = 2^32 + k` silently
9629                // land as `k` after truncation — including the
9630                // catastrophic `k == 0` case which would mean "every
9631                // reflection can be promoted to a skill regardless of
9632                // depth". Fail-CLOSED on overflow: saturate to `u32::MAX`
9633                // so NO reflection can be promoted (the
9634                // `actual_depth_u32 < min_depth` check at
9635                // `src/mcp/tools/skill_promote.rs:174` becomes
9636                // permanently true). The companion regression test at
9637                // `tests/governance_metadata_no_silent_truncation.rs`
9638                // pins this behaviour.
9639                return Some(u32::try_from(n).unwrap_or(u32::MAX));
9640            }
9641            // Key present but null → no override at this level; keep walking.
9642        }
9643        // Policy found at this level but no skill_promotion_min_depth
9644        // key → no override; stop walking (leaf-first-wins semantics).
9645        if GovernancePolicy::from_metadata(&mem.metadata).is_some() {
9646            return None;
9647        }
9648    }
9649    None
9650}
9651
9652/// Return true if `agent_id` matches a registered agent in `_agents`.
9653pub fn is_registered_agent(conn: &Connection, agent_id: &str) -> bool {
9654    let title = crate::models::agent_registration_title(agent_id);
9655    conn.query_row(
9656        "SELECT 1 FROM memories WHERE namespace = ?1 AND title = ?2",
9657        params![AGENTS_NAMESPACE, &title],
9658        |r| r.get::<_, i64>(0),
9659    )
9660    .is_ok()
9661}
9662
9663/// Evaluate a governance level against caller context.
9664/// - `action`: the [`GovernedAction`] under evaluation; threaded into the
9665///   [`crate::governance::GovernanceRefusal`] envelope so refusal Display
9666///   includes the action verb without the caller having to wrap.
9667/// - `namespace`: target namespace; attached to the refusal envelope.
9668/// - `memory_owner`: the existing memory's `metadata.agent_id` (delete/promote paths).
9669///   Pass `None` for store operations.
9670/// - `namespace_owner`: the `metadata.agent_id` of the namespace's standard memory,
9671///   used as the "owner" for store operations. Resolved once by the caller.
9672///
9673/// #963 Phase 2 — `Deny` returns a typed
9674/// [`crate::governance::GovernanceRefusal`]. The `reason` field carries
9675/// the human-readable phrase WITHOUT the `"governance: "` prefix (the
9676/// envelope's `Display` adds the `"<action> denied by governance: "`
9677/// header). Pre-#963 the same path produced
9678/// `Deny(format!("governance: ..."))` which doubled the prefix when
9679/// consumers re-wrapped via `deny_message`.
9680fn evaluate_level(
9681    conn: &Connection,
9682    action: GovernedAction,
9683    namespace: &str,
9684    level: &GovernanceLevel,
9685    agent_id: &str,
9686    memory_owner: Option<&str>,
9687    namespace_owner: Option<&str>,
9688) -> GovernanceDecision {
9689    use crate::governance::GovernanceRefusal;
9690    match level {
9691        GovernanceLevel::Any => GovernanceDecision::Allow,
9692        GovernanceLevel::Registered => {
9693            if is_registered_agent(conn, agent_id) {
9694                GovernanceDecision::Allow
9695            } else {
9696                GovernanceDecision::Deny(
9697                    GovernanceRefusal::new(
9698                        action,
9699                        GovernanceLevel::Registered,
9700                        agent_id,
9701                        format!("caller '{agent_id}' is not a registered agent"),
9702                    )
9703                    .with_namespace(namespace),
9704                )
9705            }
9706        }
9707        GovernanceLevel::Owner => {
9708            let owner = memory_owner.or(namespace_owner);
9709            match owner {
9710                Some(o) if o == agent_id => GovernanceDecision::Allow,
9711                Some(o) => GovernanceDecision::Deny(
9712                    GovernanceRefusal::new(
9713                        action,
9714                        GovernanceLevel::Owner,
9715                        agent_id,
9716                        format!("caller '{agent_id}' is not the owner ('{o}')"),
9717                    )
9718                    .with_namespace(namespace)
9719                    .with_owner(o),
9720                ),
9721                None => GovernanceDecision::Deny(
9722                    GovernanceRefusal::new(
9723                        action,
9724                        GovernanceLevel::Owner,
9725                        agent_id,
9726                        "owner-level action has no resolvable owner",
9727                    )
9728                    .with_namespace(namespace),
9729                ),
9730            }
9731        }
9732        GovernanceLevel::Approve => {
9733            // Caller translates this into a queued pending_action — the enforcement
9734            // helpers below own the queueing so the db layer is the single source
9735            // of truth for pending ids.
9736            GovernanceDecision::Pending(String::new())
9737        }
9738    }
9739}
9740
9741/// Resolve the namespace-owner (`metadata.agent_id` of the namespace's
9742/// standard memory) used for `Owner`-level store checks.
9743///
9744/// **F1 (v0.7.0 round-2-fixes):** the lookup now walks the inheritance
9745/// chain leaf-first via [`build_namespace_chain`], returning the
9746/// `agent_id` of the first standard memory found. This mirrors
9747/// [`resolve_governance_policy`]'s semantics so that when a deep child
9748/// inherits a parent's `governance.write = owner` policy, the owner
9749/// check resolves to the parent's standard owner — matching operator
9750/// intuition that the helper means "owner of the effective policy at
9751/// this namespace".
9752///
9753/// Without this walk, deep children with no standard of their own
9754/// triggered `governance: owner-level action has no resolvable owner`
9755/// despite the parent's policy being correctly inherited.
9756fn namespace_owner(conn: &Connection, namespace: &str) -> Option<String> {
9757    // build_namespace_chain returns top-down (`["*", root, ..., leaf]`).
9758    // We want leaf-first so the most-specific owner wins, matching how
9759    // resolve_governance_policy picks up the most-specific policy.
9760    let chain = build_namespace_chain(conn, namespace);
9761    for level in chain.into_iter().rev() {
9762        let Some(standard_id) = get_namespace_standard(conn, &level).ok().flatten() else {
9763            continue;
9764        };
9765        let Some(mem) = get(conn, &standard_id).ok().flatten() else {
9766            continue;
9767        };
9768        if let Some(owner) = mem
9769            .metadata
9770            .get("agent_id")
9771            .and_then(|v| v.as_str())
9772            .map(str::to_string)
9773        {
9774            return Some(owner);
9775        }
9776    }
9777    None
9778}
9779
9780/// Enforce governance for a `GovernedAction`. On [`GovernanceDecision::Pending`],
9781/// a row is inserted into `pending_actions` and the returned `pending_id` is
9782/// embedded in the decision.
9783///
9784/// v0.7.0 K3 — the gate now consults
9785/// [`crate::config::active_permissions_mode`] and branches on the
9786/// active [`crate::config::PermissionsMode`]:
9787///
9788/// - [`PermissionsMode::Off`]: skip the gate entirely. Returns `Allow`
9789///   without touching `resolve_governance_policy` or `pending_actions`.
9790/// - [`PermissionsMode::Advisory`]: resolve the policy, log any
9791///   would-be `Deny`/`Pending` outcome at `WARN`, then return `Allow`.
9792///   No `pending_actions` row is queued. This is the v0.7.0 default —
9793///   it preserves the v0.6.x posture for upgrading operators where
9794///   governance metadata was advertised but the wider permission
9795///   system was honest-disclosed as advisory.
9796/// - [`PermissionsMode::Enforce`]: the historical strict path.
9797///   `Deny`/`Pending` decisions surface verbatim and the
9798///   `pending_actions` row is queued. Audit-ready posture; opt in via
9799///   `[permissions] mode = "enforce"` in `config.toml`.
9800///
9801/// Every consult increments the per-mode counter exposed via
9802/// [`crate::config::permissions_decision_counts`] so doctor +
9803/// capabilities can surface gate activity.
9804///
9805/// [`PermissionsMode`]: crate::config::PermissionsMode
9806pub fn enforce_governance(
9807    conn: &Connection,
9808    action: GovernedAction,
9809    namespace: &str,
9810    agent_id: &str,
9811    memory_id: Option<&str>,
9812    memory_owner: Option<&str>,
9813    payload: &serde_json::Value,
9814) -> Result<GovernanceDecision> {
9815    use crate::config::{PermissionsMode, active_permissions_mode, record_permissions_decision};
9816
9817    let mode = active_permissions_mode();
9818    record_permissions_decision(mode);
9819
9820    // K3 — `Off` short-circuits before any policy lookup.
9821    if mode == PermissionsMode::Off {
9822        return Ok(GovernanceDecision::Allow);
9823    }
9824
9825    // Opt-in enforcement: namespaces without an explicit policy are unaffected.
9826    let Some(policy) = resolve_governance_policy(conn, namespace) else {
9827        return Ok(GovernanceDecision::Allow);
9828    };
9829    // #880 — `write`/`delete`/`promote` live on `policy.core` after
9830    // the governance decomposition.
9831    let level = match action {
9832        GovernedAction::Store => &policy.core.write,
9833        GovernedAction::Delete => &policy.core.delete,
9834        GovernedAction::Promote => &policy.core.promote,
9835        // v0.7.0 L1-8: Reflect is gated by the L1-8 approval mechanism
9836        // (`require_approval_above_depth`) in the MCP handler rather than
9837        // the standard `enforce_governance` pipeline. Map to `write`
9838        // as the conservative fallback so the arm compiles; in practice
9839        // no current callsite passes `GovernedAction::Reflect` here.
9840        GovernedAction::Reflect => &policy.core.write,
9841    };
9842    let ns_owner = if matches!(action, GovernedAction::Store) {
9843        namespace_owner(conn, namespace)
9844    } else {
9845        None
9846    };
9847
9848    let decision = evaluate_level(
9849        conn,
9850        action,
9851        namespace,
9852        level,
9853        agent_id,
9854        memory_owner,
9855        ns_owner.as_deref(),
9856    );
9857
9858    // K3 — `Advisory` logs the would-be outcome but does not block or
9859    // queue a pending row. The capabilities surface continues to
9860    // advertise `permissions.mode = "advisory"` so external integrators
9861    // see the consistent posture.
9862    if mode == PermissionsMode::Advisory {
9863        match &decision {
9864            GovernanceDecision::Allow => {}
9865            GovernanceDecision::Deny(refusal) => {
9866                tracing::warn!(
9867                    target: "ai_memory::governance",
9868                    namespace = %namespace,
9869                    agent_id = %agent_id,
9870                    action = ?action,
9871                    reason = %refusal.reason,
9872                    denied_level = %refusal.denied_level.as_str(),
9873                    "permissions.mode=advisory: would-deny suppressed (allowing)"
9874                );
9875            }
9876            GovernanceDecision::Pending(_) => {
9877                tracing::warn!(
9878                    target: "ai_memory::governance",
9879                    namespace = %namespace,
9880                    agent_id = %agent_id,
9881                    action = ?action,
9882                    "permissions.mode=advisory: would-queue-approval suppressed (allowing)"
9883                );
9884            }
9885        }
9886        return Ok(GovernanceDecision::Allow);
9887    }
9888
9889    // K3 — `Enforce`: the historical strict path. `Pending` queues a
9890    // `pending_actions` row and returns the canonical id.
9891    if let GovernanceDecision::Pending(_) = decision {
9892        let pending_id =
9893            queue_pending_action(conn, action, namespace, memory_id, agent_id, payload)?;
9894        return Ok(GovernanceDecision::Pending(pending_id));
9895    }
9896    Ok(decision)
9897}
9898
9899/// Insert a `pending_actions` row and return its id.
9900pub fn queue_pending_action(
9901    conn: &Connection,
9902    action: GovernedAction,
9903    namespace: &str,
9904    memory_id: Option<&str>,
9905    requested_by: &str,
9906    payload: &serde_json::Value,
9907) -> Result<String> {
9908    let id = uuid::Uuid::new_v4().to_string();
9909    let now = Utc::now().to_rfc3339();
9910    let payload_json = serde_json::to_string(payload)?;
9911    conn.execute(
9912        "INSERT INTO pending_actions (id, action_type, memory_id, namespace, payload, requested_by, requested_at, status)
9913         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, 'pending')",
9914        params![
9915            id,
9916            action.as_str(),
9917            memory_id,
9918            namespace,
9919            payload_json,
9920            requested_by,
9921            now,
9922        ],
9923    )?;
9924    Ok(id)
9925}
9926
9927/// v0.6.2 (S34): upsert a `pending_actions` row from a canonical `PendingAction`
9928/// struct — used by `sync_push` to apply a peer-originated pending row so
9929/// governance state is cluster-consistent. Preserves `approvals` and
9930/// decision fields verbatim so re-plays converge. Uses `INSERT ... ON
9931/// CONFLICT(id) DO UPDATE` because the originator's id is stable across
9932/// peers (unlike `queue_pending_action` which mints a fresh UUID per
9933/// queue call).
9934pub fn upsert_pending_action(conn: &Connection, pa: &PendingAction) -> Result<()> {
9935    let payload_json = serde_json::to_string(&pa.payload)?;
9936    let approvals_json = serde_json::to_string(&pa.approvals)?;
9937    conn.execute(
9938        "INSERT INTO pending_actions
9939         (id, action_type, memory_id, namespace, payload, requested_by,
9940          requested_at, status, decided_by, decided_at, approvals)
9941         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)
9942         ON CONFLICT(id) DO UPDATE SET
9943            action_type  = excluded.action_type,
9944            memory_id    = excluded.memory_id,
9945            namespace    = excluded.namespace,
9946            payload      = excluded.payload,
9947            requested_by = excluded.requested_by,
9948            requested_at = excluded.requested_at,
9949            status       = excluded.status,
9950            decided_by   = excluded.decided_by,
9951            decided_at   = excluded.decided_at,
9952            approvals    = excluded.approvals",
9953        params![
9954            pa.id,
9955            pa.action_type,
9956            pa.memory_id,
9957            pa.namespace,
9958            payload_json,
9959            pa.requested_by,
9960            pa.requested_at,
9961            pa.status,
9962            pa.decided_by,
9963            pa.decided_at,
9964            approvals_json,
9965        ],
9966    )?;
9967    Ok(())
9968}
9969
9970pub fn list_pending_actions(
9971    conn: &Connection,
9972    status: Option<&str>,
9973    limit: usize,
9974) -> Result<Vec<PendingAction>> {
9975    let mut stmt = conn.prepare(
9976        "SELECT id, action_type, memory_id, namespace, payload, requested_by,
9977                requested_at, status, decided_by, decided_at, approvals
9978         FROM pending_actions
9979         WHERE (?1 IS NULL OR status = ?1)
9980         ORDER BY requested_at DESC
9981         LIMIT ?2",
9982    )?;
9983    let rows = stmt.query_map(params![status, limit], |row| {
9984        let payload_str: String = row.get(4)?;
9985        let payload: serde_json::Value =
9986            serde_json::from_str(&payload_str).unwrap_or(serde_json::Value::Null);
9987        let approvals_str: String = row.get(10)?;
9988        let approvals: Vec<Approval> = serde_json::from_str(&approvals_str).unwrap_or_default();
9989        Ok(PendingAction {
9990            id: row.get(0)?,
9991            action_type: row.get(1)?,
9992            memory_id: row.get(2)?,
9993            namespace: row.get(3)?,
9994            payload,
9995            requested_by: row.get(5)?,
9996            requested_at: row.get(6)?,
9997            status: row.get(7)?,
9998            decided_by: row.get(8)?,
9999            decided_at: row.get(9)?,
10000            approvals,
10001        })
10002    })?;
10003    rows.collect::<rusqlite::Result<Vec<_>>>()
10004        .map_err(Into::into)
10005}
10006
10007pub fn get_pending_action(conn: &Connection, id: &str) -> Result<Option<PendingAction>> {
10008    let row = conn.query_row(
10009        "SELECT id, action_type, memory_id, namespace, payload, requested_by,
10010                requested_at, status, decided_by, decided_at, approvals
10011         FROM pending_actions WHERE id = ?1",
10012        params![id],
10013        |row| {
10014            let payload_str: String = row.get(4)?;
10015            let payload: serde_json::Value =
10016                serde_json::from_str(&payload_str).unwrap_or(serde_json::Value::Null);
10017            let approvals_str: String = row.get(10)?;
10018            let approvals: Vec<Approval> = serde_json::from_str(&approvals_str).unwrap_or_default();
10019            Ok(PendingAction {
10020                id: row.get(0)?,
10021                action_type: row.get(1)?,
10022                memory_id: row.get(2)?,
10023                namespace: row.get(3)?,
10024                payload,
10025                requested_by: row.get(5)?,
10026                requested_at: row.get(6)?,
10027                status: row.get(7)?,
10028                decided_by: row.get(8)?,
10029                decided_at: row.get(9)?,
10030                approvals,
10031            })
10032        },
10033    );
10034    match row {
10035        Ok(p) => Ok(Some(p)),
10036        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
10037        Err(e) => Err(e.into()),
10038    }
10039}
10040
10041/// Mark a pending action as approved or rejected. Returns true on status
10042/// transition. Does NOT execute the action itself — the caller replays
10043/// the payload on approval (the db layer doesn't know how to execute
10044/// cross-interface write semantics).
10045///
10046/// v0.7.0 S5-M2 — on a successful deny transition this function appends a
10047/// `pending_action.denied` row to `signed_events` so the audit chain
10048/// captures every governance refusal alongside the approval and timeout
10049/// events. The emit is best-effort: failure is logged but does NOT roll
10050/// back the decision write (operators inspecting the audit chain see a
10051/// gap rather than losing the underlying decision).
10052pub fn decide_pending_action(
10053    conn: &Connection,
10054    id: &str,
10055    approve: bool,
10056    decided_by: &str,
10057) -> Result<bool> {
10058    let new_status = if approve { "approved" } else { "rejected" };
10059    let now = Utc::now().to_rfc3339();
10060    let updated = conn.execute(
10061        "UPDATE pending_actions SET status = ?1, decided_by = ?2, decided_at = ?3
10062         WHERE id = ?4 AND status = 'pending'",
10063        params![new_status, decided_by, now, id],
10064    )?;
10065    // S5-M2: emit a `pending_action.denied` audit row when the transition
10066    // landed and the decision is a deny. Approve emits later (after
10067    // execution) so the audit row captures the post-execute state — see
10068    // `execute_pending_action`.
10069    if updated > 0 && !approve {
10070        if let Ok(Some(pa)) = get_pending_action(conn, id) {
10071            emit_pending_action_event(conn, &pa, "pending_action.denied", Some(decided_by));
10072        }
10073    }
10074    Ok(updated > 0)
10075}
10076
10077/// v0.7.0 S5-M1/M2 — append a `pending_action.<state>` row to
10078/// `signed_events` so the audit chain captures every governance
10079/// decision transition (approve / deny / timeout).
10080///
10081/// `event_type` is one of:
10082/// - `"pending_action.approved"` (emitted from `execute_pending_action`
10083///   after a successful execute)
10084/// - `"pending_action.denied"` (emitted from `decide_pending_action`
10085///   on a deny transition)
10086/// - `"pending_action.timed_out"` (emitted from
10087///   `sweep_pending_action_timeouts` per expired row)
10088///
10089/// The CBOR payload encodes `(pending_id, action_type, namespace,
10090/// requested_by, decided_by, status, timestamp)` so a downstream
10091/// auditor can replay decision provenance without re-reading the
10092/// (mutable) `pending_actions` table.
10093///
10094/// Best-effort: any encode / append failure is logged at WARN; the
10095/// caller's primary mutation MUST NOT roll back on audit failure.
10096/// Mirrors the same posture as `memory_link.invalidated` emit (the
10097/// audit chain is allowed to gap, the underlying write is not).
10098fn emit_pending_action_event(
10099    conn: &Connection,
10100    pa: &PendingAction,
10101    event_type: &str,
10102    decided_by_override: Option<&str>,
10103) {
10104    // Build the canonical CBOR payload. We sort keys via a BTreeMap so
10105    // the encoding is stable across releases — the SHA-256 over these
10106    // bytes is the audit chain's commitment to the decision shape.
10107    // Mirrors the encoding pattern used by `identity::sign::canonical_cbor`
10108    // (ciborium + BTreeMap-ordered keys) so the audit chain stays
10109    // canonicalized across emit sites.
10110    use std::collections::BTreeMap;
10111    let decided_by = decided_by_override
10112        .map(str::to_string)
10113        .or_else(|| pa.decided_by.clone())
10114        .unwrap_or_default();
10115    let timestamp = Utc::now().to_rfc3339();
10116    let mut map: BTreeMap<&str, ciborium::Value> = BTreeMap::new();
10117    map.insert(
10118        field_names::PENDING_ID,
10119        ciborium::Value::Text(pa.id.clone()),
10120    );
10121    map.insert(
10122        field_names::ACTION_TYPE,
10123        ciborium::Value::Text(pa.action_type.clone()),
10124    );
10125    map.insert("namespace", ciborium::Value::Text(pa.namespace.clone()));
10126    map.insert(
10127        field_names::REQUESTED_BY,
10128        ciborium::Value::Text(pa.requested_by.clone()),
10129    );
10130    map.insert(
10131        field_names::DECIDED_BY,
10132        ciborium::Value::Text(decided_by.clone()),
10133    );
10134    map.insert("status", ciborium::Value::Text(pa.status.clone()));
10135    map.insert("timestamp", ciborium::Value::Text(timestamp.clone()));
10136    let entries: Vec<(ciborium::Value, ciborium::Value)> = map
10137        .into_iter()
10138        .map(|(k, v)| (ciborium::Value::Text(k.to_string()), v))
10139        .collect();
10140    let value = ciborium::Value::Map(entries);
10141    let mut cbor: Vec<u8> = Vec::with_capacity(128);
10142    if let Err(e) = ciborium::ser::into_writer(&value, &mut cbor) {
10143        tracing::warn!(
10144            target: crate::signed_events::SIGNED_EVENTS_TRACE_TARGET,
10145            pending_id = %pa.id,
10146            event_type,
10147            "failed to encode canonical CBOR for pending_action event: {e}"
10148        );
10149        return;
10150    }
10151
10152    // Audit row's `agent_id` field: the decision actor (decider) for
10153    // approve / deny, the requester for the requester-less timeout
10154    // path (no human/agent decided — the sweeper transitioned the
10155    // row, so the "actor" is the originating requester).
10156    let agent_id = if event_type == "pending_action.timed_out" {
10157        pa.requested_by.clone()
10158    } else {
10159        decided_by
10160    };
10161
10162    // v0.7.0 #1099 (SR-1 #4, HIGH) — sign pending_action audit rows
10163    // with the daemon's installed signing key when one is available.
10164    // Pre-#1099 every pending_action.{approved,rejected,timed_out}
10165    // row landed with `signature: None, attest_level: "unsigned"`
10166    // even when the daemon had loaded a signing key — breaking the
10167    // procurement-grade tamper-evidence claim on the approval audit
10168    // trail. Falls back to (None, "unsigned") cleanly when no key
10169    // is installed (legacy posture).
10170    let event = crate::signed_events::SignedEvent::with_daemon_signature(
10171        crate::signed_events::payload_hash(&cbor),
10172        agent_id,
10173        event_type.to_string(),
10174        timestamp,
10175    );
10176    if let Err(e) = crate::signed_events::append_signed_event(conn, &event) {
10177        tracing::warn!(
10178            target: crate::signed_events::SIGNED_EVENTS_TRACE_TARGET,
10179            pending_id = %pa.id,
10180            event_type,
10181            "failed to append pending_action audit row: {e}"
10182        );
10183    }
10184}
10185
10186/// v0.7.0 S5-H4 — extract `metadata.agent_id` from a pending-action
10187/// store/reflect payload and verify it matches `pa.requested_by`.
10188///
10189/// The S5 audit caught an approver-on-behalf laundering hole: a caller
10190/// could queue a `pending_action` with `requested_by = "alice"` but
10191/// embed a payload whose `metadata.agent_id = "bob"`, and on execute
10192/// the new memory would land attributed to bob — the approver, not the
10193/// requester, was attributing the write. This helper closes the gap by
10194/// requiring the payload's claimed agent to equal the pending row's
10195/// `requested_by`. If the payload omits an agent_id, we treat that as
10196/// a match (older callers may not have populated the field; the
10197/// substrate still records `pa.requested_by` as the canonical attributor
10198/// and the memory's `metadata.agent_id` gets stamped from there).
10199///
10200/// The check fires only on payload shapes that carry an agent_id —
10201/// today: `store` (full Memory JSON) and `reflect` (the L1-8 payload
10202/// that includes `agent_id`). `delete` / `promote` payloads do not
10203/// carry an agent_id (the action is attributed to `pa.requested_by`
10204/// directly), so this function returns `Ok(())` on those.
10205fn verify_payload_agent_id(pa: &PendingAction) -> Result<()> {
10206    let payload_agent_id = pa
10207        .payload
10208        .get("agent_id")
10209        .and_then(serde_json::Value::as_str)
10210        .or_else(|| {
10211            pa.payload
10212                .get("metadata")
10213                .and_then(|m| m.get("agent_id"))
10214                .and_then(serde_json::Value::as_str)
10215        });
10216    if let Some(claimed) = payload_agent_id
10217        && claimed != pa.requested_by
10218    {
10219        // #962 typed envelope — ApproverLaundering maps to 403 FORBIDDEN
10220        // via MemoryError::RefusedByGovernance (S5-H4 contract).
10221        return Err(anyhow::Error::new(StorageError::ApproverLaundering {
10222            pending_id: pa.id.clone(),
10223            claimed: claimed.to_string(),
10224            requester: pa.requested_by.clone(),
10225        }));
10226    }
10227    Ok(())
10228}
10229
/// Task 1.10 — result of an approver-aware approve call.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ApproveOutcome {
    /// #1620 — the pending id does not exist. Surfaces as 404
    /// everywhere; before #1620 this case was folded into `Rejected`
    /// and came out as 403 on sqlite while postgres answered 404 for
    /// the same probe.
    NotFound,
    /// The approver check did not pass; the string carries the reason.
    Rejected(String),
    /// The vote was recorded but the consensus quorum is not reached
    /// yet. `votes` is the number of recorded approvals, `quorum` the
    /// number required.
    Pending { votes: usize, quorum: u32 },
    /// The action is fully approved (single-step Human, matching
    /// designated Agent, or consensus threshold reached). The caller
    /// may now replay the payload through `execute_pending_action`.
    Approved,
}
10247
10248/// Task 1.10 — approver-type aware approve. Enforces the
10249/// `metadata.governance.approver` of the pending action's namespace.
10250pub fn approve_with_approver_type(
10251    conn: &Connection,
10252    pending_id: &str,
10253    approver_agent_id: &str,
10254) -> Result<ApproveOutcome> {
10255    let Some(pa) = get_pending_action(conn, pending_id)? else {
10256        // #1620 — typed NotFound (was Rejected → 403; postgres 404'd).
10257        return Ok(ApproveOutcome::NotFound);
10258    };
10259    if pa.status != "pending" {
10260        return Ok(ApproveOutcome::Rejected(format!(
10261            "already decided: status={}",
10262            pa.status
10263        )));
10264    }
10265    // Resolve the namespace's approver type. If no policy, default to Human —
10266    // which accepts any approval (back-compat with 1.9 callers).
10267    // #880 — `approver` lives on `policy.core` after the governance
10268    // decomposition.
10269    let approver = resolve_governance_policy(conn, &pa.namespace)
10270        .map_or(ApproverType::Human, |p| p.core.approver);
10271
10272    match approver {
10273        ApproverType::Human => {
10274            let ok = decide_pending_action(conn, pending_id, true, approver_agent_id)?;
10275            if ok {
10276                Ok(ApproveOutcome::Approved)
10277            } else {
10278                Ok(ApproveOutcome::Rejected(
10279                    crate::errors::msg::DECISION_WRITE_FAILED.into(),
10280                ))
10281            }
10282        }
10283        ApproverType::Agent(required) => {
10284            if approver_agent_id != required {
10285                return Ok(ApproveOutcome::Rejected(format!(
10286                    "designated approver is '{required}'; got '{approver_agent_id}'"
10287                )));
10288            }
10289            let ok = decide_pending_action(conn, pending_id, true, approver_agent_id)?;
10290            if ok {
10291                Ok(ApproveOutcome::Approved)
10292            } else {
10293                Ok(ApproveOutcome::Rejected(
10294                    crate::errors::msg::DECISION_WRITE_FAILED.into(),
10295                ))
10296            }
10297        }
10298        ApproverType::Consensus(quorum) => {
10299            // Issue #216: a single caller could previously satisfy any
10300            // Consensus(n) quorum by varying the unauthenticated `agent_id`
10301            // (`alice`, `bob`, `Alice`/`alice` were three distinct votes).
10302            // Two changes harden the path:
10303            //   1. Require each voter to be a registered agent — raises the
10304            //      bar from "claim any string" to "operator pre-registered
10305            //      this id". Combined with auth on the approve endpoint
10306            //      (operator-deployed) this gives a real multi-party gate.
10307            //   2. Canonicalize the agent_id to lowercase for both the
10308            //      duplicate-vote check and storage so case-variants of the
10309            //      same id collapse to a single vote.
10310            if !is_registered_agent(conn, approver_agent_id) {
10311                return Ok(ApproveOutcome::Rejected(format!(
10312                    "consensus voter '{approver_agent_id}' is not a registered agent"
10313                )));
10314            }
10315            let canonical_id = approver_agent_id.to_ascii_lowercase();
10316            let mut approvals = pa.approvals.clone();
10317            if approvals
10318                .iter()
10319                .any(|a| a.agent_id.eq_ignore_ascii_case(&canonical_id))
10320            {
10321                return Ok(ApproveOutcome::Pending {
10322                    votes: approvals.len(),
10323                    quorum,
10324                });
10325            }
10326            approvals.push(Approval {
10327                agent_id: canonical_id.clone(),
10328                approved_at: Utc::now().to_rfc3339(),
10329            });
10330            let approvals_json = serde_json::to_string(&approvals)?;
10331            conn.execute(
10332                "UPDATE pending_actions SET approvals = ?1 WHERE id = ?2 AND status = 'pending'",
10333                params![approvals_json, pending_id],
10334            )?;
10335            let votes = approvals.len();
10336            if u32::try_from(votes).unwrap_or(u32::MAX) >= quorum {
10337                // Threshold met — transition status so the caller can replay.
10338                let ok = decide_pending_action(conn, pending_id, true, &canonical_id)?;
10339                if ok {
10340                    return Ok(ApproveOutcome::Approved);
10341                }
10342                return Ok(ApproveOutcome::Rejected(
10343                    "decision write failed at consensus threshold".into(),
10344                ));
10345            }
10346            Ok(ApproveOutcome::Pending { votes, quorum })
10347        }
10348    }
10349}
10350
10351/// Task 1.10 — Execute an approved pending action's payload. Callers invoke
10352/// this after `approve_with_approver_type` returns `Approved`. Returns the
10353/// affected memory id (new id for store, existing id for delete/promote).
10354///
10355/// v0.7.0 S5-H1 — adds a `"reflect"` arm so an approved deep-reflection
10356/// queued by the L1-8 MCP gate (see `mcp::tools::reflect`) actually lands
10357/// instead of erroring out as "unknown action_type". The arm reconstructs
10358/// the original [`ReflectInput`] from the queued payload and replays it
10359/// through [`reflect`], inheriting the same depth-cap / source-resolution
10360/// checks the direct write path runs.
10361///
10362/// v0.7.0 S5-H4 — every arm runs [`verify_payload_agent_id`] BEFORE the
10363/// side-effecting mutation so an approver cannot launder a payload whose
10364/// embedded `agent_id` disagrees with the original requester (the
10365/// `pending_actions.requested_by` column). The refusal is a hard
10366/// `MemoryError::Validation`-shaped anyhow bail; on refusal we emit a
10367/// `pending_action.refused_agent_id_mismatch` audit row so the laundering
10368/// attempt is captured by the signed_events chain.
10369///
10370/// v0.7.0 S5-M1 — on a successful execute the function appends a
10371/// `pending_action.approved` row to `signed_events` (the deny + timeout
10372/// emits live in `decide_pending_action` and
10373/// `sweep_pending_action_timeouts` respectively, so the three governance
10374/// transitions are audit-complete together).
10375pub fn execute_pending_action(conn: &Connection, pending_id: &str) -> Result<Option<String>> {
10376    let Some(pa) = get_pending_action(conn, pending_id)? else {
10377        // #962 typed envelope — 404 NOT_FOUND.
10378        return Err(anyhow::Error::new(StorageError::PendingActionNotFound {
10379            pending_id: pending_id.to_string(),
10380        }));
10381    };
10382    if pa.status != "approved" {
10383        // #962 typed envelope — 409 CONFLICT (action is in the wrong state).
10384        return Err(anyhow::Error::new(
10385            StorageError::PendingActionStateInvalid {
10386                pending_id: pending_id.to_string(),
10387                status: pa.status.clone(),
10388            },
10389        ));
10390    }
10391    // S5-H4: refuse approver-on-behalf laundering BEFORE the side-effecting
10392    // write. Emit an audit row on refusal so the laundering attempt is
10393    // captured by the signed_events chain even when the substrate
10394    // bails the execute.
10395    if let Err(e) = verify_payload_agent_id(&pa) {
10396        emit_pending_action_event(conn, &pa, "pending_action.refused_agent_id_mismatch", None);
10397        return Err(e);
10398    }
10399    let memory_id = match pa.action_type.as_str() {
10400        "store" => {
10401            let mut mem: Memory = serde_json::from_value(pa.payload.clone()).map_err(|e| {
10402                // #962 typed envelope.
10403                anyhow::Error::new(StorageError::InvalidArgument {
10404                    reason: format!("invalid store payload: {e}"),
10405                })
10406            })?;
10407            // Stamp fresh id + timestamps so the execution is idempotent on replay.
10408            mem.id = uuid::Uuid::new_v4().to_string();
10409            let now = Utc::now().to_rfc3339();
10410            mem.created_at.clone_from(&now);
10411            mem.updated_at = now;
10412            mem.access_count = 0;
10413            let actual_id = insert(conn, &mem)?;
10414            Some(actual_id)
10415        }
10416        "delete" => {
10417            if let Some(mid) = pa.memory_id.clone() {
10418                delete(conn, &mid)?;
10419                Some(mid)
10420            } else {
10421                None
10422            }
10423        }
10424        "promote" => {
10425            if let Some(mid) = pa.memory_id.clone() {
10426                if let Some(to_ns) = pa
10427                    .payload
10428                    .get(field_names::TO_NAMESPACE)
10429                    .and_then(|v| v.as_str())
10430                {
10431                    // Vertical promotion to ancestor.
10432                    let clone_id = promote_to_namespace(conn, &mid, to_ns)?;
10433                    Some(clone_id)
10434                } else {
10435                    // Tier bump to long + clear expiry.
10436                    let (_found, _changed) = update(
10437                        conn,
10438                        &mid,
10439                        None,
10440                        None,
10441                        Some(&Tier::Long),
10442                        None,
10443                        None,
10444                        None,
10445                        None,
10446                        Some(""),
10447                        None,
10448                    )?;
10449                    Some(mid)
10450                }
10451            } else {
10452                None
10453            }
10454        }
10455        "reflect" => execute_reflect_from_payload(conn, &pa)?,
10456        other => {
10457            // #962 typed envelope.
10458            return Err(anyhow::Error::new(StorageError::InvalidArgument {
10459                reason: format!("unknown action_type: {other}"),
10460            }));
10461        }
10462    };
10463    // S5-M1: emit the approve audit row after the side-effecting write
10464    // succeeded so the audit chain reflects the post-execute state. The
10465    // emit is best-effort (warn-only) so an audit-side failure does not
10466    // roll back the governance decision.
10467    emit_pending_action_event(
10468        conn,
10469        &pa,
10470        "pending_action.approved",
10471        pa.decided_by.as_deref(),
10472    );
10473    Ok(memory_id)
10474}
10475
10476/// v0.7.0 S5-H1 — replay an approved reflect pending action through
10477/// [`reflect`]. Factored out of [`execute_pending_action`] so the arm
10478/// stays focused on payload deserialization + the substrate call, and
10479/// so the unit test (`test_execute_reflect_arm_succeeds_round_trip`)
10480/// can exercise the helper without duplicating the wrapper logic.
10481///
10482/// Payload shape (mirrors what `mcp::tools::reflect` queued in L1-8):
10483///
10484/// ```json
10485/// {
10486///   "source_ids": ["…", "…"],
10487///   "title": "…",
10488///   "content": "…",
10489///   "namespace": "…",
10490///   "tier": "mid",
10491///   "tags": ["…"],
10492///   "priority": 5,
10493///   "confidence": 1.0,
10494///   "agent_id": "…",
10495///   "proposed_depth": 3,
10496///   "metadata": { … }
10497/// }
10498/// ```
10499///
10500/// All fields are optional except `source_ids`, `title`, and `content`
10501/// (the substrate validator rejects empty values, so missing keys
10502/// surface as a `Validation` error rather than a panic).
10503fn execute_reflect_from_payload(conn: &Connection, pa: &PendingAction) -> Result<Option<String>> {
10504    let payload = &pa.payload;
10505    let source_ids: Vec<String> = payload
10506        .get(field_names::SOURCE_IDS)
10507        .and_then(|v| v.as_array())
10508        .map(|arr| {
10509            arr.iter()
10510                .filter_map(|v| v.as_str().map(str::to_string))
10511                .collect()
10512        })
10513        .unwrap_or_default();
10514    if source_ids.is_empty() {
10515        // #962 typed envelope.
10516        return Err(anyhow::Error::new(StorageError::InvalidArgument {
10517            reason: "invalid reflect payload: source_ids missing or empty".to_string(),
10518        }));
10519    }
10520    let title = payload
10521        .get("title")
10522        .and_then(|v| v.as_str())
10523        .ok_or_else(|| {
10524            // #962 typed envelope.
10525            anyhow::Error::new(StorageError::InvalidArgument {
10526                reason: "invalid reflect payload: title missing".to_string(),
10527            })
10528        })?
10529        .to_string();
10530    let content = payload
10531        .get("content")
10532        .and_then(|v| v.as_str())
10533        .ok_or_else(|| {
10534            // #962 typed envelope.
10535            anyhow::Error::new(StorageError::InvalidArgument {
10536                reason: "invalid reflect payload: content missing".to_string(),
10537            })
10538        })?
10539        .to_string();
10540    let namespace = payload
10541        .get("namespace")
10542        .and_then(|v| v.as_str())
10543        .map(str::to_string)
10544        .or_else(|| Some(pa.namespace.clone()));
10545    let tier = payload
10546        .get("tier")
10547        .and_then(|v| v.as_str())
10548        .and_then(Tier::from_str)
10549        .unwrap_or(Tier::Mid);
10550    let tags: Vec<String> = payload
10551        .get("tags")
10552        .and_then(|v| v.as_array())
10553        .map(|arr| {
10554            arr.iter()
10555                .filter_map(|v| v.as_str().map(str::to_string))
10556                .collect()
10557        })
10558        .unwrap_or_default();
10559    let priority = i32::try_from(
10560        payload
10561            .get("priority")
10562            .and_then(|v| v.as_i64())
10563            .unwrap_or(5),
10564    )
10565    .unwrap_or(5);
10566    let confidence = payload
10567        .get(field_names::CONFIDENCE)
10568        .and_then(|v| v.as_f64())
10569        .unwrap_or(1.0);
10570    // Use the queued payload's agent_id when present (already verified
10571    // to match `pa.requested_by` by `verify_payload_agent_id`), else
10572    // fall back to `pa.requested_by` — the substrate stamps the value
10573    // onto `metadata.agent_id` so attribution stays consistent.
10574    let agent_id = payload
10575        .get("agent_id")
10576        .and_then(|v| v.as_str())
10577        .map(str::to_string)
10578        .unwrap_or_else(|| pa.requested_by.clone());
10579    let metadata = payload
10580        .get("metadata")
10581        .cloned()
10582        .unwrap_or_else(|| serde_json::json!({}));
10583
10584    let input = crate::storage::reflect::ReflectInput {
10585        source_ids,
10586        title,
10587        content,
10588        namespace,
10589        tier,
10590        tags,
10591        priority,
10592        confidence,
10593        // v0.7.x (issue #1175): vendor-neutral substrate default.
10594        // Mirrors the MCP-side default at `src/mcp/tools/reflect.rs`
10595        // — see the comment there for the heterogeneous-NHI rationale.
10596        // Vendor identity stays in `metadata.agent_id`.
10597        source: crate::validate::DEFAULT_NHI_SOURCE.to_string(),
10598        agent_id,
10599        metadata,
10600    };
10601    let outcome = crate::storage::reflect::reflect(conn, &input)
10602        .map_err(|e| anyhow::anyhow!("reflect execute failed: {e}"))?;
10603    Ok(Some(outcome.id))
10604}
10605
10606/// Check if a memory ID is a namespace standard (used by consolidate to warn).
10607pub fn is_namespace_standard(conn: &Connection, id: &str) -> bool {
10608    conn.query_row(
10609        "SELECT COUNT(*) FROM namespace_meta WHERE standard_id = ?1",
10610        params![id],
10611        |r| r.get::<_, i64>(0),
10612    )
10613    .unwrap_or(0)
10614        > 0
10615}
10616
10617/// v0.6.3 (capabilities schema v2): count namespace standards whose
10618/// `metadata.governance` is non-null. A "rule" here means a namespace
10619/// has an explicit governance policy attached to its standard memory.
10620/// The count is a transparent passthrough — the full permission system
10621/// arrives in v0.7 (arch-enhancement-spec §3).
10622pub fn count_active_governance_rules(conn: &Connection) -> Result<usize> {
10623    let count: i64 = conn
10624        .query_row(
10625            "SELECT COUNT(*) FROM memories m
10626             INNER JOIN namespace_meta nm ON nm.standard_id = m.id
10627             WHERE json_extract(m.metadata, '$.governance') IS NOT NULL",
10628            [],
10629            |r| r.get(0),
10630        )
10631        .unwrap_or(0);
10632    Ok(usize::try_from(count.max(0)).unwrap_or(0))
10633}
10634
10635/// v0.7.0 K5 — enumerate every namespace whose standard memory carries an
10636/// explicit `metadata.governance` policy and return `(namespace, policy)`
10637/// pairs sorted lexicographically by namespace.
10638///
10639/// Companion to [`count_active_governance_rules`] (which returns just the
10640/// count). Powers the `permissions.rule_summary` field surfaced by
10641/// capabilities v3 — the K5 increment closes the v0.6.3.1 honesty
10642/// disclosure that the field was previously dropped from the wire because
10643/// no per-rule serializer existed.
10644///
10645/// Rows whose `metadata.governance` payload fails to round-trip through
10646/// `GovernancePolicy::from_metadata` are silently skipped — the
10647/// capabilities surface is best-effort and a malformed policy must not
10648/// take down the entire response. The wider gate
10649/// (`enforce_governance` → `read_namespace_policy`) already swallows the
10650/// same parse failures, so the surfaces stay consistent.
10651///
10652/// # Errors
10653///
10654/// Returns `Err` only on hard SQLite failures (e.g. table missing); the
10655/// row-level parse failures noted above are handled internally.
10656pub fn list_active_governance_policies(
10657    conn: &Connection,
10658) -> Result<Vec<(String, GovernancePolicy)>> {
10659    // Pull the raw `(namespace, metadata)` tuples for every namespace
10660    // whose standard memory has a non-null `metadata.governance`. We
10661    // ORDER BY at the SQL layer so the lex sort comes free and the
10662    // caller doesn't have to re-sort.
10663    let mut stmt = conn.prepare(
10664        "SELECT nm.namespace, m.metadata
10665         FROM namespace_meta nm
10666         INNER JOIN memories m ON m.id = nm.standard_id
10667         WHERE json_extract(m.metadata, '$.governance') IS NOT NULL
10668         ORDER BY nm.namespace ASC",
10669    )?;
10670    let rows = stmt.query_map([], |r| {
10671        let ns: String = r.get(0)?;
10672        let meta_str: String = r.get(1)?;
10673        Ok((ns, meta_str))
10674    })?;
10675
10676    let mut out = Vec::new();
10677    for row in rows.flatten() {
10678        let (ns, meta_str) = row;
10679        // Parse the metadata blob; skip rows that don't deserialize.
10680        let Ok(meta) = serde_json::from_str::<serde_json::Value>(&meta_str) else {
10681            continue;
10682        };
10683        // `from_metadata` returns `None` when the field is missing/null
10684        // (the SQL filter already excludes that path) and
10685        // `Some(Err(_))` on a malformed policy payload — skip both.
10686        match GovernancePolicy::from_metadata(&meta) {
10687            Some(Ok(policy)) => out.push((ns, policy)),
10688            _ => continue,
10689        }
10690    }
10691    Ok(out)
10692}
10693
10694/// v0.6.3 (capabilities schema v2): count rows in the `subscriptions`
10695/// table. Used by `handle_capabilities` as a proxy for "registered
10696/// hooks" — the hook pipeline itself is v0.7 Bucket 0 work.
10697pub fn count_subscriptions(conn: &Connection) -> Result<usize> {
10698    let count: i64 = conn
10699        .query_row("SELECT COUNT(*) FROM subscriptions", [], |r| r.get(0))
10700        .unwrap_or(0);
10701    Ok(usize::try_from(count.max(0)).unwrap_or(0))
10702}
10703
10704/// v0.6.3 (capabilities schema v2): count `pending_actions` rows whose
10705/// `status` matches the predicate. Used by `handle_capabilities` to
10706/// surface live approval queue depth.
10707pub fn count_pending_actions_by_status(conn: &Connection, status: &str) -> Result<usize> {
10708    let count: i64 = conn
10709        .query_row(
10710            "SELECT COUNT(*) FROM pending_actions WHERE status = ?1",
10711            params![status],
10712            |r| r.get(0),
10713        )
10714        .unwrap_or(0);
10715    Ok(usize::try_from(count.max(0)).unwrap_or(0))
10716}
10717
10718/// v0.7.0 K2 — pending_actions timeout sweeper.
10719///
10720/// Scans `pending_actions` for `status='pending'` rows whose age exceeds
10721/// the per-row `default_timeout_seconds` (or `global_default_secs` when
10722/// the per-row column is NULL). Transitions matching rows to
10723/// `status='expired'` and stamps `expired_at = now`.
10724///
10725/// Returns the list of `(id, namespace)` tuples that were just expired
10726/// so the caller can fan out approval-decision events. Empty queue is a
10727/// silent no-op.
10728///
10729/// Closes the v0.6.3.1 honest-Capabilities-v2 disclosure that
10730/// `default_timeout_seconds` was previously advertised but unused (the
10731/// v2 honesty patch had dropped it from the wire shape; K2 ships the
10732/// backing sweeper so the field is meaningful again).
10733///
10734/// # Errors
10735///
10736/// Returns `Err` only on hard SQLite failures (e.g. table missing).
10737pub fn sweep_pending_action_timeouts(
10738    conn: &Connection,
10739    global_default_secs: i64,
10740) -> Result<Vec<(String, String)>> {
10741    // Step 1 — find candidates. We compute age in SQL via julianday()
10742    // arithmetic so the sweep is index-friendly and avoids parsing
10743    // every `requested_at` row in Rust. The composite index
10744    // `idx_pending_status_requested` (added in migration v21) keeps
10745    // the planner from full-scanning the table.
10746    //
10747    // The `default_timeout_seconds` column is nullable; rows with NULL
10748    // fall back to `global_default_secs`. A non-positive global default
10749    // disables the sweeper entirely (operator escape hatch).
10750    if global_default_secs <= 0 {
10751        return Ok(Vec::new());
10752    }
10753    let mut stmt = conn.prepare(
10754        "SELECT id, namespace FROM pending_actions
10755         WHERE status = 'pending'
10756           AND (julianday('now') - julianday(requested_at)) * 86400.0
10757               > COALESCE(default_timeout_seconds, ?1)",
10758    )?;
10759    let rows: Vec<(String, String)> = stmt
10760        .query_map(params![global_default_secs], |row| {
10761            Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
10762        })?
10763        .collect::<rusqlite::Result<Vec<_>>>()?;
10764    if rows.is_empty() {
10765        return Ok(Vec::new());
10766    }
10767
10768    // Step 2 — flip status='expired' + stamp expired_at. We update
10769    // row-by-row inside a single transaction so a failure mid-batch
10770    // rolls back cleanly. The WHERE clause re-checks status='pending'
10771    // so a concurrent decide_pending_action wins (its decision is
10772    // not overwritten).
10773    let now = Utc::now().to_rfc3339();
10774    let tx_savepoint = conn.unchecked_transaction()?;
10775    {
10776        let mut update = tx_savepoint.prepare(
10777            "UPDATE pending_actions
10778             SET status = 'expired', expired_at = ?1
10779             WHERE id = ?2 AND status = 'pending'",
10780        )?;
10781        for (id, _) in &rows {
10782            update.execute(params![now, id])?;
10783        }
10784    }
10785    tx_savepoint.commit()?;
10786    // v0.7.0 S5-M2 — emit a `pending_action.timed_out` audit row per
10787    // expired pending row so the audit chain captures the timeout
10788    // transition alongside approve / deny. Best-effort: a missing
10789    // pending row or audit failure is logged at WARN; the sweep
10790    // itself has already committed.
10791    for (id, _) in &rows {
10792        if let Ok(Some(pa)) = get_pending_action(conn, id) {
10793            emit_pending_action_event(conn, &pa, "pending_action.timed_out", None);
10794        }
10795    }
10796    Ok(rows)
10797}
10798
10799// ---------------------------------------------------------------------------
10800// `ai-memory doctor` (P7 / R7) — query helpers.
10801// ---------------------------------------------------------------------------
10802//
10803// These read-only helpers back the `ai-memory doctor` CLI subcommand. Each
10804// query is a single indexed `COUNT(*)` (or close to it) so the reporter can
10805// run an entire health pass without holding the DB lock long enough to
10806// block live writers.
10807//
10808// Surfaces consumed:
// - `doctor_dim_violations` reads the post-P2 `embedding_dim` column when
10810//   present and gracefully reports `Ok(None)` on pre-P2 schemas (the column
10811//   doesn't exist yet on `release/v0.6.3`).
10812// - `count_index_evictions` reads the post-P3 `index_evictions_total` global
10813//   counter when wired (there is no schema-level surface today; it returns
10814//   `Ok(None)` so the doctor can render a "not yet observed" line).
// - `doctor_oldest_pending_age_secs` is portable today and reports the
10816//   age of the oldest `pending` row in seconds.
10817// - `count_governance_chain_depth` walks `parent_namespace` for each
10818//   namespace_meta row to estimate the inheritance depth distribution
10819//   the P4 enforcer will eventually consume.
10820
10821/// Count rows whose `embedding_dim` (post-P2) does not match the modal
10822/// dim within their namespace. On pre-P2 schemas the `embedding_dim`
10823/// column doesn't exist; the function returns `Ok(None)` so the doctor
10824/// can render "not yet observed (pre-P2 schema)".
10825///
10826/// # Errors
10827///
10828/// Returns `Err` only on hard SQLite failures — a missing column is
10829/// reported as `Ok(None)`, not an error.
10830pub fn doctor_dim_violations(conn: &Connection) -> Result<Option<usize>> {
10831    let has_dim = conn
10832        .prepare("SELECT embedding_dim FROM memories LIMIT 0")
10833        .is_ok();
10834    if !has_dim {
10835        return Ok(None);
10836    }
10837    // For each namespace, find the modal dim (most-frequent non-null value)
10838    // and count rows whose dim differs from it. Rows with NULL dim but a
10839    // non-empty embedding count as violations too — they are mid-migration.
10840    let n: i64 = conn
10841        .query_row(
10842            "WITH per_ns_modes AS (
10843                 SELECT namespace, embedding_dim, COUNT(*) AS c
10844                 FROM memories
10845                 WHERE embedding IS NOT NULL AND embedding_dim IS NOT NULL
10846                 GROUP BY namespace, embedding_dim
10847             ),
10848             ranked AS (
10849                 SELECT namespace, embedding_dim,
10850                        ROW_NUMBER() OVER (PARTITION BY namespace ORDER BY c DESC) AS rn
10851                 FROM per_ns_modes
10852             ),
10853             modes AS (
10854                 SELECT namespace, embedding_dim AS modal_dim
10855                 FROM ranked WHERE rn = 1
10856             )
10857             SELECT COUNT(*)
10858             FROM memories m
10859             LEFT JOIN modes mo ON mo.namespace = m.namespace
10860             WHERE m.embedding IS NOT NULL
10861               AND (m.embedding_dim IS NULL
10862                    OR (mo.modal_dim IS NOT NULL AND m.embedding_dim != mo.modal_dim))",
10863            [],
10864            |r| r.get(0),
10865        )
10866        .unwrap_or(0);
10867    Ok(Some(usize::try_from(n.max(0)).unwrap_or(0)))
10868}
10869
10870/// Age in seconds of the oldest `pending` row in `pending_actions`, or
10871/// `None` if the queue is empty (or the column is unparseable). The
10872/// doctor uses this to flag a backlog older than 24h as critical.
10873///
10874/// # Errors
10875///
10876/// Returns `Err` only on hard SQLite failures (e.g. missing table).
10877pub fn doctor_oldest_pending_age_secs(conn: &Connection) -> Result<Option<i64>> {
10878    let row: Option<String> = conn
10879        .query_row(
10880            "SELECT requested_at FROM pending_actions WHERE status = 'pending'
10881             ORDER BY requested_at ASC LIMIT 1",
10882            [],
10883            |r| r.get(0),
10884        )
10885        .ok();
10886    let Some(ts) = row else {
10887        return Ok(None);
10888    };
10889    let Ok(parsed) = chrono::DateTime::parse_from_rfc3339(&ts) else {
10890        return Ok(None);
10891    };
10892    // M11 (v0.7.0 round-2) — clamp negative ages to 0. `requested_at`
10893    // is stamped by the writer's clock; on a host with skewed time
10894    // (NTP slewing back, intentional misconfiguration, or VM time
10895    // travel) `now - parsed` can land negative and downstream
10896    // consumers (the doctor surface treats this as "age in seconds")
10897    // would surface a nonsensical figure. The WARN gives operators
10898    // the signal so they can investigate the clock drift instead of
10899    // chasing a phantom backlog.
10900    let raw_age = (Utc::now() - parsed.with_timezone(&Utc)).num_seconds();
10901    let age = if raw_age < 0 {
10902        tracing::warn!(
10903            requested_at = %ts,
10904            raw_age_seconds = raw_age,
10905            "pending_actions row has future timestamp; clamping age to 0"
10906        );
10907        0
10908    } else {
10909        raw_age
10910    };
10911    Ok(Some(age))
10912}
10913
10914/// Count of namespaces that have a standard registered with a non-null
10915/// `metadata.governance` block, and the count without (just a standard
10916/// memory but no policy attached).
10917///
10918/// # Errors
10919///
10920/// Returns `Err` only on hard SQLite failures.
10921pub fn doctor_governance_coverage(conn: &Connection) -> Result<(usize, usize)> {
10922    let with_policy: i64 = conn
10923        .query_row(
10924            "SELECT COUNT(*) FROM memories m
10925             INNER JOIN namespace_meta nm ON nm.standard_id = m.id
10926             WHERE json_extract(m.metadata, '$.governance') IS NOT NULL",
10927            [],
10928            |r| r.get(0),
10929        )
10930        .unwrap_or(0);
10931    let total_meta: i64 = conn
10932        .query_row("SELECT COUNT(*) FROM namespace_meta", [], |r| r.get(0))
10933        .unwrap_or(0);
10934    let with = usize::try_from(with_policy.max(0)).unwrap_or(0);
10935    let total = usize::try_from(total_meta.max(0)).unwrap_or(0);
10936    Ok((with, total.saturating_sub(with)))
10937}
10938
10939/// Distribution of the `parent_namespace` chain depth across
10940/// `namespace_meta` rows. Returns a Vec where index `i` is the count of
10941/// namespaces with chain depth `i` (depth 0 = no parent).
10942///
10943/// Walks each row's `parent_namespace` chain up to a hard cap of 16 to
10944/// avoid runaway loops on malformed data. Rows whose chain exceeds the
10945/// cap are bucketed at the cap.
10946///
10947/// # Errors
10948///
10949/// Returns `Err` only on hard SQLite failures.
10950pub fn doctor_governance_depth_distribution(conn: &Connection) -> Result<Vec<usize>> {
10951    const MAX_DEPTH: usize = 16;
10952    let mut stmt = conn.prepare("SELECT namespace, parent_namespace FROM namespace_meta")?;
10953    let rows = stmt.query_map([], |r| {
10954        Ok((r.get::<_, String>(0)?, r.get::<_, Option<String>>(1)?))
10955    })?;
10956    let parent_map: HashMap<String, Option<String>> = rows
10957        .filter_map(rusqlite::Result::ok)
10958        .collect::<HashMap<_, _>>();
10959    let mut hist = vec![0_usize; MAX_DEPTH + 1];
10960    for ns in parent_map.keys() {
10961        let mut depth = 0_usize;
10962        let mut cur = parent_map.get(ns).cloned().flatten();
10963        while let Some(p) = cur {
10964            depth += 1;
10965            if depth >= MAX_DEPTH {
10966                break;
10967            }
10968            cur = parent_map.get(&p).cloned().flatten();
10969        }
10970        let bucket = depth.min(MAX_DEPTH);
10971        hist[bucket] += 1;
10972    }
10973    Ok(hist)
10974}
10975
10976/// Sum of `subscriptions.dispatch_count` and `subscriptions.failure_count`
10977/// across all rows. Returns `(dispatched, failed)`. Used by the doctor to
10978/// estimate webhook delivery success rate.
10979///
10980/// # Errors
10981///
10982/// Returns `Err` only on hard SQLite failures.
10983pub fn doctor_webhook_delivery_totals(conn: &Connection) -> Result<(u64, u64)> {
10984    let dispatched: i64 = conn
10985        .query_row(
10986            "SELECT COALESCE(SUM(dispatch_count), 0) FROM subscriptions",
10987            [],
10988            |r| r.get(0),
10989        )
10990        .unwrap_or(0);
10991    let failed: i64 = conn
10992        .query_row(
10993            "SELECT COALESCE(SUM(failure_count), 0) FROM subscriptions",
10994            [],
10995            |r| r.get(0),
10996        )
10997        .unwrap_or(0);
10998    Ok((
10999        u64::try_from(dispatched.max(0)).unwrap_or(0),
11000        u64::try_from(failed.max(0)).unwrap_or(0),
11001    ))
11002}
11003
// NOTE: the paragraph below describes `doctor_max_sync_skew_secs`, which is
// defined after the capability-expansion section. It is kept as a plain
// comment so rustdoc does not attach it to `CapabilityExpansionRow`.
//
// Maximum sync-clock skew in seconds across the `sync_state` table —
// the largest gap between `last_pulled_at` (when this peer last heard
// from a peer) and `last_seen_at` (the peer's own `updated_at` advance).
// Returns `Ok(None)` when `sync_state` is empty or the columns are
// missing on a pre-T3 schema.
//
// Errors: returns `Err` only on hard SQLite failures.
11013// ---------------------------------------------------------------------
11014// v0.6.4-009 — capability-expansion audit log
11015// ---------------------------------------------------------------------
11016
/// Single `audit_log` row (capability-expansion shape — extensible).
///
/// Field order matches the column order selected by
/// `list_capability_expansions` and written by
/// `record_capability_expansion`.
#[derive(Debug, Clone)]
pub struct CapabilityExpansionRow {
    /// Row identifier (`audit_log.id`); the recorder writes a v4 UUID string.
    pub id: String,
    /// Agent that made the request, when one was recorded.
    pub agent_id: Option<String>,
    /// Event discriminator; the recorder writes `"capability_expansion"`.
    pub event_type: String,
    /// Capability family that was requested.
    pub requested_family: Option<String>,
    /// Whether the request was granted (stored as an integer, non-zero = true).
    pub granted: bool,
    /// Attestation tier supplied with the request, if any.
    pub attestation_tier: Option<String>,
    /// When the row was recorded; the recorder writes an RFC 3339 UTC string.
    pub timestamp: String,
}
11028
11029/// Record a capability-expansion attempt. Used by
11030/// `handle_capabilities_family` after the allowlist decision is made.
11031/// Records BOTH grant and deny outcomes so operators can see attempted
11032/// access patterns even when the gate refused.
11033///
11034/// `granted=true` means the agent received the schemas; `granted=false`
11035/// means the agent was denied or the family was unknown.
11036///
11037/// Best-effort: a failed insert (e.g., disk full) is logged via tracing
11038/// but does not propagate the error to the caller — the audit trail
11039/// must never block the actual call.
11040pub fn record_capability_expansion(
11041    conn: &Connection,
11042    agent_id: Option<&str>,
11043    family: &str,
11044    granted: bool,
11045    attestation_tier: Option<&str>,
11046) {
11047    let id = uuid::Uuid::new_v4().to_string();
11048    let now = Utc::now().to_rfc3339();
11049    let result = conn.execute(
11050        "INSERT INTO audit_log (id, agent_id, event_type, requested_family, \
11051         granted, attestation_tier, timestamp) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
11052        rusqlite::params![
11053            id,
11054            agent_id,
11055            "capability_expansion",
11056            family,
11057            i32::from(granted),
11058            attestation_tier,
11059            now,
11060        ],
11061    );
11062    if let Err(e) = result {
11063        tracing::warn!(
11064            "audit_log insert failed (capability_expansion / agent={:?} / family={}): {e}",
11065            agent_id,
11066            family,
11067        );
11068    }
11069}
11070
11071/// List recent capability-expansion rows, newest first. `limit` clamps
11072/// the row count.
11073pub fn list_capability_expansions(
11074    conn: &Connection,
11075    limit: usize,
11076    agent_filter: Option<&str>,
11077) -> Result<Vec<CapabilityExpansionRow>> {
11078    let n = (limit.min(10_000)) as i64;
11079    let map_row = |r: &rusqlite::Row<'_>| -> rusqlite::Result<CapabilityExpansionRow> {
11080        Ok(CapabilityExpansionRow {
11081            id: r.get(0)?,
11082            agent_id: r.get(1)?,
11083            event_type: r.get(2)?,
11084            requested_family: r.get(3)?,
11085            granted: r.get::<_, i64>(4)? != 0,
11086            attestation_tier: r.get(5)?,
11087            timestamp: r.get(6)?,
11088        })
11089    };
11090    if let Some(a) = agent_filter {
11091        let mut stmt = conn.prepare(
11092            "SELECT id, agent_id, event_type, requested_family, granted, \
11093             attestation_tier, timestamp FROM audit_log \
11094             WHERE event_type = 'capability_expansion' AND agent_id = ?1 \
11095             ORDER BY timestamp DESC LIMIT ?2",
11096        )?;
11097        let rows = stmt.query_map(rusqlite::params![a, n], map_row)?;
11098        rows.collect::<Result<Vec<_>, _>>().map_err(Into::into)
11099    } else {
11100        let mut stmt = conn.prepare(
11101            "SELECT id, agent_id, event_type, requested_family, granted, \
11102             attestation_tier, timestamp FROM audit_log \
11103             WHERE event_type = 'capability_expansion' \
11104             ORDER BY timestamp DESC LIMIT ?1",
11105        )?;
11106        let rows = stmt.query_map(rusqlite::params![n], map_row)?;
11107        rows.collect::<Result<Vec<_>, _>>().map_err(Into::into)
11108    }
11109}
11110
11111pub fn doctor_max_sync_skew_secs(conn: &Connection) -> Result<Option<i64>> {
11112    let mut stmt = match conn.prepare(
11113        "SELECT last_seen_at, last_pulled_at FROM sync_state WHERE last_pulled_at IS NOT NULL",
11114    ) {
11115        Ok(s) => s,
11116        Err(_) => return Ok(None),
11117    };
11118    let rows = stmt.query_map([], |r| Ok((r.get::<_, String>(0)?, r.get::<_, String>(1)?)))?;
11119    let mut max_skew: Option<i64> = None;
11120    for row in rows {
11121        let Ok((seen, pulled)) = row else { continue };
11122        let Ok(s) = chrono::DateTime::parse_from_rfc3339(&seen) else {
11123            continue;
11124        };
11125        let Ok(p) = chrono::DateTime::parse_from_rfc3339(&pulled) else {
11126            continue;
11127        };
11128        let skew = (s.with_timezone(&Utc) - p.with_timezone(&Utc))
11129            .num_seconds()
11130            .abs();
11131        max_skew = Some(max_skew.map_or(skew, |m| m.max(skew)));
11132    }
11133    Ok(max_skew)
11134}
11135
11136// ---------------------------------------------------------------------------
11137// L1-4 — Reflection-depth telemetry for `ai-memory doctor`.
11138// ---------------------------------------------------------------------------
11139
/// One namespace's reflection-depth distribution row returned by
/// [`doctor_reflection_depth_distribution`].
///
/// The four depth buckets mirror the default `max_reflection_depth=3`
/// cap: depth 0 (direct memories), depth 1, depth 2, depth 3+. Depth
/// 3+ is collapsed into a single counter because depths beyond the cap
/// are impossible to store under standard policy; the bucket exists so
/// future schemas with raised caps still produce a non-zero column.
#[derive(Debug, Clone)]
pub struct ReflectionDepthRow {
    /// Namespace the counters below were aggregated over.
    pub namespace: String,
    /// Memories with `reflection_depth = 0`.
    pub depth0: i64,
    /// Memories with `reflection_depth = 1`.
    pub depth1: i64,
    /// Memories with `reflection_depth = 2`.
    pub depth2: i64,
    /// Memories with `reflection_depth >= 3`.
    pub depth3_plus: i64,
    /// Mean `reflection_depth` over the namespace's memories.
    pub avg_depth: f64,
    /// Largest `reflection_depth` seen in the namespace.
    pub max_depth: i64,
    /// Total number of memories in the namespace.
    pub total: i64,
}
11158
11159/// Depth distribution across all namespaces that hold at least one
11160/// memory with `reflection_depth > 0`, plus the `_global_` aggregate.
11161///
11162/// Uses a single GROUP BY pass so the query is a single indexed scan
11163/// over `memories.reflection_depth`. A fresh DB (all rows at depth 0)
11164/// returns an empty `Vec` — the caller (doctor) renders that as
11165/// "no reflections observed".
11166///
11167/// # Errors
11168///
11169/// Returns `Err` only on hard SQLite failures (e.g. the `memories`
11170/// table does not exist yet — pre-migration schemas).
11171pub fn doctor_reflection_depth_distribution(conn: &Connection) -> Result<Vec<ReflectionDepthRow>> {
11172    // Aggregate per namespace, only namespaces that contain at least
11173    // one reflected memory (depth > 0). The doctor renders a global
11174    // summary from the returned rows; the SQL avoids a second pass by
11175    // letting the caller roll up the namespace rows.
11176    let mut stmt = conn.prepare(
11177        "SELECT
11178             namespace,
11179             SUM(CASE WHEN reflection_depth = 0 THEN 1 ELSE 0 END),
11180             SUM(CASE WHEN reflection_depth = 1 THEN 1 ELSE 0 END),
11181             SUM(CASE WHEN reflection_depth = 2 THEN 1 ELSE 0 END),
11182             SUM(CASE WHEN reflection_depth >= 3 THEN 1 ELSE 0 END),
11183             AVG(CAST(reflection_depth AS REAL)),
11184             MAX(reflection_depth),
11185             COUNT(*)
11186         FROM memories
11187         GROUP BY namespace
11188         HAVING MAX(reflection_depth) > 0
11189         ORDER BY namespace",
11190    )?;
11191    let rows = stmt.query_map([], |r| {
11192        Ok(ReflectionDepthRow {
11193            namespace: r.get(0)?,
11194            depth0: r.get(1)?,
11195            depth1: r.get(2)?,
11196            depth2: r.get(3)?,
11197            depth3_plus: r.get(4)?,
11198            avg_depth: r.get(5)?,
11199            max_depth: r.get(6)?,
11200            total: r.get(7)?,
11201        })
11202    })?;
11203    let mut out = Vec::new();
11204    for row in rows {
11205        out.push(row?);
11206    }
11207    Ok(out)
11208}
11209
11210/// Count of `reflection.depth_exceeded` audit events in `signed_events`
11211/// within a given look-back window.
11212///
11213/// `since_rfc3339` is an RFC 3339 timestamp; only events with
11214/// `timestamp >= since_rfc3339` are counted. Pass the epoch
11215/// (`"1970-01-01T00:00:00Z"`) to count all-time.
11216///
11217/// Returns `0` when the `signed_events` table does not exist (pre-H5
11218/// schemas) rather than propagating the error, matching the pattern
11219/// in other doctor helpers.
11220///
11221/// # Errors
11222///
11223/// Returns `Err` only on hard query failures (table exists but query
11224/// is malformed — should not happen in practice).
11225pub fn doctor_reflection_depth_exceeded_count(
11226    conn: &Connection,
11227    since_rfc3339: &str,
11228) -> Result<i64> {
11229    let n: i64 = conn
11230        .query_row(
11231            "SELECT COUNT(*) FROM signed_events
11232             WHERE event_type = 'reflection.depth_exceeded'
11233               AND timestamp >= ?1",
11234            params![since_rfc3339],
11235            |r| r.get(0),
11236        )
11237        .unwrap_or(0);
11238    Ok(n)
11239}
11240
11241/// Reflection totals per namespace: memories created in the last 24h,
11242/// 7d, and all-time that have `reflection_depth > 0`.
11243///
11244/// Returns one tuple `(ns, last_24h, last_7d, all_time)` per
11245/// namespace that has at least one reflected memory. Namespaces with
11246/// no reflections are omitted; the caller renders "no reflections" for
11247/// the global summary.
11248///
11249/// # Errors
11250///
11251/// Returns `Err` on hard SQLite failures.
11252pub fn doctor_reflection_totals_by_namespace(
11253    conn: &Connection,
11254) -> Result<Vec<(String, i64, i64, i64)>> {
11255    let now = Utc::now();
11256    let last_day_cutoff = (now - chrono::Duration::hours(24)).to_rfc3339();
11257    let cutoff_7d = (now - chrono::Duration::days(7)).to_rfc3339();
11258
11259    let mut stmt = conn.prepare(
11260        "SELECT
11261             namespace,
11262             SUM(CASE WHEN created_at >= ?1 THEN 1 ELSE 0 END),
11263             SUM(CASE WHEN created_at >= ?2 THEN 1 ELSE 0 END),
11264             COUNT(*)
11265         FROM memories
11266         WHERE reflection_depth > 0
11267         GROUP BY namespace
11268         ORDER BY namespace",
11269    )?;
11270    let rows = stmt.query_map(params![last_day_cutoff, cutoff_7d], |r| {
11271        Ok((
11272            r.get::<_, String>(0)?,
11273            r.get::<_, i64>(1)?,
11274            r.get::<_, i64>(2)?,
11275            r.get::<_, i64>(3)?,
11276        ))
11277    })?;
11278    let mut out = Vec::new();
11279    for row in rows {
11280        out.push(row?);
11281    }
11282    Ok(out)
11283}
11284
11285#[cfg(test)]
11286mod tests {
11287    use super::*;
11288    use crate::models::{MID_TTL_EXTEND_SECS, Memory, SHORT_TTL_EXTEND_SECS, Tier};
11289
11290    fn test_db() -> Connection {
11291        open(std::path::Path::new(":memory:")).unwrap()
11292    }
11293
11294    /// Insert a minimal memory row with an explicit `updated_at` so the
11295    /// federation-catchup tests can control the range boundary. Only the
11296    /// NOT-NULL/no-default columns are specified; everything else falls to
11297    /// the schema defaults (which `row_to_memory` reads cleanly).
11298    fn insert_memory_at(conn: &Connection, id: &str, updated_at: &str) {
11299        conn.execute(
11300            "INSERT INTO memories (id, tier, namespace, title, content, created_at, updated_at) \
11301             VALUES (?1, 'mid', 'ns', ?1, 'content body', ?2, ?2)",
11302            params![id, updated_at],
11303        )
11304        .expect("insert memory row");
11305    }
11306
11307    #[test]
11308    fn memories_updated_since_sargable_split_none_and_some_paths() {
11309        // #1476 — the OR-NULL predicate was split into a None path (no
11310        // predicate, ORDER BY updated_at ASC) and a Some path (strict
11311        // `updated_at > ?1`). Pin the behavioral contract of both branches
11312        // so the sargable rewrite can never silently change which rows a
11313        // peer catchup observes.
11314        let conn = test_db();
11315        let t1 = "2026-01-01T00:00:00+00:00";
11316        let t2 = "2026-01-02T00:00:00+00:00";
11317        let t3 = "2026-01-03T00:00:00+00:00";
11318        // Insert out of order to prove ORDER BY actually sorts.
11319        insert_memory_at(&conn, "b", t2);
11320        insert_memory_at(&conn, "c", t3);
11321        insert_memory_at(&conn, "a", t1);
11322
11323        // None path: every row, ascending by updated_at.
11324        let all = memories_updated_since(&conn, None, 100).expect("none path");
11325        let ids: Vec<&str> = all.iter().map(|m| m.id.as_str()).collect();
11326        assert_eq!(
11327            ids,
11328            vec!["a", "b", "c"],
11329            "None path: all rows ASC by updated_at"
11330        );
11331
11332        // Some path is STRICTLY greater — the boundary row (t1) is excluded.
11333        let after_t1 = memories_updated_since(&conn, Some(t1), 100).expect("some path");
11334        let ids: Vec<&str> = after_t1.iter().map(|m| m.id.as_str()).collect();
11335        assert_eq!(
11336            ids,
11337            vec!["b", "c"],
11338            "Some(t1): strict > excludes the boundary row"
11339        );
11340
11341        // Past the newest row → empty.
11342        let after_t3 = memories_updated_since(&conn, Some(t3), 100).expect("some path empty");
11343        assert!(
11344            after_t3.is_empty(),
11345            "Some(t3): nothing strictly newer than the max"
11346        );
11347
11348        // LIMIT caps from the low end of the range (oldest-first under ASC).
11349        let one = memories_updated_since(&conn, Some(t1), 1).expect("some path limited");
11350        let ids: Vec<&str> = one.iter().map(|m| m.id.as_str()).collect();
11351        assert_eq!(
11352            ids,
11353            vec!["b"],
11354            "Some(t1) LIMIT 1: oldest row strictly after t1"
11355        );
11356    }
11357
11358    #[test]
11359    fn memories_updated_since_uses_updated_at_index() {
11360        // #1476 — the sargable Some path must resolve through
11361        // `idx_memories_updated_at`, not a full table scan. Assert the
11362        // query plan references the index via EXPLAIN QUERY PLAN.
11363        let conn = test_db();
11364        let mut stmt = conn
11365            .prepare(
11366                "EXPLAIN QUERY PLAN \
11367                 SELECT id FROM memories WHERE updated_at > ?1 \
11368                 ORDER BY updated_at ASC LIMIT ?2",
11369            )
11370            .expect("prepare explain");
11371        let plan: String = stmt
11372            .query_map(params!["2026-01-01T00:00:00+00:00", 10_i64], |r| {
11373                r.get::<_, String>(3)
11374            })
11375            .expect("explain rows")
11376            .map(|r| r.expect("explain detail"))
11377            .collect::<Vec<_>>()
11378            .join(" | ");
11379        assert!(
11380            plan.contains("idx_memories_updated_at"),
11381            "sargable catchup query must use idx_memories_updated_at; plan was: {plan}"
11382        );
11383    }
11384
11385    #[test]
11386    fn perf_8_hierarchy_in_clause_cache_hits_on_repeat() {
11387        // PERF-8 — verify cached fragment matches the freshly-
11388        // computed value byte-equal. Cache invalidation isn't part
11389        // of the public contract (ancestors are deterministic on
11390        // the namespace input), so a cache hit must be wire-equal
11391        // to a cold compute.
11392        hierarchy_cache_clear_for_tests();
11393        let ns = Some("alphaone/team/alice");
11394        let (a, active_a) = hierarchy_in_clause(ns);
11395        let (b, active_b) = hierarchy_in_clause(ns);
11396        assert!(active_a && active_b);
11397        assert_eq!(
11398            a, b,
11399            "PERF-8: cached hierarchy_in_clause result drift on second lookup",
11400        );
11401        assert!(
11402            a.expect("non-None fragment")
11403                .contains("AND m.namespace IN ("),
11404            "PERF-8: fragment shape regressed",
11405        );
11406    }
11407
11408    #[test]
11409    fn perf_8_hierarchy_cache_handles_non_hierarchical_ns() {
11410        // Non-hierarchical namespaces (no `/`) MUST short-circuit
11411        // before touching the cache so the cache only stores the
11412        // legitimate entries.
11413        hierarchy_cache_clear_for_tests();
11414        let (frag, active) = hierarchy_in_clause(Some("global"));
11415        assert_eq!(frag, None);
11416        assert!(!active);
11417    }
11418
11419    #[test]
11420    fn perf_8_hierarchy_cache_bounded_under_pressure() {
11421        // Filling the cache past HIERARCHY_CACHE_MAX must not
11422        // unbounded-grow it; eviction kicks in beyond the cap.
11423        hierarchy_cache_clear_for_tests();
11424        for i in 0..(HIERARCHY_CACHE_MAX * 2) {
11425            let ns = format!("tenant{i}/sub");
11426            let _ = hierarchy_in_clause(Some(&ns));
11427        }
11428        let cache_len = hierarchy_cache().lock().unwrap().len();
11429        assert!(
11430            cache_len <= HIERARCHY_CACHE_MAX,
11431            "PERF-8: hierarchy cache grew unbounded: {cache_len} > {HIERARCHY_CACHE_MAX}",
11432        );
11433    }
11434
11435    /// v0.7.0 #981 — `get_many` batches the SELECTs the semantic-phase
11436    /// HNSW recall branch previously issued per-id. This test pins:
11437    ///   1. Empty `ids` short-circuits to an empty map without touching
11438    ///      the connection.
11439    ///   2. All requested + existing rows land in the result map.
11440    ///   3. Missing ids are silently dropped (no error, no panic) —
11441    ///      the caller observes via `map.get(&id).is_none()`.
11442    ///   4. Order doesn't matter — `IN (...)` is unordered; callers
11443    ///      that need original ordering re-apply via the hit list.
11444    ///   5. Chunking >500 ids still returns every row.
11445    #[test]
11446    fn get_many_batches_and_handles_empty_missing_and_chunked_inputs_981() {
11447        let conn = test_db();
11448        // Seed 3 rows.
11449        let m1 = make_memory("alpha", "ns/a", Tier::Long, 5);
11450        let m2 = make_memory("beta", "ns/b", Tier::Long, 5);
11451        let m3 = make_memory("gamma", "ns/c", Tier::Long, 5);
11452        insert(&conn, &m1).unwrap();
11453        insert(&conn, &m2).unwrap();
11454        insert(&conn, &m3).unwrap();
11455
11456        // (1) Empty input.
11457        assert!(get_many(&conn, &[]).unwrap().is_empty());
11458
11459        // (2) Existing ids.
11460        let ids = vec![m1.id.clone(), m2.id.clone()];
11461        let got = get_many(&conn, &ids).unwrap();
11462        assert_eq!(got.len(), 2);
11463        assert!(got.contains_key(&m1.id));
11464        assert!(got.contains_key(&m2.id));
11465        assert!(!got.contains_key(&m3.id));
11466
11467        // (3) Mixed existing + missing — missing silently dropped.
11468        let mixed = vec![m1.id.clone(), "nope-not-a-real-id".to_string()];
11469        let got = get_many(&conn, &mixed).unwrap();
11470        assert_eq!(got.len(), 1);
11471        assert!(got.contains_key(&m1.id));
11472
11473        // (4) Order doesn't matter — IN clause is set-like.
11474        let reversed = vec![m3.id.clone(), m2.id.clone(), m1.id.clone()];
11475        let got = get_many(&conn, &reversed).unwrap();
11476        assert_eq!(got.len(), 3);
11477        for id in &reversed {
11478            assert!(got.contains_key(id), "id {id} missing from set-fetch");
11479        }
11480
11481        // (5) Chunked >500 ids still returns every row.
11482        let mut bulk: Vec<Memory> = Vec::with_capacity(750);
11483        let mut bulk_ids: Vec<String> = Vec::with_capacity(750);
11484        for i in 0..750 {
11485            let m = make_memory(&format!("bulk-{i}"), "ns/bulk", Tier::Long, 1);
11486            insert(&conn, &m).unwrap();
11487            bulk_ids.push(m.id.clone());
11488            bulk.push(m);
11489        }
11490        let got = get_many(&conn, &bulk_ids).unwrap();
11491        assert_eq!(
11492            got.len(),
11493            750,
11494            "chunked fetch >500 must still return every row",
11495        );
11496    }
11497
11498    fn make_memory(title: &str, ns: &str, tier: Tier, priority: i32) -> Memory {
11499        let now = chrono::Utc::now().to_rfc3339();
11500        Memory {
11501            id: uuid::Uuid::new_v4().to_string(),
11502            tier: tier.clone(),
11503            namespace: ns.to_string(),
11504            title: title.to_string(),
11505            content: format!("Content for {title}"),
11506            tags: vec![],
11507            priority,
11508            confidence: 1.0,
11509            source: "test".to_string(),
11510            access_count: 0,
11511            created_at: now.clone(),
11512            updated_at: now,
11513            last_accessed_at: None,
11514            expires_at: tier
11515                .default_ttl_secs()
11516                .map(|s| (chrono::Utc::now() + chrono::Duration::seconds(s)).to_rfc3339()),
11517            metadata: serde_json::json!({}),
11518            reflection_depth: 0,
11519            memory_kind: crate::models::MemoryKind::Observation,
11520            entity_id: None,
11521            persona_version: None,
11522            citations: Vec::new(),
11523            source_uri: None,
11524            source_span: None,
11525            confidence_source: ConfidenceSource::CallerProvided,
11526            confidence_signals: None,
11527            confidence_decayed_at: None,
11528            version: 1,
11529        }
11530    }
11531
11532    fn mem_with_scope(ns: &str, scope: Option<&str>) -> Memory {
11533        let mut m = make_memory("scoped", ns, Tier::Long, 5);
11534        if let Some(s) = scope {
11535            let mut map = serde_json::Map::new();
11536            map.insert(
11537                crate::META_KEY_SCOPE.to_string(),
11538                serde_json::Value::String(s.to_string()),
11539            );
11540            m.metadata = serde_json::Value::Object(map);
11541        }
11542        m
11543    }
11544
11545    // Pins the Rust-side visibility predicate (`is_visible`) that the HNSW
11546    // recall branch uses when SQL-side visibility can't be attached. Exercises
11547    // every `MemoryScope` arm plus `matches_subtree`, which the integration
11548    // recall paths only hit for whichever scope the fixture corpus happens to
11549    // carry — leaving the other arms uncovered. Deterministic, no DB.
11550    #[test]
11551    fn is_visible_scope_matrix_covers_every_arm() {
11552        // No-agent caller (all-None prefixes) bypasses the filter entirely.
11553        let unfiltered = (None, None, None, None);
11554        assert!(super::is_visible(
11555            &mem_with_scope("acme/eng/web", Some("private")),
11556            &unfiltered
11557        ));
11558
11559        // 4-level agent ns populates every prefix slot:
11560        // p=acme/eng/web/team, t=acme/eng/web, u=acme/eng, o=acme.
11561        let prefixes = super::compute_visibility_prefixes(Some("acme/eng/web/team"));
11562        assert_eq!(
11563            prefixes,
11564            (
11565                Some("acme/eng/web/team".to_string()),
11566                Some("acme/eng/web".to_string()),
11567                Some("acme/eng".to_string()),
11568                Some("acme".to_string()),
11569            )
11570        );
11571
11572        // Collective: visible to anyone.
11573        assert!(super::is_visible(
11574            &mem_with_scope("zzz/other", Some("collective")),
11575            &prefixes
11576        ));
11577
11578        // Private: only the caller's own namespace (p) is visible.
11579        assert!(super::is_visible(
11580            &mem_with_scope("acme/eng/web/team", Some("private")),
11581            &prefixes
11582        ));
11583        assert!(!super::is_visible(
11584            &mem_with_scope("acme/eng/web", Some("private")),
11585            &prefixes
11586        ));
11587
11588        // Absent scope key → MemoryScope::default() (Private) semantics.
11589        assert!(super::is_visible(
11590            &mem_with_scope("acme/eng/web/team", None),
11591            &prefixes
11592        ));
11593        assert!(!super::is_visible(
11594            &mem_with_scope("acme/other", None),
11595            &prefixes
11596        ));
11597
11598        // Team subtree (t = acme/eng/web): exact + descendant in, sibling out.
11599        assert!(super::is_visible(
11600            &mem_with_scope("acme/eng/web", Some("team")),
11601            &prefixes
11602        ));
11603        assert!(super::is_visible(
11604            &mem_with_scope("acme/eng/web/team/v2", Some("team")),
11605            &prefixes
11606        ));
11607        assert!(!super::is_visible(
11608            &mem_with_scope("acme/eng/api", Some("team")),
11609            &prefixes
11610        ));
11611
11612        // Unit subtree (u = acme/eng).
11613        assert!(super::is_visible(
11614            &mem_with_scope("acme/eng", Some("unit")),
11615            &prefixes
11616        ));
11617        assert!(!super::is_visible(
11618            &mem_with_scope("acme/sales", Some("unit")),
11619            &prefixes
11620        ));
11621
11622        // Org subtree (o = acme).
11623        assert!(super::is_visible(
11624            &mem_with_scope("acme", Some("org")),
11625            &prefixes
11626        ));
11627        assert!(!super::is_visible(
11628            &mem_with_scope("globex", Some("org")),
11629            &prefixes
11630        ));
11631
11632        // matches_subtree None arm: a shallow agent leaves the org slot empty,
11633        // so an org-scoped memory is denied (no prefix to match against).
11634        let shallow = super::compute_visibility_prefixes(Some("acme"));
11635        assert_eq!(shallow.3, None);
11636        assert!(!super::is_visible(
11637            &mem_with_scope("acme", Some("org")),
11638            &shallow
11639        ));
11640
11641        // Unknown scope string → from_str None → caller denied.
11642        assert!(!super::is_visible(
11643            &mem_with_scope("acme/eng/web/team", Some("definitely-not-a-scope")),
11644            &prefixes
11645        ));
11646
11647        // None-agent → all-None tuple (the no-filter sentinel).
11648        assert_eq!(
11649            super::compute_visibility_prefixes(None),
11650            (None, None, None, None)
11651        );
11652    }
11653
11654    #[test]
11655    fn open_creates_schema() {
11656        let conn = test_db();
11657        let count: i64 = conn
11658            .query_row("SELECT COUNT(*) FROM memories", [], |r| r.get(0))
11659            .unwrap();
11660        assert_eq!(count, 0);
11661    }
11662
11663    #[test]
11664    fn insert_and_get() {
11665        let conn = test_db();
11666        let mem = make_memory("Test insert", "test", Tier::Long, 5);
11667        let id = insert(&conn, &mem).unwrap();
11668        let got = get(&conn, &id).unwrap().unwrap();
11669        assert_eq!(got.title, "Test insert");
11670        assert_eq!(got.namespace, "test");
11671        assert_eq!(got.priority, 5);
11672    }
11673
11674    #[test]
11675    fn get_nonexistent() {
11676        let conn = test_db();
11677        let got = get(&conn, "nonexistent-id").unwrap();
11678        assert!(got.is_none());
11679    }
11680
11681    // #1466 — write-path chokepoint regression. A non-Long memory handed
11682    // to any insert path with `expires_at: None` must land with a
11683    // tier-default expiry so GC (`expires_at IS NOT NULL AND expires_at <
11684    // now`) can eventually reap it; before the fix it landed NULL =
11685    // immortal. Long stays NULL; an explicit expiry is preserved.
11686
11687    fn ttl_gap_secs(created_at: &str, expires_at: &str) -> i64 {
11688        let base = chrono::DateTime::parse_from_rfc3339(created_at).unwrap();
11689        let exp = chrono::DateTime::parse_from_rfc3339(expires_at).unwrap();
11690        (exp - base).num_seconds()
11691    }
11692
11693    #[test]
11694    fn insert_backfills_mid_expiry_when_none() {
11695        let conn = test_db();
11696        let mut mem = make_memory("mid none", "test", Tier::Mid, 5);
11697        mem.expires_at = None;
11698        let id = insert(&conn, &mem).unwrap();
11699        let got = get(&conn, &id).unwrap().unwrap();
11700        let exp = got.expires_at.expect("mid must not land immortal");
11701        assert_eq!(ttl_gap_secs(&got.created_at, &exp), crate::SECS_PER_WEEK);
11702    }
11703
11704    #[test]
11705    fn insert_backfills_short_expiry_when_none() {
11706        let conn = test_db();
11707        let mut mem = make_memory("short none", "test", Tier::Short, 5);
11708        mem.expires_at = None;
11709        let id = insert(&conn, &mem).unwrap();
11710        let got = get(&conn, &id).unwrap().unwrap();
11711        let exp = got.expires_at.expect("short must not land immortal");
11712        assert_eq!(
11713            ttl_gap_secs(&got.created_at, &exp),
11714            6 * crate::SECS_PER_HOUR
11715        );
11716    }
11717
11718    #[test]
11719    fn insert_leaves_long_expiry_none() {
11720        let conn = test_db();
11721        let mut mem = make_memory("long none", "test", Tier::Long, 5);
11722        mem.expires_at = None;
11723        let id = insert(&conn, &mem).unwrap();
11724        let got = get(&conn, &id).unwrap().unwrap();
11725        assert!(got.expires_at.is_none(), "long has no TTL — must stay NULL");
11726    }
11727
11728    #[test]
11729    fn insert_preserves_explicit_expiry() {
11730        let conn = test_db();
11731        let explicit = "2027-06-15T12:00:00+00:00".to_string();
11732        let mut mem = make_memory("mid explicit", "test", Tier::Mid, 5);
11733        mem.expires_at = Some(explicit.clone());
11734        let id = insert(&conn, &mem).unwrap();
11735        let got = get(&conn, &id).unwrap().unwrap();
11736        assert_eq!(got.expires_at, Some(explicit));
11737    }
11738
11739    #[test]
11740    fn insert_with_conflict_backfills_mid_expiry_when_none() {
11741        let conn = test_db();
11742        let mut mem = make_memory("conflict mid", "test", Tier::Mid, 5);
11743        mem.expires_at = None;
11744        let id = insert_with_conflict(&conn, &mem, ConflictMode::Merge).unwrap();
11745        let got = get(&conn, &id).unwrap().unwrap();
11746        let exp = got.expires_at.expect("mid must not land immortal");
11747        assert_eq!(ttl_gap_secs(&got.created_at, &exp), crate::SECS_PER_WEEK);
11748    }
11749
11750    #[test]
11751    fn insert_if_newer_backfills_mid_expiry_when_none() {
11752        let conn = test_db();
11753        let mut mem = make_memory("newer mid", "test", Tier::Mid, 5);
11754        mem.expires_at = None;
11755        let id = insert_if_newer(&conn, &mem).unwrap();
11756        let got = get(&conn, &id).unwrap().unwrap();
11757        let exp = got.expires_at.expect("mid must not land immortal");
11758        assert_eq!(ttl_gap_secs(&got.created_at, &exp), crate::SECS_PER_WEEK);
11759    }
11760
11761    #[test]
11762    fn consolidate_backfills_mid_expiry() {
11763        let conn = test_db();
11764        let a = make_memory("src a", "test", Tier::Mid, 5);
11765        let b = make_memory("src b", "test", Tier::Mid, 5);
11766        let id_a = insert(&conn, &a).unwrap();
11767        let id_b = insert(&conn, &b).unwrap();
11768        let new_id = consolidate(
11769            &conn,
11770            &[id_a, id_b],
11771            "merged",
11772            "summary body",
11773            "test",
11774            &Tier::Mid,
11775            "test",
11776            "agent-x",
11777        )
11778        .unwrap();
11779        let got = get(&conn, &new_id).unwrap().unwrap();
11780        let exp = got
11781            .expires_at
11782            .expect("consolidated mid must not land immortal");
11783        assert_eq!(ttl_gap_secs(&got.created_at, &exp), crate::SECS_PER_WEEK);
11784    }
11785
11786    #[test]
11787    fn update_partial_fields() {
11788        let conn = test_db();
11789        let mem = make_memory("Original", "test", Tier::Mid, 5);
11790        let id = insert(&conn, &mem).unwrap();
11791
11792        let (found, content_changed) = update(
11793            &conn,
11794            &id,
11795            Some("Updated Title"),
11796            None,
11797            None,
11798            None,
11799            None,
11800            Some(9),
11801            None,
11802            None,
11803            None,
11804        )
11805        .unwrap();
11806        assert!(found);
11807        assert!(content_changed); // title changed
11808
11809        let got = get(&conn, &id).unwrap().unwrap();
11810        assert_eq!(got.title, "Updated Title");
11811        assert_eq!(got.priority, 9);
11812        assert_eq!(got.content, mem.content); // unchanged
11813    }
11814
11815    #[test]
11816    fn update_content_changed_flag() {
11817        let conn = test_db();
11818        let mem = make_memory("Stable", "test", Tier::Mid, 5);
11819        let id = insert(&conn, &mem).unwrap();
11820
11821        // Updating only priority — content_changed should be false
11822        let (found, content_changed) = update(
11823            &conn,
11824            &id,
11825            None,
11826            None,
11827            None,
11828            None,
11829            None,
11830            Some(8),
11831            None,
11832            None,
11833            None,
11834        )
11835        .unwrap();
11836        assert!(found);
11837        assert!(!content_changed);
11838
11839        // Updating content — content_changed should be true
11840        let (found, content_changed) = update(
11841            &conn,
11842            &id,
11843            None,
11844            Some("New content"),
11845            None,
11846            None,
11847            None,
11848            None,
11849            None,
11850            None,
11851            None,
11852        )
11853        .unwrap();
11854        assert!(found);
11855        assert!(content_changed);
11856    }
11857
11858    #[test]
11859    fn update_nonexistent_returns_false() {
11860        let conn = test_db();
11861        let (found, _) = update(
11862            &conn,
11863            "bad-id",
11864            Some("New"),
11865            None,
11866            None,
11867            None,
11868            None,
11869            None,
11870            None,
11871            None,
11872            None,
11873        )
11874        .unwrap();
11875        assert!(!found);
11876    }
11877
11878    #[test]
11879    fn update_tier_downgrade_protection() {
11880        let conn = test_db();
11881        // Long-tier memory should never be downgraded
11882        let mem = make_memory("Permanent", "test", Tier::Long, 9);
11883        let id = insert(&conn, &mem).unwrap();
11884
11885        let (found, _) = update(
11886            &conn,
11887            &id,
11888            None,
11889            None,
11890            Some(&Tier::Short),
11891            None,
11892            None,
11893            None,
11894            None,
11895            None,
11896            None,
11897        )
11898        .unwrap();
11899        assert!(found);
11900        let got = get(&conn, &id).unwrap().unwrap();
11901        assert_eq!(got.tier, Tier::Long); // still long
11902
11903        // Mid-tier should not downgrade to short
11904        let mem2 = make_memory("Working", "test", Tier::Mid, 5);
11905        let id2 = insert(&conn, &mem2).unwrap();
11906
11907        let (found, _) = update(
11908            &conn,
11909            &id2,
11910            None,
11911            None,
11912            Some(&Tier::Short),
11913            None,
11914            None,
11915            None,
11916            None,
11917            None,
11918            None,
11919        )
11920        .unwrap();
11921        assert!(found);
11922        let got2 = get(&conn, &id2).unwrap().unwrap();
11923        assert_eq!(got2.tier, Tier::Mid); // still mid
11924
11925        // Mid-tier CAN upgrade to long
11926        let (found, _) = update(
11927            &conn,
11928            &id2,
11929            None,
11930            None,
11931            Some(&Tier::Long),
11932            None,
11933            None,
11934            None,
11935            None,
11936            None,
11937            None,
11938        )
11939        .unwrap();
11940        assert!(found);
11941        let got3 = get(&conn, &id2).unwrap().unwrap();
11942        assert_eq!(got3.tier, Tier::Long); // upgraded
11943    }
11944
11945    #[test]
11946    fn update_title_collision_returns_error() {
11947        let conn = test_db();
11948        let mem_a = make_memory("Alpha", "test", Tier::Mid, 5);
11949        let mem_b = make_memory("Beta", "test", Tier::Mid, 5);
11950        let id_a = insert(&conn, &mem_a).unwrap();
11951        let _id_b = insert(&conn, &mem_b).unwrap();
11952
11953        // Updating Alpha's title to "Beta" in same namespace should fail
11954        let result = update(
11955            &conn,
11956            &id_a,
11957            Some("Beta"),
11958            None,
11959            None,
11960            None,
11961            None,
11962            None,
11963            None,
11964            None,
11965            None,
11966        );
11967        assert!(result.is_err());
11968        let err = result.unwrap_err().to_string();
11969        assert!(err.contains("already exists in namespace"));
11970    }
11971
11972    #[test]
11973    fn delete_existing() {
11974        let conn = test_db();
11975        let mem = make_memory("To delete", "test", Tier::Short, 3);
11976        let id = insert(&conn, &mem).unwrap();
11977        assert!(delete(&conn, &id).unwrap());
11978        assert!(get(&conn, &id).unwrap().is_none());
11979    }
11980
11981    #[test]
11982    fn delete_nonexistent() {
11983        let conn = test_db();
11984        assert!(!delete(&conn, "bad-id").unwrap());
11985    }
11986
11987    #[test]
11988    fn list_with_namespace_filter() {
11989        let conn = test_db();
11990        insert(&conn, &make_memory("A", "ns1", Tier::Long, 5)).unwrap();
11991        insert(&conn, &make_memory("B", "ns2", Tier::Long, 5)).unwrap();
11992        insert(&conn, &make_memory("C", "ns1", Tier::Long, 5)).unwrap();
11993
11994        let results = list(
11995            &conn,
11996            Some("ns1"),
11997            None,
11998            100,
11999            0,
12000            None,
12001            None,
12002            None,
12003            None,
12004            None,
12005        )
12006        .unwrap();
12007        assert_eq!(results.len(), 2);
12008    }
12009
12010    #[test]
12011    fn list_with_tier_filter() {
12012        let conn = test_db();
12013        insert(&conn, &make_memory("Long", "test", Tier::Long, 5)).unwrap();
12014        insert(&conn, &make_memory("Mid", "test", Tier::Mid, 5)).unwrap();
12015
12016        let results = list(
12017            &conn,
12018            None,
12019            Some(&Tier::Long),
12020            100,
12021            0,
12022            None,
12023            None,
12024            None,
12025            None,
12026            None,
12027        )
12028        .unwrap();
12029        assert_eq!(results.len(), 1);
12030        assert_eq!(results[0].title, "Long");
12031    }
12032
12033    #[test]
12034    fn list_with_limit() {
12035        let conn = test_db();
12036        for i in 0..5 {
12037            insert(
12038                &conn,
12039                &make_memory(&format!("Mem {i}"), "test", Tier::Long, 5),
12040            )
12041            .unwrap();
12042        }
12043        let results = list(&conn, None, None, 3, 0, None, None, None, None, None).unwrap();
12044        assert_eq!(results.len(), 3);
12045    }
12046
12047    #[test]
12048    fn search_keyword_match() {
12049        let conn = test_db();
12050        insert(
12051            &conn,
12052            &make_memory("PostgreSQL config", "test", Tier::Long, 5),
12053        )
12054        .unwrap();
12055        insert(&conn, &make_memory("Redis cache", "test", Tier::Long, 5)).unwrap();
12056
12057        let results = search(
12058            &conn,
12059            "PostgreSQL",
12060            None,
12061            None,
12062            10,
12063            None,
12064            None,
12065            None,
12066            None,
12067            None,
12068            None,
12069            false,
12070        )
12071        .unwrap();
12072        assert_eq!(results.len(), 1);
12073        assert!(results[0].title.contains("PostgreSQL"));
12074    }
12075
12076    #[test]
12077    fn search_no_match() {
12078        let conn = test_db();
12079        insert(&conn, &make_memory("PostgreSQL", "test", Tier::Long, 5)).unwrap();
12080        let results = search(
12081            &conn,
12082            "nonexistent_term_xyz",
12083            None,
12084            None,
12085            10,
12086            None,
12087            None,
12088            None,
12089            None,
12090            None,
12091            None,
12092            false,
12093        )
12094        .unwrap();
12095        assert_eq!(results.len(), 0);
12096    }
12097
12098    #[test]
12099    fn recall_returns_scored() {
12100        let conn = test_db();
12101        insert(
12102            &conn,
12103            &make_memory("Rust programming language", "test", Tier::Long, 8),
12104        )
12105        .unwrap();
12106        insert(
12107            &conn,
12108            &make_memory("Python scripting", "test", Tier::Long, 5),
12109        )
12110        .unwrap();
12111
12112        let (results, _tokens) = recall(
12113            &conn,
12114            "Rust programming",
12115            None,
12116            10,
12117            None,
12118            None,
12119            None,
12120            SHORT_TTL_EXTEND_SECS,
12121            MID_TTL_EXTEND_SECS,
12122            None,
12123            None,
12124            false,
12125            None,
12126        )
12127        .unwrap();
12128        assert!(!results.is_empty());
12129        // Score should be present
12130        let (mem, score) = &results[0];
12131        assert!(mem.title.contains("Rust"));
12132        assert!(*score > 0.0);
12133    }
12134
12135    #[test]
12136    fn recall_empty_context() {
12137        let conn = test_db();
12138        insert(&conn, &make_memory("Test", "test", Tier::Long, 5)).unwrap();
12139        // Empty context should not crash
12140        let results = recall(
12141            &conn,
12142            "",
12143            None,
12144            10,
12145            None,
12146            None,
12147            None,
12148            SHORT_TTL_EXTEND_SECS,
12149            MID_TTL_EXTEND_SECS,
12150            None,
12151            None,
12152            false,
12153            None,
12154        );
12155        // May return empty or error, both acceptable
12156        assert!(results.is_ok() || results.is_err());
12157    }
12158
12159    #[test]
12160    fn touch_increments_access_count() {
12161        let conn = test_db();
12162        let mem = make_memory("Touchable", "test", Tier::Mid, 5);
12163        let id = insert(&conn, &mem).unwrap();
12164        assert_eq!(get(&conn, &id).unwrap().unwrap().access_count, 0);
12165
12166        touch(&conn, &id, SHORT_TTL_EXTEND_SECS, MID_TTL_EXTEND_SECS).unwrap();
12167        assert_eq!(get(&conn, &id).unwrap().unwrap().access_count, 1);
12168
12169        touch(&conn, &id, SHORT_TTL_EXTEND_SECS, MID_TTL_EXTEND_SECS).unwrap();
12170        assert_eq!(get(&conn, &id).unwrap().unwrap().access_count, 2);
12171    }
12172
12173    #[test]
12174    fn find_contradictions_similar_titles() {
12175        let conn = test_db();
12176        insert(
12177            &conn,
12178            &make_memory("Database is PostgreSQL", "infra", Tier::Long, 8),
12179        )
12180        .unwrap();
12181        insert(
12182            &conn,
12183            &make_memory("Database is MySQL", "infra", Tier::Long, 5),
12184        )
12185        .unwrap();
12186
12187        let contradictions = find_contradictions(&conn, "Database is PostgreSQL", "infra").unwrap();
12188        assert!(!contradictions.is_empty());
12189    }
12190
12191    /// Issue #1320 regression — disjoint-topic titles that share only
12192    /// English stopwords ("are", "is", "the") MUST NOT surface as
12193    /// potential contradictions of each other. Pre-fix the FTS5
12194    /// OR-joined query matched any row containing the stopword, so a
12195    /// tomato-fact stored alongside a moon-landing fact and a
12196    /// retrieval-mechanics fact returned every cross-topic pair as
12197    /// `potential_contradictions`. Post-fix the Jaccard floor on
12198    /// stopword-stripped title tokens drops the false positives;
12199    /// `Vec::is_empty()` is the post-condition.
12200    #[test]
12201    fn find_contradictions_disjoint_topics_no_false_positives_1320() {
12202        let conn = test_db();
12203        insert(
12204            &conn,
12205            &make_memory("Tomatoes are red fruit", "v1-p5-disjoint", Tier::Long, 5),
12206        )
12207        .unwrap();
12208        insert(
12209            &conn,
12210            &make_memory(
12211                "Moon landing happened in 1969",
12212                "v1-p5-disjoint",
12213                Tier::Long,
12214                5,
12215            ),
12216        )
12217        .unwrap();
12218        insert(
12219            &conn,
12220            &make_memory(
12221                "Retrieval-augmented generation works by combining recall with synthesis",
12222                "v1-p5-disjoint",
12223                Tier::Long,
12224                5,
12225            ),
12226        )
12227        .unwrap();
12228
12229        // Tomato seed must not flag moon-landing or retrieval rows.
12230        let hits = find_contradictions(&conn, "Tomatoes are red fruit", "v1-p5-disjoint").unwrap();
12231        assert!(
12232            hits.iter().all(|m| m.title == "Tomatoes are red fruit"),
12233            "tomato seed leaked false positives: {:?}",
12234            hits.iter().map(|m| m.title.as_str()).collect::<Vec<_>>(),
12235        );
12236
12237        // Moon-landing seed must not flag tomato or retrieval rows.
12238        let hits =
12239            find_contradictions(&conn, "Moon landing happened in 1969", "v1-p5-disjoint").unwrap();
12240        assert!(
12241            hits.iter()
12242                .all(|m| m.title == "Moon landing happened in 1969"),
12243            "moon-landing seed leaked false positives: {:?}",
12244            hits.iter().map(|m| m.title.as_str()).collect::<Vec<_>>(),
12245        );
12246
12247        // Retrieval seed must not flag tomato or moon-landing rows.
12248        let hits = find_contradictions(
12249            &conn,
12250            "Retrieval-augmented generation works by combining recall with synthesis",
12251            "v1-p5-disjoint",
12252        )
12253        .unwrap();
12254        assert!(
12255            hits.iter().all(|m| m.title.starts_with("Retrieval")),
12256            "retrieval seed leaked false positives: {:?}",
12257            hits.iter().map(|m| m.title.as_str()).collect::<Vec<_>>(),
12258        );
12259    }
12260
12261    /// Issue #1320 regression — pure-stopword seed title must not pull
12262    /// any rows. Pre-fix the FTS5 OR-query expanded to a no-op against
12263    /// the stopword set; post-fix the seed tokenises to empty after
12264    /// stopword removal so the Jaccard floor returns 0 for every
12265    /// candidate.
12266    #[test]
12267    fn find_contradictions_pure_stopword_seed_returns_empty_1320() {
12268        let conn = test_db();
12269        insert(
12270            &conn,
12271            &make_memory(
12272                "The thing is the other thing",
12273                "v1-p5-stopword",
12274                Tier::Long,
12275                5,
12276            ),
12277        )
12278        .unwrap();
12279        let hits = find_contradictions(&conn, "the is a", "v1-p5-stopword").unwrap();
12280        assert!(
12281            hits.is_empty(),
12282            "pure-stopword seed pulled candidates: {:?}",
12283            hits.iter().map(|m| m.title.as_str()).collect::<Vec<_>>(),
12284        );
12285    }
12286
12287    /// Issue #1320 — stage-2 filter must not over-prune the legitimate
12288    /// near-duplicate case. "Database is PostgreSQL" and "Database is
12289    /// MySQL" share `{database}` after stopword removal — Jaccard 1/3,
12290    /// passes the 0.30 floor. Pinned alongside the false-positive test
12291    /// so a future tightening of the floor can't silently regress the
12292    /// supported "similar-title" detection.
12293    #[test]
12294    fn find_contradictions_similar_titles_still_caught_1320() {
12295        let conn = test_db();
12296        insert(
12297            &conn,
12298            &make_memory("Database is PostgreSQL", "v1-p5-positive", Tier::Long, 8),
12299        )
12300        .unwrap();
12301        insert(
12302            &conn,
12303            &make_memory("Database is MySQL", "v1-p5-positive", Tier::Long, 5),
12304        )
12305        .unwrap();
12306        let hits = find_contradictions(&conn, "Database is PostgreSQL", "v1-p5-positive").unwrap();
12307        let titles: Vec<&str> = hits.iter().map(|m| m.title.as_str()).collect();
12308        assert!(
12309            titles.contains(&"Database is MySQL"),
12310            "similar-title detection regressed: {titles:?}",
12311        );
12312    }
12313
12314    #[test]
12315    fn contradiction_title_jaccard_floor_pinned_1320() {
12316        // Pin the compiled floor at 0.30 (the v0.7.0 #1320 calibration
12317        // landing). Lowering it re-introduces stopword noise; raising
12318        // it breaks the "Database is PostgreSQL / MySQL" near-duplicate
12319        // case (Jaccard 1/3 ≈ 0.333). Either direction needs an issue
12320        // ticket and a fresh calibration sweep.
12321        assert!(
12322            (CONTRADICTION_TITLE_JACCARD_FLOOR - 0.30).abs() < f32::EPSILON,
12323            "floor drifted: {CONTRADICTION_TITLE_JACCARD_FLOOR}",
12324        );
12325    }
12326
12327    #[test]
12328    fn contradiction_title_tokens_strips_stopwords_and_lowercases_1320() {
12329        let toks = contradiction_title_tokens("The Database Is PostgreSQL");
12330        assert!(toks.contains("database"));
12331        assert!(toks.contains("postgresql"));
12332        assert!(!toks.contains("the"));
12333        assert!(!toks.contains("is"));
12334    }
12335
12336    #[test]
12337    fn create_and_get_links() {
12338        let conn = test_db();
12339        let id1 = insert(&conn, &make_memory("Memory A", "test", Tier::Long, 5)).unwrap();
12340        let id2 = insert(&conn, &make_memory("Memory B", "test", Tier::Long, 5)).unwrap();
12341
12342        create_link(&conn, &id1, &id2, "related_to").unwrap();
12343        let links = get_links(&conn, &id1).unwrap();
12344        assert_eq!(links.len(), 1);
12345        assert_eq!(
12346            links[0].relation,
12347            crate::models::MemoryLinkRelation::RelatedTo
12348        );
12349    }
12350
12351    #[test]
12352    fn consolidate_merges_memories() {
12353        let conn = test_db();
12354        let id1 = insert(&conn, &make_memory("Part 1", "test", Tier::Mid, 5)).unwrap();
12355        let id2 = insert(&conn, &make_memory("Part 2", "test", Tier::Mid, 5)).unwrap();
12356
12357        let new_id = consolidate(
12358            &conn,
12359            &[id1.clone(), id2.clone()],
12360            "Combined",
12361            "Part 1 + Part 2",
12362            "test",
12363            &Tier::Long,
12364            "test",
12365            "test-consolidator",
12366        )
12367        .unwrap();
12368        // Original memories should be deleted
12369        assert!(get(&conn, &id1).unwrap().is_none());
12370        assert!(get(&conn, &id2).unwrap().is_none());
12371        // New memory should exist
12372        let combined = get(&conn, &new_id).unwrap().unwrap();
12373        assert_eq!(combined.title, "Combined");
12374        assert_eq!(combined.tier, Tier::Long);
12375    }
12376
12377    #[test]
12378    fn stats_counts() {
12379        let conn = test_db();
12380        let path = std::path::Path::new(":memory:");
12381        insert(&conn, &make_memory("A", "ns1", Tier::Long, 5)).unwrap();
12382        insert(&conn, &make_memory("B", "ns1", Tier::Mid, 5)).unwrap();
12383        insert(&conn, &make_memory("C", "ns2", Tier::Short, 5)).unwrap();
12384
12385        let s = stats(&conn, path).unwrap();
12386        assert_eq!(s.total, 3);
12387    }
12388
12389    #[test]
12390    fn gc_removes_expired() {
12391        let conn = test_db();
12392        let mut mem = make_memory("Expired", "test", Tier::Short, 5);
12393        mem.expires_at = Some("2020-01-01T00:00:00+00:00".to_string()); // past
12394        insert(&conn, &mem).unwrap();
12395
12396        let removed = gc(&conn, false).unwrap();
12397        assert_eq!(removed, 1);
12398    }
12399
12400    #[test]
12401    fn gc_preserves_long_term() {
12402        let conn = test_db();
12403        insert(&conn, &make_memory("Permanent", "test", Tier::Long, 5)).unwrap();
12404        let removed = gc(&conn, false).unwrap();
12405        assert_eq!(removed, 0);
12406    }
12407
12408    #[test]
12409    fn gc_archives_before_delete() {
12410        let conn = test_db();
12411        let mut mem = make_memory("Archivable", "test", Tier::Short, 5);
12412        mem.expires_at = Some("2020-01-01T00:00:00+00:00".to_string());
12413        insert(&conn, &mem).unwrap();
12414
12415        let removed = gc(&conn, true).unwrap();
12416        assert_eq!(removed, 1);
12417
12418        // Should be in archive
12419        let archived = list_archived(&conn, None, 10, 0).unwrap();
12420        assert_eq!(archived.len(), 1);
12421        assert_eq!(archived[0]["title"], "Archivable");
12422        assert_eq!(archived[0]["archive_reason"], "ttl_expired");
12423    }
12424
12425    #[test]
12426    fn restore_archived_memory() {
12427        // v0.6.3.1 P2 (G5) — restore preserves the original tier and
12428        // expires_at instead of resetting to long/permanent. Pre-v17 this
12429        // test asserted `is_none()` for expires_at — that was the bug
12430        // being fixed.
12431        let conn = test_db();
12432        let mut mem = make_memory("Restorable", "test", Tier::Short, 5);
12433        let original_expiry = "2020-01-01T00:00:00+00:00".to_string();
12434        mem.expires_at = Some(original_expiry.clone());
12435        let id = insert(&conn, &mem).unwrap();
12436
12437        gc(&conn, true).unwrap();
12438        assert!(get(&conn, &id).unwrap().is_none()); // gone from active
12439
12440        let restored = restore_archived(&conn, &id).unwrap();
12441        assert!(restored);
12442
12443        let got = get(&conn, &id).unwrap().unwrap();
12444        assert_eq!(got.title, "Restorable");
12445        assert_eq!(
12446            got.tier.as_str(),
12447            Tier::Short.as_str(),
12448            "G5: restore must preserve the original tier"
12449        );
12450        assert_eq!(
12451            got.expires_at,
12452            Some(original_expiry),
12453            "G5: restore must preserve the original expires_at"
12454        );
12455    }
12456
12457    #[test]
12458    fn purge_archive_removes_all() {
12459        let conn = test_db();
12460        let mut mem = make_memory("Purgeable", "test", Tier::Short, 5);
12461        mem.expires_at = Some("2020-01-01T00:00:00+00:00".to_string());
12462        insert(&conn, &mem).unwrap();
12463        gc(&conn, true).unwrap();
12464
12465        let purged = purge_archive(&conn, None).unwrap();
12466        assert_eq!(purged, 1);
12467        assert_eq!(list_archived(&conn, None, 10, 0).unwrap().len(), 0);
12468    }
12469
12470    #[test]
12471    fn purge_archive_rejects_negative_days() {
12472        let conn = test_db();
12473        let result = purge_archive(&conn, Some(-1));
12474        assert!(result.is_err());
12475        assert!(result.unwrap_err().to_string().contains("non-negative"));
12476    }
12477
12478    #[test]
12479    fn restore_rejects_active_id_collision() {
12480        let conn = test_db();
12481        let mut mem = make_memory("Collision Test", "test", Tier::Short, 5);
12482        mem.expires_at = Some("2020-01-01T00:00:00+00:00".to_string());
12483        let id = insert(&conn, &mem).unwrap();
12484
12485        // Archive it via GC
12486        gc(&conn, true).unwrap();
12487        assert!(get(&conn, &id).unwrap().is_none());
12488
12489        // Manually insert a memory with the SAME id but different title into active table
12490        conn.execute(
12491            "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, confidence, source, access_count, created_at, updated_at)
12492             VALUES (?1, 'long', 'test', 'Blocker Title', 'blocks restore', '[]', 5, 1.0, 'test', 0, datetime('now'), datetime('now'))",
12493            rusqlite::params![id],
12494        ).unwrap();
12495
12496        // Restore should fail because id exists in active table
12497        let result = restore_archived(&conn, &id);
12498        assert!(result.is_err());
12499        assert!(
12500            result
12501                .unwrap_err()
12502                .to_string()
12503                .contains("already exists in active table")
12504        );
12505    }
12506
12507    #[test]
12508    fn archive_stats_counts() {
12509        let conn = test_db();
12510        let mut m1 = make_memory("Stats A", "ns1", Tier::Short, 5);
12511        m1.expires_at = Some("2020-01-01T00:00:00+00:00".to_string());
12512        let mut m2 = make_memory("Stats B", "ns1", Tier::Short, 5);
12513        m2.expires_at = Some("2020-01-01T00:00:00+00:00".to_string());
12514        insert(&conn, &m1).unwrap();
12515        insert(&conn, &m2).unwrap();
12516        gc(&conn, true).unwrap();
12517
12518        let stats = archive_stats(&conn).unwrap();
12519        assert_eq!(stats["archived_total"], 2);
12520    }
12521
12522    #[test]
12523    fn archive_memory_moves_live_row_to_archive() {
12524        // S29 — explicit archive endpoint must move the row out of
12525        // `memories` and into `archived_memories` with the caller-supplied
12526        // reason. Unlike gc(archive=true), this is NOT gated on
12527        // `expires_at` — the caller is asking for it right now.
12528        let conn = test_db();
12529        let mem = make_memory("Archive me", "s29", Tier::Long, 5);
12530        let id = insert(&conn, &mem).unwrap();
12531
12532        let moved = archive_memory(&conn, &id, Some("explicit")).unwrap();
12533        assert!(moved, "live row must be archived on first call");
12534        assert!(
12535            get(&conn, &id).unwrap().is_none(),
12536            "row must be removed from active table"
12537        );
12538
12539        let archived = list_archived(&conn, None, 10, 0).unwrap();
12540        assert_eq!(archived.len(), 1);
12541        assert_eq!(archived[0]["id"], id);
12542        assert_eq!(archived[0]["archive_reason"], "explicit");
12543
12544        // Second call is a no-op — row is already out of `memories`.
12545        let second = archive_memory(&conn, &id, Some("explicit")).unwrap();
12546        assert!(
12547            !second,
12548            "second archive call must report no-op (no live row)"
12549        );
12550    }
12551
12552    #[test]
12553    fn archive_memory_missing_id_returns_false() {
12554        // Peers that never saw M1 must no-op, not error, on sync_push
12555        // archives fanout.
12556        let conn = test_db();
12557        let moved = archive_memory(&conn, "nonexistent-id", None).unwrap();
12558        assert!(!moved);
12559    }
12560
12561    #[test]
12562    fn archive_memory_default_reason_is_archive() {
12563        let conn = test_db();
12564        let mem = make_memory("Default reason", "s29", Tier::Long, 5);
12565        let id = insert(&conn, &mem).unwrap();
12566        assert!(archive_memory(&conn, &id, None).unwrap());
12567        let archived = list_archived(&conn, None, 10, 0).unwrap();
12568        assert_eq!(archived[0]["archive_reason"], "archive");
12569    }
12570
12571    #[test]
12572    fn export_all_and_links() {
12573        let conn = test_db();
12574        let id1 = insert(&conn, &make_memory("Export A", "test", Tier::Long, 5)).unwrap();
12575        let id2 = insert(&conn, &make_memory("Export B", "test", Tier::Long, 5)).unwrap();
12576        create_link(&conn, &id1, &id2, "supersedes").unwrap();
12577
12578        let mems = export_all(&conn).unwrap();
12579        assert_eq!(mems.len(), 2);
12580        let links = export_links(&conn).unwrap();
12581        assert_eq!(links.len(), 1);
12582    }
12583
12584    #[test]
12585    fn list_namespaces_counts() {
12586        let conn = test_db();
12587        insert(&conn, &make_memory("A", "alpha", Tier::Long, 5)).unwrap();
12588        insert(&conn, &make_memory("B", "alpha", Tier::Long, 5)).unwrap();
12589        insert(&conn, &make_memory("C", "beta", Tier::Long, 5)).unwrap();
12590
12591        let ns = list_namespaces(&conn).unwrap();
12592        assert_eq!(ns.len(), 2);
12593    }
12594
12595    #[test]
12596    fn taxonomy_flat_namespaces_only() {
12597        // No `/` anywhere — every namespace is a direct child of the root.
12598        let conn = test_db();
12599        insert(&conn, &make_memory("A", "alpha", Tier::Long, 5)).unwrap();
12600        insert(&conn, &make_memory("B", "alpha", Tier::Long, 5)).unwrap();
12601        insert(&conn, &make_memory("C", "beta", Tier::Long, 5)).unwrap();
12602
12603        let tax = get_taxonomy(&conn, None, 8, 1000).unwrap();
12604        assert_eq!(tax.total_count, 3);
12605        assert!(!tax.truncated);
12606        assert_eq!(tax.tree.namespace, "");
12607        assert_eq!(tax.tree.subtree_count, 3);
12608        assert_eq!(tax.tree.count, 0); // no memories at the synthetic root
12609        assert_eq!(tax.tree.children.len(), 2);
12610        let alpha = tax
12611            .tree
12612            .children
12613            .iter()
12614            .find(|c| c.name == "alpha")
12615            .unwrap();
12616        assert_eq!(alpha.count, 2);
12617        assert_eq!(alpha.subtree_count, 2);
12618        assert!(alpha.children.is_empty());
12619        let beta = tax.tree.children.iter().find(|c| c.name == "beta").unwrap();
12620        assert_eq!(beta.count, 1);
12621    }
12622
12623    #[test]
12624    fn taxonomy_hierarchical_tree() {
12625        // Mixed depths: tree must aggregate counts up the spine.
12626        let conn = test_db();
12627        insert(&conn, &make_memory("a", "alphaone", Tier::Long, 5)).unwrap();
12628        insert(&conn, &make_memory("b", "alphaone/eng", Tier::Long, 5)).unwrap();
12629        insert(
12630            &conn,
12631            &make_memory("c", "alphaone/eng/platform", Tier::Long, 5),
12632        )
12633        .unwrap();
12634        insert(
12635            &conn,
12636            &make_memory("d", "alphaone/eng/platform", Tier::Long, 5),
12637        )
12638        .unwrap();
12639        insert(&conn, &make_memory("e", "alphaone/sales", Tier::Long, 5)).unwrap();
12640
12641        let tax = get_taxonomy(&conn, None, 8, 1000).unwrap();
12642        assert_eq!(tax.total_count, 5);
12643        assert_eq!(tax.tree.subtree_count, 5);
12644        assert_eq!(tax.tree.children.len(), 1);
12645
12646        let alphaone = &tax.tree.children[0];
12647        assert_eq!(alphaone.name, "alphaone");
12648        assert_eq!(alphaone.namespace, "alphaone");
12649        assert_eq!(alphaone.count, 1); // memory "a" lives at exactly "alphaone"
12650        assert_eq!(alphaone.subtree_count, 5);
12651        assert_eq!(alphaone.children.len(), 2);
12652
12653        let eng = alphaone.children.iter().find(|c| c.name == "eng").unwrap();
12654        assert_eq!(eng.namespace, "alphaone/eng");
12655        assert_eq!(eng.count, 1);
12656        assert_eq!(eng.subtree_count, 3);
12657        let platform = &eng.children[0];
12658        assert_eq!(platform.name, "platform");
12659        assert_eq!(platform.namespace, "alphaone/eng/platform");
12660        assert_eq!(platform.count, 2);
12661        assert_eq!(platform.subtree_count, 2);
12662        assert!(platform.children.is_empty());
12663    }
12664
12665    #[test]
12666    fn taxonomy_prefix_scopes_subtree() {
12667        let conn = test_db();
12668        insert(&conn, &make_memory("a", "alphaone/eng", Tier::Long, 5)).unwrap();
12669        insert(
12670            &conn,
12671            &make_memory("b", "alphaone/eng/platform", Tier::Long, 5),
12672        )
12673        .unwrap();
12674        insert(&conn, &make_memory("c", "alphaone/sales", Tier::Long, 5)).unwrap();
12675        // Sibling that happens to share a string prefix — must NOT bleed in.
12676        insert(&conn, &make_memory("d", "alphaone-sibling", Tier::Long, 5)).unwrap();
12677        insert(&conn, &make_memory("e", "other", Tier::Long, 5)).unwrap();
12678
12679        let tax = get_taxonomy(&conn, Some("alphaone/eng"), 8, 1000).unwrap();
12680        assert_eq!(tax.total_count, 2);
12681        assert_eq!(tax.tree.namespace, "alphaone/eng");
12682        assert_eq!(tax.tree.name, "eng");
12683        assert_eq!(tax.tree.count, 1);
12684        assert_eq!(tax.tree.subtree_count, 2);
12685        assert_eq!(tax.tree.children.len(), 1);
12686        assert_eq!(tax.tree.children[0].name, "platform");
12687        assert_eq!(tax.tree.children[0].count, 1);
12688    }
12689
12690    /// #1531 L5 — `validate_namespace` permits the LIKE metacharacters
12691    /// `%` / `_` in segments (historical flexibility), so the taxonomy
12692    /// prefix walk must escape its descendant pattern. Pre-fix the
12693    /// unescaped `LIKE ?2 || '/%'` let prefix `a%` aggregate the `ax/...`
12694    /// subtree.
12695    #[test]
12696    fn taxonomy_prefix_like_metacharacters_do_not_widen_match_l5() {
12697        let conn = test_db();
12698        insert(&conn, &make_memory("a", "a%/child", Tier::Long, 5)).unwrap();
12699        insert(&conn, &make_memory("b", "ax/child", Tier::Long, 5)).unwrap();
12700        insert(&conn, &make_memory("c", "a_/child", Tier::Long, 5)).unwrap();
12701
12702        // Literal `a%` prefix must scope to the `a%` subtree only.
12703        let tax = get_taxonomy(&conn, Some("a%"), 8, 1000).unwrap();
12704        assert_eq!(
12705            tax.total_count, 1,
12706            "prefix 'a%' must not aggregate 'ax/...' or 'a_/...' subtrees"
12707        );
12708
12709        // Literal `a_` prefix likewise.
12710        let tax = get_taxonomy(&conn, Some("a_"), 8, 1000).unwrap();
12711        assert_eq!(
12712            tax.total_count, 1,
12713            "prefix 'a_' must not aggregate single-char-wildcard siblings"
12714        );
12715
12716        // Plain prefixes are unchanged.
12717        let tax = get_taxonomy(&conn, Some("ax"), 8, 1000).unwrap();
12718        assert_eq!(tax.total_count, 1);
12719    }
12720
12721    #[test]
12722    fn taxonomy_depth_clamps_but_preserves_subtree_counts() {
12723        let conn = test_db();
12724        insert(
12725            &conn,
12726            &make_memory("a", "alphaone/eng/platform/db", Tier::Long, 5),
12727        )
12728        .unwrap();
12729        insert(
12730            &conn,
12731            &make_memory("b", "alphaone/eng/platform/api", Tier::Long, 5),
12732        )
12733        .unwrap();
12734
12735        let tax = get_taxonomy(&conn, None, 2, 1000).unwrap();
12736        assert_eq!(tax.total_count, 2);
12737        let alphaone = &tax.tree.children[0];
12738        let eng = &alphaone.children[0];
12739        // Depth=2 below the empty prefix means we descend exactly two
12740        // levels (alphaone → eng); deeper segments are folded into
12741        // `eng.subtree_count` without rendering child nodes.
12742        assert!(eng.children.is_empty());
12743        assert_eq!(eng.subtree_count, 2);
12744        assert_eq!(eng.count, 0); // nothing at exactly "alphaone/eng"
12745    }
12746
12747    #[test]
12748    fn taxonomy_excludes_expired_memories() {
12749        // Mirror of `list_namespaces` semantics — expired rows must not
12750        // count toward either the tree or `total_count`.
12751        let conn = test_db();
12752        let mut alive = make_memory("alive", "alpha", Tier::Long, 5);
12753        let mut dead = make_memory("dead", "alpha", Tier::Short, 5);
12754        // Force the short-tier memory's expiry into the past.
12755        dead.expires_at = Some("2000-01-01T00:00:00Z".to_string());
12756        alive.expires_at = None;
12757        insert(&conn, &alive).unwrap();
12758        insert(&conn, &dead).unwrap();
12759
12760        let tax = get_taxonomy(&conn, None, 8, 1000).unwrap();
12761        assert_eq!(tax.total_count, 1);
12762        assert_eq!(tax.tree.children.len(), 1);
12763        assert_eq!(tax.tree.children[0].count, 1);
12764    }
12765
12766    #[test]
12767    fn taxonomy_truncates_at_limit_but_total_stays_honest() {
12768        let conn = test_db();
12769        for ns in ["aa", "bb", "cc", "dd", "ee"] {
12770            insert(&conn, &make_memory("m", ns, Tier::Long, 5)).unwrap();
12771        }
12772        let tax = get_taxonomy(&conn, None, 8, 2).unwrap();
12773        // Limit drops 3 namespaces from the walk; total_count must
12774        // still see all 5 memories so renderers can warn the user.
12775        assert_eq!(tax.total_count, 5);
12776        assert!(tax.truncated);
12777        assert_eq!(tax.tree.children.len(), 2);
12778    }
12779
12780    #[test]
12781    fn forget_by_namespace() {
12782        let conn = test_db();
12783        insert(&conn, &make_memory("A", "delete-me", Tier::Long, 5)).unwrap();
12784        insert(&conn, &make_memory("B", "delete-me", Tier::Long, 5)).unwrap();
12785        insert(&conn, &make_memory("C", "keep", Tier::Long, 5)).unwrap();
12786
12787        let deleted = forget(&conn, Some("delete-me"), None, None, false).unwrap();
12788        assert_eq!(deleted, 2);
12789        let remaining = list(&conn, None, None, 100, 0, None, None, None, None, None).unwrap();
12790        assert_eq!(remaining.len(), 1);
12791    }
12792
12793    #[test]
12794    fn set_and_get_embedding() {
12795        let conn = test_db();
12796        let mem = make_memory("Embed test", "test", Tier::Long, 5);
12797        let id = insert(&conn, &mem).unwrap();
12798
12799        let emb = vec![0.1f32, 0.2, 0.3, 0.4];
12800        set_embedding(&conn, &id, &emb).unwrap();
12801
12802        let got = get_embedding(&conn, &id).unwrap().unwrap();
12803        assert_eq!(got.len(), 4);
12804        assert!((got[0] - 0.1).abs() < 1e-6);
12805    }
12806
12807    // -- #1595 / #1598 — resilient-backfill + reembed storage helpers --
12808
12809    /// #1595 — the keyset fetch pages strictly past the cursor in `id`
12810    /// order, and rows that gain an embedding drop out of the scan.
12811    #[test]
12812    fn unembedded_batch_after_cursor_paginates_1595() {
12813        let conn = test_db();
12814        let mut ids: Vec<String> = (0..5)
12815            .map(|i| {
12816                insert(
12817                    &conn,
12818                    &make_memory(&format!("row-{i}"), "bf-1595", Tier::Long, 5),
12819                )
12820                .unwrap()
12821            })
12822            .collect();
12823        ids.sort();
12824
12825        let first = get_unembedded_ids_batch_after(&conn, None, 2).unwrap();
12826        assert_eq!(first.len(), 2);
12827        assert_eq!(first[0].0, ids[0], "scan starts at the smallest id");
12828        let cursor = first.last().unwrap().0.clone();
12829
12830        let rest = get_unembedded_ids_batch_after(&conn, Some(&cursor), 10).unwrap();
12831        assert_eq!(rest.len(), 3);
12832        assert!(
12833            rest.iter().all(|(id, _, _)| id.as_str() > cursor.as_str()),
12834            "every row must sort strictly after the cursor"
12835        );
12836
12837        // Embedded rows leave the unembedded predicate.
12838        set_embedding(&conn, &ids[0], &[0.1, 0.2]).unwrap();
12839        let after = get_unembedded_ids_batch_after(&conn, None, 10).unwrap();
12840        assert_eq!(after.len(), 4);
12841        assert!(after.iter().all(|(id, _, _)| id != &ids[0]));
12842    }
12843
12844    /// #1598 — the reembed full-corpus scan returns embedded AND
12845    /// unembedded rows, honors the namespace filter, and pages by
12846    /// cursor.
12847    #[test]
12848    fn memory_texts_batch_namespace_and_cursor_1598() {
12849        let conn = test_db();
12850        let mut ns_a_ids: Vec<String> = (0..3)
12851            .map(|i| {
12852                insert(
12853                    &conn,
12854                    &make_memory(&format!("a-{i}"), "reembed-a", Tier::Long, 5),
12855                )
12856                .unwrap()
12857            })
12858            .collect();
12859        ns_a_ids.sort();
12860        for i in 0..2 {
12861            insert(
12862                &conn,
12863                &make_memory(&format!("b-{i}"), "reembed-b", Tier::Long, 5),
12864            )
12865            .unwrap();
12866        }
12867        // An already-embedded row MUST still be scanned — reembed
12868        // replaces existing vectors, it is not a backfill.
12869        set_embedding(&conn, &ns_a_ids[0], &[0.5, 0.5]).unwrap();
12870
12871        let all = get_memory_texts_batch(&conn, None, None, 100).unwrap();
12872        assert_eq!(all.len(), 5, "unfiltered scan sees every live row");
12873
12874        let ns_a = get_memory_texts_batch(&conn, Some("reembed-a"), None, 100).unwrap();
12875        assert_eq!(ns_a.len(), 3);
12876        assert_eq!(ns_a[0].0, ns_a_ids[0], "embedded row still scanned");
12877
12878        let first = get_memory_texts_batch(&conn, Some("reembed-a"), None, 1).unwrap();
12879        let cursor = first[0].0.clone();
12880        let rest = get_memory_texts_batch(&conn, Some("reembed-a"), Some(&cursor), 100).unwrap();
12881        assert_eq!(rest.len(), 2);
12882        assert!(rest.iter().all(|(id, _, _)| id.as_str() > cursor.as_str()));
12883    }
12884
12885    /// #1598 — the reembed writer REPLACES vectors across a dim change
12886    /// that the checked writer (G4 invariant) refuses, and skips
12887    /// unknown ids like its checked sibling.
12888    #[test]
12889    fn set_embeddings_batch_reembed_bypasses_dim_invariant_1598() {
12890        let mut conn = test_db();
12891        let id1 = insert(&conn, &make_memory("dim-est", "reembed-dim", Tier::Long, 5)).unwrap();
12892        let id2 = insert(&conn, &make_memory("dim-mig", "reembed-dim", Tier::Long, 5)).unwrap();
12893        // Establish a 4-dim namespace.
12894        set_embedding(&conn, &id1, &[0.1, 0.2, 0.3, 0.4]).unwrap();
12895
12896        // The checked writer enforces the established dim…
12897        let refused =
12898            set_embeddings_batch(&mut conn, &[(id2.clone(), vec![0.1_f32; 8])]).unwrap_err();
12899        assert!(
12900            refused.downcast_ref::<EmbeddingDimMismatch>().is_some(),
12901            "checked writer must refuse the dim change: {refused}"
12902        );
12903
12904        // …the migration writer replaces every row to the new dim.
12905        let entries = vec![
12906            (id1.clone(), vec![0.9_f32; 8]),
12907            (id2.clone(), vec![0.8_f32; 8]),
12908        ];
12909        let written = set_embeddings_batch_reembed(&mut conn, &entries).unwrap();
12910        assert_eq!(written, 2);
12911        assert_eq!(get_embedding(&conn, &id1).unwrap().unwrap().len(), 8);
12912        assert_eq!(get_embedding(&conn, &id2).unwrap().unwrap().len(), 8);
12913        assert_eq!(
12914            namespace_embedding_dim(&conn, "reembed-dim").unwrap(),
12915            Some(8),
12916            "namespace converges to the target dim"
12917        );
12918
12919        // Unknown ids are skipped; empty input is a no-op.
12920        let n = set_embeddings_batch_reembed(
12921            &mut conn,
12922            &[("no-such-id".to_string(), vec![0.1_f32; 8])],
12923        )
12924        .unwrap();
12925        assert_eq!(n, 0);
12926        assert_eq!(set_embeddings_batch_reembed(&mut conn, &[]).unwrap(), 0);
12927    }
12928
12929    /// #1598 — dry-run coverage counts, with and without the namespace
12930    /// filter.
12931    #[test]
12932    fn embedding_coverage_counts_1598() {
12933        let conn = test_db();
12934        let id_a = insert(&conn, &make_memory("c-a", "cov-a", Tier::Long, 5)).unwrap();
12935        insert(&conn, &make_memory("c-b", "cov-a", Tier::Long, 5)).unwrap();
12936        insert(&conn, &make_memory("c-c", "cov-b", Tier::Long, 5)).unwrap();
12937        set_embedding(&conn, &id_a, &[0.1, 0.2]).unwrap();
12938
12939        assert_eq!(embedding_coverage(&conn, None).unwrap(), (3, 1));
12940        assert_eq!(embedding_coverage(&conn, Some("cov-a")).unwrap(), (2, 1));
12941        assert_eq!(embedding_coverage(&conn, Some("cov-b")).unwrap(), (1, 0));
12942        assert_eq!(embedding_coverage(&conn, Some("cov-none")).unwrap(), (0, 0));
12943    }
12944
12945    /// #1598 — the pre-flight dim survey lists every stored dim
12946    /// (sorted) and honors the namespace filter.
12947    #[test]
12948    fn distinct_embedding_dims_lists_mixed_1598() {
12949        let mut conn = test_db();
12950        let id_a = insert(&conn, &make_memory("d-a", "dims-a", Tier::Long, 5)).unwrap();
12951        let id_b = insert(&conn, &make_memory("d-b", "dims-b", Tier::Long, 5)).unwrap();
12952        let id_c = insert(&conn, &make_memory("d-c", "dims-b", Tier::Long, 5)).unwrap();
12953        set_embedding(&conn, &id_a, &[0.1, 0.2]).unwrap();
12954        set_embedding(&conn, &id_b, &[0.1; 8]).unwrap();
12955        // Mixed dims inside ONE namespace only arise mid-migration —
12956        // land them via the reembed writer.
12957        set_embeddings_batch_reembed(&mut conn, &[(id_c, vec![0.2_f32; 4])]).unwrap();
12958
12959        assert_eq!(distinct_embedding_dims(&conn, None).unwrap(), vec![2, 4, 8]);
12960        assert_eq!(
12961            distinct_embedding_dims(&conn, Some("dims-b")).unwrap(),
12962            vec![4, 8]
12963        );
12964        assert!(
12965            distinct_embedding_dims(&conn, Some("dims-none"))
12966                .unwrap()
12967                .is_empty()
12968        );
12969    }
12970
12971    // -- Pillar 2 / Stream D — memory_check_duplicate -------------------
12972
12973    fn insert_with_embedding(
12974        conn: &Connection,
12975        title: &str,
12976        ns: &str,
12977        embedding: &[f32],
12978    ) -> String {
12979        let mem = make_memory(title, ns, Tier::Long, 5);
12980        let id = insert(conn, &mem).unwrap();
12981        set_embedding(conn, &id, embedding).unwrap();
12982        id
12983    }
12984
12985    #[test]
12986    fn check_duplicate_empty_db_returns_no_match() {
12987        let conn = test_db();
12988        let q = vec![1.0_f32, 0.0, 0.0];
12989        let r = check_duplicate(&conn, &q, None, 0.85).unwrap();
12990        assert!(!r.is_duplicate);
12991        assert!(r.nearest.is_none());
12992        assert_eq!(r.candidates_scanned, 0);
12993    }
12994
12995    #[test]
12996    fn check_duplicate_finds_highest_cosine_match() {
12997        let conn = test_db();
12998        // a = [1,0,0]; b = [0,1,0]; c = [0.99,0.01,0]. Query = [1,0,0]
12999        // expects `c` (cos ~0.9999) > `a` (cos =1.0 actually).
13000        // Use distinct vectors: a=[1,0,0] cos 1.0, b=[0.7,0.7,0] cos 0.707,
13001        // c=[0,1,0] cos 0.0. Best should be `a`.
13002        let id_a = insert_with_embedding(&conn, "alpha", "ns", &[1.0, 0.0, 0.0]);
13003        let _id_b = insert_with_embedding(&conn, "beta", "ns", &[0.7, 0.7, 0.0]);
13004        let _id_c = insert_with_embedding(&conn, "gamma", "ns", &[0.0, 1.0, 0.0]);
13005
13006        let q = vec![1.0_f32, 0.0, 0.0];
13007        let r = check_duplicate(&conn, &q, None, 0.85).unwrap();
13008        let nearest = r.nearest.expect("expected a nearest match");
13009        assert_eq!(nearest.id, id_a);
13010        assert!(nearest.similarity > 0.99);
13011        assert_eq!(r.candidates_scanned, 3);
13012        assert!(r.is_duplicate);
13013        assert!((r.threshold - 0.85).abs() < 1e-6);
13014    }
13015
13016    #[test]
13017    fn check_duplicate_below_threshold_not_flagged_but_returns_nearest() {
13018        let conn = test_db();
13019        let id_b = insert_with_embedding(&conn, "beta", "ns", &[0.7, 0.7, 0.0]);
13020
13021        // Cosine([1,0,0], [0.7,0.7,0]) ~ 0.707 — below default 0.85.
13022        let q = vec![1.0_f32, 0.0, 0.0];
13023        let r = check_duplicate(&conn, &q, None, 0.85).unwrap();
13024        let nearest = r
13025            .nearest
13026            .expect("nearest must surface even when below threshold");
13027        assert_eq!(nearest.id, id_b);
13028        assert!(!r.is_duplicate);
13029    }
13030
13031    #[test]
13032    fn check_duplicate_threshold_clamped_to_floor() {
13033        let conn = test_db();
13034        // Caller passes a permissive 0.0; the response threshold must
13035        // be clamped to DUPLICATE_THRESHOLD_MIN so unrelated content
13036        // can't be dressed as a merge candidate.
13037        let _ = insert_with_embedding(&conn, "x", "ns", &[1.0, 0.0, 0.0]);
13038        let q = vec![0.0_f32, 1.0, 0.0]; // orthogonal — cosine 0.0
13039        let r = check_duplicate(&conn, &q, None, 0.0).unwrap();
13040        assert!((r.threshold - DUPLICATE_THRESHOLD_MIN).abs() < 1e-6);
13041        assert!(!r.is_duplicate);
13042    }
13043
13044    #[test]
13045    fn check_duplicate_namespace_filter_isolates_scan() {
13046        let conn = test_db();
13047        let _hit_in_other_ns = insert_with_embedding(&conn, "x", "other", &[1.0, 0.0, 0.0]);
13048        let id_target = insert_with_embedding(&conn, "y", "ns", &[0.6, 0.8, 0.0]);
13049
13050        let q = vec![1.0_f32, 0.0, 0.0];
13051        let r = check_duplicate(&conn, &q, Some("ns"), 0.85).unwrap();
13052        assert_eq!(r.candidates_scanned, 1);
13053        assert_eq!(r.nearest.expect("namespace filter ignored").id, id_target);
13054    }
13055
13056    #[test]
13057    fn check_duplicate_skips_expired_rows() {
13058        let conn = test_db();
13059        // Short-tier memory with a backdated `expires_at` is past the
13060        // live-row gate and must not be a candidate.
13061        let mut mem = make_memory("expired", "ns", Tier::Short, 5);
13062        mem.expires_at = Some((chrono::Utc::now() - chrono::Duration::seconds(60)).to_rfc3339());
13063        let id = insert(&conn, &mem).unwrap();
13064        set_embedding(&conn, &id, &[1.0, 0.0, 0.0]).unwrap();
13065
13066        let q = vec![1.0_f32, 0.0, 0.0];
13067        let r = check_duplicate(&conn, &q, None, 0.85).unwrap();
13068        assert_eq!(r.candidates_scanned, 0);
13069        assert!(r.nearest.is_none());
13070    }
13071
13072    #[test]
13073    fn check_duplicate_skips_unembedded_rows() {
13074        let conn = test_db();
13075        // One memory with an embedding, one without — only the embedded
13076        // row should appear in `candidates_scanned`.
13077        let id_embedded = insert_with_embedding(&conn, "with-emb", "ns", &[1.0, 0.0, 0.0]);
13078        let mem = make_memory("no-emb", "ns", Tier::Long, 5);
13079        let _ = insert(&conn, &mem).unwrap();
13080
13081        let q = vec![1.0_f32, 0.0, 0.0];
13082        let r = check_duplicate(&conn, &q, None, 0.85).unwrap();
13083        assert_eq!(r.candidates_scanned, 1);
13084        assert_eq!(r.nearest.expect("embedded match").id, id_embedded);
13085    }
13086
13087    #[test]
13088    fn check_duplicate_skips_blob_with_non_multiple_of_4_length() {
13089        // Regression: pre-fix, an embedding blob whose length was not
13090        // a multiple of 4 would silently drop a trailing partial chunk
13091        // via chunks_exact and compute cosine against a shorter
13092        // candidate vector — producing a misleading score. The bounds
13093        // check now skips the row entirely.
13094        let conn = test_db();
13095        let mem = make_memory("malformed-blob", "ns", Tier::Long, 5);
13096        let id = insert(&conn, &mem).unwrap();
13097        // Write a 7-byte blob (1 short of 8 = 2 f32s) directly to
13098        // sqlite, bypassing set_embedding which only takes &[f32].
13099        conn.execute(
13100            "UPDATE memories SET embedding = ?1 WHERE id = ?2",
13101            params![&[0u8; 7][..], &id],
13102        )
13103        .unwrap();
13104
13105        let q = vec![1.0_f32, 0.0];
13106        let r = check_duplicate(&conn, &q, None, 0.85).unwrap();
13107        assert_eq!(
13108            r.candidates_scanned, 0,
13109            "malformed blob must be skipped, not silently truncated"
13110        );
13111        assert!(r.nearest.is_none());
13112    }
13113
13114    #[test]
13115    fn check_duplicate_skips_blob_with_dimension_mismatch() {
13116        // Regression: a blob with a valid length (multiple of 4) but
13117        // wrong dimension vs the query embedding must NOT be scored;
13118        // cosine_similarity zips and would silently truncate to the
13119        // shorter input, producing a wrong similarity.
13120        let conn = test_db();
13121        // Insert a memory with a 3-dim embedding via the normal path.
13122        let _id = insert_with_embedding(&conn, "different-dim", "ns", &[1.0, 0.0, 0.0]);
13123
13124        // Query with a 4-dim embedding — different from the candidate.
13125        let q = vec![1.0_f32, 0.0, 0.0, 0.0];
13126        let r = check_duplicate(&conn, &q, None, 0.85).unwrap();
13127        assert_eq!(
13128            r.candidates_scanned, 0,
13129            "dimension-mismatched candidate must be skipped"
13130        );
13131        assert!(r.nearest.is_none());
13132    }
13133
13134    #[test]
13135    fn get_unembedded_returns_memoryless() {
13136        let conn = test_db();
13137        let mem = make_memory("No embed", "test", Tier::Long, 5);
13138        insert(&conn, &mem).unwrap();
13139
13140        let unembedded = get_unembedded_ids(&conn).unwrap();
13141        assert_eq!(unembedded.len(), 1);
13142    }
13143
13144    #[test]
13145    fn health_check_passes() {
13146        let conn = test_db();
13147        assert!(health_check(&conn).unwrap());
13148    }
13149
13150    #[test]
13151    fn sanitize_fts_strips_operators_and_quotes() {
13152        // FTS5 special chars: " * ^ { } ( ) : - | are stripped
13153        let sanitized = sanitize_fts_query("test* \"injection\" (drop)", true);
13154        assert!(!sanitized.contains('*'));
13155        assert!(!sanitized.contains('('));
13156        assert!(!sanitized.contains(')'));
13157        // Standalone boolean operators are removed
13158        let sanitized2 = sanitize_fts_query("hello AND world OR NOT NEAR test", true);
13159        assert!(sanitized2.contains("hello"));
13160        assert!(sanitized2.contains("world"));
13161        assert!(sanitized2.contains("test"));
13162        // Empty input returns placeholder
13163        let sanitized3 = sanitize_fts_query("", true);
13164        assert_eq!(sanitized3, "\"_empty_\"");
13165        // `+` prefix operator is stripped (prevents exclusion injection);
13166        // `-` is now preserved inside phrase-quoted tokens so hyphenated
13167        // content ("well-known", "foo-bar") searches correctly against
13168        // the unicode61 tokenizer. Phrase-quoting keeps `-` from reaching
13169        // FTS5 as a prefix operator, closing the injection hole.
13170        let sanitized4 = sanitize_fts_query("-secret +required", true);
13171        assert!(!sanitized4.contains('+'));
13172        assert!(sanitized4.contains("secret"));
13173        assert!(sanitized4.contains("required"));
13174        // Hyphenated tokens pass through as phrase searches.
13175        let sanitized5 = sanitize_fts_query("well-known", true);
13176        assert!(sanitized5.contains("well-known"));
13177    }
13178
13179    #[test]
13180    fn get_by_prefix_8char() {
13181        let conn = test_db();
13182        let mem = make_memory("Prefix test", "test", Tier::Long, 5);
13183        let id = insert(&conn, &mem).unwrap();
13184        let prefix = &id[..8];
13185        let got = get_by_prefix(&conn, prefix).unwrap().unwrap();
13186        assert_eq!(got.id, id);
13187        assert_eq!(got.title, "Prefix test");
13188    }
13189
13190    #[test]
13191    fn get_by_prefix_full_uuid() {
13192        let conn = test_db();
13193        let mem = make_memory("Full UUID prefix", "test", Tier::Long, 5);
13194        let id = insert(&conn, &mem).unwrap();
13195        // Full UUID used as prefix still works (LIKE 'full-uuid%' matches exact)
13196        let got = get_by_prefix(&conn, &id).unwrap().unwrap();
13197        assert_eq!(got.id, id);
13198    }
13199
13200    #[test]
13201    fn get_by_prefix_nonexistent() {
13202        let conn = test_db();
13203        let got = get_by_prefix(&conn, "ffffffff").unwrap();
13204        assert!(got.is_none());
13205    }
13206
13207    #[test]
13208    fn get_by_prefix_ambiguous() {
13209        let conn = test_db();
13210        // Insert two memories with IDs sharing a common prefix
13211        let mut mem1 = make_memory("Ambig A", "test", Tier::Long, 5);
13212        mem1.id = "aaaa1111-0000-0000-0000-000000000001".to_string();
13213        insert(&conn, &mem1).unwrap();
13214        let mut mem2 = make_memory("Ambig B", "test2", Tier::Long, 5);
13215        mem2.id = "aaaa2222-0000-0000-0000-000000000002".to_string();
13216        insert(&conn, &mem2).unwrap();
13217        let result = get_by_prefix(&conn, "aaaa");
13218        assert!(result.is_err());
13219        let err_msg = result.unwrap_err().to_string();
13220        assert!(err_msg.contains("ambiguous"));
13221        assert!(err_msg.contains("2 matches"));
13222        // Error should list the matching full IDs so the user can pick one
13223        assert!(
13224            err_msg.contains("aaaa1111-0000-0000-0000-000000000001"),
13225            "error should list matching IDs, got: {err_msg}"
13226        );
13227        assert!(err_msg.contains("aaaa2222-0000-0000-0000-000000000002"));
13228    }
13229
13230    #[test]
13231    fn resolve_id_exact_then_prefix() {
13232        let conn = test_db();
13233        let mem = make_memory("Resolve test", "test", Tier::Long, 5);
13234        let id = insert(&conn, &mem).unwrap();
13235        // Exact match
13236        let got = resolve_id(&conn, &id).unwrap().unwrap();
13237        assert_eq!(got.id, id);
13238        // Prefix match
13239        let got2 = resolve_id(&conn, &id[..8]).unwrap().unwrap();
13240        assert_eq!(got2.id, id);
13241        // Nonexistent
13242        let got3 = resolve_id(&conn, "zzzzzzzz").unwrap();
13243        assert!(got3.is_none());
13244    }
13245
13246    #[test]
13247    fn insert_if_newer_updates() {
13248        let conn = test_db();
13249        let mut mem = make_memory("Sync test", "test", Tier::Long, 5);
13250        let id = insert(&conn, &mem).unwrap();
13251
13252        mem.id = id.clone();
13253        mem.content = "Updated via sync".to_string();
13254        mem.updated_at = (chrono::Utc::now() + chrono::Duration::hours(1)).to_rfc3339();
13255        let result_id = insert_if_newer(&conn, &mem).unwrap();
13256        assert_eq!(result_id, id);
13257
13258        let got = get(&conn, &id).unwrap().unwrap();
13259        assert_eq!(got.content, "Updated via sync");
13260    }
13261
13262    // --- Metadata tests (Task 1.1) ---
13263
13264    #[test]
13265    fn metadata_default_empty_object() {
13266        let conn = test_db();
13267        let mem = make_memory("Default metadata", "test", Tier::Long, 5);
13268        let id = insert(&conn, &mem).unwrap();
13269        let got = get(&conn, &id).unwrap().unwrap();
13270        assert_eq!(got.metadata, serde_json::json!({}));
13271    }
13272
13273    #[test]
13274    fn metadata_store_and_retrieve() {
13275        let conn = test_db();
13276        let mut mem = make_memory("With metadata", "test", Tier::Long, 5);
13277        mem.metadata = serde_json::json!({"agent_id": "claude-1", "session": 42});
13278        let id = insert(&conn, &mem).unwrap();
13279        let got = get(&conn, &id).unwrap().unwrap();
13280        assert_eq!(got.metadata["agent_id"], "claude-1");
13281        assert_eq!(got.metadata["session"], 42);
13282    }
13283
13284    #[test]
13285    fn metadata_roundtrip_nested_json() {
13286        let conn = test_db();
13287        let mut mem = make_memory("Nested metadata", "test", Tier::Long, 5);
13288        mem.metadata = serde_json::json!({
13289            "agent": {"type": "ai:claude", "version": "4.6"},
13290            "tags_extra": ["experimental"],
13291            "score": 0.95
13292        });
13293        let id = insert(&conn, &mem).unwrap();
13294        let got = get(&conn, &id).unwrap().unwrap();
13295        assert_eq!(got.metadata["agent"]["type"], "ai:claude");
13296        assert_eq!(got.metadata["tags_extra"][0], "experimental");
13297        assert!((got.metadata["score"].as_f64().unwrap() - 0.95).abs() < f64::EPSILON);
13298    }
13299
13300    #[test]
13301    fn metadata_preserved_on_update() {
13302        let conn = test_db();
13303        let mut mem = make_memory("Update metadata", "test", Tier::Long, 5);
13304        mem.metadata = serde_json::json!({"key": "original"});
13305        let id = insert(&conn, &mem).unwrap();
13306
13307        // Update without metadata — should preserve existing
13308        let (found, _) = update(
13309            &conn,
13310            &id,
13311            None,
13312            Some("new content"),
13313            None,
13314            None,
13315            None,
13316            None,
13317            None,
13318            None,
13319            None,
13320        )
13321        .unwrap();
13322        assert!(found);
13323        let got = get(&conn, &id).unwrap().unwrap();
13324        assert_eq!(got.metadata["key"], "original");
13325        assert_eq!(got.content, "new content");
13326
13327        // Update with new metadata — should replace
13328        let new_meta = serde_json::json!({"key": "updated", "extra": true});
13329        let (found, _) = update(
13330            &conn,
13331            &id,
13332            None,
13333            None,
13334            None,
13335            None,
13336            None,
13337            None,
13338            None,
13339            None,
13340            Some(&new_meta),
13341        )
13342        .unwrap();
13343        assert!(found);
13344        let got = get(&conn, &id).unwrap().unwrap();
13345        assert_eq!(got.metadata["key"], "updated");
13346        assert_eq!(got.metadata["extra"], true);
13347    }
13348
13349    #[test]
13350    fn metadata_preserved_on_upsert() {
13351        let conn = test_db();
13352        let mut mem = make_memory("Upsert meta", "test", Tier::Long, 5);
13353        mem.metadata = serde_json::json!({"version": 1});
13354        insert(&conn, &mem).unwrap();
13355
13356        // Insert again with same title+namespace — upsert should update metadata
13357        let mut mem2 = make_memory("Upsert meta", "test", Tier::Long, 5);
13358        mem2.metadata = serde_json::json!({"version": 2});
13359        let id = insert(&conn, &mem2).unwrap();
13360        let got = get(&conn, &id).unwrap().unwrap();
13361        assert_eq!(got.metadata["version"], 2);
13362    }
13363
13364    #[test]
13365    fn metadata_in_list_and_search() {
13366        let conn = test_db();
13367        let mut mem = make_memory("Searchable metadata", "test", Tier::Long, 8);
13368        mem.metadata = serde_json::json!({"source_model": "opus"});
13369        insert(&conn, &mem).unwrap();
13370
13371        let results = list(
13372            &conn,
13373            Some("test"),
13374            None,
13375            10,
13376            0,
13377            None,
13378            None,
13379            None,
13380            None,
13381            None,
13382        )
13383        .unwrap();
13384        assert_eq!(results.len(), 1);
13385        assert_eq!(results[0].metadata["source_model"], "opus");
13386
13387        let results = search(
13388            &conn,
13389            "Searchable",
13390            Some("test"),
13391            None,
13392            10,
13393            None,
13394            None,
13395            None,
13396            None,
13397            None,
13398            None,
13399            false,
13400        )
13401        .unwrap();
13402        assert_eq!(results.len(), 1);
13403        assert_eq!(results[0].metadata["source_model"], "opus");
13404    }
13405
13406    #[test]
13407    fn metadata_in_recall() {
13408        let conn = test_db();
13409        let mut mem = make_memory("Recallable metadata", "test", Tier::Long, 8);
13410        mem.metadata = serde_json::json!({"context": "test-recall"});
13411        insert(&conn, &mem).unwrap();
13412
13413        let (results, _tokens) = recall(
13414            &conn,
13415            "Recallable",
13416            Some("test"),
13417            10,
13418            None,
13419            None,
13420            None,
13421            crate::SECS_PER_HOUR,
13422            crate::SECS_PER_DAY,
13423            None,
13424            None,
13425            false,
13426            None,
13427        )
13428        .unwrap();
13429        assert!(!results.is_empty());
13430        assert_eq!(results[0].0.metadata["context"], "test-recall");
13431    }
13432
13433    #[test]
13434    fn metadata_in_export_import() {
13435        let conn = test_db();
13436        let mut mem = make_memory("Export metadata", "test", Tier::Long, 5);
13437        mem.metadata = serde_json::json!({"exported": true});
13438        insert(&conn, &mem).unwrap();
13439
13440        let exported = export_all(&conn).unwrap();
13441        assert_eq!(exported.len(), 1);
13442        assert_eq!(exported[0].metadata["exported"], true);
13443
13444        // Import into fresh DB
13445        let conn2 = test_db();
13446        insert(&conn2, &exported[0]).unwrap();
13447        let got = get(&conn2, &exported[0].id).unwrap().unwrap();
13448        assert_eq!(got.metadata["exported"], true);
13449    }
13450
13451    #[test]
13452    fn metadata_schema_migration() {
13453        // Simulate a pre-v7 database (no metadata column) by creating one
13454        // and checking that migration adds the column with correct default
13455        let conn = test_db();
13456        let mem = make_memory("Migration test", "test", Tier::Long, 5);
13457        let id = insert(&conn, &mem).unwrap();
13458
13459        // Verify the column exists and has the default value
13460        let metadata_str: String = conn
13461            .query_row(
13462                "SELECT metadata FROM memories WHERE id = ?1",
13463                params![id],
13464                |r| r.get(0),
13465            )
13466            .unwrap();
13467        assert_eq!(metadata_str, "{}");
13468    }
13469
13470    #[test]
13471    fn metadata_survives_archive_restore_cycle() {
13472        let conn = test_db();
13473        let mut mem = make_memory("Archivable", "test", Tier::Short, 5);
13474        mem.metadata = serde_json::json!({"origin": "archive-test"});
13475        // Set expiry in the past so GC will archive it
13476        mem.expires_at = Some("2020-01-01T00:00:00+00:00".to_string());
13477        let id = insert(&conn, &mem).unwrap();
13478
13479        // Run GC with archive=true — should archive the expired memory
13480        let deleted = gc(&conn, true).unwrap();
13481        assert_eq!(deleted, 1);
13482
13483        // Verify metadata is in the archive
13484        let archived = list_archived(&conn, None, 10, 0).unwrap();
13485        assert_eq!(archived.len(), 1);
13486        assert_eq!(archived[0]["metadata"]["origin"], "archive-test");
13487
13488        // Restore and verify metadata survives the round-trip
13489        let restored = restore_archived(&conn, &id).unwrap();
13490        assert!(restored);
13491        let got = get(&conn, &id).unwrap().unwrap();
13492        assert_eq!(got.metadata["origin"], "archive-test");
13493    }
13494
13495    #[test]
13496    fn metadata_in_insert_if_newer() {
13497        let conn = test_db();
13498        let mut mem = make_memory("Sync metadata", "test", Tier::Long, 5);
13499        mem.metadata = serde_json::json!({"version": 1});
13500        let id = insert(&conn, &mem).unwrap();
13501
13502        // Insert newer version with different metadata
13503        mem.id = id.clone();
13504        mem.metadata = serde_json::json!({"version": 2, "synced": true});
13505        mem.updated_at = (chrono::Utc::now() + chrono::Duration::hours(1)).to_rfc3339();
13506        insert_if_newer(&conn, &mem).unwrap();
13507
13508        let got = get(&conn, &id).unwrap().unwrap();
13509        assert_eq!(got.metadata["version"], 2);
13510        assert_eq!(got.metadata["synced"], true);
13511
13512        // Insert older version — metadata should NOT be overwritten
13513        mem.metadata = serde_json::json!({"version": 0, "stale": true});
13514        mem.updated_at = "2020-01-01T00:00:00+00:00".to_string();
13515        insert_if_newer(&conn, &mem).unwrap();
13516
13517        let got = get(&conn, &id).unwrap().unwrap();
13518        assert_eq!(got.metadata["version"], 2); // still the newer one
13519        assert!(got.metadata.get("stale").is_none());
13520    }
13521
13522    #[test]
13523    fn metadata_merged_in_consolidate() {
13524        let conn = test_db();
13525        let mut mem_a = make_memory("Consolidate A", "test", Tier::Long, 5);
13526        mem_a.metadata = serde_json::json!({"agent": "claude", "shared": "from_a"});
13527        let id_a = insert(&conn, &mem_a).unwrap();
13528
13529        let mut mem_b = make_memory("Consolidate B", "test", Tier::Long, 7);
13530        mem_b.metadata = serde_json::json!({"model": "opus", "shared": "from_b"});
13531        let id_b = insert(&conn, &mem_b).unwrap();
13532
13533        let new_id = consolidate(
13534            &conn,
13535            &[id_a, id_b],
13536            "Merged",
13537            "Combined content",
13538            "test",
13539            &Tier::Long,
13540            "consolidation",
13541            "test-consolidator",
13542        )
13543        .unwrap();
13544
13545        let got = get(&conn, &new_id).unwrap().unwrap();
13546        // Both keys present; "shared" key takes value from later source (mem_b)
13547        assert_eq!(got.metadata["agent"], "claude");
13548        assert_eq!(got.metadata["model"], "opus");
13549        assert_eq!(got.metadata["shared"], "from_b");
13550    }
13551
13552    #[test]
13553    fn metadata_consolidate_rejects_oversized_merge() {
13554        let conn = test_db();
13555        // Create two memories with large unique-key metadata that together exceed 64KB
13556        let mut mem_a = make_memory("Big meta A", "test", Tier::Long, 5);
13557        let big_val_a: serde_json::Map<String, serde_json::Value> = (0..500)
13558            .map(|i| {
13559                (
13560                    format!("key_a_{i}"),
13561                    serde_json::Value::String("x".repeat(60)),
13562                )
13563            })
13564            .collect();
13565        mem_a.metadata = serde_json::Value::Object(big_val_a);
13566        let id_a = insert(&conn, &mem_a).unwrap();
13567
13568        let mut mem_b = make_memory("Big meta B", "test", Tier::Long, 5);
13569        let big_val_b: serde_json::Map<String, serde_json::Value> = (0..500)
13570            .map(|i| {
13571                (
13572                    format!("key_b_{i}"),
13573                    serde_json::Value::String("x".repeat(60)),
13574                )
13575            })
13576            .collect();
13577        mem_b.metadata = serde_json::Value::Object(big_val_b);
13578        let id_b = insert(&conn, &mem_b).unwrap();
13579
13580        // Consolidate should fail because merged metadata exceeds 64KB
13581        let result = consolidate(
13582            &conn,
13583            &[id_a, id_b],
13584            "Oversized merge",
13585            "Should fail",
13586            "test",
13587            &Tier::Long,
13588            "consolidation",
13589            "test-consolidator",
13590        );
13591        let err = result.expect_err("consolidate should fail for oversized merged metadata");
13592        let msg = err.to_string();
13593        assert!(
13594            msg.contains("merged metadata exceeds size limit"),
13595            "expected metadata size error, got: {msg}"
13596        );
13597    }
13598
13599    #[test]
13600    fn metadata_special_characters_roundtrip() {
13601        let conn = test_db();
13602        let mut mem = make_memory("Special chars metadata", "test", Tier::Long, 5);
13603        mem.metadata = serde_json::json!({
13604            "pipe": "a|b|c",
13605            "newline": "line1\nline2",
13606            "tab": "col1\tcol2",
13607            "backslash": "path\\to\\file",
13608            "unicode": "\u{1F600}\u{1F4A9}",
13609            "cjk": "\u{4e16}\u{754c}",
13610            "empty": "",
13611            "nested_special": {"inner|key": "val\nue"}
13612        });
13613        let id = insert(&conn, &mem).unwrap();
13614        let got = get(&conn, &id).unwrap().unwrap();
13615        assert_eq!(got.metadata["pipe"], "a|b|c");
13616        assert_eq!(got.metadata["newline"], "line1\nline2");
13617        assert_eq!(got.metadata["unicode"], "\u{1F600}\u{1F4A9}");
13618        assert_eq!(got.metadata["cjk"], "\u{4e16}\u{754c}");
13619        assert_eq!(got.metadata["nested_special"]["inner|key"], "val\nue");
13620    }
13621
13622    #[test]
13623    fn metadata_corrupt_column_falls_back_to_empty() {
13624        let conn = test_db();
13625        let mem = make_memory("Corrupt test", "test", Tier::Long, 5);
13626        let id = insert(&conn, &mem).unwrap();
13627
13628        // Manually corrupt the metadata column
13629        conn.execute(
13630            "UPDATE memories SET metadata = 'NOT VALID JSON {{{{' WHERE id = ?1",
13631            params![id],
13632        )
13633        .unwrap();
13634
13635        // row_to_memory should fall back to {} without panicking
13636        let got = get(&conn, &id).unwrap().unwrap();
13637        assert_eq!(got.metadata, serde_json::json!({}));
13638    }
13639
13640    #[test]
13641    fn metadata_restore_resets_corrupt_archived_metadata() {
13642        let conn = test_db();
13643        let mut mem = make_memory("Corrupt archive", "test", Tier::Short, 5);
13644        mem.metadata = serde_json::json!({"valid": true});
13645        mem.expires_at = Some("2020-01-01T00:00:00+00:00".to_string());
13646        let id = insert(&conn, &mem).unwrap();
13647
13648        // Archive via GC
13649        gc(&conn, true).unwrap();
13650
13651        // Corrupt the archived metadata directly
13652        conn.execute(
13653            "UPDATE archived_memories SET metadata = 'CORRUPT JSON' WHERE id = ?1",
13654            params![id],
13655        )
13656        .unwrap();
13657
13658        // Restore — should reset metadata to {} instead of failing
13659        let restored = restore_archived(&conn, &id).unwrap();
13660        assert!(restored);
13661        let got = get(&conn, &id).unwrap().unwrap();
13662        assert_eq!(got.metadata, serde_json::json!({}));
13663    }
13664
13665    #[test]
13666    fn scope_index_exists_after_migration() {
13667        // v0.6.0 GA (schema v10) — the `scope_idx` generated column and its
13668        // B-tree index must exist after `open()` runs migration.
13669        let conn = test_db();
13670        let has_col: bool = conn
13671            .prepare("SELECT scope_idx FROM memories LIMIT 0")
13672            .is_ok();
13673        assert!(has_col, "scope_idx generated column missing");
13674        let idx_exists: i64 = conn
13675            .query_row(
13676                "SELECT COUNT(*) FROM sqlite_master WHERE type='index' AND name='idx_memories_scope_idx'",
13677                [],
13678                |row| row.get(0),
13679            )
13680            .unwrap();
13681        assert_eq!(idx_exists, 1, "idx_memories_scope_idx missing");
13682    }
13683
13684    #[test]
13685    fn scope_index_used_for_direct_scope_filter() {
13686        // v0.6.0 GA — confirm `idx_memories_scope_idx` is picked for a
13687        // direct `WHERE scope_idx = ?` predicate. This is the shape the
13688        // query planner sees for `scope = 'collective'` fast-paths and
13689        // the branch-local predicate inside `visibility_clause`.
13690        //
13691        // We deliberately do NOT assert the index is used for the full
13692        // visibility_clause OR-chain — SQLite's planner may (correctly)
13693        // choose a scan when the OR-chain has variable selectivity across
13694        // branches. The point of the index is to accelerate the common
13695        // case when a recall narrows to one scope; the multi-branch
13696        // visibility clause still benefits because each branch evaluates
13697        // the predicate against a single column rather than a JSON extract.
13698        let conn = test_db();
13699        // Seed enough rows + ANALYZE so planner cost model is honest.
13700        for i in 0..200 {
13701            let scope = if i % 3 == 0 { "collective" } else { "private" };
13702            let mut mem = make_memory(&format!("row-{i}"), "test", Tier::Long, 5);
13703            mem.metadata = serde_json::json!({"scope": scope});
13704            insert(&conn, &mem).unwrap();
13705        }
13706        conn.execute("ANALYZE", []).unwrap();
13707        let plan: Vec<String> = conn
13708            .prepare("EXPLAIN QUERY PLAN SELECT id FROM memories WHERE scope_idx = ?1")
13709            .unwrap()
13710            .query_map(params!["collective"], |row| row.get::<_, String>(3))
13711            .unwrap()
13712            .collect::<rusqlite::Result<_>>()
13713            .unwrap();
13714        let joined = plan.join("\n");
13715        assert!(
13716            joined.contains("idx_memories_scope_idx"),
13717            "direct scope filter must use idx_memories_scope_idx; got:\n{joined}"
13718        );
13719    }
13720
13721    #[test]
13722    fn scope_idx_reflects_metadata_on_insert_and_update() {
13723        // v0.6.0 GA — the VIRTUAL generated column must track metadata.scope
13724        // across insert and update without manual maintenance.
13725        let conn = test_db();
13726        let mut mem = make_memory("scope-tracking", "test", Tier::Long, 5);
13727        mem.metadata = serde_json::json!({"scope": "team"});
13728        let id = insert(&conn, &mem).unwrap();
13729        let scope: String = conn
13730            .query_row(
13731                "SELECT scope_idx FROM memories WHERE id = ?1",
13732                params![id],
13733                |r| r.get(0),
13734            )
13735            .unwrap();
13736        assert_eq!(scope, "team");
13737
13738        // Flip scope to unit via metadata update — generated column updates.
13739        let new_meta = serde_json::json!({"scope": "unit"});
13740        update(
13741            &conn,
13742            &id,
13743            None,
13744            None,
13745            None,
13746            None,
13747            None,
13748            None,
13749            None,
13750            None,
13751            Some(&new_meta),
13752        )
13753        .unwrap();
13754        let scope2: String = conn
13755            .query_row(
13756                "SELECT scope_idx FROM memories WHERE id = ?1",
13757                params![id],
13758                |r| r.get(0),
13759            )
13760            .unwrap();
13761        assert_eq!(scope2, "unit");
13762
13763        // Memory with no scope key — virtual column returns the default.
13764        let mut bare = make_memory("no-scope-key", "test", Tier::Long, 5);
13765        bare.metadata = serde_json::json!({});
13766        let id2 = insert(&conn, &bare).unwrap();
13767        let scope3: String = conn
13768            .query_row(
13769                "SELECT scope_idx FROM memories WHERE id = ?1",
13770                params![id2],
13771                |r| r.get(0),
13772            )
13773            .unwrap();
13774        assert_eq!(scope3, "private");
13775    }
13776
13777    #[test]
13778    fn auto_purge_archive_respects_max_days() {
13779        let conn = test_db();
13780        let mut mem = make_memory("Purge test", "test", Tier::Short, 5);
13781        mem.expires_at = Some("2020-01-01T00:00:00+00:00".to_string());
13782        insert(&conn, &mem).unwrap();
13783        gc(&conn, true).unwrap();
13784
13785        // Archive exists
13786        let archived = list_archived(&conn, None, 10, 0).unwrap();
13787        assert_eq!(archived.len(), 1);
13788
13789        // Backdate archived_at to 30 days ago so purge can detect it
13790        conn.execute(
13791            "UPDATE archived_memories SET archived_at = ?1",
13792            params![(chrono::Utc::now() - chrono::Duration::days(30)).to_rfc3339()],
13793        )
13794        .unwrap();
13795
13796        // Purge with None (disabled) — no-op
13797        let purged = auto_purge_archive(&conn, None).unwrap();
13798        assert_eq!(purged, 0);
13799        assert_eq!(list_archived(&conn, None, 10, 0).unwrap().len(), 1);
13800
13801        // Purge with 0 days — should NOT purge (guard condition)
13802        let purged = auto_purge_archive(&conn, Some(0)).unwrap();
13803        assert_eq!(purged, 0);
13804
13805        // Purge with 90 days — archive is only 30 days old, should NOT purge
13806        let purged = auto_purge_archive(&conn, Some(90)).unwrap();
13807        assert_eq!(purged, 0);
13808
13809        // Purge with 7 days — archive is 30 days old, should be purged
13810        let purged = auto_purge_archive(&conn, Some(7)).unwrap();
13811        assert_eq!(purged, 1);
13812        assert!(list_archived(&conn, None, 10, 0).unwrap().is_empty());
13813    }
13814
13815    // ─────────────────────────────────────────────────────────────────
13816    // Schema v15 (v0.6.3 Stream B) — temporal-validity KG migration.
13817    // ─────────────────────────────────────────────────────────────────
13818
13819    fn column_exists(conn: &Connection, table: &str, column: &str) -> bool {
13820        let mut stmt = conn
13821            .prepare(&format!("PRAGMA table_info({table})"))
13822            .unwrap();
13823        let cols: Vec<String> = stmt
13824            .query_map([], |row| row.get::<_, String>(1))
13825            .unwrap()
13826            .filter_map(Result::ok)
13827            .collect();
13828        cols.iter().any(|c| c == column)
13829    }
13830
13831    fn index_exists(conn: &Connection, name: &str) -> bool {
13832        conn.query_row(
13833            "SELECT 1 FROM sqlite_master WHERE type='index' AND name=?1",
13834            params![name],
13835            |r| r.get::<_, i64>(0),
13836        )
13837        .is_ok()
13838    }
13839
13840    #[test]
13841    fn schema_v15_memory_links_has_temporal_columns() {
13842        let conn = test_db();
13843        assert!(column_exists(&conn, "memory_links", "valid_from"));
13844        assert!(column_exists(&conn, "memory_links", "valid_until"));
13845        assert!(column_exists(&conn, "memory_links", "observed_by"));
13846        assert!(column_exists(&conn, "memory_links", "signature"));
13847    }
13848
13849    #[test]
13850    fn schema_v15_memory_links_temporal_indexes_exist() {
13851        let conn = test_db();
13852        assert!(index_exists(&conn, "idx_links_temporal_src"));
13853        assert!(index_exists(&conn, "idx_links_temporal_tgt"));
13854        assert!(index_exists(&conn, "idx_links_relation"));
13855    }
13856
13857    #[test]
13858    fn schema_v15_entity_aliases_table_exists() {
13859        let conn = test_db();
13860        let count: i64 = conn
13861            .query_row("SELECT COUNT(*) FROM entity_aliases", [], |r| r.get(0))
13862            .unwrap();
13863        assert_eq!(count, 0);
13864        assert!(index_exists(&conn, "idx_entity_aliases_alias"));
13865    }
13866
13867    #[test]
13868    fn schema_v15_entity_aliases_primary_key_unique() {
13869        let conn = test_db();
13870        let now = chrono::Utc::now().to_rfc3339();
13871        conn.execute(
13872            "INSERT INTO entity_aliases (entity_id, alias, created_at) VALUES (?1, ?2, ?3)",
13873            params!["e1", "Alpha", &now],
13874        )
13875        .unwrap();
13876        let dup = conn.execute(
13877            "INSERT INTO entity_aliases (entity_id, alias, created_at) VALUES (?1, ?2, ?3)",
13878            params!["e1", "Alpha", &now],
13879        );
13880        assert!(dup.is_err(), "expected PK uniqueness violation");
13881    }
13882
13883    // -- Pillar 2 / Stream B — entity_register / entity_get_by_alias ------
13884
13885    #[test]
13886    fn entity_register_creates_new_entity_with_aliases() {
13887        let conn = test_db();
13888        let aliases = vec!["pa".to_string(), "Project A".to_string()];
13889        let reg = entity_register(
13890            &conn,
13891            "Project Alpha",
13892            "projects/alpha",
13893            &aliases,
13894            &serde_json::json!({}),
13895            Some("test-agent"),
13896        )
13897        .unwrap();
13898        assert!(reg.created, "first registration must be created=true");
13899        assert_eq!(reg.canonical_name, "Project Alpha");
13900        assert_eq!(reg.namespace, "projects/alpha");
13901        // Aliases inserted in one call share a created_at; the
13902        // secondary `alias ASC` sort orders by ASCII codepoint, so
13903        // uppercase 'P' (80) < lowercase 'p' (112). canonical_name is
13904        // auto-inserted as an alias so entity_get_by_alias resolves it.
13905        assert_eq!(
13906            reg.aliases,
13907            vec![
13908                "Project A".to_string(),
13909                "Project Alpha".to_string(),
13910                "pa".to_string()
13911            ]
13912        );
13913
13914        let m = get(&conn, &reg.entity_id).unwrap().unwrap();
13915        assert_eq!(m.title, "Project Alpha");
13916        assert_eq!(m.tier.rank(), Tier::Long.rank());
13917        assert!(m.tags.contains(&"entity".to_string()));
13918        assert_eq!(m.metadata["kind"], "entity");
13919        assert_eq!(m.metadata["agent_id"], "test-agent");
13920    }
13921
13922    #[test]
13923    fn entity_register_reuses_existing_and_merges_aliases() {
13924        let conn = test_db();
13925        let first = entity_register(
13926            &conn,
13927            "Project Alpha",
13928            "projects/alpha",
13929            &["pa".to_string()],
13930            &serde_json::json!({}),
13931            Some("a1"),
13932        )
13933        .unwrap();
13934        let second = entity_register(
13935            &conn,
13936            "Project Alpha",
13937            "projects/alpha",
13938            &["pa".to_string(), "alpha".to_string()],
13939            &serde_json::json!({}),
13940            Some("a2"),
13941        )
13942        .unwrap();
13943        assert!(first.created);
13944        assert!(!second.created, "second call must reuse the entity");
13945        assert_eq!(first.entity_id, second.entity_id);
13946        // First call inserted ["Project Alpha", "pa"] at ts1; second
13947        // call inserted "alpha" at ts2 (ts1 < ts2). Sort is created_at
13948        // ASC, alias ASC.
13949        assert_eq!(
13950            second.aliases,
13951            vec![
13952                "Project Alpha".to_string(),
13953                "pa".to_string(),
13954                "alpha".to_string()
13955            ]
13956        );
13957    }
13958
13959    #[test]
13960    fn entity_register_errors_on_collision_with_non_entity_memory() {
13961        let conn = test_db();
13962        let mem = make_memory("Conflict", "projects/alpha", Tier::Long, 5);
13963        insert(&conn, &mem).unwrap();
13964        let err = entity_register(
13965            &conn,
13966            "Conflict",
13967            "projects/alpha",
13968            &[],
13969            &serde_json::json!({}),
13970            None,
13971        )
13972        .unwrap_err();
13973        let msg = format!("{err}");
13974        assert!(
13975            msg.contains("non-entity memory"),
13976            "expected collision error, got: {msg}"
13977        );
13978    }
13979
13980    #[test]
13981    fn entity_register_skips_blank_aliases() {
13982        let conn = test_db();
13983        let reg = entity_register(
13984            &conn,
13985            "Trim Test",
13986            "test",
13987            &[String::new(), "   ".to_string(), "ok".to_string()],
13988            &serde_json::json!({}),
13989            None,
13990        )
13991        .unwrap();
13992        // canonical_name "Trim Test" auto-included; "T" (84) < "o" (111).
13993        assert_eq!(reg.aliases, vec!["Trim Test".to_string(), "ok".to_string()]);
13994    }
13995
13996    #[test]
13997    fn entity_register_preserves_caller_metadata_keys() {
13998        let conn = test_db();
13999        let extra = serde_json::json!({"team": "platform", "kind": "ignored"});
14000        let reg = entity_register(&conn, "Service X", "svc", &[], &extra, None).unwrap();
14001        let m = get(&conn, &reg.entity_id).unwrap().unwrap();
14002        assert_eq!(m.metadata["team"], "platform");
14003        // Caller's `kind` is overwritten — entity records must always
14004        // carry kind=entity for the resolver to find them.
14005        assert_eq!(m.metadata["kind"], "entity");
14006    }
14007
14008    #[test]
14009    fn entity_get_by_alias_returns_record_with_full_alias_set() {
14010        let conn = test_db();
14011        let reg = entity_register(
14012            &conn,
14013            "Project Alpha",
14014            "projects/alpha",
14015            &["pa".to_string(), "alpha".to_string()],
14016            &serde_json::json!({}),
14017            None,
14018        )
14019        .unwrap();
14020        let got = entity_get_by_alias(&conn, "pa", None).unwrap().unwrap();
14021        assert_eq!(got.entity_id, reg.entity_id);
14022        assert_eq!(got.canonical_name, "Project Alpha");
14023        assert_eq!(got.namespace, "projects/alpha");
14024        // Same-batch aliases share a created_at; alphabetical
14025        // tiebreak orders by ASCII codepoint: "Project Alpha" (P=80)
14026        // < "alpha" (a=97) < "pa" (p=112). canonical_name auto-included.
14027        assert_eq!(
14028            got.aliases,
14029            vec![
14030                "Project Alpha".to_string(),
14031                "alpha".to_string(),
14032                "pa".to_string()
14033            ]
14034        );
14035    }
14036
14037    #[test]
14038    fn entity_register_canonical_name_resolves_via_get_by_alias() {
14039        // Regression test for NHI-P3-T2 (v0.7.0 NHI test playbook):
14040        // registering an entity with no aliases must still leave it
14041        // reachable via entity_get_by_alias("<canonical_name>") so the
14042        // alias-resolution pathway isn't dead-on-arrival when the
14043        // caller only knows the canonical name.
14044        let conn = test_db();
14045        let reg = entity_register(
14046            &conn,
14047            "OnlyCanonical",
14048            "test",
14049            &[],
14050            &serde_json::json!({}),
14051            None,
14052        )
14053        .unwrap();
14054        assert!(reg.created);
14055        assert_eq!(
14056            reg.aliases,
14057            vec!["OnlyCanonical".to_string()],
14058            "canonical_name must be auto-inserted as an alias"
14059        );
14060        let got = entity_get_by_alias(&conn, "OnlyCanonical", Some("test"))
14061            .unwrap()
14062            .expect("canonical_name must resolve via entity_get_by_alias");
14063        assert_eq!(got.entity_id, reg.entity_id);
14064        assert_eq!(got.canonical_name, "OnlyCanonical");
14065    }
14066
14067    #[test]
14068    fn entity_get_by_alias_returns_none_for_unknown_alias() {
14069        let conn = test_db();
14070        let got = entity_get_by_alias(&conn, "missing", None).unwrap();
14071        assert!(got.is_none());
14072    }
14073
14074    #[test]
14075    fn entity_get_by_alias_filters_by_namespace() {
14076        let conn = test_db();
14077        entity_register(
14078            &conn,
14079            "Acme",
14080            "ns_a",
14081            &["a".to_string()],
14082            &serde_json::json!({}),
14083            None,
14084        )
14085        .unwrap();
14086        entity_register(
14087            &conn,
14088            "Acme Corp",
14089            "ns_b",
14090            &["a".to_string()],
14091            &serde_json::json!({}),
14092            None,
14093        )
14094        .unwrap();
14095        let in_a = entity_get_by_alias(&conn, "a", Some("ns_a"))
14096            .unwrap()
14097            .unwrap();
14098        assert_eq!(in_a.namespace, "ns_a");
14099        assert_eq!(in_a.canonical_name, "Acme");
14100        let in_b = entity_get_by_alias(&conn, "a", Some("ns_b"))
14101            .unwrap()
14102            .unwrap();
14103        assert_eq!(in_b.namespace, "ns_b");
14104        assert_eq!(in_b.canonical_name, "Acme Corp");
14105    }
14106
14107    #[test]
14108    fn entity_get_by_alias_without_namespace_picks_most_recent() {
14109        let conn = test_db();
14110        // Older entity created first.
14111        entity_register(
14112            &conn,
14113            "Older",
14114            "ns_old",
14115            &["dup".to_string()],
14116            &serde_json::json!({}),
14117            None,
14118        )
14119        .unwrap();
14120        // Sleep just enough to guarantee a strictly later created_at.
14121        std::thread::sleep(std::time::Duration::from_millis(5));
14122        entity_register(
14123            &conn,
14124            "Newer",
14125            "ns_new",
14126            &["dup".to_string()],
14127            &serde_json::json!({}),
14128            None,
14129        )
14130        .unwrap();
14131        let got = entity_get_by_alias(&conn, "dup", None).unwrap().unwrap();
14132        assert_eq!(got.canonical_name, "Newer");
14133        assert_eq!(got.namespace, "ns_new");
14134    }
14135
14136    #[test]
14137    fn entity_get_by_alias_ignores_non_entity_memory_with_matching_alias() {
14138        let conn = test_db();
14139        // Insert a regular (non-entity) memory and a stray
14140        // entity_aliases row pointing at it. The resolver must skip
14141        // it because `kind != 'entity'`.
14142        let mut mem = make_memory("Decoy", "test", Tier::Long, 5);
14143        mem.metadata = serde_json::json!({});
14144        let mid = insert(&conn, &mem).unwrap();
14145        let now = chrono::Utc::now().to_rfc3339();
14146        conn.execute(
14147            "INSERT INTO entity_aliases (entity_id, alias, created_at) VALUES (?1, ?2, ?3)",
14148            params![&mid, "decoy", &now],
14149        )
14150        .unwrap();
14151        let got = entity_get_by_alias(&conn, "decoy", None).unwrap();
14152        assert!(got.is_none(), "non-entity memories must not resolve");
14153    }
14154
14155    #[test]
14156    fn entity_register_idempotent_aliases_are_deduped() {
14157        let conn = test_db();
14158        let reg = entity_register(
14159            &conn,
14160            "Dedup",
14161            "test",
14162            &["x".to_string(), "x".to_string(), "y".to_string()],
14163            &serde_json::json!({}),
14164            None,
14165        )
14166        .unwrap();
14167        // INSERT OR IGNORE collapses the duplicate "x"; canonical
14168        // ("Dedup") auto-inserted as well, so 3 distinct aliases.
14169        assert_eq!(reg.aliases.len(), 3);
14170        assert!(reg.aliases.contains(&"Dedup".to_string()));
14171        assert!(reg.aliases.contains(&"x".to_string()));
14172        assert!(reg.aliases.contains(&"y".to_string()));
14173    }
14174
14175    // -- Pillar 2 / Stream C — kg_timeline ---------------------------------
14176
14177    /// Insert a link with an explicit `valid_from` so timeline tests can
14178    /// pin event ordering without relying on wall-clock spread.
14179    fn insert_link_at(
14180        conn: &Connection,
14181        source_id: &str,
14182        target_id: &str,
14183        relation: &str,
14184        valid_from: &str,
14185    ) {
14186        let now = chrono::Utc::now().to_rfc3339();
14187        conn.execute(
14188            "INSERT INTO memory_links (source_id, target_id, relation, created_at, valid_from) \
14189             VALUES (?1, ?2, ?3, ?4, ?5)",
14190            params![source_id, target_id, relation, now, valid_from],
14191        )
14192        .unwrap();
14193    }
14194
14195    #[test]
14196    fn create_link_populates_valid_from_for_new_rows() {
14197        let conn = test_db();
14198        let src = make_memory("kg-src", "test", Tier::Long, 5);
14199        let tgt = make_memory("kg-tgt", "test", Tier::Long, 5);
14200        insert(&conn, &src).unwrap();
14201        insert(&conn, &tgt).unwrap();
14202        create_link(&conn, &src.id, &tgt.id, "related_to").unwrap();
14203        let valid_from: Option<String> = conn
14204            .query_row(
14205                "SELECT valid_from FROM memory_links WHERE source_id = ?1",
14206                params![&src.id],
14207                |r| r.get(0),
14208            )
14209            .unwrap();
14210        assert!(
14211            valid_from.is_some(),
14212            "create_link must populate valid_from so kg_timeline can see new links"
14213        );
14214    }
14215
14216    // v0.7 H2 — schema v23: `attest_level` column present + populated.
14217    #[test]
14218    fn schema_v23_memory_links_has_attest_level_column() {
14219        let conn = test_db();
14220        assert!(
14221            column_exists(&conn, "memory_links", "attest_level"),
14222            "v23 must add attest_level column to memory_links"
14223        );
14224    }
14225
14226    // v0.7 H2 — no-keypair path: signature stays NULL, attest_level
14227    // is recorded as "unsigned". This is the v0.6.4 backward-compat
14228    // contract — operators that haven't generated a keypair keep the
14229    // pre-H2 behaviour.
14230    #[test]
14231    fn create_link_signed_without_keypair_is_unsigned() {
14232        let conn = test_db();
14233        let src = make_memory("h2-src-unsigned", "test", Tier::Long, 5);
14234        let tgt = make_memory("h2-tgt-unsigned", "test", Tier::Long, 5);
14235        insert(&conn, &src).unwrap();
14236        insert(&conn, &tgt).unwrap();
14237
14238        let level = create_link_signed(&conn, &src.id, &tgt.id, "related_to", None).unwrap();
14239        assert_eq!(level, "unsigned");
14240
14241        let (sig, attest): (Option<Vec<u8>>, Option<String>) = conn
14242            .query_row(
14243                "SELECT signature, attest_level FROM memory_links \
14244                 WHERE source_id = ?1 AND target_id = ?2",
14245                params![&src.id, &tgt.id],
14246                |r| Ok((r.get(0)?, r.get(1)?)),
14247            )
14248            .unwrap();
14249        assert!(sig.is_none(), "no keypair → signature must be NULL");
14250        assert_eq!(attest.as_deref(), Some("unsigned"));
14251    }
14252
14253    // v0.7 H2 — happy path: with an active keypair, every link write
14254    // gets a 64-byte Ed25519 signature in the `signature` column and
14255    // attest_level = "self_signed". The signature must verify against
14256    // the keypair's public key over the canonical CBOR payload.
14257    #[test]
14258    fn create_link_signed_with_keypair_persists_valid_signature() {
14259        use crate::identity::{keypair, sign as link_sign};
14260        use ed25519_dalek::Verifier;
14261
14262        let conn = test_db();
14263        let src = make_memory("h2-src-signed", "test", Tier::Long, 5);
14264        let tgt = make_memory("h2-tgt-signed", "test", Tier::Long, 5);
14265        insert(&conn, &src).unwrap();
14266        insert(&conn, &tgt).unwrap();
14267
14268        let kp = keypair::generate("alice").unwrap();
14269        let level = create_link_signed(&conn, &src.id, &tgt.id, "supersedes", Some(&kp)).unwrap();
14270        assert_eq!(level, "self_signed");
14271
14272        // Read back the persisted row and confirm the signature shape.
14273        let (sig, attest, valid_from): (Option<Vec<u8>>, Option<String>, Option<String>) = conn
14274            .query_row(
14275                "SELECT signature, attest_level, valid_from FROM memory_links \
14276                 WHERE source_id = ?1 AND target_id = ?2",
14277                params![&src.id, &tgt.id],
14278                |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)),
14279            )
14280            .unwrap();
14281        let sig_bytes = sig.expect("signature must be present when keypair is provided");
14282        assert_eq!(sig_bytes.len(), 64, "Ed25519 signature is 64 bytes");
14283        assert_eq!(attest.as_deref(), Some("self_signed"));
14284        let valid_from = valid_from.expect("valid_from must be set on the insert path");
14285
14286        // Re-derive the canonical bytes the writer signed over and
14287        // verify with the keypair's public key. This is what H3's
14288        // inbound verifier will do on every received link.
14289        let signable = link_sign::SignableLink {
14290            src_id: &src.id,
14291            dst_id: &tgt.id,
14292            relation: "supersedes",
14293            observed_by: Some(kp.agent_id.as_str()),
14294            valid_from: Some(valid_from.as_str()),
14295            valid_until: None,
14296        };
14297        let payload = link_sign::canonical_cbor(&signable).unwrap();
14298        let mut sig_arr = [0u8; 64];
14299        sig_arr.copy_from_slice(&sig_bytes);
14300        let sig_obj = ed25519_dalek::Signature::from_bytes(&sig_arr);
14301        kp.public
14302            .verify(&payload, &sig_obj)
14303            .expect("persisted signature must verify against the writer's public key");
14304    }
14305
14306    // v0.7.0 H6 (round-2) — regression: the SQLite write path must
14307    // truncate `valid_from` to microsecond precision BEFORE signing
14308    // and persisting, so the row a federation peer receives serialises
14309    // back to the same canonical RFC3339 string regardless of the
14310    // adapter that wrote it. We assert two properties:
14311    //
14312    // 1. The `valid_from` column NEVER contains a 9-digit fractional
14313    //    second (nanoseconds), only at most 6 digits (microseconds).
14314    // 2. The persisted signature verifies against canonical CBOR
14315    //    derived from the same microsecond-truncated string the row
14316    //    holds — i.e. the round-trip is byte-stable.
14317    #[test]
14318    fn h6_create_link_signed_truncates_valid_from_to_microseconds() {
14319        use crate::identity::{keypair, sign as link_sign};
14320        use ed25519_dalek::Verifier;
14321
14322        let conn = test_db();
14323        let src = make_memory("h6-src", "test", Tier::Long, 5);
14324        let tgt = make_memory("h6-tgt", "test", Tier::Long, 5);
14325        insert(&conn, &src).unwrap();
14326        insert(&conn, &tgt).unwrap();
14327
14328        let kp = keypair::generate("alice").unwrap();
14329        let level = create_link_signed(&conn, &src.id, &tgt.id, "related_to", Some(&kp)).unwrap();
14330        assert_eq!(level, "self_signed");
14331
14332        let (sig, valid_from): (Option<Vec<u8>>, Option<String>) = conn
14333            .query_row(
14334                "SELECT signature, valid_from FROM memory_links \
14335                 WHERE source_id = ?1 AND target_id = ?2",
14336                params![&src.id, &tgt.id],
14337                |r| Ok((r.get(0)?, r.get(1)?)),
14338            )
14339            .unwrap();
14340        let valid_from = valid_from.expect("valid_from set on signed insert path");
14341
14342        // RFC3339 fractional-second precision check. The string looks
14343        // like `2026-05-10T12:34:56.123456+00:00` (microsecond) or
14344        // `...:56.123456789+00:00` (nanosecond). After H6, the maximum
14345        // length of the fractional run must be 6.
14346        if let Some(dot) = valid_from.find('.') {
14347            let after = &valid_from[dot + 1..];
14348            let frac_len = after.chars().take_while(|c| c.is_ascii_digit()).count();
14349            assert!(
14350                frac_len <= 6,
14351                "H6 regression: valid_from has {frac_len}-digit fractional second; expected ≤ 6 (microseconds). Value: {valid_from}"
14352            );
14353        }
14354
14355        // Round-trip the signature against canonical CBOR computed
14356        // from the EXACT string stored in the row. If the writer
14357        // signed over a nanosecond-precision string but the column
14358        // round-trips at microsecond precision, this verify fails —
14359        // which is exactly the postgres-G3 failure mode SQLite is now
14360        // immunised against.
14361        let sig_bytes = sig.expect("signature persisted");
14362        let signable = link_sign::SignableLink {
14363            src_id: &src.id,
14364            dst_id: &tgt.id,
14365            relation: "related_to",
14366            observed_by: Some(kp.agent_id.as_str()),
14367            valid_from: Some(valid_from.as_str()),
14368            valid_until: None,
14369        };
14370        let payload = link_sign::canonical_cbor(&signable).unwrap();
14371        let mut sig_arr = [0u8; 64];
14372        sig_arr.copy_from_slice(&sig_bytes);
14373        let sig_obj = ed25519_dalek::Signature::from_bytes(&sig_arr);
14374        kp.public.verify(&payload, &sig_obj).expect(
14375            "H6 regression: signature must verify against canonical CBOR \
14376             derived from the stored (microsecond-truncated) valid_from",
14377        );
14378    }
14379
14380    // v0.7.0 fix-campaign A3 (LINK-PARITY, #690) — the cycle check
14381    // refuses a `reflects_on` edge whose target already transitively
14382    // reflects back on the source. This is the storage-layer
14383    // invariant the HTTP / SAL / federation paths now share with the
14384    // MCP path.
14385    #[test]
14386    fn a3_validate_link_pre_create_refuses_reflection_cycle() {
14387        use crate::config::{
14388            PermissionsMode, lock_permissions_mode_for_test,
14389            override_active_permissions_mode_for_test,
14390        };
14391        // The active permissions mode is process-wide; hold the
14392        // serialisation guard so parallel lib tests cannot flip the
14393        // mode out from under us. See `pin_governance_enforce_for_test`
14394        // in handlers/mod.rs for the same pattern.
14395        let _gate = lock_permissions_mode_for_test();
14396        // Pin mode to Off so the K9 evaluator stays out of the way —
14397        // this test only exercises the cycle gate.
14398        override_active_permissions_mode_for_test(PermissionsMode::Off);
14399
14400        let conn = test_db();
14401        let a = make_memory("a3-a", "ns", Tier::Long, 5);
14402        let b = make_memory("a3-b", "ns", Tier::Long, 5);
14403        let c = make_memory("a3-c", "ns", Tier::Long, 5);
14404        insert(&conn, &a).unwrap();
14405        insert(&conn, &b).unwrap();
14406        insert(&conn, &c).unwrap();
14407
14408        // Build chain: a --reflects_on--> b --reflects_on--> c.
14409        create_link(&conn, &a.id, &b.id, "reflects_on").unwrap();
14410        create_link(&conn, &b.id, &c.id, "reflects_on").unwrap();
14411
14412        // Attempting c --reflects_on--> a would close the cycle.
14413        let err = create_link(&conn, &c.id, &a.id, "reflects_on")
14414            .expect_err("cycle-closing reflects_on must be refused");
14415        let msg = err.to_string();
14416        assert!(
14417            msg.starts_with(LINK_CYCLE_ERR_PREFIX),
14418            "expected {LINK_CYCLE_ERR_PREFIX} prefix, got: {msg}"
14419        );
14420
14421        // A `related_to` edge between the same pair is still allowed —
14422        // only `reflects_on` participates in the DAG invariant.
14423        create_link(&conn, &c.id, &a.id, "related_to")
14424            .expect("related_to is not gated by the cycle check");
14425    }
14426
14427    // v0.7.0 fix-campaign A3 (LINK-PARITY, #690) — the K9 permission
14428    // pipeline gates link writes at the storage layer (not just at
14429    // the MCP entry point). A `Deny` rule on `memory_link` refuses
14430    // the write through `create_link` / `create_link_signed`.
14431    #[test]
14432    fn a3_validate_link_pre_create_respects_governance_deny() {
14433        use crate::config::{
14434            PermissionsMode, lock_permissions_mode_for_test,
14435            override_active_permissions_mode_for_test,
14436        };
14437        use crate::permissions::{
14438            PermissionRule, RuleDecision, clear_active_permission_rules_for_test,
14439            set_active_permission_rules,
14440        };
14441        let _gate = lock_permissions_mode_for_test();
14442        override_active_permissions_mode_for_test(PermissionsMode::Enforce);
14443        clear_active_permission_rules_for_test();
14444        set_active_permission_rules(vec![PermissionRule {
14445            namespace_pattern: "a3-deny/**".to_string(),
14446            op: "memory_link".to_string(),
14447            agent_pattern: "*".to_string(),
14448            decision: RuleDecision::Deny,
14449            reason: Some("test: link denied by a3 rule".to_string()),
14450        }]);
14451
14452        let conn = test_db();
14453        let s = make_memory("a3-src", "a3-deny/scope", Tier::Long, 5);
14454        let t = make_memory("a3-tgt", "a3-deny/scope", Tier::Long, 5);
14455        insert(&conn, &s).unwrap();
14456        insert(&conn, &t).unwrap();
14457
14458        let err = create_link(&conn, &s.id, &t.id, "related_to")
14459            .expect_err("a Deny rule must refuse the link write");
14460        let msg = err.to_string();
14461        assert!(
14462            msg.starts_with(LINK_PERMISSION_DENIED_ERR_PREFIX),
14463            "expected {LINK_PERMISSION_DENIED_ERR_PREFIX} prefix, got: {msg}"
14464        );
14465
14466        // Cleanup so the global registry does not leak into other tests
14467        // running in the same process.
14468        clear_active_permission_rules_for_test();
14469        override_active_permissions_mode_for_test(PermissionsMode::Advisory);
14470    }
14471
14472    // v0.7.0 fix-campaign A3 (LINK-PARITY, #690) — federation receive
14473    // path: peer-attested inbound links bypass the K9 governance
14474    // gate (the peer is trusted by mTLS + Ed25519 attestation), but
14475    // the cycle check ALWAYS runs even on peer writes.
14476    #[test]
14477    fn a3_create_link_inbound_peer_attested_bypasses_governance() {
14478        use crate::config::{
14479            PermissionsMode, lock_permissions_mode_for_test,
14480            override_active_permissions_mode_for_test,
14481        };
14482        use crate::permissions::{
14483            PermissionRule, RuleDecision, clear_active_permission_rules_for_test,
14484            set_active_permission_rules,
14485        };
14486        let _gate = lock_permissions_mode_for_test();
14487        override_active_permissions_mode_for_test(PermissionsMode::Enforce);
14488        clear_active_permission_rules_for_test();
14489        set_active_permission_rules(vec![PermissionRule {
14490            namespace_pattern: "**".to_string(),
14491            op: "memory_link".to_string(),
14492            agent_pattern: "*".to_string(),
14493            decision: RuleDecision::Deny,
14494            reason: Some("test: every link denied".to_string()),
14495        }]);
14496
14497        let conn = test_db();
14498        let s = make_memory("inbound-src", "a3-fed", Tier::Long, 5);
14499        let t = make_memory("inbound-tgt", "a3-fed", Tier::Long, 5);
14500        insert(&conn, &s).unwrap();
14501        insert(&conn, &t).unwrap();
14502
14503        // v0.7.0 issue #810 / #813 — the CHECK trigger on memory_links
14504        // refuses any peer_attested row whose signature blob is NULL /
14505        // wrong-length. The pre-#810 test passed a NULL signature here
14506        // because the legacy invariant did not police that pairing;
14507        // now we synthesise a 64-byte fake signature blob so the row
14508        // satisfies the trigger's WHEN clause. The K9-bypass property
14509        // under test is orthogonal to whether the signature bytes
14510        // actually verify (verification is `memory_verify`'s job, not
14511        // this insertion path's).
14512        let link = MemoryLink {
14513            source_id: s.id.clone(),
14514            target_id: t.id.clone(),
14515            relation: crate::models::MemoryLinkRelation::RelatedTo,
14516            created_at: chrono::Utc::now().to_rfc3339(),
14517            valid_from: None,
14518            valid_until: None,
14519            observed_by: Some("peer:remote".to_string()),
14520            signature: Some(vec![0xAB_u8; 64]),
14521            attest_level: None,
14522        };
14523
14524        // Peer-attested inbound bypasses the K9 deny.
14525        create_link_inbound(&conn, &link, "peer_attested")
14526            .expect("peer_attested must bypass K9 governance");
14527
14528        // But an unsigned inbound link is still gated locally.
14529        let link2 = MemoryLink {
14530            source_id: t.id.clone(),
14531            target_id: s.id.clone(),
14532            relation: crate::models::MemoryLinkRelation::RelatedTo,
14533            created_at: chrono::Utc::now().to_rfc3339(),
14534            valid_from: None,
14535            valid_until: None,
14536            observed_by: Some("peer:remote".to_string()),
14537            signature: None,
14538            attest_level: None,
14539        };
14540        let err = create_link_inbound(&conn, &link2, "unsigned")
14541            .expect_err("unsigned inbound must NOT bypass governance");
14542        assert!(
14543            err.to_string()
14544                .starts_with(LINK_PERMISSION_DENIED_ERR_PREFIX)
14545        );
14546
14547        clear_active_permission_rules_for_test();
14548        override_active_permissions_mode_for_test(PermissionsMode::Advisory);
14549    }
14550
14551    // v0.7.0 fix-campaign A3 (LINK-PARITY, #690) — even a trusted
14552    // peer cannot extend a `reflects_on` cycle on the receiver. The
14553    // cycle gate runs regardless of attest_level.
14554    #[test]
14555    fn a3_create_link_inbound_peer_attested_still_refuses_cycle() {
14556        use crate::config::{
14557            PermissionsMode, lock_permissions_mode_for_test,
14558            override_active_permissions_mode_for_test,
14559        };
14560        let _gate = lock_permissions_mode_for_test();
14561        override_active_permissions_mode_for_test(PermissionsMode::Off);
14562
14563        let conn = test_db();
14564        let a = make_memory("inbound-cycle-a", "ns", Tier::Long, 5);
14565        let b = make_memory("inbound-cycle-b", "ns", Tier::Long, 5);
14566        insert(&conn, &a).unwrap();
14567        insert(&conn, &b).unwrap();
14568        create_link(&conn, &a.id, &b.id, "reflects_on").unwrap();
14569
14570        let cycle_link = MemoryLink {
14571            source_id: b.id.clone(),
14572            target_id: a.id.clone(),
14573            relation: crate::models::MemoryLinkRelation::ReflectsOn,
14574            created_at: chrono::Utc::now().to_rfc3339(),
14575            valid_from: None,
14576            valid_until: None,
14577            observed_by: Some("peer:remote".to_string()),
14578            signature: None,
14579            attest_level: None,
14580        };
14581        let err = create_link_inbound(&conn, &cycle_link, "peer_attested")
14582            .expect_err("cycle check must run even on peer_attested inbound");
14583        assert!(err.to_string().starts_with(LINK_CYCLE_ERR_PREFIX));
14584    }
14585
14586    // v0.7.0 H6 (round-2) — pure-function test: the truncation helper
14587    // itself must collapse only sub-microsecond digits and leave
14588    // microsecond-aligned inputs unchanged.
14589    #[test]
14590    fn h6_truncate_to_microseconds_drops_nanos() {
14591        use chrono::{TimeZone, Timelike};
14592        let ns = Utc.with_ymd_and_hms(2026, 5, 10, 12, 34, 56).unwrap();
14593        let ns = ns.with_nanosecond(123_456_789).unwrap();
14594        let truncated = truncate_to_microseconds(ns);
14595        // 123_456_789 ns → 123_456 µs → 123_456_000 ns.
14596        assert_eq!(truncated.nanosecond(), 123_456_000);
14597        // Round-trip through to_rfc3339 must produce a 6-digit
14598        // fractional second (the property H6 commits to).
14599        let s = truncated.to_rfc3339();
14600        let dot = s.find('.').expect("fractional second present");
14601        let frac = &s[dot + 1..];
14602        let frac_len = frac.chars().take_while(|c| c.is_ascii_digit()).count();
14603        assert_eq!(frac_len, 6, "expected exactly 6-digit fractional; got: {s}");
14604    }
14605
14606    #[test]
14607    fn kg_timeline_returns_events_ordered_by_valid_from_ascending() {
14608        let conn = test_db();
14609        let src = make_memory("alpha", "kg/projects/alpha", Tier::Long, 5);
14610        let s1 = make_memory("kickoff", "kg/projects/alpha", Tier::Long, 5);
14611        let s2 = make_memory("design phase", "kg/projects/alpha", Tier::Long, 5);
14612        let s3 = make_memory("implementation", "kg/projects/alpha", Tier::Long, 5);
14613        insert(&conn, &src).unwrap();
14614        insert(&conn, &s1).unwrap();
14615        insert(&conn, &s2).unwrap();
14616        insert(&conn, &s3).unwrap();
14617
14618        // Insert in a deliberately-shuffled order so ORDER BY isn't
14619        // a happy accident of insertion order.
14620        insert_link_at(
14621            &conn,
14622            &src.id,
14623            &s2.id,
14624            "supersedes",
14625            "2026-02-03T00:00:00+00:00",
14626        );
14627        insert_link_at(
14628            &conn,
14629            &src.id,
14630            &s1.id,
14631            "related_to",
14632            "2026-01-15T00:00:00+00:00",
14633        );
14634        insert_link_at(
14635            &conn,
14636            &src.id,
14637            &s3.id,
14638            "supersedes",
14639            "2026-03-22T00:00:00+00:00",
14640        );
14641
14642        let events = kg_timeline(&conn, &src.id, None, None, None).unwrap();
14643        assert_eq!(events.len(), 3);
14644        assert_eq!(events[0].target_id, s1.id);
14645        assert_eq!(events[1].target_id, s2.id);
14646        assert_eq!(events[2].target_id, s3.id);
14647        assert_eq!(events[0].title, "kickoff");
14648        assert_eq!(events[1].relation, "supersedes");
14649        assert_eq!(events[0].target_namespace, "kg/projects/alpha");
14650    }
14651
14652    #[test]
14653    fn kg_timeline_filters_by_since_inclusive() {
14654        let conn = test_db();
14655        let src = make_memory("e", "ns", Tier::Long, 5);
14656        let t1 = make_memory("e1", "ns", Tier::Long, 5);
14657        let t2 = make_memory("e2", "ns", Tier::Long, 5);
14658        insert(&conn, &src).unwrap();
14659        insert(&conn, &t1).unwrap();
14660        insert(&conn, &t2).unwrap();
14661        insert_link_at(
14662            &conn,
14663            &src.id,
14664            &t1.id,
14665            "related_to",
14666            "2026-01-01T00:00:00+00:00",
14667        );
14668        insert_link_at(
14669            &conn,
14670            &src.id,
14671            &t2.id,
14672            "related_to",
14673            "2026-03-01T00:00:00+00:00",
14674        );
14675
14676        let events = kg_timeline(
14677            &conn,
14678            &src.id,
14679            Some("2026-02-01T00:00:00+00:00"),
14680            None,
14681            None,
14682        )
14683        .unwrap();
14684        assert_eq!(events.len(), 1);
14685        assert_eq!(events[0].target_id, t2.id);
14686
14687        // Boundary: since == valid_from should match (inclusive).
14688        let on_boundary = kg_timeline(
14689            &conn,
14690            &src.id,
14691            Some("2026-03-01T00:00:00+00:00"),
14692            None,
14693            None,
14694        )
14695        .unwrap();
14696        assert_eq!(on_boundary.len(), 1);
14697    }
14698
14699    #[test]
14700    fn kg_timeline_filters_by_until_inclusive() {
14701        let conn = test_db();
14702        let src = make_memory("e", "ns", Tier::Long, 5);
14703        let t1 = make_memory("e1", "ns", Tier::Long, 5);
14704        let t2 = make_memory("e2", "ns", Tier::Long, 5);
14705        insert(&conn, &src).unwrap();
14706        insert(&conn, &t1).unwrap();
14707        insert(&conn, &t2).unwrap();
14708        insert_link_at(
14709            &conn,
14710            &src.id,
14711            &t1.id,
14712            "related_to",
14713            "2026-01-01T00:00:00+00:00",
14714        );
14715        insert_link_at(
14716            &conn,
14717            &src.id,
14718            &t2.id,
14719            "related_to",
14720            "2026-03-01T00:00:00+00:00",
14721        );
14722
14723        let events = kg_timeline(
14724            &conn,
14725            &src.id,
14726            None,
14727            Some("2026-02-01T00:00:00+00:00"),
14728            None,
14729        )
14730        .unwrap();
14731        assert_eq!(events.len(), 1);
14732        assert_eq!(events[0].target_id, t1.id);
14733    }
14734
14735    #[test]
14736    fn kg_timeline_skips_links_with_null_valid_from() {
14737        let conn = test_db();
14738        let src = make_memory("s", "ns", Tier::Long, 5);
14739        let t1 = make_memory("t1", "ns", Tier::Long, 5);
14740        let t2 = make_memory("t2", "ns", Tier::Long, 5);
14741        insert(&conn, &src).unwrap();
14742        insert(&conn, &t1).unwrap();
14743        insert(&conn, &t2).unwrap();
14744        // Direct insert with NULL valid_from to simulate an external
14745        // writer that bypassed `create_link`.
14746        let now = chrono::Utc::now().to_rfc3339();
14747        // v0.7.0 fix campaign R1-M2 — direct-SQL writer must use a
14748        // value in the closed-set; the trigger now refuses 'rel'.
14749        conn.execute(
14750            "INSERT INTO memory_links (source_id, target_id, relation, created_at, valid_from) \
14751             VALUES (?1, ?2, 'related_to', ?3, NULL)",
14752            params![&src.id, &t1.id, &now],
14753        )
14754        .unwrap();
14755        insert_link_at(
14756            &conn,
14757            &src.id,
14758            &t2.id,
14759            "supersedes",
14760            "2026-01-01T00:00:00+00:00",
14761        );
14762
14763        let events = kg_timeline(&conn, &src.id, None, None, None).unwrap();
14764        assert_eq!(events.len(), 1);
14765        assert_eq!(events[0].target_id, t2.id);
14766    }
14767
14768    #[test]
14769    fn kg_timeline_excludes_links_where_source_is_target() {
14770        // The query is anchored on `source_id`; inbound edges (where the
14771        // entity is the target) are intentionally NOT part of the
14772        // timeline. This guards against accidentally widening the
14773        // contract to a bidirectional view.
14774        let conn = test_db();
14775        let entity = make_memory("entity", "ns", Tier::Long, 5);
14776        let other = make_memory("other", "ns", Tier::Long, 5);
14777        insert(&conn, &entity).unwrap();
14778        insert(&conn, &other).unwrap();
14779        insert_link_at(
14780            &conn,
14781            &other.id,
14782            &entity.id,
14783            "related_to",
14784            "2026-01-01T00:00:00+00:00",
14785        );
14786        let events = kg_timeline(&conn, &entity.id, None, None, None).unwrap();
14787        assert!(events.is_empty());
14788    }
14789
14790    #[test]
14791    fn kg_timeline_limit_clamped_to_max() {
14792        let conn = test_db();
14793        let src = make_memory("s", "ns", Tier::Long, 5);
14794        insert(&conn, &src).unwrap();
14795        for i in 0..5 {
14796            let t = make_memory(&format!("t{i}"), "ns", Tier::Long, 5);
14797            insert(&conn, &t).unwrap();
14798            insert_link_at(
14799                &conn,
14800                &src.id,
14801                &t.id,
14802                "related_to",
14803                &format!("2026-01-0{}T00:00:00+00:00", i + 1),
14804            );
14805        }
14806        // Caller passes a wildly oversized limit — should be clamped
14807        // to KG_TIMELINE_MAX_LIMIT (i.e. accepted, not errored), and
14808        // since the row count is small, should return all 5.
14809        let events = kg_timeline(&conn, &src.id, None, None, Some(usize::MAX)).unwrap();
14810        assert_eq!(events.len(), 5);
14811
14812        // Caller passes 0 — clamp to 1.
14813        let one = kg_timeline(&conn, &src.id, None, None, Some(0)).unwrap();
14814        assert_eq!(one.len(), 1);
14815    }
14816
14817    #[test]
14818    fn kg_timeline_carries_observed_by_and_valid_until() {
14819        let conn = test_db();
14820        let src = make_memory("s", "ns", Tier::Long, 5);
14821        let t = make_memory("t", "ns", Tier::Long, 5);
14822        insert(&conn, &src).unwrap();
14823        insert(&conn, &t).unwrap();
14824        let now = chrono::Utc::now().to_rfc3339();
14825        conn.execute(
14826            "INSERT INTO memory_links (source_id, target_id, relation, created_at, valid_from, valid_until, observed_by) \
14827             VALUES (?1, ?2, 'supersedes', ?3, '2026-01-01T00:00:00+00:00', '2026-12-31T23:59:59+00:00', 'agent-pm-1')",
14828            params![&src.id, &t.id, &now],
14829        )
14830        .unwrap();
14831        let events = kg_timeline(&conn, &src.id, None, None, None).unwrap();
14832        assert_eq!(events.len(), 1);
14833        assert_eq!(events[0].observed_by.as_deref(), Some("agent-pm-1"));
14834        assert_eq!(
14835            events[0].valid_until.as_deref(),
14836            Some("2026-12-31T23:59:59+00:00")
14837        );
14838    }
14839
14840    #[test]
14841    fn kg_timeline_empty_for_unknown_source() {
14842        let conn = test_db();
14843        let events = kg_timeline(&conn, "nonexistent-id", None, None, None).unwrap();
14844        assert!(events.is_empty());
14845    }
14846
14847    // -- Pillar 2 / Stream C — kg_invalidate -------------------------------
14848
14849    #[test]
14850    fn invalidate_link_sets_valid_until_to_provided_timestamp() {
14851        let conn = test_db();
14852        let src = make_memory("inv-s", "test", Tier::Long, 5);
14853        let tgt = make_memory("inv-t", "test", Tier::Long, 5);
14854        insert(&conn, &src).unwrap();
14855        insert(&conn, &tgt).unwrap();
14856        create_link(&conn, &src.id, &tgt.id, "related_to").unwrap();
14857        let stamp = "2026-12-31T23:59:59+00:00";
14858        let res = invalidate_link(&conn, &src.id, &tgt.id, "related_to", Some(stamp))
14859            .unwrap()
14860            .expect("link must exist");
14861        assert_eq!(res.valid_until, stamp);
14862        assert!(res.previous_valid_until.is_none());
14863        let stored: Option<String> = conn
14864            .query_row(
14865                "SELECT valid_until FROM memory_links \
14866                 WHERE source_id = ?1 AND target_id = ?2 AND relation = ?3",
14867                params![&src.id, &tgt.id, "related_to"],
14868                |r| r.get(0),
14869            )
14870            .unwrap();
14871        assert_eq!(stored.as_deref(), Some(stamp));
14872    }
14873
14874    #[test]
14875    fn invalidate_link_defaults_to_now_when_no_timestamp_provided() {
14876        let conn = test_db();
14877        let src = make_memory("inv-s", "test", Tier::Long, 5);
14878        let tgt = make_memory("inv-t", "test", Tier::Long, 5);
14879        insert(&conn, &src).unwrap();
14880        insert(&conn, &tgt).unwrap();
14881        create_link(&conn, &src.id, &tgt.id, "related_to").unwrap();
14882        let res = invalidate_link(&conn, &src.id, &tgt.id, "related_to", None)
14883            .unwrap()
14884            .expect("link must exist");
14885        // The default is wall-clock now; assert it parses as RFC3339 and
14886        // is within a small window of the test's "now" (allow 60s skew
14887        // to accommodate slow runners).
14888        let parsed = chrono::DateTime::parse_from_rfc3339(&res.valid_until)
14889            .expect("default valid_until must be RFC3339");
14890        let now = chrono::Utc::now();
14891        let drift = now.signed_duration_since(parsed.with_timezone(&chrono::Utc));
14892        assert!(
14893            drift.num_seconds().abs() < 60,
14894            "default valid_until {} should be near now {now}",
14895            res.valid_until
14896        );
14897    }
14898
14899    #[test]
14900    fn invalidate_link_returns_none_for_unknown_triple() {
14901        let conn = test_db();
14902        // No memories or links created.
14903        let res = invalidate_link(&conn, "missing-src", "missing-tgt", "related_to", None).unwrap();
14904        assert!(res.is_none());
14905    }
14906
14907    #[test]
14908    fn invalidate_link_returns_none_when_relation_does_not_match() {
14909        // Link exists for ("related_to") but caller asks for ("supersedes").
14910        let conn = test_db();
14911        let src = make_memory("inv-s", "test", Tier::Long, 5);
14912        let tgt = make_memory("inv-t", "test", Tier::Long, 5);
14913        insert(&conn, &src).unwrap();
14914        insert(&conn, &tgt).unwrap();
14915        create_link(&conn, &src.id, &tgt.id, "related_to").unwrap();
14916        let res = invalidate_link(&conn, &src.id, &tgt.id, "supersedes", None).unwrap();
14917        assert!(res.is_none(), "must not match across relation values");
14918    }
14919
14920    #[test]
14921    fn invalidate_link_overwrites_existing_valid_until_and_reports_prior() {
14922        let conn = test_db();
14923        let src = make_memory("inv-s", "test", Tier::Long, 5);
14924        let tgt = make_memory("inv-t", "test", Tier::Long, 5);
14925        insert(&conn, &src).unwrap();
14926        insert(&conn, &tgt).unwrap();
14927        create_link(&conn, &src.id, &tgt.id, "related_to").unwrap();
14928        let first = "2026-06-01T00:00:00+00:00";
14929        let second = "2026-12-01T00:00:00+00:00";
14930        let r1 = invalidate_link(&conn, &src.id, &tgt.id, "related_to", Some(first))
14931            .unwrap()
14932            .unwrap();
14933        assert!(r1.previous_valid_until.is_none());
14934        let r2 = invalidate_link(&conn, &src.id, &tgt.id, "related_to", Some(second))
14935            .unwrap()
14936            .unwrap();
14937        assert_eq!(r2.previous_valid_until.as_deref(), Some(first));
14938        assert_eq!(r2.valid_until, second);
14939    }
14940
14941    #[test]
14942    fn invalidate_link_distinguishes_relation_when_multiple_links_share_endpoints() {
14943        // Two links between the same pair, different relations. Invalidating
14944        // one must not affect the other.
14945        let conn = test_db();
14946        let src = make_memory("inv-s", "test", Tier::Long, 5);
14947        let tgt = make_memory("inv-t", "test", Tier::Long, 5);
14948        insert(&conn, &src).unwrap();
14949        insert(&conn, &tgt).unwrap();
14950        create_link(&conn, &src.id, &tgt.id, "related_to").unwrap();
14951        create_link(&conn, &src.id, &tgt.id, "supersedes").unwrap();
14952        let stamp = "2026-07-15T12:00:00+00:00";
14953        invalidate_link(&conn, &src.id, &tgt.id, "related_to", Some(stamp))
14954            .unwrap()
14955            .unwrap();
14956        let related: Option<String> = conn
14957            .query_row(
14958                "SELECT valid_until FROM memory_links \
14959                 WHERE source_id = ?1 AND target_id = ?2 AND relation = 'related_to'",
14960                params![&src.id, &tgt.id],
14961                |r| r.get(0),
14962            )
14963            .unwrap();
14964        let supers: Option<String> = conn
14965            .query_row(
14966                "SELECT valid_until FROM memory_links \
14967                 WHERE source_id = ?1 AND target_id = ?2 AND relation = 'supersedes'",
14968                params![&src.id, &tgt.id],
14969                |r| r.get(0),
14970            )
14971            .unwrap();
14972        assert_eq!(related.as_deref(), Some(stamp));
14973        assert!(
14974            supers.is_none(),
14975            "the sibling 'supersedes' link must remain valid"
14976        );
14977    }
14978
14979    #[test]
14980    fn invalidate_link_preserves_other_columns() {
14981        // valid_from, observed_by, created_at, signature must not be
14982        // touched by the invalidate UPDATE.
14983        let conn = test_db();
14984        let src = make_memory("inv-s", "test", Tier::Long, 5);
14985        let tgt = make_memory("inv-t", "test", Tier::Long, 5);
14986        insert(&conn, &src).unwrap();
14987        insert(&conn, &tgt).unwrap();
14988        let now = chrono::Utc::now().to_rfc3339();
14989        conn.execute(
14990            "INSERT INTO memory_links \
14991             (source_id, target_id, relation, created_at, valid_from, observed_by) \
14992             VALUES (?1, ?2, 'related_to', ?3, '2026-01-01T00:00:00+00:00', 'agent-x')",
14993            params![&src.id, &tgt.id, &now],
14994        )
14995        .unwrap();
14996        invalidate_link(
14997            &conn,
14998            &src.id,
14999            &tgt.id,
15000            "related_to",
15001            Some("2026-12-31T23:59:59+00:00"),
15002        )
15003        .unwrap()
15004        .unwrap();
15005        let (vf, ob, ca): (Option<String>, Option<String>, String) = conn
15006            .query_row(
15007                "SELECT valid_from, observed_by, created_at FROM memory_links \
15008                 WHERE source_id = ?1 AND target_id = ?2 AND relation = 'related_to'",
15009                params![&src.id, &tgt.id],
15010                |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)),
15011            )
15012            .unwrap();
15013        assert_eq!(vf.as_deref(), Some("2026-01-01T00:00:00+00:00"));
15014        assert_eq!(ob.as_deref(), Some("agent-x"));
15015        assert_eq!(ca, now);
15016    }
15017
15018    #[test]
15019    fn kg_query_default_excludes_invalidated_edges() {
15020        // NHI-P3-T7 regression: prior versions returned invalidated
15021        // edges in default kg_query results. The "current view" filter
15022        // must exclude any edge whose `valid_until` lies in the past.
15023        let conn = test_db();
15024        let src = make_memory("inv-src", "ns", Tier::Long, 5);
15025        let live = make_memory("inv-live", "ns", Tier::Long, 5);
15026        let dead = make_memory("inv-dead", "ns", Tier::Long, 5);
15027        insert(&conn, &src).unwrap();
15028        insert(&conn, &live).unwrap();
15029        insert(&conn, &dead).unwrap();
15030        // Live edge — no valid_until.
15031        insert_link_full(&conn, &src.id, &live.id, "related_to", None, None, None);
15032        // Dead edge — valid_until set in the past.
15033        insert_link_full(
15034            &conn,
15035            &src.id,
15036            &dead.id,
15037            "supersedes",
15038            None,
15039            Some("2020-01-01T00:00:00+00:00"),
15040            None,
15041        );
15042
15043        // Default ("current view"): only the live edge shows up.
15044        let current = kg_query(&conn, &src.id, 1, None, None, None, false).unwrap();
15045        assert_eq!(current.len(), 1);
15046        assert_eq!(current[0].target_id, live.id);
15047
15048        // Opt-in: include_invalidated=true returns both edges.
15049        let full = kg_query(&conn, &src.id, 1, None, None, None, true).unwrap();
15050        assert_eq!(full.len(), 2);
15051    }
15052
15053    #[test]
15054    fn default_for_managed_namespace_helper_yields_write_owner() {
15055        // NHI-P4-T19 (v0.7.0 NHI testing): the
15056        // `GovernancePolicy::default_for_managed_namespace` helper
15057        // exists so operators can opt into K9 namespace-lock semantics
15058        // by writing the policy into their standard memory's metadata.
15059        // Changing the implicit fallback in `read_namespace_policy`
15060        // is deferred to v0.7.1 because it would break inheritance
15061        // chains where parent and child standards were registered
15062        // under distinct agent identities. Tests ensures the helper
15063        // returns the documented shape.
15064        let policy = crate::models::GovernancePolicy::default_for_managed_namespace();
15065        assert_eq!(policy.core.write, crate::models::GovernanceLevel::Owner);
15066        assert_eq!(policy.core.promote, crate::models::GovernanceLevel::Any);
15067        assert_eq!(policy.core.delete, crate::models::GovernanceLevel::Owner);
15068        assert!(policy.core.inherit);
15069    }
15070
15071    #[test]
15072    fn namespace_set_standard_with_explicit_owner_policy_enforces_lock() {
15073        // NHI-P4-T19 regression: when the operator explicitly writes
15074        // `governance.write=owner` into the standard memory's
15075        // metadata, the namespace lock is enforced. This is the
15076        // opt-in path the v0.7.0 verdict recommends documenting; the
15077        // helper `default_for_managed_namespace` is the canonical
15078        // shape.
15079        let conn = test_db();
15080        let mut standard = make_memory("std", "ns/locked", Tier::Long, 8);
15081        let policy =
15082            serde_json::to_value(crate::models::GovernancePolicy::default_for_managed_namespace())
15083                .unwrap();
15084        standard.metadata = serde_json::json!({"governance": policy});
15085        let standard_id = insert(&conn, &standard).unwrap();
15086        set_namespace_standard(&conn, "ns/locked", &standard_id, None).unwrap();
15087
15088        let resolved = resolve_governance_policy(&conn, "ns/locked")
15089            .expect("policy must resolve when explicitly set");
15090        assert_eq!(resolved.core.write, crate::models::GovernanceLevel::Owner);
15091    }
15092
15093    /// F1 regression (v0.7.0 round-2-fixes): when a parent namespace
15094    /// has `governance.write = owner` with `inherit: true` and a deep
15095    /// child has no standard of its own, the owner-level check must
15096    /// resolve the namespace owner by walking the same chain that
15097    /// `resolve_governance_policy` walks. Pre-fix the helper looked
15098    /// only at the leaf's standard, returning None and producing a
15099    /// "no resolvable owner" Deny even for the rightful owner.
15100    #[test]
15101    fn enforce_governance_inherits_owner_for_deep_child_owner_write() {
15102        use crate::config::{
15103            PermissionsMode, lock_permissions_mode_for_test,
15104            override_active_permissions_mode_for_test,
15105        };
15106        use crate::models::{
15107            ApproverType, CorePolicy, GovernanceDecision, GovernanceLevel, GovernancePolicy,
15108            GovernedAction, default_metadata,
15109        };
15110
15111        let _gate = lock_permissions_mode_for_test();
15112        override_active_permissions_mode_for_test(PermissionsMode::Enforce);
15113
15114        let conn = test_db();
15115
15116        // Seed a parent standard that enforces write=owner with inherit=true.
15117        let parent_ns = "f1/parent";
15118        let owner = "ai:alice";
15119        let policy = GovernancePolicy {
15120            core: CorePolicy {
15121                write: GovernanceLevel::Owner,
15122                promote: GovernanceLevel::Any,
15123                delete: GovernanceLevel::Owner,
15124                approver: ApproverType::Human,
15125                inherit: true,
15126                max_reflection_depth: None,
15127            },
15128            ..Default::default()
15129        };
15130
15131        let now = chrono::Utc::now().to_rfc3339();
15132        let mut metadata = default_metadata();
15133        if let Some(obj) = metadata.as_object_mut() {
15134            obj.insert(
15135                "agent_id".to_string(),
15136                serde_json::Value::String(owner.to_string()),
15137            );
15138            obj.insert(
15139                "governance".to_string(),
15140                serde_json::to_value(&policy).unwrap(),
15141            );
15142        }
15143        let standard = Memory {
15144            id: uuid::Uuid::new_v4().to_string(),
15145            tier: Tier::Long,
15146            namespace: format!("_standards-{parent_ns}"),
15147            title: "f1-standard".to_string(),
15148            content: "f1 policy".to_string(),
15149            tags: vec![],
15150            priority: 9,
15151            confidence: 1.0,
15152            source: "test".to_string(),
15153            access_count: 0,
15154            created_at: now.clone(),
15155            updated_at: now,
15156            last_accessed_at: None,
15157            expires_at: None,
15158            metadata,
15159            reflection_depth: 0,
15160            memory_kind: crate::models::MemoryKind::Observation,
15161            entity_id: None,
15162            persona_version: None,
15163            citations: Vec::new(),
15164            source_uri: None,
15165            source_span: None,
15166            confidence_source: ConfidenceSource::CallerProvided,
15167            confidence_signals: None,
15168            confidence_decayed_at: None,
15169            version: 1,
15170        };
15171        let standard_id = insert(&conn, &standard).unwrap();
15172        set_namespace_standard(&conn, parent_ns, &standard_id, None).unwrap();
15173
15174        // Deep child has NO standard of its own; everything must
15175        // resolve via the chain walk.
15176        let child_ns = "f1/parent/a/b/c";
15177        let payload = serde_json::json!({"title": "deep-child"});
15178
15179        // Owner-level write by the rightful owner: ALLOW.
15180        let allow = enforce_governance(
15181            &conn,
15182            GovernedAction::Store,
15183            child_ns,
15184            owner,
15185            None,
15186            None,
15187            &payload,
15188        )
15189        .expect("enforce_governance must not error on inherited owner policy");
15190        assert!(
15191            matches!(allow, GovernanceDecision::Allow),
15192            "owner write at deep child must Allow when chain walk finds the parent's owner: got {allow:?}"
15193        );
15194
15195        // Owner-level write by a non-owner: DENY.
15196        let deny = enforce_governance(
15197            &conn,
15198            GovernedAction::Store,
15199            child_ns,
15200            "ai:eve",
15201            None,
15202            None,
15203            &payload,
15204        )
15205        .expect("enforce_governance must not error");
15206        match deny {
15207            GovernanceDecision::Deny(refusal) => {
15208                assert!(
15209                    refusal.reason.contains("not the owner"),
15210                    "non-owner deny should cite ownership mismatch, got: {refusal:?}"
15211                );
15212                assert_eq!(
15213                    refusal.denied_level,
15214                    GovernanceLevel::Owner,
15215                    "owner-level refusal must carry GovernanceLevel::Owner; got {refusal:?}",
15216                );
15217            }
15218            other => panic!("expected Deny for non-owner, got {other:?}"),
15219        }
15220    }
15221
15222    /// F1 corollary: `inherit = false` on the parent must STOP the
15223    /// chain walk at the parent. The deep child has no policy of its
15224    /// own and the parent declines to share, so the action is
15225    /// ungoverned (Allow).
15226    ///
15227    /// Note: under `resolve_governance_policy` semantics, the
15228    /// `inherit` flag is documentation/contract — the leaf-first walk
15229    /// stops at the most-specific policy regardless. The flag flows
15230    /// through to consumers (e.g. pending_action approver resolution)
15231    /// to signal "do not re-walk above me." This test pins the
15232    /// observable outcome: a deep child with NO standard inherits a
15233    /// parent policy regardless of the `inherit` flag value, because
15234    /// the walk only stops at policies that exist. The flag's
15235    /// "stop" semantics apply when an intermediate policy declines to
15236    /// be inherited above itself, not below.
15237    #[test]
15238    fn enforce_governance_deep_child_with_inherit_false_still_resolves_via_walk() {
15239        use crate::config::{
15240            PermissionsMode, lock_permissions_mode_for_test,
15241            override_active_permissions_mode_for_test,
15242        };
15243        use crate::models::{
15244            ApproverType, CorePolicy, GovernanceDecision, GovernanceLevel, GovernancePolicy,
15245            GovernedAction, default_metadata,
15246        };
15247
15248        let _gate = lock_permissions_mode_for_test();
15249        override_active_permissions_mode_for_test(PermissionsMode::Enforce);
15250
15251        let conn = test_db();
15252
15253        // Parent has inherit=false: descendants without a policy of
15254        // their own should still resolve to this policy on the
15255        // leaf-first walk; inherit=false is a forward-blocker
15256        // ("nothing above me applies to namespaces I govern"), not a
15257        // backward-blocker ("namespaces below me cannot inherit").
15258        // This matches the documented semantics in
15259        // `resolve_governance_policy`'s docstring.
15260        let parent_ns = "f1nb/parent";
15261        let owner = "ai:alice";
15262        let policy = GovernancePolicy {
15263            core: CorePolicy {
15264                write: GovernanceLevel::Owner,
15265                promote: GovernanceLevel::Any,
15266                delete: GovernanceLevel::Owner,
15267                approver: ApproverType::Human,
15268                inherit: false,
15269                max_reflection_depth: None,
15270            },
15271            ..Default::default()
15272        };
15273        let now = chrono::Utc::now().to_rfc3339();
15274        let mut metadata = default_metadata();
15275        if let Some(obj) = metadata.as_object_mut() {
15276            obj.insert(
15277                "agent_id".to_string(),
15278                serde_json::Value::String(owner.to_string()),
15279            );
15280            obj.insert(
15281                "governance".to_string(),
15282                serde_json::to_value(&policy).unwrap(),
15283            );
15284        }
15285        let standard = Memory {
15286            id: uuid::Uuid::new_v4().to_string(),
15287            tier: Tier::Long,
15288            namespace: format!("_standards-{parent_ns}"),
15289            title: "f1nb-standard".to_string(),
15290            content: "policy".to_string(),
15291            tags: vec![],
15292            priority: 9,
15293            confidence: 1.0,
15294            source: "test".to_string(),
15295            access_count: 0,
15296            created_at: now.clone(),
15297            updated_at: now,
15298            last_accessed_at: None,
15299            expires_at: None,
15300            metadata,
15301            reflection_depth: 0,
15302            memory_kind: crate::models::MemoryKind::Observation,
15303            entity_id: None,
15304            persona_version: None,
15305            citations: Vec::new(),
15306            source_uri: None,
15307            source_span: None,
15308            confidence_source: ConfidenceSource::CallerProvided,
15309            confidence_signals: None,
15310            confidence_decayed_at: None,
15311            version: 1,
15312        };
15313        let standard_id = insert(&conn, &standard).unwrap();
15314        set_namespace_standard(&conn, parent_ns, &standard_id, None).unwrap();
15315
15316        // Deep child write by owner is still Allow (chain walk finds
15317        // parent owner; inherit=false on the parent does not block
15318        // descendants).
15319        let decision = enforce_governance(
15320            &conn,
15321            GovernedAction::Store,
15322            "f1nb/parent/x/y",
15323            owner,
15324            None,
15325            None,
15326            &serde_json::json!({}),
15327        )
15328        .unwrap();
15329        assert!(
15330            matches!(decision, GovernanceDecision::Allow),
15331            "owner write at deep child resolves via leaf-first walk: got {decision:?}"
15332        );
15333    }
15334
15335    #[test]
15336    fn find_paths_default_excludes_invalidated_edges() {
15337        // NHI-P3-T7 regression: find_paths must skip edges whose
15338        // valid_until lies in the past unless the caller asks for the
15339        // full historical link graph.
15340        let conn = test_db();
15341        let a = make_memory("fp-a", "ns", Tier::Long, 5);
15342        let b = make_memory("fp-b", "ns", Tier::Long, 5);
15343        let c = make_memory("fp-c", "ns", Tier::Long, 5);
15344        insert(&conn, &a).unwrap();
15345        insert(&conn, &b).unwrap();
15346        insert(&conn, &c).unwrap();
15347        // Live path A → C.
15348        insert_link_full(&conn, &a.id, &c.id, "related_to", None, None, None);
15349        // Dead path A → B → C (the A→B leg is invalidated).
15350        insert_link_full(
15351            &conn,
15352            &a.id,
15353            &b.id,
15354            "supersedes",
15355            None,
15356            Some("2020-01-01T00:00:00+00:00"),
15357            None,
15358        );
15359        insert_link_full(&conn, &b.id, &c.id, "related_to", None, None, None);
15360
15361        // Default: only the live A→C path.
15362        let current = find_paths(&conn, &a.id, &c.id, Some(3), None, false).unwrap();
15363        assert_eq!(current.len(), 1);
15364        assert_eq!(current[0], vec![a.id.clone(), c.id.clone()]);
15365
15366        // Opt-in: include_invalidated=true returns both paths.
15367        let full = find_paths(&conn, &a.id, &c.id, Some(3), None, true).unwrap();
15368        assert_eq!(full.len(), 2);
15369    }
15370
15371    // -- Pillar 2 / Stream C — kg_query (depth=1) ---------------------------
15372
15373    /// Insert a link with explicit `temporal/observed_by` columns so the
15374    /// `kg_query` filter tests can pin behavior without relying on
15375    /// wall-clock spread.
15376    fn insert_link_full(
15377        conn: &Connection,
15378        source_id: &str,
15379        target_id: &str,
15380        relation: &str,
15381        valid_from: Option<&str>,
15382        valid_until: Option<&str>,
15383        observed_by: Option<&str>,
15384    ) {
15385        let now = chrono::Utc::now().to_rfc3339();
15386        conn.execute(
15387            "INSERT INTO memory_links \
15388             (source_id, target_id, relation, created_at, valid_from, valid_until, observed_by) \
15389             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
15390            params![
15391                source_id,
15392                target_id,
15393                relation,
15394                now,
15395                valid_from,
15396                valid_until,
15397                observed_by
15398            ],
15399        )
15400        .unwrap();
15401    }
15402
15403    #[test]
15404    fn kg_query_returns_outbound_neighbors_at_depth_1() {
15405        let conn = test_db();
15406        let src = make_memory("alpha", "kg/projects/alpha", Tier::Long, 5);
15407        let n1 = make_memory("kickoff", "kg/projects/alpha", Tier::Long, 5);
15408        let n2 = make_memory("design", "kg/projects/alpha", Tier::Long, 5);
15409        insert(&conn, &src).unwrap();
15410        insert(&conn, &n1).unwrap();
15411        insert(&conn, &n2).unwrap();
15412        insert_link_full(
15413            &conn,
15414            &src.id,
15415            &n1.id,
15416            "related_to",
15417            Some("2026-01-15T00:00:00+00:00"),
15418            None,
15419            Some("agent-1"),
15420        );
15421        insert_link_full(
15422            &conn,
15423            &src.id,
15424            &n2.id,
15425            "supersedes",
15426            Some("2026-02-03T00:00:00+00:00"),
15427            None,
15428            Some("agent-2"),
15429        );
15430
15431        let nodes = kg_query(&conn, &src.id, 1, None, None, None, false).unwrap();
15432        assert_eq!(nodes.len(), 2);
15433        // Ordered by COALESCE(valid_from, created_at) ASC.
15434        assert_eq!(nodes[0].target_id, n1.id);
15435        assert_eq!(nodes[1].target_id, n2.id);
15436        assert_eq!(nodes[0].title, "kickoff");
15437        assert_eq!(nodes[0].relation, "related_to");
15438        assert_eq!(nodes[0].observed_by.as_deref(), Some("agent-1"));
15439        assert_eq!(nodes[0].depth, 1);
15440        assert_eq!(nodes[0].path, format!("{}->{}", src.id, n1.id));
15441        assert_eq!(nodes[0].target_namespace, "kg/projects/alpha");
15442    }
15443
15444    #[test]
15445    fn kg_query_filters_by_valid_at_window() {
15446        let conn = test_db();
15447        let src = make_memory("e", "ns", Tier::Long, 5);
15448        let t1 = make_memory("e1", "ns", Tier::Long, 5);
15449        let t2 = make_memory("e2", "ns", Tier::Long, 5);
15450        insert(&conn, &src).unwrap();
15451        insert(&conn, &t1).unwrap();
15452        insert(&conn, &t2).unwrap();
15453        // t1 valid 2026-01-01 → 2026-02-01; t2 valid from 2026-03-01.
15454        insert_link_full(
15455            &conn,
15456            &src.id,
15457            &t1.id,
15458            "related_to",
15459            Some("2026-01-01T00:00:00+00:00"),
15460            Some("2026-02-01T00:00:00+00:00"),
15461            None,
15462        );
15463        insert_link_full(
15464            &conn,
15465            &src.id,
15466            &t2.id,
15467            "related_to",
15468            Some("2026-03-01T00:00:00+00:00"),
15469            None,
15470            None,
15471        );
15472
15473        // At 2026-01-15 only t1 is valid.
15474        let n_jan = kg_query(
15475            &conn,
15476            &src.id,
15477            1,
15478            Some("2026-01-15T00:00:00+00:00"),
15479            None,
15480            None,
15481            false,
15482        )
15483        .unwrap();
15484        assert_eq!(n_jan.len(), 1);
15485        assert_eq!(n_jan[0].target_id, t1.id);
15486
15487        // At 2026-02-15 the first link is closed, the second hasn't
15488        // started yet — empty.
15489        let n_feb = kg_query(
15490            &conn,
15491            &src.id,
15492            1,
15493            Some("2026-02-15T00:00:00+00:00"),
15494            None,
15495            None,
15496            false,
15497        )
15498        .unwrap();
15499        assert!(n_feb.is_empty());
15500
15501        // At 2026-04-01 only t2 is valid.
15502        let n_apr = kg_query(
15503            &conn,
15504            &src.id,
15505            1,
15506            Some("2026-04-01T00:00:00+00:00"),
15507            None,
15508            None,
15509            false,
15510        )
15511        .unwrap();
15512        assert_eq!(n_apr.len(), 1);
15513        assert_eq!(n_apr[0].target_id, t2.id);
15514    }
15515
15516    #[test]
15517    fn kg_query_skips_null_valid_from_when_valid_at_filter_active() {
15518        let conn = test_db();
15519        let src = make_memory("s", "ns", Tier::Long, 5);
15520        let t = make_memory("t", "ns", Tier::Long, 5);
15521        insert(&conn, &src).unwrap();
15522        insert(&conn, &t).unwrap();
15523        // Link with NULL valid_from — must be invisible to a temporally
15524        // scoped query (we cannot tell if it was valid at any point).
15525        insert_link_full(&conn, &src.id, &t.id, "related_to", None, None, None);
15526
15527        let with_filter = kg_query(
15528            &conn,
15529            &src.id,
15530            1,
15531            Some("2026-01-15T00:00:00+00:00"),
15532            None,
15533            None,
15534            false,
15535        )
15536        .unwrap();
15537        assert!(with_filter.is_empty());
15538
15539        // Without the filter, the same link IS returned.
15540        let without = kg_query(&conn, &src.id, 1, None, None, None, false).unwrap();
15541        assert_eq!(without.len(), 1);
15542        assert_eq!(without[0].target_id, t.id);
15543    }
15544
15545    #[test]
15546    fn kg_query_filters_by_allowed_agents() {
15547        let conn = test_db();
15548        let src = make_memory("s", "ns", Tier::Long, 5);
15549        let t1 = make_memory("t1", "ns", Tier::Long, 5);
15550        let t2 = make_memory("t2", "ns", Tier::Long, 5);
15551        let t3 = make_memory("t3", "ns", Tier::Long, 5);
15552        insert(&conn, &src).unwrap();
15553        insert(&conn, &t1).unwrap();
15554        insert(&conn, &t2).unwrap();
15555        insert(&conn, &t3).unwrap();
15556        insert_link_full(
15557            &conn,
15558            &src.id,
15559            &t1.id,
15560            "related_to",
15561            Some("2026-01-01T00:00:00+00:00"),
15562            None,
15563            Some("agent-a"),
15564        );
15565        insert_link_full(
15566            &conn,
15567            &src.id,
15568            &t2.id,
15569            "related_to",
15570            Some("2026-01-02T00:00:00+00:00"),
15571            None,
15572            Some("agent-b"),
15573        );
15574        // Link with NULL observed_by must be excluded once the agent
15575        // filter is active (`NULL IN (...)` is NULL/false in SQLite).
15576        insert_link_full(
15577            &conn,
15578            &src.id,
15579            &t3.id,
15580            "related_to",
15581            Some("2026-01-03T00:00:00+00:00"),
15582            None,
15583            None,
15584        );
15585
15586        let allow_a = vec!["agent-a".to_string()];
15587        let only_a = kg_query(&conn, &src.id, 1, None, Some(&allow_a), None, false).unwrap();
15588        assert_eq!(only_a.len(), 1);
15589        assert_eq!(only_a[0].target_id, t1.id);
15590
15591        let allow_both = vec!["agent-a".to_string(), "agent-b".to_string()];
15592        let both = kg_query(&conn, &src.id, 1, None, Some(&allow_both), None, false).unwrap();
15593        assert_eq!(both.len(), 2);
15594    }
15595
15596    #[test]
15597    fn kg_query_empty_allowed_agents_returns_zero_rows() {
15598        let conn = test_db();
15599        let src = make_memory("s", "ns", Tier::Long, 5);
15600        let t = make_memory("t", "ns", Tier::Long, 5);
15601        insert(&conn, &src).unwrap();
15602        insert(&conn, &t).unwrap();
15603        insert_link_full(
15604            &conn,
15605            &src.id,
15606            &t.id,
15607            "related_to",
15608            Some("2026-01-01T00:00:00+00:00"),
15609            None,
15610            Some("agent-a"),
15611        );
15612
15613        // Sanity: no filter returns the link.
15614        let unfiltered = kg_query(&conn, &src.id, 1, None, None, None, false).unwrap();
15615        assert_eq!(unfiltered.len(), 1);
15616
15617        // Empty allowlist == "no agents trusted" — must return zero
15618        // rows, not silently fall through to the unfiltered path.
15619        let empty: Vec<String> = Vec::new();
15620        let none = kg_query(&conn, &src.id, 1, None, Some(&empty), None, false).unwrap();
15621        assert!(none.is_empty());
15622    }
15623
15624    #[test]
15625    fn kg_query_rejects_max_depth_zero() {
15626        let conn = test_db();
15627        let src = make_memory("s", "ns", Tier::Long, 5);
15628        insert(&conn, &src).unwrap();
15629        let err = kg_query(&conn, &src.id, 0, None, None, None, false).unwrap_err();
15630        assert!(err.to_string().contains("max_depth"));
15631    }
15632
15633    #[test]
15634    fn kg_query_rejects_unsupported_max_depth() {
15635        // The recursive-CTE slice supports depth 1..=5; passing 6+ must
15636        // produce an explicit error so callers learn they hit the
15637        // ceiling rather than receiving a partial graph.
15638        let conn = test_db();
15639        let src = make_memory("s", "ns", Tier::Long, 5);
15640        insert(&conn, &src).unwrap();
15641        let err = kg_query(
15642            &conn,
15643            &src.id,
15644            KG_QUERY_MAX_SUPPORTED_DEPTH + 1,
15645            None,
15646            None,
15647            None,
15648            false,
15649        )
15650        .unwrap_err();
15651        let msg = err.to_string();
15652        assert!(msg.contains(&format!("max_depth={}", KG_QUERY_MAX_SUPPORTED_DEPTH + 1)));
15653        assert!(msg.contains(&format!("supported depth={KG_QUERY_MAX_SUPPORTED_DEPTH}")));
15654    }
15655
15656    #[test]
15657    fn kg_query_traverses_multiple_hops() {
15658        // src -> mid -> leaf. depth=2 must return both hops, with
15659        // depth/path reflecting the chain.
15660        let conn = test_db();
15661        let src = make_memory("src", "ns", Tier::Long, 5);
15662        let mid = make_memory("mid", "ns", Tier::Long, 5);
15663        let leaf = make_memory("leaf", "ns", Tier::Long, 5);
15664        insert(&conn, &src).unwrap();
15665        insert(&conn, &mid).unwrap();
15666        insert(&conn, &leaf).unwrap();
15667        insert_link_full(
15668            &conn,
15669            &src.id,
15670            &mid.id,
15671            "related_to",
15672            Some("2026-01-01T00:00:00+00:00"),
15673            None,
15674            Some("agent-x"),
15675        );
15676        insert_link_full(
15677            &conn,
15678            &mid.id,
15679            &leaf.id,
15680            "supersedes",
15681            Some("2026-01-02T00:00:00+00:00"),
15682            None,
15683            Some("agent-x"),
15684        );
15685
15686        // depth=1 sees only mid.
15687        let d1 = kg_query(&conn, &src.id, 1, None, None, None, false).unwrap();
15688        assert_eq!(d1.len(), 1);
15689        assert_eq!(d1[0].target_id, mid.id);
15690        assert_eq!(d1[0].depth, 1);
15691
15692        // depth=2 sees both, ordered shallow-first.
15693        let d2 = kg_query(&conn, &src.id, 2, None, None, None, false).unwrap();
15694        assert_eq!(d2.len(), 2);
15695        assert_eq!(d2[0].target_id, mid.id);
15696        assert_eq!(d2[0].depth, 1);
15697        assert_eq!(d2[0].path, format!("{}->{}", src.id, mid.id));
15698        assert_eq!(d2[1].target_id, leaf.id);
15699        assert_eq!(d2[1].depth, 2);
15700        assert_eq!(d2[1].relation, "supersedes");
15701        assert_eq!(d2[1].path, format!("{}->{}->{}", src.id, mid.id, leaf.id));
15702    }
15703
15704    #[test]
15705    fn kg_query_multi_hop_respects_valid_at_per_hop() {
15706        // src -> mid valid 2026-01..02; mid -> leaf valid 2026-04+.
15707        // At valid_at=2026-01-15 the second hop is not yet valid, so
15708        // only mid is returned; at valid_at=2026-04-15 the first hop is
15709        // closed, so both are filtered out.
15710        let conn = test_db();
15711        let src = make_memory("s", "ns", Tier::Long, 5);
15712        let mid = make_memory("m", "ns", Tier::Long, 5);
15713        let leaf = make_memory("l", "ns", Tier::Long, 5);
15714        insert(&conn, &src).unwrap();
15715        insert(&conn, &mid).unwrap();
15716        insert(&conn, &leaf).unwrap();
15717        insert_link_full(
15718            &conn,
15719            &src.id,
15720            &mid.id,
15721            "related_to",
15722            Some("2026-01-01T00:00:00+00:00"),
15723            Some("2026-02-01T00:00:00+00:00"),
15724            None,
15725        );
15726        insert_link_full(
15727            &conn,
15728            &mid.id,
15729            &leaf.id,
15730            "related_to",
15731            Some("2026-04-01T00:00:00+00:00"),
15732            None,
15733            None,
15734        );
15735
15736        let mid_only = kg_query(
15737            &conn,
15738            &src.id,
15739            3,
15740            Some("2026-01-15T00:00:00+00:00"),
15741            None,
15742            None,
15743            false,
15744        )
15745        .unwrap();
15746        assert_eq!(mid_only.len(), 1);
15747        assert_eq!(mid_only[0].target_id, mid.id);
15748
15749        let neither = kg_query(
15750            &conn,
15751            &src.id,
15752            3,
15753            Some("2026-04-15T00:00:00+00:00"),
15754            None,
15755            None,
15756            false,
15757        )
15758        .unwrap();
15759        assert!(neither.is_empty());
15760    }
15761
15762    #[test]
15763    fn kg_query_detects_cycles() {
15764        // a -> b -> c -> a forms a cycle. Even with max_depth=5, the
15765        // traversal must stop revisiting nodes that are already on the
15766        // path; the result lists each reachable node at most once.
15767        let conn = test_db();
15768        let a = make_memory("a", "ns", Tier::Long, 5);
15769        let b = make_memory("b", "ns", Tier::Long, 5);
15770        let c = make_memory("c", "ns", Tier::Long, 5);
15771        insert(&conn, &a).unwrap();
15772        insert(&conn, &b).unwrap();
15773        insert(&conn, &c).unwrap();
15774        insert_link_full(
15775            &conn,
15776            &a.id,
15777            &b.id,
15778            "related_to",
15779            Some("2026-01-01T00:00:00+00:00"),
15780            None,
15781            None,
15782        );
15783        insert_link_full(
15784            &conn,
15785            &b.id,
15786            &c.id,
15787            "related_to",
15788            Some("2026-01-02T00:00:00+00:00"),
15789            None,
15790            None,
15791        );
15792        insert_link_full(
15793            &conn,
15794            &c.id,
15795            &a.id,
15796            "related_to",
15797            Some("2026-01-03T00:00:00+00:00"),
15798            None,
15799            None,
15800        );
15801
15802        let nodes = kg_query(&conn, &a.id, 5, None, None, None, false).unwrap();
15803        // Expect b at depth 1 and c at depth 2; the cycle back to a is
15804        // pruned. (The c->a edge could in principle surface a again at
15805        // depth 3, but only if a is not on its own path — and the
15806        // anchor seeds path with `a->b`, so a IS on every descendant
15807        // path through b/c.)
15808        assert_eq!(nodes.len(), 2);
15809        assert_eq!(nodes[0].target_id, b.id);
15810        assert_eq!(nodes[0].depth, 1);
15811        assert_eq!(nodes[1].target_id, c.id);
15812        assert_eq!(nodes[1].depth, 2);
15813    }
15814
15815    #[test]
15816    fn kg_query_multi_hop_filters_by_allowed_agents_per_hop() {
15817        // src -> mid (agent-a), mid -> leaf (agent-b). With allow=[a]
15818        // only the first hop survives; with allow=[a,b] both surface.
15819        let conn = test_db();
15820        let src = make_memory("s", "ns", Tier::Long, 5);
15821        let mid = make_memory("m", "ns", Tier::Long, 5);
15822        let leaf = make_memory("l", "ns", Tier::Long, 5);
15823        insert(&conn, &src).unwrap();
15824        insert(&conn, &mid).unwrap();
15825        insert(&conn, &leaf).unwrap();
15826        insert_link_full(
15827            &conn,
15828            &src.id,
15829            &mid.id,
15830            "related_to",
15831            Some("2026-01-01T00:00:00+00:00"),
15832            None,
15833            Some("agent-a"),
15834        );
15835        insert_link_full(
15836            &conn,
15837            &mid.id,
15838            &leaf.id,
15839            "related_to",
15840            Some("2026-01-02T00:00:00+00:00"),
15841            None,
15842            Some("agent-b"),
15843        );
15844
15845        let allow_a = vec!["agent-a".to_string()];
15846        let only_first = kg_query(&conn, &src.id, 3, None, Some(&allow_a), None, false).unwrap();
15847        assert_eq!(only_first.len(), 1);
15848        assert_eq!(only_first[0].target_id, mid.id);
15849
15850        let allow_both = vec!["agent-a".to_string(), "agent-b".to_string()];
15851        let both = kg_query(&conn, &src.id, 3, None, Some(&allow_both), None, false).unwrap();
15852        assert_eq!(both.len(), 2);
15853        assert_eq!(both[1].target_id, leaf.id);
15854        assert_eq!(both[1].depth, 2);
15855    }
15856
15857    #[test]
15858    fn kg_query_limit_clamped_to_max() {
15859        let conn = test_db();
15860        let src = make_memory("s", "ns", Tier::Long, 5);
15861        insert(&conn, &src).unwrap();
15862        for i in 0..3 {
15863            let t = make_memory(&format!("t{i}"), "ns", Tier::Long, 5);
15864            insert(&conn, &t).unwrap();
15865            insert_link_full(
15866                &conn,
15867                &src.id,
15868                &t.id,
15869                "related_to",
15870                Some(&format!("2026-01-{:02}T00:00:00+00:00", i + 1)),
15871                None,
15872                None,
15873            );
15874        }
15875
15876        // limit=usize::MAX clamps to KG_QUERY_MAX_LIMIT (1000),
15877        // which is bigger than our 3 rows — all returned.
15878        let all = kg_query(&conn, &src.id, 1, None, None, Some(usize::MAX), false).unwrap();
15879        assert_eq!(all.len(), 3);
15880
15881        // limit=0 clamps up to 1.
15882        let one = kg_query(&conn, &src.id, 1, None, None, Some(0), false).unwrap();
15883        assert_eq!(one.len(), 1);
15884    }
15885
15886    #[test]
15887    fn kg_query_empty_for_unknown_source() {
15888        let conn = test_db();
15889        let nodes = kg_query(&conn, "no-such-id", 1, None, None, None, false).unwrap();
15890        assert!(nodes.is_empty());
15891    }
15892
15893    #[test]
15894    fn schema_v15_existing_links_get_valid_from_backfilled() {
15895        // Simulate a v14 database with one link, then re-run the
15896        // v15 migration and assert valid_from was backfilled to the
15897        // source memory's created_at. We do this by opening a fresh
15898        // db (which is at v15), inserting a link with NULL valid_from,
15899        // rolling schema_version back to 14, and re-opening to force
15900        // the v15 block to re-execute the backfill UPDATE.
15901        let path = std::env::temp_dir().join(format!(
15902            "ai_memory_v15_backfill_{}.db",
15903            uuid::Uuid::new_v4()
15904        ));
15905        {
15906            let conn = open(&path).unwrap();
15907            let src = make_memory("src", "test", Tier::Long, 5);
15908            let tgt = make_memory("tgt", "test", Tier::Long, 5);
15909            insert(&conn, &src).unwrap();
15910            insert(&conn, &tgt).unwrap();
15911            // Insert a link directly with NULL valid_from to mimic
15912            // pre-migration state.
15913            conn.execute(
15914                "INSERT INTO memory_links (source_id, target_id, relation, created_at, valid_from) \
15915                 VALUES (?1, ?2, 'related_to', ?3, NULL)",
15916                params![&src.id, &tgt.id, &chrono::Utc::now().to_rfc3339()],
15917            )
15918            .unwrap();
15919            // Roll schema back to v14 and re-run migrate via re-open.
15920            conn.execute("DELETE FROM schema_version", []).unwrap();
15921            conn.execute("INSERT INTO schema_version (version) VALUES (14)", [])
15922                .unwrap();
15923        }
15924
15925        let conn2 = open(&path).unwrap();
15926        let backfilled: Option<String> = conn2
15927            .query_row("SELECT valid_from FROM memory_links LIMIT 1", [], |r| {
15928                r.get(0)
15929            })
15930            .unwrap();
15931        assert!(
15932            backfilled.is_some(),
15933            "expected valid_from to be backfilled, got NULL"
15934        );
15935        let _ = std::fs::remove_file(&path);
15936    }
15937
15938    #[test]
15939    fn namespace_prefix_query_index_available() {
15940        let conn = test_db();
15941        // SQLite's default BINARY collation supports prefix-matching LIKE queries
15942        // with the idx_memories_namespace index. Verify the index exists and a
15943        // simple prefix query can execute (EXPLAIN QUERY PLAN output varies by
15944        // SQLite version and query planner heuristics, so we just check that the
15945        // query completes without error).
15946        let result: Option<String> = conn
15947            .query_row(
15948                "SELECT name FROM sqlite_master WHERE type='index' AND name='idx_memories_namespace'",
15949                [],
15950                |r| r.get(0),
15951            )
15952            .unwrap();
15953        assert_eq!(
15954            result,
15955            Some("idx_memories_namespace".to_string()),
15956            "idx_memories_namespace index should exist"
15957        );
15958
15959        // Execute a prefix LIKE query to ensure it compiles and runs
15960        let count: i64 = conn
15961            .query_row(
15962                "SELECT COUNT(*) FROM memories WHERE namespace LIKE 'test/%'",
15963                [],
15964                |r| r.get(0),
15965            )
15966            .unwrap();
15967        assert_eq!(count, 0);
15968    }
15969
15970    // -----------------------------------------------------------------
15971    // Doctor (P7) helper unit tests.
15972    // -----------------------------------------------------------------
15973
15974    #[test]
15975    fn doctor_dim_violations_post_p2_returns_zero_on_fresh_db() {
15976        // Post-P2 (schema v18+), a fresh DB has the `embedding_dim` column
15977        // but zero rows in violation. The helper must report Some(0), not
15978        // None. (Pre-P2 it returned None to indicate "column not yet
15979        // present"; that path is now obsolete.)
15980        let conn = test_db();
15981        let result = doctor_dim_violations(&conn).unwrap();
15982        assert_eq!(result, Some(0));
15983    }
15984
15985    #[test]
15986    fn doctor_oldest_pending_age_secs_empty_queue() {
15987        let conn = test_db();
15988        let age = doctor_oldest_pending_age_secs(&conn).unwrap();
15989        assert_eq!(age, None);
15990    }
15991
15992    #[test]
15993    fn doctor_oldest_pending_age_secs_reports_age() {
15994        let conn = test_db();
15995        let one_hour_ago = (Utc::now() - chrono::Duration::hours(1)).to_rfc3339();
15996        conn.execute(
15997            "INSERT INTO pending_actions (id, action_type, namespace, payload, requested_by, requested_at, status)
15998             VALUES ('p1', 'store', 'ns', '{}', 'agent', ?1, 'pending')",
15999            params![one_hour_ago],
16000        )
16001        .unwrap();
16002        let age = doctor_oldest_pending_age_secs(&conn).unwrap().unwrap();
16003        // Allow a generous margin — the test machine clock is the source of truth.
16004        assert!((3500..=3700).contains(&age), "expected ~3600s, got {age}");
16005    }
16006
16007    #[test]
16008    fn doctor_governance_coverage_with_namespace_meta() {
16009        let conn = test_db();
16010        // No namespaces — both counts zero.
16011        let (with, without) = doctor_governance_coverage(&conn).unwrap();
16012        assert_eq!((with, without), (0, 0));
16013    }
16014
16015    #[test]
16016    fn doctor_governance_depth_distribution_chains() {
16017        let conn = test_db();
16018        // Build a small inheritance tree: root -> a -> a/b -> a/b/c
16019        let now = Utc::now().to_rfc3339();
16020        conn.execute(
16021            "INSERT INTO namespace_meta (namespace, parent_namespace, updated_at) VALUES ('root', NULL, ?1)",
16022            params![now],
16023        ).unwrap();
16024        conn.execute(
16025            "INSERT INTO namespace_meta (namespace, parent_namespace, updated_at) VALUES ('a', 'root', ?1)",
16026            params![now],
16027        ).unwrap();
16028        conn.execute(
16029            "INSERT INTO namespace_meta (namespace, parent_namespace, updated_at) VALUES ('a/b', 'a', ?1)",
16030            params![now],
16031        ).unwrap();
16032        conn.execute(
16033            "INSERT INTO namespace_meta (namespace, parent_namespace, updated_at) VALUES ('a/b/c', 'a/b', ?1)",
16034            params![now],
16035        ).unwrap();
16036        let dist = doctor_governance_depth_distribution(&conn).unwrap();
16037        assert_eq!(dist[0], 1, "root has depth 0");
16038        assert_eq!(dist[1], 1, "a has depth 1");
16039        assert_eq!(dist[2], 1, "a/b has depth 2");
16040        assert_eq!(dist[3], 1, "a/b/c has depth 3");
16041    }
16042
16043    #[test]
16044    fn doctor_webhook_delivery_totals_empty() {
16045        let conn = test_db();
16046        let (dispatched, failed) = doctor_webhook_delivery_totals(&conn).unwrap();
16047        assert_eq!((dispatched, failed), (0, 0));
16048    }
16049
16050    #[test]
16051    fn doctor_max_sync_skew_secs_empty() {
16052        let conn = test_db();
16053        let skew = doctor_max_sync_skew_secs(&conn).unwrap();
16054        assert_eq!(skew, None);
16055    }
16056
16057    // ---- v0.6.4-009 — capability-expansion audit log ----
16058
16059    #[test]
16060    fn audit_log_record_and_list_grant_and_deny() {
16061        let conn = test_db();
16062        record_capability_expansion(&conn, Some("alice"), "graph", true, None);
16063        record_capability_expansion(&conn, Some("bob"), "power", false, None);
16064        let rows = list_capability_expansions(&conn, 50, None).unwrap();
16065        assert_eq!(rows.len(), 2);
16066        // Newest first.
16067        assert!(rows[0].timestamp >= rows[1].timestamp);
16068        let grant_row = rows
16069            .iter()
16070            .find(|r| r.agent_id.as_deref() == Some("alice"))
16071            .unwrap();
16072        assert!(grant_row.granted);
16073        assert_eq!(grant_row.requested_family.as_deref(), Some("graph"));
16074        let deny_row = rows
16075            .iter()
16076            .find(|r| r.agent_id.as_deref() == Some("bob"))
16077            .unwrap();
16078        assert!(!deny_row.granted);
16079        assert_eq!(deny_row.requested_family.as_deref(), Some("power"));
16080    }
16081
16082    #[test]
16083    fn audit_log_filter_by_agent() {
16084        let conn = test_db();
16085        record_capability_expansion(&conn, Some("alice"), "graph", true, None);
16086        record_capability_expansion(&conn, Some("bob"), "power", false, None);
16087        let alice = list_capability_expansions(&conn, 50, Some("alice")).unwrap();
16088        assert_eq!(alice.len(), 1);
16089        assert_eq!(alice[0].agent_id.as_deref(), Some("alice"));
16090        let none_match = list_capability_expansions(&conn, 50, Some("nobody")).unwrap();
16091        assert!(none_match.is_empty());
16092    }
16093
16094    #[test]
16095    fn audit_log_anonymous_caller() {
16096        let conn = test_db();
16097        record_capability_expansion(&conn, None, "core", true, None);
16098        let rows = list_capability_expansions(&conn, 50, None).unwrap();
16099        assert_eq!(rows.len(), 1);
16100        assert!(rows[0].agent_id.is_none());
16101    }
16102
16103    #[test]
16104    fn audit_log_migration_idempotent_on_re_open() {
16105        // Open the DB twice in succession; the audit_log CREATE TABLE
16106        // IF NOT EXISTS path must not error.
16107        let p = tempfile::NamedTempFile::new().unwrap();
16108        let p = p.path().to_path_buf();
16109        let _ = open(&p).unwrap();
16110        let conn = open(&p).unwrap();
16111        // And the indexes are present.
16112        let cnt: i64 = conn
16113            .query_row(
16114                "SELECT count(*) FROM sqlite_master WHERE name LIKE 'idx_audit_log_%'",
16115                [],
16116                |r| r.get(0),
16117            )
16118            .unwrap();
16119        assert_eq!(
16120            cnt, 3,
16121            "expected 3 audit_log indexes (agent_id, ts, event_type)"
16122        );
16123    }
16124
16125    // ---------------------------------------------------------------
16126    // v0.7.0 K2 — pending_actions timeout sweeper.
16127    //
16128    // Closes the v0.6.3.1 honest-Capabilities-v2 disclosure that
16129    // `default_timeout_seconds` was advertised but unused.
16130    // ---------------------------------------------------------------
16131
16132    /// Insert a `pending_actions` row with a back-dated `requested_at`
16133    /// so we can drive the sweeper without `tokio::time` games.
16134    fn insert_stale_pending(
16135        conn: &Connection,
16136        id: &str,
16137        namespace: &str,
16138        age_secs: i64,
16139        per_row_timeout: Option<i64>,
16140    ) {
16141        let requested_at = (chrono::Utc::now() - chrono::Duration::seconds(age_secs)).to_rfc3339();
16142        conn.execute(
16143            "INSERT INTO pending_actions
16144             (id, action_type, namespace, payload, requested_by, requested_at,
16145              status, default_timeout_seconds)
16146             VALUES (?1, 'store', ?2, '{}', 'tester', ?3, 'pending', ?4)",
16147            params![id, namespace, requested_at, per_row_timeout],
16148        )
16149        .unwrap();
16150    }
16151
16152    #[test]
16153    fn sweep_marks_stale_pending_row_expired() {
16154        let conn = test_db();
16155        // 2-hour-old pending row; global default is 1 hour → must expire.
16156        insert_stale_pending(&conn, "stale-1", "ns/a", 7_200, None);
16157
16158        let expired = sweep_pending_action_timeouts(&conn, crate::SECS_PER_HOUR).unwrap();
16159        assert_eq!(expired.len(), 1, "expected exactly one expiry");
16160        assert_eq!(expired[0], ("stale-1".to_string(), "ns/a".to_string()));
16161
16162        // Row is now status='expired' with expired_at populated.
16163        let (status, expired_at): (String, Option<String>) = conn
16164            .query_row(
16165                "SELECT status, expired_at FROM pending_actions WHERE id = ?1",
16166                params!["stale-1"],
16167                |r| Ok((r.get(0)?, r.get(1)?)),
16168            )
16169            .unwrap();
16170        assert_eq!(status, "expired");
16171        assert!(
16172            expired_at.is_some(),
16173            "expired_at must be stamped by the sweeper"
16174        );
16175    }
16176
16177    #[test]
16178    fn sweep_leaves_fresh_pending_alone() {
16179        let conn = test_db();
16180        // 30-second-old pending row; global default is 1 hour → still pending.
16181        insert_stale_pending(&conn, "fresh-1", "ns/a", 30, None);
16182
16183        let expired = sweep_pending_action_timeouts(&conn, crate::SECS_PER_HOUR).unwrap();
16184        assert!(expired.is_empty());
16185        let status: String = conn
16186            .query_row(
16187                "SELECT status FROM pending_actions WHERE id = ?1",
16188                params!["fresh-1"],
16189                |r| r.get(0),
16190            )
16191            .unwrap();
16192        assert_eq!(status, "pending");
16193    }
16194
16195    #[test]
16196    fn sweep_per_row_timeout_overrides_global_default() {
16197        let conn = test_db();
16198        // 5-minute-old row; per-row TTL = 60s → MUST expire even
16199        // though the global default (1h) would say "still fresh".
16200        insert_stale_pending(&conn, "short-ttl", "ns/a", 300, Some(60));
16201        // Same age, no per-row override → still pending under the
16202        // 1h global default.
16203        insert_stale_pending(&conn, "no-override", "ns/a", 300, None);
16204
16205        let expired = sweep_pending_action_timeouts(&conn, crate::SECS_PER_HOUR).unwrap();
16206        let ids: Vec<&String> = expired.iter().map(|(id, _)| id).collect();
16207        assert_eq!(ids, vec![&"short-ttl".to_string()]);
16208    }
16209
16210    #[test]
16211    fn sweep_skips_already_decided_rows() {
16212        let conn = test_db();
16213        // Pre-insert an OLD row already approved — must not touch it.
16214        let approved_at = (chrono::Utc::now() - chrono::Duration::seconds(7_200)).to_rfc3339();
16215        conn.execute(
16216            "INSERT INTO pending_actions
16217             (id, action_type, namespace, payload, requested_by, requested_at,
16218              status, decided_by, decided_at)
16219             VALUES ('approved-old', 'store', 'ns/a', '{}', 'alice', ?1,
16220                     'approved', 'bob', ?1)",
16221            params![approved_at],
16222        )
16223        .unwrap();
16224
16225        let expired = sweep_pending_action_timeouts(&conn, 60).unwrap();
16226        assert!(expired.is_empty(), "non-pending rows must be ignored");
16227        let status: String = conn
16228            .query_row(
16229                "SELECT status FROM pending_actions WHERE id = 'approved-old'",
16230                [],
16231                |r| r.get(0),
16232            )
16233            .unwrap();
16234        assert_eq!(status, "approved", "decided row status preserved");
16235    }
16236
16237    #[test]
16238    fn sweep_disabled_when_global_default_non_positive() {
16239        let conn = test_db();
16240        // Stale row with no per-row TTL.
16241        insert_stale_pending(&conn, "stale-2", "ns/a", 7_200, None);
16242        // Operator escape hatch: 0 (or negative) global default
16243        // disables the sweep entirely.
16244        let expired = sweep_pending_action_timeouts(&conn, 0).unwrap();
16245        assert!(expired.is_empty());
16246        let expired_neg = sweep_pending_action_timeouts(&conn, -1).unwrap();
16247        assert!(expired_neg.is_empty());
16248    }
16249
16250    #[test]
16251    fn sweep_empty_queue_is_silent_noop() {
16252        let conn = test_db();
16253        let expired = sweep_pending_action_timeouts(&conn, 60).unwrap();
16254        assert!(expired.is_empty());
16255    }
16256
16257    // -----------------------------------------------------------------
16258    // v0.7.0 fix campaign R1-M2 / R1-M3 / R1-M4 (#690)
16259    //
16260    // Substrate-side defense-in-depth: SQL CHECK triggers + typed
16261    // `MemoryLinkRelation` + `ConflictMode`-aware insert primitive.
16262    // The tests below pin the contract the brief calls out by name so
16263    // a future regression surfaces here, not in a downstream consumer.
16264    // -----------------------------------------------------------------
16265
16266    /// R1-M2 — direct-SQL INSERT with a tier outside the closed set is
16267    /// refused by the trigger.
16268    #[test]
16269    fn test_memories_tier_check_rejects_invalid() {
16270        let conn = test_db();
16271        let now = chrono::Utc::now().to_rfc3339();
16272        let err = conn.execute(
16273            "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, confidence, source, access_count, created_at, updated_at, metadata) \
16274             VALUES (?1, 'long-term', 'ns-ck', 'bad-tier', 'x', '[]', 5, 1.0, 'test', 0, ?2, ?2, '{}')",
16275            params!["m-bad-tier", now],
16276        ).unwrap_err();
16277        let msg = err.to_string();
16278        assert!(
16279            msg.contains("memories.tier must be one of"),
16280            "expected R1-M2 tier check, got: {msg}"
16281        );
16282    }
16283
16284    /// R1-M2 — direct-SQL INSERT with priority out of `[1, 10]` is
16285    /// refused by the trigger.
16286    #[test]
16287    fn test_memories_priority_check_rejects_oob() {
16288        let conn = test_db();
16289        let now = chrono::Utc::now().to_rfc3339();
16290        let err = conn.execute(
16291            "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, confidence, source, access_count, created_at, updated_at, metadata) \
16292             VALUES (?1, 'mid', 'ns-ck', 'bad-prio', 'x', '[]', 11, 1.0, 'test', 0, ?2, ?2, '{}')",
16293            params!["m-bad-prio", now],
16294        ).unwrap_err();
16295        assert!(
16296            err.to_string()
16297                .contains("memories.priority must be between 1 and 10"),
16298            "expected R1-M2 priority check, got: {err}"
16299        );
16300        // Lower bound mirror: priority = 0 is also out-of-band.
16301        let err_low = conn.execute(
16302            "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, confidence, source, access_count, created_at, updated_at, metadata) \
16303             VALUES (?1, 'mid', 'ns-ck', 'bad-prio-low', 'x', '[]', 0, 1.0, 'test', 0, ?2, ?2, '{}')",
16304            params!["m-bad-prio-low", now],
16305        ).unwrap_err();
16306        assert!(err_low.to_string().contains("priority"));
16307    }
16308
16309    /// R1-M2 — confidence outside `[0.0, 1.0]` is refused by the trigger.
16310    #[test]
16311    fn test_memories_confidence_check_rejects_oob() {
16312        let conn = test_db();
16313        let now = chrono::Utc::now().to_rfc3339();
16314        let err = conn.execute(
16315            "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, confidence, source, access_count, created_at, updated_at, metadata) \
16316             VALUES (?1, 'mid', 'ns-ck', 'bad-conf', 'x', '[]', 5, 1.5, 'test', 0, ?2, ?2, '{}')",
16317            params!["m-bad-conf", now],
16318        ).unwrap_err();
16319        assert!(
16320            err.to_string().contains("memories.confidence"),
16321            "expected R1-M2 confidence check, got: {err}"
16322        );
16323    }
16324
16325    /// R1-M2 — direct-SQL link INSERT with an off-closed-set relation
16326    /// is refused by the trigger.
16327    #[test]
16328    fn test_memory_links_relation_check_rejects_unknown() {
16329        let conn = test_db();
16330        let src = insert(&conn, &make_memory("rel-src", "ns-ck", Tier::Mid, 5)).unwrap();
16331        let tgt = insert(&conn, &make_memory("rel-tgt", "ns-ck", Tier::Mid, 5)).unwrap();
16332        let now = chrono::Utc::now().to_rfc3339();
16333        let err = conn
16334            .execute(
16335                "INSERT INTO memory_links (source_id, target_id, relation, created_at, valid_from) \
16336             VALUES (?1, ?2, 'follows', ?3, ?3)",
16337                params![src, tgt, now],
16338            )
16339            .unwrap_err();
16340        assert!(
16341            err.to_string()
16342                .contains("memory_links.relation must be one of"),
16343            "expected R1-M2 relation check, got: {err}"
16344        );
16345    }
16346
16347    /// R1-M2 — direct-SQL link INSERT with an unknown `attest_level` is
16348    /// refused; legacy `NULL` stays allowed.
16349    #[test]
16350    fn test_memory_links_attest_level_check_rejects_unknown() {
16351        let conn = test_db();
16352        let src = insert(&conn, &make_memory("att-src", "ns-ck", Tier::Mid, 5)).unwrap();
16353        let tgt = insert(&conn, &make_memory("att-tgt", "ns-ck", Tier::Mid, 5)).unwrap();
16354        let now = chrono::Utc::now().to_rfc3339();
16355        // NULL attest_level OK (legacy).
16356        conn.execute(
16357            "INSERT INTO memory_links (source_id, target_id, relation, created_at, valid_from, attest_level) \
16358             VALUES (?1, ?2, 'related_to', ?3, ?3, NULL)",
16359            params![src, tgt, now],
16360        )
16361        .expect("NULL attest_level must remain accepted");
16362        // Bogus attest_level refused.
16363        let err = conn.execute(
16364            "INSERT INTO memory_links (source_id, target_id, relation, created_at, valid_from, attest_level) \
16365             VALUES (?1, ?2, 'supersedes', ?3, ?3, 'totally-fake')",
16366            params![src, tgt, now],
16367        ).unwrap_err();
16368        assert!(err.to_string().contains("memory_links.attest_level"));
16369    }
16370
16371    /// R1-M3 — `insert_with_conflict(.., ConflictMode::Error)` refuses
16372    /// the second write when `(title, namespace)` collides.
16373    #[test]
16374    fn test_insert_with_conflict_error_mode_refuses_duplicate() {
16375        let conn = test_db();
16376        let m1 = make_memory("dup-title", "ns-conflict", Tier::Mid, 5);
16377        let _id = insert_with_conflict(&conn, &m1, ConflictMode::Error).unwrap();
16378        let mut m2 = make_memory("dup-title", "ns-conflict", Tier::Mid, 7);
16379        m2.content = "second writer should be refused".to_string();
16380        let err = insert_with_conflict(&conn, &m2, ConflictMode::Error).unwrap_err();
16381        let conflict = err.downcast_ref::<ConflictError>();
16382        assert!(
16383            conflict.is_some(),
16384            "expected typed ConflictError, got: {err}"
16385        );
16386        // First writer's content is preserved (no silent overwrite).
16387        let row = find_by_title_namespace(&conn, "dup-title", "ns-conflict")
16388            .unwrap()
16389            .expect("first row still present");
16390        let fetched = get(&conn, &row).unwrap().unwrap();
16391        assert_ne!(
16392            fetched.content, "second writer should be refused",
16393            "Error mode must not mutate the existing row"
16394        );
16395    }
16396
16397    /// R1-M3 — `insert_with_conflict(.., ConflictMode::Merge)` is
16398    /// byte-equivalent to the legacy `insert()` silent-merge path.
16399    #[test]
16400    fn test_insert_with_conflict_merge_mode_updates() {
16401        let conn = test_db();
16402        let m1 = make_memory("merge-title", "ns-merge", Tier::Mid, 5);
16403        let id_a = insert_with_conflict(&conn, &m1, ConflictMode::Merge).unwrap();
16404        let mut m2 = make_memory("merge-title", "ns-merge", Tier::Mid, 7);
16405        m2.content = "merged-content".to_string();
16406        let id_b = insert_with_conflict(&conn, &m2, ConflictMode::Merge).unwrap();
16407        assert_eq!(id_a, id_b, "merge mode returns the existing row id");
16408        let fetched = get(&conn, &id_a).unwrap().unwrap();
16409        assert_eq!(fetched.content, "merged-content");
16410    }
16411
16412    /// R1-M3 — `insert_with_conflict(.., ConflictMode::Version)` keeps
16413    /// both rows; the second writer lands under a versioned title.
16414    #[test]
16415    fn test_insert_with_conflict_version_keeps_both() {
16416        let conn = test_db();
16417        let m1 = make_memory("versioned", "ns-v", Tier::Mid, 5);
16418        let id_a = insert_with_conflict(&conn, &m1, ConflictMode::Version).unwrap();
16419        let mut m2 = make_memory("versioned", "ns-v", Tier::Mid, 5);
16420        m2.content = "second version content".to_string();
16421        let id_b = insert_with_conflict(&conn, &m2, ConflictMode::Version).unwrap();
16422        assert_ne!(id_a, id_b, "version mode produces a distinct row");
16423        // Both titles are reachable: original + `(2)` suffix.
16424        let original_id = find_by_title_namespace(&conn, "versioned", "ns-v")
16425            .unwrap()
16426            .expect("original row");
16427        let versioned_id = find_by_title_namespace(&conn, "versioned (2)", "ns-v")
16428            .unwrap()
16429            .expect("versioned row");
16430        assert_eq!(original_id, id_a);
16431        assert_eq!(versioned_id, id_b);
16432    }
16433
16434    /// R1-M4 — `MemoryLink.relation` round-trips through the typed
16435    /// closed set across `create_link` + `get_links`.
16436    #[test]
16437    fn test_memory_link_relation_round_trips() {
16438        let conn = test_db();
16439        let src = insert(&conn, &make_memory("rt-src", "ns-rt", Tier::Mid, 5)).unwrap();
16440        let tgt = insert(&conn, &make_memory("rt-tgt", "ns-rt", Tier::Mid, 5)).unwrap();
16441        create_link(&conn, &src, &tgt, "supersedes").unwrap();
16442        let links = get_links(&conn, &src).unwrap();
16443        assert_eq!(links.len(), 1);
16444        assert_eq!(
16445            links[0].relation,
16446            crate::models::MemoryLinkRelation::Supersedes,
16447            "relation must round-trip as the typed Supersedes variant"
16448        );
16449        // Cross-check serde wire shape: enum → `"supersedes"` string.
16450        let wire = serde_json::to_string(&links[0]).unwrap();
16451        assert!(
16452            wire.contains("\"relation\":\"supersedes\""),
16453            "serde wire form must be the canonical lowercase snake_case \
16454             string; got {wire}"
16455        );
16456    }
16457
16458    // ---------------------------------------------------------------
16459    // v0.7.0 S5 verdict — approval exec fixes:
16460    //   S5-H1 reflect arm, S5-H4 agent_id verify,
16461    //   S5-M1/M2 signed_events emit on approve/deny/timeout.
16462    // ---------------------------------------------------------------
16463
16464    /// Helper — count signed_events rows matching `event_type`. Used by
16465    /// the audit-emit tests below so they don't have to scrape the table
16466    /// in raw SQL each time.
16467    fn count_signed_events(conn: &Connection, event_type: &str) -> usize {
16468        crate::signed_events::list_signed_events(conn, None, 1000, 0)
16469            .unwrap_or_default()
16470            .into_iter()
16471            .filter(|e| e.event_type == event_type)
16472            .count()
16473    }
16474
16475    /// S5-H1 — an approved `reflect` pending action MUST execute through
16476    /// `db::reflect` and persist a new reflection memory whose
16477    /// `metadata.reflection_metadata.sources` matches the queued
16478    /// `source_ids`. Pre-fix this would error with
16479    /// "unknown action_type: reflect" and the queued row would never land.
16480    #[test]
16481    fn test_execute_reflect_arm_succeeds_round_trip() {
16482        let conn = test_db();
16483        // Seed two source memories the reflection will reflect on.
16484        let src1 = make_memory("src-1", "ns/reflect", Tier::Mid, 5);
16485        let src2 = make_memory("src-2", "ns/reflect", Tier::Mid, 5);
16486        let src1_id = insert(&conn, &src1).unwrap();
16487        let src2_id = insert(&conn, &src2).unwrap();
16488
16489        // Queue an approved reflect pending action with the L1-8 payload shape.
16490        let payload = serde_json::json!({
16491            "source_ids": [src1_id, src2_id],
16492            "title": "reflective synthesis",
16493            "content": "deep observation across sources",
16494            "namespace": "ns/reflect",
16495            "tier": Tier::Mid.as_str(),
16496            "tags": ["reflective"],
16497            "priority": 6,
16498            "confidence": 0.9,
16499            "agent_id": "alice",
16500            "proposed_depth": 1,
16501        });
16502        let pending_id = queue_pending_action(
16503            &conn,
16504            crate::models::GovernedAction::Reflect,
16505            "ns/reflect",
16506            None,
16507            "alice",
16508            &payload,
16509        )
16510        .unwrap();
16511        // Approve so execute_pending_action accepts the row.
16512        assert!(decide_pending_action(&conn, &pending_id, true, "approver").unwrap());
16513
16514        let result = execute_pending_action(&conn, &pending_id).expect("reflect execute ok");
16515        let new_id = result.expect("reflect must return the new reflection id");
16516        let mem = get(&conn, &new_id)
16517            .unwrap()
16518            .expect("reflection memory landed");
16519        assert_eq!(mem.title, "reflective synthesis");
16520        assert_eq!(mem.namespace, "ns/reflect");
16521        assert_eq!(mem.reflection_depth, 1, "depth = max(source depths) + 1");
16522        // The substrate stamps `metadata.agent_id` from the input.agent_id field.
16523        assert_eq!(mem.metadata["agent_id"], "alice");
16524    }
16525
16526    /// S5-H4 — a queued payload whose `agent_id` does NOT match
16527    /// `pa.requested_by` is approver-on-behalf laundering. Execute MUST
16528    /// refuse, MUST NOT insert the memory, AND MUST emit a
16529    /// `pending_action.refused_agent_id_mismatch` audit row so the
16530    /// attempt is captured by the signed_events chain.
16531    #[test]
16532    fn test_execute_refuses_payload_agent_id_mismatch() {
16533        let conn = test_db();
16534        let mut mem = make_memory("laundered store", "ns/launder", Tier::Mid, 5);
16535        // Requester is "alice", but the payload claims agent_id "bob" —
16536        // pre-fix this would land a memory attributed to "bob" even
16537        // though the original requester was "alice".
16538        mem.metadata = serde_json::json!({"agent_id": "bob"});
16539        let payload = serde_json::to_value(&mem).unwrap();
16540        let pending_id = queue_pending_action(
16541            &conn,
16542            crate::models::GovernedAction::Store,
16543            "ns/launder",
16544            None,
16545            "alice",
16546            &payload,
16547        )
16548        .unwrap();
16549        assert!(decide_pending_action(&conn, &pending_id, true, "approver").unwrap());
16550
16551        let err = execute_pending_action(&conn, &pending_id)
16552            .expect_err("execute MUST refuse laundered agent_id");
16553        let msg = format!("{err}");
16554        assert!(
16555            msg.contains("approver-on-behalf laundering refused"),
16556            "expected laundering-refusal message, got: {msg}"
16557        );
16558        // No memory landed.
16559        let count: i64 = conn
16560            .query_row(
16561                "SELECT COUNT(*) FROM memories WHERE namespace = 'ns/launder'",
16562                [],
16563                |r| r.get(0),
16564            )
16565            .unwrap();
16566        assert_eq!(count, 0, "refused execute must not insert a memory");
16567        // Audit row captured.
16568        assert_eq!(
16569            count_signed_events(&conn, "pending_action.refused_agent_id_mismatch"),
16570            1,
16571            "refusal must append a signed_events row"
16572        );
16573        // No approve audit emitted on refused path.
16574        assert_eq!(count_signed_events(&conn, "pending_action.approved"), 0);
16575    }
16576
16577    /// S5-M1 — a successful approve+execute MUST append a
16578    /// `pending_action.approved` row to `signed_events`. Pre-fix the
16579    /// audit chain had no record of the approval transition.
16580    #[test]
16581    fn test_approve_emits_signed_event() {
16582        let conn = test_db();
16583        let mem = make_memory("approved store", "ns/approve", Tier::Mid, 5);
16584        let payload = serde_json::to_value(&mem).unwrap();
16585        let pending_id = queue_pending_action(
16586            &conn,
16587            crate::models::GovernedAction::Store,
16588            "ns/approve",
16589            None,
16590            mem.metadata["agent_id"].as_str().unwrap_or("alice"),
16591            &payload,
16592        )
16593        .unwrap();
16594        // Requester field is the same as the payload metadata.agent_id
16595        // (default fixture leaves it as `{}`), so to keep the verifier
16596        // happy we re-fetch and assert the queue happened. Then approve.
16597        assert!(decide_pending_action(&conn, &pending_id, true, "approver").unwrap());
16598        let _ = execute_pending_action(&conn, &pending_id).expect("execute ok");
16599        assert_eq!(
16600            count_signed_events(&conn, "pending_action.approved"),
16601            1,
16602            "approve+execute must append one audit row"
16603        );
16604        // Deny / timeout MUST NOT have been emitted.
16605        assert_eq!(count_signed_events(&conn, "pending_action.denied"), 0);
16606        assert_eq!(count_signed_events(&conn, "pending_action.timed_out"), 0);
16607    }
16608
16609    /// S5-M2 — a deny transition (decide_pending_action with approve=false)
16610    /// MUST append a `pending_action.denied` row to `signed_events`.
16611    /// Pre-fix the deny path was silent in the audit chain.
16612    #[test]
16613    fn test_deny_emits_signed_event() {
16614        let conn = test_db();
16615        let payload = serde_json::json!({"title": "to-deny", "content": "x"});
16616        let pending_id = queue_pending_action(
16617            &conn,
16618            crate::models::GovernedAction::Store,
16619            "ns/deny",
16620            None,
16621            "alice",
16622            &payload,
16623        )
16624        .unwrap();
16625        let transitioned = decide_pending_action(&conn, &pending_id, false, "approver").unwrap();
16626        assert!(transitioned, "deny transition must succeed on pending row");
16627        assert_eq!(
16628            count_signed_events(&conn, "pending_action.denied"),
16629            1,
16630            "deny must append one audit row"
16631        );
16632        // Approve / timeout MUST NOT have been emitted.
16633        assert_eq!(count_signed_events(&conn, "pending_action.approved"), 0);
16634        assert_eq!(count_signed_events(&conn, "pending_action.timed_out"), 0);
16635    }
16636
16637    /// S5-M2 — the timeout sweeper MUST append one
16638    /// `pending_action.timed_out` row per expired pending row.
16639    /// Pre-fix the sweep transitioned rows silently, leaving the audit
16640    /// chain blind to the auto-expiration.
16641    #[test]
16642    fn test_timeout_sweeper_emits_signed_event() {
16643        let conn = test_db();
16644        // Two stale pending rows + one fresh row. Only the stale rows
16645        // expire under a 1-hour global default; the fresh row stays.
16646        insert_stale_pending(&conn, "stale-a", "ns/x", 7_200, None);
16647        insert_stale_pending(&conn, "stale-b", "ns/y", 7_200, None);
16648        insert_stale_pending(&conn, "fresh-c", "ns/z", 30, None);
16649
16650        let expired = sweep_pending_action_timeouts(&conn, crate::SECS_PER_HOUR).unwrap();
16651        assert_eq!(expired.len(), 2, "two stale rows must expire");
16652        assert_eq!(
16653            count_signed_events(&conn, "pending_action.timed_out"),
16654            2,
16655            "one audit row per expired pending row"
16656        );
16657        // The fresh row is still pending; no audit emit for it.
16658        let fresh_status: String = conn
16659            .query_row(
16660                "SELECT status FROM pending_actions WHERE id = 'fresh-c'",
16661                [],
16662                |r| r.get(0),
16663            )
16664            .unwrap();
16665        assert_eq!(fresh_status, "pending");
16666    }
16667
16668    // -----------------------------------------------------------------
16669    // v0.7.0 S4-INFO2 — `memory_link.created` audit emit
16670    // -----------------------------------------------------------------
16671
16672    /// Count the number of `signed_events` rows for a given event_type
16673    /// and substring match on the row's `payload_hash`-bearing row.
16674    /// Used by the audit emit tests below.
16675    fn count_signed_events_of_type(conn: &Connection, event_type: &str) -> i64 {
16676        conn.query_row(
16677            "SELECT COUNT(*) FROM signed_events WHERE event_type = ?1",
16678            params![event_type],
16679            |r| r.get(0),
16680        )
16681        .unwrap()
16682    }
16683
16684    #[test]
16685    fn test_memory_link_created_emits_signed_event_unsigned_path() {
16686        // S4-INFO2 — every successful link create appends one
16687        // `memory_link.created` row, even on the unsigned path. The
16688        // emit's `attest_level` and `signature` columns must mirror
16689        // the source row.
16690        let conn = test_db();
16691        let src = make_memory("s4info2-src-u", "test", Tier::Long, 5);
16692        let tgt = make_memory("s4info2-tgt-u", "test", Tier::Long, 5);
16693        insert(&conn, &src).unwrap();
16694        insert(&conn, &tgt).unwrap();
16695
16696        let before = count_signed_events_of_type(&conn, "memory_link.created");
16697        create_link_signed(&conn, &src.id, &tgt.id, "related_to", None).unwrap();
16698        let after = count_signed_events_of_type(&conn, "memory_link.created");
16699        assert_eq!(after, before + 1, "unsigned create must emit one audit row");
16700
16701        // Inspect the emitted row's signing-surface columns.
16702        let (attest, sig): (String, Option<Vec<u8>>) = conn
16703            .query_row(
16704                "SELECT attest_level, signature FROM signed_events \
16705                 WHERE event_type = 'memory_link.created' \
16706                 ORDER BY timestamp DESC LIMIT 1",
16707                [],
16708                |r| Ok((r.get(0)?, r.get(1)?)),
16709            )
16710            .unwrap();
16711        assert_eq!(attest, "unsigned");
16712        assert!(sig.is_none(), "unsigned create must emit NULL signature");
16713    }
16714
16715    #[test]
16716    fn test_memory_link_created_emits_signed_event_signed_path() {
16717        // S4-INFO2 — signed path: the emitted row's payload_hash
16718        // must match SHA-256 over the canonical CBOR that the H2
16719        // signer just committed to, AND the `signature` must equal
16720        // the link row's signature byte-for-byte (auditor cross-check).
16721        use crate::identity::{keypair, sign as link_sign};
16722
16723        let conn = test_db();
16724        let src = make_memory("s4info2-src-s", "test", Tier::Long, 5);
16725        let tgt = make_memory("s4info2-tgt-s", "test", Tier::Long, 5);
16726        insert(&conn, &src).unwrap();
16727        insert(&conn, &tgt).unwrap();
16728
16729        let kp = keypair::generate("alice").unwrap();
16730        create_link_signed(&conn, &src.id, &tgt.id, "supersedes", Some(&kp)).unwrap();
16731
16732        // Read back the link row's signature + valid_from so we can
16733        // re-derive the canonical CBOR the audit row should commit to.
16734        let (link_sig, valid_from): (Vec<u8>, String) = conn
16735            .query_row(
16736                "SELECT signature, valid_from FROM memory_links \
16737                 WHERE source_id = ?1 AND target_id = ?2",
16738                params![&src.id, &tgt.id],
16739                |r| Ok((r.get::<_, Vec<u8>>(0)?, r.get::<_, String>(1)?)),
16740            )
16741            .unwrap();
16742        let signable = link_sign::SignableLink {
16743            src_id: &src.id,
16744            dst_id: &tgt.id,
16745            relation: "supersedes",
16746            observed_by: Some(kp.agent_id.as_str()),
16747            valid_from: Some(valid_from.as_str()),
16748            valid_until: None,
16749        };
16750        let expected_hash = crate::signed_events::payload_hash(
16751            &link_sign::canonical_cbor(&signable).expect("cbor"),
16752        );
16753
16754        let (agent, attest, sig, payload): (String, String, Option<Vec<u8>>, Vec<u8>) = conn
16755            .query_row(
16756                "SELECT agent_id, attest_level, signature, payload_hash \
16757                 FROM signed_events \
16758                 WHERE event_type = 'memory_link.created' \
16759                 ORDER BY timestamp DESC LIMIT 1",
16760                [],
16761                |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?)),
16762            )
16763            .unwrap();
16764        assert_eq!(agent, "alice");
16765        assert_eq!(attest, "self_signed");
16766        assert_eq!(
16767            sig.as_deref(),
16768            Some(link_sig.as_slice()),
16769            "audit row signature must mirror memory_links.signature byte-for-byte"
16770        );
16771        assert_eq!(
16772            payload, expected_hash,
16773            "audit row payload_hash must SHA-256 the canonical CBOR H2 signed over"
16774        );
16775    }
16776
16777    #[test]
16778    fn test_memory_link_created_emit_is_idempotent_on_replay() {
16779        // INSERT OR IGNORE collapses duplicate (src,dst,relation)
16780        // writes to a no-op at the link layer. The audit emit must
16781        // NOT fire on the replay — otherwise an idempotent retry by
16782        // a federation peer would inflate the audit row count for
16783        // the same logical event.
16784        let conn = test_db();
16785        let src = make_memory("s4info2-src-d", "test", Tier::Long, 5);
16786        let tgt = make_memory("s4info2-tgt-d", "test", Tier::Long, 5);
16787        insert(&conn, &src).unwrap();
16788        insert(&conn, &tgt).unwrap();
16789
16790        create_link_signed(&conn, &src.id, &tgt.id, "related_to", None).unwrap();
16791        let after_first = count_signed_events_of_type(&conn, "memory_link.created");
16792        create_link_signed(&conn, &src.id, &tgt.id, "related_to", None).unwrap();
16793        let after_second = count_signed_events_of_type(&conn, "memory_link.created");
16794        assert_eq!(
16795            after_second, after_first,
16796            "duplicate (src,dst,relation) replay must not emit a second audit row"
16797        );
16798    }
16799
16800    #[test]
16801    fn test_create_link_inbound_emits_signed_event() {
16802        // The federation-replicated path must emit too — the audit
16803        // ledger reflects every link visible locally.
16804        let conn = test_db();
16805        let src = make_memory("s4info2-in-src", "test", Tier::Long, 5);
16806        let tgt = make_memory("s4info2-in-tgt", "test", Tier::Long, 5);
16807        insert(&conn, &src).unwrap();
16808        insert(&conn, &tgt).unwrap();
16809
16810        let now = chrono::Utc::now().to_rfc3339();
16811        let link = MemoryLink {
16812            source_id: src.id.clone(),
16813            target_id: tgt.id.clone(),
16814            relation: crate::models::MemoryLinkRelation::RelatedTo,
16815            created_at: now.clone(),
16816            signature: None,
16817            observed_by: Some("peer-bob".to_string()),
16818            valid_from: Some(now.clone()),
16819            valid_until: None,
16820            attest_level: None,
16821        };
16822        let before = count_signed_events_of_type(&conn, "memory_link.created");
16823        create_link_inbound(&conn, &link, "unsigned").unwrap();
16824        let after = count_signed_events_of_type(&conn, "memory_link.created");
16825        assert_eq!(after, before + 1);
16826
16827        let agent: String = conn
16828            .query_row(
16829                "SELECT agent_id FROM signed_events \
16830                 WHERE event_type = 'memory_link.created' \
16831                 ORDER BY timestamp DESC LIMIT 1",
16832                [],
16833                |r| r.get(0),
16834            )
16835            .unwrap();
16836        assert_eq!(
16837            agent, "peer-bob",
16838            "inbound emit must record the peer's claimed observed_by"
16839        );
16840    }
16841
16842    #[test]
16843    fn test_create_link_signed_emit_failure_does_not_roll_back() {
16844        // Drop the signed_events table to simulate a substrate
16845        // problem (schema drift, disk error mapped to a SQL
16846        // failure). The link create must still commit and the
16847        // function must return Ok — the audit emit is best-effort.
16848        let conn = test_db();
16849        let src = make_memory("s4info2-fail-src", "test", Tier::Long, 5);
16850        let tgt = make_memory("s4info2-fail-tgt", "test", Tier::Long, 5);
16851        insert(&conn, &src).unwrap();
16852        insert(&conn, &tgt).unwrap();
16853
16854        // Knock out the audit substrate.
16855        conn.execute("DROP TABLE signed_events", []).unwrap();
16856
16857        let result = create_link_signed(&conn, &src.id, &tgt.id, "related_to", None);
16858        assert!(
16859            result.is_ok(),
16860            "audit emit failure must not crater the link create: {result:?}"
16861        );
16862
16863        // The link itself must have persisted.
16864        let count: i64 = conn
16865            .query_row(
16866                "SELECT COUNT(*) FROM memory_links \
16867                 WHERE source_id = ?1 AND target_id = ?2",
16868                params![&src.id, &tgt.id],
16869                |r| r.get(0),
16870            )
16871            .unwrap();
16872        assert_eq!(
16873            count, 1,
16874            "link row must have committed despite audit failure"
16875        );
16876    }
16877
16878    // ─────────────────────────────────────────────────────────────────────────
16879    // L1-1 (v0.7.0) — MemoryKind typed enum + migration v31 tests
16880    //
16881    // Migration v31 (memory_kind) was originally authored as v30 on
16882    // l1/typed-memorykind; renumbered during the L1 wave merge after
16883    // substrate-rules (issue #691) took v30. The backfill SQL is unchanged.
16884    // ─────────────────────────────────────────────────────────────────────────
16885
16886    /// Migration v31 backfill: a row with `memory_kind='observation'` and
16887    /// `metadata.type='reflection'` should be updated to
16888    /// `memory_kind='reflection'` by the backfill SQL in the migration.
16889    #[test]
16890    fn l1_1_migration_backfill_sets_reflection_kind() {
16891        let conn = test_db();
16892        let now = chrono::Utc::now().to_rfc3339();
16893        let id = uuid::Uuid::new_v4().to_string();
16894        // Insert a row that looks like a pre-v31 reflection: memory_kind
16895        // defaults to 'observation' (the old schema had no such column)
16896        // but metadata.type = 'reflection' signals it was produced by
16897        // memory_reflect.
16898        conn.execute(
16899            "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, \
16900             confidence, source, access_count, created_at, updated_at, metadata, \
16901             reflection_depth, memory_kind) \
16902             VALUES (?1,'mid','ns','backfill-test','content','[]',5,1.0,'test',0,?2,?2,?3,0,'observation')",
16903            rusqlite::params![id, now, r#"{"type":"reflection"}"#],
16904        )
16905        .unwrap();
16906
16907        // Confirm the row starts with memory_kind='observation'.
16908        let before: String = conn
16909            .query_row(
16910                "SELECT memory_kind FROM memories WHERE id = ?1",
16911                [&id],
16912                |r| r.get(0),
16913            )
16914            .unwrap();
16915        assert_eq!(before, "observation");
16916
16917        // Run the backfill SQL (same logic as migration v31).
16918        conn.execute(
16919            "UPDATE memories SET memory_kind = 'reflection' \
16920             WHERE memory_kind = 'observation' \
16921               AND json_valid(metadata) \
16922               AND json_extract(metadata, '$.type') = 'reflection'",
16923            [],
16924        )
16925        .unwrap();
16926
16927        let after: String = conn
16928            .query_row(
16929                "SELECT memory_kind FROM memories WHERE id = ?1",
16930                [&id],
16931                |r| r.get(0),
16932            )
16933            .unwrap();
16934        assert_eq!(
16935            after, "reflection",
16936            "backfill must upgrade metadata.type=reflection rows to memory_kind=reflection"
16937        );
16938    }
16939
16940    /// Backfill must NOT touch rows where `metadata.type` is absent or is
16941    /// something other than `'reflection'`.
16942    #[test]
16943    fn l1_1_migration_backfill_leaves_non_reflection_rows_alone() {
16944        let conn = test_db();
16945        let now = chrono::Utc::now().to_rfc3339();
16946        let id = uuid::Uuid::new_v4().to_string();
16947        conn.execute(
16948            "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, \
16949             confidence, source, access_count, created_at, updated_at, metadata, \
16950             reflection_depth, memory_kind) \
16951             VALUES (?1,'mid','ns','obs-test','content','[]',5,1.0,'test',0,?2,?2,'{}',0,'observation')",
16952            rusqlite::params![id, now],
16953        )
16954        .unwrap();
16955
16956        conn.execute(
16957            "UPDATE memories SET memory_kind = 'reflection' \
16958             WHERE memory_kind = 'observation' \
16959               AND json_valid(metadata) \
16960               AND json_extract(metadata, '$.type') = 'reflection'",
16961            [],
16962        )
16963        .unwrap();
16964
16965        let after: String = conn
16966            .query_row(
16967                "SELECT memory_kind FROM memories WHERE id = ?1",
16968                [&id],
16969                |r| r.get(0),
16970            )
16971            .unwrap();
16972        assert_eq!(
16973            after, "observation",
16974            "backfill must not change rows without metadata.type=reflection"
16975        );
16976    }
16977
16978    /// `memories_by_kind(Observation)` returns only observation memories;
16979    /// `memories_by_kind(Reflection)` returns only reflection memories.
16980    #[test]
16981    fn l1_1_memories_by_kind_returns_correct_subset() {
16982        let conn = test_db();
16983
16984        // Insert one observation and one reflection memory.
16985        let obs = Memory {
16986            id: uuid::Uuid::new_v4().to_string(),
16987            tier: Tier::Long,
16988            namespace: "kind-ns".to_string(),
16989            title: "obs-memory".to_string(),
16990            content: "observation content".to_string(),
16991            tags: vec![],
16992            priority: 5,
16993            confidence: 1.0,
16994            source: "test".to_string(),
16995            access_count: 0,
16996            created_at: chrono::Utc::now().to_rfc3339(),
16997            updated_at: chrono::Utc::now().to_rfc3339(),
16998            last_accessed_at: None,
16999            expires_at: None,
17000            metadata: serde_json::json!({}),
17001            reflection_depth: 0,
17002            memory_kind: crate::models::MemoryKind::Observation,
17003            entity_id: None,
17004            persona_version: None,
17005            citations: Vec::new(),
17006            source_uri: None,
17007            source_span: None,
17008            confidence_source: ConfidenceSource::CallerProvided,
17009            confidence_signals: None,
17010            confidence_decayed_at: None,
17011            version: 1,
17012        };
17013        let ref_mem = Memory {
17014            id: uuid::Uuid::new_v4().to_string(),
17015            tier: Tier::Long,
17016            namespace: "kind-ns".to_string(),
17017            title: "ref-memory".to_string(),
17018            content: "reflection content".to_string(),
17019            tags: vec![],
17020            priority: 5,
17021            confidence: 1.0,
17022            source: "test".to_string(),
17023            access_count: 0,
17024            created_at: chrono::Utc::now().to_rfc3339(),
17025            updated_at: chrono::Utc::now().to_rfc3339(),
17026            last_accessed_at: None,
17027            expires_at: None,
17028            metadata: serde_json::json!({}),
17029            reflection_depth: 1,
17030            memory_kind: crate::models::MemoryKind::Reflection,
17031            entity_id: None,
17032            persona_version: None,
17033            citations: Vec::new(),
17034            source_uri: None,
17035            source_span: None,
17036            confidence_source: ConfidenceSource::CallerProvided,
17037            confidence_signals: None,
17038            confidence_decayed_at: None,
17039            version: 1,
17040        };
17041
17042        insert(&conn, &obs).unwrap();
17043        insert(&conn, &ref_mem).unwrap();
17044
17045        let obs_rows = memories_by_kind(&conn, &crate::models::MemoryKind::Observation).unwrap();
17046        let ref_rows = memories_by_kind(&conn, &crate::models::MemoryKind::Reflection).unwrap();
17047
17048        assert!(
17049            obs_rows
17050                .iter()
17051                .all(|m| m.memory_kind == crate::models::MemoryKind::Observation),
17052            "memories_by_kind(Observation) must return only Observation memories"
17053        );
17054        assert!(
17055            ref_rows
17056                .iter()
17057                .all(|m| m.memory_kind == crate::models::MemoryKind::Reflection),
17058            "memories_by_kind(Reflection) must return only Reflection memories"
17059        );
17060        // The inserted observation must appear in obs_rows.
17061        assert!(
17062            obs_rows.iter().any(|m| m.title == "obs-memory"),
17063            "obs-memory must be in Observation results"
17064        );
17065        // The inserted reflection must appear in ref_rows.
17066        assert!(
17067            ref_rows.iter().any(|m| m.title == "ref-memory"),
17068            "ref-memory must be in Reflection results"
17069        );
17070        // Cross-check: obs memory must NOT be in reflection results.
17071        assert!(
17072            !ref_rows.iter().any(|m| m.title == "obs-memory"),
17073            "obs-memory must not appear in Reflection results"
17074        );
17075    }
17076
17077    /// Inserting a memory with `memory_kind=Reflection` and then reading it
17078    /// back via `get()` must preserve the `Reflection` variant.
17079    #[test]
17080    fn l1_1_memory_kind_roundtrips_through_insert_get() {
17081        let conn = test_db();
17082        let mem = Memory {
17083            id: uuid::Uuid::new_v4().to_string(),
17084            tier: Tier::Long,
17085            namespace: "roundtrip-ns".to_string(),
17086            title: "kind-roundtrip".to_string(),
17087            content: "roundtrip content".to_string(),
17088            tags: vec![],
17089            priority: 5,
17090            confidence: 1.0,
17091            source: "test".to_string(),
17092            access_count: 0,
17093            created_at: chrono::Utc::now().to_rfc3339(),
17094            updated_at: chrono::Utc::now().to_rfc3339(),
17095            last_accessed_at: None,
17096            expires_at: None,
17097            metadata: serde_json::json!({}),
17098            reflection_depth: 1,
17099            memory_kind: crate::models::MemoryKind::Reflection,
17100            entity_id: None,
17101            persona_version: None,
17102            citations: Vec::new(),
17103            source_uri: None,
17104            source_span: None,
17105            confidence_source: ConfidenceSource::CallerProvided,
17106            confidence_signals: None,
17107            confidence_decayed_at: None,
17108            version: 1,
17109        };
17110        let id = insert(&conn, &mem).unwrap();
17111        let got = get(&conn, &id)
17112            .unwrap()
17113            .expect("inserted memory must be found");
17114        assert_eq!(
17115            got.memory_kind,
17116            crate::models::MemoryKind::Reflection,
17117            "memory_kind=Reflection must roundtrip through insert→get"
17118        );
17119    }
17120
17121    /// The upsert sticky-field logic: if a row already has
17122    /// `memory_kind='reflection'`, a subsequent upsert with
17123    /// `memory_kind='observation'` must NOT overwrite it.
17124    #[test]
17125    fn l1_1_upsert_preserves_reflection_kind() {
17126        let conn = test_db();
17127        let now = chrono::Utc::now().to_rfc3339();
17128        let id = uuid::Uuid::new_v4().to_string();
17129
17130        // First insert: Reflection.
17131        let mem_reflection = Memory {
17132            id: id.clone(),
17133            tier: Tier::Long,
17134            namespace: "sticky-ns".to_string(),
17135            title: "sticky-title".to_string(),
17136            content: "original content".to_string(),
17137            tags: vec![],
17138            priority: 5,
17139            confidence: 1.0,
17140            source: "test".to_string(),
17141            access_count: 0,
17142            created_at: now.clone(),
17143            updated_at: now.clone(),
17144            last_accessed_at: None,
17145            expires_at: None,
17146            metadata: serde_json::json!({}),
17147            reflection_depth: 1,
17148            memory_kind: crate::models::MemoryKind::Reflection,
17149            entity_id: None,
17150            persona_version: None,
17151            citations: Vec::new(),
17152            source_uri: None,
17153            source_span: None,
17154            confidence_source: ConfidenceSource::CallerProvided,
17155            confidence_signals: None,
17156            confidence_decayed_at: None,
17157            version: 1,
17158        };
17159        insert(&conn, &mem_reflection).unwrap();
17160
17161        // Second upsert: Observation (same title+namespace → triggers ON CONFLICT).
17162        let mem_obs = Memory {
17163            id: uuid::Uuid::new_v4().to_string(), // different id, same title+ns
17164            tier: Tier::Long,
17165            namespace: "sticky-ns".to_string(),
17166            title: "sticky-title".to_string(),
17167            content: "updated content".to_string(),
17168            tags: vec![],
17169            priority: 6,
17170            confidence: 1.0,
17171            source: "test".to_string(),
17172            access_count: 0,
17173            created_at: now.clone(),
17174            updated_at: now,
17175            last_accessed_at: None,
17176            expires_at: None,
17177            metadata: serde_json::json!({}),
17178            reflection_depth: 0,
17179            memory_kind: crate::models::MemoryKind::Observation,
17180            entity_id: None,
17181            persona_version: None,
17182            citations: Vec::new(),
17183            source_uri: None,
17184            source_span: None,
17185            confidence_source: ConfidenceSource::CallerProvided,
17186            confidence_signals: None,
17187            confidence_decayed_at: None,
17188            version: 1,
17189        };
17190        insert(&conn, &mem_obs).unwrap();
17191
17192        // The row must still be Reflection (sticky field wins).
17193        let got = get(&conn, &id)
17194            .unwrap()
17195            .expect("original memory must still exist");
17196        assert_eq!(
17197            got.memory_kind,
17198            crate::models::MemoryKind::Reflection,
17199            "upsert with Observation must not overwrite an existing Reflection kind"
17200        );
17201    }
17202
17203    // -----------------------------------------------------------------
17204    // v0.7.0 issue #810 / #812 / #813 — CHECK trigger + strongest_attest
17205    // -----------------------------------------------------------------
17206
17207    #[test]
17208    fn strongest_attest_returns_unsigned_for_isolate_source() {
17209        // A source with no outbound links — the only honest default
17210        // is `unsigned`.
17211        let conn = test_db();
17212        let lonely = make_memory("lonely", "test", Tier::Long, 5);
17213        insert(&conn, &lonely).unwrap();
17214        let got = strongest_attest_level_for_source(&conn, &lonely.id).unwrap();
17215        assert_eq!(got, "unsigned");
17216    }
17217
17218    #[test]
17219    fn strongest_attest_picks_self_signed_over_unsigned() {
17220        use crate::identity::keypair;
17221        // Serialise against the a3 tests that flip the *global* permissions
17222        // mode to Enforce + install a deny-all link rule; without this gate
17223        // their Enforce window can race this create_link_signed call and
17224        // surface a spurious "link denied by permission rule". See the
17225        // governance-mode test-isolation tracking issue. #626 Layer-3 QC.
17226        let _gate = crate::config::lock_permissions_mode_for_test();
17227        let conn = test_db();
17228        let src = make_memory("attest-src", "test", Tier::Long, 5);
17229        let a = make_memory("attest-a", "test", Tier::Long, 5);
17230        let b = make_memory("attest-b", "test", Tier::Long, 5);
17231        insert(&conn, &src).unwrap();
17232        insert(&conn, &a).unwrap();
17233        insert(&conn, &b).unwrap();
17234        // One unsigned + one signed outbound link.
17235        create_link_signed(&conn, &src.id, &a.id, "related_to", None).unwrap();
17236        let kp = keypair::generate("alice").unwrap();
17237        create_link_signed(&conn, &src.id, &b.id, "supersedes", Some(&kp)).unwrap();
17238        let got = strongest_attest_level_for_source(&conn, &src.id).unwrap();
17239        assert_eq!(got, "self_signed", "self_signed beats unsigned");
17240    }
17241
17242    #[test]
17243    fn strongest_attest_picks_peer_attested_over_self_signed() {
17244        // Construct a peer-attested row by hand-rolling the
17245        // create_link_inbound path so we don't depend on a remote
17246        // signature. The CHECK trigger requires a 64-byte sig blob
17247        // for `peer_attested` — fabricate one.
17248        let conn = test_db();
17249        let src = make_memory("attest-pa-src", "test", Tier::Long, 5);
17250        let a = make_memory("attest-pa-a", "test", Tier::Long, 5);
17251        let b = make_memory("attest-pa-b", "test", Tier::Long, 5);
17252        insert(&conn, &src).unwrap();
17253        insert(&conn, &a).unwrap();
17254        insert(&conn, &b).unwrap();
17255        // Self-signed link.
17256        let kp = crate::identity::keypair::generate("alice").unwrap();
17257        create_link_signed(&conn, &src.id, &a.id, "related_to", Some(&kp)).unwrap();
17258        // Hand-inject a peer_attested row with a 64-byte signature so
17259        // the CHECK trigger admits it.
17260        let now = chrono::Utc::now().to_rfc3339();
17261        let sig = vec![0xAB_u8; 64];
17262        conn.execute(
17263            "INSERT INTO memory_links \
17264                (source_id, target_id, relation, created_at, valid_from, signature, attest_level, observed_by) \
17265             VALUES (?1, ?2, 'related_to', ?3, ?3, ?4, 'peer_attested', 'peer-bob')",
17266            params![&src.id, &b.id, &now, &sig],
17267        )
17268        .unwrap();
17269        let got = strongest_attest_level_for_source(&conn, &src.id).unwrap();
17270        assert_eq!(got, "peer_attested", "peer_attested beats self_signed");
17271    }
17272
17273    #[test]
17274    fn ck_trigger_refuses_self_signed_insert_without_signature() {
17275        // BUG-A regression test — a direct INSERT that claims
17276        // `self_signed` with NULL signature must fail at the SQLite
17277        // trigger layer. Closes the phantom-attest-level defect at
17278        // the substrate boundary even when a future caller (or
17279        // operator UPDATE) bypasses `create_link_signed`'s match arm.
17280        let conn = test_db();
17281        let s = make_memory("ck-src", "test", Tier::Long, 5);
17282        let t = make_memory("ck-tgt", "test", Tier::Long, 5);
17283        insert(&conn, &s).unwrap();
17284        insert(&conn, &t).unwrap();
17285        let now = chrono::Utc::now().to_rfc3339();
17286        let res = conn.execute(
17287            "INSERT INTO memory_links \
17288                (source_id, target_id, relation, created_at, valid_from, signature, attest_level) \
17289             VALUES (?1, ?2, 'related_to', ?3, ?3, NULL, 'self_signed')",
17290            params![&s.id, &t.id, &now],
17291        );
17292        let err = res.expect_err("CHECK trigger must reject self_signed + NULL signature");
17293        let msg = format!("{err}");
17294        assert!(
17295            msg.contains("CHECK constraint failed")
17296                || msg.contains("attest_level")
17297                || msg.contains("64-byte signature"),
17298            "trigger error must name the failure mode, got: {msg}"
17299        );
17300    }
17301
17302    #[test]
17303    fn ck_trigger_refuses_self_signed_insert_with_wrong_length_signature() {
17304        // Same defense for a non-NULL but wrong-length signature
17305        // (e.g. truncated by a partial wire-read or a malformed
17306        // operator INSERT).
17307        let conn = test_db();
17308        let s = make_memory("ck-src-wlen", "test", Tier::Long, 5);
17309        let t = make_memory("ck-tgt-wlen", "test", Tier::Long, 5);
17310        insert(&conn, &s).unwrap();
17311        insert(&conn, &t).unwrap();
17312        let now = chrono::Utc::now().to_rfc3339();
17313        let res = conn.execute(
17314            "INSERT INTO memory_links \
17315                (source_id, target_id, relation, created_at, valid_from, signature, attest_level) \
17316             VALUES (?1, ?2, 'related_to', ?3, ?3, ?4, 'self_signed')",
17317            params![&s.id, &t.id, &now, &[0u8; 8][..]],
17318        );
17319        assert!(
17320            res.is_err(),
17321            "CHECK trigger must reject wrong-length signature"
17322        );
17323    }
17324
17325    #[test]
17326    fn ck_trigger_refuses_update_to_self_signed_without_signature() {
17327        // The CHECK trigger fires on UPDATE as well as INSERT — a
17328        // post-hoc UPDATE that flips an unsigned row to self_signed
17329        // without supplying signature bytes must be refused.
17330        let conn = test_db();
17331        let s = make_memory("ck-upd-src", "test", Tier::Long, 5);
17332        let t = make_memory("ck-upd-tgt", "test", Tier::Long, 5);
17333        insert(&conn, &s).unwrap();
17334        insert(&conn, &t).unwrap();
17335        create_link_signed(&conn, &s.id, &t.id, "related_to", None).unwrap();
17336        let res = conn.execute(
17337            "UPDATE memory_links SET attest_level = 'self_signed' \
17338             WHERE source_id = ?1 AND target_id = ?2",
17339            params![&s.id, &t.id],
17340        );
17341        assert!(
17342            res.is_err(),
17343            "CHECK trigger must reject UPDATE to self_signed with NULL signature"
17344        );
17345    }
17346
17347    #[test]
17348    fn ck_trigger_admits_unsigned_with_null_signature() {
17349        // The trigger's `WHEN` clause is scoped to self_signed /
17350        // peer_attested — the unsigned path with NULL signature
17351        // (the v0.6.4 default) must still admit. Negative-control
17352        // test pinning the trigger's narrow scope.
17353        let conn = test_db();
17354        let s = make_memory("ck-unsigned-src", "test", Tier::Long, 5);
17355        let t = make_memory("ck-unsigned-tgt", "test", Tier::Long, 5);
17356        insert(&conn, &s).unwrap();
17357        insert(&conn, &t).unwrap();
17358        // create_link_signed's unsigned branch sets (NULL, "unsigned");
17359        // confirm it still works under the new trigger.
17360        create_link_signed(&conn, &s.id, &t.id, "related_to", None)
17361            .expect("unsigned create must still succeed under the new CHECK trigger");
17362    }
17363
17364    // -----------------------------------------------------------------
17365    // #626 Layer-3 (Task 1.3 / C3) — bind_agent_pubkey + agent_pubkey
17366    // -----------------------------------------------------------------
17367
17368    #[test]
17369    fn agent_pubkey_none_before_bind_and_some_after() {
17370        let conn = test_db();
17371        register_agent(&conn, "ai:curator", "ai:generic", &[]).expect("register");
17372        // Registered but unbound → permissive None.
17373        assert_eq!(agent_pubkey(&conn, "ai:curator").unwrap(), None);
17374
17375        let kp = crate::identity::keypair::generate("ai:curator").expect("generate");
17376        let b64 = kp.public_base64();
17377        bind_agent_pubkey(&conn, "ai:curator", &b64).expect("bind");
17378        assert_eq!(agent_pubkey(&conn, "ai:curator").unwrap(), Some(b64));
17379    }
17380
17381    #[test]
17382    fn agent_pubkey_none_for_unregistered_agent() {
17383        let conn = test_db();
17384        // Never registered → None (collapses to "no key to verify").
17385        assert_eq!(agent_pubkey(&conn, "ai:ghost").unwrap(), None);
17386    }
17387
17388    #[test]
17389    fn bind_agent_pubkey_rejects_unregistered_agent() {
17390        let conn = test_db();
17391        let err = bind_agent_pubkey(&conn, "ai:ghost", "AAAA").unwrap_err();
17392        assert!(
17393            err.to_string().contains("not registered"),
17394            "binding to an unregistered agent must be rejected; got: {err}",
17395        );
17396    }
17397
17398    #[test]
17399    fn bind_agent_pubkey_rotates_key_in_place() {
17400        let conn = test_db();
17401        register_agent(&conn, "ai:curator", "ai:generic", &[]).expect("register");
17402        let k1 = crate::identity::keypair::generate("ai:curator")
17403            .unwrap()
17404            .public_base64();
17405        let k2 = crate::identity::keypair::generate("ai:curator")
17406            .unwrap()
17407            .public_base64();
17408        assert_ne!(k1, k2, "two fresh keys differ");
17409        bind_agent_pubkey(&conn, "ai:curator", &k1).expect("bind k1");
17410        assert_eq!(agent_pubkey(&conn, "ai:curator").unwrap(), Some(k1));
17411        // Rotation overwrites in place.
17412        bind_agent_pubkey(&conn, "ai:curator", &k2).expect("rotate to k2");
17413        assert_eq!(agent_pubkey(&conn, "ai:curator").unwrap(), Some(k2));
17414    }
17415
17416    #[test]
17417    fn bind_agent_pubkey_preserves_registration_fields() {
17418        // Binding a key must not clobber agent_type / capabilities /
17419        // registered_at — list_agents must still see the full row.
17420        let conn = test_db();
17421        register_agent(
17422            &conn,
17423            "ai:curator",
17424            "ai:claude-opus",
17425            &["recall".to_string(), "write".to_string()],
17426        )
17427        .expect("register");
17428        let before = list_agents(&conn).expect("list before");
17429        let kp = crate::identity::keypair::generate("ai:curator").unwrap();
17430        bind_agent_pubkey(&conn, "ai:curator", &kp.public_base64()).expect("bind");
17431        let after = list_agents(&conn).expect("list after");
17432
17433        let a_before = before
17434            .iter()
17435            .find(|a| a.agent_id == "ai:curator")
17436            .expect("present before");
17437        let a_after = after
17438            .iter()
17439            .find(|a| a.agent_id == "ai:curator")
17440            .expect("present after");
17441        assert_eq!(a_after.agent_type, a_before.agent_type);
17442        assert_eq!(a_after.capabilities, a_before.capabilities);
17443        assert_eq!(a_after.registered_at, a_before.registered_at);
17444    }
17445
17446    // -----------------------------------------------------------------
17447    // #626 Layer-3 (Task 1.3 / C5) — revoke_agent_pubkey
17448    // -----------------------------------------------------------------
17449
17450    #[test]
17451    fn revoke_agent_pubkey_clears_bound_key() {
17452        let conn = test_db();
17453        register_agent(&conn, "ai:curator", "ai:generic", &[]).expect("register");
17454        let kp = crate::identity::keypair::generate("ai:curator").unwrap();
17455        bind_agent_pubkey(&conn, "ai:curator", &kp.public_base64()).expect("bind");
17456        assert!(agent_pubkey(&conn, "ai:curator").unwrap().is_some());
17457        revoke_agent_pubkey(&conn, "ai:curator").expect("revoke");
17458        assert_eq!(agent_pubkey(&conn, "ai:curator").unwrap(), None);
17459    }
17460
17461    #[test]
17462    fn revoke_agent_pubkey_is_idempotent_without_bound_key() {
17463        let conn = test_db();
17464        register_agent(&conn, "ai:curator", "ai:generic", &[]).expect("register");
17465        // No key ever bound — revoke still succeeds and stays None.
17466        revoke_agent_pubkey(&conn, "ai:curator").expect("revoke unbound");
17467        assert_eq!(agent_pubkey(&conn, "ai:curator").unwrap(), None);
17468    }
17469
17470    #[test]
17471    fn revoke_agent_pubkey_rejects_unregistered_agent() {
17472        let conn = test_db();
17473        let err = revoke_agent_pubkey(&conn, "ai:ghost").unwrap_err();
17474        assert!(
17475            err.to_string().contains("not registered"),
17476            "revoking an unregistered agent must be rejected; got: {err}",
17477        );
17478    }
17479
17480    #[test]
17481    fn revoke_agent_pubkey_preserves_registration_fields() {
17482        let conn = test_db();
17483        register_agent(
17484            &conn,
17485            "ai:curator",
17486            "ai:claude-opus",
17487            &["recall".to_string(), "write".to_string()],
17488        )
17489        .expect("register");
17490        let kp = crate::identity::keypair::generate("ai:curator").unwrap();
17491        bind_agent_pubkey(&conn, "ai:curator", &kp.public_base64()).expect("bind");
17492        revoke_agent_pubkey(&conn, "ai:curator").expect("revoke");
17493        let after = list_agents(&conn).expect("list after");
17494        let a = after
17495            .iter()
17496            .find(|a| a.agent_id == "ai:curator")
17497            .expect("present after revoke");
17498        assert_eq!(a.agent_type, "ai:claude-opus");
17499        assert_eq!(
17500            a.capabilities,
17501            vec!["recall".to_string(), "write".to_string()]
17502        );
17503    }
17504
17505    #[test]
17506    fn revoke_then_rebind_restores_attestable_key() {
17507        let conn = test_db();
17508        register_agent(&conn, "ai:curator", "ai:generic", &[]).expect("register");
17509        let k1 = crate::identity::keypair::generate("ai:curator")
17510            .unwrap()
17511            .public_base64();
17512        bind_agent_pubkey(&conn, "ai:curator", &k1).expect("bind k1");
17513        revoke_agent_pubkey(&conn, "ai:curator").expect("revoke");
17514        assert_eq!(agent_pubkey(&conn, "ai:curator").unwrap(), None);
17515        let k2 = crate::identity::keypair::generate("ai:curator")
17516            .unwrap()
17517            .public_base64();
17518        bind_agent_pubkey(&conn, "ai:curator", &k2).expect("rebind k2");
17519        assert_eq!(agent_pubkey(&conn, "ai:curator").unwrap(), Some(k2));
17520    }
17521}
ai_memory/storage/mod.rs

ai_memory/storage/
mod.rs