ai-memory 0.7.0

// Copyright 2026 AlphaOne LLC
// SPDX-License-Identifier: Apache-2.0

// #873 — `recall_hybrid_with_telemetry` exceeds the per-function 250-
// line budget; tracked for split as #871 (stage-helpers: param-prep /
// fts-branch / semantic-branch / blend+rerank / touch+telemetry). The
// allowance is module-scope so future too-big helpers in the same
// file are caught by the lint at PR-time instead of silently growing.
#![allow(clippy::too_many_lines)]

use crate::models::field_names;
use anyhow::{Context, Result};
use chrono::{DateTime, Utc};
use rusqlite::{Connection, params};
use std::collections::HashMap;
use std::path::Path;

// ── #1558 batch 6 — file-local SQL SSOT (pm-v3.1 hardcoded-literal gate) ──
// Every statement in this first group binds exactly one positional
// parameter (`?1`).
/// Deletes the `memories` row whose `id` equals `?1`.
const SQL_DELETE_MEMORY_BY_ID: &str = "DELETE FROM memories WHERE id = ?1";
/// Deletes the `namespace_meta` row(s) whose `standard_id` equals `?1`.
const SQL_DELETE_NAMESPACE_META_BY_STANDARD_ID: &str =
    "DELETE FROM namespace_meta WHERE standard_id = ?1";
/// Yields a single boolean-valued column: whether any `memories` row has
/// `id = ?1`, computed via `COUNT(*) > 0`.
// NOTE(review): this and `SQL_MEMORY_EXISTS` answer the same question with
// different SQL shapes; the callers are not visible from here — confirm
// whether both are still needed.
const SQL_MEMORY_EXISTS_COUNT: &str = "SELECT COUNT(*) > 0 FROM memories WHERE id = ?1";
/// Yields a single boolean-valued column: whether any `memories` row has
/// `id = ?1`, computed via `EXISTS(SELECT 1 …)`.
const SQL_MEMORY_EXISTS: &str = "SELECT EXISTS(SELECT 1 FROM memories WHERE id = ?1)";
/// Selects every column of the `memories` row whose `id` equals `?1`.
const SQL_SELECT_MEMORY_ROW_BY_ID: &str = "SELECT * FROM memories WHERE id = ?1";
// ── #1579 A2 — sargable `list` SQL fragments ──────────────────────────────
// The always-present expiry guard opens the WHERE clause; every other
// filter is appended by `build_list_query` ONLY when the caller supplied
// it, so the planner sees bare `col = ?` / `col >= ?` predicates it can
// drive through `idx_memories_list_order` / `idx_memories_ns_list_order`
// instead of the formerly non-sargable `(?N IS NULL OR col = ?N)` arms.
/// Opening fragment of the `list` query: selects all columns and keeps rows
/// that never expire or whose `expires_at` is later than the single
/// anonymous `?` parameter.
const SQL_LIST_BASE: &str = "SELECT * FROM memories WHERE (expires_at IS NULL OR expires_at > ?)";
/// Closing fragment of the `list` query. Starts with a space so it can be
/// appended directly after the last predicate; takes two anonymous `?`
/// parameters, `LIMIT` first and `OFFSET` second.
const SQL_LIST_ORDER_LIMIT: &str = " ORDER BY priority DESC, updated_at DESC LIMIT ? OFFSET ?";

/// v0.7.0 H6 (round-2) — drop the sub-microsecond digits of a
/// `DateTime<Utc>`.
///
/// Counterpart of the same-named helper in `store/postgres.rs` (G3
/// fix). PostgreSQL's `TIMESTAMPTZ` keeps microseconds only, whereas
/// the SQLite path was lossless; a link written through one adapter
/// and re-verified through the other (directly or via federation)
/// would otherwise see its canonical RFC3339 string change shape and
/// fail Ed25519 verification. Truncating at write time, BEFORE CBOR
/// canonicalisation, keeps the signed shape identical on both sides.
/// See `store/postgres.rs:3520-3543` for the full design context.
///
/// Returns `t` unchanged if chrono refuses the adjusted nanosecond
/// value.
#[must_use]
pub fn truncate_to_microseconds(t: DateTime<Utc>) -> DateTime<Utc> {
    use chrono::Timelike;
    let nanos = t.nanosecond();
    // Whatever is left over below one microsecond (0..=999 ns).
    let sub_micro = nanos % 1_000;
    t.with_nanosecond(nanos - sub_micro).unwrap_or(t)
}

use crate::models::{
    AGENTS_NAMESPACE, AgentRegistration, Approval, ApproverType, ConfidenceSource, DuplicateCheck,
    DuplicateMatch, GovernanceDecision, GovernanceLevel, GovernancePolicy, GovernedAction,
    MAX_NAMESPACE_DEPTH, Memory, MemoryKind, MemoryLink, NamespaceCount, PROMOTION_THRESHOLD,
    PendingAction, SourceSpan, Stats, Taxonomy, TaxonomyNode, Tier, TierCount, namespace_ancestors,
};

// #962 — typed substrate-layer error envelope. Substrate code emits
// `anyhow::Error::new(StorageError::…)` instead of the legacy
// `anyhow::bail!("…")`; handlers downcast via
// `MemoryError::from(anyhow::Error)` to map each variant to its
// canonical HTTP status. The error-prefix constants live alongside the
// typed enum so the Display impl and the prefix tokens stay in lockstep.
mod error;
pub use error::{LINK_CYCLE_ERR_PREFIX, LINK_PERMISSION_DENIED_ERR_PREFIX, LinkEnd, StorageError};

// ---------------------------------------------------------------------------
// v0.7.0 L1-6 Deliverable E — governance pre-write hook (issue #691)
// ---------------------------------------------------------------------------
//
// Substrate-internal: layering-preserving insertion point for the
// agent-action rules engine. The hook is a process-wide `OnceLock`
// holding an optional closure of the shape
//
//     Fn(&Memory) -> Result<(), String> + Send + Sync
//
// installed exactly once at daemon `serve` boot (BEFORE binding the
// listener) and consulted by every substrate write path
// (`storage::insert`, `storage::insert_with_conflict`,
// `storage::insert_if_newer`) immediately BEFORE the SQL `INSERT`.
//
// Why a `OnceLock` and not a thread-local or `RwLock<Option<_>>`:
//
//   1. Operator standing directive: "rules and standards can NEVER be
//      bypassed by AI/AI Agents — 100% of the time". A `OnceLock`
//      enforces installation-is-one-shot at the type level — no
//      reset, no override, no test-only escape hatch reachable from
//      production code paths.
//   2. The hook closure is read on every write; an `RwLock` would add
//      contention on the hot path. `OnceLock::get()` is lock-free.
//   3. CLI one-shot mode (`ai-memory store …`, `ai-memory mine …`,
//      etc.) MUST NOT install the hook — the operator's direct
//      substrate ops stay unimpeded by design. `OnceLock` defaults to
//      empty, so the CLI path is the no-op default; only the daemon's
//      `serve` boot reaches the `.set` callsite.
//
// Refusal contract: when the hook fires it returns `Err(reason)`.
// The caller wraps `reason` in a typed [`GovernanceRefusal`] (which
// implements [`std::error::Error`]) and propagates via `anyhow::Error`.
// The handler layer's `MemoryError::from(anyhow::Error)` impl
// downcasts and promotes it to [`crate::errors::MemoryError::RefusedByGovernance`]
// — see `src/errors.rs` for the 403 / `GOVERNANCE_REFUSED` mapping.

/// Optional governance pre-write hook. When set, every substrate
/// `INSERT` path consults the closure BEFORE the SQL write; an
/// `Err(reason)` short-circuits the write with no row touched.
///
/// The closure receives the candidate [`Memory`] by reference and
/// returns `Ok(())` to allow the write or `Err(reason)` to refuse it;
/// `reason` is carried verbatim into a [`GovernanceRefusal`] by
/// [`consult_governance_pre_write`].
///
/// Installation is one-shot (`OnceLock::set`); the daemon `serve`
/// bootstrap is the only caller in production. CLI one-shot binaries
/// must leave this empty, in which case the consult helper is a no-op.
///
/// See the "governance pre-write hook (issue #691)" comment block
/// directly above for the full layering rationale.
pub static GOVERNANCE_PRE_WRITE: std::sync::OnceLock<
    Box<dyn Fn(&Memory) -> std::result::Result<(), String> + Send + Sync>,
> = std::sync::OnceLock::new();

/// Marker error raised when the governance pre-write hook refuses a
/// substrate write.
///
/// It travels inside an `anyhow::Error`, so the
/// `anyhow::Result<String>` return shape of `storage::insert*` is
/// untouched; the handler layer recovers it by downcasting in
/// `MemoryError::from(anyhow::Error)` (see `src/errors.rs`) and maps
/// it to HTTP `403 FORBIDDEN` + code `GOVERNANCE_REFUSED`.
///
/// `reason` is the operator-authored text, passed through verbatim.
/// The MCP layer surfaces the same string (audit log + tool error
/// data field).
#[derive(Debug, Clone)]
pub struct GovernanceRefusal {
    pub reason: String,
}

impl std::fmt::Display for GovernanceRefusal {
    /// Renders as `governance-refused: <reason>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { reason } = self;
        write!(f, "governance-refused: {reason}")
    }
}

// No `source()` override: the refusal is a leaf error.
impl std::error::Error for GovernanceRefusal {}

/// Internal helper consulted by every substrate write path BEFORE
/// the SQL write. When the [`GOVERNANCE_PRE_WRITE`] hook is unset
/// (CLI mode or pre-hook-install daemon path), this is a zero-cost
/// no-op `Ok(())`. When the hook is set, the closure runs and an
/// `Err(reason)` wraps into a [`GovernanceRefusal`] propagated up the
/// `anyhow` chain.
///
/// Visibility: `pub(crate)` so the `PostgresStore` SAL adapter
/// (`src/store/postgres.rs`) can consult the same hook on its write
/// paths — fixing ARCH-1 (substrate governance pre-write parity
/// between the SQLite and Postgres backends). The hook itself is
/// process-wide and installed once by the daemon `serve` bootstrap;
/// every substrate write path on EVERY backend MUST consult it before
/// touching SQL.
///
/// The function is hot-path; avoid heap allocation on the Allow leg.
#[inline]
pub(crate) fn consult_governance_pre_write(mem: &Memory) -> Result<()> {
    if let Some(hook) = GOVERNANCE_PRE_WRITE.get() {
        if let Err(reason) = hook(mem) {
            return Err(anyhow::Error::new(GovernanceRefusal { reason }));
        }
    }
    Ok(())
}

/// Computed 4-tuple of visibility prefixes for an agent position (Task 1.5).
/// Index 0 = agent's own namespace (private), 1 = parent (team),
/// 2 = grandparent (unit), 3 = great-grandparent (org). Missing = `None`.
///
/// Produced by `compute_visibility_prefixes`, which fills the slots from
/// the first four entries returned by `namespace_ancestors`. A `None` in
/// slot 0 is what `is_visible` treats as "no visibility filter".
type VisibilityPrefixes = (
    Option<String>,
    Option<String>,
    Option<String>,
    Option<String>,
);

/// Build the [`VisibilityPrefixes`] tuple for the caller position
/// `as_agent`.
///
/// With no agent supplied every slot is `None`. Otherwise the slots are
/// the first four entries of `namespace_ancestors(ns)`, in order; slots
/// past the end of that list stay `None`.
fn compute_visibility_prefixes(as_agent: Option<&str>) -> VisibilityPrefixes {
    match as_agent {
        None => (None, None, None, None),
        Some(ns) => {
            let chain = namespace_ancestors(ns);
            // Tuple fields are evaluated left to right, so each `next()`
            // hands out the following ancestor level.
            let mut levels = chain.iter().cloned();
            (levels.next(), levels.next(), levels.next(), levels.next())
        }
    }
}

/// In-process visibility test for code paths that cannot attach the SQL
/// visibility fragment (the HNSW branch of `recall_hybrid` walks memories
/// loaded one-by-one through `get()`).
///
/// Returns `true` when the private prefix is unset (no filter requested)
/// or when the memory's scope together with its namespace grants the
/// caller visibility. A memory with no string-valued scope entry in its
/// metadata is judged under `MemoryScope::default()`; a scope string that
/// `MemoryScope::from_str` does not recognise is never visible.
fn is_visible(mem: &Memory, prefixes: &VisibilityPrefixes) -> bool {
    // v0.7.0 multi-agent literal-sweep (scanner B finding F-B8.x): the
    // scope is resolved through the typed `MemoryScope` enum and the
    // `META_KEY_SCOPE` SSOT, and the final `match` is exhaustive on
    // purpose — a new scope variant must fail to compile here rather
    // than slip through a silent `_ => false` arm as it did before the
    // refactor.
    use crate::models::namespace::MemoryScope;
    let (private_ns, team_ns, unit_ns, org_ns) = prefixes;
    let Some(own_ns) = private_ns else {
        // No caller position supplied: nothing is filtered.
        return true;
    };
    let declared = mem
        .metadata
        .get(crate::META_KEY_SCOPE)
        .and_then(|v| v.as_str());
    let scope = match declared {
        None => MemoryScope::default(),
        Some(raw) => match MemoryScope::from_str(raw) {
            Some(parsed) => parsed,
            // Unknown scope string: keep the pre-refactor "not visible".
            None => return false,
        },
    };
    match scope {
        MemoryScope::Collective => true,
        MemoryScope::Private => mem.namespace == *own_ns,
        MemoryScope::Team => matches_subtree(&mem.namespace, team_ns.as_deref()),
        MemoryScope::Unit => matches_subtree(&mem.namespace, unit_ns.as_deref()),
        MemoryScope::Org => matches_subtree(&mem.namespace, org_ns.as_deref()),
    }
}

/// Returns `true` when `namespace` is `prefix` itself or lies beneath it
/// in the `/`-separated hierarchy (`prefix` followed by `/`).
///
/// A `None` prefix never matches. A sibling that merely shares leading
/// characters (`org/teammate` vs. `org/team`) does not match because the
/// remainder after the prefix must be empty or start with `/`.
///
/// Allocation-free: this runs once per candidate row in the Rust-side
/// visibility filter, so the previous `format!("{p}/")` temporary is
/// replaced by a borrowed `strip_prefix` check with identical results.
fn matches_subtree(namespace: &str, prefix: Option<&str>) -> bool {
    prefix
        .and_then(|p| namespace.strip_prefix(p))
        .is_some_and(|rest| rest.is_empty() || rest.starts_with('/'))
}

/// v0.7.0 WT-1-E — atom-preference WHERE fragment.
///
/// Default recall surfaces atoms (the canonical post-atomisation
/// unit) in place of the archived source row. An archived source is
/// one where:
///
///   * `atomised_into > 0` — the substrate-visible count of atoms
///     emitted by the WT-1-B atomiser.
///   * `metadata.atomisation_archived_at` is set — the RFC3339 stamp
///     WT-1-B writes alongside the column flip (see
///     `src/atomisation/mod.rs::archive_source`). The column is the
///     fast index target; the metadata key is the substrate-visible
///     read signal that the row is "atomised and archived".
///
/// The fragment excludes a row only when BOTH signals are present. A
/// row showing just one of them (column-only or metadata-only drift)
/// is NOT filtered and still surfaces under default recall.
///
/// Atoms themselves (rows where `atom_of IS NOT NULL`) are unaffected
/// — they are not "archived" by this definition. The fragment
/// excludes archived sources only.
///
/// When `include_archived` is true the fragment is empty (no
/// filter), so auditors and the forensic-export path see the full
/// chain. The atom rows are returned in both cases.
///
/// `table_alias` must be `"m"` or `"memories"`; the returned fragment
/// starts with `AND` and is meant to be appended to an existing WHERE
/// clause.
///
/// NOTE(review): any other alias falls through to the empty string,
/// i.e. the archived-source filter is silently skipped rather than
/// rejected — confirm no caller passes a different alias.
fn archived_source_clause(include_archived: bool, table_alias: &str) -> &'static str {
    if include_archived {
        ""
    } else {
        // Two-part predicate: a row is archived-source when BOTH
        // (a) atomised_into > 0 and
        // (b) metadata.atomisation_archived_at IS NOT NULL.
        // Either one alone could be a partial-state row (e.g. a
        // crash between the column flip and the metadata write); we
        // only filter rows that present BOTH signals so a partial-
        // state row still surfaces under default recall.
        // Static fragment with the alias baked in — recall and
        // recall_hybrid pass `"m"`, search passes `"m"` too.
        match table_alias {
            "m" => {
                "AND NOT (\
                m.atomised_into IS NOT NULL AND m.atomised_into > 0 \
                AND json_extract(m.metadata, '$.atomisation_archived_at') IS NOT NULL\
            )"
            }
            "memories" => {
                "AND NOT (\
                memories.atomised_into IS NOT NULL AND memories.atomised_into > 0 \
                AND json_extract(memories.metadata, '$.atomisation_archived_at') IS NOT NULL\
            )"
            }
            // Unrecognised alias: no fragment is emitted (no filtering).
            _ => "",
        }
    }
}

/// v0.7.0 WT-1-E — Rust-side mirror of [`archived_source_clause`].
///
/// Used by the HNSW retrieval branch of `recall_hybrid_with_telemetry`
/// where the bypass-the-SQL-WHERE walk fetches each candidate via
/// `get()` and then applies post-load filters in Rust. The check
/// reads `metadata.atomisation_archived_at` (the WT-1-B substrate-
/// visible read signal) and tolerates the absence of the metadata
/// key — only rows whose key is present and non-null are excluded.
///
/// Note: the SQL fragment also requires `atomised_into > 0` to be
/// set. The HNSW branch deliberately only checks the metadata key
/// because the loaded `Memory` struct does not carry the
/// `atomised_into` column. The two signals are written in the same
/// `archive_source` transaction (see `src/atomisation/mod.rs`), so
/// in steady-state every row presents both signals together. For a
/// pathological partial-state row the two checks can disagree: a
/// metadata-only row is excluded here but not by the SQL fragment,
/// while a column-only row is excluded by neither — accepted as a
/// tolerable looseness on the cold-fallback path.
fn is_archived_source(mem: &Memory) -> bool {
    mem.metadata
        .get(field_names::ATOMISATION_ARCHIVED_AT)
        .is_some_and(|v| !v.is_null())
}

/// Generate the visibility WHERE-clause fragment starting at placeholder `start`.
/// Uses placeholders `?start .. ?start+3` for private/team/unit/org prefixes.
/// See `compute_visibility_prefixes` for the bind order.
///
/// `table_alias` is interpolated verbatim in front of every column
/// reference. The fragment starts with `AND` and collapses to "no
/// filter" when the private placeholder is bound to NULL.
///
/// Performance (v0.6.0 GA): each scope branch compares against the indexed
/// generated column `scope_idx` (schema v10) rather than re-evaluating
/// `json_extract(metadata, '$.scope')` per row. The query planner picks
/// `idx_memories_scope_idx` whenever the predicate narrows by scope,
/// dropping recall from "scan every namespace row and parse its JSON" to
/// an index seek + per-row refinement. See `docs/ARCHITECTURAL_LIMITS.md`
/// for which `SQLite` limits remain structural.
///
/// Security (issue #217): the team/unit/org branches use `LIKE` to expand a
/// prefix into its sub-tree. Without escaping, a caller who can influence the
/// prefix could inject SQL `LIKE` meta-characters (`%`, `_`) and broaden the
/// match across unrelated namespaces. We neutralise this at SQL evaluation
/// time by `replace()`-escaping `%` and `_` in the bound prefix and pairing
/// the LIKE with `ESCAPE '\'`. `validate_namespace` already rejects backslash,
/// so `\` cannot appear in the bound prefix and the escape sentinel is safe.
/// The `=` equality side is unaffected by LIKE wildcards and binds the raw
/// value so that legitimate namespaces containing `_` (e.g. `under_score`)
/// continue to match exactly.
fn visibility_clause(start: usize, table_alias: &str) -> String {
    // Four consecutive numbered placeholders, in private/team/unit/org order.
    let private_ph = start;
    let team_ph = start + 1;
    let unit_ph = start + 2;
    let org_ph = start + 3;
    let ta = table_alias;
    format!(
        "AND (\
            ?{private_ph} IS NULL \
            OR {ta}.scope_idx = 'collective' \
            OR ({ta}.scope_idx = 'private' AND {ta}.namespace = ?{private_ph}) \
            OR ({ta}.scope_idx = 'team' AND ?{team_ph} IS NOT NULL AND ({ta}.namespace = ?{team_ph} OR {ta}.namespace LIKE replace(replace(?{team_ph}, '%', '\\%'), '_', '\\_') || '/%' ESCAPE '\\')) \
            OR ({ta}.scope_idx = 'unit' AND ?{unit_ph} IS NOT NULL AND ({ta}.namespace = ?{unit_ph} OR {ta}.namespace LIKE replace(replace(?{unit_ph}, '%', '\\%'), '_', '\\_') || '/%' ESCAPE '\\')) \
            OR ({ta}.scope_idx = 'org'  AND ?{org_ph}  IS NOT NULL AND ({ta}.namespace = ?{org_ph}  OR {ta}.namespace LIKE replace(replace(?{org_ph}, '%', '\\%'), '_', '\\_') || '/%' ESCAPE '\\'))\
        )"
    )
}

/// v0.7.0 Form 4 / Cluster-A PERF-3 — neutralise the SQL `LIKE`
/// metacharacters in a user-supplied substring.
///
/// Each `%`, `_` and `\` in `s` is prefixed with a backslash; every
/// other character is copied through unchanged. The result matches
/// literally when the pattern is paired with `LIKE ... ESCAPE '\\'`.
/// Used by the `source_uri LIKE 'prefix%'` filter in [`recall`] and
/// [`recall_hybrid_with_telemetry`] to push the `--source-uri-prefix`
/// filter into SQL.
fn escape_like_pattern(s: &str) -> String {
    s.chars()
        .fold(String::with_capacity(s.len()), |mut escaped, ch| {
            if matches!(ch, '\\' | '%' | '_') {
                escaped.push('\\');
            }
            escaped.push(ch);
            escaped
        })
}

// v0.7.0 L0.5-3 — flat `src/db.rs` decomposed into `src/storage/`.
// Sub-modules stay private to this module per the L0.5-1 pattern;
// only the re-exports below form the public surface. The
// `pub use storage as db;` shim in `src/lib.rs` preserves the
// historical `crate::db::*` paths used elsewhere.
pub(crate) mod connection;
// `pub` (rather than `pub(crate)`) so the V-4 closeout
// integration test suite (`tests/signed_events_chain_v34.rs`) can
// invoke `migrate_v34_backfill_chain` directly to exercise the
// idempotent-replay property without going through a full daemon
// boot cycle.
pub mod migration_meta;
pub mod migrations;
pub(crate) mod reflect;

// Re-exports — every `pub` item that previously lived in `src/db.rs`
// is re-published at `crate::storage::*` (and therefore `crate::db::*`
// via the lib.rs shim) so callsites keep resolving without churn.
pub use connection::open;
// #1579 B7 — mmap_size knob. `set_db_mmap_size` is the boot-time
// seeding hook (`daemon_runtime::run`); the DEFAULT const is the
// compiled fallback the `AppConfig::resolve_storage()` ladder bottoms
// out on (also consumed by the config-precedence tests).
pub use connection::{DEFAULT_DB_MMAP_SIZE_BYTES, set_db_mmap_size};
// v0.7.0 refactor PR-1 (#793) — schema-pins SSOT. Re-export the
// test-facing helper so callers can use either
// `ai_memory::storage::current_schema_version_for_tests()` or the
// existing `ai_memory::db::current_schema_version_for_tests()` shim
// (via `pub use storage as db;` in `src/lib.rs`).
pub use migrations::current_schema_version_for_tests;
// Pre-migration safety-snapshot infix accessor — lets coverage tests
// locate / name-assert the snapshot file without restamping the literal.
pub use migrations::pre_migration_backup_infix_for_tests;
pub use reflect::{
    ReflectError, ReflectHookDecision, ReflectHooks, ReflectInput, ReflectOutcome,
    canonical_cbor_reflection_depth_exceeded, reflect, reflect_with_hooks,
};
// `emit_reflection_depth_exceeded_audit` is `pub(crate)` — preserve
// the same visibility on the re-export so it remains reachable from
// `crate::db::emit_reflection_depth_exceeded_audit` (the original
// path) without widening the public surface. The current crate has
// no external callers (the path is only used internally by
// `reflect_with_hooks`); the re-export is retained for surface
// parity with pre-L0.5-3.
#[allow(unused_imports)]
pub(crate) use reflect::emit_reflection_depth_exceeded_audit;

/// Decode one `memories` row into a [`Memory`].
///
/// # Errors
///
/// Propagates the `rusqlite` error when any strictly-required column
/// cannot be read: `id`, `tags`, `tier`, `namespace`, `title`,
/// `content`, `priority`, and the access-count / created / updated /
/// last-accessed / expires columns.
///
/// # Tolerated defects
///
/// Every other column is read best-effort and falls back to a default
/// when the read fails (for instance because the column is missing on
/// an older schema) or the value cannot be decoded. Corrupt JSON in
/// `tags`, `metadata`, `citations`, `source_span` and
/// `confidence_signals` is logged at WARN with the row id, the column
/// and the parse error before the default is substituted, so silent
/// data loss shows up in operator logs. The four provenance columns
/// additionally bump `corrupt_provenance_rows_total{column=...}`.
pub(crate) fn row_to_memory(row: &rusqlite::Row) -> rusqlite::Result<Memory> {
    let row_id: String = row.get("id")?;
    let tags_json: String = row.get("tags")?;
    // Corrupt tags JSON used to collapse silently to `[]` via
    // `unwrap_or_default()`. It now logs like the sibling JSON columns
    // below. The provenance counter is deliberately not bumped here so
    // that no new `column` label value is introduced.
    let tags: Vec<String> = serde_json::from_str(&tags_json).unwrap_or_else(|e| {
        tracing::warn!(
            row_id = %row_id,
            column = "tags",
            error = %e,
            "corrupt tags JSON in DB row, defaulting to []"
        );
        Vec::new()
    });
    let tier_str: String = row.get("tier")?;
    // Unrecognised tier strings degrade to `Tier::Mid`.
    let tier = Tier::from_str(&tier_str).unwrap_or(Tier::Mid);
    // A failed column read is treated as an empty JSON object.
    let metadata_str: String = row
        .get::<_, String>("metadata")
        .unwrap_or_else(|_| "{}".to_string());
    let metadata: serde_json::Value = serde_json::from_str(&metadata_str).unwrap_or_else(|e| {
        tracing::warn!(
            row_id = %row_id,
            column = "metadata",
            error = %e,
            "corrupt metadata in DB row, defaulting to {{}}"
        );
        crate::metrics::record_corrupt_provenance("metadata");
        serde_json::json!({})
    });
    // v0.7.0 Form 4 / Cluster-A COR-3 — citations JSON. Pre-fix used a
    // bare `.ok()` chain that silently turned corrupt JSON into an empty
    // vec with no operator signal. Now: log via `tracing::warn!` with the
    // row id + column + parse error, bump the
    // `corrupt_provenance_rows_total{column=...}` counter, then return
    // the safe default. A failed column read still yields an empty vec
    // without logging.
    let citations = match row.get::<_, String>("citations").ok() {
        Some(s) => match serde_json::from_str::<Vec<crate::models::Citation>>(&s) {
            Ok(v) => v,
            Err(e) => {
                tracing::warn!(
                    row_id = %row_id,
                    column = "citations",
                    error = %e,
                    "corrupt citations JSON in DB row, defaulting to []"
                );
                crate::metrics::record_corrupt_provenance("citations");
                Vec::new()
            }
        },
        None => Vec::new(),
    };
    // NULL or unreadable column => `None`; corrupt JSON => WARN + `None`.
    let source_span: Option<SourceSpan> = row
        .get::<_, Option<String>>(field_names::SOURCE_SPAN)
        .unwrap_or(None)
        .and_then(|s| match serde_json::from_str::<SourceSpan>(&s) {
            Ok(span) => Some(span),
            Err(e) => {
                tracing::warn!(
                    row_id = %row_id,
                    column = field_names::SOURCE_SPAN,
                    error = %e,
                    "corrupt source_span JSON in DB row, defaulting to None"
                );
                crate::metrics::record_corrupt_provenance(field_names::SOURCE_SPAN);
                None
            }
        });
    // Same policy as `source_span` above.
    let confidence_signals = row
        .get::<_, Option<String>>(field_names::CONFIDENCE_SIGNALS)
        .unwrap_or(None)
        .and_then(
            |s| match serde_json::from_str::<crate::models::ConfidenceSignals>(&s) {
                Ok(v) => Some(v),
                Err(e) => {
                    tracing::warn!(
                        row_id = %row_id,
                        column = field_names::CONFIDENCE_SIGNALS,
                        error = %e,
                        "corrupt confidence_signals JSON in DB row, defaulting to None"
                    );
                    crate::metrics::record_corrupt_provenance(field_names::CONFIDENCE_SIGNALS);
                    None
                }
            },
        );
    Ok(Memory {
        id: row_id,
        tier,
        namespace: row.get("namespace")?,
        title: row.get("title")?,
        content: row.get("content")?,
        tags,
        priority: row.get("priority")?,
        // Unreadable confidence / source fall back to `1.0` / `"api"`.
        confidence: row.get(field_names::CONFIDENCE).unwrap_or(1.0),
        source: row.get("source").unwrap_or_else(|_| "api".to_string()),
        access_count: row.get(field_names::ACCESS_COUNT)?,
        created_at: row.get(field_names::CREATED_AT)?,
        updated_at: row.get(field_names::UPDATED_AT)?,
        last_accessed_at: row.get(field_names::LAST_ACCESSED_AT)?,
        expires_at: row.get(field_names::EXPIRES_AT)?,
        metadata,
        // v0.7.0 Task 1/8 — schema v29 column. `.unwrap_or(0)` keeps the
        // reader tolerant of pre-v29 row reads (no panic if the migration
        // ladder hasn't reached this DB yet) and is consistent with the
        // SQL-side `DEFAULT 0`.
        reflection_depth: row.get(field_names::REFLECTION_DEPTH).unwrap_or(0_i32),
        // v0.7.0 L1-1 — schema v30 column. Falls back to the default kind
        // (`Observation`) on pre-v30 rows (column absent) and on any
        // unrecognised value from a future schema (forward-compat).
        memory_kind: row
            .get::<_, String>(field_names::MEMORY_KIND)
            .ok()
            .and_then(|s| crate::models::MemoryKind::from_str(&s))
            .unwrap_or_default(),
        // v0.7.0 QW-2 — Persona-as-artifact discriminator columns.
        // Populated only for `memory_kind = 'persona'` rows. NULL on
        // every observation/reflection row. Pre-v36 rows lack the
        // column entirely — the `unwrap_or(None)` fallback yields None.
        entity_id: row.get::<_, Option<String>>("entity_id").unwrap_or(None),
        persona_version: row
            .get::<_, Option<i32>>(field_names::PERSONA_VERSION)
            .unwrap_or(None),
        // v0.7.0 Form 4 — schema v38 fact-provenance columns. `citations`
        // / `source_span` corruption now logs WARN + bumps the
        // `corrupt_provenance_rows_total` counter above so silent JSON
        // drops surface in operator observability (Cluster-A COR-3 fix).
        // `source_uri` is a plain TEXT column (NULL on legacy rows).
        citations,
        source_uri: row
            .get::<_, Option<String>>(field_names::SOURCE_URI)
            .unwrap_or(None),
        source_span,
        // v0.7.0 Form 5 — schema v39 columns. Legacy rows resolve
        // to `CallerProvided` (SQL DEFAULT), NULL signals, NULL
        // decayed_at. The `.ok()` / `unwrap_or(None)` fallbacks keep
        // the reader tolerant of pre-v39 row reads (no panic when
        // migrate hasn't fired yet).
        confidence_source: row
            .get::<_, String>(field_names::CONFIDENCE_SOURCE)
            .ok()
            .and_then(|s| crate::models::ConfidenceSource::from_str(&s))
            .unwrap_or_default(),
        confidence_signals,
        confidence_decayed_at: row
            .get::<_, Option<String>>(field_names::CONFIDENCE_DECAYED_AT)
            .unwrap_or(None),
        // v0.7.0 Provenance Gap 1 (#884) — schema v45 optimistic-
        // concurrency column. Pre-v45 rows lack the column entirely
        // — the `unwrap_or(1)` fallback yields the SQL DEFAULT 1 (same
        // value a pre-v45 row would land at the moment the ALTER
        // fires in the migrate ladder).
        version: row.get::<_, i64>("version").unwrap_or(1),
    })
}

/// v0.7.0 polish PERF-8 (issue #781) — derive the canonical
/// `mentioned_entity_id` for a memory at write time.
///
/// Background: the auto-persona matcher
/// (`hooks::post_reflect::auto_persona`) and the persona source-pool
/// loader (`persona::load_reflections_for_entity`) used to locate
/// candidate reflections with `(title|content|metadata) LIKE
/// '%<entity>%'` — a full-table scan over three TEXT columns. PERF-8
/// denormalises the descriptor onto a dedicated indexed column so the
/// matcher can use `WHERE mentioned_entity_id = ?` instead.
///
/// Lookup order (mirrors `auto_persona::resolve_entity_id`):
///
/// 1. `metadata.entity_id`, when it is a string that is non-empty
///    after trimming.
/// 2. The text between the first `[entity:` in the title and the next
///    `]`, when it is non-empty after trimming.
///
/// Returns `None` when neither source yields a value — the column
/// stays NULL and the row contributes nothing to the partial index
/// (`WHERE mentioned_entity_id IS NOT NULL`).
///
/// Only `memory_kind = 'reflection'` rows are considered: the matcher
/// scans reflections exclusively, so filling the column on other rows
/// would grow the index without speeding any query. (Persona rows use
/// the orthogonal QW-2 `entity_id` column for their own attribution.)
pub(crate) fn extract_mentioned_entity_id(mem: &Memory) -> Option<String> {
    if mem.memory_kind != MemoryKind::Reflection {
        return None;
    }
    // Source 1: the structured `metadata.entity_id` tag.
    let from_metadata = mem
        .metadata
        .get("entity_id")
        .and_then(|v| v.as_str())
        .map(str::trim)
        .filter(|s| !s.is_empty());
    // Source 2: the `[entity:X]` title marker, evaluated lazily. Kept in
    // step with `auto_persona::resolve_entity_id` so cadence accounting
    // and matcher selection agree on the descriptor for a given row.
    let from_title = || {
        mem.title
            .split_once("[entity:")
            .and_then(|(_, tail)| tail.split_once(']'))
            .map(|(inner, _)| inner.trim())
            .filter(|s| !s.is_empty())
    };
    from_metadata.or_else(from_title).map(str::to_owned)
}

/// Insert with upsert on title+namespace. Returns the ID (existing or new).
///
/// Ultrareview #352: collapses the previous `INSERT`/`ON CONFLICT` +
/// separate `SELECT` into a single `INSERT ... RETURNING id`. Another
/// concurrent writer could otherwise slot in between the two statements
/// and the `SELECT` would return the wrong row id. `SQLite` 3.35+
/// supports `RETURNING`; it executes atomically within the `INSERT`.
///
/// # Errors
///
/// * Propagates a refusal (or failure) from `consult_governance_pre_write`;
///   in that case no SQL has been issued.
/// * Propagates `serde_json` failures while encoding tags, metadata,
///   citations, `source_span` or `confidence_signals`.
/// * Propagates rusqlite errors from preparing or executing the upsert.
pub fn insert(conn: &Connection, mem: &Memory) -> Result<String> {
    // v0.7.0 L1-6 Deliverable E — substrate governance pre-write
    // gate. Consults the (optional) `GOVERNANCE_PRE_WRITE` hook
    // BEFORE any SQL touches the DB; a refusal returns cleanly with
    // no row written. See module-level comment for layering details.
    consult_governance_pre_write(mem)?;

    let tags_json = serde_json::to_string(&mem.tags)?;
    let metadata_json = serde_json::to_string(&mem.metadata)?;
    // v0.7.0 Form 4 — encode citations/source_span to JSON for the
    // schema v38 TEXT columns. citations always lands as a JSON array
    // (default `[]` when caller supplied nothing); source_span lands as
    // `{start,end}` or NULL.
    let citations_json = serde_json::to_string(&mem.citations)?;
    let source_span_json = match mem.source_span {
        Some(span) => Some(serde_json::to_string(&span)?),
        None => None,
    };
    // v0.7.0 Form 5 — encode confidence-provenance fields for the
    // schema v39 TEXT columns. The `confidence_source` column has a
    // SQL DEFAULT of 'caller_provided' so legacy/default rows land
    // there; `confidence_signals` is a JSON envelope (or NULL); and
    // `confidence_decayed_at` is RFC3339 (or NULL).
    let confidence_signals_json = match &mem.confidence_signals {
        Some(s) => Some(serde_json::to_string(s)?),
        None => None,
    };
    // v0.7.0 polish PERF-8 (#781) — denormalised `mentioned_entity_id`
    // column, populated at write time from `metadata.entity_id` (or a
    // `[entity:X]` title-marker fallback) on reflection rows. See
    // `extract_mentioned_entity_id` for the resolution order.
    let mentioned_entity_id = extract_mentioned_entity_id(mem);
    // #1579 B6 — `insert` is the hottest write statement in the
    // substrate (every store / upsert / capture-turn / federation push
    // lands here). `prepare_cached` skips the re-parse of this ~60-line
    // upsert on every call after the first.
    let mut insert_stmt = conn.prepare_cached(
        "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, confidence, source, access_count, created_at, updated_at, last_accessed_at, expires_at, metadata, reflection_depth, memory_kind, entity_id, persona_version, citations, source_uri, source_span, confidence_source, confidence_signals, confidence_decayed_at, mentioned_entity_id)
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, ?20, ?21, ?22, ?23, ?24, ?25, ?26)
         ON CONFLICT(title, namespace) DO UPDATE SET
            content = excluded.content,
            tags = excluded.tags,
            priority = MAX(memories.priority, excluded.priority),
            confidence = MAX(memories.confidence, excluded.confidence),
            source = excluded.source,
            tier = CASE WHEN excluded.tier = 'long' THEN 'long'
                        WHEN memories.tier = 'long' THEN 'long'
                        WHEN excluded.tier = 'mid' THEN 'mid'
                        ELSE memories.tier END,
            updated_at = excluded.updated_at,
            expires_at = CASE WHEN excluded.tier = 'long' OR memories.tier = 'long' THEN NULL
                              ELSE COALESCE(excluded.expires_at, memories.expires_at) END,
            -- Preserve metadata.agent_id across upsert (NHI provenance is immutable).
            metadata = CASE
                WHEN json_extract(memories.metadata, '$.agent_id') IS NOT NULL
                THEN json_set(
                    excluded.metadata,
                    '$.agent_id',
                    json_extract(memories.metadata, '$.agent_id')
                )
                ELSE excluded.metadata
            END,
            -- v0.7.0 Task 1/8 — recursion depth takes the max across upsert
            -- so a subsequent reflection at higher depth doesn't lose its
            -- provenance signal when re-stored at the same (title, namespace).
            reflection_depth = MAX(memories.reflection_depth, excluded.reflection_depth),
            -- v0.7.0 L1-1 — kind is sticky: once Reflection, always Reflection.
            -- An upsert of an observation onto an existing reflection row must
            -- not downgrade the kind (reflect is not reversible by re-store).
            -- v0.7.0 QW-2 — Persona is also sticky once set; the engine
            -- writes new versions via fresh rows under a unique
            -- `__persona_<entity>_v<n>` title rather than upsert.
            memory_kind = CASE WHEN memories.memory_kind = 'reflection' THEN 'reflection'
                               WHEN memories.memory_kind = 'persona' THEN 'persona'
                               ELSE excluded.memory_kind END,
            -- v0.7.0 QW-2 — entity_id + persona_version stay attached to
            -- the row they were minted with (Persona-kind upserts use
            -- versioned titles so the conflict path is exercised only
            -- on accidental same-title collisions).
            entity_id = COALESCE(memories.entity_id, excluded.entity_id),
            persona_version = COALESCE(memories.persona_version, excluded.persona_version),
            -- v0.7.0 Form 4 — fact-provenance: when the incoming row
            -- carries a non-empty citations array, replace the stored
            -- value (caller re-asserted provenance); otherwise keep
            -- the existing value (silent merge would lose freshly-cited
            -- evidence). source_uri / source_span follow COALESCE
            -- semantics so a new write that omits them does not blank
            -- out existing provenance pointers.
            citations = CASE WHEN excluded.citations = '[]'
                             THEN memories.citations
                             ELSE excluded.citations END,
            source_uri = COALESCE(excluded.source_uri, memories.source_uri),
            source_span = COALESCE(excluded.source_span, memories.source_span),
            -- v0.7.0 Form 5 — confidence-provenance follows the same
            -- shape as Form 4 columns: explicit non-default replaces;
            -- caller_provided + NULL signals keep the existing
            -- provenance signal so a re-store doesn't blank out an
            -- auto-derived or calibrated value.
            confidence_source = CASE WHEN excluded.confidence_source != 'caller_provided'
                                     THEN excluded.confidence_source
                                     ELSE memories.confidence_source END,
            confidence_signals = COALESCE(excluded.confidence_signals, memories.confidence_signals),
            confidence_decayed_at = COALESCE(excluded.confidence_decayed_at, memories.confidence_decayed_at),
            -- v0.7.0 polish PERF-8 (#781) — denormalised mention tag.
            -- COALESCE keeps any pre-existing tag (re-write that
            -- omits the structured entity_id metadata should NOT
            -- blank out the indexed column) while letting a fresh
            -- extraction populate previously-NULL rows.
            mentioned_entity_id = COALESCE(excluded.mentioned_entity_id, memories.mentioned_entity_id),
            -- #1632 — upsert-merge IS a mutation (content/tags/priority
            -- can change), so the Gap-1 optimistic-concurrency counter
            -- bumps here exactly like db::update. Pre-#1632 a re-store
            -- rewrote content while version stood still, so a stale
            -- If-Match could overwrite the merge invisibly. The decay
            -- sweep remains the only documented non-bumping mutator
            -- (tests/non_version_bumping_sites_1036.rs).
            version = memories.version + 1
         RETURNING id",
    )?;
    // Bind order below must track the 26-column list in the INSERT
    // above (?1 = id … ?26 = mentioned_entity_id). `RETURNING id`
    // yields the surviving row's id: on the conflict path `id` is not in
    // the SET list, so that is the pre-existing row's id rather than `mem.id`.
    let actual_id: String = insert_stmt.query_row(
        params![
            mem.id,
            mem.tier.as_str(),
            mem.namespace,
            mem.title,
            mem.content,
            tags_json,
            mem.priority,
            mem.confidence,
            mem.source,
            mem.access_count,
            mem.created_at,
            mem.updated_at,
            mem.last_accessed_at,
            mem.effective_expires_at(),
            metadata_json,
            mem.reflection_depth,
            mem.memory_kind.as_str(),
            mem.entity_id,
            mem.persona_version,
            citations_json,
            mem.source_uri,
            source_span_json,
            mem.confidence_source.as_str(),
            confidence_signals_json,
            mem.confidence_decayed_at,
            mentioned_entity_id,
        ],
        |r| r.get(0),
    )?;
    Ok(actual_id)
}

/// v0.7.0 fix campaign R1-M3 (#690) — substrate-side `on_conflict`
/// policy for [`insert_with_conflict`].
///
/// Before this enum existed, every call into [`insert`] silently
/// merged on `(title, namespace)` collision. The G6 work in v0.6.3.1
/// closed the silent-merge gap at the MCP / HTTP **handler** layer
/// (see `mcp::tools::store` and `handlers::http::create_link`), but
/// substrate-internal writers — `storage::reflect`, the curator
/// consolidation surface, and the federation `sync_push` link loop —
/// kept calling [`insert`] directly and inheriting the silent-merge
/// behaviour. R1-M3 surfaces the same three policies the handler
/// layer already exposes on a typed enum so substrate callers can
/// opt into the right semantics explicitly.
///
/// Policies:
///
/// * [`ConflictMode::Error`] — refuse the write when a `(title,
///   namespace)` row already exists, returning a typed error. Used
///   by `storage::reflect` so a duplicate reflection cannot silently
///   replace an earlier one.
///
/// * [`ConflictMode::Merge`] — current silent-merge behaviour (the
///   v0.6.3 default). [`insert`] continues to call into the merge
///   path verbatim for backward compatibility.
///
/// * [`ConflictMode::Version`] — append a monotonic suffix to the
///   title until a free `(title, namespace)` slot is found, then
///   insert a new row. Mirrors the `on_conflict='version'` handler
///   policy.
///
/// The enum is a plain fieldless `Copy` value, so it is passed by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConflictMode {
    /// Refuse the write with a typed `(title, namespace)` collision
    /// error. The existing row is left untouched. The error surfaced by
    /// [`insert_with_conflict`] is a [`ConflictError`].
    Error,
    /// Silently merge on `(title, namespace)` collision (the legacy
    /// v0.6.3 substrate default). The existing row's content / tags /
    /// metadata.agent_id / reflection_depth are merged with the
    /// incoming row per the SQL in [`insert`]. [`insert_with_conflict`]
    /// forwards this mode straight to [`insert`].
    Merge,
    /// Append `(2)`, `(3)`, … to the title until a free slot is found,
    /// then insert a new row. Both old and new rows persist. The title
    /// is resolved by `next_versioned_title` before the row is handed to
    /// [`insert`].
    Version,
}

/// Collision error produced by [`insert_with_conflict`] when it runs
/// under [`ConflictMode::Error`] and the target `(title, namespace)`
/// slot is already occupied. It names the occupying row so callers can
/// report a structured diagnostic rather than a raw SQL message.
#[derive(Debug)]
pub struct ConflictError {
    /// Id of the row already holding the `(title, namespace)` slot.
    pub existing_id: String,
    /// Title the rejected write tried to use.
    pub title: String,
    /// Namespace the rejected write targeted.
    pub namespace: String,
}

impl std::fmt::Display for ConflictError {
    /// Renders the single-line `CONFLICT: …` diagnostic.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            existing_id,
            title,
            namespace,
        } = self;
        f.write_fmt(format_args!(
            "CONFLICT: memory with title '{}' already exists in namespace '{}' \
             (existing id: {})",
            title, namespace, existing_id
        ))
    }
}

impl std::error::Error for ConflictError {}

/// v0.7.0 #1416 / RFC-0001 — sqlite SSOT for the L4 layered-capture
/// idempotent write. Both the MCP `memory_capture_turn` handler (which
/// holds a raw `&rusqlite::Connection`) and `SqliteStore::
/// capture_turn_idempotent` (the SAL trait surface) call through here,
/// so the dedup-lookup + atomic three-row insert exists in exactly one
/// place on the sqlite path.
///
/// Flow:
/// 1. dedup SELECT on `(host_session_id, host_turn_index)` (the
///    `IS NOT NULL` predicate pins the partial index from schema v52).
/// 2. On hit → return the existing id with `dedup_hit: true`, no write.
/// 3. On miss → `BEGIN IMMEDIATE` → `insert` (merge upsert) →
///    `transcript_line_dedup` INSERT → `signed_events` chain row →
///    COMMIT; any failure rolls all three rows back atomically.
/// 4. If the COMMIT itself fails, a best-effort `ROLLBACK` is issued
///    before the error is returned so the connection is never left
///    inside a dangling transaction.
///
/// # Errors
///
/// String-stable codes per the MCP error convention: `DEDUP_QUERY_FAILED`,
/// `TX_BEGIN_FAILED`, `MEMORY_INSERT_FAILED`, `DEDUP_INSERT_FAILED`,
/// `SIGNED_EVENTS_APPEND_FAILED`, `TX_COMMIT_FAILED`.
pub fn capture_turn_idempotent(
    conn: &Connection,
    write: &crate::models::CaptureTurnWrite,
) -> std::result::Result<crate::models::CaptureTurnResult, String> {
    use rusqlite::OptionalExtension;

    // #1579 B6 — the dedup probe fires on EVERY captured turn before
    // any write; `prepare_cached` keeps the per-turn cost at bind+step.
    let existing: Option<String> = conn
        .prepare_cached(
            "SELECT memory_id FROM transcript_line_dedup \
             WHERE host_session_id IS NOT NULL \
               AND host_session_id = ?1 \
               AND host_turn_index = ?2",
        )
        .and_then(|mut stmt| {
            stmt.query_row(
                params![&write.host_session_id, write.host_turn_index],
                |row| row.get(0),
            )
            .optional()
        })
        .map_err(|e| format!("DEDUP_QUERY_FAILED: {e}"))?;

    if let Some(memory_id) = existing {
        return Ok(crate::models::CaptureTurnResult {
            memory_id,
            dedup_hit: true,
        });
    }

    conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)
        .map_err(|e| format!("TX_BEGIN_FAILED: {e}"))?;

    // The three writes run inside a closure so every `?` exit funnels
    // into the single COMMIT / ROLLBACK decision below.
    let tx_result = (|| -> std::result::Result<String, String> {
        let inserted_id =
            insert(conn, &write.memory).map_err(|e| format!("MEMORY_INSERT_FAILED: {e}"))?;

        conn.prepare_cached(
            "INSERT INTO transcript_line_dedup \
             (sha256, memory_id, host_kind, transcript_path, \
              host_session_id, host_turn_index, recovered_at) \
             VALUES (?1, ?2, ?3, NULL, ?4, ?5, ?6)",
        )
        .and_then(|mut stmt| {
            stmt.execute(params![
                write.sha256,
                inserted_id,
                write.host_kind,
                write.host_session_id,
                write.host_turn_index,
                write.recovered_at_ms,
            ])
        })
        .map_err(|e| format!("DEDUP_INSERT_FAILED: {e}"))?;

        crate::signed_events::append_signed_event_no_tx(conn, &write.signed_event)
            .map_err(|e| format!("SIGNED_EVENTS_APPEND_FAILED: {e}"))?;

        Ok(inserted_id)
    })();

    match tx_result {
        Ok(memory_id) => {
            if let Err(e) = conn.execute_batch(connection::SQL_COMMIT) {
                // A failed COMMIT can leave the transaction open (e.g.
                // SQLITE_BUSY); without a ROLLBACK the next
                // `BEGIN IMMEDIATE` on this connection would fail. If
                // SQLite already rolled back on its own, this ROLLBACK
                // errors harmlessly, hence the ignored result.
                let _ = conn.execute_batch(connection::SQL_ROLLBACK);
                return Err(format!("TX_COMMIT_FAILED: {e}"));
            }
            Ok(crate::models::CaptureTurnResult {
                memory_id,
                dedup_hit: false,
            })
        }
        Err(e) => {
            let _ = conn.execute_batch(connection::SQL_ROLLBACK);
            Err(e)
        }
    }
}

/// v0.7.0 fix campaign R1-M3 (#690) — insert a memory under an
/// explicit [`ConflictMode`].
///
/// This is the substrate primitive every direct-DB writer that cares
/// about collision semantics should reach for. Callers that want the
/// legacy silent-merge behaviour (most of the existing surface) keep
/// calling [`insert`]; `ConflictMode::Merge` here delegates straight to
/// it, so both entry points execute the same upsert SQL and backward
/// compat is preserved without invasive churn.
///
/// Returns the id of the row that was written (or merged into).
///
/// # Errors
///
/// * Bubbles up rusqlite errors from the underlying INSERT, JSON
///   encoding errors for the serialised columns, and a refusal from
///   the pre-write governance hook.
/// * Under [`ConflictMode::Error`], returns a typed [`ConflictError`]
///   when `(mem.title, mem.namespace)` already exists. The existing
///   row is left untouched. `existing_id` is best-effort: it is empty
///   when the conflicting row could not be looked up.
/// * Under [`ConflictMode::Version`], returns an error when no free
///   `title (N)` slot is found within the safety cap (see
///   [`next_versioned_title`]).
pub fn insert_with_conflict(conn: &Connection, mem: &Memory, mode: ConflictMode) -> Result<String> {
    match mode {
        ConflictMode::Merge => insert(conn, mem),
        ConflictMode::Error => {
            // v0.7.0 L1-6 Deliverable E — fire the pre-write governance
            // hook BEFORE the existence-check `SELECT`. The Merge and
            // Version branches reach the hook via their `insert(..)`
            // calls; the `Error` branch needs its own gate because it
            // bypasses `insert` to issue the unannotated INSERT itself.
            // Refusal here returns no row written and no SELECT
            // performed — symmetric with the Merge path.
            consult_governance_pre_write(mem)?;
            // Existence check + INSERT must be atomic against
            // concurrent writers. We rely on the (title, namespace)
            // UNIQUE index — issue a plain INSERT WITHOUT the upsert
            // tail, let SQLite enforce the constraint, and translate
            // the constraint violation into a typed error.
            //
            // The SELECT before INSERT is intentionally kept as an
            // up-front read so the typed error message can carry the
            // existing row's id. Two queries open a TOCTOU window
            // (another writer slots in between SELECT and INSERT) —
            // but the constraint violation on the subsequent INSERT
            // still fires loud, and the caller's retry sees the new
            // state. Reading the id is best-effort context for the
            // diagnostic.
            if let Some(existing_id) = find_by_title_namespace(conn, &mem.title, &mem.namespace)? {
                return Err(ConflictError {
                    existing_id,
                    title: mem.title.clone(),
                    namespace: mem.namespace.clone(),
                }
                .into());
            }
            let tags_json = serde_json::to_string(&mem.tags)?;
            let metadata_json = serde_json::to_string(&mem.metadata)?;
            // v0.7.0 Form 4 — encode citations + source_span for the
            // schema v38 TEXT columns. Mirrors the encode in
            // `insert(...)`; the ConflictMode::Error path lands here on
            // the first-write happy path and must persist the
            // provenance columns the caller supplied. `None` stays
            // `None` so the column is written as NULL.
            let citations_json = serde_json::to_string(&mem.citations)?;
            let source_span_json = mem
                .source_span
                .map(|span| serde_json::to_string(&span))
                .transpose()?;
            // v0.7.0 Form 5 — encode confidence-provenance fields for
            // the schema v39 TEXT columns. Mirrors the encode in
            // `insert(...)`.
            let confidence_signals_json = mem
                .confidence_signals
                .as_ref()
                .map(serde_json::to_string)
                .transpose()?;
            // v0.7.0 polish PERF-8 (#781) — same denormalised mention
            // tag wired here so the ConflictMode::Error path (used by
            // `storage::reflect`) populates the indexed column on the
            // first-write happy path; otherwise the auto-persona matcher
            // would miss every reflection minted via reflect.
            let mentioned_entity_id = extract_mentioned_entity_id(mem);
            // v0.7.0 L1-1 wave merge — include the `memory_kind` column.
            // This INSERT path was added by the fix-campaign R1-M3
            // (ConflictMode::Error refuses duplicates) and originally
            // omitted the new L1-1 column because L1-1 was authored
            // against the pre-fix-campaign storage layer. Without
            // memory_kind here, a `db::reflect` call (which uses
            // `insert_with_conflict(.., ConflictMode::Error)`) loses
            // its `MemoryKind::Reflection` typing and the stored row
            // falls back to the column DEFAULT 'observation'.
            let actual_id: String = conn
                .query_row(
                    "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, confidence, source, access_count, created_at, updated_at, last_accessed_at, expires_at, metadata, reflection_depth, memory_kind, entity_id, persona_version, citations, source_uri, source_span, confidence_source, confidence_signals, confidence_decayed_at, mentioned_entity_id)
                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, ?20, ?21, ?22, ?23, ?24, ?25, ?26)
                 RETURNING id",
                    params![
                        mem.id,
                        mem.tier.as_str(),
                        mem.namespace,
                        mem.title,
                        mem.content,
                        tags_json,
                        mem.priority,
                        mem.confidence,
                        mem.source,
                        mem.access_count,
                        mem.created_at,
                        mem.updated_at,
                        mem.last_accessed_at,
                        mem.effective_expires_at(),
                        metadata_json,
                        mem.reflection_depth,
                        mem.memory_kind.as_str(),
                        mem.entity_id,
                        mem.persona_version,
                        citations_json,
                        mem.source_uri,
                        source_span_json,
                        mem.confidence_source.as_str(),
                        confidence_signals_json,
                        mem.confidence_decayed_at,
                        mentioned_entity_id,
                    ],
                    |r| r.get(0),
                )
                .map_err(|e| {
                    // Translate a UNIQUE constraint violation that
                    // raced past the SELECT into the typed error so
                    // callers see the same shape on TOCTOU as on the
                    // pre-check path.
                    if e.to_string().contains("UNIQUE constraint failed") {
                        // Best-effort: the winning writer's row exists
                        // now, so try to name it in the diagnostic. Any
                        // lookup failure (or no matching row) falls
                        // back to an empty id rather than masking the
                        // conflict itself.
                        let existing_id =
                            find_by_title_namespace(conn, &mem.title, &mem.namespace)
                                .ok()
                                .flatten()
                                .unwrap_or_default();
                        anyhow::Error::new(ConflictError {
                            existing_id,
                            title: mem.title.clone(),
                            namespace: mem.namespace.clone(),
                        })
                    } else {
                        e.into()
                    }
                })?;
            Ok(actual_id)
        }
        ConflictMode::Version => {
            let resolved_title = next_versioned_title(conn, &mem.title, &mem.namespace)?;
            let mut versioned = mem.clone();
            versioned.title = resolved_title;
            // The chosen title is fresh — fall into the plain insert
            // path (which still calls into the upsert SQL, but the
            // upsert branch is unreachable for a fresh title).
            insert(conn, &versioned)
        }
    }
}

/// Fetch a single memory by its exact id.
///
/// Returns `Ok(None)` when no row has that id. The lookup statement is
/// taken from the connection's prepared-statement cache.
///
/// # Errors
///
/// Propagates rusqlite errors from preparing/executing the query and
/// from decoding the row via `row_to_memory`.
pub fn get(conn: &Connection, id: &str) -> Result<Option<Memory>> {
    let mut lookup = conn.prepare_cached(SQL_SELECT_MEMORY_ROW_BY_ID)?;
    // Only the first row is consumed; `transpose` turns the iterator's
    // `Option<Result<_>>` into `Result<Option<_>>` so `?` can surface a
    // decode failure while an absent row stays `None`.
    let first_row = lookup
        .query_map(params![id], row_to_memory)?
        .next()
        .transpose()?;
    Ok(first_row)
}

/// Fetch several memories by id in bulk: the batched sibling of [`get`],
/// issuing `WHERE id IN (?, ?, ...)` SELECTs instead of one query per id.
///
/// v0.7.0 #981 — used by the HNSW [`semantic_phase`] recall branch,
/// where ANN-hit batches of 50–250 IDs must be materialised as `Memory`
/// rows; the per-id `get` loop was 5–10× slower on a warm cache and
/// extended the DB-mutex hold.
///
/// The result is keyed by id so the caller can restore its own ordering
/// (e.g. the HNSW hit order) afterwards.
///
/// Ids are queried in batches of at most 500 bind parameters, which
/// stays under SQLite's default `SQLITE_LIMIT_VARIABLE_NUMBER = 999`.
///
/// An empty `ids` slice yields an empty map without touching the
/// connection. Ids with no matching row are simply absent from the map.
///
/// # Errors
///
/// Propagates rusqlite errors from preparing or running a batch query
/// and from decoding any row.
pub fn get_many(conn: &Connection, ids: &[String]) -> Result<HashMap<String, Memory>> {
    const BATCH_SIZE: usize = 500;

    let mut found: HashMap<String, Memory> = HashMap::with_capacity(ids.len());
    // `chunks` on an empty slice yields nothing, so no statement is
    // prepared when there are no ids.
    for batch in ids.chunks(BATCH_SIZE) {
        // One `?` per id in this batch: "?,?,?…".
        let marks = vec!["?"; batch.len()].join(",");
        let query = format!("SELECT * FROM memories WHERE id IN ({marks})");
        let mut select = conn.prepare(&query)?;
        for decoded in select.query_map(rusqlite::params_from_iter(batch.iter()), row_to_memory)? {
            let memory = decoded?;
            found.insert(memory.id.clone(), memory);
        }
    }
    Ok(found)
}

/// Look up a memory by ID prefix. Returns the memory if exactly one match is found.
/// Returns `Ok(None)` if no matches. Returns an error if the prefix is ambiguous (>1 match).
pub fn get_by_prefix(conn: &Connection, prefix: &str) -> Result<Option<Memory>> {
    // Escape SQL LIKE wildcards in the prefix to prevent % and _ from matching broadly
    let escaped = prefix.replace('%', "\\%").replace('_', "\\_");
    let pattern = format!("{escaped}%");
    let mut stmt = conn.prepare("SELECT * FROM memories WHERE id LIKE ?1 ESCAPE '\\'")?;
    let rows: Vec<Memory> = stmt
        .query_map(params![pattern], row_to_memory)?
        .filter_map(Result::ok)
        .collect();
    match rows.len() {
        0 => Ok(None),
        1 => Ok(Some(rows.into_iter().next().expect("len checked"))),
        _ => {
            let ids: Vec<String> = rows.iter().map(|m| m.id.clone()).collect();
            // #962 — typed envelope; handler downcasts via
            // `MemoryError::from(anyhow::Error)` to map to 400 BAD_REQUEST.
            // The match-count is preserved in `candidates.len()` so the
            // Display format ("ambiguous ID prefix 'X': N matches\n…")
            // stays byte-identical to the legacy bail!() string.
            Err(anyhow::Error::new(StorageError::AmbiguousIdPrefix {
                prefix: prefix.to_string(),
                candidates: ids,
            }))
        }
    }
}

/// Resolve a full id or an id prefix to a memory.
///
/// An exact-id hit via [`get`] wins; only when that finds nothing is the
/// input treated as a prefix and handed to [`get_by_prefix`].
///
/// # Errors
///
/// Propagates any error from [`get`] or [`get_by_prefix`] (the latter
/// includes the ambiguous-prefix error).
pub fn resolve_id(conn: &Connection, id: &str) -> Result<Option<Memory>> {
    match get(conn, id)? {
        exact @ Some(_) => Ok(exact),
        None => get_by_prefix(conn, id),
    }
}

/// Bump access count, extend TTL, auto-promote — atomic via transaction.
///
/// Runs three UPDATEs against the row `id` inside one
/// `BEGIN IMMEDIATE` … `COMMIT` transaction:
///
/// 1. increment `access_count` (capped at 1,000,000), stamp
///    `last_accessed_at`, and extend `expires_at` for short/mid rows;
/// 2. promote a `mid` row to `long` (clearing `expires_at`) once
///    `access_count` reaches `PROMOTION_THRESHOLD`;
/// 3. bump `priority` by one (capped at 10) whenever `access_count` is
///    a positive multiple of 10.
///
/// `short_extend` / `mid_extend` are the per-access TTL windows in
/// seconds, measured from "now", for the `short` and `mid` tiers.
/// An unknown `id` is not an error: the UPDATEs simply match no row.
///
/// # Errors
///
/// Propagates rusqlite errors from `BEGIN`, any UPDATE, or `COMMIT`.
/// On any failure a `ROLLBACK` is issued, so no partial touch
/// survives and the connection is not left inside a transaction.
pub fn touch(conn: &Connection, id: &str, short_extend: i64, mid_extend: i64) -> Result<()> {
    let now = Utc::now();
    let now_str = now.to_rfc3339();
    let short_expires = (now + chrono::Duration::seconds(short_extend)).to_rfc3339();
    let mid_expires = (now + chrono::Duration::seconds(mid_extend)).to_rfc3339();

    conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)?;

    let result = (|| -> Result<()> {
        // #1596 — the per-access TTL window is an extension FLOOR, not a
        // replacement. `MAX(expires_at, ?N)` keeps whichever expiry is
        // later, so a fresh mid-tier row carrying its create-time +7d
        // backstop is no longer pulled IN to now+1d on first recall
        // (lived evidence: row 4c7e7cc1 went 2026-06-18 → 2026-06-12).
        // Both operands are UTC RFC3339 strings, so SQLite's scalar
        // MAX() lexicographic comparison is chronological. Long-tier
        // (NULL expiry) rows stay NULL via the first CASE arm.
        conn.execute(
            "UPDATE memories SET
                access_count = MIN(access_count + 1, 1000000),
                last_accessed_at = ?1,
                expires_at = CASE
                    WHEN tier = 'long' THEN expires_at
                    WHEN tier = 'short' AND expires_at IS NOT NULL THEN MAX(expires_at, ?2)
                    WHEN tier = 'mid' AND expires_at IS NOT NULL THEN MAX(expires_at, ?3)
                    ELSE expires_at
                END
             WHERE id = ?4",
            params![now_str, short_expires, mid_expires, id],
        )?;

        conn.execute(
            "UPDATE memories SET tier = 'long', expires_at = NULL, updated_at = ?1
             WHERE id = ?2 AND tier = 'mid' AND access_count >= ?3",
            params![now_str, id, PROMOTION_THRESHOLD],
        )?;

        conn.execute(
            "UPDATE memories SET priority = MIN(priority + 1, 10)
             WHERE id = ?1 AND access_count > 0 AND access_count % 10 = 0 AND priority < 10",
            params![id],
        )?;

        Ok(())
    })();

    match result {
        Ok(()) => {
            if let Err(commit_err) = conn.execute_batch(connection::SQL_COMMIT) {
                // A failed COMMIT can leave the transaction open (e.g.
                // SQLITE_BUSY), which would make the next
                // `BEGIN IMMEDIATE` on this connection fail. If SQLite
                // already rolled back on its own, this ROLLBACK errors
                // harmlessly, so its result is deliberately ignored.
                let _ = conn.execute_batch(connection::SQL_ROLLBACK);
                return Err(commit_err.into());
            }
            Ok(())
        }
        Err(e) => {
            if let Err(rb) = conn.execute_batch(connection::SQL_ROLLBACK) {
                tracing::error!("ROLLBACK failed in touch: {}", rb);
            }
            Err(e)
        }
    }
}

/// Cluster-F PERF-6 — batched touch.
///
/// Equivalent to invoking [`touch`] K times in sequence, but
/// collapses the per-row `BEGIN IMMEDIATE` … `COMMIT` cycle into a
/// SINGLE outer transaction so a K-row recall pays the SQLite
/// write-lock + commit cost ONCE instead of K times. The three
/// per-row UPDATE statements still run (same semantics: access bump
/// + TTL extend, mid→long promotion at `PROMOTION_THRESHOLD`,
/// priority+1 every 10 accesses); only the transaction framing
/// changes.
///
/// A failure mid-batch rolls back the entire transaction (no partial
/// touches survive) and surfaces a single error to the caller — which
/// matches the existing behaviour where any failed touch surfaces
/// to the recall log path. A failed `COMMIT` is likewise followed by a
/// best-effort `ROLLBACK` so the connection is not left mid-transaction.
///
/// Returns `ids.len()` on success (`0` for an empty slice, in which
/// case no transaction is opened). The UPDATEs' affected-row counts
/// are not inspected, so ids that match no row are still counted.
///
/// # Errors
///
/// Propagates rusqlite errors from `BEGIN`, statement preparation, any
/// UPDATE, or `COMMIT`.
pub fn touch_many(
    conn: &Connection,
    ids: &[&str],
    short_extend: i64,
    mid_extend: i64,
) -> Result<usize> {
    if ids.is_empty() {
        return Ok(0);
    }
    // One timestamp (and one pair of expiry floors) is shared by the
    // whole batch.
    let now = Utc::now();
    let now_str = now.to_rfc3339();
    let short_expires = (now + chrono::Duration::seconds(short_extend)).to_rfc3339();
    let mid_expires = (now + chrono::Duration::seconds(mid_extend)).to_rfc3339();

    conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)?;

    let result = (|| -> Result<()> {
        // Cache the three prepared statements once for the whole
        // batch; each `execute` reuses the cached query plan instead
        // of re-parsing per row. The statement handles are dropped when
        // this closure returns, i.e. before COMMIT / ROLLBACK runs.
        // #1596 — extension-floor semantics, mirroring [`touch`]: the
        // per-access window only ever EXTENDS expiry (MAX over the
        // existing column), never shortens it. One batched UPDATE per
        // row is preserved.
        let mut bump_stmt = conn.prepare_cached(
            "UPDATE memories SET
                access_count = MIN(access_count + 1, 1000000),
                last_accessed_at = ?1,
                expires_at = CASE
                    WHEN tier = 'long' THEN expires_at
                    WHEN tier = 'short' AND expires_at IS NOT NULL THEN MAX(expires_at, ?2)
                    WHEN tier = 'mid' AND expires_at IS NOT NULL THEN MAX(expires_at, ?3)
                    ELSE expires_at
                END
             WHERE id = ?4",
        )?;
        let mut promote_stmt = conn.prepare_cached(
            "UPDATE memories SET tier = 'long', expires_at = NULL, updated_at = ?1
             WHERE id = ?2 AND tier = 'mid' AND access_count >= ?3",
        )?;
        let mut priority_stmt = conn.prepare_cached(
            "UPDATE memories SET priority = MIN(priority + 1, 10)
             WHERE id = ?1 AND access_count > 0 AND access_count % 10 = 0 AND priority < 10",
        )?;
        for id in ids {
            bump_stmt.execute(params![now_str, short_expires, mid_expires, id])?;
            promote_stmt.execute(params![now_str, id, PROMOTION_THRESHOLD])?;
            priority_stmt.execute(params![id])?;
        }
        Ok(())
    })();

    match result {
        Ok(()) => {
            if let Err(commit_err) = conn.execute_batch(connection::SQL_COMMIT) {
                // A failed COMMIT can leave the transaction open (e.g.
                // SQLITE_BUSY), which would make the next
                // `BEGIN IMMEDIATE` on this connection fail. If SQLite
                // already rolled back on its own, this ROLLBACK errors
                // harmlessly, so its result is deliberately ignored.
                let _ = conn.execute_batch(connection::SQL_ROLLBACK);
                return Err(commit_err.into());
            }
            Ok(ids.len())
        }
        Err(e) => {
            if let Err(rb) = conn.execute_batch(connection::SQL_ROLLBACK) {
                tracing::error!("ROLLBACK failed in touch_many: {}", rb);
            }
            Err(e)
        }
    }
}

/// v0.7.0 Provenance Gap 1 (issue #884) — typed optimistic-concurrency
/// error returned by [`update_with_expected_version`] when the caller
/// passed `expected_version` and the stored row's current `version`
/// has drifted. Carries both expected + current so the caller can
/// surface a useful diagnostic and choose between re-read+re-apply
/// or bubbling CONFLICT upstream.
#[derive(Debug, Clone)]
pub struct VersionConflict {
    /// Id of the memory row whose version gate failed.
    pub id: String,
    /// Version the caller asserted via `expected_version`.
    pub expected: i64,
    /// Version found on the stored row at the time of the check.
    pub current: i64,
}

impl std::fmt::Display for VersionConflict {
    /// Renders the conflict as a single `CONFLICT: ...` line carrying the
    /// memory id plus the expected and stored versions.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "CONFLICT: memory {} expected_version={} but stored version={}",
            self.id, self.expected, self.current
        )
    }
}

impl std::error::Error for VersionConflict {}

/// Patch a memory in place by id, without an optimistic-concurrency check.
///
/// Thin wrapper over [`update_with_expected_version`]: every argument is
/// forwarded unchanged, and both `source_uri` and `expected_version` are
/// passed as `None`.
///
/// Returns the `(found, content_changed)` pair produced by
/// [`update_with_expected_version`].
///
/// # Errors
///
/// Propagates any error returned by [`update_with_expected_version`].
// One optional parameter per patchable field; the width is intentional.
#[allow(clippy::too_many_arguments)]
pub fn update(
    conn: &Connection,
    id: &str,
    title: Option<&str>,
    content: Option<&str>,
    tier: Option<&Tier>,
    namespace: Option<&str>,
    tags: Option<&Vec<String>>,
    priority: Option<i32>,
    confidence: Option<f64>,
    expires_at: Option<&str>,
    metadata: Option<&serde_json::Value>,
) -> Result<(bool, bool)> {
    // This entry point exposes neither knob, so both are pinned to `None`.
    let source_uri: Option<&str> = None;
    let expected_version: Option<i64> = None;
    update_with_expected_version(
        conn,
        id,
        title,
        content,
        tier,
        namespace,
        tags,
        priority,
        confidence,
        expires_at,
        metadata,
        source_uri,
        expected_version,
    )
}

/// v0.7.0 Provenance Gap 1 (issue #884) — optimistic-concurrency aware
/// variant of [`update`]. When `expected_version` is `Some(v)`, the
/// update fails with a typed [`VersionConflict`] error if the stored
/// row's `version` is not equal to `v`. When `None`, the legacy
/// last-write-wins behaviour is preserved (still bumps `version` on
/// success). On a successful mutation the row's `version` is
/// monotonically incremented; the new value is observable on the
/// subsequent read.
///
/// Every `Option` patch argument left as `None` keeps the stored value.
/// `expires_at` of `Some("")` or `Some("null")` clears the expiry. A
/// requested `tier` is never applied as a downgrade (long stays long,
/// mid never drops to short).
///
/// # Returns
///
/// * `Ok((false, false))` — no row could be read for `id`, or the UPDATE
///   matched zero rows and no version conflict could be established.
/// * `Ok((true, content_changed))` — the row was updated;
///   `content_changed` is `true` when the effective title or content
///   differs from the stored one.
///
/// # Errors
///
/// * [`VersionConflict`] — when `expected_version` is `Some` and the
///   stored value has drifted.
/// * Any error returned by `consult_governance_pre_write` for the
///   post-merge row; it is consulted before the UPDATE runs.
/// * `StorageError::UniqueConflict` — when the UPDATE hits a constraint
///   violation and another row already holds the same
///   `(title, namespace)`.
/// * Other rusqlite / serde errors bubble up from the prepare/execute
///   pair and the JSON serialization of tags and metadata.
#[allow(clippy::too_many_arguments, clippy::too_many_lines)]
pub fn update_with_expected_version(
    conn: &Connection,
    id: &str,
    title: Option<&str>,
    content: Option<&str>,
    tier: Option<&Tier>,
    namespace: Option<&str>,
    tags: Option<&Vec<String>>,
    priority: Option<i32>,
    confidence: Option<f64>,
    expires_at: Option<&str>,
    metadata: Option<&serde_json::Value>,
    source_uri: Option<&str>,
    expected_version: Option<i64>,
) -> Result<(bool, bool)> {
    // Load the current row; the patch is overlaid on top of it below.
    let mut stmt = conn.prepare_cached(SQL_SELECT_MEMORY_ROW_BY_ID)?;
    let mut rows = stmt.query_map(params![id], row_to_memory)?;
    // NOTE(review): a row that exists but fails `row_to_memory` decoding
    // (`Some(Err(_))`) also takes this "not found" exit — confirm that
    // is intended rather than surfacing the decode error.
    let Some(Ok(existing)) = rows.next() else {
        return Ok((false, false));
    };
    // Release the row iterator and the cached statement before the
    // write statements below run on the same connection.
    drop(rows);
    drop(stmt);

    // v0.7.0 Provenance Gap 1 (#884) — pre-check optimistic gate.
    // The same predicate is also asserted atomically inside the
    // UPDATE statement below so a racing writer that slipped in
    // between the SELECT and the UPDATE still fails CONFLICT.
    if let Some(expected) = expected_version
        && existing.version != expected
    {
        return Err(VersionConflict {
            id: existing.id.clone(),
            expected,
            current: existing.version,
        }
        .into());
    }

    let new_title = title.unwrap_or(&existing.title);
    let new_content = content.unwrap_or(&existing.content);
    // Reported back to the caller as the second tuple element on success.
    let content_changed = new_title != existing.title || new_content != existing.content;

    // Tier downgrade protection: never downgrade, consistent with insert path.
    let effective_tier = match (tier, &existing.tier) {
        (Some(requested), existing_tier) => match (existing_tier, requested) {
            (Tier::Long, _) => &Tier::Long,         // long never downgrades
            (Tier::Mid, Tier::Short) => &Tier::Mid, // mid never downgrades to short
            (_, requested) => requested,            // upgrades and same-tier are fine
        },
        (None, existing_tier) => existing_tier,
    };

    // Remaining patch fields: caller value wins, otherwise inherit.
    let namespace = namespace.unwrap_or(&existing.namespace);
    let tags = tags.unwrap_or(&existing.tags);
    let priority = priority.unwrap_or(existing.priority);
    let confidence = confidence.unwrap_or(existing.confidence);
    // Treat the empty string and the literal "null" as None (clear
    // expiry) — don't store either sentinel in the DB.
    let expires_at = match expires_at {
        Some("" | "null") => None,
        Some(v) => Some(v),
        None => existing.expires_at.as_deref(),
    };
    let metadata = metadata.unwrap_or(&existing.metadata);

    // #1451 (SEC, HIGH) — substrate governance pre-write gate on the
    // optimistic-update path. The insert/supersede/consolidate/restore
    // paths all consult GOVERNANCE_PRE_WRITE; update was the lone gap,
    // so a refuse rule could be evaded by storing benign content then
    // updating it into the refused namespace/tier/title. Build the
    // post-merge row and consult BEFORE any SQL touches the DB; a
    // refusal returns the typed GovernanceRefusal with no row mutated.
    let governed = Memory {
        tier: effective_tier.clone(),
        namespace: namespace.to_string(),
        title: new_title.to_string(),
        content: new_content.to_string(),
        tags: tags.clone(),
        priority,
        confidence,
        expires_at: expires_at.map(str::to_string),
        metadata: metadata.clone(),
        source_uri: source_uri
            .map(str::to_string)
            .or_else(|| existing.source_uri.clone()),
        ..existing.clone()
    };
    consult_governance_pre_write(&governed)?;

    let tags_json = serde_json::to_string(tags)?;
    let metadata_json = serde_json::to_string(metadata)?;
    // Written to `updated_at` by the UPDATE below.
    let now = Utc::now().to_rfc3339();

    // Ultrareview #354: rely on the UNIQUE INDEX on (title, namespace)
    // to enforce collision atomically at the DB layer. The previous
    // check-then-update sequence had a race — another transaction
    // could insert a colliding row between the SELECT and the UPDATE,
    // and the UPDATE would surface as a generic SQLite constraint
    // error to the caller. Now the collision check is inline: the
    // UPDATE fails with a well-scoped UniqueViolation, and we re-
    // query the colliding row's id only on that specific error for
    // the friendly message.
    //
    // v0.7.0 Provenance Gap 1 (#884) — UPDATE re-asserts
    // `expected_version` atomically and bumps `version + 1` on
    // success so a racing caller that read the SAME expected_version
    // sees a CONFLICT (their WHERE clause no longer matches the
    // bumped value). When `expected_version` is NULL the
    // `?13 IS NULL` predicate short-circuits the gate (`?12` is the
    // row id, `?13` is the expected version).
    // v0.7.0 Provenance Gap 2 (#906) — `source_uri` is an opt-in patch
    // field. When `None`, the COALESCE keeps the stored value (a
    // patch that doesn't touch source_uri must NOT blank it out).
    // When `Some(uri)`, the row's source_uri is rewritten verbatim
    // (rename / scheme migration / bad-data correction).
    let update_res = conn.execute(
        "UPDATE memories SET tier=?1, namespace=?2, title=?3, content=?4, tags=?5, priority=?6, confidence=?7, updated_at=?8, expires_at=?9, metadata=?10, source_uri = COALESCE(?11, source_uri), version = version + 1
         WHERE id=?12 AND (?13 IS NULL OR version = ?13)",
        params![effective_tier.as_str(), namespace, new_title, new_content, tags_json, priority, confidence, now, expires_at, metadata_json, source_uri, id, expected_version],
    );
    match update_res {
        Ok(0) => {
            // Either the row vanished between SELECT and UPDATE, or
            // the version drifted (racing writer slipped in). When
            // expected_version was supplied, re-read so the CONFLICT
            // envelope carries the current stored value.
            if let Some(expected) = expected_version {
                // `.ok()` folds "row gone" and any query error into
                // `None`, which falls through to the not-found result.
                let current_version: Option<i64> = conn
                    .query_row(
                        "SELECT version FROM memories WHERE id = ?1",
                        params![id],
                        |r| r.get(0),
                    )
                    .ok();
                if let Some(current) = current_version {
                    return Err(VersionConflict {
                        id: id.to_string(),
                        expected,
                        current,
                    }
                    .into());
                }
            }
            Ok((false, false))
        }
        Ok(_) => Ok((true, content_changed)),
        Err(rusqlite::Error::SqliteFailure(err, _))
            if err.code == rusqlite::ErrorCode::ConstraintViolation =>
        {
            // Look up which other row already owns this
            // (title, namespace) pair so the error can name it.
            let other: Option<String> = conn
                .query_row(
                    "SELECT id FROM memories WHERE title = ?1 AND namespace = ?2 AND id != ?3",
                    params![new_title, namespace, id],
                    |r| r.get(0),
                )
                .ok();
            if let Some(other_id) = other {
                // #962 typed envelope — UniqueConflict surfaces as
                // `MemoryError::Conflict` (HTTP 409).
                return Err(anyhow::Error::new(StorageError::UniqueConflict {
                    reason: format!(
                        "title '{new_title}' already exists in namespace '{namespace}' (memory {other_id})"
                    ),
                }));
            }
            // Constraint violation with no colliding (title, namespace)
            // row found: report it generically.
            Err(anyhow::anyhow!("update failed with constraint violation"))
        }
        Err(e) => Err(e.into()),
    }
}

/// v0.7.0 Provenance Gap 5 (issue #888) — append-and-archive result
/// returned by [`update_with_archive_on_supersede`].
///
/// * `archived_id` is the OLD memory's id (now in
///   `archived_memories` with `archive_reason='superseded'`).
/// * `new_id` is the freshly-minted row carrying the patched
///   content. The supersede lineage is encoded via TWO mechanisms
///   (NOT three): (1) `archived_memories.archive_reason='superseded'`
///   on the OLD row, (2) `new_memory.metadata.superseded_id` forward
///   pointer on the NEW row. A `memory_links` `supersedes` edge is
///   NOT written because the FK `target_id REFERENCES memories(id)`
///   would reject it (the archived row no longer lives in the live
///   `memories` table). See #895 for the future archive-cross-ref
///   path that would unblock a uniform link surface.
#[derive(Debug, Clone)]
pub struct SupersedeResult {
    /// Id of the OLD memory that was moved to the archive.
    pub archived_id: String,
    /// Id of the NEW memory row that carries the patched fields.
    pub new_id: String,
}

/// v0.7.0 Provenance Gap 5 (issue #888) — append-and-archive write
/// path. Used by the MCP `memory_update` tool when the caller passes
/// `edit_source` of `llm` or `hook`. The archive of the OLD row and the
/// insert of the NEW row run inside one `BEGIN IMMEDIATE` / `COMMIT`
/// pair so a failure mid-way leaves the old row live (no partial
/// supersede). The initial read, the version gate and the governance
/// consult all happen BEFORE that transaction opens.
///
/// Sequence (mirrors mem9's split-write-path pattern):
///
/// 1. Honor the optimistic-concurrency gate (`expected_version`)
///    against the OLD row. Conflict surfaces as
///    [`VersionConflict`] before any mutation lands.
/// 2. Archive the OLD row with `archive_reason='superseded'` so a
///    rewind via `memory_archive_list` can find it.
/// 3. Insert a NEW memory row carrying the patched fields. The new
///    row's `(title, namespace)` may collide with the archived
///    row's (since the archive is in a separate table); the new
///    row's `id` is fresh, its `version` restarts at the default and
///    its access counters are reset.
/// 4. Stamp the supersede pointer in the new row's
///    `metadata.superseded_id` (together with `metadata.edit_source`).
///    Both stamps are only written when the effective metadata is a
///    JSON object. A `memory_links` `supersedes` row is intentionally
///    NOT written — the FK target would point at the archived id
///    which has left the live `memories` table. See #895 for the
///    archive-cross-ref follow-on.
///
/// # Errors
///
/// * `StorageError::MemoryNotFound` — no row could be read for `id`.
/// * [`VersionConflict`] — when `expected_version` is `Some` and
///   the stored row's `version` has drifted.
/// * Any error returned by `consult_governance_pre_write` for the
///   composed NEW row (raised before any state change).
/// * `StorageError::ArchiveSupersedeFailed` — the archive step reported
///   that no row was moved.
/// * rusqlite / serde errors bubble up from the underlying
///   archive + insert writes and from `BEGIN` / `COMMIT`. On any
///   failure after `BEGIN`, including a failed `COMMIT`, a `ROLLBACK`
///   is attempted before the error is returned.
#[allow(clippy::too_many_arguments, clippy::too_many_lines)]
pub fn update_with_archive_on_supersede(
    conn: &Connection,
    id: &str,
    title: Option<&str>,
    content: Option<&str>,
    tier: Option<&Tier>,
    namespace: Option<&str>,
    tags: Option<&Vec<String>>,
    priority: Option<i32>,
    confidence: Option<f64>,
    expires_at: Option<&str>,
    metadata: Option<&serde_json::Value>,
    source_uri: Option<&str>,
    expected_version: Option<i64>,
    edit_source: crate::models::EditSource,
) -> Result<SupersedeResult> {
    // Read the existing row so we can compose the patched NEW row.
    let mut stmt = conn.prepare_cached(SQL_SELECT_MEMORY_ROW_BY_ID)?;
    let mut rows = stmt.query_map(params![id], row_to_memory)?;
    let Some(Ok(existing)) = rows.next() else {
        // #962 typed envelope — 404 NOT_FOUND through MemoryError mapping.
        return Err(anyhow::Error::new(StorageError::MemoryNotFound {
            id: id.to_string(),
            role: None,
        }));
    };
    drop(rows);
    drop(stmt);

    // v0.7.0 Provenance Gap 1 (#884) — optimistic-concurrency gate.
    // NOTE(review): this check runs before BEGIN IMMEDIATE and is not
    // re-asserted inside the transaction — confirm callers serialize
    // writers on this connection, or move the read inside the tx.
    if let Some(expected) = expected_version
        && existing.version != expected
    {
        return Err(VersionConflict {
            id: existing.id.clone(),
            expected,
            current: existing.version,
        }
        .into());
    }

    // Compose the NEW memory row by overlaying the patch on the
    // OLD row. Mirrors the in-place `update` patch semantics:
    // unspecified fields inherit from the existing row.
    let new_id = uuid::Uuid::new_v4().to_string();
    let now = Utc::now().to_rfc3339();
    let new_title = title.unwrap_or(&existing.title).to_string();
    let new_content = content.unwrap_or(&existing.content).to_string();
    // Tier monotonicity preserved (long ≥ mid ≥ short).
    let new_tier = match (tier, &existing.tier) {
        (Some(requested), existing_tier) => match (existing_tier, requested) {
            (Tier::Long, _) => Tier::Long,
            (Tier::Mid, Tier::Short) => Tier::Mid,
            (_, r) => r.clone(),
        },
        (None, existing_tier) => existing_tier.clone(),
    };
    let new_namespace = namespace.unwrap_or(&existing.namespace).to_string();
    let new_tags = tags.cloned().unwrap_or_else(|| existing.tags.clone());
    let new_priority = priority.unwrap_or(existing.priority);
    let new_confidence = confidence.unwrap_or(existing.confidence);
    // Empty string and the literal "null" both clear the expiry.
    let new_expires = match expires_at {
        Some("" | "null") => None,
        Some(v) => Some(v.to_string()),
        None => existing.expires_at.clone(),
    };
    // v0.7.0 Provenance Gap 2 (#906) — caller-supplied source_uri
    // wins; otherwise inherit from the OLD row. Mirrors the pattern
    // used for title/content/tier above.
    let new_source_uri = match source_uri {
        Some(uri) => Some(uri.to_string()),
        None => existing.source_uri.clone(),
    };
    // Stamp the edit-source provenance into the new row's metadata so
    // downstream observers can tell this row came from an
    // append-and-archive supersede vs. a direct user write. The stamps
    // are skipped when the effective metadata is not a JSON object.
    let mut new_metadata = metadata
        .cloned()
        .unwrap_or_else(|| existing.metadata.clone());
    if let serde_json::Value::Object(ref mut m) = new_metadata {
        m.insert(
            "edit_source".to_string(),
            serde_json::Value::String(edit_source.as_str().to_string()),
        );
        m.insert(
            field_names::SUPERSEDED_ID.to_string(),
            serde_json::Value::String(existing.id.clone()),
        );
    }

    // #1638 — archive + insert run inside ONE BEGIN IMMEDIATE (below),
    // honoring the documented atomicity contract: a failure mid-way
    // (SQLITE_BUSY from a concurrent CLI-process writer, ENOSPC, FTS
    // trigger I/O error on the insert) rolls back the archive too, so
    // the OLD row stays live instead of vanishing into the archive
    // with an error returned. Uses `archive_memory_no_tx` (the
    // `append_signed_event_no_tx` idiom) because SQLite refuses
    // nested transactions.
    let archived_id = existing.id.clone();

    // FX-C5 — compose the NEW row up front so the substrate
    // pre-write governance hook (`GOVERNANCE_PRE_WRITE`) gets a
    // chance to refuse BEFORE the archive step destroys the live
    // OLD row. Pre-FX-C5 the hook was consulted transitively via
    // `insert(..)` at the tail of this function; archive ran first
    // so a refusal left the live table without the OLD row AND
    // without the patched NEW row. Now the hook fires on a fully-
    // composed candidate before any state mutation, mirroring the
    // FX-2 pattern on the postgres adapter (see
    // `consult_governance_pre_write_pg` in `src/store/postgres.rs`).
    let mut new_mem = existing.clone();
    new_mem.id = new_id.clone();
    new_mem.title = new_title;
    new_mem.content = new_content;
    new_mem.tier = new_tier;
    new_mem.namespace = new_namespace;
    new_mem.tags = new_tags;
    new_mem.priority = new_priority;
    new_mem.confidence = new_confidence;
    new_mem.expires_at = new_expires;
    new_mem.metadata = new_metadata;
    new_mem.source_uri = new_source_uri;
    new_mem.created_at = now.clone();
    // Last use of `now`: move it instead of cloning a second time.
    new_mem.updated_at = now;
    new_mem.access_count = 0;
    new_mem.last_accessed_at = None;
    // The NEW row starts at version=1 — it is a fresh row, not a
    // continuation of the OLD row's version chain (the chain is
    // preserved via the supersede link stamped in metadata).
    new_mem.version = crate::models::default_memory_version();

    // FX-C5 — consult the substrate governance pre-write hook on
    // the composed NEW row BEFORE archiving the OLD row. A refusal
    // returns cleanly with no state change.
    consult_governance_pre_write(&new_mem)?;

    // Steps 1+2 (#1638): one transaction around archive + insert.
    conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)?;
    let tx_result = (|| -> Result<()> {
        // Step 1: archive the OLD row with reason='superseded'.
        let moved = archive_memory_no_tx(conn, &archived_id, Some("superseded"))?;
        if !moved {
            // #962 typed envelope — substrate-internal fault (DB row
            // vanished between read and write or row count drifted).
            // Maps to 500.
            return Err(anyhow::Error::new(StorageError::ArchiveSupersedeFailed {
                archived_id: archived_id.clone(),
            }));
        }
        // Step 2: insert the NEW row carrying the patched content.
        insert(conn, &new_mem)?;
        Ok(())
    })();
    match tx_result {
        Ok(()) => {
            if let Err(commit_err) = conn.execute_batch(connection::SQL_COMMIT) {
                // A COMMIT that fails (e.g. SQLITE_BUSY) can leave the
                // transaction open on this connection. Roll back so the
                // connection is reusable and the OLD row stays live;
                // the ROLLBACK result is ignored because SQLite may
                // already have ended the transaction on its own.
                let _ = conn.execute_batch(connection::SQL_ROLLBACK);
                return Err(commit_err.into());
            }
        }
        Err(e) => {
            let _ = conn.execute_batch(connection::SQL_ROLLBACK);
            return Err(e);
        }
    }

    // Step 3: the supersede edge from new→archived id is preserved
    // in the new row's `metadata.superseded_id` (see above). A
    // proper `memory_links` row would trip the FK CHECK on
    // `target_id REFERENCES memories(id)` because the OLD row no
    // longer lives in `memories`; the metadata pointer is the
    // substrate-clean way to record the lineage until archive
    // cross-references land (tracked separately).
    Ok(SupersedeResult {
        archived_id,
        new_id,
    })
}

/// Hard-delete the memory with the given `id`.
///
/// Any `namespace_meta` row that names this memory as its standard is
/// removed first, then the `memories` row itself. The two statements are
/// issued back to back on `conn`; this function opens no transaction of
/// its own.
///
/// Returns `true` when a `memories` row was actually deleted.
///
/// # Errors
///
/// Returns an error if either DELETE statement fails.
pub fn delete(conn: &Connection, id: &str) -> Result<bool> {
    // Drop the namespace-standard reference before the row it points at.
    conn.execute(SQL_DELETE_NAMESPACE_META_BY_STANDARD_ID, params![id])?;
    let rows_removed = conn.execute(SQL_DELETE_MEMORY_BY_ID, params![id])?;
    Ok(rows_removed != 0)
}

/// Move a memory from `memories` to `archived_memories`. Used by the
/// HTTP `/api/v1/archive` explicit-archive endpoint (S29) and by
/// `sync_push` when a peer pushes an `archives: [id]` record.
///
/// Unlike `gc(archive=true)` this does not filter on `expires_at` — the
/// caller is explicitly asking for the row to be archived right now.
///
/// The move itself is delegated to [`archive_memory_no_tx`] and wrapped
/// in a `BEGIN IMMEDIATE` / `COMMIT` pair. `reason` defaults to
/// `"archive"` when `None`.
///
/// Returns `true` if a row was moved, `false` if no live memory existed
/// with this id (e.g. it was already archived or never written locally).
/// A missing-on-peer id is expected during normal fanout and callers
/// treat it as a no-op.
///
/// # Errors
///
/// Returns an error if `BEGIN IMMEDIATE`, the INSERT-SELECT, the DELETE
/// or the `COMMIT` fails. On any failure after `BEGIN` a `ROLLBACK` is
/// attempted before the error is returned.
pub fn archive_memory(conn: &Connection, id: &str, reason: Option<&str>) -> Result<bool> {
    conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)?;
    match archive_memory_no_tx(conn, id, reason) {
        Ok(moved) => {
            if let Err(commit_err) = conn.execute_batch(connection::SQL_COMMIT) {
                // A COMMIT that fails (e.g. SQLITE_BUSY) can leave the
                // transaction open on this connection. Roll back so the
                // connection is reusable; the ROLLBACK result is ignored
                // because SQLite may already have ended the transaction.
                let _ = conn.execute_batch(connection::SQL_ROLLBACK);
                return Err(commit_err.into());
            }
            Ok(moved)
        }
        Err(e) => {
            let _ = conn.execute_batch(connection::SQL_ROLLBACK);
            Err(e)
        }
    }
}

/// #1638 — the transaction-less body of [`archive_memory`].
///
/// Intended for callers that already have a transaction open on `conn`
/// (the supersede path runs archive + insert under ONE
/// `BEGIN IMMEDIATE`, so a failure part-way leaves the OLD row live).
/// Follows the same idiom as `append_signed_event_no_tx`. This function
/// issues no `BEGIN` / `COMMIT` / `ROLLBACK` itself.
///
/// Copies the live row into `archived_memories` (stamping `archived_at`
/// with the current time and `archive_reason` with `reason`, defaulting
/// to `"archive"`), removes any `namespace_meta` row that points at it,
/// then deletes it from `memories`.
///
/// Returns `true` when a `memories` row was deleted, `false` when the
/// existence probe found no live row (a failing probe is also treated
/// as "no live row").
///
/// # Errors
///
/// Returns an error if the INSERT-SELECT or either DELETE fails.
pub(crate) fn archive_memory_no_tx(
    conn: &Connection,
    id: &str,
    reason: Option<&str>,
) -> Result<bool> {
    let archived_at = Utc::now().to_rfc3339();
    let archive_reason = reason.unwrap_or("archive");

    // Guard: nothing to move when the id is not in the live table.
    let is_live: bool = conn
        .query_row(SQL_MEMORY_EXISTS_COUNT, params![id], |row| row.get(0))
        .unwrap_or(false);
    if !is_live {
        return Ok(false);
    }

    // v0.6.3.1 P2 (G5) — the embedding + embedding_dim travel into the
    // archive, and the row's tier + expires_at are additionally captured
    // as original_tier / original_expires_at so restore_archived can
    // round-trip the row instead of resetting to long/permanent.
    // The continuation-line whitespace below is part of the SQL literal
    // and is intentionally left exactly as it was.
    conn.execute(
        "INSERT OR REPLACE INTO archived_memories
             (id, tier, namespace, title, content, tags, priority, confidence,
              source, access_count, created_at, updated_at, last_accessed_at,
              expires_at, archived_at, archive_reason, metadata,
              embedding, embedding_dim, original_tier, original_expires_at,
              reflection_depth, atomised_into, atom_of, memory_kind,
              entity_id, persona_version, citations, source_uri, source_span,
              confidence_source, confidence_signals, confidence_decayed_at,
              mentioned_entity_id, version)
             SELECT id, tier, namespace, title, content, tags, priority, confidence,
                    source, access_count, created_at, updated_at, last_accessed_at,
                    expires_at, ?1, ?2, metadata,
                    embedding, embedding_dim, tier, expires_at,
                    reflection_depth, atomised_into, atom_of, memory_kind,
                    entity_id, persona_version, citations, source_uri, source_span,
                    confidence_source, confidence_signals, confidence_decayed_at,
                    mentioned_entity_id, version
             FROM memories WHERE id = ?3",
        params![archived_at, archive_reason, id],
    )?;

    // Same namespace_meta cleanup as `delete`: an archived row must not
    // remain referenced as the namespace standard.
    conn.execute(SQL_DELETE_NAMESPACE_META_BY_STANDARD_ID, params![id])?;
    let rows_removed = conn.execute(SQL_DELETE_MEMORY_BY_ID, params![id])?;
    Ok(rows_removed > 0)
}

/// #940 (security-high, 2026-05-20) — caller-scoped archive variant.
/// Mirrors [`archive_memory`] but constrains the soft-move to rows
/// in the live `memories` table whose `metadata->'agent_id'` JSON
/// field matches `caller` (with the inbox-target carve-out:
/// `metadata->'target_agent_id' == caller` is also archivable by
/// the inbox owner, matching
/// [`crate::store::is_visible_to_caller`]).
///
/// Pre-#940 the HTTP handler at
/// `src/handlers/archive.rs::archive_by_ids` (sqlite branch) called
/// the owner-blind [`archive_memory`] directly; any authenticated
/// HTTP caller could bulk-archive any other owner's live rows
/// (cross-tenant denial-of-service primitive). The postgres SAL
/// branch was already QC-P1-fixed (2026-05-20) to pass
/// `CallerContext::for_agent(caller)`; the sqlite branch is closed
/// by this helper. Returns `Ok(false)` on a non-owner attempt so
/// the surface cannot be used to probe other owners' live ids.
///
/// # Errors
///
/// Returns an error if the INSERT-SELECT or DELETE fails.
pub fn archive_memory_for_caller(
    conn: &Connection,
    id: &str,
    reason: Option<&str>,
    caller: &str,
) -> Result<bool> {
    let now = Utc::now().to_rfc3339();
    let reason = reason.unwrap_or("archive");
    conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)?;
    let result = (|| -> Result<bool> {
        // Owner gate: row must exist AND match the caller (or be an
        // inbox-target row whose recipient is the caller).
        let owned: bool = conn
            .query_row(
                "SELECT COUNT(*) > 0 FROM memories \
                 WHERE id = ?1 \
                   AND ( \
                     json_extract(metadata, '$.agent_id') = ?2 OR \
                     json_extract(metadata, '$.target_agent_id') = ?2 OR \
                     json_extract(metadata, '$.agent_id') IS NULL OR \
                     json_extract(metadata, '$.agent_id') = '' \
                   )",
                params![id, caller],
                |r| r.get(0),
            )
            .unwrap_or(false);
        if !owned {
            return Ok(false);
        }
        conn.execute(
            "INSERT OR REPLACE INTO archived_memories
             (id, tier, namespace, title, content, tags, priority, confidence,
              source, access_count, created_at, updated_at, last_accessed_at,
              expires_at, archived_at, archive_reason, metadata,
              embedding, embedding_dim, original_tier, original_expires_at,
              reflection_depth, atomised_into, atom_of, memory_kind,
              entity_id, persona_version, citations, source_uri, source_span,
              confidence_source, confidence_signals, confidence_decayed_at,
              mentioned_entity_id, version)
             SELECT id, tier, namespace, title, content, tags, priority, confidence,
                    source, access_count, created_at, updated_at, last_accessed_at,
                    expires_at, ?1, ?2, metadata,
                    embedding, embedding_dim, tier, expires_at,
                    reflection_depth, atomised_into, atom_of, memory_kind,
                    entity_id, persona_version, citations, source_uri, source_span,
                    confidence_source, confidence_signals, confidence_decayed_at,
                    mentioned_entity_id, version
             FROM memories WHERE id = ?3",
            params![now, reason, id],
        )?;
        // Clean up namespace_meta — mirrors `delete`'s cleanup so an archived
        // row is not still referenced as the namespace standard.
        conn.execute(SQL_DELETE_NAMESPACE_META_BY_STANDARD_ID, params![id])?;
        let removed = conn.execute(SQL_DELETE_MEMORY_BY_ID, params![id])?;
        Ok(removed > 0)
    })();
    match result {
        Ok(moved) => {
            conn.execute_batch(connection::SQL_COMMIT)?;
            Ok(moved)
        }
        Err(e) => {
            let _ = conn.execute_batch(connection::SQL_ROLLBACK);
            Err(e)
        }
    }
}

/// #1601 — single builder for the FTS5 query used by the DESTRUCTIVE
/// forget paths.
///
/// `forget` / `forget_count` used to pass the caller's pattern through
/// `sanitize_fts_query(pat, /* use_or = */ true)`, i.e. the fuzzy OR
/// join that the recall path uses for ranked retrieval. For a bulk
/// DELETE that over-matches badly: pattern "D6 scratch" matched (and
/// would delete) every row containing EITHER token, and
/// "D6 nonexistentzzzword" still matched rows containing only "D6".
///
/// Destructive matching has to be conservative, so this builder asks
/// for the non-OR form: every whitespace-separated token must match
/// (FTS5 implicit AND over the space-joined, phrase-quoted tokens).
/// `forget_count`, the `forget` delete arm and the
/// archive-before-delete arm all go through here so their match sets
/// cannot drift apart.
fn forget_fts_query(pat: &str) -> String {
    // Never OR-join tokens on a destructive path.
    let use_or = false;
    sanitize_fts_query(pat, use_or)
}

/// Count the memories a matching `forget` call would remove (the
/// `dry_run` companion).
///
/// At least one of `namespace`, `pattern` or `tier` must be supplied.
/// With a `pattern`, rows are matched through `memories_fts` using the
/// query built by [`forget_fts_query`], optionally narrowed by
/// `namespace` and `tier`. Without one, rows are counted by
/// `namespace` / `tier` alone.
///
/// # Errors
///
/// * `StorageError::InvalidArgument` when all three filters are `None`.
/// * rusqlite errors from the COUNT query.
pub fn forget_count(
    conn: &Connection,
    namespace: Option<&str>,
    pattern: Option<&str>,
    tier: Option<&Tier>,
) -> Result<usize> {
    let unfiltered = pattern.is_none() && namespace.is_none() && tier.is_none();
    if unfiltered {
        // #962 typed envelope — 400 BAD_REQUEST via ValidationFailed.
        return Err(anyhow::Error::new(StorageError::InvalidArgument {
            reason: crate::errors::msg::FORGET_FILTER_REQUIRED.to_string(),
        }));
    }

    // Both query shapes bind the tier as an optional string.
    let tier_str = tier.map(|t| t.as_str().to_string());

    // The whitespace inside the multi-line SQL literal is part of the
    // string and is kept exactly as it was.
    let matched: i64 = match pattern {
        Some(pat) => {
            let fts_query = forget_fts_query(pat);
            conn.query_row(
                "SELECT COUNT(*) FROM memories WHERE rowid IN (
                SELECT m.rowid FROM memories_fts fts
                JOIN memories m ON m.rowid = fts.rowid
                WHERE memories_fts MATCH ?1
                  AND (?2 IS NULL OR m.namespace = ?2)
                  AND (?3 IS NULL OR m.tier = ?3)
            )",
                params![fts_query, namespace, tier_str],
                |row| row.get(0),
            )?
        }
        None => conn.query_row(
            "SELECT COUNT(*) FROM memories WHERE (?1 IS NULL OR namespace = ?1) AND (?2 IS NULL OR tier = ?2)",
            params![namespace, tier_str],
            |row| row.get(0),
        )?,
    };

    // A value that does not fit in `usize` is reported as 0.
    Ok(usize::try_from(matched).unwrap_or(0))
}

/// Forget by pattern — delete memories matching namespace + FTS pattern + tier.
/// If `archive` is true, archives memories before deletion.
pub fn forget(
    conn: &Connection,
    namespace: Option<&str>,
    pattern: Option<&str>,
    tier: Option<&Tier>,
    archive: bool,
) -> Result<usize> {
    if pattern.is_none() && namespace.is_none() && tier.is_none() {
        // #962 typed envelope — 400 BAD_REQUEST via ValidationFailed.
        return Err(anyhow::Error::new(StorageError::InvalidArgument {
            reason: crate::errors::msg::FORGET_FILTER_REQUIRED.to_string(),
        }));
    }

    if archive {
        // Archive matching memories before deletion
        let now = Utc::now().to_rfc3339();
        if let Some(pat) = pattern {
            let fts_query = forget_fts_query(pat);
            let tier_str = tier.map(|t| t.as_str().to_string());
            // v0.6.3.1 P2 (G5) — preserve embedding + tier + expiry on forget-archive.
            // v0.7.0 issue #861 — also project `metadata` into the
            // archive row. The pre-fix INSERT omitted both the column
            // and the SELECT expression, so the column defaulted to
            // `'{}'` and `memory_archive_list` returned an empty object
            // for every forget-archived row (silently stripping
            // `agent_id`, `imported_from_*`, and every other operator-
            // visible attribution key). Mirrors the gc + explicit-
            // archive paths that already preserve metadata.
            conn.execute(
                "INSERT OR REPLACE INTO archived_memories
                 (id, tier, namespace, title, content, tags, priority, confidence,
                  source, access_count, created_at, updated_at, last_accessed_at,
                  expires_at, archived_at, archive_reason, metadata,
                  embedding, embedding_dim, original_tier, original_expires_at,
                  reflection_depth, atomised_into, atom_of, memory_kind,
                  entity_id, persona_version, citations, source_uri, source_span,
                  confidence_source, confidence_signals, confidence_decayed_at,
                  mentioned_entity_id, version)
                 SELECT id, tier, namespace, title, content, tags, priority, confidence,
                        source, access_count, created_at, updated_at, last_accessed_at,
                        expires_at, ?4, 'forget', metadata,
                        embedding, embedding_dim, tier, expires_at,
                        reflection_depth, atomised_into, atom_of, memory_kind,
                        entity_id, persona_version, citations, source_uri, source_span,
                        confidence_source, confidence_signals, confidence_decayed_at,
                        mentioned_entity_id, version
                 FROM memories WHERE rowid IN (
                    SELECT m.rowid FROM memories_fts fts
                    JOIN memories m ON m.rowid = fts.rowid
                    WHERE memories_fts MATCH ?1
                      AND (?2 IS NULL OR m.namespace = ?2)
                      AND (?3 IS NULL OR m.tier = ?3)
                 )",
                params![fts_query, namespace, tier_str, now],
            )?;
        } else {
            let tier_str = tier.map(|t| t.as_str().to_string());
            // v0.7.0 issue #861 — same metadata-projection fix as the
            // patterned branch above. Forget without a pattern still
            // archives whole namespaces/tiers, so the same bug applied.
            conn.execute(
                "INSERT OR REPLACE INTO archived_memories
                 (id, tier, namespace, title, content, tags, priority, confidence,
                  source, access_count, created_at, updated_at, last_accessed_at,
                  expires_at, archived_at, archive_reason, metadata,
                  embedding, embedding_dim, original_tier, original_expires_at,
                  reflection_depth, atomised_into, atom_of, memory_kind,
                  entity_id, persona_version, citations, source_uri, source_span,
                  confidence_source, confidence_signals, confidence_decayed_at,
                  mentioned_entity_id, version)
                 SELECT id, tier, namespace, title, content, tags, priority, confidence,
                        source, access_count, created_at, updated_at, last_accessed_at,
                        expires_at, ?3, 'forget', metadata,
                        embedding, embedding_dim, tier, expires_at,
                        reflection_depth, atomised_into, atom_of, memory_kind,
                        entity_id, persona_version, citations, source_uri, source_span,
                        confidence_source, confidence_signals, confidence_decayed_at,
                        mentioned_entity_id, version
                 FROM memories WHERE (?1 IS NULL OR namespace = ?1) AND (?2 IS NULL OR tier = ?2)",
                params![namespace, tier_str, now],
            )?;
        }
    }

    // If pattern provided, use FTS to find matching IDs
    if let Some(pat) = pattern {
        let fts_query = forget_fts_query(pat);
        let tier_str = tier.map(|t| t.as_str().to_string());
        let deleted = conn.execute(
            "DELETE FROM memories WHERE rowid IN (
                SELECT m.rowid FROM memories_fts fts
                JOIN memories m ON m.rowid = fts.rowid
                WHERE memories_fts MATCH ?1
                  AND (?2 IS NULL OR m.namespace = ?2)
                  AND (?3 IS NULL OR m.tier = ?3)
            )",
            params![fts_query, namespace, tier_str],
        )?;
        return Ok(deleted);
    }

    let tier_str = tier.map(|t| t.as_str().to_string());
    let deleted = conn.execute(
        "DELETE FROM memories WHERE (?1 IS NULL OR namespace = ?1) AND (?2 IS NULL OR tier = ?2)",
        params![namespace, tier_str],
    )?;
    Ok(deleted)
}

/// #1602 — one row of a forget preview / deletion audit listing.
///
/// Each field is read straight from the same-named `memories` column by
/// [`forget_matches`]; nothing is derived or normalised here.
#[derive(Debug, Clone, serde::Serialize)]
pub struct ForgetMatch {
    /// `memories.id` of the matched row.
    pub id: String,
    /// `memories.title` of the matched row.
    pub title: String,
    /// `memories.namespace` of the matched row.
    pub namespace: String,
    /// `memories.tier` as stored (text form, not the `Tier` enum).
    pub tier: String,
}

/// #1602 — enumerate the rows the forget filters currently select, at
/// most `limit` of them.
///
/// Before this helper a `memory_forget` dry run could only report a
/// blind `{would_delete: N}` count and the live run only a count as
/// well, so neither previewing nor recovering (archive restore) a
/// destructive pattern was practical. The filters are interpreted the
/// same way as in [`forget`] / [`forget_count`], including the #1601
/// AND pattern matching built by [`forget_fts_query`]. Rows are
/// returned in `rowid` order; callers pass `cap + 1` to detect
/// truncation without a second COUNT query.
///
/// Refuses an entirely unfiltered call with
/// `StorageError::InvalidArgument`, like its siblings.
pub fn forget_matches(
    conn: &Connection,
    namespace: Option<&str>,
    pattern: Option<&str>,
    tier: Option<&Tier>,
    limit: usize,
) -> Result<Vec<ForgetMatch>> {
    /// Column order is fixed by both SELECTs below: id, title, namespace, tier.
    fn to_match(row: &rusqlite::Row<'_>) -> rusqlite::Result<ForgetMatch> {
        let id = row.get(0)?;
        let title = row.get(1)?;
        let namespace = row.get(2)?;
        let tier = row.get(3)?;
        Ok(ForgetMatch {
            id,
            title,
            namespace,
            tier,
        })
    }

    let no_filter = pattern.is_none() && namespace.is_none() && tier.is_none();
    if no_filter {
        // #962 typed envelope — same refusal as `forget` / `forget_count`.
        return Err(anyhow::Error::new(StorageError::InvalidArgument {
            reason: crate::errors::msg::FORGET_FILTER_REQUIRED.to_string(),
        }));
    }

    let tier_name = tier.map(|t| t.as_str().to_string());
    // SQLite binds LIMIT as a signed 64-bit integer; clamp oversized caps.
    let row_cap = i64::try_from(limit).unwrap_or(i64::MAX);

    if let Some(pat) = pattern {
        let fts_query = forget_fts_query(pat);
        let mut stmt = conn.prepare(
            "SELECT m.id, m.title, m.namespace, m.tier
             FROM memories_fts fts
             JOIN memories m ON m.rowid = fts.rowid
             WHERE memories_fts MATCH ?1
               AND (?2 IS NULL OR m.namespace = ?2)
               AND (?3 IS NULL OR m.tier = ?3)
             ORDER BY m.rowid
             LIMIT ?4",
        )?;
        let matched = stmt.query_map(params![fts_query, namespace, tier_name, row_cap], to_match)?;
        return matched
            .collect::<rusqlite::Result<Vec<_>>>()
            .map_err(Into::into);
    }

    let mut stmt = conn.prepare(
        "SELECT id, title, namespace, tier FROM memories
         WHERE (?1 IS NULL OR namespace = ?1) AND (?2 IS NULL OR tier = ?2)
         ORDER BY rowid
         LIMIT ?3",
    )?;
    let matched = stmt.query_map(params![namespace, tier_name, row_cap], to_match)?;
    matched
        .collect::<rusqlite::Result<Vec<_>>>()
        .map_err(Into::into)
}

/// #1579 A2 — assemble the sargable `list` SQL text together with its
/// positional parameter vector.
///
/// Every optional filter contributes a bare `col = ?` / `col >= ?`
/// predicate, and only when the caller actually supplied it. The
/// legacy single-shape query spelled each filter as
/// `(?N IS NULL OR col = ?N)`, which SQLite cannot drive through an
/// index; the P1 perf audit measured the 100k-row list page at
/// ~141 ms with that shape (expiry guard via `idx_memories_expires`
/// plus a USE TEMP B-TREE FOR ORDER BY over the whole table) versus
/// ~0.06 ms once the planner could walk `idx_memories_list_order
/// (priority DESC, updated_at DESC)` — or `idx_memories_ns_list_order
/// (namespace, priority DESC, updated_at DESC)` for namespace-filtered
/// shapes — in ORDER BY order with early-stop under the LIMIT. The
/// EXPLAIN QUERY PLAN proof is pinned by
/// `tests/issue_1579_storage_perf.rs`.
///
/// Seven optional filters give at most 2^7 distinct SQL shapes, which
/// is why `list` prepares them via `prepare_cached`; real traffic
/// concentrates on a handful.
///
/// Parameter order in the returned vector: `now` (expiry guard), then
/// each supplied filter in the order namespace, tier, min_priority,
/// since, until, tag, agent_id, and finally `limit` and `offset`.
///
/// Public as the test-facing SSOT accessor for the EXPLAIN-pinning
/// regression tests (the `current_schema_version_for_tests`
/// precedent): the tests must plan the EXACT SQL production runs, not
/// a restated copy that could drift.
#[allow(clippy::too_many_arguments)]
#[must_use]
pub fn build_list_query(
    namespace: Option<&str>,
    tier: Option<&Tier>,
    min_priority: Option<i32>,
    now: &str,
    since: Option<&str>,
    until: Option<&str>,
    tags_filter: Option<&str>,
    agent_id: Option<&str>,
    limit: usize,
    offset: usize,
) -> (String, Vec<Box<dyn rusqlite::types::ToSql>>) {
    type Bound = Box<dyn rusqlite::types::ToSql>;

    // One slot per optional filter, listed in the order their clauses
    // (and therefore their `?` placeholders) must appear.
    let filters: [Option<(&str, Bound)>; 7] = [
        namespace.map(|ns| (" AND namespace = ?", Box::new(ns.to_string()) as Bound)),
        tier.map(|t| (" AND tier = ?", Box::new(t.as_str().to_string()) as Bound)),
        min_priority.map(|p| (" AND priority >= ?", Box::new(p) as Bound)),
        since.map(|s| (" AND created_at >= ?", Box::new(s.to_string()) as Bound)),
        until.map(|u| (" AND created_at <= ?", Box::new(u.to_string()) as Bound)),
        tags_filter.map(|tag| {
            (
                " AND EXISTS (SELECT 1 FROM json_each(memories.tags) WHERE json_each.value = ?)",
                Box::new(tag.to_string()) as Bound,
            )
        }),
        agent_id.map(|a| (" AND agent_id_idx = ?", Box::new(a.to_string()) as Bound)),
    ];

    // The expiry guard is unconditional, so its parameter always leads.
    let mut sql = String::from(SQL_LIST_BASE);
    let mut params_vec: Vec<Bound> = vec![Box::new(now.to_string())];

    for (clause, value) in filters.into_iter().flatten() {
        sql.push_str(clause);
        params_vec.push(value);
    }

    sql.push_str(SQL_LIST_ORDER_LIMIT);
    params_vec.push(Box::new(limit));
    params_vec.push(Box::new(offset));
    (sql, params_vec)
}

/// Page through non-expired memories ordered by priority then recency.
///
/// The SQL text and its parameters come from [`build_list_query`]; the
/// statement is prepared through the connection's statement cache
/// because the set of distinct SQL shapes is small and repeats.
#[allow(clippy::too_many_arguments)]
pub fn list(
    conn: &Connection,
    namespace: Option<&str>,
    tier: Option<&Tier>,
    limit: usize,
    offset: usize,
    min_priority: Option<i32>,
    since: Option<&str>,
    until: Option<&str>,
    tags_filter: Option<&str>,
    agent_id: Option<&str>,
) -> Result<Vec<Memory>> {
    // Expiry cut-off bound into the always-present guard.
    let cutoff = Utc::now().to_rfc3339();
    let (sql, owned_params) = build_list_query(
        namespace,
        tier,
        min_priority,
        &cutoff,
        since,
        until,
        tags_filter,
        agent_id,
        limit,
        offset,
    );

    // rusqlite binds a slice of trait-object references, so borrow each box.
    let mut bound: Vec<&dyn rusqlite::types::ToSql> = Vec::with_capacity(owned_params.len());
    for boxed in &owned_params {
        bound.push(boxed.as_ref());
    }

    let mut stmt = conn.prepare_cached(&sql)?;
    let memories = stmt
        .query_map(bound.as_slice(), row_to_memory)?
        .collect::<rusqlite::Result<Vec<_>>>()?;
    Ok(memories)
}

/// L1-1 (v0.7.0) — fetch every non-expired memory of the given
/// [`crate::models::MemoryKind`], highest priority first and most
/// recently updated first within a priority. The L2-1 curator
/// reflection pass uses it to enumerate observation-class memories as
/// synthesis candidates.
///
/// Intentionally bare: no tier filter, no priority floor, no
/// pagination — callers post-filter the returned `Vec<Memory>`. The
/// `memory_kind` index (added in migration v30) is what keeps the
/// lookup proportional to the kind's row count rather than the table.
#[allow(dead_code)] // consumed by L2-1 curator; not yet wired in this PR
pub(crate) fn memories_by_kind(
    conn: &Connection,
    kind: &crate::models::MemoryKind,
) -> Result<Vec<Memory>> {
    let cutoff = Utc::now().to_rfc3339();
    let kind_name = kind.as_str();
    let mut stmt = conn.prepare(
        "SELECT * FROM memories
         WHERE memory_kind = ?1
           AND (expires_at IS NULL OR expires_at > ?2)
         ORDER BY priority DESC, updated_at DESC",
    )?;
    let memories = stmt
        .query_map(params![kind_name, cutoff], row_to_memory)?
        .collect::<rusqlite::Result<Vec<_>>>()?;
    Ok(memories)
}

/// Ranked FTS search without any `source_uri` narrowing.
///
/// Thin entry point over [`search_with_source_uri`]: every argument is
/// forwarded unchanged and the URI filter is left unset.
#[allow(clippy::too_many_arguments)]
pub fn search(
    conn: &Connection,
    query: &str,
    namespace: Option<&str>,
    tier: Option<&Tier>,
    limit: usize,
    min_priority: Option<i32>,
    since: Option<&str>,
    until: Option<&str>,
    tags_filter: Option<&str>,
    agent_id: Option<&str>,
    as_agent: Option<&str>,
    // v0.7.0 WT-1-E — when false (default), search excludes archived
    // sources whose atoms surface in their place. See
    // [`recall_with_telemetry`] for the full contract.
    include_archived: bool,
) -> Result<Vec<Memory>> {
    // This entry point never filters by originating document.
    let no_source_uri: Option<&str> = None;
    search_with_source_uri(
        conn,
        query,
        namespace,
        tier,
        limit,
        min_priority,
        since,
        until,
        tags_filter,
        agent_id,
        as_agent,
        include_archived,
        no_source_uri,
    )
}

/// v0.7.0 Provenance Gap 6 (issue #889) — ranked FTS search with an
/// optional reciprocal `source_uri` filter.
///
/// With `source_uri = Some(uri)` an extra `m.source_uri = ?15`
/// predicate is appended, so only memories whose `source_uri` column
/// equals `uri` verbatim are returned; per the #889 design note the
/// partial `idx_memories_source_uri` index (created at v38) serves
/// that predicate. With `None` no extra predicate is added — this is
/// the shape [`search`] runs, since it forwards here with `None`.
///
/// Placeholder layout: `?1` FTS query, `?2` namespace, `?3` tier,
/// `?4` min priority, `?5` now, `?6` since, `?7` until, `?8` tag,
/// `?9` limit, `?10` agent id, `?11..?14` visibility prefixes, and
/// `?15` the source URI (bound only when the predicate is present).
#[allow(clippy::too_many_arguments)]
pub fn search_with_source_uri(
    conn: &Connection,
    query: &str,
    namespace: Option<&str>,
    tier: Option<&Tier>,
    limit: usize,
    min_priority: Option<i32>,
    since: Option<&str>,
    until: Option<&str>,
    tags_filter: Option<&str>,
    agent_id: Option<&str>,
    as_agent: Option<&str>,
    include_archived: bool,
    source_uri: Option<&str>,
) -> Result<Vec<Memory>> {
    let now = Utc::now().to_rfc3339();
    let tier_str = tier.map(|t| t.as_str().to_string());
    let fts_query = sanitize_fts_query(query, false);
    let (vis_p, vis_t, vis_u, vis_o) = compute_visibility_prefixes(as_agent);
    let archived_fragment = archived_source_clause(include_archived, "m");
    // The ?15 placeholder exists in the SQL only when a URI was supplied.
    let source_uri_fragment = source_uri.map_or("", |_| "AND m.source_uri = ?15");

    let sql = format!(
        "SELECT m.id, m.tier, m.namespace, m.title, m.content, m.tags, m.priority,
                m.confidence, m.source, m.access_count, m.created_at, m.updated_at,
                m.last_accessed_at, m.expires_at, m.metadata, m.reflection_depth,
                m.memory_kind, m.entity_id, m.persona_version,
                m.citations, m.source_uri, m.source_span,
                m.confidence_source, m.confidence_signals, m.confidence_decayed_at
         FROM memories_fts fts
         JOIN memories m ON m.rowid = fts.rowid
         WHERE memories_fts MATCH ?1
           AND (?2 IS NULL OR m.namespace = ?2)
           AND (?3 IS NULL OR m.tier = ?3)
           AND (?4 IS NULL OR m.priority >= ?4)
           AND (m.expires_at IS NULL OR m.expires_at > ?5)
           AND (?6 IS NULL OR m.created_at >= ?6)
           AND (?7 IS NULL OR m.created_at <= ?7)
           AND (?8 IS NULL OR EXISTS (SELECT 1 FROM json_each(m.tags) WHERE json_each.value = ?8))
           AND (?10 IS NULL OR m.agent_id_idx = ?10)
           {archived_fragment}
           {source_uri_fragment}
           {vis}
         ORDER BY (fts.rank * -1)
           + (m.priority * 0.5)
           + (MIN(m.access_count, 50) * 0.1)
           + (m.confidence * 2.0)
           + (1.0 / (1.0 + (julianday('now') - julianday(m.updated_at)) * 0.1))
           DESC
         LIMIT ?9",
        vis = visibility_clause(11, "m"),
    );

    // The first fourteen values are always bound; the URI joins them as
    // ?15 only when the SQL references it, because binding a parameter
    // the statement does not declare is an error.
    let mut bound: Vec<&dyn rusqlite::types::ToSql> = params![
        fts_query,
        namespace,
        tier_str,
        min_priority,
        now,
        since,
        until,
        tags_filter,
        limit,
        agent_id,
        vis_p,
        vis_t,
        vis_u,
        vis_o,
    ]
    .to_vec();
    if let Some(uri) = source_uri.as_ref() {
        bound.push(uri);
    }

    let mut stmt = conn.prepare(&sql)?;
    let hits = stmt
        .query_map(bound.as_slice(), row_to_memory)?
        .collect::<rusqlite::Result<Vec<_>>>()?;
    Ok(hits)
}

/// v0.7.0 Provenance Gap 6 (issue #889) — return every memory whose
/// `source_uri` equals the supplied value, oldest first.
///
/// This path skips FTS entirely, so a caller asking for "every memory
/// from this document" does not need to invent a query string; per the
/// #889 design note it is served by the partial
/// `idx_memories_source_uri` index. Pure read.
///
/// `limit` defaults to `LIST_DEFAULT_CAP` and is clamped to
/// `LIST_MAX_LIMIT`.
///
/// `as_agent` is the visibility principal. With `Some(...)` the
/// `compute_visibility_prefixes` + `visibility_clause` pair applies
/// the same scope=private gate as `search_with_source_uri` (#942 +
/// #975 follow-up: any query path returning Memory MUST inherit the
/// SAL #910 visibility filter). With `None` the filter is bypassed —
/// reserved for substrate-internal callers and tests that opt out
/// explicitly.
pub fn list_by_source_uri(
    conn: &Connection,
    source_uri: &str,
    namespace: Option<&str>,
    limit: Option<usize>,
    as_agent: Option<&str>,
) -> Result<Vec<Memory>> {
    let cap = std::cmp::min(limit.unwrap_or(LIST_DEFAULT_CAP), LIST_MAX_LIMIT);
    let row_cap = i64::try_from(cap).unwrap_or(i64::MAX);
    let (vis_private, vis_team, vis_unit, vis_org) = compute_visibility_prefixes(as_agent);

    // Placeholder layout: ?1 = uri, ?2 = namespace, ?3 = limit,
    // ?4..?7 = visibility prefixes (private/team/unit/org).
    let sql = format!(
        "SELECT m.id, m.tier, m.namespace, m.title, m.content, m.tags, m.priority,
                m.confidence, m.source, m.access_count, m.created_at, m.updated_at,
                m.last_accessed_at, m.expires_at, m.metadata, m.reflection_depth,
                m.memory_kind, m.entity_id, m.persona_version,
                m.citations, m.source_uri, m.source_span,
                m.confidence_source, m.confidence_signals, m.confidence_decayed_at,
                m.version
         FROM memories m
         WHERE m.source_uri = ?1
           AND (?2 IS NULL OR m.namespace = ?2)
           {vis}
         ORDER BY m.created_at ASC
         LIMIT ?3",
        vis = visibility_clause(4, "m"),
    );

    let mut stmt = conn.prepare(&sql)?;
    let memories = stmt
        .query_map(
            params![
                source_uri,
                namespace,
                row_cap,
                vis_private,
                vis_team,
                vis_unit,
                vis_org,
            ],
            row_to_memory,
        )?
        .collect::<rusqlite::Result<Vec<_>>>()?;
    Ok(memories)
}

/// Task 1.12 — proximity boost applied to a memory's score based on its
/// depth distance from the queried agent namespace. Uses the formula
/// `1 / (1 + depth_distance * 0.3)` per spec. Distance 0 = full strength
/// (1.0), each step up the hierarchy dampens linearly.
#[must_use]
pub fn proximity_boost(agent_ns: &str, memory_ns: &str) -> f64 {
    let agent_depth = crate::models::namespace_depth(agent_ns);
    let memory_depth = crate::models::namespace_depth(memory_ns);
    let distance = agent_depth.saturating_sub(memory_depth);
    #[allow(clippy::cast_precision_loss)]
    let d = distance as f64;
    1.0 / (1.0 + d * 0.3)
}

/// Task 1.12 — render the `AND m.namespace IN (<ancestors>)` SQL
/// fragment for a hierarchical namespace, plus a flag saying whether
/// hierarchy expansion is active.
///
/// Expansion is active only for a namespace that contains `/` and has
/// at least one ancestor; in every other case the result is
/// `(None, false)`. When active, the caller binds NULL for the plain
/// `namespace` parameter (so `?N IS NULL OR m.namespace = ?N` passes
/// trivially) and lets this fragment do the narrowing.
///
/// Ancestor strings are interpolated rather than bound because a
/// variable-length positional `IN` list is awkward in `SQLite`, and the
/// values come from `namespace_ancestors()` → `validate_namespace`-
/// approved strings. Single quotes are doubled defensively anyway.
///
/// PERF-8 (FX-C4-batch2, 2026-05-26): the fragment is a pure function
/// of `namespace`, so a size-capped cache keyed by the namespace string
/// spares the recall hot path the `format!` + `join` work. A hit still
/// clones the cached `String` but skips the rebuild; the cap
/// (`HIERARCHY_CACHE_MAX`) bounds memory under per-tenant namespace
/// explosions.
fn hierarchy_in_clause(namespace: Option<&str>) -> (Option<String>, bool) {
    // Flat or absent namespaces never expand.
    let ns = match namespace {
        Some(ns) if ns.contains('/') => ns,
        _ => return (None, false),
    };

    // PERF-8 cache lookup. Only hierarchical namespaces are ever
    // stored, so a hit always means "expansion active".
    if let Some(cached) = hierarchy_cache_get(ns) {
        return (Some(cached), true);
    }

    let ancestors = crate::models::namespace_ancestors(ns);
    if ancestors.is_empty() {
        return (None, false);
    }

    let in_list = ancestors
        .iter()
        .map(|ancestor| format!("'{}'", ancestor.replace('\'', "''")))
        .collect::<Vec<String>>()
        .join(",");
    let fragment = format!("AND m.namespace IN ({})", in_list);
    hierarchy_cache_put(ns, &fragment);
    (Some(fragment), true)
}

// PERF-8 (FX-C4-batch2, 2026-05-26) — size-capped cache for the
// rendered `hierarchy_in_clause` SQL fragment. Eviction is arbitrary
// (see `hierarchy_cache_put`), not LRU. Cap chosen to be large enough
// for the typical few-hundred-namespace deployment while keeping
// memory bounded on multi-tenant hosts.
const HIERARCHY_CACHE_MAX: usize = 256;

/// Process-wide fragment cache, created on first use. Keys are
/// namespace strings, values the rendered SQL fragment.
fn hierarchy_cache() -> &'static std::sync::Mutex<std::collections::HashMap<String, String>> {
    use std::collections::HashMap;
    use std::sync::{Mutex, OnceLock};

    static CACHE: OnceLock<Mutex<HashMap<String, String>>> = OnceLock::new();
    CACHE.get_or_init(|| Mutex::new(HashMap::new()))
}

/// Return a clone of the cached fragment for `ns`, if any. A poisoned
/// lock is treated as a miss so the caller just re-renders.
fn hierarchy_cache_get(ns: &str) -> Option<String> {
    let cache = hierarchy_cache().lock().ok()?;
    cache.get(ns).cloned()
}

/// Store `fragment` under `ns`, keeping the map at or below
/// `HIERARCHY_CACHE_MAX` entries. Best-effort: a poisoned lock makes
/// this a no-op.
fn hierarchy_cache_put(ns: &str, fragment: &str) {
    let Ok(mut cache) = hierarchy_cache().lock() else {
        return;
    };
    // Only a brand-new key can grow the map. Two threads that miss on
    // the same namespace both end up here; the second is a plain
    // overwrite and must not cost an unrelated entry its slot.
    if cache.len() >= HIERARCHY_CACHE_MAX && !cache.contains_key(ns) {
        // Bounded eviction: drop one arbitrary entry. The cache is
        // not a true LRU because the recall hot path runs in
        // microseconds and a full LRU's bookkeeping cost would
        // dominate the cache-hit savings. Arbitrary eviction is fine
        // because the hot working set typically stays well under
        // the cap; the eviction only fires on the long tail.
        if let Some(victim) = cache.keys().next().cloned() {
            cache.remove(&victim);
        }
    }
    cache.insert(ns.to_string(), fragment.to_string());
}

/// Test-only reset so cases that inspect cache contents start empty.
#[cfg(test)]
fn hierarchy_cache_clear_for_tests() {
    if let Ok(mut cache) = hierarchy_cache().lock() {
        cache.clear();
    }
}

/// Task 1.12 — scale each candidate's score by its
/// [`proximity_boost`] relative to `agent_ns`, then re-rank so the
/// highest boosted score comes first. Scores that cannot be compared
/// (NaN) are treated as equal and keep their relative order.
fn apply_proximity_boost(scored: Vec<(Memory, f64)>, agent_ns: &str) -> Vec<(Memory, f64)> {
    let mut ranked = scored;
    for (mem, score) in &mut ranked {
        *score *= proximity_boost(agent_ns, &mem.namespace);
    }
    ranked.sort_by(|lhs, rhs| {
        rhs.1
            .partial_cmp(&lhs.1)
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    ranked
}

/// Phase P6 (R1) — number of `cl100k_base` BPE tokens in `text`.
///
/// `cl100k_base` is the de-facto standard for Claude / GPT context
/// budgeting. `tiktoken-rs` embeds the BPE table (~1.7 MB), so the
/// count is offline-deterministic across hosts. Building the encoder
/// parses that table, which is why it is constructed once and kept in
/// a process-wide `OnceLock`.
///
/// If the encoder cannot be built (vanishingly rare) the function
/// falls back to the earlier `len/4` byte heuristic, so a budget
/// request never hard-errors on tokenizer init.
#[must_use]
pub fn count_tokens_cl100k(text: &str) -> usize {
    static ENCODER: std::sync::OnceLock<Option<tiktoken_rs::CoreBPE>> =
        std::sync::OnceLock::new();

    match ENCODER.get_or_init(|| tiktoken_rs::cl100k_base().ok()) {
        Some(encoder) => encoder.encode_with_special_tokens(text).len(),
        // Defensive fallback — not expected in practice because the
        // table ships inside the crate.
        None => text.len() / 4,
    }
}

/// Phase P6 — `cl100k_base` token cost of a memory.
///
/// Only `content` is counted: the R1 spec budgets against the LLM
/// context window, and `content` is what ends up in the prompt. Title
/// and metadata are treated as caller-side ornament and cost nothing
/// here.
#[must_use]
pub fn count_memory_tokens(mem: &Memory) -> usize {
    let prompt_text: &str = &mem.content;
    count_tokens_cl100k(prompt_text)
}

/// Phase P6 — kept for backward compatibility with the Task 1.11 byte-
/// heuristic surface. New code should use `count_memory_tokens`. The
/// returned value is now BPE-accurate (cl100k_base) rather than the
/// prior `len/4` estimate, so callers reading this through the public
/// API get the more accurate value automatically.
///
/// Returns exactly what [`count_memory_tokens`] returns for `mem`.
#[must_use]
pub fn estimate_memory_tokens(mem: &Memory) -> usize {
    // Pure alias — no estimation logic of its own remains here.
    count_memory_tokens(mem)
}

/// Phase P6 — outcome of applying a token budget to a ranked recall
/// list. Carries everything `mcp::handle_recall` needs to populate the
/// new RecallMeta block (`budget_tokens_used`, `budget_tokens_remaining`,
/// `memories_dropped`, `budget_overflow`).
///
/// Produced by [`apply_token_budget`].
#[derive(Debug, Clone)]
pub struct BudgetOutcome {
    /// Cumulative cl100k_base token count of the returned content.
    /// When no budget was set, `apply_token_budget` fills this with the
    /// cheap `content.len() / 4` byte heuristic instead of a BPE count.
    pub tokens_used: usize,
    /// `budget - tokens_used`, saturating at 0. `None` when no budget set.
    pub tokens_remaining: Option<usize>,
    /// How many candidates the budget cut from the ranked list.
    /// Always 0 when no budget is set.
    pub memories_dropped: usize,
    /// True iff the highest-ranked memory alone exceeded the budget and
    /// was returned anyway (R1 guarantee: at least one memory if any
    /// matched). Always false when no budget is set, and also false for
    /// an explicit budget of 0.
    pub budget_overflow: bool,
}

/// Phase P6 (R1) — greedy context-budget fill over a ranked candidate
/// list.
///
/// Three cases, selected by `budget_tokens`:
///
/// * `None` — nothing is cut. Every candidate is returned and
///   `tokens_used` is tallied with the cheap `content.len() / 4` byte
///   heuristic, keeping cl100k_base (one-shot BPE table parse plus
///   per-memory encoding) off the unbudgeted hot path. This honours the
///   Task 1.11 "observe the cost without enforcing it" contract.
/// * `Some(0)` — **zero memories are returned** with
///   `budget_overflow = false`: the spec semantics for "no budget at
///   all, please" (R1 §6 acceptance #3, test
///   `budget_tokens_zero_returns_zero_memories`).
/// * `Some(n)` — candidates are taken in rank order using BPE-accurate
///   cl100k counts, stopping at the first memory whose inclusion would
///   exceed `n`. If that happens before anything was kept, the
///   top-ranked memory is returned anyway with
///   `budget_overflow = true`, preserving the R1 guarantee that a
///   recall with matches never comes back empty because of a tight
///   budget.
#[must_use]
pub fn apply_token_budget(
    scored: Vec<(Memory, f64)>,
    budget_tokens: Option<usize>,
) -> (Vec<(Memory, f64)>, BudgetOutcome) {
    let total_candidates = scored.len();

    let budget = match budget_tokens {
        // No-budget fast path: pass the list through untouched and only
        // estimate its cost.
        None => {
            let estimated = scored.iter().fold(0usize, |acc, (mem, _)| {
                acc.saturating_add(mem.content.len() / 4)
            });
            return (
                scored,
                BudgetOutcome {
                    tokens_used: estimated,
                    tokens_remaining: None,
                    memories_dropped: 0,
                    budget_overflow: false,
                },
            );
        }
        // Explicit zero: the caller asked for nothing, so overflow stays false.
        Some(0) => {
            return (
                Vec::new(),
                BudgetOutcome {
                    tokens_used: 0,
                    tokens_remaining: Some(0),
                    memories_dropped: total_candidates,
                    budget_overflow: false,
                },
            );
        }
        Some(budget) => budget,
    };

    // Enforcement path — the caller cares about the number, so pay for
    // accurate cl100k accounting.
    let mut kept: Vec<(Memory, f64)> = Vec::with_capacity(total_candidates);
    let mut used: usize = 0;
    let mut overflow = false;

    for (mem, score) in scored {
        let projected = used.saturating_add(count_memory_tokens(&mem));
        let over_budget = projected > budget;
        if over_budget && !kept.is_empty() {
            // Something already fits; stop before the first misfit.
            break;
        }
        used = projected;
        kept.push((mem, score));
        if over_budget {
            // R1 always-return-at-least-one: the top-ranked memory is
            // kept even though it alone exceeds the budget.
            overflow = true;
            break;
        }
    }

    let outcome = BudgetOutcome {
        tokens_used: used,
        tokens_remaining: Some(budget.saturating_sub(used)),
        memories_dropped: total_candidates.saturating_sub(kept.len()),
        budget_overflow: overflow,
    };
    (kept, outcome)
}

/// v0.6.3.1 (P3): keyword-only recall with retrieval-stage telemetry.
///
/// Thin wrapper over [`recall`] (fuzzy OR search + touch + auto-promote +
/// TTL extension; Task 1.11 applies the optional `budget_tokens` cap after
/// ranking). Every argument is forwarded unchanged; the only addition is a
/// [`crate::models::RecallTelemetry`] built from the returned rows. MCP
/// `handle_recall` uses it to build the `meta` block, while existing callers
/// (HTTP handlers, CLI, bench) keep calling [`recall`] directly.
///
/// Phase P6: the second tuple element is the full `BudgetOutcome`
/// (tokens_used, tokens_remaining, memories_dropped, budget_overflow)
/// instead of just the prior bare `tokens_used`. Callers that only need
/// `tokens_used` read `outcome.tokens_used`.
///
/// Telemetry fields on this path:
/// - `fts_candidates` is the number of rows [`recall`] handed back, i.e.
///   the count after the SQL `LIMIT` and the token budget were applied.
/// - `hnsw_candidates`, `blend_weight_avg` and `embedding_dim_mismatch`
///   are always zero — no semantic stage runs here.
///
/// # Errors
///
/// Propagates any error returned by [`recall`].
#[allow(clippy::too_many_arguments)]
pub fn recall_with_telemetry(
    conn: &Connection,
    context: &str,
    namespace: Option<&str>,
    limit: usize,
    tags_filter: Option<&str>,
    since: Option<&str>,
    until: Option<&str>,
    short_extend: i64,
    mid_extend: i64,
    as_agent: Option<&str>,
    budget_tokens: Option<usize>,
    // v0.7.0 WT-1-E — when false (default), recall excludes archived
    // sources whose atoms now surface in their place. When true, the
    // archive-filter WHERE clause is dropped so forensic-export and
    // explicit auditor recall returns both atoms and sources.
    include_archived: bool,
    // v0.7.0 Form 4 / Cluster-A PERF-3 — push `--source-uri-prefix`
    // into the SQL WHERE so the partial `idx_memories_source_uri`
    // index covers the lookup and excluded rows never enter the
    // top-K. See [`recall`] for the contract.
    source_uri_prefix: Option<&str>,
) -> Result<(
    Vec<(Memory, f64)>,
    BudgetOutcome,
    crate::models::RecallTelemetry,
)> {
    let (results, outcome) = recall(
        conn,
        context,
        namespace,
        limit,
        tags_filter,
        since,
        until,
        short_extend,
        mid_extend,
        as_agent,
        budget_tokens,
        include_archived,
        source_uri_prefix,
    )?;

    // Keyword-only path: the semantic-stage counters stay at zero.
    let telemetry = crate::models::RecallTelemetry {
        fts_candidates: results.len(),
        hnsw_candidates: 0,
        blend_weight_avg: 0.0,
        embedding_dim_mismatch: 0,
    };
    Ok((results, outcome, telemetry))
}

/// Recall — fuzzy OR search + touch + auto-promote + TTL extension.
///
/// Pipeline, in order:
/// 1. Run the sanitised `context` against `memories_fts`, filtered by
///    namespace (or the hierarchy fragment), expiry, tag, `since`/`until`
///    on `created_at`, archived-source exclusion, optional source-URI
///    prefix and the visibility clause; rows are ordered by the composite
///    `score` column and capped at `limit`.
/// 2. Task 1.12: when hierarchy expansion is active, apply the proximity
///    boost relative to the requested `namespace`.
/// 3. Task 1.11 / Phase P6: apply the optional `budget_tokens` cap in rank
///    order and report the full `BudgetOutcome` (tokens_used,
///    tokens_remaining, memories_dropped, budget_overflow).
/// 4. Touch every returned memory in one batch via `touch_many`, passing
///    `short_extend` / `mid_extend` through.
///
/// # Errors
///
/// Returns an error when preparing or executing the SQL statement fails or
/// a row cannot be decoded. A failure of the post-recall `touch_many` is
/// logged as a warning and does NOT fail the call.
#[allow(clippy::too_many_arguments)]
pub fn recall(
    conn: &Connection,
    context: &str,
    namespace: Option<&str>,
    limit: usize,
    tags_filter: Option<&str>,
    since: Option<&str>,
    until: Option<&str>,
    short_extend: i64,
    mid_extend: i64,
    as_agent: Option<&str>,
    budget_tokens: Option<usize>,
    // v0.7.0 WT-1-E — see [`recall_with_telemetry`] for the
    // archived-source exclusion contract.
    include_archived: bool,
    // v0.7.0 Form 4 / Cluster-A PERF-3 — when `Some(prefix)`, restrict
    // results to memories whose `source_uri` starts with `prefix`. The
    // predicate is `source_uri LIKE 'prefix%'` so the partial
    // `idx_memories_source_uri` index (defined in migration
    // `0032_v07_form4_provenance.sql`) covers the scan. Pre-fix this
    // filter ran in Rust AFTER the SQL returned, which excluded valid
    // matches from the top-K when the substrate returned `limit` rows
    // that subsequently filtered to fewer. `None` preserves the legacy
    // no-filter behaviour for callers that filter post-hoc.
    source_uri_prefix: Option<&str>,
) -> Result<(Vec<(Memory, f64)>, BudgetOutcome)> {
    let now = Utc::now().to_rfc3339();
    let fts_query = sanitize_fts_query(context, true);
    let (vis_p, vis_t, vis_u, vis_o) = compute_visibility_prefixes(as_agent);

    // Task 1.12: hierarchy expansion. If `namespace` is hierarchical (contains
    // `/`), broaden the filter to the full ancestor chain. Flat namespaces
    // keep exact-match semantics (backward compat).
    let (hierarchy_in, hierarchy_active) = hierarchy_in_clause(namespace);
    let hierarchy_fragment = hierarchy_in.unwrap_or_default();
    let effective_namespace = if hierarchy_active { None } else { namespace };

    // v0.7.0 WT-1-E — archived-source exclusion (default) / pass-
    // through (include_archived=true). Composes with the existing
    // namespace, expiry, tag, time-window, and visibility filters.
    let archived_fragment = archived_source_clause(include_archived, "m");

    // v0.7.0 Form 4 / Cluster-A PERF-3 — push the source-URI prefix
    // predicate into SQL. We escape SQL LIKE metacharacters (`%`, `_`,
    // `\`) in the supplied prefix so a caller passing e.g. `doc:abc_`
    // matches only that literal value (not `doc:abcX`). The LIKE
    // pattern is constructed with the bound parameter holding the
    // already-escaped prefix + `%`; combined with the partial index
    // on `source_uri WHERE source_uri IS NOT NULL`, SQLite picks the
    // index for the lookup. See [`escape_like_pattern`].
    let (source_uri_fragment, source_uri_param): (&str, Option<String>) = match source_uri_prefix {
        Some(prefix) if !prefix.is_empty() => (
            "AND m.source_uri LIKE ?12 ESCAPE '\\'",
            Some(format!("{}%", escape_like_pattern(prefix))),
        ),
        _ => ("", None),
    };

    let sql = format!(
        "SELECT m.id, m.tier, m.namespace, m.title, m.content, m.tags, m.priority,
                m.confidence, m.source, m.access_count, m.created_at, m.updated_at,
                m.last_accessed_at, m.expires_at, m.metadata, m.reflection_depth,
                m.memory_kind, m.entity_id, m.persona_version,
                m.citations, m.source_uri, m.source_span,
                m.confidence_source, m.confidence_signals, m.confidence_decayed_at,
                (fts.rank * -1)
                + (m.priority * 0.5)
                + (MIN(m.access_count, 50) * 0.1)
                + (m.confidence * 2.0)
                + (CASE m.tier WHEN 'long' THEN 3.0 WHEN 'mid' THEN 1.0 ELSE 0.0 END)
                + (1.0 / (1.0 + (julianday('now') - julianday(m.updated_at)) * 0.1))
                AS score
         FROM memories_fts fts
         JOIN memories m ON m.rowid = fts.rowid
         WHERE memories_fts MATCH ?1
           AND (?2 IS NULL OR m.namespace = ?2)
           {hierarchy_fragment}
           AND (m.expires_at IS NULL OR m.expires_at > ?3)
           AND (?4 IS NULL OR EXISTS (SELECT 1 FROM json_each(m.tags) WHERE json_each.value = ?4))
           AND (?5 IS NULL OR m.created_at >= ?5)
           AND (?6 IS NULL OR m.created_at <= ?6)
           {archived_fragment}
           {source_uri_fragment}
           {vis}
         ORDER BY score DESC
         LIMIT ?7",
        vis = visibility_clause(8, "m"),
    );
    let mut stmt = conn.prepare(&sql)?;

    let row_handler = |row: &rusqlite::Row<'_>| -> rusqlite::Result<(Memory, f64)> {
        let mem = row_to_memory(row)?;
        // v0.7.0 Form 4 / v0.7.x Form 6 — name-based read for the
        // trailing score column. Switched from positional `row.get`
        // after schema v38 (citations, source_uri, source_span) and
        // Form 6's `memory_kind`/`entity_id`/`persona_version`
        // shifted the trailing column's index; name-based reads
        // survive future column additions without further churn.
        let score: f64 = row.get("score")?;
        Ok((mem, score))
    };

    // ?1..?11 are always bound. ?12 is appended only when the source-URI
    // fragment is active; SQLite errors on parameter-count mismatch, so the
    // bind list has to track the SQL text exactly.
    let mut bound: Vec<&dyn rusqlite::ToSql> = vec![
        &fts_query as &dyn rusqlite::ToSql,
        &effective_namespace,
        &now,
        &tags_filter,
        &since,
        &until,
        &limit,
        &vis_p,
        &vis_t,
        &vis_u,
        &vis_o,
    ];
    if let Some(uri_param) = source_uri_param.as_ref() {
        bound.push(uri_param);
    }
    let rows = stmt.query_map(bound.as_slice(), row_handler)?;
    let results: Vec<(Memory, f64)> = rows.collect::<rusqlite::Result<Vec<_>>>()?;

    // Task 1.12: proximity boost when hierarchy expansion is active.
    let boosted = if let (true, Some(anchor)) = (hierarchy_active, namespace) {
        apply_proximity_boost(results, anchor)
    } else {
        results
    };

    // Task 1.11 / Phase P6: apply optional token budget in rank order
    // (AFTER proximity). Returns BudgetOutcome with all R1 meta fields.
    let (budgeted, outcome) = apply_token_budget(boosted, budget_tokens);

    // Cluster-F PERF-6 — collapse K per-row touches into a single
    // `BEGIN IMMEDIATE` transaction. Same semantics (access bump,
    // TTL extend, promotion, priority bump every 10 accesses); the
    // 3K UPDATE round-trips now share one commit instead of K.
    // Best-effort by design: a failed touch must not fail the read.
    let touch_ids: Vec<&str> = budgeted.iter().map(|(mem, _)| mem.id.as_str()).collect();
    if let Err(e) = touch_many(conn, &touch_ids, short_extend, mid_extend) {
        tracing::warn!("touch_many failed for recall set: {}", e);
    }
    Ok((budgeted, outcome))
}

/// Task 1.7 — vertical memory promotion.
///
/// Clones `source_id` into `to_namespace`, which must be a proper `/`-derived
/// ancestor of the memory's current namespace. The original memory is
/// **untouched** (vertical promotion is a fan-out, not a move). A
/// `derived_from` link is created from the new clone back to the source so
/// the promotion trail is queryable.
///
/// Returns the clone's new ID.
///
/// Errors when:
/// - source doesn't exist
/// - `to_namespace` is empty, equal to the source namespace, or not an
///   ancestor of it (see `namespace_ancestors`)
pub fn promote_to_namespace(
    conn: &Connection,
    source_id: &str,
    to_namespace: &str,
) -> Result<String> {
    if to_namespace.is_empty() {
        // #962 typed envelope.
        return Err(anyhow::Error::new(StorageError::InvalidArgument {
            reason: "to_namespace cannot be empty".to_string(),
        }));
    }
    let source = get(conn, source_id)?.ok_or_else(|| {
        // #962 typed envelope. `Source` here labels the promotion source,
        // not a link end, but the user-facing message ("source memory
        // not found: …") is preserved via the LinkEnd::Source Display arm.
        anyhow::Error::new(StorageError::MemoryNotFound {
            id: source_id.to_string(),
            role: Some(LinkEnd::Source),
        })
    })?;
    if to_namespace == source.namespace {
        // #962 typed envelope.
        return Err(anyhow::Error::new(StorageError::InvalidArgument {
            reason: format!(
                "to_namespace must be a proper ancestor of the memory's namespace (got self: {})",
                source.namespace
            ),
        }));
    }
    let ancestors = namespace_ancestors(&source.namespace);
    if !ancestors.iter().any(|a| a == to_namespace) {
        // #962 typed envelope.
        return Err(anyhow::Error::new(StorageError::InvalidArgument {
            reason: format!(
                "to_namespace '{to_namespace}' is not an ancestor of '{}' (ancestors: {ancestors:?})",
                source.namespace
            ),
        }));
    }

    let now = Utc::now().to_rfc3339();
    let clone = Memory {
        id: uuid::Uuid::new_v4().to_string(),
        tier: source.tier.clone(),
        namespace: to_namespace.to_string(),
        title: source.title.clone(),
        content: source.content.clone(),
        tags: source.tags.clone(),
        priority: source.priority,
        confidence: source.confidence,
        source: source.source.clone(),
        access_count: 0,
        created_at: now.clone(),
        updated_at: now,
        last_accessed_at: None,
        expires_at: source.expires_at.clone(),
        metadata: source.metadata.clone(),
        reflection_depth: source.reflection_depth,
        memory_kind: source.memory_kind.clone(),
        entity_id: None,
        persona_version: None,
        citations: Vec::new(),
        source_uri: None,
        source_span: None,
        confidence_source: ConfidenceSource::CallerProvided,
        confidence_signals: None,
        confidence_decayed_at: None,
        version: 1,
    };
    let actual_id = insert(conn, &clone)?;
    // Clone → source: derived_from. Safe to ignore if the link layer
    // short-circuits on self-link (impossible here — distinct IDs).
    create_link(
        conn,
        &actual_id,
        source_id,
        crate::models::MemoryLinkRelation::DerivedFrom.as_str(),
    )?;
    Ok(actual_id)
}

/// v0.6.3.1 P2 (G6) — quick existence check for `(title, namespace)`. Used by
/// `on_conflict='error'` callers to short-circuit before the full upsert
/// machinery runs. Returns the existing row id if there is one.
///
/// # Errors
///
/// Returns the underlying SQLite error.
pub fn find_by_title_namespace(
    conn: &Connection,
    title: &str,
    namespace: &str,
) -> Result<Option<String>> {
    let id: Option<String> = conn
        .query_row(
            "SELECT id FROM memories WHERE title = ?1 AND namespace = ?2 LIMIT 1",
            params![title, namespace],
            |r| r.get(0),
        )
        .ok();
    Ok(id)
}

/// v0.6.3.1 P2 (G6) — pick a title that does not collide with an existing
/// `(title, namespace)` row by appending `(2)`, `(3)`, ... up to a hard cap.
/// The first available suffix wins. Used by `on_conflict='version'`.
///
/// The cap (`MAX_VERSION_SUFFIX`) prevents an infinite loop in pathological
/// cases (e.g. an attacker spamming the same title in a loop). Once the cap
/// is hit, the caller falls back to error mode.
const MAX_VERSION_SUFFIX: u32 = 1024;

/// # Errors
///
/// Returns the underlying SQLite error or an error if no free suffix is
/// found within `MAX_VERSION_SUFFIX` attempts.
pub fn next_versioned_title(
    conn: &Connection,
    base_title: &str,
    namespace: &str,
) -> Result<String> {
    if find_by_title_namespace(conn, base_title, namespace)?.is_none() {
        return Ok(base_title.to_string());
    }
    for n in 2..=MAX_VERSION_SUFFIX {
        let candidate = format!("{base_title} ({n})");
        if find_by_title_namespace(conn, &candidate, namespace)?.is_none() {
            return Ok(candidate);
        }
    }
    // #962 typed envelope — UniqueConflict (the substrate could not
    // mint a non-colliding versioned title within the cap). Caller is
    // expected to retry with a different base title or raise the cap.
    Err(anyhow::Error::new(StorageError::UniqueConflict {
        reason: format!(
            "could not find a free versioned title for '{base_title}' in namespace '{namespace}' \
             within {MAX_VERSION_SUFFIX} attempts"
        ),
    }))
}

/// Stopwords stripped before computing the title-similarity Jaccard floor
/// in [`find_contradictions`]. The list is intentionally tiny — a small
/// closed-class English set — because a maximalist stopword list would
/// over-filter agglutinative or short titles and re-introduce noise on
/// the other side. The substrate's contradiction surface is supposed to
/// be a near-duplicate-titles signal, not a generic content search.
///
/// Entries are lowercase; [`contradiction_title_tokens`] lowercases each
/// token before looking it up here.
const CONTRADICTION_TITLE_STOPWORDS: &[&str] = &[
    "a", "an", "and", "are", "as", "at", "be", "by", "for", "from", "has", "have", "in", "is",
    "it", "its", "of", "on", "or", "that", "the", "this", "to", "was", "were", "will", "with",
];

/// Minimum Jaccard-of-content-tokens between the seed title and a
/// candidate title for the candidate to qualify as a contradiction
/// hit. Computed after lowercasing + stopword removal.
///
/// **Why this exists** (issue #1320). Pre-fix, [`find_contradictions`]
/// returned the top 5 FTS5 matches on an OR-joined sanitised query
/// against the title. With seed title "Tomatoes are red" the OR list
/// becomes `"tomatoes" OR "are" OR "red"`, and FTS5 happily ranked
/// every row containing the common stopword "are" near the top.
/// Operators observed unrelated memories ("Moon landing happened in
/// 1969", "Retrieval-augmented generation works by...") flagged as
/// `potential_contradictions` against tomato facts — pure stopword
/// noise. The Jaccard floor below preserves the documented "similar
/// titles" semantics (e.g. "Database is PostgreSQL" vs "Database is
/// MySQL" share `{database}` after stopword removal — Jaccard
/// `1/3 ≈ 0.33`, passes the 0.3 floor) while rejecting the
/// disjoint-topic false positives (Jaccard 0).
const CONTRADICTION_TITLE_JACCARD_FLOOR: f32 = 0.30;

/// Lowercase + stopword-strip a title for the contradiction Jaccard
/// comparison. Splits on non-alphanumeric so titles like
/// `"Database is PostgreSQL"` and `"Database/is/PostgreSQL"` produce
/// the same token set.
///
/// Case folding uses the Unicode-aware [`str::to_lowercase`], matching the
/// Unicode-aware `char::is_alphanumeric` split, so `"ÉCOLE"` and `"école"`
/// yield the same token. ASCII-only titles tokenise exactly as before.
fn contradiction_title_tokens(title: &str) -> std::collections::HashSet<String> {
    title
        .split(|c: char| !c.is_alphanumeric())
        // Drop the empty fragments produced by adjacent separators before
        // paying for the lowercase allocation.
        .filter(|fragment| !fragment.is_empty())
        .map(str::to_lowercase)
        .filter(|t| !CONTRADICTION_TITLE_STOPWORDS.contains(&t.as_str()))
        .collect()
}

/// Jaccard similarity `|a ∩ b| / |a ∪ b|` of two pre-tokenised title sets.
///
/// Yields `0.0` whenever either set is empty, so a seed title made purely
/// of stopwords (e.g. `"the"`) cannot produce phantom hits.
#[allow(clippy::cast_precision_loss)]
fn contradiction_title_jaccard(
    a: &std::collections::HashSet<String>,
    b: &std::collections::HashSet<String>,
) -> f32 {
    if a.is_empty() || b.is_empty() {
        return 0.0;
    }
    let shared = a.iter().filter(|token| b.contains(*token)).count();
    // Inclusion–exclusion: |a ∪ b| = |a| + |b| − |a ∩ b|.
    let combined = a.len() + b.len() - shared;
    if combined == 0 {
        0.0
    } else {
        shared as f32 / combined as f32
    }
}

/// Stage-1 FTS5 recall for similar-title candidates.
///
/// Matches the sanitised seed `title` against `memories_fts`, restricted to
/// rows whose namespace equals `namespace`, ordered by FTS5 rank and capped
/// at `limit` rows (a `limit` that does not fit in `i64` falls back to 20).
///
/// This is the broader recall pool shared by two consumers with different
/// relevance budgets: [`find_contradictions`] (wire-side
/// `potential_contradictions`, which applies the Stage-2 Jaccard floor
/// afterwards) and [`find_synthesis_candidates`] (Form 1 synthesis curator,
/// NO Jaccard floor). See #1320 + #1337 for why the pool can't be filtered
/// universally.
///
/// # Errors
///
/// Returns the underlying SQLite error from preparing the statement,
/// running the query, or decoding a row.
fn find_similar_title_candidates(
    conn: &Connection,
    title: &str,
    namespace: &str,
    limit: usize,
) -> Result<Vec<Memory>> {
    let match_expr = sanitize_fts_query(title, true);
    let row_cap = i64::try_from(limit).unwrap_or(20);

    let mut stmt = conn.prepare(
        "SELECT m.id, m.tier, m.namespace, m.title, m.content, m.tags, m.priority,
                m.confidence, m.source, m.access_count, m.created_at, m.updated_at,
                m.last_accessed_at, m.expires_at, m.metadata, m.reflection_depth,
                m.memory_kind, m.entity_id, m.persona_version,
                m.citations, m.source_uri, m.source_span,
                m.confidence_source, m.confidence_signals, m.confidence_decayed_at
         FROM memories_fts fts
         JOIN memories m ON m.rowid = fts.rowid
         WHERE memories_fts MATCH ?1 AND m.namespace = ?2
         ORDER BY fts.rank
         LIMIT ?3",
    )?;

    // Stop at the first row that fails to decode, same as a fallible collect.
    let mut hits = Vec::new();
    for row in stmt.query_map(params![match_expr, namespace, row_cap], row_to_memory)? {
        hits.push(row?);
    }
    Ok(hits)
}

/// Detect potential contradictions: memories in the same namespace whose
/// titles are similar to `title`.
///
/// Two-stage filter (#1320 calibration):
/// 1. FTS5 recall via [`find_similar_title_candidates`], capped at a pool of
///    20 so a pathological common-word title can't pull the entire
///    namespace.
/// 2. Jaccard-token-overlap floor on the stopword-stripped title sets: a
///    candidate survives only when its overlap with the seed reaches
///    [`CONTRADICTION_TITLE_JACCARD_FLOOR`]. At most 5 survivors are
///    returned (the pre-fix wire ceiling), in Stage-1 rank order.
///
/// The two stages keep the "similar title" semantics that the wire-side
/// `potential_contradictions` field documents while removing the
/// stopword-OR noise floor that crossed unrelated topics at v0.6.x /
/// pre-fix v0.7.0.
///
/// **Scope** (#1337): this is the WIRE-output filter. The Form 1 synthesis
/// curator path uses [`find_synthesis_candidates`] instead, which skips
/// Stage 2 — the curator needs the broader pool to see legitimately-similar
/// memories whose titles share only one strong content token (e.g.
/// `"kubernetes deployment notes"` vs
/// `"kubernetes rolling deploy strategy"`, Jaccard 1/6 ≈ 0.167) without
/// depending on whether 0.30 happens to be the right stopword-noise floor
/// for the wire surface.
///
/// # Errors
///
/// Propagates any error from [`find_similar_title_candidates`].
pub fn find_contradictions(conn: &Connection, title: &str, namespace: &str) -> Result<Vec<Memory>> {
    // Stage 1 — wider pool than the final cap so Stage 2 has headroom.
    let pool = find_similar_title_candidates(conn, title, namespace, 20)?;

    // Stage 2 — tokenise the seed once, then test each candidate title.
    let seed_tokens = contradiction_title_tokens(title);
    let clears_floor = |cand: &Memory| {
        let overlap =
            contradiction_title_jaccard(&seed_tokens, &contradiction_title_tokens(&cand.title));
        overlap >= CONTRADICTION_TITLE_JACCARD_FLOOR
    };

    // The cap is applied after filtering so the wire shape is preserved.
    Ok(pool.into_iter().filter(clears_floor).take(5).collect())
}

/// Stage-1-only FTS5 candidate recall for the Form 1 synthesis curator
/// path.
///
/// The synthesis curator (`mcp/tools/store/synthesis.rs`) needs the broader
/// similar-title pool so the LLM can decide which candidates legitimately
/// overlap with the incoming write. The pool comes straight from
/// [`find_similar_title_candidates`] (up to 20 rows, FTS5 rank order).
///
/// The Stage-2 Jaccard floor that [`find_contradictions`] applies to its
/// wire output is intentionally OMITTED here: that floor was calibrated for
/// "stopword-only overlap" wire-noise rejection (#1320), but the synthesis
/// tests exercise legitimate single-strong-token overlaps (e.g.
/// `"kubernetes deployment notes"` vs
/// `"kubernetes rolling deploy strategy"` share `{kubernetes}` =
/// Jaccard 1/6 ≈ 0.167 < 0.30). Applying the wire floor would hide those
/// candidates from the curator and short-circuit every add/update/delete
/// verb in the synthesis verdict matrix (#1337).
///
/// Returns up to 5 candidates (matches the wire ceiling for
/// `potential_contradictions`, the historical synthesis prompt cap).
///
/// # Errors
///
/// Propagates any error from [`find_similar_title_candidates`].
pub fn find_synthesis_candidates(
    conn: &Connection,
    title: &str,
    namespace: &str,
) -> Result<Vec<Memory>> {
    let pool = find_similar_title_candidates(conn, title, namespace, 20)?;
    Ok(pool.into_iter().take(5).collect())
}

// --- Links ---
//
// v0.7.0 fix-campaign A3 (LINK-PARITY) error prefix constants
// (`LINK_CYCLE_ERR_PREFIX`, `LINK_PERMISSION_DENIED_ERR_PREFIX`) moved
// to `super::error` under #962 so they stay co-located with the typed
// `StorageError` variants whose Display impl emits them. Re-exported
// at the module root above for `db::LINK_CYCLE_ERR_PREFIX` path
// stability.

/// v0.7.0 fix-campaign A3 (LINK-PARITY) — shared pre-create validator
/// invoked by every link-write entry point.
///
/// Closes the S5-H2 HIGH finding (#690): before A3 the L1-2 cycle
/// check + K9 permission pipeline ran only in
/// `src/mcp/tools/link.rs::handle_link`, so the HTTP `POST /api/v1/links`
/// path and the federation-receive `sync_push` link loop could land
/// `reflects_on` edges that the MCP path would have refused. The fix
/// is defense-in-depth at the storage layer: every path — MCP, HTTP,
/// SAL, federation — calls this helper, so the gates enforce no
/// matter which entry point initiates the write.
///
/// Pipeline:
///
/// 1. Cycle check — invoked only when `relation == "reflects_on"`.
///    Calls [`crate::kg::cycle_check::would_create_reflection_cycle`]
///    with the namespace-scoped `effective_max_reflection_depth` cap; on
///    a `would_cycle` hit, returns an error prefixed with
///    [`LINK_CYCLE_ERR_PREFIX`] so HTTP can surface 409 CONFLICT and
///    signed-event emit can record the refusal. The walk fails CLOSED on
///    SQL errors and on depth-ceiling truncation.
/// 2. K9 permission eval — runs the unified
///    [`crate::permissions::Permissions::evaluate`] pipeline against the
///    source memory's namespace. On `Deny`, returns an error prefixed
///    with [`LINK_PERMISSION_DENIED_ERR_PREFIX`] so HTTP surfaces 403.
///    `Ask` is treated as `Deny` here because the storage-layer
///    helper has no Ask-channel back to the operator; entry points
///    that want interactive Ask handling (MCP) should invoke
///    `Permissions::evaluate` directly BEFORE calling create_link.
///
/// Both passes are scoped to the *source* memory's namespace, which is
/// looked up once and shared between them. If the source row is missing or
/// the lookup fails, `crate::DEFAULT_NAMESPACE` is used instead;
/// create_link's own existence guard surfaces the missing-memory error
/// after this returns.
///
/// `skip_governance` lets federation-receive bypass the K9 gate when
/// the inbound link has already been cryptographically attested by an
/// enrolled peer (attest_level == "peer_attested"). The cycle check
/// always runs — even a trusted peer should not be able to extend a
/// reflection cycle on the receiver. See `create_link_inbound` for the
/// caller-side decision logic.
///
/// `agent_id` defaults to `"system"` when the caller cannot resolve a
/// concrete claimant (federation receive path with no claim, etc.) —
/// the permission rule matcher uses it for `agent_pattern` matching.
///
/// # Errors
///
/// - `StorageError::LinkReflectionCycle` when the new `reflects_on` edge
///   would close a cycle.
/// - Any error returned by the cycle walk itself.
/// - `StorageError::LinkPermissionDenied` when the K9 gate denies (or asks).
pub fn validate_link_pre_create(
    conn: &Connection,
    source_id: &str,
    target_id: &str,
    relation: &str,
    agent_id: &str,
    skip_governance: bool,
) -> Result<()> {
    // Only `reflects_on` participates in the DAG invariant — the other
    // relations are intentionally allowed to form cycles (e.g. mutual
    // `related_to`).
    let is_reflection = relation == crate::models::MemoryLinkRelation::ReflectsOn.as_str();

    // Neither pass applies: nothing to validate, so skip the namespace lookup.
    if !is_reflection && skip_governance {
        return Ok(());
    }

    // Resolve the governing namespace once for both passes. This matches the
    // MCP path's choice (`src/mcp/tools/link.rs`). A missing source (or a
    // failed lookup) deliberately falls back to the default namespace rather
    // than failing here.
    let link_ns = match get(conn, source_id) {
        Ok(Some(m)) => m.namespace,
        _ => crate::DEFAULT_NAMESPACE.to_string(),
    };

    // Pass 1: cycle check.
    if is_reflection {
        // Namespace-scoped reflection-depth cap so the cycle walk's
        // fail-CLOSED ceiling tracks the same governance policy the MCP
        // link path uses.
        let max_depth = resolve_governance_policy(conn, &link_ns)
            .unwrap_or_default()
            .effective_max_reflection_depth();
        if crate::kg::cycle_check::would_create_reflection_cycle(
            conn, source_id, target_id, max_depth,
        )?
        .would_cycle
        {
            // #962 typed envelope. Display preserves `LINK_CYCLE_ERR_PREFIX`.
            return Err(anyhow::Error::new(StorageError::LinkReflectionCycle {
                source_id: source_id.to_string(),
                target_id: target_id.to_string(),
            }));
        }
    }

    // Pass 2: K9 permission eval. Skipped when the caller has already
    // established external attestation (federation peer_attested).
    if !skip_governance {
        evaluate_link_permission(&link_ns, source_id, target_id, relation, agent_id)
            .map_err(anyhow::Error::new)?;
    }
    Ok(())
}

/// #1568 (H1 residual) — backend-agnostic K9 permission evaluation for
/// a pending link write.
///
/// This is Pass 2 of [`validate_link_pre_create`], hoisted into a shared
/// free fn so BOTH adapters consult the same governance gate: the sqlite
/// path delegates from `validate_link_pre_create`; the postgres SAL
/// adapter's `link_internal` (`src/store/postgres.rs`) calls it directly
/// after resolving the source memory's namespace via SQL. Keeping the
/// evaluation here means the two backends cannot drift on link governance
/// semantics.
///
/// The evaluation context uses `Op::MemoryLink`, `link_ns` as the
/// namespace, `agent_id` as the actor, and a JSON payload carrying
/// `source_id`, `target_id` and `relation`. `Allow` and `Modify` both let
/// the write proceed.
///
/// # Errors
///
/// Returns [`StorageError::LinkPermissionDenied`] (Display preserves
/// [`LINK_PERMISSION_DENIED_ERR_PREFIX`]) on `Deny`, and on `Ask` —
/// the storage layer has no Ask channel; entry points that want
/// interactive Ask handling (MCP) run `Permissions::evaluate`
/// themselves BEFORE the storage write.
pub(crate) fn evaluate_link_permission(
    link_ns: &str,
    source_id: &str,
    target_id: &str,
    relation: &str,
    agent_id: &str,
) -> std::result::Result<(), StorageError> {
    use crate::permissions::{Decision, Op, PermissionContext, Permissions};

    let payload = serde_json::json!({
        "source_id": source_id,
        "target_id": target_id,
        "relation": relation,
    });
    let ctx = PermissionContext {
        op: Op::MemoryLink,
        namespace: link_ns.to_string(),
        agent_id: agent_id.to_string(),
        payload,
    };

    // Collapse both refusal flavours into a single reason string.
    let reason = match Permissions::evaluate(&ctx, &[]) {
        Decision::Allow | Decision::Modify(_) => return Ok(()),
        Decision::Deny(reason) => reason,
        Decision::Ask(prompt) => format!("ask deferred to storage layer ({prompt})"),
    };
    // #962 typed envelope. Display preserves
    // `LINK_PERMISSION_DENIED_ERR_PREFIX`.
    Err(StorageError::LinkPermissionDenied { reason })
}

/// Insert a directional `(source_id, target_id, relation)` link.
///
/// Backward-compat shim around [`create_link_signed`] that passes no
/// keypair, so every call here writes `signature = NULL` and
/// `attest_level = "unsigned"`. The attest level returned by
/// [`create_link_signed`] is discarded. New code that wants signing should
/// route through [`create_link_signed`] directly.
///
/// # Errors
///
/// Propagates any error from [`create_link_signed`].
pub fn create_link(
    conn: &Connection,
    source_id: &str,
    target_id: &str,
    relation: &str,
) -> Result<()> {
    let _attest_level = create_link_signed(conn, source_id, target_id, relation, None)?;
    Ok(())
}

/// v0.7 H2 — link write that optionally signs with the active agent's
/// Ed25519 keypair.
///
/// When `keypair` carries a private key, the six signable fields
/// (`src_id`, `dst_id`, `relation`, `observed_by`, `valid_from`,
/// `valid_until`) are encoded to deterministic CBOR per RFC 8949
/// §4.2.1, signed, and the 64-byte signature is persisted in the
/// existing `signature` BLOB column with `attest_level = "self_signed"`.
///
/// When `keypair` is `None` or carries only a public key, the row is
/// written with `signature = NULL` and `attest_level = "unsigned"` —
/// preserving v0.6.4 behaviour for callers that haven't generated a
/// keypair yet.
///
/// `observed_by` on the signed payload is set to the keypair's
/// `agent_id` when a signing keypair is present (the writer is, by
/// definition, the observer). Since the v0.7 H3 follow-up the
/// `observed_by` *column* is populated with that same `agent_id` on
/// signed inserts, so a row re-read later carries the claim the
/// signature commits to. Unsigned inserts leave the column NULL.
///
/// Returns the chosen attest level so callers (HTTP/MCP wrappers) can
/// surface it in the wire response without re-querying the row. The
/// level returned is the one computed for *this* call: when the
/// `INSERT OR IGNORE` hits an existing `(source_id, target_id, relation)`
/// row, nothing is written and the stored row's level is not consulted.
///
/// # Errors
///
/// - Whatever `validate_link_pre_create` rejects.
/// - `StorageError::MemoryNotFound` (with the matching `LinkEnd` role)
///   when the source or target existence probe does not report `true`.
/// - Any error from `crate::identity::sign::sign` — raised before the
///   INSERT is issued.
/// - Any `rusqlite` error from the INSERT itself.
///
/// A failure to append the `memory_link.created` audit row is logged
/// and does NOT fail the call.
pub fn create_link_signed(
    conn: &Connection,
    source_id: &str,
    target_id: &str,
    relation: &str,
    keypair: Option<&crate::identity::keypair::AgentKeypair>,
) -> Result<&'static str> {
    // v0.7.0 fix-campaign A3 (LINK-PARITY, #690) — gates that were
    // previously enforced only at `src/mcp/tools/link.rs::handle_link`
    // now run here so EVERY caller (MCP, HTTP, SAL, federation) hits
    // them. The agent_id used for the K9 evaluation is the keypair's
    // claim when present (the writer is by definition the actor);
    // when no keypair is configured we fall back to "system" — the
    // unified evaluator's `agent_pattern` defaults to `*`, so an
    // operator who has not authored agent-narrow rules sees no
    // behaviour change. The MCP path runs its own evaluate BEFORE
    // calling here (it needs Ask-channel handling we can't surface
    // from storage); the second evaluation here is idempotent under
    // the registry's deny-first semantics.
    let agent_id_for_eval = keypair
        .as_ref()
        .map(|kp| kp.agent_id.as_str())
        .unwrap_or("system");
    validate_link_pre_create(
        conn,
        source_id,
        target_id,
        relation,
        agent_id_for_eval,
        false,
    )?;
    // Verify both IDs exist before creating link.
    //
    // NOTE(review): `unwrap_or(false)` folds ANY error from the probe
    // query into "does not exist", so a database failure here is
    // reported to the caller as `MemoryNotFound` rather than as the
    // underlying error — confirm that is intended.
    let source_exists: bool = conn
        .query_row(SQL_MEMORY_EXISTS, params![source_id], |r| r.get(0))
        .unwrap_or(false);
    if !source_exists {
        // #962 typed envelope — MemoryNotFound{role=Source}.
        return Err(anyhow::Error::new(StorageError::MemoryNotFound {
            id: source_id.to_string(),
            role: Some(LinkEnd::Source),
        }));
    }
    let target_exists: bool = conn
        .query_row(SQL_MEMORY_EXISTS, params![target_id], |r| r.get(0))
        .unwrap_or(false);
    if !target_exists {
        // #962 typed envelope — MemoryNotFound{role=Target}.
        return Err(anyhow::Error::new(StorageError::MemoryNotFound {
            id: target_id.to_string(),
            role: Some(LinkEnd::Target),
        }));
    }
    // Schema v15 (Pillar 2 / Stream B) added `valid_from` for temporal
    // KG queries. Backfill on migration handled legacy rows; here we
    // populate it on the insert path so newly created links are
    // visible to `memory_kg_timeline` without a downstream backfill.
    //
    // v0.7.0 H6 (round-2): mirror the postgres G3 fix at
    // `store/postgres.rs:3539` — truncate the timestamp to microsecond
    // precision BEFORE we both sign over it and persist it. SQLite
    // stores RFC3339 TEXT and round-trips losslessly so this is a
    // no-op for SQLite reads, BUT a link created on the SQLite path
    // and later re-verified on the postgres path (or vice versa)
    // must commit to the same canonical RFC3339 string on both
    // sides. Postgres's `TIMESTAMPTZ` quantises at microsecond
    // resolution, so sub-microsecond digits silently disappear on
    // round-trip and break the Ed25519 signature. Truncating here
    // makes the sign/verify CBOR byte-stable across the storage
    // boundary regardless of which adapter wrote the row originally.
    let now = truncate_to_microseconds(Utc::now()).to_rfc3339();

    // v0.7 H2 — sign if we have a private key. We compute the signature
    // BEFORE issuing INSERT so a CBOR/sign failure surfaces as an
    // outright write error (vs. a silent partial-write). The signed
    // payload includes `valid_from = now` and matching `observed_by`
    // so H3's verifier can re-derive the same bytes from the row.
    //
    // v0.7 H3 follow-up: the `observed_by` *column* is now populated
    // from the keypair's `agent_id` on signed inserts so federation
    // export (`export_links`) ships the same claim the signature
    // commits to. Receivers re-derive `SignableLink` from the wire
    // record (see `verify::verify`); without populating the column,
    // verification would always fail with `Tampered` because the
    // sender signed `Some(agent_id)` but exported `None`.
    let (signature, attest_level, observed_by_col): (Option<Vec<u8>>, &'static str, Option<&str>) =
        match keypair {
            Some(kp) if kp.can_sign() => {
                let link = crate::identity::sign::SignableLink {
                    src_id: source_id,
                    dst_id: target_id,
                    relation,
                    observed_by: Some(kp.agent_id.as_str()),
                    valid_from: Some(now.as_str()),
                    valid_until: None,
                };
                let sig = crate::identity::sign::sign(kp, &link)?;
                (
                    Some(sig),
                    crate::models::AttestLevel::SelfSigned.as_str(),
                    Some(kp.agent_id.as_str()),
                )
            }
            // No keypair, or a keypair that cannot sign: unsigned row with
            // a NULL `observed_by` column.
            _ => (None, crate::models::AttestLevel::Unsigned.as_str(), None),
        };

    // `?4` is bound once and used for both `created_at` and `valid_from`,
    // so the two columns always hold the same truncated timestamp.
    let inserted = conn.execute(
        "INSERT OR IGNORE INTO memory_links \
            (source_id, target_id, relation, created_at, valid_from, signature, attest_level, observed_by) \
         VALUES (?1, ?2, ?3, ?4, ?4, ?5, ?6, ?7)",
        params![
            source_id,
            target_id,
            relation,
            now,
            signature,
            attest_level,
            observed_by_col
        ],
    )?;

    // v0.7.0 S4-INFO2 — append a `memory_link.created` row to
    // `signed_events` so the audit ledger reflects every new link
    // (signed or unsigned). The `payload_hash` binds to the same
    // canonical CBOR that the H2 signer hashed (or would have, for
    // unsigned rows) so an auditor can re-derive the bytes and check
    // them against the row.
    //
    // Best-effort: a failure here logs a warn but does NOT roll back
    // the link insert. Cratering a legitimate write because the
    // append-only ledger had a transient SQLite error would punish
    // the caller for a substrate problem they cannot fix — same
    // discipline as `invalidate_link`'s `memory_link.invalidated`
    // emit (see also A2's pattern on `execute_pending_action`).
    //
    // We only emit when the INSERT actually wrote a row.
    // `INSERT OR IGNORE` returns `Ok(0)` on a uniqueness-conflict
    // replay of an existing `(source_id, target_id, relation)`; in
    // that case the audit row was already appended on the original
    // create call, and re-appending would generate a misleading
    // duplicate-create event.
    if inserted > 0 {
        // Unsigned writes have no `observed_by`, so the event is
        // attributed to "unknown" — note this differs from the "system"
        // label used for the permission evaluation above.
        let agent_for_event = observed_by_col
            .map(str::to_string)
            .unwrap_or_else(|| "unknown".to_string());
        // Rebuilt from the same inputs as the signed payload so the hash
        // below covers the bytes the signer committed to.
        let signable = crate::identity::sign::SignableLink {
            src_id: source_id,
            dst_id: target_id,
            relation,
            observed_by: observed_by_col,
            valid_from: Some(now.as_str()),
            valid_until: None,
        };
        match crate::identity::sign::canonical_cbor(&signable) {
            Ok(cbor) => {
                let event = crate::signed_events::SignedEvent {
                    id: uuid::Uuid::new_v4().to_string(),
                    agent_id: agent_for_event,
                    event_type: crate::signed_events::event_types::MEMORY_LINK_CREATED.to_string(),
                    payload_hash: crate::signed_events::payload_hash(&cbor),
                    signature: signature.clone(),
                    attest_level: attest_level.to_string(),
                    // Event time is sampled afresh (and not truncated); it
                    // is independent of the link's `valid_from`.
                    timestamp: Utc::now().to_rfc3339(),
                    ..crate::signed_events::SignedEvent::default()
                };
                if let Err(e) = crate::signed_events::append_signed_event(conn, &event) {
                    tracing::warn!(
                        target: crate::signed_events::SIGNED_EVENTS_TRACE_TARGET,
                        source_id, target_id, relation,
                        "failed to append memory_link.created audit row: {e}"
                    );
                }
            }
            Err(e) => {
                tracing::warn!(
                    target: crate::signed_events::SIGNED_EVENTS_TRACE_TARGET,
                    source_id, target_id, relation,
                    "failed to encode canonical CBOR for memory_link.created audit: {e}"
                );
            }
        }
    }

    Ok(attest_level)
}

/// v0.7.0 issue #812 / #813 — return the strongest `attest_level`
/// label across every outbound link rooted at `source_id`.
///
/// Strength ladder (highest first):
///
///   `peer_attested` > `self_signed` > `unsigned`
///
/// The persona-signing path (`PersonaGenerator::generate`) uses this
/// to stamp the Persona's own `attest_level` metadata so the
/// downstream `memory_persona` / `memory_persona_generate` wire
/// response carries the same attestation level the substrate's
/// `derives_from` edges actually hold — a Persona whose source
/// links are all signed is itself self-signed, whereas a Persona
/// whose source links are unsigned cannot truthfully claim
/// `self_signed` no matter what label the curator stamps on it.
///
/// Returns `"unsigned"` for a source with no outbound links — the
/// only honest default for a row whose attestation surface is
/// empty. A link whose `attest_level` cell is NULL, or holds a label
/// outside the ladder, contributes nothing and therefore also counts
/// as unsigned.
///
/// # Errors
///
/// Bubbles up `rusqlite` errors from the SELECT.
pub fn strongest_attest_level_for_source(conn: &Connection, source_id: &str) -> Result<String> {
    let mut stmt = conn.prepare(
        "SELECT attest_level FROM memory_links \
         WHERE source_id = ?1",
    )?;
    // Decode as `Option<String>`: the other readers of this column
    // (`get_links`, `get_link_for_verify`) treat it as nullable, and a
    // non-optional `String` read would turn a single NULL cell into a
    // hard error for the whole scan.
    let rows = stmt.query_map(params![source_id], |r| r.get::<_, Option<String>>(0))?;
    let unsigned = crate::models::AttestLevel::Unsigned.as_str();
    let self_signed = crate::models::AttestLevel::SelfSigned.as_str();
    let peer_attested = crate::models::AttestLevel::PeerAttested.as_str();
    let mut strongest = unsigned;
    for row in rows {
        let level = row?;
        match level.as_deref() {
            // Top of the ladder — nothing can beat it, so stop scanning.
            Some(label) if label == peer_attested => return Ok(peer_attested.to_string()),
            Some(label) if label == self_signed => strongest = self_signed,
            // NULL, `unsigned`, or an unrecognised label: no upgrade.
            _ => {}
        }
    }
    Ok(strongest.to_string())
}

/// v0.7 H3 — insert an inbound (federation-replicated) link with a
/// pre-computed signature and attest level.
///
/// Distinct from [`create_link_signed`] because the receiver is *not*
/// the signer: it must persist whatever bytes the peer signed
/// (signature + observed_by + valid_from + valid_until) verbatim, so a
/// later `memory_verify` (H4) can re-derive the same canonical CBOR
/// from the stored row and re-check against the peer's public key. We
/// can't re-sign on the receiver — we don't hold the peer's private
/// key, by design.
///
/// The caller (federation `sync_push` link loop) is responsible for:
/// 1. Looking up the peer's public key via
///    [`crate::identity::verify::lookup_peer_public_key`].
/// 2. Calling [`crate::identity::verify::verify`] when a public key is
///    known, and rejecting the link when verification fails.
/// 3. Choosing the `attest_level` literal:
///    - `"peer_attested"` — verified successfully against an enrolled key,
///    - `"unsigned"` — no public key enrolled for `observed_by`, or the
///      sender shipped no signature (legacy peer).
///
/// This function does not verify the signature itself; it stores
/// `attest_level` exactly as passed in.
///
/// Idempotent on the unique `(source_id, target_id, relation)` index —
/// duplicate inbound replays collapse to a no-op without error.
///
/// Both `source_id` and `target_id` must already exist locally; the
/// receiver is expected to apply incoming `memories` *before* incoming
/// `links` in the same `sync_push` request, which the existing handler
/// already does.
///
/// `valid_from` defaults to "now" only when the inbound row carries
/// `None` (legacy peer that never populated the column); otherwise the
/// peer's value is preserved so the signature still verifies.
/// `created_at` likewise falls back to "now" only when the inbound
/// value is an empty string.
///
/// # Errors
///
/// Bubbles up the same DB / FK errors as `create_link_signed`. Pre-flight
/// existence checks mirror the outbound path so the receiver fails loud
/// on missing memories rather than silently dropping the link. A failed
/// audit-ledger append is logged and does not fail the call.
pub fn create_link_inbound(conn: &Connection, link: &MemoryLink, attest_level: &str) -> Result<()> {
    // v0.7.0 fix-campaign A3 (LINK-PARITY, #690) — defense-in-depth at
    // the receiver. The cycle check ALWAYS runs even on inbound peer
    // writes: a peer should not be able to extend a `reflects_on`
    // cycle on the receiver any more than a local caller can. The K9
    // permission gate is BYPASSED only when the inbound link is
    // `peer_attested` (the peer's signature was cryptographically
    // verified against an enrolled public key in
    // `handlers::federation_receive::sync_push` before this call). For
    // every other attest_level — including `"unsigned"`, which covers
    // legacy peers AND peers whose public key we have not enrolled —
    // the local K9 rules enforce. This is the design choice documented
    // in #690: mTLS + Ed25519 sig verification is the federation's
    // attestation layer; once that passes, namespace governance is the
    // peer's local responsibility, not the receiver's. The
    // `observed_by` claim becomes the `agent_id` for the K9 evaluation
    // when not bypassed — that's the peer's claimed writer and matches
    // what the rule matcher already uses for outbound links.
    let skip_governance = attest_level == crate::models::AttestLevel::PeerAttested.as_str();
    // A link without an `observed_by` claim is evaluated as "system".
    let peer_agent_id = link.observed_by.as_deref().unwrap_or("system");
    validate_link_pre_create(
        conn,
        &link.source_id,
        &link.target_id,
        link.relation.as_str(),
        peer_agent_id,
        skip_governance,
    )?;
    // Same FK guard as create_link_signed — a missing memory means the
    // peer raced ahead of us; we surface that to the caller's warn log
    // rather than papering over with INSERT OR IGNORE silently.
    //
    // NOTE(review): `unwrap_or(false)` folds ANY error from the probe
    // query into "does not exist", so a database failure here is
    // reported as `MemoryNotFound` rather than as the underlying
    // error — confirm that is intended.
    let source_exists: bool = conn
        .query_row(SQL_MEMORY_EXISTS, params![link.source_id], |r| r.get(0))
        .unwrap_or(false);
    if !source_exists {
        // #962 typed envelope — MemoryNotFound{role=Source}.
        return Err(anyhow::Error::new(StorageError::MemoryNotFound {
            id: link.source_id.clone(),
            role: Some(LinkEnd::Source),
        }));
    }
    let target_exists: bool = conn
        .query_row(SQL_MEMORY_EXISTS, params![link.target_id], |r| r.get(0))
        .unwrap_or(false);
    if !target_exists {
        // #962 typed envelope — MemoryNotFound{role=Target}.
        return Err(anyhow::Error::new(StorageError::MemoryNotFound {
            id: link.target_id.clone(),
            role: Some(LinkEnd::Target),
        }));
    }

    // Fallback timestamp, used only for fields the peer left unset.
    // NOTE(review): unlike `create_link_signed`, this value is not passed
    // through `truncate_to_microseconds` — confirm that is acceptable for
    // the legacy-peer fallback path.
    let now = Utc::now().to_rfc3339();
    // Preserve peer's `valid_from` byte-identical so `memory_verify`
    // (H4) can re-derive the signed payload from the stored row.
    let valid_from = link.valid_from.clone().unwrap_or_else(|| now.clone());
    // An empty `created_at` is treated as "not provided".
    let created_at = if link.created_at.is_empty() {
        now
    } else {
        link.created_at.clone()
    };

    // The peer's signature, `observed_by` and `valid_until` are bound
    // straight from `link`; only `attest_level` comes from the caller.
    let inserted = conn.execute(
        "INSERT OR IGNORE INTO memory_links \
            (source_id, target_id, relation, created_at, valid_from, valid_until, \
             signature, attest_level, observed_by) \
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)",
        params![
            link.source_id,
            link.target_id,
            link.relation.as_str(),
            created_at,
            valid_from,
            link.valid_until,
            link.signature,
            attest_level,
            link.observed_by,
        ],
    )?;

    // v0.7.0 S4-INFO2 — append a `memory_link.created` row to
    // `signed_events` for inbound replicated links too. The audit
    // ledger should reflect every new link visible locally, not just
    // outbound writes. `payload_hash` binds to the canonical CBOR
    // re-derived from the wire-shape link the peer signed, so an
    // auditor can replay the exact bytes that were verified at
    // ingress.
    //
    // Best-effort: a failure logs a warn but does NOT roll back the
    // link insert (same discipline as the outbound path above and as
    // `invalidate_link`'s emit).
    //
    // Only emit when the INSERT actually wrote a row (idempotent
    // sync replays must not generate duplicate-create events).
    if inserted > 0 {
        // Attributed to "unknown" when the peer made no `observed_by`
        // claim — note this differs from the "system" label used for the
        // permission evaluation above.
        let agent_for_event = link
            .observed_by
            .clone()
            .unwrap_or_else(|| "unknown".to_string());
        // Uses the resolved `valid_from` (peer value or the local
        // fallback), i.e. exactly what was written to the row.
        let signable = crate::identity::sign::SignableLink {
            src_id: link.source_id.as_str(),
            dst_id: link.target_id.as_str(),
            relation: link.relation.as_str(),
            observed_by: link.observed_by.as_deref(),
            valid_from: Some(valid_from.as_str()),
            valid_until: link.valid_until.as_deref(),
        };
        match crate::identity::sign::canonical_cbor(&signable) {
            Ok(cbor) => {
                let event = crate::signed_events::SignedEvent {
                    id: uuid::Uuid::new_v4().to_string(),
                    agent_id: agent_for_event,
                    event_type: crate::signed_events::event_types::MEMORY_LINK_CREATED.to_string(),
                    payload_hash: crate::signed_events::payload_hash(&cbor),
                    signature: link.signature.clone(),
                    attest_level: attest_level.to_string(),
                    // Local ingest time, not the peer's timestamp.
                    timestamp: Utc::now().to_rfc3339(),
                    ..crate::signed_events::SignedEvent::default()
                };
                if let Err(e) = crate::signed_events::append_signed_event(conn, &event) {
                    tracing::warn!(
                        target: crate::signed_events::SIGNED_EVENTS_TRACE_TARGET,
                        source_id = %link.source_id,
                        target_id = %link.target_id,
                        relation = %link.relation,
                        "failed to append memory_link.created audit row (inbound): {e}"
                    );
                }
            }
            Err(e) => {
                tracing::warn!(
                    target: crate::signed_events::SIGNED_EVENTS_TRACE_TARGET,
                    source_id = %link.source_id,
                    target_id = %link.target_id,
                    relation = %link.relation,
                    "failed to encode canonical CBOR for inbound memory_link.created audit: {e}"
                );
            }
        }
    }

    Ok(())
}

/// Fetch every link that touches `id`, whether `id` is the source or the
/// target end.
///
/// v0.7.0 issue #860 — each returned [`MemoryLink`] carries the
/// temporal-validity and attestation columns (`valid_from`,
/// `valid_until`, `observed_by`, `attest_level`) that the
/// `memory_get_links` MCP tool's docstring promises. `signature` is
/// always `None` on this path: the raw bytes belong to the
/// `memory_verify` surface (`LinkVerifyRecord`), and returning them here
/// would force a base64 blob into every graph-view response.
///
/// # Errors
///
/// Bubbles up `rusqlite` errors from preparing the statement or decoding
/// a row.
pub fn get_links(conn: &Connection, id: &str) -> Result<Vec<MemoryLink>> {
    // Decodes one projected row. Columns are read in the same order as
    // before (relation first) so the first failing column is unchanged.
    fn link_from_row(row: &rusqlite::Row<'_>) -> rusqlite::Result<MemoryLink> {
        let relation_text: String = row.get(2)?;
        let source_id = row.get(0)?;
        let target_id = row.get(1)?;
        // v0.7.0 fix campaign R1-M4 — the TEXT column is parsed into the
        // closed `MemoryLinkRelation` set. A value outside that set (only
        // reachable via pre-CHECK rows or a buggy direct-SQL writer)
        // degrades to the default relation instead of failing the read;
        // the write-side SQL CHECK keeps new bad rows out.
        let relation =
            crate::models::MemoryLinkRelation::from_str(&relation_text).unwrap_or_default();
        let created_at = row.get(3)?;
        let valid_from: Option<String> = row.get(4)?;
        let valid_until: Option<String> = row.get(5)?;
        let observed_by: Option<String> = row.get(6)?;
        let attest_level: Option<String> = row.get(7)?;
        Ok(MemoryLink {
            source_id,
            target_id,
            relation,
            created_at,
            // Deliberately withheld — see the function docs.
            signature: None,
            valid_from,
            valid_until,
            observed_by,
            attest_level,
        })
    }

    let mut stmt = conn.prepare(
        "SELECT source_id, target_id, relation, created_at, \
                valid_from, valid_until, observed_by, attest_level \
         FROM memory_links \
         WHERE source_id = ?1 OR target_id = ?1",
    )?;

    let mut links = Vec::new();
    for decoded in stmt.query_map(params![id], link_from_row)? {
        // Stop at the first row that fails to decode.
        links.push(decoded?);
    }
    Ok(links)
}

/// Delete the link rows running from `source_id` to `target_id`.
///
/// The match is on the two endpoints only, so rows of every relation
/// between that ordered pair are removed. Returns `true` when at least
/// one row was deleted and `false` when nothing matched.
///
/// # Errors
///
/// Bubbles up `rusqlite` errors from the DELETE.
#[allow(dead_code)]
pub fn delete_link(conn: &Connection, source_id: &str, target_id: &str) -> Result<bool> {
    let removed_rows = conn.execute(
        "DELETE FROM memory_links WHERE source_id = ?1 AND target_id = ?2",
        params![source_id, target_id],
    )?;
    Ok(removed_rows != 0)
}

/// v0.7 H4 — row projection used by the `memory_verify` MCP tool.
///
/// `get_links` (above) surfaces the temporal-validity and attestation
/// columns but deliberately withholds the raw `signature`; H4 needs the
/// *signed bundle* — the raw signature blob, the agent_id that signed
/// (`observed_by`), and the temporal-validity columns the signature
/// commits to. Keeping this as a separate type leaves the `get_links`
/// read path's wire shape unchanged.
///
/// Produced by [`get_link_for_verify`], which yields `Ok(None)` when the
/// row is absent so the caller can shape a "not found" response instead
/// of bubbling up a generic SQL error.
#[derive(Debug, Clone)]
pub struct LinkVerifyRecord {
    /// Source end of the link.
    pub source_id: String,
    /// Target end of the link.
    pub target_id: String,
    /// Relation label exactly as stored in the TEXT column (not parsed
    /// into `MemoryLinkRelation`).
    pub relation: String,
    /// Raw signature blob; `None` when the column is NULL.
    pub signature: Option<Vec<u8>>,
    /// Stored `observed_by` claim; `None` when the column is NULL.
    pub observed_by: Option<String>,
    /// Stored `valid_from` text; `None` when the column is NULL.
    pub valid_from: Option<String>,
    /// Stored `valid_until` text; `None` when the column is NULL.
    pub valid_until: Option<String>,
    /// Raw column value as stored by H2/H3 (`"unsigned"`, `"self_signed"`,
    /// `"peer_attested"`, or rarely `NULL` for very old rows that
    /// pre-date the H2 `attest_level` column). H4's MCP handler
    /// normalises a `NULL` to the `Unsigned` enum variant.
    pub attest_level: Option<String>,
}

/// Look up one link by its `(source_id, target_id, relation)` triple and
/// return the signed bundle stored for it.
///
/// The H4 `memory_verify` MCP tool calls this to rebuild the canonical
/// CBOR payload from the persisted row before re-checking the
/// signature. Only the first matching row is read.
///
/// # Errors
///
/// Bubbles up rusqlite errors from preparing, stepping, or decoding the
/// row. A missing row is NOT an error: it comes back as `Ok(None)`,
/// which is what lets `memory_verify` answer with a structured
/// "link not found" instead of a generic SQL failure.
pub fn get_link_for_verify(
    conn: &Connection,
    source_id: &str,
    target_id: &str,
    relation: &str,
) -> Result<Option<LinkVerifyRecord>> {
    let mut stmt = conn.prepare(
        "SELECT source_id, target_id, relation, signature, observed_by, \
                valid_from, valid_until, attest_level \
         FROM memory_links \
         WHERE source_id = ?1 AND target_id = ?2 AND relation = ?3",
    )?;
    let mut matches = stmt.query(params![source_id, target_id, relation])?;

    // Guard clause: no row means "not found", not a failure.
    let Some(row) = matches.next()? else {
        return Ok(None);
    };

    let record = LinkVerifyRecord {
        source_id: row.get(0)?,
        target_id: row.get(1)?,
        relation: row.get(2)?,
        // The remaining columns are nullable, hence the explicit `Option`s.
        signature: row.get::<_, Option<Vec<u8>>>(3)?,
        observed_by: row.get::<_, Option<String>>(4)?,
        valid_from: row.get::<_, Option<String>>(5)?,
        valid_until: row.get::<_, Option<String>>(6)?,
        attest_level: row.get::<_, Option<String>>(7)?,
    };
    Ok(Some(record))
}

// --- Consolidation ---

/// Canonical `source` value stamped on rows minted by [`consolidate`].
///
/// #1558 batch 5 wave 3 — MCP `memory_consolidate` + the HTTP
/// power-consolidation handler pass it verbatim. Listed in
/// `validate::VALID_SOURCES`; one spelling, hoist-only.
pub const CONSOLIDATION_SOURCE: &str = "consolidation";

/// Consolidate multiple memories into one. Returns the new memory ID.
///
/// Inserts a fresh memory that merges the sources, then deletes the
/// source memories. No `memory_links` rows are created: links to the
/// sources would be CASCADE-deleted along with them, so provenance is
/// recorded in the new memory's metadata instead (the source IDs under
/// the `MemoryLinkRelation::DerivedFrom` label, and the sources'
/// distinct `agent_id`s under `consolidated_from_agents`).
///
/// The merged row gets:
/// - `priority` — the highest source priority, never below 5;
/// - `tags` — the sorted, de-duplicated union of the sources' tags;
/// - `access_count` — the saturating sum of the sources' counts;
/// - `metadata` — the sources' object metadata merged in `ids` order
///   (later values win on key conflict), with `agent_id` forced to
///   `consolidator_agent_id`;
/// - `confidence` pinned to 1.0 with `CuratorDerived` provenance.
///
/// Everything runs between `connection::SQL_BEGIN_IMMEDIATE` and
/// `connection::SQL_COMMIT`; any error inside the body triggers
/// `connection::SQL_ROLLBACK` and is then returned.
///
/// # Arguments
///
/// * `ids` — memories to merge and delete.
/// * `title` / `summary` — title and content of the new memory.
/// * `namespace`, `tier`, `source` — stored verbatim on the new row.
/// * `consolidator_agent_id` — becomes the new memory's `agent_id`.
///
/// # Errors
///
/// - `StorageError::MemoryNotFound` when any entry of `ids` is absent.
/// - Metadata validation failure ("merged metadata exceeds size limit").
/// - A refusal from `consult_governance_pre_write`.
/// - Any database or serialization error from the steps above.
#[allow(clippy::too_many_arguments)]
pub fn consolidate(
    conn: &Connection,
    ids: &[String],
    title: &str,
    summary: &str,
    namespace: &str,
    tier: &Tier,
    source: &str,
    consolidator_agent_id: &str,
) -> Result<String> {
    let now = Utc::now().to_rfc3339();
    let new_id = uuid::Uuid::new_v4().to_string();

    conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)?;

    // The transaction body is a closure so every `?` inside it funnels
    // into the single COMMIT / ROLLBACK decision below.
    let result = (|| -> Result<String> {
        // Verify all IDs exist and collect metadata in one pass
        let mut max_priority = 5i32;
        let mut all_tags: Vec<String> = Vec::new();
        let mut total_access = 0i64;
        let mut merged_metadata = serde_json::Map::new();
        // Collect original agent_ids separately — they go into
        // `consolidated_from_agents` for forensic attribution.
        // The consolidator's own agent_id becomes `agent_id` on the result.
        let mut source_agent_ids: Vec<String> = Vec::new();
        for id in ids {
            match get(conn, id)? {
                Some(mem) => {
                    max_priority = max_priority.max(mem.priority);
                    all_tags.extend(mem.tags);
                    total_access = total_access.saturating_add(mem.access_count);
                    // Merge metadata: later values overwrite earlier ones on key conflict.
                    // Intentionally SKIP `agent_id` to avoid last-write-wins forgery;
                    // the consolidator's id is authoritative on the result.
                    if let serde_json::Value::Object(map) = mem.metadata {
                        for (k, v) in map {
                            if k == "agent_id" {
                                // Only string-valued agent ids are kept, each once.
                                if let serde_json::Value::String(aid) = &v
                                    && !source_agent_ids.contains(aid)
                                {
                                    source_agent_ids.push(aid.clone());
                                }
                                continue;
                            }
                            // A change of JSON type for the same key is
                            // logged but the later value still wins.
                            if let Some(existing) = merged_metadata.get(&k)
                                && std::mem::discriminant(existing) != std::mem::discriminant(&v)
                            {
                                tracing::warn!(
                                    "consolidate: key '{}' type changed during merge",
                                    k
                                );
                            }
                            merged_metadata.insert(k, v);
                        }
                    } else {
                        tracing::warn!(
                            "memory {} has non-object metadata during consolidate, skipping",
                            id
                        );
                    }
                }
                None => {
                    // #962 typed envelope.
                    return Err(anyhow::Error::new(StorageError::MemoryNotFound {
                        id: id.to_string(),
                        role: None,
                    }));
                }
            }
        }
        all_tags.sort();
        all_tags.dedup();
        let tags_json = serde_json::to_string(&all_tags)?;
        // Record source IDs in metadata for provenance (links would be CASCADE-deleted)
        merged_metadata.insert(
            crate::models::MemoryLinkRelation::DerivedFrom
                .as_str()
                .to_string(),
            serde_json::Value::Array(
                ids.iter()
                    .map(|id| serde_json::Value::String(id.clone()))
                    .collect(),
            ),
        );
        // NHI: the consolidator owns the new memory (authoritative agent_id);
        // original authors are preserved as a separate array for forensics.
        merged_metadata.insert(
            "agent_id".to_string(),
            serde_json::Value::String(consolidator_agent_id.to_string()),
        );
        if !source_agent_ids.is_empty() {
            merged_metadata.insert(
                "consolidated_from_agents".to_string(),
                serde_json::Value::Array(
                    source_agent_ids
                        .into_iter()
                        .map(serde_json::Value::String)
                        .collect(),
                ),
            );
        }
        let merged_metadata_value = serde_json::Value::Object(merged_metadata);
        crate::validate::validate_metadata(&merged_metadata_value)
            .context("merged metadata exceeds size limit")?;
        let metadata_json = serde_json::to_string(&merged_metadata_value)?;

        // FX-C5 — substrate governance pre-write hook parity. Consolidate
        // mints a fresh memory via a raw INSERT that bypasses the
        // `db::insert(..)` tail (which is where the SQLite path normally
        // consults `GOVERNANCE_PRE_WRITE`). Without this call the
        // operator's signed governance rules could be bypassed by
        // routing through the consolidate surface. Compose the candidate
        // memory shape the way the INSERT below would persist it and
        // fire the hook; a refusal short-circuits the transaction body
        // and the outer ROLLBACK undoes any work already done in this
        // closure.
        let candidate = Memory {
            id: new_id.clone(),
            tier: tier.clone(),
            namespace: namespace.to_string(),
            title: title.to_string(),
            content: summary.to_string(),
            tags: all_tags.clone(),
            priority: max_priority,
            confidence: 1.0,
            source: source.to_string(),
            access_count: total_access,
            created_at: now.clone(),
            updated_at: now.clone(),
            last_accessed_at: None,
            expires_at: None,
            metadata: merged_metadata_value.clone(),
            reflection_depth: 0,
            memory_kind: crate::models::MemoryKind::Observation,
            entity_id: None,
            persona_version: None,
            citations: Vec::new(),
            source_uri: None,
            source_span: None,
            // #1633 — the engine pins confidence=1.0, so the honest
            // provenance is CuratorDerived (the #1242 audit-honesty
            // invariant: engine-derived values must be discoverable to
            // the calibration sweep; 'caller_provided' rows are
            // excluded by idx_memories_confidence_source).
            confidence_source: crate::models::ConfidenceSource::CuratorDerived,
            confidence_signals: None,
            confidence_decayed_at: None,
            version: crate::models::default_memory_version(),
        };
        consult_governance_pre_write(&candidate)?;

        // v0.7.0 #1466 — consolidate mints a fresh memory via this raw
        // INSERT, so it must carry the tier-default expiry too; otherwise a
        // consolidated mid/short row would be immortal (NULL expires_at) and
        // never reaped by GC. `candidate.created_at == now` so the backfill
        // here matches the `?10` bound below.
        conn.execute(
            "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, confidence, source, access_count, created_at, updated_at, expires_at, metadata, confidence_source)
             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, 1.0, ?8, ?9, ?10, ?10, ?11, ?12, ?13)",
            params![new_id, tier.as_str(), namespace, title, summary, tags_json, max_priority, source, total_access, now, candidate.effective_expires_at(), metadata_json, candidate.confidence_source.as_str()],
        )?;

        // Delete the source memories now that the consolidated row has
        // been written. Note: we intentionally do NOT create derived_from
        // links before deletion because ON DELETE CASCADE would
        // immediately remove them. Instead, source IDs are recorded in the
        // consolidated memory's metadata for provenance.
        for id in ids {
            delete(conn, id)?;
        }

        Ok(new_id.clone())
    })();

    match result {
        Ok(id) => {
            // NOTE(review): if this COMMIT fails the error is returned
            // without issuing a ROLLBACK, which presumably leaves the
            // transaction open on `conn` — confirm callers cope with that.
            conn.execute_batch(connection::SQL_COMMIT)?;
            Ok(id)
        }
        Err(e) => {
            // A failed ROLLBACK is only logged; the original error is
            // what the caller sees.
            if let Err(rb) = conn.execute_batch(connection::SQL_ROLLBACK) {
                tracing::error!("ROLLBACK failed in consolidate: {}", rb);
            }
            Err(e)
        }
    }
}

// ---------------------------------------------------------------------------
// Reflection (v0.7.0 recursive-learning Task 4/8, issue #655).
// ---------------------------------------------------------------------------

/// Remove zero-width and otherwise invisible Unicode code points from
/// `s` so they cannot hide inside a search term and let it bypass FTS
/// matching.
///
/// Every other character (including ordinary whitespace) is kept, in
/// its original order.
fn strip_invisible(s: &str) -> String {
    /// `true` for the code points this module treats as invisible.
    fn is_invisible(ch: char) -> bool {
        matches!(
            ch,
            // Soft hyphen, combining grapheme joiner, Arabic letter
            // mark, Mongolian vowel separator.
            '\u{00AD}' | '\u{034F}' | '\u{061C}' | '\u{180E}'
                // Zero-width space / non-joiner / joiner, LRM, RLM.
                | '\u{200B}'..='\u{200F}'
                // Bidi embedding / override controls.
                | '\u{202A}'..='\u{202E}'
                // Word joiner + invisible math operators.
                | '\u{2060}'..='\u{2064}'
                // Bidi isolate controls.
                | '\u{2066}'..='\u{2069}'
                // Variation selectors.
                | '\u{FE00}'..='\u{FE0F}'
                // Zero-width no-break space / BOM.
                | '\u{FEFF}'
        )
    }

    let mut visible = String::with_capacity(s.len());
    for ch in s.chars() {
        if !is_invisible(ch) {
            visible.push(ch);
        }
    }
    visible
}

/// Turn free-form user text into a safe FTS5 `MATCH` expression.
///
/// The input is stripped of invisible characters, split on whitespace,
/// and every surviving token is wrapped in double quotes so FTS5 sees
/// it as a phrase rather than as query syntax. Phrases are joined with
/// `" OR "` when `use_or` is set and with a single space otherwise.
/// When nothing survives, the placeholder phrase `"_empty_"` is
/// returned instead of an empty expression.
fn sanitize_fts_query(input: &str, use_or: bool) -> String {
    /// Characters removed from every token to prevent FTS5 injection.
    ///
    /// `-` is deliberately NOT in this list. The unicode61 tokenizer
    /// treats `-` as a separator when indexing (`foo-bar` indexes as
    /// `foo` + `bar`), and because each token is phrase-quoted below,
    /// a kept hyphen yields a phrase query matching the hyphen-split
    /// terms in order — what callers expect for hyphenated content
    /// (scenario S28). Phrase-quoting also means `-` can never reach
    /// FTS5 as a prefix exclusion operator.
    const FTS5_SPECIAL: [char; 10] = ['"', '*', '^', '{', '}', '(', ')', ':', '|', '+'];

    let visible = strip_invisible(input);
    let mut phrases: Vec<String> = Vec::new();
    for word in visible.split_whitespace() {
        // Standalone FTS5 boolean / proximity operators are dropped
        // (case-insensitively) before any character stripping.
        if matches!(
            word.to_uppercase().as_str(),
            "AND" | "OR" | "NOT" | "NEAR"
        ) {
            continue;
        }
        let body: String = word
            .chars()
            .filter(|ch| !FTS5_SPECIAL.contains(ch))
            .collect();
        // A token made only of special characters contributes nothing.
        if !body.is_empty() {
            phrases.push(format!("\"{body}\""));
        }
    }

    if phrases.is_empty() {
        return "\"_empty_\"".to_string();
    }
    phrases.join(if use_or { " OR " } else { " " })
}

/// List every namespace that holds at least one live (non-expired)
/// memory, together with its live-row count, densest namespace first.
///
/// # Errors
///
/// Propagates rusqlite errors from preparing, stepping, or decoding
/// the aggregation query.
pub fn list_namespaces(conn: &Connection) -> Result<Vec<NamespaceCount>> {
    // Rows whose `expires_at` is at or before this instant are excluded.
    let cutoff = Utc::now().to_rfc3339();
    let mut statement = conn.prepare(
        "SELECT namespace, COUNT(*) FROM memories WHERE expires_at IS NULL OR expires_at > ?1 GROUP BY namespace ORDER BY COUNT(*) DESC",
    )?;

    let mut namespaces = Vec::new();
    let mut rows = statement.query(params![cutoff])?;
    while let Some(row) = rows.next()? {
        namespaces.push(NamespaceCount {
            namespace: row.get(0)?,
            count: row.get(1)?,
        });
    }
    Ok(namespaces)
}

/// Hard cap on input groups walked when assembling a taxonomy tree.
/// Even when callers pass a wildly large `limit`, we never walk more
/// than this many `(namespace, count)` rows — bounds memory + time.
/// Shared by the sqlite + postgres taxonomy paths and the HTTP / MCP
/// taxonomy surfaces so all four clamp identically.
///
/// [`get_taxonomy`] applies it as `limit.min(TAXONOMY_MAX_LIMIT)` and
/// binds the result as the SQL `LIMIT` of its group query.
pub const TAXONOMY_MAX_LIMIT: usize = 10_000;

/// Default group budget for taxonomy listings when the caller passes
/// no explicit `limit` (HTTP `/api/v1/namespaces`, MCP
/// `memory_get_taxonomy`).
///
/// Sits below [`TAXONOMY_MAX_LIMIT`], so the default itself is never
/// reduced by the hard-cap clamp.
pub const TAXONOMY_DEFAULT_LIMIT: usize = 1000;

/// Build a hierarchical namespace taxonomy (Pillar 1 / Stream A).
///
/// Groups live (non-expired) memories by `namespace`, splits each on
/// `/`, and folds them into a `TaxonomyNode` tree. The returned root
/// represents `namespace_prefix` (or the synthetic empty-string root if
/// no prefix is supplied); each child level descends one segment.
///
/// `max_depth` is interpreted as "show at most N levels *below the
/// prefix*". Memories whose namespace would have required descending
/// past the cutoff still contribute to the `subtree_count` of the
/// boundary ancestor (their counts are not lost — only the leaf
/// rendering is suppressed).
///
/// `limit` caps the number of input `(namespace, count)` rows we walk
/// — when truncated, `total_count` still reflects the full prefix
/// total (a separate aggregation), and `truncated` is set so callers
/// can warn the user. Hard ceiling: [`TAXONOMY_MAX_LIMIT`].
// Body is intentionally one logical pipeline (SQL aggregation → tree
// assembly → root materialisation); pulling helpers out hurts
// readability more than it helps.
#[allow(clippy::too_many_lines)]
pub fn get_taxonomy(
    conn: &Connection,
    namespace_prefix: Option<&str>,
    max_depth: usize,
    limit: usize,
) -> Result<Taxonomy> {
    let now = Utc::now().to_rfc3339();
    let effective_limit = limit.min(TAXONOMY_MAX_LIMIT);
    // Clamp depth so callers asking for "everything" can't construct a
    // pathological deep walk; the namespace validator already rejects
    // depths > MAX_NAMESPACE_DEPTH on writes.
    let effective_depth = max_depth.min(MAX_NAMESPACE_DEPTH);

    let prefix = namespace_prefix.unwrap_or("");
    // #1531 L5 — `validate_namespace` deliberately places no per-segment
    // character restriction (historical flexibility), so a stored
    // namespace/prefix may contain the LIKE metacharacters `%` / `_`.
    // Escape the descendant pattern (mirroring the visibility clause at
    // the top of this file and the postgres `taxonomy_namespaces`
    // twin) so a prefix like `a%` cannot over-match `aX/...` subtrees.
    let descendant_pattern = format!(
        "{}/%",
        prefix
            .replace('\\', "\\\\")
            .replace('%', "\\%")
            .replace('_', "\\_")
    );

    // Total count for the prefix is computed independently of the
    // truncated row walk so the caller-visible total stays honest even
    // when `limit` drops rows from the tree.
    let total_count: usize = if prefix.is_empty() {
        let v: i64 = conn.query_row(
            "SELECT COUNT(*) FROM memories WHERE expires_at IS NULL OR expires_at > ?1",
            params![now],
            |row| row.get(0),
        )?;
        usize::try_from(v).unwrap_or(0)
    } else {
        let v: i64 = conn.query_row(
            "SELECT COUNT(*) FROM memories
             WHERE (expires_at IS NULL OR expires_at > ?1)
               AND (namespace = ?2 OR namespace LIKE ?3 ESCAPE '\\')",
            params![now, prefix, descendant_pattern],
            |row| row.get(0),
        )?;
        usize::try_from(v).unwrap_or(0)
    };

    // Group rows ordered by count DESC so a small `limit` keeps the
    // densest namespaces, then alphabetic for stable tie-breaking.
    let groups: Vec<(String, usize)> = if prefix.is_empty() {
        let mut stmt = conn.prepare(
            "SELECT namespace, COUNT(*) FROM memories
             WHERE expires_at IS NULL OR expires_at > ?1
             GROUP BY namespace
             ORDER BY COUNT(*) DESC, namespace ASC
             LIMIT ?2",
        )?;
        let rows = stmt.query_map(
            params![now, i64::try_from(effective_limit).unwrap_or(i64::MAX)],
            |row| {
                let ns: String = row.get(0)?;
                let c: i64 = row.get(1)?;
                Ok((ns, usize::try_from(c).unwrap_or(0)))
            },
        )?;
        rows.collect::<rusqlite::Result<Vec<_>>>()?
    } else {
        let mut stmt = conn.prepare(
            "SELECT namespace, COUNT(*) FROM memories
             WHERE (expires_at IS NULL OR expires_at > ?1)
               AND (namespace = ?2 OR namespace LIKE ?3 ESCAPE '\\')
             GROUP BY namespace
             ORDER BY COUNT(*) DESC, namespace ASC
             LIMIT ?4",
        )?;
        let rows = stmt.query_map(
            params![
                now,
                prefix,
                descendant_pattern,
                i64::try_from(effective_limit).unwrap_or(i64::MAX)
            ],
            |row| {
                let ns: String = row.get(0)?;
                let c: i64 = row.get(1)?;
                Ok((ns, usize::try_from(c).unwrap_or(0)))
            },
        )?;
        rows.collect::<rusqlite::Result<Vec<_>>>()?
    };

    let walked_count: usize = groups.iter().map(|(_, c)| *c).sum();
    let truncated = walked_count < total_count;

    // Synthesize the root node. `name` is the trailing segment of the
    // prefix (or empty for the global root) so renderers can label it.
    let root_name = prefix.rsplit('/').next().unwrap_or("").to_string();
    let mut root = TaxonomyNode {
        namespace: prefix.to_string(),
        name: root_name,
        count: 0,
        subtree_count: 0,
        children: Vec::new(),
    };

    for (ns, c) in groups {
        // Compute path segments below the prefix. When prefix is empty,
        // the whole namespace becomes the suffix; when ns == prefix
        // exactly, segments is empty and the count lands on the root.
        let suffix: &str = if prefix.is_empty() {
            ns.as_str()
        } else if ns == prefix {
            ""
        } else if ns.len() > prefix.len() + 1
            && ns.starts_with(prefix)
            && ns.as_bytes()[prefix.len()] == b'/'
        {
            &ns[prefix.len() + 1..]
        } else {
            // Defensive: SQL filter shouldn't return this, but skip rather
            // than panic if it ever does (e.g. a stray match like
            // "alphaone-sibling" matching prefix "alphaone").
            continue;
        };
        let all_segments: Vec<&str> = if suffix.is_empty() {
            Vec::new()
        } else {
            suffix.split('/').collect()
        };
        let take = all_segments.len().min(effective_depth);
        let used = &all_segments[..take];
        let exact_match_in_view = take == all_segments.len();

        // Walk into the tree. Every ancestor's subtree_count grows by c
        // — including the root — and only the deepest visible node's
        // `count` does, and only when it represents the exact namespace
        // (not a clamped boundary).
        root.subtree_count += c;
        if used.is_empty() {
            root.count += c;
            continue;
        }

        let mut path_so_far = prefix.to_string();
        let mut node = &mut root;
        for (i, seg) in used.iter().enumerate() {
            if !path_so_far.is_empty() {
                path_so_far.push('/');
            }
            path_so_far.push_str(seg);
            let pos = node.children.iter().position(|ch| ch.name == *seg);
            let idx = if let Some(p) = pos {
                p
            } else {
                node.children.push(TaxonomyNode {
                    namespace: path_so_far.clone(),
                    name: (*seg).to_string(),
                    count: 0,
                    subtree_count: 0,
                    children: Vec::new(),
                });
                node.children.len() - 1
            };
            node = &mut node.children[idx];
            node.subtree_count += c;
            let is_leaf = i + 1 == used.len();
            if is_leaf && exact_match_in_view {
                node.count += c;
            }
        }
    }

    sort_taxonomy(&mut root);

    Ok(Taxonomy {
        tree: root,
        total_count,
        truncated,
    })
}

/// Recursively order the children of `node`, and of every descendant,
/// by `name` ascending (stable sort, `String` ordering).
fn sort_taxonomy(node: &mut TaxonomyNode) {
    let children = &mut node.children;
    children.sort_by(|left, right| left.name.cmp(&right.name));
    children.iter_mut().for_each(sort_taxonomy);
}

/// v0.7.0 ARCH-2 followup (FX-C2-batch3) — backend-blind taxonomy
/// tree-folding helper. Lifted out of `get_taxonomy` so the Postgres
/// SAL adapter can share the exact same fold logic with the SQLite
/// adapter, holding the cross-backend wire shape byte-for-byte.
///
/// Inputs:
/// - `prefix`: the namespace prefix the caller queried (`""` = global root).
/// - `effective_depth`: clamped depth, already `min(MAX_NAMESPACE_DEPTH)`.
/// - `total_count`: full prefix total (NOT truncated by the row walk);
///   passed through to the result untouched.
/// - `truncated`: caller-computed truncation flag; passed through.
/// - `groups`: walked `(namespace, count)` rows.
///
/// Folding rules:
/// - A group whose namespace is neither `prefix` itself nor a proper
///   `prefix/…` descendant is skipped entirely.
/// - Every accepted group adds its count to the `subtree_count` of the
///   root and of each visible node on its path.
/// - `count` is credited to the deepest visible node only when that
///   node is the group's exact namespace (not a depth-clamped
///   boundary). A group that resolves to zero visible segments is
///   credited to the root's `count`.
///
/// NOTE(review): with `effective_depth == 0` every accepted group has
/// zero visible segments, so descendants are credited to the root's
/// `count` as well — confirm this is the intended depth-0 wire shape.
///
/// Returns the assembled [`Taxonomy`] tree with sorted children.
#[doc(hidden)]
pub fn fold_taxonomy_groups(
    prefix: &str,
    effective_depth: usize,
    total_count: usize,
    truncated: bool,
    groups: Vec<(String, usize)>,
) -> Taxonomy {
    // The root is labelled with the trailing segment of the prefix
    // (empty for the global root).
    let mut root = TaxonomyNode {
        namespace: prefix.to_string(),
        name: prefix.rsplit('/').next().unwrap_or("").to_string(),
        count: 0,
        subtree_count: 0,
        children: Vec::new(),
    };

    for (namespace, hits) in groups {
        // Path of this group relative to the prefix.
        let below_prefix: &str = if prefix.is_empty() {
            &namespace
        } else if namespace == prefix {
            ""
        } else {
            let tail = namespace
                .strip_prefix(prefix)
                .and_then(|rest| rest.strip_prefix('/'))
                .filter(|tail| !tail.is_empty());
            // Siblings that merely share the leading text (e.g.
            // "alphaone-sibling" under prefix "alphaone") are ignored.
            let Some(tail) = tail else {
                continue;
            };
            tail
        };

        let segments: Vec<&str> = if below_prefix.is_empty() {
            Vec::new()
        } else {
            below_prefix.split('/').collect()
        };
        let visible = &segments[..segments.len().min(effective_depth)];
        let clamped = visible.len() < segments.len();

        root.subtree_count += hits;
        if visible.is_empty() {
            root.count += hits;
            continue;
        }

        let last = visible.len() - 1;
        let mut path = prefix.to_string();
        let mut cursor = &mut root;
        for (depth, segment) in visible.iter().copied().enumerate() {
            if !path.is_empty() {
                path.push('/');
            }
            path.push_str(segment);

            // Reuse the child for this segment, or append a fresh one.
            let found = cursor
                .children
                .iter()
                .position(|child| child.name == segment);
            let slot = found.unwrap_or_else(|| {
                cursor.children.push(TaxonomyNode {
                    namespace: path.clone(),
                    name: segment.to_string(),
                    count: 0,
                    subtree_count: 0,
                    children: Vec::new(),
                });
                cursor.children.len() - 1
            });

            cursor = &mut cursor.children[slot];
            cursor.subtree_count += hits;
            if depth == last && !clamped {
                cursor.count += hits;
            }
        }
    }

    sort_taxonomy(&mut root);

    Taxonomy {
        tree: root,
        total_count,
        truncated,
    }
}

/// Default row cap for memory list/search surfaces when the caller
/// passes no explicit limit. Mirrored by the postgres SAL adapter
/// (`src/store/postgres.rs::list_by_source_uri`) so both backends
/// page identically.
pub const LIST_DEFAULT_CAP: usize = 200;

/// Hard ceiling on rows returned by the memory list/search surfaces.
/// One shared knob across the sqlite + postgres SAL adapters; same
/// family as `KG_TIMELINE_MAX_LIMIT` / `KG_QUERY_MAX_LIMIT`.
pub const LIST_MAX_LIMIT: usize = 1000;

/// Post-clamp `usize → i64` conversion fallback for list/query limits.
/// Unreachable in practice (values are already clamped to at most
/// [`LIST_MAX_LIMIT`], which always fits `i64`); kept as a named knob so
/// the fallback page size is explicit rather than magic.
pub const LIST_FALLBACK_LIMIT: usize = 100;

/// Default page size for archive listings (HTTP `/api/v1/archive` and
/// MCP `memory_archive_list`) when the caller passes no explicit
/// `limit` — one knob so both surfaces page identically.
pub const ARCHIVE_DEFAULT_PAGE_LIMIT: usize = 50;

/// Default page size for governance pending-action listings (MCP
/// `memory_pending_list` / subscription approval feeds).
pub const PENDING_DEFAULT_PAGE_LIMIT: usize = 100;

/// Hard floor for the duplicate-check threshold. Below this, anything
/// can match random unrelated content, so callers must not be able to
/// get garbage merge suggestions by asking for a permissive threshold.
///
/// [`check_duplicate`] does not reject a lower value: it raises the
/// caller-supplied threshold to this floor (`threshold.max(..)`) and
/// reports the effective threshold back in its result.
pub const DUPLICATE_THRESHOLD_MIN: f32 = 0.5;

/// Default cosine similarity threshold for declaring a candidate a
/// duplicate. Empirically tuned for MiniLM-L6-v2 (the local embedder):
/// near-paraphrases of the same memory tend to land at 0.88+, while
/// loosely related content sits well below 0.85. Callers can override;
/// overrides below [`DUPLICATE_THRESHOLD_MIN`] are raised to that floor
/// by [`check_duplicate`].
pub const DUPLICATE_THRESHOLD_DEFAULT: f32 = 0.85;

/// Find the nearest-neighbor live memory by cosine similarity (Pillar 2 /
/// Stream D — `memory_check_duplicate`).
///
/// Performs a linear scan over `memories.embedding` rows that pass the
/// live-row (non-expired) gate and, when `namespace` is `Some`, the
/// namespace filter. The highest-cosine candidate across the pool is
/// reported as `nearest` whenever at least one candidate was scored,
/// so callers can show "closest existing memory was X at similarity
/// Y" even on a not-quite-duplicate. The caller-supplied `threshold`
/// only decides `is_duplicate`.
///
/// `threshold` is raised to [`DUPLICATE_THRESHOLD_MIN`] when lower, so
/// wildly permissive thresholds can't be used to dress unrelated
/// content as a merge suggestion; the effective value is echoed in the
/// result's `threshold` field.
///
/// Candidates are skipped (and not counted) when their blob is empty,
/// fails to decode, or has a different dimension than
/// `query_embedding`. The query has no `ORDER BY`, and a later
/// candidate replaces the current best only on a strictly greater
/// similarity, so ties keep the first row seen.
///
/// Returns a [`DuplicateCheck`] whose `candidates_scanned` field is the
/// number of candidates actually compared (useful for diagnostics).
///
/// # Errors
///
/// Propagates rusqlite errors from preparing, stepping, or decoding
/// the candidate query.
pub fn check_duplicate(
    conn: &Connection,
    query_embedding: &[f32],
    namespace: Option<&str>,
    threshold: f32,
) -> Result<DuplicateCheck> {
    /// `(id, title, namespace, embedding blob)` as selected below.
    type Candidate = (String, String, String, Vec<u8>);

    /// Decode one candidate row in SELECT column order.
    fn read_candidate(row: &rusqlite::Row<'_>) -> rusqlite::Result<Candidate> {
        Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?))
    }

    let effective_threshold = threshold.max(DUPLICATE_THRESHOLD_MIN);
    let now = Utc::now().to_rfc3339();

    // SQL handles the live-row + optional namespace gate; the cosine
    // pass happens in Rust because SQLite has no native vector op.
    // Rows without an embedding are never pulled: they cannot be a
    // near-duplicate by this definition.
    let pool: Vec<Candidate> = if let Some(ns) = namespace {
        let mut scoped = conn.prepare(
            "SELECT id, title, namespace, embedding FROM memories
             WHERE embedding IS NOT NULL
               AND (expires_at IS NULL OR expires_at > ?1)
               AND namespace = ?2",
        )?;
        let mapped = scoped.query_map(params![now, ns], read_candidate)?;
        mapped.collect::<rusqlite::Result<Vec<_>>>()?
    } else {
        let mut global = conn.prepare(
            "SELECT id, title, namespace, embedding FROM memories
             WHERE embedding IS NOT NULL
               AND (expires_at IS NULL OR expires_at > ?1)",
        )?;
        let mapped = global.query_map(params![now], read_candidate)?;
        mapped.collect::<rusqlite::Result<Vec<_>>>()?
    };

    let mut nearest: Option<DuplicateMatch> = None;
    let mut compared: usize = 0;
    for (id, title, row_namespace, blob) in pool {
        if blob.is_empty() {
            continue;
        }
        // v0.6.3.1 P2 — magic-byte aware decode. A payload the decoder
        // rejects is skipped with telemetry so a corrupted row can't
        // poison duplicate detection.
        let vector = match crate::embeddings::decode_embedding_blob(&blob) {
            Ok(decoded) => decoded,
            Err(e) => {
                tracing::warn!(
                    memory_id = %id,
                    blob_len = blob.len(),
                    error = %e,
                    "skipping duplicate-check candidate with malformed embedding"
                );
                continue;
            }
        };
        // A dimension mismatch cannot yield a meaningful score, so the
        // row is skipped rather than reported with a misleading one.
        if vector.len() != query_embedding.len() {
            tracing::warn!(
                memory_id = %id,
                expected = query_embedding.len(),
                got = vector.len(),
                "skipping duplicate-check candidate with dimension mismatch"
            );
            continue;
        }

        let similarity = crate::embeddings::Embedder::cosine_similarity(query_embedding, &vector);
        compared += 1;
        // NOTE(review): if `cosine_similarity` can return NaN, a
        // NaN-scored first candidate is never displaced (`x > NaN` is
        // always false) — confirm the helper guards zero-norm vectors.
        if nearest
            .as_ref()
            .is_none_or(|current| similarity > current.similarity)
        {
            nearest = Some(DuplicateMatch {
                id,
                title,
                namespace: row_namespace,
                similarity,
            });
        }
    }

    let is_duplicate = matches!(
        &nearest,
        Some(found) if found.similarity >= effective_threshold
    );
    Ok(DuplicateCheck {
        is_duplicate,
        threshold: effective_threshold,
        nearest,
        candidates_scanned: compared,
    })
}

/// Canonical hash used by [`check_duplicate_with_text`] to detect
/// byte-identical `title + content` pairs even when the embedding
/// pipeline (lower-casing, prefix tagging, etc.) prevents the cosine
/// similarity from saturating at 1.0.
///
/// `text` is expected to be the *exact* string the MCP/HTTP layer
/// hands to the embedder —
/// `crate::embeddings::embedding_document(title, content)`. Its raw
/// UTF-8 bytes are hashed with no normalization. Lower-casing or
/// whitespace stripping at this layer would make substantively
/// different inputs (e.g. code snippets that differ only in
/// whitespace) collide, which is exactly the ambiguity this hash is
/// meant to rule out.
///
/// SHA-256 is the same primitive the audit/subscriptions/signed-events
/// layers already use, so callers don't have to reach for a new
/// dependency.
///
/// Returns the 32-byte SHA-256 digest.
#[must_use]
pub fn canonical_content_hash(text: &str) -> [u8; 32] {
    use sha2::{Digest, Sha256};
    // One-shot digest over the raw bytes.
    Sha256::digest(text.as_bytes()).into()
}

// ---------------------------------------------------------------------------
// v0.7.0 (issue #519) — proactive conflict detection on memory_store
// ---------------------------------------------------------------------------

/// Cosine-similarity threshold above which a candidate is treated as a
/// near-duplicate for the purpose of [`proactive_conflict_check`].
///
/// Empirically tuned for the MiniLM-L6-v2 / Nomic embedder pair: rows
/// whose `(title, content)` paraphrase the query at this level are
/// already considered "the same memory" by the existing duplicate
/// machinery ([`DUPLICATE_THRESHOLD_DEFAULT`] sits at 0.85 for the
/// merge-suggestion surface). 0.95 is the stricter "this is the same
/// fact, restated" bar; combined with the textual contradiction signal
/// below, we surface only writes that proactively conflict with an
/// established near-duplicate.
///
/// **Known miss class (pre-existing; deliberately unchanged by the
/// #1579 A5 remediation):** genuine paraphrases can embed just BELOW
/// this bar — the P2-audit probe pair ("deadline is june 15" vs
/// "deadline is june 22" in otherwise-identical sentences) scored
/// 0.945 cosine on the release MiniLM and is therefore not detected.
/// This is the safe direction for an advisory gate (the write is
/// ALLOWED; nothing is wrongly refused). Lowering the bar instead
/// would re-open the false-409 epidemic that the
/// [`PROACTIVE_CONFLICT_CONTENT_JACCARD_FLOOR`] corroboration exists
/// to close. The deeper `detect_contradiction` tooling remains the
/// surface for sub-threshold contradictions.
pub const PROACTIVE_CONFLICT_SIM_THRESHOLD: f32 = 0.95;

/// Top-K cap for the candidate pool inspected by
/// [`proactive_conflict_check`]. Bounded so the per-write cost is O(K)
/// rather than O(namespace_size).
///
/// NOTE(review): the SQL fetch in [`proactive_conflict_check`] is
/// bounded by [`PROACTIVE_CONFLICT_SCAN_LIMIT`], not by this constant;
/// presumably this cap is applied in the verdict step after scoring —
/// confirm against `proactive_conflict_verdict`.
pub const PROACTIVE_CONFLICT_TOP_K: usize = 5;

/// #1579 A5 — row cap on the bounded fallback scan in
/// [`proactive_conflict_check`] (most-recently-updated rows first).
/// It is bound as the SQL `LIMIT` of that function's
/// `ORDER BY updated_at DESC` candidate query.
///
/// Pre-#1579 the check decoded + cosine-scored EVERY embedded live row
/// in the namespace per write — an O(N) scan that (under the HTTP
/// daemon's single-connection mutex) collapsed semantic-tier write
/// throughput to 0.3-1.7 rps in the P2 audit. The fallback path (used
/// when no fully-searchable HNSW index is available: keyword tier,
/// the async-boot warm window, CLI one-shots) now scans only the
/// `PROACTIVE_CONFLICT_SCAN_LIMIT` most-recently-updated candidates.
/// Recency ordering is the right prior for an advisory near-duplicate
/// gate: conflicting restatements cluster temporally (an agent
/// re-asserting a fact it just learned), and the indexed path (the
/// production semantic-tier route) covers the long tail. A miss here
/// only ALLOWS a write that deeper inspection might have refused —
/// never refuses a legitimate one — which is the safe direction for
/// an advisory check with a `force=true` bypass.
pub const PROACTIVE_CONFLICT_SCAN_LIMIT: usize = 1024;

/// #1579 A5 — `k` requested from the HNSW index by
/// [`proactive_conflict_check_with_index`]. Deliberately larger than
/// [`PROACTIVE_CONFLICT_TOP_K`] because the index is global while the
/// conflict check is namespace-scoped: the namespace filter is applied
/// AFTER the ANN search (post-filter semantics), so foreign-namespace
/// hits consume slots. 32 gives the in-namespace pool ample headroom.
/// The ≥ 0.95 cosine gate means only near-identical vectors matter,
/// and more than 32 near-identical foreign-namespace rows crowding out
/// an in-namespace conflict is a pathology the bounded fallback's
/// advisory contract already tolerates — see
/// [`PROACTIVE_CONFLICT_SCAN_LIMIT`].
pub const PROACTIVE_CONFLICT_INDEX_K: usize = 32;

/// #1579 A5 — minimum Jaccard token overlap between the incoming
/// `content` and a cosine-near-duplicate candidate's `content` for the
/// pair to be classified as a proactive conflict.
///
/// **Why this exists** (the P2 false-409 epidemic). The P2 perf audit
/// measured **81% of semantic-tier writes refused with 409** when a
/// loadtest wrote unique random-alphanumeric payloads: MiniLM-L6-v2
/// assigns ≥ 0.95 cosine to ~28% of PAIRS of unrelated 256-byte noise
/// documents (probe on the release model: pairwise min 0.44 / mean
/// 0.83 / max 0.97), so with a 1k-row namespace virtually every write
/// found SOME ≥ 0.95 "near-duplicate" — while a genuine paraphrase
/// pair ("deadline is june 15" vs "deadline is june 22" in identical
/// sentences) scored 0.945, BELOW the threshold. Embedding cosine
/// alone is therefore not sufficient evidence of "the same fact,
/// restated". The deterministic corroboration is lexical: a true
/// restatement shares vocabulary. We reuse the #1320 tokenizer
/// (lowercase, split on non-alphanumeric, stopword-strip — see
/// [`CONTRADICTION_TITLE_JACCARD_FLOOR`]) over the CONTENT bodies and
/// require this floor, which rejects the disjoint-token noise pairs
/// (Jaccard ≈ 0) while keeping real restatements (the june-15/june-22
/// pair scores 0.5).
pub const PROACTIVE_CONFLICT_CONTENT_JACCARD_FLOOR: f32 = 0.30;

/// Result envelope returned by [`proactive_conflict_check`] when an
/// existing memory near-duplicates AND textually contradicts the
/// incoming write.
///
/// The identifying fields describe the EXISTING row; nothing from the
/// incoming write is echoed back.
#[derive(Debug, Clone)]
pub struct ProactiveConflict {
    /// `id` of the existing memory the new write conflicts with.
    pub existing_id: String,
    /// Title of the existing memory (for diagnostic surfacing).
    pub existing_title: String,
    /// Cosine similarity between the incoming write and the existing
    /// memory (always `>= PROACTIVE_CONFLICT_SIM_THRESHOLD` in
    /// returned values).
    pub similarity: f32,
    /// Reason the candidate was classified as conflicting. Currently
    /// always `"near_duplicate_with_differing_content"`; future
    /// extensions (LLM-backed detector, negation-flip heuristic) can
    /// surface a different reason string here. Typed `&'static str`,
    /// so it is a fixed tag rather than free-form per-row text.
    pub reason: &'static str,
}

/// v0.7.0 (issue #519) — proactive contradiction detection on the
/// `memory_store` write path.
///
/// Scans the top-`PROACTIVE_CONFLICT_TOP_K` most similar live memories
/// in the new memory's namespace (by cosine similarity over the
/// existing `memories.embedding` column) and returns the first match
/// whose similarity meets `PROACTIVE_CONFLICT_SIM_THRESHOLD` AND whose
/// stored `content` differs from the incoming `mem.content` exactly.
///
/// The "differs exactly" check is the deterministic substrate-layer
/// contradiction signal — a row that paraphrases the same fact at
/// ≥ 0.95 cosine but spells out a different content body is, by
/// construction, asserting a near-duplicate fact with a different
/// substantive payload (the LLM detector would call this a soft
/// contradiction; the substrate check calls it a near-duplicate with
/// differing content). Callers that want the full LLM-backed
/// `detect_contradiction` round-trip can layer it on top of the
/// proactive-check result; the substrate path stays LLM-independent so
/// it runs deterministically under `AI_MEMORY_NO_CONFIG=1` and in
/// every CI environment.
///
/// A `force=true` switch at the handler layer (MCP / CLI / HTTP)
/// bypasses this check entirely — see `src/mcp/tools/store.rs` and
/// `src/handlers/http.rs::create_memory`.
///
/// Returns:
/// * `Ok(None)` — no conflict detected; the caller may proceed with
///   the insert.
/// * `Ok(Some(ProactiveConflict))` — at least one candidate triggered
///   the near-duplicate-with-differing-content guard; the caller
///   should refuse the insert (and return an error envelope naming
///   `existing_id`) unless `force=true` was set.
///
/// # Errors
///
/// Bubbles rusqlite errors from the candidate-pool SELECT. The cosine
/// pass itself is in-memory and infallible (mismatched-dim candidates
/// are skipped with a tracing warn, mirroring `check_duplicate`).
pub fn proactive_conflict_check(
    conn: &Connection,
    mem: &Memory,
    query_embedding: &[f32],
) -> Result<Option<ProactiveConflict>> {
    if query_embedding.is_empty() {
        return Ok(None);
    }
    let now = Utc::now().to_rfc3339();

    // Pull (id, title, content, embedding) for the live, in-namespace
    // pool. We restrict to the same namespace as the incoming write
    // because cross-namespace "contradictions" are not a substrate
    // concept (namespaces are deliberately isolated scopes); the
    // namespace-scoped check matches the `find_contradictions` /
    // `find_by_title_namespace` semantics already used by the
    // `OnConflict::Error` branch of `insert_with_conflict`.
    //
    // #1579 A5 — BOUNDED: most-recently-updated rows first, capped at
    // `PROACTIVE_CONFLICT_SCAN_LIMIT`. See the const for the recency
    // rationale and the advisory-miss contract. The unbounded
    // full-namespace decode+scan this replaces was the P2-measured
    // write-throughput collapse (0.3-1.7 rps under the HTTP mutex).
    let mut stmt = conn.prepare(
        "SELECT id, title, content, embedding FROM memories
         WHERE embedding IS NOT NULL
           AND (expires_at IS NULL OR expires_at > ?1)
           AND namespace = ?2
         ORDER BY updated_at DESC
         LIMIT ?3",
    )?;
    let rows: Vec<(String, String, String, Vec<u8>)> = stmt
        .query_map(
            params![
                now,
                &mem.namespace,
                i64::try_from(PROACTIVE_CONFLICT_SCAN_LIMIT).unwrap_or(i64::MAX)
            ],
            |row| {
                Ok((
                    row.get::<_, String>(0)?,
                    row.get::<_, String>(1)?,
                    row.get::<_, String>(2)?,
                    row.get::<_, Vec<u8>>(3)?,
                ))
            },
        )?
        .collect::<rusqlite::Result<Vec<_>>>()?;

    Ok(proactive_conflict_verdict(mem, query_embedding, rows))
}

/// #1579 A5 — production write-path dispatcher for the proactive
/// conflict check; picks between the HNSW index and the bounded scan.
///
/// Routing rules:
///
/// * **Index path.** Taken when a [`crate::hnsw::VectorIndex`] is
///   supplied, is fully searchable (its graph covers `all_entries` —
///   see [`crate::hnsw::VectorIndex::is_fully_searchable`]) and is not
///   empty. The candidate pool then comes from an O(log N) ANN query
///   instead of the table scan. Candidates are re-verified against the
///   DB (live, same namespace, EXACT cosine recomputed from the stored
///   blob): the index's distance is approximate and assumes
///   L2-normalised vectors, so recomputing from the stored blob keeps
///   the decision function byte-equal to the scan path.
/// * **Fallback path.** Everything else — no index at keyword tier,
///   the async-boot warm window before the first graph swap, CLI
///   one-shots below the build threshold — goes to the BOUNDED recency
///   scan in [`proactive_conflict_check`].
/// * **Empty index ⇒ fallback** (#1579 QC). Emptiness makes
///   `is_fully_searchable` vacuously true, yet during the async-boot
///   LOAD phase (daemon bound with `VectorIndex::empty()`, boot loader
///   still reading the stored embeddings, `seed_entries` not yet
///   landed) an empty index says nothing about what the DB holds.
///   Consulting it would silently SKIP the check rather than degrade
///   to the documented bounded scan. On a genuinely empty corpus the
///   fallback scan matches zero rows, so this routing is
///   behaviour-neutral outside that window.
///
/// Known under-detection windows — both safe-direction, i.e. a missed
/// conflict ALLOWS a write and the check never wrongly refuses:
///
/// * rows evicted from the index's 100k entry cap are invisible to the
///   ANN query;
/// * a warm-window write beyond the bounded scan's recency horizon is
///   invisible to the fallback.
///
/// Callers that need a hard guarantee already have the
/// `(title, namespace)` SQL conflict gate; this check is the advisory
/// #519 layer with a `force=true` bypass.
///
/// # Errors
///
/// Bubbles rusqlite errors from the candidate SELECTs (same contract
/// as [`proactive_conflict_check`]).
pub fn proactive_conflict_check_with_index(
    conn: &Connection,
    mem: &Memory,
    query_embedding: &[f32],
    vector_index: Option<&crate::hnsw::VectorIndex>,
) -> Result<Option<ProactiveConflict>> {
    if query_embedding.is_empty() {
        return Ok(None);
    }

    // An index is only trusted when it is fully searchable AND
    // non-empty. #1579 QC — an empty index is vacuously
    // fully-searchable but proves nothing about the DB during the
    // async-boot LOAD phase; see the doc comment above and
    // `crate::hnsw::VectorIndex::is_empty`.
    let trusted_index = vector_index.filter(|idx| idx.is_fully_searchable() && !idx.is_empty());

    match trusted_index {
        Some(idx) => {
            // ANN search yields candidate ids only; liveness, namespace
            // and the exact cosine are re-checked against the DB.
            let candidate_ids: Vec<String> = idx
                .search(query_embedding, PROACTIVE_CONFLICT_INDEX_K)
                .into_iter()
                .map(|hit| hit.id)
                .collect();
            proactive_conflict_check_candidates(conn, mem, query_embedding, &candidate_ids)
        }
        None => {
            tracing::trace!(
                target: "proactive_conflict",
                namespace = %mem.namespace,
                "no fully-searchable (or empty) vector index — bounded recency-scan fallback (#1579 A5)"
            );
            proactive_conflict_check(conn, mem, query_embedding)
        }
    }
}

/// #1579 A5 — check an ANN-derived list of candidate ids against the
/// DB and run the conflict verdict over the survivors.
///
/// Only the named rows are fetched (an `id IN (...)` lookup). The same
/// live/namespace filters as the table scan are re-applied, and the
/// EXACT cosine is recomputed from each stored embedding blob, so the
/// decision function is identical to the scan path.
///
/// Returns `Ok(None)` without touching the DB when either
/// `query_embedding` or `candidate_ids` is empty.
///
/// Public so the HTTP create handler (which holds the vector index
/// behind an async mutex and must run the ANN search BEFORE taking
/// the DB lock) can split the search from the verification.
///
/// # Errors
///
/// Bubbles rusqlite errors from the `IN (...)` candidate SELECT.
pub fn proactive_conflict_check_candidates(
    conn: &Connection,
    mem: &Memory,
    query_embedding: &[f32],
    candidate_ids: &[String],
) -> Result<Option<ProactiveConflict>> {
    if candidate_ids.is_empty() || query_embedding.is_empty() {
        return Ok(None);
    }

    let id_count = candidate_ids.len();
    let live_cutoff = Utc::now().to_rfc3339();

    // One anonymous `?` per candidate id (they take slots 1..=N); the
    // two trailing binds are addressed by explicit number right after.
    let placeholders = vec!["?"; id_count].join(",");
    let sql = format!(
        "SELECT id, title, content, embedding FROM memories
         WHERE id IN ({placeholders})
           AND embedding IS NOT NULL
           AND (expires_at IS NULL OR expires_at > ?{cutoff_slot})
           AND namespace = ?{namespace_slot}",
        cutoff_slot = id_count + 1,
        namespace_slot = id_count + 2,
    );

    // Bind order must mirror the slot numbering: ids, cutoff, namespace.
    let mut bound: Vec<&str> = Vec::with_capacity(id_count + 2);
    bound.extend(candidate_ids.iter().map(String::as_str));
    bound.push(live_cutoff.as_str());
    bound.push(mem.namespace.as_str());

    let mut stmt = conn.prepare(&sql)?;
    let mapped = stmt.query_map(rusqlite::params_from_iter(bound), |row| {
        let id: String = row.get(0)?;
        let title: String = row.get(1)?;
        let content: String = row.get(2)?;
        let blob: Vec<u8> = row.get(3)?;
        Ok((id, title, content, blob))
    })?;
    let verified: Vec<(String, String, String, Vec<u8>)> =
        mapped.collect::<rusqlite::Result<_>>()?;

    Ok(proactive_conflict_verdict(mem, query_embedding, verified))
}

/// #1579 A5 — shared scoring + verdict tail of the proactive conflict
/// check. Decodes candidate blobs, cosine-scores them against the
/// query, keeps those at or above the similarity threshold, sorts them
/// descending, and applies the conflict rule to the top
/// [`PROACTIVE_CONFLICT_TOP_K`]:
///
///   near-duplicate (≥ [`PROACTIVE_CONFLICT_SIM_THRESHOLD`] cosine)
///   AND content differs
///   AND content token-overlap ≥ [`PROACTIVE_CONFLICT_CONTENT_JACCARD_FLOOR`]
///
/// The Jaccard corroboration is the #1579 false-409 fix — see the
/// floor const for the P2 evidence (81% of semantic-tier loadtest
/// writes refused because MiniLM clusters unrelated noise documents
/// above 0.95 cosine).
///
/// Candidates are skipped (never an error) when their blob is empty,
/// their id equals `mem.id`, their blob fails to decode, their
/// dimension differs from the query's, or their similarity is below
/// the threshold. A NaN similarity also counts as "below threshold":
/// a candidate that cannot be scored must not refuse a write, and
/// keeping NaN out of the sort keeps the comparator a total order.
///
/// Returns the first (highest-similarity) candidate that satisfies the
/// rule, or `None` when no candidate does.
fn proactive_conflict_verdict(
    mem: &Memory,
    query_embedding: &[f32],
    rows: Vec<(String, String, String, Vec<u8>)>,
) -> Option<ProactiveConflict> {
    // Score every candidate; only the ones that clear the similarity
    // threshold are retained for ranking.
    let mut scored: Vec<(f32, String, String, String)> = Vec::new();
    for (id, title, content, blob) in rows {
        if blob.is_empty() {
            continue;
        }
        // Skip self (same id) — happens when a re-store reuses the
        // existing memory id (NHI replay path).
        if id == mem.id {
            continue;
        }
        let candidate = match crate::embeddings::decode_embedding_blob(&blob) {
            Ok(v) => v,
            Err(e) => {
                tracing::warn!(
                    memory_id = %id,
                    blob_len = blob.len(),
                    error = %e,
                    "proactive_conflict_check: skipping candidate with malformed embedding"
                );
                continue;
            }
        };
        if candidate.len() != query_embedding.len() {
            tracing::warn!(
                memory_id = %id,
                expected = query_embedding.len(),
                got = candidate.len(),
                "proactive_conflict_check: skipping candidate with dimension mismatch"
            );
            continue;
        }
        let sim = crate::embeddings::Embedder::cosine_similarity(query_embedding, &candidate);
        // Written as a positive `>=` on purpose: a NaN score fails the
        // comparison and is dropped here instead of slipping past a
        // `sim < threshold` guard later (which is false for NaN) and
        // being reported as a conflict.
        if sim >= PROACTIVE_CONFLICT_SIM_THRESHOLD {
            scored.push((sim, id, title, content));
        }
    }

    // Strongest matches first. No NaN can be present, and `total_cmp`
    // is a total order, so the sort is well-defined. The sort is
    // stable, so equal scores keep their input order (recency order on
    // the scan path).
    scored.sort_by(|a, b| b.0.total_cmp(&a.0));

    let incoming_tokens = contradiction_title_tokens(&mem.content);
    for (sim, id, title, content) in scored.into_iter().take(PROACTIVE_CONFLICT_TOP_K) {
        // Deterministic textual contradiction signal: the candidate
        // is near-duplicate (at or above the similarity threshold) AND
        // its content body differs from the incoming write's content.
        // Same-content near-duplicates are not contradictions; they
        // are the upsert happy-path that the SQL
        // `ON CONFLICT(title, namespace)` already handles.
        //
        // #1579 A5 — lexical corroboration: a true "same fact,
        // restated" pair shares vocabulary. Without this floor,
        // unrelated documents that the embedder happens to cluster
        // above 0.95 cosine (P2-measured on random-alphanumeric
        // payloads) produced the 81% false-409 epidemic.
        if content != mem.content
            && contradiction_title_jaccard(&incoming_tokens, &contradiction_title_tokens(&content))
                >= PROACTIVE_CONFLICT_CONTENT_JACCARD_FLOOR
        {
            return Some(ProactiveConflict {
                existing_id: id,
                existing_title: title,
                similarity: sim,
                reason: "near_duplicate_with_differing_content",
            });
        }
    }
    None
}

/// v0.7.0 F18 — nearest-neighbor duplicate check with an exact-match
/// fast path.
///
/// Wraps [`check_duplicate`] with a SHA-256 short-circuit on the raw
/// `query_text`, so byte-identical content reports `similarity = 1.0`
/// even when the embedding pipeline (Nomic prefixes, casing,
/// whitespace normalization) would cap cosine similarity at ~0.92 for
/// the same string. Round-2 evidence: storing content `C` and then
/// asking `check_duplicate` about `C` returned similarity 0.92 because
/// the stored embedding was prefixed with `search_document:` while the
/// query embedding got `search_query:` — mismatched prefixes prevent
/// cosine from saturating at 1.0.
///
/// Algorithm:
/// 1. Hash `query_text` once.
/// 2. Load every live candidate (optionally restricted to
///    `namespace`), hash
///    `crate::embeddings::embedding_document(title, content)` for each
///    and compare. The first match wins and is returned with
///    `similarity = 1.0`, `is_duplicate = true`.
/// 3. With no hash match, delegate to [`check_duplicate`] for the
///    embedding-based cosine result.
///
/// The hash is computed per call (no schema migration). It scales
/// linearly in the candidate pool, but so does the existing embedding
/// loop, so worst-case asymptotics are unchanged. A future
/// `content_hash` column on `memories` would make this O(1) per
/// candidate via an index — flagged for a separate migration PR.
///
/// `query_text` MUST be the exact string used to produce
/// `query_embedding` (typically `crate::embeddings::embedding_document(title, content)`).
/// Passing a different string is not a correctness bug — the function
/// just falls through to the embedding-similarity path — but it
/// defeats the point of the short-circuit.
///
/// # Errors
///
/// Bubbles rusqlite errors from the candidate SELECT, plus whatever
/// [`check_duplicate`] returns on the fall-through path.
pub fn check_duplicate_with_text(
    conn: &Connection,
    query_embedding: &[f32],
    query_text: &str,
    namespace: Option<&str>,
    threshold: f32,
) -> Result<DuplicateCheck> {
    /// Decode one `(id, title, namespace, content)` candidate row.
    fn read_candidate(
        row: &rusqlite::Row<'_>,
    ) -> rusqlite::Result<(String, String, String, String)> {
        Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?))
    }

    let effective_threshold = threshold.max(DUPLICATE_THRESHOLD_MIN);
    let now = Utc::now().to_rfc3339();
    let query_hash = canonical_content_hash(query_text);

    // Live candidate pool. Same gates as `check_duplicate` (live row,
    // optional namespace) except that a NULL embedding is allowed: an
    // identical row with a missing embedding is still a valid
    // exact-match candidate.
    let rows: Vec<(String, String, String, String)> = match namespace {
        Some(ns) => {
            let mut stmt = conn.prepare(
                "SELECT id, title, namespace, content FROM memories
             WHERE (expires_at IS NULL OR expires_at > ?1)
               AND namespace = ?2",
            )?;
            let mapped = stmt.query_map(params![now, ns], read_candidate)?;
            mapped.collect::<rusqlite::Result<Vec<_>>>()?
        }
        None => {
            let mut stmt = conn.prepare(
                "SELECT id, title, namespace, content FROM memories
             WHERE (expires_at IS NULL OR expires_at > ?1)",
            )?;
            let mapped = stmt.query_map(params![now], read_candidate)?;
            mapped.collect::<rusqlite::Result<Vec<_>>>()?
        }
    };

    // Phase 1 — SHA-256 exact-match short-circuit. Each row is hashed
    // in the same `embedding_document(title, content)` shape the
    // MCP/HTTP layers use to build the embedding text, so an identical
    // store-then-check sequence surfaces as similarity=1.0 despite the
    // prefix asymmetry in the embedding pipeline.
    let exact_match = rows.iter().find(|row| {
        let row_text = crate::embeddings::embedding_document(&row.1, &row.3);
        canonical_content_hash(&row_text) == query_hash
    });
    if let Some((id, title, ns, _content)) = exact_match {
        return Ok(DuplicateCheck {
            is_duplicate: true,
            threshold: effective_threshold,
            nearest: Some(DuplicateMatch {
                id: id.clone(),
                title: title.clone(),
                namespace: ns.clone(),
                similarity: 1.0,
            }),
            // Reported as the size of the loaded candidate pool, not
            // the position of the first match.
            candidates_scanned: rows.len(),
        });
    }

    // Phase 2 — no hash match; fall back to the embedding-based
    // nearest-neighbor scan so callers still get the "closest existing
    // memory was X at similarity Y" signal on near-but-not-exact hits.
    check_duplicate(conn, query_embedding, namespace, threshold)
}

/// Register an entity (canonical name + aliases) under a namespace
/// (Pillar 2 / Stream B).
///
/// An entity is stored as a long-tier memory:
/// - `title = canonical_name`
/// - `namespace = namespace`
/// - `tags` includes [`ENTITY_TAG`]
/// - `metadata.kind = "entity"` (so the resolver can never confuse an
///   entity with a regular memory that happens to share a title)
///
/// Aliases live in the `entity_aliases` side table keyed by
/// `(entity_id, alias)`.
///
/// **Idempotency:** if an entity with this `(canonical_name, namespace)`
/// already exists, its ID is reused and `aliases` are merged with
/// `INSERT OR IGNORE`. The returned [`EntityRegistration::created`] is
/// `false` in that case.
///
/// **Collision detection:** if a non-entity memory already occupies
/// `(title=canonical_name, namespace=namespace)`, the call errors
/// rather than silently upgrading it (the upsert path on `insert`
/// would otherwise overwrite the existing row's content/tags). Callers
/// must rename the entity or its colliding memory.
///
/// `extra_metadata` is merged into the entity memory's metadata; any
/// caller-supplied `kind` field is overwritten with `"entity"` and
/// `agent_id` is stamped from the caller (NHI provenance) when
/// `extra_metadata` does not already specify one.
pub fn entity_register(
    conn: &Connection,
    canonical_name: &str,
    namespace: &str,
    aliases: &[String],
    extra_metadata: &serde_json::Value,
    agent_id: Option<&str>,
) -> Result<crate::models::EntityRegistration> {
    use crate::models::{ENTITY_KIND, ENTITY_TAG, EntityRegistration};

    // Look up an existing entity in this namespace by canonical_name +
    // metadata.kind. If a non-entity memory occupies the same
    // (title, namespace), surface a hard error instead of upserting.
    let existing_id: Option<String> = match conn.query_row(
        "SELECT id FROM memories
         WHERE namespace = ?1 AND title = ?2
           AND COALESCE(json_extract(metadata, '$.kind'), '') = ?3",
        params![namespace, canonical_name, ENTITY_KIND],
        |r| r.get::<_, String>(0),
    ) {
        Ok(id) => Some(id),
        Err(rusqlite::Error::QueryReturnedNoRows) => None,
        Err(e) => return Err(e.into()),
    };

    let (entity_id, created) = if let Some(id) = existing_id {
        (id, false)
    } else {
        let collision: Option<String> = match conn.query_row(
            "SELECT id FROM memories
             WHERE namespace = ?1 AND title = ?2
               AND COALESCE(json_extract(metadata, '$.kind'), '') != ?3",
            params![namespace, canonical_name, ENTITY_KIND],
            |r| r.get::<_, String>(0),
        ) {
            Ok(id) => Some(id),
            Err(rusqlite::Error::QueryReturnedNoRows) => None,
            Err(e) => return Err(e.into()),
        };
        if collision.is_some() {
            // #962 typed envelope — UniqueConflict (409).
            return Err(anyhow::Error::new(StorageError::UniqueConflict {
                reason: format!(
                    "entity_register: title '{canonical_name}' in namespace '{namespace}' is already used by a non-entity memory"
                ),
            }));
        }

        // Build metadata: caller-supplied object merged, kind forced
        // to "entity", agent_id preserved from caller when not set.
        let mut meta_map = match extra_metadata {
            serde_json::Value::Object(m) => m.clone(),
            _ => serde_json::Map::new(),
        };
        meta_map.insert(
            "kind".to_string(),
            serde_json::Value::String(ENTITY_KIND.to_string()),
        );
        if let Some(a) = agent_id {
            meta_map
                .entry("agent_id".to_string())
                .or_insert(serde_json::Value::String(a.to_string()));
        }
        let metadata = serde_json::Value::Object(meta_map);

        let now = Utc::now().to_rfc3339();
        let mem = Memory {
            id: uuid::Uuid::new_v4().to_string(),
            tier: Tier::Long,
            namespace: namespace.to_string(),
            title: canonical_name.to_string(),
            content: canonical_name.to_string(),
            tags: vec![ENTITY_TAG.to_string()],
            priority: 7,
            confidence: 1.0,
            source: "api".to_string(),
            access_count: 0,
            created_at: now.clone(),
            updated_at: now,
            last_accessed_at: None,
            expires_at: None,
            metadata,
            reflection_depth: 0,
            memory_kind: crate::models::MemoryKind::Observation,
            entity_id: None,
            persona_version: None,
            citations: Vec::new(),
            source_uri: None,
            source_span: None,
            confidence_source: ConfidenceSource::CallerProvided,
            confidence_signals: None,
            confidence_decayed_at: None,
            version: 1,
        };
        let id = insert(conn, &mem).context("insert entity memory")?;
        (id, true)
    };

    let now = Utc::now().to_rfc3339();
    {
        let mut stmt = conn.prepare(
            "INSERT OR IGNORE INTO entity_aliases (entity_id, alias, created_at)
             VALUES (?1, ?2, ?3)",
        )?;
        // canonical_name is always reachable via entity_get_by_alias.
        // Without this row, registering an entity with no aliases makes
        // it unreachable by name (NHI-P3-T2).
        stmt.execute(params![entity_id, canonical_name, now])?;
        for alias in aliases {
            let trimmed = alias.trim();
            if trimmed.is_empty() || trimmed == canonical_name {
                continue;
            }
            stmt.execute(params![entity_id, trimmed, now])?;
        }
    }

    let aliases_out = list_entity_aliases(conn, &entity_id)?;

    Ok(EntityRegistration {
        entity_id,
        canonical_name: canonical_name.to_string(),
        namespace: namespace.to_string(),
        aliases: aliases_out,
        created,
    })
}

/// Look up the registered entity that owns `alias` (Pillar 2 /
/// Stream B).
///
/// With `namespace = Some(..)` only entities in that namespace are
/// considered. With `None` every namespace is searched and the
/// most-recently-created matching entity wins (deterministic
/// disambiguation when the same alias was registered in multiple
/// namespaces).
///
/// The alias is trimmed before lookup; a blank alias resolves to
/// `Ok(None)` without querying. `Ok(None)` is also returned when no
/// entity claims the alias under the given filter. On a hit the full
/// alias set of the resolved entity is returned.
///
/// # Errors
///
/// Bubbles rusqlite errors from the lookup or the alias read-back.
pub fn entity_get_by_alias(
    conn: &Connection,
    alias: &str,
    namespace: Option<&str>,
) -> Result<Option<crate::models::EntityRecord>> {
    use crate::models::{ENTITY_KIND, EntityRecord};

    /// Decode one `(id, title, namespace)` row.
    fn read_entity(row: &rusqlite::Row<'_>) -> rusqlite::Result<(String, String, String)> {
        Ok((row.get(0)?, row.get(1)?, row.get(2)?))
    }

    let needle = alias.trim();
    if needle.is_empty() {
        return Ok(None);
    }

    // The `metadata.kind` guard ensures only genuine entity memories
    // are returned, even if an alias row points at some other memory.
    let lookup = match namespace {
        Some(ns) => conn.query_row(
            "SELECT m.id, m.title, m.namespace
                 FROM entity_aliases ea
                 JOIN memories m ON m.id = ea.entity_id
                 WHERE ea.alias = ?1
                   AND m.namespace = ?2
                   AND COALESCE(json_extract(m.metadata, '$.kind'), '') = ?3
                 ORDER BY m.created_at DESC
                 LIMIT 1",
            params![needle, ns, ENTITY_KIND],
            read_entity,
        ),
        None => conn.query_row(
            "SELECT m.id, m.title, m.namespace
                 FROM entity_aliases ea
                 JOIN memories m ON m.id = ea.entity_id
                 WHERE ea.alias = ?1
                   AND COALESCE(json_extract(m.metadata, '$.kind'), '') = ?2
                 ORDER BY m.created_at DESC
                 LIMIT 1",
            params![needle, ENTITY_KIND],
            read_entity,
        ),
    };

    // "No rows" is the ordinary not-found outcome, not an error.
    let (entity_id, canonical_name, resolved_namespace) = match lookup {
        Ok(found) => found,
        Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
        Err(other) => return Err(other.into()),
    };

    let aliases = list_entity_aliases(conn, &entity_id)?;
    Ok(Some(EntityRecord {
        entity_id,
        canonical_name,
        namespace: resolved_namespace,
        aliases,
    }))
}

/// Default cap on rows returned by `kg_timeline` when the caller does
/// not specify one (Pillar 2 / Stream C). Sized to fit a reasonable
/// agent context window without paging — callers needing more should
/// pass an explicit limit (still subject to
/// [`KG_TIMELINE_MAX_LIMIT`]).
pub const KG_TIMELINE_DEFAULT_LIMIT: usize = 200;

/// Hard ceiling on `kg_timeline` rows. Matches the existing list/recall
/// caps to keep the timeline bounded against pathological entities.
/// `kg_timeline` clamps the effective limit into
/// `[1, KG_TIMELINE_MAX_LIMIT]`, so larger requests are silently
/// reduced to this value rather than rejected.
pub const KG_TIMELINE_MAX_LIMIT: usize = 1000;

/// Chronological fact timeline for an entity (Pillar 2 / Stream C —
/// `memory_kg_timeline`). Yields the outbound assertions of
/// `source_id`, sorted by `valid_from ASC` with `created_at ASC` as the
/// tie-breaker so the display order is deterministic.
///
/// Filters:
/// - `since` (RFC3339, inclusive): drop events with `valid_from < since`
/// - `until` (RFC3339, inclusive): drop events with `valid_from > until`
/// - `limit`: row cap, clamped to [1, [`KG_TIMELINE_MAX_LIMIT`]];
///   defaults to [`KG_TIMELINE_DEFAULT_LIMIT`] when `None`
///
/// Rows with NULL `valid_from` are excluded — a link without a
/// valid-from anchor cannot be ordered on the timeline. The schema-v15
/// migration backfilled legacy rows to `created_at`, and the `link()`
/// path stamps the column on every new insert, so this is a hard
/// guarantee for current code; the explicit `IS NOT NULL` guard exists
/// to keep external writes (`store/sqlite.rs`, custom migrations) from
/// silently producing invisible links.
///
/// Cross-namespace by design: timelines often span the same canonical
/// entity asserted by agents in different namespaces. Callers can
/// post-filter by `target_namespace` if they need a namespace-scoped
/// view.
///
/// v0.7 AGE acceleration onramp (charter §"Stream C" bullet 4). When
/// the v0.7 SAL ships with Apache AGE, the equivalent property-graph
/// query is:
///
/// ```cypher
/// MATCH (s {id: $source_id})-[r {valid_from IS NOT NULL,
///        valid_from >= $since, valid_from <= $until}]->(t)
/// WHERE t.id <> s.id  // exclude self-loops
/// RETURN t.id, r.relation, r.valid_from, r.valid_until, r.observed_by
/// ORDER BY r.valid_from ASC, r.created_at ASC
/// LIMIT $limit
/// ```
///
/// Stub left here per charter intent so the v0.7 migration has a 1:1
/// reference query.
///
/// # Errors
///
/// Bubbles rusqlite errors from preparing or running the query.
pub fn kg_timeline(
    conn: &Connection,
    source_id: &str,
    since: Option<&str>,
    until: Option<&str>,
    limit: Option<usize>,
) -> Result<Vec<crate::models::KgTimelineEvent>> {
    use crate::models::KgTimelineEvent;

    let row_cap = limit
        .unwrap_or(KG_TIMELINE_DEFAULT_LIMIT)
        .clamp(1, KG_TIMELINE_MAX_LIMIT);

    // Fixed head of the statement; `?1` is always the source id.
    let mut sql = String::from(
        "SELECT ml.target_id, ml.relation, ml.valid_from, ml.valid_until,
                ml.observed_by, m.title, m.namespace, ml.created_at
         FROM memory_links ml
         JOIN memories m ON m.id = ml.target_id
         WHERE ml.source_id = ?1
           AND ml.valid_from IS NOT NULL",
    );
    let mut binds: Vec<Box<dyn rusqlite::ToSql>> = Vec::with_capacity(4);
    binds.push(Box::new(source_id.to_string()));

    // Appends `clause` followed by the 1-based index of the value it
    // binds. Pushing the value first means `binds.len()` IS that index,
    // so placeholders and bind order cannot drift apart.
    let mut append_bound = |clause: &str, value: Box<dyn rusqlite::ToSql>| {
        binds.push(value);
        sql.push_str(clause);
        sql.push_str(&binds.len().to_string());
    };
    if let Some(lower) = since {
        append_bound(" AND ml.valid_from >= ?", Box::new(lower.to_string()));
    }
    if let Some(upper) = until {
        append_bound(" AND ml.valid_from <= ?", Box::new(upper.to_string()));
    }
    append_bound(
        " ORDER BY ml.valid_from ASC, ml.created_at ASC LIMIT ?",
        Box::new(i64::try_from(row_cap).unwrap_or(i64::MAX)),
    );

    let mut stmt = conn.prepare(&sql)?;
    let bind_refs: Vec<&dyn rusqlite::ToSql> = binds.iter().map(AsRef::as_ref).collect();
    let mapped = stmt.query_map(rusqlite::params_from_iter(bind_refs), |row| {
        // Column 7 (`ml.created_at`) is selected but not surfaced on
        // the event.
        Ok(KgTimelineEvent {
            target_id: row.get(0)?,
            relation: row.get(1)?,
            valid_from: row.get(2)?,
            valid_until: row.get(3)?,
            observed_by: row.get(4)?,
            title: row.get(5)?,
            target_namespace: row.get(6)?,
        })
    })?;
    let events = mapped.collect::<rusqlite::Result<Vec<_>>>()?;
    Ok(events)
}

/// Outcome of [`invalidate_link`] (Pillar 2 / Stream C —
/// `memory_kg_invalidate`). `valid_until` is the timestamp now stored on
/// the link; `previous_valid_until` is the prior value, or `None` if
/// this was the first invalidation. Callers can use the prior value to
/// distinguish a fresh supersession from an idempotent retry.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InvalidateResult {
    /// Timestamp now stored in the link's `valid_until` column.
    pub valid_until: String,
    /// Value the column held before this call; `None` when the link
    /// had not been invalidated before.
    pub previous_valid_until: Option<String>,
}

/// Mark a KG link as superseded by setting its `valid_until` column
/// (Pillar 2 / Stream C — `memory_kg_invalidate`). Returns `Ok(None)`
/// when the `(source_id, target_id, relation)` triple does not match an
/// existing link. The supplied `valid_until` defaults to the current
/// wall-clock time in RFC3339 form when omitted; callers needing
/// historical or future supersession can pass an explicit value.
///
/// Idempotent: calling repeatedly overwrites the prior `valid_until`
/// (the prior value is returned in `previous_valid_until` so callers
/// can detect the overwrite). The schema does not yet carry an audit
/// column for the supersession reason; that arrives with v0.7
/// attestation. Until then, callers should record the rationale in
/// their own logs or a paired memory.
///
/// # v0.7.0 #628 H5 — signed-row preservation
///
/// `valid_until` is one of the six fields the H2 outbound signer
/// commits to (see [`crate::identity::sign::SignableLink`]). Mutating
/// it on a previously self-signed link silently flips every future
/// `memory_verify` to `signature_verified=false / attest_level=unsigned`
/// — legitimate supersession would be indistinguishable from
/// tampering on the wire. To preserve the audit chain we:
///
/// 1. NULL the `signature` column (and reset `attest_level` to
///    `"unsigned"`) so a future verify reports an honest "no
///    signature on this row" rather than a misleading "signature
///    mismatch".
/// 2. Append a `memory_link.invalidated` row to `signed_events` whose
///    `payload_hash` binds to the post-supersession canonical CBOR —
///    the auditor can replay both the original `memory_link.created`
///    row AND the matching `memory_link.invalidated` row to prove the
///    supersession was an intentional act by the same agent.
///
/// The audit append is mandatory, not best-effort: if encoding the
/// canonical CBOR or writing the `signed_events` row fails (vanishingly
/// unlikely outside disk-full / schema-drift scenarios), the whole
/// transaction is rolled back and an error is returned. Committing
/// the signature clearing without its audit row would recreate the
/// silent-supersession state this section exists to prevent.
pub fn invalidate_link(
    conn: &Connection,
    source_id: &str,
    target_id: &str,
    relation: &str,
    valid_until: Option<&str>,
) -> Result<Option<InvalidateResult>> {
    // An explicit caller stamp wins; otherwise supersede "as of now".
    let stamp = valid_until.map_or_else(|| Utc::now().to_rfc3339(), str::to_string);

    // P2 (#628 agent-3 follow-up): wrap the SELECT-then-UPDATE-then-
    // audit-INSERT in a single `BEGIN IMMEDIATE` transaction. Without
    // this, a daemon crash between the UPDATE (which clears the
    // signature) and the audit INSERT leaves H5's silent-supersession
    // state — the exact thing H5 was added to prevent. RESERVED-lock
    // semantics also serialise concurrent writers across processes.
    conn.execute(connection::SQL_BEGIN_IMMEDIATE, [])?;
    // From here on, every exit that is not a successful COMMIT MUST
    // `ROLLBACK` first — including a failed COMMIT (see the end of the
    // function). The rollback itself is best-effort: its error is
    // ignored because the caller is already receiving the root cause.
    let rollback = || {
        let _ = conn.execute(connection::SQL_ROLLBACK, []);
    };

    // Pull the prior `valid_until` AND the signing surface in a single
    // round-trip. `attest_level` is fetched alongside but is not
    // consumed below; only the signature blob decides whether the row
    // counts as signed.
    let prior_row: (
        Option<String>,
        Option<Vec<u8>>,
        Option<String>,
        Option<String>,
        Option<String>,
    ) = match conn.query_row(
        "SELECT valid_until, signature, attest_level, observed_by, valid_from \
             FROM memory_links \
             WHERE source_id = ?1 AND target_id = ?2 AND relation = ?3",
        params![source_id, target_id, relation],
        |r| {
            Ok((
                r.get::<_, Option<String>>(0)?,
                r.get::<_, Option<Vec<u8>>>(1)?,
                r.get::<_, Option<String>>(2)?,
                r.get::<_, Option<String>>(3)?,
                r.get::<_, Option<String>>(4)?,
            ))
        },
    ) {
        Ok(v) => v,
        // No matching link: nothing to supersede. Not an error.
        Err(rusqlite::Error::QueryReturnedNoRows) => {
            rollback();
            return Ok(None);
        }
        Err(e) => {
            rollback();
            return Err(e.into());
        }
    };
    let (prior, prior_signature, _prior_attest, observed_by, valid_from) = prior_row;
    let was_signed = prior_signature.is_some();

    let update_result = if was_signed {
        // v0.7.0 #628 H5 — clear the signing surface so a future
        // `memory_verify` honestly reports "unsigned" instead of
        // "signature mismatch". Resetting `attest_level` keeps the
        // column consistent with the now-NULL signature blob.
        conn.execute(
            "UPDATE memory_links \
                SET valid_until = ?4, signature = NULL, attest_level = 'unsigned' \
              WHERE source_id = ?1 AND target_id = ?2 AND relation = ?3",
            params![source_id, target_id, relation, &stamp],
        )
    } else {
        // Unsigned rows have no signing surface to clear; only the
        // validity stamp changes.
        conn.execute(
            "UPDATE memory_links SET valid_until = ?4 \
             WHERE source_id = ?1 AND target_id = ?2 AND relation = ?3",
            params![source_id, target_id, relation, &stamp],
        )
    };
    if let Err(e) = update_result {
        rollback();
        return Err(e.into());
    }

    // v0.7.0 #628 H5 — append an `invalidated` audit row when we
    // cleared a signature. The `payload_hash` commits to the
    // canonical CBOR over the post-supersession SignableLink so the
    // auditor sees exactly what the row looks like now (`valid_until`
    // populated). The `signature` column on the audit row is the
    // *previous* signature — the auditor can compare it byte-for-byte
    // against the original `memory_link.created` row's signature to
    // confirm the same key issued both events. We deliberately do NOT
    // re-sign here: this writer has no guarantee that the original
    // signing keypair is loaded (federation may have applied an
    // inbound `peer_attested` row), so an honest "the signing surface
    // was cleared" event is the only response that doesn't risk
    // forgery.
    if was_signed {
        let signable = crate::identity::sign::SignableLink {
            src_id: source_id,
            dst_id: target_id,
            relation,
            observed_by: observed_by.as_deref(),
            valid_from: valid_from.as_deref(),
            valid_until: Some(stamp.as_str()),
        };
        match crate::identity::sign::canonical_cbor(&signable) {
            Ok(cbor) => {
                let event = crate::signed_events::SignedEvent {
                    id: uuid::Uuid::new_v4().to_string(),
                    // Best-effort agent_id: the `observed_by` claim
                    // from the original signed row (the agent that
                    // attested the supersession's source row). Falls
                    // back to "unknown" when the legacy row carried
                    // no observed_by — vanishingly rare for signed
                    // rows since H2 always populates the column on
                    // self-signed inserts.
                    agent_id: observed_by.clone().unwrap_or_else(|| "unknown".to_string()),
                    event_type: crate::signed_events::event_types::MEMORY_LINK_INVALIDATED
                        .to_string(),
                    payload_hash: crate::signed_events::payload_hash(&cbor),
                    signature: prior_signature,
                    attest_level: crate::models::AttestLevel::Unsigned.as_str().to_string(),
                    timestamp: Utc::now().to_rfc3339(),
                    ..crate::signed_events::SignedEvent::default()
                };
                // v0.7.0 ship-readiness: use the `_no_tx` variant — we
                // are already inside the `BEGIN IMMEDIATE` opened at
                // the top of this function. The public
                // `append_signed_event` opens its own
                // unchecked_transaction which would fail under nesting
                // (SQLite does not allow nested transactions on a single
                // connection).
                if let Err(e) = crate::signed_events::append_signed_event_no_tx(conn, &event) {
                    // P2 (#628 agent-3): refuse to commit the UPDATE if
                    // the audit row can't be appended. Otherwise the
                    // signature clearing happens silently and we lose
                    // the audit trail H5 was added to provide.
                    rollback();
                    return Err(anyhow::anyhow!(
                        "failed to append memory_link.invalidated audit row \
                         (rolled back signature clearing): {e}"
                    ));
                }
            }
            Err(e) => {
                rollback();
                return Err(anyhow::anyhow!(
                    "failed to encode canonical CBOR for invalidation audit \
                     (rolled back signature clearing): {e}"
                ));
            }
        }
    }

    // A COMMIT can itself fail (e.g. SQLITE_BUSY while readers hold the
    // database), and SQLite then leaves the transaction open. Roll back
    // explicitly so the connection is never handed back to the caller
    // with a dangling write transaction; if SQLite already unwound the
    // transaction, the extra ROLLBACK fails harmlessly and is ignored.
    if let Err(e) = conn.execute(connection::SQL_COMMIT, []) {
        rollback();
        return Err(e.into());
    }
    Ok(Some(InvalidateResult {
        valid_until: stamp,
        previous_valid_until: prior,
    }))
}

/// Default cap on rows returned by `kg_query` when the caller does not
/// specify one (Pillar 2 / Stream C). Mirrors `kg_timeline`'s default so
/// the two traversal tools behave consistently for agents driving them.
///
/// `kg_query` substitutes this value when `limit` is `None`; the result
/// is still clamped to `[1, KG_QUERY_MAX_LIMIT]`.
pub const KG_QUERY_DEFAULT_LIMIT: usize = 200;

/// Hard ceiling on `kg_query` rows. Matches `kg_timeline` and the
/// existing list/recall caps to keep traversal bounded against
/// pathological fan-out.
///
/// A larger requested `limit` is clamped down to this value rather than
/// rejected.
pub const KG_QUERY_MAX_LIMIT: usize = 1000;

/// Maximum traversal depth supported by [`kg_query`]. The recursive-CTE
/// implementation enforces an explicit ceiling so a crafted call cannot
/// run an unbounded traversal; the charter (`v0.6.3-grand-slam.md`
/// § Performance Budgets) sets the published budget at depth ≤ 5.
///
/// Unlike the row cap, exceeding this bound is an error:
/// `kg_query` returns `StorageError::InvalidArgument` instead of
/// silently truncating the traversal.
pub const KG_QUERY_MAX_SUPPORTED_DEPTH: usize = 5;

/// Outbound KG traversal from a source memory (Pillar 2 / Stream C —
/// `memory_kg_query`). Returns one row per link reachable within
/// `max_depth` hops, filtered by:
///
/// - `valid_at` (RFC3339, optional): only links valid at that instant —
///   `valid_from <= valid_at AND (valid_until IS NULL OR valid_until > valid_at)`.
///   When omitted, the temporal filter is skipped and rows with NULL
///   `valid_from` are also returned (legacy / un-anchored links).
/// - `allowed_agents` (optional): when provided, only links with
///   `observed_by` in the set are returned. An **empty** allowlist
///   returns zero rows by design — callers signaling "no agents are
///   trusted" must get an empty traversal, not the unfiltered fallback.
///   When omitted entirely (`None`), the agent filter is skipped.
/// - `limit`: row cap, clamped to [1, [`KG_QUERY_MAX_LIMIT`]].
///
/// `max_depth` must be in `[1, KG_QUERY_MAX_SUPPORTED_DEPTH]`; passing
/// a larger value yields an explicit error rather than a silent
/// truncation, so callers learn they hit the ceiling instead of
/// receiving a partial graph.
///
/// Multi-hop traversal uses a recursive CTE with cycle detection on
/// the accumulated path, so cycles in the link graph cannot loop the
/// traversal indefinitely. Each hop reapplies the same temporal /
/// agent filters as the anchor — a chain only extends through links
/// that pass every filter on every hop.
///
/// Ordering is `depth ASC, COALESCE(valid_from, created_at) ASC,
/// created_at ASC` — shallower hops first, then time-ordered within
/// each level. For depth=1 callers this collapses to the original
/// time ordering. The `depth` field reflects the actual hop count and
/// `path` is the full `src->mid->target` chain.
pub fn kg_query(
    conn: &Connection,
    source_id: &str,
    max_depth: usize,
    valid_at: Option<&str>,
    allowed_agents: Option<&[String]>,
    limit: Option<usize>,
    include_invalidated: bool,
) -> Result<Vec<crate::models::KgQueryNode>> {
    use crate::models::KgQueryNode;

    if max_depth == 0 {
        // #962 typed envelope.
        return Err(anyhow::Error::new(StorageError::InvalidArgument {
            reason: crate::errors::msg::MAX_DEPTH_MIN.to_string(),
        }));
    }
    if max_depth > KG_QUERY_MAX_SUPPORTED_DEPTH {
        // #962 typed envelope.
        return Err(anyhow::Error::new(StorageError::InvalidArgument {
            reason: format!(
                "max_depth={max_depth} exceeds supported depth={KG_QUERY_MAX_SUPPORTED_DEPTH}"
            ),
        }));
    }

    // Empty allowlist == "no agents are trusted" — short-circuit so we
    // don't have to invent a SQL `IN ()` clause (which is invalid).
    if let Some(agents) = allowed_agents
        && agents.is_empty()
    {
        return Ok(Vec::new());
    }

    let cap = limit
        .unwrap_or(KG_QUERY_DEFAULT_LIMIT)
        .clamp(1, KG_QUERY_MAX_LIMIT);

    // Build the per-hop predicate once; the anchor and recursive members
    // both apply it to a row aliased `ml`. Bind values are appended in
    // resolution order so positional placeholders line up.
    let mut binds: Vec<Box<dyn rusqlite::ToSql>> = Vec::new();
    let mut hop_filter = String::new();
    if let Some(t) = valid_at {
        hop_filter.push_str(" AND ml.valid_from IS NOT NULL AND ml.valid_from <= ?");
        binds.push(Box::new(t.to_string()));
        hop_filter.push_str(&binds.len().to_string());
        hop_filter.push_str(" AND (ml.valid_until IS NULL OR ml.valid_until > ?");
        binds.push(Box::new(t.to_string()));
        hop_filter.push_str(&binds.len().to_string());
        hop_filter.push(')');
    } else if !include_invalidated {
        // "Current view" default — exclude edges that have been
        // invalidated via memory_kg_invalidate (valid_until set in the
        // past). NHI-P3-T7 regression: prior versions returned
        // invalidated edges in default kg_query results.
        // Caller can pass include_invalidated=true to opt in to the
        // full-history view.
        hop_filter.push_str(
            " AND (ml.valid_until IS NULL OR ml.valid_until > strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))",
        );
    }
    if let Some(agents) = allowed_agents {
        // Already short-circuited the empty case above.
        hop_filter.push_str(" AND ml.observed_by IN (");
        for (i, a) in agents.iter().enumerate() {
            binds.push(Box::new(a.clone()));
            if i > 0 {
                hop_filter.push_str(", ");
            }
            hop_filter.push('?');
            hop_filter.push_str(&binds.len().to_string());
        }
        hop_filter.push(')');
    }

    // Anchor binds source_id, recursive member binds max_depth, final
    // SELECT binds the row cap. Order matters — placeholders are
    // resolved by the position they occupy in the assembled string.
    binds.push(Box::new(source_id.to_string()));
    let source_ph = binds.len();
    binds.push(Box::new(i64::try_from(max_depth).unwrap_or(i64::MAX)));
    let max_depth_ph = binds.len();
    binds.push(Box::new(i64::try_from(cap).unwrap_or(i64::MAX)));
    let limit_ph = binds.len();

    // v0.7 AGE acceleration onramp (charter §"Stream C — KG Query Layer"
    // bullet 4). The recursive CTE below is the v0.6.3 SQLite/Postgres
    // implementation. When the v0.7 SAL ships with Apache AGE wired in,
    // the equivalent property-graph query will look like:
    //
    //   MATCH (s {id: $source_id})-[r*1..$max_depth {valid_from <= $t,
    //          observed_by IN $allowed_agents}]->(t)
    //   WHERE NONE(n IN nodes(path) WHERE n.id = t.id)  -- cycle prune
    //   RETURN t.id, last(r).relation, t.title, length(r) AS depth,
    //          [n IN nodes(path) | n.id] AS path
    //   ORDER BY depth, last(r).valid_from
    //   LIMIT $limit
    //
    // Stub left here per charter intent so the v0.7 migration to AGE
    // has a 1:1 reference query alongside the SQL implementation.

    let sql = format!(
        "WITH RECURSIVE traversal(\
            target_id, relation, valid_from, valid_until, observed_by, \
            link_created_at, depth, path\
         ) AS (\
            SELECT ml.target_id, ml.relation, ml.valid_from, ml.valid_until, \
                   ml.observed_by, ml.created_at, 1, \
                   json_array(ml.source_id, ml.target_id) \
            FROM memory_links ml \
            WHERE ml.source_id = ?{source_ph}{hop_filter} \
            UNION ALL \
            SELECT ml.target_id, ml.relation, ml.valid_from, ml.valid_until, \
                   ml.observed_by, ml.created_at, t.depth + 1, \
                   json_insert(t.path, '$[' || json_array_length(t.path) || ']', ml.target_id) \
            FROM memory_links ml \
            JOIN traversal t ON ml.source_id = t.target_id \
            WHERE t.depth < ?{max_depth_ph} \
              AND NOT EXISTS (SELECT 1 FROM json_each(t.path) WHERE value = ml.target_id)\
              {hop_filter}\
         ) \
         SELECT t.target_id, t.relation, t.valid_from, t.valid_until, \
                t.observed_by, m.title, m.namespace, t.depth, \
                (SELECT group_concat(value, '->') FROM json_each(t.path)) \
         FROM traversal t \
         JOIN memories m ON m.id = t.target_id \
         ORDER BY t.depth ASC, COALESCE(t.valid_from, t.link_created_at) ASC, \
                  t.link_created_at ASC \
         LIMIT ?{limit_ph}",
    );

    let mut stmt = conn.prepare(&sql)?;
    let bind_refs: Vec<&dyn rusqlite::ToSql> = binds.iter().map(AsRef::as_ref).collect();
    let rows = stmt.query_map(rusqlite::params_from_iter(bind_refs), |row| {
        let target_id: String = row.get(0)?;
        let depth: i64 = row.get(7)?;
        Ok(KgQueryNode {
            target_id,
            relation: row.get(1)?,
            valid_from: row.get(2)?,
            valid_until: row.get(3)?,
            observed_by: row.get(4)?,
            title: row.get(5)?,
            target_namespace: row.get(6)?,
            depth: usize::try_from(depth).unwrap_or(0),
            path: row.get(8)?,
        })
    })?;
    rows.collect::<rusqlite::Result<Vec<_>>>()
        .map_err(Into::into)
}

/// Default cap on paths returned by [`find_paths`] when the caller does
/// not specify one. Matches the v0.7 J7 charter.
///
/// Substituted when `max_results` is `None`; the result is still
/// clamped to `[1, FIND_PATHS_MAX_LIMIT]`.
pub const FIND_PATHS_DEFAULT_LIMIT: usize = 10;

/// Hard ceiling on paths returned by [`find_paths`]. A crafted call
/// asking for more than this many paths is clamped down. Matches the
/// v0.7 J7 charter.
///
/// Clamping is silent: no error is raised for an over-large request.
pub const FIND_PATHS_MAX_LIMIT: usize = 50;

/// Hard ceiling on traversal depth supported by [`find_paths`].
/// Distinct from [`KG_QUERY_MAX_SUPPORTED_DEPTH`] because path
/// enumeration is more expensive than reachability — we can afford a
/// slightly deeper budget for the BFS but not by much.
///
/// **Cap = 7.** Asking for more is rejected with an error that names
/// this constant explicitly so callers see exactly which knob to file
/// against. Contact maintainers to raise this bound *after* benchmarking
/// the new ceiling on a representative KG; the BFS is `O(d * |E|)` per
/// hop with a `json_each` cycle check, and depth-8+ has not been load-
/// tested as of v0.7.0.
///
/// The rejection is a `StorageError::InvalidArgument`; a depth of `0`
/// is rejected the same way.
pub const FIND_PATHS_MAX_DEPTH: usize = 7;

/// Default depth used when the caller omits `max_depth`. Mirrors the
/// v0.7 J7 charter's "shallow by default, opt-in deep traversal" rule.
///
/// Must stay within `[1, FIND_PATHS_MAX_DEPTH]`, since the default goes
/// through the same range check as a caller-supplied depth.
pub const FIND_PATHS_DEFAULT_DEPTH: usize = 4;

/// v0.7 J7 — enumerate up to N undirected paths between two memories.
///
/// Walks `memory_links` with a recursive CTE that carries the full
/// visited-id chain on each row, both as the outbound `path` rendered
/// for callers and as the cycle-detection set so the traversal cannot
/// loop on a cyclic link graph. Each row of the CTE represents one
/// candidate prefix; rows that reach `target_id` are projected out as
/// completed paths.
///
/// # Directionality contract (v0.7.0)
///
/// **`find_paths` is UNDIRECTED** (UNION of forward + reverse edges at
/// every hop) — **`kg_query` is DIRECTED** (forward edges only, by
/// design). The two tools answer different questions and are not
/// interchangeable:
///
/// - `find_paths(a, b)` — *are these two memories connected through any
///   relation chain?* Symmetric closure: `find_paths(a, b)` and
///   `find_paths(b, a)` return the same path set (modulo reversal).
/// - `kg_query(start, depth)` — *what does the directed `source →
///   target` subgraph rooted at `start` look like at depth ≤ N?*
///   `kg_query(b, …)` will not surface `a → b`.
///
/// **`include_invalidated` is honored identically** by both tools: when
/// `false` (default), edges whose `valid_until` lies in the past are
/// excluded from the traversal; when `true`, the full historical link
/// graph is walked. The flag's semantics do not change with directionality.
///
/// The KG corpus uses directional links to model temporal ordering of an
/// assertion (`source → target`), so path queries — which are "are these
/// two memories connected via *any* relation chain?" — apply the
/// symmetric closure here via `UNION ALL` over the original edge and the
/// reverse edge at each hop.
///
/// # Limits
///
/// `max_depth` defaults to [`FIND_PATHS_DEFAULT_DEPTH`] and is hard-
/// capped at [`FIND_PATHS_MAX_DEPTH`] (= 7); passing a larger value
/// yields an explicit error rather than silent truncation. The error
/// message names `FIND_PATHS_MAX_DEPTH` so operators can grep the
/// codebase for the single tunable knob. `max_results` defaults to
/// [`FIND_PATHS_DEFAULT_LIMIT`] and is clamped at
/// [`FIND_PATHS_MAX_LIMIT`]; passing a larger value collapses to the
/// ceiling without error (paths beyond the cap are dropped, the
/// shortest paths win on the `ORDER BY`).
///
/// Returns `Vec<Vec<String>>` — one inner vector per discovered path,
/// each carrying the chain of memory ids from `source_id` (first) to
/// `target_id` (last). Self-paths (`source_id == target_id`) collapse
/// to a single one-element path. Disconnected pairs return an empty
/// outer vector.
pub fn find_paths(
    conn: &Connection,
    source_id: &str,
    target_id: &str,
    max_depth: Option<usize>,
    max_results: Option<usize>,
    include_invalidated: bool,
) -> Result<Vec<Vec<String>>> {
    let depth = max_depth.unwrap_or(FIND_PATHS_DEFAULT_DEPTH);
    if depth == 0 {
        // #962 typed envelope.
        return Err(anyhow::Error::new(StorageError::InvalidArgument {
            reason: crate::errors::msg::MAX_DEPTH_MIN.to_string(),
        }));
    }
    if depth > FIND_PATHS_MAX_DEPTH {
        // #962 typed envelope.
        return Err(anyhow::Error::new(StorageError::InvalidArgument {
            reason: format!(
                "max_depth={depth} exceeds supported depth={FIND_PATHS_MAX_DEPTH} (FIND_PATHS_MAX_DEPTH); contact maintainers to raise this bound after benchmarking"
            ),
        }));
    }
    let cap = max_results
        .unwrap_or(FIND_PATHS_DEFAULT_LIMIT)
        .clamp(1, FIND_PATHS_MAX_LIMIT);

    // Self-path short-circuit. The recursive CTE below requires depth>=1
    // before it can match `target_id`; the trivial chain is just the
    // single-element path through the start node.
    if source_id == target_id {
        return Ok(vec![vec![source_id.to_string()]]);
    }

    // "Current view" filter — exclude edges whose `valid_until` lies in
    // the past (invalidated via `memory_kg_invalidate`). Caller can pass
    // `include_invalidated=true` to traverse the full historical link
    // graph. NHI-P3-T7 regression: prior versions enumerated paths
    // through invalidated edges by default.
    let invalidated_filter = if include_invalidated {
        ""
    } else {
        " WHERE (valid_until IS NULL OR valid_until > strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))"
    };

    // The CTE walks symmetric edges: for each row in `memory_links` we
    // also generate its reverse so the traversal is undirected. Cycle
    // detection uses the JSON-encoded path array (same trick as
    // `kg_query`) — `NOT EXISTS (... json_each ...)` short-circuits the
    // recursion as soon as the next hop would revisit a node already in
    // the prefix.
    //
    // The completed-path filter sits in the outer SELECT rather than
    // the recursive member because a partial prefix that lands on
    // `target_id` should be reported AND continue to extend (a longer
    // path through `target_id` might reach itself through a different
    // route — though for the KG that should be rare, the CTE doesn't
    // need to know that). `ORDER BY depth, path` keeps the shortest
    // paths first so the `LIMIT` cap drops the longest tail.
    let sql = format!(
        "WITH RECURSIVE traversal(current_id, depth, path) AS (
            SELECT ?1, 0, json_array(?1)
            UNION ALL
            SELECT next_id, t.depth + 1,
                   json_insert(t.path, '$[' || json_array_length(t.path) || ']', next_id)
            FROM traversal t
            JOIN (
                SELECT source_id AS from_id, target_id AS next_id
                FROM memory_links{invalidated_filter}
                UNION
                SELECT target_id AS from_id, source_id AS next_id
                FROM memory_links{invalidated_filter}
            ) edges ON edges.from_id = t.current_id
            WHERE t.depth < ?3
              AND NOT EXISTS (
                  SELECT 1 FROM json_each(t.path) WHERE value = next_id
              )
         )
         SELECT path
         FROM traversal
         WHERE current_id = ?2 AND depth >= 1
         ORDER BY depth ASC, path ASC
         LIMIT ?4"
    );

    let depth_i64 = i64::try_from(depth).unwrap_or(i64::MAX);
    let cap_i64 = i64::try_from(cap).unwrap_or(i64::MAX);

    let mut stmt = conn.prepare(&sql)?;
    let rows = stmt.query_map(params![source_id, target_id, depth_i64, cap_i64], |row| {
        let json_path: String = row.get(0)?;
        Ok(json_path)
    })?;

    let mut paths: Vec<Vec<String>> = Vec::new();
    for row in rows {
        let json = row?;
        let parsed: Vec<String> = serde_json::from_str(&json).map_err(|e| {
            rusqlite::Error::FromSqlConversionFailure(0, rusqlite::types::Type::Text, Box::new(e))
        })?;
        paths.push(parsed);
    }

    Ok(paths)
}

/// Return every alias recorded for `entity_id`.
///
/// Rows come back oldest registration first; aliases sharing a
/// `created_at` are ordered alphabetically so the display order is
/// stable. An entity with no aliases yields an empty vector.
fn list_entity_aliases(conn: &Connection, entity_id: &str) -> Result<Vec<String>> {
    let mut stmt = conn.prepare(
        "SELECT alias FROM entity_aliases
         WHERE entity_id = ?1
         ORDER BY created_at ASC, alias ASC",
    )?;
    let rows = stmt.query_map(params![entity_id], |row| row.get::<_, String>(0))?;

    // Stop at the first row that fails to decode.
    let mut found = Vec::new();
    for alias in rows {
        found.push(alias?);
    }
    Ok(found)
}

/// Create or refresh the registration record for `agent_id` in the
/// reserved `_agents` namespace.
///
/// The record is a long-tier memory titled via
/// `agent_registration_title(agent_id)`. Its metadata (mirrored
/// verbatim as JSON into `content`) carries the agent id, `agent_type`,
/// `capabilities`, `registered_at`, `last_seen_at` and a collective
/// `scope`.
///
/// Registering an already-known agent stamps a fresh `last_seen_at` and
/// replaces `agent_type` + `capabilities`, but reuses the
/// `registered_at` found on the existing row so callers keep the
/// original provenance timestamp.
///
/// Returns whatever `insert` returns for the record — the stored
/// memory ID.
///
/// # Errors
///
/// Fails when the metadata cannot be serialized or when `insert` fails.
pub fn register_agent(
    conn: &Connection,
    agent_id: &str,
    agent_type: &str,
    capabilities: &[String],
) -> Result<String> {
    let title = crate::models::agent_registration_title(agent_id);
    let now = Utc::now().to_rfc3339();

    // Look for a `registered_at` left by an earlier registration.
    let stored_registered_at = conn
        .query_row(
            "SELECT json_extract(metadata, '$.registered_at') FROM memories
             WHERE namespace = ?1 AND title = ?2",
            params![AGENTS_NAMESPACE, &title],
            |row| row.get::<_, Option<String>>(0),
        );
    let registered_at = match stored_registered_at {
        Ok(Some(original)) => original,
        // First registration, a row lacking the field, or a failed
        // lookup all fall back to "now".
        // NOTE(review): a genuine query error is indistinguishable from
        // "not registered yet" here and silently resets the timestamp —
        // confirm that fallback is intended.
        Ok(None) | Err(_) => now.clone(),
    };

    let caps_json: Vec<serde_json::Value> = capabilities
        .iter()
        .cloned()
        .map(serde_json::Value::String)
        .collect();

    let metadata = serde_json::json!({
        "agent_id": agent_id,
        (field_names::AGENT_TYPE): agent_type,
        (field_names::CAPABILITIES): caps_json,
        (field_names::REGISTERED_AT): registered_at,
        (field_names::LAST_SEEN_AT): now,
        // #910 (SAL-level enforcement) — the `_agents` namespace is a
        // public roster: every agent legitimately needs to see which
        // peers are registered (consensus voting, peer attestation,
        // etc.). Marking the row scope=collective keeps the SAL
        // visibility filter from dropping it on cross-agent reads.
        "scope": crate::models::MemoryScope::Collective.as_str(),
    });

    // `content` mirrors the metadata as a JSON string.
    let content = serde_json::to_string(&metadata)
        .context("failed to serialize agent registration content")?;

    let created_at = now.clone();
    let record = Memory {
        id: uuid::Uuid::new_v4().to_string(),
        tier: Tier::Long,
        namespace: AGENTS_NAMESPACE.to_string(),
        title,
        content,
        tags: vec!["agent-registration".to_string()],
        priority: 5,
        confidence: 1.0,
        source: "system".to_string(),
        access_count: 0,
        created_at,
        updated_at: now,
        last_accessed_at: None,
        expires_at: None,
        metadata,
        reflection_depth: 0,
        memory_kind: crate::models::MemoryKind::Observation,
        entity_id: None,
        persona_version: None,
        citations: Vec::new(),
        source_uri: None,
        source_span: None,
        confidence_source: ConfidenceSource::CallerProvided,
        confidence_signals: None,
        confidence_decayed_at: None,
        version: 1,
    };

    insert(conn, &record)
}

/// Enumerate the agent roster stored in the `_agents` namespace.
///
/// Each non-expired row's `metadata` JSON is decoded into an
/// [`AgentRegistration`]; rows are ordered by their `registered_at`
/// metadata value, ascending. A string field that is absent or not a
/// JSON string reads as `""`; `capabilities` that is absent or not an
/// array reads as empty, and non-string array entries are skipped.
///
/// # Errors
///
/// Fails on query errors or when a row's metadata is not valid JSON.
pub fn list_agents(conn: &Connection) -> Result<Vec<AgentRegistration>> {
    /// String value stored under `key`, or `""` when missing/non-string.
    fn text_field(meta: &serde_json::Value, key: &str) -> String {
        meta.get(key)
            .and_then(serde_json::Value::as_str)
            .unwrap_or_default()
            .to_string()
    }

    let now = Utc::now().to_rfc3339();
    let mut stmt = conn.prepare(
        "SELECT metadata FROM memories
         WHERE namespace = ?1
           AND (expires_at IS NULL OR expires_at > ?2)
         ORDER BY json_extract(metadata, '$.registered_at') ASC",
    )?;
    let rows = stmt.query_map(params![AGENTS_NAMESPACE, now], |row| {
        row.get::<_, String>(0)
    })?;

    // Decode row by row; the first failure aborts the listing.
    let agents = rows
        .map(|raw| -> Result<AgentRegistration> {
            let raw = raw?;
            let meta: serde_json::Value =
                serde_json::from_str(&raw).context("failed to parse agent metadata as JSON")?;

            let agent_id = text_field(&meta, "agent_id");
            let agent_type = text_field(&meta, field_names::AGENT_TYPE);
            let capabilities: Vec<String> = match meta
                .get(field_names::CAPABILITIES)
                .and_then(serde_json::Value::as_array)
            {
                Some(items) => items
                    .iter()
                    .filter_map(|item| item.as_str().map(String::from))
                    .collect(),
                None => Vec::new(),
            };
            let registered_at = text_field(&meta, field_names::REGISTERED_AT);
            let last_seen_at = text_field(&meta, field_names::LAST_SEEN_AT);

            Ok(AgentRegistration {
                agent_id,
                agent_type,
                capabilities,
                registered_at,
                last_seen_at,
            })
        })
        .collect::<Result<Vec<_>>>()?;
    Ok(agents)
}

/// Attach an Ed25519 public key to an agent's `_agents` registration
/// row, replacing any key already bound (#626 Layer-3, Task 1.3 / C3).
///
/// This key is what the write-path attestation gate verifies against:
/// a signed write claiming `agent_id` is upgraded from *claimed* to
/// *attested* only when its signature verifies under the key bound
/// here. The key is written to `metadata.agent_pubkey` (URL-safe-no-pad
/// base64), together with a `pubkey_bound_at` RFC3339 timestamp that
/// records when the binding or rotation happened.
///
/// No schema change is involved: the key lives in the registration
/// row's JSON. A single `UPDATE` applies the same `json_set` to
/// `metadata` and to the mirrored `content` column and bumps
/// `updated_at`, so `list_agents` and the verifier never see the two
/// columns disagree.
///
/// The agent MUST already be registered (`register_agent`). Binding to
/// an unknown id is refused so a stray pubkey can never shadow a future
/// legitimate registration.
///
/// # Errors
///
/// - the agent is not registered (the `UPDATE` matched no `_agents` row)
/// - the underlying `UPDATE` fails
pub fn bind_agent_pubkey(conn: &Connection, agent_id: &str, pubkey_b64: &str) -> Result<()> {
    let row_title = crate::models::agent_registration_title(agent_id);
    let bound_at = Utc::now().to_rfc3339();
    let updated_rows = conn.execute(
        "UPDATE memories SET
            metadata = json_set(metadata, '$.agent_pubkey', ?3, '$.pubkey_bound_at', ?4),
            content  = json_set(content,  '$.agent_pubkey', ?3, '$.pubkey_bound_at', ?4),
            updated_at = ?4
         WHERE namespace = ?1 AND title = ?2",
        params![AGENTS_NAMESPACE, &row_title, pubkey_b64, &bound_at],
    )?;

    // Zero touched rows means there is no registration to bind to.
    match updated_rows {
        0 => Err(anyhow::anyhow!(
            "cannot bind pubkey: agent '{agent_id}' is not registered (register it first)"
        )),
        _ => Ok(()),
    }
}

/// Fetch the Ed25519 public key bound to `agent_id`, if any (#626
/// Layer-3, Task 1.3 / C3).
///
/// Returns `Ok(None)` when the agent is registered but has no bound key
/// (the permissive-default attestation posture: such an agent can still
/// write *claimed* rows), and also when the agent is not registered at
/// all — both collapse to "no key to verify against". The verifier
/// distinguishes the two only when `AI_MEMORY_REQUIRE_AGENT_ATTESTATION`
/// is set, where a missing key on a required write is a hard reject.
///
/// # Errors
///
/// Surfaces only underlying query failures.
pub fn agent_pubkey(conn: &Connection, agent_id: &str) -> Result<Option<String>> {
    let title = crate::models::agent_registration_title(agent_id);
    let pubkey = conn
        .query_row(
            "SELECT json_extract(metadata, '$.agent_pubkey') FROM memories
             WHERE namespace = ?1 AND title = ?2",
            params![AGENTS_NAMESPACE, &title],
            |row| row.get::<_, Option<String>>(0),
        )
        .ok()
        .flatten();
    Ok(pubkey)
}

/// Revoke the Ed25519 public key bound to `agent_id` (#626 Layer-3,
/// Task 1.3 / C5 — key revocation).
///
/// Strips `agent_pubkey` and `pubkey_bound_at` from the registration
/// row's `metadata` and from the mirrored `content` JSON, and records a
/// `pubkey_revoked_at` timestamp in both so the revocation leaves an
/// audit trail. The same timestamp becomes the row's `updated_at`.
/// With the key gone the agent is back to the permissive *claimed*
/// posture (nothing to verify against) until a new key is bound.
///
/// Idempotent for registered agents: when no key is bound the
/// `json_remove` changes nothing and the call still succeeds.
///
/// # Errors
///
/// - the agent is not registered (no `_agents` row for `agent_id`)
/// - the underlying `UPDATE` fails
pub fn revoke_agent_pubkey(conn: &Connection, agent_id: &str) -> Result<()> {
    let registration_title = crate::models::agent_registration_title(agent_id);
    // Shared by the `pubkey_revoked_at` marker and `updated_at` (?3).
    let revoked_at = Utc::now().to_rfc3339();
    let rows_touched = conn.execute(
        "UPDATE memories SET
            metadata = json_set(
                json_remove(metadata, '$.agent_pubkey', '$.pubkey_bound_at'),
                '$.pubkey_revoked_at', ?3),
            content  = json_set(
                json_remove(content,  '$.agent_pubkey', '$.pubkey_bound_at'),
                '$.pubkey_revoked_at', ?3),
            updated_at = ?3
         WHERE namespace = ?1 AND title = ?2",
        params![AGENTS_NAMESPACE, &registration_title, &revoked_at],
    )?;
    match rows_touched {
        // No registration row matched, so there was nothing to revoke from.
        0 => anyhow::bail!(
            "cannot revoke pubkey: agent '{agent_id}' is not registered (register it first)"
        ),
        _ => Ok(()),
    }
}

/// Collect store-wide statistics for the `memories` database.
///
/// Reports the total row count, per-tier and per-namespace counts (each
/// ordered by descending count), the number of rows expiring within the
/// next hour, the `memory_links` row count, the on-disk size of
/// `db_path`, the embedding-dimension violation count and the
/// process-local HNSW eviction counter.
///
/// The link count, file size and dimension-violation count are
/// best-effort: a failure in any of them yields `0` for that field
/// instead of failing the whole call.
///
/// # Errors
///
/// Fails when the total, per-tier, per-namespace or expiring-soon query
/// fails.
pub fn stats(conn: &Connection, db_path: &Path) -> Result<Stats> {
    let total: usize = conn.query_row("SELECT COUNT(*) FROM memories", [], |r| r.get(0))?;

    let mut stmt =
        conn.prepare("SELECT tier, COUNT(*) FROM memories GROUP BY tier ORDER BY COUNT(*) DESC")?;
    let by_tier = stmt
        .query_map([], |row| {
            Ok(TierCount {
                tier: row.get(0)?,
                count: row.get(1)?,
            })
        })?
        .collect::<rusqlite::Result<Vec<_>>>()?;

    let mut stmt = conn.prepare(
        "SELECT namespace, COUNT(*) FROM memories GROUP BY namespace ORDER BY COUNT(*) DESC",
    )?;
    let by_namespace = stmt
        .query_map([], |row| {
            Ok(NamespaceCount {
                namespace: row.get(0)?,
                count: row.get(1)?,
            })
        })?
        .collect::<rusqlite::Result<Vec<_>>>()?;

    // Both window bounds come from ONE clock read so the "expiring soon"
    // window is exactly one hour wide rather than an hour plus however
    // long elapsed between two separate `Utc::now()` calls.
    let window_open = Utc::now();
    let now = window_open.to_rfc3339();
    let one_hour = (window_open + chrono::Duration::hours(1)).to_rfc3339();
    let expiring_soon: usize = conn.query_row(
        "SELECT COUNT(*) FROM memories WHERE expires_at IS NOT NULL AND expires_at > ?1 AND expires_at <= ?2",
        params![now, one_hour], |r| r.get(0),
    )?;

    // Best-effort: a failing `memory_links` count reports 0.
    let links_count: usize = conn
        .query_row("SELECT COUNT(*) FROM memory_links", [], |r| r.get(0))
        .unwrap_or(0);
    // Best-effort: an unreadable database file reports a size of 0.
    let db_size_bytes = std::fs::metadata(db_path).map_or(0, |m| m.len());
    // v0.6.3.1 P2 (G4) — surface mixed-dim corruption to operators. Best-effort:
    // any error here returns 0 rather than failing the stats endpoint.
    let dim_violations = dim_violations(conn).unwrap_or(0);

    // v0.6.3.1 (P3, G2): cumulative HNSW eviction count is process-local
    // state — read from the static counter in src/hnsw.rs. Surfacing it in
    // `stats` lets `memory_stats` callers and `ai-memory doctor` (P7) flag
    // operators who are sustaining at the index cap.
    let index_evictions_total = crate::hnsw::index_evictions_total();

    Ok(Stats {
        total,
        by_tier,
        by_namespace,
        expiring_soon,
        links_count,
        db_size_bytes,
        dim_violations,
        index_evictions_total,
    })
}

/// Run [`gc`] only when at least one memory has already expired.
///
/// A single `EXISTS` probe decides whether the sweep is worth starting.
/// If the probe reports no expired row — or the probe itself fails —
/// nothing is swept and `Ok(0)` is returned. Otherwise the result of
/// `gc(conn, archive)` is returned unchanged.
pub fn gc_if_needed(conn: &Connection, archive: bool) -> Result<usize> {
    let cutoff = Utc::now().to_rfc3339();
    let probe = conn.query_row(
        "SELECT EXISTS(SELECT 1 FROM memories WHERE expires_at IS NOT NULL AND expires_at < ?1)",
        params![cutoff],
        |row| row.get::<_, bool>(0),
    );
    // Only a successful probe that answered `true` triggers the sweep;
    // a probe error is treated the same as "nothing expired".
    if !matches!(probe, Ok(true)) {
        return Ok(0);
    }
    gc(conn, archive)
}

/// Purge archived rows older than `max_days`, when a retention is configured.
///
/// Delegates to [`purge_archive`] only for a strictly positive
/// `max_days`. `None`, zero and negative values all mean "no automatic
/// purge" and return `Ok(0)` without touching the archive.
pub fn auto_purge_archive(conn: &Connection, max_days: Option<i64>) -> Result<usize> {
    if let Some(days) = max_days.filter(|configured| *configured > 0) {
        purge_archive(conn, Some(days))
    } else {
        Ok(0)
    }
}

/// #1579 B6 (F5.7) — expired rows reaped per GC transaction.
///
/// The pre-fix `gc` ran ONE `BEGIN IMMEDIATE` covering an archive
/// `INSERT … SELECT` + `DELETE` over the entire expired set, holding
/// the sqlite write lock for the whole sweep (seconds on a 100k-row
/// expiry backlog, during which every concurrent writer queues behind
/// `busy_timeout`). Chunking bounds the lock-hold per transaction to
/// this many rows; the loop in [`gc`] re-runs until the backlog drains.
/// 500 keeps each archive-copy + delete transaction in the
/// single-digit-millisecond band on the P1 audit corpus while still
/// amortising the per-transaction fsync across a useful batch.
///
/// The value doubles as the sweep's stop condition: [`gc`] ends its
/// loop as soon as a chunk deletes fewer rows than this.
const GC_CHUNK_ROWS: usize = 500;

/// Subquery selecting one bounded chunk of expired row ids. Shared by
/// the archive `INSERT … SELECT` and the `DELETE` inside the same
/// `BEGIN IMMEDIATE` transaction; `ORDER BY rowid` makes the selection
/// fully deterministic, so both statements — which run against the
/// identical snapshot because the transaction holds the write lock —
/// target the exact same rows and the archive-before-delete invariant
/// is preserved chunk by chunk.
///
/// Placeholders, as bound by [`gc`]: `?1` is the expiry cutoff (the
/// sweep's RFC 3339 "now" string) and `?2` is the chunk size
/// ([`GC_CHUNK_ROWS`]). It is spliced into the enclosing statements as
/// an `id IN (…)` subquery, so it must stay a single `SELECT id …`.
const SQL_GC_EXPIRED_CHUNK_IDS: &str = "SELECT id FROM memories \
     WHERE expires_at IS NOT NULL AND expires_at < ?1 \
     ORDER BY rowid LIMIT ?2";

/// Reap every expired memory, optionally archiving each row first.
///
/// Rows whose `expires_at` is earlier than the moment the sweep starts
/// are deleted from `memories`. When `archive` is `true` each row is
/// first copied into `archived_memories` with reason `'ttl_expired'`,
/// keeping its embedding, tier and expiry so it can be restored later.
///
/// #1579 B6 (F5.7) — the sweep is chunked: every loop iteration handles
/// at most [`GC_CHUNK_ROWS`] rows inside its own `BEGIN IMMEDIATE`
/// transaction, so concurrent writers interleave between chunks instead
/// of stalling behind one giant sweep transaction. Within a chunk the
/// archive `INSERT` and the `DELETE` address the same deterministic id
/// set (see [`SQL_GC_EXPIRED_CHUNK_IDS`]).
///
/// Returns the total number of rows deleted across all chunks.
///
/// # Errors
///
/// A failure in any chunk — including a failed `COMMIT` — rolls back
/// that chunk only and is returned; chunks committed earlier stay
/// reaped (same observable contract as repeated smaller `gc` calls).
/// The trailing `namespace_meta` cleanup is best-effort and never fails
/// the call.
pub fn gc(conn: &Connection, archive: bool) -> Result<usize> {
    let now = Utc::now().to_rfc3339();
    // The statement text does not change between chunks, so it is built
    // once up front instead of being re-formatted on every iteration.
    // v0.6.3.1 P2 (G5) — preserve embedding + tier + expiry on GC archive.
    let archive_sql = archive.then(|| {
        format!(
            "INSERT OR REPLACE INTO archived_memories
             (id, tier, namespace, title, content, tags, priority, confidence,
              source, access_count, created_at, updated_at, last_accessed_at,
              expires_at, archived_at, archive_reason, metadata,
              embedding, embedding_dim, original_tier, original_expires_at,
              reflection_depth, atomised_into, atom_of, memory_kind,
              entity_id, persona_version, citations, source_uri, source_span,
              confidence_source, confidence_signals, confidence_decayed_at,
              mentioned_entity_id, version)
             SELECT id, tier, namespace, title, content, tags, priority, confidence,
                    source, access_count, created_at, updated_at, last_accessed_at,
                    expires_at, ?1, 'ttl_expired', metadata,
                    embedding, embedding_dim, tier, expires_at,
                    reflection_depth, atomised_into, atom_of, memory_kind,
                    entity_id, persona_version, citations, source_uri, source_span,
                    confidence_source, confidence_signals, confidence_decayed_at,
                    mentioned_entity_id, version
             FROM memories
             WHERE id IN ({SQL_GC_EXPIRED_CHUNK_IDS})"
        )
    });
    let delete_sql = format!("DELETE FROM memories WHERE id IN ({SQL_GC_EXPIRED_CHUNK_IDS})");

    let mut total = 0usize;
    loop {
        conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)?;
        let chunk = (|| -> Result<usize> {
            // Archive first, delete second: both statements resolve the
            // same id set because they share the chunk subquery and run
            // inside the same write transaction.
            if let Some(sql) = archive_sql.as_deref() {
                conn.prepare_cached(sql)?
                    .execute(params![now, GC_CHUNK_ROWS])?;
            }
            let deleted = conn
                .prepare_cached(&delete_sql)?
                .execute(params![now, GC_CHUNK_ROWS])?;
            // COMMIT is part of the fallible section so that a failed
            // commit takes the rollback path below instead of leaving
            // the connection inside an open write transaction.
            conn.execute_batch(connection::SQL_COMMIT)?;
            Ok(deleted)
        })();
        match chunk {
            Ok(deleted) => {
                total += deleted;
                // A short chunk means the expired backlog is drained.
                if deleted < GC_CHUNK_ROWS {
                    break;
                }
            }
            Err(e) => {
                // Best-effort: if SQLite already rolled the transaction
                // back on its own this ROLLBACK errors harmlessly.
                let _ = conn.execute_batch(connection::SQL_ROLLBACK);
                return Err(e);
            }
        }
    }
    // Clean up namespace_meta rows pointing to deleted memories.
    // #1579 B6 — correlated NOT EXISTS instead of the former
    // `standard_id NOT IN (SELECT id FROM memories)`, which
    // materialised the full id set on every sweep; the rewrite is one
    // primary-key probe per namespace_meta row (a small table — one
    // row per namespace standard). Deliberately best-effort: a failure
    // here does not fail the sweep that already committed.
    let _ = conn.execute(
        "DELETE FROM namespace_meta WHERE NOT EXISTS \
         (SELECT 1 FROM memories WHERE memories.id = namespace_meta.standard_id)",
        [],
    );
    Ok(total)
}

// ---------------------------------------------------------------------------
// Archive operations
// ---------------------------------------------------------------------------

/// List rows from `archived_memories` as JSON objects, most recently
/// archived first.
///
/// `namespace`, when given, restricts the listing to that namespace;
/// `limit` / `offset` page through the result. JSON-encoded TEXT
/// columns (`tags`, `metadata`, `citations`, `source_span`,
/// `confidence_signals`) are parsed so callers receive structured JSON
/// rather than escaped strings.
///
/// # Errors
///
/// Fails when the statement cannot be prepared or run, or when a
/// strictly-typed column cannot be read from a row.
pub fn list_archived(
    conn: &Connection,
    namespace: Option<&str>,
    limit: usize,
    offset: usize,
) -> Result<Vec<serde_json::Value>> {
    // Single copy of the projection: every positional `row.get(N)` in
    // the row mapper below indexes into exactly this column order.
    const PROJECTION: &str =
        "SELECT id, tier, namespace, title, content, tags, priority, confidence, \
         source, access_count, created_at, updated_at, last_accessed_at, \
         expires_at, archived_at, archive_reason, metadata, \
         reflection_depth, memory_kind, entity_id, persona_version, \
         citations, source_uri, source_span, confidence_source, \
         confidence_signals, confidence_decayed_at, version, \
         atomised_into, atom_of, mentioned_entity_id \
         FROM archived_memories";

    /// Parse a JSON TEXT column; `None` for a missing or malformed value.
    fn parse_json_text(text: Option<String>) -> Option<serde_json::Value> {
        text.and_then(|raw| serde_json::from_str::<serde_json::Value>(&raw).ok())
    }

    // The namespace filter, when present, takes ?1 and shifts the paging
    // placeholders up by one.
    let mut bound: Vec<Box<dyn rusqlite::types::ToSql>> = Vec::with_capacity(3);
    let sql = match namespace {
        Some(ns) => {
            bound.push(Box::new(ns.to_string()));
            format!(
                "{PROJECTION} WHERE namespace = ?1 \
                 ORDER BY archived_at DESC LIMIT ?2 OFFSET ?3"
            )
        }
        None => format!("{PROJECTION} ORDER BY archived_at DESC LIMIT ?1 OFFSET ?2"),
    };
    bound.push(Box::new(limit));
    bound.push(Box::new(offset));
    let bound_refs: Vec<&dyn rusqlite::types::ToSql> =
        bound.iter().map(std::convert::AsRef::as_ref).collect();

    let mut stmt = conn.prepare(&sql)?;
    let mapped = stmt.query_map(bound_refs.as_slice(), |row| {
        // v0.7.0 issue #861 — `metadata` is a JSON TEXT blob. An
        // unreadable (NULL / wrong-typed) or malformed value degrades
        // to `{}` so callers always see a JSON object carrying the
        // attribution keys (`agent_id`, `imported_from_*`,
        // `consolidated_from_agents`) when they were archived.
        let metadata = parse_json_text(row.get::<_, String>(16).ok())
            .unwrap_or_else(|| serde_json::json!({}));
        // v0.7.0 issue #861 — `tags` is a JSON-encoded array TEXT
        // (`'["a","b"]'`). It is parsed here so the response matches the
        // live-row shape; an unreadable or malformed value degrades to
        // an empty array instead of failing the whole listing.
        let tags = parse_json_text(row.get::<_, String>(5).ok())
            .unwrap_or_else(|| serde_json::json!([]));
        Ok(serde_json::json!({
            "id": row.get::<_, String>(0)?,
            "tier": row.get::<_, String>(1)?,
            "namespace": row.get::<_, String>(2)?,
            "title": row.get::<_, String>(3)?,
            "content": row.get::<_, String>(4)?,
            "tags": tags,
            "priority": row.get::<_, i32>(6)?,
            (field_names::CONFIDENCE): row.get::<_, f64>(7)?,
            "source": row.get::<_, String>(8)?,
            (field_names::ACCESS_COUNT): row.get::<_, i64>(9)?,
            (field_names::CREATED_AT): row.get::<_, String>(10)?,
            (field_names::UPDATED_AT): row.get::<_, String>(11)?,
            (field_names::LAST_ACCESSED_AT): row.get::<_, Option<String>>(12)?,
            (field_names::EXPIRES_AT): row.get::<_, Option<String>>(13)?,
            (field_names::ARCHIVED_AT): row.get::<_, String>(14)?,
            (field_names::ARCHIVE_REASON): row.get::<_, String>(15)?,
            "metadata": metadata,
            // #1637 — the v49 columns. Additive keys; NULL numeric
            // columns take the defaults shown, and JSON-ish TEXT columns
            // are parsed to structured values like tags/metadata above.
            (field_names::REFLECTION_DEPTH): row.get::<_, Option<i64>>(17)?.unwrap_or(0),
            (field_names::MEMORY_KIND): row.get::<_, Option<String>>(18)?,
            "entity_id": row.get::<_, Option<String>>(19)?,
            (field_names::PERSONA_VERSION): row.get::<_, Option<i64>>(20)?,
            "citations": parse_json_text(row.get::<_, Option<String>>(21)?)
                .unwrap_or_else(|| serde_json::json!([])),
            (field_names::SOURCE_URI): row.get::<_, Option<String>>(22)?,
            (field_names::SOURCE_SPAN): parse_json_text(row.get::<_, Option<String>>(23)?),
            (field_names::CONFIDENCE_SOURCE): row.get::<_, Option<String>>(24)?,
            (field_names::CONFIDENCE_SIGNALS): parse_json_text(row.get::<_, Option<String>>(25)?),
            (field_names::CONFIDENCE_DECAYED_AT): row.get::<_, Option<String>>(26)?,
            "version": row.get::<_, Option<i64>>(27)?.unwrap_or(1),
            (field_names::ATOMISED_INTO): row.get::<_, Option<i64>>(28)?,
            (field_names::ATOM_OF): row.get::<_, Option<String>>(29)?,
            (field_names::MENTIONED_ENTITY_ID): row.get::<_, Option<String>>(30)?,
        }))
    })?;
    Ok(mapped.collect::<rusqlite::Result<Vec<_>>>()?)
}

/// Move the archived row `id` back into the live `memories` table.
///
/// Owner-blind variant: any archived row can be restored. The whole
/// operation runs inside one `BEGIN IMMEDIATE` transaction:
///
/// 1. return `Ok(false)` when no archived row has this id;
/// 2. refuse when a live row with the same id already exists;
/// 3. reset archived metadata that fails validation to `{}`;
/// 4. consult the governance pre-write hook with the restore candidate;
/// 5. copy the row into `memories` (stamping `updated_at` with the
///    restore time) and delete it from `archived_memories`.
///
/// Returns `Ok(true)` when the row was restored.
///
/// # Errors
///
/// - [`StorageError::ArchiveRestoreCollision`] when the id is already
///   live;
/// - whatever the governance hook or loading the candidate returns;
/// - any SQL failure, including a failed `COMMIT`.
///
/// Every error path rolls the transaction back before returning, so the
/// connection is never left inside an open write transaction.
pub fn restore_archived(conn: &Connection, id: &str) -> Result<bool> {
    let now = Utc::now().to_rfc3339();
    conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)?;
    let result = (|| -> Result<bool> {
        // A failed probe is treated as "not archived".
        let exists: bool = conn
            .query_row(
                "SELECT COUNT(*) > 0 FROM archived_memories WHERE id = ?1",
                params![id],
                |r| r.get(0),
            )
            .unwrap_or(false);
        if !exists {
            return Ok(false);
        }
        // Check if ID already exists in active memories to prevent silent overwrite
        let active_exists: bool = conn
            .query_row(SQL_MEMORY_EXISTS_COUNT, params![id], |r| r.get(0))
            .unwrap_or(false);
        if active_exists {
            // #962 typed envelope — ArchiveRestoreCollision (409).
            return Err(anyhow::Error::new(StorageError::ArchiveRestoreCollision {
                id: id.to_string(),
            }));
        }
        // Validate archived metadata before restoring. An unreadable or
        // unparsable value is validated as `{}`.
        let archived_metadata: String = conn
            .query_row(
                "SELECT metadata FROM archived_memories WHERE id = ?1",
                params![id],
                |r| r.get(0),
            )
            .unwrap_or_else(|_| "{}".to_string());
        let meta_value: serde_json::Value =
            serde_json::from_str(&archived_metadata).unwrap_or_else(|_| serde_json::json!({}));
        if let Err(e) = crate::validate::validate_metadata(&meta_value) {
            tracing::warn!("archived memory {id} has invalid metadata, resetting to {{}}: {e}");
            conn.execute(
                "UPDATE archived_memories SET metadata = '{}' WHERE id = ?1",
                params![id],
            )?;
        }
        // FX-C5 — substrate governance pre-write hook parity. Restoring
        // an archived row mints a fresh live row via a raw INSERT...SELECT
        // that bypasses the `db::insert(..)` tail (which is where the
        // SQLite path normally consults `GOVERNANCE_PRE_WRITE`). Without
        // this call, an operator's signed governance rule could be
        // bypassed by restoring a row whose `(title, namespace)` would
        // otherwise be refused on a direct write. Load the archived row
        // shaped as a `Memory` and fire the hook BEFORE the INSERT;
        // a refusal short-circuits the transaction (outer ROLLBACK).
        let candidate = load_archived_as_memory(conn, id)?;
        consult_governance_pre_write(&candidate)?;

        // v0.6.3.1 P2 (G5) — preserve original tier + expires_at + embedding
        // on restore. Pre-v17 rows lost this metadata permanently; the
        // migration backfills `original_tier='long'` so they still restore
        // as permanent (the prior behavior — no regression for legacy data).
        // Live writes from v0.6.3.1 onward round-trip the original tier.
        // #1025 (CRITICAL, 2026-05-21) — full v0.7.0 column carry on
        // archive→restore. Pre-#1025 the SELECT pulled only 17 columns;
        // restored row landed with reflection_depth=0 (DEFAULT),
        // memory_kind='observation' (DEFAULT), citations=[] (DEFAULT),
        // version=1 (DEFAULT) — silent loss of Form-4/5 provenance.
        // COALESCE handles legacy already-archived rows where the
        // v49-added columns are NULL.
        conn.execute(
            "INSERT INTO memories
             (id, tier, namespace, title, content, tags, priority, confidence,
              source, access_count, created_at, updated_at, last_accessed_at,
              expires_at, metadata, embedding, embedding_dim,
              reflection_depth, atomised_into, atom_of, memory_kind,
              entity_id, persona_version, citations, source_uri, source_span,
              confidence_source, confidence_signals, confidence_decayed_at,
              mentioned_entity_id, version)
             SELECT id, COALESCE(original_tier, 'long'), namespace, title, content,
                    tags, priority, confidence, source, access_count, created_at,
                    ?1, last_accessed_at, original_expires_at, metadata,
                    embedding, embedding_dim,
                    COALESCE(reflection_depth, 0),
                    atomised_into,
                    atom_of,
                    COALESCE(memory_kind, 'observation'),
                    entity_id, persona_version,
                    COALESCE(citations, '[]'),
                    source_uri, source_span,
                    COALESCE(confidence_source, 'caller_provided'),
                    confidence_signals, confidence_decayed_at,
                    mentioned_entity_id,
                    COALESCE(version, 1)
             FROM archived_memories WHERE id = ?2",
            params![now, id],
        )?;
        conn.execute("DELETE FROM archived_memories WHERE id = ?1", params![id])?;
        Ok(true)
    })();
    // COMMIT is folded into the fallible outcome: if it fails, the
    // rollback below still runs, so the write transaction (and its lock)
    // is not left dangling on the connection. `Ok(false)` commits too,
    // closing the transaction that was opened above.
    let outcome = result.and_then(|restored| -> Result<bool> {
        conn.execute_batch(connection::SQL_COMMIT)?;
        Ok(restored)
    });
    if outcome.is_err() {
        // Best-effort: errors harmlessly if SQLite already rolled back.
        let _ = conn.execute_batch(connection::SQL_ROLLBACK);
    }
    outcome
}

/// #940 (security-high, 2026-05-20) — caller-scoped restore variant.
/// Mirrors [`restore_archived`] but first gates the restore on
/// ownership. Inside the same `BEGIN IMMEDIATE` transaction as the
/// restore itself, the archived row must satisfy one of:
///
/// - `metadata.agent_id` equals `caller`;
/// - `metadata.target_agent_id` equals `caller` (the inbox-target
///   carve-out, matching the SAL
///   [`crate::store::is_visible_to_caller`] visibility predicate);
/// - `metadata.agent_id` is absent or empty (legacy unowned row).
///
/// Pre-#940 the only restore variant was owner-blind; any
/// authenticated HTTP caller could restore any other owner's
/// archived rows back into the live working set via
/// `POST /api/v1/archive/{id}/restore`. The postgres SAL branch was
/// already QC-P1-fixed (2026-05-20) to pass
/// `CallerContext::for_agent(caller)`; the sqlite branch is closed
/// by this helper. Returns `Ok(false)` on a non-owner attempt — the
/// same answer as for an id that is not archived at all — so the
/// surface cannot be used to probe other owners' archived ids.
///
/// # Errors
///
/// - [`StorageError::ArchiveRestoreCollision`] when the id is already
///   live;
/// - whatever the governance hook or loading the candidate returns;
/// - any SQL failure, including a failed `COMMIT`.
///
/// Every error path rolls the transaction back before returning, so the
/// connection is never left inside an open write transaction.
pub fn restore_archived_for_caller(conn: &Connection, id: &str, caller: &str) -> Result<bool> {
    let now = Utc::now().to_rfc3339();
    conn.execute_batch(connection::SQL_BEGIN_IMMEDIATE)?;
    let result = (|| -> Result<bool> {
        // Owner gate: row must exist AND match the caller (or be an
        // inbox-target row whose recipient is the caller, or be a
        // legacy unowned row — see archive_memory_for_caller for the
        // matching SQL + #940 carve-out rationale). A failed probe
        // fails closed: it is treated as "not owned".
        let owned: bool = conn
            .query_row(
                "SELECT COUNT(*) > 0 FROM archived_memories \
                 WHERE id = ?1 \
                   AND ( \
                     json_extract(metadata, '$.agent_id') = ?2 OR \
                     json_extract(metadata, '$.target_agent_id') = ?2 OR \
                     json_extract(metadata, '$.agent_id') IS NULL OR \
                     json_extract(metadata, '$.agent_id') = '' \
                   )",
                params![id, caller],
                |r| r.get(0),
            )
            .unwrap_or(false);
        if !owned {
            return Ok(false);
        }
        // Check if ID already exists in active memories to prevent silent overwrite.
        let active_exists: bool = conn
            .query_row(SQL_MEMORY_EXISTS_COUNT, params![id], |r| r.get(0))
            .unwrap_or(false);
        if active_exists {
            // #962 typed envelope — ArchiveRestoreCollision (409).
            return Err(anyhow::Error::new(StorageError::ArchiveRestoreCollision {
                id: id.to_string(),
            }));
        }
        // Validate archived metadata before restoring (mirror restore_archived).
        let archived_metadata: String = conn
            .query_row(
                "SELECT metadata FROM archived_memories WHERE id = ?1",
                params![id],
                |r| r.get(0),
            )
            .unwrap_or_else(|_| "{}".to_string());
        let meta_value: serde_json::Value =
            serde_json::from_str(&archived_metadata).unwrap_or_else(|_| serde_json::json!({}));
        if let Err(e) = crate::validate::validate_metadata(&meta_value) {
            tracing::warn!("archived memory {id} has invalid metadata, resetting to {{}}: {e}");
            conn.execute(
                "UPDATE archived_memories SET metadata = '{}' WHERE id = ?1",
                params![id],
            )?;
        }
        // FX-C5 — substrate governance pre-write hook parity. See the
        // matching block in `restore_archived` above for rationale.
        // Caller-scoped variant uses the same hook contract — the
        // hook is owner-agnostic (it sees the Memory payload, not the
        // caller context); ownership gating already happened on the
        // SELECT above.
        let candidate = load_archived_as_memory(conn, id)?;
        consult_governance_pre_write(&candidate)?;
        // #1025 (CRITICAL, 2026-05-21) — full v0.7.0 column carry on
        // archive→restore. Pre-#1025 the SELECT pulled only 17 columns;
        // restored row landed with reflection_depth=0 (DEFAULT),
        // memory_kind='observation' (DEFAULT), citations=[] (DEFAULT),
        // version=1 (DEFAULT) — silent loss of Form-4/5 provenance.
        // COALESCE handles legacy already-archived rows where the
        // v49-added columns are NULL.
        conn.execute(
            "INSERT INTO memories
             (id, tier, namespace, title, content, tags, priority, confidence,
              source, access_count, created_at, updated_at, last_accessed_at,
              expires_at, metadata, embedding, embedding_dim,
              reflection_depth, atomised_into, atom_of, memory_kind,
              entity_id, persona_version, citations, source_uri, source_span,
              confidence_source, confidence_signals, confidence_decayed_at,
              mentioned_entity_id, version)
             SELECT id, COALESCE(original_tier, 'long'), namespace, title, content,
                    tags, priority, confidence, source, access_count, created_at,
                    ?1, last_accessed_at, original_expires_at, metadata,
                    embedding, embedding_dim,
                    COALESCE(reflection_depth, 0),
                    atomised_into,
                    atom_of,
                    COALESCE(memory_kind, 'observation'),
                    entity_id, persona_version,
                    COALESCE(citations, '[]'),
                    source_uri, source_span,
                    COALESCE(confidence_source, 'caller_provided'),
                    confidence_signals, confidence_decayed_at,
                    mentioned_entity_id,
                    COALESCE(version, 1)
             FROM archived_memories WHERE id = ?2",
            params![now, id],
        )?;
        conn.execute("DELETE FROM archived_memories WHERE id = ?1", params![id])?;
        Ok(true)
    })();
    // COMMIT is folded into the fallible outcome: if it fails, the
    // rollback below still runs, so the write transaction (and its lock)
    // is not left dangling on the connection. `Ok(false)` commits too,
    // closing the transaction that was opened above.
    let outcome = result.and_then(|restored| -> Result<bool> {
        conn.execute_batch(connection::SQL_COMMIT)?;
        Ok(restored)
    });
    if outcome.is_err() {
        // Best-effort: errors harmlessly if SQLite already rolled back.
        let _ = conn.execute_batch(connection::SQL_ROLLBACK);
    }
    outcome
}

/// FX-C5 — load a row from `archived_memories` shaped as a [`Memory`]
/// so the substrate `GOVERNANCE_PRE_WRITE` hook can inspect the
/// restore candidate BEFORE the live INSERT lands. The archived
/// table shares the v0.7.0 column shape with `memories` (#1025) so
/// the same `row_to_memory` helper applies; the v49-added columns that
/// are NULL on legacy pre-#1025 archived rows are COALESCEd to the same
/// defaults the restore INSERT uses.
///
/// `tier` and `expires_at` are projected with exactly the expressions
/// the restore INSERT uses — `COALESCE(original_tier, 'long')` and
/// `original_expires_at` — so the hook judges the row at the tier and
/// expiry it will actually land with. Falling back to the archive-time
/// `tier` / `expires_at` instead would let the hook approve a candidate
/// that differs from the row that gets written whenever the `original_*`
/// columns are NULL.
///
/// # Errors
///
/// Fails when no archived row has this id, or when the query or the
/// row conversion fails.
fn load_archived_as_memory(conn: &Connection, id: &str) -> Result<Memory> {
    let candidate = conn.query_row(
        "SELECT id, COALESCE(original_tier, 'long') AS tier, namespace, title, content,
                tags, priority, confidence, source, access_count, created_at,
                updated_at, last_accessed_at,
                original_expires_at AS expires_at, metadata,
                COALESCE(reflection_depth, 0) AS reflection_depth,
                COALESCE(memory_kind, 'observation') AS memory_kind,
                entity_id, persona_version,
                COALESCE(citations, '[]') AS citations,
                source_uri, source_span,
                COALESCE(confidence_source, 'caller_provided') AS confidence_source,
                confidence_signals, confidence_decayed_at,
                COALESCE(version, 1) AS version
         FROM archived_memories WHERE id = ?1",
        params![id],
        row_to_memory,
    )?;
    Ok(candidate)
}

pub fn purge_archive(conn: &Connection, older_than_days: Option<i64>) -> Result<usize> {
    match older_than_days {
        Some(days) if days < 0 => {
            // #962 typed envelope.
            return Err(anyhow::Error::new(StorageError::InvalidArgument {
                reason: crate::errors::msg::older_than_days_negative(days),
            }));
        }
        Some(days) => {
            let cutoff = (Utc::now() - chrono::Duration::days(days)).to_rfc3339();
            let deleted = conn.execute(
                "DELETE FROM archived_memories WHERE archived_at < ?1",
                params![cutoff],
            )?;
            Ok(deleted)
        }
        None => {
            let deleted = conn.execute("DELETE FROM archived_memories", [])?;
            Ok(deleted)
        }
    }
}

/// #936 (security-critical, 2026-05-20) — caller-scoped purge variant.
/// Mirrors [`purge_archive`] but constrains the DELETE to rows whose
/// `metadata->'agent_id'` JSON field matches `caller` (with the
/// inbox-target carve-out: rows whose `metadata->'target_agent_id'`
/// matches `caller` are also purgeable by the inbox owner, matching
/// the SAL [`crate::store::is_visible_to_caller`] visibility
/// predicate).
///
/// Pre-#936 the only purge variant was owner-blind; any authenticated
/// HTTP caller could destroy every owner's archive corpus via
/// `DELETE /api/v1/archive`. The handler at
/// `src/handlers/archive.rs::purge_archive` now resolves the caller
/// from `X-Agent-Id` and routes through this owner-scoped variant by
/// default; the admin/operator path (full owner-blind wipe) is
/// reserved for callers whose `agent_id` appears in the
/// `[admin].agent_ids` allowlist and is reached via the SAL trait
/// path with `CallerContext::bypass_visibility = true`.
///
/// Returns the count of rows actually deleted; a non-admin call with
/// no matching rows returns `Ok(0)` so the caller cannot enumerate
/// other owners' archive corpus via this surface.
///
/// # Errors
///
/// Returns [`StorageError::InvalidArgument`] (wrapped in anyhow) when
/// `older_than_days` is negative, or the underlying SQLite error when
/// the DELETE fails.
pub fn purge_archive_for_caller(
    conn: &Connection,
    caller: &str,
    older_than_days: Option<i64>,
) -> Result<usize> {
    let Some(days) = older_than_days else {
        // No age bound: everything the caller owns or is the inbox
        // target of.
        let deleted = conn.execute(
            "DELETE FROM archived_memories \
             WHERE \
               json_extract(metadata, '$.agent_id') = ?1 OR \
               json_extract(metadata, '$.target_agent_id') = ?1",
            params![caller],
        )?;
        return Ok(deleted);
    };
    if days < 0 {
        // #962 typed envelope.
        return Err(anyhow::Error::new(StorageError::InvalidArgument {
            reason: crate::errors::msg::older_than_days_negative(days),
        }));
    }
    // `Duration::days` and `DateTime - Duration` both panic when the
    // result is out of range, which a large caller-supplied `days`
    // would trigger on this HTTP-reachable path. A cutoff that far in
    // the past cannot be newer than any archived row, so the purge is
    // a no-op rather than a crash.
    let Some(cutoff) =
        chrono::Duration::try_days(days).and_then(|age| Utc::now().checked_sub_signed(age))
    else {
        return Ok(0);
    };
    let cutoff = cutoff.to_rfc3339();
    let deleted = conn.execute(
        "DELETE FROM archived_memories \
         WHERE archived_at < ?1 \
           AND ( \
             json_extract(metadata, '$.agent_id') = ?2 OR \
             json_extract(metadata, '$.target_agent_id') = ?2 \
           )",
        params![cutoff, caller],
    )?;
    Ok(deleted)
}

/// Summarise the archive as JSON: the total number of rows in
/// `archived_memories` plus a per-namespace breakdown ordered by
/// descending row count.
///
/// # Errors
///
/// Returns the underlying SQLite error when either query fails.
pub fn archive_stats(conn: &Connection) -> Result<serde_json::Value> {
    let archived_total: i64 =
        conn.query_row("SELECT COUNT(*) FROM archived_memories", [], |row| row.get(0))?;

    let mut per_ns_stmt = conn.prepare(
        "SELECT namespace, COUNT(*) FROM archived_memories GROUP BY namespace ORDER BY COUNT(*) DESC",
    )?;
    let mut breakdown: Vec<serde_json::Value> = Vec::new();
    let mut rows = per_ns_stmt.query([])?;
    while let Some(row) = rows.next()? {
        let namespace: String = row.get(0)?;
        let count: i64 = row.get(1)?;
        breakdown.push(serde_json::json!({
            "namespace": namespace,
            "count": count,
        }));
    }

    Ok(serde_json::json!({
        "archived_total": archived_total,
        (field_names::BY_NAMESPACE): breakdown,
    }))
}

/// Export every memory that is not expired as of now (rows with no
/// `expires_at`, or an `expires_at` after the current RFC 3339
/// timestamp), oldest `created_at` first.
///
/// # Errors
///
/// Returns the underlying SQLite error from prepare, query or row
/// decoding.
pub fn export_all(conn: &Connection) -> Result<Vec<Memory>> {
    let as_of = Utc::now().to_rfc3339();
    let mut live_stmt = conn.prepare(
        "SELECT * FROM memories WHERE expires_at IS NULL OR expires_at > ?1 ORDER BY created_at ASC",
    )?;
    let live = live_stmt
        .query_map(params![as_of], row_to_memory)?
        .collect::<rusqlite::Result<Vec<_>>>()?;
    Ok(live)
}

/// Export every link whose source AND target memories are both
/// unexpired as of now.
///
/// # Errors
///
/// Returns the underlying SQLite error from prepare, query or row
/// decoding.
pub fn export_links(conn: &Connection) -> Result<Vec<MemoryLink>> {
    /// Decode one joined `memory_links` row into a [`MemoryLink`].
    fn link_from_row(row: &rusqlite::Row<'_>) -> rusqlite::Result<MemoryLink> {
        let relation_text: String = row.get(2)?;
        Ok(MemoryLink {
            source_id: row.get(0)?,
            target_id: row.get(1)?,
            // v0.7.0 fix campaign R1-M4 — see `get_links` for rationale.
            relation: crate::models::MemoryLinkRelation::from_str(&relation_text)
                .unwrap_or_default(),
            created_at: row.get(3)?,
            signature: row.get::<_, Option<Vec<u8>>>(4)?,
            observed_by: row.get::<_, Option<String>>(5)?,
            valid_from: row.get::<_, Option<String>>(6)?,
            valid_until: row.get::<_, Option<String>>(7)?,
            // v0.7.0 #860 — this is the federation outbound path; the
            // wire shape deliberately omits `attest_level` so pre-v0.7
            // receivers never see an unknown field. `None` keeps
            // `skip_serializing_if` from emitting it.
            attest_level: None,
        })
    }

    let as_of = Utc::now().to_rfc3339();
    // v0.7 H3 — the projection also carries the signature blob, the
    // `observed_by` claim and the temporal-validity columns. Federation
    // peers feed these to `verify::verify` to gate inbound replication;
    // legacy unsigned rows surface NULL for `signature` / `observed_by`
    // and the inbound path falls back to `attest_level = "unsigned"`.
    let mut link_stmt = conn.prepare(
        "SELECT ml.source_id, ml.target_id, ml.relation, ml.created_at,
                ml.signature, ml.observed_by, ml.valid_from, ml.valid_until
         FROM memory_links ml
         JOIN memories ms ON ms.id = ml.source_id AND (ms.expires_at IS NULL OR ms.expires_at > ?1)
         JOIN memories mt ON mt.id = ml.target_id AND (mt.expires_at IS NULL OR mt.expires_at > ?1)",
    )?;
    let links = link_stmt
        .query_map(params![as_of], link_from_row)?
        .collect::<rusqlite::Result<Vec<_>>>()?;
    Ok(links)
}

/// Sync-path upsert with timestamp-aware conflict resolution.
///
/// A row colliding on `(title, namespace)` is overwritten only when
/// the incoming memory is newer by `updated_at`, with `memory.id` as
/// the tiebreaker so every peer agrees on a total order (ultrareview
/// #344, #345). Returns the `id` of the row that ends up stored, as
/// reported by `RETURNING id`.
///
/// Rationale: ISO 8601 / RFC 3339 strings compare lexicographically
/// as long as all timestamps carry consistent precision + Z suffix.
/// Equal timestamps (two nodes editing in the same ms, or NTP-aligned
/// clocks) previously produced a different winner on each peer and
/// therefore permanent mesh divergence; the id tiebreaker removes the
/// ambiguity.
///
/// # Errors
///
/// Propagates a governance pre-write refusal, a JSON encoding failure
/// for any of the serialized columns, or the underlying SQLite error.
pub fn insert_if_newer(conn: &Connection, mem: &Memory) -> Result<String> {
    // v0.7.0 L1-6 Deliverable E — substrate governance pre-write gate.
    // Federation `sync_push` / catchup-loop peer pushes enter here and
    // must be treated exactly like direct writes; otherwise an agent
    // could dodge a local rule by routing the write through a peer.
    // The hook runs on every newer-wins merge attempt.
    consult_governance_pre_write(mem)?;

    let tags_text = serde_json::to_string(&mem.tags)?;
    let metadata_text = serde_json::to_string(&mem.metadata)?;
    // v0.7.0 Form 4 — citations + source_span are JSON-encoded for the
    // schema v38 TEXT columns. The CASE arms below take
    // `excluded.citations` only when the incoming row wins.
    let citations_text = serde_json::to_string(&mem.citations)?;
    let source_span_text = mem
        .source_span
        .as_ref()
        .map(serde_json::to_string)
        .transpose()?;
    // v0.7.0 Form 5 — confidence-provenance signals for the schema v39
    // TEXT column; same newer-wins treatment as the citations.
    let confidence_signals_text = mem
        .confidence_signals
        .as_ref()
        .map(serde_json::to_string)
        .transpose()?;
    // v0.7.0 polish PERF-8 (#781) — denormalised mention tag. The
    // newer-wins CASE keeps the local tag unless the incoming row wins,
    // so a stale peer cannot blank a value the matcher's index needs.
    let mention_tag = extract_mentioned_entity_id(mem);

    // #1579 B6 — federation catch-up replays this upsert once per
    // pulled row; `prepare_cached` amortises parsing the largest SQL
    // statement in the file across the whole batch.
    let mut merge_stmt = conn.prepare_cached(
        "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, confidence, source, access_count, created_at, updated_at, last_accessed_at, expires_at, metadata, reflection_depth, memory_kind, entity_id, persona_version, citations, source_uri, source_span, confidence_source, confidence_signals, confidence_decayed_at, mentioned_entity_id, version)
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, ?20, ?21, ?22, ?23, ?24, ?25, ?26, ?27)
         ON CONFLICT(title, namespace) DO UPDATE SET
            content = CASE WHEN excluded.updated_at > memories.updated_at
                             OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
                           THEN excluded.content ELSE memories.content END,
            tags = CASE WHEN excluded.updated_at > memories.updated_at
                          OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
                        THEN excluded.tags ELSE memories.tags END,
            priority = MAX(memories.priority, excluded.priority),
            confidence = MAX(memories.confidence, excluded.confidence),
            source = CASE WHEN excluded.updated_at > memories.updated_at
                            OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
                          THEN excluded.source ELSE memories.source END,
            tier = CASE WHEN excluded.tier = 'long' THEN 'long'
                        WHEN memories.tier = 'long' THEN 'long'
                        WHEN excluded.tier = 'mid' THEN 'mid'
                        ELSE memories.tier END,
            updated_at = MAX(memories.updated_at, excluded.updated_at),
            access_count = MAX(memories.access_count, excluded.access_count),
            expires_at = CASE WHEN excluded.tier = 'long' OR memories.tier = 'long' THEN NULL
                              ELSE COALESCE(excluded.expires_at, memories.expires_at) END,
            -- Preserve metadata.agent_id across newer-wins merge (NHI provenance immutable).
            metadata = CASE
                WHEN json_extract(memories.metadata, '$.agent_id') IS NOT NULL
                THEN json_set(
                    CASE WHEN excluded.updated_at > memories.updated_at
                              OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
                         THEN excluded.metadata
                         ELSE memories.metadata END,
                    '$.agent_id',
                    json_extract(memories.metadata, '$.agent_id')
                )
                ELSE CASE WHEN excluded.updated_at > memories.updated_at
                               OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
                          THEN excluded.metadata
                          ELSE memories.metadata END
            END,
            -- v0.7.0 Task 1/8 — recursion depth takes max so the reflection
            -- signal isn't lost on newer-wins federation merges.
            reflection_depth = MAX(memories.reflection_depth, excluded.reflection_depth),
            -- v0.7.0 L1-1 — kind is sticky across federation merges: a
            -- reflection row must not be downgraded to observation by a
            -- newer-wins merge from a peer that doesn't know about the kind.
            -- v0.7.0 QW-2 — Persona is similarly sticky.
            memory_kind = CASE WHEN memories.memory_kind = 'reflection' THEN 'reflection'
                               WHEN memories.memory_kind = 'persona' THEN 'persona'
                               ELSE excluded.memory_kind END,
            -- v0.7.0 QW-2 — entity_id + persona_version are immutable
            -- once set so a federation merge can't drop the persona
            -- discriminator off a `memory_kind = 'persona'` row.
            entity_id = COALESCE(memories.entity_id, excluded.entity_id),
            persona_version = COALESCE(memories.persona_version, excluded.persona_version),
            -- v0.7.0 Form 4 — fact-provenance: replace the stored
            -- citations array only when the incoming row wins the
            -- newer-wins tiebreak; source_uri / source_span follow
            -- COALESCE semantics so a federation merge that lacks
            -- provenance does not blank out a value the local row
            -- already had.
            citations = CASE WHEN excluded.updated_at > memories.updated_at
                                  OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
                             THEN excluded.citations ELSE memories.citations END,
            source_uri = COALESCE(excluded.source_uri, memories.source_uri),
            source_span = COALESCE(excluded.source_span, memories.source_span),
            -- v0.7.0 Form 5 — confidence-provenance follows the newer-
            -- wins shape established for the other Form 4 columns.
            -- A peer pushing an auto-derived/calibrated value wins on
            -- the timestamp tiebreak; otherwise the local row's
            -- provenance is preserved so a stale peer cannot blank out
            -- a fresher local calibration.
            confidence_source = CASE WHEN excluded.updated_at > memories.updated_at
                                          OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
                                     THEN excluded.confidence_source ELSE memories.confidence_source END,
            confidence_signals = CASE WHEN excluded.updated_at > memories.updated_at
                                           OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
                                      THEN excluded.confidence_signals ELSE memories.confidence_signals END,
            confidence_decayed_at = CASE WHEN excluded.updated_at > memories.updated_at
                                              OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
                                         THEN excluded.confidence_decayed_at ELSE memories.confidence_decayed_at END,
            -- v0.7.0 polish PERF-8 (#781) — newer-wins on the mention
            -- tag (the winning row's content is the one a future matcher
            -- query expects to find); otherwise preserve the local tag
            -- so a stale peer that lacks the structured entity_id
            -- metadata cannot blank out a value the index serves.
            mentioned_entity_id = CASE WHEN excluded.updated_at > memories.updated_at
                                            OR (excluded.updated_at = memories.updated_at AND excluded.id > memories.id)
                                       THEN COALESCE(excluded.mentioned_entity_id, memories.mentioned_entity_id)
                                       ELSE memories.mentioned_entity_id END,
            -- #1631 (decide-once, #1029 contract) — `version` IS
            -- replicated state on the federation merge path: merge via
            -- MAX(local, remote) so an out-of-order peer push can't
            -- roll the Gap-1 optimistic-concurrency counter backwards.
            -- Matches the pg `apply_remote_memory` GREATEST arm.
            version = MAX(memories.version, excluded.version)
         RETURNING id",
    )?;

    // Bind order mirrors the INSERT column list (?1..?27).
    let stored_id: String = merge_stmt.query_row(
        params![
            mem.id,
            mem.tier.as_str(),
            mem.namespace,
            mem.title,
            mem.content,
            tags_text,
            mem.priority,
            mem.confidence,
            mem.source,
            mem.access_count,
            mem.created_at,
            mem.updated_at,
            mem.last_accessed_at,
            mem.effective_expires_at(),
            metadata_text,
            mem.reflection_depth,
            mem.memory_kind.as_str(),
            mem.entity_id,
            mem.persona_version,
            citations_text,
            mem.source_uri,
            source_span_text,
            mem.confidence_source.as_str(),
            confidence_signals_text,
            mem.confidence_decayed_at,
            mention_tag,
            mem.version,
        ],
        |row| row.get(0),
    )?;
    Ok(stored_id)
}

// --- Embedding support ---

/// v0.6.3.1 P2 (G4): typed error raised by `set_embedding` when a write
/// would bring a new embedding dimensionality into a namespace that an
/// earlier write already pinned to a different one. Being a distinct
/// type lets the MCP/HTTP handlers map it to a 409 Conflict instead of
/// letting cosine silently return 0.0 on every subsequent recall.
#[derive(Debug)]
pub struct EmbeddingDimMismatch {
    /// Namespace whose established dimensionality was violated.
    pub namespace: String,
    /// Dimensionality the namespace already uses.
    pub established: usize,
    /// Dimensionality of the rejected write.
    pub attempted: usize,
}

impl std::fmt::Display for EmbeddingDimMismatch {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            namespace,
            established,
            attempted,
        } = self;
        write!(
            f,
            "embedding dim mismatch in namespace '{}': established {}-dim, refused {}-dim write",
            namespace, established, attempted
        )
    }
}

impl std::error::Error for EmbeddingDimMismatch {}

/// Lookup the embedding dimensionality already established for `namespace`.
/// Returns `Ok(None)` when no row in that namespace has an embedding yet.
///
/// # Errors
///
/// Returns the underlying SQLite error.
pub fn namespace_embedding_dim(conn: &Connection, namespace: &str) -> Result<Option<usize>> {
    // Use the v17 idx_memories_ns_dim partial index.
    let dim: Option<i64> = conn
        .query_row(
            "SELECT embedding_dim FROM memories \
             WHERE namespace = ?1 AND embedding_dim IS NOT NULL \
             LIMIT 1",
            params![namespace],
            |r| r.get(0),
        )
        .ok();
    Ok(dim.and_then(|d| usize::try_from(d).ok()))
}

/// Count rows whose declared `embedding_dim` disagrees with the stored
/// BLOB, or whose `embedding_dim` is NULL although a BLOB of at least
/// four bytes exists. Surfaced in `Stats::dim_violations` and consumed
/// by P7 doctor.
///
/// A query failure is reported as `0` rather than as an error, and a
/// count that does not fit `u64` also collapses to `0`, so this
/// function currently always returns `Ok`.
pub fn dim_violations(conn: &Connection) -> Result<u64> {
    // `length(embedding)` is the BLOB length. Length parity tells the
    // two on-disk forms apart: legacy (no-header) payloads are `4n`
    // bytes and must declare `length/4`; headed (v17+) payloads are
    // `4n + 1` bytes and must declare `(length-1)/4`. Any other
    // remainder counts as a violation outright.
    let violations = conn
        .query_row(
            "SELECT COUNT(*) FROM memories \
             WHERE embedding IS NOT NULL \
               AND length(embedding) >= 4 \
               AND ( \
                   embedding_dim IS NULL \
                   OR ( \
                       (length(embedding) % 4 = 0 AND embedding_dim != length(embedding)/4) \
                       OR (length(embedding) % 4 = 1 AND embedding_dim != (length(embedding)-1)/4) \
                       OR (length(embedding) % 4 NOT IN (0,1)) \
                   ) \
               )",
            [],
            |row| row.get::<_, i64>(0),
        )
        .map_or(0, |count| u64::try_from(count).unwrap_or(0));
    Ok(violations)
}

/// #1595/#1598 — the single embedding-UPDATE statement (headed blob +
/// declared dim), shared by [`set_embedding`], [`set_embeddings_batch`]
/// and [`set_embeddings_batch_reembed`] so the write shape cannot
/// drift between the checked and replace-semantics writers.
///
/// Bind order: `?1` = embedding BLOB, `?2` = `embedding_dim`,
/// `?3` = row `id`.
const SQL_UPDATE_EMBEDDING_WITH_DIM: &str =
    "UPDATE memories SET embedding = ?1, embedding_dim = ?2 WHERE id = ?3";
/// Degenerate empty-vector sibling of [`SQL_UPDATE_EMBEDDING_WITH_DIM`]
/// (legacy parity: empty embeddings persist with `embedding_dim = NULL`).
///
/// Bind order: `?1` = embedding BLOB, `?2` = row `id`. Note the `id`
/// placeholder is `?2` here, not `?3`.
const SQL_UPDATE_EMBEDDING_NULL_DIM: &str =
    "UPDATE memories SET embedding = ?1, embedding_dim = NULL WHERE id = ?2";

/// Persist an embedding vector for the memory `id`.
///
/// v0.6.3.1 P2 — the BLOB is produced by `encode_embedding_blob` and
/// the namespace's established dimensionality is enforced: a write
/// whose length differs is refused with a typed
/// [`EmbeddingDimMismatch`] (surfaced as a 409 by the handler layer).
/// The declared `embedding_dim` is stored alongside the BLOB so later
/// stats / doctor passes need not re-derive it from the BLOB length.
///
/// An empty `embedding` is a degenerate legacy case: it is written with
/// `embedding_dim = NULL` and skips the dimensionality check. When the
/// namespace of `id` cannot be resolved the check is skipped as well
/// and the UPDATE is still issued.
///
/// # Errors
///
/// Returns [`EmbeddingDimMismatch`] (boxed via anyhow) when the
/// embedding's dimensionality differs from what the namespace
/// established, or the underlying SQLite error on failure.
pub fn set_embedding(conn: &Connection, id: &str, embedding: &[f32]) -> Result<()> {
    // Resolve the owning namespace first; a failed lookup is treated as
    // "unknown namespace", not as an error.
    let owner_ns: Option<String> = conn
        .query_row(
            "SELECT namespace FROM memories WHERE id = ?1",
            params![id],
            |row| row.get(0),
        )
        .ok();

    let attempted = embedding.len();
    if attempted > 0 {
        // Refuse before mutating when the namespace is pinned to a
        // different dimensionality.
        if let Some(ns) = owner_ns.as_deref()
            && let Some(established) = namespace_embedding_dim(conn, ns)?
            && established != attempted
        {
            return Err(EmbeddingDimMismatch {
                namespace: ns.to_owned(),
                established,
                attempted,
            }
            .into());
        }
        let blob = crate::embeddings::encode_embedding_blob(embedding);
        let declared_dim = i64::try_from(attempted).unwrap_or(i64::MAX);
        conn.execute(
            SQL_UPDATE_EMBEDDING_WITH_DIM,
            params![blob, declared_dim, id],
        )?;
    } else {
        // Empty embeddings were accepted by earlier code; keep that so
        // legacy tests do not break, but there is no dim to check.
        let blob = crate::embeddings::encode_embedding_blob(embedding);
        conn.execute(SQL_UPDATE_EMBEDDING_NULL_DIM, params![blob, id])?;
    }
    Ok(())
}

/// v0.7.0 Wave-2 A5 (issue #853) — batched embedding writer.
///
/// Applies a slice of `(id, embedding)` pairs inside one SQLite
/// transaction. The outcome matches calling [`set_embedding`] in a
/// loop, but the N per-row commits of autocommit mode collapse into a
/// single commit, which is the dominant cost on SQLite WAL once N
/// grows past a handful of rows.
///
/// Dim-invariant policy matches [`set_embedding`]:
/// * Empty embeddings are written with `embedding_dim = NULL` (legacy
///   degenerate-case parity) and are not dim-checked.
/// * The established dim is resolved once per namespace and cached for
///   the rest of the batch. The first pair whose length conflicts
///   aborts with an `EmbeddingDimMismatch` carrying that pair's
///   namespace/established/attempted triple; the transaction is
///   dropped uncommitted, so callers never see a partial commit.
///
/// Returns the number of rows updated. Ids missing from `memories`
/// contribute zero and are otherwise ignored, as in [`set_embedding`].
///
/// **Boot backfill use:** [`crate::mcp::run_mcp_server`] calls this in
/// fixed-size chunks (see `DEFAULT_EMBED_BACKFILL_BATCH_SIZE`) so the
/// embedder produces vectors in parallel-friendly bursts and the
/// SQLite commit cost amortises across the batch.
///
/// # Errors
///
/// * Returns [`EmbeddingDimMismatch`] (boxed via anyhow) if any pair's
///   embedding dim disagrees with the namespace-established dim. The
///   transaction is rolled back; no rows are mutated.
/// * Returns the underlying SQLite error on transaction/prepare/execute
///   failure.
pub fn set_embeddings_batch(
    conn: &mut Connection,
    entries: &[(String, Vec<f32>)],
) -> Result<usize> {
    if entries.is_empty() {
        return Ok(0);
    }

    // id -> namespace, resolved up-front with one prepared statement
    // that is reused for every distinct id. A failed lookup (or a NULL
    // namespace) is recorded as `None`, which skips the dim check for
    // that id.
    let mut namespace_of: HashMap<String, Option<String>> =
        HashMap::with_capacity(entries.len());
    {
        let mut ns_lookup = conn.prepare("SELECT namespace FROM memories WHERE id = ?1")?;
        for (id, _) in entries {
            if !namespace_of.contains_key(id) {
                let found = ns_lookup
                    .query_row(params![id], |row| row.get::<_, Option<String>>(0))
                    .ok()
                    .flatten();
                namespace_of.insert(id.clone(), found);
            }
        }
    }

    // namespace -> established dim, filled lazily so each distinct
    // namespace hits `namespace_embedding_dim` at most once per batch.
    let mut established_dims: HashMap<String, Option<usize>> = HashMap::new();

    let tx = conn.transaction()?;
    let mut written = 0usize;
    {
        // Scoped so both statements are released before the commit.
        let mut write_headed = tx.prepare(SQL_UPDATE_EMBEDDING_WITH_DIM)?;
        let mut write_empty = tx.prepare(SQL_UPDATE_EMBEDDING_NULL_DIM)?;

        for (id, embedding) in entries {
            let attempted = embedding.len();
            if attempted == 0 {
                let blob = crate::embeddings::encode_embedding_blob(embedding);
                written += write_empty.execute(params![blob, id])?;
                continue;
            }

            if let Some(ns) = namespace_of.get(id).and_then(Option::as_ref) {
                let established = match established_dims.get(ns).copied() {
                    Some(known) => known,
                    None => {
                        let resolved = namespace_embedding_dim(&tx, ns)?;
                        established_dims.insert(ns.clone(), resolved);
                        resolved
                    }
                };
                match established {
                    Some(expected) if expected != attempted => {
                        // Early return drops `tx` uncommitted.
                        return Err(EmbeddingDimMismatch {
                            namespace: ns.clone(),
                            established: expected,
                            attempted,
                        }
                        .into());
                    }
                    Some(_) => {}
                    None => {
                        // This pair is the first write in an unpinned
                        // namespace; its length becomes the cached dim
                        // so a later inconsistent pair in the same
                        // batch trips the check instead of committing.
                        established_dims.insert(ns.clone(), Some(attempted));
                    }
                }
            }

            let blob = crate::embeddings::encode_embedding_blob(embedding);
            let declared_dim = i64::try_from(attempted).unwrap_or(i64::MAX);
            written += write_headed.execute(params![blob, declared_dim, id])?;
        }
    }
    tx.commit()?;
    Ok(written)
}

/// Load the embedding vector stored for memory `id`.
///
/// Returns `Ok(None)` when the lookup yields nothing usable: the query
/// fails for any reason (including a missing row or a column that
/// cannot be read as a BLOB) or the BLOB is empty.
///
/// v0.6.3.1 P2 — decoding is delegated to `decode_embedding_blob`,
/// which tolerates legacy unheaded payloads (raw LE f32, length `4n`)
/// and v17 headed payloads (`0x01` + `4n` bytes). Anything else is an
/// error so the caller can surface a typed corruption signal.
///
/// # Errors
///
/// Returns [`EmbeddingFormatError`](crate::embeddings::EmbeddingFormatError)
/// when the on-disk BLOB is malformed.
pub fn get_embedding(conn: &Connection, id: &str) -> Result<Option<Vec<f32>>> {
    let stored = conn
        .query_row(
            "SELECT embedding FROM memories WHERE id = ?1",
            params![id],
            |row| row.get::<_, Vec<u8>>(0),
        )
        .ok();
    let Some(raw) = stored.filter(|blob| !blob.is_empty()) else {
        return Ok(None);
    };
    let vector = crate::embeddings::decode_embedding_blob(&raw)?;
    Ok(Some(vector))
}

/// Return `(id, title, content)` for every memory with no embedding.
///
/// #1579 B6 (F5.6): unbounded — the whole result set is materialised
/// in one `Vec`, which on a large backlog is the entire corpus in
/// memory. Hot loops (the embed-backfill sweep) should drain through
/// [`get_unembedded_ids_batch`] in bounded passes; this variant stays
/// for callers that need full-snapshot semantics.
///
/// # Errors
///
/// Returns the underlying SQLite error.
pub fn get_unembedded_ids(conn: &Connection) -> Result<Vec<(String, String, String)>> {
    let mut pending_stmt =
        conn.prepare("SELECT id, title, content FROM memories WHERE embedding IS NULL")?;
    let pending = pending_stmt
        .query_map([], |row| {
            let id: String = row.get(0)?;
            let title: String = row.get(1)?;
            let content: String = row.get(2)?;
            Ok((id, title, content))
        })?
        .collect::<rusqlite::Result<Vec<_>>>()?;
    Ok(pending)
}

/// #1579 B6 (F5.6) — bounded variant of [`get_unembedded_ids`].
///
/// Yields at most `limit` `(id, title, content)` triples, so the
/// caller's materialisation is capped by its batch size (the
/// `AI_MEMORY_EMBED_BACKFILL_BATCH` resolver semantics) rather than by
/// the size of the unembedded backlog. There is deliberately NO
/// OFFSET: a row that gains an embedding no longer matches
/// `embedding IS NULL`, so callers drain by re-fetching until the
/// batch comes back empty (or stops shrinking — rows whose embedding
/// persistently fails stay at the head of the scan).
///
/// # Errors
///
/// Returns the underlying SQLite error.
pub fn get_unembedded_ids_batch(
    conn: &Connection,
    limit: usize,
) -> Result<Vec<(String, String, String)>> {
    let mut batch_stmt = conn.prepare_cached(
        "SELECT id, title, content FROM memories WHERE embedding IS NULL LIMIT ?1",
    )?;
    let batch = batch_stmt
        .query_map(params![limit], |row| {
            let id: String = row.get(0)?;
            let title: String = row.get(1)?;
            let content: String = row.get(2)?;
            Ok((id, title, content))
        })?
        .collect::<rusqlite::Result<Vec<_>>>()?;
    Ok(batch)
}

/// #1595 — keyset-paginated variant of [`get_unembedded_ids_batch`].
///
/// Yields at most `limit` `(id, title, content)` triples for memories
/// without an embedding whose `id` sorts strictly AFTER `after_id`
/// (from the start when `after_id` is `None`), in `id` order. The
/// resilient backfill sweep advances its cursor past every processed
/// row — embedded OR skipped — so a poison row (over-context-length
/// content, transient embedder fault) can no longer pin the scan head
/// and starve the rest of the backlog. The earlier `LIMIT`-only fetch
/// kept re-returning persistently-failing rows, and the no-progress
/// guard then stopped the whole sweep with 0 rows backfilled.
///
/// Two distinct prepared shapes (with / without the cursor predicate)
/// are used instead of the non-sargable `(?1 IS NULL OR id > ?1)`
/// form, per the v55/v56 sargability discipline.
///
/// # Errors
///
/// Returns the underlying SQLite error.
pub fn get_unembedded_ids_batch_after(
    conn: &Connection,
    after_id: Option<&str>,
    limit: usize,
) -> Result<Vec<(String, String, String)>> {
    /// Decode one `(id, title, content)` row.
    fn text_triple(row: &rusqlite::Row<'_>) -> rusqlite::Result<(String, String, String)> {
        let id: String = row.get(0)?;
        let title: String = row.get(1)?;
        let content: String = row.get(2)?;
        Ok((id, title, content))
    }

    let batch = match after_id {
        Some(cursor) => {
            let mut stmt = conn.prepare_cached(
                "SELECT id, title, content FROM memories \
                 WHERE embedding IS NULL AND id > ?1 ORDER BY id LIMIT ?2",
            )?;
            let mapped = stmt.query_map(params![cursor, limit], text_triple)?;
            mapped.collect::<rusqlite::Result<Vec<_>>>()?
        }
        None => {
            let mut stmt = conn.prepare_cached(
                "SELECT id, title, content FROM memories \
                 WHERE embedding IS NULL ORDER BY id LIMIT ?1",
            )?;
            let mapped = stmt.query_map(params![limit], text_triple)?;
            mapped.collect::<rusqlite::Result<Vec<_>>>()?
        }
    };
    Ok(batch)
}

/// #1598 — keyset-paginated scan over ALL memories (embedded or not),
/// optionally restricted to one namespace, for the `ai-memory reembed`
/// full-corpus sweep. Cursor semantics match
/// [`get_unembedded_ids_batch_after`]: at most `limit` `(id, title,
/// content)` triples with `id` strictly after `after_id`, in `id`
/// order. Each (namespace × cursor) combination has its own prepared
/// shape so the scan stays sargable (v55/v56 discipline).
///
/// # Errors
///
/// Returns the underlying SQLite error.
pub fn get_memory_texts_batch(
    conn: &Connection,
    namespace: Option<&str>,
    after_id: Option<&str>,
    limit: usize,
) -> Result<Vec<(String, String, String)>> {
    /// Run one cached statement and collect its `(id, title, content)` rows.
    fn fetch_texts(
        conn: &Connection,
        sql: &str,
        args: &[&dyn rusqlite::ToSql],
    ) -> Result<Vec<(String, String, String)>> {
        let mut stmt = conn.prepare_cached(sql)?;
        let mapped = stmt.query_map(args, |row| {
            let id: String = row.get(0)?;
            let title: String = row.get(1)?;
            let content: String = row.get(2)?;
            Ok((id, title, content))
        })?;
        let triples = mapped.collect::<rusqlite::Result<Vec<_>>>()?;
        Ok(triples)
    }

    match (namespace, after_id) {
        (Some(ns), Some(after)) => fetch_texts(
            conn,
            "SELECT id, title, content FROM memories \
             WHERE namespace = ?1 AND id > ?2 ORDER BY id LIMIT ?3",
            params![ns, after, limit],
        ),
        (Some(ns), None) => fetch_texts(
            conn,
            "SELECT id, title, content FROM memories \
             WHERE namespace = ?1 ORDER BY id LIMIT ?2",
            params![ns, limit],
        ),
        (None, Some(after)) => fetch_texts(
            conn,
            "SELECT id, title, content FROM memories \
             WHERE id > ?1 ORDER BY id LIMIT ?2",
            params![after, limit],
        ),
        (None, None) => fetch_texts(
            conn,
            "SELECT id, title, content FROM memories ORDER BY id LIMIT ?1",
            params![limit],
        ),
    }
}

/// #1598 — unchecked, REPLACE-semantics counterpart of
/// [`set_embeddings_batch`], used by the `ai-memory reembed`
/// vector-space migration.
///
/// All entries are written inside one transaction. Unlike the checked
/// sibling, this function intentionally skips the per-namespace
/// established-dim invariant: a re-embed run is precisely what moves a
/// namespace from one model/dim to another, so the namespace holds mixed
/// dims while the sweep is in flight (the H7 recall read-guards skip
/// dim-mismatched vectors during the transition, and the sweep converges
/// every row to the target dim). All other writers MUST stay on
/// [`set_embeddings_batch`] — the G4 invariant is what prevents a
/// misconfigured writer from silently zeroing cosine scores.
///
/// The return value is the total number of rows the UPDATEs touched;
/// ids that match no row contribute zero, same as the checked sibling.
///
/// # Errors
///
/// Returns the underlying SQLite transaction / statement error.
pub fn set_embeddings_batch_reembed(
    conn: &mut Connection,
    entries: &[(String, Vec<f32>)],
) -> Result<usize> {
    // Nothing to write: do not open a transaction at all.
    if entries.is_empty() {
        return Ok(0);
    }
    let tx = conn.transaction()?;
    // The prepared statements borrow `tx`, so they live in an inner scope
    // that ends before `commit` consumes the transaction.
    let touched = {
        let mut with_dim = tx.prepare(SQL_UPDATE_EMBEDDING_WITH_DIM)?;
        let mut without_dim = tx.prepare(SQL_UPDATE_EMBEDDING_NULL_DIM)?;
        let mut touched = 0usize;
        for (memory_id, vector) in entries {
            let blob = crate::embeddings::encode_embedding_blob(vector);
            touched += match vector.len() {
                // Legacy degenerate-case parity with `set_embedding`.
                0 => without_dim.execute(params![blob, memory_id])?,
                len => {
                    // Saturate rather than fail if the length exceeds i64.
                    let dim = i64::try_from(len).unwrap_or(i64::MAX);
                    with_dim.execute(params![blob, dim, memory_id])?
                }
            };
        }
        touched
    };
    tx.commit()?;
    Ok(touched)
}

/// #1598 — `(total_rows, rows_with_embeddings)` for the reembed
/// dry-run plan, optionally namespace-filtered. `COUNT(embedding)`
/// counts non-NULL values, so the missing count is the difference.
///
/// # Errors
///
/// Returns the underlying SQLite error.
pub fn embedding_coverage(conn: &Connection, namespace: Option<&str>) -> Result<(u64, u64)> {
    let (total, embedded): (i64, i64) = if let Some(ns) = namespace {
        conn.query_row(
            "SELECT COUNT(*), COUNT(embedding) FROM memories WHERE namespace = ?1",
            params![ns],
            |r| Ok((r.get(0)?, r.get(1)?)),
        )?
    } else {
        conn.query_row("SELECT COUNT(*), COUNT(embedding) FROM memories", [], |r| {
            Ok((r.get(0)?, r.get(1)?))
        })?
    };
    Ok((
        u64::try_from(total).unwrap_or(0),
        u64::try_from(embedded).unwrap_or(0),
    ))
}

/// #1598 — the set of distinct embedding dimensionalities currently
/// stored (ascending), restricted to `namespace` when one is given.
/// Feeds the reembed pre-flight banner — the loud "old dims vs target
/// dim" disclosure shown before a vector-space migration.
///
/// The declared `embedding_dim` column wins when present. Legacy rows
/// without it fall back to a length-derived value: a BLOB of `4n+1`
/// bytes is the v17 headed form (`(len-1)/4` floats), anything else is
/// treated as the legacy unheaded form (`len/4`), mirroring
/// [`dim_violations`]. Values that do not fit in `usize` are dropped.
///
/// # Errors
///
/// Returns the underlying SQLite error.
pub fn distinct_embedding_dims(conn: &Connection, namespace: Option<&str>) -> Result<Vec<usize>> {
    const DIM_EXPR: &str = "COALESCE(embedding_dim, \
         CASE WHEN length(embedding) % 4 = 1 THEN (length(embedding)-1)/4 \
              ELSE length(embedding)/4 END)";

    // Two fixed statement shapes: with and without the namespace predicate.
    let sql = match namespace {
        Some(_) => format!(
            "SELECT DISTINCT {DIM_EXPR} AS dim FROM memories \
             WHERE embedding IS NOT NULL AND namespace = ?1 ORDER BY dim"
        ),
        None => format!(
            "SELECT DISTINCT {DIM_EXPR} AS dim FROM memories \
             WHERE embedding IS NOT NULL ORDER BY dim"
        ),
    };

    // The namespace is the only possible bind value.
    let mut binds: Vec<&dyn rusqlite::ToSql> = Vec::with_capacity(1);
    if let Some(ns) = &namespace {
        binds.push(ns);
    }

    let mut stmt = conn.prepare(&sql)?;
    let raw_dims = stmt.query_map(binds.as_slice(), |row| row.get::<_, i64>(0))?;

    let mut dims = Vec::new();
    for raw in raw_dims {
        // Row errors abort the whole call; out-of-range values are skipped.
        if let Ok(dim) = usize::try_from(raw?) {
            dims.push(dim);
        }
    }
    Ok(dims)
}

/// #1579 B3 — number of rows whose `embedding` column is non-NULL.
///
/// A single `COUNT(*)` query: no BLOB is decoded and no row is
/// materialised. The CLI recall path uses it to decide whether a
/// one-shot invocation should pay the HNSW graph-construction cost at
/// all (see [`crate::hnsw::CLI_HNSW_BUILD_MIN_ENTRIES`]).
///
/// # Errors
///
/// Bubbles the rusqlite error from the COUNT query.
pub fn count_embedded_memories(conn: &Connection) -> Result<i64> {
    let embedded_rows: i64 = conn.query_row(
        "SELECT COUNT(*) FROM memories WHERE embedding IS NOT NULL",
        [],
        |row| row.get(0),
    )?;
    Ok(embedded_rows)
}

/// Load every stored embedding as an `(id, vector)` pair — the input for
/// building the HNSW index.
///
/// v0.6.3.1 P2 — BLOBs go through the magic-byte tolerant decoder
/// (`decode_embedding_blob`). Zero-length BLOBs are skipped silently; a
/// BLOB the decoder rejects is logged at `warn` level and skipped, because
/// aborting the entire HNSW build over one corrupt row would take the
/// whole semantic-search surface offline.
///
/// # Errors
///
/// Returns the underlying SQLite error from preparing the statement or
/// reading a row.
pub fn get_all_embeddings(conn: &Connection) -> Result<Vec<(String, Vec<f32>)>> {
    let mut stmt =
        conn.prepare("SELECT id, embedding FROM memories WHERE embedding IS NOT NULL")?;
    let blobs = stmt.query_map([], |row| {
        Ok((row.get::<_, String>(0)?, row.get::<_, Vec<u8>>(1)?))
    })?;

    let mut decoded = Vec::new();
    for blob in blobs {
        let (memory_id, raw) = blob?;
        if raw.is_empty() {
            // Nothing to decode for an empty payload.
            continue;
        }
        match crate::embeddings::decode_embedding_blob(&raw) {
            Ok(vector) => decoded.push((memory_id, vector)),
            Err(decode_err) => tracing::warn!(
                memory_id = %memory_id,
                error = %decode_err,
                "skipping memory with malformed embedding BLOB during HNSW build"
            ),
        }
    }
    Ok(decoded)
}

/// Hybrid recall — FTS5 keyword search + semantic cosine similarity.
/// Returns memories ranked by a blended score of keyword and semantic relevance.
/// When an HNSW `vector_index` is provided, uses approximate nearest-neighbor
/// search instead of scanning all embeddings linearly.
///
/// v0.6.3.1 (P3): hybrid recall preserving the existing 2-tuple return
/// shape for HTTP / CLI / bench callers. Delegates to
/// [`recall_hybrid_with_telemetry`] and discards the telemetry. Kept so
/// the dozen-plus call sites need no churn for a feature only MCP
/// `handle_recall` consumes.
///
/// Every argument is forwarded unchanged, in the same order, to
/// [`recall_hybrid_with_telemetry`].
///
/// # Errors
///
/// Propagates any error returned by [`recall_hybrid_with_telemetry`].
// A single lint allowance: the 16 positional arguments mirror the
// delegate's signature one-for-one.
#[allow(clippy::too_many_arguments)]
pub fn recall_hybrid(
    conn: &Connection,
    context: &str,
    query_embedding: &[f32],
    namespace: Option<&str>,
    limit: usize,
    tags_filter: Option<&str>,
    since: Option<&str>,
    until: Option<&str>,
    vector_index: Option<&crate::hnsw::VectorIndex>,
    short_extend: i64,
    mid_extend: i64,
    as_agent: Option<&str>,
    budget_tokens: Option<usize>,
    scoring: &crate::config::ResolvedScoring,
    // v0.7.0 WT-1-E — see [`recall_with_telemetry`] for the
    // archived-source exclusion contract.
    include_archived: bool,
    // v0.7.0 Form 4 / Cluster-A PERF-3 — push `--source-uri-prefix`
    // into the SQL WHERE on both the FTS and semantic branches so the
    // partial `idx_memories_source_uri` index covers the lookup. See
    // [`recall`] for the contract.
    source_uri_prefix: Option<&str>,
) -> Result<(Vec<(Memory, f64)>, BudgetOutcome)> {
    // The third tuple element is the telemetry block; this entry point
    // intentionally drops it.
    let (results, outcome, _telemetry) = recall_hybrid_with_telemetry(
        conn,
        context,
        query_embedding,
        namespace,
        limit,
        tags_filter,
        since,
        until,
        vector_index,
        short_extend,
        mid_extend,
        as_agent,
        budget_tokens,
        scoring,
        include_archived,
        source_uri_prefix,
    )?;
    Ok((results, outcome))
}

/// FX-4 / PERF-2 (2026-05-26) — HTTP recall handler entry point that
/// takes HNSW hits computed ahead of time.
///
/// The return shape is the same as [`recall_hybrid`]. Instead of a
/// vector index, the caller passes the hit slice it already obtained
/// from `idx.search()` outside the DB lock, so the DB-mutex hold window
/// does not cover the CPU-bound ANN walk. The telemetry produced by
/// [`recall_hybrid_with_telemetry_precomputed_hnsw`] is dropped here;
/// the HTTP surface does not consume it today.
///
/// # Errors
///
/// Propagates any error returned by
/// [`recall_hybrid_with_telemetry_precomputed_hnsw`].
#[allow(clippy::too_many_arguments)]
pub fn recall_hybrid_precomputed_hnsw(
    conn: &Connection,
    context: &str,
    query_embedding: &[f32],
    namespace: Option<&str>,
    limit: usize,
    tags_filter: Option<&str>,
    since: Option<&str>,
    until: Option<&str>,
    precomputed_hnsw_hits: &[crate::hnsw::VectorHit],
    short_extend: i64,
    mid_extend: i64,
    as_agent: Option<&str>,
    budget_tokens: Option<usize>,
    scoring: &crate::config::ResolvedScoring,
    include_archived: bool,
    source_uri_prefix: Option<&str>,
) -> Result<(Vec<(Memory, f64)>, BudgetOutcome)> {
    // Straight pass-through: every argument is forwarded in order.
    let (ranked, budget, _unused_telemetry) = recall_hybrid_with_telemetry_precomputed_hnsw(
        conn,
        context,
        query_embedding,
        namespace,
        limit,
        tags_filter,
        since,
        until,
        precomputed_hnsw_hits,
        short_extend,
        mid_extend,
        as_agent,
        budget_tokens,
        scoring,
        include_archived,
        source_uri_prefix,
    )?;
    Ok((ranked, budget))
}

// Contract of `recall_hybrid_with_telemetry` (kept as plain `//` comments:
// written as `///` lines here they would attach to the next item,
// `HybridPrep`, rather than to the function they describe).
//
// v0.6.3.1 (P3 + P6): hybrid recall reporting per-stage candidate counts,
// the average semantic blend weight, and the full budget outcome. MCP
// `handle_recall` uses the telemetry to populate the `meta` block (closes
// audit gaps G2/G8/G11) and the BudgetOutcome to populate R1 budget fields.
//
// The retrieval logic is unchanged — anti-goal of P3 is "do not change
// recall scoring or fusion logic." Counters are computed in place:
// `fts_candidates` is the pre-fusion FTS5 row count, `hnsw_candidates`
// is the pre-fusion HNSW (or linear-scan) hit count admitted past the
// 0.2 cosine gate, `blend_weight_avg` is the mean `semantic_weight`
// across the *returned* set (not the full candidate pool — operators
// care about what made it out).
// ---------------------------------------------------------------------------
// #871 — `recall_hybrid_with_telemetry` stage helpers.
//
// The original function was ~508 LOC carrying query preparation,
// FTS5 keyword retrieval, semantic (HNSW or linear-scan) retrieval,
// adaptive blend + decay scoring, touch ops + budget application,
// and telemetry assembly. Per the code-review verdict the function
// is split into focused stage-helpers so each phase has a clear
// contract and the orchestrator stays readable.
//
// The stages are kept inside `storage::mod` (rather than carved into
// a sub-module) because the helpers all share access to private
// helpers like `row_to_memory`, `sanitize_fts_query`,
// `archived_source_clause`, etc., and the SQL is tightly tied to
// the schema living in this module.
//
// Behaviour is byte-for-byte preserved: the same SQL runs, the same
// fusion produces the same blended scores, and `touch_many` mutates
// the same surviving set. Only the function-internal structure
// changes.
// ---------------------------------------------------------------------------

/// Result of [`prepare_hybrid_query`] — the pre-computed SQL
/// fragments + bind params the FTS and semantic phases need.
///
/// The `fts_*` fragments are written against the `m` table alias used
/// by the FTS query; the `sem_*` fragments are written against the bare
/// `memories` table name used by the linear-scan semantic query. The
/// lifetime `'a` is that of the caller-supplied namespace string.
struct HybridPrep<'a> {
    /// Output of `sanitize_fts_query(context, true)`; bound as `?1` of
    /// the FTS `MATCH`.
    fts_query: String,
    /// RFC 3339 timestamp captured once during preparation; bound to the
    /// `expires_at > ?` predicates and compared against `expires_at` on
    /// the HNSW path.
    now: String,
    /// Output of `compute_visibility_prefixes(as_agent)`; destructured
    /// into the four visibility bind values and passed to `is_visible`.
    prefixes: VisibilityPrefixes,
    /// Hierarchy clause for the FTS query as returned by
    /// `hierarchy_in_clause`; empty when it returned no clause.
    fts_hierarchy_fragment: String,
    /// `AND memories.namespace IN (...)` over the namespace's ancestors
    /// when hierarchy expansion is active; empty otherwise.
    sem_hierarchy_fragment: String,
    /// Namespace bound to the `(? IS NULL OR namespace = ?)` predicate:
    /// `None` when hierarchy expansion is active, else the caller's
    /// namespace.
    effective_namespace: Option<&'a str>,
    /// Flag returned by `hierarchy_in_clause`; when set, namespace
    /// matching is done against the ancestor list instead of by equality.
    hierarchy_active: bool,
    /// `archived_source_clause(include_archived, "m")`.
    fts_archived_fragment: &'static str,
    /// `archived_source_clause(include_archived, "memories")`.
    sem_archived_fragment: &'static str,
    /// `AND m.source_uri LIKE ?12 ESCAPE '\'` when a non-empty source-URI
    /// prefix was supplied; empty otherwise.
    fts_source_uri_fragment: &'static str,
    /// `AND memories.source_uri LIKE ?10 ESCAPE '\'` when a non-empty
    /// source-URI prefix was supplied; empty otherwise.
    sem_source_uri_fragment: &'static str,
    /// LIKE pattern (escaped prefix followed by `%`) bound to the
    /// source-URI fragment; `None` when no non-empty prefix was supplied.
    source_uri_like_param: Option<String>,
}

/// #871 stage 1 — query preparation.
///
/// Produces everything the FTS and semantic stages share:
///
///   - the sanitised FTS5 expression,
///   - a single RFC 3339 `now` timestamp, captured here so every later
///     stage compares against the same instant,
///   - the visibility prefixes that fill the `?8..?11` (FTS) /
///     `?6..?9` (semantic) bind slots,
///   - the namespace-hierarchy fragments (`Task 1.12`) and the
///     resulting effective namespace (cleared when hierarchy expansion
///     is active, so the equality predicate becomes a no-op),
///   - the archived-source and source-URI-prefix SQL fragments, plus the
///     escaped LIKE pattern for the latter.
fn prepare_hybrid_query<'a>(
    context: &str,
    namespace: Option<&'a str>,
    as_agent: Option<&str>,
    include_archived: bool,
    source_uri_prefix: Option<&str>,
) -> HybridPrep<'a> {
    let now = Utc::now().to_rfc3339();
    let fts_query = sanitize_fts_query(context, true);
    let prefixes = compute_visibility_prefixes(as_agent);

    // Hierarchy expansion: the FTS fragment comes ready-made; the
    // semantic one is rebuilt against the bare `memories` table name.
    let (fts_hierarchy_in, hierarchy_active) = hierarchy_in_clause(namespace);
    let fts_hierarchy_fragment = fts_hierarchy_in.unwrap_or_default();
    let sem_hierarchy_fragment = match namespace {
        Some(ns) if hierarchy_active => {
            // Each ancestor becomes a single-quoted SQL literal with
            // embedded quotes doubled.
            let quoted: Vec<String> = crate::models::namespace_ancestors(ns)
                .iter()
                .map(|ancestor| format!("'{}'", ancestor.replace('\'', "''")))
                .collect();
            format!("AND memories.namespace IN ({})", quoted.join(","))
        }
        _ => String::new(),
    };
    // With hierarchy expansion on, the IN-list replaces the equality
    // predicate, so the bound namespace must be NULL.
    let effective_namespace = if hierarchy_active { None } else { namespace };

    // An absent or empty prefix disables the source-URI filter entirely.
    let source_uri_like_param: Option<String> = source_uri_prefix
        .filter(|prefix| !prefix.is_empty())
        .map(|prefix| format!("{}%", escape_like_pattern(prefix)));
    let has_uri_filter = source_uri_like_param.is_some();

    HybridPrep {
        fts_query,
        now,
        prefixes,
        fts_hierarchy_fragment,
        sem_hierarchy_fragment,
        effective_namespace,
        hierarchy_active,
        fts_archived_fragment: archived_source_clause(include_archived, "m"),
        sem_archived_fragment: archived_source_clause(include_archived, "memories"),
        fts_source_uri_fragment: if has_uri_filter {
            "AND m.source_uri LIKE ?12 ESCAPE '\\'"
        } else {
            ""
        },
        sem_source_uri_fragment: if has_uri_filter {
            "AND memories.source_uri LIKE ?10 ESCAPE '\\'"
        } else {
            ""
        },
        source_uri_like_param,
    }
}

/// #871 stage 2 — FTS5 keyword phase.
///
/// Runs the FTS query assembled from the fragments in `prep` and hands
/// back one `(Memory, fts_score, embedding_bytes)` tuple per matching
/// row for the fusion stage. `fts_score` is the per-row projection
/// computed in SQL; the embedding BLOB is selected in the same statement
/// (Cluster-F PERF-2) so fusion can compute cosine without an N+1
/// round-trip. The candidate pool is capped at `max(3 × limit, 30)`.
///
/// # Errors
///
/// Returns the underlying SQLite error from preparing the statement,
/// binding parameters, or decoding a row.
fn fts_keyword_phase(
    conn: &Connection,
    prep: &HybridPrep<'_>,
    tags_filter: Option<&str>,
    since: Option<&str>,
    until: Option<&str>,
    limit: usize,
) -> Result<Vec<(Memory, f64, Option<Vec<u8>>)>> {
    /// Decodes one FTS result row into `(Memory, fts_score, embedding)`.
    fn read_fts_row(row: &rusqlite::Row<'_>) -> rusqlite::Result<(Memory, f64, Option<Vec<u8>>)> {
        let memory = row_to_memory(row)?;
        let score: f64 = row.get("fts_score")?;
        // Zero-based column 25 is `m.embedding` (it follows
        // `confidence_decayed_at` in the SELECT list). Read as an
        // `Option` so rows without an embedding come back as `None`.
        let embedding: Option<Vec<u8>> = row.get(25)?;
        Ok((memory, score, embedding))
    }

    let fts_limit = (limit * 3).max(30);
    let sql = format!(
        "SELECT m.id, m.tier, m.namespace, m.title, m.content, m.tags, m.priority,
                m.confidence, m.source, m.access_count, m.created_at, m.updated_at,
                m.last_accessed_at, m.expires_at, m.metadata, m.reflection_depth,
                m.memory_kind, m.entity_id, m.persona_version,
                m.citations, m.source_uri, m.source_span,
                m.confidence_source, m.confidence_signals, m.confidence_decayed_at, m.embedding,
                (fts.rank * -1) + (m.priority * 0.5) + (MIN(m.access_count, 50) * 0.1)
                + (m.confidence * 2.0)
                + (CASE m.tier WHEN 'long' THEN 3.0 WHEN 'mid' THEN 1.0 ELSE 0.0 END)
                + (1.0 / (1.0 + (julianday('now') - julianday(m.updated_at)) * 0.1))
                AS fts_score
         FROM memories_fts fts
         JOIN memories m ON m.rowid = fts.rowid
         WHERE memories_fts MATCH ?1
           AND (?2 IS NULL OR m.namespace = ?2)
           {fts_hierarchy_fragment}
           AND (m.expires_at IS NULL OR m.expires_at > ?3)
           AND (?4 IS NULL OR EXISTS (SELECT 1 FROM json_each(m.tags) WHERE json_each.value = ?4))
           AND (?5 IS NULL OR m.created_at >= ?5)
           AND (?6 IS NULL OR m.created_at <= ?6)
           {fts_archived_fragment}
           {fts_source_uri_fragment}
           {vis}
         ORDER BY fts_score DESC
         LIMIT ?7",
        fts_hierarchy_fragment = prep.fts_hierarchy_fragment,
        fts_archived_fragment = prep.fts_archived_fragment,
        fts_source_uri_fragment = prep.fts_source_uri_fragment,
        vis = visibility_clause(8, "m"),
    );
    // #1579 B6 — this is the hottest read statement in recall, so it goes
    // through the statement cache; the optional fragments only produce a
    // handful of distinct SQL shapes.
    let mut stmt = conn.prepare_cached(&sql)?;

    // Positional binds `?1..?11`; `?12` (the source-URI LIKE pattern) is
    // appended only when the matching fragment is part of the SQL.
    let (vis_p, vis_t, vis_u, vis_o) = prep.prefixes.clone();
    let mut binds: Vec<&dyn rusqlite::ToSql> = vec![
        &prep.fts_query,
        &prep.effective_namespace,
        &prep.now,
        &tags_filter,
        &since,
        &until,
        &fts_limit,
        &vis_p,
        &vis_t,
        &vis_u,
        &vis_o,
    ];
    if let Some(uri_like) = prep.source_uri_like_param.as_ref() {
        binds.push(uri_like);
    }

    let mapped = stmt.query_map(binds.as_slice(), read_fts_row)?;
    let candidates = mapped.collect::<rusqlite::Result<Vec<_>>>()?;
    Ok(candidates)
}

/// #871 stage 3 — semantic phase. Two paths share the same `scored`
/// HashMap mutation contract:
///
///   - HNSW path (when a `vector_index` is supplied): runs an ANN
///     search bounded at `5×limit`, gates each hit at `cosine > 0.2`,
///     and re-applies the FTS WHERE-clause filters in Rust because
///     the HNSW index returns raw vector neighbours (no SQL
///     visibility / archived-source / source-URI-prefix filter has
///     run).
///   - Linear-scan fallback (HNSW absent): runs the semantic SQL,
///     decodes embedding BLOBs, applies the same `cosine > 0.2`
///     gate, and inserts surviving rows into `scored`.
///
/// Returns the running `hnsw_candidates_count` for telemetry. Rows
/// already present in `scored` (i.e. FTS-side hits) are skipped so
/// the FTS embedding-based cosine wins (consistent with the
/// pre-refactor behaviour).
#[allow(clippy::too_many_arguments)]
fn semantic_phase(
    conn: &Connection,
    prep: &HybridPrep<'_>,
    query_embedding: &[f32],
    vector_index: Option<&crate::hnsw::VectorIndex>,
    // FX-4 / PERF-2 (2026-05-26) — when supplied, the HNSW search
    // has already been executed OUTSIDE the DB lock by the caller
    // (HTTP recall handler) and the hits are passed in here. The
    // function uses these directly instead of re-running
    // `idx.search()`, which keeps the CPU-bound ANN walk off the
    // DB-mutex hold window so concurrent recalls do not serialise
    // behind one another. When both `vector_index` and
    // `precomputed_hnsw_hits` are supplied, the precomputed slice
    // wins — callers that already paid the search cost outside the
    // lock must not pay it again inside. Existing callers (MCP /
    // CLI / SAL) pass `None` and keep the legacy single-call
    // behaviour where `semantic_phase` runs the search itself.
    precomputed_hnsw_hits: Option<&[crate::hnsw::VectorHit]>,
    namespace: Option<&str>,
    tags_filter: Option<&str>,
    since: Option<&str>,
    until: Option<&str>,
    limit: usize,
    include_archived: bool,
    source_uri_prefix: Option<&str>,
    scored: &mut HashMap<String, (Memory, f64, f64)>,
    // v0.7.0 H7 — bumped once per stored embedding whose dimensionality
    // disagrees with `query_embedding` (embedder-model switch). Accumulated
    // across the whole recall and surfaced via telemetry + an aggregated warn.
    dim_mismatch_count: &mut usize,
) -> Result<usize> {
    let mut hnsw_candidates_count: usize = 0;
    let now = prep.now.as_str();
    // FX-4 / PERF-2 — when `precomputed_hnsw_hits` is supplied OR a
    // `vector_index` is supplied, run the HNSW-hit ingestion path.
    // The precomputed path skips the `idx.search()` call (already
    // paid outside the lock); the legacy path runs the search
    // inline.
    if precomputed_hnsw_hits.is_some() || vector_index.is_some() {
        let owned_hits;
        let hits: &[crate::hnsw::VectorHit] = if let Some(pre) = precomputed_hnsw_hits {
            pre
        } else {
            let ann_limit = (limit * 5).max(50);
            owned_hits = vector_index
                .expect("vector_index set in legacy branch")
                .search(query_embedding, ann_limit);
            owned_hits.as_slice()
        };
        // v0.7.0 #981 — pre-#981 this branch called `get(conn, &hit.id)`
        // per hit, producing 50-250 round-trips per recall on a warm
        // index. The fix collects the ids that pass the
        // `cosine > 0.2` + not-yet-scored cosine gate, batches the
        // SELECT via `get_many`, and re-applies the row-side filter
        // ladder against the fetched map. Net effect: one SELECT
        // instead of N, no behavioural drift on the per-row filters
        // because they're applied identically against `&mem`. See
        // `tests/recall_semantic_batch_fetch_981.rs` for the pin.
        let mut needed_ids: Vec<String> = Vec::with_capacity(hits.len());
        let mut hit_meta: Vec<(String, f64)> = Vec::with_capacity(hits.len());
        for hit in hits {
            if scored.contains_key(&hit.id) {
                continue;
            }
            let cosine = f64::from(1.0 - hit.distance);
            // v0.6.2 (S18 iteration): cosine gate relaxed 0.3 → 0.2 —
            // see the matching comment in the linear-scan branch below.
            if cosine > crate::RECALL_COSINE_GATE {
                needed_ids.push(hit.id.clone());
                hit_meta.push((hit.id.clone(), cosine));
            }
        }
        let fetched = get_many(conn, &needed_ids)?;
        for (id, cosine) in hit_meta {
            let Some(mem) = fetched.get(&id) else {
                continue;
            };
            if let Some(ns) = namespace {
                if prep.hierarchy_active {
                    let ancestors = crate::models::namespace_ancestors(ns);
                    if !ancestors.iter().any(|a| a == &mem.namespace) {
                        continue;
                    }
                } else if mem.namespace != ns {
                    continue;
                }
            }
            if let Some(exp) = &mem.expires_at
                && exp.as_str() <= now
            {
                continue;
            }
            if let Some(tf) = tags_filter
                && !mem.tags.iter().any(|t| t == tf)
            {
                continue;
            }
            if let Some(s) = since
                && mem.created_at.as_str() < s
            {
                continue;
            }
            if let Some(u) = until
                && mem.created_at.as_str() > u
            {
                continue;
            }
            if !is_visible(mem, &prep.prefixes) {
                continue;
            }
            if !include_archived && is_archived_source(mem) {
                continue;
            }
            if let Some(prefix) = source_uri_prefix
                && !prefix.is_empty()
                && !mem
                    .source_uri
                    .as_deref()
                    .is_some_and(|u| u.starts_with(prefix))
            {
                continue;
            }
            // Clone is unavoidable here — `scored` owns the Memory
            // for the final cross-phase merge, and `fetched` may be
            // re-read for downstream phases.
            scored.insert(mem.id.clone(), (mem.clone(), 0.0, cosine));
            hnsw_candidates_count += 1;
        }
        return Ok(hnsw_candidates_count);
    }

    // Fallback: linear scan over all embeddings.
    let sem_sql = format!(
        "SELECT id, tier, namespace, title, content, tags, priority,
                confidence, source, access_count, created_at, updated_at,
                last_accessed_at, expires_at, metadata, reflection_depth, memory_kind, embedding
         FROM memories
         WHERE embedding IS NOT NULL
           AND (?1 IS NULL OR namespace = ?1)
           {sem_hierarchy_fragment}
           AND (expires_at IS NULL OR expires_at > ?2)
           AND (?3 IS NULL OR EXISTS (SELECT 1 FROM json_each(memories.tags) WHERE json_each.value = ?3))
           AND (?4 IS NULL OR created_at >= ?4)
           AND (?5 IS NULL OR created_at <= ?5)
           {sem_archived_fragment}
           {sem_source_uri_fragment}
           {vis}",
        sem_hierarchy_fragment = prep.sem_hierarchy_fragment,
        sem_archived_fragment = prep.sem_archived_fragment,
        sem_source_uri_fragment = prep.sem_source_uri_fragment,
        vis = visibility_clause(6, "memories"),
    );
    // #1579 B6 — same prepare_cached treatment as the FTS branch above.
    let mut sem_stmt = conn.prepare_cached(&sem_sql)?;
    let sem_row_handler = |row: &rusqlite::Row<'_>| -> rusqlite::Result<(Memory, Option<Vec<u8>>)> {
        let mem = row_to_memory(row)?;
        // v0.7.x Form 6 — `memory_kind` was inserted between
        // `reflection_depth` and `embedding` in the SELECT list
        // above; `embedding` sits at zero-based index 17.
        let emb_bytes: Option<Vec<u8>> = row.get(17)?;
        Ok((mem, emb_bytes))
    };
    let (vis_p, vis_t, vis_u, vis_o) = prep.prefixes.clone();
    let sem_results: Vec<(Memory, Option<Vec<u8>>)> =
        if let Some(ref uri_param) = prep.source_uri_like_param {
            sem_stmt
                .query_map(
                    params![
                        prep.effective_namespace,
                        prep.now,
                        tags_filter,
                        since,
                        until,
                        vis_p,
                        vis_t,
                        vis_u,
                        vis_o,
                        uri_param,
                    ],
                    sem_row_handler,
                )?
                .collect::<rusqlite::Result<Vec<_>>>()?
        } else {
            sem_stmt
                .query_map(
                    params![
                        prep.effective_namespace,
                        prep.now,
                        tags_filter,
                        since,
                        until,
                        vis_p,
                        vis_t,
                        vis_u,
                        vis_o,
                    ],
                    sem_row_handler,
                )?
                .collect::<rusqlite::Result<Vec<_>>>()?
        };
    for (mem, emb_bytes) in sem_results {
        if scored.contains_key(&mem.id) {
            continue;
        }
        if let Some(bytes) = emb_bytes
            && !bytes.is_empty()
        {
            // v0.6.3.1 P2 — tolerate legacy + headed payloads; skip
            // (with telemetry) on malformed BLOBs so a single corrupt
            // row can't poison the whole semantic stage.
            let Ok(emb) = crate::embeddings::decode_embedding_blob(&bytes) else {
                tracing::warn!(
                    memory_id = %mem.id,
                    "skipping malformed embedding BLOB during semantic recall"
                );
                continue;
            };
            let cosine =
                match crate::embeddings::Embedder::cosine_similarity_checked(query_embedding, &emb)
                {
                    crate::embeddings::CosineComparison::Comparable(c) => f64::from(c),
                    crate::embeddings::CosineComparison::DimensionMismatch { .. } => {
                        // v0.7.0 H7 — stored embedding came from a different
                        // embedder model; counted (not silently dropped) so the
                        // aggregated warn + telemetry can flag the model switch.
                        *dim_mismatch_count += 1;
                        continue;
                    }
                };
            if cosine > crate::RECALL_COSINE_GATE {
                scored.insert(mem.id.clone(), (mem, 0.0, cosine));
                hnsw_candidates_count += 1;
            }
        }
    }
    Ok(hnsw_candidates_count)
}

/// #871 stage 4 — adaptive blend + decay.
///
/// For every candidate in the fusion pool this:
///   1. normalises `fts_score` by `max_fts_score` (0.0 when the max is
///      not positive);
///   2. derives `semantic_weight` from `mem.content.len()`: 0.50 at
///      ≤500, 0.15 at ≥5000, linearly interpolated in between
///      (embeddings lose information on long text, FTS stays precise);
///   3. blends `semantic_weight * cosine + (1 - semantic_weight) * norm_fts`;
///   4. multiplies by `scoring.decay_multiplier(tier, age_days)`, where
///      `age_days` comes from `created_at` (an unparseable timestamp is
///      treated as age 0).
///
/// Returns `(ranked, weights)`:
///   - `ranked` is sorted by final score, highest first, and truncated
///     to `limit`;
///   - `weights` holds the `semantic_weight` of EVERY candidate in the
///     pool (captured before truncation), in unspecified order.
///
/// Ordering contract: equal scores are broken by ascending memory id, so
/// the output no longer depends on `HashMap` iteration order, and a NaN
/// score ranks last. The comparator is a total order, which
/// `slice::sort_by` requires (it may panic otherwise).
fn blend_and_rank(
    scored: HashMap<String, (Memory, f64, f64)>,
    max_fts_score: f64,
    scoring: &crate::config::ResolvedScoring,
    limit: usize,
) -> (Vec<(Memory, f64)>, Vec<f64>) {
    let now_utc = Utc::now();
    // One weight per pool entry — size the buffer once up front.
    let mut weights: Vec<f64> = Vec::with_capacity(scored.len());
    let mut results: Vec<(Memory, f64)> = scored
        .into_values()
        .map(|(mem, fts_score, cosine)| {
            let norm_fts = if max_fts_score > 0.0 {
                fts_score / max_fts_score
            } else {
                0.0
            };
            // B4 (R2-LOW) — clamp to i32::MAX instead of panicking when
            // a memory's content is >2GB. The lerp below treats anything
            // ≥5000 as the long-tail bucket regardless, so the clamp
            // does not change scoring; it only closes a panic window a
            // hostile import could otherwise reach.
            let content_len = f64::from(i32::try_from(mem.content.len()).unwrap_or(i32::MAX));
            let semantic_weight = if content_len <= 500.0 {
                0.50
            } else if content_len >= 5000.0 {
                0.15
            } else {
                0.50 - 0.35 * ((content_len - 500.0) / 4500.0)
            };
            weights.push(semantic_weight);
            let blended = semantic_weight * cosine + (1.0 - semantic_weight) * norm_fts;
            let age_days = chrono::DateTime::parse_from_rfc3339(&mem.created_at)
                .ok()
                .map_or(0.0, |ts| {
                    let secs = (now_utc - ts.with_timezone(&Utc)).num_seconds();
                    #[allow(clippy::cast_precision_loss)]
                    {
                        secs as f64 / crate::SECS_PER_DAY as f64
                    }
                });
            let decay = scoring.decay_multiplier(&mem.tier, age_days);
            (mem, blended * decay)
        })
        .collect();
    // NaN must not outrank real scores: demote it to the bottom for
    // comparison purposes only (the stored score is left untouched).
    let rank_key = |score: f64| {
        if score.is_nan() {
            f64::NEG_INFINITY
        } else {
            score
        }
    };
    results.sort_by(|a, b| {
        rank_key(b.1)
            .total_cmp(&rank_key(a.1))
            // Deterministic tie-break so `truncate(limit)` keeps the same
            // rows on every run.
            .then_with(|| a.0.id.cmp(&b.0.id))
    });
    results.truncate(limit);
    (results, weights)
}

/// #871 stage 5 — post-fusion operations on the ranked list.
///
/// Runs, in order:
///   1. `apply_proximity_boost` anchored at `namespace` — only when
///      `hierarchy_active` is set AND a namespace was supplied;
///   2. `apply_token_budget` with `budget_tokens`;
///   3. a single batched `touch_many` over the ids that survived the
///      budget, forwarding `short_extend` / `mid_extend` (the access
///      bump + per-tier expiry slide).
///
/// A `touch_many` failure is best-effort: it is logged at WARN and the
/// recall result is still returned.
fn apply_recall_post_ops(
    conn: &Connection,
    results: Vec<(Memory, f64)>,
    hierarchy_active: bool,
    namespace: Option<&str>,
    budget_tokens: Option<usize>,
    short_extend: i64,
    mid_extend: i64,
) -> (Vec<(Memory, f64)>, BudgetOutcome) {
    let ranked = match namespace {
        Some(anchor) if hierarchy_active => apply_proximity_boost(results, anchor),
        _ => results,
    };

    let (survivors, budget_outcome) = apply_token_budget(ranked, budget_tokens);

    let survivor_ids: Vec<&str> = survivors
        .iter()
        .map(|entry| entry.0.id.as_str())
        .collect();
    if let Err(err) = touch_many(conn, &survivor_ids, short_extend, mid_extend) {
        tracing::warn!("touch_many failed for hybrid recall set: {}", err);
    }

    (survivors, budget_outcome)
}

/// #871 stage 6 — telemetry assembly.
///
/// Packs the per-stage candidate counters into a `RecallTelemetry` and
/// sets `blend_weight_avg` to the arithmetic mean of `blend_weights`
/// (0.0 for an empty slice).
///
/// The mean covers exactly the slice handed in. NOTE(review): the
/// in-file caller passes the weights produced by `blend_and_rank`, which
/// are captured for every fused candidate before truncation — confirm
/// that this is the population operators expect the average to describe.
fn assemble_recall_telemetry(
    fts_candidates: usize,
    hnsw_candidates: usize,
    blend_weights: &[f64],
    embedding_dim_mismatch: usize,
) -> crate::models::RecallTelemetry {
    let blend_weight_avg = match blend_weights.len() {
        0 => 0.0,
        count => {
            #[allow(clippy::cast_precision_loss)]
            let denominator = count as f64;
            blend_weights.iter().sum::<f64>() / denominator
        }
    };

    crate::models::RecallTelemetry {
        fts_candidates,
        hnsw_candidates,
        blend_weight_avg,
        embedding_dim_mismatch,
    }
}

/// Hybrid (FTS keyword + semantic) recall that returns the ranked
/// memories, the token-budget outcome, and per-stage telemetry.
///
/// Thin public entry point: every argument is forwarded unchanged to
/// `recall_hybrid_with_telemetry_inner`, with `None` in the
/// precomputed-HNSW-hits slot. Any ANN search therefore happens inside
/// the pipeline, driven by `vector_index` when it is `Some`.
///
/// # Errors
/// Propagates any error returned by the inner pipeline.
#[allow(clippy::too_many_arguments)]
pub fn recall_hybrid_with_telemetry(
    conn: &Connection,
    context: &str,
    query_embedding: &[f32],
    namespace: Option<&str>,
    limit: usize,
    tags_filter: Option<&str>,
    since: Option<&str>,
    until: Option<&str>,
    vector_index: Option<&crate::hnsw::VectorIndex>,
    short_extend: i64,
    mid_extend: i64,
    as_agent: Option<&str>,
    budget_tokens: Option<usize>,
    scoring: &crate::config::ResolvedScoring,
    // v0.7.0 WT-1-E — see [`recall_with_telemetry`] for the
    // archived-source exclusion contract.
    include_archived: bool,
    // v0.7.0 Form 4 / Cluster-A PERF-3 — see [`recall`] for the
    // contract. Pushed into both the FTS and semantic branch SQL so
    // both pools are constrained by the partial
    // `idx_memories_source_uri` index, not the post-fetch Rust filter.
    source_uri_prefix: Option<&str>,
) -> Result<(
    Vec<(Memory, f64)>,
    BudgetOutcome,
    crate::models::RecallTelemetry,
)> {
    // Legacy path: no precomputed hits (the `None` after `vector_index`).
    recall_hybrid_with_telemetry_inner(
        conn,
        context,
        query_embedding,
        namespace,
        limit,
        tags_filter,
        since,
        until,
        vector_index,
        None,
        short_extend,
        mid_extend,
        as_agent,
        budget_tokens,
        scoring,
        include_archived,
        source_uri_prefix,
    )
}

/// FX-4 / PERF-2 (2026-05-26) — variant of
/// [`recall_hybrid_with_telemetry`] that accepts a pre-computed slice
/// of HNSW hits in place of the in-pipeline `idx.search()` call. The
/// HTTP recall handler runs the ANN walk OUTSIDE the DB mutex (the
/// HNSW index lives behind its own `vector_index` mutex; the DB lock
/// is not required for the search) and passes the result here so the
/// DB-mutex hold window covers only the FTS5 query + the batched
/// `get_many` fetch + the touch ops. Concurrent recalls overlap
/// their CPU-bound ANN walks instead of serialising behind the
/// single shared connection.
///
/// Semantics-preserving by construction: the precomputed hits feed
/// the same per-hit cosine gate + `get_many` round-trip inside
/// [`semantic_phase`] that the legacy single-call path uses. (The gate
/// was quoted as `cosine > 0.2` when this note was written —
/// NOTE(review): confirm against `crate::RECALL_COSINE_GATE`.)
/// Existing callers (MCP / CLI / SAL) continue to call
/// [`recall_hybrid_with_telemetry`] and pay the search cost inside
/// the lock; only the HTTP handler swaps in the new path.
///
/// Implementation: forwards every argument to
/// `recall_hybrid_with_telemetry_inner` with `vector_index = None` and
/// `Some(precomputed_hnsw_hits)`.
///
/// # Errors
/// Propagates any error returned by the inner pipeline.
#[allow(clippy::too_many_arguments)]
pub fn recall_hybrid_with_telemetry_precomputed_hnsw(
    conn: &Connection,
    context: &str,
    query_embedding: &[f32],
    namespace: Option<&str>,
    limit: usize,
    tags_filter: Option<&str>,
    since: Option<&str>,
    until: Option<&str>,
    precomputed_hnsw_hits: &[crate::hnsw::VectorHit],
    short_extend: i64,
    mid_extend: i64,
    as_agent: Option<&str>,
    budget_tokens: Option<usize>,
    scoring: &crate::config::ResolvedScoring,
    include_archived: bool,
    source_uri_prefix: Option<&str>,
) -> Result<(
    Vec<(Memory, f64)>,
    BudgetOutcome,
    crate::models::RecallTelemetry,
)> {
    // Precomputed path: no index handle, hits supplied by the caller.
    recall_hybrid_with_telemetry_inner(
        conn,
        context,
        query_embedding,
        namespace,
        limit,
        tags_filter,
        since,
        until,
        None,
        Some(precomputed_hnsw_hits),
        short_extend,
        mid_extend,
        as_agent,
        budget_tokens,
        scoring,
        include_archived,
        source_uri_prefix,
    )
}

/// Inner dispatch shared by [`recall_hybrid_with_telemetry`] (legacy,
/// runs `idx.search()` inside the DB-lock window) and
/// [`recall_hybrid_with_telemetry_precomputed_hnsw`] (FX-4 / PERF-2,
/// caller pre-ran the ANN walk outside the DB lock).
///
/// AT MOST one of `vector_index` / `precomputed_hnsw_hits` is `Some`
/// on any given call: the precomputed wrapper passes
/// `(None, Some(hits))`, while the legacy wrapper forwards the caller's
/// optional index with `None` hits — so `(None, None)` is reachable and
/// is left to `semantic_phase` (documented below as falling back to a
/// linear scan). The inner is private so the variant choice cannot
/// drift.
///
/// Pipeline: (1) query prep → (2) FTS5 keyword phase → fusion-pool
/// seeding → (3) semantic phase → (4) blend + decay + rank →
/// (5) proximity boost / token budget / touch → (6) telemetry.
///
/// # Errors
/// Propagates errors from the FTS keyword phase and the semantic phase.
/// A failed `touch_many` in stage 5 is logged, not returned.
#[allow(clippy::too_many_arguments)]
fn recall_hybrid_with_telemetry_inner(
    conn: &Connection,
    context: &str,
    query_embedding: &[f32],
    namespace: Option<&str>,
    limit: usize,
    tags_filter: Option<&str>,
    since: Option<&str>,
    until: Option<&str>,
    vector_index: Option<&crate::hnsw::VectorIndex>,
    precomputed_hnsw_hits: Option<&[crate::hnsw::VectorHit]>,
    short_extend: i64,
    mid_extend: i64,
    as_agent: Option<&str>,
    budget_tokens: Option<usize>,
    scoring: &crate::config::ResolvedScoring,
    include_archived: bool,
    source_uri_prefix: Option<&str>,
) -> Result<(
    Vec<(Memory, f64)>,
    BudgetOutcome,
    crate::models::RecallTelemetry,
)> {
    // Stage 1 — query preparation (FTS sanitisation, namespace
    // hierarchy expansion, visibility prefixes, SQL fragments).
    let prep = prepare_hybrid_query(
        context,
        namespace,
        as_agent,
        include_archived,
        source_uri_prefix,
    );

    // Stage 2 — FTS5 keyword phase.
    let fts_results = fts_keyword_phase(conn, &prep, tags_filter, since, until, limit)?;

    // Fusion pool (id → (memory, fts_score, cosine_score)). FTS rows
    // land first so their inline-fetched embedding-cosine wins; the
    // semantic phase only inserts ids it hasn't seen.
    //
    // PERF-6 (med/low review batch) — pre-size the map so we avoid the
    // 4-realloc growth path (4 → 8 → 16 → 32) on every recall. Upper
    // bound is fts_results.len() (already in scope) + the upcoming
    // semantic phase's `ann_limit = max(limit*5, 50)`; the slight
    // over-allocation is dwarfed by the saved zeroing + rehashing cost
    // at default `limit=10` where the natural growth path would have
    // run through ~3 reallocations.
    let scored_cap = fts_results
        .len()
        .saturating_add(limit.saturating_mul(5).max(50));
    let mut scored: HashMap<String, (Memory, f64, f64)> = HashMap::with_capacity(scored_cap);
    // Starts at 1.0, not 0.0: the normaliser handed to stage 4 is
    // therefore never below 1.0, so FTS scores under 1.0 are passed
    // through rather than stretched up to the top of the range.
    let mut max_fts_score: f64 = 1.0;
    let mut fts_candidates_count: usize = 0;
    // v0.7.0 H7 — accumulates stored embeddings whose dimensionality
    // disagrees with the active model's `query_embedding` across BOTH the
    // FTS branch (here) and the semantic linear-scan branch (below).
    let mut dim_mismatch_count: usize = 0;
    for (mem, fts_score, embedding_bytes) in fts_results {
        if fts_score > max_fts_score {
            max_fts_score = fts_score;
        }
        // Cluster-F PERF-2 — cosine from the inline-fetched embedding
        // bytes. Malformed BLOBs degrade to cosine=0 + warn-log so a
        // single corrupt row does not poison the whole recall.
        let cosine = match embedding_bytes {
            Some(bytes) if !bytes.is_empty() => {
                match crate::embeddings::decode_embedding_blob(&bytes) {
                    Ok(emb) => match crate::embeddings::Embedder::cosine_similarity_checked(
                        query_embedding,
                        &emb,
                    ) {
                        crate::embeddings::CosineComparison::Comparable(c) => f64::from(c),
                        crate::embeddings::CosineComparison::DimensionMismatch { .. } => {
                            // v0.7.0 H7 — embedder-model switch: count the
                            // stale-dimension row instead of letting it score a
                            // silent 0.0 cosine. FTS keyword score still applies.
                            dim_mismatch_count += 1;
                            0.0
                        }
                    },
                    Err(_) => {
                        tracing::warn!(
                            memory_id = %mem.id,
                            "skipping malformed embedding BLOB during hybrid recall (FTS branch)"
                        );
                        0.0
                    }
                }
            }
            // No embedding stored (NULL or empty BLOB): keyword score only.
            _ => 0.0,
        };
        scored.insert(mem.id.clone(), (mem, fts_score, cosine));
        fts_candidates_count += 1;
    }

    // Stage 3 — semantic phase (HNSW when available, linear-scan
    // fallback). When `precomputed_hnsw_hits` is supplied the search
    // step is skipped (already paid outside the DB lock); otherwise
    // the in-pipeline `idx.search()` runs as before.
    let hnsw_candidates_count = semantic_phase(
        conn,
        &prep,
        query_embedding,
        vector_index,
        precomputed_hnsw_hits,
        namespace,
        tags_filter,
        since,
        until,
        limit,
        include_archived,
        source_uri_prefix,
        &mut scored,
        &mut dim_mismatch_count,
    )?;

    // v0.7.0 H7 — de-silence embedder-model switches. A non-zero count means
    // stored embeddings were produced by a different model (different
    // dimensionality) than the active embedder, so their semantic signal was
    // forced to 0.0 for this query. One aggregated warn per recall (not per
    // row) tells the operator the affected rows need re-embedding.
    if dim_mismatch_count > 0 {
        tracing::warn!(
            dim_mismatch_count,
            active_query_dim = query_embedding.len(),
            "recall skipped {dim_mismatch_count} stored embedding(s) with mismatched \
             dimensionality — the embedder model appears to have changed; re-embed the \
             affected memories to restore their semantic recall signal"
        );
    }

    // Stage 4 — adaptive blend + per-tier decay.
    let (results, blend_weights) = blend_and_rank(scored, max_fts_score, scoring, limit);

    // Stage 5 — proximity boost + token budget + batched touch.
    let (budgeted, outcome) = apply_recall_post_ops(
        conn,
        results,
        prep.hierarchy_active,
        namespace,
        budget_tokens,
        short_extend,
        mid_extend,
    );

    // Stage 6 — telemetry assembly.
    let telemetry = assemble_recall_telemetry(
        fts_candidates_count,
        hnsw_candidates_count,
        &blend_weights,
        dim_mismatch_count,
    );

    Ok((budgeted, outcome, telemetry))
}

/// Checkpoint the WAL for a clean shutdown by issuing the
/// `wal_checkpoint` pragma with the `TRUNCATE` argument.
///
/// # Errors
/// Returns the underlying database error if the pragma fails.
pub fn checkpoint(conn: &Connection) -> Result<()> {
    conn.pragma_update(None, "wal_checkpoint", "TRUNCATE")
        .map_err(Into::into)
}

// ---------------------------------------------------------------------------
// Phase 3 foundation (issue #224) — sync_state helpers.
//
// These are additive: they do not change how the existing `ai-memory sync`
// command behaves in v0.6.0 GA. They exist so HTTP sync endpoints and the
// CRDT-lite merge follow-up can durably track "last updated_at seen from
// peer X" per local agent.
// ---------------------------------------------------------------------------

/// Record the latest `updated_at` (`seen_at`) that local `agent_id` has
/// observed from `peer_id`.
///
/// Upserts the `(agent_id, peer_id)` row in `sync_state`:
/// - first observation inserts the row;
/// - later observations only advance `last_seen_at` when the incoming
///   value compares greater than the stored one (the comparison is done
///   in SQL on the stored values), so an older write never overwrites a
///   newer one;
/// - `last_pulled_at` is overwritten with the current UTC time on every
///   call.
///
/// # Errors
/// Returns the underlying database error if the upsert fails.
pub fn sync_state_observe(
    conn: &Connection,
    agent_id: &str,
    peer_id: &str,
    seen_at: &str,
) -> Result<()> {
    const UPSERT_SEEN_SQL: &str =
        "INSERT INTO sync_state (agent_id, peer_id, last_seen_at, last_pulled_at) \
         VALUES (?1, ?2, ?3, ?4) \
         ON CONFLICT(agent_id, peer_id) DO UPDATE SET \
            last_seen_at = CASE WHEN excluded.last_seen_at > last_seen_at \
                                THEN excluded.last_seen_at \
                                ELSE last_seen_at END, \
            last_pulled_at = excluded.last_pulled_at";

    let pulled_at = Utc::now().to_rfc3339();
    conn.execute(
        UPSERT_SEEN_SQL,
        params![agent_id, peer_id, seen_at, pulled_at],
    )?;
    Ok(())
}

/// Load the full vector clock for `agent_id`: every
/// (`peer_id` -> `last_seen_at`) pair recorded for it in `sync_state`.
///
/// An agent with no rows yields a default (empty) clock.
///
/// # Errors
/// Returns the underlying database error if the statement cannot be
/// prepared or a row cannot be read.
pub fn sync_state_load(conn: &Connection, agent_id: &str) -> Result<crate::models::VectorClock> {
    let mut stmt =
        conn.prepare("SELECT peer_id, last_seen_at FROM sync_state WHERE agent_id = ?1")?;
    let mut clock = crate::models::VectorClock::default();

    let mut cursor = stmt.query(params![agent_id])?;
    while let Some(record) = cursor.next()? {
        let peer = record.get::<_, String>(0)?;
        let seen_at = record.get::<_, String>(1)?;
        clock.entries.insert(peer, seen_at);
    }

    Ok(clock)
}

/// Look up this peer's last-push watermark for `peer_id`. Returns `None`
/// if we've never successfully pushed to them (foundation-era rows also
/// return `None` because the column was added in schema v12).
#[must_use]
#[allow(dead_code)] // called via lib crate (daemon_runtime); bin sees it as unused
pub fn sync_state_last_pushed(conn: &Connection, agent_id: &str, peer_id: &str) -> Option<String> {
    conn.query_row(
        "SELECT last_pushed_at FROM sync_state WHERE agent_id = ?1 AND peer_id = ?2",
        params![agent_id, peer_id],
        |r| r.get::<_, Option<String>>(0),
    )
    .ok()
    .flatten()
}

/// Record that local memories up to `updated_at = pushed_at` have been
/// accepted by `peer_id`.
///
/// Upserts the `(agent_id, peer_id)` row in `sync_state`:
/// - when the row is missing it is created with `last_pushed_at =
///   pushed_at` and both `last_seen_at` / `last_pulled_at` set to the
///   current UTC time;
/// - when the row exists only `last_pushed_at` changes, and only
///   forward: a NULL stored value is filled in, a greater incoming value
///   replaces it, anything else leaves it untouched.
///
/// NOTE(review): a freshly created row gets `last_seen_at = now` even
/// though nothing was observed from the peer — confirm that is the
/// intended seed for the value `sync_state_load` later reports.
///
/// # Errors
/// Returns the underlying database error if the upsert fails.
#[allow(dead_code)] // called via lib crate (daemon_runtime); bin sees it as unused
pub fn sync_state_record_push(
    conn: &Connection,
    agent_id: &str,
    peer_id: &str,
    pushed_at: &str,
) -> Result<()> {
    const UPSERT_PUSHED_SQL: &str =
        "INSERT INTO sync_state (agent_id, peer_id, last_seen_at, last_pulled_at, last_pushed_at) \
         VALUES (?1, ?2, ?3, ?3, ?4) \
         ON CONFLICT(agent_id, peer_id) DO UPDATE SET \
            last_pushed_at = CASE \
                WHEN excluded.last_pushed_at IS NULL THEN last_pushed_at \
                WHEN last_pushed_at IS NULL THEN excluded.last_pushed_at \
                WHEN excluded.last_pushed_at > last_pushed_at THEN excluded.last_pushed_at \
                ELSE last_pushed_at END";

    let recorded_at = Utc::now().to_rfc3339();
    conn.execute(
        UPSERT_PUSHED_SQL,
        params![agent_id, peer_id, recorded_at, pushed_at],
    )?;
    Ok(())
}

/// Return memories whose `updated_at > since`, ordered by `updated_at`
/// ascending and capped at `limit` rows (caller-chosen pagination).
/// With `since = None` no lower bound is applied. Used by
/// `GET /api/v1/sync/since` to stream incremental updates to a peer.
///
/// # Errors
/// Returns the underlying database error if the statement cannot be
/// prepared or a row cannot be mapped.
pub fn memories_updated_since(
    conn: &Connection,
    since: Option<&str>,
    limit: usize,
) -> Result<Vec<Memory>> {
    // #1028 (HIGH, 2026-05-21) — REVERTED 2026-05-21 via QC pass-2.
    // Deliberately NO `scope <> 'private'` filter at this layer. A
    // first-pass "defense-in-depth" filter here bypassed the richer
    // federation visibility contract (owner-signed-private projection
    // back to the owner peer, inbox-target private projection,
    // federation_share opt-in on legacy rows) and broke 5 federation
    // tests. The visibility gate runs DOWNSTREAM of this method and
    // already refuses rows that must not federate; any tightening
    // belongs in the federation handler / gate audit, not the SAL.
    //
    // #1476 — sargable split, mirrors src/store/postgres.rs. A single
    // `(?1 IS NULL OR updated_at > ?1)` predicate cannot be served by
    // `idx_memories_updated_at` and degrades to a full table scan.
    // Choosing the statement up front lets the unbounded form read in
    // index order and the bounded form use the index as a range bound,
    // each stopping early under the LIMIT.
    const COLS: &str = "SELECT id, tier, namespace, title, content, tags, priority, confidence, \
                source, access_count, created_at, updated_at, last_accessed_at, \
                expires_at, metadata \
         FROM memories ";

    let sql = match since {
        Some(_) => format!("{COLS} WHERE updated_at > ?1 ORDER BY updated_at ASC LIMIT ?2"),
        None => format!("{COLS} ORDER BY updated_at ASC LIMIT ?1"),
    };
    let mut stmt = conn.prepare(&sql)?;

    let fetched = if let Some(cursor) = since {
        stmt.query_map(params![cursor, limit], row_to_memory)?
            .collect::<rusqlite::Result<Vec<_>>>()
    } else {
        stmt.query_map(params![limit], row_to_memory)?
            .collect::<rusqlite::Result<Vec<_>>>()
    };
    fetched.map_err(Into::into)
}

/// Deep health check — verifies the DB is accessible and FTS is
/// functional.
///
/// Two probes run in order: a `COUNT(*)` over `memories`, then the
/// `integrity-check` command against `memories_fts`. Returns `Ok(true)`
/// when both succeed; there is no `Ok(false)` outcome.
///
/// # Errors
/// Returns the underlying database error from whichever probe fails.
pub fn health_check(conn: &Connection) -> Result<bool> {
    // Probe 1: the main table can be read.
    conn.query_row("SELECT COUNT(*) FROM memories", [], |row| {
        row.get::<_, i64>(0)
    })?;

    // Probe 2: the FTS index passes its self-check.
    conn.execute(
        "INSERT INTO memories_fts(memories_fts) VALUES('integrity-check')",
        [],
    )?;

    Ok(true)
}

// ---------------------------------------------------------------------------
// Namespace standards
// ---------------------------------------------------------------------------

/// Set the standard memory for a namespace, with optional parent for rule layering.
pub fn set_namespace_standard(
    conn: &Connection,
    namespace: &str,
    standard_id: &str,
    parent: Option<&str>,
) -> Result<()> {
    // Verify the memory exists (but allow cross-namespace — shared policy)
    let _mem = get(conn, standard_id)?.ok_or_else(|| {
        // #962 typed envelope — 404 NOT_FOUND.
        anyhow::Error::new(StorageError::MemoryNotFound {
            id: standard_id.to_string(),
            role: None,
        })
    })?;
    // Resolve parent: explicit > auto-detect by `-` prefix > none
    let resolved_parent = match parent {
        Some(p) => {
            if p == namespace {
                // #962 typed envelope.
                return Err(anyhow::Error::new(StorageError::InvalidArgument {
                    reason: "namespace cannot be its own parent".to_string(),
                }));
            }
            Some(p.to_string())
        }
        None => auto_detect_parent(conn, namespace),
    };
    let now = chrono::Utc::now().to_rfc3339();
    conn.execute(
        "INSERT INTO namespace_meta (namespace, standard_id, updated_at, parent_namespace)
         VALUES (?1, ?2, ?3, ?4)
         ON CONFLICT(namespace) DO UPDATE SET standard_id = ?2, updated_at = ?3, parent_namespace = ?4",
        params![namespace, standard_id, now, resolved_parent],
    )?;
    Ok(())
}

/// Auto-detect a parent namespace from the `-` prefix chain.
///
/// Walks the prefixes obtained by cutting at each `-`, longest first
/// ("ai-memory-tests" → "ai-memory" → "ai"), and returns the first one
/// that has a standard set. An empty prefix (leading `-`) ends the walk.
/// Lookup errors count as "no standard".
fn auto_detect_parent(conn: &Connection, namespace: &str) -> Option<String> {
    namespace
        // Dash positions from right to left == successively shorter prefixes.
        .rmatch_indices('-')
        .map(|(dash_at, _)| &namespace[..dash_at])
        .take_while(|prefix| !prefix.is_empty())
        .find(|&prefix| matches!(get_namespace_standard(conn, prefix), Ok(Some(_))))
        .map(str::to_owned)
}

/// Get the standard memory ID for a namespace.
#[allow(clippy::unnecessary_wraps)]
pub fn get_namespace_standard(conn: &Connection, namespace: &str) -> Result<Option<String>> {
    let result = conn
        .query_row(
            "SELECT standard_id FROM namespace_meta WHERE namespace = ?1",
            params![namespace],
            |r| r.get(0),
        )
        .ok();
    Ok(result)
}

/// Get the explicit parent namespace recorded for `namespace`.
///
/// Returns `None` when the namespace has no row, when its
/// `parent_namespace` is NULL, or when the query fails for any reason.
pub fn get_namespace_parent(conn: &Connection, namespace: &str) -> Option<String> {
    let lookup = conn.query_row(
        "SELECT parent_namespace FROM namespace_meta WHERE namespace = ?1 AND parent_namespace IS NOT NULL",
        params![namespace],
        |row| row.get::<_, String>(0),
    );
    match lookup {
        Ok(parent) => Some(parent),
        Err(_) => None,
    }
}

/// v0.6.2 (S35): read the full `namespace_meta` row for a namespace so
/// the caller can fan it out to peers.
///
/// Yields the (`namespace`, `standard_id`, `parent_namespace`,
/// `updated_at`) tuple written by `set_namespace_standard`; a NULL
/// `updated_at` becomes the empty string. Returns `Ok(None)` when no
/// standard is set — and also when the query fails, since errors are
/// folded into `None` (the `Result` wrapper never carries an `Err`).
#[allow(clippy::unnecessary_wraps)]
pub fn get_namespace_meta_entry(
    conn: &Connection,
    namespace: &str,
) -> Result<Option<crate::models::NamespaceMetaEntry>> {
    let lookup = conn.query_row(
        "SELECT namespace, standard_id, parent_namespace, updated_at
             FROM namespace_meta WHERE namespace = ?1",
        params![namespace],
        |row| {
            let ns = row.get(0)?;
            let standard_id = row.get(1)?;
            let parent_namespace = row.get(2)?;
            let updated_at: Option<String> = row.get(3)?;
            Ok(crate::models::NamespaceMetaEntry {
                namespace: ns,
                standard_id,
                parent_namespace,
                updated_at: updated_at.unwrap_or_default(),
            })
        },
    );
    Ok(lookup.ok())
}

/// Clear the standard for a namespace by deleting its `namespace_meta`
/// row.
///
/// Returns `true` when a row was deleted, `false` when there was none.
///
/// # Errors
/// Returns the underlying database error if the delete fails.
pub fn clear_namespace_standard(conn: &Connection, namespace: &str) -> Result<bool> {
    conn.execute(
        "DELETE FROM namespace_meta WHERE namespace = ?1",
        params![namespace],
    )
    .map(|deleted| deleted > 0)
    .map_err(Into::into)
}

// ---------------------------------------------------------------------------
// Task 1.9 — governance enforcement + pending_actions CRUD
// ---------------------------------------------------------------------------

/// Build the namespace inheritance chain in **top-down** order
/// (`["*", root, ..., leaf]`). Mirrors and replaces the historical
/// `mcp::build_namespace_chain` so non-MCP call sites (db-layer
/// governance enforcement, HTTP handlers, future hook pipelines) can
/// reuse the same walk.
///
/// Layout of the result:
///   1. the global standard `*` (always first; for `namespace == "*"`
///      it is the whole chain);
///   2. explicit `namespace_meta.parent_namespace` ancestors of the
///      rootmost `/`-derived entry, outermost first — this is what
///      links flat legacy namespaces into a hierarchy;
///   3. the `/`-derived ancestors, root first, ending with `namespace`
///      itself.
///
/// The explicit walk is cycle-safe: it stops at `MAX_EXPLICIT_DEPTH = 8`
/// hops, at `*`, or at any parent already collected or already present
/// in the `/`-derived chain. Entries are never duplicated.
///
/// The MCP layer's display path consumes this top-down. The governance
/// resolver in [`resolve_governance_policy`] reverses it for a
/// leaf-first walk (most-specific wins).
#[must_use]
pub fn build_namespace_chain(conn: &Connection, namespace: &str) -> Vec<String> {
    const MAX_EXPLICIT_DEPTH: usize = 8;

    // The global standard is the most general entry and always leads.
    let mut chain = vec!["*".to_string()];
    if namespace == "*" {
        return chain;
    }

    // `namespace_ancestors` is most-specific-first; flip it to top-down.
    let derived: Vec<String> = crate::models::namespace_ancestors(namespace)
        .into_iter()
        .rev()
        .collect();

    // Follow explicit parents upward from the rootmost derived entry.
    if let Some(root) = derived.first() {
        let mut explicit: Vec<String> = Vec::new();
        let mut cursor = root.clone();
        for _ in 0..MAX_EXPLICIT_DEPTH {
            let Some(parent) = get_namespace_parent(conn, &cursor) else {
                break;
            };
            if parent == "*" || explicit.contains(&parent) || derived.contains(&parent) {
                break;
            }
            explicit.push(parent.clone());
            cursor = parent;
        }
        // Collected nearest-first; emit outermost-first.
        chain.extend(explicit.into_iter().rev());
    }

    // Finally the derived chain, skipping anything already emitted.
    for entry in derived {
        if !chain.contains(&entry) {
            chain.push(entry);
        }
    }

    chain
}

/// Read the explicit governance policy attached to a single namespace's
/// standard memory. Does **not** walk the inheritance chain — use
/// [`resolve_governance_policy`] for hierarchical resolution.
///
/// Returns `None` when the namespace has no standard, the standard
/// memory cannot be loaded, the standard carries no
/// `metadata.governance`, or the stored policy fails to parse (that
/// last case is additionally logged at WARN).
///
/// **NHI-P4-T19 (v0.7.0 NHI testing):** there is no implicit fallback
/// policy. Operators who want enforcement-by-default can either (a)
/// write `metadata.governance = {"write": "owner", ...}` into their
/// standard memory, or (b) start from
/// [`crate::models::GovernancePolicy::default_for_managed_namespace`].
/// Changing the implicit fallback to Owner is deferred to v0.7.1
/// because it can break inheritance chains where a parent's standard
/// was registered under a distinct agent identity from descendant
/// operations.
fn read_namespace_policy(conn: &Connection, namespace: &str) -> Option<GovernancePolicy> {
    let standard_id = get_namespace_standard(conn, namespace).ok()??;
    let mem = get(conn, &standard_id).ok()??;

    // No `metadata.governance` at all → nothing to report (the `?`).
    let parse_err = match GovernancePolicy::from_metadata(&mem.metadata)? {
        Ok(policy) => return Some(policy),
        Err(err) => err,
    };

    // #1384 — observability for stored-corruption. The write path
    // (`memory_namespace_set_standard` → typed `GovernancePolicy`
    // deserialise) rejects unknown enum variants and malformed
    // structures, so a parse failure here means the stored JSON drifted
    // out-of-band: direct SQL update, migration corruption, an older
    // binary writing a newer schema, etc. Before #1384 this case
    // returned `None` silently and the inheritance walk moved on to a
    // parent that may be fully permissive, quietly downgrading the
    // operator's intent. We still return `None` (failing CLOSED at the
    // read site could lock callers out of unrelated namespaces) but emit
    // a structured WARN that operators can grep for under
    // `ai_memory::governance::policy_read`.
    tracing::warn!(
        target: "ai_memory::governance::policy_read",
        namespace = %namespace,
        standard_id = %standard_id,
        error = %parse_err,
        "stored metadata.governance failed typed deserialise — \
         inheritance walk will continue past this namespace as \
         if no policy were set. Likely cause: direct SQL update, \
         older binary, or corrupted migration. Operator should \
         re-run `memory_namespace_set_standard` to restore the \
         typed shape."
    );
    None
}

/// Resolve the governance policy that gates actions in `namespace`.
///
/// v0.6.3.1 (P4, audit G1): walks the inheritance chain leaf-first and
/// returns the most-specific policy. This closes the audit's
/// highest-severity finding — prior to this fix the resolver consulted
/// only the leaf, which left children of governed parents (e.g.
/// `alphaone/secure/team-a` under an `Approve` policy at
/// `alphaone/secure`) **completely ungoverned** despite the
/// architecture page T2 promising "Hierarchical policy inheritance
/// (default at `org/`, overridable at `org/team/`)".
///
/// **Walk semantics** (carefully — easy to get subtly wrong):
///   1. Build the chain via [`build_namespace_chain`] (top-down) and
///      reverse it so we walk leaf → root. The leaf is the namespace
///      we were asked about; the root is the global `*` standard.
///   2. At each level `k`, look up the policy attached to that
///      namespace's standard memory.
///      - If a policy **exists**, it is the most-specific match seen
///        so far. Return it immediately. ("Most specific wins.")
///      - If a policy **also says `inherit: false`**, this is already
///        the same return path — we never reach the parent because
///        we already returned.
///   3. If level `k` has **no policy at all**, keep walking — this is
///      the implicit-inherit branch (no policy means "I don't override
///      my parent").
///   4. If we walk off the top of the chain without finding a policy,
///      return `None` (enforcement remains opt-in for namespaces with
///      no governance configured anywhere in the chain).
///
/// **Where does `inherit: false` actually do work?** When the most-
/// specific policy we hit on the walk has `inherit: false`. That
/// policy is returned (same return point as the inherit=true case),
/// so its rules govern the action; the false flag is what
/// **conceptually stops** the walk above it, but the implementation
/// stops the walk simply by virtue of having found a policy. The flag
/// matters most as a documented contract surfaced to operators: "a
/// policy here authoritatively replaces, not extends, what's above."
/// The flag also flows through the queued-pending-action approver
/// resolution so consensus/agent rules don't accidentally re-walk to
/// a parent.
///
/// Cycle-safety is inherited from `build_namespace_chain`
/// (`MAX_EXPLICIT_DEPTH = 8` + visited set). No new cache is
/// introduced — profile-driven optimization is a v0.7 item.
pub fn resolve_governance_policy(conn: &Connection, namespace: &str) -> Option<GovernancePolicy> {
    // `build_namespace_chain` hands back the chain top-down
    // (`["*", root, ..., leaf]`); iterating it in reverse visits the
    // most-specific namespace first.
    //
    // `find_map` stops at the first level that carries a policy, so a
    // policy on the leaf (or on any nearer ancestor) shadows everything
    // above it — the inherit=false semantic, applied implicitly. The
    // policy is returned untouched, `inherit` flag included, so callers
    // such as the pending_action approver resolver can see it and avoid
    // re-walking to a parent.
    //
    // Levels without a policy yield `None` from the closure and the walk
    // simply continues toward the root (the "default inherit" behaviour
    // that closes G1). Exhausting the chain produces `None`.
    build_namespace_chain(conn, namespace)
        .into_iter()
        .rev()
        .find_map(|level| read_namespace_policy(conn, &level))
}

/// Shared leaf-first walk behind [`resolve_require_approval_above_depth`]
/// and [`resolve_skill_promotion_min_depth`]: read the unsigned-integer
/// `key` from the most-specific `metadata.governance` blob in
/// `namespace`'s inheritance chain.
///
/// Per level, walking leaf → root:
/// - no standard registered, the standard memory cannot be loaded, or its
///   governance blob is absent / `null` → keep walking;
/// - `key` holds an unsigned integer → return it, substituting
///   `overflow_fallback` when the value does not fit in a `u32` (never a
///   silent `as u32` truncation);
/// - otherwise (`key` absent, `null`, or not an unsigned integer): stop
///   with `None` when the metadata parses as a [`GovernancePolicy`]
///   (leaf-first-wins — a policy that omits the field shadows ancestors
///   that set it); keep walking when it does not parse.
///
/// Returns `None` when the walk reaches the top of the chain without a
/// value.
fn resolve_governance_u32_threshold(
    conn: &Connection,
    namespace: &str,
    key: &str,
    overflow_fallback: u32,
) -> Option<u32> {
    // build_namespace_chain is top-down; reverse for most-specific-first.
    for level in build_namespace_chain(conn, namespace).into_iter().rev() {
        let Ok(Some(standard_id)) = get_namespace_standard(conn, &level) else {
            continue;
        };
        let Ok(Some(mem)) = get(conn, &standard_id) else {
            continue;
        };
        // Governance blob must exist and not be null.
        let gov = match mem.metadata.get(crate::META_KEY_GOVERNANCE) {
            Some(g) if !g.is_null() => g,
            _ => continue,
        };
        if let Some(n) = gov.get(key).and_then(|v| v.as_u64()) {
            // QUAL-3 (FX-5): operator-controlled metadata. A value that
            // overflows `u32` is replaced by the caller-chosen fail-CLOSED
            // fallback instead of being truncated to its low 32 bits.
            return Some(u32::try_from(n).unwrap_or(overflow_fallback));
        }
        // No usable value at this level. If the level nevertheless carries
        // a parseable policy, that policy is authoritative and the walk
        // ends here (same leaf-first-wins rule as
        // `resolve_governance_policy`); otherwise try the parent.
        if GovernancePolicy::from_metadata(&mem.metadata).is_some() {
            return None;
        }
    }
    None
}

/// v0.7.0 L1-8 — read `governance.require_approval_above_depth` from the
/// namespace's most-specific governance metadata blob, leaf-first.
///
/// This is intentionally a free function (not a field on
/// [`GovernancePolicy`]) to avoid introducing a new required struct field
/// that would need updating at every `GovernancePolicy { … }` literal
/// in the codebase. The existing `GovernancePolicy` struct represents
/// the resolved enforcement policy; this field is a pre-write interception
/// threshold that lives beside it, not inside it.
///
/// Returns `Some(threshold)` for the first level (leaf → root) whose
/// governance blob stores the key as an unsigned integer. Callers in
/// `memory_reflect` compare `proposed_depth > threshold` and queue a
/// `pending_actions` row when the condition is true.
///
/// Returns `None` when no level supplies such a value, or when a more
/// specific level carries a parseable policy that leaves the key absent,
/// `null`, or non-integer (that policy shadows its ancestors).
///
/// QUAL-3 (FX-5): a stored value that does not fit in `u32` saturates to
/// `0` rather than truncating. Truncation would let an oversized value
/// land on an arbitrary low-32-bit threshold; `0` is the fail-CLOSED
/// choice because it makes `proposed_depth > threshold` true for every
/// depth above zero — the conservative posture per CLAUDE.md K3/K9
/// governance discipline. Pinned by
/// `tests/governance_metadata_no_silent_truncation.rs`.
pub fn resolve_require_approval_above_depth(conn: &Connection, namespace: &str) -> Option<u32> {
    resolve_governance_u32_threshold(conn, namespace, "require_approval_above_depth", 0)
}

/// v0.7.0 L2-6 — read `governance.skill_promotion_min_depth` from the
/// namespace's most-specific governance metadata blob, leaf-first.
///
/// Mirrors [`resolve_require_approval_above_depth`] in shape and walk
/// semantics: it's a free function (not a [`GovernancePolicy`] field)
/// so it can land without churning every `GovernancePolicy { … }`
/// literal in the codebase, and it's a per-namespace threshold rather
/// than part of the resolved enforcement policy.
///
/// Returns `Some(threshold)` for the first level (leaf → root) whose
/// governance blob stores the key as an unsigned integer, and `None`
/// when no level supplies one or a more specific parseable policy omits
/// it. The `memory_skill_promote_from_reflection` MCP tool falls back to
/// the compiled-in default of `1` when this returns `None` — a reflection
/// must have at least one level of synthesised insight (depth ≥ 1)
/// before it can be promoted to a reusable skill.
///
/// QUAL-3 (FX-5): a stored value that does not fit in `u32` saturates to
/// `u32::MAX` rather than truncating. Truncating `2^32 + k` would land as
/// `k` — including the catastrophic `k == 0` case, "every reflection can
/// be promoted regardless of depth". Saturating is fail-CLOSED: the
/// `actual_depth_u32 < min_depth` check at
/// `src/mcp/tools/skill_promote.rs:174` becomes permanently true, so no
/// reflection can be promoted. Pinned by
/// `tests/governance_metadata_no_silent_truncation.rs`.
pub fn resolve_skill_promotion_min_depth(conn: &Connection, namespace: &str) -> Option<u32> {
    resolve_governance_u32_threshold(conn, namespace, "skill_promotion_min_depth", u32::MAX)
}

/// Report whether `agent_id` has a registration memory in `_agents`.
///
/// The lookup matches on the title derived from `agent_id` via
/// `agent_registration_title`. Any query failure — "no rows" included —
/// is reported as `false`.
pub fn is_registered_agent(conn: &Connection, agent_id: &str) -> bool {
    let registration_title = crate::models::agent_registration_title(agent_id);
    let lookup = conn.query_row(
        "SELECT 1 FROM memories WHERE namespace = ?1 AND title = ?2",
        params![AGENTS_NAMESPACE, &registration_title],
        |row| row.get::<_, i64>(0),
    );
    matches!(lookup, Ok(_))
}

/// Evaluate a governance level against caller context.
/// - `action`: the [`GovernedAction`] under evaluation; threaded into the
///   [`crate::governance::GovernanceRefusal`] envelope so refusal Display
///   includes the action verb without the caller having to wrap.
/// - `namespace`: target namespace; attached to the refusal envelope.
/// - `memory_owner`: the existing memory's `metadata.agent_id` (delete/promote paths).
///   Pass `None` for store operations.
/// - `namespace_owner`: the `metadata.agent_id` of the namespace's standard memory,
///   used as the "owner" for store operations. Resolved once by the caller.
///
/// #963 Phase 2 — `Deny` returns a typed
/// [`crate::governance::GovernanceRefusal`]. The `reason` field carries
/// the human-readable phrase WITHOUT the `"governance: "` prefix (the
/// envelope's `Display` adds the `"<action> denied by governance: "`
/// header). Pre-#963 the same path produced
/// `Deny(format!("governance: ..."))` which doubled the prefix when
/// consumers re-wrapped via `deny_message`.
fn evaluate_level(
    conn: &Connection,
    action: GovernedAction,
    namespace: &str,
    level: &GovernanceLevel,
    agent_id: &str,
    memory_owner: Option<&str>,
    namespace_owner: Option<&str>,
) -> GovernanceDecision {
    use crate::governance::GovernanceRefusal;
    match level {
        // Open level: nothing to check.
        GovernanceLevel::Any => GovernanceDecision::Allow,

        // Registered level: the caller must appear in the agent registry.
        GovernanceLevel::Registered if is_registered_agent(conn, agent_id) => {
            GovernanceDecision::Allow
        }
        GovernanceLevel::Registered => {
            let reason = format!("caller '{agent_id}' is not a registered agent");
            let refusal =
                GovernanceRefusal::new(action, GovernanceLevel::Registered, agent_id, reason)
                    .with_namespace(namespace);
            GovernanceDecision::Deny(refusal)
        }

        // Owner level: the memory's own owner takes precedence; the
        // namespace owner is the fallback (used by store operations,
        // which have no existing memory).
        GovernanceLevel::Owner => {
            let Some(owner) = memory_owner.or(namespace_owner) else {
                let refusal = GovernanceRefusal::new(
                    action,
                    GovernanceLevel::Owner,
                    agent_id,
                    "owner-level action has no resolvable owner",
                )
                .with_namespace(namespace);
                return GovernanceDecision::Deny(refusal);
            };
            if owner == agent_id {
                return GovernanceDecision::Allow;
            }
            let reason = format!("caller '{agent_id}' is not the owner ('{owner}')");
            let refusal = GovernanceRefusal::new(action, GovernanceLevel::Owner, agent_id, reason)
                .with_namespace(namespace)
                .with_owner(owner);
            GovernanceDecision::Deny(refusal)
        }

        // Approve level: hand back a placeholder `Pending` with an empty
        // id. The enforcement helper that called us queues the
        // pending_action row and substitutes the real id, keeping the db
        // layer the single source of truth for pending ids.
        GovernanceLevel::Approve => GovernanceDecision::Pending(String::new()),
    }
}

/// Resolve the namespace-owner (`metadata.agent_id` of the namespace's
/// standard memory) used for `Owner`-level store checks.
///
/// **F1 (v0.7.0 round-2-fixes):** the lookup now walks the inheritance
/// chain leaf-first via [`build_namespace_chain`], returning the
/// `agent_id` of the first standard memory found. This mirrors
/// [`resolve_governance_policy`]'s semantics so that when a deep child
/// inherits a parent's `governance.write = owner` policy, the owner
/// check resolves to the parent's standard owner — matching operator
/// intuition that the helper means "owner of the effective policy at
/// this namespace".
///
/// Without this walk, deep children with no standard of their own
/// triggered `governance: owner-level action has no resolvable owner`
/// despite the parent's policy being correctly inherited.
fn namespace_owner(conn: &Connection, namespace: &str) -> Option<String> {
    // The chain arrives top-down (`["*", root, ..., leaf]`); reversing it
    // makes the most-specific owner win, in step with how
    // resolve_governance_policy selects the most-specific policy.
    build_namespace_chain(conn, namespace)
        .into_iter()
        .rev()
        .find_map(|level| {
            // Each `?` abandons only this level: a missing standard, a
            // lookup error, or an unloadable memory all mean "try the
            // parent".
            let standard_id = get_namespace_standard(conn, &level).ok().flatten()?;
            let standard = get(conn, &standard_id).ok().flatten()?;
            // A standard without a string `agent_id` likewise falls
            // through to the next level.
            standard
                .metadata
                .get("agent_id")
                .and_then(|v| v.as_str())
                .map(str::to_string)
        })
}

/// Enforce governance for a `GovernedAction`. On [`GovernanceDecision::Pending`],
/// a row is inserted into `pending_actions` and the returned `pending_id` is
/// embedded in the decision.
///
/// v0.7.0 K3 — the gate now consults
/// [`crate::config::active_permissions_mode`] and branches on the
/// active [`crate::config::PermissionsMode`]:
///
/// - [`PermissionsMode::Off`]: skip the gate entirely. Returns `Allow`
///   without touching `resolve_governance_policy` or `pending_actions`.
/// - [`PermissionsMode::Advisory`]: resolve the policy, log any
///   would-be `Deny`/`Pending` outcome at `WARN`, then return `Allow`.
///   No `pending_actions` row is queued. This is the v0.7.0 default —
///   it preserves the v0.6.x posture for upgrading operators where
///   governance metadata was advertised but the wider permission
///   system was honest-disclosed as advisory.
/// - [`PermissionsMode::Enforce`]: the historical strict path.
///   `Deny`/`Pending` decisions surface verbatim and the
///   `pending_actions` row is queued. Audit-ready posture; opt in via
///   `[permissions] mode = "enforce"` in `config.toml`.
///
/// Every consult increments the per-mode counter exposed via
/// [`crate::config::permissions_decision_counts`] so doctor +
/// capabilities can surface gate activity.
///
/// [`PermissionsMode`]: crate::config::PermissionsMode
pub fn enforce_governance(
    conn: &Connection,
    action: GovernedAction,
    namespace: &str,
    agent_id: &str,
    memory_id: Option<&str>,
    memory_owner: Option<&str>,
    payload: &serde_json::Value,
) -> Result<GovernanceDecision> {
    use crate::config::{PermissionsMode, active_permissions_mode, record_permissions_decision};

    // Every consult is counted under the active mode, including the ones
    // that short-circuit below.
    let mode = active_permissions_mode();
    record_permissions_decision(mode);

    // K3 — `Off`: no policy lookup, no queueing.
    if mode == PermissionsMode::Off {
        return Ok(GovernanceDecision::Allow);
    }

    // Enforcement is opt-in: a namespace with no policy anywhere in its
    // chain is left alone.
    let Some(policy) = resolve_governance_policy(conn, namespace) else {
        return Ok(GovernanceDecision::Allow);
    };

    // #880 — the per-action levels live on `policy.core` after the
    // governance decomposition.
    //
    // v0.7.0 L1-8: `Reflect` is really gated by
    // `require_approval_above_depth` in the MCP handler, not by this
    // pipeline. It shares the `write` level purely as a conservative
    // fallback so the match stays exhaustive; no current callsite passes
    // `GovernedAction::Reflect` here.
    let level = match action {
        GovernedAction::Store | GovernedAction::Reflect => &policy.core.write,
        GovernedAction::Delete => &policy.core.delete,
        GovernedAction::Promote => &policy.core.promote,
    };

    // Only store operations fall back to the namespace owner; the other
    // actions rely on the `memory_owner` supplied by the caller.
    let ns_owner = matches!(action, GovernedAction::Store)
        .then(|| namespace_owner(conn, namespace))
        .flatten();

    let decision = evaluate_level(
        conn,
        action,
        namespace,
        level,
        agent_id,
        memory_owner,
        ns_owner.as_deref(),
    );

    // K3 — `Advisory`: record what would have happened, then allow. No
    // pending row is queued. The capabilities surface keeps advertising
    // `permissions.mode = "advisory"` so external integrators see a
    // consistent posture.
    if mode == PermissionsMode::Advisory {
        match &decision {
            GovernanceDecision::Pending(_) => {
                tracing::warn!(
                    target: "ai_memory::governance",
                    namespace = %namespace,
                    agent_id = %agent_id,
                    action = ?action,
                    "permissions.mode=advisory: would-queue-approval suppressed (allowing)"
                );
            }
            GovernanceDecision::Deny(refusal) => {
                tracing::warn!(
                    target: "ai_memory::governance",
                    namespace = %namespace,
                    agent_id = %agent_id,
                    action = ?action,
                    reason = %refusal.reason,
                    denied_level = %refusal.denied_level.as_str(),
                    "permissions.mode=advisory: would-deny suppressed (allowing)"
                );
            }
            GovernanceDecision::Allow => {}
        }
        return Ok(GovernanceDecision::Allow);
    }

    // K3 — `Enforce`: the historical strict path. A placeholder `Pending`
    // is swapped for one carrying the id of the freshly queued
    // `pending_actions` row; every other decision surfaces verbatim.
    match decision {
        GovernanceDecision::Pending(_) => {
            let pending_id =
                queue_pending_action(conn, action, namespace, memory_id, agent_id, payload)?;
            Ok(GovernanceDecision::Pending(pending_id))
        }
        settled => Ok(settled),
    }
}

/// Queue a new `pending_actions` row in the `'pending'` state and return
/// its freshly minted UUID.
///
/// `payload` is stored as serialised JSON; `requested_at` is stamped with
/// the current UTC time in RFC 3339 form.
///
/// # Errors
/// Fails if `payload` cannot be serialised or the INSERT is rejected.
pub fn queue_pending_action(
    conn: &Connection,
    action: GovernedAction,
    namespace: &str,
    memory_id: Option<&str>,
    requested_by: &str,
    payload: &serde_json::Value,
) -> Result<String> {
    let payload_json = serde_json::to_string(payload)?;
    let pending_id = uuid::Uuid::new_v4().to_string();
    let requested_at = Utc::now().to_rfc3339();
    conn.execute(
        "INSERT INTO pending_actions (id, action_type, memory_id, namespace, payload, requested_by, requested_at, status)
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, 'pending')",
        params![
            pending_id,
            action.as_str(),
            memory_id,
            namespace,
            payload_json,
            requested_by,
            requested_at,
        ],
    )?;
    Ok(pending_id)
}

/// v0.6.2 (S34): write a canonical [`PendingAction`] into
/// `pending_actions`, inserting it or overwriting the row that already
/// has the same id.
///
/// Used by `sync_push` to apply a peer-originated pending row so
/// governance state is cluster-consistent. `approvals` and the decision
/// fields are carried over verbatim so re-plays converge. The statement is
/// `INSERT ... ON CONFLICT(id) DO UPDATE` because the originator's id is
/// stable across peers — unlike `queue_pending_action`, which mints a
/// fresh UUID on every call.
///
/// # Errors
/// Fails if `payload` / `approvals` cannot be serialised or the statement
/// is rejected.
pub fn upsert_pending_action(conn: &Connection, pa: &PendingAction) -> Result<()> {
    // On conflict every non-key column is replaced by the incoming value.
    const UPSERT_SQL: &str = "INSERT INTO pending_actions
         (id, action_type, memory_id, namespace, payload, requested_by,
          requested_at, status, decided_by, decided_at, approvals)
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)
         ON CONFLICT(id) DO UPDATE SET
            action_type  = excluded.action_type,
            memory_id    = excluded.memory_id,
            namespace    = excluded.namespace,
            payload      = excluded.payload,
            requested_by = excluded.requested_by,
            requested_at = excluded.requested_at,
            status       = excluded.status,
            decided_by   = excluded.decided_by,
            decided_at   = excluded.decided_at,
            approvals    = excluded.approvals";

    // Both JSON columns are stored as text.
    let payload_text = serde_json::to_string(&pa.payload)?;
    let approvals_text = serde_json::to_string(&pa.approvals)?;

    conn.execute(
        UPSERT_SQL,
        params![
            pa.id,
            pa.action_type,
            pa.memory_id,
            pa.namespace,
            payload_text,
            pa.requested_by,
            pa.requested_at,
            pa.status,
            pa.decided_by,
            pa.decided_at,
            approvals_text,
        ],
    )?;
    Ok(())
}

pub fn list_pending_actions(
    conn: &Connection,
    status: Option<&str>,
    limit: usize,
) -> Result<Vec<PendingAction>> {
    let mut stmt = conn.prepare(
        "SELECT id, action_type, memory_id, namespace, payload, requested_by,
                requested_at, status, decided_by, decided_at, approvals
         FROM pending_actions
         WHERE (?1 IS NULL OR status = ?1)
         ORDER BY requested_at DESC
         LIMIT ?2",
    )?;
    let rows = stmt.query_map(params![status, limit], |row| {
        let payload_str: String = row.get(4)?;
        let payload: serde_json::Value =
            serde_json::from_str(&payload_str).unwrap_or(serde_json::Value::Null);
        let approvals_str: String = row.get(10)?;
        let approvals: Vec<Approval> = serde_json::from_str(&approvals_str).unwrap_or_default();
        Ok(PendingAction {
            id: row.get(0)?,
            action_type: row.get(1)?,
            memory_id: row.get(2)?,
            namespace: row.get(3)?,
            payload,
            requested_by: row.get(5)?,
            requested_at: row.get(6)?,
            status: row.get(7)?,
            decided_by: row.get(8)?,
            decided_at: row.get(9)?,
            approvals,
        })
    })?;
    rows.collect::<rusqlite::Result<Vec<_>>>()
        .map_err(Into::into)
}

pub fn get_pending_action(conn: &Connection, id: &str) -> Result<Option<PendingAction>> {
    let row = conn.query_row(
        "SELECT id, action_type, memory_id, namespace, payload, requested_by,
                requested_at, status, decided_by, decided_at, approvals
         FROM pending_actions WHERE id = ?1",
        params![id],
        |row| {
            let payload_str: String = row.get(4)?;
            let payload: serde_json::Value =
                serde_json::from_str(&payload_str).unwrap_or(serde_json::Value::Null);
            let approvals_str: String = row.get(10)?;
            let approvals: Vec<Approval> = serde_json::from_str(&approvals_str).unwrap_or_default();
            Ok(PendingAction {
                id: row.get(0)?,
                action_type: row.get(1)?,
                memory_id: row.get(2)?,
                namespace: row.get(3)?,
                payload,
                requested_by: row.get(5)?,
                requested_at: row.get(6)?,
                status: row.get(7)?,
                decided_by: row.get(8)?,
                decided_at: row.get(9)?,
                approvals,
            })
        },
    );
    match row {
        Ok(p) => Ok(Some(p)),
        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
        Err(e) => Err(e.into()),
    }
}

/// Record an approve / reject decision on a pending action.
///
/// Only a row still in the `'pending'` state is updated; the return value
/// is `true` exactly when that transition happened. The action itself is
/// NOT executed here — the caller replays the payload on approval (the db
/// layer doesn't know how to execute cross-interface write semantics).
///
/// v0.7.0 S5-M2 — a successful deny transition appends a
/// `pending_action.denied` row to `signed_events` so the audit chain
/// captures every governance refusal alongside the approval and timeout
/// events. The emit is best-effort: failure is logged but does NOT roll
/// back the decision write (operators inspecting the audit chain see a
/// gap rather than losing the underlying decision).
pub fn decide_pending_action(
    conn: &Connection,
    id: &str,
    approve: bool,
    decided_by: &str,
) -> Result<bool> {
    let new_status = if approve { "approved" } else { "rejected" };
    let decided_at = Utc::now().to_rfc3339();
    let rows_changed = conn.execute(
        "UPDATE pending_actions SET status = ?1, decided_by = ?2, decided_at = ?3
         WHERE id = ?4 AND status = 'pending'",
        params![new_status, decided_by, decided_at, id],
    )?;

    // Nothing transitioned (unknown id or already decided): no audit row.
    if rows_changed == 0 {
        return Ok(false);
    }

    // S5-M2: only the deny path emits here. The approve audit row is
    // written later, after execution, so it captures the post-execute
    // state — see `execute_pending_action`.
    if !approve {
        // Re-read the row so the event reflects the stored post-update
        // state; a failed re-read silently skips the (best-effort) emit.
        if let Ok(Some(decided)) = get_pending_action(conn, id) {
            emit_pending_action_event(conn, &decided, "pending_action.denied", Some(decided_by));
        }
    }
    Ok(true)
}

/// v0.7.0 S5-M1/M2 — append a `pending_action.<state>` row to
/// `signed_events` so the audit chain captures every governance
/// decision transition (approve / deny / timeout).
///
/// `event_type` is one of:
/// - `"pending_action.approved"` (emitted from `execute_pending_action`
///   after a successful execute)
/// - `"pending_action.denied"` (emitted from `decide_pending_action`
///   on a deny transition)
/// - `"pending_action.timed_out"` (emitted from
///   `sweep_pending_action_timeouts` per expired row)
///
/// The CBOR payload encodes `(pending_id, action_type, namespace,
/// requested_by, decided_by, status, timestamp)` so a downstream
/// auditor can replay decision provenance without re-reading the
/// (mutable) `pending_actions` table.
///
/// Best-effort: any encode / append failure is logged at WARN; the
/// caller's primary mutation MUST NOT roll back on audit failure.
/// Mirrors the same posture as `memory_link.invalidated` emit (the
/// audit chain is allowed to gap, the underlying write is not).
fn emit_pending_action_event(
    conn: &Connection,
    pa: &PendingAction,
    event_type: &str,
    decided_by_override: Option<&str>,
) {
    use std::collections::BTreeMap;

    // Explicit override first, then whatever the row recorded, then "".
    let decided_by = decided_by_override
        .map(str::to_string)
        .or_else(|| pa.decided_by.clone())
        .unwrap_or_default();
    let timestamp = Utc::now().to_rfc3339();

    // Canonical CBOR payload. Keys pass through a BTreeMap so they are
    // emitted in sorted order and the encoding stays stable across
    // releases — the SHA-256 over these bytes is the audit chain's
    // commitment to the decision shape. Same pattern as
    // `identity::sign::canonical_cbor` (ciborium + BTreeMap-ordered keys),
    // which keeps the chain canonicalized across emit sites.
    let fields: [(&str, &str); 7] = [
        (field_names::PENDING_ID, pa.id.as_str()),
        (field_names::ACTION_TYPE, pa.action_type.as_str()),
        ("namespace", pa.namespace.as_str()),
        (field_names::REQUESTED_BY, pa.requested_by.as_str()),
        (field_names::DECIDED_BY, decided_by.as_str()),
        ("status", pa.status.as_str()),
        ("timestamp", timestamp.as_str()),
    ];
    let mut ordered: BTreeMap<&str, ciborium::Value> = BTreeMap::new();
    for (key, text) in fields {
        ordered.insert(key, ciborium::Value::Text(text.to_string()));
    }
    let mut entries: Vec<(ciborium::Value, ciborium::Value)> = Vec::new();
    for (key, value) in ordered {
        entries.push((ciborium::Value::Text(key.to_string()), value));
    }
    let document = ciborium::Value::Map(entries);

    let mut cbor: Vec<u8> = Vec::with_capacity(128);
    if let Err(e) = ciborium::ser::into_writer(&document, &mut cbor) {
        // Best-effort: an encode failure leaves a gap in the audit chain
        // but never disturbs the caller's primary write.
        tracing::warn!(
            target: crate::signed_events::SIGNED_EVENTS_TRACE_TARGET,
            pending_id = %pa.id,
            event_type,
            "failed to encode canonical CBOR for pending_action event: {e}"
        );
        return;
    }

    // Actor recorded on the audit row: the decider for approve / deny.
    // The timeout path has no decider (the sweeper transitioned the row),
    // so the originating requester stands in as the actor.
    let agent_id = match event_type {
        "pending_action.timed_out" => pa.requested_by.clone(),
        _ => decided_by,
    };

    // v0.7.0 #1099 (SR-1 #4, HIGH) — sign the audit row with the daemon's
    // installed signing key when one is available. Before #1099 these
    // rows always landed with `signature: None, attest_level: "unsigned"`
    // even when a key was loaded, which broke the tamper-evidence claim
    // on the approval audit trail. With no key installed the constructor
    // falls back cleanly to (None, "unsigned") — the legacy posture.
    let event = crate::signed_events::SignedEvent::with_daemon_signature(
        crate::signed_events::payload_hash(&cbor),
        agent_id,
        event_type.to_string(),
        timestamp,
    );
    if let Err(e) = crate::signed_events::append_signed_event(conn, &event) {
        tracing::warn!(
            target: crate::signed_events::SIGNED_EVENTS_TRACE_TARGET,
            pending_id = %pa.id,
            event_type,
            "failed to append pending_action audit row: {e}"
        );
    }
}

/// v0.7.0 S5-H4 — check every `agent_id` a pending-action payload claims
/// against `pa.requested_by`.
///
/// The S5 audit caught an approver-on-behalf laundering hole: a caller
/// could queue a `pending_action` with `requested_by = "alice"` but
/// embed a payload whose `metadata.agent_id = "bob"`, and on execute
/// the new memory would land attributed to bob — the approver, not the
/// requester, was attributing the write. This helper closes the gap by
/// requiring the payload's claimed agent to equal the pending row's
/// `requested_by`.
///
/// Two locations are inspected: the top-level `agent_id` key and
/// `metadata.agent_id`. Every location that is present must match.
/// Stopping at the first one found is not enough: a payload could pair
/// a matching top-level `agent_id` with a different
/// `metadata.agent_id` and slip through.
///
/// If the payload omits an agent_id entirely, we treat that as a match
/// (older callers may not have populated the field; the substrate still
/// records `pa.requested_by` as the canonical attributor and the
/// memory's `metadata.agent_id` gets stamped from there). A value that
/// is not a JSON string is treated the same as an absent one.
///
/// The check fires only on payload shapes that carry an agent_id —
/// today: `store` (full Memory JSON) and `reflect` (the L1-8 payload
/// that includes `agent_id`). `delete` / `promote` payloads do not
/// carry an agent_id (the action is attributed to `pa.requested_by`
/// directly), so this function returns `Ok(())` on those.
///
/// # Errors
///
/// Returns `StorageError::ApproverLaundering` (wrapped in `anyhow`) for
/// the first claimed agent id that differs from `pa.requested_by`.
fn verify_payload_agent_id(pa: &PendingAction) -> Result<()> {
    let top_level = pa
        .payload
        .get("agent_id")
        .and_then(serde_json::Value::as_str);
    let nested = pa
        .payload
        .get("metadata")
        .and_then(|meta| meta.get("agent_id"))
        .and_then(serde_json::Value::as_str);

    // Top-level first so the reported `claimed` value is stable when
    // both locations disagree with the requester.
    let mismatch = [top_level, nested]
        .into_iter()
        .flatten()
        .find(|claimed| *claimed != pa.requested_by.as_str());

    if let Some(claimed) = mismatch {
        // #962 typed envelope — ApproverLaundering maps to 403 FORBIDDEN
        // via MemoryError::RefusedByGovernance (S5-H4 contract).
        return Err(anyhow::Error::new(StorageError::ApproverLaundering {
            pending_id: pa.id.clone(),
            claimed: claimed.to_string(),
            requester: pa.requested_by.clone(),
        }));
    }
    Ok(())
}

/// Task 1.10 — outcome of an approver-aware approve call.
///
/// Returned by `approve_with_approver_type`. Only `Approved` means the
/// pending row transitioned; every other variant leaves the payload
/// un-replayed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ApproveOutcome {
    /// #1620 — no pending row with this id exists. Maps to 404 on
    /// every surface; pre-#1620 this collapsed into `Rejected` and
    /// surfaced as 403 on sqlite while postgres returned 404 for the
    /// same probe.
    NotFound,
    /// Approver check failed; policy identifies the reason. The string
    /// is a human-readable explanation (row already decided, wrong
    /// designated approver, unregistered consensus voter, or a failed
    /// decision write).
    Rejected(String),
    /// Consensus quorum not yet met; vote recorded. `votes` is the
    /// number of approvals on the row and `quorum` the threshold the
    /// namespace policy requires. Also returned unchanged when the
    /// same voter approves a second time.
    Pending { votes: usize, quorum: u32 },
    /// Fully approved (Human single-step, matching Agent, or consensus
    /// threshold met). Caller may now replay the payload via
    /// `execute_pending_action`.
    Approved,
}

/// Task 1.10 — approver-type aware approve. Enforces the
/// `metadata.governance.approver` of the pending action's namespace.
///
/// Behaviour per approver type:
///
/// * `Human` — any caller may approve (back-compat with 1.9 callers);
///   also the default when the namespace has no resolvable policy.
/// * `Agent(required)` — only the designated agent id may approve.
/// * `Consensus(quorum)` — each registered agent contributes one vote
///   (ids are compared case-insensitively); the row is approved once
///   the number of votes reaches `quorum`.
///
/// Returns [`ApproveOutcome::NotFound`] when no row has `pending_id` and
/// [`ApproveOutcome::Rejected`] when the row is no longer pending, the
/// approver is not acceptable, or a write did not take effect.
///
/// # Errors
///
/// Returns `Err` when reading the pending row, serializing the
/// approvals, or any of the SQLite writes fails.
pub fn approve_with_approver_type(
    conn: &Connection,
    pending_id: &str,
    approver_agent_id: &str,
) -> Result<ApproveOutcome> {
    let Some(pa) = get_pending_action(conn, pending_id)? else {
        // #1620 — typed NotFound (was Rejected → 403; postgres 404'd).
        return Ok(ApproveOutcome::NotFound);
    };
    if pa.status != "pending" {
        return Ok(ApproveOutcome::Rejected(format!(
            "already decided: status={}",
            pa.status
        )));
    }
    // Resolve the namespace's approver type. If no policy, default to Human —
    // which accepts any approval (back-compat with 1.9 callers).
    // #880 — `approver` lives on `policy.core` after the governance
    // decomposition.
    let approver = resolve_governance_policy(conn, &pa.namespace)
        .map_or(ApproverType::Human, |p| p.core.approver);

    // Shared tail of the single-step approver types: write the decision
    // and translate the "did a row change" flag into an outcome.
    let decide_single_step = || -> Result<ApproveOutcome> {
        if decide_pending_action(conn, pending_id, true, approver_agent_id)? {
            Ok(ApproveOutcome::Approved)
        } else {
            Ok(ApproveOutcome::Rejected(
                crate::errors::msg::DECISION_WRITE_FAILED.into(),
            ))
        }
    };

    match approver {
        ApproverType::Human => decide_single_step(),
        ApproverType::Agent(required) => {
            if approver_agent_id != required {
                return Ok(ApproveOutcome::Rejected(format!(
                    "designated approver is '{required}'; got '{approver_agent_id}'"
                )));
            }
            decide_single_step()
        }
        ApproverType::Consensus(quorum) => {
            // Issue #216: a single caller could previously satisfy any
            // Consensus(n) quorum by varying the unauthenticated `agent_id`
            // (`alice`, `bob`, `Alice`/`alice` were three distinct votes).
            // Two changes harden the path:
            //   1. Require each voter to be a registered agent — raises the
            //      bar from "claim any string" to "operator pre-registered
            //      this id". Combined with auth on the approve endpoint
            //      (operator-deployed) this gives a real multi-party gate.
            //   2. Canonicalize the agent_id to lowercase for both the
            //      duplicate-vote check and storage so case-variants of the
            //      same id collapse to a single vote.
            if !is_registered_agent(conn, approver_agent_id) {
                return Ok(ApproveOutcome::Rejected(format!(
                    "consensus voter '{approver_agent_id}' is not a registered agent"
                )));
            }
            let canonical_id = approver_agent_id.to_ascii_lowercase();
            let mut approvals = pa.approvals.clone();
            if approvals
                .iter()
                .any(|a| a.agent_id.eq_ignore_ascii_case(&canonical_id))
            {
                // Repeat vote: report the current tally without writing.
                return Ok(ApproveOutcome::Pending {
                    votes: approvals.len(),
                    quorum,
                });
            }
            approvals.push(Approval {
                agent_id: canonical_id.clone(),
                approved_at: Utc::now().to_rfc3339(),
            });
            let approvals_json = serde_json::to_string(&approvals)?;
            let rows_changed = conn.execute(
                "UPDATE pending_actions SET approvals = ?1 WHERE id = ?2 AND status = 'pending'",
                params![approvals_json, pending_id],
            )?;
            if rows_changed == 0 {
                // The row left `pending` between the read above and this
                // write, so the vote was NOT stored. Reporting `Pending`
                // here would claim a vote that does not exist.
                return Ok(ApproveOutcome::Rejected(
                    "pending action was decided concurrently; vote not recorded".into(),
                ));
            }
            let votes = approvals.len();
            if u32::try_from(votes).unwrap_or(u32::MAX) >= quorum {
                // Threshold met — transition status so the caller can replay.
                if decide_pending_action(conn, pending_id, true, &canonical_id)? {
                    return Ok(ApproveOutcome::Approved);
                }
                return Ok(ApproveOutcome::Rejected(
                    "decision write failed at consensus threshold".into(),
                ));
            }
            Ok(ApproveOutcome::Pending { votes, quorum })
        }
    }
}

/// Task 1.10 — Execute an approved pending action's payload. Callers invoke
/// this after `approve_with_approver_type` returns `Approved`. Returns the
/// affected memory id (new id for store, existing id for delete/promote).
///
/// v0.7.0 S5-H1 — adds a `"reflect"` arm so an approved deep-reflection
/// queued by the L1-8 MCP gate (see `mcp::tools::reflect`) actually lands
/// instead of erroring out as "unknown action_type". The arm reconstructs
/// the original [`ReflectInput`] from the queued payload and replays it
/// through [`reflect`], inheriting the same depth-cap / source-resolution
/// checks the direct write path runs.
///
/// v0.7.0 S5-H4 — every arm runs [`verify_payload_agent_id`] BEFORE the
/// side-effecting mutation so an approver cannot launder a payload whose
/// embedded `agent_id` disagrees with the original requester (the
/// `pending_actions.requested_by` column). The refusal is a hard
/// `MemoryError::Validation`-shaped anyhow bail; on refusal we emit a
/// `pending_action.refused_agent_id_mismatch` audit row so the laundering
/// attempt is captured by the signed_events chain.
///
/// v0.7.0 S5-M1 — on a successful execute the function appends a
/// `pending_action.approved` row to `signed_events` (the deny + timeout
/// emits live in `decide_pending_action` and
/// `sweep_pending_action_timeouts` respectively, so the three governance
/// transitions are audit-complete together).
pub fn execute_pending_action(conn: &Connection, pending_id: &str) -> Result<Option<String>> {
    let Some(pa) = get_pending_action(conn, pending_id)? else {
        // #962 typed envelope — 404 NOT_FOUND.
        return Err(anyhow::Error::new(StorageError::PendingActionNotFound {
            pending_id: pending_id.to_string(),
        }));
    };
    if pa.status != "approved" {
        // #962 typed envelope — 409 CONFLICT (action is in the wrong state).
        return Err(anyhow::Error::new(
            StorageError::PendingActionStateInvalid {
                pending_id: pending_id.to_string(),
                status: pa.status.clone(),
            },
        ));
    }
    // S5-H4: refuse approver-on-behalf laundering BEFORE the side-effecting
    // write. Emit an audit row on refusal so the laundering attempt is
    // captured by the signed_events chain even when the substrate
    // bails the execute.
    if let Err(e) = verify_payload_agent_id(&pa) {
        emit_pending_action_event(conn, &pa, "pending_action.refused_agent_id_mismatch", None);
        return Err(e);
    }
    let memory_id = match pa.action_type.as_str() {
        "store" => {
            let mut mem: Memory = serde_json::from_value(pa.payload.clone()).map_err(|e| {
                // #962 typed envelope.
                anyhow::Error::new(StorageError::InvalidArgument {
                    reason: format!("invalid store payload: {e}"),
                })
            })?;
            // Stamp fresh id + timestamps so the execution is idempotent on replay.
            mem.id = uuid::Uuid::new_v4().to_string();
            let now = Utc::now().to_rfc3339();
            mem.created_at.clone_from(&now);
            mem.updated_at = now;
            mem.access_count = 0;
            let actual_id = insert(conn, &mem)?;
            Some(actual_id)
        }
        "delete" => {
            if let Some(mid) = pa.memory_id.clone() {
                delete(conn, &mid)?;
                Some(mid)
            } else {
                None
            }
        }
        "promote" => {
            if let Some(mid) = pa.memory_id.clone() {
                if let Some(to_ns) = pa
                    .payload
                    .get(field_names::TO_NAMESPACE)
                    .and_then(|v| v.as_str())
                {
                    // Vertical promotion to ancestor.
                    let clone_id = promote_to_namespace(conn, &mid, to_ns)?;
                    Some(clone_id)
                } else {
                    // Tier bump to long + clear expiry.
                    let (_found, _changed) = update(
                        conn,
                        &mid,
                        None,
                        None,
                        Some(&Tier::Long),
                        None,
                        None,
                        None,
                        None,
                        Some(""),
                        None,
                    )?;
                    Some(mid)
                }
            } else {
                None
            }
        }
        "reflect" => execute_reflect_from_payload(conn, &pa)?,
        other => {
            // #962 typed envelope.
            return Err(anyhow::Error::new(StorageError::InvalidArgument {
                reason: format!("unknown action_type: {other}"),
            }));
        }
    };
    // S5-M1: emit the approve audit row after the side-effecting write
    // succeeded so the audit chain reflects the post-execute state. The
    // emit is best-effort (warn-only) so an audit-side failure does not
    // roll back the governance decision.
    emit_pending_action_event(
        conn,
        &pa,
        "pending_action.approved",
        pa.decided_by.as_deref(),
    );
    Ok(memory_id)
}

/// v0.7.0 S5-H1 — replay an approved reflect pending action through
/// [`reflect`]. Factored out of [`execute_pending_action`] so the arm
/// stays focused on payload deserialization + the substrate call, and
/// so the unit test (`test_execute_reflect_arm_succeeds_round_trip`)
/// can exercise the helper without duplicating the wrapper logic.
///
/// Payload shape (mirrors what `mcp::tools::reflect` queued in L1-8):
///
/// ```json
/// {
///   "source_ids": ["…", "…"],
///   "title": "…",
///   "content": "…",
///   "namespace": "…",
///   "tier": "mid",
///   "tags": ["…"],
///   "priority": 5,
///   "confidence": 1.0,
///   "agent_id": "…",
///   "proposed_depth": 3,
///   "metadata": { … }
/// }
/// ```
///
/// All fields are optional except `source_ids`, `title`, and `content`
/// (the substrate validator rejects empty values, so missing keys
/// surface as a `Validation` error rather than a panic).
fn execute_reflect_from_payload(conn: &Connection, pa: &PendingAction) -> Result<Option<String>> {
    let payload = &pa.payload;
    let source_ids: Vec<String> = payload
        .get(field_names::SOURCE_IDS)
        .and_then(|v| v.as_array())
        .map(|arr| {
            arr.iter()
                .filter_map(|v| v.as_str().map(str::to_string))
                .collect()
        })
        .unwrap_or_default();
    if source_ids.is_empty() {
        // #962 typed envelope.
        return Err(anyhow::Error::new(StorageError::InvalidArgument {
            reason: "invalid reflect payload: source_ids missing or empty".to_string(),
        }));
    }
    let title = payload
        .get("title")
        .and_then(|v| v.as_str())
        .ok_or_else(|| {
            // #962 typed envelope.
            anyhow::Error::new(StorageError::InvalidArgument {
                reason: "invalid reflect payload: title missing".to_string(),
            })
        })?
        .to_string();
    let content = payload
        .get("content")
        .and_then(|v| v.as_str())
        .ok_or_else(|| {
            // #962 typed envelope.
            anyhow::Error::new(StorageError::InvalidArgument {
                reason: "invalid reflect payload: content missing".to_string(),
            })
        })?
        .to_string();
    let namespace = payload
        .get("namespace")
        .and_then(|v| v.as_str())
        .map(str::to_string)
        .or_else(|| Some(pa.namespace.clone()));
    let tier = payload
        .get("tier")
        .and_then(|v| v.as_str())
        .and_then(Tier::from_str)
        .unwrap_or(Tier::Mid);
    let tags: Vec<String> = payload
        .get("tags")
        .and_then(|v| v.as_array())
        .map(|arr| {
            arr.iter()
                .filter_map(|v| v.as_str().map(str::to_string))
                .collect()
        })
        .unwrap_or_default();
    let priority = i32::try_from(
        payload
            .get("priority")
            .and_then(|v| v.as_i64())
            .unwrap_or(5),
    )
    .unwrap_or(5);
    let confidence = payload
        .get(field_names::CONFIDENCE)
        .and_then(|v| v.as_f64())
        .unwrap_or(1.0);
    // Use the queued payload's agent_id when present (already verified
    // to match `pa.requested_by` by `verify_payload_agent_id`), else
    // fall back to `pa.requested_by` — the substrate stamps the value
    // onto `metadata.agent_id` so attribution stays consistent.
    let agent_id = payload
        .get("agent_id")
        .and_then(|v| v.as_str())
        .map(str::to_string)
        .unwrap_or_else(|| pa.requested_by.clone());
    let metadata = payload
        .get("metadata")
        .cloned()
        .unwrap_or_else(|| serde_json::json!({}));

    let input = crate::storage::reflect::ReflectInput {
        source_ids,
        title,
        content,
        namespace,
        tier,
        tags,
        priority,
        confidence,
        // v0.7.x (issue #1175): vendor-neutral substrate default.
        // Mirrors the MCP-side default at `src/mcp/tools/reflect.rs`
        // — see the comment there for the heterogeneous-NHI rationale.
        // Vendor identity stays in `metadata.agent_id`.
        source: crate::validate::DEFAULT_NHI_SOURCE.to_string(),
        agent_id,
        metadata,
    };
    let outcome = crate::storage::reflect::reflect(conn, &input)
        .map_err(|e| anyhow::anyhow!("reflect execute failed: {e}"))?;
    Ok(Some(outcome.id))
}

/// Report whether `id` is registered as the standard memory of at least
/// one namespace (used by consolidate to warn).
///
/// Any query failure is reported as `false`.
pub fn is_namespace_standard(conn: &Connection, id: &str) -> bool {
    let referencing_rows = conn.query_row(
        "SELECT COUNT(*) FROM namespace_meta WHERE standard_id = ?1",
        params![id],
        |row| row.get::<_, i64>(0),
    );
    matches!(referencing_rows, Ok(n) if n > 0)
}

/// v0.6.3 (capabilities schema v2): number of namespace standards whose
/// `metadata.governance` is non-null. A "rule" here means a namespace
/// has an explicit governance policy attached to its standard memory.
/// The count is a transparent passthrough — the full permission system
/// arrives in v0.7 (arch-enhancement-spec §3).
///
/// Best-effort: a failed query is reported as `Ok(0)`, so this function
/// never returns `Err` in practice.
pub fn count_active_governance_rules(conn: &Connection) -> Result<usize> {
    let governed = conn
        .query_row(
            "SELECT COUNT(*) FROM memories m
             INNER JOIN namespace_meta nm ON nm.standard_id = m.id
             WHERE json_extract(m.metadata, '$.governance') IS NOT NULL",
            [],
            |row| row.get::<_, i64>(0),
        )
        .map_or(0, |n| n.max(0));
    Ok(usize::try_from(governed).unwrap_or(0))
}

/// v0.7.0 K5 — list every namespace whose standard memory carries an
/// explicit `metadata.governance` policy, as `(namespace, policy)` pairs
/// in ascending namespace order.
///
/// Companion to [`count_active_governance_rules`] (which returns just the
/// count). Powers the `permissions.rule_summary` field surfaced by
/// capabilities v3 — the K5 increment closes the v0.6.3.1 honesty
/// disclosure that the field was previously dropped from the wire because
/// no per-rule serializer existed.
///
/// The listing is best-effort. A row is silently left out when it cannot
/// be read, when its metadata is not valid JSON, or when
/// `GovernancePolicy::from_metadata` does not yield a policy — a
/// malformed policy must not take down the entire capabilities
/// response. The wider gate (`enforce_governance` →
/// `read_namespace_policy`) already swallows the same parse failures, so
/// the surfaces stay consistent.
///
/// # Errors
///
/// Returns `Err` only when preparing or starting the query fails (e.g.
/// table missing); row-level failures are skipped as described above.
pub fn list_active_governance_policies(
    conn: &Connection,
) -> Result<Vec<(String, GovernancePolicy)>> {
    // The ORDER BY makes SQLite deliver the rows already sorted, so no
    // re-sort is needed on the Rust side.
    let mut governed_stmt = conn.prepare(
        "SELECT nm.namespace, m.metadata
         FROM namespace_meta nm
         INNER JOIN memories m ON m.id = nm.standard_id
         WHERE json_extract(m.metadata, '$.governance') IS NOT NULL
         ORDER BY nm.namespace ASC",
    )?;
    let raw_rows = governed_stmt.query_map([], |row| {
        Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
    })?;

    let policies: Vec<(String, GovernancePolicy)> = raw_rows
        .flatten()
        .filter_map(|(namespace, raw_metadata)| {
            // Rows whose metadata blob does not deserialize are skipped.
            let metadata = serde_json::from_str::<serde_json::Value>(&raw_metadata).ok()?;
            // `from_metadata` yields `None` for a missing/null field (the
            // SQL filter already excludes that path) and `Some(Err(_))`
            // for a malformed policy payload — both are skipped.
            match GovernancePolicy::from_metadata(&metadata) {
                Some(Ok(policy)) => Some((namespace, policy)),
                _ => None,
            }
        })
        .collect();
    Ok(policies)
}

/// v0.6.3 (capabilities schema v2): number of rows in the
/// `subscriptions` table. Used by `handle_capabilities` as a proxy for
/// "registered hooks" — the hook pipeline itself is v0.7 Bucket 0 work.
///
/// Best-effort: a failed query is reported as `Ok(0)`.
pub fn count_subscriptions(conn: &Connection) -> Result<usize> {
    let lookup = conn.query_row("SELECT COUNT(*) FROM subscriptions", [], |row| {
        row.get::<_, i64>(0)
    });
    let total = match lookup {
        Ok(n) => n.max(0),
        Err(_) => 0,
    };
    Ok(usize::try_from(total).unwrap_or(0))
}

/// v0.6.3 (capabilities schema v2): number of `pending_actions` rows
/// whose `status` column equals `status`. Used by `handle_capabilities`
/// to surface live approval queue depth.
///
/// Best-effort: a failed query is reported as `Ok(0)`.
pub fn count_pending_actions_by_status(conn: &Connection, status: &str) -> Result<usize> {
    let matching = conn
        .query_row(
            "SELECT COUNT(*) FROM pending_actions WHERE status = ?1",
            params![status],
            |row| row.get::<_, i64>(0),
        )
        .map_or(0, |n| n.max(0));
    Ok(usize::try_from(matching).unwrap_or(0))
}

/// v0.7.0 K2 — pending_actions timeout sweeper.
///
/// Scans `pending_actions` for `status='pending'` rows whose age exceeds
/// the per-row `default_timeout_seconds` (or `global_default_secs` when
/// the per-row column is NULL). Transitions matching rows to
/// `status='expired'` and stamps `expired_at = now`.
///
/// Returns the `(id, namespace)` tuples of the rows this call actually
/// expired, so the caller can fan out approval-decision events. A
/// candidate that was decided between the scan and the update is left
/// untouched and is NOT returned or audited as timed out. Empty queue
/// is a silent no-op, as is a non-positive `global_default_secs`.
///
/// Closes the v0.6.3.1 honest-Capabilities-v2 disclosure that
/// `default_timeout_seconds` was previously advertised but unused (the
/// v2 honesty patch had dropped it from the wire shape; K2 ships the
/// backing sweeper so the field is meaningful again).
///
/// # Errors
///
/// Returns `Err` only on hard SQLite failures (e.g. table missing).
pub fn sweep_pending_action_timeouts(
    conn: &Connection,
    global_default_secs: i64,
) -> Result<Vec<(String, String)>> {
    // Step 1 — find candidates. We compute age in SQL via julianday()
    // arithmetic so the sweep is index-friendly and avoids parsing
    // every `requested_at` row in Rust. The composite index
    // `idx_pending_status_requested` (added in migration v21) keeps
    // the planner from full-scanning the table.
    //
    // The `default_timeout_seconds` column is nullable; rows with NULL
    // fall back to `global_default_secs`. A non-positive global default
    // disables the sweeper entirely (operator escape hatch).
    if global_default_secs <= 0 {
        return Ok(Vec::new());
    }
    let mut stmt = conn.prepare(
        "SELECT id, namespace FROM pending_actions
         WHERE status = 'pending'
           AND (julianday('now') - julianday(requested_at)) * 86400.0
               > COALESCE(default_timeout_seconds, ?1)",
    )?;
    let candidates: Vec<(String, String)> = stmt
        .query_map(params![global_default_secs], |row| {
            Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
        })?
        .collect::<rusqlite::Result<Vec<_>>>()?;
    if candidates.is_empty() {
        return Ok(Vec::new());
    }

    // Step 2 — flip status='expired' + stamp expired_at. We update
    // row-by-row inside a single transaction so a failure mid-batch
    // rolls back cleanly. The WHERE clause re-checks status='pending'
    // so a concurrent decide_pending_action wins (its decision is
    // not overwritten). Only rows the UPDATE really changed are kept:
    // a candidate that lost that race must not be reported — or
    // audited — as timed out.
    let now = Utc::now().to_rfc3339();
    let mut expired: Vec<(String, String)> = Vec::with_capacity(candidates.len());
    let tx = conn.unchecked_transaction()?;
    {
        let mut mark_expired = tx.prepare(
            "UPDATE pending_actions
             SET status = 'expired', expired_at = ?1
             WHERE id = ?2 AND status = 'pending'",
        )?;
        for (id, namespace) in candidates {
            if mark_expired.execute(params![now, id])? > 0 {
                expired.push((id, namespace));
            }
        }
    }
    tx.commit()?;

    // v0.7.0 S5-M2 — emit a `pending_action.timed_out` audit row per
    // expired pending row so the audit chain captures the timeout
    // transition alongside approve / deny. Best-effort: a row that can
    // no longer be read is skipped; the sweep itself has already
    // committed.
    for (id, _) in &expired {
        if let Ok(Some(pa)) = get_pending_action(conn, id) {
            emit_pending_action_event(conn, &pa, "pending_action.timed_out", None);
        }
    }
    Ok(expired)
}

// ---------------------------------------------------------------------------
// `ai-memory doctor` (P7 / R7) — query helpers.
// ---------------------------------------------------------------------------
//
// These read-only helpers back the `ai-memory doctor` CLI subcommand. Each
// query is a single indexed `COUNT(*)` (or close to it) so the reporter can
// run an entire health pass without holding the DB lock long enough to
// block live writers.
//
// Surfaces consumed:
// - `doctor_dim_violations` reads the post-P2 `embedding_dim` column when
//   present and gracefully reports `Ok(None)` on pre-P2 schemas (the column
//   doesn't exist yet on `release/v0.6.3`).
// - `count_index_evictions` reads the post-P3 `index_evictions_total` global
//   counter when wired (there is no schema-level surface today; it returns
//   `Ok(None)` so the doctor can render a "not yet observed" line).
// - `doctor_oldest_pending_age_secs` is portable today and reports the
//   age of the oldest `pending` row in seconds.
// - `doctor_governance_depth_distribution` walks `parent_namespace` for each
//   namespace_meta row to estimate the inheritance depth distribution
//   the P4 enforcer will eventually consume.

/// Count rows whose `embedding_dim` (post-P2) does not match the modal
/// dim within their namespace. On pre-P2 schemas the `embedding_dim`
/// column doesn't exist; the function returns `Ok(None)` so the doctor
/// can render "not yet observed (pre-P2 schema)".
///
/// # Errors
///
/// Returns `Err` when the counting query itself fails. A missing
/// `embedding_dim` column is reported as `Ok(None)`, not an error. A
/// failed count is never reported as `Some(0)`: that would read as
/// "no violations" on a database the doctor could not inspect.
pub fn doctor_dim_violations(conn: &Connection) -> Result<Option<usize>> {
    // Schema probe: preparing a statement that names the column fails
    // when the column is absent, without reading any row.
    let has_dim = conn
        .prepare("SELECT embedding_dim FROM memories LIMIT 0")
        .is_ok();
    if !has_dim {
        return Ok(None);
    }
    // For each namespace, find the modal dim (most-frequent non-null value)
    // and count rows whose dim differs from it. Rows with NULL dim but a
    // non-empty embedding count as violations too — they are mid-migration.
    let n: i64 = conn
        .query_row(
            "WITH per_ns_modes AS (
                 SELECT namespace, embedding_dim, COUNT(*) AS c
                 FROM memories
                 WHERE embedding IS NOT NULL AND embedding_dim IS NOT NULL
                 GROUP BY namespace, embedding_dim
             ),
             ranked AS (
                 SELECT namespace, embedding_dim,
                        ROW_NUMBER() OVER (PARTITION BY namespace ORDER BY c DESC) AS rn
                 FROM per_ns_modes
             ),
             modes AS (
                 SELECT namespace, embedding_dim AS modal_dim
                 FROM ranked WHERE rn = 1
             )
             SELECT COUNT(*)
             FROM memories m
             LEFT JOIN modes mo ON mo.namespace = m.namespace
             WHERE m.embedding IS NOT NULL
               AND (m.embedding_dim IS NULL
                    OR (mo.modal_dim IS NOT NULL AND m.embedding_dim != mo.modal_dim))",
            [],
            |r| r.get(0),
        )
        .context("embedding_dim violation count query failed")?;
    Ok(Some(usize::try_from(n.max(0)).unwrap_or(0)))
}

/// Age in seconds of the oldest `pending` row in `pending_actions`, or
/// `None` if the queue is empty (or the column is unparseable). The
/// doctor uses this to flag a backlog older than 24h as critical.
///
/// # Errors
///
/// Returns `Err` only on hard SQLite failures (e.g. missing table).
pub fn doctor_oldest_pending_age_secs(conn: &Connection) -> Result<Option<i64>> {
    let row: Option<String> = conn
        .query_row(
            "SELECT requested_at FROM pending_actions WHERE status = 'pending'
             ORDER BY requested_at ASC LIMIT 1",
            [],
            |r| r.get(0),
        )
        .ok();
    let Some(ts) = row else {
        return Ok(None);
    };
    let Ok(parsed) = chrono::DateTime::parse_from_rfc3339(&ts) else {
        return Ok(None);
    };
    // M11 (v0.7.0 round-2) — clamp negative ages to 0. `requested_at`
    // is stamped by the writer's clock; on a host with skewed time
    // (NTP slewing back, intentional misconfiguration, or VM time
    // travel) `now - parsed` can land negative and downstream
    // consumers (the doctor surface treats this as "age in seconds")
    // would surface a nonsensical figure. The WARN gives operators
    // the signal so they can investigate the clock drift instead of
    // chasing a phantom backlog.
    let raw_age = (Utc::now() - parsed.with_timezone(&Utc)).num_seconds();
    let age = if raw_age < 0 {
        tracing::warn!(
            requested_at = %ts,
            raw_age_seconds = raw_age,
            "pending_actions row has future timestamp; clamping age to 0"
        );
        0
    } else {
        raw_age
    };
    Ok(Some(age))
}

/// Governance coverage across `namespace_meta`, returned as
/// `(with_policy, without_policy)`: the number of namespaces whose
/// standard memory has a non-null `metadata.governance` block, and the
/// number of remaining `namespace_meta` rows (a standard is registered
/// but no policy is attached).
///
/// # Errors
///
/// Returns `Err` when either counting query fails. A failure is never
/// folded into a zero count, which would read as "no namespaces" on a
/// database the doctor could not inspect.
pub fn doctor_governance_coverage(conn: &Connection) -> Result<(usize, usize)> {
    let with_policy: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM memories m
             INNER JOIN namespace_meta nm ON nm.standard_id = m.id
             WHERE json_extract(m.metadata, '$.governance') IS NOT NULL",
            [],
            |r| r.get(0),
        )
        .context("governance coverage: policy count query failed")?;
    let total_meta: i64 = conn
        .query_row("SELECT COUNT(*) FROM namespace_meta", [], |r| r.get(0))
        .context("governance coverage: namespace_meta count query failed")?;
    let with = usize::try_from(with_policy.max(0)).unwrap_or(0);
    let total = usize::try_from(total_meta.max(0)).unwrap_or(0);
    // Saturating: the two counts come from separate reads, so guard
    // against `with` momentarily exceeding `total`.
    Ok((with, total.saturating_sub(with)))
}

/// Histogram of `parent_namespace` chain depth over `namespace_meta`.
/// Entry `i` of the returned Vec is the number of namespaces whose chain
/// is `i` links deep (depth 0 = no parent). The Vec always has 17
/// entries (depths 0 through 16).
///
/// Each chain is followed for at most 16 links so malformed data (for
/// example a cycle) cannot loop forever; chains that reach the cap are
/// counted in the last bucket. A parent that has no `namespace_meta`
/// row of its own ends the walk. Rows that fail to decode are skipped.
///
/// # Errors
///
/// Returns `Err` when preparing or starting the query fails.
pub fn doctor_governance_depth_distribution(conn: &Connection) -> Result<Vec<usize>> {
    const MAX_DEPTH: usize = 16;
    let mut stmt = conn.prepare("SELECT namespace, parent_namespace FROM namespace_meta")?;
    let parents_by_ns: HashMap<String, Option<String>> = stmt
        .query_map([], |row| {
            Ok((row.get::<_, String>(0)?, row.get::<_, Option<String>>(1)?))
        })?
        .flatten()
        .collect();

    let mut hist = vec![0_usize; MAX_DEPTH + 1];
    for direct_parent in parents_by_ns.values() {
        let mut hops = 0_usize;
        let mut cursor: Option<&str> = direct_parent.as_deref();
        while let Some(ancestor) = cursor {
            hops += 1;
            if hops >= MAX_DEPTH {
                break;
            }
            // Step to the ancestor's own parent; an unknown ancestor or
            // a NULL parent ends the chain.
            cursor = parents_by_ns
                .get(ancestor)
                .and_then(|next| next.as_deref());
        }
        hist[hops.min(MAX_DEPTH)] += 1;
    }
    Ok(hist)
}

/// Sum of `subscriptions.dispatch_count` and `subscriptions.failure_count`
/// across all rows. Returns `(dispatched, failed)`. Used by the doctor to
/// estimate webhook delivery success rate.
///
/// Both sums come from a single query, so the pair describes one
/// consistent read of the table. An empty table yields `(0, 0)`.
///
/// # Errors
///
/// Returns `Err` when the query fails. A failure is never folded into
/// `(0, 0)`, which would read as "nothing dispatched" on a database the
/// doctor could not inspect.
pub fn doctor_webhook_delivery_totals(conn: &Connection) -> Result<(u64, u64)> {
    let (dispatched, failed): (i64, i64) = conn
        .query_row(
            "SELECT COALESCE(SUM(dispatch_count), 0), COALESCE(SUM(failure_count), 0)
             FROM subscriptions",
            [],
            |r| Ok((r.get::<_, i64>(0)?, r.get::<_, i64>(1)?)),
        )
        .context("webhook delivery totals query failed")?;
    // Negative sums are clamped to zero before the unsigned conversion.
    Ok((
        u64::try_from(dispatched.max(0)).unwrap_or(0),
        u64::try_from(failed.max(0)).unwrap_or(0),
    ))
}

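// NOTE(review): the description below belongs to a sync-clock-skew helper
// that is not defined at this point in the file. It is kept as a plain
// comment so rustdoc does not attach it to `CapabilityExpansionRow`.
//
// Maximum sync-clock skew in seconds across the `sync_state` table —
// the largest gap between `last_pulled_at` (when this peer last heard
// from a peer) and `last_seen_at` (the peer's own `updated_at` advance).
// Returns `Ok(None)` when `sync_state` is empty or the columns are
// missing on a pre-T3 schema.
//
// # Errors
//
// Returns `Err` only on hard SQLite failures.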
// ---------------------------------------------------------------------
// v0.6.4-009 — capability-expansion audit log
// ---------------------------------------------------------------------

/// Single audit_log row (capability-expansion shape — extensible).
///
/// Written by `record_capability_expansion` and read back by
/// `list_capability_expansions`; the fields mirror the `audit_log` columns
/// those two functions touch.
#[derive(Debug, Clone)]
pub struct CapabilityExpansionRow {
    /// Row identifier; the recorder generates a fresh UUID v4 string.
    pub id: String,
    /// Agent that made the request, when one was supplied.
    pub agent_id: Option<String>,
    /// Event discriminator; the recorder always writes `capability_expansion`.
    pub event_type: String,
    /// Capability family the agent asked for.
    pub requested_family: Option<String>,
    /// Whether the request was granted (stored as an integer; non-zero reads
    /// back as `true`).
    pub granted: bool,
    /// Attestation tier passed to the recorder, if any.
    pub attestation_tier: Option<String>,
    /// RFC 3339 timestamp taken from the UTC clock when the row was recorded.
    pub timestamp: String,
}

/// Append one capability-expansion attempt to `audit_log`.
///
/// Called by `handle_capabilities_family` once the allowlist decision is
/// known. Grants and denials are both written, so operators can see what
/// agents tried to reach even when the gate refused.
///
/// * `granted = true` — the agent received the schemas.
/// * `granted = false` — the agent was denied, or the family was unknown.
///
/// The write is best-effort: if the insert fails (e.g. disk full) the
/// failure is reported through `tracing::warn!` and otherwise ignored, so
/// the audit trail can never block the call it is describing.
pub fn record_capability_expansion(
    conn: &Connection,
    agent_id: Option<&str>,
    family: &str,
    granted: bool,
    attestation_tier: Option<&str>,
) {
    let row_id = uuid::Uuid::new_v4().to_string();
    let recorded_at = Utc::now().to_rfc3339();
    // The flag is bound as an integer 0/1.
    let granted_flag = i32::from(granted);

    let outcome = conn.execute(
        "INSERT INTO audit_log (id, agent_id, event_type, requested_family, \
         granted, attestation_tier, timestamp) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
        rusqlite::params![
            row_id,
            agent_id,
            "capability_expansion",
            family,
            granted_flag,
            attestation_tier,
            recorded_at,
        ],
    );

    // Success needs no follow-up; only a failed insert is worth a log line.
    let Err(e) = outcome else {
        return;
    };
    tracing::warn!(
        "audit_log insert failed (capability_expansion / agent={:?} / family={}): {e}",
        agent_id,
        family,
    );
}

/// Fetch capability-expansion audit rows, newest first.
///
/// `limit` caps the number of rows returned and is itself clamped to
/// 10 000. When `agent_filter` is `Some`, only rows whose `agent_id`
/// equals it are returned; otherwise rows for every agent are listed.
///
/// # Errors
///
/// Returns `Err` when preparing or running the query fails, or when a row
/// cannot be decoded into a [`CapabilityExpansionRow`].
pub fn list_capability_expansions(
    conn: &Connection,
    limit: usize,
    agent_filter: Option<&str>,
) -> Result<Vec<CapabilityExpansionRow>> {
    /// Decode one `audit_log` row in the column order both queries select.
    fn decode(row: &rusqlite::Row<'_>) -> rusqlite::Result<CapabilityExpansionRow> {
        Ok(CapabilityExpansionRow {
            id: row.get(0)?,
            agent_id: row.get(1)?,
            event_type: row.get(2)?,
            requested_family: row.get(3)?,
            granted: row.get::<_, i64>(4)? != 0,
            attestation_tier: row.get(5)?,
            timestamp: row.get(6)?,
        })
    }

    // The clamp keeps the value far inside the i64 range, so the fallback is
    // never taken in practice.
    let row_cap = i64::try_from(limit.min(10_000)).unwrap_or(10_000);
    let mut found = Vec::new();

    match agent_filter {
        Some(agent) => {
            let mut stmt = conn.prepare(
                "SELECT id, agent_id, event_type, requested_family, granted, \
                 attestation_tier, timestamp FROM audit_log \
                 WHERE event_type = 'capability_expansion' AND agent_id = ?1 \
                 ORDER BY timestamp DESC LIMIT ?2",
            )?;
            let rows = stmt.query_map(rusqlite::params![agent, row_cap], decode)?;
            for entry in rows {
                found.push(entry?);
            }
        }
        None => {
            let mut stmt = conn.prepare(
                "SELECT id, agent_id, event_type, requested_family, granted, \
                 attestation_tier, timestamp FROM audit_log \
                 WHERE event_type = 'capability_expansion' \
                 ORDER BY timestamp DESC LIMIT ?1",
            )?;
            let rows = stmt.query_map(rusqlite::params![row_cap], decode)?;
            for entry in rows {
                found.push(entry?);
            }
        }
    }

    Ok(found)
}

pub fn doctor_max_sync_skew_secs(conn: &Connection) -> Result<Option<i64>> {
    let mut stmt = match conn.prepare(
        "SELECT last_seen_at, last_pulled_at FROM sync_state WHERE last_pulled_at IS NOT NULL",
    ) {
        Ok(s) => s,
        Err(_) => return Ok(None),
    };
    let rows = stmt.query_map([], |r| Ok((r.get::<_, String>(0)?, r.get::<_, String>(1)?)))?;
    let mut max_skew: Option<i64> = None;
    for row in rows {
        let Ok((seen, pulled)) = row else { continue };
        let Ok(s) = chrono::DateTime::parse_from_rfc3339(&seen) else {
            continue;
        };
        let Ok(p) = chrono::DateTime::parse_from_rfc3339(&pulled) else {
            continue;
        };
        let skew = (s.with_timezone(&Utc) - p.with_timezone(&Utc))
            .num_seconds()
            .abs();
        max_skew = Some(max_skew.map_or(skew, |m| m.max(skew)));
    }
    Ok(max_skew)
}

// ---------------------------------------------------------------------------
// L1-4 — Reflection-depth telemetry for `ai-memory doctor`.
// ---------------------------------------------------------------------------

/// One namespace's reflection-depth distribution row returned by
/// [`doctor_reflection_depth_distribution`].
///
/// The four depth buckets mirror the default `max_reflection_depth=3`
/// cap: depth 0 (direct memories), depth 1, depth 2, depth 3+. Depth
/// 3+ is collapsed into a single counter because depths beyond the cap
/// are impossible to store under standard policy; the bucket exists so
/// future schemas with raised caps still produce a non-zero column.
pub struct ReflectionDepthRow {
    /// Namespace the counters belong to (the query's `GROUP BY` key).
    pub namespace: String,
    /// Number of rows with `reflection_depth = 0`.
    pub depth0: i64,
    /// Number of rows with `reflection_depth = 1`.
    pub depth1: i64,
    /// Number of rows with `reflection_depth = 2`.
    pub depth2: i64,
    /// Number of rows with `reflection_depth >= 3`.
    pub depth3_plus: i64,
    /// Mean `reflection_depth` over the namespace's rows.
    pub avg_depth: f64,
    /// Largest `reflection_depth` found in the namespace.
    pub max_depth: i64,
    /// Total number of rows in the namespace, across all depths.
    pub total: i64,
}

/// Depth distribution across all namespaces that hold at least one
/// memory with `reflection_depth > 0`, plus the `_global_` aggregate.
///
/// Uses a single GROUP BY pass so the query is a single indexed scan
/// over `memories.reflection_depth`. A fresh DB (all rows at depth 0)
/// returns an empty `Vec` — the caller (doctor) renders that as
/// "no reflections observed".
///
/// # Errors
///
/// Returns `Err` only on hard SQLite failures (e.g. the `memories`
/// table does not exist yet — pre-migration schemas).
pub fn doctor_reflection_depth_distribution(conn: &Connection) -> Result<Vec<ReflectionDepthRow>> {
    // Aggregate per namespace, only namespaces that contain at least
    // one reflected memory (depth > 0). The doctor renders a global
    // summary from the returned rows; the SQL avoids a second pass by
    // letting the caller roll up the namespace rows.
    let mut stmt = conn.prepare(
        "SELECT
             namespace,
             SUM(CASE WHEN reflection_depth = 0 THEN 1 ELSE 0 END),
             SUM(CASE WHEN reflection_depth = 1 THEN 1 ELSE 0 END),
             SUM(CASE WHEN reflection_depth = 2 THEN 1 ELSE 0 END),
             SUM(CASE WHEN reflection_depth >= 3 THEN 1 ELSE 0 END),
             AVG(CAST(reflection_depth AS REAL)),
             MAX(reflection_depth),
             COUNT(*)
         FROM memories
         GROUP BY namespace
         HAVING MAX(reflection_depth) > 0
         ORDER BY namespace",
    )?;
    let rows = stmt.query_map([], |r| {
        Ok(ReflectionDepthRow {
            namespace: r.get(0)?,
            depth0: r.get(1)?,
            depth1: r.get(2)?,
            depth2: r.get(3)?,
            depth3_plus: r.get(4)?,
            avg_depth: r.get(5)?,
            max_depth: r.get(6)?,
            total: r.get(7)?,
        })
    })?;
    let mut out = Vec::new();
    for row in rows {
        out.push(row?);
    }
    Ok(out)
}

/// Count of `reflection.depth_exceeded` audit events in `signed_events`
/// within a given look-back window.
///
/// `since_rfc3339` is an RFC 3339 timestamp; only events with
/// `timestamp >= since_rfc3339` are counted. Pass the epoch
/// (`"1970-01-01T00:00:00Z"`) to count all-time.
///
/// Returns `0` when the `signed_events` table does not exist (pre-H5
/// schemas) rather than propagating the error, matching the pattern
/// in other doctor helpers.
///
/// # Errors
///
/// Returns `Err` only on hard query failures (table exists but query
/// is malformed — should not happen in practice).
pub fn doctor_reflection_depth_exceeded_count(
    conn: &Connection,
    since_rfc3339: &str,
) -> Result<i64> {
    let n: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM signed_events
             WHERE event_type = 'reflection.depth_exceeded'
               AND timestamp >= ?1",
            params![since_rfc3339],
            |r| r.get(0),
        )
        .unwrap_or(0);
    Ok(n)
}

/// Reflection totals per namespace: memories created in the last 24h,
/// 7d, and all-time that have `reflection_depth > 0`.
///
/// Returns one tuple `(ns, last_24h, last_7d, all_time)` per
/// namespace that has at least one reflected memory. Namespaces with
/// no reflections are omitted; the caller renders "no reflections" for
/// the global summary.
///
/// # Errors
///
/// Returns `Err` on hard SQLite failures.
pub fn doctor_reflection_totals_by_namespace(
    conn: &Connection,
) -> Result<Vec<(String, i64, i64, i64)>> {
    let now = Utc::now();
    let last_day_cutoff = (now - chrono::Duration::hours(24)).to_rfc3339();
    let cutoff_7d = (now - chrono::Duration::days(7)).to_rfc3339();

    let mut stmt = conn.prepare(
        "SELECT
             namespace,
             SUM(CASE WHEN created_at >= ?1 THEN 1 ELSE 0 END),
             SUM(CASE WHEN created_at >= ?2 THEN 1 ELSE 0 END),
             COUNT(*)
         FROM memories
         WHERE reflection_depth > 0
         GROUP BY namespace
         ORDER BY namespace",
    )?;
    let rows = stmt.query_map(params![last_day_cutoff, cutoff_7d], |r| {
        Ok((
            r.get::<_, String>(0)?,
            r.get::<_, i64>(1)?,
            r.get::<_, i64>(2)?,
            r.get::<_, i64>(3)?,
        ))
    })?;
    let mut out = Vec::new();
    for row in rows {
        out.push(row?);
    }
    Ok(out)
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::{MID_TTL_EXTEND_SECS, Memory, SHORT_TTL_EXTEND_SECS, Tier};

    /// Database handle for tests, obtained by calling `open` on the
    /// `:memory:` path.
    fn test_db() -> Connection {
        let in_memory = std::path::Path::new(":memory:");
        open(in_memory).unwrap()
    }

    /// Seed one bare-bones `memories` row whose `created_at` and
    /// `updated_at` are both set to the supplied timestamp, so the
    /// federation-catchup tests control exactly where each row sits relative
    /// to a range boundary. The row's title reuses `id`; columns not named
    /// in the statement are left to the schema defaults.
    fn insert_memory_at(conn: &Connection, id: &str, updated_at: &str) {
        let sql = "INSERT INTO memories (id, tier, namespace, title, content, created_at, updated_at) \
                   VALUES (?1, 'mid', 'ns', ?1, 'content body', ?2, ?2)";
        conn.execute(sql, params![id, updated_at])
            .expect("insert memory row");
    }

    #[test]
    fn memories_updated_since_sargable_split_none_and_some_paths() {
        // #1476 — the former OR-NULL predicate is now two code paths: `None`
        // (no predicate, ORDER BY updated_at ASC) and `Some` (strict
        // `updated_at > ?1`). This test pins what each path returns so the
        // sargable rewrite cannot quietly change the rows a peer catchup sees.
        let conn = test_db();
        let t1 = "2026-01-01T00:00:00+00:00";
        let t2 = "2026-01-02T00:00:00+00:00";
        let t3 = "2026-01-03T00:00:00+00:00";

        // Seeded out of chronological order to prove ORDER BY does the sorting.
        for (id, stamp) in [("b", t2), ("c", t3), ("a", t1)] {
            insert_memory_at(&conn, id, stamp);
        }

        // None path: every row, ascending by updated_at.
        let everything = memories_updated_since(&conn, None, 100).expect("none path");
        let everything_ids: Vec<&str> = everything.iter().map(|m| m.id.as_str()).collect();
        assert_eq!(
            everything_ids,
            ["a", "b", "c"],
            "None path: all rows ASC by updated_at"
        );

        // Some path is STRICTLY greater — the boundary row (t1) is excluded.
        let newer_than_t1 = memories_updated_since(&conn, Some(t1), 100).expect("some path");
        let newer_ids: Vec<&str> = newer_than_t1.iter().map(|m| m.id.as_str()).collect();
        assert_eq!(
            newer_ids,
            ["b", "c"],
            "Some(t1): strict > excludes the boundary row"
        );

        // Past the newest row → empty.
        let newer_than_t3 = memories_updated_since(&conn, Some(t3), 100).expect("some path empty");
        assert!(
            newer_than_t3.is_empty(),
            "Some(t3): nothing strictly newer than the max"
        );

        // LIMIT caps from the low end of the range (oldest-first under ASC).
        let first_only = memories_updated_since(&conn, Some(t1), 1).expect("some path limited");
        let first_ids: Vec<&str> = first_only.iter().map(|m| m.id.as_str()).collect();
        assert_eq!(
            first_ids,
            ["b"],
            "Some(t1) LIMIT 1: oldest row strictly after t1"
        );
    }

    #[test]
    fn memories_updated_since_uses_updated_at_index() {
        // #1476 — the sargable `Some` path has to be served by
        // `idx_memories_updated_at` rather than a full table scan. The check
        // reads the detail column of EXPLAIN QUERY PLAN for the same query
        // shape and looks for the index name.
        let conn = test_db();
        let mut explain = conn
            .prepare(
                "EXPLAIN QUERY PLAN \
                 SELECT id FROM memories WHERE updated_at > ?1 \
                 ORDER BY updated_at ASC LIMIT ?2",
            )
            .expect("prepare explain");

        let details: Vec<String> = explain
            .query_map(params!["2026-01-01T00:00:00+00:00", 10_i64], |row| {
                row.get::<_, String>(3)
            })
            .expect("explain rows")
            .map(|detail| detail.expect("explain detail"))
            .collect();
        let plan = details.join(" | ");

        assert!(
            plan.contains("idx_memories_updated_at"),
            "sargable catchup query must use idx_memories_updated_at; plan was: {plan}"
        );
    }

    #[test]
    fn perf_8_hierarchy_in_clause_cache_hits_on_repeat() {
        // PERF-8 — two consecutive lookups for the same namespace must yield
        // byte-equal fragments. Cache invalidation isn't part of the public
        // contract (ancestors are deterministic on the namespace input), so
        // the repeat lookup has to be wire-equal to the first one made right
        // after the cache was cleared.
        hierarchy_cache_clear_for_tests();
        let namespace = Some("alphaone/team/alice");

        let (first_fragment, first_active) = hierarchy_in_clause(namespace);
        let (repeat_fragment, repeat_active) = hierarchy_in_clause(namespace);

        assert!(first_active && repeat_active);
        assert_eq!(
            first_fragment, repeat_fragment,
            "PERF-8: cached hierarchy_in_clause result drift on second lookup",
        );

        let fragment = first_fragment.expect("non-None fragment");
        assert!(
            fragment.contains("AND m.namespace IN ("),
            "PERF-8: fragment shape regressed",
        );
    }

    #[test]
    fn perf_8_hierarchy_cache_handles_non_hierarchical_ns() {
        // A namespace without `/` is not hierarchical: the lookup MUST
        // short-circuit before touching the cache, so only legitimate entries
        // are ever stored. Observable contract: no fragment, inactive flag.
        hierarchy_cache_clear_for_tests();
        let (fragment, is_active) = hierarchy_in_clause(Some("global"));
        assert_eq!(fragment, None);
        assert!(!is_active);
    }

    #[test]
    fn perf_8_hierarchy_cache_bounded_under_pressure() {
        // Push twice HIERARCHY_CACHE_MAX distinct hierarchical namespaces
        // through the lookup; the cache must stay at or below its cap rather
        // than growing with every new key.
        hierarchy_cache_clear_for_tests();
        (0..(HIERARCHY_CACHE_MAX * 2))
            .map(|i| format!("tenant{i}/sub"))
            .for_each(|ns| {
                let _ = hierarchy_in_clause(Some(&ns));
            });

        let cache_len = hierarchy_cache().lock().unwrap().len();
        assert!(
            cache_len <= HIERARCHY_CACHE_MAX,
            "PERF-8: hierarchy cache grew unbounded: {cache_len} > {HIERARCHY_CACHE_MAX}",
        );
    }

    /// v0.7.0 #981 — `get_many` batches the SELECTs the semantic-phase
    /// HNSW recall branch previously issued per-id. This test pins:
    ///   1. Empty `ids` yields an empty map.
    ///   2. All requested + existing rows land in the result map.
    ///   3. Missing ids are silently dropped (no error, no panic) —
    ///      the caller observes via `map.get(&id).is_none()`.
    ///   4. Order doesn't matter — `IN (...)` is unordered; callers
    ///      that need original ordering re-apply via the hit list.
    ///   5. Chunking >500 ids still returns every row.
    #[test]
    fn get_many_batches_and_handles_empty_missing_and_chunked_inputs_981() {
        let conn = test_db();
        // Seed 3 rows.
        let m1 = make_memory("alpha", "ns/a", Tier::Long, 5);
        let m2 = make_memory("beta", "ns/b", Tier::Long, 5);
        let m3 = make_memory("gamma", "ns/c", Tier::Long, 5);
        insert(&conn, &m1).unwrap();
        insert(&conn, &m2).unwrap();
        insert(&conn, &m3).unwrap();

        // (1) Empty input.
        assert!(get_many(&conn, &[]).unwrap().is_empty());

        // (2) Existing ids.
        let ids = vec![m1.id.clone(), m2.id.clone()];
        let got = get_many(&conn, &ids).unwrap();
        assert_eq!(got.len(), 2);
        assert!(got.contains_key(&m1.id));
        assert!(got.contains_key(&m2.id));
        assert!(!got.contains_key(&m3.id));

        // (3) Mixed existing + missing — missing silently dropped.
        let mixed = vec![m1.id.clone(), "nope-not-a-real-id".to_string()];
        let got = get_many(&conn, &mixed).unwrap();
        assert_eq!(got.len(), 1);
        assert!(got.contains_key(&m1.id));

        // (4) Order doesn't matter — IN clause is set-like.
        let reversed = vec![m3.id.clone(), m2.id.clone(), m1.id.clone()];
        let got = get_many(&conn, &reversed).unwrap();
        assert_eq!(got.len(), 3);
        for id in &reversed {
            assert!(got.contains_key(id), "id {id} missing from set-fetch");
        }

        // (5) Chunked >500 ids still returns every row. Only the ids are
        // needed afterwards, so each seeded memory is dropped once inserted
        // instead of being accumulated in a vector nobody reads.
        let bulk_ids: Vec<String> = (0..750)
            .map(|i| {
                let m = make_memory(&format!("bulk-{i}"), "ns/bulk", Tier::Long, 1);
                insert(&conn, &m).unwrap();
                m.id
            })
            .collect();
        let got = get_many(&conn, &bulk_ids).unwrap();
        assert_eq!(
            got.len(),
            750,
            "chunked fetch >500 must still return every row",
        );
    }

    /// Build an unsaved `Memory` fixture with a fresh UUID, the given title,
    /// namespace, tier and priority, and content derived from the title.
    /// `expires_at` is "now + the tier's default TTL" when the tier has one
    /// and `None` otherwise; the remaining fields get fixed test values.
    fn make_memory(title: &str, ns: &str, tier: Tier, priority: i32) -> Memory {
        let stamp = chrono::Utc::now().to_rfc3339();
        let stored_tier = tier.clone();
        let expiry = tier
            .default_ttl_secs()
            .map(|secs| (chrono::Utc::now() + chrono::Duration::seconds(secs)).to_rfc3339());

        Memory {
            id: uuid::Uuid::new_v4().to_string(),
            tier: stored_tier,
            namespace: ns.to_string(),
            title: title.to_string(),
            content: format!("Content for {title}"),
            tags: vec![],
            priority,
            confidence: 1.0,
            source: "test".to_string(),
            access_count: 0,
            created_at: stamp.clone(),
            updated_at: stamp,
            last_accessed_at: None,
            expires_at: expiry,
            metadata: serde_json::json!({}),
            reflection_depth: 0,
            memory_kind: crate::models::MemoryKind::Observation,
            entity_id: None,
            persona_version: None,
            citations: Vec::new(),
            source_uri: None,
            source_span: None,
            confidence_source: ConfidenceSource::CallerProvided,
            confidence_signals: None,
            confidence_decayed_at: None,
            version: 1,
        }
    }

    /// Long-tier fixture in namespace `ns`. With `Some(scope)` the metadata
    /// becomes an object holding only the scope key (`crate::META_KEY_SCOPE`);
    /// with `None` the metadata from `make_memory` is left as is.
    fn mem_with_scope(ns: &str, scope: Option<&str>) -> Memory {
        let mut memory = make_memory("scoped", ns, Tier::Long, 5);
        let Some(label) = scope else {
            return memory;
        };
        let scoped: serde_json::Map<String, serde_json::Value> = std::iter::once((
            crate::META_KEY_SCOPE.to_string(),
            serde_json::Value::String(label.to_string()),
        ))
        .collect();
        memory.metadata = serde_json::Value::Object(scoped);
        memory
    }

    // Table-driven pin for the Rust-side visibility predicate (`is_visible`)
    // that the HNSW recall branch uses when SQL-side visibility can't be
    // attached. It walks every `MemoryScope` arm plus `matches_subtree`; the
    // integration recall paths only reach whichever scope their fixture
    // corpus happens to carry, leaving the other arms uncovered.
    // Deterministic, no DB.
    #[test]
    fn is_visible_scope_matrix_covers_every_arm() {
        // No-agent caller (all-None prefixes) bypasses the filter entirely.
        let no_filter = (None, None, None, None);
        assert!(super::is_visible(
            &mem_with_scope("acme/eng/web", Some("private")),
            &no_filter
        ));

        // 4-level agent ns populates every prefix slot:
        // p=acme/eng/web/team, t=acme/eng/web, u=acme/eng, o=acme.
        let deep = super::compute_visibility_prefixes(Some("acme/eng/web/team"));
        let expected_deep = (
            Some("acme/eng/web/team".to_string()),
            Some("acme/eng/web".to_string()),
            Some("acme/eng".to_string()),
            Some("acme".to_string()),
        );
        assert_eq!(deep, expected_deep);

        // (memory namespace, scope metadata, expected visibility) under `deep`.
        let matrix = [
            // Collective: visible to anyone.
            ("zzz/other", Some("collective"), true),
            // Private: only the caller's own namespace (p) is visible.
            ("acme/eng/web/team", Some("private"), true),
            ("acme/eng/web", Some("private"), false),
            // Absent scope key → MemoryScope::default() (Private) semantics.
            ("acme/eng/web/team", None, true),
            ("acme/other", None, false),
            // Team subtree (t = acme/eng/web): exact + descendant in, sibling out.
            ("acme/eng/web", Some("team"), true),
            ("acme/eng/web/team/v2", Some("team"), true),
            ("acme/eng/api", Some("team"), false),
            // Unit subtree (u = acme/eng).
            ("acme/eng", Some("unit"), true),
            ("acme/sales", Some("unit"), false),
            // Org subtree (o = acme).
            ("acme", Some("org"), true),
            ("globex", Some("org"), false),
        ];
        for (ns, scope, expected) in matrix {
            let memory = mem_with_scope(ns, scope);
            assert_eq!(
                super::is_visible(&memory, &deep),
                expected,
                "ns={ns:?} scope={scope:?}"
            );
        }

        // matches_subtree None arm: a shallow agent leaves the org slot empty,
        // so an org-scoped memory is denied (no prefix to match against).
        let shallow = super::compute_visibility_prefixes(Some("acme"));
        assert_eq!(shallow.3, None);
        assert!(!super::is_visible(
            &mem_with_scope("acme", Some("org")),
            &shallow
        ));

        // Unknown scope string → from_str None → caller denied.
        let unknown_scope = mem_with_scope("acme/eng/web/team", Some("definitely-not-a-scope"));
        assert!(!super::is_visible(&unknown_scope, &deep));

        // None-agent → all-None tuple (the no-filter sentinel).
        assert_eq!(
            super::compute_visibility_prefixes(None),
            (None, None, None, None)
        );
    }

    #[test]
    fn open_creates_schema() {
        // A freshly opened DB must already have a (still empty) `memories`
        // table; the COUNT query would fail if the table were missing.
        let conn = test_db();
        let row_count: i64 = conn
            .query_row("SELECT COUNT(*) FROM memories", [], |row| row.get(0))
            .unwrap();
        assert_eq!(row_count, 0);
    }

    #[test]
    fn insert_and_get() {
        // Round trip: what `insert` stores is what `get` hands back.
        let conn = test_db();
        let original = make_memory("Test insert", "test", Tier::Long, 5);
        let stored_id = insert(&conn, &original).unwrap();

        let fetched = get(&conn, &stored_id).unwrap().unwrap();
        assert_eq!(
            (
                fetched.title.as_str(),
                fetched.namespace.as_str(),
                fetched.priority
            ),
            ("Test insert", "test", 5)
        );
    }

    #[test]
    fn get_nonexistent() {
        // Looking up an unknown id is `Ok(None)`, not an error.
        let conn = test_db();
        assert!(get(&conn, "nonexistent-id").unwrap().is_none());
    }

    // #1466 — write-path chokepoint regression. A non-Long memory handed
    // to any insert path with `expires_at: None` must land with a
    // tier-default expiry so GC (`expires_at IS NOT NULL AND expires_at <
    // now`) can eventually reap it; before the fix it landed NULL =
    // immortal. Long stays NULL; an explicit expiry is preserved.

    fn ttl_gap_secs(created_at: &str, expires_at: &str) -> i64 {
        let base = chrono::DateTime::parse_from_rfc3339(created_at).unwrap();
        let exp = chrono::DateTime::parse_from_rfc3339(expires_at).unwrap();
        (exp - base).num_seconds()
    }

    #[test]
    fn insert_backfills_mid_expiry_when_none() {
        // A Mid memory handed to `insert` without an expiry must come back
        // with one exactly a week after its `created_at`.
        let conn = test_db();
        let candidate = Memory {
            expires_at: None,
            ..make_memory("mid none", "test", Tier::Mid, 5)
        };
        let id = insert(&conn, &candidate).unwrap();

        let stored = get(&conn, &id).unwrap().unwrap();
        let expiry = stored.expires_at.expect("mid must not land immortal");
        assert_eq!(
            ttl_gap_secs(&stored.created_at, &expiry),
            crate::SECS_PER_WEEK
        );
    }

    #[test]
    fn insert_backfills_short_expiry_when_none() {
        // A Short memory handed to `insert` without an expiry must come back
        // with one exactly six hours after its `created_at`.
        let conn = test_db();
        let candidate = Memory {
            expires_at: None,
            ..make_memory("short none", "test", Tier::Short, 5)
        };
        let id = insert(&conn, &candidate).unwrap();

        let stored = get(&conn, &id).unwrap().unwrap();
        let expiry = stored.expires_at.expect("short must not land immortal");
        let expected_gap = 6 * crate::SECS_PER_HOUR;
        assert_eq!(ttl_gap_secs(&stored.created_at, &expiry), expected_gap);
    }

    #[test]
    fn insert_leaves_long_expiry_none() {
        // Long-tier memories carry no TTL, so a missing expiry stays missing.
        let conn = test_db();
        let candidate = Memory {
            expires_at: None,
            ..make_memory("long none", "test", Tier::Long, 5)
        };
        let id = insert(&conn, &candidate).unwrap();

        let stored = get(&conn, &id).unwrap().unwrap();
        assert!(
            stored.expires_at.is_none(),
            "long has no TTL — must stay NULL"
        );
    }

    #[test]
    fn insert_preserves_explicit_expiry() {
        // An expiry supplied by the caller must be stored untouched.
        let conn = test_db();
        let explicit = "2027-06-15T12:00:00+00:00";
        let candidate = Memory {
            expires_at: Some(explicit.to_string()),
            ..make_memory("mid explicit", "test", Tier::Mid, 5)
        };
        let id = insert(&conn, &candidate).unwrap();

        let stored = get(&conn, &id).unwrap().unwrap();
        assert_eq!(stored.expires_at.as_deref(), Some(explicit));
    }

    #[test]
    fn insert_with_conflict_backfills_mid_expiry_when_none() {
        // Same backfill contract as plain `insert`, exercised through the
        // conflict-aware entry point in Merge mode.
        let conn = test_db();
        let candidate = Memory {
            expires_at: None,
            ..make_memory("conflict mid", "test", Tier::Mid, 5)
        };
        let id = insert_with_conflict(&conn, &candidate, ConflictMode::Merge).unwrap();

        let stored = get(&conn, &id).unwrap().unwrap();
        let expiry = stored.expires_at.expect("mid must not land immortal");
        assert_eq!(
            ttl_gap_secs(&stored.created_at, &expiry),
            crate::SECS_PER_WEEK
        );
    }

    #[test]
    fn insert_if_newer_backfills_mid_expiry_when_none() {
        // Same backfill contract as plain `insert`, exercised through
        // `insert_if_newer`.
        let conn = test_db();
        let candidate = Memory {
            expires_at: None,
            ..make_memory("newer mid", "test", Tier::Mid, 5)
        };
        let id = insert_if_newer(&conn, &candidate).unwrap();

        let stored = get(&conn, &id).unwrap().unwrap();
        let expiry = stored.expires_at.expect("mid must not land immortal");
        assert_eq!(
            ttl_gap_secs(&stored.created_at, &expiry),
            crate::SECS_PER_WEEK
        );
    }

    #[test]
    fn consolidate_backfills_mid_expiry() {
        // The memory produced by `consolidate` at Mid tier must carry a
        // one-week expiry.
        let conn = test_db();
        let first = make_memory("src a", "test", Tier::Mid, 5);
        let second = make_memory("src b", "test", Tier::Mid, 5);
        let first_id = insert(&conn, &first).unwrap();
        let second_id = insert(&conn, &second).unwrap();

        let merged_id = consolidate(
            &conn,
            &[first_id, second_id],
            "merged",
            "summary body",
            "test",
            &Tier::Mid,
            "test",
            "agent-x",
        )
        .unwrap();

        let merged = get(&conn, &merged_id).unwrap().unwrap();
        let expiry = merged
            .expires_at
            .expect("consolidated mid must not land immortal");
        assert_eq!(
            ttl_gap_secs(&merged.created_at, &expiry),
            crate::SECS_PER_WEEK
        );
    }

    #[test]
    fn update_partial_fields() {
        // Changing only title and priority must leave the content untouched.
        let conn = test_db();
        let original = make_memory("Original", "test", Tier::Mid, 5);
        let id = insert(&conn, &original).unwrap();

        let outcome = update(
            &conn,
            &id,
            Some("Updated Title"), // title
            None,
            None,
            None,
            None,
            Some(9), // priority
            None,
            None,
            None,
        )
        .unwrap();
        let (found, content_changed) = outcome;
        assert!(found);
        assert!(content_changed); // title changed

        let stored = get(&conn, &id).unwrap().unwrap();
        assert_eq!(stored.title, "Updated Title");
        assert_eq!(stored.priority, 9);
        assert_eq!(stored.content, original.content); // unchanged
    }

    #[test]
    fn update_content_changed_flag() {
        // The second element of `update`'s result reports whether content
        // changed: false for a priority-only edit, true for a content edit.
        let conn = test_db();
        let seed = make_memory("Stable", "test", Tier::Mid, 5);
        let id = insert(&conn, &seed).unwrap();

        // Updating only priority — content_changed should be false
        let priority_only = update(
            &conn,
            &id,
            None,
            None,
            None,
            None,
            None,
            Some(8),
            None,
            None,
            None,
        )
        .unwrap();
        let (found, content_changed) = priority_only;
        assert!(found);
        assert!(!content_changed);

        // Updating content — content_changed should be true
        let content_edit = update(
            &conn,
            &id,
            None,
            Some("New content"),
            None,
            None,
            None,
            None,
            None,
            None,
            None,
        )
        .unwrap();
        let (found, content_changed) = content_edit;
        assert!(found);
        assert!(content_changed);
    }

    #[test]
    fn update_nonexistent_returns_false() {
        // Updating an unknown id succeeds but reports `found = false`.
        let conn = test_db();
        let outcome = update(
            &conn,
            "bad-id",
            Some("New"),
            None,
            None,
            None,
            None,
            None,
            None,
            None,
            None,
        )
        .unwrap();
        let (found, _content_changed) = outcome;
        assert!(!found);
    }

    #[test]
    fn update_tier_downgrade_protection() {
        // Tier changes through `update` only ever move upwards: Long and Mid
        // both refuse a move to Short, while Mid may still be promoted to Long.
        let conn = test_db();

        // Asks `update` for a tier change and nothing else; yields `found`.
        let request_tier = |id: &str, wanted: &Tier| -> bool {
            let (found, _) = update(
                &conn,
                id,
                None,
                None,
                Some(wanted),
                None,
                None,
                None,
                None,
                None,
                None,
            )
            .unwrap();
            found
        };
        // Reads back the tier currently stored for `id`.
        let stored_tier = |id: &str| get(&conn, id).unwrap().unwrap().tier;

        // Long-tier memory should never be downgraded
        let long_lived = make_memory("Permanent", "test", Tier::Long, 9);
        let long_id = insert(&conn, &long_lived).unwrap();
        assert!(request_tier(&long_id, &Tier::Short));
        assert_eq!(stored_tier(&long_id), Tier::Long); // still long

        // Mid-tier should not downgrade to short
        let working = make_memory("Working", "test", Tier::Mid, 5);
        let mid_id = insert(&conn, &working).unwrap();
        assert!(request_tier(&mid_id, &Tier::Short));
        assert_eq!(stored_tier(&mid_id), Tier::Mid); // still mid

        // Mid-tier CAN upgrade to long
        assert!(request_tier(&mid_id, &Tier::Long));
        assert_eq!(stored_tier(&mid_id), Tier::Long); // upgraded
    }

    /// Renaming a memory to a title already held by another memory in the
    /// same namespace is rejected with an "already exists in namespace"
    /// error.
    #[test]
    fn update_title_collision_returns_error() {
        let conn = test_db();
        let alpha = make_memory("Alpha", "test", Tier::Mid, 5);
        let beta = make_memory("Beta", "test", Tier::Mid, 5);
        let alpha_id = insert(&conn, &alpha).unwrap();
        let _beta_id = insert(&conn, &beta).unwrap();

        // Alpha -> "Beta" collides with the second row's title.
        let outcome = update(
            &conn,
            &alpha_id,
            Some("Beta"),
            None,
            None,
            None,
            None,
            None,
            None,
            None,
            None,
        );
        assert!(outcome.is_err());
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("already exists in namespace"));
    }

    /// Deleting a stored memory reports `true` and makes the row
    /// unreachable through `get`.
    #[test]
    fn delete_existing() {
        let conn = test_db();
        let doomed = make_memory("To delete", "test", Tier::Short, 3);
        let id = insert(&conn, &doomed).unwrap();

        let was_deleted = delete(&conn, &id).unwrap();
        assert!(was_deleted);

        let lookup = get(&conn, &id).unwrap();
        assert!(lookup.is_none());
    }

    /// Deleting an id that was never inserted returns `Ok(false)`.
    #[test]
    fn delete_nonexistent() {
        let conn = test_db();
        let was_deleted = delete(&conn, "bad-id").unwrap();
        assert!(!was_deleted);
    }

    /// `list` with only the namespace filter set returns just the rows
    /// stored under that namespace.
    #[test]
    fn list_with_namespace_filter() {
        let conn = test_db();
        for (title, namespace) in [("A", "ns1"), ("B", "ns2"), ("C", "ns1")] {
            insert(&conn, &make_memory(title, namespace, Tier::Long, 5)).unwrap();
        }

        let in_ns1 = list(
            &conn,
            Some("ns1"),
            None,
            100,
            0,
            None,
            None,
            None,
            None,
            None,
        )
        .unwrap();
        assert_eq!(in_ns1.len(), 2);
    }

    /// `list` with only the tier filter set returns just the rows stored at
    /// that tier.
    #[test]
    fn list_with_tier_filter() {
        let conn = test_db();
        for (title, tier) in [("Long", Tier::Long), ("Mid", Tier::Mid)] {
            insert(&conn, &make_memory(title, "test", tier, 5)).unwrap();
        }

        let long_only = list(
            &conn,
            None,
            Some(&Tier::Long),
            100,
            0,
            None,
            None,
            None,
            None,
            None,
        )
        .unwrap();
        assert_eq!(long_only.len(), 1);
        assert_eq!(long_only[0].title, "Long");
    }

    /// `list` returns no more rows than the requested limit, even when more
    /// rows match.
    #[test]
    fn list_with_limit() {
        let conn = test_db();
        (0..5).for_each(|i| {
            let title = format!("Mem {i}");
            insert(&conn, &make_memory(&title, "test", Tier::Long, 5)).unwrap();
        });

        let page = list(&conn, None, None, 3, 0, None, None, None, None, None).unwrap();
        assert_eq!(page.len(), 3);
    }

    /// A keyword search returns only the row whose title contains the
    /// searched term.
    #[test]
    fn search_keyword_match() {
        let conn = test_db();
        for title in ["PostgreSQL config", "Redis cache"] {
            insert(&conn, &make_memory(title, "test", Tier::Long, 5)).unwrap();
        }

        let hits = search(
            &conn,
            "PostgreSQL",
            None,
            None,
            10,
            None,
            None,
            None,
            None,
            None,
            None,
            false,
        )
        .unwrap();
        assert_eq!(hits.len(), 1);
        assert!(hits[0].title.contains("PostgreSQL"));
    }

    /// Searching for a term no row contains yields an empty result set, not
    /// an error.
    #[test]
    fn search_no_match() {
        let conn = test_db();
        let only_row = make_memory("PostgreSQL", "test", Tier::Long, 5);
        insert(&conn, &only_row).unwrap();

        let hits = search(
            &conn,
            "nonexistent_term_xyz",
            None,
            None,
            10,
            None,
            None,
            None,
            None,
            None,
            None,
            false,
        )
        .unwrap();
        assert!(hits.is_empty());
    }

    /// `recall` returns `(memory, score)` pairs; the top hit for a matching
    /// context is the matching row and carries a positive score.
    #[test]
    fn recall_returns_scored() {
        let conn = test_db();
        for (title, priority) in [("Rust programming language", 8), ("Python scripting", 5)] {
            insert(&conn, &make_memory(title, "test", Tier::Long, priority)).unwrap();
        }

        let (ranked, _tokens) = recall(
            &conn,
            "Rust programming",
            None,
            10,
            None,
            None,
            None,
            SHORT_TTL_EXTEND_SECS,
            MID_TTL_EXTEND_SECS,
            None,
            None,
            false,
            None,
        )
        .unwrap();
        assert!(!ranked.is_empty());

        // Score should be present
        let (top_memory, top_score) = &ranked[0];
        assert!(top_memory.title.contains("Rust"));
        assert!(*top_score > 0.0);
    }

    /// Smoke test: `recall` with an empty context string must not panic.
    ///
    /// Both `Ok` and `Err` are acceptable outcomes, so the result is
    /// deliberately discarded; the test fails only if the call panics.
    #[test]
    fn recall_empty_context() {
        let conn = test_db();
        insert(&conn, &make_memory("Test", "test", Tier::Long, 5)).unwrap();
        // No assertion on the result: asserting `is_ok() || is_err()` would
        // be a tautology that can never fail, so it would only look like a
        // check. Reaching the end of this function is the whole contract.
        let _ = recall(
            &conn,
            "",
            None,
            10,
            None,
            None,
            None,
            SHORT_TTL_EXTEND_SECS,
            MID_TTL_EXTEND_SECS,
            None,
            None,
            false,
            None,
        );
    }

    /// Each `touch` call bumps the row's `access_count` by exactly one,
    /// starting from zero on a freshly inserted memory.
    #[test]
    fn touch_increments_access_count() {
        let conn = test_db();
        let id = insert(&conn, &make_memory("Touchable", "test", Tier::Mid, 5)).unwrap();

        let fresh = get(&conn, &id).unwrap().unwrap();
        assert_eq!(fresh.access_count, 0);

        for expected in 1..=2 {
            touch(&conn, &id, SHORT_TTL_EXTEND_SECS, MID_TTL_EXTEND_SECS).unwrap();
            let touched = get(&conn, &id).unwrap().unwrap();
            assert_eq!(touched.access_count, expected);
        }
    }

    /// With two "Database is …" titles in one namespace,
    /// `find_contradictions` seeded with one of them returns at least one
    /// candidate.
    #[test]
    fn find_contradictions_similar_titles() {
        let conn = test_db();
        for (title, priority) in [("Database is PostgreSQL", 8), ("Database is MySQL", 5)] {
            insert(&conn, &make_memory(title, "infra", Tier::Long, priority)).unwrap();
        }

        let candidates = find_contradictions(&conn, "Database is PostgreSQL", "infra").unwrap();
        assert!(!candidates.is_empty());
    }

    /// Issue #1320 regression — titles on disjoint topics that share only
    /// English stopwords ("are", "is", "the") MUST NOT surface as
    /// potential contradictions of each other. Pre-fix the FTS5
    /// OR-joined query matched any row containing the stopword, so a
    /// tomato fact stored next to a moon-landing fact and a
    /// retrieval-mechanics fact returned every cross-topic pair as
    /// `potential_contradictions`. Post-fix the Jaccard floor on
    /// stopword-stripped title tokens drops the false positives; each
    /// seed may only ever see its own row.
    #[test]
    fn find_contradictions_disjoint_topics_no_false_positives_1320() {
        const NAMESPACE: &str = "v1-p5-disjoint";
        const TOMATO: &str = "Tomatoes are red fruit";
        const MOON: &str = "Moon landing happened in 1969";
        const RETRIEVAL: &str =
            "Retrieval-augmented generation works by combining recall with synthesis";

        let conn = test_db();
        for title in [TOMATO, MOON, RETRIEVAL] {
            insert(&conn, &make_memory(title, NAMESPACE, Tier::Long, 5)).unwrap();
        }

        // Tomato seed must not flag moon-landing or retrieval rows.
        let tomato_hits = find_contradictions(&conn, TOMATO, NAMESPACE).unwrap();
        assert!(
            tomato_hits.iter().all(|m| m.title == TOMATO),
            "tomato seed leaked false positives: {:?}",
            tomato_hits
                .iter()
                .map(|m| m.title.as_str())
                .collect::<Vec<_>>(),
        );

        // Moon-landing seed must not flag tomato or retrieval rows.
        let moon_hits = find_contradictions(&conn, MOON, NAMESPACE).unwrap();
        assert!(
            moon_hits.iter().all(|m| m.title == MOON),
            "moon-landing seed leaked false positives: {:?}",
            moon_hits
                .iter()
                .map(|m| m.title.as_str())
                .collect::<Vec<_>>(),
        );

        // Retrieval seed must not flag tomato or moon-landing rows.
        let retrieval_hits = find_contradictions(&conn, RETRIEVAL, NAMESPACE).unwrap();
        assert!(
            retrieval_hits
                .iter()
                .all(|m| m.title.starts_with("Retrieval")),
            "retrieval seed leaked false positives: {:?}",
            retrieval_hits
                .iter()
                .map(|m| m.title.as_str())
                .collect::<Vec<_>>(),
        );
    }

    /// Issue #1320 regression — a seed title made only of stopwords must
    /// not pull any rows. Pre-fix the FTS5 OR-query expanded to a no-op
    /// against the stopword set; post-fix the seed tokenises to empty
    /// after stopword removal so the Jaccard floor returns 0 for every
    /// candidate.
    #[test]
    fn find_contradictions_pure_stopword_seed_returns_empty_1320() {
        const NAMESPACE: &str = "v1-p5-stopword";

        let conn = test_db();
        let stored = make_memory("The thing is the other thing", NAMESPACE, Tier::Long, 5);
        insert(&conn, &stored).unwrap();

        let candidates = find_contradictions(&conn, "the is a", NAMESPACE).unwrap();
        assert!(
            candidates.is_empty(),
            "pure-stopword seed pulled candidates: {:?}",
            candidates
                .iter()
                .map(|m| m.title.as_str())
                .collect::<Vec<_>>(),
        );
    }

    /// Issue #1320 — the stage-2 filter must not over-prune the legitimate
    /// near-duplicate case. "Database is PostgreSQL" and "Database is
    /// MySQL" share `{database}` after stopword removal — Jaccard 1/3,
    /// which passes the 0.30 floor. Pinned next to the false-positive
    /// test so a future tightening of the floor can't silently regress
    /// the supported "similar-title" detection.
    #[test]
    fn find_contradictions_similar_titles_still_caught_1320() {
        const NAMESPACE: &str = "v1-p5-positive";

        let conn = test_db();
        for (title, priority) in [("Database is PostgreSQL", 8), ("Database is MySQL", 5)] {
            insert(&conn, &make_memory(title, NAMESPACE, Tier::Long, priority)).unwrap();
        }

        let candidates = find_contradictions(&conn, "Database is PostgreSQL", NAMESPACE).unwrap();
        let titles: Vec<&str> = candidates.iter().map(|m| m.title.as_str()).collect();
        assert!(
            titles.contains(&"Database is MySQL"),
            "similar-title detection regressed: {titles:?}",
        );
    }

    /// Pins `CONTRADICTION_TITLE_JACCARD_FLOOR` at 0.30.
    #[test]
    fn contradiction_title_jaccard_floor_pinned_1320() {
        // 0.30 is the v0.7.0 #1320 calibration landing. Lowering it
        // re-introduces stopword noise; raising it breaks the
        // "Database is PostgreSQL / MySQL" near-duplicate case
        // (Jaccard 1/3 ≈ 0.333). Either direction needs an issue ticket
        // and a fresh calibration sweep.
        let drift = (CONTRADICTION_TITLE_JACCARD_FLOOR - 0.30).abs();
        assert!(
            drift < f32::EPSILON,
            "floor drifted: {CONTRADICTION_TITLE_JACCARD_FLOOR}",
        );
    }

    /// `contradiction_title_tokens` lowercases the content words of a title
    /// and drops the stopwords "the" and "is".
    #[test]
    fn contradiction_title_tokens_strips_stopwords_and_lowercases_1320() {
        let tokens = contradiction_title_tokens("The Database Is PostgreSQL");

        for kept in ["database", "postgresql"] {
            assert!(tokens.contains(kept));
        }
        for dropped in ["the", "is"] {
            assert!(!tokens.contains(dropped));
        }
    }

    /// A link created with the `"related_to"` relation is returned by
    /// `get_links` for the source id as `MemoryLinkRelation::RelatedTo`.
    #[test]
    fn create_and_get_links() {
        let conn = test_db();
        let source_id = insert(&conn, &make_memory("Memory A", "test", Tier::Long, 5)).unwrap();
        let target_id = insert(&conn, &make_memory("Memory B", "test", Tier::Long, 5)).unwrap();

        create_link(&conn, &source_id, &target_id, "related_to").unwrap();

        let outgoing = get_links(&conn, &source_id).unwrap();
        assert_eq!(outgoing.len(), 1);
        let expected_relation = crate::models::MemoryLinkRelation::RelatedTo;
        assert_eq!(outgoing[0].relation, expected_relation);
    }

    /// `consolidate` replaces its source memories with one new memory that
    /// carries the requested title and tier.
    #[test]
    fn consolidate_merges_memories() {
        let conn = test_db();
        let part_ids = [
            insert(&conn, &make_memory("Part 1", "test", Tier::Mid, 5)).unwrap(),
            insert(&conn, &make_memory("Part 2", "test", Tier::Mid, 5)).unwrap(),
        ];

        let merged_id = consolidate(
            &conn,
            &part_ids,
            "Combined",
            "Part 1 + Part 2",
            "test",
            &Tier::Long,
            "test",
            "test-consolidator",
        )
        .unwrap();

        // Original memories should be deleted
        for part_id in &part_ids {
            assert!(get(&conn, part_id).unwrap().is_none());
        }

        // New memory should exist
        let merged = get(&conn, &merged_id).unwrap().unwrap();
        assert_eq!(merged.title, "Combined");
        assert_eq!(merged.tier, Tier::Long);
    }

    /// `stats.total` counts every inserted row regardless of namespace or
    /// tier.
    #[test]
    fn stats_counts() {
        let conn = test_db();
        let db_path = std::path::Path::new(":memory:");
        let rows = [
            ("A", "ns1", Tier::Long),
            ("B", "ns1", Tier::Mid),
            ("C", "ns2", Tier::Short),
        ];
        for (title, namespace, tier) in rows {
            insert(&conn, &make_memory(title, namespace, tier, 5)).unwrap();
        }

        let summary = stats(&conn, db_path).unwrap();
        assert_eq!(summary.total, 3);
    }

    /// `gc` without archiving removes a row whose `expires_at` lies in the
    /// past and reports it in the removed count.
    #[test]
    fn gc_removes_expired() {
        let conn = test_db();
        let mut expired = make_memory("Expired", "test", Tier::Short, 5);
        expired.expires_at = Some("2020-01-01T00:00:00+00:00".to_string()); // past
        insert(&conn, &expired).unwrap();

        let removed_count = gc(&conn, false).unwrap();
        assert_eq!(removed_count, 1);
    }

    /// `gc` leaves a long-tier memory built by `make_memory` in place and
    /// reports zero removals.
    #[test]
    fn gc_preserves_long_term() {
        let conn = test_db();
        let permanent = make_memory("Permanent", "test", Tier::Long, 5);
        insert(&conn, &permanent).unwrap();

        let removed_count = gc(&conn, false).unwrap();
        assert_eq!(removed_count, 0);
    }

    /// `gc` with archiving enabled copies the expired row into the archive
    /// with reason `"ttl_expired"` before removing it.
    #[test]
    fn gc_archives_before_delete() {
        let conn = test_db();
        let mut expired = make_memory("Archivable", "test", Tier::Short, 5);
        expired.expires_at = Some("2020-01-01T00:00:00+00:00".to_string());
        insert(&conn, &expired).unwrap();

        let removed_count = gc(&conn, true).unwrap();
        assert_eq!(removed_count, 1);

        // Should be in archive
        let archive_rows = list_archived(&conn, None, 10, 0).unwrap();
        assert_eq!(archive_rows.len(), 1);
        let archived_row = &archive_rows[0];
        assert_eq!(archived_row["title"], "Archivable");
        assert_eq!(archived_row["archive_reason"], "ttl_expired");
    }

    /// Restoring an archived row brings it back into the active table with
    /// its original tier and `expires_at`.
    #[test]
    fn restore_archived_memory() {
        // v0.6.3.1 P2 (G5) — restore preserves the original tier and
        // expires_at instead of resetting to long/permanent. Pre-v17 this
        // test asserted `is_none()` for expires_at — that was the bug
        // being fixed.
        let conn = test_db();
        let original_expiry = "2020-01-01T00:00:00+00:00".to_string();
        let mut short_lived = make_memory("Restorable", "test", Tier::Short, 5);
        short_lived.expires_at = Some(original_expiry.clone());
        let id = insert(&conn, &short_lived).unwrap();

        gc(&conn, true).unwrap();
        let active_lookup = get(&conn, &id).unwrap();
        assert!(active_lookup.is_none()); // gone from active

        let was_restored = restore_archived(&conn, &id).unwrap();
        assert!(was_restored);

        let revived = get(&conn, &id).unwrap().unwrap();
        assert_eq!(revived.title, "Restorable");
        assert_eq!(
            revived.tier.as_str(),
            Tier::Short.as_str(),
            "G5: restore must preserve the original tier"
        );
        assert_eq!(
            revived.expires_at,
            Some(original_expiry),
            "G5: restore must preserve the original expires_at"
        );
    }

    /// `purge_archive` with no age bound empties the archive and reports
    /// how many rows it removed.
    #[test]
    fn purge_archive_removes_all() {
        let conn = test_db();
        let mut expired = make_memory("Purgeable", "test", Tier::Short, 5);
        expired.expires_at = Some("2020-01-01T00:00:00+00:00".to_string());
        insert(&conn, &expired).unwrap();
        gc(&conn, true).unwrap();

        let purged_count = purge_archive(&conn, None).unwrap();
        assert_eq!(purged_count, 1);

        let leftover = list_archived(&conn, None, 10, 0).unwrap();
        assert_eq!(leftover.len(), 0);
    }

    /// A negative day count is refused with an error mentioning
    /// "non-negative".
    #[test]
    fn purge_archive_rejects_negative_days() {
        let conn = test_db();
        let outcome = purge_archive(&conn, Some(-1));
        assert!(outcome.is_err());
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("non-negative"));
    }

    /// `restore_archived` must fail, not overwrite, when a row with the
    /// archived id is already present in the active `memories` table.
    #[test]
    fn restore_rejects_active_id_collision() {
        let conn = test_db();
        let mut mem = make_memory("Collision Test", "test", Tier::Short, 5);
        // An already-past expiry makes the row eligible for the gc pass below.
        mem.expires_at = Some("2020-01-01T00:00:00+00:00".to_string());
        let id = insert(&conn, &mem).unwrap();

        // Archive it via GC
        gc(&conn, true).unwrap();
        assert!(get(&conn, &id).unwrap().is_none());

        // Manually insert a memory with the SAME id but different title into active table
        // (raw SQL so the colliding id can be chosen explicitly).
        conn.execute(
            "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, confidence, source, access_count, created_at, updated_at)
             VALUES (?1, 'long', 'test', 'Blocker Title', 'blocks restore', '[]', 5, 1.0, 'test', 0, datetime('now'), datetime('now'))",
            rusqlite::params![id],
        ).unwrap();

        // Restore should fail because id exists in active table
        let result = restore_archived(&conn, &id);
        assert!(result.is_err());
        // The error text is pinned so the failure reason stays identifiable.
        assert!(
            result
                .unwrap_err()
                .to_string()
                .contains("already exists in active table")
        );
    }

    /// `archive_stats` reports every archived row under `archived_total`.
    #[test]
    fn archive_stats_counts() {
        let conn = test_db();
        let mut first = make_memory("Stats A", "ns1", Tier::Short, 5);
        first.expires_at = Some("2020-01-01T00:00:00+00:00".to_string());
        let mut second = make_memory("Stats B", "ns1", Tier::Short, 5);
        second.expires_at = Some("2020-01-01T00:00:00+00:00".to_string());
        for expired in [&first, &second] {
            insert(&conn, expired).unwrap();
        }
        gc(&conn, true).unwrap();

        let summary = archive_stats(&conn).unwrap();
        assert_eq!(summary["archived_total"], 2);
    }

    /// Explicit archiving moves a live row into the archive with the
    /// caller's reason; repeating the call is a reported no-op.
    #[test]
    fn archive_memory_moves_live_row_to_archive() {
        // S29 — explicit archive endpoint must move the row out of
        // `memories` and into `archived_memories` with the caller-supplied
        // reason. Unlike gc(archive=true), this is NOT gated on
        // `expires_at` — the caller is asking for it right now.
        let conn = test_db();
        let live = make_memory("Archive me", "s29", Tier::Long, 5);
        let id = insert(&conn, &live).unwrap();

        let first_call = archive_memory(&conn, &id, Some("explicit")).unwrap();
        assert!(first_call, "live row must be archived on first call");
        let active_lookup = get(&conn, &id).unwrap();
        assert!(
            active_lookup.is_none(),
            "row must be removed from active table"
        );

        let archive_rows = list_archived(&conn, None, 10, 0).unwrap();
        assert_eq!(archive_rows.len(), 1);
        let archived_row = &archive_rows[0];
        assert_eq!(archived_row["id"], id);
        assert_eq!(archived_row["archive_reason"], "explicit");

        // Second call is a no-op — row is already out of `memories`.
        let second_call = archive_memory(&conn, &id, Some("explicit")).unwrap();
        assert!(
            !second_call,
            "second archive call must report no-op (no live row)"
        );
    }

    /// Archiving an unknown id returns `Ok(false)` instead of an error.
    #[test]
    fn archive_memory_missing_id_returns_false() {
        // Peers that never saw M1 must no-op, not error, on sync_push
        // archives fanout.
        let conn = test_db();
        let outcome = archive_memory(&conn, "nonexistent-id", None);
        let was_moved = outcome.unwrap();
        assert!(!was_moved);
    }

    /// When no reason is supplied, the archived row records `"archive"` as
    /// its `archive_reason`.
    #[test]
    fn archive_memory_default_reason_is_archive() {
        let conn = test_db();
        let live = make_memory("Default reason", "s29", Tier::Long, 5);
        let id = insert(&conn, &live).unwrap();

        let was_moved = archive_memory(&conn, &id, None).unwrap();
        assert!(was_moved);

        let archive_rows = list_archived(&conn, None, 10, 0).unwrap();
        assert_eq!(archive_rows[0]["archive_reason"], "archive");
    }

    /// `export_all` returns every stored memory and `export_links` every
    /// stored link.
    #[test]
    fn export_all_and_links() {
        let conn = test_db();
        let newer_id = insert(&conn, &make_memory("Export A", "test", Tier::Long, 5)).unwrap();
        let older_id = insert(&conn, &make_memory("Export B", "test", Tier::Long, 5)).unwrap();
        create_link(&conn, &newer_id, &older_id, "supersedes").unwrap();

        let exported_memories = export_all(&conn).unwrap();
        let exported_links = export_links(&conn).unwrap();
        assert_eq!(exported_memories.len(), 2);
        assert_eq!(exported_links.len(), 1);
    }

    /// `list_namespaces` returns one entry per distinct namespace, however
    /// many memories each holds.
    #[test]
    fn list_namespaces_counts() {
        let conn = test_db();
        for (title, namespace) in [("A", "alpha"), ("B", "alpha"), ("C", "beta")] {
            insert(&conn, &make_memory(title, namespace, Tier::Long, 5)).unwrap();
        }

        let namespaces = list_namespaces(&conn).unwrap();
        assert_eq!(namespaces.len(), 2);
    }

    /// With no `/` in any namespace, every namespace is a direct child of
    /// the synthetic root and carries its own memory count.
    #[test]
    fn taxonomy_flat_namespaces_only() {
        let conn = test_db();
        for (title, namespace) in [("A", "alpha"), ("B", "alpha"), ("C", "beta")] {
            insert(&conn, &make_memory(title, namespace, Tier::Long, 5)).unwrap();
        }

        let tax = get_taxonomy(&conn, None, 8, 1000).unwrap();
        assert_eq!(tax.total_count, 3);
        assert!(!tax.truncated);

        let root = &tax.tree;
        assert_eq!(root.namespace, "");
        assert_eq!(root.subtree_count, 3);
        assert_eq!(root.count, 0); // no memories at the synthetic root
        assert_eq!(root.children.len(), 2);

        let alpha = root.children.iter().find(|c| c.name == "alpha").unwrap();
        assert_eq!(alpha.count, 2);
        assert_eq!(alpha.subtree_count, 2);
        assert!(alpha.children.is_empty());

        let beta = root.children.iter().find(|c| c.name == "beta").unwrap();
        assert_eq!(beta.count, 1);
    }

    /// Namespaces at mixed depths build a nested tree whose
    /// `subtree_count` aggregates every descendant, while `count` covers
    /// only memories stored at exactly that node.
    #[test]
    fn taxonomy_hierarchical_tree() {
        // Mixed depths: tree must aggregate counts up the spine.
        let conn = test_db();
        let rows = [
            ("a", "alphaone"),
            ("b", "alphaone/eng"),
            ("c", "alphaone/eng/platform"),
            ("d", "alphaone/eng/platform"),
            ("e", "alphaone/sales"),
        ];
        for (title, namespace) in rows {
            insert(&conn, &make_memory(title, namespace, Tier::Long, 5)).unwrap();
        }

        let tax = get_taxonomy(&conn, None, 8, 1000).unwrap();
        let root = &tax.tree;
        assert_eq!(tax.total_count, 5);
        assert_eq!(root.subtree_count, 5);
        assert_eq!(root.children.len(), 1);

        let top = &root.children[0];
        assert_eq!(top.name, "alphaone");
        assert_eq!(top.namespace, "alphaone");
        assert_eq!(top.count, 1); // memory "a" lives at exactly "alphaone"
        assert_eq!(top.subtree_count, 5);
        assert_eq!(top.children.len(), 2);

        let eng = top.children.iter().find(|c| c.name == "eng").unwrap();
        assert_eq!(eng.namespace, "alphaone/eng");
        assert_eq!(eng.count, 1);
        assert_eq!(eng.subtree_count, 3);

        let platform = &eng.children[0];
        assert_eq!(platform.name, "platform");
        assert_eq!(platform.namespace, "alphaone/eng/platform");
        assert_eq!(platform.count, 2);
        assert_eq!(platform.subtree_count, 2);
        assert!(platform.children.is_empty());
    }

    /// A prefix restricts the taxonomy to that namespace and its `/`
    /// descendants; the prefix node itself becomes the tree root.
    #[test]
    fn taxonomy_prefix_scopes_subtree() {
        let conn = test_db();
        let rows = [
            ("a", "alphaone/eng"),
            ("b", "alphaone/eng/platform"),
            ("c", "alphaone/sales"),
            // Sibling that happens to share a string prefix — must NOT bleed in.
            ("d", "alphaone-sibling"),
            ("e", "other"),
        ];
        for (title, namespace) in rows {
            insert(&conn, &make_memory(title, namespace, Tier::Long, 5)).unwrap();
        }

        let tax = get_taxonomy(&conn, Some("alphaone/eng"), 8, 1000).unwrap();
        assert_eq!(tax.total_count, 2);

        let scoped_root = &tax.tree;
        assert_eq!(scoped_root.namespace, "alphaone/eng");
        assert_eq!(scoped_root.name, "eng");
        assert_eq!(scoped_root.count, 1);
        assert_eq!(scoped_root.subtree_count, 2);
        assert_eq!(scoped_root.children.len(), 1);

        let only_child = &scoped_root.children[0];
        assert_eq!(only_child.name, "platform");
        assert_eq!(only_child.count, 1);
    }

    /// #1531 L5 — `validate_namespace` permits the LIKE metacharacters
    /// `%` / `_` inside segments (historical flexibility), so the taxonomy
    /// prefix walk has to escape its descendant pattern. Pre-fix the
    /// unescaped `LIKE ?2 || '/%'` let prefix `a%` aggregate the `ax/...`
    /// subtree.
    #[test]
    fn taxonomy_prefix_like_metacharacters_do_not_widen_match_l5() {
        let conn = test_db();
        for (title, namespace) in [("a", "a%/child"), ("b", "ax/child"), ("c", "a_/child")] {
            insert(&conn, &make_memory(title, namespace, Tier::Long, 5)).unwrap();
        }

        // Literal `a%` prefix must scope to the `a%` subtree only.
        let percent_scope = get_taxonomy(&conn, Some("a%"), 8, 1000).unwrap();
        assert_eq!(
            percent_scope.total_count, 1,
            "prefix 'a%' must not aggregate 'ax/...' or 'a_/...' subtrees"
        );

        // Literal `a_` prefix likewise.
        let underscore_scope = get_taxonomy(&conn, Some("a_"), 8, 1000).unwrap();
        assert_eq!(
            underscore_scope.total_count, 1,
            "prefix 'a_' must not aggregate single-char-wildcard siblings"
        );

        // Plain prefixes are unchanged.
        let plain_scope = get_taxonomy(&conn, Some("ax"), 8, 1000).unwrap();
        assert_eq!(plain_scope.total_count, 1);
    }

    /// A depth limit stops child nodes from being rendered, but the
    /// deepest rendered node still reports its full `subtree_count`.
    #[test]
    fn taxonomy_depth_clamps_but_preserves_subtree_counts() {
        let conn = test_db();
        let rows = [
            ("a", "alphaone/eng/platform/db"),
            ("b", "alphaone/eng/platform/api"),
        ];
        for (title, namespace) in rows {
            insert(&conn, &make_memory(title, namespace, Tier::Long, 5)).unwrap();
        }

        let tax = get_taxonomy(&conn, None, 2, 1000).unwrap();
        assert_eq!(tax.total_count, 2);

        // Depth=2 below the empty prefix means we descend exactly two
        // levels (alphaone → eng); deeper segments are folded into
        // `eng.subtree_count` without rendering child nodes.
        let eng = &tax.tree.children[0].children[0];
        assert!(eng.children.is_empty());
        assert_eq!(eng.subtree_count, 2);
        assert_eq!(eng.count, 0); // nothing at exactly "alphaone/eng"
    }

    /// A row whose `expires_at` lies in the past is counted neither in the
    /// tree nor in `total_count`.
    #[test]
    fn taxonomy_excludes_expired_memories() {
        // Mirror of `list_namespaces` semantics — expired rows must not
        // count toward either the tree or `total_count`.
        let conn = test_db();
        let mut live_row = make_memory("alive", "alpha", Tier::Long, 5);
        let mut expired_row = make_memory("dead", "alpha", Tier::Short, 5);
        live_row.expires_at = None;
        // Force the short-tier memory's expiry into the past.
        expired_row.expires_at = Some("2000-01-01T00:00:00Z".to_string());
        insert(&conn, &live_row).unwrap();
        insert(&conn, &expired_row).unwrap();

        let tax = get_taxonomy(&conn, None, 8, 1000).unwrap();
        assert_eq!(tax.total_count, 1);

        let top_level = &tax.tree.children;
        assert_eq!(top_level.len(), 1);
        assert_eq!(top_level[0].count, 1);
    }

    /// When the namespace limit cuts the walk short, `truncated` is set
    /// and `total_count` still reflects every memory.
    #[test]
    fn taxonomy_truncates_at_limit_but_total_stays_honest() {
        let conn = test_db();
        let namespaces = ["aa", "bb", "cc", "dd", "ee"];
        for namespace in namespaces {
            insert(&conn, &make_memory("m", namespace, Tier::Long, 5)).unwrap();
        }

        // Limit drops 3 namespaces from the walk; total_count must
        // still see all 5 memories so renderers can warn the user.
        let tax = get_taxonomy(&conn, None, 8, 2).unwrap();
        assert_eq!(tax.total_count, 5);
        assert!(tax.truncated);
        assert_eq!(tax.tree.children.len(), 2);
    }

    /// `forget` with only a namespace filter deletes every row in that
    /// namespace and leaves other namespaces alone.
    #[test]
    fn forget_by_namespace() {
        let conn = test_db();
        for (title, namespace) in [("A", "delete-me"), ("B", "delete-me"), ("C", "keep")] {
            insert(&conn, &make_memory(title, namespace, Tier::Long, 5)).unwrap();
        }

        let deleted_count = forget(&conn, Some("delete-me"), None, None, false).unwrap();
        assert_eq!(deleted_count, 2);

        let survivors = list(&conn, None, None, 100, 0, None, None, None, None, None).unwrap();
        assert_eq!(survivors.len(), 1);
    }

    /// An embedding written with `set_embedding` is read back by
    /// `get_embedding` with the same length and first component.
    #[test]
    fn set_and_get_embedding() {
        let conn = test_db();
        let id = insert(&conn, &make_memory("Embed test", "test", Tier::Long, 5)).unwrap();

        let vector = [0.1f32, 0.2, 0.3, 0.4];
        set_embedding(&conn, &id, &vector).unwrap();

        let stored = get_embedding(&conn, &id).unwrap().unwrap();
        assert_eq!(stored.len(), 4);
        let first_component_error = (stored[0] - 0.1).abs();
        assert!(first_component_error < 1e-6);
    }

    // -- #1595 / #1598 — resilient-backfill + reembed storage helpers --

    /// #1595 — the keyset fetch returns rows strictly past the cursor in
    /// `id` order, and a row that gains an embedding drops out of the
    /// scan.
    #[test]
    fn unembedded_batch_after_cursor_paginates_1595() {
        let conn = test_db();
        let mut sorted_ids: Vec<String> = Vec::new();
        for i in 0..5 {
            let row = make_memory(&format!("row-{i}"), "bf-1595", Tier::Long, 5);
            sorted_ids.push(insert(&conn, &row).unwrap());
        }
        sorted_ids.sort();

        let first_page = get_unembedded_ids_batch_after(&conn, None, 2).unwrap();
        assert_eq!(first_page.len(), 2);
        assert_eq!(
            first_page[0].0, sorted_ids[0],
            "scan starts at the smallest id"
        );
        let cursor = first_page.last().unwrap().0.clone();

        let remainder = get_unembedded_ids_batch_after(&conn, Some(&cursor), 10).unwrap();
        assert_eq!(remainder.len(), 3);
        assert!(
            remainder
                .iter()
                .all(|(id, _, _)| id.as_str() > cursor.as_str()),
            "every row must sort strictly after the cursor"
        );

        // Embedded rows leave the unembedded predicate.
        set_embedding(&conn, &sorted_ids[0], &[0.1, 0.2]).unwrap();
        let still_unembedded = get_unembedded_ids_batch_after(&conn, None, 10).unwrap();
        assert_eq!(still_unembedded.len(), 4);
        assert!(still_unembedded.iter().all(|(id, _, _)| id != &sorted_ids[0]));
    }

    /// #1598 — the reembed scan returns rows whether or not they already
    /// carry an embedding, restricts itself to the requested namespace,
    /// and resumes strictly after a cursor id.
    #[test]
    fn memory_texts_batch_namespace_and_cursor_1598() {
        let conn = test_db();
        let mut ns_a_ids: Vec<String> = Vec::with_capacity(3);
        for i in 0..3 {
            let mem = make_memory(&format!("a-{i}"), "reembed-a", Tier::Long, 5);
            ns_a_ids.push(insert(&conn, &mem).unwrap());
        }
        ns_a_ids.sort();
        for i in 0..2 {
            let mem = make_memory(&format!("b-{i}"), "reembed-b", Tier::Long, 5);
            insert(&conn, &mem).unwrap();
        }
        // Reembed replaces existing vectors (it is not a backfill), so a
        // row that already has an embedding MUST still be scanned.
        set_embedding(&conn, &ns_a_ids[0], &[0.5, 0.5]).unwrap();

        let everything = get_memory_texts_batch(&conn, None, None, 100).unwrap();
        assert_eq!(everything.len(), 5, "unfiltered scan sees every live row");

        let only_a = get_memory_texts_batch(&conn, Some("reembed-a"), None, 100).unwrap();
        assert_eq!(only_a.len(), 3);
        assert_eq!(only_a[0].0, ns_a_ids[0], "embedded row still scanned");

        // Take a one-row page, then resume from its id.
        let head = get_memory_texts_batch(&conn, Some("reembed-a"), None, 1).unwrap();
        let cursor = head[0].0.clone();
        let tail = get_memory_texts_batch(&conn, Some("reembed-a"), Some(&cursor), 100).unwrap();
        assert_eq!(tail.len(), 2);
        for (id, _, _) in &tail {
            assert!(id.as_str() > cursor.as_str());
        }
    }

    /// #1598 — the reembed writer REPLACES vectors across a dimension
    /// change that the checked writer (G4 invariant) rejects with
    /// `EmbeddingDimMismatch`; unknown ids and empty input write nothing.
    #[test]
    fn set_embeddings_batch_reembed_bypasses_dim_invariant_1598() {
        let mut conn = test_db();
        let id1 = insert(&conn, &make_memory("dim-est", "reembed-dim", Tier::Long, 5)).unwrap();
        let id2 = insert(&conn, &make_memory("dim-mig", "reembed-dim", Tier::Long, 5)).unwrap();
        // The namespace starts out with a single 4-dim vector.
        set_embedding(&conn, &id1, &[0.1, 0.2, 0.3, 0.4]).unwrap();

        // Checked writer: an 8-dim vector into the 4-dim namespace fails.
        let attempt = set_embeddings_batch(&mut conn, &[(id2.clone(), vec![0.1_f32; 8])]);
        let refused = attempt.unwrap_err();
        let mismatch = refused.downcast_ref::<EmbeddingDimMismatch>();
        assert!(
            mismatch.is_some(),
            "checked writer must refuse the dim change: {refused}"
        );

        // Migration writer: both rows are rewritten at the new dim.
        let entries = vec![
            (id1.clone(), vec![0.9_f32; 8]),
            (id2.clone(), vec![0.8_f32; 8]),
        ];
        let written = set_embeddings_batch_reembed(&mut conn, &entries).unwrap();
        assert_eq!(written, 2);
        for id in [&id1, &id2] {
            assert_eq!(get_embedding(&conn, id).unwrap().unwrap().len(), 8);
        }
        let namespace_dim = namespace_embedding_dim(&conn, "reembed-dim").unwrap();
        assert_eq!(
            namespace_dim,
            Some(8),
            "namespace converges to the target dim"
        );

        // An id that does not exist is skipped rather than counted.
        let unknown = [("no-such-id".to_string(), vec![0.1_f32; 8])];
        let n = set_embeddings_batch_reembed(&mut conn, &unknown).unwrap();
        assert_eq!(n, 0);
        // Empty input is a no-op.
        let none_written = set_embeddings_batch_reembed(&mut conn, &[]).unwrap();
        assert_eq!(none_written, 0);
    }

    /// #1598 — dry-run coverage counts for the whole store, for each
    /// populated namespace, and for a namespace with no rows.
    #[test]
    fn embedding_coverage_counts_1598() {
        let conn = test_db();
        let id_a = insert(&conn, &make_memory("c-a", "cov-a", Tier::Long, 5)).unwrap();
        insert(&conn, &make_memory("c-b", "cov-a", Tier::Long, 5)).unwrap();
        insert(&conn, &make_memory("c-c", "cov-b", Tier::Long, 5)).unwrap();
        set_embedding(&conn, &id_a, &[0.1, 0.2]).unwrap();

        // (namespace filter, expected coverage pair). Three rows exist in
        // total and exactly one of them (in `cov-a`) has an embedding.
        let cases = [
            (None, (3, 1)),
            (Some("cov-a"), (2, 1)),
            (Some("cov-b"), (1, 0)),
            (Some("cov-none"), (0, 0)),
        ];
        for (ns, expected) in cases {
            assert_eq!(embedding_coverage(&conn, ns).unwrap(), expected);
        }
    }

    /// #1598 — the pre-flight dim survey reports each stored dimension
    /// in ascending order, narrows to a namespace when asked, and is
    /// empty for a namespace with no rows.
    #[test]
    fn distinct_embedding_dims_lists_mixed_1598() {
        let mut conn = test_db();
        let id_a = insert(&conn, &make_memory("d-a", "dims-a", Tier::Long, 5)).unwrap();
        let id_b = insert(&conn, &make_memory("d-b", "dims-b", Tier::Long, 5)).unwrap();
        let id_c = insert(&conn, &make_memory("d-c", "dims-b", Tier::Long, 5)).unwrap();
        set_embedding(&conn, &id_a, &[0.1, 0.2]).unwrap();
        set_embedding(&conn, &id_b, &[0.1; 8]).unwrap();
        // Two dims inside ONE namespace only occur mid-migration, so the
        // second `dims-b` vector goes in through the reembed writer.
        let mid_migration = [(id_c, vec![0.2_f32; 4])];
        set_embeddings_batch_reembed(&mut conn, &mid_migration).unwrap();

        let all_dims = distinct_embedding_dims(&conn, None).unwrap();
        assert_eq!(all_dims, vec![2, 4, 8]);

        let dims_b = distinct_embedding_dims(&conn, Some("dims-b")).unwrap();
        assert_eq!(dims_b, vec![4, 8]);

        let dims_none = distinct_embedding_dims(&conn, Some("dims-none")).unwrap();
        assert!(dims_none.is_empty());
    }

    // -- Pillar 2 / Stream D — memory_check_duplicate -------------------

    /// Test helper: inserts a `Tier::Long`, priority-5 memory with the
    /// given title and namespace, attaches `embedding` to it, and returns
    /// the id produced by `insert`. Panics if either step fails.
    fn insert_with_embedding(
        conn: &Connection,
        title: &str,
        ns: &str,
        embedding: &[f32],
    ) -> String {
        let new_id = insert(conn, &make_memory(title, ns, Tier::Long, 5)).unwrap();
        set_embedding(conn, &new_id, embedding).unwrap();
        new_id
    }

    /// With nothing inserted, `check_duplicate` reports no duplicate, no
    /// nearest match, and zero scanned candidates.
    #[test]
    fn check_duplicate_empty_db_returns_no_match() {
        let conn = test_db();
        let query = vec![1.0_f32, 0.0, 0.0];

        let report = check_duplicate(&conn, &query, None, 0.85).unwrap();

        assert_eq!(report.candidates_scanned, 0);
        assert!(report.nearest.is_none());
        assert!(!report.is_duplicate);
    }

    /// Among three candidates, the one with the highest cosine similarity
    /// to the query is returned as `nearest` and flagged as a duplicate.
    #[test]
    fn check_duplicate_finds_highest_cosine_match() {
        let conn = test_db();
        // Candidates against the query [1,0,0]:
        //   alpha = [1,0,0]     -> cosine 1.0
        //   beta  = [0.7,0.7,0] -> cosine ~0.707
        //   gamma = [0,1,0]     -> cosine 0.0
        // so `alpha` must win.
        let id_alpha = insert_with_embedding(&conn, "alpha", "ns", &[1.0, 0.0, 0.0]);
        let _id_beta = insert_with_embedding(&conn, "beta", "ns", &[0.7, 0.7, 0.0]);
        let _id_gamma = insert_with_embedding(&conn, "gamma", "ns", &[0.0, 1.0, 0.0]);

        let query = vec![1.0_f32, 0.0, 0.0];
        let report = check_duplicate(&conn, &query, None, 0.85).unwrap();

        let best = report.nearest.expect("expected a nearest match");
        assert_eq!(best.id, id_alpha);
        assert!(best.similarity > 0.99);
        assert_eq!(report.candidates_scanned, 3);
        assert!(report.is_duplicate);
        // The requested threshold is echoed back in the response.
        assert!((report.threshold - 0.85).abs() < 1e-6);
    }

    /// A candidate whose similarity is below the threshold is still
    /// returned as `nearest`, but `is_duplicate` stays false.
    #[test]
    fn check_duplicate_below_threshold_not_flagged_but_returns_nearest() {
        let conn = test_db();
        let id_beta = insert_with_embedding(&conn, "beta", "ns", &[0.7, 0.7, 0.0]);

        // cosine([1,0,0], [0.7,0.7,0]) is about 0.707, under the 0.85
        // threshold passed below.
        let query = vec![1.0_f32, 0.0, 0.0];
        let report = check_duplicate(&conn, &query, None, 0.85).unwrap();

        let closest = report
            .nearest
            .expect("nearest must surface even when below threshold");
        assert_eq!(closest.id, id_beta);
        assert!(!report.is_duplicate);
    }

    /// A caller-supplied threshold of 0.0 comes back as
    /// `DUPLICATE_THRESHOLD_MIN`, and an orthogonal candidate is not
    /// flagged as a duplicate.
    #[test]
    fn check_duplicate_threshold_clamped_to_floor() {
        let conn = test_db();
        let _ = insert_with_embedding(&conn, "x", "ns", &[1.0, 0.0, 0.0]);

        // Orthogonal to the stored vector, i.e. cosine 0.0.
        let query = vec![0.0_f32, 1.0, 0.0];
        // The permissive 0.0 must be raised to the floor so unrelated
        // content cannot be dressed up as a merge candidate.
        let report = check_duplicate(&conn, &query, None, 0.0).unwrap();

        let drift = (report.threshold - DUPLICATE_THRESHOLD_MIN).abs();
        assert!(drift < 1e-6);
        assert!(!report.is_duplicate);
    }

    /// With a namespace filter, only rows in that namespace are scanned,
    /// even when a closer match exists in another namespace.
    #[test]
    fn check_duplicate_namespace_filter_isolates_scan() {
        let conn = test_db();
        // An exact match for the query, stored outside the filtered
        // namespace.
        let _exact_elsewhere = insert_with_embedding(&conn, "x", "other", &[1.0, 0.0, 0.0]);
        let id_in_ns = insert_with_embedding(&conn, "y", "ns", &[0.6, 0.8, 0.0]);

        let query = vec![1.0_f32, 0.0, 0.0];
        let report = check_duplicate(&conn, &query, Some("ns"), 0.85).unwrap();

        assert_eq!(report.candidates_scanned, 1);
        let closest = report.nearest.expect("namespace filter ignored");
        assert_eq!(closest.id, id_in_ns);
    }

    /// A row whose `expires_at` lies in the past is not scanned, even
    /// though its embedding matches the query exactly.
    #[test]
    fn check_duplicate_skips_expired_rows() {
        let conn = test_db();
        // Short-tier memory that expired one minute ago: it is past the
        // live-row gate and must not be a candidate.
        let one_minute_ago = chrono::Utc::now() - chrono::Duration::seconds(60);
        let mut mem = make_memory("expired", "ns", Tier::Short, 5);
        mem.expires_at = Some(one_minute_ago.to_rfc3339());
        let id = insert(&conn, &mem).unwrap();
        set_embedding(&conn, &id, &[1.0, 0.0, 0.0]).unwrap();

        let query = vec![1.0_f32, 0.0, 0.0];
        let report = check_duplicate(&conn, &query, None, 0.85).unwrap();

        assert!(report.nearest.is_none());
        assert_eq!(report.candidates_scanned, 0);
    }

    /// Rows that have no embedding are not counted in
    /// `candidates_scanned`.
    #[test]
    fn check_duplicate_skips_unembedded_rows() {
        let conn = test_db();
        // Two rows in the same namespace: the first has an embedding,
        // the second does not.
        let id_embedded = insert_with_embedding(&conn, "with-emb", "ns", &[1.0, 0.0, 0.0]);
        let _ = insert(&conn, &make_memory("no-emb", "ns", Tier::Long, 5)).unwrap();

        let query = vec![1.0_f32, 0.0, 0.0];
        let report = check_duplicate(&conn, &query, None, 0.85).unwrap();

        assert_eq!(report.candidates_scanned, 1);
        let closest = report.nearest.expect("embedded match");
        assert_eq!(closest.id, id_embedded);
    }

    /// Regression: an embedding blob whose byte length is not a multiple
    /// of 4 is skipped instead of being scored.
    ///
    /// Before the fix, the trailing partial chunk was silently dropped
    /// via `chunks_exact` and the cosine was computed against a shorter
    /// candidate vector, which produced a misleading score.
    #[test]
    fn check_duplicate_skips_blob_with_non_multiple_of_4_length() {
        let conn = test_db();
        let id = insert(&conn, &make_memory("malformed-blob", "ns", Tier::Long, 5)).unwrap();
        // `set_embedding` only accepts `&[f32]`, so the 7-byte blob (one
        // byte short of two f32s) is written straight into sqlite.
        let malformed = [0u8; 7];
        conn.execute(
            "UPDATE memories SET embedding = ?1 WHERE id = ?2",
            params![&malformed[..], &id],
        )
        .unwrap();

        let query = vec![1.0_f32, 0.0];
        let report = check_duplicate(&conn, &query, None, 0.85).unwrap();

        assert_eq!(
            report.candidates_scanned, 0,
            "malformed blob must be skipped, not silently truncated"
        );
        assert!(report.nearest.is_none());
    }

    /// Regression: a well-formed blob whose dimension differs from the
    /// query's is skipped rather than scored.
    ///
    /// `cosine_similarity` zips its inputs and would silently truncate
    /// to the shorter one, producing a wrong similarity.
    #[test]
    fn check_duplicate_skips_blob_with_dimension_mismatch() {
        let conn = test_db();
        // Candidate: 3 dims, inserted through the normal path.
        let _id = insert_with_embedding(&conn, "different-dim", "ns", &[1.0, 0.0, 0.0]);

        // Query: 4 dims, so it cannot be compared with the candidate.
        let query = vec![1.0_f32, 0.0, 0.0, 0.0];
        let report = check_duplicate(&conn, &query, None, 0.85).unwrap();

        assert_eq!(
            report.candidates_scanned, 0,
            "dimension-mismatched candidate must be skipped"
        );
        assert!(report.nearest.is_none());
    }

    /// A freshly inserted memory with no embedding is reported by
    /// `get_unembedded_ids`.
    #[test]
    fn get_unembedded_returns_memoryless() {
        let conn = test_db();
        insert(&conn, &make_memory("No embed", "test", Tier::Long, 5)).unwrap();

        let pending = get_unembedded_ids(&conn).unwrap();
        assert_eq!(pending.len(), 1);
    }

    /// `health_check` returns `Ok(true)` on a fresh test database.
    #[test]
    fn health_check_passes() {
        let conn = test_db();
        let healthy = health_check(&conn).unwrap();
        assert!(healthy);
    }

    /// `sanitize_fts_query` drops FTS5 operator characters, keeps the
    /// search terms, substitutes a placeholder for empty input, and lets
    /// hyphenated tokens through.
    #[test]
    fn sanitize_fts_strips_operators_and_quotes() {
        // FTS5 operator characters must not survive sanitizing.
        let operators = sanitize_fts_query("test* \"injection\" (drop)", true);
        for forbidden in ['*', '(', ')'] {
            assert!(!operators.contains(forbidden));
        }

        // A query mixing terms with standalone boolean operators keeps
        // its search terms.
        let booleans = sanitize_fts_query("hello AND world OR NOT NEAR test", true);
        for term in ["hello", "world", "test"] {
            assert!(booleans.contains(term));
        }

        // Empty input yields the quoted placeholder.
        let empty = sanitize_fts_query("", true);
        assert_eq!(empty, "\"_empty_\"");

        // The `+` prefix operator is stripped (prevents exclusion
        // injection). `-` is preserved inside phrase-quoted tokens so
        // hyphenated content ("well-known", "foo-bar") searches correctly
        // against the unicode61 tokenizer; phrase-quoting keeps `-` from
        // reaching FTS5 as a prefix operator, closing the injection hole.
        let prefixed = sanitize_fts_query("-secret +required", true);
        assert!(!prefixed.contains('+'));
        for term in ["secret", "required"] {
            assert!(prefixed.contains(term));
        }

        // A hyphenated token passes through as a phrase search.
        let hyphenated = sanitize_fts_query("well-known", true);
        assert!(hyphenated.contains("well-known"));
    }

    /// The first eight characters of an id are enough for
    /// `get_by_prefix` to find the row.
    #[test]
    fn get_by_prefix_8char() {
        let conn = test_db();
        let id = insert(&conn, &make_memory("Prefix test", "test", Tier::Long, 5)).unwrap();

        let found = get_by_prefix(&conn, &id[..8]).unwrap().unwrap();

        assert_eq!(found.title, "Prefix test");
        assert_eq!(found.id, id);
    }

    /// Passing the complete id as the "prefix" still resolves to that
    /// row.
    #[test]
    fn get_by_prefix_full_uuid() {
        let conn = test_db();
        let id = insert(
            &conn,
            &make_memory("Full UUID prefix", "test", Tier::Long, 5),
        )
        .unwrap();

        // The full UUID used as a prefix still works (LIKE 'full-uuid%'
        // matches the exact id).
        let found = get_by_prefix(&conn, &id).unwrap().unwrap();
        assert_eq!(found.id, id);
    }

    /// A prefix that matches no row yields `Ok(None)` rather than an
    /// error.
    #[test]
    fn get_by_prefix_nonexistent() {
        let conn = test_db();
        let lookup = get_by_prefix(&conn, "ffffffff").unwrap();
        assert!(lookup.is_none());
    }

    /// A prefix shared by two ids is an error whose message says it is
    /// ambiguous, gives the match count, and lists both full ids.
    #[test]
    fn get_by_prefix_ambiguous() {
        let conn = test_db();
        // Two memories whose ids both start with "aaaa".
        let first_id = "aaaa1111-0000-0000-0000-000000000001";
        let second_id = "aaaa2222-0000-0000-0000-000000000002";
        let mut mem1 = make_memory("Ambig A", "test", Tier::Long, 5);
        mem1.id = first_id.to_string();
        insert(&conn, &mem1).unwrap();
        let mut mem2 = make_memory("Ambig B", "test2", Tier::Long, 5);
        mem2.id = second_id.to_string();
        insert(&conn, &mem2).unwrap();

        let err_msg = get_by_prefix(&conn, "aaaa")
            .expect_err("an ambiguous prefix must be rejected")
            .to_string();

        assert!(err_msg.contains("ambiguous"));
        assert!(err_msg.contains("2 matches"));
        // The matching full ids are listed so the user can pick one.
        assert!(
            err_msg.contains(first_id),
            "error should list matching IDs, got: {err_msg}"
        );
        assert!(err_msg.contains(second_id));
    }

    /// `resolve_id` finds a row by its full id and by an 8-character
    /// prefix, and returns `None` for an id that matches nothing.
    #[test]
    fn resolve_id_exact_then_prefix() {
        let conn = test_db();
        let id = insert(&conn, &make_memory("Resolve test", "test", Tier::Long, 5)).unwrap();

        // Full id.
        let by_exact = resolve_id(&conn, &id).unwrap().unwrap();
        assert_eq!(by_exact.id, id);

        // Leading eight characters.
        let by_prefix = resolve_id(&conn, &id[..8]).unwrap().unwrap();
        assert_eq!(by_prefix.id, id);

        // No such id.
        let missing = resolve_id(&conn, "zzzzzzzz").unwrap();
        assert!(missing.is_none());
    }

    /// `insert_if_newer` overwrites the stored content when the incoming
    /// row carries a later `updated_at`, and returns the same id.
    #[test]
    fn insert_if_newer_updates() {
        let conn = test_db();
        let mut mem = make_memory("Sync test", "test", Tier::Long, 5);
        let id = insert(&conn, &mem).unwrap();

        // Re-submit the same row with new content, stamped one hour
        // ahead of now.
        let one_hour_ahead = chrono::Utc::now() + chrono::Duration::hours(1);
        mem.id = id.clone();
        mem.content = "Updated via sync".to_string();
        mem.updated_at = one_hour_ahead.to_rfc3339();
        let result_id = insert_if_newer(&conn, &mem).unwrap();
        assert_eq!(result_id, id);

        let stored = get(&conn, &id).unwrap().unwrap();
        assert_eq!(stored.content, "Updated via sync");
    }

    // --- Metadata tests (Task 1.1) ---

    /// A memory built by `make_memory` and left untouched reads back
    /// with `{}` as its metadata.
    #[test]
    fn metadata_default_empty_object() {
        let conn = test_db();
        let id = insert(
            &conn,
            &make_memory("Default metadata", "test", Tier::Long, 5),
        )
        .unwrap();

        let stored = get(&conn, &id).unwrap().unwrap();
        let empty_object = serde_json::json!({});
        assert_eq!(stored.metadata, empty_object);
    }

    /// Flat metadata (one string value, one integer value) survives an
    /// insert followed by a `get`.
    #[test]
    fn metadata_store_and_retrieve() {
        let conn = test_db();
        let mut mem = make_memory("With metadata", "test", Tier::Long, 5);
        mem.metadata = serde_json::json!({"agent_id": "claude-1", "session": 42});
        let id = insert(&conn, &mem).unwrap();

        let stored_meta = get(&conn, &id).unwrap().unwrap().metadata;
        assert_eq!(stored_meta["agent_id"], "claude-1");
        assert_eq!(stored_meta["session"], 42);
    }

    /// Nested objects, arrays and floating-point values inside metadata
    /// survive an insert followed by a `get`.
    #[test]
    fn metadata_roundtrip_nested_json() {
        let conn = test_db();
        let mut mem = make_memory("Nested metadata", "test", Tier::Long, 5);
        mem.metadata = serde_json::json!({
            "agent": {"type": "ai:claude", "version": "4.6"},
            "tags_extra": ["experimental"],
            "score": 0.95
        });
        let id = insert(&conn, &mem).unwrap();

        let stored_meta = get(&conn, &id).unwrap().unwrap().metadata;
        assert_eq!(stored_meta["agent"]["type"], "ai:claude");
        assert_eq!(stored_meta["tags_extra"][0], "experimental");
        let score = stored_meta["score"].as_f64().unwrap();
        assert!((score - 0.95).abs() < f64::EPSILON);
    }

    /// `update` keeps the stored metadata when its metadata argument is
    /// `None`, and replaces it when new metadata is supplied.
    #[test]
    fn metadata_preserved_on_update() {
        let conn = test_db();
        let mut mem = make_memory("Update metadata", "test", Tier::Long, 5);
        mem.metadata = serde_json::json!({"key": "original"});
        let id = insert(&conn, &mem).unwrap();

        // Phase 1: change only the content; the last argument (metadata)
        // is `None`, so the stored metadata must be left alone.
        let content_only = update(
            &conn,
            &id,
            None,
            Some("new content"),
            None,
            None,
            None,
            None,
            None,
            None,
            None,
        )
        .unwrap();
        assert!(content_only.0);
        let after_content = get(&conn, &id).unwrap().unwrap();
        assert_eq!(after_content.metadata["key"], "original");
        assert_eq!(after_content.content, "new content");

        // Phase 2: supply new metadata; it must replace the old value.
        let new_meta = serde_json::json!({"key": "updated", "extra": true});
        let metadata_only = update(
            &conn,
            &id,
            None,
            None,
            None,
            None,
            None,
            None,
            None,
            None,
            Some(&new_meta),
        )
        .unwrap();
        assert!(metadata_only.0);
        let after_metadata = get(&conn, &id).unwrap().unwrap();
        assert_eq!(after_metadata.metadata["key"], "updated");
        assert_eq!(after_metadata.metadata["extra"], true);
    }

    /// Inserting a second memory with the same title and namespace
    /// leaves the row returned by `insert` carrying the second memory's
    /// metadata.
    #[test]
    fn metadata_preserved_on_upsert() {
        let conn = test_db();
        let mut original = make_memory("Upsert meta", "test", Tier::Long, 5);
        original.metadata = serde_json::json!({"version": 1});
        insert(&conn, &original).unwrap();

        // Same title + namespace: the upsert should update the metadata.
        let mut replacement = make_memory("Upsert meta", "test", Tier::Long, 5);
        replacement.metadata = serde_json::json!({"version": 2});
        let id = insert(&conn, &replacement).unwrap();

        let stored = get(&conn, &id).unwrap().unwrap();
        assert_eq!(stored.metadata["version"], 2);
    }

    /// Rows returned by both `list` and `search` carry the stored
    /// metadata.
    #[test]
    fn metadata_in_list_and_search() {
        let conn = test_db();
        let mut mem = make_memory("Searchable metadata", "test", Tier::Long, 8);
        mem.metadata = serde_json::json!({"source_model": "opus"});
        insert(&conn, &mem).unwrap();

        // `list` restricted to the "test" namespace.
        let listed = list(
            &conn,
            Some("test"),
            None,
            10,
            0,
            None,
            None,
            None,
            None,
            None,
        )
        .unwrap();
        assert_eq!(listed.len(), 1);
        assert_eq!(listed[0].metadata["source_model"], "opus");

        // `search` for a word from the title, in the same namespace.
        let searched = search(
            &conn,
            "Searchable",
            Some("test"),
            None,
            10,
            None,
            None,
            None,
            None,
            None,
            None,
            false,
        )
        .unwrap();
        assert_eq!(searched.len(), 1);
        assert_eq!(searched[0].metadata["source_model"], "opus");
    }

    /// The first memory returned by `recall` carries the stored
    /// metadata.
    #[test]
    fn metadata_in_recall() {
        let conn = test_db();
        let mut mem = make_memory("Recallable metadata", "test", Tier::Long, 8);
        mem.metadata = serde_json::json!({"context": "test-recall"});
        insert(&conn, &mem).unwrap();

        let outcome = recall(
            &conn,
            "Recallable",
            Some("test"),
            10,
            None,
            None,
            None,
            crate::SECS_PER_HOUR,
            crate::SECS_PER_DAY,
            None,
            None,
            false,
            None,
        )
        .unwrap();
        // The first tuple element holds the hits; the second (named
        // `_tokens` in the original destructuring) is not checked here.
        let hits = outcome.0;
        assert!(!hits.is_empty());
        assert_eq!(hits[0].0.metadata["context"], "test-recall");
    }

    /// Metadata is present in `export_all` output and survives being
    /// inserted into a second, fresh database.
    #[test]
    fn metadata_in_export_import() {
        let source = test_db();
        let mut mem = make_memory("Export metadata", "test", Tier::Long, 5);
        mem.metadata = serde_json::json!({"exported": true});
        insert(&source, &mem).unwrap();

        let exported = export_all(&source).unwrap();
        assert_eq!(exported.len(), 1);
        let record = &exported[0];
        assert_eq!(record.metadata["exported"], true);

        // Import the exported record into a fresh database.
        let target = test_db();
        insert(&target, record).unwrap();
        let imported = get(&target, &record.id).unwrap().unwrap();
        assert_eq!(imported.metadata["exported"], true);
    }

    /// On a database produced by `test_db`, the `metadata` column exists
    /// and holds the literal text `{}` for a row inserted without
    /// metadata.
    ///
    /// NOTE(review): despite the name, this body does not build an old
    /// schema and migrate it; it only checks the column on a fresh
    /// database.
    #[test]
    fn metadata_schema_migration() {
        let conn = test_db();
        let id = insert(&conn, &make_memory("Migration test", "test", Tier::Long, 5)).unwrap();

        // Read the raw column text, bypassing the row-to-memory mapping.
        let raw_metadata: String = conn
            .query_row(
                "SELECT metadata FROM memories WHERE id = ?1",
                params![id],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(raw_metadata, "{}");
    }

    /// Metadata is carried into the archive by GC and comes back intact
    /// when the archived row is restored.
    #[test]
    fn metadata_survives_archive_restore_cycle() {
        let conn = test_db();
        let mut mem = make_memory("Archivable", "test", Tier::Short, 5);
        mem.metadata = serde_json::json!({"origin": "archive-test"});
        // An expiry in the past makes the row eligible for GC.
        mem.expires_at = Some("2020-01-01T00:00:00+00:00".to_string());
        let id = insert(&conn, &mem).unwrap();

        // GC with archive=true should archive the expired memory.
        let collected = gc(&conn, true).unwrap();
        assert_eq!(collected, 1);

        // The archived copy carries the metadata.
        let archive = list_archived(&conn, None, 10, 0).unwrap();
        assert_eq!(archive.len(), 1);
        assert_eq!(archive[0]["metadata"]["origin"], "archive-test");

        // Restoring brings the row back with its metadata intact.
        assert!(restore_archived(&conn, &id).unwrap());
        let revived = get(&conn, &id).unwrap().unwrap();
        assert_eq!(revived.metadata["origin"], "archive-test");
    }

    /// `insert_if_newer` replaces metadata when the incoming row is
    /// newer and leaves it untouched when the incoming row is older.
    #[test]
    fn metadata_in_insert_if_newer() {
        let conn = test_db();
        let mut mem = make_memory("Sync metadata", "test", Tier::Long, 5);
        mem.metadata = serde_json::json!({"version": 1});
        let id = insert(&conn, &mem).unwrap();

        // Newer write (one hour ahead of now) with different metadata.
        let one_hour_ahead = chrono::Utc::now() + chrono::Duration::hours(1);
        mem.id = id.clone();
        mem.metadata = serde_json::json!({"version": 2, "synced": true});
        mem.updated_at = one_hour_ahead.to_rfc3339();
        insert_if_newer(&conn, &mem).unwrap();

        let after_newer = get(&conn, &id).unwrap().unwrap();
        assert_eq!(after_newer.metadata["version"], 2);
        assert_eq!(after_newer.metadata["synced"], true);

        // Older write (year 2020): its metadata must NOT overwrite the
        // stored one.
        mem.metadata = serde_json::json!({"version": 0, "stale": true});
        mem.updated_at = "2020-01-01T00:00:00+00:00".to_string();
        insert_if_newer(&conn, &mem).unwrap();

        let after_older = get(&conn, &id).unwrap().unwrap();
        // Still the metadata from the newer write.
        assert_eq!(after_older.metadata["version"], 2);
        assert!(after_older.metadata.get("stale").is_none());
    }

    /// `consolidate` merges the metadata of its source memories; for a
    /// key present in both sources, the value from the later source
    /// wins.
    #[test]
    fn metadata_merged_in_consolidate() {
        let conn = test_db();

        let mut first = make_memory("Consolidate A", "test", Tier::Long, 5);
        first.metadata = serde_json::json!({"agent": "claude", "shared": "from_a"});
        let first_id = insert(&conn, &first).unwrap();

        let mut second = make_memory("Consolidate B", "test", Tier::Long, 7);
        second.metadata = serde_json::json!({"model": "opus", "shared": "from_b"});
        let second_id = insert(&conn, &second).unwrap();

        let merged_id = consolidate(
            &conn,
            &[first_id, second_id],
            "Merged",
            "Combined content",
            "test",
            &Tier::Long,
            "consolidation",
            "test-consolidator",
        )
        .unwrap();

        let merged_meta = get(&conn, &merged_id).unwrap().unwrap().metadata;
        // Keys unique to each source are both present.
        assert_eq!(merged_meta["agent"], "claude");
        assert_eq!(merged_meta["model"], "opus");
        // The colliding key takes its value from the later source.
        assert_eq!(merged_meta["shared"], "from_b");
    }

    /// `consolidate` fails with a size-limit error when the merged
    /// metadata of its sources is too large.
    #[test]
    fn metadata_consolidate_rejects_oversized_merge() {
        let conn = test_db();

        // Builds an object with 500 distinct keys (`<prefix>_<i>`), each
        // mapped to a 60-character string. Two such objects with
        // different prefixes share no keys, so merging them keeps all
        // 1000 entries, which together exceed the 64KB limit.
        let bulky_metadata = |prefix: &str| {
            let entries: serde_json::Map<String, serde_json::Value> = (0..500)
                .map(|i| {
                    let key = format!("{prefix}_{i}");
                    (key, serde_json::Value::String("x".repeat(60)))
                })
                .collect();
            serde_json::Value::Object(entries)
        };

        let mut mem_a = make_memory("Big meta A", "test", Tier::Long, 5);
        mem_a.metadata = bulky_metadata("key_a");
        let id_a = insert(&conn, &mem_a).unwrap();

        let mut mem_b = make_memory("Big meta B", "test", Tier::Long, 5);
        mem_b.metadata = bulky_metadata("key_b");
        let id_b = insert(&conn, &mem_b).unwrap();

        // Each source is stored on its own; only the merge is too large.
        let msg = consolidate(
            &conn,
            &[id_a, id_b],
            "Oversized merge",
            "Should fail",
            "test",
            &Tier::Long,
            "consolidation",
            "test-consolidator",
        )
        .expect_err("consolidate should fail for oversized merged metadata")
        .to_string();
        assert!(
            msg.contains("merged metadata exceeds size limit"),
            "expected metadata size error, got: {msg}"
        );
    }

    /// Metadata strings containing pipes, control characters,
    /// backslashes, non-ASCII text and the empty string all survive an
    /// insert followed by a `get`, including inside a nested object.
    #[test]
    fn metadata_special_characters_roundtrip() {
        let conn = test_db();
        let mut mem = make_memory("Special chars metadata", "test", Tier::Long, 5);
        mem.metadata = serde_json::json!({
            "pipe": "a|b|c",
            "newline": "line1\nline2",
            "tab": "col1\tcol2",
            "backslash": "path\\to\\file",
            "unicode": "\u{1F600}\u{1F4A9}",
            "cjk": "\u{4e16}\u{754c}",
            "empty": "",
            "nested_special": {"inner|key": "val\nue"}
        });
        let id = insert(&conn, &mem).unwrap();
        let got = get(&conn, &id).unwrap().unwrap();
        assert_eq!(got.metadata["pipe"], "a|b|c");
        assert_eq!(got.metadata["newline"], "line1\nline2");
        // The tab, backslash and empty-string fixtures were stored but
        // never checked. Indexing a missing key yields JSON null, which
        // does not equal any string, so these also catch a dropped key.
        assert_eq!(got.metadata["tab"], "col1\tcol2");
        assert_eq!(got.metadata["backslash"], "path\\to\\file");
        assert_eq!(got.metadata["empty"], "");
        assert_eq!(got.metadata["unicode"], "\u{1F600}\u{1F4A9}");
        assert_eq!(got.metadata["cjk"], "\u{4e16}\u{754c}");
        assert_eq!(got.metadata["nested_special"]["inner|key"], "val\nue");
    }

    /// A row whose `metadata` column holds invalid JSON is still
    /// readable through `get`, with `{}` as its metadata.
    #[test]
    fn metadata_corrupt_column_falls_back_to_empty() {
        let conn = test_db();
        let id = insert(&conn, &make_memory("Corrupt test", "test", Tier::Long, 5)).unwrap();

        // Overwrite the column with text that is not valid JSON.
        conn.execute(
            "UPDATE memories SET metadata = 'NOT VALID JSON {{{{' WHERE id = ?1",
            params![id],
        )
        .unwrap();

        // Reading the row must fall back to {} rather than panic or
        // return an error.
        let stored = get(&conn, &id).unwrap().unwrap();
        let empty_object = serde_json::json!({});
        assert_eq!(stored.metadata, empty_object);
    }

    /// Restoring an archived row whose metadata is invalid JSON
    /// succeeds, and the restored row has `{}` as its metadata.
    #[test]
    fn metadata_restore_resets_corrupt_archived_metadata() {
        let conn = test_db();
        let mut mem = make_memory("Corrupt archive", "test", Tier::Short, 5);
        mem.metadata = serde_json::json!({"valid": true});
        mem.expires_at = Some("2020-01-01T00:00:00+00:00".to_string());
        let id = insert(&conn, &mem).unwrap();

        // Move the already-expired row into the archive via GC.
        gc(&conn, true).unwrap();

        // Corrupt the archived copy's metadata directly.
        conn.execute(
            "UPDATE archived_memories SET metadata = 'CORRUPT JSON' WHERE id = ?1",
            params![id],
        )
        .unwrap();

        // Restore must reset the metadata to {} instead of failing.
        assert!(restore_archived(&conn, &id).unwrap());
        let revived = get(&conn, &id).unwrap().unwrap();
        let empty_object = serde_json::json!({});
        assert_eq!(revived.metadata, empty_object);
    }

    /// v0.6.0 GA (schema v10) — on a database from `test_db`, the
    /// `scope_idx` column can be selected and exactly one index named
    /// `idx_memories_scope_idx` is registered in `sqlite_master`.
    #[test]
    fn scope_index_exists_after_migration() {
        let conn = test_db();

        // Preparing a statement that names the column fails if the
        // column does not exist; `LIMIT 0` avoids reading any rows.
        assert!(
            conn.prepare("SELECT scope_idx FROM memories LIMIT 0")
                .is_ok(),
            "scope_idx generated column missing"
        );

        let index_count: i64 = conn
            .query_row(
                "SELECT COUNT(*) FROM sqlite_master WHERE type='index' AND name='idx_memories_scope_idx'",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(index_count, 1, "idx_memories_scope_idx missing");
    }

    #[test]
    fn scope_index_used_for_direct_scope_filter() {
        // v0.6.0 GA: a bare `WHERE scope_idx = ?` predicate must be served
        // by `idx_memories_scope_idx`. That is the shape the planner sees
        // for `scope = 'collective'` fast-paths and for each branch-local
        // predicate inside `visibility_clause`.
        //
        // The full visibility_clause OR-chain is intentionally NOT checked
        // here: SQLite may legitimately prefer a scan when selectivity
        // varies across the branches. The index exists to speed up the
        // common single-scope recall; the multi-branch clause still gains
        // because every branch compares a plain column instead of running
        // a JSON extract.
        let db = test_db();

        // Seed a mixed population, then ANALYZE so the planner's cost
        // model works from real statistics.
        for i in 0..200 {
            let mut row = make_memory(&format!("row-{i}"), "test", Tier::Long, 5);
            let scope = match i % 3 {
                0 => "collective",
                _ => "private",
            };
            row.metadata = serde_json::json!({"scope": scope});
            insert(&db, &row).unwrap();
        }
        db.execute("ANALYZE", []).unwrap();

        // Column 3 of each EXPLAIN QUERY PLAN row is read as the plan text.
        let mut explain = db
            .prepare("EXPLAIN QUERY PLAN SELECT id FROM memories WHERE scope_idx = ?1")
            .unwrap();
        let details = explain
            .query_map(params!["collective"], |r| r.get::<_, String>(3))
            .unwrap()
            .collect::<rusqlite::Result<Vec<String>>>()
            .unwrap();

        let joined = details.join("\n");
        assert!(
            joined.contains("idx_memories_scope_idx"),
            "direct scope filter must use idx_memories_scope_idx; got:\n{joined}"
        );
    }

    #[test]
    fn scope_idx_reflects_metadata_on_insert_and_update() {
        // v0.6.0 GA: the VIRTUAL generated column has to follow
        // `metadata.scope` through inserts and updates with no manual
        // maintenance by the caller.

        // Reads the `scope_idx` value stored for the row with the given id.
        fn stored_scope(conn: &Connection, id: &dyn rusqlite::ToSql) -> String {
            conn.query_row(
                "SELECT scope_idx FROM memories WHERE id = ?1",
                params![id],
                |row| row.get(0),
            )
            .unwrap()
        }

        let db = test_db();

        // Insert with an explicit scope: the column mirrors it.
        let mut tracked = make_memory("scope-tracking", "test", Tier::Long, 5);
        tracked.metadata = serde_json::json!({"scope": "team"});
        let tracked_id = insert(&db, &tracked).unwrap();
        assert_eq!(stored_scope(&db, &tracked_id), "team");

        // Change only the metadata (every other field is left as `None`):
        // the column follows the new scope.
        let replacement = serde_json::json!({"scope": "unit"});
        update(
            &db,
            &tracked_id,
            None,
            None,
            None,
            None,
            None,
            None,
            None,
            None,
            Some(&replacement),
        )
        .unwrap();
        assert_eq!(stored_scope(&db, &tracked_id), "unit");

        // Metadata without a `scope` key: the column reports the default.
        let mut unscoped = make_memory("no-scope-key", "test", Tier::Long, 5);
        unscoped.metadata = serde_json::json!({});
        let unscoped_id = insert(&db, &unscoped).unwrap();
        assert_eq!(stored_scope(&db, &unscoped_id), "private");
    }

    // `auto_purge_archive` must only delete archived rows older than the
    // supplied day limit, and must do nothing for `None` or a limit of 0.
    #[test]
    fn auto_purge_archive_respects_max_days() {
        let db = test_db();
        let mut expired = make_memory("Purge test", "test", Tier::Short, 5);
        expired.expires_at = Some("2020-01-01T00:00:00+00:00".to_string());
        insert(&db, &expired).unwrap();
        gc(&db, true).unwrap();

        let archived_count = || list_archived(&db, None, 10, 0).unwrap().len();

        // GC moved the expired row into the archive.
        assert_eq!(archived_count(), 1);

        // Pretend the row was archived 30 days ago so the purge has
        // something old enough to look at.
        let thirty_days_ago = (chrono::Utc::now() - chrono::Duration::days(30)).to_rfc3339();
        db.execute(
            "UPDATE archived_memories SET archived_at = ?1",
            params![thirty_days_ago],
        )
        .unwrap();

        // `None` means purging is disabled: nothing is removed.
        assert_eq!(auto_purge_archive(&db, None).unwrap(), 0);
        assert_eq!(archived_count(), 1);

        // A limit of 0 days is guarded against and purges nothing.
        assert_eq!(auto_purge_archive(&db, Some(0)).unwrap(), 0);

        // 90-day limit vs. a 30-day-old row: still too young to purge.
        assert_eq!(auto_purge_archive(&db, Some(90)).unwrap(), 0);

        // 7-day limit vs. a 30-day-old row: the row is purged.
        assert_eq!(auto_purge_archive(&db, Some(7)).unwrap(), 1);
        assert!(list_archived(&db, None, 10, 0).unwrap().is_empty());
    }

    // ─────────────────────────────────────────────────────────────────
    // Schema v15 (v0.6.3 Stream B) — temporal-validity KG migration.
    // ─────────────────────────────────────────────────────────────────

    fn column_exists(conn: &Connection, table: &str, column: &str) -> bool {
        let mut stmt = conn
            .prepare(&format!("PRAGMA table_info({table})"))
            .unwrap();
        let cols: Vec<String> = stmt
            .query_map([], |row| row.get::<_, String>(1))
            .unwrap()
            .filter_map(Result::ok)
            .collect();
        cols.iter().any(|c| c == column)
    }

    fn index_exists(conn: &Connection, name: &str) -> bool {
        conn.query_row(
            "SELECT 1 FROM sqlite_master WHERE type='index' AND name=?1",
            params![name],
            |r| r.get::<_, i64>(0),
        )
        .is_ok()
    }

    // Schema v15: `memory_links` must carry the temporal-validity and
    // attestation columns checked below.
    #[test]
    fn schema_v15_memory_links_has_temporal_columns() {
        let db = test_db();
        for column in ["valid_from", "valid_until", "observed_by", "signature"] {
            assert!(
                column_exists(&db, "memory_links", column),
                "memory_links.{column} column missing"
            );
        }
    }

    // Schema v15: the three `memory_links` indexes checked below must be
    // present in a migrated database.
    #[test]
    fn schema_v15_memory_links_temporal_indexes_exist() {
        let db = test_db();
        for index in [
            "idx_links_temporal_src",
            "idx_links_temporal_tgt",
            "idx_links_relation",
        ] {
            assert!(index_exists(&db, index), "index {index} missing");
        }
    }

    // Schema v15: `entity_aliases` must exist (and start out empty)
    // together with its alias lookup index.
    #[test]
    fn schema_v15_entity_aliases_table_exists() {
        let db = test_db();

        // The query itself fails if the table is absent.
        let row_count: i64 = db
            .query_row("SELECT COUNT(*) FROM entity_aliases", [], |row| row.get(0))
            .unwrap();
        assert_eq!(row_count, 0);

        assert!(index_exists(&db, "idx_entity_aliases_alias"));
    }

    // Schema v15: inserting the same (entity_id, alias) pair twice must be
    // rejected by the table.
    #[test]
    fn schema_v15_entity_aliases_primary_key_unique() {
        const INSERT_ALIAS: &str =
            "INSERT INTO entity_aliases (entity_id, alias, created_at) VALUES (?1, ?2, ?3)";

        let db = test_db();
        let stamp = chrono::Utc::now().to_rfc3339();

        // First insert of the pair goes through.
        db.execute(INSERT_ALIAS, params!["e1", "Alpha", &stamp])
            .unwrap();

        // An identical second insert has to fail.
        let dup = db.execute(INSERT_ALIAS, params!["e1", "Alpha", &stamp]);
        assert!(dup.is_err(), "expected PK uniqueness violation");
    }

    // -- Pillar 2 / Stream B — entity_register / entity_get_by_alias ------

    // First registration of a name creates the entity, records its
    // aliases, and stores an entity-flavoured memory row behind it.
    #[test]
    fn entity_register_creates_new_entity_with_aliases() {
        let db = test_db();
        let requested = ["pa".to_string(), "Project A".to_string()];
        let registered = entity_register(
            &db,
            "Project Alpha",
            "projects/alpha",
            &requested,
            &serde_json::json!({}),
            Some("test-agent"),
        )
        .unwrap();

        assert!(
            registered.created,
            "first registration must be created=true"
        );
        assert_eq!(registered.canonical_name, "Project Alpha");
        assert_eq!(registered.namespace, "projects/alpha");

        // All aliases from one call share a created_at, so the secondary
        // `alias ASC` sort decides the order. It compares ASCII
        // codepoints: uppercase 'P' (80) sorts before lowercase 'p' (112).
        // The canonical name is auto-inserted as an alias so that
        // entity_get_by_alias can resolve it.
        assert_eq!(registered.aliases, ["Project A", "Project Alpha", "pa"]);

        // The backing memory row carries the entity markers.
        let stored = get(&db, &registered.entity_id).unwrap().unwrap();
        assert_eq!(stored.title, "Project Alpha");
        assert_eq!(stored.tier.rank(), Tier::Long.rank());
        assert!(stored.tags.iter().any(|tag| tag == "entity"));
        assert_eq!(stored.metadata["kind"], "entity");
        assert_eq!(stored.metadata["agent_id"], "test-agent");
    }

    // Registering the same canonical name + namespace twice must reuse
    // the entity and merge any new aliases into the existing set.
    #[test]
    fn entity_register_reuses_existing_and_merges_aliases() {
        let db = test_db();
        let no_metadata = serde_json::json!({});

        let initial = entity_register(
            &db,
            "Project Alpha",
            "projects/alpha",
            &["pa".to_string()],
            &no_metadata,
            Some("a1"),
        )
        .unwrap();
        let repeat = entity_register(
            &db,
            "Project Alpha",
            "projects/alpha",
            &["pa".to_string(), "alpha".to_string()],
            &no_metadata,
            Some("a2"),
        )
        .unwrap();

        assert!(initial.created);
        assert!(!repeat.created, "second call must reuse the entity");
        assert_eq!(initial.entity_id, repeat.entity_id);

        // The first call stored ["Project Alpha", "pa"] at ts1 and the
        // second added "alpha" at ts2 (ts1 < ts2). Ordering is
        // created_at ASC, then alias ASC.
        assert_eq!(repeat.aliases, ["Project Alpha", "pa", "alpha"]);
    }

    // Registering an entity whose name + namespace is already taken by a
    // plain (non-entity) memory must fail with a collision error.
    #[test]
    fn entity_register_errors_on_collision_with_non_entity_memory() {
        let db = test_db();

        // Occupy the title/namespace pair with an ordinary memory.
        let occupant = make_memory("Conflict", "projects/alpha", Tier::Long, 5);
        insert(&db, &occupant).unwrap();

        let outcome = entity_register(
            &db,
            "Conflict",
            "projects/alpha",
            &[],
            &serde_json::json!({}),
            None,
        );
        let msg = outcome.unwrap_err().to_string();
        assert!(
            msg.contains("non-entity memory"),
            "expected collision error, got: {msg}"
        );
    }

    // Empty and whitespace-only aliases must not be stored.
    #[test]
    fn entity_register_skips_blank_aliases() {
        let db = test_db();
        let requested = [String::new(), "   ".to_string(), "ok".to_string()];
        let registered = entity_register(
            &db,
            "Trim Test",
            "test",
            &requested,
            &serde_json::json!({}),
            None,
        )
        .unwrap();

        // Only the auto-included canonical name and "ok" survive;
        // 'T' (84) sorts before 'o' (111).
        assert_eq!(registered.aliases, ["Trim Test", "ok"]);
    }

    // Caller-supplied metadata keys are kept, except `kind`, which is
    // always forced to "entity".
    #[test]
    fn entity_register_preserves_caller_metadata_keys() {
        let db = test_db();
        let caller_metadata = serde_json::json!({"team": "platform", "kind": "ignored"});
        let registered =
            entity_register(&db, "Service X", "svc", &[], &caller_metadata, None).unwrap();

        let stored = get(&db, &registered.entity_id).unwrap().unwrap();

        // Unrelated keys pass through untouched.
        assert_eq!(stored.metadata["team"], "platform");
        // The caller's `kind` is overwritten: entity records must always
        // carry kind=entity for the resolver to find them.
        assert_eq!(stored.metadata["kind"], "entity");
    }

    // Looking an entity up through one alias returns the whole record,
    // including every alias registered for it.
    #[test]
    fn entity_get_by_alias_returns_record_with_full_alias_set() {
        let db = test_db();
        let registered = entity_register(
            &db,
            "Project Alpha",
            "projects/alpha",
            &["pa".to_string(), "alpha".to_string()],
            &serde_json::json!({}),
            None,
        )
        .unwrap();

        let resolved = entity_get_by_alias(&db, "pa", None).unwrap().unwrap();
        assert_eq!(resolved.entity_id, registered.entity_id);
        assert_eq!(resolved.canonical_name, "Project Alpha");
        assert_eq!(resolved.namespace, "projects/alpha");

        // Aliases written in one batch share a created_at, so the
        // alphabetical tiebreak (by ASCII codepoint) decides:
        // "Project Alpha" (P=80) < "alpha" (a=97) < "pa" (p=112).
        // The canonical name is auto-included.
        assert_eq!(resolved.aliases, ["Project Alpha", "alpha", "pa"]);
    }

    #[test]
    fn entity_register_canonical_name_resolves_via_get_by_alias() {
        // Regression test for NHI-P3-T2 (v0.7.0 NHI test playbook): an
        // entity registered without any aliases must still be reachable
        // through entity_get_by_alias("<canonical_name>"). Otherwise the
        // alias-resolution pathway is dead-on-arrival for a caller that
        // only knows the canonical name.
        let db = test_db();
        let no_aliases: [String; 0] = [];
        let registered = entity_register(
            &db,
            "OnlyCanonical",
            "test",
            &no_aliases,
            &serde_json::json!({}),
            None,
        )
        .unwrap();

        assert!(registered.created);
        assert_eq!(
            registered.aliases,
            ["OnlyCanonical"],
            "canonical_name must be auto-inserted as an alias"
        );

        let resolved = entity_get_by_alias(&db, "OnlyCanonical", Some("test"))
            .unwrap()
            .expect("canonical_name must resolve via entity_get_by_alias");
        assert_eq!(resolved.entity_id, registered.entity_id);
        assert_eq!(resolved.canonical_name, "OnlyCanonical");
    }

    // An alias nobody registered resolves to `None`, not to an error.
    #[test]
    fn entity_get_by_alias_returns_none_for_unknown_alias() {
        let db = test_db();
        let lookup = entity_get_by_alias(&db, "missing", None).unwrap();
        assert!(lookup.is_none());
    }

    // The same alias may exist in several namespaces; passing a namespace
    // must select the entity registered in exactly that namespace.
    #[test]
    fn entity_get_by_alias_filters_by_namespace() {
        let db = test_db();
        let entities = [("Acme", "ns_a"), ("Acme Corp", "ns_b")];

        // Register both entities under the shared alias "a" first...
        for (canonical_name, namespace) in entities {
            entity_register(
                &db,
                canonical_name,
                namespace,
                &["a".to_string()],
                &serde_json::json!({}),
                None,
            )
            .unwrap();
        }

        // ...then resolve the alias once per namespace.
        for (canonical_name, namespace) in entities {
            let resolved = entity_get_by_alias(&db, "a", Some(namespace))
                .unwrap()
                .unwrap();
            assert_eq!(resolved.namespace, namespace);
            assert_eq!(resolved.canonical_name, canonical_name);
        }
    }

    // With no namespace filter, an alias shared by several entities
    // resolves to the one created most recently.
    #[test]
    fn entity_get_by_alias_without_namespace_picks_most_recent() {
        let db = test_db();
        let shared_alias = ["dup".to_string()];
        let no_metadata = serde_json::json!({});

        // The older entity goes in first.
        entity_register(&db, "Older", "ns_old", &shared_alias, &no_metadata, None).unwrap();

        // Pause briefly so the second entity gets a strictly later
        // created_at.
        std::thread::sleep(std::time::Duration::from_millis(5));

        entity_register(&db, "Newer", "ns_new", &shared_alias, &no_metadata, None).unwrap();

        let resolved = entity_get_by_alias(&db, "dup", None).unwrap().unwrap();
        assert_eq!(resolved.canonical_name, "Newer");
        assert_eq!(resolved.namespace, "ns_new");
    }

    // A stray alias row that points at an ordinary memory must not make
    // that memory resolvable as an entity.
    #[test]
    fn entity_get_by_alias_ignores_non_entity_memory_with_matching_alias() {
        let db = test_db();

        // A regular memory: its metadata has no `kind` marker at all.
        let mut decoy = make_memory("Decoy", "test", Tier::Long, 5);
        decoy.metadata = serde_json::json!({});
        let decoy_id = insert(&db, &decoy).unwrap();

        // Hand-write an entity_aliases row that references it. The
        // resolver has to skip it because `kind != 'entity'`.
        let stamp = chrono::Utc::now().to_rfc3339();
        db.execute(
            "INSERT INTO entity_aliases (entity_id, alias, created_at) VALUES (?1, ?2, ?3)",
            params![&decoy_id, "decoy", &stamp],
        )
        .unwrap();

        let lookup = entity_get_by_alias(&db, "decoy", None).unwrap();
        assert!(lookup.is_none(), "non-entity memories must not resolve");
    }

    // Repeating an alias within one registration call stores it once.
    #[test]
    fn entity_register_idempotent_aliases_are_deduped() {
        let db = test_db();
        let requested = ["x".to_string(), "x".to_string(), "y".to_string()];
        let registered = entity_register(
            &db,
            "Dedup",
            "test",
            &requested,
            &serde_json::json!({}),
            None,
        )
        .unwrap();

        // INSERT OR IGNORE collapses the duplicate "x". With the
        // auto-inserted canonical name ("Dedup") that leaves exactly
        // three distinct aliases.
        assert_eq!(registered.aliases.len(), 3);
        for expected in ["Dedup", "x", "y"] {
            assert!(registered.aliases.iter().any(|alias| alias == expected));
        }
    }

    // -- Pillar 2 / Stream C — kg_timeline ---------------------------------

    /// Writes a `memory_links` row directly, with a caller-chosen
    /// `valid_from`, so timeline tests can fix the order of events
    /// instead of depending on wall-clock spacing.
    ///
    /// `created_at` is stamped with the current time. Panics if the
    /// insert fails.
    fn insert_link_at(
        conn: &Connection,
        source_id: &str,
        target_id: &str,
        relation: &str,
        valid_from: &str,
    ) {
        let sql = "INSERT INTO memory_links (source_id, target_id, relation, created_at, valid_from) \
             VALUES (?1, ?2, ?3, ?4, ?5)";
        let created_at = chrono::Utc::now().to_rfc3339();
        conn.execute(
            sql,
            params![source_id, target_id, relation, created_at, valid_from],
        )
        .unwrap();
    }

    // A link written through `create_link` must come out with a non-NULL
    // `valid_from`.
    #[test]
    fn create_link_populates_valid_from_for_new_rows() {
        let db = test_db();
        let source = make_memory("kg-src", "test", Tier::Long, 5);
        let target = make_memory("kg-tgt", "test", Tier::Long, 5);
        for memory in [&source, &target] {
            insert(&db, memory).unwrap();
        }

        create_link(&db, &source.id, &target.id, "related_to").unwrap();

        let stored_valid_from: Option<String> = db
            .query_row(
                "SELECT valid_from FROM memory_links WHERE source_id = ?1",
                params![&source.id],
                |row| row.get(0),
            )
            .unwrap();
        assert!(
            stored_valid_from.is_some(),
            "create_link must populate valid_from so kg_timeline can see new links"
        );
    }

    // v0.7 H2, schema v23: `memory_links` must have the `attest_level`
    // column after migration.
    #[test]
    fn schema_v23_memory_links_has_attest_level_column() {
        let db = test_db();
        let has_attest_level = column_exists(&db, "memory_links", "attest_level");
        assert!(
            has_attest_level,
            "v23 must add attest_level column to memory_links"
        );
    }

    // v0.7 H2, no-keypair path: the signature column stays NULL and
    // attest_level is stored as "unsigned". This is the v0.6.4
    // backward-compat contract: operators who never generated a keypair
    // keep the pre-H2 behaviour.
    #[test]
    fn create_link_signed_without_keypair_is_unsigned() {
        let db = test_db();
        let source = make_memory("h2-src-unsigned", "test", Tier::Long, 5);
        let target = make_memory("h2-tgt-unsigned", "test", Tier::Long, 5);
        for memory in [&source, &target] {
            insert(&db, memory).unwrap();
        }

        // No keypair supplied: the call reports the "unsigned" level.
        let reported_level =
            create_link_signed(&db, &source.id, &target.id, "related_to", None).unwrap();
        assert_eq!(reported_level, "unsigned");

        // The persisted row agrees with what the call reported.
        let (stored_signature, stored_level) = db
            .query_row(
                "SELECT signature, attest_level FROM memory_links \
                 WHERE source_id = ?1 AND target_id = ?2",
                params![&source.id, &target.id],
                |row| {
                    let signature: Option<Vec<u8>> = row.get(0)?;
                    let attest_level: Option<String> = row.get(1)?;
                    Ok((signature, attest_level))
                },
            )
            .unwrap();
        assert!(
            stored_signature.is_none(),
            "no keypair → signature must be NULL"
        );
        assert_eq!(stored_level.as_deref(), Some("unsigned"));
    }

    // v0.7 H2, happy path: when a keypair is supplied the link row gets a
    // 64-byte Ed25519 signature in `signature` and
    // attest_level = "self_signed". That signature has to verify against
    // the keypair's public key over the canonical CBOR payload.
    #[test]
    fn create_link_signed_with_keypair_persists_valid_signature() {
        use crate::identity::{keypair, sign as link_sign};
        use ed25519_dalek::Verifier;

        let db = test_db();
        let source = make_memory("h2-src-signed", "test", Tier::Long, 5);
        let target = make_memory("h2-tgt-signed", "test", Tier::Long, 5);
        for memory in [&source, &target] {
            insert(&db, memory).unwrap();
        }

        let signer = keypair::generate("alice").unwrap();
        let reported_level =
            create_link_signed(&db, &source.id, &target.id, "supersedes", Some(&signer)).unwrap();
        assert_eq!(reported_level, "self_signed");

        // Load what was actually persisted and check its shape.
        let (stored_signature, stored_level, stored_valid_from) = db
            .query_row(
                "SELECT signature, attest_level, valid_from FROM memory_links \
                 WHERE source_id = ?1 AND target_id = ?2",
                params![&source.id, &target.id],
                |row| {
                    let signature: Option<Vec<u8>> = row.get(0)?;
                    let attest_level: Option<String> = row.get(1)?;
                    let valid_from: Option<String> = row.get(2)?;
                    Ok((signature, attest_level, valid_from))
                },
            )
            .unwrap();
        let signature_bytes =
            stored_signature.expect("signature must be present when keypair is provided");
        assert_eq!(signature_bytes.len(), 64, "Ed25519 signature is 64 bytes");
        assert_eq!(stored_level.as_deref(), Some("self_signed"));
        let valid_from = stored_valid_from.expect("valid_from must be set on the insert path");

        // Rebuild the canonical bytes the writer signed and verify them
        // with the keypair's public key; this mirrors what H3's inbound
        // verifier will do for every received link.
        let payload = link_sign::canonical_cbor(&link_sign::SignableLink {
            src_id: &source.id,
            dst_id: &target.id,
            relation: "supersedes",
            observed_by: Some(signer.agent_id.as_str()),
            valid_from: Some(valid_from.as_str()),
            valid_until: None,
        })
        .unwrap();

        let mut raw_signature = [0u8; 64];
        raw_signature.copy_from_slice(&signature_bytes);
        let signature = ed25519_dalek::Signature::from_bytes(&raw_signature);
        signer
            .public
            .verify(&payload, &signature)
            .expect("persisted signature must verify against the writer's public key");
    }

    // v0.7.0 H6 (round-2) — regression: the SQLite write path must
    // truncate `valid_from` to microsecond precision BEFORE signing and
    // persisting, so the row a federation peer receives serialises back
    // to the same canonical RFC3339 string regardless of the adapter
    // that wrote it. Two properties are asserted:
    //
    // 1. The `valid_from` column never holds more than 6 fractional-second
    //    digits (microseconds), i.e. no 9-digit nanosecond run.
    // 2. The persisted signature verifies against canonical CBOR derived
    //    from the exact string the row holds, i.e. the round-trip is
    //    byte-stable.
    #[test]
    fn h6_create_link_signed_truncates_valid_from_to_microseconds() {
        use crate::identity::{keypair, sign as link_sign};
        use ed25519_dalek::Verifier;

        let conn = test_db();
        let src = make_memory("h6-src", "test", Tier::Long, 5);
        let tgt = make_memory("h6-tgt", "test", Tier::Long, 5);
        insert(&conn, &src).unwrap();
        insert(&conn, &tgt).unwrap();

        let kp = keypair::generate("alice").unwrap();
        let level = create_link_signed(&conn, &src.id, &tgt.id, "related_to", Some(&kp)).unwrap();
        assert_eq!(level, "self_signed");

        let (sig, valid_from): (Option<Vec<u8>>, Option<String>) = conn
            .query_row(
                "SELECT signature, valid_from FROM memory_links \
                 WHERE source_id = ?1 AND target_id = ?2",
                params![&src.id, &tgt.id],
                |r| Ok((r.get(0)?, r.get(1)?)),
            )
            .unwrap();
        let valid_from = valid_from.expect("valid_from set on signed insert path");

        // RFC3339 fractional-second precision check. The string looks
        // like `2026-05-10T12:34:56.123456+00:00` (microsecond) or
        // `...:56.123456789+00:00` (nanosecond). After H6 the run of
        // digits following the '.' must be at most 6 long. A value with
        // no '.' has no fractional part and passes trivially.
        if let Some(dot) = valid_from.find('.') {
            let after = &valid_from[dot + 1..];
            let frac_len = after.chars().take_while(|c| c.is_ascii_digit()).count();
            assert!(
                frac_len <= 6,
                "H6 regression: valid_from has {frac_len}-digit fractional second; expected ≤ 6 (microseconds). Value: {valid_from}"
            );
        }

        // Round-trip the signature against canonical CBOR computed from
        // the EXACT string stored in the row. If the writer signed over
        // a nanosecond-precision string but the column round-trips at
        // microsecond precision, this verify fails — which is exactly
        // the postgres-G3 failure mode SQLite is now immunised against.
        let sig_bytes = sig.expect("signature persisted");
        // Check the length up front: `copy_from_slice` below panics with
        // an unhelpful slice-length message when the sizes differ.
        assert_eq!(sig_bytes.len(), 64, "Ed25519 signature is 64 bytes");
        let signable = link_sign::SignableLink {
            src_id: &src.id,
            dst_id: &tgt.id,
            relation: "related_to",
            observed_by: Some(kp.agent_id.as_str()),
            valid_from: Some(valid_from.as_str()),
            valid_until: None,
        };
        let payload = link_sign::canonical_cbor(&signable).unwrap();
        let mut sig_arr = [0u8; 64];
        sig_arr.copy_from_slice(&sig_bytes);
        let sig_obj = ed25519_dalek::Signature::from_bytes(&sig_arr);
        kp.public.verify(&payload, &sig_obj).expect(
            "H6 regression: signature must verify against canonical CBOR \
             derived from the stored (microsecond-truncated) valid_from",
        );
    }

    // v0.7.0 fix-campaign A3 (LINK-PARITY, #690): the cycle check rejects
    // a `reflects_on` edge when its target already reflects, directly or
    // transitively, back onto its source. This storage-layer invariant is
    // what the HTTP / SAL / federation paths now share with the MCP path.
    #[test]
    fn a3_validate_link_pre_create_refuses_reflection_cycle() {
        use crate::config::{
            PermissionsMode, lock_permissions_mode_for_test,
            override_active_permissions_mode_for_test,
        };
        // The active permissions mode is process-wide. Hold the
        // serialisation guard for the whole test so parallel lib tests
        // cannot change the mode underneath it (same pattern as
        // `pin_governance_enforce_for_test` in handlers/mod.rs).
        let _gate = lock_permissions_mode_for_test();
        // With the mode pinned to Off the K9 evaluator stays out of the
        // way, so only the cycle gate is exercised.
        // NOTE(review): the mode is left at Off when this test ends,
        // whereas the sibling governance-deny test resets it to Advisory;
        // confirm whether a restore is wanted here too.
        override_active_permissions_mode_for_test(PermissionsMode::Off);

        let db = test_db();
        let first = make_memory("a3-a", "ns", Tier::Long, 5);
        let second = make_memory("a3-b", "ns", Tier::Long, 5);
        let third = make_memory("a3-c", "ns", Tier::Long, 5);
        for memory in [&first, &second, &third] {
            insert(&db, memory).unwrap();
        }

        // Chain: first --reflects_on--> second --reflects_on--> third.
        create_link(&db, &first.id, &second.id, "reflects_on").unwrap();
        create_link(&db, &second.id, &third.id, "reflects_on").unwrap();

        // third --reflects_on--> first would close the loop.
        let msg = create_link(&db, &third.id, &first.id, "reflects_on")
            .expect_err("cycle-closing reflects_on must be refused")
            .to_string();
        assert!(
            msg.starts_with(LINK_CYCLE_ERR_PREFIX),
            "expected {LINK_CYCLE_ERR_PREFIX} prefix, got: {msg}"
        );

        // The same pair may still be joined by `related_to`: only
        // `reflects_on` takes part in the DAG invariant.
        create_link(&db, &third.id, &first.id, "related_to")
            .expect("related_to is not gated by the cycle check");
    }

    // v0.7.0 fix-campaign A3 (LINK-PARITY, #690) — the K9 permission
    // pipeline gates link writes at the storage layer (not just at the
    // MCP entry point). A `Deny` rule on `memory_link` refuses the write
    // through `create_link` / `create_link_signed`.
    #[test]
    fn a3_validate_link_pre_create_respects_governance_deny() {
        use crate::config::{
            PermissionsMode, lock_permissions_mode_for_test,
            override_active_permissions_mode_for_test,
        };
        use crate::permissions::{
            PermissionRule, RuleDecision, clear_active_permission_rules_for_test,
            set_active_permission_rules,
        };

        // Clears the rule registry and resets the mode to Advisory when
        // dropped. Doing the cleanup in `Drop` means it also runs when an
        // assertion below panics, so a failing run cannot leak the Deny
        // rule or Enforce mode into other tests in the same process.
        struct RestorePermissionState;
        impl Drop for RestorePermissionState {
            fn drop(&mut self) {
                clear_active_permission_rules_for_test();
                override_active_permissions_mode_for_test(PermissionsMode::Advisory);
            }
        }

        // Locals drop in reverse declaration order, so `_restore` runs
        // its cleanup before `_gate` is released.
        let _gate = lock_permissions_mode_for_test();
        let _restore = RestorePermissionState;

        override_active_permissions_mode_for_test(PermissionsMode::Enforce);
        clear_active_permission_rules_for_test();
        set_active_permission_rules(vec![PermissionRule {
            namespace_pattern: "a3-deny/**".to_string(),
            op: "memory_link".to_string(),
            agent_pattern: "*".to_string(),
            decision: RuleDecision::Deny,
            reason: Some("test: link denied by a3 rule".to_string()),
        }]);

        let conn = test_db();
        let s = make_memory("a3-src", "a3-deny/scope", Tier::Long, 5);
        let t = make_memory("a3-tgt", "a3-deny/scope", Tier::Long, 5);
        insert(&conn, &s).unwrap();
        insert(&conn, &t).unwrap();

        // Both memories live in a namespace matched by the Deny rule, so
        // the link write must be refused with the permission prefix.
        let err = create_link(&conn, &s.id, &t.id, "related_to")
            .expect_err("a Deny rule must refuse the link write");
        let msg = err.to_string();
        assert!(
            msg.starts_with(LINK_PERMISSION_DENIED_ERR_PREFIX),
            "expected {LINK_PERMISSION_DENIED_ERR_PREFIX} prefix, got: {msg}"
        );
    }

    // v0.7.0 fix-campaign A3 (LINK-PARITY, #690) — federation receive
    // path. With a blanket `memory_link` Deny rule installed under
    // `PermissionsMode::Enforce`:
    //   * a `peer_attested` inbound link is accepted — the peer is
    //     trusted by mTLS + Ed25519 attestation, so the K9 governance
    //     gate is bypassed;
    //   * an `unsigned` inbound link is still refused locally with the
    //     link-permission-denied error prefix.
    // The cycle check ALWAYS runs even on peer writes; that half of the
    // contract is pinned by
    // `a3_create_link_inbound_peer_attested_still_refuses_cycle`.
    #[test]
    fn a3_create_link_inbound_peer_attested_bypasses_governance() {
        use crate::config::{
            PermissionsMode, lock_permissions_mode_for_test,
            override_active_permissions_mode_for_test,
        };
        use crate::permissions::{
            PermissionRule, RuleDecision, clear_active_permission_rules_for_test,
            set_active_permission_rules,
        };
        // Serialise against other tests that mutate the process-global
        // permissions mode / rule registry.
        let _gate = lock_permissions_mode_for_test();
        override_active_permissions_mode_for_test(PermissionsMode::Enforce);
        clear_active_permission_rules_for_test();
        set_active_permission_rules(vec![PermissionRule {
            namespace_pattern: "**".to_string(),
            op: "memory_link".to_string(),
            agent_pattern: "*".to_string(),
            decision: RuleDecision::Deny,
            reason: Some("test: every link denied".to_string()),
        }]);

        let conn = test_db();
        let s = make_memory("inbound-src", "a3-fed", Tier::Long, 5);
        let t = make_memory("inbound-tgt", "a3-fed", Tier::Long, 5);
        insert(&conn, &s).unwrap();
        insert(&conn, &t).unwrap();

        // v0.7.0 issue #810 / #813 — the CHECK trigger on memory_links
        // refuses any peer_attested row whose signature blob is NULL /
        // wrong-length. The pre-#810 test passed a NULL signature here
        // because the legacy invariant did not police that pairing;
        // now we synthesise a 64-byte fake signature blob so the row
        // satisfies the trigger's WHEN clause. The K9-bypass property
        // under test is orthogonal to whether the signature bytes
        // actually verify (verification is `memory_verify`'s job, not
        // this insertion path's).
        let link = MemoryLink {
            source_id: s.id.clone(),
            target_id: t.id.clone(),
            relation: crate::models::MemoryLinkRelation::RelatedTo,
            created_at: chrono::Utc::now().to_rfc3339(),
            valid_from: None,
            valid_until: None,
            observed_by: Some("peer:remote".to_string()),
            signature: Some(vec![0xAB_u8; 64]),
            attest_level: None,
        };

        // Peer-attested inbound bypasses the K9 deny.
        create_link_inbound(&conn, &link, "peer_attested")
            .expect("peer_attested must bypass K9 governance");

        // But an unsigned inbound link is still gated locally. The
        // endpoints are swapped so this is a distinct row from `link`.
        let link2 = MemoryLink {
            source_id: t.id.clone(),
            target_id: s.id.clone(),
            relation: crate::models::MemoryLinkRelation::RelatedTo,
            created_at: chrono::Utc::now().to_rfc3339(),
            valid_from: None,
            valid_until: None,
            observed_by: Some("peer:remote".to_string()),
            signature: None,
            attest_level: None,
        };
        let err = create_link_inbound(&conn, &link2, "unsigned")
            .expect_err("unsigned inbound must NOT bypass governance");
        // Surface both the expected prefix and the actual error on
        // failure, mirroring the sibling A3 deny-rule test.
        let msg = err.to_string();
        assert!(
            msg.starts_with(LINK_PERMISSION_DENIED_ERR_PREFIX),
            "expected {LINK_PERMISSION_DENIED_ERR_PREFIX} prefix, got: {msg}"
        );

        // Cleanup so the global registry does not leak into other tests
        // running in the same process.
        clear_active_permission_rules_for_test();
        override_active_permissions_mode_for_test(PermissionsMode::Advisory);
    }

    // v0.7.0 fix-campaign A3 (LINK-PARITY, #690) — even a trusted
    // peer cannot extend a `reflects_on` cycle on the receiver. The
    // cycle gate runs regardless of attest_level: after a local
    // a → b `reflects_on` link exists, a `peer_attested` inbound
    // b → a `reflects_on` link must be refused with the cycle error
    // prefix.
    #[test]
    fn a3_create_link_inbound_peer_attested_still_refuses_cycle() {
        use crate::config::{
            PermissionsMode, lock_permissions_mode_for_test,
            override_active_permissions_mode_for_test,
        };
        // Governance is switched off so the only gate that can refuse
        // the write is the cycle check.
        let _gate = lock_permissions_mode_for_test();
        override_active_permissions_mode_for_test(PermissionsMode::Off);

        let conn = test_db();
        let a = make_memory("inbound-cycle-a", "ns", Tier::Long, 5);
        let b = make_memory("inbound-cycle-b", "ns", Tier::Long, 5);
        insert(&conn, &a).unwrap();
        insert(&conn, &b).unwrap();
        create_link(&conn, &a.id, &b.id, "reflects_on").unwrap();

        // Reverse edge that would close the a → b → a loop.
        let cycle_link = MemoryLink {
            source_id: b.id.clone(),
            target_id: a.id.clone(),
            relation: crate::models::MemoryLinkRelation::ReflectsOn,
            created_at: chrono::Utc::now().to_rfc3339(),
            valid_from: None,
            valid_until: None,
            observed_by: Some("peer:remote".to_string()),
            signature: None,
            attest_level: None,
        };
        let err = create_link_inbound(&conn, &cycle_link, "peer_attested")
            .expect_err("cycle check must run even on peer_attested inbound");
        // Report the expected prefix and the actual error so a failure
        // shows which gate (if any) produced the refusal.
        let msg = err.to_string();
        assert!(
            msg.starts_with(LINK_CYCLE_ERR_PREFIX),
            "expected {LINK_CYCLE_ERR_PREFIX} prefix, got: {msg}"
        );
    }

    // v0.7.0 H6 (round-2) — pure-function test: the truncation helper
    // itself must collapse only sub-microsecond digits and leave
    // microsecond-aligned inputs unchanged. Both halves are asserted:
    // a nanosecond-precision input loses its last three digits, and
    // feeding the already-truncated value back in is a no-op.
    #[test]
    fn h6_truncate_to_microseconds_drops_nanos() {
        use chrono::{TimeZone, Timelike};
        let ns = Utc
            .with_ymd_and_hms(2026, 5, 10, 12, 34, 56)
            .unwrap()
            .with_nanosecond(123_456_789)
            .unwrap();
        let truncated = truncate_to_microseconds(ns);
        // 123_456_789 ns → 123_456 µs → 123_456_000 ns.
        assert_eq!(truncated.nanosecond(), 123_456_000);
        // Only the sub-second part may change; whole seconds are kept.
        assert_eq!(
            truncated.timestamp(),
            ns.timestamp(),
            "truncation must not disturb whole seconds"
        );
        // Microsecond-aligned input must pass through unchanged.
        assert_eq!(
            truncate_to_microseconds(truncated),
            truncated,
            "microsecond-aligned input must be left unchanged"
        );
        // Round-trip through to_rfc3339 must produce a 6-digit
        // fractional second (the property H6 commits to).
        let s = truncated.to_rfc3339();
        let dot = s.find('.').expect("fractional second present");
        let frac_len = s[dot + 1..]
            .chars()
            .take_while(char::is_ascii_digit)
            .count();
        assert_eq!(frac_len, 6, "expected exactly 6-digit fractional; got: {s}");
    }

    // `kg_timeline` must return a source's outbound links sorted by
    // `valid_from` ascending regardless of insertion order, and each
    // event must carry the target's title, namespace and the relation.
    #[test]
    fn kg_timeline_returns_events_ordered_by_valid_from_ascending() {
        let conn = test_db();
        let namespace = "kg/projects/alpha";
        let project = make_memory("alpha", namespace, Tier::Long, 5);
        let kickoff = make_memory("kickoff", namespace, Tier::Long, 5);
        let design = make_memory("design phase", namespace, Tier::Long, 5);
        let build = make_memory("implementation", namespace, Tier::Long, 5);
        for memory in [&project, &kickoff, &design, &build] {
            insert(&conn, memory).unwrap();
        }

        // Links are written out of chronological order on purpose, so a
        // passing result cannot be an accident of insertion order.
        let shuffled = [
            (&design, "supersedes", "2026-02-03T00:00:00+00:00"),
            (&kickoff, "related_to", "2026-01-15T00:00:00+00:00"),
            (&build, "supersedes", "2026-03-22T00:00:00+00:00"),
        ];
        for (target, relation, valid_from) in shuffled {
            insert_link_at(&conn, &project.id, &target.id, relation, valid_from);
        }

        let events = kg_timeline(&conn, &project.id, None, None, None).unwrap();
        assert_eq!(events.len(), 3);
        // Chronological order: kickoff (Jan) → design (Feb) → build (Mar).
        for (event, expected) in events.iter().zip([&kickoff, &design, &build]) {
            assert_eq!(event.target_id, expected.id);
        }
        assert_eq!(events[0].title, "kickoff");
        assert_eq!(events[1].relation, "supersedes");
        assert_eq!(events[0].target_namespace, namespace);
    }

    // The `since` bound drops links whose `valid_from` is earlier than
    // the cutoff and keeps a link whose `valid_from` equals it.
    #[test]
    fn kg_timeline_filters_by_since_inclusive() {
        let conn = test_db();
        let origin = make_memory("e", "ns", Tier::Long, 5);
        let january = make_memory("e1", "ns", Tier::Long, 5);
        let march = make_memory("e2", "ns", Tier::Long, 5);
        for memory in [&origin, &january, &march] {
            insert(&conn, memory).unwrap();
        }
        let dated_links = [
            (&january, "2026-01-01T00:00:00+00:00"),
            (&march, "2026-03-01T00:00:00+00:00"),
        ];
        for (target, valid_from) in dated_links {
            insert_link_at(&conn, &origin.id, &target.id, "related_to", valid_from);
        }

        // Runs the timeline query with only the `since` bound set.
        let timeline_since =
            |cutoff: &str| kg_timeline(&conn, &origin.id, Some(cutoff), None, None).unwrap();

        // A cutoff between the two links keeps only the later one.
        let events = timeline_since("2026-02-01T00:00:00+00:00");
        assert_eq!(events.len(), 1);
        assert_eq!(events[0].target_id, march.id);

        // Boundary: since == valid_from should match (inclusive).
        let on_boundary = timeline_since("2026-03-01T00:00:00+00:00");
        assert_eq!(on_boundary.len(), 1);
    }

    // The `until` bound drops links whose `valid_from` is later than
    // the cutoff.
    // NOTE(review): despite the name, the exact `until == valid_from`
    // boundary is not exercised here — consider adding that case.
    #[test]
    fn kg_timeline_filters_by_until_inclusive() {
        let conn = test_db();
        let origin = make_memory("e", "ns", Tier::Long, 5);
        let january = make_memory("e1", "ns", Tier::Long, 5);
        let march = make_memory("e2", "ns", Tier::Long, 5);
        for memory in [&origin, &january, &march] {
            insert(&conn, memory).unwrap();
        }
        let dated_links = [
            (&january, "2026-01-01T00:00:00+00:00"),
            (&march, "2026-03-01T00:00:00+00:00"),
        ];
        for (target, valid_from) in dated_links {
            insert_link_at(&conn, &origin.id, &target.id, "related_to", valid_from);
        }

        // A cutoff between the two links keeps only the earlier one.
        let cutoff = Some("2026-02-01T00:00:00+00:00");
        let events = kg_timeline(&conn, &origin.id, None, cutoff, None).unwrap();
        assert_eq!(events.len(), 1);
        assert_eq!(events[0].target_id, january.id);
    }

    // A link row whose `valid_from` is NULL must not appear in the
    // timeline; only the properly dated sibling link is returned.
    #[test]
    fn kg_timeline_skips_links_with_null_valid_from() {
        let conn = test_db();
        let origin = make_memory("s", "ns", Tier::Long, 5);
        let undated = make_memory("t1", "ns", Tier::Long, 5);
        let dated = make_memory("t2", "ns", Tier::Long, 5);
        for memory in [&origin, &undated, &dated] {
            insert(&conn, memory).unwrap();
        }

        // Raw INSERT with a NULL valid_from, standing in for an external
        // writer that did not go through `create_link`.
        // v0.7.0 fix campaign R1-M2 — direct-SQL writer must use a
        // value in the closed-set; the trigger now refuses 'rel'.
        let created_at = chrono::Utc::now().to_rfc3339();
        let raw_insert = "INSERT INTO memory_links (source_id, target_id, relation, created_at, valid_from) \
             VALUES (?1, ?2, 'related_to', ?3, NULL)";
        conn.execute(raw_insert, params![&origin.id, &undated.id, &created_at])
            .unwrap();

        insert_link_at(
            &conn,
            &origin.id,
            &dated.id,
            "supersedes",
            "2026-01-01T00:00:00+00:00",
        );

        let events = kg_timeline(&conn, &origin.id, None, None, None).unwrap();
        assert_eq!(events.len(), 1);
        assert_eq!(events[0].target_id, dated.id);
    }

    // The timeline is anchored on `source_id`: an edge that merely
    // points AT the entity (entity as target) is deliberately left out.
    // This pins the contract against an accidental widening to a
    // bidirectional view.
    #[test]
    fn kg_timeline_excludes_links_where_source_is_target() {
        let conn = test_db();
        let entity = make_memory("entity", "ns", Tier::Long, 5);
        let other = make_memory("other", "ns", Tier::Long, 5);
        for memory in [&entity, &other] {
            insert(&conn, memory).unwrap();
        }

        // Only an inbound edge exists: other → entity.
        let valid_from = "2026-01-01T00:00:00+00:00";
        insert_link_at(&conn, &other.id, &entity.id, "related_to", valid_from);

        let outbound = kg_timeline(&conn, &entity.id, None, None, None).unwrap();
        assert!(outbound.is_empty());
    }

    // Out-of-range `limit` values are clamped rather than rejected: an
    // oversized limit is accepted and a zero limit still yields one row.
    #[test]
    fn kg_timeline_limit_clamped_to_max() {
        let conn = test_db();
        let origin = make_memory("s", "ns", Tier::Long, 5);
        insert(&conn, &origin).unwrap();

        // Five targets `t0`..`t4`, linked on 2026-01-01..2026-01-05.
        for day in 1..=5 {
            let title = format!("t{}", day - 1);
            let valid_from = format!("2026-01-0{day}T00:00:00+00:00");
            let target = make_memory(&title, "ns", Tier::Long, 5);
            insert(&conn, &target).unwrap();
            insert_link_at(&conn, &origin.id, &target.id, "related_to", &valid_from);
        }

        // A wildly oversized limit should be clamped to
        // KG_TIMELINE_MAX_LIMIT (accepted, not errored); with only five
        // rows present, all five come back.
        let oversized = Some(usize::MAX);
        let events = kg_timeline(&conn, &origin.id, None, None, oversized).unwrap();
        assert_eq!(events.len(), 5);

        // Caller passes 0 — clamp to 1.
        let zero = Some(0);
        let one = kg_timeline(&conn, &origin.id, None, None, zero).unwrap();
        assert_eq!(one.len(), 1);
    }

    // Timeline events must surface the link's `observed_by` and
    // `valid_until` columns exactly as stored.
    #[test]
    fn kg_timeline_carries_observed_by_and_valid_until() {
        let conn = test_db();
        let origin = make_memory("s", "ns", Tier::Long, 5);
        let target = make_memory("t", "ns", Tier::Long, 5);
        for memory in [&origin, &target] {
            insert(&conn, memory).unwrap();
        }

        // Raw INSERT so both optional columns can be populated directly.
        let created_at = chrono::Utc::now().to_rfc3339();
        let raw_insert = "INSERT INTO memory_links (source_id, target_id, relation, created_at, valid_from, valid_until, observed_by) \
             VALUES (?1, ?2, 'supersedes', ?3, '2026-01-01T00:00:00+00:00', '2026-12-31T23:59:59+00:00', 'agent-pm-1')";
        conn.execute(raw_insert, params![&origin.id, &target.id, &created_at])
            .unwrap();

        let events = kg_timeline(&conn, &origin.id, None, None, None).unwrap();
        assert_eq!(events.len(), 1);
        let event = &events[0];
        assert_eq!(event.observed_by.as_deref(), Some("agent-pm-1"));
        assert_eq!(
            event.valid_until.as_deref(),
            Some("2026-12-31T23:59:59+00:00")
        );
    }

    // Querying a source id that was never stored succeeds with an
    // empty timeline rather than an error.
    #[test]
    fn kg_timeline_empty_for_unknown_source() {
        let conn = test_db();
        let timeline = kg_timeline(&conn, "nonexistent-id", None, None, None);
        assert!(timeline.unwrap().is_empty());
    }

    // -- Pillar 2 / Stream C — kg_invalidate -------------------------------

    // An explicit timestamp passed to `invalidate_link` is echoed in
    // the result, reported with no prior value, and persisted in the
    // link row's `valid_until` column.
    #[test]
    fn invalidate_link_sets_valid_until_to_provided_timestamp() {
        let conn = test_db();
        let source = make_memory("inv-s", "test", Tier::Long, 5);
        let target = make_memory("inv-t", "test", Tier::Long, 5);
        for memory in [&source, &target] {
            insert(&conn, memory).unwrap();
        }
        create_link(&conn, &source.id, &target.id, "related_to").unwrap();

        let stamp = "2026-12-31T23:59:59+00:00";
        let outcome = invalidate_link(&conn, &source.id, &target.id, "related_to", Some(stamp))
            .unwrap()
            .expect("link must exist");
        assert_eq!(outcome.valid_until, stamp);
        assert!(outcome.previous_valid_until.is_none());

        // Read the column back to confirm the UPDATE actually landed.
        let lookup = "SELECT valid_until FROM memory_links \
                 WHERE source_id = ?1 AND target_id = ?2 AND relation = ?3";
        let persisted: Option<String> = conn
            .query_row(lookup, params![&source.id, &target.id, "related_to"], |row| {
                row.get(0)
            })
            .unwrap();
        assert_eq!(persisted.as_deref(), Some(stamp));
    }

    // With no timestamp supplied, `invalidate_link` stamps the link
    // with an RFC3339 value close to the current wall-clock time.
    #[test]
    fn invalidate_link_defaults_to_now_when_no_timestamp_provided() {
        let conn = test_db();
        let source = make_memory("inv-s", "test", Tier::Long, 5);
        let target = make_memory("inv-t", "test", Tier::Long, 5);
        for memory in [&source, &target] {
            insert(&conn, memory).unwrap();
        }
        create_link(&conn, &source.id, &target.id, "related_to").unwrap();

        let outcome = invalidate_link(&conn, &source.id, &target.id, "related_to", None)
            .unwrap()
            .expect("link must exist");

        // The stamp must parse as RFC3339 and sit within 60s of the
        // test's own "now"; the generous window tolerates slow runners.
        let stamped_at = chrono::DateTime::parse_from_rfc3339(&outcome.valid_until)
            .expect("default valid_until must be RFC3339")
            .with_timezone(&chrono::Utc);
        let now = chrono::Utc::now();
        let skew_secs = now.signed_duration_since(stamped_at).num_seconds().abs();
        assert!(
            skew_secs < 60,
            "default valid_until {} should be near now {now}",
            outcome.valid_until
        );
    }

    // On an empty database (no memories, no links) invalidation
    // succeeds and reports `None` instead of erroring.
    #[test]
    fn invalidate_link_returns_none_for_unknown_triple() {
        let conn = test_db();
        let outcome = invalidate_link(&conn, "missing-src", "missing-tgt", "related_to", None);
        assert!(outcome.unwrap().is_none());
    }

    // The relation is part of the lookup key: a `related_to` link must
    // not be matched by a request naming `supersedes`.
    #[test]
    fn invalidate_link_returns_none_when_relation_does_not_match() {
        let conn = test_db();
        let source = make_memory("inv-s", "test", Tier::Long, 5);
        let target = make_memory("inv-t", "test", Tier::Long, 5);
        for memory in [&source, &target] {
            insert(&conn, memory).unwrap();
        }
        create_link(&conn, &source.id, &target.id, "related_to").unwrap();

        let outcome = invalidate_link(&conn, &source.id, &target.id, "supersedes", None);
        assert!(
            outcome.unwrap().is_none(),
            "must not match across relation values"
        );
    }

    // Invalidating twice overwrites `valid_until`, and the second call
    // reports the first call's stamp as `previous_valid_until`.
    #[test]
    fn invalidate_link_overwrites_existing_valid_until_and_reports_prior() {
        let conn = test_db();
        let source = make_memory("inv-s", "test", Tier::Long, 5);
        let target = make_memory("inv-t", "test", Tier::Long, 5);
        for memory in [&source, &target] {
            insert(&conn, memory).unwrap();
        }
        create_link(&conn, &source.id, &target.id, "related_to").unwrap();

        // Invalidates the single link at the given stamp; it must exist.
        let invalidate_at = |stamp: &str| {
            invalidate_link(&conn, &source.id, &target.id, "related_to", Some(stamp))
                .unwrap()
                .unwrap()
        };
        let june = "2026-06-01T00:00:00+00:00";
        let december = "2026-12-01T00:00:00+00:00";

        // First invalidation: nothing was set before.
        let initial = invalidate_at(june);
        assert!(initial.previous_valid_until.is_none());

        // Second invalidation: prior value surfaces, new value wins.
        let repeat = invalidate_at(december);
        assert_eq!(repeat.previous_valid_until.as_deref(), Some(june));
        assert_eq!(repeat.valid_until, december);
    }

    // Two links share the same endpoints but differ in relation.
    // Invalidating the `related_to` one must leave the `supersedes`
    // sibling's `valid_until` untouched (NULL).
    #[test]
    fn invalidate_link_distinguishes_relation_when_multiple_links_share_endpoints() {
        let conn = test_db();
        let source = make_memory("inv-s", "test", Tier::Long, 5);
        let target = make_memory("inv-t", "test", Tier::Long, 5);
        for memory in [&source, &target] {
            insert(&conn, memory).unwrap();
        }
        for relation in ["related_to", "supersedes"] {
            create_link(&conn, &source.id, &target.id, relation).unwrap();
        }

        let stamp = "2026-07-15T12:00:00+00:00";
        invalidate_link(&conn, &source.id, &target.id, "related_to", Some(stamp))
            .unwrap()
            .unwrap();

        // Fetches `valid_until` for this endpoint pair via the given SQL.
        let stored_valid_until = |sql: &str| -> Option<String> {
            conn.query_row(sql, params![&source.id, &target.id], |row| row.get(0))
                .unwrap()
        };
        let related = stored_valid_until(
            "SELECT valid_until FROM memory_links \
                 WHERE source_id = ?1 AND target_id = ?2 AND relation = 'related_to'",
        );
        let supers = stored_valid_until(
            "SELECT valid_until FROM memory_links \
                 WHERE source_id = ?1 AND target_id = ?2 AND relation = 'supersedes'",
        );

        assert_eq!(related.as_deref(), Some(stamp));
        assert!(
            supers.is_none(),
            "the sibling 'supersedes' link must remain valid"
        );
    }

    // The invalidate UPDATE must only touch `valid_until`. This test
    // checks that `valid_from`, `observed_by` and `created_at` keep
    // the values they were inserted with.
    #[test]
    fn invalidate_link_preserves_other_columns() {
        let conn = test_db();
        let source = make_memory("inv-s", "test", Tier::Long, 5);
        let target = make_memory("inv-t", "test", Tier::Long, 5);
        for memory in [&source, &target] {
            insert(&conn, memory).unwrap();
        }

        // Seed the row by hand so every asserted column has a known value.
        let created_at = chrono::Utc::now().to_rfc3339();
        let seed_sql = "INSERT INTO memory_links \
             (source_id, target_id, relation, created_at, valid_from, observed_by) \
             VALUES (?1, ?2, 'related_to', ?3, '2026-01-01T00:00:00+00:00', 'agent-x')";
        conn.execute(seed_sql, params![&source.id, &target.id, &created_at])
            .unwrap();

        let cutoff = Some("2026-12-31T23:59:59+00:00");
        invalidate_link(&conn, &source.id, &target.id, "related_to", cutoff)
            .unwrap()
            .unwrap();

        let readback_sql = "SELECT valid_from, observed_by, created_at FROM memory_links \
                 WHERE source_id = ?1 AND target_id = ?2 AND relation = 'related_to'";
        let (valid_from, observed_by, stored_created_at): (
            Option<String>,
            Option<String>,
            String,
        ) = conn
            .query_row(readback_sql, params![&source.id, &target.id], |row| {
                Ok((row.get(0)?, row.get(1)?, row.get(2)?))
            })
            .unwrap();
        assert_eq!(valid_from.as_deref(), Some("2026-01-01T00:00:00+00:00"));
        assert_eq!(observed_by.as_deref(), Some("agent-x"));
        assert_eq!(stored_created_at, created_at);
    }

    // NHI-P3-T7 regression: earlier versions returned invalidated edges
    // from a default `kg_query`. The "current view" must drop any edge
    // whose `valid_until` lies in the past; passing
    // `include_invalidated = true` opts back in to the full set.
    #[test]
    fn kg_query_default_excludes_invalidated_edges() {
        let conn = test_db();
        let origin = make_memory("inv-src", "ns", Tier::Long, 5);
        let live = make_memory("inv-live", "ns", Tier::Long, 5);
        let dead = make_memory("inv-dead", "ns", Tier::Long, 5);
        for memory in [&origin, &live, &dead] {
            insert(&conn, memory).unwrap();
        }

        // Live edge: `valid_until` left unset.
        insert_link_full(&conn, &origin.id, &live.id, "related_to", None, None, None);
        // Dead edge: `valid_until` well in the past.
        let expired = Some("2020-01-01T00:00:00+00:00");
        insert_link_full(&conn, &origin.id, &dead.id, "supersedes", None, expired, None);

        // Default ("current view"): only the live edge shows up.
        let current = kg_query(&conn, &origin.id, 1, None, None, None, false).unwrap();
        assert_eq!(current.len(), 1);
        assert_eq!(current[0].target_id, live.id);

        // Opt-in: include_invalidated=true returns both edges.
        let everything = kg_query(&conn, &origin.id, 1, None, None, None, true).unwrap();
        assert_eq!(everything.len(), 2);
    }

    // NHI-P4-T19 (v0.7.0 NHI testing): operators opt into K9
    // namespace-lock semantics by writing the policy produced by
    // `GovernancePolicy::default_for_managed_namespace` into their
    // standard memory's metadata. Changing the implicit fallback in
    // `read_namespace_policy` is deferred to v0.7.1 because it would
    // break inheritance chains where parent and child standards were
    // registered under distinct agent identities. This test pins the
    // helper's documented shape.
    #[test]
    fn default_for_managed_namespace_helper_yields_write_owner() {
        use crate::models::{GovernanceLevel, GovernancePolicy};

        let core = GovernancePolicy::default_for_managed_namespace().core;
        assert_eq!(core.write, GovernanceLevel::Owner);
        assert_eq!(core.promote, GovernanceLevel::Any);
        assert_eq!(core.delete, GovernanceLevel::Owner);
        assert!(core.inherit);
    }

    // NHI-P4-T19 regression: when the operator explicitly writes
    // `governance.write=owner` into the standard memory's metadata,
    // resolving the namespace's policy must yield write = Owner. This
    // is the opt-in path the v0.7.0 verdict recommends documenting;
    // `default_for_managed_namespace` is the canonical policy shape.
    #[test]
    fn namespace_set_standard_with_explicit_owner_policy_enforces_lock() {
        use crate::models::{GovernanceLevel, GovernancePolicy};

        let conn = test_db();
        let namespace = "ns/locked";

        // Standard memory carrying the managed-namespace policy under
        // the `governance` metadata key.
        let governance =
            serde_json::to_value(GovernancePolicy::default_for_managed_namespace()).unwrap();
        let mut standard = make_memory("std", namespace, Tier::Long, 8);
        standard.metadata = serde_json::json!({"governance": governance});
        let standard_id = insert(&conn, &standard).unwrap();
        set_namespace_standard(&conn, namespace, &standard_id, None).unwrap();

        let resolved = resolve_governance_policy(&conn, namespace)
            .expect("policy must resolve when explicitly set");
        assert_eq!(resolved.core.write, GovernanceLevel::Owner);
    }

    /// F1 regression (v0.7.0 round-2-fixes): a parent namespace sets
    /// `governance.write = owner` with `inherit: true`, and a deep
    /// child namespace has no standard of its own. The owner-level
    /// check must find the namespace owner by walking the same chain
    /// that `resolve_governance_policy` walks. Before the fix the
    /// helper inspected only the leaf's standard, got `None`, and
    /// produced a "no resolvable owner" Deny even for the rightful
    /// owner.
    #[test]
    fn enforce_governance_inherits_owner_for_deep_child_owner_write() {
        use crate::config::{
            PermissionsMode, lock_permissions_mode_for_test,
            override_active_permissions_mode_for_test,
        };
        use crate::models::{
            ApproverType, CorePolicy, GovernanceDecision, GovernanceLevel, GovernancePolicy,
            GovernedAction, default_metadata,
        };

        let _gate = lock_permissions_mode_for_test();
        override_active_permissions_mode_for_test(PermissionsMode::Enforce);

        let conn = test_db();

        // Parent standard: write = owner, delete = owner, inherit = true.
        let parent_ns = "f1/parent";
        let owner = "ai:alice";
        let core = CorePolicy {
            write: GovernanceLevel::Owner,
            promote: GovernanceLevel::Any,
            delete: GovernanceLevel::Owner,
            approver: ApproverType::Human,
            inherit: true,
            max_reflection_depth: None,
        };
        let policy = GovernancePolicy {
            core,
            ..Default::default()
        };

        // The standard's metadata records both the owning agent and the
        // serialised governance policy.
        let mut standard_meta = default_metadata();
        if let Some(fields) = standard_meta.as_object_mut() {
            fields.insert(String::from("agent_id"), serde_json::Value::from(owner));
            fields.insert(
                String::from("governance"),
                serde_json::to_value(&policy).unwrap(),
            );
        }

        let stamp = chrono::Utc::now().to_rfc3339();
        let standard = Memory {
            id: uuid::Uuid::new_v4().to_string(),
            tier: Tier::Long,
            namespace: format!("_standards-{parent_ns}"),
            title: String::from("f1-standard"),
            content: String::from("f1 policy"),
            tags: Vec::new(),
            priority: 9,
            confidence: 1.0,
            source: String::from("test"),
            access_count: 0,
            created_at: stamp.clone(),
            updated_at: stamp,
            last_accessed_at: None,
            expires_at: None,
            metadata: standard_meta,
            reflection_depth: 0,
            memory_kind: crate::models::MemoryKind::Observation,
            entity_id: None,
            persona_version: None,
            citations: Vec::new(),
            source_uri: None,
            source_span: None,
            confidence_source: ConfidenceSource::CallerProvided,
            confidence_signals: None,
            confidence_decayed_at: None,
            version: 1,
        };
        let standard_id = insert(&conn, &standard).unwrap();
        set_namespace_standard(&conn, parent_ns, &standard_id, None).unwrap();

        // The deep child carries NO standard; everything has to be
        // resolved through the chain walk.
        let child_ns = "f1/parent/a/b/c";
        let payload = serde_json::json!({"title": "deep-child"});

        // Case 1 — the rightful owner writes at the deep child: ALLOW.
        let allow = enforce_governance(
            &conn,
            GovernedAction::Store,
            child_ns,
            owner,
            None,
            None,
            &payload,
        )
        .expect("enforce_governance must not error on inherited owner policy");
        assert!(
            matches!(allow, GovernanceDecision::Allow),
            "owner write at deep child must Allow when chain walk finds the parent's owner: got {allow:?}"
        );

        // Case 2 — a different agent writes at the deep child: DENY,
        // citing the ownership mismatch at Owner level.
        let intruder = "ai:eve";
        let verdict = enforce_governance(
            &conn,
            GovernedAction::Store,
            child_ns,
            intruder,
            None,
            None,
            &payload,
        )
        .expect("enforce_governance must not error");
        match verdict {
            GovernanceDecision::Deny(refusal) => {
                assert!(
                    refusal.reason.contains("not the owner"),
                    "non-owner deny should cite ownership mismatch, got: {refusal:?}"
                );
                assert_eq!(
                    refusal.denied_level,
                    GovernanceLevel::Owner,
                    "owner-level refusal must carry GovernanceLevel::Owner; got {refusal:?}",
                );
            }
            other => panic!("expected Deny for non-owner, got {other:?}"),
        }
    }

    /// F1 corollary: a parent policy with `inherit = false` is still
    /// the policy a standard-less deep child resolves to.
    ///
    /// Under `resolve_governance_policy` semantics the `inherit` flag
    /// is documentation/contract: the leaf-first walk stops at the
    /// most-specific policy that exists, whatever the flag says. The
    /// flag flows through to consumers (e.g. `pending_action` approver
    /// resolution) to signal "do not re-walk above me" — its "stop"
    /// semantics apply when an intermediate policy declines to be
    /// inherited above itself, not below.
    ///
    /// Observable outcome pinned here: the owner's write at a deep
    /// child is allowed, because the walk finds the parent's owner
    /// even though the parent sets `inherit = false`.
    #[test]
    fn enforce_governance_deep_child_with_inherit_false_still_resolves_via_walk() {
        use crate::config::{
            PermissionsMode, lock_permissions_mode_for_test,
            override_active_permissions_mode_for_test,
        };
        use crate::models::{
            ApproverType, CorePolicy, GovernanceDecision, GovernanceLevel, GovernancePolicy,
            GovernedAction, default_metadata,
        };

        let _gate = lock_permissions_mode_for_test();
        override_active_permissions_mode_for_test(PermissionsMode::Enforce);

        let conn = test_db();

        // inherit=false on the parent is a forward-blocker ("nothing
        // above me applies to namespaces I govern"), not a
        // backward-blocker ("namespaces below me cannot inherit").
        // Descendants with no policy of their own therefore still land
        // on this policy during the leaf-first walk, matching the
        // semantics documented on `resolve_governance_policy`.
        let parent_ns = "f1nb/parent";
        let owner = "ai:alice";
        let core = CorePolicy {
            write: GovernanceLevel::Owner,
            promote: GovernanceLevel::Any,
            delete: GovernanceLevel::Owner,
            approver: ApproverType::Human,
            inherit: false,
            max_reflection_depth: None,
        };
        let policy = GovernancePolicy {
            core,
            ..Default::default()
        };

        // The standard's metadata records both the owning agent and the
        // serialised governance policy.
        let mut standard_meta = default_metadata();
        if let Some(fields) = standard_meta.as_object_mut() {
            fields.insert(String::from("agent_id"), serde_json::Value::from(owner));
            fields.insert(
                String::from("governance"),
                serde_json::to_value(&policy).unwrap(),
            );
        }

        let stamp = chrono::Utc::now().to_rfc3339();
        let standard = Memory {
            id: uuid::Uuid::new_v4().to_string(),
            tier: Tier::Long,
            namespace: format!("_standards-{parent_ns}"),
            title: String::from("f1nb-standard"),
            content: String::from("policy"),
            tags: Vec::new(),
            priority: 9,
            confidence: 1.0,
            source: String::from("test"),
            access_count: 0,
            created_at: stamp.clone(),
            updated_at: stamp,
            last_accessed_at: None,
            expires_at: None,
            metadata: standard_meta,
            reflection_depth: 0,
            memory_kind: crate::models::MemoryKind::Observation,
            entity_id: None,
            persona_version: None,
            citations: Vec::new(),
            source_uri: None,
            source_span: None,
            confidence_source: ConfidenceSource::CallerProvided,
            confidence_signals: None,
            confidence_decayed_at: None,
            version: 1,
        };
        let standard_id = insert(&conn, &standard).unwrap();
        set_namespace_standard(&conn, parent_ns, &standard_id, None).unwrap();

        // The owner writes two levels below the parent: still Allow,
        // since the walk finds the parent's owner and inherit=false
        // does not block descendants.
        let child_ns = "f1nb/parent/x/y";
        let empty_payload = serde_json::json!({});
        let decision = enforce_governance(
            &conn,
            GovernedAction::Store,
            child_ns,
            owner,
            None,
            None,
            &empty_payload,
        )
        .unwrap();
        assert!(
            matches!(decision, GovernanceDecision::Allow),
            "owner write at deep child resolves via leaf-first walk: got {decision:?}"
        );
    }

    /// NHI-P3-T7 regression: by default `find_paths` must ignore edges
    /// whose `valid_until` lies in the past; the full historical link
    /// graph is only walked when the caller opts in.
    #[test]
    fn find_paths_default_excludes_invalidated_edges() {
        let conn = test_db();
        let [a, b, c] =
            ["fp-a", "fp-b", "fp-c"].map(|title| make_memory(title, "ns", Tier::Long, 5));
        for memory in [&a, &b, &c] {
            insert(&conn, memory).unwrap();
        }

        // Live route: A → C directly.
        insert_link_full(&conn, &a.id, &c.id, "related_to", None, None, None);
        // Dead route: A → B → C, where the A→B leg was invalidated in 2020.
        let invalidated_at = Some("2020-01-01T00:00:00+00:00");
        insert_link_full(
            &conn,
            &a.id,
            &b.id,
            "supersedes",
            None,
            invalidated_at,
            None,
        );
        insert_link_full(&conn, &b.id, &c.id, "related_to", None, None, None);

        // include_invalidated=false: only the direct A→C path survives.
        let live_only = find_paths(&conn, &a.id, &c.id, Some(3), None, false).unwrap();
        assert_eq!(live_only.len(), 1);
        assert_eq!(live_only[0], vec![a.id.clone(), c.id.clone()]);

        // include_invalidated=true: both routes are reported.
        let with_history = find_paths(&conn, &a.id, &c.id, Some(3), None, true).unwrap();
        assert_eq!(with_history.len(), 2);
    }

    // -- Pillar 2 / Stream C — kg_query (depth=1) ---------------------------

    /// Test helper: write one `memory_links` row with caller-chosen
    /// `valid_from`, `valid_until` and `observed_by` values, so the
    /// `kg_query` filter tests can pin behavior without depending on
    /// wall-clock spread. `created_at` is always the current UTC time.
    ///
    /// Panics if the INSERT fails.
    fn insert_link_full(
        conn: &Connection,
        source_id: &str,
        target_id: &str,
        relation: &str,
        valid_from: Option<&str>,
        valid_until: Option<&str>,
        observed_by: Option<&str>,
    ) {
        let insert_sql = "INSERT INTO memory_links \
             (source_id, target_id, relation, created_at, valid_from, valid_until, observed_by) \
             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)";
        let created_at = chrono::Utc::now().to_rfc3339();
        let bound = params![
            source_id,
            target_id,
            relation,
            created_at,
            valid_from,
            valid_until,
            observed_by
        ];
        conn.execute(insert_sql, bound).unwrap();
    }

    /// A depth-1 `kg_query` with no filters returns both outbound
    /// neighbors of the source, and each node carries the link and
    /// target details asserted below.
    #[test]
    fn kg_query_returns_outbound_neighbors_at_depth_1() {
        let conn = test_db();
        let namespace = "kg/projects/alpha";
        let src = make_memory("alpha", namespace, Tier::Long, 5);
        let kickoff = make_memory("kickoff", namespace, Tier::Long, 5);
        let design = make_memory("design", namespace, Tier::Long, 5);
        for memory in [&src, &kickoff, &design] {
            insert(&conn, memory).unwrap();
        }

        // (target, relation, valid_from, observed_by) for each edge.
        let edges = [
            (&kickoff, "related_to", "2026-01-15T00:00:00+00:00", "agent-1"),
            (&design, "supersedes", "2026-02-03T00:00:00+00:00", "agent-2"),
        ];
        for (target, relation, valid_from, agent) in edges {
            insert_link_full(
                &conn,
                &src.id,
                &target.id,
                relation,
                Some(valid_from),
                None,
                Some(agent),
            );
        }

        let nodes = kg_query(&conn, &src.id, 1, None, None, None, false).unwrap();
        assert_eq!(nodes.len(), 2);

        // Results are ordered by COALESCE(valid_from, created_at) ASC,
        // so the January edge comes before the February one.
        let first = &nodes[0];
        assert_eq!(first.target_id, kickoff.id);
        assert_eq!(nodes[1].target_id, design.id);
        assert_eq!(first.title, "kickoff");
        assert_eq!(first.relation, "related_to");
        assert_eq!(first.observed_by.as_deref(), Some("agent-1"));
        assert_eq!(first.depth, 1);
        assert_eq!(first.path, format!("{}->{}", src.id, kickoff.id));
        assert_eq!(first.target_namespace, "kg/projects/alpha");
    }

    /// `valid_at` must select only the links whose validity window
    /// contains the requested instant.
    #[test]
    fn kg_query_filters_by_valid_at_window() {
        let conn = test_db();
        let src = make_memory("e", "ns", Tier::Long, 5);
        let early = make_memory("e1", "ns", Tier::Long, 5);
        let late = make_memory("e2", "ns", Tier::Long, 5);
        for memory in [&src, &early, &late] {
            insert(&conn, memory).unwrap();
        }

        // `early` is valid 2026-01-01 → 2026-02-01.
        insert_link_full(
            &conn,
            &src.id,
            &early.id,
            "related_to",
            Some("2026-01-01T00:00:00+00:00"),
            Some("2026-02-01T00:00:00+00:00"),
            None,
        );
        // `late` is valid from 2026-03-01 with no end.
        insert_link_full(
            &conn,
            &src.id,
            &late.id,
            "related_to",
            Some("2026-03-01T00:00:00+00:00"),
            None,
            None,
        );

        // Depth-1 query pinned to a single instant.
        let neighbors_at = |valid_at: &str| {
            kg_query(&conn, &src.id, 1, Some(valid_at), None, None, false).unwrap()
        };

        // Mid-January: only the early link is open.
        let january = neighbors_at("2026-01-15T00:00:00+00:00");
        assert_eq!(january.len(), 1);
        assert_eq!(january[0].target_id, early.id);

        // Mid-February: the early link has closed and the late one has
        // not opened yet, so nothing matches.
        let february = neighbors_at("2026-02-15T00:00:00+00:00");
        assert!(february.is_empty());

        // April: only the late link is open.
        let april = neighbors_at("2026-04-01T00:00:00+00:00");
        assert_eq!(april.len(), 1);
        assert_eq!(april[0].target_id, late.id);
    }

    /// A link stored with NULL `valid_from` must be hidden from a
    /// temporally scoped query (there is no way to tell whether it was
    /// valid at the requested instant) but still visible to an
    /// unscoped one.
    #[test]
    fn kg_query_skips_null_valid_from_when_valid_at_filter_active() {
        let conn = test_db();
        let src = make_memory("s", "ns", Tier::Long, 5);
        let target = make_memory("t", "ns", Tier::Long, 5);
        for memory in [&src, &target] {
            insert(&conn, memory).unwrap();
        }
        insert_link_full(&conn, &src.id, &target.id, "related_to", None, None, None);

        // Scoped to an instant: the undated link is filtered out.
        let scoped = kg_query(
            &conn,
            &src.id,
            1,
            Some("2026-01-15T00:00:00+00:00"),
            None,
            None,
            false,
        )
        .unwrap();
        assert!(scoped.is_empty());

        // Unscoped: the very same link comes back.
        let unscoped = kg_query(&conn, &src.id, 1, None, None, None, false).unwrap();
        assert_eq!(unscoped.len(), 1);
        assert_eq!(unscoped[0].target_id, target.id);
    }

    /// The agent allowlist keeps only links whose `observed_by` is in
    /// the list; links with NULL `observed_by` never match.
    #[test]
    fn kg_query_filters_by_allowed_agents() {
        let conn = test_db();
        let src = make_memory("s", "ns", Tier::Long, 5);
        let t1 = make_memory("t1", "ns", Tier::Long, 5);
        let t2 = make_memory("t2", "ns", Tier::Long, 5);
        let t3 = make_memory("t3", "ns", Tier::Long, 5);
        for memory in [&src, &t1, &t2, &t3] {
            insert(&conn, memory).unwrap();
        }

        // (target, valid_from, observed_by). The third link has no
        // observer and must drop out as soon as an agent filter is
        // active (`NULL IN (...)` is NULL/false in SQLite).
        let edges = [
            (&t1, "2026-01-01T00:00:00+00:00", Some("agent-a")),
            (&t2, "2026-01-02T00:00:00+00:00", Some("agent-b")),
            (&t3, "2026-01-03T00:00:00+00:00", None),
        ];
        for (target, valid_from, observer) in edges {
            insert_link_full(
                &conn,
                &src.id,
                &target.id,
                "related_to",
                Some(valid_from),
                None,
                observer,
            );
        }

        let allow_a = vec![String::from("agent-a")];
        let seen_by_a = kg_query(&conn, &src.id, 1, None, Some(&allow_a), None, false).unwrap();
        assert_eq!(seen_by_a.len(), 1);
        assert_eq!(seen_by_a[0].target_id, t1.id);

        let allow_both = vec![String::from("agent-a"), String::from("agent-b")];
        let seen_by_both =
            kg_query(&conn, &src.id, 1, None, Some(&allow_both), None, false).unwrap();
        assert_eq!(seen_by_both.len(), 2);
    }

    /// An empty allowlist means "no agents trusted": the query must
    /// return zero rows rather than silently behaving like the
    /// unfiltered path.
    #[test]
    fn kg_query_empty_allowed_agents_returns_zero_rows() {
        let conn = test_db();
        let src = make_memory("s", "ns", Tier::Long, 5);
        let target = make_memory("t", "ns", Tier::Long, 5);
        for memory in [&src, &target] {
            insert(&conn, memory).unwrap();
        }
        insert_link_full(
            &conn,
            &src.id,
            &target.id,
            "related_to",
            Some("2026-01-01T00:00:00+00:00"),
            None,
            Some("agent-a"),
        );

        // Baseline: with no agent filter the link is visible.
        let baseline = kg_query(&conn, &src.id, 1, None, None, None, false).unwrap();
        assert_eq!(baseline.len(), 1);

        // Same query with an empty allowlist: nothing may come back.
        let no_agents = Vec::<String>::new();
        let filtered = kg_query(&conn, &src.id, 1, None, Some(&no_agents), None, false).unwrap();
        assert!(filtered.is_empty());
    }

    /// `max_depth = 0` is rejected with an error that names `max_depth`.
    #[test]
    fn kg_query_rejects_max_depth_zero() {
        let conn = test_db();
        let src = make_memory("s", "ns", Tier::Long, 5);
        insert(&conn, &src).unwrap();

        let message = kg_query(&conn, &src.id, 0, None, None, None, false)
            .unwrap_err()
            .to_string();
        assert!(message.contains("max_depth"));
    }

    /// Asking for one hop more than `KG_QUERY_MAX_SUPPORTED_DEPTH` must
    /// fail with an explicit error, so callers learn they hit the
    /// ceiling instead of receiving a partial graph. The message has to
    /// mention both the requested and the supported depth.
    #[test]
    fn kg_query_rejects_unsupported_max_depth() {
        let conn = test_db();
        let src = make_memory("s", "ns", Tier::Long, 5);
        insert(&conn, &src).unwrap();

        let too_deep = KG_QUERY_MAX_SUPPORTED_DEPTH + 1;
        let message = kg_query(&conn, &src.id, too_deep, None, None, None, false)
            .unwrap_err()
            .to_string();

        let requested = format!("max_depth={too_deep}");
        let supported = format!("supported depth={KG_QUERY_MAX_SUPPORTED_DEPTH}");
        assert!(message.contains(&requested));
        assert!(message.contains(&supported));
    }

    /// Chain src -> mid -> leaf: depth=1 stops at `mid`, depth=2 returns
    /// both hops with `depth` and `path` reflecting the chain.
    #[test]
    fn kg_query_traverses_multiple_hops() {
        let conn = test_db();
        let src = make_memory("src", "ns", Tier::Long, 5);
        let mid = make_memory("mid", "ns", Tier::Long, 5);
        let leaf = make_memory("leaf", "ns", Tier::Long, 5);
        for memory in [&src, &mid, &leaf] {
            insert(&conn, memory).unwrap();
        }

        // (from, to, relation, valid_from); both edges observed by agent-x.
        let chain = [
            (&src, &mid, "related_to", "2026-01-01T00:00:00+00:00"),
            (&mid, &leaf, "supersedes", "2026-01-02T00:00:00+00:00"),
        ];
        for (from, to, relation, valid_from) in chain {
            insert_link_full(
                &conn,
                &from.id,
                &to.id,
                relation,
                Some(valid_from),
                None,
                Some("agent-x"),
            );
        }

        // One hop: only `mid` is reachable.
        let one_hop = kg_query(&conn, &src.id, 1, None, None, None, false).unwrap();
        assert_eq!(one_hop.len(), 1);
        assert_eq!(one_hop[0].target_id, mid.id);
        assert_eq!(one_hop[0].depth, 1);

        // Two hops: both nodes, shallowest first.
        let two_hops = kg_query(&conn, &src.id, 2, None, None, None, false).unwrap();
        assert_eq!(two_hops.len(), 2);
        let (near, far) = (&two_hops[0], &two_hops[1]);
        assert_eq!(near.target_id, mid.id);
        assert_eq!(near.depth, 1);
        assert_eq!(near.path, format!("{}->{}", src.id, mid.id));
        assert_eq!(far.target_id, leaf.id);
        assert_eq!(far.depth, 2);
        assert_eq!(far.relation, "supersedes");
        assert_eq!(far.path, format!("{}->{}->{}", src.id, mid.id, leaf.id));
    }

    /// The `valid_at` filter is applied to every hop individually.
    ///
    /// src -> mid is valid 2026-01..02 and mid -> leaf is valid from
    /// 2026-04. In mid-January the second hop is not yet valid, so only
    /// `mid` is returned; in mid-April the first hop has closed, so
    /// nothing is reachable.
    #[test]
    fn kg_query_multi_hop_respects_valid_at_per_hop() {
        let conn = test_db();
        let src = make_memory("s", "ns", Tier::Long, 5);
        let mid = make_memory("m", "ns", Tier::Long, 5);
        let leaf = make_memory("l", "ns", Tier::Long, 5);
        for memory in [&src, &mid, &leaf] {
            insert(&conn, memory).unwrap();
        }

        insert_link_full(
            &conn,
            &src.id,
            &mid.id,
            "related_to",
            Some("2026-01-01T00:00:00+00:00"),
            Some("2026-02-01T00:00:00+00:00"),
            None,
        );
        insert_link_full(
            &conn,
            &mid.id,
            &leaf.id,
            "related_to",
            Some("2026-04-01T00:00:00+00:00"),
            None,
            None,
        );

        // Up-to-three-hop traversal pinned to a single instant.
        let reachable_at = |valid_at: &str| {
            kg_query(&conn, &src.id, 3, Some(valid_at), None, None, false).unwrap()
        };

        let january = reachable_at("2026-01-15T00:00:00+00:00");
        assert_eq!(january.len(), 1);
        assert_eq!(january[0].target_id, mid.id);

        let april = reachable_at("2026-04-15T00:00:00+00:00");
        assert!(april.is_empty());
    }

    /// a -> b -> c -> a is a cycle. Even at max_depth=5 the traversal
    /// must not revisit a node that is already on the current path, so
    /// each reachable node shows up at most once.
    #[test]
    fn kg_query_detects_cycles() {
        let conn = test_db();
        let a = make_memory("a", "ns", Tier::Long, 5);
        let b = make_memory("b", "ns", Tier::Long, 5);
        let c = make_memory("c", "ns", Tier::Long, 5);
        for memory in [&a, &b, &c] {
            insert(&conn, memory).unwrap();
        }

        // (from, to, valid_from) for each edge of the ring.
        let ring = [
            (&a, &b, "2026-01-01T00:00:00+00:00"),
            (&b, &c, "2026-01-02T00:00:00+00:00"),
            (&c, &a, "2026-01-03T00:00:00+00:00"),
        ];
        for (from, to, valid_from) in ring {
            insert_link_full(
                &conn,
                &from.id,
                &to.id,
                "related_to",
                Some(valid_from),
                None,
                None,
            );
        }

        let nodes = kg_query(&conn, &a.id, 5, None, None, None, false).unwrap();

        // Expected: b at depth 1, c at depth 2, and the edge back to a
        // pruned. The c->a edge could only bring a back at depth 3 if a
        // were absent from its own path, but the anchor seeds the path
        // with `a->b`, so a is on every descendant path through b/c.
        assert_eq!(nodes.len(), 2);
        let (first, second) = (&nodes[0], &nodes[1]);
        assert_eq!(first.target_id, b.id);
        assert_eq!(first.depth, 1);
        assert_eq!(second.target_id, c.id);
        assert_eq!(second.depth, 2);
    }

    /// The agent allowlist is applied to every hop individually.
    ///
    /// src -> mid was observed by agent-a and mid -> leaf by agent-b:
    /// allowing only agent-a keeps just the first hop, allowing both
    /// surfaces the whole chain.
    #[test]
    fn kg_query_multi_hop_filters_by_allowed_agents_per_hop() {
        let conn = test_db();
        let src = make_memory("s", "ns", Tier::Long, 5);
        let mid = make_memory("m", "ns", Tier::Long, 5);
        let leaf = make_memory("l", "ns", Tier::Long, 5);
        for memory in [&src, &mid, &leaf] {
            insert(&conn, memory).unwrap();
        }

        // (from, to, valid_from, observed_by) for each hop.
        let hops = [
            (&src, &mid, "2026-01-01T00:00:00+00:00", "agent-a"),
            (&mid, &leaf, "2026-01-02T00:00:00+00:00", "agent-b"),
        ];
        for (from, to, valid_from, observer) in hops {
            insert_link_full(
                &conn,
                &from.id,
                &to.id,
                "related_to",
                Some(valid_from),
                None,
                Some(observer),
            );
        }

        let allow_a = vec![String::from("agent-a")];
        let first_hop_only =
            kg_query(&conn, &src.id, 3, None, Some(&allow_a), None, false).unwrap();
        assert_eq!(first_hop_only.len(), 1);
        assert_eq!(first_hop_only[0].target_id, mid.id);

        let allow_both = vec![String::from("agent-a"), String::from("agent-b")];
        let whole_chain =
            kg_query(&conn, &src.id, 3, None, Some(&allow_both), None, false).unwrap();
        assert_eq!(whole_chain.len(), 2);
        let deepest = &whole_chain[1];
        assert_eq!(deepest.target_id, leaf.id);
        assert_eq!(deepest.depth, 2);
    }

    /// Out-of-range `limit` values are clamped rather than rejected:
    /// a huge limit still returns every row, and zero is raised to one.
    #[test]
    fn kg_query_limit_clamped_to_max() {
        let conn = test_db();
        let src = make_memory("s", "ns", Tier::Long, 5);
        insert(&conn, &src).unwrap();

        // Three neighbors t0..t2, valid from January 1st, 2nd and 3rd.
        for day in 1..=3 {
            let title = format!("t{}", day - 1);
            let target = make_memory(&title, "ns", Tier::Long, 5);
            insert(&conn, &target).unwrap();

            let valid_from = format!("2026-01-{day:02}T00:00:00+00:00");
            insert_link_full(
                &conn,
                &src.id,
                &target.id,
                "related_to",
                Some(valid_from.as_str()),
                None,
                None,
            );
        }

        // usize::MAX is clamped down to KG_QUERY_MAX_LIMIT (1000), which
        // still exceeds the three rows present, so all of them return.
        let clamped_high =
            kg_query(&conn, &src.id, 1, None, None, Some(usize::MAX), false).unwrap();
        assert_eq!(clamped_high.len(), 3);

        // A limit of 0 is clamped up to 1.
        let clamped_low = kg_query(&conn, &src.id, 1, None, None, Some(0), false).unwrap();
        assert_eq!(clamped_low.len(), 1);
    }

    /// Querying from an id that was never inserted succeeds and yields
    /// an empty result.
    #[test]
    fn kg_query_empty_for_unknown_source() {
        let conn = test_db();
        let missing_id = "no-such-id";
        let neighbors = kg_query(&conn, missing_id, 1, None, None, None, false).unwrap();
        assert!(neighbors.is_empty());
    }

    /// Re-running the v15 migration must backfill `valid_from` on a link
    /// that was stored with `valid_from = NULL`.
    ///
    /// The pre-migration state is simulated rather than loaded from a
    /// real v14 file: a fresh database is opened, a link with NULL
    /// `valid_from` is inserted, `schema_version` is rolled back to 14,
    /// and the database is re-opened so the v15 block runs its backfill
    /// UPDATE again. Per the migration's intent the backfilled value is
    /// the source memory's `created_at`; this test only asserts that
    /// the column is no longer NULL.
    #[test]
    fn schema_v15_existing_links_get_valid_from_backfilled() {
        // The database lives in its own temporary directory. Dropping
        // `scratch` removes that directory with everything in it (the
        // main file plus any sidecar files SQLite may create next to
        // it), and that also happens when an unwrap/assert below panics.
        // `conn2` is declared after `scratch`, so the connection is
        // closed before the directory is deleted.
        let scratch = tempfile::tempdir().unwrap();
        let path = scratch.path().join("ai_memory_v15_backfill.db");
        {
            let conn = open(&path).unwrap();
            let src = make_memory("src", "test", Tier::Long, 5);
            let tgt = make_memory("tgt", "test", Tier::Long, 5);
            insert(&conn, &src).unwrap();
            insert(&conn, &tgt).unwrap();
            // Insert a link directly with NULL valid_from to mimic
            // pre-migration state.
            conn.execute(
                "INSERT INTO memory_links (source_id, target_id, relation, created_at, valid_from) \
                 VALUES (?1, ?2, 'related_to', ?3, NULL)",
                params![&src.id, &tgt.id, &chrono::Utc::now().to_rfc3339()],
            )
            .unwrap();
            // Roll schema back to v14; the re-open below re-runs migrate.
            conn.execute("DELETE FROM schema_version", []).unwrap();
            conn.execute("INSERT INTO schema_version (version) VALUES (14)", [])
                .unwrap();
            // `conn` is closed at the end of this scope, before re-opening.
        }

        let conn2 = open(&path).unwrap();
        let backfilled: Option<String> = conn2
            .query_row("SELECT valid_from FROM memory_links LIMIT 1", [], |r| {
                r.get(0)
            })
            .unwrap();
        assert!(
            backfilled.is_some(),
            "expected valid_from to be backfilled, got NULL"
        );
    }

    /// The `idx_memories_namespace` index must exist on a fresh
    /// database, and a namespace-prefix `LIKE` query must run cleanly.
    ///
    /// The query plan is deliberately not inspected: EXPLAIN QUERY PLAN
    /// output varies by SQLite version and planner heuristics, so the
    /// test settles for "index present" plus "query completes".
    #[test]
    fn namespace_prefix_query_index_available() {
        let conn = test_db();

        let index_name: Option<String> = conn
            .query_row(
                "SELECT name FROM sqlite_master WHERE type='index' AND name='idx_memories_namespace'",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(
            index_name.as_deref(),
            Some("idx_memories_namespace"),
            "idx_memories_namespace index should exist"
        );

        // Prefix LIKE against the empty table: must compile, run and
        // count zero rows.
        let matching: i64 = conn
            .query_row(
                "SELECT COUNT(*) FROM memories WHERE namespace LIKE 'test/%'",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(matching, 0);
    }

    // -----------------------------------------------------------------
    // Doctor (P7) helper unit tests.
    // -----------------------------------------------------------------

    /// On a fresh post-P2 database (schema v18+) the `embedding_dim`
    /// column exists and no row violates it, so the helper must report
    /// `Some(0)`. `None` was the pre-P2 answer for "column not yet
    /// present"; that path is now obsolete.
    #[test]
    fn doctor_dim_violations_post_p2_returns_zero_on_fresh_db() {
        let conn = test_db();
        let violations = doctor_dim_violations(&conn).unwrap();
        assert_eq!(violations, Some(0));
    }

    /// With no pending actions at all, the oldest-pending age is `None`.
    #[test]
    fn doctor_oldest_pending_age_secs_empty_queue() {
        let conn = test_db();
        let oldest_age = doctor_oldest_pending_age_secs(&conn).unwrap();
        assert_eq!(oldest_age, None);
    }

    /// A single pending action requested one hour ago must be reported
    /// with an age of roughly 3600 seconds.
    #[test]
    fn doctor_oldest_pending_age_secs_reports_age() {
        let conn = test_db();
        let requested_at = (Utc::now() - chrono::Duration::hours(1)).to_rfc3339();
        conn.execute(
            "INSERT INTO pending_actions (id, action_type, namespace, payload, requested_by, requested_at, status)
             VALUES ('p1', 'store', 'ns', '{}', 'agent', ?1, 'pending')",
            params![requested_at],
        )
        .unwrap();

        let reported = doctor_oldest_pending_age_secs(&conn).unwrap();
        let age = reported.unwrap();
        // The test machine's clock is the source of truth, so accept a
        // generous ±100s window around one hour.
        let tolerated = 3500..=3700;
        assert!(tolerated.contains(&age), "expected ~3600s, got {age}");
    }

    /// On a database with no namespaces, governance coverage reports
    /// zero for both counts.
    #[test]
    fn doctor_governance_coverage_with_namespace_meta() {
        let conn = test_db();
        let coverage = doctor_governance_coverage(&conn).unwrap();
        let (governed, ungoverned) = coverage;
        assert_eq!((governed, ungoverned), (0, 0));
    }

    /// For the inheritance chain root -> a -> a/b -> a/b/c, the depth
    /// distribution must hold exactly one namespace at each of the
    /// depths 0 through 3.
    #[test]
    fn doctor_governance_depth_distribution_chains() {
        let conn = test_db();
        let now = Utc::now().to_rfc3339();

        // One statement per namespace, parents before children.
        let chain_inserts = [
            "INSERT INTO namespace_meta (namespace, parent_namespace, updated_at) VALUES ('root', NULL, ?1)",
            "INSERT INTO namespace_meta (namespace, parent_namespace, updated_at) VALUES ('a', 'root', ?1)",
            "INSERT INTO namespace_meta (namespace, parent_namespace, updated_at) VALUES ('a/b', 'a', ?1)",
            "INSERT INTO namespace_meta (namespace, parent_namespace, updated_at) VALUES ('a/b/c', 'a/b', ?1)",
        ];
        for sql in chain_inserts {
            conn.execute(sql, params![now]).unwrap();
        }

        let dist = doctor_governance_depth_distribution(&conn).unwrap();
        let expectations = [
            "root has depth 0",
            "a has depth 1",
            "a/b has depth 2",
            "a/b/c has depth 3",
        ];
        for (depth, label) in expectations.into_iter().enumerate() {
            assert_eq!(dist[depth], 1, "{label}");
        }
    }

    /// A fresh database reports zero dispatched and zero failed
    /// webhook deliveries.
    #[test]
    fn doctor_webhook_delivery_totals_empty() {
        let conn = test_db();
        let totals = doctor_webhook_delivery_totals(&conn).unwrap();
        let (sent, errored) = totals;
        assert_eq!((sent, errored), (0, 0));
    }

    /// A fresh database has no sync skew to report: the helper
    /// returns `None`.
    #[test]
    fn doctor_max_sync_skew_secs_empty() {
        let conn = test_db();
        let max_skew = doctor_max_sync_skew_secs(&conn).unwrap();
        assert_eq!(max_skew, None);
    }

    // ---- v0.6.4-009 — capability-expansion audit log ----

    /// Records one granted and one denied capability expansion, then
    /// checks that listing returns both rows, newest first, with the
    /// agent, family and outcome that were recorded.
    #[test]
    fn audit_log_record_and_list_grant_and_deny() {
        let conn = test_db();
        record_capability_expansion(&conn, Some("alice"), "graph", true, None);
        record_capability_expansion(&conn, Some("bob"), "power", false, None);

        let entries = list_capability_expansions(&conn, 50, None).unwrap();
        assert_eq!(entries.len(), 2);
        // Ordering is newest first.
        assert!(entries[0].timestamp >= entries[1].timestamp);

        // Alice's request was granted for the "graph" family.
        let alice_entry = entries
            .iter()
            .find(|entry| entry.agent_id.as_deref() == Some("alice"))
            .unwrap();
        assert!(alice_entry.granted);
        assert_eq!(alice_entry.requested_family.as_deref(), Some("graph"));

        // Bob's request was denied for the "power" family.
        let bob_entry = entries
            .iter()
            .find(|entry| entry.agent_id.as_deref() == Some("bob"))
            .unwrap();
        assert!(!bob_entry.granted);
        assert_eq!(bob_entry.requested_family.as_deref(), Some("power"));
    }

    /// Filtering the audit log by agent returns only that agent's rows,
    /// and an unknown agent yields an empty list.
    #[test]
    fn audit_log_filter_by_agent() {
        let conn = test_db();
        record_capability_expansion(&conn, Some("alice"), "graph", true, None);
        record_capability_expansion(&conn, Some("bob"), "power", false, None);

        let alice_only = list_capability_expansions(&conn, 50, Some("alice")).unwrap();
        assert_eq!(alice_only.len(), 1);
        assert_eq!(alice_only[0].agent_id.as_deref(), Some("alice"));

        let unknown_agent = list_capability_expansions(&conn, 50, Some("nobody")).unwrap();
        assert!(unknown_agent.is_empty());
    }

    /// An expansion recorded without an agent id is stored and listed
    /// with `agent_id == None`.
    #[test]
    fn audit_log_anonymous_caller() {
        let conn = test_db();
        record_capability_expansion(&conn, None, "core", true, None);

        let entries = list_capability_expansions(&conn, 50, None).unwrap();
        assert_eq!(entries.len(), 1);
        assert!(entries[0].agent_id.is_none());
    }

    /// Opening the same database file twice in a row must succeed (the
    /// audit_log `CREATE TABLE IF NOT EXISTS` path may not error on the
    /// second pass), and the three audit_log indexes must be present
    /// afterwards.
    #[test]
    fn audit_log_migration_idempotent_on_re_open() {
        let db_file = tempfile::NamedTempFile::new().unwrap();
        let db_path = db_file.path().to_path_buf();

        // First open: the connection is dropped immediately.
        drop(open(&db_path).unwrap());
        // Second open of the same file.
        let conn = open(&db_path).unwrap();

        let index_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM sqlite_master WHERE name LIKE 'idx_audit_log_%'",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(
            index_count, 3,
            "expected 3 audit_log indexes (agent_id, ts, event_type)"
        );
    }

    // ---------------------------------------------------------------
    // v0.7.0 K2 — pending_actions timeout sweeper.
    //
    // Closes the v0.6.3.1 honest-Capabilities-v2 disclosure that
    // `default_timeout_seconds` was advertised but unused.
    // ---------------------------------------------------------------

    /// Test helper: insert one `pending_actions` row in status
    /// `'pending'` whose `requested_at` lies `age_secs` seconds in the
    /// past, so the sweeper can be exercised without `tokio::time`
    /// games. `per_row_timeout` is written to `default_timeout_seconds`
    /// (NULL when `None`).
    ///
    /// Panics if the INSERT fails.
    fn insert_stale_pending(
        conn: &Connection,
        id: &str,
        namespace: &str,
        age_secs: i64,
        per_row_timeout: Option<i64>,
    ) {
        let age = chrono::Duration::seconds(age_secs);
        let backdated = (chrono::Utc::now() - age).to_rfc3339();
        let bound = params![id, namespace, backdated, per_row_timeout];
        conn.execute(
            "INSERT INTO pending_actions
             (id, action_type, namespace, payload, requested_by, requested_at,
              status, default_timeout_seconds)
             VALUES (?1, 'store', ?2, '{}', 'tester', ?3, 'pending', ?4)",
            bound,
        )
        .unwrap();
    }

    /// A pending row older than the global default timeout must be
    /// returned by the sweeper and flipped to `status = 'expired'` with
    /// `expired_at` set.
    #[test]
    fn sweep_marks_stale_pending_row_expired() {
        let conn = test_db();
        // Two hours old against a one-hour global default: must expire.
        insert_stale_pending(&conn, "stale-1", "ns/a", 7_200, None);

        let swept = sweep_pending_action_timeouts(&conn, crate::SECS_PER_HOUR).unwrap();
        assert_eq!(swept.len(), 1, "expected exactly one expiry");
        assert_eq!(swept[0], (String::from("stale-1"), String::from("ns/a")));

        // Read the row back and check what the sweeper persisted.
        let (status, expired_at): (String, Option<String>) = conn
            .query_row(
                "SELECT status, expired_at FROM pending_actions WHERE id = ?1",
                params!["stale-1"],
                |row| {
                    let status = row.get(0)?;
                    let stamped = row.get(1)?;
                    Ok((status, stamped))
                },
            )
            .unwrap();
        assert_eq!(status, "expired");
        assert!(
            expired_at.is_some(),
            "expired_at must be stamped by the sweeper"
        );
    }

    /// A pending row younger than the global default timeout must not
    /// be returned by the sweeper and must stay `'pending'`.
    #[test]
    fn sweep_leaves_fresh_pending_alone() {
        let conn = test_db();
        // Thirty seconds old against a one-hour global default: stays.
        insert_stale_pending(&conn, "fresh-1", "ns/a", 30, None);

        let swept = sweep_pending_action_timeouts(&conn, crate::SECS_PER_HOUR).unwrap();
        assert!(swept.is_empty());

        let persisted_status: String = conn
            .query_row(
                "SELECT status FROM pending_actions WHERE id = ?1",
                params!["fresh-1"],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(persisted_status, "pending");
    }

    /// Two rows of identical age: the one carrying a short per-row
    /// timeout is swept, the one without an override is not.
    #[test]
    fn sweep_per_row_timeout_overrides_global_default() {
        let conn = test_db();
        // Five minutes old with a 60-second per-row TTL: has to expire
        // even though the one-hour global default alone would keep it.
        insert_stale_pending(&conn, "short-ttl", "ns/a", 300, Some(60));
        // Same age, no per-row TTL: governed by the one-hour global
        // default, so it stays pending.
        insert_stale_pending(&conn, "no-override", "ns/a", 300, None);

        let swept = sweep_pending_action_timeouts(&conn, crate::SECS_PER_HOUR).unwrap();
        let swept_ids: Vec<&str> = swept.iter().map(|(id, _)| id.as_str()).collect();
        assert_eq!(swept_ids, ["short-ttl"]);
    }

    /// Rows whose status is no longer `pending` are outside the sweep's
    /// scope, however old they are.
    #[test]
    fn sweep_skips_already_decided_rows() {
        let conn = test_db();
        // Seed a two-hour-old row that is already `approved`; the same
        // timestamp is used for both `requested_at` and `decided_at`.
        let two_hours_ago = chrono::Utc::now() - chrono::Duration::seconds(7_200);
        let stamp = two_hours_ago.to_rfc3339();
        conn.execute(
            "INSERT INTO pending_actions
             (id, action_type, namespace, payload, requested_by, requested_at,
              status, decided_by, decided_at)
             VALUES ('approved-old', 'store', 'ns/a', '{}', 'alice', ?1,
                     'approved', 'bob', ?1)",
            params![stamp],
        )
        .unwrap();

        let swept = sweep_pending_action_timeouts(&conn, 60).unwrap();
        assert!(swept.is_empty(), "non-pending rows must be ignored");

        let status = conn
            .query_row(
                "SELECT status FROM pending_actions WHERE id = 'approved-old'",
                [],
                |r| r.get::<_, String>(0),
            )
            .unwrap();
        assert_eq!(status, "approved", "decided row status preserved");
    }

    /// A zero or negative global default acts as the operator escape
    /// hatch: the sweep reports nothing even with a stale row present.
    #[test]
    fn sweep_disabled_when_global_default_non_positive() {
        let conn = test_db();
        // Two-hour-old row with no per-row TTL.
        insert_stale_pending(&conn, "stale-2", "ns/a", 7_200, None);

        // Zero first, then a negative value, against the same queue.
        for disabled_default in [0, -1] {
            let swept = sweep_pending_action_timeouts(&conn, disabled_default).unwrap();
            assert!(swept.is_empty());
        }
    }

    /// Sweeping a database with no pending rows succeeds and reports
    /// nothing.
    #[test]
    fn sweep_empty_queue_is_silent_noop() {
        let conn = test_db();
        let swept = sweep_pending_action_timeouts(&conn, 60).unwrap();
        assert!(swept.is_empty());
    }

    // -----------------------------------------------------------------
    // v0.7.0 fix campaign R1-M2 / R1-M3 / R1-M4 (#690)
    //
    // Substrate-side defense-in-depth: SQL CHECK triggers + typed
    // `MemoryLinkRelation` + `ConflictMode`-aware insert primitive.
    // The tests below pin the contract the brief calls out by name so
    // a future regression surfaces here, not in a downstream consumer.
    // -----------------------------------------------------------------

    /// R1-M2 — a raw INSERT into `memories` with `tier = 'long-term'`
    /// fails, and the error text names the tier check.
    #[test]
    fn test_memories_tier_check_rejects_invalid() {
        let conn = test_db();
        let stamp = chrono::Utc::now().to_rfc3339();
        let rejected = conn
            .execute(
                "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, confidence, source, access_count, created_at, updated_at, metadata) \
                 VALUES (?1, 'long-term', 'ns-ck', 'bad-tier', 'x', '[]', 5, 1.0, 'test', 0, ?2, ?2, '{}')",
                params!["m-bad-tier", stamp],
            )
            .unwrap_err();
        let msg = rejected.to_string();
        assert!(
            msg.contains("memories.tier must be one of"),
            "expected R1-M2 tier check, got: {msg}"
        );
    }

    /// R1-M2 — raw INSERTs into `memories` with a priority above or
    /// below the `[1, 10]` band both fail.
    #[test]
    fn test_memories_priority_check_rejects_oob() {
        let conn = test_db();
        let stamp = chrono::Utc::now().to_rfc3339();

        // Upper bound: priority = 11.
        let err = conn
            .execute(
                "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, confidence, source, access_count, created_at, updated_at, metadata) \
                 VALUES (?1, 'mid', 'ns-ck', 'bad-prio', 'x', '[]', 11, 1.0, 'test', 0, ?2, ?2, '{}')",
                params!["m-bad-prio", stamp],
            )
            .unwrap_err();
        assert!(
            err.to_string()
                .contains("memories.priority must be between 1 and 10"),
            "expected R1-M2 priority check, got: {err}"
        );

        // Lower bound: priority = 0. Only the word "priority" is pinned
        // in the error text here.
        let too_low = conn
            .execute(
                "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, confidence, source, access_count, created_at, updated_at, metadata) \
                 VALUES (?1, 'mid', 'ns-ck', 'bad-prio-low', 'x', '[]', 0, 1.0, 'test', 0, ?2, ?2, '{}')",
                params!["m-bad-prio-low", stamp],
            )
            .unwrap_err();
        assert!(too_low.to_string().contains("priority"));
    }

    /// R1-M2 — a raw INSERT into `memories` with `confidence = 1.5`
    /// fails, and the error text names `memories.confidence`.
    #[test]
    fn test_memories_confidence_check_rejects_oob() {
        let conn = test_db();
        let stamp = chrono::Utc::now().to_rfc3339();
        let err = conn
            .execute(
                "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, confidence, source, access_count, created_at, updated_at, metadata) \
                 VALUES (?1, 'mid', 'ns-ck', 'bad-conf', 'x', '[]', 5, 1.5, 'test', 0, ?2, ?2, '{}')",
                params!["m-bad-conf", stamp],
            )
            .unwrap_err();
        let mentions_confidence = err.to_string().contains("memories.confidence");
        assert!(
            mentions_confidence,
            "expected R1-M2 confidence check, got: {err}"
        );
    }

    /// R1-M2 — a raw INSERT into `memory_links` with the relation
    /// `'follows'` fails, and the error text names the relation check.
    #[test]
    fn test_memory_links_relation_check_rejects_unknown() {
        let conn = test_db();
        // Two real memories so the link endpoints exist.
        let source_id = insert(&conn, &make_memory("rel-src", "ns-ck", Tier::Mid, 5)).unwrap();
        let target_id = insert(&conn, &make_memory("rel-tgt", "ns-ck", Tier::Mid, 5)).unwrap();
        let stamp = chrono::Utc::now().to_rfc3339();

        let attempt = conn.execute(
            "INSERT INTO memory_links (source_id, target_id, relation, created_at, valid_from) \
             VALUES (?1, ?2, 'follows', ?3, ?3)",
            params![source_id, target_id, stamp],
        );
        let err = attempt.unwrap_err();
        assert!(
            err.to_string()
                .contains("memory_links.relation must be one of"),
            "expected R1-M2 relation check, got: {err}"
        );
    }

    /// R1-M2 — a raw link INSERT with `attest_level = NULL` is accepted
    /// (legacy rows), while one with `'totally-fake'` fails with an
    /// error naming `memory_links.attest_level`.
    #[test]
    fn test_memory_links_attest_level_check_rejects_unknown() {
        let conn = test_db();
        let source_id = insert(&conn, &make_memory("att-src", "ns-ck", Tier::Mid, 5)).unwrap();
        let target_id = insert(&conn, &make_memory("att-tgt", "ns-ck", Tier::Mid, 5)).unwrap();
        let stamp = chrono::Utc::now().to_rfc3339();

        // Legacy shape: NULL attest_level goes through.
        conn.execute(
            "INSERT INTO memory_links (source_id, target_id, relation, created_at, valid_from, attest_level) \
             VALUES (?1, ?2, 'related_to', ?3, ?3, NULL)",
            params![source_id, target_id, stamp],
        )
        .expect("NULL attest_level must remain accepted");

        // Unknown level: refused.
        let refused = conn
            .execute(
                "INSERT INTO memory_links (source_id, target_id, relation, created_at, valid_from, attest_level) \
                 VALUES (?1, ?2, 'supersedes', ?3, ?3, 'totally-fake')",
                params![source_id, target_id, stamp],
            )
            .unwrap_err();
        assert!(refused.to_string().contains("memory_links.attest_level"));
    }

    /// R1-M3 — under `ConflictMode::Error`, a second write with the same
    /// title and namespace fails with an error that downcasts to
    /// `ConflictError`, and the stored row keeps its original content.
    #[test]
    fn test_insert_with_conflict_error_mode_refuses_duplicate() {
        let conn = test_db();

        let first = make_memory("dup-title", "ns-conflict", Tier::Mid, 5);
        let _id = insert_with_conflict(&conn, &first, ConflictMode::Error).unwrap();

        let mut second = make_memory("dup-title", "ns-conflict", Tier::Mid, 7);
        second.content = "second writer should be refused".to_string();
        let err = insert_with_conflict(&conn, &second, ConflictMode::Error).unwrap_err();
        assert!(
            err.downcast_ref::<ConflictError>().is_some(),
            "expected typed ConflictError, got: {err}"
        );

        // The row found under the shared title must not carry the second
        // writer's content.
        let surviving_id = find_by_title_namespace(&conn, "dup-title", "ns-conflict")
            .unwrap()
            .expect("first row still present");
        let surviving = get(&conn, &surviving_id).unwrap().unwrap();
        assert_ne!(
            surviving.content, "second writer should be refused",
            "Error mode must not mutate the existing row"
        );
    }

    /// R1-M3 — under `ConflictMode::Merge`, a second write with the same
    /// title and namespace returns the first row's id and that row now
    /// holds the second writer's content.
    #[test]
    fn test_insert_with_conflict_merge_mode_updates() {
        let conn = test_db();

        let first = make_memory("merge-title", "ns-merge", Tier::Mid, 5);
        let first_id = insert_with_conflict(&conn, &first, ConflictMode::Merge).unwrap();

        let mut second = make_memory("merge-title", "ns-merge", Tier::Mid, 7);
        second.content = "merged-content".to_string();
        let second_id = insert_with_conflict(&conn, &second, ConflictMode::Merge).unwrap();

        assert_eq!(first_id, second_id, "merge mode returns the existing row id");
        let merged = get(&conn, &first_id).unwrap().unwrap();
        assert_eq!(merged.content, "merged-content");
    }

    /// R1-M3 — under `ConflictMode::Version`, a colliding second write
    /// gets its own row, reachable under the title `"versioned (2)"`,
    /// while the first row stays reachable under the original title.
    #[test]
    fn test_insert_with_conflict_version_keeps_both() {
        let conn = test_db();

        let first = make_memory("versioned", "ns-v", Tier::Mid, 5);
        let first_id = insert_with_conflict(&conn, &first, ConflictMode::Version).unwrap();

        let mut second = make_memory("versioned", "ns-v", Tier::Mid, 5);
        second.content = "second version content".to_string();
        let second_id = insert_with_conflict(&conn, &second, ConflictMode::Version).unwrap();
        assert_ne!(first_id, second_id, "version mode produces a distinct row");

        // Look each row up by title and match it to the id returned above.
        let by_original_title = find_by_title_namespace(&conn, "versioned", "ns-v")
            .unwrap()
            .expect("original row");
        assert_eq!(by_original_title, first_id);

        let by_versioned_title = find_by_title_namespace(&conn, "versioned (2)", "ns-v")
            .unwrap()
            .expect("versioned row");
        assert_eq!(by_versioned_title, second_id);
    }

    /// R1-M4 — a link created with the string `"supersedes"` comes back
    /// from `get_links` as `MemoryLinkRelation::Supersedes` and
    /// serializes to JSON with `"relation":"supersedes"`.
    #[test]
    fn test_memory_link_relation_round_trips() {
        let conn = test_db();
        let source_id = insert(&conn, &make_memory("rt-src", "ns-rt", Tier::Mid, 5)).unwrap();
        let target_id = insert(&conn, &make_memory("rt-tgt", "ns-rt", Tier::Mid, 5)).unwrap();
        create_link(&conn, &source_id, &target_id, "supersedes").unwrap();

        let links = get_links(&conn, &source_id).unwrap();
        assert_eq!(links.len(), 1);
        let link = &links[0];
        assert_eq!(
            link.relation,
            crate::models::MemoryLinkRelation::Supersedes,
            "relation must round-trip as the typed Supersedes variant"
        );

        // Wire-shape cross-check on the serialized link.
        let wire = serde_json::to_string(link).unwrap();
        assert!(
            wire.contains("\"relation\":\"supersedes\""),
            "serde wire form must be the canonical lowercase snake_case \
             string; got {wire}"
        );
    }

    // ---------------------------------------------------------------
    // v0.7.0 S5 verdict — approval exec fixes:
    //   S5-H1 reflect arm, S5-H4 agent_id verify,
    //   S5-M1/M2 signed_events emit on approve/deny/timeout.
    // ---------------------------------------------------------------

    /// Helper — count `signed_events` rows whose `event_type` equals the
    /// given string, so the audit-emit tests below don't have to scrape
    /// the table in raw SQL each time.
    ///
    /// Only the rows returned by `list_signed_events(conn, None, 1000, 0)`
    /// are inspected.
    ///
    /// # Panics
    ///
    /// Panics if the listing call fails. A failed read must not be
    /// reported as "zero matching rows": several callers assert that a
    /// count is `0`, and those assertions would pass vacuously if the
    /// error were swallowed.
    fn count_signed_events(conn: &Connection, event_type: &str) -> usize {
        crate::signed_events::list_signed_events(conn, None, 1000, 0)
            .expect("listing signed_events must succeed in audit-emit tests")
            .into_iter()
            .filter(|e| e.event_type == event_type)
            .count()
    }

    /// S5-H1 — queue a `reflect` pending action over two seeded source
    /// memories, approve it, execute it, and check the resulting memory.
    ///
    /// Execution has to return the id of a newly stored memory whose
    /// title, namespace, `reflection_depth` and `metadata.agent_id`
    /// match the queued payload. Pre-fix this path errored with
    /// "unknown action_type: reflect" and the queued row never landed.
    #[test]
    fn test_execute_reflect_arm_succeeds_round_trip() {
        let conn = test_db();

        // The two memories the reflection is built from.
        let src1_id = insert(&conn, &make_memory("src-1", "ns/reflect", Tier::Mid, 5)).unwrap();
        let src2_id = insert(&conn, &make_memory("src-2", "ns/reflect", Tier::Mid, 5)).unwrap();

        // Reflect payload in the L1-8 shape.
        let payload = serde_json::json!({
            "source_ids": [src1_id, src2_id],
            "title": "reflective synthesis",
            "content": "deep observation across sources",
            "namespace": "ns/reflect",
            "tier": Tier::Mid.as_str(),
            "tags": ["reflective"],
            "priority": 6,
            "confidence": 0.9,
            "agent_id": "alice",
            "proposed_depth": 1,
        });
        let action = crate::models::GovernedAction::Reflect;
        let pending_id =
            queue_pending_action(&conn, action, "ns/reflect", None, "alice", &payload).unwrap();

        // The row is approved before it is executed.
        let approved = decide_pending_action(&conn, &pending_id, true, "approver").unwrap();
        assert!(approved);

        let new_id = execute_pending_action(&conn, &pending_id)
            .expect("reflect execute ok")
            .expect("reflect must return the new reflection id");
        let reflection = get(&conn, &new_id)
            .unwrap()
            .expect("reflection memory landed");
        assert_eq!(reflection.title, "reflective synthesis");
        assert_eq!(reflection.namespace, "ns/reflect");
        assert_eq!(
            reflection.reflection_depth, 1,
            "depth = max(source depths) + 1"
        );
        // `metadata.agent_id` has to carry the payload's `agent_id`.
        assert_eq!(reflection.metadata["agent_id"], "alice");
    }

    /// S5-H4 — a store payload claiming `agent_id = "bob"` that was
    /// queued by requester `"alice"` is approver-on-behalf laundering.
    ///
    /// After approval, execute has to fail with the laundering-refusal
    /// message, leave the namespace without any memory, append exactly
    /// one `pending_action.refused_agent_id_mismatch` audit row, and
    /// append no `pending_action.approved` row.
    #[test]
    fn test_execute_refuses_payload_agent_id_mismatch() {
        let conn = test_db();

        // Payload attributed to "bob" ...
        let mut laundered = make_memory("laundered store", "ns/launder", Tier::Mid, 5);
        laundered.metadata = serde_json::json!({"agent_id": "bob"});
        let payload = serde_json::to_value(&laundered).unwrap();

        // ... but queued with "alice" as the requester. Pre-fix this
        // landed a memory attributed to "bob".
        let action = crate::models::GovernedAction::Store;
        let pending_id =
            queue_pending_action(&conn, action, "ns/launder", None, "alice", &payload).unwrap();
        let approved = decide_pending_action(&conn, &pending_id, true, "approver").unwrap();
        assert!(approved);

        let err = execute_pending_action(&conn, &pending_id)
            .expect_err("execute MUST refuse laundered agent_id");
        let msg = err.to_string();
        assert!(
            msg.contains("approver-on-behalf laundering refused"),
            "expected laundering-refusal message, got: {msg}"
        );

        // Nothing was stored in the target namespace.
        let landed = conn
            .query_row(
                "SELECT COUNT(*) FROM memories WHERE namespace = 'ns/launder'",
                [],
                |r| r.get::<_, i64>(0),
            )
            .unwrap();
        assert_eq!(landed, 0, "refused execute must not insert a memory");

        // The refusal is audited; no approval event rides along.
        let refusals = count_signed_events(&conn, "pending_action.refused_agent_id_mismatch");
        assert_eq!(refusals, 1, "refusal must append a signed_events row");
        assert_eq!(count_signed_events(&conn, "pending_action.approved"), 0);
    }

    /// S5-M1 — approving and then executing a store action appends
    /// exactly one `pending_action.approved` row to `signed_events`, and
    /// no `denied` or `timed_out` row. Pre-fix the audit chain had no
    /// record of the approval transition.
    #[test]
    fn test_approve_emits_signed_event() {
        let conn = test_db();
        let mem = make_memory("approved store", "ns/approve", Tier::Mid, 5);
        let payload = serde_json::to_value(&mem).unwrap();

        // The requester is taken from the fixture's own
        // `metadata.agent_id` when that is a string, falling back to
        // "alice" otherwise, so the requester and the payload cannot
        // name two different agents.
        let requester = mem.metadata["agent_id"].as_str().unwrap_or("alice");
        let action = crate::models::GovernedAction::Store;
        let pending_id =
            queue_pending_action(&conn, action, "ns/approve", None, requester, &payload).unwrap();

        let approved = decide_pending_action(&conn, &pending_id, true, "approver").unwrap();
        assert!(approved);
        let _ = execute_pending_action(&conn, &pending_id).expect("execute ok");

        let approvals = count_signed_events(&conn, "pending_action.approved");
        assert_eq!(approvals, 1, "approve+execute must append one audit row");
        // No deny / timeout events on this path.
        assert_eq!(count_signed_events(&conn, "pending_action.denied"), 0);
        assert_eq!(count_signed_events(&conn, "pending_action.timed_out"), 0);
    }

    /// S5-M2 — deciding a pending action with `approve = false` appends
    /// exactly one `pending_action.denied` row to `signed_events`, and
    /// no `approved` or `timed_out` row. Pre-fix the deny path was
    /// silent in the audit chain.
    #[test]
    fn test_deny_emits_signed_event() {
        let conn = test_db();
        let payload = serde_json::json!({"title": "to-deny", "content": "x"});
        let action = crate::models::GovernedAction::Store;
        let pending_id =
            queue_pending_action(&conn, action, "ns/deny", None, "alice", &payload).unwrap();

        let transitioned = decide_pending_action(&conn, &pending_id, false, "approver").unwrap();
        assert!(transitioned, "deny transition must succeed on pending row");

        let denials = count_signed_events(&conn, "pending_action.denied");
        assert_eq!(denials, 1, "deny must append one audit row");
        // No approve / timeout events on this path.
        assert_eq!(count_signed_events(&conn, "pending_action.approved"), 0);
        assert_eq!(count_signed_events(&conn, "pending_action.timed_out"), 0);
    }

    /// S5-M2 — the timeout sweep appends one `pending_action.timed_out`
    /// row to `signed_events` for each row it expires, and leaves a
    /// fresh row in `pending`. Pre-fix the sweep transitioned rows
    /// without any audit record.
    #[test]
    fn test_timeout_sweeper_emits_signed_event() {
        let conn = test_db();
        // Two two-hour-old rows and one thirty-second-old row; only the
        // first two are past the one-hour global default.
        let fixtures = [
            ("stale-a", "ns/x", 7_200),
            ("stale-b", "ns/y", 7_200),
            ("fresh-c", "ns/z", 30),
        ];
        for (id, namespace, age_secs) in fixtures {
            insert_stale_pending(&conn, id, namespace, age_secs, None);
        }

        let swept = sweep_pending_action_timeouts(&conn, crate::SECS_PER_HOUR).unwrap();
        assert_eq!(swept.len(), 2, "two stale rows must expire");

        let timeouts = count_signed_events(&conn, "pending_action.timed_out");
        assert_eq!(timeouts, 2, "one audit row per expired pending row");

        // The fresh row was not touched.
        let fresh_status = conn
            .query_row(
                "SELECT status FROM pending_actions WHERE id = 'fresh-c'",
                [],
                |r| r.get::<_, String>(0),
            )
            .unwrap();
        assert_eq!(fresh_status, "pending");
    }

    // -----------------------------------------------------------------
    // v0.7.0 S4-INFO2 — `memory_link.created` audit emit
    // -----------------------------------------------------------------

    /// Returns `COUNT(*)` over `signed_events` rows whose `event_type`
    /// equals the given string. The event type is the only filter.
    /// Panics if the query fails. Used by the audit emit tests below.
    fn count_signed_events_of_type(conn: &Connection, event_type: &str) -> i64 {
        let sql = "SELECT COUNT(*) FROM signed_events WHERE event_type = ?1";
        conn.query_row(sql, params![event_type], |row| row.get::<_, i64>(0))
            .unwrap()
    }

    /// S4-INFO2 — a link created without a keypair still appends one
    /// `memory_link.created` audit row, and that row records
    /// `attest_level = 'unsigned'` with a NULL `signature`.
    #[test]
    fn test_memory_link_created_emits_signed_event_unsigned_path() {
        let conn = test_db();
        let src = make_memory("s4info2-src-u", "test", Tier::Long, 5);
        let tgt = make_memory("s4info2-tgt-u", "test", Tier::Long, 5);
        insert(&conn, &src).unwrap();
        insert(&conn, &tgt).unwrap();

        // Count audit rows on either side of the create.
        let before = count_signed_events_of_type(&conn, "memory_link.created");
        create_link_signed(&conn, &src.id, &tgt.id, "related_to", None).unwrap();
        let after = count_signed_events_of_type(&conn, "memory_link.created");
        assert_eq!(after, before + 1, "unsigned create must emit one audit row");

        // Signing-surface columns of the most recent audit row.
        let newest = conn
            .query_row(
                "SELECT attest_level, signature FROM signed_events \
                 WHERE event_type = 'memory_link.created' \
                 ORDER BY timestamp DESC LIMIT 1",
                [],
                |r| Ok((r.get::<_, String>(0)?, r.get::<_, Option<Vec<u8>>>(1)?)),
            )
            .unwrap();
        let (attest, sig) = newest;
        assert_eq!(attest, "unsigned");
        assert!(sig.is_none(), "unsigned create must emit NULL signature");
    }

    /// S4-INFO2 — signed path. The newest `memory_link.created` audit
    /// row has to carry the signer's agent id, `attest_level =
    /// 'self_signed'`, the same signature bytes as the `memory_links`
    /// row, and a `payload_hash` equal to `payload_hash` over the
    /// canonical CBOR of the link rebuilt from the stored row.
    #[test]
    fn test_memory_link_created_emits_signed_event_signed_path() {
        use crate::identity::{keypair, sign as link_sign};

        let conn = test_db();
        let src = make_memory("s4info2-src-s", "test", Tier::Long, 5);
        let tgt = make_memory("s4info2-tgt-s", "test", Tier::Long, 5);
        insert(&conn, &src).unwrap();
        insert(&conn, &tgt).unwrap();

        let kp = keypair::generate("alice").unwrap();
        create_link_signed(&conn, &src.id, &tgt.id, "supersedes", Some(&kp)).unwrap();

        // Pull the stored signature and `valid_from` off the link row;
        // both feed the expected values below.
        let stored_link = conn
            .query_row(
                "SELECT signature, valid_from FROM memory_links \
                 WHERE source_id = ?1 AND target_id = ?2",
                params![&src.id, &tgt.id],
                |r| Ok((r.get::<_, Vec<u8>>(0)?, r.get::<_, String>(1)?)),
            )
            .unwrap();
        let (link_sig, valid_from) = stored_link;

        // Rebuild the signable link and hash its canonical CBOR.
        let signable = link_sign::SignableLink {
            src_id: &src.id,
            dst_id: &tgt.id,
            relation: "supersedes",
            observed_by: Some(kp.agent_id.as_str()),
            valid_from: Some(valid_from.as_str()),
            valid_until: None,
        };
        let cbor = link_sign::canonical_cbor(&signable).expect("cbor");
        let expected_hash = crate::signed_events::payload_hash(&cbor);

        // Newest audit row for the event type.
        let audit_row: (String, String, Option<Vec<u8>>, Vec<u8>) = conn
            .query_row(
                "SELECT agent_id, attest_level, signature, payload_hash \
                 FROM signed_events \
                 WHERE event_type = 'memory_link.created' \
                 ORDER BY timestamp DESC LIMIT 1",
                [],
                |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?)),
            )
            .unwrap();
        let (agent, attest, sig, payload) = audit_row;

        assert_eq!(agent, "alice");
        assert_eq!(attest, "self_signed");
        assert_eq!(
            sig.as_deref(),
            Some(link_sig.as_slice()),
            "audit row signature must mirror memory_links.signature byte-for-byte"
        );
        assert_eq!(
            payload, expected_hash,
            "audit row payload_hash must SHA-256 the canonical CBOR H2 signed over"
        );
    }

    /// Replaying the same (src, dst, relation) link create must leave
    /// the `memory_link.created` audit count unchanged. Otherwise an
    /// idempotent retry, for example by a federation peer, would inflate
    /// the audit row count for a single logical event.
    #[test]
    fn test_memory_link_created_emit_is_idempotent_on_replay() {
        let conn = test_db();
        let src = make_memory("s4info2-src-d", "test", Tier::Long, 5);
        let tgt = make_memory("s4info2-tgt-d", "test", Tier::Long, 5);
        insert(&conn, &src).unwrap();
        insert(&conn, &tgt).unwrap();

        // First create, then snapshot the audit count.
        create_link_signed(&conn, &src.id, &tgt.id, "related_to", None).unwrap();
        let count_after_create = count_signed_events_of_type(&conn, "memory_link.created");

        // Identical replay, then snapshot again.
        create_link_signed(&conn, &src.id, &tgt.id, "related_to", None).unwrap();
        let count_after_replay = count_signed_events_of_type(&conn, "memory_link.created");

        assert_eq!(
            count_after_replay, count_after_create,
            "duplicate (src,dst,relation) replay must not emit a second audit row"
        );
    }

    /// The inbound (federation-replicated) link path appends one
    /// `memory_link.created` audit row too, and that row's `agent_id`
    /// is the link's `observed_by` value.
    #[test]
    fn test_create_link_inbound_emits_signed_event() {
        let conn = test_db();
        let src = make_memory("s4info2-in-src", "test", Tier::Long, 5);
        let tgt = make_memory("s4info2-in-tgt", "test", Tier::Long, 5);
        insert(&conn, &src).unwrap();
        insert(&conn, &tgt).unwrap();

        // Unsigned link as a peer would present it; one timestamp serves
        // both `created_at` and `valid_from`.
        let stamp = chrono::Utc::now().to_rfc3339();
        let inbound = MemoryLink {
            source_id: src.id.clone(),
            target_id: tgt.id.clone(),
            relation: crate::models::MemoryLinkRelation::RelatedTo,
            created_at: stamp.clone(),
            signature: None,
            observed_by: Some("peer-bob".to_string()),
            valid_from: Some(stamp),
            valid_until: None,
            attest_level: None,
        };

        let before = count_signed_events_of_type(&conn, "memory_link.created");
        create_link_inbound(&conn, &inbound, "unsigned").unwrap();
        let after = count_signed_events_of_type(&conn, "memory_link.created");
        assert_eq!(after, before + 1);

        let agent = conn
            .query_row(
                "SELECT agent_id FROM signed_events \
                 WHERE event_type = 'memory_link.created' \
                 ORDER BY timestamp DESC LIMIT 1",
                [],
                |r| r.get::<_, String>(0),
            )
            .unwrap();
        assert_eq!(
            agent, "peer-bob",
            "inbound emit must record the peer's claimed observed_by"
        );
    }

    /// The audit emit is best-effort: with `signed_events` dropped (a
    /// stand-in for schema drift or a storage error surfacing as a SQL
    /// failure), `create_link_signed` still has to return `Ok` and the
    /// link row still has to be stored.
    #[test]
    fn test_create_link_signed_emit_failure_does_not_roll_back() {
        let conn = test_db();
        let src = make_memory("s4info2-fail-src", "test", Tier::Long, 5);
        let tgt = make_memory("s4info2-fail-tgt", "test", Tier::Long, 5);
        insert(&conn, &src).unwrap();
        insert(&conn, &tgt).unwrap();

        // Remove the audit table so any emit attempt fails.
        conn.execute("DROP TABLE signed_events", []).unwrap();

        let result = create_link_signed(&conn, &src.id, &tgt.id, "related_to", None);
        assert!(
            result.is_ok(),
            "audit emit failure must not crater the link create: {result:?}"
        );

        // Exactly one link row exists for the pair.
        let stored_links = conn
            .query_row(
                "SELECT COUNT(*) FROM memory_links \
                 WHERE source_id = ?1 AND target_id = ?2",
                params![&src.id, &tgt.id],
                |r| r.get::<_, i64>(0),
            )
            .unwrap();
        assert_eq!(
            stored_links, 1,
            "link row must have committed despite audit failure"
        );
    }

    // ─────────────────────────────────────────────────────────────────────────
    // L1-1 (v0.7.0) — MemoryKind typed enum + migration v31 tests
    //
    // Migration v31 (memory_kind) was originally authored as v30 on
    // l1/typed-memorykind; renumbered during the L1 wave merge after
    // substrate-rules (issue #691) took v30. The backfill SQL is unchanged.
    // ─────────────────────────────────────────────────────────────────────────

    /// Migration v31 backfill: a row stored with
    /// `memory_kind = 'observation'` whose metadata JSON has
    /// `type = 'reflection'` is rewritten to `memory_kind = 'reflection'`
    /// by the UPDATE this test runs (per the inline note, the same logic
    /// as the migration).
    #[test]
    fn l1_1_migration_backfill_sets_reflection_kind() {
        let conn = test_db();
        let stamp = chrono::Utc::now().to_rfc3339();
        let id = uuid::Uuid::new_v4().to_string();

        // Shape of a pre-v31 reflection: the kind column still reads
        // 'observation', and only `metadata.type = 'reflection'` marks
        // the row as produced by memory_reflect.
        conn.execute(
            "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, \
             confidence, source, access_count, created_at, updated_at, metadata, \
             reflection_depth, memory_kind) \
             VALUES (?1,'mid','ns','backfill-test','content','[]',5,1.0,'test',0,?2,?2,?3,0,'observation')",
            rusqlite::params![id, stamp, r#"{"type":"reflection"}"#],
        )
        .unwrap();

        // One read-back used on both sides of the backfill.
        let read_kind = || -> String {
            conn.query_row(
                "SELECT memory_kind FROM memories WHERE id = ?1",
                [&id],
                |r| r.get(0),
            )
            .unwrap()
        };

        // Starting state.
        assert_eq!(read_kind(), "observation");

        // Apply the backfill statement.
        conn.execute(
            "UPDATE memories SET memory_kind = 'reflection' \
             WHERE memory_kind = 'observation' \
               AND json_valid(metadata) \
               AND json_extract(metadata, '$.type') = 'reflection'",
            [],
        )
        .unwrap();

        assert_eq!(
            read_kind(),
            "reflection",
            "backfill must upgrade metadata.type=reflection rows to memory_kind=reflection"
        );
    }

    /// Backfill must NOT touch rows where `metadata.type` is absent or is
    /// something other than `'reflection'`.
    ///
    /// Both halves of that contract are seeded: one row with empty metadata
    /// (`{}`) and one row whose `metadata.type` holds a non-reflection
    /// value. After running a copy of the migration v31 backfill statement,
    /// both rows must still read `memory_kind='observation'`.
    #[test]
    fn l1_1_migration_backfill_leaves_non_reflection_rows_alone() {
        let conn = test_db();
        let now = chrono::Utc::now().to_rfc3339();

        // Inserts an 'observation' row with the given title and metadata and
        // returns its id. The two seeds use different titles so they cannot
        // collide with each other on a (title, namespace) uniqueness rule.
        let seed = |title: &str, metadata: &str| -> String {
            let id = uuid::Uuid::new_v4().to_string();
            conn.execute(
                "INSERT INTO memories (id, tier, namespace, title, content, tags, priority, \
                 confidence, source, access_count, created_at, updated_at, metadata, \
                 reflection_depth, memory_kind) \
                 VALUES (?1,'mid','ns',?2,'content','[]',5,1.0,'test',0,?3,?3,?4,0,'observation')",
                rusqlite::params![id, title, now, metadata],
            )
            .unwrap();
            id
        };

        // Case 1: metadata carries no `type` key at all.
        let absent_type_id = seed("obs-test", "{}");
        // Case 2: metadata carries a `type`, but not 'reflection'.
        let other_type_id = seed("obs-test-other-type", r#"{"type":"note"}"#);

        // Copy of the backfill logic from migration v31.
        conn.execute(
            "UPDATE memories SET memory_kind = 'reflection' \
             WHERE memory_kind = 'observation' \
               AND json_valid(metadata) \
               AND json_extract(metadata, '$.type') = 'reflection'",
            [],
        )
        .unwrap();

        // Reads back the memory_kind column for one row.
        let kind_of = |id: &str| -> String {
            conn.query_row(
                "SELECT memory_kind FROM memories WHERE id = ?1",
                [id],
                |r| r.get(0),
            )
            .unwrap()
        };

        assert_eq!(
            kind_of(&absent_type_id),
            "observation",
            "backfill must not change rows without metadata.type=reflection"
        );
        assert_eq!(
            kind_of(&other_type_id),
            "observation",
            "backfill must not change rows whose metadata.type is not 'reflection'"
        );
    }

    /// `memories_by_kind` must partition by kind: asking for `Observation`
    /// yields only observation memories, asking for `Reflection` yields only
    /// reflection memories, and each inserted fixture shows up under its own
    /// kind.
    #[test]
    fn l1_1_memories_by_kind_returns_correct_subset() {
        // Short local alias for the kind enum used throughout this test.
        type Kind = crate::models::MemoryKind;

        let conn = test_db();

        // Builds a long-tier memory in `kind-ns`; only title, content,
        // reflection depth and kind differ between the two fixtures.
        let fixture = |title: &str, content: &str, depth, kind: Kind| Memory {
            id: uuid::Uuid::new_v4().to_string(),
            tier: Tier::Long,
            namespace: "kind-ns".to_string(),
            title: title.to_string(),
            content: content.to_string(),
            tags: vec![],
            priority: 5,
            confidence: 1.0,
            source: "test".to_string(),
            access_count: 0,
            created_at: chrono::Utc::now().to_rfc3339(),
            updated_at: chrono::Utc::now().to_rfc3339(),
            last_accessed_at: None,
            expires_at: None,
            metadata: serde_json::json!({}),
            reflection_depth: depth,
            memory_kind: kind,
            entity_id: None,
            persona_version: None,
            citations: Vec::new(),
            source_uri: None,
            source_span: None,
            confidence_source: ConfidenceSource::CallerProvided,
            confidence_signals: None,
            confidence_decayed_at: None,
            version: 1,
        };

        // One memory of each kind.
        let observation = fixture("obs-memory", "observation content", 0, Kind::Observation);
        let reflection = fixture("ref-memory", "reflection content", 1, Kind::Reflection);
        insert(&conn, &observation).unwrap();
        insert(&conn, &reflection).unwrap();

        let obs_rows = memories_by_kind(&conn, &Kind::Observation).unwrap();
        let ref_rows = memories_by_kind(&conn, &Kind::Reflection).unwrap();

        // Every returned row carries the kind that was asked for.
        for row in &obs_rows {
            assert!(
                row.memory_kind == Kind::Observation,
                "memories_by_kind(Observation) must return only Observation memories"
            );
        }
        for row in &ref_rows {
            assert!(
                row.memory_kind == Kind::Reflection,
                "memories_by_kind(Reflection) must return only Reflection memories"
            );
        }

        let obs_titles: Vec<&str> = obs_rows.iter().map(|m| m.title.as_str()).collect();
        let ref_titles: Vec<&str> = ref_rows.iter().map(|m| m.title.as_str()).collect();

        // Each fixture is present in the result set for its own kind.
        assert!(
            obs_titles.contains(&"obs-memory"),
            "obs-memory must be in Observation results"
        );
        assert!(
            ref_titles.contains(&"ref-memory"),
            "ref-memory must be in Reflection results"
        );
        // ...and the observation does not leak into the reflection results.
        assert!(
            !ref_titles.contains(&"obs-memory"),
            "obs-memory must not appear in Reflection results"
        );
    }

    /// A memory stored with `memory_kind=Reflection` must come back from
    /// `get()` still carrying the `Reflection` variant.
    #[test]
    fn l1_1_memory_kind_roundtrips_through_insert_get() {
        // Short local alias for the kind enum.
        type Kind = crate::models::MemoryKind;

        let conn = test_db();
        let timestamp = || chrono::Utc::now().to_rfc3339();

        let stored = Memory {
            id: uuid::Uuid::new_v4().to_string(),
            tier: Tier::Long,
            namespace: "roundtrip-ns".to_string(),
            title: "kind-roundtrip".to_string(),
            content: "roundtrip content".to_string(),
            tags: vec![],
            priority: 5,
            confidence: 1.0,
            source: "test".to_string(),
            access_count: 0,
            created_at: timestamp(),
            updated_at: timestamp(),
            last_accessed_at: None,
            expires_at: None,
            metadata: serde_json::json!({}),
            reflection_depth: 1,
            memory_kind: Kind::Reflection,
            entity_id: None,
            persona_version: None,
            citations: Vec::new(),
            source_uri: None,
            source_span: None,
            confidence_source: ConfidenceSource::CallerProvided,
            confidence_signals: None,
            confidence_decayed_at: None,
            version: 1,
        };

        // Look the row up by the id that `insert` hands back.
        let stored_id = insert(&conn, &stored).unwrap();
        let fetched = get(&conn, &stored_id)
            .unwrap()
            .expect("inserted memory must be found");

        assert_eq!(
            fetched.memory_kind,
            Kind::Reflection,
            "memory_kind=Reflection must roundtrip through insert→get"
        );
    }

    /// Sticky `memory_kind` on upsert: once a row is stored as
    /// `memory_kind='reflection'`, a later write for the same
    /// title+namespace that carries `memory_kind='observation'` must leave
    /// the kind untouched.
    ///
    /// NOTE(review): only `memory_kind` is asserted. Nothing here proves the
    /// second `insert` actually took the ON CONFLICT path (e.g. by checking
    /// the content changed) — consider pinning that too.
    #[test]
    fn l1_1_upsert_preserves_reflection_kind() {
        // Short local alias for the kind enum.
        type Kind = crate::models::MemoryKind;

        let conn = test_db();
        let now = chrono::Utc::now().to_rfc3339();
        let id = uuid::Uuid::new_v4().to_string();

        // Both writes target the same title+namespace; they differ in id,
        // content, priority, reflection depth and kind.
        let build = |row_id: String, content: &str, priority, depth, kind: Kind| Memory {
            id: row_id,
            tier: Tier::Long,
            namespace: "sticky-ns".to_string(),
            title: "sticky-title".to_string(),
            content: content.to_string(),
            tags: vec![],
            priority,
            confidence: 1.0,
            source: "test".to_string(),
            access_count: 0,
            created_at: now.clone(),
            updated_at: now.clone(),
            last_accessed_at: None,
            expires_at: None,
            metadata: serde_json::json!({}),
            reflection_depth: depth,
            memory_kind: kind,
            entity_id: None,
            persona_version: None,
            citations: Vec::new(),
            source_uri: None,
            source_span: None,
            confidence_source: ConfidenceSource::CallerProvided,
            confidence_signals: None,
            confidence_decayed_at: None,
            version: 1,
        };

        // First write establishes the row as a Reflection.
        let first_write = build(id.clone(), "original content", 5, 1, Kind::Reflection);
        insert(&conn, &first_write).unwrap();

        // Second write: fresh id but same title+namespace (→ ON CONFLICT),
        // this time claiming Observation.
        let second_write = build(
            uuid::Uuid::new_v4().to_string(),
            "updated content",
            6,
            0,
            Kind::Observation,
        );
        insert(&conn, &second_write).unwrap();

        // Looked up by the ORIGINAL id, the row must still be a Reflection.
        let survivor = get(&conn, &id)
            .unwrap()
            .expect("original memory must still exist");
        assert_eq!(
            survivor.memory_kind,
            Kind::Reflection,
            "upsert with Observation must not overwrite an existing Reflection kind"
        );
    }

    // -----------------------------------------------------------------
    // v0.7.0 issue #810 / #812 / #813 — CHECK trigger + strongest_attest
    // -----------------------------------------------------------------

    /// A memory with no outbound links at all must report `unsigned`.
    #[test]
    fn strongest_attest_returns_unsigned_for_isolate_source() {
        let conn = test_db();

        // Stored, but never used as the source of any link.
        let isolated = make_memory("lonely", "test", Tier::Long, 5);
        insert(&conn, &isolated).unwrap();

        // With nothing to attest, `unsigned` is the only honest default.
        let level = strongest_attest_level_for_source(&conn, &isolated.id).unwrap();
        assert_eq!(level, "unsigned");
    }

    /// With one unsigned and one keypair-signed outbound link, the source's
    /// strongest attestation level must be `self_signed`.
    #[test]
    fn strongest_attest_picks_self_signed_over_unsigned() {
        // Taken first and held until the test ends: the a3 tests flip the
        // *global* permissions mode to Enforce and install a deny-all link
        // rule, and that Enforce window can otherwise race the
        // create_link_signed calls below and surface a spurious "link denied
        // by permission rule". See the governance-mode test-isolation
        // tracking issue. #626 Layer-3 QC.
        let _gate = crate::config::lock_permissions_mode_for_test();

        let conn = test_db();
        let source = make_memory("attest-src", "test", Tier::Long, 5);
        let plain_target = make_memory("attest-a", "test", Tier::Long, 5);
        let signed_target = make_memory("attest-b", "test", Tier::Long, 5);
        for memory in [&source, &plain_target, &signed_target] {
            insert(&conn, memory).unwrap();
        }

        // First outbound link: no keypair supplied, so it stays unsigned.
        create_link_signed(&conn, &source.id, &plain_target.id, "related_to", None).unwrap();
        // Second outbound link: signed with a freshly generated keypair.
        let signer = crate::identity::keypair::generate("alice").unwrap();
        create_link_signed(
            &conn,
            &source.id,
            &signed_target.id,
            "supersedes",
            Some(&signer),
        )
        .unwrap();

        let strongest = strongest_attest_level_for_source(&conn, &source.id).unwrap();
        assert_eq!(strongest, "self_signed", "self_signed beats unsigned");
    }

    /// A source with one `self_signed` and one `peer_attested` outbound link
    /// must report `peer_attested` as its strongest attestation level.
    #[test]
    fn strongest_attest_picks_peer_attested_over_self_signed() {
        // Hold the permissions-mode test gate for the whole test, exactly as
        // `strongest_attest_picks_self_signed_over_unsigned` does: the a3
        // tests flip the *global* permissions mode to Enforce, and without
        // the gate that window can race the create_link_signed call below
        // and fail it with a spurious "link denied by permission rule".
        let _gate = crate::config::lock_permissions_mode_for_test();

        // Construct a peer-attested row by hand-rolling the
        // create_link_inbound path so we don't depend on a remote
        // signature. The CHECK trigger requires a 64-byte sig blob
        // for `peer_attested` — fabricate one.
        let conn = test_db();
        let src = make_memory("attest-pa-src", "test", Tier::Long, 5);
        let a = make_memory("attest-pa-a", "test", Tier::Long, 5);
        let b = make_memory("attest-pa-b", "test", Tier::Long, 5);
        insert(&conn, &src).unwrap();
        insert(&conn, &a).unwrap();
        insert(&conn, &b).unwrap();
        // Self-signed link.
        let kp = crate::identity::keypair::generate("alice").unwrap();
        create_link_signed(&conn, &src.id, &a.id, "related_to", Some(&kp)).unwrap();
        // Hand-inject a peer_attested row with a 64-byte signature so
        // the CHECK trigger admits it.
        let now = chrono::Utc::now().to_rfc3339();
        let sig = vec![0xAB_u8; 64];
        conn.execute(
            "INSERT INTO memory_links \
                (source_id, target_id, relation, created_at, valid_from, signature, attest_level, observed_by) \
             VALUES (?1, ?2, 'related_to', ?3, ?3, ?4, 'peer_attested', 'peer-bob')",
            params![&src.id, &b.id, &now, &sig],
        )
        .unwrap();
        let got = strongest_attest_level_for_source(&conn, &src.id).unwrap();
        assert_eq!(got, "peer_attested", "peer_attested beats self_signed");
    }

    /// BUG-A regression: a raw INSERT claiming `self_signed` with a NULL
    /// signature must be rejected by the database itself.
    #[test]
    fn ck_trigger_refuses_self_signed_insert_without_signature() {
        // The rejection has to come from the SQLite trigger layer, so the
        // phantom-attest-level defect stays closed at the substrate
        // boundary even if a future caller (or an operator UPDATE) bypasses
        // `create_link_signed`'s match arm.
        let conn = test_db();
        let source = make_memory("ck-src", "test", Tier::Long, 5);
        let target = make_memory("ck-tgt", "test", Tier::Long, 5);
        insert(&conn, &source).unwrap();
        insert(&conn, &target).unwrap();

        let stamp = chrono::Utc::now().to_rfc3339();
        let err = conn
            .execute(
                "INSERT INTO memory_links \
                (source_id, target_id, relation, created_at, valid_from, signature, attest_level) \
             VALUES (?1, ?2, 'related_to', ?3, ?3, NULL, 'self_signed')",
                params![&source.id, &target.id, &stamp],
            )
            .expect_err("CHECK trigger must reject self_signed + NULL signature");

        // Any one of these fragments is enough to identify the failure mode.
        let msg = err.to_string();
        let names_failure = [
            "CHECK constraint failed",
            "attest_level",
            "64-byte signature",
        ]
        .iter()
        .any(|needle| msg.contains(*needle));
        assert!(
            names_failure,
            "trigger error must name the failure mode, got: {msg}"
        );
    }

    /// A `self_signed` INSERT whose signature is present but not the
    /// expected length must be rejected as well.
    #[test]
    fn ck_trigger_refuses_self_signed_insert_with_wrong_length_signature() {
        let conn = test_db();
        let source = make_memory("ck-src-wlen", "test", Tier::Long, 5);
        let target = make_memory("ck-tgt-wlen", "test", Tier::Long, 5);
        insert(&conn, &source).unwrap();
        insert(&conn, &target).unwrap();

        // Eight bytes stand in for a signature cut short by a partial
        // wire-read or a malformed operator INSERT.
        let truncated_sig = [0u8; 8];
        let stamp = chrono::Utc::now().to_rfc3339();
        let outcome = conn.execute(
            "INSERT INTO memory_links \
                (source_id, target_id, relation, created_at, valid_from, signature, attest_level) \
             VALUES (?1, ?2, 'related_to', ?3, ?3, ?4, 'self_signed')",
            params![&source.id, &target.id, &stamp, &truncated_sig[..]],
        );

        assert!(
            outcome.is_err(),
            "CHECK trigger must reject wrong-length signature"
        );
    }

    /// An UPDATE that flips an existing unsigned link to `self_signed`
    /// without supplying signature bytes must be refused.
    #[test]
    fn ck_trigger_refuses_update_to_self_signed_without_signature() {
        // Hold the permissions-mode test gate for the whole test, exactly as
        // `strongest_attest_picks_self_signed_over_unsigned` does: the a3
        // tests flip the *global* permissions mode to Enforce, and without
        // the gate that window can race the create_link_signed setup call
        // below and fail it with a spurious "link denied by permission
        // rule" before the trigger is ever exercised.
        let _gate = crate::config::lock_permissions_mode_for_test();

        // The CHECK trigger fires on UPDATE as well as INSERT — a
        // post-hoc UPDATE that flips an unsigned row to self_signed
        // without supplying signature bytes must be refused.
        let conn = test_db();
        let s = make_memory("ck-upd-src", "test", Tier::Long, 5);
        let t = make_memory("ck-upd-tgt", "test", Tier::Long, 5);
        insert(&conn, &s).unwrap();
        insert(&conn, &t).unwrap();
        // Setup: an unsigned link (no keypair supplied) to mutate afterwards.
        create_link_signed(&conn, &s.id, &t.id, "related_to", None).unwrap();
        let res = conn.execute(
            "UPDATE memory_links SET attest_level = 'self_signed' \
             WHERE source_id = ?1 AND target_id = ?2",
            params![&s.id, &t.id],
        );
        assert!(
            res.is_err(),
            "CHECK trigger must reject UPDATE to self_signed with NULL signature"
        );
    }

    /// Negative control: creating an unsigned link (NULL signature) must
    /// still succeed with the CHECK trigger installed.
    #[test]
    fn ck_trigger_admits_unsigned_with_null_signature() {
        // Hold the permissions-mode test gate for the whole test, exactly as
        // `strongest_attest_picks_self_signed_over_unsigned` does: the a3
        // tests flip the *global* permissions mode to Enforce, and without
        // the gate that window can race the create_link_signed call below.
        // Here that call is the behaviour under test, so a spurious "link
        // denied by permission rule" would read as a trigger regression.
        let _gate = crate::config::lock_permissions_mode_for_test();

        // The trigger's `WHEN` clause is scoped to self_signed /
        // peer_attested — the unsigned path with NULL signature
        // (the v0.6.4 default) must still admit. Negative-control
        // test pinning the trigger's narrow scope.
        let conn = test_db();
        let s = make_memory("ck-unsigned-src", "test", Tier::Long, 5);
        let t = make_memory("ck-unsigned-tgt", "test", Tier::Long, 5);
        insert(&conn, &s).unwrap();
        insert(&conn, &t).unwrap();
        // create_link_signed's unsigned branch sets (NULL, "unsigned");
        // confirm it still works under the new trigger.
        create_link_signed(&conn, &s.id, &t.id, "related_to", None)
            .expect("unsigned create must still succeed under the new CHECK trigger");
    }

    // -----------------------------------------------------------------
    // #626 Layer-3 (Task 1.3 / C3) — bind_agent_pubkey + agent_pubkey
    // -----------------------------------------------------------------

    /// `agent_pubkey` yields `None` for a registered agent with no key bound
    /// and `Some(key)` once `bind_agent_pubkey` has stored one.
    #[test]
    fn agent_pubkey_none_before_bind_and_some_after() {
        const AGENT: &str = "ai:curator";

        let conn = test_db();
        register_agent(&conn, AGENT, "ai:generic", &[]).expect("register");

        // Registered but unbound → permissive None.
        let unbound = agent_pubkey(&conn, AGENT).unwrap();
        assert_eq!(unbound, None);

        // Bind a freshly generated public key and read it back verbatim.
        let encoded = crate::identity::keypair::generate(AGENT)
            .expect("generate")
            .public_base64();
        bind_agent_pubkey(&conn, AGENT, &encoded).expect("bind");
        let bound = agent_pubkey(&conn, AGENT).unwrap();
        assert_eq!(bound, Some(encoded));
    }

    /// Looking up the key of an agent that was never registered is not an
    /// error; it simply yields `None`.
    #[test]
    fn agent_pubkey_none_for_unregistered_agent() {
        let conn = test_db();

        // No register_agent call: the lookup collapses to "no key to verify".
        let lookup = agent_pubkey(&conn, "ai:ghost").unwrap();
        assert_eq!(lookup, None);
    }

    /// Binding a key to an agent id that was never registered must fail,
    /// with an error that mentions "not registered".
    #[test]
    fn bind_agent_pubkey_rejects_unregistered_agent() {
        let conn = test_db();

        // "ai:ghost" is never passed to register_agent in this test.
        let err = bind_agent_pubkey(&conn, "ai:ghost", "AAAA").unwrap_err();
        let rendered = err.to_string();
        assert!(
            rendered.contains("not registered"),
            "binding to an unregistered agent must be rejected; got: {err}",
        );
    }

    /// Binding a second key to the same agent replaces the first one.
    #[test]
    fn bind_agent_pubkey_rotates_key_in_place() {
        const AGENT: &str = "ai:curator";

        let conn = test_db();
        register_agent(&conn, AGENT, "ai:generic", &[]).expect("register");

        // Each call generates a new keypair and returns its public key.
        let fresh_key = || {
            crate::identity::keypair::generate(AGENT)
                .unwrap()
                .public_base64()
        };
        let first = fresh_key();
        let second = fresh_key();
        assert_ne!(first, second, "two fresh keys differ");

        bind_agent_pubkey(&conn, AGENT, &first).expect("bind k1");
        assert_eq!(agent_pubkey(&conn, AGENT).unwrap(), Some(first));

        // Rotation: after the second bind only the newer key is reported.
        bind_agent_pubkey(&conn, AGENT, &second).expect("rotate to k2");
        assert_eq!(agent_pubkey(&conn, AGENT).unwrap(), Some(second));
    }

    /// Binding a key must leave `agent_type`, `capabilities` and
    /// `registered_at` exactly as `list_agents` reported them beforehand.
    #[test]
    fn bind_agent_pubkey_preserves_registration_fields() {
        const AGENT: &str = "ai:curator";

        let conn = test_db();
        let capabilities = ["recall".to_string(), "write".to_string()];
        register_agent(&conn, AGENT, "ai:claude-opus", &capabilities).expect("register");

        // Snapshot of the agent list before any key is bound.
        let before = list_agents(&conn).expect("list before");
        let a_before = before
            .iter()
            .find(|agent| agent.agent_id == AGENT)
            .expect("present before");

        let signer = crate::identity::keypair::generate(AGENT).unwrap();
        bind_agent_pubkey(&conn, AGENT, &signer.public_base64()).expect("bind");

        // Same lookup after the bind; the full row must still be visible.
        let after = list_agents(&conn).expect("list after");
        let a_after = after
            .iter()
            .find(|agent| agent.agent_id == AGENT)
            .expect("present after");

        assert_eq!(a_after.agent_type, a_before.agent_type);
        assert_eq!(a_after.capabilities, a_before.capabilities);
        assert_eq!(a_after.registered_at, a_before.registered_at);
    }

    // -----------------------------------------------------------------
    // #626 Layer-3 (Task 1.3 / C5) — revoke_agent_pubkey
    // -----------------------------------------------------------------

    /// After `revoke_agent_pubkey`, a previously bound key is no longer
    /// reported by `agent_pubkey`.
    #[test]
    fn revoke_agent_pubkey_clears_bound_key() {
        const AGENT: &str = "ai:curator";

        let conn = test_db();
        register_agent(&conn, AGENT, "ai:generic", &[]).expect("register");

        // Precondition: a key is bound and visible.
        let signer = crate::identity::keypair::generate(AGENT).unwrap();
        bind_agent_pubkey(&conn, AGENT, &signer.public_base64()).expect("bind");
        let bound = agent_pubkey(&conn, AGENT).unwrap();
        assert!(bound.is_some());

        revoke_agent_pubkey(&conn, AGENT).expect("revoke");
        let revoked = agent_pubkey(&conn, AGENT).unwrap();
        assert_eq!(revoked, None);
    }

    /// Revoking on a registered agent that never had a key bound succeeds
    /// and leaves the lookup at `None`.
    #[test]
    fn revoke_agent_pubkey_is_idempotent_without_bound_key() {
        const AGENT: &str = "ai:curator";

        let conn = test_db();
        register_agent(&conn, AGENT, "ai:generic", &[]).expect("register");

        // bind_agent_pubkey is deliberately never called before the revoke.
        revoke_agent_pubkey(&conn, AGENT).expect("revoke unbound");

        let lookup = agent_pubkey(&conn, AGENT).unwrap();
        assert_eq!(lookup, None);
    }

    /// Revoking the key of an agent id that was never registered must fail,
    /// with an error that mentions "not registered".
    #[test]
    fn revoke_agent_pubkey_rejects_unregistered_agent() {
        let conn = test_db();

        // "ai:ghost" is never passed to register_agent in this test.
        let err = revoke_agent_pubkey(&conn, "ai:ghost").unwrap_err();
        let rendered = err.to_string();
        assert!(
            rendered.contains("not registered"),
            "revoking an unregistered agent must be rejected; got: {err}",
        );
    }

    /// A bind followed by a revoke must leave the agent's `agent_type` and
    /// `capabilities` as they were registered.
    #[test]
    fn revoke_agent_pubkey_preserves_registration_fields() {
        const AGENT: &str = "ai:curator";

        let conn = test_db();
        let registered_caps = vec!["recall".to_string(), "write".to_string()];
        register_agent(&conn, AGENT, "ai:claude-opus", &registered_caps).expect("register");

        // Bind a key, then take it away again.
        let signer = crate::identity::keypair::generate(AGENT).unwrap();
        bind_agent_pubkey(&conn, AGENT, &signer.public_base64()).expect("bind");
        revoke_agent_pubkey(&conn, AGENT).expect("revoke");

        // The agent is still listed, with its registration data unchanged.
        let after = list_agents(&conn).expect("list after");
        let listed = after
            .iter()
            .find(|agent| agent.agent_id == AGENT)
            .expect("present after revoke");
        assert_eq!(listed.agent_type, "ai:claude-opus");
        assert_eq!(listed.capabilities, registered_caps);
    }

    /// After a revoke, binding a new key makes `agent_pubkey` report that
    /// new key.
    #[test]
    fn revoke_then_rebind_restores_attestable_key() {
        const AGENT: &str = "ai:curator";

        let conn = test_db();
        register_agent(&conn, AGENT, "ai:generic", &[]).expect("register");

        // Each call generates a new keypair and returns its public key.
        let fresh_key = || {
            crate::identity::keypair::generate(AGENT)
                .unwrap()
                .public_base64()
        };

        // Bind, then revoke: the lookup goes back to None.
        let original = fresh_key();
        bind_agent_pubkey(&conn, AGENT, &original).expect("bind k1");
        revoke_agent_pubkey(&conn, AGENT).expect("revoke");
        assert_eq!(agent_pubkey(&conn, AGENT).unwrap(), None);

        // Rebinding a second key makes that key visible.
        let replacement = fresh_key();
        bind_agent_pubkey(&conn, AGENT, &replacement).expect("rebind k2");
        assert_eq!(agent_pubkey(&conn, AGENT).unwrap(), Some(replacement));
    }
}