Skip to main content

ai_memory/
daemon_runtime.rs

1// Copyright 2026 AlphaOne LLC
2// SPDX-License-Identifier: Apache-2.0
3
4//! Daemon runtime — orchestration shell for the `ai-memory` binary.
5//!
6//! W6 lifted `serve()` and the top-level dispatch out of `main.rs` so the
7//! production HTTP daemon, the integration test harness, and the
8//! coverage-instrumented tests in this module all share one source of
9//! truth. `main.rs` keeps its `#[tokio::main]` entry point but immediately
10//! delegates here for every subcommand.
11//!
12//! ## Public surface (post-W6)
13//!
14//! - [`run`] — top-level CLI dispatch (called from `main()`).
15//! - [`serve`] — full HTTP daemon body (TLS or plain).
16//! - [`bootstrap_serve`] — testable struct-returning state builder.
17//! - [`build_router`] — composition wrapper around `lib::build_router`.
18//! - [`build_embedder`], [`build_vector_index`] — single canonical builders
19//!   used by both `serve()` and `cli::recall::run`.
20//! - [`spawn_gc_loop`], [`spawn_wal_checkpoint_loop`] — daemon background
21//!   tasks, returning a [`JoinHandle`] so callers can abort on shutdown.
22//! - [`is_write_command`] — write-command predicate driving the post-write
23//!   WAL checkpoint.
24//! - [`passphrase_from_file`], [`apply_anonymize_default`] — startup helpers.
25//!
26//! ## Pre-W6 helpers retained
27//!
28//! - [`serve_http_with_shutdown`], [`serve_http_with_shutdown_future`] —
29//!   the in-process HTTP harness the integration suite drives.
30//! - [`run_sync_daemon_with_shutdown`],
31//!   [`run_sync_daemon_with_shutdown_using_client`],
32//!   [`sync_cycle_once`] — the sync-daemon body.
33//! - [`run_curator_daemon_with_shutdown`],
34//!   [`run_curator_daemon_with_primitives`] — the curator-daemon body.
35
36use crate::models::field_names;
37use std::io::Write as _;
38use std::path::Path;
39use std::path::PathBuf;
40use std::sync::Arc;
41use std::sync::atomic::{AtomicBool, Ordering};
42use std::time::{Duration, Instant};
43
44use anyhow::{Context, Result};
45use axum::Router;
46use clap::{Args, CommandFactory, Parser, Subcommand};
47use clap_complete::{Shell, generate};
48use rusqlite::Connection;
49use tokio::sync::{Mutex, Notify};
50use tokio::task::JoinHandle;
51use tracing_subscriber::EnvFilter;
52
53use crate::cli::agents::{AgentsArgs, PendingArgs};
54use crate::cli::archive::ArchiveArgs;
55use crate::cli::audit::AuditArgs;
56use crate::cli::backup::{BackupArgs, RestoreArgs};
57use crate::cli::boot::BootArgs;
58use crate::cli::consolidate::{AutoConsolidateArgs, ConsolidateArgs};
59use crate::cli::crud::{DeleteArgs, GetArgs, ListArgs};
60use crate::cli::curator::CuratorArgs;
61use crate::cli::forget::ForgetArgs;
62use crate::cli::identity::IdentityArgs;
63use crate::cli::install::InstallArgs;
64use crate::cli::io::{ImportArgs, MineArgs};
65use crate::cli::link::{LinkArgs, ResolveArgs};
66use crate::cli::logs::LogsArgs;
67use crate::cli::promote::PromoteArgs;
68use crate::cli::recall::RecallArgs;
69use crate::cli::rules::RulesArgs;
70use crate::cli::search::SearchArgs;
71use crate::cli::store::StoreArgs;
72use crate::cli::sync::{SyncArgs, SyncDaemonArgs};
73use crate::cli::update::UpdateArgs;
74use crate::cli::verify::VerifyChainArgs;
75use crate::cli::verify_signed_events::VerifySignedEventsChainArgs;
76use crate::cli::wrap::WrapArgs;
77use crate::config::{AppConfig, FeatureTier};
78use crate::embeddings::Embedder;
79use crate::handlers::{ApiKeyState, AppState, Db};
80use crate::hnsw::VectorIndex;
81use crate::{bench, cli, db, embeddings, federation, hnsw, llm, mcp, tls};
82
83#[cfg(feature = "sal")]
84use crate::migrate;
85
/// Default database file for the global `--db` flag on `Cli`; it applies
/// when neither the flag nor the `AI_MEMORY_DB` environment variable is set.
86const DEFAULT_DB: &str = "ai-memory.db";
/// Default TCP port. NOTE(review): the consumer is not visible in this part
/// of the file — presumably the `serve` listener default; confirm against
/// `ServeArgs`.
87const DEFAULT_PORT: u16 = 9077;
/// Garbage-collection cadence in seconds: 30 × `crate::SECS_PER_MINUTE`
/// (the `as u64` cast applies to the constant before the multiplication).
/// NOTE(review): presumably the tick of `spawn_gc_loop` — confirm.
88const GC_INTERVAL_SECS: u64 = 30 * crate::SECS_PER_MINUTE as u64;
89/// WAL auto-checkpoint cadence in the HTTP daemon. Bounds `*-wal`
90/// file growth between `SQLite`'s internal page-count checkpoints.
91const WAL_CHECKPOINT_INTERVAL_SECS: u64 = 10 * crate::SECS_PER_MINUTE as u64;
92/// v0.7.0 K2 — pending_actions timeout sweeper cadence. Fires every
93/// 60s and transitions `status='pending'` rows whose age exceeds the
94/// per-row `default_timeout_seconds` (or the global default below) to
95/// `status='expired'`.
96const PENDING_TIMEOUT_SWEEP_INTERVAL_SECS: u64 = 60;
97/// Default per-row TTL applied when a `pending_actions` row has a NULL
98/// `default_timeout_seconds`. 24 hours — matches the operator-facing
99/// `doctor` warning window so a row already classed CRITICAL by
100/// `doctor_oldest_pending_age_secs` is also a sweeper candidate.
101const PENDING_TIMEOUT_DEFAULT_SECS: i64 = crate::SECS_PER_DAY;
102/// v0.7.0 I3 — transcript archive→prune sweeper cadence. The lifecycle
103/// scan walks every transcript row plus a per-candidate join into
104/// `memories`, so we run it less aggressively than the K2 60-second
105/// pending-actions sweeper. 10 minutes is fast enough that operator-
106/// visible drift between TTL expiry and archive is bounded by one
107/// tick, and slow enough that the scan never dominates a busy
108/// daemon's wall-clock.
109const TRANSCRIPT_LIFECYCLE_SWEEP_INTERVAL_SECS: u64 = 600;
110/// v0.7.0 K8 — agent-quota daily-counter reset cadence. The sweep
111/// zeroes `current_memories_today` + `current_links_today` for every
112/// row whose `day_started_at` predates the current UTC date. 60-second
113/// cadence matches the K2 pending-actions sweeper — a single SQL
114/// UPDATE that touches at most one row per registered agent per
115/// midnight crossing.
116const AGENT_QUOTA_RESET_INTERVAL_SECS: u64 = 60;
117
118// ---------------------------------------------------------------------------
119// Clap-derived CLI surface
120// ---------------------------------------------------------------------------
121//
122// The clap structs live in the lib crate so `daemon_runtime::run` can
123// take them as parameters. `main.rs` re-exports `Cli` and immediately
124// delegates here.
125
// Root clap parser for the `ai-memory` binary: one subcommand plus the
// flags declared with `global = true`, which clap accepts on every
// subcommand as well as at the top level.
//
// The `///` comments on the fields below are consumed by clap's derive as
// `--help` text, so editing them changes user-visible output. Maintainer
// notes in this struct are therefore written as plain `//` comments.
126#[derive(Parser)]
127#[command(
128    name = "ai-memory",
129    version,
130    about = "AI-agnostic persistent memory — MCP server, HTTP API, and CLI for any AI platform"
131)]
132pub struct Cli {
    // The selected subcommand; see `Command` for the full verb list.
133    #[command(subcommand)]
134    pub command: Command,
    // Database path. clap resolves the value in this order: `--db` on the
    // command line, then the `AI_MEMORY_DB` environment variable, then
    // `DEFAULT_DB`.
135    #[arg(long, env = "AI_MEMORY_DB", default_value = DEFAULT_DB, global = true)]
136    pub db: PathBuf,
137    /// Output as JSON (machine-parseable)
    // `default_value_t = false` only spells out the default a `bool` flag
    // already has.
138    #[arg(long, global = true, default_value_t = false)]
139    pub json: bool,
140    /// Agent identifier used for store operations. If unset, an NHI-hardened
141    /// default is synthesized (see `ai-memory store --help`). Accepts the
142    /// `AI_MEMORY_AGENT_ID` environment variable as a fallback.
    // `None` when neither the flag nor the environment variable is present;
    // the synthesized default mentioned above is produced by the consumer,
    // not by clap.
143    #[arg(long, env = "AI_MEMORY_AGENT_ID", global = true)]
144    pub agent_id: Option<String>,
145    /// v0.6.0.0: path to a file containing the `SQLCipher` passphrase.
146    /// Only meaningful when the binary was built with
147    /// `--features sqlcipher` (standard builds ignore this flag). File
148    /// must be root-readable (mode 0400 recommended). The passphrase is
149    /// read once at startup and exported as `AI_MEMORY_DB_PASSPHRASE`
150    /// for the duration of the process — passing the passphrase
151    /// directly as an env var or as a flag value leaks to the process
152    /// list (`ps -E`) and shell history.
    // Flag-only (no `env =` fallback); `None` when the flag is absent.
153    #[arg(long, global = true, value_name = "PATH")]
154    pub db_passphrase_file: Option<PathBuf>,
155}
156
// Top-level subcommand set for `ai-memory`. Each variant is one
// subcommand; clap's derive turns the `///` comment above a variant into
// that subcommand's help text, so those comments are user-facing output.
// Maintainer notes in this enum are written as plain `//` comments.
//
// Variant shapes:
// - unit variants (`Gc`, `Stats`, `Namespaces`, `Export`, `Shell`, `Man`)
//   carry no subcommand-specific arguments;
// - `Mcp` declares its two options inline;
// - every other variant wraps a dedicated argument struct.
//
// `Migrate` and `SchemaInit` exist only when the crate is built with the
// `sal` cargo feature.
157#[derive(Subcommand)]
158pub enum Command {
159    /// Start the HTTP memory daemon.
160    ///
161    /// **Tier resolution.** Unlike `mcp` / `store` / `recall`, the
162    /// `serve` subcommand does NOT accept a `--tier` flag. The
163    /// daemon's effective feature tier is resolved from the `tier`
164    /// field in `config.toml`, falling back to the compiled-in
165    /// default (`semantic`). For per-invocation tier overrides use
166    /// the `mcp` / `store` / `recall` subcommands, which expose
167    /// `--tier` directly. See `docs/ADMIN_GUIDE.md` §"Feature tiers"
168    /// and issue #703 for the rationale (a long-running daemon owns
169    /// embedder / LLM resources that are expensive to swap mid-run,
170    /// so tier is fixed at startup via configuration).
171    Serve(ServeArgs),
    // The only struct-style variant: its options are declared inline
    // rather than in a separate `*Args` struct.
172    /// Run as an MCP (Model Context Protocol) tool server over stdio
173    Mcp {
        // Captured as a raw string with a clap-level default of
        // "semantic"; any parsing into a tier type happens in the consumer
        // (not visible in this part of the file).
174        /// Feature tier: keyword (FTS only) or semantic (embeddings + FTS)
175        #[arg(long, default_value = "semantic")]
176        tier: String,
        // clap only covers the "flag > `AI_MEMORY_PROFILE`" part of the
        // resolution order described below; the field is `None` when
        // neither is given, so the config-file and `core` fallbacks must be
        // applied by the consumer.
177        /// v0.6.4 — Tool surface profile. One of `core`, `graph`, `admin`,
178        /// `power`, `full`, or a comma-separated custom list (e.g.,
179        /// `core,graph,archive`). Default `core` (7 tools at v0.7.0:
180        /// the original 5 + `memory_load_family` + `memory_smart_load`).
181        /// Resolution order: this CLI flag > `AI_MEMORY_PROFILE` env >
182        /// `[mcp].profile` in config.toml > `core`. Set `--profile full`
183        /// to expose every family — at v0.7.0,
184        /// `Profile::full().expected_tool_count()` returns 74 (canonical
185        /// SSOT; pinned by `profile_full_matches_registry_all` against
186        /// `crate::mcp::registry::tool_names::ALL.len()`). The 74
187        /// advertised entries decompose as 73 callable "memory tools"
188        /// plus the always-on `memory_capabilities` bootstrap; the
189        /// `build_capabilities_summary` "{n} memory tools" phrasing
190        /// reports the 73 memory-tool count to reconcile with the
191        /// user-facing summary (see issue #862 for the disambiguation).
192        #[arg(long, env = "AI_MEMORY_PROFILE")]
193        profile: Option<String>,
194    },
195    /// Store a new memory
196    Store(StoreArgs),
197    /// Update an existing memory by ID
198    Update(UpdateArgs),
199    /// Recall memories relevant to a context
200    Recall(RecallArgs),
201    /// Search memories by text
202    Search(SearchArgs),
203    /// Retrieve a memory by ID
204    Get(GetArgs),
205    /// List memories
206    List(ListArgs),
207    /// Delete a memory by ID
208    Delete(DeleteArgs),
209    /// Promote a memory to long-term
210    Promote(PromoteArgs),
211    /// Delete memories matching a pattern
212    Forget(ForgetArgs),
213    /// Link two memories
214    Link(LinkArgs),
215    /// Consolidate multiple memories into one
216    Consolidate(ConsolidateArgs),
217    /// Run garbage collection
218    Gc,
219    /// Show statistics
220    Stats,
221    /// List all namespaces
222    Namespaces,
223    /// v0.7.0 (issue #800) — operator CRUD for the per-namespace
224    /// standard policy memory pointer (Batman Mode Crack 1). Three
225    /// verbs: `set-standard` / `get-standard` / `clear-standard`, plus
226    /// the `batman-policy` helper that prints the canonical Batman
227    /// `GovernancePolicy` JSON blob. Closes the friction that kept
228    /// Batman Forms 2 + 6 dormant on most installs by replacing the
229    /// MCP-stdio JSON-RPC dance with first-class CLI surface.
230    Namespace(crate::cli::namespace::NamespaceArgs),
231    /// v0.7.x (#1146) — enterprise configuration tooling.
232    /// `ai-memory config migrate` rewrites a legacy v1 (flat-field)
233    /// `config.toml` to the v2 sectioned shape (`[llm]`, `[embeddings]`,
234    /// `[reranker]`, `[storage]`) with a timestamped `.bak` backup.
235    /// `--dry-run` prints the diff without writing.
236    /// `--also-clean-claude-json` additionally removes the
237    /// `mcpServers.<*>.env` block from `~/.claude.json` after the
238    /// operator has verified the new config.
239    Config(crate::cli::commands::config::ConfigCliArgs),
240    /// Export all memories as JSON
241    Export,
242    /// Import memories from JSON (stdin)
243    Import(ImportArgs),
244    /// Resolve a contradiction — mark one memory as superseding another
245    Resolve(ResolveArgs),
    // Distinct from the `clap_complete::Shell` type imported at the top of
    // the file: this is the variant `Command::Shell`, so the names do not
    // clash.
246    /// Interactive memory shell (REPL)
247    Shell,
248    /// Sync memories between two database files
249    Sync(SyncArgs),
250    /// Run the peer-to-peer sync daemon — continuously exchange memories
251    /// with one or more HTTP peers (Phase 3 Task 3b.1). The defining
252    /// grand-slam capability: two agents on two machines form a live
253    /// knowledge mesh with no cloud, no login, no `SaaS`.
254    SyncDaemon(SyncDaemonArgs),
255    /// Auto-consolidate short-term memories by namespace
256    AutoConsolidate(AutoConsolidateArgs),
257    /// Generate shell completions
258    Completions(CompletionsArgs),
259    /// Generate man page
260    Man,
261    /// Import memories from historical conversations (Claude, `ChatGPT`, Slack exports)
262    Mine(MineArgs),
263    /// Manage the memory archive (list, restore, purge, stats)
264    Archive(ArchiveArgs),
265    /// Register or list agents (Task 1.3)
266    Agents(AgentsArgs),
267    /// v0.7 (Track H, Task H1) — per-agent Ed25519 keypair lifecycle.
268    /// `generate` / `import` / `list` / `export-pub` against the local
269    /// key directory (default `<config>/ai-memory/keys`). Hardware-backed
270    /// key storage (TPM/HSM/Secure Enclave) is out of OSS scope and
271    /// lives in the AgenticMem commercial layer.
272    Identity(IdentityArgs),
273    /// v0.7.0 QW-3 — context-offload substrate primitive. Persists a
274    /// file (or `-` for stdin) into the `offloaded_blobs` substrate
275    /// and prints the short `ref_id` callers keep in their working
276    /// window. Pairs with `ai-memory deref <ref_id>`.
277    Offload(crate::cli::offload::OffloadArgs),
278    /// v0.7.0 QW-3 — dereference a previously-offloaded `ref_id`.
279    /// Refuses tampered rows (SHA-256 mismatch). Pairs with
280    /// `ai-memory offload <file>`.
281    Deref(crate::cli::offload::DerefArgs),
282    /// v0.7.0 (issue #691) — substrate-level agent-action rules engine.
283    /// CRUD over the `governance_rules` table consulted by
284    /// `check_agent_action`. Mutation verbs (add/enable/disable/remove)
285    /// require the operator's Ed25519 keypair on disk at
286    /// `<key-dir>/operator.priv` (mode 0600); without `--sign` they
287    /// refuse with `governance.no_operator_key`. Read verbs (list /
288    /// check) are unprivileged.
289    Rules(RulesArgs),
290    /// List / approve / reject governance-pending actions (Task 1.9)
291    Pending(PendingArgs),
292    /// v0.6.0.0: snapshot the `SQLite` database to a timestamped backup
293    /// file. Uses `SQLite` `VACUUM INTO` which is hot-backup safe (no daemon
294    /// stop required). Writes a `manifest.json` alongside (sha256 + version).
295    Backup(BackupArgs),
296    /// v0.6.0.0: restore the `SQLite` database from a backup file written
297    /// by `ai-memory backup`. Verifies the manifest sha256 before
298    /// replacing the current DB. The current DB is moved aside as a safety
299    /// net before the replacement.
300    Restore(RestoreArgs),
301    /// v0.6.1: run the autonomous curator. `--once` runs a single sweep
302    /// and prints a JSON report; `--daemon` loops with `--interval-secs`
303    /// between cycles. Auto-tags memories without tags and flags
304    /// contradictions against nearby siblings in the same namespace.
305    Curator(CuratorArgs),
306    /// v0.6.3 (Pillar 3 / Stream E): run the canonical performance
307    /// workload and print measured p50/p95/p99 against the budgets in
308    /// `PERFORMANCE.md`. Each invocation seeds a disposable temp DB so
309    /// the user's main DB is untouched. Exits non-zero when any p95
310    /// exceeds its budget by more than the published 10% tolerance.
311    Bench(BenchArgs),
    // Feature-gated: this variant is compiled only with `--features sal`.
    // `MigrateArgs` is not declared in the visible part of the file.
312    /// v0.7: migrate memories between SAL backends. Gated behind
313    /// `--features sal`. Reads pages via `MemoryStore::list`, writes
314    /// via `MemoryStore::store`. Idempotent: source ids are preserved
315    /// and both adapters upsert on id.
316    #[cfg(feature = "sal")]
317    Migrate(MigrateArgs),
    // Feature-gated in the same way as `Migrate`.
318    /// v0.7.0 Wave-1 Fix 3: bootstrap a SAL backend's schema by URL.
319    /// Opens the target store via the same factory as `migrate` (which
320    /// triggers `INIT_SCHEMA` as a side effect) then enumerates the
321    /// resulting catalog (tables, views, functions, indices,
322    /// extensions, schema_version). On Postgres with Apache AGE
323    /// installed it also bootstraps the `memory_graph` projection via
324    /// `SELECT create_graph('memory_graph')`. Idempotent — safe to
325    /// re-run against an already-initialized store. Gated behind
326    /// `--features sal`.
327    #[cfg(feature = "sal")]
328    SchemaInit(crate::cli::schema_init::SchemaInitArgs),
329    /// v0.6.3.1 (P7 / R7): operator-visible health dashboard. Reads
330    /// Capabilities v2 (P1) + data integrity surfaces (P2) + recall
331    /// observability (P3). With `--remote <url>` becomes a fleet doctor
332    /// at T3+. Read-only — never mutates the database. Exits 0 on a
333    /// healthy report, 2 on critical findings, and 1 on warnings when
334    /// `--fail-on-warn` is passed.
335    Doctor(DoctorCliArgs),
336    /// Issue #487: emit session-boot context. Universal primitive every
337    /// AI-agent integration recipe (Claude Code SessionStart hook, Cursor /
338    /// Cline / Continue / Windsurf system-message, Codex / Apps SDK /
339    /// Agent SDK programmatic prepend, OpenClaw built-in, local models
340    /// via LM Studio / Ollama / vLLM) calls before the agent's first turn.
341    /// Read-only, fast, never blocks. With `--quiet` (recommended for
342    /// hooks) a missing DB exits 0 with empty stdout.
343    Boot(BootArgs),
344    /// Issue #487 PR-2: wire `ai-memory boot` and the `ai-memory-mcp`
345    /// server into AI agents' config files (Claude Code SessionStart hook,
346    /// Cursor / Cline / Continue / Windsurf / OpenClaw MCP config). Default
347    /// is `--dry-run` (prints the diff, writes nothing). Pass `--apply` to
348    /// commit. Pass `--uninstall --apply` to remove a previously-installed
349    /// managed block.
350    Install(InstallArgs),
351    /// Issue #487 PR-6: cross-platform Rust replacement for the bash /
352    /// PowerShell wrappers PR-1 shipped in the integration recipes. Runs
353    /// `ai-memory boot` in-process, builds a system message, then spawns
354    /// the named agent CLI with the system message delivered via the
355    /// strategy chosen by `default_strategy(<agent>)` (or an explicit
356    /// `--system-flag` / `--system-env` / `--message-file-flag`
357    /// override). Exit code is propagated from the wrapped agent.
358    Wrap(WrapArgs),
359    /// Issue #487 PR-5: operator-facing CLI for the operational logging
360    /// facility (`tail`, `cat`, `archive`, `purge`). Default-OFF — emits
361    /// nothing useful unless `[logging] enabled = true` is set in
362    /// `config.toml`.
363    Logs(LogsArgs),
364    /// Issue #487 PR-5: operator-facing CLI for the security audit
365    /// trail (`verify`, `tail`, `path`). Default-OFF — emits nothing
366    /// useful unless `[audit] enabled = true` is set in `config.toml`.
367    Audit(AuditArgs),
368    /// v0.7.0 K11 — translate legacy `[governance]` policies in
369    /// `config.toml` into the v0.7 `[[permissions.rules]]` (K9) format.
370    /// Default mode is dry-run: prints to stdout. Pass `--config-out
371    /// PATH` to write the rendered block to a file (or merge in-place
372    /// when `PATH` matches the loaded config).
373    Governance(GovernanceCliArgs),
374    /// v0.7.0 L1-3 — external verifier for reflection chains
375    /// (procurement-grade audit tool). Walks `reflects_on` edges
376    /// backward from `<memory_id>` to depth 0, verifies each
377    /// Ed25519 signature, and emits a structured chain-integrity
378    /// report. Exit 0 if fully verified; non-zero otherwise.
379    VerifyReflectionChain(VerifyChainArgs),
380    /// v0.7.0 V-4 closeout (#698) — walk the SQL-side `signed_events`
381    /// cross-row hash chain (schema v34) and emit a structured
382    /// report. Distinct from `verify-reflection-chain` (which walks
383    /// reflects_on edges) and from `audit verify` (which walks the
384    /// JSONL audit log). Exit 0 if the chain holds; 1 on chain
385    /// break.
386    VerifySignedEventsChain(VerifySignedEventsChainArgs),
387    /// v0.7.0 L2-5 (issue #670) — export a procurement-grade forensic
388    /// evidence bundle (signed tarball) for a memory and its
389    /// reflection chain. The OSS surface for the `AgenticMem Attest`
390    /// tier; see [`crate::forensic::bundle`] for the bundle layout.
391    ExportForensicBundle(crate::forensic::bundle::ExportForensicBundleArgs),
392    /// v0.7.0 L2-5 (issue #670) — verify a forensic evidence bundle.
393    /// Re-hashes every file, checks the manifest signature when
394    /// present, and re-verifies every edge signature against the
395    /// bundled `observed_by` public key.
396    VerifyForensicBundle(crate::forensic::bundle::VerifyForensicBundleArgs),
397    /// v0.7.0 QW-1 — write every reflection memory to a file under
398    /// `~/.ai-memory/reflections/<namespace>/<id>.md` (or `.json` with
399    /// `--format json`) so operators can `cat` what the substrate has
400    /// synthesised without learning SQL. The on-disk artefact is
401    /// derived; the SQL row stays canonical.
402    ExportReflections(crate::cli::commands::export_reflections::ExportReflectionsArgs),
403    /// v0.7.0 (issue #1389) — fail-safe recovery of agent context
404    /// from a host's per-turn transcript file when the previous
405    /// session terminated ungracefully (SIGKILL, tmux lockup, host
406    /// crash) between turns. Closes the #1388 substrate failure
407    /// mode. Designed for SessionStart-hook chaining after
408    /// `ai-memory boot`; the in-session counterpart is the
409    /// `memory_recover_previous_session` MCP tool.
410    RecoverPreviousSession(
411        crate::cli::commands::recover_previous_session::RecoverPreviousSessionArgs,
412    ),
413    /// v0.7.0 WT-1-F — operator-side wrapper over the atomisation
414    /// engine ([`crate::atomisation::Atomiser`]). Decomposes one
415    /// long-form memory into atomic propositions; surfaces every
416    /// substrate failure with a stable exit code (see
417    /// [`crate::cli::commands::atomise::exit_code`]).
418    Atomise(crate::cli::commands::atomise::AtomiseArgs),
419    /// v0.7.0 QW-2 — fetch (or regenerate) the Persona artefact for
420    /// an entity. Read-only by default; pass `--regenerate` to run
421    /// the curator and persist a fresh row.
422    Persona(crate::cli::commands::persona::PersonaArgs),
423    /// v0.7.0 Form 5 (issue #758) — calibration driver verbs.
424    /// `ai-memory calibrate confidence --from-shadow` reads
425    /// `confidence_shadow_observations` and emits per-(namespace,
426    /// source) baselines computed over the window.
427    Calibrate(crate::cli::commands::calibrate_confidence::CalibrateArgs),
428    /// v0.7.0 Cluster E API-2 (issue #767) — `ai-memory skill
429    /// <register|list|get|resource|export|promote|compose>` CLI parity
430    /// surface for the 7 L1-5 Agent Skills MCP tools. Dispatches into
431    /// the same substrate handlers (re-exported under
432    /// `crate::mcp::handle_skill_*`); no business logic is duplicated.
433    Skill(crate::cli::commands::skill::SkillArgs),
434    /// v0.7.0 #1095 — `ai-memory share` subcommand. Closes the SR-4
435    /// three-surface-parity gap. Copies a memory into the recipient
436    /// agent's shared namespace `_shared/<from>→<to>/` via the same
437    /// substrate primitive the MCP tool (`memory_share`) and HTTP
438    /// route (`POST /api/v1/share`) consume — guaranteeing byte-equal
439    /// envelopes across the three surfaces.
440    Share(crate::cli::share::ShareArgs),
441    /// v0.7.0 ARCH-3 / FX-12 — `ai-memory kg-query` subcommand.
442    /// Outbound KG traversal from a source memory (<=5 hops). CLI
443    /// parity for the MCP `memory_kg_query` tool.
444    KgQuery(crate::cli::commands::kg_query::KgQueryArgs),
445    /// v0.7.0 ARCH-3 / FX-12 — `ai-memory find-paths` subcommand.
446    /// Enumerate up to N paths through the KG between two memories
447    /// (BFS, `max_depth<=7`). CLI parity for `memory_find_paths`.
448    FindPaths(crate::cli::commands::find_paths::FindPathsArgs),
449    /// v0.7.0 ARCH-3 / FX-12 — `ai-memory recall-observations`
450    /// subcommand. List rows from the recall-consumption ledger
451    /// (#886). CLI parity for `memory_recall_observations`.
452    RecallObservations(crate::cli::commands::recall_observations::RecallObservationsArgs),
453    /// v0.7.0 #1443 — `ai-memory expand` subcommand. LLM query-expansion
454    /// over a free-text query. CLI parity for the MCP
455    /// `memory_expand_query` tool + the `POST /api/v1/expand_query` HTTP
456    /// route — all three share [`crate::mcp::handle_expand_query`]. Lets
457    /// a harness inject expansion as a one-shot without an MCP stdio
458    /// server or HTTP daemon. Requires a configured LLM (any tier via
459    /// `AI_MEMORY_LLM_BACKEND`, or smart/autonomous preset).
460    Expand(crate::cli::commands::expand::ExpandArgs),
461    /// v0.7.0 ARCH-3 / FX-12 — `ai-memory check-duplicate`
462    /// subcommand. Pre-write near-duplicate check via cosine over
463    /// stored embeddings. CLI parity for `memory_check_duplicate`.
464    /// Requires the embedder (semantic tier or above).
465    CheckDuplicate(crate::cli::commands::check_duplicate::CheckDuplicateArgs),
466    /// v0.7.0 #1598 — `ai-memory reembed` subcommand. Full-corpus
467    /// vector-space migration: re-embeds every live memory (optionally
468    /// `--namespace`-filtered) with the resolved embedding
469    /// backend/model and REPLACES the stored vectors (unlike the boot
470    /// backfill, which only fills missing ones). `--dry-run` prints
471    /// the plan; per-row #1595 failure isolation (skip-with-WARN)
472    /// keeps one poison row from stopping the sweep. Resolves the
473    /// embedder via the same `AppConfig::resolve_embeddings()` +
474    /// `Embedder::from_resolved` path as daemon/MCP boot.
475    Reembed(crate::cli::commands::reembed::ReembedArgs),
476    /// v0.7.0 ARCH-3 / FX-12 — `ai-memory replay` subcommand.
477    /// Reconstruct the conversation transcript chain that produced a
478    /// memory. CLI parity for `memory_replay`.
479    Replay(crate::cli::commands::replay::ReplayArgs),
480    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory reflect`. CLI
481    /// parity for `memory_reflect`. CLI dispatcher uses
482    /// `active_keypair=None` / `embedder=None`; operators who need
483    /// signing or LLM dedup drive the daemon via MCP / HTTP.
484    Reflect(crate::cli::commands::reflect::ReflectArgs),
485    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory subscribe`. CLI
486    /// parity for `memory_subscribe`.
487    Subscribe(crate::cli::commands::subscribe::SubscribeArgs),
488    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory unsubscribe`. CLI
489    /// parity for `memory_unsubscribe`.
490    Unsubscribe(crate::cli::commands::unsubscribe::UnsubscribeArgs),
491    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory list-subscriptions`.
492    /// CLI parity for `memory_list_subscriptions`.
493    ListSubscriptions(crate::cli::commands::list_subscriptions::ListSubscriptionsArgs),
494    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory subscription-replay`.
495    /// CLI parity for `memory_subscription_replay`.
496    SubscriptionReplay(crate::cli::commands::subscription_replay::SubscriptionReplayArgs),
497    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory subscription-dlq-list`.
498    /// CLI parity for `memory_subscription_dlq_list`.
499    SubscriptionDlqList(crate::cli::commands::subscription_dlq_list::SubscriptionDlqListArgs),
    // Distinct from the `tokio::sync::Notify` type imported at the top of
    // the file: this is the variant `Command::Notify`, so the names do not
    // clash.
500    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory notify`. CLI
501    /// parity for `memory_notify`.
502    Notify(crate::cli::commands::notify::NotifyArgs),
503    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory inbox`. CLI
504    /// parity for `memory_inbox`.
505    Inbox(crate::cli::commands::inbox::InboxArgs),
506    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory ingest-multistep`.
507    /// CLI parity for `memory_ingest_multistep`. CLI dispatcher passes
508    /// `handler=None`; tier-locked advisory returns on every tier
509    /// because the CLI does not own the LLM dispatch.
510    IngestMultistep(crate::cli::commands::ingest_multistep::IngestMultistepArgs),
511    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory kg-invalidate`.
512    /// CLI parity for `memory_kg_invalidate`.
513    KgInvalidate(crate::cli::commands::kg_invalidate::KgInvalidateArgs),
514    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory kg-timeline`. CLI
515    /// parity for `memory_kg_timeline`.
516    KgTimeline(crate::cli::commands::kg_timeline::KgTimelineArgs),
517    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory entity-register`.
518    /// CLI parity for `memory_entity_register`.
519    EntityRegister(crate::cli::commands::entity_register::EntityRegisterArgs),
520    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory entity-get-by-alias`.
521    /// CLI parity for `memory_entity_get_by_alias`.
522    EntityGetByAlias(crate::cli::commands::entity_get_by_alias::EntityGetByAliasArgs),
523    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory dependents-of-invalidated`.
524    /// CLI parity for `memory_dependents_of_invalidated`.
525    DependentsOfInvalidated(
526        crate::cli::commands::dependents_of_invalidated::DependentsOfInvalidatedArgs,
527    ),
528    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory reflection-origin`.
529    /// CLI parity for `memory_reflection_origin`.
530    ReflectionOrigin(crate::cli::commands::reflection_origin::ReflectionOriginArgs),
531    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory quota-status`. CLI
532    /// parity for `memory_quota_status`.
533    QuotaStatus(crate::cli::commands::quota_status::QuotaStatusArgs),
534}
535
// Thin clap wrapper carried by `Command::Governance`: its single field is
// the nested sub-subcommand, so `ai-memory governance` always requires one
// of the `GovernanceAction` verbs.
536/// `ai-memory governance` parent argument struct.
537#[derive(Args)]
538pub struct GovernanceCliArgs {
    // The verb chosen after `governance` on the command line.
539    #[command(subcommand)]
540    pub action: GovernanceAction,
541}
542
// Verbs available under `ai-memory governance`. Each variant wraps the
// argument struct of the `crate::cli::governance_*` module that declares
// it. The `///` comments on the variants are rendered by clap as help
// text, so maintainer notes here use plain `//` comments.
//
// NOTE(review): the derive is spelled `clap::Subcommand` here while
// `Command` uses the `Subcommand` name imported at the top of the file;
// both refer to the same derive macro.
543/// `ai-memory governance` sub-subcommands. K11 migrator + 7th-form
544/// `install-defaults` (issue #760) bulk-activator for seed rules
545/// R001-R004 live here; future K-track work may add more verbs
546/// (`lint`, `explain`, …) so the surface is shaped as an enum from
547/// day one.
548#[derive(clap::Subcommand)]
549pub enum GovernanceAction {
550    /// Translate legacy [governance] policies to v0.7
551    /// [[permissions.rules]] (K9 format).
552    MigrateToPermissions(crate::cli::governance_migrate::MigrateToPermissionsArgs),
553    /// v0.7.0 7th-form closeout (issue #760) — flip the seeded
554    /// operator hard rules R001-R004 (migration
555    /// `0024_v07_governance_rules.sql`) to `enabled = 1`. Interactive
556    /// confirmation by default; `--yes` overrides for CI/scripts.
557    InstallDefaults(crate::cli::governance_install_defaults::InstallDefaultsArgs),
558    /// v0.7.0 issue #863 — shell-side parity for the MCP tool
559    /// `memory_check_agent_action`. Dry-run a substrate agent-action
560    /// rule (R001-R004 plus any operator-added rule) and emit the
561    /// Allow / Refuse / Warn verdict.
562    CheckAction(crate::cli::governance_check_action::CheckActionArgs),
563}
564
// clap argument set for `ai-memory doctor`. Every field is an optional
// long flag: the `bool` fields are `false` and the `Option` fields are
// `None` when the flag is absent. The `///` comments on the fields are
// rendered by clap as help text, so maintainer notes here use plain `//`
// comments.
565/// Arguments for the `doctor` subcommand. Lives next to `Cli` so clap
566/// derives them automatically; the actual report logic lives in
567/// `cli::doctor::run`.
568#[derive(Args)]
569pub struct DoctorCliArgs {
570    /// Query a remote ai-memory daemon's HTTP capabilities + stats
571    /// endpoints instead of opening the local DB. Sections that need
572    /// raw SQL access render as N/A in this mode.
573    #[arg(long, value_name = "URL")]
574    pub remote: Option<String>,
    // NOTE(review): `Cli` also declares a `--json` flag with
    // `global = true`. Confirm which field `ai-memory doctor --json`
    // populates and that `cli::doctor::run` reads the intended one.
575    /// Emit the report as JSON instead of human-readable text. Useful
576    /// for CI consumers and for `jq`-style filtering.
577    #[arg(long)]
578    pub json: bool,
579    /// Exit 1 when at least one section is at WARN severity. Without
580    /// this flag, warnings keep exit 0; criticals always exit 2.
581    #[arg(long)]
582    pub fail_on_warn: bool,
583    /// v0.6.4-004 — print per-tool, per-family, and per-profile token
584    /// costs (`cl100k_base`) instead of the regular health report.
585    /// Combined with `--json` returns a structured payload for CI.
586    /// Combined with `--profile <name>` reports the cost under that
587    /// hypothetical profile in addition to the active default.
588    #[arg(long)]
589    pub tokens: bool,
    // No clap-level default is declared, so the documented `core` default
    // has to be applied by the consumer when this is `None`.
590    /// v0.6.4-004 — when used with `--tokens`, evaluate cost under this
591    /// hypothetical profile. Defaults to `core` (the v0.6.4 default).
592    /// Accepts the same vocabulary as `ai-memory mcp --profile`.
593    #[arg(long, value_name = "PROFILE")]
594    pub profile: Option<String>,
    // The "implies `--tokens`" behaviour is not encoded in the clap
    // attributes here, so it has to be applied by the consumer —
    // verify in `cli::doctor`.
595    /// v0.6.4-004 — dump the full per-tool size table as JSON. Implies
596    /// `--tokens`. Used by CI and benchmarks to capture the source-of-
597    /// truth size data without parsing the rendered report.
598    #[arg(long)]
599    pub raw_table: bool,
600    /// v0.7-G3 — emit hook-executor backpressure metrics
601    /// (`events_fired`, `events_dropped`, `mean_latency_us`)
602    /// per loaded hook. Routed through the same reporter bucket
603    /// as `--tokens`. The runtime registry isn't reachable from
604    /// the CLI process, so this surface reports the loaded
605    /// `hooks.toml` shape + zeroed metric placeholders until
606    /// G7-G11 wires the executor into the running daemon's
607    /// snapshot.
608    #[arg(long)]
609    pub hooks: bool,
610}
611
612#[derive(Args)]
613pub struct BenchArgs {
614    /// Measured iterations per operation. Clamped to `[1, 100_000]`.
615    #[arg(long, default_value_t = bench::DEFAULT_ITERATIONS)]
616    pub iterations: usize,
617    /// Warmup iterations discarded from the percentile sample.
618    /// Clamped to `[0, 10_000]`.
619    #[arg(long, default_value_t = bench::DEFAULT_WARMUP)]
620    pub warmup: usize,
621    /// Emit results as JSON instead of the human-readable table.
622    #[arg(long)]
623    pub json: bool,
624    /// Path to a previous `bench --json` payload. When supplied, the
625    /// fresh run is compared per-operation against this baseline and
626    /// the process exits non-zero if any measured p95 exceeds the
627    /// baseline by more than `--regression-threshold` percent.
628    /// Independent of the absolute-budget guard.
629    #[arg(long, value_name = "PATH")]
630    pub baseline: Option<String>,
631    /// Allowed p95 growth (percent) over the `--baseline` reading
632    /// before a row is flagged as a regression. Clamped to
633    /// `[0.0, 1000.0]`. Has no effect without `--baseline`.
634    #[arg(long, default_value_t = bench::DEFAULT_REGRESSION_THRESHOLD_PCT)]
635    pub regression_threshold: f64,
636    /// Append this run to a JSONL history file (one self-describing
637    /// JSON object per line). Creates the file and any missing parent
638    /// directories on first call. Each entry carries `captured_at`
639    /// (RFC3339), `iterations`, `warmup`, and the same `results` array
640    /// `--json` emits — long-running campaigns can build a regression
641    /// dataset to feed downstream tooling. The CLI table / JSON output
642    /// still prints; this flag only adds the append side effect.
643    #[arg(long, value_name = "PATH")]
644    pub history: Option<PathBuf>,
645    /// #1579 B8 — seed a scratch corpus of N rows before running the
646    /// workload and gate the verdict against the per-scale budget
647    /// table in `PERFORMANCE.md` §"Corpus-scale budgets". Omitting the
648    /// flag keeps the legacy ~500-row workload and legacy budgets.
649    /// Clamped to `[1, 1_000_000]`.
650    #[arg(long, value_name = "ROWS")]
651    pub scale: Option<usize>,
652}
653
/// Default value for the `--batch` page-size hint of `ai-memory migrate`
/// (used as the `default_value_t` of `MigrateArgs::batch`).
///
/// At present the value is an API-compatibility hint only — see the
/// `MAX_ROWS` note in `src/migrate.rs::migrate`.
///
/// Compiled only when the `sal` feature is enabled.
#[cfg(feature = "sal")]
const MIGRATE_BATCH_DEFAULT: usize = 1000;
659
// Arguments for `ai-memory migrate`. The whole struct is compiled only
// when the `sal` feature is enabled. Maintainer notes are plain `//`
// comments on purpose: clap's derive renders `///` doc comments as
// user-visible help text, so the existing `///` wording is kept verbatim.
#[cfg(feature = "sal")]
#[derive(Args)]
pub struct MigrateArgs {
    /// Source URL. `sqlite:///path/to/file.db` or
    /// `postgres://user:pass@host:port/dbname`.
    // Required: a plain `String` with no default. The URL may embed a
    // password, so avoid echoing it unredacted in diagnostics.
    #[arg(long)]
    pub from: String,
    /// Destination URL. Same URL shape as `--from`.
    // Required, same as `from`.
    #[arg(long)]
    pub to: String,
    /// Page-size hint. Default 1000. Retained for API compatibility —
    /// the current migrator reads one page capped at `MAX_ROWS`
    /// (1,000,000) and refuses loudly past it; see `src/migrate.rs`.
    // The effective default is `MIGRATE_BATCH_DEFAULT`; the "Default 1000"
    // in the help text above is hard-coded prose and must be updated by
    // hand if that constant ever changes.
    #[arg(long, default_value_t = MIGRATE_BATCH_DEFAULT)]
    pub batch: usize,
    /// Only migrate memories in this namespace.
    // `None` when the flag is omitted.
    #[arg(long)]
    pub namespace: Option<String>,
    /// Emit the report but do NOT write to the destination.
    #[arg(long)]
    pub dry_run: bool,
    /// Emit the report as JSON rather than human-readable text.
    #[arg(long)]
    pub json: bool,
}
685
686#[derive(Args)]
687pub struct ServeArgs {
688    #[arg(long, default_value = "127.0.0.1")]
689    pub host: String,
690    #[arg(long, default_value_t = DEFAULT_PORT)]
691    pub port: u16,
692    /// Path to PEM-encoded TLS certificate (may include the full chain).
693    /// Passing both `--tls-cert` and `--tls-key` switches `serve` to
694    /// HTTPS. rustls under the hood — no OpenSSL dep. Absent both
695    /// flags = plain HTTP (same as every previous release).
696    #[arg(long, requires = "tls_key")]
697    pub tls_cert: Option<PathBuf>,
698    /// Path to PEM-encoded TLS private key (PKCS#8 or RSA).
699    #[arg(long, requires = "tls_cert")]
700    pub tls_key: Option<PathBuf>,
701    /// Path to a file containing SHA-256 fingerprints of trusted client
702    /// certificates, one per line (case-insensitive hex, optionally with
703    /// `:` separators; comments start with `#`). When set, `serve`
704    /// demands client-cert mTLS on every connection and refuses any peer
705    /// whose cert fingerprint is not on the list. Requires `--tls-cert`
706    /// and `--tls-key`. This is the peer-mesh identity gate — a peer
707    /// without an authorised cert can't even open a TCP connection, let
708    /// alone hit `/sync/push`. Layer 2 of the peer-mesh crypto stack;
709    /// attested `agent_id` extraction (Layer 2b) lands post-v0.6.0.
710    #[arg(long, requires = "tls_cert")]
711    pub mtls_allowlist: Option<PathBuf>,
712    /// Seconds to wait for in-flight requests to complete on graceful
713    /// shutdown (SIGINT). Default 30. Bumped from 10 in v0.6.0 because
714    /// large `/sync/push` batches can take longer than 10s under load
715    /// (red-team #233).
716    #[arg(long, default_value_t = 30)]
717    pub shutdown_grace_secs: u64,
718
719    // -------- v0.7 federation (ADR-0001) ---------------------------
720    /// W-of-N write quorum. When >=1 and `--quorum-peers` is non-empty,
721    /// every HTTP write fans out to every peer and returns OK only
722    /// after the local commit + W-1 peer acks land within
723    /// `--quorum-timeout-ms`. Default 0 = federation disabled, daemon
724    /// behaves exactly like v0.6.0.
725    #[arg(long, default_value_t = 0)]
726    pub quorum_writes: usize,
727    /// Comma-separated list of peer base URLs. Each peer is assumed to
728    /// expose `POST /api/v1/sync/push` — the same endpoint the
729    /// sync-daemon already uses.
730    #[arg(long, value_delimiter = ',')]
731    pub quorum_peers: Vec<String>,
732    /// Deadline for quorum-ack collection. After this many ms the
733    /// write returns 503 `quorum_not_met`. Default 2000 assumes
734    /// same-DC peers; cross-region (WAN) meshes need 5000-10000 —
735    /// the do-1461 reference deployment uses 8000. See
736    /// docs/federation.md for sizing guidance. (#1565)
737    #[arg(long, default_value_t = 2000)]
738    pub quorum_timeout_ms: u64,
739    /// Optional mTLS client cert for outbound federation POSTs. Same
740    /// cert material the sync-daemon's `--client-cert` accepts.
741    #[arg(long)]
742    pub quorum_client_cert: Option<PathBuf>,
743    /// Optional mTLS client key for outbound federation POSTs.
744    #[arg(long)]
745    pub quorum_client_key: Option<PathBuf>,
746    /// Optional root CA cert to trust for outbound federation HTTPS.
747    /// Required whenever peers present a cert NOT rooted in Mozilla's
748    /// `webpki-roots` bundle (self-signed, private CA, ephemeral test
749    /// CA, etc.) — without this, the reqwest rustls-tls client rejects
750    /// peer certs and every quorum write times out as `quorum_not_met`.
751    /// See #333.
752    #[arg(long)]
753    pub quorum_ca_cert: Option<PathBuf>,
754    /// v0.6.0.1 (#320) — how often, in seconds, the daemon pulls peers
755    /// for any updates it missed while offline or partitioned. 0 disables
756    /// the catchup loop entirely. Default 30s keeps a post-partition
757    /// node convergent within one interval after resume.
758    #[arg(long, default_value_t = 30)]
759    pub catchup_interval_secs: u64,
760    /// v0.7.0 epic (ADR-001) — the federation identity this node signs and
761    /// presents as (`sender_agent_id`). Precedence-2 source, below the
762    /// `AI_MEMORY_FED_IDENTITY` env override and above the historical
763    /// `host:<hostname>` default. Set this to a stable, trust-domain-scoped
764    /// id (e.g. `region/nyc/node-7`) so a node's identity survives a
765    /// hostname change. Unset = keep the hostname default.
766    #[arg(long)]
767    pub federation_identity: Option<String>,
768
769    // -------- v0.7.0 Wave-3 — adapter selection --------------------
770    /// v0.7.0 Wave-3 — full SAL store URL. When set, the daemon binds
771    /// its [`MemoryStore`] handle to the URL-resolved adapter instead
772    /// of the default SQLite path derived from `--db`.
773    ///
774    /// Accepted shapes:
775    ///
776    /// - `sqlite:///absolute/path/to/file.db` — SQLite adapter (same
777    ///   semantics as `--db`).
778    /// - `postgres://user:pass@host:port/dbname` — Postgres adapter.
779    /// - `postgresql://...` — alias for the Postgres scheme.
780    ///
781    /// `--db` and `--store-url` are mutually exclusive: passing both
782    /// is rejected at startup with a clear error.
783    ///
784    /// Postgres-backed daemons require `--features sal,sal-postgres`
785    /// at build time; otherwise the URL is rejected at startup. See
786    /// `docs/postgres-age-guide.md` for the operator workflow.
787    ///
788    /// [`MemoryStore`]: crate::store::MemoryStore
789    #[cfg(feature = "sal")]
790    #[arg(long, value_name = "URL")]
791    pub store_url: Option<String>,
792}
793
794#[derive(Args)]
795pub struct CompletionsArgs {
796    pub shell: Shell,
797}
798
// ---------------------------------------------------------------------------
// Top-level dispatch
// ---------------------------------------------------------------------------
802
803/// Top-level CLI dispatch. Called from `main()` after `Cli::parse()`.
804///
805/// Handles:
806/// - `--db-passphrase-file` → exports `AI_MEMORY_DB_PASSPHRASE`.
807/// - `is_write_command` → conditional post-run WAL checkpoint.
808/// - The match arm for every `Command` variant.
809#[allow(clippy::too_many_lines)]
810pub async fn run(cli: Cli, app_config: &AppConfig) -> Result<()> {
811    // v0.6.0.0: read the SQLCipher passphrase from a file and export it as
812    // AI_MEMORY_DB_PASSPHRASE for the duration of the process. File path
813    // comes from the --db-passphrase-file flag (global). No-op on standard
814    // SQLite builds (the env var is ignored unless the binary was built
815    // with --features sqlcipher).
816    if let Some(path) = &cli.db_passphrase_file {
817        let passphrase = passphrase_from_file(path)?;
818        // SAFETY: single-threaded startup before any worker threads spawn.
819        unsafe { std::env::set_var("AI_MEMORY_DB_PASSPHRASE", passphrase) };
820    }
821    let db_path = app_config.effective_db(&cli.db);
822    // Seed the process-wide per-agent quota defaults from the resolved
823    // `[limits]` config (env `AI_MEMORY_MAX_*` > `[limits]` > compiled
824    // default). `ensure_row` / the Postgres quota-row auto-inserts read
825    // these when stamping a fresh `agent_quotas` row, so every
826    // subcommand path (serve / mcp / CLI writes) charges the same
827    // operator-tuned daily caps. Idempotent — first writer wins; later
828    // calls are no-ops.
829    {
830        let limits = app_config.resolve_limits();
831        crate::quotas::set_quota_defaults(crate::quotas::QuotaDefaults {
832            max_memories_per_day: limits.max_memories_per_day,
833            max_storage_bytes: limits.max_storage_bytes,
834            max_links_per_day: limits.max_links_per_day,
835        });
836    }
837    // #1579 B7 — seed the process-wide sqlite `PRAGMA mmap_size` from
838    // the resolved `[storage]` config (env `AI_MEMORY_DB_MMAP_SIZE` >
839    // `[storage].db_mmap_size_bytes` > compiled 256 MiB default).
840    // Every subsequent `db::open` on any subcommand path (serve / mcp /
841    // CLI) applies it. Idempotent — first writer wins, same as the
842    // quota seeding above.
843    let resolved_storage = app_config.resolve_storage();
844    crate::storage::set_db_mmap_size(resolved_storage.db_mmap_size_bytes);
845    // #1604 — seed the process-wide rerank input-sequence cap from the
846    // resolved `[reranker]` config (env `AI_MEMORY_RERANK_MAX_SEQ` >
847    // `[reranker].max_seq_tokens` > compiled default). Every subsequent
848    // batched cross-encoder rerank forward on any subcommand path
849    // (serve / mcp / CLI) applies it. Idempotent — first writer wins,
850    // same as the mmap seeding above.
851    crate::reranker::set_rerank_max_seq(app_config.resolve_reranker().max_seq_tokens);
852    // n15 — seed the process-wide per-namespace confidence-decay
853    // half-life overrides from `[curator.confidence_decay_half_life_days]`.
854    // `apply_decay_touch` (the recall-time decay updater on any subcommand
855    // path) resolves the per-namespace half-life through this global.
856    // Idempotent — first writer wins, same as the seeding above.
857    crate::confidence::decay::set_namespace_half_life_overrides(
858        app_config.confidence_decay_half_life_overrides(),
859    );
860    // #1590 — seed the process-wide operator-configured default
861    // namespace (Some ONLY when `[storage].default_namespace` — or the
862    // legacy flat field — was explicitly set). Every write surface
863    // (MCP `memory_store`, HTTP `POST /api/v1/memories`, the CLI
864    // namespace ladder) consults this; unconfigured deployments keep
865    // their historical per-surface defaults.
866    crate::config::set_configured_default_namespace(
867        resolved_storage
868            .explicit_default_namespace()
869            .map(str::to_string),
870    );
871    let j = cli.json;
872    let cli_agent_id: Option<String> = cli.agent_id.clone();
873    // Track whether command writes to DB (for WAL checkpoint)
874    let needs_checkpoint = is_write_command(&cli.command);
875    let db_path_for_checkpoint = if needs_checkpoint {
876        Some(db_path.clone())
877    } else {
878        None
879    };
880
881    let result = match cli.command {
882        Command::Serve(a) => {
883            // v0.7.0 Wave-3 — `--db` and `--store-url` are mutually
884            // exclusive when both are explicitly supplied. clap can't
885            // express this conflict cross-struct (the global `--db`
886            // lives on `Cli`, the new `--store-url` lives on
887            // `ServeArgs`), so the check happens here at runtime.
888            //
889            // `--db` carries a non-`None` `default_value`, so we can't
890            // tell from the parsed value alone whether the operator
891            // typed it on the command line. We approximate explicit
892            // intent through the `AI_MEMORY_DB` env var (which clap
893            // resolves into the same field) and a non-default path.
894            // When both signals indicate `--db` was deliberate AND
895            // `--store-url` is set, refuse to start.
896            #[cfg(feature = "sal")]
897            if let Some(ref url) = a.store_url {
898                let db_was_explicit =
899                    std::env::var("AI_MEMORY_DB").is_ok() || db_path != PathBuf::from(DEFAULT_DB);
900                if db_was_explicit {
901                    // #1579 A3 (SECURITY) — redact the URL credential
902                    // before it lands in the error output.
903                    anyhow::bail!(
904                        "--db and --store-url are mutually exclusive. \
905                         Pass exactly one. Got --db={} and --store-url={}",
906                        db_path.display(),
907                        crate::logging::redact_url_password(url),
908                    );
909                }
910            }
911            serve(db_path, a, app_config).await
912        }
913        Command::Mcp { tier, profile } => {
914            let feature_tier = app_config.effective_tier(Some(&tier));
915            // v0.6.4-001 — resolve profile (CLI/env > config > default core).
916            // Surface parse errors to stderr with the diagnostic that
917            // ProfileParseError already produces (lists valid profiles +
918            // valid families) before exiting.
919            let resolved_profile = match app_config.effective_profile(profile.as_deref()) {
920                Ok(p) => p,
921                Err(e) => {
922                    eprintln!("ai-memory mcp: invalid profile: {e}");
923                    std::process::exit(2);
924                }
925            };
926            // v0.7.0 F6 — `mcp::run_mcp_server` is a synchronous
927            // stdin-reading loop that internally calls
928            // `reqwest::blocking::Client` for every LLM-backed tool
929            // (`memory_consolidate`, `memory_expand_query`,
930            // `memory_auto_tag`, `memory_detect_contradiction`).
931            // Running that on a tokio worker thread directly does
932            // two bad things at once:
933            //   1. Pegs a worker thread on a synchronous read and
934            //      keeps the multi-threaded runtime spinning on
935            //      the remaining workers (the 99.3% CPU
936            //      `clock_gettime` / `mach_absolute_time` poll loop
937            //      observed in Round-2 sample profiling).
938            //   2. Calls `reqwest::blocking::Client::send()` from
939            //      within an active tokio runtime context, which
940            //      either panics ("Cannot start a runtime from
941            //      within a runtime") or silently fails the chat
942            //      RPC ("Failed to send chat request") — the
943            //      proximate cause of the four LLM-backed tools
944            //      returning errors while ollama itself was healthy.
945            // Routing the entire MCP loop through `spawn_blocking`
946            // gives it its own dedicated thread with no tokio
947            // runtime context, so the blocking reqwest calls inside
948            // `OllamaClient::generate` are issued cleanly.
949            let db_path_owned = db_path.clone();
950            let app_config_owned = app_config.clone();
951            tokio::task::spawn_blocking(move || {
952                mcp::run_mcp_server(
953                    &db_path_owned,
954                    feature_tier,
955                    &app_config_owned,
956                    &resolved_profile,
957                )
958            })
959            .await
960            .map_err(|e| anyhow::anyhow!("mcp join: {e}"))??;
961            Ok(())
962        }
963        Command::Store(a) => {
964            let stdout = std::io::stdout();
965            let stderr = std::io::stderr();
966            let mut so = stdout.lock();
967            let mut se = stderr.lock();
968            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
969            cli::store::run(
970                &db_path,
971                a,
972                j,
973                app_config,
974                cli_agent_id.as_deref(),
975                &mut out,
976            )
977        }
978        Command::Update(a) => {
979            let stdout = std::io::stdout();
980            let stderr = std::io::stderr();
981            let mut so = stdout.lock();
982            let mut se = stderr.lock();
983            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
984            cli::update::run(&db_path, &a, j, &mut out)
985        }
986        Command::Recall(a) => {
987            let stdout = std::io::stdout();
988            let stderr = std::io::stderr();
989            let mut so = stdout.lock();
990            let mut se = stderr.lock();
991            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
992            cli::recall::run(&db_path, &a, j, app_config, &mut out)
993        }
994        Command::Search(a) => {
995            let stdout = std::io::stdout();
996            let stderr = std::io::stderr();
997            let mut so = stdout.lock();
998            let mut se = stderr.lock();
999            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1000            cli::search::run(&db_path, &a, j, &mut out)
1001        }
1002        Command::Get(a) => {
1003            let stdout = std::io::stdout();
1004            let stderr = std::io::stderr();
1005            let mut so = stdout.lock();
1006            let mut se = stderr.lock();
1007            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1008            cli::crud::cmd_get(&db_path, &a, j, &mut out)
1009        }
1010        Command::List(a) => {
1011            let stdout = std::io::stdout();
1012            let stderr = std::io::stderr();
1013            let mut so = stdout.lock();
1014            let mut se = stderr.lock();
1015            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1016            cli::crud::cmd_list(&db_path, &a, j, app_config, &mut out)
1017        }
1018        Command::Delete(a) => {
1019            let stdout = std::io::stdout();
1020            let stderr = std::io::stderr();
1021            let mut so = stdout.lock();
1022            let mut se = stderr.lock();
1023            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1024            cli::crud::cmd_delete(&db_path, &a, j, cli_agent_id.as_deref(), &mut out)
1025        }
1026        Command::Promote(a) => {
1027            let stdout = std::io::stdout();
1028            let stderr = std::io::stderr();
1029            let mut so = stdout.lock();
1030            let mut se = stderr.lock();
1031            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1032            cli::promote::cmd_promote(&db_path, &a, j, cli_agent_id.as_deref(), &mut out)
1033        }
1034        Command::Forget(a) => {
1035            let stdout = std::io::stdout();
1036            let stderr = std::io::stderr();
1037            let mut so = stdout.lock();
1038            let mut se = stderr.lock();
1039            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1040            cli::forget::cmd_forget(&db_path, &a, j, &mut out)
1041        }
1042        Command::Link(a) => {
1043            let stdout = std::io::stdout();
1044            let stderr = std::io::stderr();
1045            let mut so = stdout.lock();
1046            let mut se = stderr.lock();
1047            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1048            cli::link::cmd_link(&db_path, &a, j, &mut out)
1049        }
1050        Command::Consolidate(a) => {
1051            let stdout = std::io::stdout();
1052            let stderr = std::io::stderr();
1053            let mut so = stdout.lock();
1054            let mut se = stderr.lock();
1055            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1056            cli::consolidate::run(&db_path, a, j, cli_agent_id.as_deref(), &mut out)
1057        }
1058        Command::Resolve(a) => {
1059            let stdout = std::io::stdout();
1060            let stderr = std::io::stderr();
1061            let mut so = stdout.lock();
1062            let mut se = stderr.lock();
1063            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1064            cli::link::cmd_resolve(&db_path, &a, j, &mut out)
1065        }
1066        Command::Shell => cli::shell::run(&db_path),
1067        Command::Sync(a) => {
1068            let stdout = std::io::stdout();
1069            let stderr = std::io::stderr();
1070            let mut so = stdout.lock();
1071            let mut se = stderr.lock();
1072            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1073            cli::sync::run(&db_path, &a, j, cli_agent_id.as_deref(), &mut out)
1074        }
1075        Command::SyncDaemon(a) => cli::sync::run_daemon(&db_path, a, cli_agent_id.as_deref()).await,
1076        Command::AutoConsolidate(a) => {
1077            let stdout = std::io::stdout();
1078            let stderr = std::io::stderr();
1079            let mut so = stdout.lock();
1080            let mut se = stderr.lock();
1081            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1082            cli::consolidate::run_auto(&db_path, &a, j, cli_agent_id.as_deref(), &mut out)
1083        }
1084        Command::Gc => {
1085            let stdout = std::io::stdout();
1086            let stderr = std::io::stderr();
1087            let mut so = stdout.lock();
1088            let mut se = stderr.lock();
1089            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1090            cli::gc::run_gc(&db_path, j, app_config, &mut out)
1091        }
1092        Command::Stats => {
1093            let stdout = std::io::stdout();
1094            let stderr = std::io::stderr();
1095            let mut so = stdout.lock();
1096            let mut se = stderr.lock();
1097            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1098            cli::gc::run_stats(&db_path, j, &mut out)
1099        }
1100        Command::Namespaces => {
1101            let stdout = std::io::stdout();
1102            let stderr = std::io::stderr();
1103            let mut so = stdout.lock();
1104            let mut se = stderr.lock();
1105            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1106            cli::gc::run_namespaces(&db_path, j, &mut out)
1107        }
1108        Command::Namespace(a) => {
1109            // v0.7.0 (issue #800) — Batman Mode Crack 1. First-class CLI
1110            // wrapper around the MCP `memory_namespace_set_standard` /
1111            // `_get_standard` / `_clear_standard` tools so operators
1112            // don't need to drop into MCP-stdio JSON-RPC just to bind
1113            // a `GovernancePolicy` to a namespace.
1114            let stdout = std::io::stdout();
1115            let stderr = std::io::stderr();
1116            let mut so = stdout.lock();
1117            let mut se = stderr.lock();
1118            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1119            cli::namespace::run(&db_path, a, j, &mut out)
1120        }
1121        Command::Config(a) => {
1122            // v0.7.x (#1146) — enterprise configuration tooling.
1123            // `ai-memory config migrate` rewrites a legacy v1
1124            // (flat-field) `config.toml` to the v2 sectioned shape.
1125            let stdout = std::io::stdout();
1126            let stderr = std::io::stderr();
1127            let mut so = stdout.lock();
1128            let mut se = stderr.lock();
1129            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1130            match cli::commands::config::run(&db_path, a, &mut out)? {
1131                0 => Ok(()),
1132                code => std::process::exit(code),
1133            }
1134        }
1135        Command::Export => {
1136            let stdout = std::io::stdout();
1137            let stderr = std::io::stderr();
1138            let mut so = stdout.lock();
1139            let mut se = stderr.lock();
1140            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1141            cli::io::export(&db_path, &mut out)
1142        }
1143        Command::Import(a) => {
1144            let stdout = std::io::stdout();
1145            let stderr = std::io::stderr();
1146            let mut so = stdout.lock();
1147            let mut se = stderr.lock();
1148            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1149            cli::io::import(&db_path, &a, j, cli_agent_id.as_deref(), &mut out)
1150        }
1151        Command::Completions(a) => {
1152            generate(
1153                a.shell,
1154                &mut Cli::command(),
1155                "ai-memory",
1156                &mut std::io::stdout(),
1157            );
1158            Ok(())
1159        }
1160        Command::Man => {
1161            let cmd = Cli::command();
1162            let man = clap_mangen::Man::new(cmd);
1163            man.render(&mut std::io::stdout())?;
1164            Ok(())
1165        }
1166        Command::Mine(a) => {
1167            let stdout = std::io::stdout();
1168            let stderr = std::io::stderr();
1169            let mut so = stdout.lock();
1170            let mut se = stderr.lock();
1171            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1172            cli::io::mine(
1173                &db_path,
1174                a,
1175                j,
1176                app_config,
1177                cli_agent_id.as_deref(),
1178                &mut out,
1179            )
1180        }
1181        Command::Archive(a) => {
1182            let stdout = std::io::stdout();
1183            let stderr = std::io::stderr();
1184            let mut so = stdout.lock();
1185            let mut se = stderr.lock();
1186            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1187            cli::archive::run(&db_path, a, j, &mut out)
1188        }
1189        Command::Agents(a) => {
1190            let stdout = std::io::stdout();
1191            let stderr = std::io::stderr();
1192            let mut so = stdout.lock();
1193            let mut se = stderr.lock();
1194            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1195            cli::agents::run_agents(&db_path, a, j, &mut out)
1196        }
1197        Command::Identity(a) => {
1198            // v0.7 H1 — keypair lifecycle is DB-free. The handler
1199            // resolves the key directory itself (via --key-dir or the
1200            // default <config>/ai-memory/keys).
1201            let stdout = std::io::stdout();
1202            let stderr = std::io::stderr();
1203            let mut so = stdout.lock();
1204            let mut se = stderr.lock();
1205            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1206            cli::identity::run(a, j, &mut out)
1207        }
1208        Command::Offload(a) => {
1209            // v0.7.0 QW-3 — context-offload substrate primitive.
1210            // Reads `--file` (or `-` stdin), writes a row into
1211            // `offloaded_blobs`, returns the `ref_id`. The full
1212            // short-term-context-compression pattern (Mermaid canvas
1213            // + auto-cadence + node_id integration) targets v0.8.0.
1214            let stdout = std::io::stdout();
1215            let stderr = std::io::stderr();
1216            let mut so = stdout.lock();
1217            let mut se = stderr.lock();
1218            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1219            cli::offload::run_offload(&db_path, &a, &mut out)
1220        }
1221        Command::Deref(a) => {
1222            // v0.7.0 QW-3 — dereference a `ref_id` produced by
1223            // `ai-memory offload`. Refuses tampered rows.
1224            let stdout = std::io::stdout();
1225            let stderr = std::io::stderr();
1226            let mut so = stdout.lock();
1227            let mut se = stderr.lock();
1228            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1229            cli::offload::run_deref(&db_path, &a, &mut out)
1230        }
1231        Command::Rules(a) => {
1232            // v0.7.0 (issue #691) — substrate-level agent-action rules
1233            // engine. Mutation verbs require the operator key on disk;
1234            // read verbs (list / check) work without it.
1235            let stdout = std::io::stdout();
1236            let stderr = std::io::stderr();
1237            let mut so = stdout.lock();
1238            let mut se = stderr.lock();
1239            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1240            cli::rules::run(&db_path, a, j, &mut out)
1241        }
1242        Command::Pending(a) => {
1243            let stdout = std::io::stdout();
1244            let stderr = std::io::stderr();
1245            let mut so = stdout.lock();
1246            let mut se = stderr.lock();
1247            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1248            cli::agents::run_pending(&db_path, a, j, cli_agent_id.as_deref(), &mut out)
1249        }
1250        Command::Backup(a) => {
1251            let stdout = std::io::stdout();
1252            let stderr = std::io::stderr();
1253            let mut so = stdout.lock();
1254            let mut se = stderr.lock();
1255            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1256            cli::backup::run_backup(&db_path, &a, j, &mut out)
1257        }
1258        Command::Restore(a) => {
1259            let stdout = std::io::stdout();
1260            let stderr = std::io::stderr();
1261            let mut so = stdout.lock();
1262            let mut se = stderr.lock();
1263            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1264            cli::backup::run_restore(&db_path, &a, j, &mut out)
1265        }
1266        Command::Curator(a) => {
1267            // v0.7.0 #1548 — `--db` and `--store-url` are mutually
1268            // exclusive when both are explicitly supplied, mirroring the
1269            // `serve` arm above. The global `--db` carries a non-`None`
1270            // `default_value`, so we approximate explicit operator
1271            // intent through the `AI_MEMORY_DB` env var (which clap
1272            // resolves into the same field) or a non-default path.
1273            #[cfg(feature = "sal")]
1274            if let Some(ref url) = a.store_url {
1275                let db_was_explicit =
1276                    std::env::var("AI_MEMORY_DB").is_ok() || db_path != PathBuf::from(DEFAULT_DB);
1277                if db_was_explicit {
1278                    // #1579 A3 (SECURITY) — redact the URL credential
1279                    // before it lands in the error output.
1280                    anyhow::bail!(
1281                        "--db and --store-url are mutually exclusive. \
1282                         Pass exactly one. Got --db={} and --store-url={}",
1283                        db_path.display(),
1284                        crate::logging::redact_url_password(url),
1285                    );
1286                }
1287            }
1288            // Initialize the tracing subscriber so the daemon-start
1289            // banner and per-cycle `tracing::info!` lines in
1290            // `curator::run_daemon` actually emit. Previously only the
1291            // HTTP `serve` path called `init_tracing()`, leaving the
1292            // curator path silent regardless of `RUST_LOG`. `try_init`
1293            // inside `init_tracing` makes this safe to call even when
1294            // another subscriber is already installed.
1295            init_tracing();
1296            // Daemon mode runs indefinitely on a `spawn_blocking` worker
1297            // that itself calls `tracing::info!`. If the dispatch held
1298            // the process-wide `Stdout::lock()` while the daemon ran,
1299            // the blocking thread's tracing write would deadlock on the
1300            // ReentrantMutex (same-thread re-entry is fine; cross-thread
1301            // contention isn't). `--daemon` doesn't write to `out`
1302            // anyway, so route it to `io::sink()` and only lock the
1303            // real stdout/stderr for the modes that actually emit CLI
1304            // output (`--once`, `--reflect`, `--rollback`).
1305            if a.daemon {
1306                let mut so = std::io::sink();
1307                let mut se = std::io::sink();
1308                let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1309                cli::curator::run(&db_path, &a, app_config, &mut out).await
1310            } else {
1311                let stdout = std::io::stdout();
1312                let stderr = std::io::stderr();
1313                let mut so = stdout.lock();
1314                let mut se = stderr.lock();
1315                let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1316                cli::curator::run(&db_path, &a, app_config, &mut out).await
1317            }
1318        }
1319        Command::Bench(a) => cmd_bench(&a),
1320        #[cfg(feature = "sal")]
1321        Command::Migrate(a) => cmd_migrate(&a).await,
1322        #[cfg(feature = "sal")]
1323        Command::SchemaInit(a) => {
1324            let stdout = std::io::stdout();
1325            let stderr = std::io::stderr();
1326            let mut so = stdout.lock();
1327            let mut se = stderr.lock();
1328            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1329            cli::schema_init::run(&a, &mut out).await
1330        }
1331        Command::Doctor(a) => {
1332            // P7 / R7. The doctor is read-only; it never sets
1333            // `needs_checkpoint`. We compute the exit code from the
1334            // overall severity and propagate it via the process-exit
1335            // path below so callers (CI, ops scripts) can branch on it.
1336            //
1337            // The remote mode uses `reqwest::blocking::Client` which
1338            // panics when dropped on a tokio runtime thread, so the
1339            // entire doctor pass runs inside `spawn_blocking`.
1340            let db_path_doctor = db_path.clone();
1341            // v0.6.4-004 — `--tokens` (and its alias `--raw-table`) bypass
1342            // the regular health pass. Routes to a dedicated tokens
1343            // reporter that consumes `crate::sizes::tool_sizes()` and
1344            // `crate::profile::Family::for_tool` to roll up cost.
1345            if a.tokens || a.raw_table {
1346                let stdout = std::io::stdout();
1347                let stderr = std::io::stderr();
1348                let mut so = stdout.lock();
1349                let mut se = stderr.lock();
1350                let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1351                let exit = cli::doctor::run_tokens(
1352                    cli::doctor::TokensArgs {
1353                        json: a.json,
1354                        raw_table: a.raw_table,
1355                        profile: a.profile,
1356                        hooks: a.hooks,
1357                    },
1358                    &mut out,
1359                )?;
1360                std::process::exit(exit);
1361            }
1362            // v0.7-G3 — `--hooks` standalone routes to the hook
1363            // executor metrics reporter. Same dispatch shape as
1364            // `--tokens` so both share the "tokens reporter
1365            // bucket" the G3 prompt called out.
1366            if a.hooks {
1367                let stdout = std::io::stdout();
1368                let stderr = std::io::stderr();
1369                let mut so = stdout.lock();
1370                let mut se = stderr.lock();
1371                let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1372                let exit = cli::doctor::run_hooks(
1373                    cli::doctor::HooksReportArgs { json: a.json },
1374                    &mut out,
1375                )?;
1376                std::process::exit(exit);
1377            }
1378            let args = cli::doctor::DoctorArgs {
1379                remote: a.remote,
1380                json: a.json,
1381                fail_on_warn: a.fail_on_warn,
1382            };
1383            let join = tokio::task::spawn_blocking(move || {
1384                let stdout = std::io::stdout();
1385                let stderr = std::io::stderr();
1386                let mut so = stdout.lock();
1387                let mut se = stderr.lock();
1388                let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1389                cli::doctor::run(&db_path_doctor, &args, &mut out)
1390            })
1391            .await;
1392            match join {
1393                Ok(Ok(0)) => Ok(()),
1394                Ok(Ok(code)) => std::process::exit(code),
1395                Ok(Err(e)) => Err(e),
1396                Err(e) => Err(anyhow::anyhow!("doctor task join failed: {e}")),
1397            }
1398        }
1399        Command::Boot(a) => {
1400            // Issue #487. Read-only, fast, no embedder, no daemon. Suitable
1401            // for invocation from any AI-agent integration (Claude Code
1402            // SessionStart hook, Cursor / Cline / Continue / Windsurf
1403            // system-message, programmatic prepend in Claude Agent SDK /
1404            // OpenAI Apps SDK / Codex CLI, OpenClaw built-in, local models
1405            // via LM Studio / Ollama / vLLM).
1406            let stdout = std::io::stdout();
1407            let stderr = std::io::stderr();
1408            let mut so = stdout.lock();
1409            let mut se = stderr.lock();
1410            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1411            // PR-5: a `boot` invocation is itself an audit-worthy event.
1412            // Emission is a no-op when audit is disabled.
1413            crate::audit::emit(crate::audit::EventBuilder::new(
1414                crate::audit::AuditAction::SessionBoot,
1415                crate::audit::actor(
1416                    cli_agent_id.as_deref().unwrap_or("anonymous"),
1417                    "explicit_or_default",
1418                    None,
1419                ),
1420                crate::audit::target_sweep(a.namespace.as_deref().unwrap_or("auto")),
1421            ));
1422            cli::boot::run(&db_path, &a, app_config, &mut out)
1423        }
1424        Command::Install(a) => {
1425            // Issue #487 PR-2. Read-only filesystem op against the agent's
1426            // config file (NOT the ai-memory DB). Default is dry-run; --apply
1427            // is opt-in and writes a backup before mutating anything.
1428            let stdout = std::io::stdout();
1429            let stderr = std::io::stderr();
1430            let mut so = stdout.lock();
1431            let mut se = stderr.lock();
1432            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1433            cli::install::run(&a, &mut out)
1434        }
1435        Command::Wrap(a) => {
1436            // Issue #487 PR-6. Pure-Rust cross-platform replacement for
1437            // the bash / PowerShell wrappers PR-1 shipped in the
1438            // integration recipes. Runs boot in-process, builds the
1439            // system message, spawns the wrapped agent, and propagates
1440            // the agent's exit code via std::process::exit.
1441            let stdout = std::io::stdout();
1442            let stderr = std::io::stderr();
1443            let mut so = stdout.lock();
1444            let mut se = stderr.lock();
1445            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1446            let code = cli::wrap::run(&db_path, &a, app_config, &mut out)?;
1447            // Drop the locks/output before exit so any pending writes
1448            // get flushed by the OS on process teardown.
1449            drop(out);
1450            drop(so);
1451            drop(se);
1452            if code == 0 {
1453                Ok(())
1454            } else {
1455                std::process::exit(code);
1456            }
1457        }
1458        Command::Logs(a) => {
1459            let stdout = std::io::stdout();
1460            let stderr = std::io::stderr();
1461            let mut so = stdout.lock();
1462            let mut se = stderr.lock();
1463            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1464            cli::logs::run(a, app_config, &mut out)
1465        }
1466        Command::Audit(a) => {
1467            let stdout = std::io::stdout();
1468            let stderr = std::io::stderr();
1469            let mut so = stdout.lock();
1470            let mut se = stderr.lock();
1471            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1472            match cli::audit::run(a, app_config, &mut out)? {
1473                0 => Ok(()),
1474                code => std::process::exit(code),
1475            }
1476        }
1477        Command::Governance(a) => {
1478            let stdout = std::io::stdout();
1479            let stderr = std::io::stderr();
1480            let mut so = stdout.lock();
1481            let mut se = stderr.lock();
1482            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1483            match a.action {
1484                GovernanceAction::MigrateToPermissions(args) => {
1485                    cli::governance_migrate::run(args, &mut out)
1486                }
1487                GovernanceAction::InstallDefaults(args) => {
1488                    cli::governance_install_defaults::run(&db_path, args, &mut out)
1489                }
1490                GovernanceAction::CheckAction(args) => {
1491                    cli::governance_check_action::run(&db_path, &args, &mut out)
1492                }
1493            }
1494        }
1495        Command::VerifyReflectionChain(a) => {
1496            let stdout = std::io::stdout();
1497            let stderr = std::io::stderr();
1498            let mut so = stdout.lock();
1499            let mut se = stderr.lock();
1500            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1501            match cli::verify::run(&db_path, &a, &mut out)? {
1502                0 => Ok(()),
1503                code => std::process::exit(code),
1504            }
1505        }
1506        Command::VerifySignedEventsChain(a) => {
1507            let stdout = std::io::stdout();
1508            let stderr = std::io::stderr();
1509            let mut so = stdout.lock();
1510            let mut se = stderr.lock();
1511            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1512            match cli::verify_signed_events::run(&db_path, &a, &mut out)? {
1513                0 => Ok(()),
1514                code => std::process::exit(code),
1515            }
1516        }
1517        Command::ExportForensicBundle(a) => {
1518            let stdout = std::io::stdout();
1519            let stderr = std::io::stderr();
1520            let mut so = stdout.lock();
1521            let mut se = stderr.lock();
1522            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1523            match cli::export::export(&db_path, &a, &mut out)? {
1524                0 => Ok(()),
1525                code => std::process::exit(code),
1526            }
1527        }
1528        Command::VerifyForensicBundle(a) => {
1529            let stdout = std::io::stdout();
1530            let stderr = std::io::stderr();
1531            let mut so = stdout.lock();
1532            let mut se = stderr.lock();
1533            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1534            match cli::export::verify(&a, &mut out)? {
1535                0 => Ok(()),
1536                code => std::process::exit(code),
1537            }
1538        }
1539        Command::ExportReflections(a) => {
1540            let stdout = std::io::stdout();
1541            let stderr = std::io::stderr();
1542            let mut so = stdout.lock();
1543            let mut se = stderr.lock();
1544            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1545            match cli::commands::export_reflections::run(&db_path, &a, &mut out)? {
1546                0 => Ok(()),
1547                code => std::process::exit(code),
1548            }
1549        }
1550        Command::RecoverPreviousSession(a) => {
1551            // Issue #1389 — fail-safe recovery from host transcripts.
1552            // Graceful by design: the SessionStart-hook chain MUST
1553            // NOT wedge the agent boot, so per-line parse errors
1554            // surface in the report rather than as Err.
1555            let stdout = std::io::stdout();
1556            let stderr = std::io::stderr();
1557            let mut so = stdout.lock();
1558            let mut se = stderr.lock();
1559            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1560            match cli::commands::recover_previous_session::run(&db_path, &a, &mut out)? {
1561                0 => Ok(()),
1562                code => std::process::exit(code),
1563            }
1564        }
1565        Command::Atomise(a) => {
1566            let stdout = std::io::stdout();
1567            let stderr = std::io::stderr();
1568            let mut so = stdout.lock();
1569            let mut se = stderr.lock();
1570            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1571            match cli::commands::atomise::run(
1572                &db_path,
1573                &a,
1574                app_config,
1575                cli_agent_id.as_deref(),
1576                &mut out,
1577            )? {
1578                0 => Ok(()),
1579                code => std::process::exit(code),
1580            }
1581        }
1582        Command::Persona(a) => {
1583            let stdout = std::io::stdout();
1584            let stderr = std::io::stderr();
1585            let mut so = stdout.lock();
1586            let mut se = stderr.lock();
1587            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1588            // v0.7.0 QW-2 — the CLI deliberately runs WITHOUT a live
1589            // LLM client. `--regenerate` requires one; we surface the
1590            // documented "install Ollama" hint via exit code 2 rather
1591            // than spinning up a transient OllamaClient here. Operators
1592            // who want the regenerate path call `memory_persona_generate`
1593            // through MCP (where the daemon already owns the LLM).
1594            match cli::commands::persona::run(&db_path, &a, None, None, &mut out)? {
1595                0 => Ok(()),
1596                code => std::process::exit(code),
1597            }
1598        }
1599        Command::Calibrate(a) => {
1600            let stdout = std::io::stdout();
1601            let stderr = std::io::stderr();
1602            let mut so = stdout.lock();
1603            let mut se = stderr.lock();
1604            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1605            // v0.7.0 Form 5 (issue #758) — calibration driver.
1606            // Currently dispatches `calibrate confidence`; future
1607            // subcommands (e.g. `calibrate recall`) layer on alongside.
1608            match a.subcommand {
1609                cli::commands::calibrate_confidence::CalibrateSubcommand::Confidence(ref conf) => {
1610                    match cli::commands::calibrate_confidence::run(&db_path, conf, &mut out)? {
1611                        0 => Ok(()),
1612                        code => std::process::exit(code),
1613                    }
1614                }
1615            }
1616        }
1617        Command::Skill(a) => {
1618            let stdout = std::io::stdout();
1619            let stderr = std::io::stderr();
1620            let mut so = stdout.lock();
1621            let mut se = stderr.lock();
1622            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1623            // v0.7.0 Cluster E API-2 (issue #767) — `ai-memory skill
1624            // <subcommand>`. The CLI dispatches with `active_keypair =
1625            // None` to match the existing CLI convention (Persona /
1626            // Calibrate also run without daemon-side ambient state).
1627            // Operators who want signed skill registers/exports/promotes
1628            // hit the MCP / HTTP surface where the daemon owns the
1629            // keypair; the CLI surface stays unsigned by design so
1630            // shell scripts can drive skills without re-implementing
1631            // the keypair-load ceremony.
1632            match cli::commands::skill::run(&db_path, &a, None, &mut out)? {
1633                0 => Ok(()),
1634                code => std::process::exit(code),
1635            }
1636        }
1637        Command::Share(a) => {
1638            let stdout = std::io::stdout();
1639            let stderr = std::io::stderr();
1640            let mut so = stdout.lock();
1641            let mut se = stderr.lock();
1642            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1643            // v0.7.0 #1095 — `ai-memory share`. Wraps the same substrate
1644            // primitive (`mcp::tools::share::handle_share`) the MCP +
1645            // HTTP surfaces consume; wire envelope is byte-equal across
1646            // the three.
1647            cli::share::cmd_share(&db_path, &a, &mut out)
1648        }
1649        // v0.7.0 ARCH-3 / FX-12 — MCP/CLI parity build-out. Each
1650        // dispatch arm wraps the same substrate primitive the MCP tool
1651        // consumes; wire envelope is byte-equal across MCP / HTTP /
1652        // CLI. See `docs/v0.7.0/arch-3-mcp-cli-parity-audit.md`.
1653        Command::KgQuery(a) => {
1654            let stdout = std::io::stdout();
1655            let stderr = std::io::stderr();
1656            let mut so = stdout.lock();
1657            let mut se = stderr.lock();
1658            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1659            cli::commands::kg_query::cmd_kg_query(&db_path, &a, &mut out)
1660        }
1661        Command::FindPaths(a) => {
1662            let stdout = std::io::stdout();
1663            let stderr = std::io::stderr();
1664            let mut so = stdout.lock();
1665            let mut se = stderr.lock();
1666            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1667            cli::commands::find_paths::cmd_find_paths(&db_path, &a, &mut out)
1668        }
1669        Command::RecallObservations(a) => {
1670            let stdout = std::io::stdout();
1671            let stderr = std::io::stderr();
1672            let mut so = stdout.lock();
1673            let mut se = stderr.lock();
1674            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1675            cli::commands::recall_observations::cmd_recall_observations(&db_path, &a, &mut out)
1676        }
1677        Command::CheckDuplicate(a) => {
1678            let stdout = std::io::stdout();
1679            let stderr = std::io::stderr();
1680            let mut so = stdout.lock();
1681            let mut se = stderr.lock();
1682            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1683            cli::commands::check_duplicate::cmd_check_duplicate(&db_path, &a, app_config, &mut out)
1684                .await
1685        }
1686        Command::Expand(a) => {
1687            let stdout = std::io::stdout();
1688            let stderr = std::io::stderr();
1689            let mut so = stdout.lock();
1690            let mut se = stderr.lock();
1691            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1692            match cli::commands::expand::cmd_expand(&a, app_config, &mut out).await? {
1693                0 => Ok(()),
1694                code => std::process::exit(code),
1695            }
1696        }
1697        Command::Reembed(a) => {
1698            let stdout = std::io::stdout();
1699            let stderr = std::io::stderr();
1700            let mut so = stdout.lock();
1701            let mut se = stderr.lock();
1702            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1703            // v0.7.0 #1598 — full-corpus vector-space migration.
1704            // Non-zero exit codes map configuration outcomes
1705            // (no-embedder / init-failed) like `ai-memory expand`.
1706            match cli::commands::reembed::cmd_reembed(&db_path, &a, app_config, &mut out).await? {
1707                0 => Ok(()),
1708                code => std::process::exit(code),
1709            }
1710        }
1711        Command::Replay(a) => {
1712            let stdout = std::io::stdout();
1713            let stderr = std::io::stderr();
1714            let mut so = stdout.lock();
1715            let mut se = stderr.lock();
1716            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1717            cli::commands::replay::cmd_replay(&db_path, &a, &mut out)
1718        }
1719        // v0.7.0 ARCH-3 / FX-C3 (batch2) — 16 additional CLI parity
1720        // dispatch arms. Each wraps the same substrate primitive the
1721        // MCP tool consumes; wire envelope is byte-equal across MCP /
1722        // HTTP / CLI. See
1723        // `docs/v0.7.0/arch-3-mcp-cli-parity-audit.md` §"Added in
1724        // fix/arch3-mcp-cli-parity-batch2".
1725        Command::Reflect(a) => {
1726            let stdout = std::io::stdout();
1727            let stderr = std::io::stderr();
1728            let mut so = stdout.lock();
1729            let mut se = stderr.lock();
1730            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1731            cli::commands::reflect::cmd_reflect(&db_path, &a, &mut out)
1732        }
1733        Command::Subscribe(a) => {
1734            let stdout = std::io::stdout();
1735            let stderr = std::io::stderr();
1736            let mut so = stdout.lock();
1737            let mut se = stderr.lock();
1738            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1739            cli::commands::subscribe::cmd_subscribe(&db_path, &a, &mut out)
1740        }
1741        Command::Unsubscribe(a) => {
1742            let stdout = std::io::stdout();
1743            let stderr = std::io::stderr();
1744            let mut so = stdout.lock();
1745            let mut se = stderr.lock();
1746            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1747            cli::commands::unsubscribe::cmd_unsubscribe(&db_path, &a, &mut out)
1748        }
1749        Command::ListSubscriptions(a) => {
1750            let stdout = std::io::stdout();
1751            let stderr = std::io::stderr();
1752            let mut so = stdout.lock();
1753            let mut se = stderr.lock();
1754            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1755            cli::commands::list_subscriptions::cmd_list_subscriptions(&db_path, &a, &mut out)
1756        }
1757        Command::SubscriptionReplay(a) => {
1758            let stdout = std::io::stdout();
1759            let stderr = std::io::stderr();
1760            let mut so = stdout.lock();
1761            let mut se = stderr.lock();
1762            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1763            cli::commands::subscription_replay::cmd_subscription_replay(&db_path, &a, &mut out)
1764        }
1765        Command::SubscriptionDlqList(a) => {
1766            let stdout = std::io::stdout();
1767            let stderr = std::io::stderr();
1768            let mut so = stdout.lock();
1769            let mut se = stderr.lock();
1770            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1771            cli::commands::subscription_dlq_list::cmd_subscription_dlq_list(&db_path, &a, &mut out)
1772        }
1773        Command::Notify(a) => {
1774            let stdout = std::io::stdout();
1775            let stderr = std::io::stderr();
1776            let mut so = stdout.lock();
1777            let mut se = stderr.lock();
1778            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1779            cli::commands::notify::cmd_notify(&db_path, &a, app_config, &mut out)
1780        }
1781        Command::Inbox(a) => {
1782            let stdout = std::io::stdout();
1783            let stderr = std::io::stderr();
1784            let mut so = stdout.lock();
1785            let mut se = stderr.lock();
1786            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1787            cli::commands::inbox::cmd_inbox(&db_path, &a, &mut out)
1788        }
1789        Command::IngestMultistep(a) => {
1790            let stdout = std::io::stdout();
1791            let stderr = std::io::stderr();
1792            let mut so = stdout.lock();
1793            let mut se = stderr.lock();
1794            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1795            cli::commands::ingest_multistep::cmd_ingest_multistep(&a, app_config, &mut out)
1796        }
1797        Command::KgInvalidate(a) => {
1798            let stdout = std::io::stdout();
1799            let stderr = std::io::stderr();
1800            let mut so = stdout.lock();
1801            let mut se = stderr.lock();
1802            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1803            cli::commands::kg_invalidate::cmd_kg_invalidate(&db_path, &a, &mut out)
1804        }
1805        Command::KgTimeline(a) => {
1806            let stdout = std::io::stdout();
1807            let stderr = std::io::stderr();
1808            let mut so = stdout.lock();
1809            let mut se = stderr.lock();
1810            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1811            cli::commands::kg_timeline::cmd_kg_timeline(&db_path, &a, &mut out)
1812        }
1813        Command::EntityRegister(a) => {
1814            let stdout = std::io::stdout();
1815            let stderr = std::io::stderr();
1816            let mut so = stdout.lock();
1817            let mut se = stderr.lock();
1818            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1819            cli::commands::entity_register::cmd_entity_register(&db_path, &a, &mut out)
1820        }
1821        Command::EntityGetByAlias(a) => {
1822            let stdout = std::io::stdout();
1823            let stderr = std::io::stderr();
1824            let mut so = stdout.lock();
1825            let mut se = stderr.lock();
1826            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1827            cli::commands::entity_get_by_alias::cmd_entity_get_by_alias(&db_path, &a, &mut out)
1828        }
1829        Command::DependentsOfInvalidated(a) => {
1830            let stdout = std::io::stdout();
1831            let stderr = std::io::stderr();
1832            let mut so = stdout.lock();
1833            let mut se = stderr.lock();
1834            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1835            cli::commands::dependents_of_invalidated::cmd_dependents_of_invalidated(
1836                &db_path, &a, &mut out,
1837            )
1838        }
1839        Command::ReflectionOrigin(a) => {
1840            let stdout = std::io::stdout();
1841            let stderr = std::io::stderr();
1842            let mut so = stdout.lock();
1843            let mut se = stderr.lock();
1844            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1845            cli::commands::reflection_origin::cmd_reflection_origin(&db_path, &a, &mut out)
1846        }
1847        Command::QuotaStatus(a) => {
1848            let stdout = std::io::stdout();
1849            let stderr = std::io::stderr();
1850            let mut so = stdout.lock();
1851            let mut se = stderr.lock();
1852            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1853            cli::commands::quota_status::cmd_quota_status(&db_path, &a, &mut out)
1854        }
1855    };
1856
1857    // WAL checkpoint after write commands to prevent unbounded WAL growth
1858    if result.is_ok()
1859        && let Some(cp_path) = db_path_for_checkpoint
1860        && let Ok(conn) = db::open(&cp_path)
1861    {
1862        let _ = db::checkpoint(&conn);
1863    }
1864
1865    result
1866}
1867
1868// ---------------------------------------------------------------------------
1869// is_write_command — predicate for the post-run WAL checkpoint.
1870// ---------------------------------------------------------------------------
1871
1872/// Returns true if `cmd` is a write-class subcommand. The post-run WAL
1873/// checkpoint in [`run`] runs only when this returns `true`.
1874#[must_use]
1875pub fn is_write_command(cmd: &Command) -> bool {
1876    matches!(
1877        cmd,
1878        Command::Store(_)
1879            | Command::Update(_)
1880            | Command::Delete(_)
1881            | Command::Promote(_)
1882            | Command::Forget(_)
1883            | Command::Link(_)
1884            | Command::Consolidate(_)
1885            | Command::Resolve(_)
1886            | Command::Sync(_)
1887            | Command::SyncDaemon(_)
1888            | Command::Import(_)
1889            | Command::AutoConsolidate(_)
1890            | Command::Gc
1891            | Command::Atomise(_)
1892            // v0.7.0 Cluster E API-2 (issue #767) — register / export /
1893            // promote write to the `skills` and `signed_events` tables.
1894            // List / get / resource / compose are read-only but classify
1895            // the whole verb family as write-class so the post-run WAL
1896            // checkpoint keeps the long-lived sqlite file from growing
1897            // unbounded under register-heavy workloads.
1898            | Command::Skill(_)
1899            // v0.7.0 Batman Mode (issue #800) — `namespace set-standard`
1900            // and `clear-standard` write to `namespace_meta`. The
1901            // `get-standard` and `batman-policy` verbs are read-only
1902            // but we classify the whole family as write-class so the
1903            // post-run WAL checkpoint runs.
1904            | Command::Namespace(_)
1905            // v0.7.0 #1095 — `ai-memory share` copies a row into the
1906            // recipient agent's `_shared/<from>→<to>/` namespace, so
1907            // it must trip the post-run WAL checkpoint.
1908            | Command::Share(_)
1909            // v0.7.0 ARCH-3 / FX-C3 (batch2) — write-class verbs in
1910            // the new parity batch. The reads (list-subscriptions /
1911            // subscription-replay / subscription-dlq-list / inbox /
1912            // kg-timeline / entity-get-by-alias / dependents-of-
1913            // invalidated / reflection-origin / quota-status) are
1914            // omitted from this list.
1915            | Command::Reflect(_)
1916            | Command::Subscribe(_)
1917            | Command::Unsubscribe(_)
1918            | Command::Notify(_)
1919            | Command::IngestMultistep(_)
1920            | Command::KgInvalidate(_)
1921            | Command::EntityRegister(_)
1922    )
1923}
1924
1925// ---------------------------------------------------------------------------
1926// Startup helpers (passphrase, anonymize default)
1927// ---------------------------------------------------------------------------
1928
1929/// Read the `SQLCipher` passphrase from `path`. Strips a single trailing
1930/// newline / CRLF; rejects an empty passphrase (post-strip) with an error;
1931/// preserves all other internal whitespace.
1932///
1933/// v0.7.0 #1055 (Agent-2 #5) — on Unix, the function rejects the
1934/// passphrase file when its mode allows ANY group or world access
1935/// (`mode & 0o077 != 0`). Pre-#1055 the function accepted
1936/// world-readable / group-readable files even though CLAUDE.md and
1937/// the doc comment at `src/storage/connection.rs:139-141` promise the
1938/// passphrase file is mode 0400. Any local user with read access to
1939/// the configured path could read the `SQLCipher` passphrase and
1940/// decrypt the on-disk DB offline. Operators with a legitimate need
1941/// for the legacy permissive posture (shared-container deploys where
1942/// the secret is already gated upstream by the orchestrator) can opt
1943/// back in via `AI_MEMORY_PASSPHRASE_FILE_ALLOW_LAX_PERMS=1`. The
1944/// unsafe override is logged at WARN on every fire.
1945///
1946/// # Errors
1947///
1948/// - The file cannot be read (e.g. missing, permission denied).
1949/// - The passphrase, after stripping the trailing newline, is empty.
1950/// - (Unix only, post-#1055) the file's mode allows group or world
1951///   access without the env-var escape hatch.
1952pub fn passphrase_from_file(path: &Path) -> Result<String> {
1953    // v0.7.0 #1055 — Unix permission check. We use the `mode & 0o077`
1954    // bitmask which fires on any group or world rwx bit. Windows
1955    // has no equivalent file-mode ACL primitive; the check is
1956    // compile-conditional so the function still works on cross-
1957    // platform builds.
1958    #[cfg(unix)]
1959    {
1960        use std::os::unix::fs::PermissionsExt;
1961        let meta = std::fs::metadata(path).with_context(|| {
1962            format!(
1963                "stat passphrase file {} for permission check (#1055)",
1964                path.display()
1965            )
1966        })?;
1967        let mode = meta.permissions().mode();
1968        let lax_bits = mode & 0o077;
1969        if lax_bits != 0 {
1970            let fail_open = std::env::var("AI_MEMORY_PASSPHRASE_FILE_ALLOW_LAX_PERMS")
1971                .map(|v| v == "1" || v.eq_ignore_ascii_case("true"))
1972                .unwrap_or(false);
1973            if fail_open {
1974                tracing::warn!(
1975                    target: "ai_memory::daemon_runtime",
1976                    path = %path.display(),
1977                    mode = format!("{:o}", mode & 0o777),
1978                    "passphrase_from_file: file is group/world-readable; \
1979                     AI_MEMORY_PASSPHRASE_FILE_ALLOW_LAX_PERMS=1 — accepting \
1980                     (UNSAFE, legacy posture). Tighten with `chmod 0400 <path>` \
1981                     and clear the env var."
1982                );
1983            } else {
1984                anyhow::bail!(
1985                    "passphrase file {} has lax permissions (mode {:o}, group/world bits set); \
1986                     tighten with `chmod 0400 {}` OR set \
1987                     AI_MEMORY_PASSPHRASE_FILE_ALLOW_LAX_PERMS=1 to opt out (#1055)",
1988                    path.display(),
1989                    mode & 0o777,
1990                    path.display(),
1991                );
1992            }
1993        }
1994    }
1995    let mut raw = std::fs::read_to_string(path)
1996        .with_context(|| format!("reading passphrase file {}", path.display()))?;
1997    let passphrase = raw.trim_end_matches(['\n', '\r']).to_string();
1998    // #1258 — zeroize the intermediate `raw` buffer so the secret bytes
1999    // do not linger on the heap after we hand the trimmed copy to the
2000    // caller. The caller is responsible for zeroizing the returned
2001    // `passphrase` when it falls out of scope (typically passed
2002    // straight into `AI_MEMORY_DB_PASSPHRASE`).
2003    {
2004        use zeroize::Zeroize;
2005        raw.zeroize();
2006    }
2007    if passphrase.is_empty() {
2008        anyhow::bail!("passphrase file {} is empty", path.display());
2009    }
2010    Ok(passphrase)
2011}
2012
2013/// Apply the configured `anonymize_default` to the runtime env: when the
2014/// config asks for anonymization but the user hasn't already set
2015/// `AI_MEMORY_ANONYMIZE`, set it to `"1"`. Idempotent — repeated calls are
2016/// a no-op once the env var is set.
2017///
2018/// Note: this writes to the process environment; callers must invoke it
2019/// from the single-threaded startup region (before any worker threads are
2020/// spawned). The production binary calls it from `main()` for that reason.
2021pub fn apply_anonymize_default(app_config: &AppConfig) {
2022    // #198: config → env mapping for agent_id anonymization. Env var already
2023    // set by the caller wins; config is only applied when the env is unset.
2024    if app_config.effective_anonymize_default()
2025        && std::env::var(crate::identity::ENV_ANONYMIZE).is_err()
2026    {
2027        // SAFETY: single-threaded startup before any worker threads spawn.
2028        unsafe { std::env::set_var(crate::identity::ENV_ANONYMIZE, "1") };
2029    }
2030}
2031
2032/// #976 (2026-05-20) — resolve the admin-allowlist with env-var
2033/// precedence over the config-file `[admin].agent_ids` block.
2034///
2035/// `AI_MEMORY_ADMIN_AGENT_IDS` is a comma-separated list of agent_ids.
2036/// The wildcard `*` is honoured (every authenticated caller becomes
2037/// admin — appropriate for test daemons + container deploys that
2038/// receive the admin allowlist from orchestration secrets instead of a
2039/// shipped config.toml). Same `validate_agent_id` filter as the config
2040/// path; malformed entries are dropped with a `warn` log so a single
2041/// typo cannot lock the operator out.
2042///
2043/// Returns the config-file allowlist when the env var is absent or
2044/// empty; returns an empty Vec when neither source provides agent_ids
2045/// (closes every admin-class endpoint by default — the secure
2046/// posture per the post-#946 NHI contract).
2047#[must_use]
2048pub fn resolve_admin_agent_ids(admin_cfg: Option<&crate::config::AdminConfig>) -> Vec<String> {
2049    if let Ok(raw) = std::env::var("AI_MEMORY_ADMIN_AGENT_IDS")
2050        && !raw.trim().is_empty()
2051    {
2052        let mut out = Vec::new();
2053        for entry in raw.split(',') {
2054            let id = entry.trim();
2055            if id.is_empty() {
2056                continue;
2057            }
2058            // #980 (2026-05-20) — the `AI_MEMORY_ADMIN_AGENT_IDS=*`
2059            // wildcard carve-out is REMOVED. Pre-#980 the env var
2060            // accepted `"*"` as an explicit "admit every caller"
2061            // sentinel; combined with the `is_admin_caller` wildcard
2062            // arm (also closed in #980), an operator who set the
2063            // env var (intentionally or via a copy-paste mishap)
2064            // opened every admin endpoint. Operators wanting a
2065            // permissive admin posture must now enumerate the agent
2066            // ids explicitly (e.g. comma-separated list of NHI
2067            // principals); the wildcard entry is rejected by
2068            // `validate_agent_id` (shape: `*` is not in the allowed
2069            // char class) and dropped with a WARN. The previous
2070            // explicit-test-only path lives behind `#[cfg(test)]` in
2071            // `is_admin_caller`; production deployments cannot reach
2072            // it regardless of how the allowlist is populated.
2073            match crate::validate::validate_agent_id(id) {
2074                Ok(()) => out.push(id.to_string()),
2075                Err(e) => {
2076                    tracing::warn!(
2077                        "AI_MEMORY_ADMIN_AGENT_IDS entry '{id}' rejected: {e}; dropping"
2078                    );
2079                }
2080            }
2081        }
2082        return out;
2083    }
2084    admin_cfg
2085        .map(crate::config::AdminConfig::validated_agent_ids)
2086        .unwrap_or_default()
2087}
2088
2089// ---------------------------------------------------------------------------
2090// Embedder / vector-index canonical builders
2091// ---------------------------------------------------------------------------
2092
2093/// #1521 — resolve the daemon embedder model under the canonical
2094/// precedence ladder, mirroring the [`AppConfig::resolve_embeddings`]
2095/// layering for the model dimension:
2096///
2097///   1. `[embeddings].model` (sectioned v2 config, #1146)
2098///   2. legacy flat `embedding_model` (deprecated)
2099///   3. tier-preset `embedding_model`
2100///   4. `None` (keyword-only / embeddings disabled)
2101///
2102/// The model is read from the explicit section/flat fields rather than
2103/// `ResolvedEmbeddings.model` (which defaults to nomic whenever ANY
2104/// `[embeddings]` key is present), so a url-only section on the semantic
2105/// tier still keeps the tier-preset MiniLM model. A configured id the
2106/// 2-model daemon embedder cannot construct (or an unparseable one)
2107/// degrades to the tier preset — the operator picked a pin, not
2108/// keyword-only. Pure: no network I/O, so the precedence is unit-testable
2109/// without an HF-Hub fetch (`build_embedder` does the construction).
2110#[allow(deprecated)]
2111pub(crate) fn resolve_embedder_model(
2112    tier_config: &crate::config::TierConfig,
2113    app_config: &AppConfig,
2114) -> Option<crate::config::EmbeddingModel> {
2115    let preset = tier_config.embedding_model;
2116    let preset_label = preset
2117        .map(|m| m.hf_model_id().to_string())
2118        .unwrap_or_else(|| "none".to_string());
2119
2120    let configured = app_config
2121        .embeddings
2122        .as_ref()
2123        .and_then(|section| section.model.clone())
2124        .filter(|raw| !raw.trim().is_empty())
2125        .map(|raw| (raw, "[embeddings].model"))
2126        .or_else(|| {
2127            app_config
2128                .embedding_model
2129                .clone()
2130                .filter(|raw| !raw.trim().is_empty())
2131                .map(|raw| (raw, "legacy embedding_model"))
2132        });
2133
2134    let Some((raw, origin)) = configured else {
2135        return preset;
2136    };
2137    match crate::config::EmbeddingModel::from_canonical_id(&raw) {
2138        Some(model) => {
2139            tracing::info!(
2140                "embedder: using configured model {} from {origin} (tier-preset would have been {})",
2141                model.hf_model_id(),
2142                preset_label
2143            );
2144            Some(model)
2145        }
2146        None => {
2147            tracing::warn!(
2148                "embedder: configured model {raw:?} (from {origin}) is not constructible by the \
2149                 daemon embedder (supported: nomic-embed-text-v1.5, all-MiniLM-L6-v2); \
2150                 falling back to tier-preset {preset_label}"
2151            );
2152            preset
2153        }
2154    }
2155}
2156
2157/// Construct the [`Embedder`] for a given tier. Returns `None` for the
2158/// keyword tier (no embedder requested) and on load failure (caller
2159/// degrades to keyword fallback). On failure the diagnostic is emitted
2160/// via `tracing::error!` so operators see it in `journalctl`.
2161///
2162/// This is the single canonical embedder builder used by both `serve()`
2163/// (HTTP daemon) and `cli::recall::run` (offline recall). Prior to W6
2164/// each call site had its own copy, with subtly different fallback
2165/// shapes — the bug at issue #322 was a direct consequence.
2166#[allow(deprecated)]
2167pub async fn build_embedder(feature_tier: FeatureTier, app_config: &AppConfig) -> Option<Embedder> {
2168    let tier_config = feature_tier.config();
2169    // #1521: consume the canonical embeddings resolver so the sectioned
2170    // `[embeddings]` block (#1146) drives the daemon embedder, not just
2171    // the deprecated flat fields.
2172    //
2173    // #1598 — construction is delegated to the single shared boot
2174    // entry `Embedder::from_resolved` (also used by the MCP stdio
2175    // init). For the local/ollama backend the model is resolved by
2176    // the pure `resolve_embedder_model` helper (precedence:
2177    // `[embeddings].model` section > legacy flat `embedding_model` >
2178    // tier preset); for API backends the operator's `model` id is
2179    // wired verbatim by the resolver and the tier preset only gates
2180    // whether embeddings are enabled at all (Some vs None).
2181    let resolved_embeddings = app_config.resolve_embeddings();
2182    let tier_model = if crate::config::is_api_embed_backend(&resolved_embeddings.backend) {
2183        tier_config.embedding_model
2184    } else {
2185        resolve_embedder_model(&tier_config, app_config)
2186    };
2187    let Some(emb_model) = tier_model else {
2188        tracing::info!(
2189            "embedder disabled — tier={} keyword-only (FTS5); semantic recall not wired",
2190            feature_tier.as_str()
2191        );
2192        return None;
2193    };
2194    // The HF-Hub sync API and candle model-load are blocking CPU work that
2195    // internally spin their own tokio runtime. Running them directly in this
2196    // async context panics with "Cannot drop a runtime in a context where
2197    // blocking is not allowed." Move the whole construction onto the blocking
2198    // pool so the inner runtime is owned by a dedicated thread.
2199    let resolved_for_build = resolved_embeddings.clone();
2200    let build = match tokio::task::spawn_blocking(move || {
2201        embeddings::Embedder::from_resolved(&resolved_for_build, Some(emb_model))
2202    })
2203    .await
2204    {
2205        Ok(b) => b,
2206        Err(e) => {
2207            tracing::error!("embedder spawn_blocking join failed: {e}");
2208            return None;
2209        }
2210    };
2211    match build {
2212        Ok(Some(emb)) => {
2213            tracing::info!(
2214                "embedder loaded ({}) — tier={} semantic recall enabled",
2215                emb.model_description(),
2216                feature_tier.as_str()
2217            );
2218            Some(emb)
2219        }
2220        // Unreachable with `Some(emb_model)` threaded above; kept
2221        // explicit so the keyword-tier contract of `from_resolved`
2222        // stays loud here (#1598).
2223        Ok(None) => None,
2224        Err(e) => {
2225            // v0.6.2 (#327): make embedder load failures loud. The
2226            // prior WARN level was easy to miss in DO droplet logs,
2227            // which led to scenario-18 black-holing (semantic recall
2228            // falling back to keyword-only without the operator
2229            // noticing). An ERROR-level log with an obvious marker
2230            // surfaces this immediately in `journalctl -u ai-memory`
2231            // or tail -f /var/log/ai-memory-serve.log.
2232            tracing::error!(
2233                "EMBEDDER LOAD FAILED — tier={} requested semantic features, \
2234                 but embedder init errored: {e:#}. Semantic recall DEGRADED to \
2235                 keyword (#1593/#1598 fail-closed; the chat LLM client is NEVER \
2236                 reused for embeddings). Semantic recall, sync_push embedding \
2237                 refresh (#322), and HNSW index will be NO-OPS. For local \
2238                 backends check network egress to HuggingFace Hub + available \
2239                 memory for model weights; for API backends check the resolved \
2240                 base URL / API key (`ai-memory doctor`). To force keyword-only \
2241                 explicitly (silences this error), set `tier = \"keyword\"` in \
2242                 config.toml.",
2243                feature_tier.as_str()
2244            );
2245            None
2246        }
2247    }
2248}
2249
2250/// v0.7.0 L5 — construct the LLM [`OllamaClient`] for autonomy-hook
2251/// capable feature tiers (`smart` / `autonomous`). Returns `None` for
2252/// the `keyword` / `semantic` tiers (no `llm_model` declared in the
2253/// [`TierConfig`]) and on Ollama unreachability (caller degrades to
2254/// non-LLM behaviour). On failure the diagnostic is emitted via
2255/// `tracing::warn!` so operators see it in `journalctl` without
2256/// killing the daemon — autonomy hooks are best-effort and the
2257/// store path must keep working when Ollama is offline.
2258///
2259/// **FX-D1 (v0.7.0, 2026-05-27).** Pre-FX-D1 this function wrapped
2260/// the sync [`llm::OllamaClient::build_from_resolved`] in
2261/// `tokio::task::spawn_blocking`. The sync constructor went through
2262/// `block_on_local`, whose FX-C1 design panicked on the current-thread
2263/// arm. Production tests that defaulted to `#[tokio::test]`
2264/// (current-thread) hit the panic — `spawn_blocking`'s blocking-pool
2265/// thread inherits the outer runtime handle, so `Handle::try_current()`
2266/// resolved to a `CurrentThread` flavor and tripped the panic. The
2267/// log line was: `task 294 panicked with message "OllamaClient sync
2268/// wrapper called from inside a current-thread tokio runtime."`.
2269///
2270/// The surgical fix is to call the async constructor
2271/// [`llm::OllamaClient::build_from_resolved_async`] directly — no
2272/// `spawn_blocking`, no `block_on_local`, no sync→async bridge — so
2273/// the construction runs on whichever tokio runtime the caller
2274/// brought. The defensive fix in `block_on_local` (replace the panic
2275/// with a fresh-OS-thread bridge) catches every other unknown
2276/// callsite that might hit the same shape; this surgical fix is the
2277/// optimal path at this known callsite.
2278pub async fn build_llm_client(
2279    feature_tier: FeatureTier,
2280    app_config: &AppConfig,
2281) -> Option<llm::OllamaClient> {
2282    // v0.7.x (#1146) — single canonical entry through the resolver.
2283    // The resolver folds CLI flags (none here — `ai-memory serve`
2284    // exposes no CLI LLM override), AI_MEMORY_LLM_* env vars, the
2285    // [llm] config section, the legacy llm_model/ollama_url flat
2286    // fields, and the compiled tier preset. The provenance fields
2287    // surface via the tracing log line so RUST_LOG=ai_memory=debug
2288    // shows which precedence layer won.
2289    let resolved = app_config.resolve_llm(None, None, None);
2290
2291    // No-preset-tier short-circuit: when the tier has no compiled
2292    // `llm_model` preset (Keyword + Semantic at v0.7.0) AND there is
2293    // no explicit operator intent (resolver `source == CompiledDefault`),
2294    // the resolver's Ollama-default-fallback should NOT pull a client
2295    // into existence. This matches pre-#1146 v0.6.x behaviour and
2296    // avoids paying a blocking reqwest call to a (likely-absent)
2297    // Ollama under tokio test contexts. Operators who explicitly
2298    // want an LLM on Keyword/Semantic set AI_MEMORY_LLM_BACKEND or
2299    // write a [llm] section, which moves `source` off the
2300    // CompiledDefault arm.
2301    if feature_tier.config().llm_model.is_none()
2302        && matches!(
2303            resolved.source,
2304            crate::config::ConfigSource::CompiledDefault
2305        )
2306    {
2307        tracing::debug!(
2308            "L5: llm client disabled — tier={} has no llm_model preset AND no \
2309             operator LLM config; set AI_MEMORY_LLM_BACKEND or [llm] section to enable",
2310            feature_tier.as_str()
2311        );
2312        return None;
2313    }
2314
2315    let backend = resolved.backend.clone();
2316    let model = resolved.model.clone();
2317    let source = resolved.source.as_str().to_string();
2318    let key_source = resolved.api_key_source.as_str().to_string();
2319    let tier_str = feature_tier.as_str().to_string();
2320
2321    // FX-D1 (2026-05-27): call the async constructor directly. The
2322    // pre-FX-D1 `spawn_blocking` wrapper drove the sync constructor
2323    // through `block_on_local`, which panicked on the current-thread
2324    // tokio arm (the default `#[tokio::test]` flavor). The async
2325    // path skips the sync→async bridge entirely so the construction
2326    // runs on whichever tokio runtime the caller brought, with no
2327    // re-entry hazard.
2328    let build = llm::OllamaClient::build_from_resolved_async(&resolved).await;
2329
2330    match build {
2331        Ok(Some(client)) => {
2332            tracing::info!(
2333                "L5: llm client ready — tier={tier_str} backend={backend} \
2334                 model={model} source={source} key_source={key_source} \
2335                 — auto_tag/expand_query/contradiction-detection/reflection \
2336                 hooks armed (#1146 resolver path)"
2337            );
2338            Some(client)
2339        }
2340        Ok(None) => {
2341            tracing::warn!(
2342                "L5: llm client disabled — resolver returned no client \
2343                 (tier={tier_str} backend={backend} source={source}); \
2344                 LLM-powered hooks are no-ops"
2345            );
2346            None
2347        }
2348        Err(e) => {
2349            tracing::warn!(
2350                "L5: llm client init failed (tier={tier_str} backend={backend} \
2351                 source={source}); LLM-powered hooks are no-ops: {e}"
2352            );
2353            None
2354        }
2355    }
2356}
2357
2358/// Build the in-memory [`VectorIndex`] from `conn`. When `embedder_present`
2359/// is false, returns `None` (the keyword-only path doesn't need an index).
2360/// When the embedder is present but the DB is empty (or query errors),
2361/// returns `Some(VectorIndex::empty())` so write paths can populate it
2362/// in-place.
2363#[must_use]
2364pub fn build_vector_index(conn: &Connection, embedder_present: bool) -> Option<VectorIndex> {
2365    if !embedder_present {
2366        return None;
2367    }
2368    match db::get_all_embeddings(conn) {
2369        Ok(entries) if !entries.is_empty() => Some(hnsw::VectorIndex::build(entries)),
2370        _ => Some(hnsw::VectorIndex::empty()),
2371    }
2372}
2373
2374/// #1579 B3 — read the boot warm-up entry set (every stored
2375/// embedding) over a private connection. Opened fresh so the boot
2376/// loader thread never touches the request-serving connection;
2377/// failures degrade to "no warm-up" with a WARN (the daemon keeps
2378/// serving keyword/FTS recall — the pre-#1579 failure posture).
2379pub(crate) fn load_boot_index_entries(db_path: &Path) -> Option<Vec<(String, Vec<f32>)>> {
2380    let conn = match db::open(db_path) {
2381        Ok(c) => c,
2382        Err(e) => {
2383            tracing::warn!(
2384                db_path = %db_path.display(),
2385                err = %e,
2386                "HNSW boot warm-up: could not open DB; semantic index stays cold (#1579 B3)"
2387            );
2388            return None;
2389        }
2390    };
2391    match db::get_all_embeddings(&conn) {
2392        Ok(entries) => Some(entries),
2393        Err(e) => {
2394            tracing::warn!(
2395                err = %e,
2396                "HNSW boot warm-up: get_all_embeddings failed; semantic index stays cold (#1579 B3)"
2397            );
2398            None
2399        }
2400    }
2401}
2402
2403/// #1579 B3 — async boot HNSW warm-up for `serve`.
2404///
2405/// Pre-#1579 the daemon built the HNSW graph SYNCHRONOUSLY at boot
2406/// (`get_all_embeddings` + `VectorIndex::build` on the startup path):
2407/// P1 measured spawn→initialize at 40 s for a 10k-vector corpus and
2408/// >28 min at 100k. This loader moves the whole load+build off the
2409/// startup path onto a background thread, reusing the #968
2410/// double-buffer rebuild machinery: the daemon binds and answers
2411/// immediately with an EMPTY index; semantic recall degrades to its
2412/// keyword/FTS blend until the warmed graph swaps in (the #519
2413/// proactive conflict check routes to its bounded-scan fallback for
2414/// the same window via [`hnsw::VectorIndex::is_fully_searchable`]).
2415///
2416/// Locking discipline: the `AppState.vector_index` outer mutex is
2417/// held only for microsecond-scale steps (seed-extend, schedule,
2418/// swap) — NEVER across the graph build, which runs detached on the
2419/// #968 rebuild thread. Request handlers therefore keep making
2420/// progress throughout the warm-up.
2421///
2422/// Emits one INFO line when the swap lands so operators can see
2423/// time-to-semantic-ready in the daemon log.
2424pub fn spawn_vector_index_boot_load(
2425    db_path: std::path::PathBuf,
2426    vector_index: Arc<tokio::sync::Mutex<Option<VectorIndex>>>,
2427) -> std::thread::JoinHandle<()> {
2428    std::thread::spawn(move || {
2429        let started = std::time::Instant::now();
2430        let Some(entries) = load_boot_index_entries(&db_path) else {
2431            return;
2432        };
2433        if entries.is_empty() {
2434            tracing::info!(
2435                "HNSW boot warm-up: no stored embeddings — index starts empty (#1579 B3)"
2436            );
2437            return;
2438        }
2439        let total = entries.len();
2440        // Step 1 — seed + schedule the background build under a BRIEF
2441        // outer lock. The returned handle is detached from the borrow
2442        // (the rebuild thread captures Arc'd internals, not `&self`),
2443        // so we can join it after dropping the guard.
2444        let build_handle = {
2445            let guard = vector_index.blocking_lock();
2446            let Some(idx) = guard.as_ref() else {
2447                return;
2448            };
2449            idx.seed_and_rebuild_async(entries)
2450        };
2451        let _ = build_handle.join();
2452        // Step 2 — swap the warmed graph in; loop covers the
2453        // rebuild-CAS race with any routine 200-overflow rebuild that
2454        // was scheduled by boot-window writes (see
2455        // `VectorIndex::warm_boot` for the same contract).
2456        loop {
2457            let pending = {
2458                let guard = vector_index.blocking_lock();
2459                let Some(idx) = guard.as_ref() else {
2460                    return;
2461                };
2462                if idx.is_fully_searchable() {
2463                    None
2464                } else {
2465                    Some(idx.rebuild_async())
2466                }
2467            };
2468            match pending {
2469                None => break,
2470                Some(handle) => {
2471                    let _ = handle.join();
2472                    // A no-op handle (rebuild CAS busy) joins
2473                    // instantly — pace the retry so the loop doesn't
2474                    // spin while the in-flight build finishes.
2475                    std::thread::sleep(crate::hnsw::REBUILD_WAIT_POLL_INTERVAL);
2476                }
2477            }
2478        }
2479        #[allow(clippy::cast_possible_truncation)]
2480        let elapsed_ms = started.elapsed().as_millis() as u64;
2481        tracing::info!(
2482            entries = total,
2483            elapsed_ms,
2484            "HNSW index warm (#1579 B3): async boot build swapped in; \
2485             semantic recall is now index-backed"
2486        );
2487    })
2488}
2489
2490// ---------------------------------------------------------------------------
2491// v0.7 Track H — H2 active keypair loading
2492// ---------------------------------------------------------------------------
2493
2494// Round-3 F12 — the daemon's fixed signing-key label. Canonical const
2495// (with the full F12 rationale) now lives at
2496// `crate::identity::keypair::DAEMON_KEYPAIR_LABEL` (#1558).
2497use crate::identity::keypair::DAEMON_KEYPAIR_LABEL;
2498
2499/// Round-3 F12 — ensure the daemon's signing keypair exists on disk and
2500/// load it for the serve [`AppState`]. Returns the in-memory keypair
2501/// (if any) plus the lifecycle outcome (Generated/AlreadyExists/
2502/// SkippedDisabled/None) so the startup banner can surface the
2503/// auto-gen line.
2504///
2505/// Resolution:
2506///   1. Resolve the default key directory
2507///      ([`crate::identity::keypair::default_key_dir`]).
2508///   2. Call [`crate::identity::keypair::ensure_keypair`] under the
2509///      stable [`DAEMON_KEYPAIR_LABEL`]. Idempotent: a daemon restart
2510///      never overwrites an existing keypair (which would silently
2511///      invalidate every prior signed link).
2512///   3. Load the keypair from disk and return it.
2513///
2514/// Failure at any step degrades the daemon to unsigned-link mode (the
2515/// pre-v0.7 posture) without aborting startup. Log lines describe
2516/// which path was taken so an operator inspecting daemon logs sees
2517/// the cause.
2518fn ensure_and_load_daemon_keypair() -> (
2519    Option<crate::identity::keypair::AgentKeypair>,
2520    Option<crate::identity::keypair::EnsureOutcome>,
2521) {
2522    let dir = match crate::identity::keypair::default_key_dir() {
2523        Ok(d) => d,
2524        Err(e) => {
2525            tracing::info!("identity: no default key dir available, link signing disabled: {e}");
2526            return (None, None);
2527        }
2528    };
2529    // The `[identity].disabled` config field is not yet wired in
2530    // v0.7.0; pass `false` so the helper auto-generates unless the
2531    // operator pre-staged a keypair. A future config field can opt
2532    // out without changing this call site.
2533    let outcome = match crate::identity::keypair::ensure_keypair(DAEMON_KEYPAIR_LABEL, &dir, false)
2534    {
2535        Ok(o) => o,
2536        Err(e) => {
2537            tracing::warn!("identity: keypair auto-gen failed: {e:#}");
2538            return (None, None);
2539        }
2540    };
2541    if matches!(
2542        outcome,
2543        crate::identity::keypair::EnsureOutcome::SkippedDisabled
2544    ) {
2545        return (None, Some(outcome));
2546    }
2547    let kp = match crate::identity::keypair::load(DAEMON_KEYPAIR_LABEL, &dir) {
2548        Ok(kp) if kp.can_sign() => {
2549            tracing::info!(
2550                "identity: loaded signing keypair for {DAEMON_KEYPAIR_LABEL} from {}",
2551                dir.display()
2552            );
2553            Some(kp)
2554        }
2555        Ok(_) => {
2556            tracing::info!(
2557                "identity: only public key on disk for {DAEMON_KEYPAIR_LABEL}; link signing disabled"
2558            );
2559            None
2560        }
2561        Err(e) => {
2562            tracing::warn!(
2563                "identity: keypair load failed for {DAEMON_KEYPAIR_LABEL}: {e:#}; link signing disabled"
2564            );
2565            None
2566        }
2567    };
2568    (kp, Some(outcome))
2569}
2570
2571// ---------------------------------------------------------------------------
2572// Background tasks (GC, WAL checkpoint)
2573// ---------------------------------------------------------------------------
2574
2575/// Spawn the periodic GC loop. Sleeps `interval`, then runs `db::gc`,
2576/// `db::auto_purge_archive`, and (Cluster G, #767) the shadow-
2577/// observation retention sweep against the daemon's shared connection.
2578/// The returned [`JoinHandle`] is owned by the caller; `serve()` aborts
2579/// it on shutdown.
2580///
2581/// `shadow_retention_days` honors the operator-tunable
2582/// `[confidence] shadow_retention_days` from `config.toml`, falling
2583/// back to [`crate::confidence::shadow::DEFAULT_SHADOW_RETENTION_DAYS`]
2584/// (30) when unset. `<= 0` disables the sweep (matches the
2585/// `archive_max_days` convention).
2586#[must_use]
2587pub fn spawn_gc_loop(
2588    state: Db,
2589    archive_max_days: Option<i64>,
2590    interval: Duration,
2591) -> JoinHandle<()> {
2592    spawn_gc_loop_with_shadow_retention(
2593        state,
2594        archive_max_days,
2595        crate::confidence::shadow::DEFAULT_SHADOW_RETENTION_DAYS,
2596        interval,
2597    )
2598}
2599
2600/// Cluster G (#767) — `spawn_gc_loop` variant that takes an explicit
2601/// shadow-observation retention window. Used by `bootstrap_serve` so
2602/// the operator-tunable `[confidence] shadow_retention_days` from
2603/// `config.toml` flows through. `spawn_gc_loop` is the no-arg wrapper
2604/// that picks the compiled default for legacy call sites (tests).
2605#[must_use]
2606pub fn spawn_gc_loop_with_shadow_retention(
2607    state: Db,
2608    archive_max_days: Option<i64>,
2609    shadow_retention_days: i64,
2610    interval: Duration,
2611) -> JoinHandle<()> {
2612    tokio::spawn(async move {
2613        loop {
2614            tokio::time::sleep(interval).await;
2615            let lock = state.lock().await;
2616            match db::gc(&lock.0, lock.3) {
2617                Ok(n) if n > 0 => tracing::info!("gc: expired {n} memories"),
2618                _ => {}
2619            }
2620            // Auto-purge old archives if configured
2621            match db::auto_purge_archive(&lock.0, archive_max_days) {
2622                Ok(n) if n > 0 => tracing::info!("gc: purged {n} old archived memories"),
2623                _ => {}
2624            }
2625            // Cluster G (#767, PERF-4) — shadow-mode observation
2626            // retention sweep. `<= 0` is a no-op (operator opt-out).
2627            match crate::confidence::shadow::gc_observations(&lock.0, shadow_retention_days) {
2628                Ok(n) if n > 0 => tracing::info!(
2629                    "gc: purged {n} shadow observations older than {shadow_retention_days}d"
2630                ),
2631                Ok(_) => {}
2632                Err(e) => tracing::warn!("shadow observation gc failed: {e}"),
2633            }
2634            // #1690 — recall_observations retention sweep. The pruner
2635            // (observations::gc::prune, honouring AI_MEMORY_OBSERVATIONS_TTL_DAYS
2636            // — CLAUDE.md env #42) previously had NO production caller, so the
2637            // recall-observation ledger grew unbounded with recall traffic.
2638            match crate::observations::gc::prune(&lock.0) {
2639                Ok(n) if n > 0 => {
2640                    tracing::info!("gc: pruned {n} expired recall_observations");
2641                }
2642                Ok(_) => {}
2643                Err(e) => tracing::warn!("recall_observations gc failed: {e}"),
2644            }
2645        }
2646    })
2647}
2648
2649/// v0.7.0 K2 — spawn the periodic `pending_actions` timeout sweeper.
2650///
2651/// Sleeps `interval`, then calls [`db::sweep_pending_action_timeouts`]
2652/// against the daemon's shared connection. Per-row
2653/// `default_timeout_seconds` overrides the global `default_secs` when
2654/// non-NULL. A non-positive `default_secs` disables the sweeper.
2655///
2656/// Returned [`JoinHandle`] is owned by the caller; `serve()` aborts it
2657/// on shutdown — same lifecycle as [`spawn_gc_loop`].
2658///
2659/// Closes the v0.6.3.1 honest-Capabilities-v2 disclosure that the
2660/// `default_timeout_seconds` field was advertised but unused.
2661#[must_use]
2662pub fn spawn_pending_timeout_sweep_loop(
2663    state: Db,
2664    db_path: PathBuf,
2665    default_secs: i64,
2666    interval: Duration,
2667) -> JoinHandle<()> {
2668    tokio::spawn(async move {
2669        loop {
2670            tokio::time::sleep(interval).await;
2671            // Hold the lock just long enough for the sweep call. The
2672            // expired ids returned by the sweeper are dispatched to
2673            // subscribers AFTER the lock drops so a slow webhook can
2674            // never starve write traffic.
2675            let expired = {
2676                let lock = state.lock().await;
2677                match db::sweep_pending_action_timeouts(&lock.0, default_secs) {
2678                    Ok(rows) => rows,
2679                    Err(e) => {
2680                        tracing::warn!("pending_actions sweep failed: {e}");
2681                        Vec::new()
2682                    }
2683                }
2684            };
2685            if expired.is_empty() {
2686                continue;
2687            }
2688            tracing::info!(
2689                "pending_actions sweep: marked {} row(s) expired",
2690                expired.len()
2691            );
2692            // Best-effort fan-out via the existing subscription
2693            // dispatcher. K2 piggybacks on the lifecycle event
2694            // shape — the namespace + id are enough for downstream
2695            // webhook consumers to look the row up. The full
2696            // approval-event surface (typed payloads, retry, DLQ)
2697            // arrives in K4 / K7.
2698            for (id, namespace) in expired {
2699                let lock = state.lock().await;
2700                crate::subscriptions::dispatch_event(
2701                    &lock.0,
2702                    "pending_action_expired",
2703                    &id,
2704                    &namespace,
2705                    None,
2706                    &db_path,
2707                );
2708            }
2709        }
2710    })
2711}
2712
2713/// v0.7.0 I3 — spawn the periodic transcript archive→prune sweeper.
2714///
2715/// Sleeps `interval`, then calls
2716/// [`crate::transcripts::sweep_transcript_lifecycle`] against the
2717/// daemon's shared connection. The per-namespace TTL configuration
2718/// is captured by `cfg` once at spawn time (operators editing
2719/// `[transcripts]` in `config.toml` after boot must restart the
2720/// daemon — same model as the K2 pending sweeper).
2721///
2722/// The returned [`JoinHandle`] is owned by the caller; `serve()`
2723/// aborts it on shutdown — same lifecycle as
2724/// [`spawn_pending_timeout_sweep_loop`].
2725#[must_use]
2726pub fn spawn_transcript_lifecycle_sweep_loop(
2727    state: Db,
2728    cfg: crate::config::TranscriptsConfig,
2729    interval: Duration,
2730) -> JoinHandle<()> {
2731    tokio::spawn(async move {
2732        loop {
2733            tokio::time::sleep(interval).await;
2734            // Hold the connection lock for the whole sweep: the
2735            // archive + prune phases share one `now` and the
2736            // archive-then-prune semantics require sequential
2737            // execution against the same view of the table. A 10-
2738            // minute cadence means the lock window is at most a few
2739            // ms even on busy databases.
2740            let report = {
2741                let lock = state.lock().await;
2742                match crate::transcripts::sweep_transcript_lifecycle(&lock.0, &cfg) {
2743                    Ok(r) => r,
2744                    Err(e) => {
2745                        tracing::warn!("transcript lifecycle sweep failed: {e}");
2746                        continue;
2747                    }
2748                }
2749            };
2750            if report.archived > 0 || report.pruned > 0 || report.errors > 0 {
2751                tracing::info!(
2752                    "transcript lifecycle sweep: archived={} pruned={} errors={}",
2753                    report.archived,
2754                    report.pruned,
2755                    report.errors,
2756                );
2757            }
2758        }
2759    })
2760}
2761
2762/// v0.7.0 K8 — spawn the periodic agent-quota daily-counter reset
2763/// sweeper.
2764///
2765/// Sleeps `interval`, then calls [`crate::quotas::reset_daily`] against
2766/// the daemon's shared connection. The SQL statement zeros
2767/// `current_memories_today` + `current_links_today` for every row
2768/// whose `day_started_at` is not the current UTC date — touched rows
2769/// equal "agents that crossed midnight since the last sweep tick"
2770/// which is at most one row per registered agent per 24h.
2771///
2772/// The returned [`JoinHandle`] is owned by the caller; `serve()`
2773/// aborts it on shutdown — same lifecycle as
2774/// [`spawn_pending_timeout_sweep_loop`].
2775#[must_use]
2776pub fn spawn_agent_quota_reset_loop(state: Db, interval: Duration) -> JoinHandle<()> {
2777    tokio::spawn(async move {
2778        loop {
2779            tokio::time::sleep(interval).await;
2780            let reset_count = {
2781                let lock = state.lock().await;
2782                match crate::quotas::reset_daily(&lock.0) {
2783                    Ok(n) => n,
2784                    Err(e) => {
2785                        tracing::warn!("agent_quotas daily reset failed: {e}");
2786                        continue;
2787                    }
2788                }
2789            };
2790            if reset_count > 0 {
2791                tracing::info!("agent_quotas daily reset: {reset_count} row(s) zeroed");
2792            }
2793        }
2794    })
2795}
2796
2797/// Spawn the periodic WAL checkpoint loop. First checkpoint runs
2798/// `interval / 2` after start (staggered from the GC loop to avoid
2799/// lock-contention bursts on cold start), then on a fixed cadence.
2800#[must_use]
2801pub fn spawn_wal_checkpoint_loop(state: Db, interval: Duration) -> JoinHandle<()> {
2802    let half = interval / 2;
2803    tokio::spawn(async move {
2804        // First checkpoint runs halfway through the interval so the two
2805        // long-running maintenance tasks never overlap on cold start.
2806        tokio::time::sleep(half).await;
2807        loop {
2808            {
2809                let lock = state.lock().await;
2810                match db::checkpoint(&lock.0) {
2811                    Ok(()) => tracing::debug!("wal checkpoint: ok"),
2812                    Err(e) => tracing::warn!("wal checkpoint failed: {e}"),
2813                }
2814            }
2815            tokio::time::sleep(interval).await;
2816        }
2817    })
2818}
2819
2820// ---------------------------------------------------------------------------
2821// Router composition
2822// ---------------------------------------------------------------------------
2823
2824/// Compose the production HTTP router. Thin wrapper around
2825/// [`crate::build_router`] (the W3-vintage source of truth for the
2826/// route table). `daemon_runtime::build_router` exists so test code in
2827/// this module can build the router without naming `crate::build_router`
2828/// directly, and so future router-composition logic (e.g. middleware
2829/// reorder, custom layers) lives in one place.
2830#[must_use]
2831pub fn build_router(app_state: AppState, api_key_state: ApiKeyState) -> Router {
2832    crate::build_router(api_key_state, app_state)
2833}
2834
2835// ---------------------------------------------------------------------------
2836// serve() — the HTTP daemon body, post-W6 split.
2837// ---------------------------------------------------------------------------
2838
2839/// Aggregated state produced by [`bootstrap_serve`].
2840pub struct ServeBootstrap {
2841    pub app_state: AppState,
2842    pub api_key_state: ApiKeyState,
2843    pub db_state: Db,
2844    pub archive_max_days: Option<i64>,
2845    pub task_handles: Vec<JoinHandle<()>>,
2846    /// Round-3 F12 — lifecycle outcome of the daemon's signing-keypair
2847    /// auto-gen path, captured by [`ensure_and_load_daemon_keypair`].
2848    /// Read by [`serve`] when composing the F8/F12 startup banner so
2849    /// operators see whether a fresh key was created on first boot.
2850    pub daemon_keypair_outcome: Option<crate::identity::keypair::EnsureOutcome>,
2851    /// v0.7.0 H7 (round-2) — resolved per-request HTTP timeout. The
2852    /// `serve` path passes this to [`crate::build_router_with_timeout`]
2853    /// so the timeout middleware is wired with the operator's
2854    /// `request_timeout_secs` (default 60 s).
2855    pub request_timeout: std::time::Duration,
2856    /// v0.7.0 Policy-Engine Item 3 — shared atomic metrics handle for the
2857    /// deferred-audit drainer. `serve` polls these on the shutdown path
2858    /// (after the HTTP server has quiesced) to wait for every submitted
2859    /// refusal to flush into `signed_events` before the WAL checkpoint +
2860    /// process exit. The producer-side queue itself lives on `AppState`
2861    /// and inside the process-wide governance-hook `OnceLock`s, so this
2862    /// metrics handle is the only drain-observability surface `serve`
2863    /// retains after the queue is moved into `AppState`.
2864    pub deferred_audit_metrics: crate::governance::deferred_audit::DeferredAuditMetrics,
2865}
2866
/// v0.7.x (issue #1169) — resolve the configured embedder dim for the
/// postgres-schema bootstrap (used by [`build_store_handle`]).
///
/// Resolution ladder (first arm wins):
///
/// 1. [`crate::config::AppConfig::resolve_embeddings`] returns
///    `ResolvedEmbeddings.embedding_dim` populated by the canonical
///    [`crate::config::canonical_embedding_dim`] lookup table when the
///    operator-picked model id is in [`crate::config::KNOWN_EMBEDDING_DIMS`].
/// 2. Legacy flat-field path: parse `app_config.embedding_model` as the
///    2-family [`crate::config::EmbeddingModel`] enum and pull its
///    compile-time `dim()` (`nomic_embed_v15` / `mini_lm_l6_v2`).
/// 3. Tier-preset fallback when neither resolver nor legacy parses
///    yields a dim — the historical pre-#1169 behaviour, retained as
///    the last-resort default.
///
/// Returns `None` only when no embedder is configured at all
/// (`tier_config.embedding_model.is_none()` AND no operator override) —
/// i.e. the keyword-only tier. The postgres bootstrap then falls back
/// to `DEFAULT_EMBEDDING_DIM`, as described on the
/// `configured_embedding_dim` parameter of `build_store_handle`.
#[cfg(feature = "sal")]
#[must_use]
// NOTE(review): presumably required by the legacy flat
// `AppConfig::embedding_model` field read in arm 2 — confirm.
#[allow(deprecated)]
fn resolve_configured_embedding_dim(
    app_config: &crate::config::AppConfig,
    tier_config: &crate::config::TierConfig,
) -> Option<u32> {
    // Substituted only when a model's `dim()` does not fit in `u32`.
    const FALLBACK_DIM: u32 = 384;

    let preset = tier_config.embedding_model;
    let resolved = app_config.resolve_embeddings();
    resolved
        .embedding_dim
        .or_else(|| {
            app_config
                .embedding_model
                .as_deref()
                .and_then(|raw| raw.parse::<crate::config::EmbeddingModel>().ok())
                .map(|m| u32::try_from(m.dim()).unwrap_or(FALLBACK_DIM))
        })
        .or_else(|| preset.map(|m| u32::try_from(m.dim()).unwrap_or(FALLBACK_DIM)))
}

/// v0.7.0 #1548 — resolve the curator's SAL store handle from the same
/// URL-scheme dispatch the HTTP `serve` path uses. When `store_url` is
/// `Some`, the adapter is bound to the URL-resolved backend (SQLite *or*
/// Postgres); when `None`, it falls through to a SQLite store at the
/// `--db` path. The embedder dim + Postgres pool sizing are resolved
/// from `app_config` exactly as in `serve` so a postgres-backed curator
/// bootstraps an identically-shaped schema/pool to the HTTP daemon
/// pointed at the same federated store.
///
/// Returns only the `Arc<dyn MemoryStore>` — the curator passes do not
/// need the [`crate::handlers::StorageBackend`] tag the HTTP daemon
/// threads into its `AppState`.
///
/// # Errors
///
/// Propagates any [`build_store_handle`] failure, with the context
/// `"build SAL store handle for curator"` attached.
#[cfg(feature = "sal")]
pub(crate) async fn build_curator_store(
    store_url: Option<&str>,
    db_path: &Path,
    app_config: &crate::config::AppConfig,
) -> Result<Arc<dyn crate::store::MemoryStore>> {
    let tier_config = app_config.effective_tier(None).config();
    let configured_embedding_dim = resolve_configured_embedding_dim(app_config, &tier_config);
    let (_backend, store) = build_store_handle(
        store_url,
        db_path,
        app_config.postgres_statement_timeout_secs,
        configured_embedding_dim,
        app_config.resolve_pg_pool(),
    )
    .await
    .context("build SAL store handle for curator")?;
    Ok(store)
}

/// v0.7.0 Wave-3 — resolve a [`MemoryStore`] handle from the operator's
/// `--store-url` (when set) or fall back to a [`SqliteStore`] wrapping
/// the on-disk database `--db` already opened.
///
/// Returns the resolved [`StorageBackend`] tag plus the polymorphic
/// `Arc<dyn MemoryStore>` so the caller can wire both fields onto
/// `AppState` and have downstream handlers branch on the tag without
/// dynamic-dispatch probes.
///
/// URL precedence:
///
/// - `Some(url)` accepted by `crate::migrate::is_postgres_url` (checked
///   on the ASCII-lowercased URL; per the original notes,
///   `postgres://...` / `postgresql://...`) → the Postgres adapter via
///   `PostgresStore::connect_with_dim_and_timeout_auto_migrate` when an
///   embedder dim is configured, else
///   `PostgresStore::connect_with_dim_and_timeout` at
///   `DEFAULT_EMBEDDING_DIM`; resolves to [`StorageBackend::Postgres`].
///   Requires `--features sal-postgres` at build time; the URL is
///   rejected at runtime under a sal-only build with a clear error.
/// - `Some("sqlite:///path")` → [`SqliteStore::open`]; resolves to
///   [`StorageBackend::Sqlite`]. The `sqlite://` scheme is matched
///   ASCII case-insensitively, like the Postgres check. The on-disk
///   path may or may not be the same file `--db` already opened — both
///   views see the same rows when they coincide; the SQLite
///   file-locking layer arbitrates any cross-connection contention.
/// - `None` → [`SqliteStore::open`] against `db_path`; resolves to
///   [`StorageBackend::Sqlite`]. The default behaviour preserved
///   for every operator who has not opted in to `--store-url`.
///
/// # Errors
///
/// Any other URL is rejected with an "unrecognised --store-url" error
/// that echoes the password-redacted URL; this is intended to match
/// the diagnostic of `crate::migrate::open_store` so `serve`,
/// `migrate`, and `schema-init` stay consistent. Adapter open/connect
/// failures are propagated with context.
///
/// [`MemoryStore`]: crate::store::MemoryStore
/// [`SqliteStore`]: crate::store::sqlite::SqliteStore
/// [`SqliteStore::open`]: crate::store::sqlite::SqliteStore::open
/// [`StorageBackend`]: crate::handlers::StorageBackend
/// [`StorageBackend::Postgres`]: crate::handlers::StorageBackend::Postgres
/// [`StorageBackend::Sqlite`]: crate::handlers::StorageBackend::Sqlite
#[cfg(feature = "sal")]
async fn build_store_handle(
    store_url: Option<&str>,
    db_path: &Path,
    postgres_statement_timeout_secs: Option<u64>,
    // Issue #877: configured embedder dim. `None` keeps the legacy
    // `DEFAULT_EMBEDDING_DIM` (384, MiniLM) behaviour for callers that
    // explicitly do not load an embedder (keyword-only deployments).
    // When `Some(dim)` is passed, the postgres adapter takes the
    // auto-migrate path so a fresh-container schema bootstrapped at the
    // default 384 is converted in-place to match the configured
    // embedder's actual dimension (e.g. 768 for `nomic_embed_v15`).
    configured_embedding_dim: Option<u32>,
    // Resolved Postgres connection-pool sizing (`AI_MEMORY_PG_POOL_MAX` /
    // `_MIN` / `_ACQUIRE_TIMEOUT_SECS` > config.toml > compiled default),
    // produced by `AppConfig::resolve_pg_pool`. Threaded into the sqlx
    // `PgPoolOptions` build; inert on the sqlite path.
    pool: crate::store::PoolConfig,
) -> Result<(
    crate::handlers::StorageBackend,
    Arc<dyn crate::store::MemoryStore>,
)> {
    use crate::handlers::StorageBackend;

    // URL scheme prefix that selects the SQLite adapter.
    const SQLITE_URL_SCHEME: &str = "sqlite://";

    match store_url {
        Some(url) => {
            let lowered = url.to_ascii_lowercase();
            if crate::migrate::is_postgres_url(&lowered) {
                #[cfg(feature = "sal-postgres")]
                {
                    let timeout = postgres_statement_timeout_secs
                        .unwrap_or(crate::store::postgres::DEFAULT_STATEMENT_TIMEOUT_SECS);
                    // Issue #877: route through the auto-migrate entry
                    // point when the daemon resolved a configured
                    // embedder dim. Bootstrap goes via `connect_with_dim`
                    // so the *fresh* schema lands `vector(<dim>)` from
                    // the very first INIT; the auto-migrate then handles
                    // the pre-existing-schema-at-wrong-dim case.
                    // #1579 A3 (SECURITY) — log the password-redacted
                    // URL. Pre-fix this line shipped the full
                    // `--store-url` (credential included) to journald
                    // at INFO.
                    let display_url = crate::logging::redact_url_password(url);
                    let store = if let Some(dim) = configured_embedding_dim {
                        tracing::info!(
                            "Wave-3 (issue #877): opening Postgres SAL store at {display_url} \
                             (statement_timeout={timeout}s, embedding_dim={dim}, auto_migrate=on, \
                             pool_max={}, pool_min={}, acquire_timeout={}s)",
                            pool.max_connections,
                            pool.min_connections,
                            pool.acquire_timeout_secs
                        );
                        crate::store::postgres::PostgresStore::connect_with_dim_and_timeout_auto_migrate(
                            url, dim, timeout, pool,
                        )
                        .await
                        .context("connect postgres adapter (auto-migrate dim)")?
                    } else {
                        tracing::info!(
                            "Wave-3: opening Postgres SAL store at {display_url} \
                             (statement_timeout={timeout}s, no embedder configured, \
                             pool_max={}, pool_min={}, acquire_timeout={}s)",
                            pool.max_connections,
                            pool.min_connections,
                            pool.acquire_timeout_secs
                        );
                        crate::store::postgres::PostgresStore::connect_with_dim_and_timeout(
                            url,
                            crate::store::postgres::DEFAULT_EMBEDDING_DIM,
                            timeout,
                            pool,
                        )
                        .await
                        .context("connect postgres adapter")?
                    };
                    Ok((StorageBackend::Postgres, Arc::new(store)))
                }
                #[cfg(not(feature = "sal-postgres"))]
                {
                    let _ = url;
                    let _ = postgres_statement_timeout_secs;
                    let _ = configured_embedding_dim;
                    let _ = pool;
                    anyhow::bail!(
                        "--store-url postgres:// requires the binary to be built with \
                         --features sal-postgres; this binary was built with --features sal only"
                    );
                }
            } else if let Some(path) = url
                .get(..SQLITE_URL_SCHEME.len())
                .filter(|scheme| scheme.eq_ignore_ascii_case(SQLITE_URL_SCHEME))
                .and_then(|_| url.get(SQLITE_URL_SCHEME.len()..))
            {
                // URL schemes are case-insensitive, so `Sqlite://` is
                // accepted alongside `sqlite://` / `SQLITE://`. The
                // checked `get` calls keep this panic-free on short or
                // non-ASCII input.
                //
                // `sqlite:///abs` leaves `/abs` untouched, while a
                // doubled leading slash (`sqlite:////abs`) collapses to
                // a single one; relative paths pass through unchanged.
                let clean = path
                    .strip_prefix('/')
                    .map_or(path, |p| if p.starts_with('/') { p } else { path });
                tracing::info!("Wave-3: opening SQLite SAL store at {clean} (--store-url)");
                let store = crate::store::sqlite::SqliteStore::open(clean)
                    .map_err(|e| anyhow::anyhow!("open sqlite adapter: {e}"))?;
                Ok((StorageBackend::Sqlite, Arc::new(store)))
            } else {
                // #1579 A3 (SECURITY) — a mistyped scheme can still
                // carry credentials; redact before echoing.
                anyhow::bail!(
                    "unrecognised --store-url: {} (expected sqlite:///path or postgres://...)",
                    crate::logging::redact_url_password(url)
                )
            }
        }
        None => {
            // The Postgres-only knobs are inert on the default SQLite path.
            let _ = postgres_statement_timeout_secs;
            let _ = configured_embedding_dim;
            let _ = pool;
            tracing::debug!("Wave-3: --store-url absent; opening SQLite SAL store at --db path");
            let store = crate::store::sqlite::SqliteStore::open(db_path)
                .map_err(|e| anyhow::anyhow!("open sqlite adapter: {e}"))?;
            Ok((StorageBackend::Sqlite, Arc::new(store)))
        }
    }
}
3098
/// Actor/queue label for wire-action governance consultations.
const WIRE_ACTION_ACTOR: &str = "daemon:wire_action";

/// v0.7.0 #1455 — `true` when the operator opted into the legacy
/// permissive governance posture via
/// `AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR` (`1` / `true`, the latter
/// matched ASCII case-insensitively). An unset, non-Unicode, or any
/// other value yields `false`, which keeps the fail-CLOSED secure
/// default. Shared by the storage pre-write hook and the wire-check
/// hook so the two read the same override identically.
fn governance_fail_open_on_error() -> bool {
    std::env::var(ENV_GOVERNANCE_FAIL_OPEN)
        .is_ok_and(|v| v == "1" || v.eq_ignore_ascii_case("true"))
}

/// #1455 legacy fail-open opt-out env var — one spelling shared by
/// [`governance_fail_open_on_error`] and the operator-facing log hints
/// (#1558).
const ENV_GOVERNANCE_FAIL_OPEN: &str = "AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR";
3117
3118/// #1583 (SEC, MED) — install the substrate `GOVERNANCE_PRE_WRITE`
3119/// storage hook (the L1-6 agent-action `memory_write` gate). Extracted
3120/// from `bootstrap_serve` so every LONG-LIVED write surface installs
3121/// the SAME closure: the HTTP daemon (`serve`) AND the MCP stdio server
3122/// (`run_mcp_server`). Pre-#1583 only `serve` installed it, so
3123/// operator-configured agent-action rules were silently bypassed for
3124/// every MCP-driven write — the primary NHI agent interface.
3125///
3126/// CLI one-shot binaries (`ai-memory store …`) intentionally do NOT
3127/// call this (the L1-6 E operator-as-actor exemption — see
3128/// `src/storage/mod.rs` §hook doc + `cli_one_shot_does_not_install_hook`);
3129/// the operator's direct substrate ops stay unimpeded by design.
3130///
3131/// `hook_consultation_conn` MUST be a connection distinct from the
3132/// caller's main write connection (the hook fires synchronously from
3133/// inside `storage::insert`, which holds the main connection). When it
3134/// is `None` (open failed at install time) the hook fails CLOSED per
3135/// #1455.
3136pub(crate) fn install_governance_pre_write_hook(
3137    db_path: &Path,
3138    deferred_audit_queue: &crate::governance::deferred_audit::DeferredAuditQueue,
3139    rule_cache: &Arc<crate::governance::rule_cache::RuleCache>,
3140    hook_consultation_conn: Option<Arc<std::sync::Mutex<rusqlite::Connection>>>,
3141) {
3142    use crate::governance::agent_action::{
3143        AgentAction, Decision as RuleDecision, check_agent_action_deferred_cached,
3144    };
3145    let rules_db_path = db_path.to_path_buf();
3146    let queue_for_hook = deferred_audit_queue.clone();
3147    let cache_for_hook = Arc::clone(rule_cache);
3148    let conn_for_hook = hook_consultation_conn;
3149    let install_result = crate::storage::GOVERNANCE_PRE_WRITE.set(Box::new(
3150        move |mem: &crate::models::Memory| -> std::result::Result<(), String> {
3151            let action = AgentAction::Custom {
3152                custom_kind: "memory_write".to_string(),
3153                payload: serde_json::json!({
3154                    "namespace": mem.namespace,
3155                    "tier": mem.tier.as_str(),
3156                    (field_names::MEMORY_KIND): mem.memory_kind.as_str(),
3157                    "title": mem.title,
3158                }),
3159            };
3160            // Resolve the agent_id from the memory's metadata
3161            // (every substrate-written memory carries it under
3162            // `metadata.agent_id` — see CLAUDE.md §"Agent
3163            // Identity"). Fall back to a stable hook-source tag
3164            // when the metadata key is missing so the audit row
3165            // still attributes the refusal.
3166            let agent_id = mem
3167                .metadata
3168                .get("agent_id")
3169                .and_then(|v| v.as_str())
3170                .unwrap_or("substrate:pre_write_hook")
3171                .to_string();
3172            let Some(conn_arc) = conn_for_hook.as_ref() else {
3173                // v0.7.0 #1455 (SEC, MED) — FAIL-CLOSED when the hook
3174                // consultation connection could not be opened at
3175                // install time. The pre-#1455 posture degraded to
3176                // ALLOW, which meant a daemon that lost its rules DB
3177                // at boot (permissions flip, disk pressure, an
3178                // attacker who can make `db::open` fail) silently
3179                // disabled the entire substrate write-gate while
3180                // continuing to accept writes. That is the same
3181                // bypass class #1054 closed for consultation ERRORS;
3182                // an unavailable connection is just a permanent
3183                // consultation failure and gets the same secure
3184                // default + the same operator escape hatch.
3185                return governance_consultation_unavailable(
3186                    &queue_for_hook,
3187                    &agent_id,
3188                    &action,
3189                    &rules_db_path,
3190                    "L1-6 governance pre-write",
3191                );
3192            };
3193            let conn_guard = match conn_arc.lock() {
3194                Ok(g) => g,
3195                Err(poisoned) => {
3196                    tracing::warn!(
3197                        "L1-6 governance pre-write: consultation connection mutex poisoned; \
3198                             recovering inner connection and continuing"
3199                    );
3200                    poisoned.into_inner()
3201                }
3202            };
3203            let conn_for_check: &rusqlite::Connection = &conn_guard;
3204            match check_agent_action_deferred_cached(
3205                conn_for_check,
3206                Some(&cache_for_hook),
3207                &agent_id,
3208                &action,
3209                &queue_for_hook,
3210            ) {
3211                Ok(RuleDecision::Allow | RuleDecision::Warn { .. }) => Ok(()),
3212                Ok(RuleDecision::Refuse { rule_id, reason }) => {
3213                    tracing::info!(
3214                        "L1-6 governance pre-write refused namespace={:?} rule_id={} \
3215                             reason={} (chain-logged via deferred audit queue)",
3216                        mem.namespace,
3217                        rule_id,
3218                        reason
3219                    );
3220                    Err(reason)
3221                }
3222                Err(e) => {
3223                    // v0.7.0 #1054 (Agent-2 #4) — fail-CLOSED on
3224                    // rule-consultation error and chain-log the
3225                    // refusal so an attacker who can induce
3226                    // consultation errors (concurrent PRAGMA
3227                    // wal_checkpoint, ATTACH-as-readonly
3228                    // contention, etc.) cannot race a refused
3229                    // write through the gate. The pre-#1054
3230                    // posture degraded to ALLOW, which made the
3231                    // gate dependent on the rule consultation
3232                    // never erroring — a fragile invariant.
3233                    //
3234                    // Operators with a legitimate need for the
3235                    // legacy fail-open posture (e.g. during a
3236                    // chaos-test window where transient SQL
3237                    // pressure is expected) can opt back in via
3238                    // `AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR=1`.
3239                    // The unsafe override is logged at WARN on
3240                    // every fire and counts toward the
3241                    // governance posture surface so an audit can
3242                    // detect the legacy-permissive mode.
3243                    let reason = format!("governance:consultation_failed: {e}");
3244                    let fail_open = std::env::var("AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR")
3245                        .map(|v| v == "1" || v.eq_ignore_ascii_case("true"))
3246                        .unwrap_or(false);
3247                    // Emit a governance.refusal-shaped row to the
3248                    // deferred audit queue regardless of the
3249                    // open/closed decision so the audit chain
3250                    // captures the consultation failure either
3251                    // way. The synthetic Decision::Refuse uses
3252                    // rule_id=`governance:consultation_failed` so
3253                    // a downstream auditor can distinguish
3254                    // "no rule fired" from "consultation broke".
3255                    let synthetic_refusal = RuleDecision::Refuse {
3256                        rule_id: "governance:consultation_failed".to_string(),
3257                        reason: reason.clone(),
3258                    };
3259                    queue_for_hook.submit_refusal(&agent_id, &action, &synthetic_refusal);
3260                    if fail_open {
3261                        tracing::warn!(
3262                            "L1-6 governance pre-write: rule consultation failed: {}; \
3263                                 AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR=1 — \
3264                                 degrading to ALLOW (UNSAFE, legacy posture)",
3265                            e
3266                        );
3267                        Ok(())
3268                    } else {
3269                        tracing::warn!(
3270                            "L1-6 governance pre-write: rule consultation failed: {}; \
3271                                 failing CLOSED (post-#1054 secure default — \
3272                                 set AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR=1 to revert)",
3273                            e
3274                        );
3275                        Err(reason)
3276                    }
3277                }
3278            }
3279        },
3280    ));
3281    if install_result.is_err() {
3282        // Already installed — happens if the same process boots a
3283        // write surface twice (test reuse via `bootstrap_serve`, or a
3284        // process that runs both `serve` and `mcp`). The OnceLock
3285        // contract guarantees the FIRST installed closure wins; we log
3286        // and proceed rather than abort.
3287        tracing::debug!(
3288            "L1-6 governance pre-write hook already installed (process-wide OnceLock); \
3289             the existing hook remains active for this process"
3290        );
3291    } else {
3292        tracing::info!(
3293            "L1-6 governance pre-write hook installed (substrate-authoritative \
3294             memory_write gate active + deferred chain-log on refusal)"
3295        );
3296    }
3297}
3298
3299/// #1685 — shared installer for the wire-action egress gate
3300/// ([`crate::governance::wire_check::GOVERNANCE_PRE_ACTION`]) so BOTH the HTTP
3301/// daemon (`serve`) and the MCP stdio loop (`run_mcp_server`) install the SAME
3302/// closure. Before this, only `serve` installed it, leaving the `skill_export`
3303/// (FilesystemWrite) and LLM (NetworkRequest) egress sinks fail-OPEN on the MCP
3304/// surface — the primary NHI interface. Process-wide `OnceLock`, so a second
3305/// install (in-process serve+mcp) is a logged no-op. Mirrors
3306/// [`install_governance_pre_write_hook`]; the gate covers the agent-EXTERNAL
3307/// variants that have an egress sink today (FilesystemWrite/NetworkRequest/
3308/// ProcessSpawn; Bash + Custom have none yet — v0.8 #1695).
3309pub(crate) fn install_governance_pre_action_hook(
3310    db_path: &Path,
3311    deferred_audit_queue: &crate::governance::deferred_audit::DeferredAuditQueue,
3312    rule_cache: &Arc<crate::governance::rule_cache::RuleCache>,
3313    hook_consultation_conn: Option<Arc<std::sync::Mutex<rusqlite::Connection>>>,
3314) {
3315    use crate::governance::agent_action::{
3316        AgentAction, Decision as RuleDecision, check_agent_action_deferred_cached,
3317    };
3318    let rules_db_path = db_path.to_path_buf();
3319    let cache_for_wire_check = Arc::clone(rule_cache);
3320    let queue_for_wire_check = deferred_audit_queue.clone();
3321    let conn_for_wire_check = hook_consultation_conn;
3322    let install_result = crate::governance::wire_check::GOVERNANCE_PRE_ACTION.set(Box::new(
3323        move |action: &AgentAction| -> std::result::Result<(), String> {
3324            let Some(conn_arc) = conn_for_wire_check.as_ref() else {
3325                // #1455 — FAIL-CLOSED when the consultation connection is
3326                // unavailable; a daemon-internal wire action is higher-stakes
3327                // than a storage write, so degrading to ALLOW would be the
3328                // worst place to fail open.
3329                return governance_consultation_unavailable(
3330                    &queue_for_wire_check,
3331                    WIRE_ACTION_ACTOR,
3332                    action,
3333                    &rules_db_path,
3334                    "wire_check",
3335                );
3336            };
3337            let conn_guard = match conn_arc.lock() {
3338                Ok(g) => g,
3339                Err(poisoned) => {
3340                    tracing::warn!(
3341                        "wire_check: consultation connection mutex poisoned; \
3342                         recovering inner connection and continuing"
3343                    );
3344                    poisoned.into_inner()
3345                }
3346            };
3347            let conn_for_check: &rusqlite::Connection = &conn_guard;
3348            match check_agent_action_deferred_cached(
3349                conn_for_check,
3350                Some(&cache_for_wire_check),
3351                WIRE_ACTION_ACTOR,
3352                action,
3353                &queue_for_wire_check,
3354            ) {
3355                Ok(RuleDecision::Allow | RuleDecision::Warn { .. }) => Ok(()),
3356                Ok(RuleDecision::Refuse { rule_id, reason }) => {
3357                    tracing::info!(
3358                        "wire_check refused action kind={} rule_id={} reason={} \
3359                         (chain-logged via deferred audit queue)",
3360                        action.kind(),
3361                        rule_id,
3362                        reason,
3363                    );
3364                    Err(reason)
3365                }
3366                Err(e) => {
3367                    // #1054 — same fail-CLOSED posture as the storage hook;
3368                    // env escape hatch AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR=1.
3369                    let reason = format!("governance:consultation_failed: {e}");
3370                    let fail_open = std::env::var("AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR")
3371                        .map(|v| v == "1" || v.eq_ignore_ascii_case("true"))
3372                        .unwrap_or(false);
3373                    let synthetic_refusal = RuleDecision::Refuse {
3374                        rule_id: "governance:consultation_failed".to_string(),
3375                        reason: reason.clone(),
3376                    };
3377                    queue_for_wire_check.submit_refusal(
3378                        WIRE_ACTION_ACTOR,
3379                        action,
3380                        &synthetic_refusal,
3381                    );
3382                    if fail_open {
3383                        tracing::warn!(
3384                            "wire_check: rule consultation failed: {}; \
3385                             AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR=1 — \
3386                             degrading to ALLOW for this action ({}) (UNSAFE, legacy posture)",
3387                            e,
3388                            action.kind(),
3389                        );
3390                        Ok(())
3391                    } else {
3392                        tracing::warn!(
3393                            "wire_check: rule consultation failed: {}; failing CLOSED \
3394                             for this action ({}) (post-#1054 secure default — set \
3395                             AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR=1 to revert)",
3396                            e,
3397                            action.kind(),
3398                        );
3399                        Err(reason)
3400                    }
3401                }
3402            }
3403        },
3404    ));
3405    if install_result.is_err() {
3406        tracing::debug!(
3407            "wire_check pre-action hook already installed (process-wide OnceLock); \
3408             the existing hook remains active for this daemon"
3409        );
3410    } else {
3411        tracing::info!(
3412            "wire_check pre-action hook installed (agent-action gate active for \
3413             FilesystemWrite/NetworkRequest/ProcessSpawn; n26: Bash + Custom \
3414             have no egress sink yet — structural coverage tracked v0.8 #1695)"
3415        );
3416    }
3417}
3418
3419/// v0.7.0 #1455 (SEC, MED) — shared fail-CLOSED handler for the case
3420/// where a governance hook's rule-consultation connection could not be
3421/// opened at install time. Chain-logs a synthetic
3422/// `governance:consultation_unavailable` refusal, then returns the
3423/// fail-CLOSED verdict (`Err`) unless the operator opted into the
3424/// legacy permissive posture. Reads the env override exactly once and
3425/// delegates the verdict to [`governance_consultation_unavailable_inner`]
3426/// so the decision is unit-testable without env mutation.
3427fn governance_consultation_unavailable(
3428    queue: &crate::governance::deferred_audit::DeferredAuditQueue,
3429    agent_id: &str,
3430    action: &crate::governance::agent_action::AgentAction,
3431    rules_db_path: &Path,
3432    surface: &str,
3433) -> std::result::Result<(), String> {
3434    governance_consultation_unavailable_inner(
3435        queue,
3436        agent_id,
3437        action,
3438        rules_db_path,
3439        surface,
3440        governance_fail_open_on_error(),
3441    )
3442}
3443
3444/// Pure inner of [`governance_consultation_unavailable`] — `fail_open`
3445/// is passed explicitly so tests can pin both the secure default
3446/// (`fail_open = false` ⇒ `Err`, the security contract) and the
3447/// operator-override path (`fail_open = true` ⇒ `Ok`) without touching
3448/// process env.
3449fn governance_consultation_unavailable_inner(
3450    queue: &crate::governance::deferred_audit::DeferredAuditQueue,
3451    agent_id: &str,
3452    action: &crate::governance::agent_action::AgentAction,
3453    rules_db_path: &Path,
3454    surface: &str,
3455    fail_open: bool,
3456) -> std::result::Result<(), String> {
3457    use crate::governance::agent_action::Decision as RuleDecision;
3458    let reason = format!(
3459        "governance:consultation_unavailable: rules DB at {} could not be opened at hook install",
3460        rules_db_path.display(),
3461    );
3462    // Chain-log the consultation failure regardless of the open/closed
3463    // decision so an audit can detect that the gate ran degraded.
3464    let synthetic_refusal = RuleDecision::Refuse {
3465        rule_id: "governance:consultation_unavailable".to_string(),
3466        reason: reason.clone(),
3467    };
3468    queue.submit_refusal(agent_id, action, &synthetic_refusal);
3469    if fail_open {
3470        tracing::warn!(
3471            "{surface}: hook consultation connection unavailable (rules DB at {}); \
3472             {ENV_GOVERNANCE_FAIL_OPEN}=1 — degrading to ALLOW (UNSAFE, legacy posture)",
3473            rules_db_path.display(),
3474        );
3475        Ok(())
3476    } else {
3477        tracing::warn!(
3478            "{surface}: hook consultation connection unavailable (rules DB at {}); failing CLOSED \
3479             (#1455 secure default — set {ENV_GOVERNANCE_FAIL_OPEN}=1 to revert)",
3480            rules_db_path.display(),
3481        );
3482        Err(reason)
3483    }
3484}
3485
/// #1458 (SEC, MED) — reports whether the operator opted into the
/// hardened posture where the daemon hard-refuses to start without an
/// `api_key` on ANY bind host, loopback included.
///
/// The opt-in is the `AI_MEMORY_REQUIRE_API_KEY` env var; it counts as
/// set when its value is exactly `1` or is `true` in any ASCII case.
/// Anything else — including an unset variable — leaves it off.
///
/// This exists for deployments that put a reverse proxy,
/// `--network=host` container, or `socat` forward in front of the
/// daemon: the loopback host string the daemon sees says nothing about
/// off-host reachability, so the string-match loopback guard alone
/// cannot protect them.
fn require_api_key_strict() -> bool {
    match std::env::var("AI_MEMORY_REQUIRE_API_KEY") {
        Ok(raw) => raw == "1" || raw.eq_ignore_ascii_case("true"),
        // Unset or unreadable: the opt-in is off.
        Err(_) => false,
    }
}
3498
/// #1458 (SEC, MED) — decide whether the daemon may bind given the
/// configured api_key, the bind `host`, and the `strict` opt-in.
///
/// Parameters:
///   - `api_key_present` — whether an API key is configured;
///   - `host` — the bind host exactly as configured (not resolved);
///   - `strict` — the `AI_MEMORY_REQUIRE_API_KEY` opt-in.
///
/// Returns:
///   - `Ok(None)` — safe to bind silently (api_key is set);
///   - `Ok(Some(warning))` — bind permitted but emit `warning` (keyless
///     loopback, default single-tenant posture);
///   - `Err(reason)` — refuse to bind (keyless non-loopback, or keyless
///     under the `strict` opt-in).
///
/// A host counts as loopback when it is `localhost` (any ASCII case),
/// an IP literal for which `std::net` reports `is_loopback()` (all of
/// 127.0.0.0/8, or `::1` in any textual spelling), or such an IPv6
/// literal wrapped in `[...]`. Any other value — including hostnames
/// this function cannot parse and IPv4-mapped IPv6 addresses — is
/// treated as non-loopback and refused.
///
/// Pulled out of `bootstrap_serve` so all three outcomes are unit
/// testable without standing up a daemon.
fn api_key_bind_guard(
    api_key_present: bool,
    host: &str,
    strict: bool,
) -> std::result::Result<Option<String>, String> {
    /// Classifies a configured bind host as loopback-only. Parsing the
    /// literal (rather than comparing against a fixed list of
    /// spellings) keeps equivalent forms such as `127.0.0.2` or a
    /// zero-padded `::1` from being reported as "non-loopback".
    fn is_loopback_host(host: &str) -> bool {
        if host.eq_ignore_ascii_case("localhost") {
            return true;
        }
        // URL-style bracketed literal: only IPv6 may be bracketed.
        if let Some(inner) = host.strip_prefix('[').and_then(|rest| rest.strip_suffix(']')) {
            return matches!(inner.parse::<std::net::Ipv6Addr>(), Ok(ip) if ip.is_loopback());
        }
        matches!(host.parse::<std::net::IpAddr>(), Ok(ip) if ip.is_loopback())
    }

    if api_key_present {
        return Ok(None);
    }
    // Strict mode is checked before the loopback test on purpose: it
    // must refuse keyless binds even when the host is loopback.
    if strict {
        return Err(format!(
            "refusing to start without an API key: AI_MEMORY_REQUIRE_API_KEY is set, which \
             mandates `api_key` on every bind (requested host {host:?}). A reverse proxy, \
             --network=host container, or socat forward can present loopback to the daemon \
             while exposing it off-host, so the loopback guard alone is insufficient. \
             Set top-level `api_key = \"...\"` in config (or --api-key on the CLI), or unset \
             AI_MEMORY_REQUIRE_API_KEY to fall back to the loopback-only default. (#1458)"
        ));
    }
    if !is_loopback_host(host) {
        return Err(format!(
            "refusing to bind to non-loopback address {host:?} without an API key: \
             the daemon's api_key is unset (default-off auth would expose every \
             privileged endpoint to any caller that can reach the bind address). \
             Either set top-level `api_key = \"...\"` in config (or --api-key on the CLI) and rebind, \
             or rebind to 127.0.0.1 / ::1 / localhost for a single-tenant deployment. \
             (v0.7.0 fix campaign S5-C1, 2026-05-13. Note: api_key is a TOP-LEVEL \
             AppConfig field per src/config.rs:2283; [api] subsection is silently ignored by serde.)"
        ));
    }
    Ok(Some(format!(
        "API key NOT configured — daemon bound to loopback {host:?}. \
         Privileged endpoints (POST /memories, /links, /agents, /subscriptions) \
         accept any caller that reaches this listener. #1458: a reverse proxy, \
         --network=host container, or socat forward presents loopback to the daemon \
         while exposing it off-host, re-opening this keyless write surface — set \
         top-level `api_key = \"...\"` (or AI_MEMORY_REQUIRE_API_KEY=1 to hard-require it) \
         for any deployment that is not strictly single-tenant on this host. \
         /approve and /reject remain HMAC-gated regardless."
    )))
}
3556
3557/// Build all daemon state and spawn background tasks. Returns the
3558/// aggregated state without binding any sockets — testable in isolation.
3559///
3560/// DOC-6: this function reads several legacy `AppConfig` fields
3561/// (`auto_tag_model`, `llm_model`, `ollama_url`) directly for v0.7.x
3562/// backward compat; the `#[allow(deprecated)]` carves out the legacy
3563/// reads while keeping the deprecation warning live for external
3564/// consumers.
3565#[allow(deprecated)]
3566pub async fn bootstrap_serve(
3567    db_path: &Path,
3568    args: &ServeArgs,
3569    app_config: &AppConfig,
3570) -> Result<ServeBootstrap> {
3571    // S5-C1 (v0.7.0 fix campaign 2026-05-13): refuse default-off auth
3572    // on non-loopback binds. When `api_key` is unset, the `api_key_auth`
3573    // middleware is a pass-through — every privileged endpoint (write,
3574    // approve, reject, governance state) is reachable by any caller
3575    // that can open a TCP connection. The K10 SSE/approval path is
3576    // HMAC-gated and the legacy /approve + /reject paths are now also
3577    // HMAC-gated (see `handlers::approve_pending` and
3578    // `handlers::reject_pending`), but the broader write surface
3579    // (POST /api/v1/memories, /links, /agents, /subscriptions, …)
3580    // still rides on `api_key_auth`. Refusing to bind to a routable
3581    // address with no API key configured is the safe default;
    // operators who *intentionally* run a public daemon must set
    // top-level `api_key` (or `--api-key` on the CLI) explicitly — an
    // `api_key` placed under an `[api]` subsection is silently ignored.
3584    match api_key_bind_guard(
3585        app_config.api_key.is_some(),
3586        args.host.as_str(),
3587        require_api_key_strict(),
3588    ) {
3589        Ok(None) => {}
3590        Ok(Some(warning)) => tracing::warn!("{warning}"),
3591        Err(reason) => anyhow::bail!("{reason}"),
3592    }
3593
3594    let resolved_ttl = app_config.effective_ttl();
3595    let archive_on_gc = app_config.effective_archive_on_gc();
3596    let conn = db::open(db_path)?;
3597
3598    // v0.7.0 SEC-2 (Cluster D, issue #767) — fail-OPEN diagnostic + the
3599    // operator-opt-in fail-CLOSED knob. When `governance_rules` has any
3600    // `enabled = 1` row AND no operator pubkey is resolved, the L1-6
3601    // loader honours every enabled row without signature verification
3602    // (pre-L1-6 compat mode). A SQL-write gadget that mutates
3603    // `governance_rules` can therefore install / flip rules without
3604    // operator consent.
3605    //
3606    // Default: surface a once-per-process `tracing::error!` so the
3607    // operator sees the fail-OPEN posture on every daemon start.
3608    //
3609    // Operator opt-in: `[governance] require_operator_pubkey = true`
3610    // promotes the diagnostic to a hard refusal — `bootstrap_serve`
3611    // returns an `anyhow::Error` and the daemon does NOT start. This
3612    // is the right posture for hardened deployments that want strict
3613    // enforcement BEFORE the pubkey lands.
3614    let enabled_rule_count =
3615        crate::governance::rules_store::count_enabled_rules(&conn).unwrap_or(0);
3616    let pubkey_resolved = crate::governance::rules_store::resolve_operator_pubkey().is_some();
3617    if enabled_rule_count > 0 && !pubkey_resolved {
3618        crate::governance::rules_store::log_missing_operator_pubkey_once(enabled_rule_count);
3619        if app_config
3620            .governance
3621            .as_ref()
3622            .is_some_and(|g| g.require_operator_pubkey)
3623        {
3624            anyhow::bail!(
3625                "SEC-2 fail-closed: `[governance] require_operator_pubkey = true` is set but \
3626                 `governance_rules` contains {enabled_rule_count} enabled row(s) AND no \
3627                 operator pubkey is resolved (AI_MEMORY_OPERATOR_PUBKEY unset AND \
3628                 ~/.config/ai-memory/operator.key.pub absent). Refusing to start: a fail-OPEN \
3629                 L1-6 loader would honour every enabled rule without signature verification. \
3630                 Run `ai-memory rules keygen` + `ai-memory rules sign-seed` to activate L1-6, \
3631                 or unset `require_operator_pubkey` to accept the pre-L1-6 posture."
3632            );
3633        }
3634    }
3635
3636    // v0.7.0 L1-6 Deliverable E (issue #691) — install the substrate
3637    // governance pre-write hook BEFORE any write paths come live. The
3638    // hook consults the operator-signed `governance_rules` table for
3639    // a refusal verdict at every `storage::insert*` callsite; a
3640    // refusal short-circuits the SQL `INSERT` cleanly (no row
3641    // written, MemoryError::RefusedByGovernance bubbled).
3642    //
3643    // Layering: the hook is a `OnceLock<Box<Fn>>` in `src/storage/mod.rs`
3644    // — installation is one-shot for the process lifetime. CLI
3645    // one-shot binaries (`ai-memory store`, `ai-memory mine`, …)
3646    // never reach this codepath and so leave the hook empty by
3647    // design (operator standing directive: rules gate AGENT writes,
3648    // not the operator's direct CLI ops).
3649    //
    // The closure consults the rules over a dedicated connection
    // (obtained via `db::open` against the same db_path — opened once
    // at install time since #1017, previously once per call) so it
    // does NOT contend with the substrate writer's lock held during
    // `storage::insert`. SQLite WAL mode allows the rule-read to
    // proceed in parallel. Failure to open the rule-consultation
    // connection now fails CLOSED per #1455; the earlier posture
    // degraded to ALLOW with a WARN so that a transient FS issue would
    // not wedge the write surface, and is only restored by the
    // operator's fail-open override. Either way the operator can
    // detect the degradation from the log surface.
3658    //
3659    // v0.7.0 Policy-Engine Item 3 (2026-05-14) — the hook now also
3660    // submits every refusal to the process-wide deferred-audit
3661    // queue via `check_agent_action_deferred`. The queue's
3662    // background drainer task chain-logs each refusal as a
3663    // `governance.refusal` row in `signed_events` AFTER the
3664    // in-flight `storage::insert` transaction has released its
3665    // lock. This closes the cryptographic-log gap that the prior
3666    // `_no_audit` variant left open (refusals were typed but not
3667    // chain-logged; the deadlock-avoidance came at the cost of
3668    // breaking the bypass-impossibility audit story for storage
3669    // writes).
3670    let (deferred_audit_queue, deferred_audit_supervisor) =
3671        crate::governance::deferred_audit::install_deferred_audit_drainer(db_path);
3672    // Capture the shared atomic metrics handle BEFORE the queue is cloned
3673    // into the governance hooks + moved onto `AppState`. `serve` polls
3674    // these on shutdown to drain the queue before the WAL checkpoint.
3675    let deferred_audit_metrics = deferred_audit_queue.metrics();
3676    tracing::info!(
3677        "policy-engine item 3: deferred-audit drainer spawned (chain-logs \
3678         storage refusals as `governance.refusal` rows in signed_events)"
3679    );
3680
3681    // v0.7.0 #991 — per-instance rule cache shared by the substrate
3682    // `GOVERNANCE_PRE_WRITE` storage hook (below), the
3683    // `wire_check::GOVERNANCE_PRE_ACTION` action hook (below), and the
3684    // `AppState.rule_cache` field (HTTP handler call sites). Cloning
3685    // the `Arc<RuleCache>` into each captures-by-reference; the cache
3686    // is dropped when the last reference (AppState + the two hooks)
3687    // goes away on daemon shutdown. Per-instance means multi-daemon
3688    // test fixtures don't cross-pollute (the contract that the #990
3689    // revert restored after #983 shipped a process-wide singleton).
3690    let rule_cache: Arc<crate::governance::rule_cache::RuleCache> =
3691        Arc::new(crate::governance::rule_cache::RuleCache::new());
3692
3693    // v0.7.0 #1017 (Agent-1 #3) — long-lived consultation connection
3694    // shared between the storage `GOVERNANCE_PRE_WRITE` hook and the
3695    // `wire_check::GOVERNANCE_PRE_ACTION` action hook. Pre-#1017 each
3696    // hook invocation called `db::open(&rules_db_path)` which runs
3697    // 4 PRAGMAs + SCHEMA execute_batch + migrate() + trigger probe —
3698    // ~1-2ms per write that paid the cost unconditionally even on
3699    // RuleCache hits. The #991 rule cache made the OPEN overhead the
3700    // dominant remaining hot-path cost; #1017 closes the gap by
3701    // opening the connection ONCE at install time and reusing it
3702    // across all hook invocations. The connection is wrapped in
3703    // `std::sync::Mutex` because hooks fire from both sync paths
3704    // (`storage::insert` is sync; wire-check is consulted from sync
3705    // `governance::wire_check::check` regardless of caller context).
3706    //
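    // If `db::open` fails at install time, the hooks are installed
    // with no consultation connection (`None`) and fail CLOSED on
    // every call per #1455, chain-logging a synthetic
    // `governance:consultation_unavailable` refusal. The pre-#1455
    // posture degraded to ALLOW with a WARN — same as the pre-#1017
    // per-call open-failure leg — and only the operator's fail-open
    // override restores it. The operator sees the diagnostic in
    // daemon logs and can re-attempt.
    // NOTE(review): the WARN text emitted below still says "degrade
    // to ALLOW" and appears to predate #1455 — confirm and update.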
3711    let hook_consultation_conn: Option<Arc<std::sync::Mutex<rusqlite::Connection>>> =
3712        match db::open(db_path) {
3713            Ok(c) => Some(Arc::new(std::sync::Mutex::new(c))),
3714            Err(e) => {
3715                tracing::warn!(
3716                    target: "ai_memory::daemon_runtime",
3717                    "v0.7.0 #1017: failed to open hook consultation connection at {}: {}; \
3718                     governance hooks will degrade to ALLOW on every invocation",
3719                    db_path.display(),
3720                    e,
3721                );
3722                None
3723            }
3724        };
3725
3726    // #1582/#1583 (SEC) — the substrate pre-write gate is installed via
3727    // the shared helper so EVERY long-lived write surface installs the
3728    // SAME closure. `serve` (here) and `mcp` (`run_mcp_server`) both call
3729    // it; CLI one-shot binaries intentionally do NOT (the L1-6 E
3730    // operator-as-actor exemption — see the helper's doc).
3731    install_governance_pre_write_hook(
3732        db_path,
3733        &deferred_audit_queue,
3734        &rule_cache,
3735        hook_consultation_conn.clone(),
3736    );
3737
3738    // v0.7.0 (issue #691 fold-1) — install the universal AgentAction
3739    // wire-point hook BEFORE any daemon-side write/network/spawn paths
3740    // come live. Mirrors the L1-6 E pattern above but covers the FOUR
3741    // agent-EXTERNAL action variants (Bash, FilesystemWrite,
3742    // NetworkRequest, ProcessSpawn) consulted by skill_export,
3743    // federation::sync, hooks::executor, and the LLM client. CLI
3744    // one-shot binaries never reach this path so the hook stays empty
3745    // for direct operator ops (L1-6 E operator-as-actor exemption).
3746    //
3747    // v0.7.0 #1034 (Agent-6 #2) — wire-check refusals now flow into the
3748    // SAME deferred-audit queue the substrate pre-write hook uses, so
3749    // every refusal — storage AND wire — chain-logs a `governance.refusal`
3750    // row in `signed_events`. Pre-#1034 the wire-check refusals only
3751    // emitted to the forensic JSONL log; the cryptographic-audit chain
3752    // missed them, breaking the bypass-impossibility audit story for the
3753    // four agent-EXTERNAL action variants. The closure uses the stable
3754    // `daemon:wire_action` tag for `agent_id` attribution because the
3755    // wire-check fires inside daemon-internal subsystems (federation,
3756    // hooks, LLM, skill_export) where there is no per-request agent
3757    // identity bound to the action; the storage hook's
3758    // `substrate:pre_write_hook` fallback uses the same shape.
3759    // #1685 — wire-action egress gate, via the shared installer (also called
3760    // by run_mcp_server, so the MCP surface is no longer fail-open).
3761    install_governance_pre_action_hook(
3762        db_path,
3763        &deferred_audit_queue,
3764        &rule_cache,
3765        hook_consultation_conn.clone(),
3766    );
3767
3768    // Issue #219: build the embedder + HNSW index up front so HTTP write
3769    // paths can populate them. Previously the daemon never constructed an
3770    // embedder, silently excluding every HTTP-authored memory from semantic
3771    // recall. Build only when the configured feature tier enables it —
3772    // keyword-only deployments keep their zero-dep, zero-RAM profile.
3773    // Daemon has no per-invocation tier override; honour the config tier.
3774    let feature_tier = app_config.effective_tier(None);
3775    let tier_config = feature_tier.config();
3776    let embedder = build_embedder(feature_tier, app_config).await;
3777    // #1579 B3 — async boot HNSW. The daemon binds with an EMPTY
3778    // index and becomes ready immediately; a background loader
3779    // (`spawn_vector_index_boot_load`) reads the stored embeddings
3780    // over its own connection, builds the graph on the #968 rebuild
3781    // thread, and swaps it in (INFO line on swap). Until then,
3782    // semantic recall serves its keyword/FTS blend and the #519
3783    // proactive conflict check uses its bounded-scan fallback. The
3784    // pre-#1579 synchronous build held boot for 40 s at 10k vectors
3785    // and >28 min at 100k (P1 audit).
3786    let vector_index_state: Arc<Mutex<Option<VectorIndex>>> = Arc::new(Mutex::new(
3787        embedder.is_some().then(hnsw::VectorIndex::empty),
3788    ));
3789    if embedder.is_some() {
3790        let _boot_index_loader =
3791            spawn_vector_index_boot_load(db_path.to_path_buf(), Arc::clone(&vector_index_state));
3792    }
3793
3794    // v0.7.0 L5 — build the LLM client for autonomy-hook capable tiers
3795    // (smart/autonomous). The HTTP `create_memory` handler reaches for
3796    // `app.llm` to call `auto_tag`, mirroring the auto-tag block in
3797    // `crate::mcp::handle_store`. When the configured tier has no
3798    // `llm_model` (keyword/semantic) or the Ollama endpoint is
3799    // unreachable, the client stays `None` and the hook silently
3800    // degrades to operator-supplied tags only.
3801    let llm = build_llm_client(feature_tier, app_config).await;
3802
3803    let db_state: Db = Arc::new(Mutex::new((
3804        conn,
3805        db_path.to_path_buf(),
3806        resolved_ttl,
3807        archive_on_gc,
3808    )));
3809
3810    // Federation: parsed from --quorum-writes / --quorum-peers. Disabled
3811    // entirely when either is absent — daemon behaves exactly like
3812    // v0.6.0 in that case.
3813    // `mut` is used only by the `sal`-gated DLQ wire-up below; without
3814    // `sal` the binding is read-only, so `unused_mut` is allowed there.
3815    #[cfg_attr(not(feature = "sal"), allow(unused_mut))]
3816    let mut federation = federation::FederationConfig::build(
3817        args.quorum_writes,
3818        &args.quorum_peers,
3819        std::time::Duration::from_millis(args.quorum_timeout_ms),
3820        args.quorum_client_cert.as_deref(),
3821        args.quorum_client_key.as_deref(),
3822        args.quorum_ca_cert.as_deref(),
3823        // v0.7.0 epic (ADR-001) — federation identity is resolved, not
3824        // hardcoded. Precedence: AI_MEMORY_FED_IDENTITY env >
3825        // `--federation-identity` operator config > the historical
3826        // `host:<hostname>` default. A blank flag is skipped by the
3827        // resolver, so it can never collapse the identity to empty.
3828        federation::identity::resolve_federation_identity(args.federation_identity.as_deref()),
3829        // v0.7.0 fold-A2A1.4 (#702) — thread the operator-configured
3830        // `[api] api_key` into federation outbound so peer POSTs carry
3831        // `x-api-key`. Without this, cross-host federation BREAKS when
3832        // any peer runs with api-key auth (peer returns 401 → quorum
3833        // never converges). `None` keeps the prior behaviour unchanged.
3834        app_config.api_key.clone(),
3835    )
3836    .context("federation config")?;
3837
3838    let mut task_handles: Vec<JoinHandle<()>> = Vec::new();
3839
3840    if let Some(ref fed) = federation {
3841        tracing::info!(
3842            "federation enabled: W={} over {} peer(s), timeout {}ms",
3843            fed.policy.w,
3844            fed.peer_count(),
3845            args.quorum_timeout_ms,
3846        );
3847        // v0.6.0.1 (#320) — post-partition catchup poller. Closes the gap
3848        // where a rejoining node only sees post-resume writes.
3849        //
3850        // v0.7.0 M3 — the catchup loop now plumbs the SAL store handle
3851        // through (instead of `db::insert_if_newer`) so postgres-backed
3852        // daemons route peer pushes to postgres. The actual spawn is
3853        // deferred until after `build_store_handle` resolves the
3854        // `Arc<dyn MemoryStore>` — see the post-store-build block below.
3855        if args.catchup_interval_secs > 0 {
3856            tracing::info!(
3857                "catchup loop enabled: polling {} peer(s) every {}s",
3858                fed.peer_count(),
3859                args.catchup_interval_secs,
3860            );
3861        } else {
3862            tracing::info!("catchup loop disabled (--catchup-interval-secs=0)");
3863        }
3864    }
3865
3866    // v0.7.0 A5 — resolve the effective MCP tool profile for the HTTP
3867    // path so `/capabilities` v3 reports honest loaded/total counts.
3868    // Mirrors the MCP-mode resolution at src/daemon_runtime.rs:501;
3869    // unresolvable profile (e.g., bad config.toml) falls back to
3870    // Profile::core() rather than blocking HTTP boot.
3871    let resolved_profile = app_config
3872        .effective_profile(None)
3873        .unwrap_or_else(|_| crate::profile::Profile::core());
3874    let mcp_config_for_http = app_config.mcp.clone();
3875    // v0.7 Track H — H2 + Round-3 F12: ensure-and-load the daemon's
3876    // outbound-link signing keypair. The helper auto-generates the
3877    // well-known `daemon` keypair under `~/.config/ai-memory/keys/` on
3878    // first start (idempotent — a restart never overwrites an existing
3879    // keypair) and returns it for the AppState. The lifecycle outcome
3880    // is captured separately so the startup banner can surface the
3881    // auto-gen path. Failure at any step degrades to unsigned-link
3882    // mode without aborting startup.
3883    let (active_keypair, daemon_keypair_outcome) = ensure_and_load_daemon_keypair();
3884
3885    // v0.7.0 B3-fix2 — gate the family-descriptor embedding precompute
3886    // behind `AI_MEMORY_PRECOMPUTE_FAMILY_EMBEDDINGS=1`, default OFF.
3887    //
3888    // ## Why default-OFF
3889    //
3890    // The B3 precompute is forward-infrastructure for B2's
3891    // `memory_smart_load(intent)`, which is not yet wired into any HTTP
3892    // or MCP handler — `best_family_match` is dead code in production
3893    // today (only one unit test calls it). Running 8 detached embeds at
3894    // boot therefore buys nothing for current callers but does compete
3895    // for the embedder's `std::sync::Mutex<BertModel>` against every
3896    // request that needs to embed (notify content, sync_push row
3897    // refresh, recall query, single-row create_memory).
3898    //
3899    // Under heavy parallel `cargo test` load (every integration test
3900    // spawns its own `ai-memory serve` subprocess, saturating CPU),
3901    // that contention pushes federation-quorum windows over the 5 s
3902    // ack budget — observed locally as `http_notify_fans_out_…` 503s
3903    // and `test_serve_mtls_…` POST timeouts that did not occur on
3904    // `origin/main` and disappear when the precompute is gated off.
3905    // Even the prior B3-fix's "detached spawn_blocking" form does not
3906    // help: the contention is on the embedder mutex inside `embed()`,
3907    // not on the tokio scheduler.
3908    //
3909    // ## Cell semantics preserved
3910    //
3911    // `AppState::family_embeddings` stays `Arc<RwLock<Option<…>>>` so
3912    // B2 can flip the env var on (or remove the gate entirely) the
3913    // day the smart loader actually consumes the cache, without an
3914    // `AppState` field-shape change. `None` continues to mean "not
3915    // yet populated" and `best_family_match` already short-circuits
3916    // to its non-embedding fallback in that state.
3917    let family_embeddings: Arc<
3918        tokio::sync::RwLock<Option<Vec<(crate::profile::Family, Vec<f32>)>>>,
3919    > = Arc::new(tokio::sync::RwLock::new(None));
3920    let embedder_arc = Arc::new(embedder);
3921
3922    // #1691 — build + install the cross-encoder reranker for the HTTP
3923    // daemon so the HTTP recall surface applies the SAME neural rerank
3924    // stage the MCP/CLI recall paths run (the prior n23 NOTE in
3925    // handlers/recall.rs documented the gap). Gated on the resolved tier
3926    // enabling the cross-encoder, mirroring the MCP boot path
3927    // (`run_mcp_server`). Installed into the process-global
3928    // RuntimeContext (interior `OnceLock`) so no AppState field-shape
3929    // change is needed; the recall handler reads it via
3930    // `app.runtime.reranker()`. Keyword/semantic/smart tiers leave the
3931    // slot empty and recall runs without the rerank stage, exactly as
3932    // before.
3933    if tier_config.cross_encoder {
3934        tracing::info!("serve: loading neural cross-encoder (#1691 HTTP recall rerank)");
3935        let ce = crate::reranker::CrossEncoder::new_neural();
3936        if ce.is_neural() {
3937            tracing::info!("serve: neural cross-encoder ready (batched)");
3938        } else {
3939            tracing::warn!("serve: neural cross-encoder unavailable, using lexical fallback");
3940        }
3941        // #1691/n14 — apply the operator-configured score floor
3942        // (env > [reranker].score_floor > Off) on the HTTP recall reranker
3943        // too, matching the MCP build site.
3944        crate::runtime_context::RuntimeContext::global().install_reranker(Arc::new(
3945            crate::reranker::BatchedReranker::with_score_floor(
3946                ce,
3947                app_config.resolve_reranker_score_floor(),
3948            ),
3949        ));
3950    }
3951
3952    if std::env::var("AI_MEMORY_PRECOMPUTE_FAMILY_EMBEDDINGS")
3953        .ok()
3954        .as_deref()
3955        == Some("1")
3956    {
3957        let cache = family_embeddings.clone();
3958        let embedder_for_task = embedder_arc.clone();
3959        task_handles.push(tokio::spawn(async move {
3960            // ----------------------------------------------------------------
3961            // H1 (v0.7.0 round-2) — lock-discipline for the family-embedding
3962            // precompute:
3963            //
3964            //   1. The slow `Embedder::embed(descriptor)` calls run inside a
3965            //      `spawn_blocking` closure that holds NO lock on
3966            //      `family_embeddings`. Each (Family, Vec<f32>) pair is
3967            //      collected into a local `Vec` owned by the blocking task.
3968            //   2. Only AFTER the entire batch is computed do we take
3969            //      `family_embeddings.write().await` exactly ONCE to swap
3970            //      the populated `Some(Vec)` into the cache.
3971            //
3972            // Why: the prior shape that acquired the write lock before each
3973            // embed call would have parked every concurrent `try_read()`
3974            // reader for the duration of an ML inference round trip — up
3975            // to seconds on a cold runner. Concurrent recall handlers that
3976            // call `AppState::best_family_match` would be forced into the
3977            // no-cache fallback even when the embedder was fully operational.
3978            //
3979            // The two-phase shape below is the canonical "compute outside,
3980            // commit inside" lock pattern: readers see either `None`
3981            // (precompute not yet finished) or the fully-populated
3982            // `Some(Vec)` — never a half-built vector.
3983            // ----------------------------------------------------------------
3984            let computed = tokio::task::spawn_blocking(move || {
3985                // No lock held during embed calls — pairs are accumulated
3986                // into a local Vec returned to the async caller below.
3987                AppState::precompute_family_embeddings(
3988                    embedder_for_task
3989                        .as_ref()
3990                        .as_ref()
3991                        .map(|e| e as &dyn crate::embeddings::Embed),
3992                )
3993            })
3994            .await
3995            .unwrap_or_else(|e| {
3996                tracing::warn!(
3997                    error = %e,
3998                    "B3: family-descriptor precompute task panicked; \
3999                     family_embeddings will stay empty",
4000                );
4001                Vec::new()
4002            });
4003            if !computed.is_empty() {
4004                tracing::info!(
4005                    "B3: pre-computed {} family-descriptor embeddings (async)",
4006                    computed.len(),
4007                );
4008            }
4009            // Single-shot commit: write lock acquired ONCE here and
4010            // released immediately after the swap. No embedder calls run
4011            // under this lock.
4012            *cache.write().await = Some(computed);
4013        }));
4014    } else {
4015        tracing::debug!(
4016            "B3: family-descriptor precompute disabled \
4017             (AI_MEMORY_PRECOMPUTE_FAMILY_EMBEDDINGS != 1); \
4018             best_family_match will return None until B2 wires \
4019             the smart loader and the gate is flipped on"
4020        );
4021    }
4022
4023    // v0.7.0 Wave-3 — resolve the polymorphic `MemoryStore` handle from
4024    // the operator's `--store-url` (when set) or build a `SqliteStore`
4025    // wrapping the same on-disk database `--db` already opened. Both
4026    // branches end with a populated `Arc<dyn MemoryStore>` so handlers
4027    // can dispatch through the SAL unconditionally on `--features sal`
4028    // builds. The `storage_backend` flag below records which adapter
4029    // resolved so handlers can branch + the `/capabilities` payload can
4030    // surface it for operators.
4031    //
4032    // Standard builds (no `--features sal`) skip the trait wiring
4033    // entirely — the daemon stays a pure SQLite-on-disk deployment with
4034    // zero behavioural drift versus pre-Wave-3.
4035    // Issue #877: resolve the configured embedder dim from the same
4036    // resolution ladder `build_embedder` uses — app_config override wins,
4037    // then tier preset, then None. We re-derive it here (instead of
4038    // pulling from the materialised `embedder` handle) because the
4039    // embedder load itself can fail (network egress to HF Hub, OOM,
4040    // etc.) and we still need the *configured* dim to inform the
4041    // postgres bootstrap, otherwise a transient embedder load failure
4042    // would leave the schema mis-dimensioned silently. Falls back to
4043    // `None` only when no embedder model is configured at all
4044    // (keyword-only).
4045    //
4046    // v0.7.x (issue #1169): the resolution ladder now prefers the
4047    // resolver-side canonical dim lookup
4048    // ([`crate::config::canonical_embedding_dim`]) so an operator
4049    // pick of `[embeddings].model = "bge-large-en"` (or any other
4050    // model id outside the 2-family [`EmbeddingModel`] enum) bootstraps
4051    // the postgres schema at the live 1024-dim instead of silently
4052    // dropping to the tier-preset's 768-dim. The enum-parse arm
4053    // remains as the back-compat path for legacy flat-field configs
4054    // (`embedding_model = "nomic_embed_v15"`), and the tier preset is
4055    // the last-resort fallback. The pre-#1169 path lost the resolver
4056    // signal entirely — schema dim wrong on every non-enum operator
4057    // pick, with no log signal because the parse arm silently fell
4058    // through to the preset.
4059    #[cfg(feature = "sal")]
4060    let configured_embedding_dim: Option<u32> =
4061        resolve_configured_embedding_dim(app_config, &tier_config);
4062    #[cfg(feature = "sal")]
4063    let (storage_backend, store_handle) = build_store_handle(
4064        args.store_url.as_deref(),
4065        db_path,
4066        app_config.postgres_statement_timeout_secs,
4067        configured_embedding_dim,
4068        app_config.resolve_pg_pool(),
4069    )
4070    .await
4071    .context("build SAL store handle")?;
4072    #[cfg(not(feature = "sal"))]
4073    let storage_backend = crate::handlers::StorageBackend::Sqlite;
4074
4075    // v0.7.0 Track D #933 — federation push DLQ sink. Resolved here
4076    // (after `build_store_handle` returns the typed store) so the
4077    // `broadcast_store_quorum` fanout can land DLQ rows on per-peer
4078    // failure. Sqlite-backed daemons get the shared `Db` mutex sink;
4079    // postgres-backed daemons get the pool-backed sink. The chosen
4080    // sink is also handed to the `replay_federation_push_dlq` worker
4081    // spawned below so the same DLQ rows the broadcast wrote are the
4082    // ones the worker drains.
4083    //
4084    // Feature-gated to `--features sal` — the DLQ trait surface
4085    // requires `async-trait` which is a SAL-only dep. Default
4086    // (sqlite-only) builds preserve pre-#933 behaviour.
4087    #[cfg(feature = "sal")]
4088    if let Some(ref mut fed) = federation {
4089        let sink: std::sync::Arc<dyn federation::FederationDlqSink> = match storage_backend {
4090            #[cfg(feature = "sal-postgres")]
4091            crate::handlers::StorageBackend::Postgres => {
4092                // Recover the typed PostgresStore via the generic
4093                // `as_any` downcast hatch (renamed from
4094                // `as_any_for_postgres` per ARCH-15, FX-C4-batch2) so
4095                // the sink can issue raw SQL through
4096                // `PostgresStore::pool()`. Falls back to the sqlite
4097                // sink (which would error on every INSERT because the
4098                // postgres DB has no sqlite connection) when the
4099                // downcast fails — unreachable in practice because the
4100                // only backend returning `StorageBackend::Postgres` IS
4101                // PostgresStore.
4102                if let Some(pg) = store_handle
4103                    .as_any()
4104                    .downcast_ref::<crate::store::postgres::PostgresStore>()
4105                {
4106                    std::sync::Arc::new(federation::push_dlq::PostgresDlqSink::new(
4107                        std::sync::Arc::new(pg.clone()),
4108                    ))
4109                } else {
4110                    tracing::warn!(
4111                        "federation push DLQ: PostgresStore downcast failed; \
4112                             falling back to sqlite sink (DLQ writes WILL error \
4113                             on postgres-backed daemons until the cast is restored)"
4114                    );
4115                    std::sync::Arc::new(federation::push_dlq::SqliteDlqSink::new(db_state.clone()))
4116                }
4117            }
4118            _ => std::sync::Arc::new(federation::push_dlq::SqliteDlqSink::new(db_state.clone())),
4119        };
4120        fed.dlq_sink = Some(sink);
4121    }
4122
4123    // v0.7.0 M3 — spawn the federation catchup loop now that the SAL
4124    // store handle has resolved. The loop dispatches each peer-pulled
4125    // memory through `store.apply_remote_memory` (postgres-aware) on
4126    // `--features sal` builds; legacy builds fall back to the
4127    // `db::insert_if_newer` sqlite path.
4128    if let Some(ref fed) = federation
4129        && args.catchup_interval_secs > 0
4130    {
4131        let interval = std::time::Duration::from_secs(args.catchup_interval_secs);
4132        #[cfg(feature = "sal")]
4133        {
4134            federation::spawn_catchup_loop_with_store(
4135                fed.clone(),
4136                db_state.clone(),
4137                Some(store_handle.clone()),
4138                interval,
4139            );
4140        }
4141        #[cfg(not(feature = "sal"))]
4142        {
4143            federation::spawn_catchup_loop(fed.clone(), db_state.clone(), interval);
4144        }
4145
4146        // v0.7.0 Track D #933 — federation push DLQ replay worker.
4147        // Polls the DLQ at the same cadence as the catchup loop and
4148        // re-attempts `post_once` against each peer until the row
4149        // Acks. The worker maintains the
4150        // `ai_memory_federation_push_dlq_depth` Prometheus gauge.
4151        #[cfg(feature = "sal")]
4152        if let Some(sink) = fed.dlq_sink.clone() {
4153            let _replay_handle =
4154                federation::spawn_replay_federation_push_dlq(fed.clone(), sink, interval);
4155            tracing::info!(
4156                "federation push DLQ replay worker enabled: polling every {}s",
4157                args.catchup_interval_secs,
4158            );
4159        }
4160    }
4161
4162    // #1579 A4 — serve-boot embedding-backfill sweep over the SAL
4163    // store. The legacy backfill (`crate::mcp::run_embedding_backfill*`)
4164    // is rusqlite-`Connection`-bound and runs ONLY at MCP stdio boot,
4165    // so postgres-backed daemons (which exist exclusively behind
4166    // `serve --store-url postgres://…`) never re-embedded the rows the
4167    // v29 embedding-dim migration NULLed — fleet semantic recall was
4168    // dead (P3 audit: 37/7,994 rows embedded, 0 backfill journal
4169    // lines). This sweep drains `MemoryStore::list_unembedded` in
4170    // bounded `[embeddings].backfill_batch` chunks through the daemon
4171    // embedder. SQLite-backed serve daemons are a structural no-op
4172    // (the sqlite adapter inherits the empty `list_unembedded`
4173    // default — its side-table embeddings are backfilled by the MCP
4174    // boot path), so this changes nothing for them. Detached task:
4175    // boot readiness never blocks on the sweep.
4176    #[cfg(feature = "sal")]
4177    if embedder_arc.is_some() {
4178        let backfill_store = store_handle.clone();
4179        let backfill_embedder = embedder_arc.clone();
4180        let backfill_batch = usize::try_from(app_config.resolve_embeddings().backfill_batch)
4181            .unwrap_or(crate::mcp::DEFAULT_EMBED_BACKFILL_BATCH_SIZE);
4182        task_handles.push(tokio::spawn(async move {
4183            let Some(emb) = backfill_embedder.as_ref() else {
4184                return;
4185            };
4186            // Operator-level maintenance path: must see (and re-embed)
4187            // every row regardless of metadata.scope — same posture as
4188            // the federation catchup loop. Sentinel principal, not a
4189            // literal, per the #1558 identity-sentinel SSOT.
4190            let ctx = crate::store::CallerContext::for_admin(
4191                crate::identity::sentinels::EMBEDDING_BACKFILL,
4192            );
4193            let written = crate::store::run_embedding_backfill_on_store(
4194                backfill_store.as_ref(),
4195                &ctx,
4196                emb,
4197                backfill_batch,
4198            )
4199            .await;
4200            if written > 0 {
4201                tracing::info!(
4202                    "embedding backfill (serve boot, #1579 A4): {written} row(s) embedded"
4203                );
4204            }
4205        }));
4206    }
4207
4208    // FED-P3b — outbound credential renewal worker. When this node holds a
4209    // CA-issued credential file (`AI_MEMORY_FED_CRED_PATH`), keep it fresh:
4210    // an external issuer rewrites the short-lived credential on renewal and
4211    // this worker swaps it into the live send path without a daemon
4212    // restart. Independent of the catchup interval; a no-op (not spawned)
4213    // when no credential path is configured.
4214    if federation.is_some()
4215        && std::env::var(federation::identity::credential::FED_CREDENTIAL_PATH_ENV).is_ok()
4216    {
4217        let renewal_interval = Duration::from_secs(
4218            federation::identity::renewal::DEFAULT_RENEWAL_INTERVAL_SECS.unsigned_abs(),
4219        );
4220        let _renewal_handle = federation::identity::renewal::spawn_refresh_outbound_credential(
4221            db_state.clone(),
4222            renewal_interval,
4223        );
4224        tracing::info!(
4225            "federation outbound credential renewal worker enabled: refreshing every {}s",
4226            renewal_interval.as_secs(),
4227        );
4228    }
4229
4230    if matches!(storage_backend, crate::handlers::StorageBackend::Postgres) {
4231        tracing::warn!(
4232            "v0.7.0 Wave-3: postgres-backed daemon — handlers that have not \
4233             yet migrated to the SAL trait surface 501 Not Implemented. See \
4234             docs/postgres-age-guide.md for the supported endpoint inventory."
4235        );
4236    }
4237
4238    let app_state = AppState {
4239        db: db_state.clone(),
4240        embedder: embedder_arc,
4241        vector_index: vector_index_state,
4242        federation: Arc::new(federation),
4243        tier_config: Arc::new(tier_config),
4244        scoring: Arc::new(app_config.effective_scoring()),
4245        profile: Arc::new(resolved_profile),
4246        mcp_config: Arc::new(mcp_config_for_http),
4247        active_keypair: Arc::new(active_keypair),
4248        family_embeddings,
4249        storage_backend,
4250        #[cfg(feature = "sal")]
4251        store: store_handle,
4252        llm: Arc::new(llm),
4253        // v0.7.0 L15 — dedicated auto_tag model from config.toml.
4254        auto_tag_model: Arc::new(app_config.auto_tag_model.clone()),
4255        // v0.7.0 H8 (round-2) — per-LLM-call timeout (default 30s).
4256        llm_call_timeout: Duration::from_secs(app_config.effective_llm_call_timeout_secs()),
4257        // v0.7.0 H5 (round-2) — fresh per-process replay cache + the
4258        // resolved `[verify] require_nonce` toggle. Default `false`
4259        // preserves verify-anytime semantics for unmigrated clients;
4260        // operators opt into strict mode via `config.toml`.
4261        replay_cache: Arc::new(crate::identity::replay::ReplayCache::new()),
4262        verify_require_nonce: app_config.verify.as_ref().is_some_and(|v| v.require_nonce),
4263        // #1255 (MED, 2026-05-25) — persistence-enabled federation
4264        // nonce cache. Rehydrates from disk on boot so a daemon
4265        // restart does NOT re-open the replay window for any
4266        // captured `(body, sig, nonce)` tuple. Falls back to the
4267        // in-memory-only constructor with a WARN log if persistence
4268        // open fails (e.g. disk pressure, locked file) — the daemon
4269        // continues to boot at the pre-#1255 posture rather than
4270        // crash-looping on a transient sqlite issue.
4271        federation_nonce_cache: Arc::new(
4272            match crate::identity::replay::FederationNonceCache::new_with_db_persistence(db_path) {
4273                Ok(c) => c,
4274                Err(e) => {
4275                    tracing::warn!(
4276                        target: "ai_memory::identity::replay",
4277                        db_path = %db_path.display(),
4278                        err = %e,
4279                        "#1255: FederationNonceCache persistence open failed; falling back to \
4280                         in-memory cache. Daemon restarts will reopen the replay window until \
4281                         operators resolve the underlying sqlite issue."
4282                    );
4283                    crate::identity::replay::FederationNonceCache::new()
4284                }
4285            },
4286        ),
4287        // v0.7.0 (issue #519) — resolved autonomous_hooks flag for the
4288        // HTTP create_memory path's proactive conflict-detection
4289        // helper. Falls back to false when unset (preserves v0.6.x
4290        // post-hoc-only contradiction surface).
4291        autonomous_hooks: app_config.effective_autonomous_hooks(),
4292        // v0.7.0 (issue #518) — resolved recall_scope defaults from
4293        // `[agents.defaults.recall_scope]`. None preserves v0.6.x
4294        // recall semantics (no splice on session_default=true).
4295        recall_scope: Arc::new(app_config.effective_recall_scope().cloned()),
4296        // v0.7.0 Policy-Engine Item 3 — deferred-audit producer handle.
4297        // Always Some on bootstrap_serve (the drainer was spawned
4298        // above before the storage hook installed). Wrapped in
4299        // Arc<Option<...>> per the AppState clone-cheap idiom.
4300        deferred_audit_queue: Arc::new(Some(deferred_audit_queue)),
4301        // v0.7.0 SHIP cluster (#946 / #957 / #960 / #961, 2026-05-20)
4302        // — operator-configured `[admin] agent_ids = [...]` allowlist.
4303        // `validated_agent_ids()` drops malformed entries with a
4304        // `warn` log so a single typo cannot lock the operator out;
4305        // an absent `[admin]` block resolves to an empty Vec which
4306        // closes every admin-class endpoint by default.
4307        //
4308        // #976 (2026-05-20): `AI_MEMORY_ADMIN_AGENT_IDS` env var
4309        // overrides the config-file allowlist. Comma-separated list of
4310        // agent_ids; `*` is the wildcard (everyone is admin —
4311        // appropriate for test daemons + container deploys where the
4312        // allowlist comes from orchestration secrets, not config.toml).
4313        // Same `validate_agent_id` filter applies; malformed entries
4314        // warn + drop. Precedence: env var > `[admin]` config block.
4315        admin_agent_ids: Arc::new(resolve_admin_agent_ids(app_config.admin.as_ref())),
4316        // v0.7.0 #991 — share the per-instance rule cache constructed
4317        // above (and already wired into both hook closures) with the
4318        // HTTP handler entry points. One cache per daemon lifetime.
4319        rule_cache: Arc::clone(&rule_cache),
4320        // v0.7.x (issue #1168) — operator-resolved LLM / embeddings /
4321        // reranker triple. Threaded into the HTTP `/api/v1/capabilities`
4322        // handler so the wire-reported `models.*` block mirrors the
4323        // running daemon's actual model wiring (matching the boot
4324        // banner + the live LLM client), NOT the compiled tier preset.
4325        // The resolver folds CLI / env / `[llm]` / legacy / compiled-
4326        // default precedence and the resulting triple is process-stable.
4327        resolved_models: Arc::new(app_config.resolve_models()),
4328        runtime: crate::runtime_context::RuntimeContext::global_arc(),
4329        // Operator-resolved `[limits].max_page_size` (env
4330        // `AI_MEMORY_MAX_PAGE_SIZE`) — per-request page / bulk
4331        // materialization bound for list / search / bulk-create /
4332        // federation-sync handlers. Falls back to the compiled
4333        // `MAX_BULK_SIZE` default when unset.
4334        max_page_size: app_config.resolve_limits().max_page_size,
4335    };
4336
4337    // v0.7.0 Policy-Engine Item 3 — register the deferred-audit
4338    // supervisor task with the task_handles vec so `serve()` aborts
4339    // it on shutdown. The supervisor wraps the drainer with panic
4340    // recovery + graceful drain of buffered events when the queue is
4341    // closed. This MUST be in `task_handles` so the test assertion in
4342    // `test_bootstrap_serve_keyword_tier_no_embedder` updates its
4343    // expected count accordingly.
4344    task_handles.push(deferred_audit_supervisor);
4345
4346    // Automatic GC. Cluster G (#767) — pass through the operator-
4347    // tunable `[confidence] shadow_retention_days` so the periodic
4348    // sweep on `confidence_shadow_observations` runs at the configured
4349    // window (default 30 days).
4350    let shadow_retention_days = app_config.confidence.as_ref().map_or(
4351        crate::confidence::shadow::DEFAULT_SHADOW_RETENTION_DAYS,
4352        crate::config::ConfidenceConfig::effective_shadow_retention_days,
4353    );
4354    task_handles.push(spawn_gc_loop_with_shadow_retention(
4355        db_state.clone(),
4356        app_config.archive_max_days,
4357        shadow_retention_days,
4358        Duration::from_secs(GC_INTERVAL_SECS),
4359    ));
4360
4361    // #1690 — offloaded_blobs TTL sweep. `offload_ttl_sweep::spawn` existed but
4362    // was never pushed into the bootstrap spawn list, so offloaded blobs grew
4363    // unbounded (the module doc-comment claiming it was "spawned by
4364    // bootstrap_serve" was false until this wiring). Daily cadence.
4365    task_handles.push(crate::background::offload_ttl_sweep::spawn(
4366        db_state.clone(),
4367        crate::background::offload_ttl_sweep::DEFAULT_INTERVAL,
4368    ));
4369
4370    // v0.6.0 GA: periodic WAL checkpoint. Under continuous writes the WAL
4371    // file grows until SQLite's auto-checkpoint fires (every 1000 pages by
4372    // default) — which is inconsistent timing and can leave the file at
4373    // hundreds of MB between auto-checkpoints. A dedicated task running on
4374    // a fixed cadence keeps the WAL bounded and makes operational storage
4375    // behaviour predictable. We stagger from GC to avoid lock-contention
4376    // bursts. See docs/ARCHITECTURAL_LIMITS.md for why this workaround is
4377    // necessary in a single-connection daemon.
4378    task_handles.push(spawn_wal_checkpoint_loop(
4379        db_state.clone(),
4380        Duration::from_secs(WAL_CHECKPOINT_INTERVAL_SECS),
4381    ));
4382
4383    // v0.7.0 K2: pending_actions timeout sweeper. Closes the v0.6.3.1
4384    // honest-Capabilities-v2 disclosure that `default_timeout_seconds`
4385    // was advertised in v1 but unused. 60-second cadence; per-row
4386    // override via the `default_timeout_seconds` column. The global
4387    // default below is the fall-through when the per-row column is
4388    // NULL — matches the `doctor_oldest_pending_age_secs` 24h CRIT
4389    // window so a row that would already be flagged red also expires.
4390    task_handles.push(spawn_pending_timeout_sweep_loop(
4391        db_state.clone(),
4392        db_path.to_path_buf(),
4393        PENDING_TIMEOUT_DEFAULT_SECS,
4394        Duration::from_secs(PENDING_TIMEOUT_SWEEP_INTERVAL_SECS),
4395    ));
4396
4397    // v0.7.0 I3: transcript archive→prune lifecycle sweeper. Resolves
4398    // per-namespace TTL + grace from `[transcripts]` in config.toml
4399    // (compiled defaults: 30-day TTL, 7-day grace) and runs every 10
4400    // minutes — heavier than K2's 60s scan because phase 1 walks the
4401    // I2 join table per candidate. Companion to the K2 sweeper above:
4402    // both follow the same spawn-per-interval shape so shutdown +
4403    // observability behave identically.
4404    task_handles.push(spawn_transcript_lifecycle_sweep_loop(
4405        db_state.clone(),
4406        app_config.effective_transcripts(),
4407        Duration::from_secs(TRANSCRIPT_LIFECYCLE_SWEEP_INTERVAL_SECS),
4408    ));
4409
4410    // v0.7.0 K8: agent-quota daily-counter reset sweeper. Resets
4411    // `current_memories_today` + `current_links_today` for every row
4412    // whose `day_started_at` predates the current UTC date. 60-second
4413    // cadence — same shape as the K2 pending sweeper above. The
4414    // inline-roll branch in `crate::quotas::check_quota` /
4415    // `crate::quotas::record_op` is the per-write fallback so the
4416    // substrate stays honest even if this sweep is delayed.
4417    task_handles.push(spawn_agent_quota_reset_loop(
4418        db_state.clone(),
4419        Duration::from_secs(AGENT_QUOTA_RESET_INTERVAL_SECS),
4420    ));
4421
4422    // v0.7.0 fold-A2A1.4 (#702) — mtls_enforced is true when the
4423    // operator configured the full TLS+mTLS stack (cert+key+allowlist).
4424    // The api_key_auth middleware uses this to bypass the `x-api-key`
4425    // requirement on `/api/v1/sync/*` paths, because rustls has already
4426    // verified the client cert against the operator-pinned allowlist
4427    // — adding a shared-secret check on top is redundant and breaks
4428    // cross-host federation when the peer doesn't carry the secret.
4429    let mtls_enforced =
4430        args.tls_cert.is_some() && args.tls_key.is_some() && args.mtls_allowlist.is_some();
4431    let api_key_state = ApiKeyState {
4432        key: app_config.api_key.clone(),
4433        mtls_enforced,
4434    };
4435    if api_key_state.key.is_some() {
4436        if mtls_enforced {
4437            tracing::info!(
4438                "API key authentication enabled — federation endpoints (/api/v1/sync/*) \
4439                 bypass api-key check because mTLS allowlist is configured"
4440            );
4441        } else {
4442            tracing::info!("API key authentication enabled");
4443        }
4444    }
4445
4446    // #1570 (H6) — record whether request authentication is configured
4447    // so the shared admin-role gate can refuse to mint admin from a
4448    // bare self-asserted `X-Agent-Id` header on unauthenticated
4449    // deployments. Boot-time WARN when the operator configured admin
4450    // ids but the gate will refuse them all (no api_key, trust flag
4451    // off) — names the escape hatch so the remediation is one search
4452    // away. Mirrors the #1455 fail-closed convention.
4453    crate::handlers::admin_role::mark_request_authn_configured(api_key_state.key.is_some());
4454    if !app_state.admin_agent_ids.is_empty()
4455        && api_key_state.key.is_none()
4456        && !crate::handlers::admin_role::admin_header_trust_enabled()
4457    {
4458        tracing::warn!(
4459            "[admin].agent_ids is configured but no api_key is set: the X-Agent-Id header is \
4460             self-asserted, so admin-role requests will be REFUSED (403) until you either \
4461             configure an api_key or explicitly opt into the legacy header-trust posture with \
4462             {}=1 (#1570 secure default)",
4463            crate::handlers::admin_role::ENV_ADMIN_HEADER_TRUST,
4464        );
4465    }
4466
4467    Ok(ServeBootstrap {
4468        app_state,
4469        api_key_state,
4470        db_state,
4471        archive_max_days: app_config.archive_max_days,
4472        task_handles,
4473        daemon_keypair_outcome,
4474        // H7 (v0.7.0 round-2) — per-request HTTP timeout (default 60s).
4475        request_timeout: Duration::from_secs(app_config.effective_request_timeout_secs()),
4476        deferred_audit_metrics,
4477    })
4478}
4479
4480/// Init the tracing subscriber for the HTTP daemon. Idempotent at the
4481/// `tracing-subscriber` level — repeated calls log a warning and no-op
4482/// rather than panic. Split out from `serve()` so test code can opt out.
4483fn init_tracing() {
4484    let _ = tracing_subscriber::fmt()
4485        .with_env_filter(
4486            EnvFilter::from_default_env()
4487                .add_directive(crate::logging::DEFAULT_LOG_DIRECTIVE.parse().unwrap())
4488                .add_directive("tower_http=info".parse().unwrap()),
4489        )
4490        .try_init();
4491}
4492
4493/// Run the HTTP memory daemon. Loads TLS state, builds `AppState`, spawns
4494/// the GC + WAL-checkpoint loops, and binds a listener (TLS or plain HTTP).
4495///
4496/// Behaviour is preserved from the pre-W6 inline `main::serve` body — only
4497/// the structure has changed.
4498#[allow(clippy::too_many_lines)]
4499pub async fn serve(db_path: PathBuf, args: ServeArgs, app_config: &AppConfig) -> Result<()> {
4500    init_tracing();
4501
4502    let bootstrap = bootstrap_serve(&db_path, &args, app_config).await?;
4503
4504    // Round-2 F8 + Round-3 F12 — startup banner. Surfaces the effective
4505    // permissions mode (and the v0.7.0 enforce-default migration warning
4506    // when the operator has no `[permissions]` block in config) plus the
4507    // F12 keypair-autogen result captured by `ensure_and_load_daemon_keypair`
4508    // earlier in this fn.
4509    let banner_inputs = crate::cli::serve_banner::BannerInputs {
4510        // B4 (S5-M3) — `.and_then` (not `.map`) so a partial
4511        // `[permissions]` block without `mode = ` collapses to `None`
4512        // and the banner's migration WARN fires, matching
4513        // `AppConfig::effective_permissions_mode` semantics.
4514        configured_permissions_mode: app_config.permissions.as_ref().and_then(|p| p.mode),
4515        auto_generated_keypair_path: bootstrap.daemon_keypair_outcome.as_ref().and_then(
4516            |o| match o {
4517                crate::identity::keypair::EnsureOutcome::Generated { pub_path } => {
4518                    Some(pub_path.display().to_string())
4519                }
4520                _ => None,
4521            },
4522        ),
4523        identity_disabled: matches!(
4524            bootstrap.daemon_keypair_outcome,
4525            Some(crate::identity::keypair::EnsureOutcome::SkippedDisabled)
4526        ),
4527    };
4528    for line in crate::cli::serve_banner::compose_banner(&banner_inputs) {
4529        if line.is_warn() {
4530            tracing::warn!("{}", line.message());
4531        } else {
4532            tracing::info!("{}", line.message());
4533        }
4534    }
4535
4536    let addr = format!("{}:{}", args.host, args.port);
4537    tracing::info!("database: {}", db_path.display());
4538
4539    // Graceful shutdown. The signal future only waits for ctrl_c and
4540    // then resolves, which tells axum to begin graceful shutdown of
4541    // in-flight requests. The deferred-audit drain + WAL checkpoint run
4542    // AFTER the server has fully quiesced (below `serve`), so:
4543    //   1. no refusal submitted by an in-flight request is lost, and
4544    //   2. the final checkpoint captures every write — including the
4545    //      drainer's `signed_events` appends, which share the same WAL
4546    //      file even though the drainer holds its own connection.
4547    // v0.7.0 Policy-Engine Item 3 (audit-log-loss-on-shutdown fix): the
4548    // checkpoint used to live inside this future, firing at signal time
4549    // before in-flight requests (and the audit drainer) had quiesced —
4550    // so refusal rows submitted during graceful shutdown could be lost.
4551    let checkpoint_state = bootstrap.db_state.clone();
4552    let drain_metrics = bootstrap.deferred_audit_metrics.clone();
4553    let shutdown = async move {
4554        let _ = tokio::signal::ctrl_c().await;
4555        tracing::info!("shutting down — draining deferred-audit queue then checkpointing WAL");
4556    };
4557
4558    // Native TLS (Layer 1): if both --tls-cert and --tls-key are provided,
4559    // bind via axum-server + rustls. Plain HTTP otherwise — backward
4560    // compatible with every prior release. The `requires = …` clap
4561    // attributes prevent the half-configured case.
4562    if let (Some(cert), Some(key)) = (&args.tls_cert, &args.tls_key) {
4563        // rustls 0.23 needs an explicit CryptoProvider; install ring
4564        // before any TLS setup. Idempotent — second install is a
4565        // harmless no-op via ignore.
4566        let _ = rustls::crypto::ring::default_provider().install_default();
4567        // Load TLS / mTLS config BEFORE printing the "listening" log
4568        // so a misconfigured cert / key / allowlist surfaces the error
4569        // first (red-team #248).
4570        let tls_config = if let Some(allowlist_path) = &args.mtls_allowlist {
4571            tracing::info!(
4572                "mTLS enabled — client certs required. Allowlist: {}",
4573                allowlist_path.display()
4574            );
4575            tls::load_mtls_rustls_config(cert, key, allowlist_path).await?
4576        } else {
4577            tracing::warn!(
4578                "TLS enabled but mTLS NOT configured — sync endpoints \
4579                 (/api/v1/sync/push, /api/v1/sync/since) accept any client. \
4580                 Set --mtls-allowlist for production peer-mesh deployments \
4581                 (red-team #231)."
4582            );
4583            tls::load_rustls_config(cert, key).await?
4584        };
4585        let app = crate::build_router_with_timeout(
4586            bootstrap.api_key_state,
4587            bootstrap.app_state,
4588            bootstrap.request_timeout,
4589        );
4590        tracing::info!("ai-memory listening on https://{addr}");
4591        let socket_addr: std::net::SocketAddr = addr.parse()?;
4592        // axum-server doesn't have a direct graceful-shutdown on the
4593        // TLS builder yet; spawn the signal listener on the Handle
4594        // instead so ctrl_c triggers a graceful shutdown. Window is
4595        // operator-configurable via --shutdown-grace-secs (default 30,
4596        // bumped from 10 in v0.6.0 — red-team #233).
4597        let grace = std::time::Duration::from_secs(args.shutdown_grace_secs);
4598        let handle = axum_server::Handle::new();
4599        let handle_clone = handle.clone();
4600        tokio::spawn(async move {
4601            shutdown.await;
4602            handle_clone.graceful_shutdown(Some(grace));
4603        });
4604        // v0.7.0 #1581 — bind with the NoDelayAcceptor-wrapped rustls
4605        // acceptor instead of `bind_rustls` (whose DefaultAcceptor never
4606        // sets TCP_NODELAY). Without it, Nagle + the client's delayed-ACK
4607        // timer added a fixed ~40 ms to the FIRST request of every fresh
4608        // (m)TLS connection — the #1579 P3 fleet finding. Verifier chain
4609        // and accept/reject semantics are unchanged; see
4610        // `tls::serve_rustls_acceptor` + tests/mtls_nodelay_acceptor.rs.
4611        axum_server::bind(socket_addr)
4612            .acceptor(tls::serve_rustls_acceptor(&tls_config))
4613            .handle(handle)
4614            .serve(app.into_make_service())
4615            .await?;
4616    } else {
4617        tracing::warn!(
4618            "TLS NOT enabled — sync endpoints (/api/v1/sync/push, \
4619             /api/v1/sync/since) accept any caller over plain HTTP. \
4620             Set --tls-cert + --tls-key + --mtls-allowlist for production \
4621             peer-mesh deployments (red-team #231)."
4622        );
4623        tracing::info!("ai-memory listening on http://{addr}");
4624        // Wave 3 (v0.6.3): the non-TLS path delegates to
4625        // `daemon_runtime::serve_http_with_shutdown_future`, which is the
4626        // same `build_router` + `TcpListener::bind` + `axum::serve` body
4627        // the integration tests drive in-process. Production threads its
4628        // WAL-checkpoint-on-shutdown future in directly so the cleanup
4629        // semantic is preserved verbatim.
4630        serve_http_with_shutdown_future_and_timeout(
4631            &addr,
4632            bootstrap.api_key_state,
4633            bootstrap.app_state,
4634            bootstrap.request_timeout,
4635            shutdown,
4636        )
4637        .await?;
4638    }
4639
4640    // v0.7.0 Policy-Engine Item 3 — the HTTP server has now fully
4641    // quiesced (graceful shutdown complete; no in-flight request can
4642    // submit another refusal), so `submitted` is final. Drain the
4643    // deferred-audit queue before exit so every refusal captured during
4644    // the daemon's life lands in `signed_events`. We can NOT use
4645    // `close_and_flush` here: the governance hooks
4646    // (`storage::GOVERNANCE_PRE_WRITE`, `wire_check::GOVERNANCE_PRE_ACTION`)
4647    // hold sender clones inside process-wide `OnceLock`s that never drop,
4648    // so the channel never closes and awaiting the supervisor would block
4649    // forever. `drain_pending` instead polls the shared atomic metrics
4650    // until the drainer has caught up to the submitted count.
4651    let drained = crate::governance::deferred_audit::drain_pending(
4652        &drain_metrics,
4653        crate::governance::deferred_audit::DEFAULT_SHUTDOWN_DRAIN_TIMEOUT,
4654    )
4655    .await;
4656    if drained {
4657        tracing::info!(
4658            "deferred-audit queue drained ({} refusals accounted) — checkpointing WAL",
4659            drain_metrics.submitted_count()
4660        );
4661    } else {
4662        tracing::warn!(
4663            "deferred-audit drain timed out after {:?}: {} submitted but only {} accounted — \
4664             some refusal audit rows may not have flushed before exit",
4665            crate::governance::deferred_audit::DEFAULT_SHUTDOWN_DRAIN_TIMEOUT,
4666            drain_metrics.submitted_count(),
4667            drain_metrics.appended_count()
4668                + drain_metrics.append_failure_count()
4669                + drain_metrics.send_failure_count(),
4670        );
4671    }
4672
4673    // Final WAL checkpoint now that every writer (HTTP handlers + the
4674    // deferred-audit drainer) has quiesced. The drainer's appends share
4675    // this database's WAL file, so this single checkpoint folds them in
4676    // even though the drainer holds its own connection.
4677    {
4678        let lock = checkpoint_state.lock().await;
4679        let _ = db::checkpoint(&lock.0);
4680    }
4681
4682    Ok(())
4683}
4684
4685// ---------------------------------------------------------------------------
4686// cmd_bench / cmd_migrate (no-op for non-sal builds)
4687// ---------------------------------------------------------------------------
4688
4689fn cmd_bench(args: &BenchArgs) -> Result<()> {
4690    let iterations = args.iterations.clamp(1, crate::bench::MAX_ITERATIONS);
4691    let warmup = args.warmup.min(crate::bench::MAX_WARMUP);
4692    let regression_threshold = args
4693        .regression_threshold
4694        .clamp(0.0, crate::bench::MAX_REGRESSION_THRESHOLD_PCT);
4695    // Bench always seeds a disposable in-memory DB so the operator's
4696    // main DB (and disk) are untouched. SQLite's `:memory:` URL and
4697    // WAL-less mode keep the workload bounded by RAM and CPU.
4698    let conn = db::open(Path::new(":memory:"))?;
4699    // #1579 B8 — corpus scale (None = legacy default workload).
4700    let scale = args.scale.map(|s| s.clamp(1, crate::bench::MAX_SCALE));
4701    let config = bench::BenchConfig {
4702        iterations,
4703        warmup,
4704        namespace: bench::BENCH_NAMESPACE.to_string(),
4705        scale,
4706    };
4707    let results = bench::run(&conn, &config)?;
4708
4709    let regressions = if let Some(path) = &args.baseline {
4710        let baseline = bench::load_baseline(Path::new(path))?;
4711        Some(bench::compare_against_baseline(
4712            &results,
4713            &baseline,
4714            regression_threshold,
4715        ))
4716    } else {
4717        None
4718    };
4719
4720    if args.json {
4721        println!(
4722            "{}",
4723            serde_json::to_string_pretty(&serde_json::json!({
4724                "iterations": iterations,
4725                "warmup": warmup,
4726                "scale": scale,
4727                "results": results,
4728                "regressions": regressions,
4729            }))?
4730        );
4731    } else {
4732        print!("{}", bench::render_table(&results));
4733        if let Some(rows) = &regressions {
4734            println!();
4735            print!("{}", bench::render_regression_table(rows));
4736        }
4737    }
4738
4739    if let Some(history_path) = &args.history {
4740        let captured_at = chrono::Utc::now().to_rfc3339();
4741        bench::append_history(
4742            history_path,
4743            &captured_at,
4744            iterations,
4745            warmup,
4746            scale,
4747            &results,
4748        )?;
4749        let mut stderr = std::io::stderr().lock();
4750        let _ = writeln!(
4751            stderr,
4752            "bench: appended run to history file {}",
4753            history_path.display()
4754        );
4755    }
4756
4757    let budget_failed = results
4758        .iter()
4759        .any(|r| matches!(r.status, bench::Status::Fail));
4760    let regression_failed = regressions
4761        .as_ref()
4762        .is_some_and(|rows| rows.iter().any(|r| r.regressed));
4763
4764    if budget_failed && regression_failed {
4765        anyhow::bail!(
4766            "bench: at least one operation exceeded its p95 budget by >10% AND regressed >{regression_threshold:.1}% vs baseline"
4767        );
4768    }
4769    if budget_failed {
4770        anyhow::bail!("bench: at least one operation exceeded its p95 budget by >10%");
4771    }
4772    if regression_failed {
4773        anyhow::bail!(
4774            "bench: at least one operation regressed >{regression_threshold:.1}% vs baseline"
4775        );
4776    }
4777    Ok(())
4778}
4779
/// Copy memories from the store at `args.from` into the store at
/// `args.to` and print a report (JSON when `args.json` is set, plain text
/// otherwise).
///
/// # Errors
///
/// Fails when either store cannot be opened, when the JSON report cannot
/// be serialised, or — after the report has been printed — when the
/// migration recorded one or more errors.
#[cfg(feature = "sal")]
async fn cmd_migrate(args: &MigrateArgs) -> Result<()> {
    let source_store = migrate::open_store(&args.from)
        .await
        .context("open source store")?;
    let dest_store = migrate::open_store(&args.to)
        .await
        .context("open destination store")?;

    let report = migrate::migrate(
        source_store.as_ref(),
        dest_store.as_ref(),
        args.batch,
        args.namespace.clone(),
        args.dry_run,
    )
    .await;

    // #1579 A3 (SECURITY) — the migrate report echoes both store URLs;
    // mask the userinfo password so credentials never land in stdout /
    // captured CI logs.
    let from_display = crate::logging::redact_url_password(&args.from);
    let to_display = crate::logging::redact_url_password(&args.to);

    if args.json {
        let payload = serde_json::json!({
            "from_url": from_display,
            "to_url": to_display,
            "memories_read": report.memories_read,
            "memories_written": report.memories_written,
            "batches": report.batches,
            "errors": report.errors,
            "dry_run": report.dry_run,
        });
        let rendered = serde_json::to_string_pretty(&payload)?;
        println!("{rendered}");
    } else {
        println!("migration report");
        println!("  from:              {from_display}");
        println!("  to:                {to_display}");
        println!("  memories_read:     {}", report.memories_read);
        println!("  memories_written:  {}", report.memories_written);
        println!("  batches:           {}", report.batches);
        println!("  dry_run:           {}", report.dry_run);
        println!("  errors:            {}", report.errors.len());
        report.errors.iter().for_each(|e| println!("    - {e}"));
    }

    // The report is always printed first; a non-empty error list then
    // turns the command into a failure.
    anyhow::ensure!(
        report.errors.is_empty(),
        "migration completed with {} error(s)",
        report.errors.len()
    );
    Ok(())
}
4830
4831// ---------------------------------------------------------------------------
4832// Pre-W6 helpers — in-process HTTP harness, sync-daemon body, curator-daemon body.
4833// ---------------------------------------------------------------------------
4834
4835/// Run the HTTP daemon (plain HTTP, no TLS) with a programmable shutdown.
4836///
4837/// Mirrors the `else` branch of `serve()` in pre-W6 `main.rs` (the non-TLS
4838/// path). Builds the production `Router` via `build_router`, binds a
4839/// `TcpListener` to `addr`, and runs `axum::serve` with a graceful-shutdown
4840/// future that resolves when `shutdown.notify_one()` is called.
4841///
4842/// Tests pass a known port (pick one via `free_port()` and pass
4843/// `127.0.0.1:<port>`). The function returns when shutdown completes;
4844/// callers can `tokio::spawn` it and `notify` to stop.
4845pub async fn serve_http_with_shutdown(
4846    addr: &str,
4847    api_key_state: ApiKeyState,
4848    app_state: AppState,
4849    shutdown: Arc<Notify>,
4850) -> Result<()> {
4851    serve_http_with_shutdown_future(addr, api_key_state, app_state, async move {
4852        shutdown.notified().await;
4853    })
4854    .await
4855}
4856
4857/// Variant of [`serve_http_with_shutdown`] that takes an arbitrary
4858/// shutdown future. The production `serve()` needs to run a WAL
4859/// checkpoint after the OS signal but before tearing down the listener;
4860/// that cleanup work is awkward to express through a `Notify` alone.
4861/// Accepting a `Future` lets the caller embed any async cleanup into the
4862/// shutdown future itself, while the helper keeps the `build_router` +
4863/// `TcpListener::bind` + `axum::serve` body it already owns.
4864pub async fn serve_http_with_shutdown_future<F>(
4865    addr: &str,
4866    api_key_state: ApiKeyState,
4867    app_state: AppState,
4868    shutdown: F,
4869) -> Result<()>
4870where
4871    F: std::future::Future<Output = ()> + Send + 'static,
4872{
4873    serve_http_with_shutdown_future_and_timeout(
4874        addr,
4875        api_key_state,
4876        app_state,
4877        Duration::from_secs(crate::config::DEFAULT_REQUEST_TIMEOUT_SECS),
4878        shutdown,
4879    )
4880    .await
4881}
4882
4883/// v0.7.0 H7 (round-2) — variant of [`serve_http_with_shutdown_future`]
4884/// that accepts an explicit per-request timeout. Used by tests to
4885/// drive the slow-POST edge directly.
4886pub async fn serve_http_with_shutdown_future_and_timeout<F>(
4887    addr: &str,
4888    api_key_state: ApiKeyState,
4889    app_state: AppState,
4890    request_timeout: Duration,
4891    shutdown: F,
4892) -> Result<()>
4893where
4894    F: std::future::Future<Output = ()> + Send + 'static,
4895{
4896    let app = crate::build_router_with_timeout(api_key_state, app_state, request_timeout);
4897    let listener = tokio::net::TcpListener::bind(addr)
4898        .await
4899        .with_context(|| format!("bind {addr}"))?;
4900    axum::serve(listener, app)
4901        .with_graceful_shutdown(shutdown)
4902        .await
4903        .context("axum::serve")?;
4904    Ok(())
4905}
4906
4907/// Run a single sync cycle against one peer — pull then push.
4908///
4909/// Lifted verbatim (modulo path-of-Path-vs-PathBuf) from the pre-W6
4910/// `main.rs::sync_cycle_once` so the integration sync-daemon test can
4911/// drive it without subprocess. The signature matches the private
4912/// main.rs helper 1:1 to keep call sites identical.
4913pub async fn sync_cycle_once(
4914    client: &reqwest::Client,
4915    db_path: &Path,
4916    local_agent_id: &str,
4917    peer_url: &str,
4918    api_key: Option<&str>,
4919    batch_size: usize,
4920) -> Result<()> {
4921    let peer_url = peer_url.trim_end_matches('/');
4922
4923    // --- PULL --------------------------------------------------------
4924    let since = {
4925        let conn = db::open(db_path)?;
4926        db::sync_state_load(&conn, local_agent_id)?
4927            .entries
4928            .get(peer_url)
4929            .cloned()
4930    };
4931
4932    let mut pull_url = format!(
4933        "{peer_url}/api/v1/sync/since?limit={batch_size}&peer={}",
4934        urlencoding_minimal(local_agent_id)
4935    );
4936    if let Some(ref s) = since {
4937        pull_url.push_str("&since=");
4938        pull_url.push_str(&urlencoding_minimal(s));
4939    }
4940
4941    // v0.7.0 #238/#239 — attach `x-peer-id` so the peer's
4942    // attestation + scope-allowlist substrate sees our self-claim.
4943    let mut req = client
4944        .get(&pull_url)
4945        .header(crate::HEADER_AGENT_ID, local_agent_id)
4946        .header(
4947            crate::federation::peer_attestation::PEER_ID_HEADER,
4948            local_agent_id,
4949        );
4950    if let Some(key) = api_key {
4951        req = req.header(crate::HEADER_API_KEY, key);
4952    }
4953    let resp = req.send().await?;
4954    if !resp.status().is_success() {
4955        anyhow::bail!("sync-daemon: pull status {}", resp.status());
4956    }
4957    let pulled: SyncSinceResponse = resp.json().await?;
4958    let pull_count = pulled.memories.len();
4959    let latest_pulled = pulled.memories.last().map(|m| m.updated_at.clone());
4960
4961    {
4962        let conn = db::open(db_path)?;
4963        for mem in &pulled.memories {
4964            if crate::validate::RequestValidator::validate_memory(mem).is_ok() {
4965                let _ = db::insert_if_newer(&conn, mem);
4966            }
4967        }
4968        if let Some(ref at) = latest_pulled {
4969            db::sync_state_observe(&conn, local_agent_id, peer_url, at)?;
4970        }
4971    }
4972
4973    // --- PUSH --------------------------------------------------------
4974    let last_pushed = {
4975        let conn = db::open(db_path)?;
4976        db::sync_state_last_pushed(&conn, local_agent_id, peer_url)
4977    };
4978    let outgoing = {
4979        let conn = db::open(db_path)?;
4980        db::memories_updated_since(&conn, last_pushed.as_deref(), batch_size)?
4981    };
4982    let push_count = outgoing.len();
4983    let latest_pushed = outgoing.last().map(|m| m.updated_at.clone());
4984
4985    if !outgoing.is_empty() {
4986        let body = serde_json::json!({
4987            (field_names::SENDER_AGENT_ID): local_agent_id,
4988            "sender_clock": { "entries": {} },
4989            "memories": outgoing,
4990            "dry_run": false,
4991        });
4992        // v0.7.0 #238 — attach `x-peer-id` so the receiver attests
4993        // body.sender_agent_id against our wire-level peer identity.
4994        let mut req = client
4995            .post(format!("{peer_url}/api/v1/sync/push"))
4996            .header(crate::HEADER_AGENT_ID, local_agent_id)
4997            .header(
4998                crate::federation::peer_attestation::PEER_ID_HEADER,
4999                local_agent_id,
5000            )
5001            .header(crate::HEADER_CONTENT_TYPE, crate::MIME_JSON)
5002            .json(&body);
5003        if let Some(key) = api_key {
5004            req = req.header(crate::HEADER_API_KEY, key);
5005        }
5006        let resp = req.send().await?;
5007        if !resp.status().is_success() {
5008            anyhow::bail!("sync-daemon: push status {}", resp.status());
5009        }
5010        if let Some(at) = latest_pushed {
5011            let conn = db::open(db_path)?;
5012            db::sync_state_record_push(&conn, local_agent_id, peer_url, &at)?;
5013        }
5014    }
5015
5016    tracing::info!("sync-daemon: peer={peer_url} pulled={pull_count} pushed={push_count}");
5017    Ok(())
5018}
5019
5020/// Run the sync-daemon main loop with a programmable shutdown.
5021///
5022/// Mirrors the body of the pre-W6 `cmd_sync_daemon()` in `main.rs`: for
5023/// each cycle, fan out a `JoinSet` across `peers`, then race a sleep
5024/// against the shutdown notify. Returns when the notify fires. The
5025/// integration test can build a one-cycle test by setting `interval_secs=1`
5026/// and notifying after a short tokio sleep.
5027pub async fn run_sync_daemon_with_shutdown(
5028    db_path: PathBuf,
5029    local_agent_id: String,
5030    peers: Vec<String>,
5031    api_key: Option<String>,
5032    interval_secs: u64,
5033    batch_size: usize,
5034    shutdown: Arc<Notify>,
5035) -> Result<()> {
5036    let client = reqwest::Client::builder()
5037        .timeout(Duration::from_secs(30))
5038        .build()?;
5039    run_sync_daemon_with_shutdown_using_client(
5040        client,
5041        db_path,
5042        local_agent_id,
5043        peers,
5044        api_key,
5045        interval_secs,
5046        batch_size,
5047        shutdown,
5048    )
5049    .await
5050}
5051
5052/// Variant of [`run_sync_daemon_with_shutdown`] that takes a caller-built
5053/// `reqwest::Client`. The production `cmd_sync_daemon()` constructs an
5054/// mTLS-aware client (via `build_rustls_client_config`) and threads it
5055/// in here so the helper drives the same loop body the test version
5056/// drives — keeping `daemon_runtime` as the single source of truth for
5057/// the sync-daemon loop while preserving the production TLS contract.
5058pub async fn run_sync_daemon_with_shutdown_using_client(
5059    client: reqwest::Client,
5060    db_path: PathBuf,
5061    local_agent_id: String,
5062    peers: Vec<String>,
5063    api_key: Option<String>,
5064    interval_secs: u64,
5065    batch_size: usize,
5066    shutdown: Arc<Notify>,
5067) -> Result<()> {
5068    let interval = interval_secs.max(1);
5069    let batch_size = batch_size.max(1);
5070
5071    let db_path_owned: Arc<Path> = Arc::from(db_path.as_path());
5072    let local_agent_id_arc: Arc<str> = Arc::from(local_agent_id.as_str());
5073    let api_key_arc: Option<Arc<str>> = api_key.as_deref().map(Arc::from);
5074    let peers_arc: Vec<Arc<str>> = peers.iter().map(|s| Arc::from(s.as_str())).collect();
5075    loop {
5076        let mut set: tokio::task::JoinSet<()> = tokio::task::JoinSet::new();
5077        for peer_url in &peers_arc {
5078            let client = client.clone();
5079            let db_path = db_path_owned.clone();
5080            let local_agent_id = local_agent_id_arc.clone();
5081            let peer_url = peer_url.clone();
5082            let api_key = api_key_arc.clone();
5083            set.spawn(async move {
5084                if let Err(e) = sync_cycle_once(
5085                    &client,
5086                    &db_path,
5087                    &local_agent_id,
5088                    &peer_url,
5089                    api_key.as_deref(),
5090                    batch_size,
5091                )
5092                .await
5093                {
5094                    tracing::warn!("sync-daemon: peer {peer_url} cycle failed: {e}");
5095                }
5096            });
5097        }
5098        while set.join_next().await.is_some() {}
5099
5100        tokio::select! {
5101            () = tokio::time::sleep(Duration::from_secs(interval)) => {}
5102            () = shutdown.notified() => {
5103                tracing::info!("sync-daemon: shutdown signal received");
5104                return Ok(());
5105            }
5106        }
5107    }
5108}
5109
5110/// Run the curator daemon with a programmable shutdown.
5111///
5112/// Mirrors the daemon arm of the pre-W6 `cmd_curator()`. The inner work is
5113/// `curator::run_daemon` (a blocking, tight-loop-with-`AtomicBool` already
5114/// in lib code), which we drive from a `spawn_blocking`. Tests fire the
5115/// `Notify` to set the shutdown bool and the blocking task observes it
5116/// within ~500ms (`run_daemon`'s sleep tick).
5117pub async fn run_curator_daemon_with_shutdown(
5118    db_path: PathBuf,
5119    cfg: crate::curator::CuratorConfig,
5120    shutdown: Arc<Notify>,
5121) -> Result<()> {
5122    let shutdown_flag = Arc::new(AtomicBool::new(false));
5123    let shutdown_flag_for_signal = shutdown_flag.clone();
5124    tokio::spawn(async move {
5125        shutdown.notified().await;
5126        shutdown_flag_for_signal.store(true, Ordering::Relaxed);
5127    });
5128
5129    let llm_arc: Option<Arc<crate::llm::OllamaClient>> = None;
5130    // Issue #816 — load the daemon signing keypair so the curator's
5131    // auto-persona sweep can produce signed persona rows. `None`
5132    // (no key on disk + auto-gen disabled) leaves the sweep no-op,
5133    // matching the pre-#816 behaviour.
5134    let (kp_opt, _outcome) = ensure_and_load_daemon_keypair();
5135    let active_keypair = kp_opt.map(Arc::new);
5136    let db_owned = db_path;
5137    tokio::task::spawn_blocking(move || {
5138        crate::curator::run_daemon(db_owned, llm_arc, cfg, shutdown_flag, active_keypair);
5139    })
5140    .await
5141    .map_err(|e| anyhow::anyhow!("curator daemon join: {e}"))?;
5142    Ok(())
5143}
5144
5145/// Curator-daemon loop body, primitive-arg flavour for the binary.
5146///
5147/// The caller supplies the already-resolved LLM client (built via
5148/// `build_curator_llm` so the `--daemon` path shares the identical
5149/// #1146-resolver result with the `--once` path — see #1440). `None`
5150/// disables the LLM, leaving keyword-only curation.
5151#[allow(clippy::too_many_arguments)]
5152pub async fn run_curator_daemon_with_primitives(
5153    db_path: PathBuf,
5154    interval_secs: u64,
5155    max_ops_per_cycle: usize,
5156    dry_run: bool,
5157    include_namespaces: Vec<String>,
5158    exclude_namespaces: Vec<String>,
5159    llm: Option<Arc<crate::llm::OllamaClient>>,
5160    shutdown: Arc<Notify>,
5161) -> Result<()> {
5162    let cfg = crate::curator::CuratorConfig {
5163        interval_secs,
5164        max_ops_per_cycle,
5165        dry_run,
5166        include_namespaces,
5167        exclude_namespaces,
5168        compaction: crate::curator::CompactionConfig::default(),
5169    };
5170
5171    let shutdown_flag = Arc::new(AtomicBool::new(false));
5172    let shutdown_flag_for_signal = shutdown_flag.clone();
5173    tokio::spawn(async move {
5174        shutdown.notified().await;
5175        shutdown_flag_for_signal.store(true, Ordering::Relaxed);
5176    });
5177
5178    // Issue #816 — load the daemon signing keypair for the auto-persona
5179    // sweep. Mirrors the load in `run_curator_daemon_with_shutdown`;
5180    // both daemon entry-points need the same keypair resolution so the
5181    // CLI (`ai-memory curator --daemon`) and the test-driven shutdown
5182    // flow both honour the same on-disk state.
5183    let (kp_opt, _outcome) = ensure_and_load_daemon_keypair();
5184    let active_keypair = kp_opt.map(Arc::new);
5185
5186    tokio::task::spawn_blocking(move || {
5187        crate::curator::run_daemon(db_path, llm, cfg, shutdown_flag, active_keypair);
5188    })
5189    .await
5190    .map_err(|e| anyhow::anyhow!("curator daemon join: {e}"))?;
5191    Ok(())
5192}
5193
5194// -----------------------------------------------------------------------
5195// helpers
5196// -----------------------------------------------------------------------
5197
/// Percent-encodes `s` for use as a URL component.
///
/// ASCII letters, digits and `-`, `_`, `.`, `~` pass through untouched;
/// every other byte (including each byte of a multi-byte UTF-8
/// sequence) becomes `%XX` with uppercase hex digits. That covers what
/// the sync-daemon queries emit: RFC3339 timestamps with `:` and `+`,
/// and agent ids with `:`/`@`/`/`. Mirror of the pre-W6
/// `main.rs::urlencoding_minimal`.
fn urlencoding_minimal(s: &str) -> String {
    const HEX_UPPER: &[u8; 16] = b"0123456789ABCDEF";

    let mut encoded = String::with_capacity(s.len());
    for byte in s.bytes() {
        let passthrough =
            byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_' | b'.' | b'~');
        if passthrough {
            encoded.push(char::from(byte));
        } else {
            encoded.push('%');
            encoded.push(char::from(HEX_UPPER[usize::from(byte >> 4)]));
            encoded.push(char::from(HEX_UPPER[usize::from(byte & 0x0F)]));
        }
    }
    encoded
}
5217
/// Mirrors the pre-W6 `main.rs::SyncSinceResponse` — the fields we
/// deserialize from the peer's `/api/v1/sync/since` body. `count` and
/// `limit` are present in the wire payload but unused on the receive
/// side; allowed to be dead so `clippy::pedantic` doesn't trip.
///
/// NOTE(review): no field carries `#[serde(default)]`, so a payload that
/// omits `count` or `limit` presumably fails to deserialize even though
/// neither value is read here — confirm that strictness is intended.
#[derive(serde::Deserialize)]
struct SyncSinceResponse {
    /// Carried in the wire payload; deserialized but not read here.
    #[allow(dead_code)]
    count: usize,
    /// Carried in the wire payload; deserialized but not read here.
    #[allow(dead_code)]
    limit: usize,
    /// The memory records returned by the peer.
    memories: Vec<crate::models::Memory>,
}
5230
/// No-op that names `Instant` and `Duration` in its signature.
///
/// It re-exports nothing and is never required to be called; it merely
/// references the two `std::time` imports. Presumably this keeps them
/// from being flagged as unused in some build configurations — verify
/// before removing. Private to this module.
#[allow(dead_code)]
fn _imports_in_use(_instant: Instant, _duration: Duration) {}
5235
5236// ===========================================================================
5237// Tests
5238// ===========================================================================
5239
5240#[cfg(test)]
5241#[allow(deprecated)] // DOC-6: tests intentionally exercise legacy AppConfig flat fields
5242mod tests {
5243    use super::*;
5244    use crate::cli::test_utils::TestEnv;
5245    use crate::config::ResolvedTtl;
5246    use axum::body::Body;
5247    use axum::http::{Request, StatusCode};
5248    use tower::ServiceExt as _;
5249
5250    /// #1579 A3 (SECURITY) — regression pin: the Postgres SAL boot
5251    /// path must log the REDACTED store URL. Pre-fix,
5252    /// `build_store_handle` interpolated the raw `--store-url`
5253    /// (password included) into the INFO boot line, shipping the
5254    /// credential to journald / any log sink. The INFO line fires
5255    /// before the connect attempt, so an unreachable port (`:1`)
5256    /// still exercises the log site; the connect error itself is
5257    /// expected and asserted as `Err`.
5258    #[cfg(feature = "sal-postgres")]
5259    #[tokio::test]
5260    async fn issue_1579_a3_boot_log_redacts_store_url_password() {
5261        use std::sync::{Arc, Mutex};
5262
5263        #[derive(Clone, Default)]
5264        struct SharedBuf(Arc<Mutex<Vec<u8>>>);
5265        impl std::io::Write for SharedBuf {
5266            fn write(&mut self, b: &[u8]) -> std::io::Result<usize> {
5267                self.0.lock().expect("buf lock").extend_from_slice(b);
5268                Ok(b.len())
5269            }
5270            fn flush(&mut self) -> std::io::Result<()> {
5271                Ok(())
5272            }
5273        }
5274
5275        let buf = SharedBuf::default();
5276        let writer_buf = buf.clone();
5277        let subscriber = tracing_subscriber::fmt()
5278            .with_max_level(tracing::Level::INFO)
5279            .with_ansi(false)
5280            .with_writer(move || writer_buf.clone())
5281            .finish();
5282        // Thread-local default — `#[tokio::test]` runs the future on
5283        // the current thread, so every log the boot path emits during
5284        // the await lands in `buf`.
5285        let _guard = tracing::subscriber::set_default(subscriber);
5286
5287        let secret = "sup3r-s3cret-pw";
5288        let url = format!("postgres://ai_memory:{secret}@127.0.0.1:1/ai_memory");
5289        let dir = tempfile::tempdir().expect("tempdir");
5290        let db_path = dir.path().join("unused.db");
5291        let res = build_store_handle(
5292            Some(&url),
5293            &db_path,
5294            None,
5295            Some(384),
5296            crate::store::PoolConfig::default(),
5297        )
5298        .await;
5299        assert!(res.is_err(), "port 1 must refuse the connection");
5300
5301        let logs = String::from_utf8_lossy(&buf.0.lock().expect("buf lock")).to_string();
5302        assert!(
5303            logs.contains("opening Postgres SAL store at postgres://ai_memory:****@127.0.0.1:1"),
5304            "boot line must log the redacted URL; got:\n{logs}"
5305        );
5306        assert!(
5307            !logs.contains(secret),
5308            "store-URL password leaked into the boot log:\n{logs}"
5309        );
5310    }
5311
5312    /// #1455 (SEC, MED) — when a governance hook's rule-consultation
5313    /// connection could not be opened at install time, the gate MUST
5314    /// fail CLOSED by default (return `Err`), and only degrade to ALLOW
5315    /// when the operator explicitly opts into the legacy permissive
5316    /// posture. The pre-#1455 behaviour silently degraded to ALLOW,
5317    /// disabling the entire substrate write-gate whenever `db::open`
5318    /// failed at boot.
5319    #[test]
5320    fn governance_consultation_unavailable_fails_closed_by_default_1455() {
5321        use crate::governance::agent_action::AgentAction;
5322        use crate::governance::deferred_audit::DeferredAuditQueue;
5323
5324        // Keep the receiver alive so the audit submit doesn't trip the
5325        // closed-receiver WARN path (cosmetic; not under test here).
5326        let (queue, _rx) = DeferredAuditQueue::new();
5327        let action = AgentAction::Custom {
5328            custom_kind: "memory_write".to_string(),
5329            payload: serde_json::json!({ "namespace": "ns", "tier": "long" }),
5330        };
5331        let path = Path::new("/nonexistent/rules.db");
5332
5333        // Secure default: no operator override ⇒ fail CLOSED.
5334        let closed = governance_consultation_unavailable_inner(
5335            &queue,
5336            "agent:test",
5337            &action,
5338            path,
5339            "test-surface",
5340            false,
5341        );
5342        let reason = closed.expect_err("missing consultation conn MUST fail CLOSED");
5343        assert!(
5344            reason.contains("consultation_unavailable"),
5345            "fail-closed reason must name the cause: {reason}"
5346        );
5347
5348        // Operator override ⇒ legacy permissive ALLOW.
5349        let opened = governance_consultation_unavailable_inner(
5350            &queue,
5351            "agent:test",
5352            &action,
5353            path,
5354            "test-surface",
5355            true,
5356        );
5357        assert!(
5358            opened.is_ok(),
5359            "fail_open override MUST degrade to ALLOW (legacy posture)"
5360        );
5361    }
5362
5363    /// #1455 — the env-reading wrapper honours the documented
5364    /// `AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR` truthy values and
5365    /// defaults to `false` (fail-closed) when unset.
5366    #[test]
5367    fn governance_fail_open_on_error_env_parse_1455() {
5368        // Unset → secure default.
5369        unsafe { std::env::remove_var("AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR") };
5370        assert!(!governance_fail_open_on_error());
5371        // Truthy forms → permissive.
5372        unsafe { std::env::set_var("AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR", "1") };
5373        assert!(governance_fail_open_on_error());
5374        unsafe { std::env::set_var("AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR", "TRUE") };
5375        assert!(governance_fail_open_on_error());
5376        // Falsy / junk → secure default.
5377        unsafe { std::env::set_var("AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR", "0") };
5378        assert!(!governance_fail_open_on_error());
5379        unsafe { std::env::remove_var("AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR") };
5380    }
5381
5382    // ---- #1458 (SEC, MED): api_key bind guard ------------------------------
5383
5384    /// With an api_key configured the guard permits any bind silently.
5385    #[test]
5386    fn api_key_bind_guard_present_binds_silently_1458() {
5387        assert_eq!(api_key_bind_guard(true, "0.0.0.0", false).unwrap(), None);
5388        assert_eq!(api_key_bind_guard(true, "127.0.0.1", true).unwrap(), None);
5389    }
5390
5391    /// Keyless loopback bind is permitted but MUST warn about the
5392    /// reverse-proxy/host-network re-exposure hazard.
5393    #[test]
5394    fn api_key_bind_guard_keyless_loopback_warns_1458() {
5395        for host in ["127.0.0.1", "::1", "localhost", "[::1]", "0:0:0:0:0:0:0:1"] {
5396            let warning = api_key_bind_guard(false, host, false)
5397                .unwrap()
5398                .unwrap_or_else(|| panic!("keyless loopback {host} must warn, not bind silently"));
5399            assert!(
5400                warning.contains("reverse proxy") && warning.contains("off-host"),
5401                "warning must name the proxy hazard for {host}: {warning}"
5402            );
5403        }
5404    }
5405
5406    /// Keyless non-loopback bind is refused outright.
5407    #[test]
5408    fn api_key_bind_guard_keyless_non_loopback_refuses_1458() {
5409        let err = api_key_bind_guard(false, "0.0.0.0", false)
5410            .expect_err("keyless non-loopback bind MUST be refused");
5411        assert!(err.contains("refusing to bind to non-loopback"), "{err}");
5412    }
5413
5414    /// The strict opt-in refuses a keyless start even on loopback,
5415    /// because the loopback host string cannot see a fronting proxy.
5416    #[test]
5417    fn api_key_bind_guard_strict_refuses_keyless_loopback_1458() {
5418        let err = api_key_bind_guard(false, "127.0.0.1", true)
5419            .expect_err("strict mode MUST refuse keyless loopback bind");
5420        assert!(
5421            err.contains("AI_MEMORY_REQUIRE_API_KEY"),
5422            "strict refusal must name the knob: {err}"
5423        );
5424        // Strict is moot when a key IS present.
5425        assert_eq!(api_key_bind_guard(true, "127.0.0.1", true).unwrap(), None);
5426    }
5427
5428    /// The strict-mode env parser honours truthy forms and defaults off.
5429    #[test]
5430    fn require_api_key_strict_env_parse_1458() {
5431        unsafe { std::env::remove_var("AI_MEMORY_REQUIRE_API_KEY") };
5432        assert!(!require_api_key_strict());
5433        unsafe { std::env::set_var("AI_MEMORY_REQUIRE_API_KEY", "1") };
5434        assert!(require_api_key_strict());
5435        unsafe { std::env::set_var("AI_MEMORY_REQUIRE_API_KEY", "TRUE") };
5436        assert!(require_api_key_strict());
5437        unsafe { std::env::set_var("AI_MEMORY_REQUIRE_API_KEY", "0") };
5438        assert!(!require_api_key_strict());
5439        unsafe { std::env::remove_var("AI_MEMORY_REQUIRE_API_KEY") };
5440    }
5441
5442    // ----- helpers -------------------------------------------------------
5443
5444    fn args_with_db(_db: &Path) -> ServeArgs {
5445        ServeArgs {
5446            host: "127.0.0.1".to_string(),
5447            port: 0,
5448            tls_cert: None,
5449            tls_key: None,
5450            mtls_allowlist: None,
5451            shutdown_grace_secs: 30,
5452            quorum_writes: 0,
5453            quorum_peers: vec![],
5454            quorum_timeout_ms: 2000,
5455            quorum_client_cert: None,
5456            quorum_client_key: None,
5457            quorum_ca_cert: None,
5458            catchup_interval_secs: 0,
5459            federation_identity: None,
5460            #[cfg(feature = "sal")]
5461            store_url: None,
5462        }
5463    }
5464
5465    fn keyword_app_state(db_path: &Path) -> AppState {
5466        let conn = db::open(db_path).unwrap();
5467        let db_state: Db = Arc::new(Mutex::new((
5468            conn,
5469            db_path.to_path_buf(),
5470            ResolvedTtl::default(),
5471            true,
5472        )));
5473        AppState {
5474            db: db_state,
5475            embedder: Arc::new(None),
5476            vector_index: Arc::new(Mutex::new(None)),
5477            federation: Arc::new(None),
5478            tier_config: Arc::new(FeatureTier::Keyword.config()),
5479            scoring: Arc::new(crate::config::ResolvedScoring::default()),
5480            profile: Arc::new(crate::profile::Profile::core()),
5481            mcp_config: Arc::new(None),
5482            active_keypair: Arc::new(None),
5483            family_embeddings: Arc::new(tokio::sync::RwLock::new(Some(Vec::new()))),
5484            storage_backend: crate::handlers::StorageBackend::Sqlite,
5485            #[cfg(feature = "sal")]
5486            store: {
5487                let s = crate::store::sqlite::SqliteStore::open(db_path)
5488                    .expect("open SqliteStore for keyword_app_state");
5489                Arc::new(s)
5490            },
5491            llm: Arc::new(None),
5492            auto_tag_model: Arc::new(None),
5493            llm_call_timeout: Duration::from_secs(crate::config::DEFAULT_LLM_CALL_TIMEOUT_SECS),
5494            replay_cache: Arc::new(crate::identity::replay::ReplayCache::new()),
5495            verify_require_nonce: false,
5496            federation_nonce_cache: Arc::new(crate::identity::replay::FederationNonceCache::new()),
5497            autonomous_hooks: false,
5498            recall_scope: Arc::new(None),
5499            deferred_audit_queue: Arc::new(None),
5500            admin_agent_ids: Arc::new(Vec::new()),
5501            // v0.7.0 #991 — fresh per-test cache. No invalidation
5502            // required: tests don't share this AppState across rule
5503            // writes (each test that mutates rules opens its own
5504            // `fresh_conn()`).
5505            rule_cache: Arc::new(crate::governance::rule_cache::RuleCache::new()),
5506            resolved_models: Arc::new(crate::config::ResolvedModels::default()),
5507            runtime: crate::runtime_context::RuntimeContext::global_arc(),
5508            max_page_size: crate::handlers::MAX_BULK_SIZE,
5509        }
5510    }
5511
5512    /// Mutex env-var guard. Tests that flip env vars must serialize to
5513    /// avoid clobbering each other; `cargo test --test-threads=2` is the
5514    /// upstream gate but a per-test mutex keeps the tests honest.
5515    fn env_var_lock() -> std::sync::MutexGuard<'static, ()> {
5516        use std::sync::OnceLock;
5517        static LOCK: OnceLock<std::sync::Mutex<()>> = OnceLock::new();
5518        LOCK.get_or_init(|| std::sync::Mutex::new(()))
5519            .lock()
5520            .unwrap_or_else(|e| e.into_inner())
5521    }
5522
5523    // ----- is_write_command ---------------------------------------------
5524
5525    #[test]
5526    fn test_is_write_command_all_variants() {
5527        // Use clap's parser to build every Command variant. This avoids
5528        // having to know each Args struct's required-field set by name —
5529        // we just feed the same argv form an operator would use, and
5530        // assert the predicate returns the right answer.
5531        //
5532        // Writes (post-run WAL checkpoint expected):
5533        let writes: &[&[&str]] = &[
5534            &["ai-memory", "store", "title", "content"],
5535            &["ai-memory", "update", "id123", "--title", "t"],
5536            &["ai-memory", "delete", "id123"],
5537            &["ai-memory", "promote", "id123"],
5538            &["ai-memory", "forget", "pattern"],
5539            &["ai-memory", "link", "a", "b"],
5540            &["ai-memory", "consolidate", "ids"],
5541            &["ai-memory", "resolve", "a", "b"],
5542            &["ai-memory", "sync", "--peer", "/tmp/peer.db"],
5543            &[
5544                "ai-memory",
5545                "sync-daemon",
5546                "--peers",
5547                "http://x",
5548                "--interval-secs",
5549                "60",
5550            ],
5551            &["ai-memory", "import"],
5552            &["ai-memory", "auto-consolidate"],
5553            &["ai-memory", "gc"],
5554        ];
5555        let mut writes_checked = 0;
5556        for argv in writes {
5557            // Skip a variant whose required-field set our argv doesn't
5558            // match (clap will reject it). We still get coverage from the
5559            // variants that parse cleanly, which is the bulk.
5560            if let Ok(cli) = Cli::try_parse_from(*argv) {
5561                assert!(
5562                    is_write_command(&cli.command),
5563                    "expected write for {argv:?}"
5564                );
5565                writes_checked += 1;
5566            }
5567        }
5568        assert!(
5569            writes_checked >= 5,
5570            "expected at least 5 write variants checked, got {writes_checked}"
5571        );
5572
5573        // Reads / no-ops (no checkpoint expected):
5574        let reads: &[&[&str]] = &[
5575            &["ai-memory", "mcp"],
5576            &["ai-memory", "recall", "context"],
5577            &["ai-memory", "search", "query"],
5578            &["ai-memory", "get", "id"],
5579            &["ai-memory", "list"],
5580            &["ai-memory", "stats"],
5581            &["ai-memory", "namespaces"],
5582            &["ai-memory", "export"],
5583            &["ai-memory", "shell"],
5584            &["ai-memory", "man"],
5585            &["ai-memory", "completions", "bash"],
5586            &["ai-memory", "archive", "list"],
5587            &["ai-memory", "agents", "list"],
5588            &["ai-memory", "pending", "list"],
5589            &["ai-memory", "bench"],
5590            &["ai-memory", "serve", "--host", "127.0.0.1", "--port", "0"],
5591        ];
5592        let mut reads_checked = 0;
5593        for argv in reads {
5594            if let Ok(cli) = Cli::try_parse_from(*argv) {
5595                assert!(
5596                    !is_write_command(&cli.command),
5597                    "expected read for {argv:?}"
5598                );
5599                reads_checked += 1;
5600            }
5601        }
5602        assert!(
5603            reads_checked >= 8,
5604            "expected at least 8 read variants checked, got {reads_checked}"
5605        );
5606
5607        // Direct construction of the Args-less variants (10 variants
5608        // covered programmatically by clap above; pin the no-Args ones
5609        // here too for explicitness):
5610        assert!(is_write_command(&Command::Gc));
5611        assert!(!is_write_command(&Command::Stats));
5612        assert!(!is_write_command(&Command::Namespaces));
5613        assert!(!is_write_command(&Command::Export));
5614        assert!(!is_write_command(&Command::Shell));
5615        assert!(!is_write_command(&Command::Man));
5616        assert!(!is_write_command(&Command::Mcp {
5617            tier: "keyword".to_string(),
5618            profile: None,
5619        }));
5620    }
5621
5622    // ----- build_router via lib::build_router ---------------------------
5623
5624    #[tokio::test]
5625    async fn test_router_has_health_endpoint() {
5626        let env = TestEnv::fresh();
5627        let app_state = keyword_app_state(&env.db_path);
5628        let api_key_state = ApiKeyState {
5629            key: None,
5630            mtls_enforced: false,
5631        };
5632        let router = build_router(app_state, api_key_state);
5633        let resp = router
5634            .oneshot(
5635                Request::builder()
5636                    .method("GET")
5637                    .uri("/api/v1/health")
5638                    .body(Body::empty())
5639                    .unwrap(),
5640            )
5641            .await
5642            .unwrap();
5643        assert_eq!(resp.status(), StatusCode::OK);
5644    }
5645
5646    #[tokio::test]
5647    async fn test_router_has_metrics_at_both_paths() {
5648        let env = TestEnv::fresh();
5649        let app_state = keyword_app_state(&env.db_path);
5650        let api_key_state = ApiKeyState {
5651            key: None,
5652            mtls_enforced: false,
5653        };
5654        // /metrics
5655        let r1 = build_router(app_state.clone(), api_key_state.clone())
5656            .oneshot(
5657                Request::builder()
5658                    .method("GET")
5659                    .uri("/metrics")
5660                    .body(Body::empty())
5661                    .unwrap(),
5662            )
5663            .await
5664            .unwrap();
5665        assert_eq!(r1.status(), StatusCode::OK);
5666        // /api/v1/metrics
5667        let r2 = build_router(app_state, api_key_state)
5668            .oneshot(
5669                Request::builder()
5670                    .method("GET")
5671                    .uri("/api/v1/metrics")
5672                    .body(Body::empty())
5673                    .unwrap(),
5674            )
5675            .await
5676            .unwrap();
5677        assert_eq!(r2.status(), StatusCode::OK);
5678    }
5679
5680    #[tokio::test]
5681    async fn test_router_lists_all_v1_memory_routes() {
5682        let env = TestEnv::fresh();
5683        let app_state = keyword_app_state(&env.db_path);
5684        let api_key_state = ApiKeyState {
5685            key: None,
5686            mtls_enforced: false,
5687        };
5688        let router = build_router(app_state, api_key_state);
5689        let resp = router
5690            .oneshot(
5691                Request::builder()
5692                    .method("GET")
5693                    .uri("/api/v1/memories")
5694                    .body(Body::empty())
5695                    .unwrap(),
5696            )
5697            .await
5698            .unwrap();
5699        // Empty DB returns 200 with an empty list — anything non-error
5700        // proves the route is wired in.
5701        assert!(resp.status().is_success(), "got {}", resp.status());
5702    }
5703
5704    #[tokio::test]
5705    async fn test_router_applies_api_key_middleware_when_key_set() {
5706        let env = TestEnv::fresh();
5707        let app_state = keyword_app_state(&env.db_path);
5708        let api_key_state = ApiKeyState {
5709            key: Some("s3cret".to_string()),
5710            mtls_enforced: false,
5711        };
5712        let router = build_router(app_state, api_key_state);
5713        let resp = router
5714            .oneshot(
5715                Request::builder()
5716                    .method("GET")
5717                    .uri("/api/v1/memories")
5718                    .body(Body::empty())
5719                    .unwrap(),
5720            )
5721            .await
5722            .unwrap();
5723        assert_eq!(resp.status(), StatusCode::UNAUTHORIZED);
5724    }
5725
5726    #[tokio::test]
5727    async fn test_router_skips_api_key_middleware_when_key_none() {
5728        let env = TestEnv::fresh();
5729        let app_state = keyword_app_state(&env.db_path);
5730        let api_key_state = ApiKeyState {
5731            key: None,
5732            mtls_enforced: false,
5733        };
5734        let router = build_router(app_state, api_key_state);
5735        let resp = router
5736            .oneshot(
5737                Request::builder()
5738                    .method("GET")
5739                    .uri("/api/v1/memories")
5740                    .body(Body::empty())
5741                    .unwrap(),
5742            )
5743            .await
5744            .unwrap();
5745        assert_eq!(resp.status(), StatusCode::OK);
5746    }
5747
5748    // ----- build_embedder ------------------------------------------------
5749
5750    #[tokio::test]
5751    async fn test_build_embedder_keyword_tier_returns_none() {
5752        let cfg = AppConfig::default();
5753        let emb = build_embedder(FeatureTier::Keyword, &cfg).await;
5754        assert!(emb.is_none());
5755    }
5756
    /// Placeholder for the embedder load-failure branch. The body makes
    /// no calls and asserts nothing, so this test passes vacuously; the
    /// notes inside explain why the real path is not exercised here.
    #[tokio::test]
    async fn test_build_embedder_load_failure_returns_none() {
        // Can't easily induce a load failure without network — skip here.
        // Keyword tier covers the None branch; the ERROR-level fallback
        // path requires a live HF-hub-style mock, which is out of scope
        // for a unit test. The semantic-tier success/failure path is
        // exercised under `feature = "test-with-models"` in the
        // recall integration tests.
        // This test stays as a smoke check — it doesn't attempt to load.
    }
5767
5768    /// Issue #840 coverage — exercise the `app_config.embedding_model`
5769    /// override branch in `build_embedder` (daemon_runtime.rs L1504-1523).
5770    /// The keyword tier has no tier-preset model, so when the override is
5771    /// unparseable the resolution ladder falls through to `None` without
5772    /// attempting an HF-hub fetch. This pins the parse-failure log path
5773    /// and the `None` fallback that the L2 comment documents.
5774    #[tokio::test]
5775    async fn test_build_embedder_invalid_override_falls_back_to_preset() {
5776        let mut cfg = AppConfig::default();
5777        cfg.embedding_model = Some("not-a-real-embedding-model-2026".to_string());
5778        // Keyword tier preset is None; override parse fails → falls back
5779        // to preset None → returns None without touching HF-hub.
5780        let emb = build_embedder(FeatureTier::Keyword, &cfg).await;
5781        assert!(
5782            emb.is_none(),
5783            "unparseable override + keyword tier must return None"
5784        );
5785    }
5786
5787    // ----- resolve_embedder_model (#1521 precedence) --------------------
5788
5789    /// #1521 — the sectioned `[embeddings].model` block must beat the
5790    /// tier preset. Semantic tier presets MiniLM; a section pinning nomic
5791    /// must win. This is the core regression the issue describes (the
5792    /// section was silently dropped in favour of the preset).
5793    #[test]
5794    fn resolve_embedder_model_section_beats_tier_preset() {
5795        let mut cfg = AppConfig::default();
5796        cfg.embeddings = Some(crate::config::EmbeddingsSection {
5797            model: Some("nomic_embed_v15".to_string()),
5798            ..crate::config::EmbeddingsSection::default()
5799        });
5800        let tier = FeatureTier::Semantic.config();
5801        assert_eq!(
5802            resolve_embedder_model(&tier, &cfg),
5803            Some(crate::config::EmbeddingModel::NomicEmbedV15),
5804            "[embeddings].model must override the Semantic tier MiniLM preset"
5805        );
5806    }
5807
5808    /// #1521 — the deprecated flat `embedding_model` field must still be
5809    /// honored when no section is present (backward compat).
5810    #[test]
5811    fn resolve_embedder_model_legacy_flat_still_honored() {
5812        let mut cfg = AppConfig::default();
5813        cfg.embedding_model = Some("nomic_embed_v15".to_string());
5814        let tier = FeatureTier::Semantic.config();
5815        assert_eq!(
5816            resolve_embedder_model(&tier, &cfg),
5817            Some(crate::config::EmbeddingModel::NomicEmbedV15),
5818            "legacy flat embedding_model must still override the preset"
5819        );
5820    }
5821
5822    /// #1521 — when BOTH are set the section wins over the legacy flat
5823    /// field (precedence ladder ordering).
5824    #[test]
5825    fn resolve_embedder_model_section_beats_legacy_flat() {
5826        let mut cfg = AppConfig::default();
5827        cfg.embedding_model = Some("nomic_embed_v15".to_string());
5828        cfg.embeddings = Some(crate::config::EmbeddingsSection {
5829            model: Some("mini_lm_l6_v2".to_string()),
5830            ..crate::config::EmbeddingsSection::default()
5831        });
5832        let tier = FeatureTier::Semantic.config();
5833        assert_eq!(
5834            resolve_embedder_model(&tier, &cfg),
5835            Some(crate::config::EmbeddingModel::MiniLmL6V2),
5836            "[embeddings].model must win over legacy flat embedding_model"
5837        );
5838    }
5839
5840    /// #1521 — a url-only section (no model key) must NOT force a model;
5841    /// the tier preset is kept. Guards against keying the model decision
5842    /// off `ResolvedEmbeddings.model` (which defaults to nomic whenever
5843    /// any `[embeddings]` key is present).
5844    #[test]
5845    fn resolve_embedder_model_url_only_section_keeps_preset() {
5846        let mut cfg = AppConfig::default();
5847        cfg.embeddings = Some(crate::config::EmbeddingsSection {
5848            url: Some("http://127.0.0.1:11435".to_string()),
5849            ..crate::config::EmbeddingsSection::default()
5850        });
5851        let tier = FeatureTier::Semantic.config();
5852        assert_eq!(
5853            resolve_embedder_model(&tier, &cfg),
5854            Some(crate::config::EmbeddingModel::MiniLmL6V2),
5855            "url-only section must keep the Semantic MiniLM preset"
5856        );
5857    }
5858
5859    /// #1521 — a configured model the 2-model daemon embedder cannot
5860    /// construct degrades to the tier preset rather than disabling.
5861    #[test]
5862    fn resolve_embedder_model_unsupported_id_falls_back_to_preset() {
5863        let mut cfg = AppConfig::default();
5864        cfg.embeddings = Some(crate::config::EmbeddingsSection {
5865            model: Some("bge-large-en".to_string()),
5866            ..crate::config::EmbeddingsSection::default()
5867        });
5868        let tier = FeatureTier::Semantic.config();
5869        assert_eq!(
5870            resolve_embedder_model(&tier, &cfg),
5871            Some(crate::config::EmbeddingModel::MiniLmL6V2),
5872            "unsupported model id must fall back to the tier preset"
5873        );
5874    }
5875
5876    /// #1521 — nothing configured at any layer: keyword tier (no preset)
5877    /// yields None; semantic tier yields its MiniLM preset.
5878    #[test]
5879    fn resolve_embedder_model_unconfigured_uses_tier_preset() {
5880        let cfg = AppConfig::default();
5881        assert_eq!(
5882            resolve_embedder_model(&FeatureTier::Keyword.config(), &cfg),
5883            None,
5884            "keyword tier has no preset → None"
5885        );
5886        assert_eq!(
5887            resolve_embedder_model(&FeatureTier::Semantic.config(), &cfg),
5888            Some(crate::config::EmbeddingModel::MiniLmL6V2),
5889            "semantic tier preset is MiniLM"
5890        );
5891    }
5892
5893    // ----- build_vector_index -------------------------------------------
5894
5895    #[test]
5896    fn test_build_vector_index_no_embedder_returns_none() {
5897        let env = TestEnv::fresh();
5898        let conn = db::open(&env.db_path).unwrap();
5899        assert!(build_vector_index(&conn, false).is_none());
5900    }
5901
5902    #[test]
5903    fn test_build_vector_index_empty_db_returns_empty_index() {
5904        let env = TestEnv::fresh();
5905        let conn = db::open(&env.db_path).unwrap();
5906        let idx = build_vector_index(&conn, true);
5907        assert!(
5908            idx.is_some(),
5909            "empty DB with embedder must yield empty index"
5910        );
5911        assert_eq!(idx.unwrap().len(), 0);
5912    }
5913
5914    // ----- spawn_gc_loop / spawn_wal_checkpoint_loop --------------------
5915
5916    #[tokio::test(start_paused = true)]
5917    async fn test_spawn_gc_loop_runs_and_can_be_aborted() {
5918        let env = TestEnv::fresh();
5919        let conn = db::open(&env.db_path).unwrap();
5920        let state: Db = Arc::new(Mutex::new((
5921            conn,
5922            env.db_path.clone(),
5923            ResolvedTtl::default(),
5924            true,
5925        )));
5926        let h = spawn_gc_loop(state, None, Duration::from_secs(60));
5927        // Advance past the first sleep — the loop should now have ticked at
5928        // least once (its sleep arm has resolved). We can't easily observe
5929        // a side effect on an empty DB, so just abort and confirm the
5930        // handle is well-behaved.
5931        tokio::time::advance(Duration::from_secs(61)).await;
5932        // Yield once so the background task can see the tick.
5933        tokio::task::yield_now().await;
5934        h.abort();
5935        // Joining an aborted handle returns `JoinError` with cancelled() == true.
5936        let err = h.await.unwrap_err();
5937        assert!(err.is_cancelled());
5938    }
5939
5940    #[tokio::test(start_paused = true)]
5941    async fn test_spawn_wal_checkpoint_loop_runs_and_can_be_aborted() {
5942        let env = TestEnv::fresh();
5943        let conn = db::open(&env.db_path).unwrap();
5944        let state: Db = Arc::new(Mutex::new((
5945            conn,
5946            env.db_path.clone(),
5947            ResolvedTtl::default(),
5948            true,
5949        )));
5950        let h = spawn_wal_checkpoint_loop(state, Duration::from_secs(60));
5951        // First sleep is interval/2 = 30s. Advance past that + one full
5952        // interval to ensure at least one checkpoint cycle ran.
5953        tokio::time::advance(Duration::from_secs(31)).await;
5954        tokio::task::yield_now().await;
5955        tokio::time::advance(Duration::from_secs(60)).await;
5956        tokio::task::yield_now().await;
5957        h.abort();
5958        let err = h.await.unwrap_err();
5959        assert!(err.is_cancelled());
5960    }
5961
5962    // v0.7.0 K2 — pending_actions timeout sweeper integration test.
5963    //
5964    // Pre-seed a stale `pending_actions` row, spawn the sweep loop with
5965    // a very short interval, await long enough for at least one tick to
5966    // run on the real runtime, and assert the row was transitioned to
5967    // `status='expired'`. This is the daemon-side end-to-end check that
5968    // complements the per-function unit tests in `db::tests`. We use a
5969    // real (non-paused) runtime here because the SQL sweep query
5970    // (`julianday('now')`) consults the OS wall clock, not tokio's
5971    // virtual time — a `start_paused=true` test never observes ticks
5972    // against a back-dated row.
5973    #[tokio::test]
5974    async fn test_spawn_pending_timeout_sweep_loop_marks_stale_expired() {
5975        let env = TestEnv::fresh();
5976        let conn = db::open(&env.db_path).unwrap();
5977        // Seed a 2-hour-old pending row.
5978        let two_h_ago = (chrono::Utc::now() - chrono::Duration::hours(2)).to_rfc3339();
5979        conn.execute(
5980            "INSERT INTO pending_actions
5981             (id, action_type, namespace, payload, requested_by, requested_at,
5982              status)
5983             VALUES ('sweeper-1', 'store', 'ns/a', '{}', 'tester', ?1, 'pending')",
5984            rusqlite::params![two_h_ago],
5985        )
5986        .unwrap();
5987        let state: Db = Arc::new(Mutex::new((
5988            conn,
5989            env.db_path.clone(),
5990            ResolvedTtl::default(),
5991            true,
5992        )));
5993        // 1-hour global default; the seeded 2h-old row is stale.
5994        // Tick every 50ms so the test wraps in well under a second.
5995        let h = spawn_pending_timeout_sweep_loop(
5996            state.clone(),
5997            env.db_path.clone(),
5998            crate::SECS_PER_HOUR,
5999            Duration::from_millis(50),
6000        );
6001        // Poll the row up to 2s; succeed as soon as the sweep flips it.
6002        let mut flipped = false;
6003        for _ in 0..40 {
6004            tokio::time::sleep(Duration::from_millis(50)).await;
6005            let lock = state.lock().await;
6006            let status: String = lock
6007                .0
6008                .query_row(
6009                    "SELECT status FROM pending_actions WHERE id = 'sweeper-1'",
6010                    [],
6011                    |r| r.get(0),
6012                )
6013                .unwrap();
6014            if status == "expired" {
6015                flipped = true;
6016                break;
6017            }
6018        }
6019        h.abort();
6020        let _ = h.await;
6021        assert!(
6022            flipped,
6023            "sweeper must transition the stale row to 'expired' within 2s"
6024        );
6025    }
6026
6027    // ----- passphrase_from_file -----------------------------------------
6028
6029    /// v0.7.0 #1055 helper — write a passphrase file with mode 0400
6030    /// so the post-#1055 permission check accepts it. Tests calling
6031    /// the unhardened `std::fs::write` would inherit the OS default
6032    /// umask (typically 0644 on macOS, group/world-readable) which
6033    /// the production gate now rejects.
6034    #[cfg(unix)]
6035    fn write_passphrase_strict(path: &std::path::Path, body: &str) {
6036        use std::os::unix::fs::PermissionsExt;
6037        std::fs::write(path, body).unwrap();
6038        std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o400)).unwrap();
6039    }
6040    #[cfg(not(unix))]
6041    fn write_passphrase_strict(path: &std::path::Path, body: &str) {
6042        std::fs::write(path, body).unwrap();
6043    }
6044
6045    #[test]
6046    fn test_passphrase_strips_trailing_newline() {
6047        let dir = tempfile::tempdir().unwrap();
6048        let p = dir.path().join("pass");
6049        write_passphrase_strict(&p, "secret\n");
6050        assert_eq!(passphrase_from_file(&p).unwrap(), "secret");
6051    }
6052
6053    #[test]
6054    fn test_passphrase_strips_trailing_crlf() {
6055        let dir = tempfile::tempdir().unwrap();
6056        let p = dir.path().join("pass");
6057        write_passphrase_strict(&p, "secret\r\n");
6058        assert_eq!(passphrase_from_file(&p).unwrap(), "secret");
6059    }
6060
6061    #[test]
6062    fn test_passphrase_empty_file_errors() {
6063        let dir = tempfile::tempdir().unwrap();
6064        let p = dir.path().join("empty");
6065        write_passphrase_strict(&p, "");
6066        let err = passphrase_from_file(&p).unwrap_err();
6067        assert!(
6068            err.to_string().contains("empty"),
6069            "expected 'empty' error, got: {err}"
6070        );
6071    }
6072
6073    #[test]
6074    fn test_passphrase_empty_after_trim_errors() {
6075        // File contains only whitespace lines — after trim_end_matches
6076        // it remains "  \t" (internal whitespace preserved). Only "\n"
6077        // / "\r" alone would trigger the empty-after-strip case.
6078        let dir = tempfile::tempdir().unwrap();
6079        let p = dir.path().join("nl-only");
6080        write_passphrase_strict(&p, "\n");
6081        let err = passphrase_from_file(&p).unwrap_err();
6082        assert!(err.to_string().contains("empty"));
6083    }
6084
6085    #[test]
6086    fn test_passphrase_nonexistent_file_errors() {
6087        let dir = tempfile::tempdir().unwrap();
6088        let p = dir.path().join("does-not-exist");
6089        let err = passphrase_from_file(&p).unwrap_err();
6090        assert!(
6091            err.to_string().contains("reading passphrase file")
6092                || err.to_string().contains("stat passphrase file")
6093                || err.chain().any(|e| e.to_string().contains("No such file"))
6094                || err.chain().any(|e| e.to_string().contains("cannot find")),
6095            "got: {err:#}"
6096        );
6097    }
6098
6099    #[test]
6100    fn test_passphrase_preserves_internal_whitespace() {
6101        let dir = tempfile::tempdir().unwrap();
6102        let p = dir.path().join("pass");
6103        write_passphrase_strict(&p, "my pass phrase\n");
6104        assert_eq!(passphrase_from_file(&p).unwrap(), "my pass phrase");
6105    }
6106
6107    #[cfg(unix)]
6108    #[test]
6109    fn test_passphrase_rejects_lax_permissions_1055() {
6110        // v0.7.0 #1055 — file with mode 0644 (group/world readable)
6111        // is rejected by the permission gate. Pre-#1055 the function
6112        // accepted any readable file regardless of mode.
6113        //
6114        // Serialise on the shared `env_var_lock` so the sibling
6115        // `test_passphrase_lax_perms_env_overrides_1055` test can't
6116        // race the `AI_MEMORY_PASSPHRASE_FILE_ALLOW_LAX_PERMS` env
6117        // var into a state that bypasses the rejection.
6118        use std::os::unix::fs::PermissionsExt;
6119        let _g = env_var_lock();
6120        // SAFETY: serialised via env_var_lock; clear any stale state
6121        // from a sibling test that exited mid-test.
6122        unsafe { std::env::remove_var("AI_MEMORY_PASSPHRASE_FILE_ALLOW_LAX_PERMS") };
6123        let dir = tempfile::tempdir().unwrap();
6124        let p = dir.path().join("lax");
6125        std::fs::write(&p, "secret\n").unwrap();
6126        std::fs::set_permissions(&p, std::fs::Permissions::from_mode(0o644)).unwrap();
6127        let err = passphrase_from_file(&p).unwrap_err();
6128        let msg = err.to_string();
6129        assert!(
6130            msg.contains("lax permissions") && msg.contains("0400"),
6131            "#1055: expected lax-permission rejection with chmod 0400 hint; got: {msg}"
6132        );
6133        assert!(
6134            msg.contains("AI_MEMORY_PASSPHRASE_FILE_ALLOW_LAX_PERMS"),
6135            "#1055: failure message MUST reference the env-var escape hatch; got: {msg}"
6136        );
6137    }
6138
6139    #[cfg(unix)]
6140    #[test]
6141    fn test_passphrase_lax_perms_env_overrides_1055() {
6142        // v0.7.0 #1055 — operators can opt back into the legacy
6143        // permissive posture via
6144        // `AI_MEMORY_PASSPHRASE_FILE_ALLOW_LAX_PERMS=1`.
6145        use std::os::unix::fs::PermissionsExt;
6146        let _g = env_var_lock();
6147        let dir = tempfile::tempdir().unwrap();
6148        let p = dir.path().join("lax-with-env");
6149        std::fs::write(&p, "secret\n").unwrap();
6150        std::fs::set_permissions(&p, std::fs::Permissions::from_mode(0o644)).unwrap();
6151        // SAFETY: serialised via env_var_lock; the lock guard's
6152        // lifetime brackets the set + remove pair so no sibling
6153        // test observes the intermediate state.
6154        unsafe {
6155            std::env::set_var("AI_MEMORY_PASSPHRASE_FILE_ALLOW_LAX_PERMS", "1");
6156        }
6157        let result = passphrase_from_file(&p);
6158        unsafe {
6159            std::env::remove_var("AI_MEMORY_PASSPHRASE_FILE_ALLOW_LAX_PERMS");
6160        }
6161        assert_eq!(
6162            result.unwrap(),
6163            "secret",
6164            "#1055: env-var escape hatch MUST restore legacy permissive posture"
6165        );
6166    }
6167
6168    // ----- apply_anonymize_default --------------------------------------
6169
6170    #[test]
6171    fn test_anonymize_set_when_config_true_and_env_unset() {
6172        let _g = env_var_lock();
6173        // SAFETY: serialized via env_var_lock.
6174        unsafe { std::env::remove_var("AI_MEMORY_ANONYMIZE") };
6175        let mut cfg = AppConfig::default();
6176        cfg.identity = Some(crate::config::IdentityConfig {
6177            anonymize_default: true,
6178        });
6179        apply_anonymize_default(&cfg);
6180        assert_eq!(std::env::var("AI_MEMORY_ANONYMIZE").unwrap(), "1");
6181        // SAFETY: serialized via env_var_lock.
6182        unsafe { std::env::remove_var("AI_MEMORY_ANONYMIZE") };
6183    }
6184
6185    #[test]
6186    fn test_anonymize_unchanged_when_env_already_set() {
6187        let _g = env_var_lock();
6188        // SAFETY: serialized via env_var_lock.
6189        unsafe { std::env::set_var("AI_MEMORY_ANONYMIZE", "0") };
6190        let mut cfg = AppConfig::default();
6191        cfg.identity = Some(crate::config::IdentityConfig {
6192            anonymize_default: true,
6193        });
6194        apply_anonymize_default(&cfg);
6195        // Env var is left alone — caller-set value wins.
6196        assert_eq!(std::env::var("AI_MEMORY_ANONYMIZE").unwrap(), "0");
6197        // SAFETY: serialized via env_var_lock.
6198        unsafe { std::env::remove_var("AI_MEMORY_ANONYMIZE") };
6199    }
6200
6201    #[test]
6202    fn test_anonymize_unchanged_when_config_false() {
6203        let _g = env_var_lock();
6204        // SAFETY: serialized via env_var_lock.
6205        unsafe { std::env::remove_var("AI_MEMORY_ANONYMIZE") };
6206        let cfg = AppConfig::default();
6207        // Default config is false / None for identity.anonymize_default.
6208        apply_anonymize_default(&cfg);
6209        assert!(std::env::var("AI_MEMORY_ANONYMIZE").is_err());
6210    }
6211
6212    // ----- bootstrap_serve ----------------------------------------------
6213
6214    #[tokio::test]
6215    async fn test_bootstrap_serve_keyword_tier_no_embedder() {
6216        let env = TestEnv::fresh();
6217        let mut cfg = AppConfig::default();
6218        cfg.tier = Some("keyword".to_string());
6219        let args = args_with_db(&env.db_path);
6220        let bs = bootstrap_serve(&env.db_path, &args, &cfg).await.unwrap();
6221        // Keyword tier => no embedder, no vector index.
6222        assert!(bs.app_state.embedder.is_none());
6223        let vi = bs.app_state.vector_index.lock().await;
6224        assert!(vi.is_none());
6225        // Six task handles spawned (v0.7 policy-engine item 3 added
6226        // the deferred-audit supervisor + gc + wal_checkpoint +
6227        // v0.7 K2 pending_actions timeout sweep + v0.7 I3 transcript
6228        // archive→prune lifecycle sweep + v0.7 K8 agent_quotas
6229        // daily-counter reset sweep + #1690 offloaded_blobs TTL sweep).
6230        // v0.7 B3-fix2 gates the family-descriptor embedding precompute
6231        // behind `AI_MEMORY_PRECOMPUTE_FAMILY_EMBEDDINGS=1` (default OFF)
6232        // so it does not contend with HTTP request-path embeds under
6233        // parallel CI load — see the gate site in `bootstrap_serve`
6234        // for the rationale. The task count reverts to seven when the
6235        // env var is unset.
6236        assert_eq!(bs.task_handles.len(), 7);
6237        // Cleanly abort the spawned tasks so they don't leak across tests.
6238        for h in bs.task_handles {
6239            h.abort();
6240        }
6241    }
6242
6243    #[tokio::test]
6244    async fn test_bootstrap_serve_with_api_key_logs_enabled() {
6245        let env = TestEnv::fresh();
6246        let mut cfg = AppConfig::default();
6247        cfg.tier = Some("keyword".to_string());
6248        cfg.api_key = Some("test-key".to_string());
6249        let args = args_with_db(&env.db_path);
6250        let bs = bootstrap_serve(&env.db_path, &args, &cfg).await.unwrap();
6251        assert_eq!(bs.api_key_state.key.as_deref(), Some("test-key"));
6252        for h in bs.task_handles {
6253            h.abort();
6254        }
6255    }
6256
6257    #[tokio::test]
6258    async fn test_bootstrap_serve_federation_disabled_when_quorum_zero() {
6259        let env = TestEnv::fresh();
6260        let mut cfg = AppConfig::default();
6261        cfg.tier = Some("keyword".to_string());
6262        let args = args_with_db(&env.db_path);
6263        let bs = bootstrap_serve(&env.db_path, &args, &cfg).await.unwrap();
6264        assert!(bs.app_state.federation.is_none());
6265        for h in bs.task_handles {
6266            h.abort();
6267        }
6268    }
6269
6270    // ----- W12-F: deeper coverage --------------------------------------
6271    //
6272    // Targets the gaps left after W6 + W7 + D6: `bootstrap_serve` variants
6273    // that require a populated DB or federation, the `run` dispatch arms
6274    // not yet exercised, `cmd_bench` end-to-end with a tiny workload,
6275    // `cmd_migrate` (sal feature), `urlencoding_minimal` direct test,
6276    // and the gc / wal-checkpoint loop bodies executing through one
6277    // tick with a measurable side effect.
6278
6279    // ----- bootstrap_serve federation enabled ---------------------------
6280
6281    #[tokio::test]
6282    async fn test_bootstrap_serve_federation_enabled_attaches_config() {
6283        // quorum_writes=1 + one peer → FederationConfig::build returns
6284        // Some, so app_state.federation is wired in. Catchup loop is
6285        // disabled (catchup_interval_secs=0) — the spawn-catchup branch
6286        // is exercised by federation tests; we only verify wiring here.
6287        let env = TestEnv::fresh();
6288        let mut cfg = AppConfig::default();
6289        cfg.tier = Some("keyword".to_string());
6290        let mut args = args_with_db(&env.db_path);
6291        args.quorum_writes = 1;
6292        args.quorum_peers = vec!["http://127.0.0.1:65530".to_string()];
6293        args.quorum_timeout_ms = 100;
6294        args.catchup_interval_secs = 0;
6295        let bs = bootstrap_serve(&env.db_path, &args, &cfg).await.unwrap();
6296        assert!(bs.app_state.federation.is_some());
6297        for h in bs.task_handles {
6298            h.abort();
6299        }
6300    }
6301
6302    #[tokio::test]
6303    async fn test_bootstrap_serve_federation_enabled_with_catchup_loop() {
6304        // catchup_interval_secs > 0 → spawn_catchup_loop is invoked.
6305        // We can't directly observe the catchup loop's internal handle
6306        // (federation::spawn_catchup_loop returns a JoinHandle owned
6307        // privately by the federation module), but the side branch
6308        // "catchup loop enabled" runs and the bootstrap completes.
6309        let env = TestEnv::fresh();
6310        let mut cfg = AppConfig::default();
6311        cfg.tier = Some("keyword".to_string());
6312        let mut args = args_with_db(&env.db_path);
6313        args.quorum_writes = 1;
6314        args.quorum_peers = vec!["http://127.0.0.1:65531".to_string()];
6315        args.quorum_timeout_ms = 100;
6316        args.catchup_interval_secs = crate::SECS_PER_HOUR as u64; // long enough not to fire
6317        let bs = bootstrap_serve(&env.db_path, &args, &cfg).await.unwrap();
6318        assert!(bs.app_state.federation.is_some());
6319        for h in bs.task_handles {
6320            h.abort();
6321        }
6322    }
6323
6324    #[tokio::test]
6325    async fn test_bootstrap_serve_federation_invalid_peer_errors() {
6326        // FederationConfig::build returns Err on duplicate peer URLs
6327        // (#341). The bootstrap_serve `.context("federation config")`
6328        // wrap turns it into a daemon-startup error.
6329        let env = TestEnv::fresh();
6330        let mut cfg = AppConfig::default();
6331        cfg.tier = Some("keyword".to_string());
6332        let mut args = args_with_db(&env.db_path);
6333        args.quorum_writes = 1;
6334        args.quorum_peers = vec![
6335            "http://127.0.0.1:65532".to_string(),
6336            "http://127.0.0.1:65532/".to_string(), // duplicate after trim
6337        ];
6338        let res = bootstrap_serve(&env.db_path, &args, &cfg).await;
6339        let err = match res {
6340            Ok(_) => panic!("expected error from duplicate peer URLs"),
6341            Err(e) => e,
6342        };
6343        let s = format!("{err:#}");
6344        assert!(
6345            s.contains("federation") || s.contains("duplicate"),
6346            "got: {s}"
6347        );
6348    }
6349
6350    // ----- build_vector_index populated DB ------------------------------
6351
6352    #[test]
6353    fn test_build_vector_index_populated_db_returns_built_index() {
6354        // When the DB has stored embeddings AND the embedder is present,
6355        // `build_vector_index` should return Some(VectorIndex) populated
6356        // with those embeddings rather than an empty one.
6357        let env = TestEnv::fresh();
6358        let conn = db::open(&env.db_path).unwrap();
6359        // Insert one memory + an embedding via the public db helpers.
6360        let now = chrono::Utc::now().to_rfc3339();
6361        let mem = crate::models::Memory {
6362            id: uuid::Uuid::new_v4().to_string(),
6363            tier: crate::models::Tier::Mid,
6364            namespace: "ns".to_string(),
6365            title: "t".to_string(),
6366            content: "c".to_string(),
6367            tags: vec![],
6368            priority: 5,
6369            confidence: 1.0,
6370            source: "test".to_string(),
6371            access_count: 0,
6372            created_at: now.clone(),
6373            updated_at: now,
6374            last_accessed_at: None,
6375            expires_at: None,
6376            metadata: crate::models::default_metadata(),
6377            reflection_depth: 0,
6378            memory_kind: crate::models::MemoryKind::Observation,
6379            entity_id: None,
6380            persona_version: None,
6381            citations: Vec::new(),
6382            source_uri: None,
6383            source_span: None,
6384            confidence_source: crate::models::ConfidenceSource::CallerProvided,
6385            confidence_signals: None,
6386            confidence_decayed_at: None,
6387            version: 1,
6388        };
6389        let id = db::insert(&conn, &mem).unwrap();
6390        db::set_embedding(&conn, &id, &[1.0, 0.0, 0.0]).unwrap();
6391        let idx = build_vector_index(&conn, true).expect("populated index");
6392        assert!(
6393            idx.len() >= 1,
6394            "expected non-empty index, got len={}",
6395            idx.len()
6396        );
6397    }
6398
6399    // ----- #1579 B3: async boot HNSW loader ------------------------------
6400
6401    /// Boot-readiness contract: `spawn_vector_index_boot_load` returns
6402    /// immediately (the daemon can serve requests with the EMPTY
6403    /// index), the outer mutex stays responsive throughout the warm-up,
6404    /// and after the loader finishes the index covers every stored
6405    /// embedding and reports fully-searchable.
6406    #[tokio::test]
6407    async fn b3_1579_boot_loader_warms_index_off_the_startup_path() {
6408        let env = TestEnv::fresh();
6409        let conn = db::open(&env.db_path).unwrap();
6410        let now = chrono::Utc::now().to_rfc3339();
6411        let mut expected_ids = Vec::new();
6412        for i in 0..3 {
6413            let mem = crate::models::Memory {
6414                id: uuid::Uuid::new_v4().to_string(),
6415                tier: crate::models::Tier::Long,
6416                namespace: "ns-b3".to_string(),
6417                title: format!("warm-{i}"),
6418                content: format!("warm body {i}"),
6419                tags: vec![],
6420                priority: 5,
6421                confidence: 1.0,
6422                source: "test".to_string(),
6423                access_count: 0,
6424                created_at: now.clone(),
6425                updated_at: now.clone(),
6426                last_accessed_at: None,
6427                expires_at: None,
6428                metadata: crate::models::default_metadata(),
6429                reflection_depth: 0,
6430                memory_kind: crate::models::MemoryKind::Observation,
6431                entity_id: None,
6432                persona_version: None,
6433                citations: Vec::new(),
6434                source_uri: None,
6435                source_span: None,
6436                confidence_source: crate::models::ConfidenceSource::CallerProvided,
6437                confidence_signals: None,
6438                confidence_decayed_at: None,
6439                version: 1,
6440            };
6441            let id = db::insert(&conn, &mem).unwrap();
6442            let mut v = [0.0_f32; 3];
6443            v[i] = 1.0;
6444            db::set_embedding(&conn, &id, &v).unwrap();
6445            expected_ids.push(id);
6446        }
6447        drop(conn);
6448
6449        // The daemon-shaped state: empty index behind the AppState
6450        // mutex — exactly what `serve` now constructs before binding.
6451        let state: Arc<Mutex<Option<VectorIndex>>> =
6452            Arc::new(Mutex::new(Some(hnsw::VectorIndex::empty())));
6453        let handle = spawn_vector_index_boot_load(env.db_path.clone(), Arc::clone(&state));
6454
6455        // Readiness: the state is immediately lockable (no long-held
6456        // guard) — a request-path access during warm-up must not
6457        // deadlock or block on the graph build.
6458        {
6459            let guard = state.lock().await;
6460            assert!(
6461                guard.is_some(),
6462                "index present (possibly cold) during warm-up"
6463            );
6464        }
6465
6466        tokio::task::spawn_blocking(move || handle.join().expect("loader thread"))
6467            .await
6468            .expect("join task");
6469
6470        let guard = state.lock().await;
6471        let idx = guard.as_ref().expect("index");
6472        assert_eq!(idx.len(), 3, "every stored embedding seeded");
6473        assert!(
6474            idx.is_fully_searchable(),
6475            "loader must drive the #968 rebuild to a swapped-in graph"
6476        );
6477        let hits = idx.search(&[1.0, 0.0, 0.0], 1);
6478        assert_eq!(
6479            hits.first().map(|h| h.id.as_str()),
6480            Some(expected_ids[0].as_str()),
6481            "warmed index serves the seeded rows"
6482        );
6483    }
6484
6485    // ----- gc loop with non-empty side effect ---------------------------
6486    //
6487    // The existing `test_spawn_gc_loop_runs_and_can_be_aborted` only
6488    // covers the empty-DB path where db::gc returns 0. Seeding an expired
6489    // memory and pointing the gc loop at it lets the `Ok(n) if n > 0`
6490    // arm fire.
6491
6492    #[tokio::test(start_paused = true)]
6493    async fn test_spawn_gc_loop_purges_expired_memories() {
6494        let env = TestEnv::fresh();
6495        let conn = db::open(&env.db_path).unwrap();
6496        // Insert an expired memory (expires_at in the past).
6497        let past = (chrono::Utc::now() - chrono::Duration::days(1)).to_rfc3339();
6498        let now = chrono::Utc::now().to_rfc3339();
6499        let mem = crate::models::Memory {
6500            id: uuid::Uuid::new_v4().to_string(),
6501            tier: crate::models::Tier::Short,
6502            namespace: "ns-gc".to_string(),
6503            title: "stale".to_string(),
6504            content: "stale".to_string(),
6505            tags: vec![],
6506            priority: 1,
6507            confidence: 1.0,
6508            source: "test".to_string(),
6509            access_count: 0,
6510            created_at: now.clone(),
6511            updated_at: now,
6512            last_accessed_at: None,
6513            expires_at: Some(past),
6514            metadata: crate::models::default_metadata(),
6515            reflection_depth: 0,
6516            memory_kind: crate::models::MemoryKind::Observation,
6517            entity_id: None,
6518            persona_version: None,
6519            citations: Vec::new(),
6520            source_uri: None,
6521            source_span: None,
6522            confidence_source: crate::models::ConfidenceSource::CallerProvided,
6523            confidence_signals: None,
6524            confidence_decayed_at: None,
6525            version: 1,
6526        };
6527        db::insert(&conn, &mem).unwrap();
6528        drop(conn);
6529
6530        let conn = db::open(&env.db_path).unwrap();
6531        let state: Db = Arc::new(Mutex::new((
6532            conn,
6533            env.db_path.clone(),
6534            ResolvedTtl::default(),
6535            true,
6536        )));
6537        // archive_max_days=Some(1) lets the auto_purge_archive arm
6538        // execute too (covers the second match in the loop body).
6539        let h = spawn_gc_loop(state.clone(), Some(1), Duration::from_secs(60));
6540        // Advance past two full intervals to give both branches multiple
6541        // chances to log under paused time.
6542        tokio::time::advance(Duration::from_secs(61)).await;
6543        tokio::task::yield_now().await;
6544        tokio::time::advance(Duration::from_secs(61)).await;
6545        tokio::task::yield_now().await;
6546        h.abort();
6547        let _ = h.await;
6548    }
6549
6550    // ----- WAL checkpoint loop with measurable cycle --------------------
6551
6552    #[tokio::test(start_paused = true)]
6553    async fn test_spawn_wal_checkpoint_loop_runs_multiple_cycles() {
6554        let env = TestEnv::fresh();
6555        let conn = db::open(&env.db_path).unwrap();
6556        let state: Db = Arc::new(Mutex::new((
6557            conn,
6558            env.db_path.clone(),
6559            ResolvedTtl::default(),
6560            true,
6561        )));
6562        let h = spawn_wal_checkpoint_loop(state, Duration::from_secs(2));
6563        // First sleep is 1s (interval/2), then 2s per cycle. Advance
6564        // past three cycles.
6565        for _ in 0..4 {
6566            tokio::time::advance(Duration::from_secs(2)).await;
6567            tokio::task::yield_now().await;
6568        }
6569        h.abort();
6570        let _ = h.await;
6571    }
6572
6573    // ----- urlencoding_minimal -----------------------------------------
6574
6575    #[test]
6576    fn test_urlencoding_minimal_round_trip() {
6577        // Unreserved characters pass through unchanged.
6578        assert_eq!(urlencoding_minimal("abcXYZ-_.~"), "abcXYZ-_.~");
6579        assert_eq!(urlencoding_minimal("0123456789"), "0123456789");
6580        // Reserved / unsafe characters are percent-encoded.
6581        assert_eq!(urlencoding_minimal("a:b"), "a%3Ab");
6582        assert_eq!(urlencoding_minimal("a/b"), "a%2Fb");
6583        assert_eq!(urlencoding_minimal("a@b"), "a%40b");
6584        assert_eq!(urlencoding_minimal("a+b"), "a%2Bb");
6585        assert_eq!(urlencoding_minimal(" "), "%20");
6586        // Empty string is empty.
6587        assert_eq!(urlencoding_minimal(""), "");
6588        // RFC3339 timestamp shape (sync-daemon real input).
6589        assert_eq!(
6590            urlencoding_minimal("2024-01-02T03:04:05+00:00"),
6591            "2024-01-02T03%3A04%3A05%2B00%3A00"
6592        );
6593    }
6594
6595    // ----- run() dispatch for read-only commands ------------------------
6596    //
6597    // Each test parses a CLI argv via clap, hands the resulting `Cli`
6598    // to `daemon_runtime::run`, and asserts the dispatch path returned
6599    // Ok. We don't assert on stdout because run() writes to the
6600    // process stdout directly — what we care about for coverage is
6601    // that the match arm executed and the inner cli handler returned.
6602
    /// Acquires the shared `env_var_lock()` guard for the `run()` dispatch tests.
    ///
    /// The guard is returned to the caller, so the lock stays held for as long
    /// as the caller keeps the returned value alive.
    fn no_config_env() -> std::sync::MutexGuard<'static, ()> {
        // run() reads `AI_MEMORY_NO_CONFIG` indirectly via the AppConfig
        // we pass. We don't rely on the env directly here, but holding
        // env_var_lock keeps run() tests serialized so they don't race
        // on stdout / global subscribers.
        env_var_lock()
    }
6610
6611    #[tokio::test]
6612    async fn test_run_dispatch_stats_command() {
6613        let _g = no_config_env();
6614        let env = TestEnv::fresh();
6615        let cfg = AppConfig::default();
6616        let cli =
6617            Cli::try_parse_from(["ai-memory", "--db", env.db_path.to_str().unwrap(), "stats"])
6618                .unwrap();
6619        run(cli, &cfg).await.unwrap();
6620    }
6621
6622    #[tokio::test]
6623    async fn test_run_dispatch_namespaces_command() {
6624        let _g = no_config_env();
6625        let env = TestEnv::fresh();
6626        let cfg = AppConfig::default();
6627        let cli = Cli::try_parse_from([
6628            "ai-memory",
6629            "--db",
6630            env.db_path.to_str().unwrap(),
6631            "namespaces",
6632        ])
6633        .unwrap();
6634        run(cli, &cfg).await.unwrap();
6635    }
6636
6637    #[tokio::test]
6638    async fn test_run_dispatch_export_command() {
6639        let _g = no_config_env();
6640        let env = TestEnv::fresh();
6641        let cfg = AppConfig::default();
6642        let cli =
6643            Cli::try_parse_from(["ai-memory", "--db", env.db_path.to_str().unwrap(), "export"])
6644                .unwrap();
6645        run(cli, &cfg).await.unwrap();
6646    }
6647
6648    #[tokio::test]
6649    async fn test_run_dispatch_list_command() {
6650        let _g = no_config_env();
6651        let env = TestEnv::fresh();
6652        let cfg = AppConfig::default();
6653        let cli = Cli::try_parse_from(["ai-memory", "--db", env.db_path.to_str().unwrap(), "list"])
6654            .unwrap();
6655        run(cli, &cfg).await.unwrap();
6656    }
6657
6658    #[tokio::test]
6659    async fn test_run_dispatch_search_command() {
6660        let _g = no_config_env();
6661        let env = TestEnv::fresh();
6662        let cfg = AppConfig::default();
6663        let cli = Cli::try_parse_from([
6664            "ai-memory",
6665            "--db",
6666            env.db_path.to_str().unwrap(),
6667            "search",
6668            "anyq",
6669        ])
6670        .unwrap();
6671        run(cli, &cfg).await.unwrap();
6672    }
6673
6674    #[tokio::test]
6675    async fn test_run_dispatch_archive_list_command() {
6676        let _g = no_config_env();
6677        let env = TestEnv::fresh();
6678        let cfg = AppConfig::default();
6679        let cli = Cli::try_parse_from([
6680            "ai-memory",
6681            "--db",
6682            env.db_path.to_str().unwrap(),
6683            "archive",
6684            "list",
6685        ])
6686        .unwrap();
6687        run(cli, &cfg).await.unwrap();
6688    }
6689
6690    #[tokio::test]
6691    async fn test_run_dispatch_agents_list_command() {
6692        let _g = no_config_env();
6693        let env = TestEnv::fresh();
6694        let cfg = AppConfig::default();
6695        let cli = Cli::try_parse_from([
6696            "ai-memory",
6697            "--db",
6698            env.db_path.to_str().unwrap(),
6699            "agents",
6700            "list",
6701        ])
6702        .unwrap();
6703        run(cli, &cfg).await.unwrap();
6704    }
6705
6706    #[tokio::test]
6707    async fn test_run_dispatch_pending_list_command() {
6708        let _g = no_config_env();
6709        let env = TestEnv::fresh();
6710        let cfg = AppConfig::default();
6711        let cli = Cli::try_parse_from([
6712            "ai-memory",
6713            "--db",
6714            env.db_path.to_str().unwrap(),
6715            "pending",
6716            "list",
6717        ])
6718        .unwrap();
6719        run(cli, &cfg).await.unwrap();
6720    }
6721
6722    #[tokio::test]
6723    async fn test_run_dispatch_completions_command() {
6724        let _g = no_config_env();
6725        let env = TestEnv::fresh();
6726        let cfg = AppConfig::default();
6727        let cli = Cli::try_parse_from([
6728            "ai-memory",
6729            "--db",
6730            env.db_path.to_str().unwrap(),
6731            "completions",
6732            "bash",
6733        ])
6734        .unwrap();
6735        run(cli, &cfg).await.unwrap();
6736    }
6737
6738    #[tokio::test]
6739    async fn test_run_dispatch_man_command() {
6740        let _g = no_config_env();
6741        let env = TestEnv::fresh();
6742        let cfg = AppConfig::default();
6743        let cli = Cli::try_parse_from(["ai-memory", "--db", env.db_path.to_str().unwrap(), "man"])
6744            .unwrap();
6745        run(cli, &cfg).await.unwrap();
6746    }
6747
6748    #[tokio::test]
6749    async fn test_run_dispatch_gc_triggers_post_run_checkpoint() {
6750        // `Gc` is in is_write_command, so result.is_ok() && Some path
6751        // takes the post-run WAL checkpoint branch (lines 638-644).
6752        let _g = no_config_env();
6753        let env = TestEnv::fresh();
6754        let cfg = AppConfig::default();
6755        let cli = Cli::try_parse_from(["ai-memory", "--db", env.db_path.to_str().unwrap(), "gc"])
6756            .unwrap();
6757        run(cli, &cfg).await.unwrap();
6758    }
6759
6760    #[tokio::test]
6761    async fn test_run_dispatch_resolve_command() {
6762        // Seed two memories, then resolve one as superseding the other.
6763        let _g = no_config_env();
6764        let env = TestEnv::fresh();
6765        let id_a = crate::cli::test_utils::seed_memory(&env.db_path, "ns", "old", "old fact");
6766        let id_b = crate::cli::test_utils::seed_memory(&env.db_path, "ns", "new", "new fact");
6767        let cfg = AppConfig::default();
6768        let cli = Cli::try_parse_from([
6769            "ai-memory",
6770            "--db",
6771            env.db_path.to_str().unwrap(),
6772            "resolve",
6773            &id_a,
6774            &id_b,
6775        ])
6776        .unwrap();
6777        run(cli, &cfg).await.unwrap();
6778    }
6779
6780    #[tokio::test]
6781    async fn test_run_dispatch_get_command() {
6782        let _g = no_config_env();
6783        let env = TestEnv::fresh();
6784        let id = crate::cli::test_utils::seed_memory(&env.db_path, "ns", "t", "c");
6785        let cfg = AppConfig::default();
6786        let cli = Cli::try_parse_from([
6787            "ai-memory",
6788            "--db",
6789            env.db_path.to_str().unwrap(),
6790            "get",
6791            &id,
6792        ])
6793        .unwrap();
6794        run(cli, &cfg).await.unwrap();
6795    }
6796
6797    /// v0.7.0 V-4 closeout (#698) — dispatch coverage for the new
6798    /// `verify-signed-events-chain` subcommand. We don't tamper here
6799    /// (the lib-side test suite owns that property); the goal is to
6800    /// exercise the dispatch arm so a `cargo llvm-cov` pass over the
6801    /// daemon_runtime module sees it. On an empty DB the chain holds
6802    /// vacuously and the subcommand exits 0, so `run()` returns
6803    /// Ok(()).
6804    #[tokio::test]
6805    async fn test_run_dispatch_verify_signed_events_chain_command() {
6806        let _g = no_config_env();
6807        let env = TestEnv::fresh();
6808        let cfg = AppConfig::default();
6809        let cli = Cli::try_parse_from([
6810            "ai-memory",
6811            "--db",
6812            env.db_path.to_str().unwrap(),
6813            "verify-signed-events-chain",
6814        ])
6815        .unwrap();
6816        run(cli, &cfg).await.unwrap();
6817    }
6818
6819    #[tokio::test]
6820    async fn test_run_dispatch_promote_triggers_write_checkpoint() {
6821        // `Promote` is in is_write_command — covers the post-run
6822        // checkpoint branch on a different command.
6823        let _g = no_config_env();
6824        let env = TestEnv::fresh();
6825        let id = crate::cli::test_utils::seed_memory(&env.db_path, "ns", "t", "c");
6826        let cfg = AppConfig::default();
6827        let cli = Cli::try_parse_from([
6828            "ai-memory",
6829            "--db",
6830            env.db_path.to_str().unwrap(),
6831            "promote",
6832            &id,
6833        ])
6834        .unwrap();
6835        run(cli, &cfg).await.unwrap();
6836    }
6837
6838    // ----- run() dispatch for bench (cmd_bench end-to-end) --------------
6839
6840    #[tokio::test]
6841    async fn test_run_dispatch_bench_smoke_runs_one_iteration() {
6842        // iterations=1, warmup=0 keeps the workload tiny. The bench
6843        // body builds an in-memory DB internally — no on-disk side
6844        // effects. Covers cmd_bench from top to bottom on the
6845        // human-readable, no-baseline, no-history path.
6846        let _g = no_config_env();
6847        let env = TestEnv::fresh();
6848        let cfg = AppConfig::default();
6849        let cli = Cli::try_parse_from([
6850            "ai-memory",
6851            "--db",
6852            env.db_path.to_str().unwrap(),
6853            "bench",
6854            "--iterations",
6855            "1",
6856            "--warmup",
6857            "0",
6858        ])
6859        .unwrap();
6860        // Bench may fail the budget on a paused-time iter=1 run; we
6861        // accept either Ok or Err here — coverage is the goal.
6862        let _ = run(cli, &cfg).await;
6863    }
6864
6865    #[tokio::test]
6866    async fn test_run_dispatch_bench_json_with_history() {
6867        // Covers --json branch + --history append branch of cmd_bench.
6868        let _g = no_config_env();
6869        let env = TestEnv::fresh();
6870        let history = env.db_path.with_file_name("hist.jsonl");
6871        let cfg = AppConfig::default();
6872        let cli = Cli::try_parse_from([
6873            "ai-memory",
6874            "--db",
6875            env.db_path.to_str().unwrap(),
6876            "bench",
6877            "--iterations",
6878            "1",
6879            "--warmup",
6880            "0",
6881            "--json",
6882            "--history",
6883            history.to_str().unwrap(),
6884        ])
6885        .unwrap();
6886        let _ = run(cli, &cfg).await;
6887        // History file should now exist with at least one line.
6888        if history.exists() {
6889            let content = std::fs::read_to_string(&history).unwrap();
6890            assert!(content.contains("captured_at") || !content.is_empty());
6891        }
6892    }
6893
6894    // ----- run() dispatch for migrate (sal feature) --------------------
6895
6896    #[cfg(feature = "sal")]
6897    #[tokio::test]
6898    async fn test_run_dispatch_migrate_sqlite_to_sqlite_dry_run() {
6899        // Covers cmd_migrate happy path + dry-run / human-output branch.
6900        let _g = no_config_env();
6901        let src_env = TestEnv::fresh();
6902        let dst_env = TestEnv::fresh();
6903        // Seed source so migrate has work to do.
6904        crate::cli::test_utils::seed_memory(&src_env.db_path, "ns-mig", "t", "c");
6905        let from = format!("sqlite://{}", src_env.db_path.display());
6906        let to = format!("sqlite://{}", dst_env.db_path.display());
6907        let cfg = AppConfig::default();
6908        let cli = Cli::try_parse_from([
6909            "ai-memory",
6910            "--db",
6911            src_env.db_path.to_str().unwrap(),
6912            "migrate",
6913            "--from",
6914            &from,
6915            "--to",
6916            &to,
6917            "--dry-run",
6918        ])
6919        .unwrap();
6920        run(cli, &cfg).await.unwrap();
6921    }
6922
6923    #[cfg(feature = "sal")]
6924    #[tokio::test]
6925    async fn test_run_dispatch_migrate_json_output() {
6926        // Covers cmd_migrate --json branch.
6927        let _g = no_config_env();
6928        let src_env = TestEnv::fresh();
6929        let dst_env = TestEnv::fresh();
6930        crate::cli::test_utils::seed_memory(&src_env.db_path, "ns-mig", "t", "c");
6931        let from = format!("sqlite://{}", src_env.db_path.display());
6932        let to = format!("sqlite://{}", dst_env.db_path.display());
6933        let cfg = AppConfig::default();
6934        let cli = Cli::try_parse_from([
6935            "ai-memory",
6936            "--db",
6937            src_env.db_path.to_str().unwrap(),
6938            "migrate",
6939            "--from",
6940            &from,
6941            "--to",
6942            &to,
6943            "--json",
6944        ])
6945        .unwrap();
6946        run(cli, &cfg).await.unwrap();
6947    }
6948
6949    // ----- run() with passphrase file (covers lines 372-374) ------------
6950
6951    #[tokio::test]
6952    async fn test_run_with_db_passphrase_file_exports_env() {
6953        // Covers the `--db-passphrase-file` branch in run() (lines
6954        // 371-375) which calls passphrase_from_file then sets
6955        // AI_MEMORY_DB_PASSPHRASE in the environment.
6956        let _g = env_var_lock();
6957        // SAFETY: serialized via env_var_lock.
6958        unsafe { std::env::remove_var("AI_MEMORY_DB_PASSPHRASE") };
6959        let env = TestEnv::fresh();
6960        let pass_path = env.db_path.with_file_name("pass");
6961        std::fs::write(&pass_path, "test-passphrase\n").unwrap();
6962        // v0.7.0 #1055 — the production `passphrase_from_file` gate
6963        // rejects group/world-readable passphrase files; mirror the
6964        // operator-side 0400 mode here.
6965        #[cfg(unix)]
6966        {
6967            use std::os::unix::fs::PermissionsExt;
6968            std::fs::set_permissions(&pass_path, std::fs::Permissions::from_mode(0o400)).unwrap();
6969        }
6970        let cfg = AppConfig::default();
6971        let cli = Cli::try_parse_from([
6972            "ai-memory",
6973            "--db",
6974            env.db_path.to_str().unwrap(),
6975            "--db-passphrase-file",
6976            pass_path.to_str().unwrap(),
6977            "stats",
6978        ])
6979        .unwrap();
6980        run(cli, &cfg).await.unwrap();
6981        // Env var is now set.
6982        assert_eq!(
6983            std::env::var("AI_MEMORY_DB_PASSPHRASE").unwrap(),
6984            "test-passphrase"
6985        );
6986        // SAFETY: serialized via env_var_lock.
6987        unsafe { std::env::remove_var("AI_MEMORY_DB_PASSPHRASE") };
6988    }
6989
6990    // ----- init_tracing idempotence ------------------------------------
6991
6992    #[test]
6993    fn test_init_tracing_is_idempotent() {
6994        // Covers init_tracing — second call is a harmless no-op
6995        // (try_init returns Err which we ignore). Calling twice from
6996        // the same test exercises the second-call path on a process
6997        // that may or may not already have a global subscriber.
6998        init_tracing();
6999        init_tracing();
7000    }
7001
7002    // ----- serve_http_with_shutdown_future smoke -----------------------
7003    //
7004    // The non-TLS branch of `serve()` delegates here; cover the body
7005    // by binding to a free port, requesting /health, then shutting
7006    // down. This also covers the production code path that
7007    // `daemon_runtime::serve()` uses for the non-TLS case.
7008
7009    #[tokio::test]
7010    async fn test_serve_http_with_shutdown_future_serves_then_stops() {
7011        let env = TestEnv::fresh();
7012        let app_state = keyword_app_state(&env.db_path);
7013        let api_key_state = ApiKeyState {
7014            key: None,
7015            mtls_enforced: false,
7016        };
7017        // Pick a free port via a transient bind.
7018        let port = {
7019            let l = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
7020            let p = l.local_addr().unwrap().port();
7021            drop(l);
7022            p
7023        };
7024        let addr = format!("127.0.0.1:{port}");
7025        let shutdown = Arc::new(Notify::new());
7026        let shutdown_clone = shutdown.clone();
7027        let handle = tokio::spawn(async move {
7028            serve_http_with_shutdown_future(&addr, api_key_state, app_state, async move {
7029                shutdown_clone.notified().await;
7030            })
7031            .await
7032        });
7033        // Give the server a moment to bind, then poke /health.
7034        for _ in 0..40 {
7035            if let Ok(client) = reqwest::Client::builder()
7036                .timeout(Duration::from_millis(200))
7037                .build()
7038                && client
7039                    .get(format!("http://127.0.0.1:{port}/api/v1/health"))
7040                    .send()
7041                    .await
7042                    .is_ok()
7043            {
7044                break;
7045            }
7046            tokio::time::sleep(Duration::from_millis(50)).await;
7047        }
7048        shutdown.notify_one();
7049        let res = handle.await.unwrap();
7050        assert!(res.is_ok(), "serve future returned: {res:?}");
7051    }
7052
7053    // ----- bind error surfacing ----------------------------------------
7054
7055    #[tokio::test]
7056    async fn test_serve_http_with_shutdown_future_bind_failure_errors() {
7057        // An unbindable address (port 1 on Linux/macOS without root)
7058        // should return an Err with the bind context. This covers the
7059        // `with_context` path on the TcpListener::bind line.
7060        let env = TestEnv::fresh();
7061        let app_state = keyword_app_state(&env.db_path);
7062        let api_key_state = ApiKeyState {
7063            key: None,
7064            mtls_enforced: false,
7065        };
7066        // 0.0.0.0:0 succeeds; we want a guaranteed failure. Bind to
7067        // port 1 which requires privileged perms — except on macOS in
7068        // some configs that may succeed. Use a clearly invalid address
7069        // form instead to force a bind-time error.
7070        let res = serve_http_with_shutdown_future(
7071            "definitely-not-an-address:99999",
7072            api_key_state,
7073            app_state,
7074            async {},
7075        )
7076        .await;
7077        assert!(res.is_err(), "expected bind error, got: {res:?}");
7078    }
7079
7080    // ----- v0.7.0 coverage close: dispatch arms for identity/rules/governance ---
7081    //
7082    // The grand-slam integration cascade lifted coverage uniformly except
7083    // for a handful of CLI dispatch arms in `run()` that no run-dispatch
7084    // test had ever entered: `Command::Identity`, `Command::Rules`,
7085    // `Command::Governance`. Each arm is just the stdout/stderr-lock
7086    // boilerplate + a one-line hand-off to the relevant `cli::*::run`
7087    // handler — those handlers already have their own unit tests under
7088    // `src/cli/identity.rs`, `src/cli/rules.rs`,
7089    // `src/cli/governance_migrate.rs`. The missing piece was the dispatch
7090    // boilerplate itself. These three tests exercise the read-only
7091    // (mutation-free, hermetic) verb of each arm so coverage closes
7092    // without adding any production semantics.
7093
7094    #[tokio::test]
7095    async fn test_run_dispatch_identity_list_command() {
7096        // Covers daemon_runtime::run dispatch arm `Command::Identity(a)`:
7097        // exercises the stdout/stderr lock + `cli::identity::run` hand-off.
7098        // `identity list` is read-only and DB-free; passing an empty
7099        // tempdir as --key-dir keeps the test hermetic (no HOME deps).
7100        let _g = no_config_env();
7101        let env = TestEnv::fresh();
7102        let key_dir = env.db_path.parent().unwrap().join("keys");
7103        std::fs::create_dir_all(&key_dir).unwrap();
7104        let cfg = AppConfig::default();
7105        let cli = Cli::try_parse_from([
7106            "ai-memory",
7107            "--db",
7108            env.db_path.to_str().unwrap(),
7109            "identity",
7110            "--key-dir",
7111            key_dir.to_str().unwrap(),
7112            "list",
7113        ])
7114        .unwrap();
7115        run(cli, &cfg).await.unwrap();
7116    }
7117
7118    #[tokio::test]
7119    async fn test_run_dispatch_rules_list_command() {
7120        // Covers daemon_runtime::run dispatch arm `Command::Rules(a)`:
7121        // exercises the stdout/stderr lock + `cli::rules::run` hand-off.
7122        // `rules list` is the documented read-only verb (no operator key
7123        // required per the module-level docstring of src/cli/rules.rs).
7124        // We open the DB once via `db::open` to materialize the full
7125        // schema (including the `governance_rules` table that migration
7126        // 0024 creates + seeds), then let the run() dispatch open its
7127        // own raw rusqlite connection against the same file.
7128        let _g = no_config_env();
7129        let env = TestEnv::fresh();
7130        drop(crate::db::open(&env.db_path).expect("db::open"));
7131        let key_dir = env.db_path.parent().unwrap().join("keys");
7132        std::fs::create_dir_all(&key_dir).unwrap();
7133        let cfg = AppConfig::default();
7134        let cli = Cli::try_parse_from([
7135            "ai-memory",
7136            "--db",
7137            env.db_path.to_str().unwrap(),
7138            "rules",
7139            "--key-dir",
7140            key_dir.to_str().unwrap(),
7141            "list",
7142        ])
7143        .unwrap();
7144        run(cli, &cfg).await.unwrap();
7145    }
7146
7147    #[tokio::test]
7148    async fn test_run_dispatch_governance_migrate_command() {
7149        // Covers daemon_runtime::run dispatch arm `Command::Governance(a)`
7150        // (including the inner `GovernanceAction::MigrateToPermissions`
7151        // match arm): exercises the stdout/stderr lock +
7152        // `cli::governance_migrate::run` hand-off. Dry-run is the
7153        // documented default, so we omit --config-out; the migrator
7154        // reads --config-in, parses the legacy `[governance]` block,
7155        // renders the v0.7 `[[permissions.rules]]` to stdout, and
7156        // returns Ok. No filesystem mutation outside the tempdir.
7157        let _g = no_config_env();
7158        let env = TestEnv::fresh();
7159        let cfg_path = env.db_path.parent().unwrap().join("legacy_cfg.toml");
7160        std::fs::write(
7161            &cfg_path,
7162            r#"
7163[governance]
7164
7165[[governance.policy]]
7166scope = "team/eng/*"
7167action = "write"
7168role = "engineer"
7169decision = "allow"
7170"#,
7171        )
7172        .unwrap();
7173        let cfg = AppConfig::default();
7174        let cli = Cli::try_parse_from([
7175            "ai-memory",
7176            "--db",
7177            env.db_path.to_str().unwrap(),
7178            "governance",
7179            "migrate-to-permissions",
7180            "--config-in",
7181            cfg_path.to_str().unwrap(),
7182        ])
7183        .unwrap();
7184        run(cli, &cfg).await.unwrap();
7185    }
7186
7187    // ----- v0.7.0 coverage close: fold-A2A1.4 mTLS bypass on /sync/* ----
7188    //
7189    // The grand-slam cascade landed `e188503` (fold-A2A1.4) which added 61
7190    // lines to `daemon_runtime.rs`: the `mtls_enforced` computation in
7191    // `bootstrap_serve` (true iff all of `--tls-cert`, `--tls-key`, and
7192    // `--mtls-allowlist` are set), the threaded api-key into
7193    // `FederationConfig::build`, and the differentiated tracing message
7194    // when api-key auth is enabled alongside mTLS. The post-cascade
7195    // coverage gate (run 25892100734) caught the regression at 85.60% on
7196    // `daemon_runtime.rs` — below the 86 floor — because the new
7197    // mtls_enforced=true branch + the bypass exit path through the
7198    // router were never entered by an existing test.
7199    //
7200    // The tests below close the gap by:
7201    //   1. Bootstrapping with all three TLS args set + api_key set so the
7202    //      `if mtls_enforced { tracing::info!(...federation endpoints...) }`
7203    //      branch executes and `api_key_state.mtls_enforced` is observed
7204    //      as true on the returned `ServeBootstrap`.
7205    //   2. Bootstrapping with the half-configured cases (cert+key, no
7206    //      allowlist; allowlist alone) to pin the AND-short-circuit on
7207    //      the `mtls_enforced` predicate.
7208    //   3. Driving the `build_router`-wired `api_key_auth` middleware
7209    //      through `daemon_runtime::build_router` with
7210    //      `mtls_enforced=true` so the `/api/v1/sync/...` bypass path is
7211    //      exercised, and asserting a non-`/sync/` path still 401s
7212    //      without the header.
7213    //
7214    // All hermetic: bootstrap_serve does NOT load the TLS cert / key /
7215    // allowlist files (that happens in `serve()` at the rustls config
7216    // site, after this struct is built), so passing non-existent paths
7217    // is sufficient to flip `mtls_enforced` to true without writing
7218    // real certificates.
7219
7220    #[tokio::test]
7221    async fn test_bootstrap_serve_mtls_enforced_true_with_all_three_tls_args() {
7222        // Covers `let mtls_enforced = ... && ... && ...` with the all-Some
7223        // case (true branch). Paired with `api_key = Some(...)` so the
7224        // outer `if api_key_state.key.is_some()` also fires and the
7225        // `if mtls_enforced { ... } else { ... }` chooses the
7226        // federation-bypass log message.
7227        let env = TestEnv::fresh();
7228        let mut cfg = AppConfig::default();
7229        cfg.tier = Some("keyword".to_string());
7230        cfg.api_key = Some("s3cret".to_string());
7231        let mut args = args_with_db(&env.db_path);
7232        // Paths don't need to exist — bootstrap_serve only inspects
7233        // Option presence to compute `mtls_enforced`. The rustls config
7234        // load that would actually read these files lives in `serve()`,
7235        // which we are NOT calling here.
7236        let cert_path = env.db_path.parent().unwrap().join("cert.pem");
7237        let key_path = env.db_path.parent().unwrap().join("key.pem");
7238        let allowlist_path = env.db_path.parent().unwrap().join("allowlist.json");
7239        args.tls_cert = Some(cert_path);
7240        args.tls_key = Some(key_path);
7241        args.mtls_allowlist = Some(allowlist_path);
7242        let bs = bootstrap_serve(&env.db_path, &args, &cfg).await.unwrap();
7243        assert!(
7244            bs.api_key_state.mtls_enforced,
7245            "mtls_enforced should be true when cert+key+allowlist all set"
7246        );
7247        assert_eq!(bs.api_key_state.key.as_deref(), Some("s3cret"));
7248        for h in bs.task_handles {
7249            h.abort();
7250        }
7251    }
7252
7253    #[tokio::test]
7254    async fn test_bootstrap_serve_mtls_enforced_false_when_allowlist_absent() {
7255        // Covers the AND short-circuit: cert+key set, allowlist None →
7256        // `mtls_enforced = false`. This is the TLS-but-no-mTLS
7257        // half-configured case (the `tracing::warn!("TLS enabled but
7258        // mTLS NOT configured …")` path in `serve()`). Bootstrap_serve
7259        // itself just records the flag as false; the `else` arm of the
7260        // api-key log fires.
7261        let env = TestEnv::fresh();
7262        let mut cfg = AppConfig::default();
7263        cfg.tier = Some("keyword".to_string());
7264        cfg.api_key = Some("only-tls".to_string());
7265        let mut args = args_with_db(&env.db_path);
7266        args.tls_cert = Some(env.db_path.parent().unwrap().join("cert.pem"));
7267        args.tls_key = Some(env.db_path.parent().unwrap().join("key.pem"));
7268        // mtls_allowlist intentionally left None.
7269        let bs = bootstrap_serve(&env.db_path, &args, &cfg).await.unwrap();
7270        assert!(
7271            !bs.api_key_state.mtls_enforced,
7272            "mtls_enforced should be false without --mtls-allowlist"
7273        );
7274        assert_eq!(bs.api_key_state.key.as_deref(), Some("only-tls"));
7275        for h in bs.task_handles {
7276            h.abort();
7277        }
7278    }
7279
7280    #[tokio::test]
7281    async fn test_bootstrap_serve_mtls_enforced_false_when_only_allowlist_set() {
7282        // Covers the AND short-circuit: cert/key None, allowlist Some →
7283        // false. (clap's `requires = "tls_cert"` would block this combo
7284        // at the CLI surface, but we're constructing `ServeArgs`
7285        // directly here so the inner predicate is the only gate. This
7286        // pins the predicate behaviour even if a refactor moves the
7287        // validation back to the call site.)
7288        let env = TestEnv::fresh();
7289        let mut cfg = AppConfig::default();
7290        cfg.tier = Some("keyword".to_string());
7291        let mut args = args_with_db(&env.db_path);
7292        args.mtls_allowlist = Some(env.db_path.parent().unwrap().join("allowlist.json"));
7293        // tls_cert and tls_key intentionally None.
7294        let bs = bootstrap_serve(&env.db_path, &args, &cfg).await.unwrap();
7295        assert!(
7296            !bs.api_key_state.mtls_enforced,
7297            "mtls_enforced should be false without --tls-cert"
7298        );
7299        for h in bs.task_handles {
7300            h.abort();
7301        }
7302    }
7303
7304    #[tokio::test]
7305    async fn test_bootstrap_serve_mtls_enforced_with_federation_threads_api_key() {
7306        // Joint exercise of the two fold-A2A1.4 surfaces in one
7307        // bootstrap: federation outbound carries the configured
7308        // `[api] api_key` (line ~2155, `app_config.api_key.clone()` into
7309        // `FederationConfig::build`) AND `mtls_enforced` is true.
7310        // Confirms both the api_key thread-through and the new tracing
7311        // message are activated together — the exact procurement-grade
7312        // deployment shape #702 was filed for.
7313        let env = TestEnv::fresh();
7314        let mut cfg = AppConfig::default();
7315        cfg.tier = Some("keyword".to_string());
7316        cfg.api_key = Some("fed-key".to_string());
7317        let mut args = args_with_db(&env.db_path);
7318        args.tls_cert = Some(env.db_path.parent().unwrap().join("cert.pem"));
7319        args.tls_key = Some(env.db_path.parent().unwrap().join("key.pem"));
7320        args.mtls_allowlist = Some(env.db_path.parent().unwrap().join("allowlist.json"));
7321        args.quorum_writes = 1;
7322        args.quorum_peers = vec!["http://127.0.0.1:65520".to_string()];
7323        args.quorum_timeout_ms = 100;
7324        let bs = bootstrap_serve(&env.db_path, &args, &cfg).await.unwrap();
7325        assert!(bs.api_key_state.mtls_enforced);
7326        assert_eq!(bs.api_key_state.key.as_deref(), Some("fed-key"));
7327        assert!(
7328            bs.app_state.federation.is_some(),
7329            "federation should be wired when quorum_writes>0 and peers nonempty"
7330        );
7331        for h in bs.task_handles {
7332            h.abort();
7333        }
7334    }
7335
7336    // ----- v0.7.0 coverage close: api_key_auth bypass through build_router ---
7337    //
7338    // Drives the `api_key_auth` middleware path with `mtls_enforced=true`
7339    // and a configured key. Two probes:
7340    //   - `/api/v1/sync/push` without `x-api-key` should be admitted to
7341    //     the handler stack (the federation-bypass arm). The handler
7342    //     itself rejects on payload shape, but the status is not 401 —
7343    //     proving the bypass fired.
7344    //   - `/api/v1/memories` without `x-api-key` should still 401, since
7345    //     the bypass is scoped to `/api/v1/sync/*`.
7346
7347    #[tokio::test]
7348    async fn test_build_router_with_mtls_enforced_allows_sync_without_api_key() {
7349        let env = TestEnv::fresh();
7350        let app_state = keyword_app_state(&env.db_path);
7351        let api_key_state = ApiKeyState {
7352            key: Some("s3cret".to_string()),
7353            mtls_enforced: true,
7354        };
7355        let router = build_router(app_state, api_key_state);
7356        // POST /api/v1/sync/push with empty body — the api_key_auth
7357        // middleware should NOT 401 (bypass scope hit). The downstream
7358        // handler will likely return 400/415/422 for a malformed body;
7359        // anything other than 401 proves the bypass executed.
7360        let resp = router
7361            .oneshot(
7362                Request::builder()
7363                    .method("POST")
7364                    .uri("/api/v1/sync/push")
7365                    .header(crate::HEADER_CONTENT_TYPE, crate::MIME_JSON)
7366                    .body(Body::from("{}"))
7367                    .unwrap(),
7368            )
7369            .await
7370            .unwrap();
7371        assert_ne!(
7372            resp.status(),
7373            StatusCode::UNAUTHORIZED,
7374            "expected /sync/* to bypass api-key with mtls_enforced=true, got 401"
7375        );
7376    }
7377
7378    #[tokio::test]
7379    async fn test_build_router_with_mtls_enforced_still_requires_key_on_non_sync() {
7380        let env = TestEnv::fresh();
7381        let app_state = keyword_app_state(&env.db_path);
7382        let api_key_state = ApiKeyState {
7383            key: Some("s3cret".to_string()),
7384            mtls_enforced: true,
7385        };
7386        let router = build_router(app_state, api_key_state);
7387        // GET /api/v1/memories without x-api-key — bypass is scoped to
7388        // /api/v1/sync/*, so this should still 401.
7389        let resp = router
7390            .oneshot(
7391                Request::builder()
7392                    .method("GET")
7393                    .uri("/api/v1/memories")
7394                    .body(Body::empty())
7395                    .unwrap(),
7396            )
7397            .await
7398            .unwrap();
7399        assert_eq!(
7400            resp.status(),
7401            StatusCode::UNAUTHORIZED,
7402            "non-/sync/ path must still demand x-api-key even with mtls_enforced"
7403        );
7404    }
7405
7406    #[tokio::test]
7407    async fn test_build_router_with_mtls_off_does_not_bypass_sync() {
7408        // Pins the negative: mtls_enforced=false → /sync/* WITHOUT the
7409        // header still gets 401. This is the v0.6.x backward-compatible
7410        // posture (api-key required on every path when set, no bypass).
7411        let env = TestEnv::fresh();
7412        let app_state = keyword_app_state(&env.db_path);
7413        let api_key_state = ApiKeyState {
7414            key: Some("s3cret".to_string()),
7415            mtls_enforced: false,
7416        };
7417        let router = build_router(app_state, api_key_state);
7418        let resp = router
7419            .oneshot(
7420                Request::builder()
7421                    .method("POST")
7422                    .uri("/api/v1/sync/push")
7423                    .header(crate::HEADER_CONTENT_TYPE, crate::MIME_JSON)
7424                    .body(Body::from("{}"))
7425                    .unwrap(),
7426            )
7427            .await
7428            .unwrap();
7429        assert_eq!(
7430            resp.status(),
7431            StatusCode::UNAUTHORIZED,
7432            "without mtls_enforced, /sync/* must still demand x-api-key"
7433        );
7434    }
7435
7436    #[tokio::test]
7437    async fn test_build_router_with_mtls_enforced_accepts_valid_key_on_non_sync() {
7438        // Defense-in-depth: even with mtls_enforced=true, supplying the
7439        // correct key on a non-/sync/ path still succeeds. Pins that
7440        // the bypass branch does not steal requests that legitimately
7441        // carry the header.
7442        let env = TestEnv::fresh();
7443        let app_state = keyword_app_state(&env.db_path);
7444        let api_key_state = ApiKeyState {
7445            key: Some("s3cret".to_string()),
7446            mtls_enforced: true,
7447        };
7448        let router = build_router(app_state, api_key_state);
7449        let resp = router
7450            .oneshot(
7451                Request::builder()
7452                    .method("GET")
7453                    .uri("/api/v1/memories")
7454                    .header("x-api-key", "s3cret")
7455                    .body(Body::empty())
7456                    .unwrap(),
7457            )
7458            .await
7459            .unwrap();
7460        assert!(
7461            resp.status().is_success(),
7462            "valid api-key on non-/sync/ path should succeed, got {}",
7463            resp.status()
7464        );
7465    }
7466
7467    // -----------------------------------------------------------------
7468    // v0.7-polish coverage recovery (issue #767) — Cluster D + G wires:
7469    // spawn_gc_loop_with_shadow_retention, spawn_transcript_lifecycle_
7470    // sweep_loop, spawn_agent_quota_reset_loop. Smoke-tests that prove
7471    // the loops spawn, abort cleanly, and tolerate a clean state.
7472    // -----------------------------------------------------------------
7473
7474    #[tokio::test]
7475    async fn test_spawn_gc_loop_with_shadow_retention_runs_and_can_be_aborted() {
7476        let env = TestEnv::fresh();
7477        let conn = db::open(&env.db_path).unwrap();
7478        let state: Db = Arc::new(Mutex::new((
7479            conn,
7480            env.db_path.clone(),
7481            ResolvedTtl::default(),
7482            true,
7483        )));
7484        // Long interval — we just want the spawn + abort cycle.
7485        let h = spawn_gc_loop_with_shadow_retention(state, Some(30), 7, Duration::from_secs(60));
7486        // Give it a brief moment to enter the loop body.
7487        tokio::time::sleep(Duration::from_millis(20)).await;
7488        h.abort();
7489        let _ = h.await;
7490    }
7491
7492    #[tokio::test]
7493    async fn test_spawn_gc_loop_with_shadow_retention_zero_days_is_opt_out() {
7494        // shadow_retention_days <= 0 should be tolerated — the shadow
7495        // gc helper short-circuits without touching the table.
7496        let env = TestEnv::fresh();
7497        let conn = db::open(&env.db_path).unwrap();
7498        let state: Db = Arc::new(Mutex::new((
7499            conn,
7500            env.db_path.clone(),
7501            ResolvedTtl::default(),
7502            true,
7503        )));
7504        let h = spawn_gc_loop_with_shadow_retention(
7505            state,
7506            None,
7507            0, // operator opt-out
7508            Duration::from_secs(60),
7509        );
7510        tokio::time::sleep(Duration::from_millis(20)).await;
7511        h.abort();
7512        let _ = h.await;
7513    }
7514
7515    #[tokio::test]
7516    async fn test_spawn_transcript_lifecycle_sweep_loop_runs_and_can_be_aborted() {
7517        let env = TestEnv::fresh();
7518        let conn = db::open(&env.db_path).unwrap();
7519        let state: Db = Arc::new(Mutex::new((
7520            conn,
7521            env.db_path.clone(),
7522            ResolvedTtl::default(),
7523            true,
7524        )));
7525        let cfg = crate::config::TranscriptsConfig::default();
7526        let h = spawn_transcript_lifecycle_sweep_loop(state, cfg, Duration::from_secs(60));
7527        tokio::time::sleep(Duration::from_millis(20)).await;
7528        h.abort();
7529        let _ = h.await;
7530    }
7531
7532    #[tokio::test]
7533    async fn test_spawn_agent_quota_reset_loop_runs_and_can_be_aborted() {
7534        let env = TestEnv::fresh();
7535        let conn = db::open(&env.db_path).unwrap();
7536        let state: Db = Arc::new(Mutex::new((
7537            conn,
7538            env.db_path.clone(),
7539            ResolvedTtl::default(),
7540            true,
7541        )));
7542        let h = spawn_agent_quota_reset_loop(state, Duration::from_secs(60));
7543        tokio::time::sleep(Duration::from_millis(20)).await;
7544        h.abort();
7545        let _ = h.await;
7546    }
7547
    #[tokio::test]
    async fn test_bootstrap_serve_sec2_fail_closed_when_pubkey_missing_and_rules_enabled() {
        // v0.7.0 SEC-2 (Cluster D) — when `[governance]
        // require_operator_pubkey = true` AND `governance_rules` has
        // any `enabled = 1` row AND no operator pubkey is resolved,
        // bootstrap_serve MUST refuse to start. This pins the
        // fail-closed posture documented at lines 2118-2153 in
        // bootstrap_serve.
        //
        // Dev-host hermeticity (issue #1370, 2026-05-27). The test
        // pre-#1370 cleared `AI_MEMORY_OPERATOR_PUBKEY` but did not
        // engage the `ForceNoOperatorPubkeyGuard` escape hatch added
        // under issue #819. `resolve_operator_pubkey()` checks TWO
        // sources — the env var AND `~/.config/ai-memory/operator.key.pub`
        // on disk (via `dirs::config_dir()`). On a dev host that has
        // staged a real operator pubkey at the platform config dir
        // (e.g. `~/Library/Application Support/ai-memory/` on macOS),
        // the on-disk lookup wins, `pubkey_resolved = true`, and the
        // SEC-2 fail-closed bail at `bootstrap_serve` never fires.
        // CI passes on clean-HOME runners; local fails. The guard
        // below forces `resolve_operator_pubkey()` to return None
        // for the test scope, matching the CI posture deterministically.
        let _no_pubkey_guard = crate::governance::rules_store::force_no_operator_pubkey_for_test();
        // Held for the whole test so the env-var mutation below is
        // serialised against other tests that take the same lock.
        let _gate = env_var_lock();
        let env = TestEnv::fresh();
        let conn = db::open(&env.db_path).unwrap();
        // Create the governance_rules table + insert one enabled row.
        conn.execute_batch(
            "CREATE TABLE IF NOT EXISTS governance_rules (
                 id TEXT PRIMARY KEY,
                 kind TEXT NOT NULL,
                 matcher TEXT NOT NULL,
                 severity TEXT NOT NULL CHECK (severity IN ('refuse','warn','log')),
                 reason TEXT NOT NULL,
                 namespace TEXT NOT NULL DEFAULT '_global',
                 created_by TEXT NOT NULL,
                 created_at INTEGER NOT NULL,
                 enabled INTEGER NOT NULL DEFAULT 1,
                 signature BLOB,
                 attest_level TEXT NOT NULL DEFAULT 'unsigned'
             );",
        )
        .unwrap();
        // The INSERT leaves `enabled` out, so the row picks up the
        // column default of 1 declared in the CREATE TABLE above.
        conn.execute(
            "INSERT INTO governance_rules (id, kind, matcher, severity, reason, created_by, created_at)
             VALUES ('R1', 'bash', '{\"k\":\"v\"}', 'refuse', 'test', 'tester', 100)",
            [],
        )
        .unwrap();
        // Release this connection before bootstrap_serve is handed the
        // same database path.
        drop(conn);
        // Build cfg with require_operator_pubkey = true.
        let mut cfg = AppConfig::default();
        cfg.tier = Some("keyword".to_string());
        cfg.governance = Some(crate::config::GovernanceConfig {
            require_operator_pubkey: true,
        });
        // Ensure no pubkey is resolved by clearing the env var. The
        // previous value (if any) is remembered so it can be put back.
        let prior = std::env::var("AI_MEMORY_OPERATOR_PUBKEY").ok();
        // SAFETY: `_gate` (env_var_lock) is held, serialising env
        // mutation across the tests that share that lock.
        unsafe { std::env::remove_var("AI_MEMORY_OPERATOR_PUBKEY") };

        let args = args_with_db(&env.db_path);
        let res = bootstrap_serve(&env.db_path, &args, &cfg).await;
        // Restore env. This happens before any assertion so a failing
        // assert cannot leave the variable cleared.
        if let Some(v) = prior {
            // SAFETY: still under `_gate`; see the removal above.
            unsafe { std::env::set_var("AI_MEMORY_OPERATOR_PUBKEY", v) };
        }
        // `{e:#}` uses the error's alternate Display form, so the match
        // below runs against that rendering.
        let err = match res {
            Err(e) => format!("{e:#}"),
            Ok(_) => panic!("expected SEC-2 fail-closed refusal"),
        };
        assert!(
            err.contains("SEC-2 fail-closed") || err.contains("require_operator_pubkey"),
            "got: {err}"
        );
    }
7623
7624    #[tokio::test]
7625    async fn test_build_llm_client_returns_none_for_keyword_tier() {
7626        // FeatureTier::Keyword has no llm_model, so the early-return
7627        // path fires without spawning any blocking work.
7628        // FX-F1: hold the env-guard so concurrent tests can't flip
7629        // AI_MEMORY_LLM_BACKEND under us mid-resolve.
7630        let _guard = env_var_lock();
7631        fx_f1_clear_llm_env();
7632        let cfg = AppConfig::default();
7633        let res = build_llm_client(FeatureTier::Keyword, &cfg).await;
7634        assert!(res.is_none(), "keyword tier must not build an LLM client");
7635    }
7636
7637    #[tokio::test]
7638    async fn test_build_llm_client_returns_none_when_ollama_unreachable() {
7639        // Smart tier requires LLM, but pointing at an unreachable URL
7640        // exercises the constructor-error path (final Err arm).
7641        let _guard = env_var_lock();
7642        fx_f1_clear_llm_env();
7643        let mut cfg = AppConfig::default();
7644        cfg.ollama_url = Some("http://127.0.0.1:1".to_string());
7645        let res = build_llm_client(FeatureTier::Smart, &cfg).await;
7646        // Either Some (constructor still returns Ok if it doesn't ping)
7647        // or None — both are valid: the assert proves the function does
7648        // not panic on an unreachable URL.
7649        let _ = res;
7650    }
7651
7652    #[test]
7653    fn test_build_vector_index_returns_some_when_embedder_present_and_db_empty() {
7654        // The else-branch of build_vector_index — when the embedder is
7655        // present and no rows exist, the helper still returns Some
7656        // (empty index). Already pinned by an existing test; this one
7657        // pins the explicit "some-non-empty" path by inserting a memory
7658        // with an embedding first.
7659        let env = TestEnv::fresh();
7660        let conn = db::open(&env.db_path).unwrap();
7661        let mem = crate::models::Memory {
7662            id: "vi-1".to_string(),
7663            tier: crate::models::Tier::Mid,
7664            namespace: "test".to_string(),
7665            title: "t".to_string(),
7666            content: "c".to_string(),
7667            tags: vec![],
7668            priority: 5,
7669            confidence: 1.0,
7670            source: "test".to_string(),
7671            access_count: 0,
7672            created_at: chrono::Utc::now().to_rfc3339(),
7673            updated_at: chrono::Utc::now().to_rfc3339(),
7674            last_accessed_at: None,
7675            expires_at: None,
7676            metadata: crate::models::default_metadata(),
7677            reflection_depth: 0,
7678            memory_kind: crate::models::MemoryKind::Observation,
7679            entity_id: None,
7680            persona_version: None,
7681            citations: Vec::new(),
7682            source_uri: None,
7683            source_span: None,
7684            confidence_source: crate::models::ConfidenceSource::CallerProvided,
7685            confidence_signals: None,
7686            confidence_decayed_at: None,
7687            version: 1,
7688        };
7689        let inserted_id = db::insert(&conn, &mem).unwrap();
7690        // Write a real-length embedding (384 dims of f32).
7691        let vec_data: Vec<f32> = (0..384).map(|i| i as f32 * 0.001).collect();
7692        db::set_embedding(&conn, &inserted_id, &vec_data).unwrap();
7693        let idx = build_vector_index(&conn, true);
7694        assert!(idx.is_some());
7695    }
7696
7697    // ===========================================================================
7698    // Issue #1169 — resolve_configured_embedding_dim resolution ladder
7699    // ===========================================================================
7700    //
7701    // These tests exercise the helper extracted from the postgres-bootstrap
7702    // path so the new code lands within the daemon_runtime.rs coverage floor.
7703    // The three resolution-ladder arms (resolver, legacy enum, tier preset)
7704    // are each pinned independently.
7705
7706    /// v0.7.x (#1169) — operator picks a model that's in
7707    /// [`crate::config::KNOWN_EMBEDDING_DIMS`]. The first arm of the
7708    /// ladder (resolver) wins and returns the canonical dim.
7709    #[cfg(feature = "sal")]
7710    #[test]
7711    fn resolve_configured_embedding_dim_resolver_arm_wins_for_known_model() {
7712        use crate::config::{AppConfig, EmbeddingsSection, FeatureTier};
7713
7714        let cfg = AppConfig {
7715            embeddings: Some(EmbeddingsSection {
7716                backend: Some("ollama".to_string()),
7717                model: Some("bge-large-en".to_string()),
7718                ..EmbeddingsSection::default()
7719            }),
7720            ..AppConfig::default()
7721        };
7722        let tier_config = FeatureTier::Autonomous.config();
7723        let dim = resolve_configured_embedding_dim(&cfg, &tier_config);
7724        assert_eq!(
7725            dim,
7726            Some(1024),
7727            "bge-large-en is in KNOWN_EMBEDDING_DIMS at 1024-dim; resolver wins"
7728        );
7729    }
7730
7731    /// v0.7.x (#1169) — operator leaves the new `[embeddings]` section
7732    /// unset AND has the legacy flat field `embedding_model =
7733    /// "nomic_embed_v15"`. The first arm returns the canonicalised
7734    /// resolver dim (the canonicaliser maps `nomic_embed_v15` to
7735    /// `nomic-embed-text-v1.5` which IS in the table) — so the
7736    /// resolver arm still wins, validating that the legacy alias path
7737    /// composes cleanly with the resolver.
7738    #[cfg(feature = "sal")]
7739    #[test]
7740    fn resolve_configured_embedding_dim_handles_legacy_alias_via_resolver() {
7741        use crate::config::{AppConfig, FeatureTier};
7742
7743        let cfg = AppConfig {
7744            embedding_model: Some("nomic_embed_v15".to_string()),
7745            ..AppConfig::default()
7746        };
7747        let tier_config = FeatureTier::Autonomous.config();
7748        let dim = resolve_configured_embedding_dim(&cfg, &tier_config);
7749        assert_eq!(
7750            dim,
7751            Some(768),
7752            "legacy alias nomic_embed_v15 canonicalises to nomic-embed-text-v1.5 (768)"
7753        );
7754    }
7755
7756    /// v0.7.x (#1169) — operator hasn't configured embeddings at all
7757    /// AND the tier preset has an embedder family — the tier-preset
7758    /// arm is the last-resort fallback.
7759    #[cfg(feature = "sal")]
7760    #[test]
7761    fn resolve_configured_embedding_dim_falls_back_to_tier_preset_when_no_override() {
7762        use crate::config::{AppConfig, FeatureTier};
7763
7764        let cfg = AppConfig::default();
7765        let tier_config = FeatureTier::Autonomous.config();
7766        let dim = resolve_configured_embedding_dim(&cfg, &tier_config);
7767        // Autonomous tier preset is NomicEmbedV15 (768). The resolver
7768        // also defaults to nomic-embed-text-v1.5 → 768 via the
7769        // KNOWN_EMBEDDING_DIMS table, so either arm gives the same
7770        // answer for the no-config case.
7771        assert_eq!(dim, Some(768));
7772    }
7773
7774    /// v0.7.x (#1169) — keyword tier has no embedder; resolver returns
7775    /// `None` (and the postgres bootstrap then uses its hardcoded
7776    /// `DEFAULT_EMBEDDING_DIM` fallback per the
7777    /// `configured_embedding_dim` doc comment on `build_store_handle`).
7778    #[cfg(feature = "sal")]
7779    #[test]
7780    fn resolve_configured_embedding_dim_returns_none_for_keyword_tier() {
7781        use crate::config::{AppConfig, FeatureTier};
7782
7783        let cfg = AppConfig::default();
7784        let tier_config = FeatureTier::Keyword.config();
7785        let dim = resolve_configured_embedding_dim(&cfg, &tier_config);
7786        // Keyword tier preset has `embedding_model = None`. The
7787        // resolver still returns `Some(768)` from the
7788        // canonical-default model id — that's the correct behavior
7789        // because the operator can ALWAYS use an embedder regardless
7790        // of tier preset; the tier preset only controls reranker /
7791        // synthesis primitives. The keyword-tier-disabled-embedder
7792        // posture is enforced at the `build_embedder` site, NOT
7793        // here. This test pins that subtlety: when the operator's
7794        // config has no [embeddings] block AND no legacy flat field
7795        // AND the tier preset disables embeddings, the resolver
7796        // still defaults to "nomic-embed-text-v1.5" (the wire-side
7797        // default at `resolve_embeddings`) — which IS in the table
7798        // — so the function returns `Some(768)` even on keyword
7799        // tier. The postgres-bootstrap caller treats that as the
7800        // configured dim regardless; pre-loading an unused 768-dim
7801        // pgvector column is operationally cheap.
7802        assert_eq!(dim, Some(768));
7803    }
7804
7805    /// v0.7.x (#1169) — operator picks a model that's NOT in
7806    /// [`crate::config::KNOWN_EMBEDDING_DIMS`] AND uses the new
7807    /// `[embeddings]` block (so the legacy flat field is absent).
7808    /// The resolver returns `None`; the legacy arm can't parse the
7809    /// model into the enum; the tier-preset arm wins as the final
7810    /// fallback. Pins the back-compat invariant for unrecognised
7811    /// model ids: pre-#1169 callers who relied on a number being
7812    /// present continue to see one.
7813    #[cfg(feature = "sal")]
7814    #[test]
7815    fn resolve_configured_embedding_dim_unknown_model_falls_to_tier_preset() {
7816        use crate::config::{AppConfig, EmbeddingsSection, FeatureTier};
7817
7818        let cfg = AppConfig {
7819            embeddings: Some(EmbeddingsSection {
7820                backend: Some("ollama".to_string()),
7821                model: Some("my-private-fork-v0.1".to_string()),
7822                ..EmbeddingsSection::default()
7823            }),
7824            ..AppConfig::default()
7825        };
7826        let tier_config = FeatureTier::Autonomous.config();
7827        let dim = resolve_configured_embedding_dim(&cfg, &tier_config);
7828        // Autonomous tier preset (NomicEmbedV15) → 768.
7829        assert_eq!(dim, Some(768));
7830    }
7831
7832    // ===========================================================================
7833    // FX-F1 (2026-05-27) — coverage uplift for the FX-D1 `build_llm_client`
7834    // overhaul. The pre-FX-F1 surface had two thin async tests
7835    // (Keyword early-return + Smart unreachable URL). FX-F1 adds the
7836    // missing branches: explicit operator-intent (Legacy / Config /
7837    // Env source via `ollama_url` or `llm.backend`), the Semantic
7838    // early-return path, every LLM backend's no-key Err arm, and an
7839    // Ollama happy-path through `build_from_resolved_async` against a
7840    // wiremock-backed `/api/tags` endpoint. Target floor for the file:
7841    // 85% (was 83.83% pre-FX-F1 per FX-F1 dispatch — the +1.17pp gap
7842    // closes by exercising the async ladder end-to-end).
7843    //
7844    // The env-mutating tests below serialise on the module-canonical
7845    // `env_var_lock()` defined above (line 4505) — the same mutex the
7846    // pre-existing env-touching tests (`test_anonymize_unchanged_when_env_already_set`,
7847    // `test_anonymize_unchanged_when_config_false`, etc.) already hold.
7848    // FX-F1 first added a parallel `FX_F1_ENV_GUARD` mutex for these
7849    // tests; that turned out to race the pre-existing tests because
7850    // independent mutexes don't serialise against each other (issue
7851    // surfaced by the QC pass on the FX-F1 patch, 2026-05-27).
7852
7853    /// SAFETY: env-var mutation is unsynchronised across threads at
7854    /// the OS level. `env_var_lock` serialises mutation across this
7855    /// test region so the unsafe is sound for the duration of each
7856    /// test that holds the guard. The cleared keys match every
7857    /// resolver ingress that `build_llm_client` and
7858    /// `build_from_resolved_async` consult.
7859    fn fx_f1_clear_llm_env() {
7860        for k in [
7861            "AI_MEMORY_LLM_BACKEND",
7862            "AI_MEMORY_LLM_MODEL",
7863            "AI_MEMORY_LLM_BASE_URL",
7864            "AI_MEMORY_LLM_API_KEY",
7865            "OLLAMA_BASE_URL",
7866            "XAI_API_KEY",
7867            "OPENAI_API_KEY",
7868            "ANTHROPIC_API_KEY",
7869            "GEMINI_API_KEY",
7870            "GOOGLE_API_KEY",
7871            "DEEPSEEK_API_KEY",
7872            "MOONSHOT_API_KEY",
7873            "KIMI_API_KEY",
7874            "DASHSCOPE_API_KEY",
7875            "QWEN_API_KEY",
7876            "MISTRAL_API_KEY",
7877            "GROQ_API_KEY",
7878            "TOGETHER_API_KEY",
7879            "CEREBRAS_API_KEY",
7880            "OPENROUTER_API_KEY",
7881            "FIREWORKS_API_KEY",
7882        ] {
7883            // SAFETY: guarded by env_var_lock at call sites.
7884            unsafe { std::env::remove_var(k) };
7885        }
7886    }
7887    // ===========================================================================
7888
7889    /// FX-F1 — Semantic tier has `llm_model = None` (per tier preset),
7890    /// so when `source = CompiledDefault` the early-return arm fires.
7891    /// Pins the second of the two "tier has no llm_model + no operator
7892    /// intent" arms; the Keyword variant is pinned above.
7893    #[tokio::test]
7894    async fn test_build_llm_client_semantic_tier_compiled_default_returns_none() {
7895        let _guard = env_var_lock();
7896        fx_f1_clear_llm_env();
7897        let cfg = AppConfig::default();
7898        let res = build_llm_client(FeatureTier::Semantic, &cfg).await;
7899        assert!(
7900            res.is_none(),
7901            "semantic tier with no operator config must short-circuit to None"
7902        );
7903    }
7904
7905    /// FX-F1 — Autonomous tier with no operator config and unreachable
7906    /// Ollama URL → resolver winds up with `Legacy` source (because
7907    /// `ollama_url` is set), bypasses the early-return arm, and falls
7908    /// through to the async constructor which returns Err (treated as
7909    /// None). Exercises the `Err(_)` match arm of `build_llm_client`.
7910    #[tokio::test]
7911    async fn test_build_llm_client_autonomous_tier_unreachable_ollama_returns_none() {
7912        let _guard = env_var_lock();
7913        fx_f1_clear_llm_env();
7914        let mut cfg = AppConfig::default();
7915        cfg.ollama_url = Some("http://127.0.0.1:1".to_string());
7916        let res = build_llm_client(FeatureTier::Autonomous, &cfg).await;
7917        // Unreachable endpoint → Err from new_with_url_async → None.
7918        assert!(
7919            res.is_none(),
7920            "autonomous tier against unreachable ollama must surface as None"
7921        );
7922    }
7923
7924    /// FX-F1 — Smart tier with an `llm.backend = "xai"` config section
7925    /// (no API key available) drives the resolver to `Config` source
7926    /// → bypasses the early-return → `build_from_resolved_async`
7927    /// returns the missing-API-key Err → mapped to None. Pins the
7928    /// non-Ollama-no-key path in build_llm_client.
7929    #[tokio::test]
7930    async fn test_build_llm_client_xai_backend_without_api_key_returns_none() {
7931        let _guard = env_var_lock();
7932        fx_f1_clear_llm_env();
7933        use crate::config::LlmSection;
7934        let mut cfg = AppConfig::default();
7935        cfg.llm = Some(LlmSection {
7936            backend: Some("xai".to_string()),
7937            model: Some("grok-4.3".to_string()),
7938            api_key_env: Some("AI_MEMORY_FX_F1_NEVER_SET_XAI_KEY".to_string()),
7939            ..LlmSection::default()
7940        });
7941        let res = build_llm_client(FeatureTier::Smart, &cfg).await;
7942        assert!(
7943            res.is_none(),
7944            "xai backend without API key MUST map to None (Err path)"
7945        );
7946    }
7947
7948    /// FX-F1 — Happy-path: Smart tier with `ollama_url` pointed at a
7949    /// wiremock-backed `/api/tags` endpoint. Resolver lands on the
7950    /// `Legacy` source (operator set `ollama_url`), bypasses the
7951    /// early-return, calls `build_from_resolved_async` which calls
7952    /// `new_with_url_async` against the mock — the health probe
7953    /// returns 200, so the constructor returns Ok(Some). The
7954    /// `Ok(Some(_))` arm of build_llm_client is exercised.
7955    #[tokio::test(flavor = "multi_thread")]
7956    async fn test_build_llm_client_ollama_happy_path_against_wiremock() {
7957        let _guard = env_var_lock();
7958        fx_f1_clear_llm_env();
7959        use wiremock::matchers::{method, path};
7960        use wiremock::{Mock, MockServer, ResponseTemplate};
7961        let server = MockServer::start().await;
7962        Mock::given(method("GET"))
7963            .and(path("/api/tags"))
7964            .respond_with(ResponseTemplate::new(200).set_body_string(r#"{"models":[]}"#))
7965            .mount(&server)
7966            .await;
7967        let mut cfg = AppConfig::default();
7968        cfg.ollama_url = Some(server.uri());
7969        cfg.llm_model = Some("test-model".to_string());
7970        let res = build_llm_client(FeatureTier::Smart, &cfg).await;
7971        assert!(
7972            res.is_some(),
7973            "wiremock-backed /api/tags must drive build_llm_client to Some"
7974        );
7975    }
7976
7977    /// FX-F1 — `build_from_resolved_async` Ollama arm directly. Mirrors
7978    /// the sync test in `llm::tests::*` but exercises the FX-D1 async
7979    /// sibling against a wiremock-backed endpoint. Pins the happy path.
7980    #[tokio::test(flavor = "multi_thread")]
7981    async fn test_build_from_resolved_async_ollama_happy_path() {
7982        let _guard = env_var_lock();
7983        fx_f1_clear_llm_env();
7984        use wiremock::matchers::{method, path};
7985        use wiremock::{Mock, MockServer, ResponseTemplate};
7986        let server = MockServer::start().await;
7987        Mock::given(method("GET"))
7988            .and(path("/api/tags"))
7989            .respond_with(ResponseTemplate::new(200).set_body_string(r#"{"models":[]}"#))
7990            .mount(&server)
7991            .await;
7992        let mut cfg = AppConfig::default();
7993        cfg.ollama_url = Some(server.uri());
7994        cfg.llm_model = Some("test-model".to_string());
7995        let resolved = cfg.resolve_llm(None, None, None);
7996        let client = crate::llm::OllamaClient::build_from_resolved_async(&resolved)
7997            .await
7998            .expect("build_from_resolved_async must succeed against healthy /api/tags");
7999        assert!(client.is_some());
8000        assert!(client.unwrap().is_ollama_native());
8001    }
8002
8003    /// FX-F1 — `build_from_resolved_async` Ollama arm against an
8004    /// unreachable URL (TCP RST). Pins the Err return path so the
8005    /// caller's `Ok(Some)/Ok(None)/Err` match still routes the failure
8006    /// without a panic.
8007    #[tokio::test(flavor = "multi_thread")]
8008    async fn test_build_from_resolved_async_ollama_unreachable_errs() {
8009        let _guard = env_var_lock();
8010        fx_f1_clear_llm_env();
8011        use std::net::TcpListener;
8012        let listener = TcpListener::bind("127.0.0.1:0").unwrap();
8013        let port = listener.local_addr().unwrap().port();
8014        drop(listener);
8015        let mut cfg = AppConfig::default();
8016        cfg.ollama_url = Some(format!("http://127.0.0.1:{port}"));
8017        cfg.llm_model = Some("test-model".to_string());
8018        let resolved = cfg.resolve_llm(None, None, None);
8019        let res = crate::llm::OllamaClient::build_from_resolved_async(&resolved).await;
8020        assert!(
8021            res.is_err(),
8022            "unreachable Ollama endpoint MUST surface as Err"
8023        );
8024    }
8025
8026    /// FX-F1 — `build_from_resolved_async` non-Ollama branch where the
8027    /// resolver could not produce an API key. Pins the missing-key Err
8028    /// arm with the canonical error-message pattern.
8029    #[tokio::test(flavor = "multi_thread")]
8030    async fn test_build_from_resolved_async_non_ollama_missing_key_errs() {
8031        let _guard = env_var_lock();
8032        fx_f1_clear_llm_env();
8033        use crate::config::LlmSection;
8034        let mut cfg = AppConfig::default();
8035        cfg.llm = Some(LlmSection {
8036            backend: Some("anthropic".to_string()),
8037            model: Some("claude-opus-4.7".to_string()),
8038            api_key_env: Some("AI_MEMORY_FX_F1_NEVER_SET_ANTHROPIC_KEY".to_string()),
8039            ..LlmSection::default()
8040        });
8041        let resolved = cfg.resolve_llm(None, None, None);
8042        let res = crate::llm::OllamaClient::build_from_resolved_async(&resolved).await;
8043        let err = match res {
8044            Err(e) => e,
8045            Ok(_) => panic!("anthropic backend without API key MUST Err"),
8046        };
8047        let msg = format!("{err}");
8048        assert!(
8049            msg.contains("requires an API key"),
8050            "missing-key error must cite the API key requirement; got: {msg}"
8051        );
8052    }
8053
8054    /// FX-F1 — `build_from_resolved_async` non-Ollama branch with an
8055    /// API key resolves to `Ok(Some)` because
8056    /// `new_openai_compatible` does no I/O at construct time. Pins
8057    /// the happy path on the OpenAI-compatible arm.
8058    #[tokio::test(flavor = "multi_thread")]
8059    async fn test_build_from_resolved_async_non_ollama_with_key_returns_some() {
8060        let _guard = env_var_lock();
8061        fx_f1_clear_llm_env();
8062        use crate::config::LlmSection;
8063        // Use a private env var that no other test touches; set it just
8064        // long enough for the resolver to pick it up, then unset.
8065        let env_name = "AI_MEMORY_FX_F1_OPENAI_KEY";
8066        // SAFETY: env mutation guarded by env_var_lock; restored below.
8067        unsafe { std::env::set_var(env_name, "sk-test-fx-f1-fake-key") };
8068        let mut cfg = AppConfig::default();
8069        cfg.llm = Some(LlmSection {
8070            backend: Some("openai".to_string()),
8071            model: Some("gpt-5".to_string()),
8072            api_key_env: Some(env_name.to_string()),
8073            ..LlmSection::default()
8074        });
8075        let resolved = cfg.resolve_llm(None, None, None);
8076        let res = crate::llm::OllamaClient::build_from_resolved_async(&resolved).await;
8077        unsafe { std::env::remove_var(env_name) };
8078        let client = res.expect("openai backend with key MUST return Ok");
8079        assert!(
8080            client.is_some(),
8081            "build_from_resolved_async with key MUST produce Some(client)"
8082        );
8083        assert!(
8084            !client.unwrap().is_ollama_native(),
8085            "openai backend must NOT report ollama-native"
8086        );
8087    }
8088
8089    /// FX-F1 — exercises the `Env` source bypass of the
8090    /// `build_llm_client` early-return arm: operator sets
8091    /// `AI_MEMORY_LLM_BACKEND=ollama` + `AI_MEMORY_LLM_BASE_URL`
8092    /// pointing at an unreachable endpoint. Resolver source = Env →
8093    /// no early-return → constructor errors → mapped to None
8094    /// (Err→None arm in build_llm_client).
8095    #[tokio::test]
8096    async fn test_build_llm_client_env_backend_unreachable_returns_none() {
8097        let _guard = env_var_lock();
8098        fx_f1_clear_llm_env();
8099        // SAFETY: env mutation guarded by env_var_lock; cleared below.
8100        unsafe {
8101            std::env::set_var("AI_MEMORY_LLM_BACKEND", "ollama");
8102            std::env::set_var("AI_MEMORY_LLM_BASE_URL", "http://127.0.0.1:1");
8103        }
8104        let cfg = AppConfig::default();
8105        let res = build_llm_client(FeatureTier::Keyword, &cfg).await;
8106        unsafe {
8107            std::env::remove_var("AI_MEMORY_LLM_BACKEND");
8108            std::env::remove_var("AI_MEMORY_LLM_BASE_URL");
8109        }
8110        // Env source bypasses the early return → constructor errors on
8111        // unreachable endpoint → mapped to None.
8112        assert!(
8113            res.is_none(),
8114            "env-source backend against unreachable URL MUST map to None"
8115        );
8116    }
8117
8118    // ===========================================================================
8119    // FX-F1 — additional helper-function coverage uplift.
8120    // The build_llm_client tests above close the FX-D1 gap; these tests
8121    // pin the smaller helper surfaces (`apply_anonymize_default`,
8122    // `resolve_admin_agent_ids`) that previously had narrow branches
8123    // uncovered. Each closes one or two uncovered lines so the file
8124    // floor (85%) clears comfortably.
8125    // ===========================================================================
8126
8127    /// FX-F1 — `apply_anonymize_default` writes the env var when both
8128    /// (a) the effective default is true AND (b) the env var is
8129    /// unset. Pre-FX-F1 this `unsafe { set_var }` arm was uncovered.
8130    #[test]
8131    fn test_apply_anonymize_default_sets_env_when_unset() {
8132        let _guard = env_var_lock();
8133        // SAFETY: serialised through env_var_lock.
8134        let prev = std::env::var("AI_MEMORY_ANONYMIZE").ok();
8135        unsafe { std::env::remove_var("AI_MEMORY_ANONYMIZE") };
8136        let mut cfg = AppConfig::default();
8137        cfg.identity = Some(crate::config::IdentityConfig {
8138            anonymize_default: true,
8139            ..crate::config::IdentityConfig::default()
8140        });
8141        apply_anonymize_default(&cfg);
8142        let got = std::env::var("AI_MEMORY_ANONYMIZE").ok();
8143        // Restore env before asserting so a failure doesn't leak.
8144        match prev {
8145            Some(v) => unsafe { std::env::set_var("AI_MEMORY_ANONYMIZE", v) },
8146            None => unsafe { std::env::remove_var("AI_MEMORY_ANONYMIZE") },
8147        }
8148        assert_eq!(
8149            got.as_deref(),
8150            Some("1"),
8151            "anonymize_default=true with env unset MUST set AI_MEMORY_ANONYMIZE=1"
8152        );
8153    }
8154
8155    /// FX-F1 — `apply_anonymize_default` is a no-op when the env var
8156    /// is already set. Mirrors the existing test gap on the "env wins
8157    /// over config" precedence rule.
8158    #[test]
8159    fn test_apply_anonymize_default_preserves_existing_env() {
8160        let _guard = env_var_lock();
8161        let prev = std::env::var("AI_MEMORY_ANONYMIZE").ok();
8162        unsafe { std::env::set_var("AI_MEMORY_ANONYMIZE", "0") };
8163        let mut cfg = AppConfig::default();
8164        cfg.identity = Some(crate::config::IdentityConfig {
8165            anonymize_default: true,
8166            ..crate::config::IdentityConfig::default()
8167        });
8168        apply_anonymize_default(&cfg);
8169        let got = std::env::var("AI_MEMORY_ANONYMIZE").ok();
8170        match prev {
8171            Some(v) => unsafe { std::env::set_var("AI_MEMORY_ANONYMIZE", v) },
8172            None => unsafe { std::env::remove_var("AI_MEMORY_ANONYMIZE") },
8173        }
8174        assert_eq!(
8175            got.as_deref(),
8176            Some("0"),
8177            "env-var precedence: pre-set AI_MEMORY_ANONYMIZE MUST survive apply_anonymize_default"
8178        );
8179    }
8180
8181    /// FX-F1 — `resolve_admin_agent_ids` empty-entry handling.
8182    /// `AI_MEMORY_ADMIN_AGENT_IDS="alice,,bob"` should drop the empty
8183    /// entry without erroring. Pins the `continue` branch on line
8184    /// 1882 of the env-csv walker.
8185    #[test]
8186    fn test_resolve_admin_agent_ids_skips_empty_entries() {
8187        let _guard = env_var_lock();
8188        let prev = std::env::var("AI_MEMORY_ADMIN_AGENT_IDS").ok();
8189        unsafe { std::env::set_var("AI_MEMORY_ADMIN_AGENT_IDS", "alice,,bob,,") };
8190        let ids = resolve_admin_agent_ids(None);
8191        match prev {
8192            Some(v) => unsafe { std::env::set_var("AI_MEMORY_ADMIN_AGENT_IDS", v) },
8193            None => unsafe { std::env::remove_var("AI_MEMORY_ADMIN_AGENT_IDS") },
8194        }
8195        assert_eq!(
8196            ids,
8197            vec!["alice".to_string(), "bob".to_string()],
8198            "empty entries between commas MUST be skipped, not surface as agent_ids"
8199        );
8200    }
8201
8202    /// FX-F1 — `resolve_admin_agent_ids` rejects malformed entries
8203    /// with a warn-log, preserving the valid ones. Pins the Err arm
8204    /// of `validate_agent_id` on line 1901-1905.
8205    #[test]
8206    fn test_resolve_admin_agent_ids_drops_malformed_entries() {
8207        let _guard = env_var_lock();
8208        let prev = std::env::var("AI_MEMORY_ADMIN_AGENT_IDS").ok();
8209        // `bad id with spaces` fails `validate_agent_id`'s shape
8210        // check; `alice` passes; `*` is the post-#980 reject.
8211        unsafe { std::env::set_var("AI_MEMORY_ADMIN_AGENT_IDS", "alice,bad id,*,bob") };
8212        let ids = resolve_admin_agent_ids(None);
8213        match prev {
8214            Some(v) => unsafe { std::env::set_var("AI_MEMORY_ADMIN_AGENT_IDS", v) },
8215            None => unsafe { std::env::remove_var("AI_MEMORY_ADMIN_AGENT_IDS") },
8216        }
8217        assert!(ids.contains(&"alice".to_string()));
8218        assert!(ids.contains(&"bob".to_string()));
8219        assert!(
8220            !ids.iter().any(|s| s.contains(' ')),
8221            "malformed entries MUST be dropped"
8222        );
8223        assert!(
8224            !ids.contains(&"*".to_string()),
8225            "wildcard `*` MUST be dropped (post-#980)"
8226        );
8227    }
8228
8229    /// FX-F1 — `resolve_admin_agent_ids` falls through to the config
8230    /// when the env var is unset/empty. Pins the
8231    /// `admin_cfg.map(...).unwrap_or_default()` tail.
8232    #[test]
8233    fn test_resolve_admin_agent_ids_falls_back_to_config() {
8234        let _guard = env_var_lock();
8235        let prev = std::env::var("AI_MEMORY_ADMIN_AGENT_IDS").ok();
8236        unsafe { std::env::remove_var("AI_MEMORY_ADMIN_AGENT_IDS") };
8237        // Empty env → fall through to config.
8238        let ids = resolve_admin_agent_ids(None);
8239        // Restore env before asserting.
8240        if let Some(v) = prev {
8241            unsafe { std::env::set_var("AI_MEMORY_ADMIN_AGENT_IDS", v) };
8242        }
8243        assert!(
8244            ids.is_empty(),
8245            "no env + no config MUST resolve to empty allowlist (secure default)"
8246        );
8247    }
8248
8249    /// FX-F1 — `resolve_admin_agent_ids` honours a whitespace-only
8250    /// `AI_MEMORY_ADMIN_AGENT_IDS` value as "unset" (the
8251    /// `!raw.trim().is_empty()` guard). Pins the guard arm.
8252    #[test]
8253    fn test_resolve_admin_agent_ids_whitespace_env_falls_to_config() {
8254        let _guard = env_var_lock();
8255        let prev = std::env::var("AI_MEMORY_ADMIN_AGENT_IDS").ok();
8256        unsafe { std::env::set_var("AI_MEMORY_ADMIN_AGENT_IDS", "   ") };
8257        let ids = resolve_admin_agent_ids(None);
8258        match prev {
8259            Some(v) => unsafe { std::env::set_var("AI_MEMORY_ADMIN_AGENT_IDS", v) },
8260            None => unsafe { std::env::remove_var("AI_MEMORY_ADMIN_AGENT_IDS") },
8261        }
8262        assert!(
8263            ids.is_empty(),
8264            "whitespace-only env MUST be treated as unset"
8265        );
8266    }
8267
8268    // ===========================================================================
8269    // FX-F2 (coverage, #1432) — close the daemon_runtime.rs floor regression
8270    // observed on the Per-Module Coverage Thresholds CI gate after the
8271    // post-FX-F1 churn (HEADER_AGENT_ID SSOT migration #19eddac9, L1-L4
8272    // capture-turn #49e04daf, etc.) shifted branch-hit counts and dropped
8273    // measured coverage from 85.00% (pinned by 197640745) to 84.89% (-0.11pp).
8274    // These tests cover the `build_store_handle` URL-scheme dispatch arms
8275    // and `resolve_configured_embedding_dim` resolution-ladder arms — every
8276    // branch in both helpers is exercised under `cfg(feature = "sal")` test
8277    // builds with no live Postgres needed.
8278    // ===========================================================================
8279
8280    /// FX-F2 — `build_store_handle` accepts a `sqlite:///path` URL and
8281    /// routes through the SqliteStore adapter (not the `--db` fallback).
8282    /// Pins the `strip_prefix("sqlite://")` arm + the SqliteStore
8283    /// `Ok(...)` tail at lines 2691-2701.
8284    #[cfg(feature = "sal")]
8285    #[tokio::test]
8286    async fn fx_f2_build_store_handle_sqlite_url_scheme() {
8287        let dir = tempfile::tempdir().unwrap();
8288        let db = dir.path().join("scheme.db");
8289        let url = format!("sqlite:///{}", db.display());
8290        let (backend, store) = build_store_handle(
8291            Some(&url),
8292            &db,
8293            None,
8294            None,
8295            crate::store::PoolConfig::default(),
8296        )
8297        .await
8298        .expect("sqlite:// URL must dispatch to SqliteStore");
8299        // Backend tag must reflect the SQLite path.
8300        assert!(
8301            matches!(backend, crate::handlers::StorageBackend::Sqlite),
8302            "sqlite:// URL MUST resolve to StorageBackend::Sqlite"
8303        );
8304        // Smoke-check that the store is usable (the SAL trait `Arc` is live).
8305        drop(store);
8306    }
8307
8308    /// FX-F2 — `build_store_handle` rejects an unrecognised URL scheme
8309    /// with the canonical bail message. Pins the `else { bail!(...) }`
8310    /// arm at lines 2702-2706 — the lone uncovered Err path on the
8311    /// sal-feature build.
8312    #[cfg(feature = "sal")]
8313    #[tokio::test]
8314    async fn fx_f2_build_store_handle_unknown_scheme_errors() {
8315        let dir = tempfile::tempdir().unwrap();
8316        let db = dir.path().join("ignored.db");
8317        let result = build_store_handle(
8318            Some("mysql://host/db"),
8319            &db,
8320            None,
8321            None,
8322            crate::store::PoolConfig::default(),
8323        )
8324        .await;
8325        let err = match result {
8326            Ok(_) => panic!("unrecognised scheme MUST bail; got Ok"),
8327            Err(e) => e,
8328        };
8329        let msg = format!("{err:#}");
8330        assert!(
8331            msg.contains("unrecognised --store-url"),
8332            "bail message MUST include the canonical prefix; got: {msg}"
8333        );
8334    }
8335
8336    /// FX-F2 — `build_store_handle` defaults to SqliteStore at the
8337    /// `--db` path when `--store-url` is absent. Pins the `None` arm
8338    /// at lines 2708-2715.
8339    #[cfg(feature = "sal")]
8340    #[tokio::test]
8341    async fn fx_f2_build_store_handle_no_url_falls_through_to_db_path() {
8342        let dir = tempfile::tempdir().unwrap();
8343        let db = dir.path().join("fallthrough.db");
8344        let (backend, _store) =
8345            build_store_handle(None, &db, None, None, crate::store::PoolConfig::default())
8346                .await
8347                .expect("absent --store-url MUST resolve to SqliteStore via --db");
8348        assert!(matches!(backend, crate::handlers::StorageBackend::Sqlite));
8349    }
8350
8351    /// FX-F2 — `resolve_configured_embedding_dim` returns the canonical
8352    /// dim from the resolver when the model id is in
8353    /// `KNOWN_EMBEDDING_DIMS`. Pins the first arm of the resolution
8354    /// ladder (line 2615-2616).
8355    #[cfg(feature = "sal")]
8356    #[test]
8357    fn fx_f2_resolve_configured_embedding_dim_canonical_lookup_wins() {
8358        let _g = env_var_lock();
8359        let mut cfg = AppConfig::default();
8360        // `nomic-embed-text-v1.5` is in KNOWN_EMBEDDING_DIMS at 768.
8361        cfg.embeddings = Some(crate::config::EmbeddingsSection {
8362            model: Some("nomic-embed-text-v1.5".to_string()),
8363            ..crate::config::EmbeddingsSection::default()
8364        });
8365        let tier_cfg = FeatureTier::Semantic.config();
8366        let dim = resolve_configured_embedding_dim(&cfg, &tier_cfg);
8367        assert!(
8368            matches!(dim, Some(d) if d == 768),
8369            "canonical lookup MUST return 768 for nomic-embed-text-v1.5; got: {dim:?}"
8370        );
8371    }
8372
8373    /// FX-F2 — `resolve_configured_embedding_dim` falls through to the
8374    /// legacy flat-field arm when the resolver yields no dim. Pins the
8375    /// `or_else(|| app_config.embedding_model...)` arm (line 2617-2623).
8376    /// The legacy `EmbeddingModel::from_str` accepts the underscore
8377    /// variant `mini_lm_l6_v2`; canonical lookup goes through the
8378    /// `[embeddings]` section, which we omit here so the resolver
8379    /// returns `embedding_dim = None` and the legacy parse arm fires.
8380    #[cfg(feature = "sal")]
8381    #[test]
8382    fn fx_f2_resolve_configured_embedding_dim_legacy_flat_field_path() {
8383        let _g = env_var_lock();
8384        let mut cfg = AppConfig::default();
8385        // No [embeddings] section → resolver returns None for dim.
8386        // Legacy flat-field `embedding_model` parses as the 2-family enum.
8387        cfg.embedding_model = Some("mini_lm_l6_v2".to_string());
8388        let tier_cfg = FeatureTier::Semantic.config();
8389        let dim = resolve_configured_embedding_dim(&cfg, &tier_cfg);
8390        assert!(
8391            matches!(dim, Some(d) if d == 384),
8392            "legacy flat-field path MUST resolve mini_lm_l6_v2 to 384; got: {dim:?}"
8393        );
8394    }
8395
8396    /// FX-F2 — `resolve_configured_embedding_dim` falls all the way
8397    /// through to the tier-preset arm when neither resolver nor legacy
8398    /// flat-field yields a dim. Pins the final `or_else(|| preset...)`
8399    /// arm (line 2624).
8400    #[cfg(feature = "sal")]
8401    #[test]
8402    fn fx_f2_resolve_configured_embedding_dim_preset_fallback() {
8403        let _g = env_var_lock();
8404        let cfg = AppConfig::default();
8405        // Default config: no [embeddings] section + no legacy
8406        // embedding_model field. Semantic tier preset HAS an embedding
8407        // model so the preset arm fires (Some(_)). Keyword tier preset
8408        // is None so we'd get None — but Semantic is the load-bearing
8409        // case for the postgres-schema-bootstrap path documented at the
8410        // function comment.
8411        let tier_cfg = FeatureTier::Semantic.config();
8412        let dim = resolve_configured_embedding_dim(&cfg, &tier_cfg);
8413        assert!(
8414            dim.is_some(),
8415            "Semantic tier preset MUST yield a dim via the fallback arm"
8416        );
8417    }
8418
8419    /// FX-F2 — `resolve_configured_embedding_dim` passes a parse-error
8420    /// in the legacy flat-field arm through to the next arm
8421    /// (`.and_then(|raw| raw.parse(...).ok())`). The function returns
8422    /// the resolver-supplied dim (whatever
8423    /// `AppConfig::resolve_embeddings()` produced from defaults) when
8424    /// the operator's malformed flat-field is dropped. Pins the
8425    /// `.and_then(..., .ok())` None-on-parse-fail arm at line 2621.
8426    #[cfg(feature = "sal")]
8427    #[test]
8428    fn fx_f2_resolve_configured_embedding_dim_malformed_legacy_drops_silently() {
8429        let _g = env_var_lock();
8430        let mut cfg = AppConfig::default();
8431        // Unparseable value — `EmbeddingModel::from_str` rejects it
8432        // and the `.ok()` swallows the error, falling through to the
8433        // preset arm.
8434        cfg.embedding_model = Some("not-a-real-model".to_string());
8435        let tier_cfg = FeatureTier::Semantic.config();
8436        let dim = resolve_configured_embedding_dim(&cfg, &tier_cfg);
8437        // The resolver+preset combination still yields a Some (default
8438        // semantic tier has an embedding model preset). The test pins
8439        // the silent-drop behaviour: the function does NOT panic /
8440        // bail on an unparseable legacy override.
8441        assert!(
8442            dim.is_some(),
8443            "unparseable legacy embedding_model MUST be dropped silently \
8444             (the .ok() arm), preset fallback fires"
8445        );
8446    }
8447
8448    // -----------------------------------------------------------------
8449    // FUPC — body-exercising sweep-loop tests. The pre-existing
8450    // spawn-and-abort smoke tests use a 60s interval, so the loop body
8451    // (the actual db::gc / sweep / checkpoint calls + their info-log
8452    // branches) never fires inside the 20ms abort window. These drive a
8453    // 1ms interval against seeded state so the body runs at least once.
8454    // -----------------------------------------------------------------
8455
8456    /// `spawn_gc_loop` body actually runs and archives an expired memory
8457    /// (the `Ok(n) if n > 0` info-log arm fires).
8458    #[tokio::test]
8459    async fn fupc_spawn_gc_loop_body_archives_expired() {
8460        use crate::models::{Memory, MemoryKind, Tier};
8461        let env = TestEnv::fresh();
8462        let conn = db::open(&env.db_path).unwrap();
8463        // Seed a memory already past its expiry so the gc sweep archives it.
8464        let mem = Memory {
8465            id: uuid::Uuid::new_v4().to_string(),
8466            tier: Tier::Short,
8467            namespace: "gc-ns".to_string(),
8468            title: "expired".to_string(),
8469            content: "stale".to_string(),
8470            priority: 5,
8471            confidence: 1.0,
8472            source: "test".to_string(),
8473            created_at: "2000-01-01T00:00:00Z".to_string(),
8474            updated_at: "2000-01-01T00:00:00Z".to_string(),
8475            expires_at: Some("2000-01-01T01:00:00Z".to_string()),
8476            memory_kind: MemoryKind::Observation,
8477            ..Memory::default()
8478        };
8479        db::insert(&conn, &mem).unwrap();
8480        let state: Db = Arc::new(Mutex::new((
8481            conn,
8482            env.db_path.clone(),
8483            ResolvedTtl::default(),
8484            true, // archive_on_gc
8485        )));
8486        let h = spawn_gc_loop(state.clone(), Some(30), Duration::from_millis(1));
8487        // Let several sweep ticks fire.
8488        tokio::time::sleep(Duration::from_millis(40)).await;
8489        h.abort();
8490        let _ = h.await;
8491        // The expired row must be gone from `memories` (archived + deleted).
8492        let lock = state.lock().await;
8493        let remaining: i64 = lock
8494            .0
8495            .query_row(
8496                "SELECT COUNT(*) FROM memories WHERE namespace = 'gc-ns'",
8497                [],
8498                |r| r.get(0),
8499            )
8500            .unwrap();
8501        assert_eq!(
8502            remaining, 0,
8503            "gc loop body must have archived the expired row"
8504        );
8505    }
8506
8507    /// `spawn_wal_checkpoint_loop` body actually runs (no panic, clean
8508    /// abort) against a live WAL-mode db.
8509    #[tokio::test]
8510    async fn fupc_spawn_wal_checkpoint_loop_body_runs() {
8511        let env = TestEnv::fresh();
8512        let conn = db::open(&env.db_path).unwrap();
8513        let state: Db = Arc::new(Mutex::new((
8514            conn,
8515            env.db_path.clone(),
8516            ResolvedTtl::default(),
8517            true,
8518        )));
8519        let h = spawn_wal_checkpoint_loop(state, Duration::from_millis(1));
8520        tokio::time::sleep(Duration::from_millis(30)).await;
8521        h.abort();
8522        let _ = h.await;
8523    }
8524
8525    /// `spawn_transcript_lifecycle_sweep_loop` body runs at a 1ms cadence
8526    /// against a clean db (the `Ok(r)` arm with a zero-count report — no
8527    /// info-log, no panic, clean abort).
8528    #[tokio::test]
8529    async fn fupc_spawn_transcript_lifecycle_sweep_body_runs_clean() {
8530        let env = TestEnv::fresh();
8531        let conn = db::open(&env.db_path).unwrap();
8532        let state: Db = Arc::new(Mutex::new((
8533            conn,
8534            env.db_path.clone(),
8535            ResolvedTtl::default(),
8536            true,
8537        )));
8538        let h = spawn_transcript_lifecycle_sweep_loop(
8539            state,
8540            crate::config::TranscriptsConfig::default(),
8541            Duration::from_millis(1),
8542        );
8543        tokio::time::sleep(Duration::from_millis(30)).await;
8544        h.abort();
8545        let _ = h.await;
8546    }
8547
8548    /// `spawn_agent_quota_reset_loop` body runs at a 1ms cadence against
8549    /// a clean db (the reset SQL touches zero rows, no panic, clean
8550    /// abort).
8551    #[tokio::test]
8552    async fn fupc_spawn_agent_quota_reset_body_runs_clean() {
8553        let env = TestEnv::fresh();
8554        let conn = db::open(&env.db_path).unwrap();
8555        let state: Db = Arc::new(Mutex::new((
8556            conn,
8557            env.db_path.clone(),
8558            ResolvedTtl::default(),
8559            true,
8560        )));
8561        let h = spawn_agent_quota_reset_loop(state, Duration::from_millis(1));
8562        tokio::time::sleep(Duration::from_millis(30)).await;
8563        h.abort();
8564        let _ = h.await;
8565    }
8566}