Skip to main content

ai_memory/
daemon_runtime.rs

1// Copyright 2026 AlphaOne LLC
2// SPDX-License-Identifier: Apache-2.0
3
4//! Daemon runtime — orchestration shell for the `ai-memory` binary.
5//!
6//! W6 lifted `serve()` and the top-level dispatch out of `main.rs` so the
7//! production HTTP daemon, the integration test harness, and the
8//! coverage-instrumented tests in this module all share one source of
9//! truth. `main.rs` keeps its `#[tokio::main]` entry point but immediately
10//! delegates here for every subcommand.
11//!
12//! ## Public surface (post-W6)
13//!
14//! - [`run`] — top-level CLI dispatch (called from `main()`).
15//! - [`serve`] — full HTTP daemon body (TLS or plain).
16//! - [`bootstrap_serve`] — testable struct-returning state builder.
17//! - [`build_router`] — composition wrapper around `lib::build_router`.
18//! - [`build_embedder`], [`build_vector_index`] — single canonical builders
19//!   used by both `serve()` and `cli::recall::run`.
20//! - [`spawn_gc_loop`], [`spawn_wal_checkpoint_loop`] — daemon background
21//!   tasks, returning a [`JoinHandle`] so callers can abort on shutdown.
22//! - [`is_write_command`] — write-command predicate driving the post-write
23//!   WAL checkpoint.
24//! - [`passphrase_from_file`], [`apply_anonymize_default`] — startup helpers.
25//!
26//! ## Pre-W6 helpers retained
27//!
28//! - [`serve_http_with_shutdown`], [`serve_http_with_shutdown_future`] —
29//!   the in-process HTTP harness the integration suite drives.
30//! - [`run_sync_daemon_with_shutdown`],
31//!   [`run_sync_daemon_with_shutdown_using_client`],
32//!   [`sync_cycle_once`] — the sync-daemon body.
33//! - [`run_curator_daemon_with_shutdown`],
34//!   [`run_curator_daemon_with_primitives`] — the curator-daemon body.
35
36use crate::models::field_names;
37use std::io::Write as _;
38use std::path::Path;
39use std::path::PathBuf;
40use std::sync::Arc;
41use std::sync::atomic::{AtomicBool, Ordering};
42use std::time::{Duration, Instant};
43
44use anyhow::{Context, Result};
45use axum::Router;
46use clap::{Args, CommandFactory, Parser, Subcommand};
47use clap_complete::{Shell, generate};
48use rusqlite::Connection;
49use tokio::sync::{Mutex, Notify};
50use tokio::task::JoinHandle;
51use tracing_subscriber::EnvFilter;
52
53use crate::cli::agents::{AgentsArgs, PendingArgs};
54use crate::cli::archive::ArchiveArgs;
55use crate::cli::audit::AuditArgs;
56use crate::cli::backup::{BackupArgs, RestoreArgs};
57use crate::cli::boot::BootArgs;
58use crate::cli::consolidate::{AutoConsolidateArgs, ConsolidateArgs};
59use crate::cli::crud::{DeleteArgs, GetArgs, ListArgs};
60use crate::cli::curator::CuratorArgs;
61use crate::cli::forget::ForgetArgs;
62use crate::cli::identity::IdentityArgs;
63use crate::cli::install::InstallArgs;
64use crate::cli::io::{ImportArgs, MineArgs};
65use crate::cli::link::{LinkArgs, ResolveArgs};
66use crate::cli::logs::LogsArgs;
67use crate::cli::promote::PromoteArgs;
68use crate::cli::recall::RecallArgs;
69use crate::cli::rules::RulesArgs;
70use crate::cli::search::SearchArgs;
71use crate::cli::store::StoreArgs;
72use crate::cli::sync::{SyncArgs, SyncDaemonArgs};
73use crate::cli::update::UpdateArgs;
74use crate::cli::verify::VerifyChainArgs;
75use crate::cli::verify_signed_events::VerifySignedEventsChainArgs;
76use crate::cli::wrap::WrapArgs;
77use crate::config::{AppConfig, FeatureTier};
78use crate::embeddings::Embedder;
79use crate::handlers::{ApiKeyState, AppState, Db};
80use crate::hnsw::VectorIndex;
81use crate::{bench, cli, db, embeddings, federation, hnsw, llm, mcp, tls};
82
83#[cfg(feature = "sal")]
84use crate::migrate;
85
/// Default database path: the fallback value of the global `--db`
/// flag (which also reads the `AI_MEMORY_DB` env var) declared on [`Cli`].
86const DEFAULT_DB: &str = "ai-memory.db";
/// NOTE(review): presumably the default listen port for the HTTP
/// daemon; the consumer is outside this view — confirm.
87const DEFAULT_PORT: u16 = 9077;
/// Garbage-collection cadence: 30 minutes expressed in seconds.
/// NOTE(review): presumably the tick interval of the daemon GC loop
/// (`spawn_gc_loop`); the consumer is outside this view — confirm.
// The `as u64` cast binds tighter than `*`, so this is
// `30 * (SECS_PER_MINUTE as u64)`. `crate::SECS_PER_MINUTE` is presumably
// a signed integer like `crate::SECS_PER_DAY` below — TODO confirm.
88const GC_INTERVAL_SECS: u64 = 30 * crate::SECS_PER_MINUTE as u64;
89/// WAL auto-checkpoint cadence in the HTTP daemon. Bounds `*-wal`
90/// file growth between `SQLite`'s internal page-count checkpoints.
91const WAL_CHECKPOINT_INTERVAL_SECS: u64 = 10 * crate::SECS_PER_MINUTE as u64;
92/// v0.7.0 K2 — `pending_actions` timeout sweeper cadence. Fires every
93/// 60s and transitions `status='pending'` rows whose age exceeds the
94/// per-row `default_timeout_seconds` (or the global default below) to
95/// `status='expired'`.
96const PENDING_TIMEOUT_SWEEP_INTERVAL_SECS: u64 = 60;
97/// Default per-row TTL applied when a `pending_actions` row has a NULL
98/// `default_timeout_seconds`. 24 hours — matches the operator-facing
99/// `doctor` warning window so a row already classed CRITICAL by
100/// `doctor_oldest_pending_age_secs` is also a sweeper candidate.
// Signed (`i64`) unlike the `u64` cadences above: it is assigned from
// `crate::SECS_PER_DAY` without a cast. NOTE(review): presumably signed
// because it is compared against SQL integer ages — confirm.
101const PENDING_TIMEOUT_DEFAULT_SECS: i64 = crate::SECS_PER_DAY;
102/// v0.7.0 I3 — transcript archive→prune sweeper cadence. The lifecycle
103/// scan walks every transcript row plus a per-candidate join into
104/// `memories`, so we run it less aggressively than the K2 60-second
105/// pending-actions sweeper. 10 minutes is fast enough that operator-
106/// visible drift between TTL expiry and archive is bounded by one
107/// tick, and slow enough that the scan never dominates a busy
108/// daemon's wall-clock.
// 600 s = 10 minutes, written as a literal rather than via
// `crate::SECS_PER_MINUTE` like the WAL cadence above.
109const TRANSCRIPT_LIFECYCLE_SWEEP_INTERVAL_SECS: u64 = 600;
110/// v0.7.0 K8 — agent-quota daily-counter reset cadence. The sweep
111/// zeroes `current_memories_today` + `current_links_today` for every
112/// row whose `day_started_at` predates the current UTC date. 60-second
113/// cadence matches the K2 pending-actions sweeper — a single SQL
114/// UPDATE that touches at most one row per registered agent per
115/// midnight crossing.
116const AGENT_QUOTA_RESET_INTERVAL_SECS: u64 = 60;
117
118// ---------------------------------------------------------------------------
119// Clap-derived CLI surface
120// ---------------------------------------------------------------------------
121//
122// The clap structs live in the lib crate so `daemon_runtime::run` can
123// take them as parameters. `main.rs` re-exports `Cli` and immediately
124// delegates here.
125
// Root clap parser for the `ai-memory` binary: one subcommand plus four
// flags marked `global = true` (`--db`, `--json`, `--agent-id`,
// `--db-passphrase-file`).
//
// Maintainer notes are written as plain `//` comments on purpose: with
// clap's derive macros `///` doc comments become user-visible help text,
// so adding or rewording them changes `--help` output.
126#[derive(Parser)]
127#[command(
128    name = "ai-memory",
129    version,
130    about = "AI-agnostic persistent memory — MCP server, HTTP API, and CLI for any AI platform"
131)]
132pub struct Cli {
    // The selected subcommand; see `Command` for every verb.
133    #[command(subcommand)]
134    pub command: Command,
    // Database path. Sourced from `--db`, the `AI_MEMORY_DB` env var, or
    // the `DEFAULT_DB` constant. This flag currently has no `///` help text.
135    #[arg(long, env = "AI_MEMORY_DB", default_value = DEFAULT_DB, global = true)]
136    pub db: PathBuf,
137    /// Output as JSON (machine-parseable)
138    #[arg(long, global = true, default_value_t = false)]
139    pub json: bool,
140    /// Agent identifier used for store operations. If unset, an NHI-hardened
141    /// default is synthesized (see `ai-memory store --help`). Accepts the
142    /// `AI_MEMORY_AGENT_ID` environment variable as a fallback.
143    #[arg(long, env = "AI_MEMORY_AGENT_ID", global = true)]
144    pub agent_id: Option<String>,
145    /// v0.6.0.0: path to a file containing the `SQLCipher` passphrase.
146    /// Only meaningful when the binary was built with
147    /// `--features sqlcipher` (standard builds ignore this flag). File
148    /// must be root-readable (mode 0400 recommended). The passphrase is
149    /// read once at startup and exported as `AI_MEMORY_DB_PASSPHRASE`
150    /// for the duration of the process — passing the passphrase
151    /// directly as an env var or as a flag value leaks to the process
152    /// list (`ps -E`) and shell history.
    // NOTE(review): the read-and-export step is not in this struct;
    // presumably done by the `passphrase_from_file` startup helper named in
    // the module docs — confirm.
153    #[arg(long, global = true, value_name = "PATH")]
154    pub db_passphrase_file: Option<PathBuf>,
155}
156
// Every subcommand of the `ai-memory` binary.
//
// Shape of the variants:
// - Unit variants (`Gc`, `Stats`, `Namespaces`, `Export`, `Shell`, `Man`)
//   take no subcommand-specific arguments.
// - `Mcp` declares its two options inline.
// - Every other variant wraps an args struct, most of them defined under
//   `crate::cli`.
// - `Migrate` and `SchemaInit` exist only when the `sal` cargo feature is
//   enabled.
//
// The `///` comments on variants are consumed by clap's derive macro as the
// per-subcommand help text, so treat them as user-facing copy. Maintainer
// notes in this enum are plain `//` comments for that reason.
// NOTE(review): clap normally lists subcommands in declaration order, so
// reordering variants would presumably reorder `--help` — confirm before
// reshuffling.
157#[derive(Subcommand)]
158pub enum Command {
159    /// Start the HTTP memory daemon.
160    ///
161    /// **Tier resolution.** Unlike `mcp` / `store` / `recall`, the
162    /// `serve` subcommand does NOT accept a `--tier` flag. The
163    /// daemon's effective feature tier is resolved from the `tier`
164    /// field in `config.toml`, falling back to the compiled-in
165    /// default (`semantic`). For per-invocation tier overrides use
166    /// the `mcp` / `store` / `recall` subcommands, which expose
167    /// `--tier` directly. See `docs/ADMIN_GUIDE.md` §"Feature tiers"
168    /// and issue #703 for the rationale (a long-running daemon owns
169    /// embedder / LLM resources that are expensive to swap mid-run,
170    /// so tier is fixed at startup via configuration).
171    Serve(ServeArgs),
172    /// Run as an MCP (Model Context Protocol) tool server over stdio
173    Mcp {
        // Carried as a free-form `String`; no `value_parser` restricts it
        // here. NOTE(review): validation presumably happens at dispatch —
        // confirm.
174        /// Feature tier: keyword (FTS only) or semantic (embeddings + FTS)
175        #[arg(long, default_value = "semantic")]
176        tier: String,
177        /// v0.6.4 — Tool surface profile. One of `core`, `graph`, `admin`,
178        /// `power`, `full`, or a comma-separated custom list (e.g.,
179        /// `core,graph,archive`). Default `core` (7 tools at v0.7.0:
180        /// the original 5 + `memory_load_family` + `memory_smart_load`).
181        /// Resolution order: this CLI flag > `AI_MEMORY_PROFILE` env >
182        /// `[mcp].profile` in config.toml > `core`. Set `--profile full`
183        /// to expose every family — at v0.7.0,
184        /// `Profile::full().expected_tool_count()` returns 74 (canonical
185        /// SSOT; pinned by `profile_full_matches_registry_all` against
186        /// `crate::mcp::registry::tool_names::ALL.len()`). The 74
187        /// advertised entries decompose as 73 callable "memory tools"
188        /// plus the always-on `memory_capabilities` bootstrap; the
189        /// `build_capabilities_summary` "{n} memory tools" phrasing
190        /// reports the 73 memory-tool count to reconcile with the
191        /// user-facing summary (see issue #862 for the disambiguation).
        // `None` means neither the flag nor the env var was supplied; the
        // config.toml / `core` fallbacks described above are not applied by
        // clap and must be resolved by the consumer.
192        #[arg(long, env = "AI_MEMORY_PROFILE")]
193        profile: Option<String>,
194    },
195    /// Store a new memory
196    Store(StoreArgs),
197    /// Update an existing memory by ID
198    Update(UpdateArgs),
199    /// Recall memories relevant to a context
200    Recall(RecallArgs),
201    /// Search memories by text
202    Search(SearchArgs),
203    /// Retrieve a memory by ID
204    Get(GetArgs),
205    /// List memories
206    List(ListArgs),
207    /// Delete a memory by ID
208    Delete(DeleteArgs),
209    /// Promote a memory to long-term
210    Promote(PromoteArgs),
211    /// Delete memories matching a pattern
212    Forget(ForgetArgs),
213    /// Link two memories
214    Link(LinkArgs),
215    /// Consolidate multiple memories into one
216    Consolidate(ConsolidateArgs),
217    /// Run garbage collection
218    Gc,
219    /// Show statistics
220    Stats,
221    /// List all namespaces
222    Namespaces,
223    /// v0.7.0 (issue #800) — operator CRUD for the per-namespace
224    /// standard policy memory pointer (Batman Mode Crack 1). Three
225    /// verbs: `set-standard` / `get-standard` / `clear-standard`, plus
226    /// the `batman-policy` helper that prints the canonical Batman
227    /// `GovernancePolicy` JSON blob. Closes the friction that kept
228    /// Batman Forms 2 + 6 dormant on most installs by replacing the
229    /// MCP-stdio JSON-RPC dance with first-class CLI surface.
230    Namespace(crate::cli::namespace::NamespaceArgs),
231    /// v0.7.x (#1146) — enterprise configuration tooling.
232    /// `ai-memory config migrate` rewrites a legacy v1 (flat-field)
233    /// `config.toml` to the v2 sectioned shape (`[llm]`, `[embeddings]`,
234    /// `[reranker]`, `[storage]`) with a timestamped `.bak` backup.
235    /// `--dry-run` prints the diff without writing.
236    /// `--also-clean-claude-json` additionally removes the
237    /// `mcpServers.<*>.env` block from `~/.claude.json` after the
238    /// operator has verified the new config.
239    Config(crate::cli::commands::config::ConfigCliArgs),
240    /// Export all memories as JSON
241    Export,
242    /// Import memories from JSON (stdin)
243    Import(ImportArgs),
244    /// Resolve a contradiction — mark one memory as superseding another
245    Resolve(ResolveArgs),
    // Name clash to keep in mind: this file also imports
    // `clap_complete::Shell`. The variant lives in a different namespace,
    // but refer to it as `Command::Shell` to keep call sites unambiguous.
246    /// Interactive memory shell (REPL)
247    Shell,
248    /// Sync memories between two database files
249    Sync(SyncArgs),
250    /// Run the peer-to-peer sync daemon — continuously exchange memories
251    /// with one or more HTTP peers (Phase 3 Task 3b.1). The defining
252    /// grand-slam capability: two agents on two machines form a live
253    /// knowledge mesh with no cloud, no login, no `SaaS`.
254    SyncDaemon(SyncDaemonArgs),
255    /// Auto-consolidate short-term memories by namespace
256    AutoConsolidate(AutoConsolidateArgs),
257    /// Generate shell completions
258    Completions(CompletionsArgs),
259    /// Generate man page
260    Man,
261    /// Import memories from historical conversations (Claude, `ChatGPT`, Slack exports)
262    Mine(MineArgs),
263    /// Manage the memory archive (list, restore, purge, stats)
264    Archive(ArchiveArgs),
265    /// Register or list agents (Task 1.3)
266    Agents(AgentsArgs),
267    /// v0.7 (Track H, Task H1) — per-agent Ed25519 keypair lifecycle.
268    /// `generate` / `import` / `list` / `export-pub` against the local
269    /// key directory (default `<config>/ai-memory/keys`). Hardware-backed
270    /// key storage (TPM/HSM/Secure Enclave) is out of OSS scope and
271    /// lives in the AgenticMem commercial layer.
272    Identity(IdentityArgs),
273    /// v0.7.0 QW-3 — context-offload substrate primitive. Persists a
274    /// file (or `-` for stdin) into the `offloaded_blobs` substrate
275    /// and prints the short `ref_id` callers keep in their working
276    /// window. Pairs with `ai-memory deref <ref_id>`.
277    Offload(crate::cli::offload::OffloadArgs),
278    /// v0.7.0 QW-3 — dereference a previously-offloaded `ref_id`.
279    /// Refuses tampered rows (SHA-256 mismatch). Pairs with
280    /// `ai-memory offload <file>`.
281    Deref(crate::cli::offload::DerefArgs),
282    /// v0.7.0 (issue #691) — substrate-level agent-action rules engine.
283    /// CRUD over the `governance_rules` table consulted by
284    /// `check_agent_action`. Mutation verbs (add/enable/disable/remove)
285    /// require the operator's Ed25519 keypair on disk at
286    /// `<key-dir>/operator.priv` (mode 0600); without `--sign` they
287    /// refuse with `governance.no_operator_key`. Read verbs (list /
288    /// check) are unprivileged.
289    Rules(RulesArgs),
290    /// List / approve / reject governance-pending actions (Task 1.9)
291    Pending(PendingArgs),
292    /// v0.6.0.0: snapshot the `SQLite` database to a timestamped backup
293    /// file. Uses `SQLite` `VACUUM INTO` which is hot-backup safe (no daemon
294    /// stop required). Writes a `manifest.json` alongside (sha256 + version).
295    Backup(BackupArgs),
296    /// v0.6.0.0: restore the `SQLite` database from a backup file written
297    /// by `ai-memory backup`. Verifies the manifest sha256 before
298    /// replacing the current DB. The current DB is moved aside as a safety
299    /// net before the replacement.
300    Restore(RestoreArgs),
301    /// v0.6.1: run the autonomous curator. `--once` runs a single sweep
302    /// and prints a JSON report; `--daemon` loops with `--interval-secs`
303    /// between cycles. Auto-tags memories without tags and flags
304    /// contradictions against nearby siblings in the same namespace.
305    Curator(CuratorArgs),
306    /// v0.6.3 (Pillar 3 / Stream E): run the canonical performance
307    /// workload and print measured p50/p95/p99 against the budgets in
308    /// `PERFORMANCE.md`. Each invocation seeds a disposable temp DB so
309    /// the user's main DB is untouched. Exits non-zero when any p95
310    /// exceeds its budget by more than the published 10% tolerance.
311    Bench(BenchArgs),
    // The next two variants are compiled only with `--features sal`; any
    // `match` over `Command` needs the same `#[cfg]` on the matching arms.
312    /// v0.7: migrate memories between SAL backends. Gated behind
313    /// `--features sal`. Reads pages via `MemoryStore::list`, writes
314    /// via `MemoryStore::store`. Idempotent: source ids are preserved
315    /// and both adapters upsert on id.
316    #[cfg(feature = "sal")]
317    Migrate(MigrateArgs),
318    /// v0.7.0 Wave-1 Fix 3: bootstrap a SAL backend's schema by URL.
319    /// Opens the target store via the same factory as `migrate` (which
320    /// triggers `INIT_SCHEMA` as a side effect) then enumerates the
321    /// resulting catalog (tables, views, functions, indices,
322    /// extensions, schema_version). On Postgres with Apache AGE
323    /// installed it also bootstraps the `memory_graph` projection via
324    /// `SELECT create_graph('memory_graph')`. Idempotent — safe to
325    /// re-run against an already-initialized store. Gated behind
326    /// `--features sal`.
327    #[cfg(feature = "sal")]
328    SchemaInit(crate::cli::schema_init::SchemaInitArgs),
329    /// v0.6.3.1 (P7 / R7): operator-visible health dashboard. Reads
330    /// Capabilities v2 (P1) + data integrity surfaces (P2) + recall
331    /// observability (P3). With `--remote <url>` becomes a fleet doctor
332    /// at T3+. Read-only — never mutates the database. Exits 0 on a
333    /// healthy report, 2 on critical findings, and 1 on warnings when
334    /// `--fail-on-warn` is passed.
335    Doctor(DoctorCliArgs),
336    /// Issue #487: emit session-boot context. Universal primitive every
337    /// AI-agent integration recipe (Claude Code SessionStart hook, Cursor /
338    /// Cline / Continue / Windsurf system-message, Codex / Apps SDK /
339    /// Agent SDK programmatic prepend, OpenClaw built-in, local models
340    /// via LM Studio / Ollama / vLLM) calls before the agent's first turn.
341    /// Read-only, fast, never blocks. With `--quiet` (recommended for
342    /// hooks) a missing DB exits 0 with empty stdout.
343    Boot(BootArgs),
344    /// Issue #487 PR-2: wire `ai-memory boot` and the `ai-memory-mcp`
345    /// server into AI agents' config files (Claude Code SessionStart hook,
346    /// Cursor / Cline / Continue / Windsurf / OpenClaw MCP config). Default
347    /// is `--dry-run` (prints the diff, writes nothing). Pass `--apply` to
348    /// commit. Pass `--uninstall --apply` to remove a previously-installed
349    /// managed block.
350    Install(InstallArgs),
351    /// Issue #487 PR-6: cross-platform Rust replacement for the bash /
352    /// PowerShell wrappers PR-1 shipped in the integration recipes. Runs
353    /// `ai-memory boot` in-process, builds a system message, then spawns
354    /// the named agent CLI with the system message delivered via the
355    /// strategy chosen by `default_strategy(<agent>)` (or an explicit
356    /// `--system-flag` / `--system-env` / `--message-file-flag`
357    /// override). Exit code is propagated from the wrapped agent.
358    Wrap(WrapArgs),
359    /// Issue #487 PR-5: operator-facing CLI for the operational logging
360    /// facility (`tail`, `cat`, `archive`, `purge`). Default-OFF — emits
361    /// nothing useful unless `[logging] enabled = true` is set in
362    /// `config.toml`.
363    Logs(LogsArgs),
364    /// Issue #487 PR-5: operator-facing CLI for the security audit
365    /// trail (`verify`, `tail`, `path`). Default-OFF — emits nothing
366    /// useful unless `[audit] enabled = true` is set in `config.toml`.
367    Audit(AuditArgs),
368    /// v0.7.0 K11 — translate legacy `[governance]` policies in
369    /// `config.toml` into the v0.7 `[[permissions.rules]]` (K9) format.
370    /// Default mode is dry-run: prints to stdout. Pass `--config-out
371    /// PATH` to write the rendered block to a file (or merge in-place
372    /// when `PATH` matches the loaded config).
373    Governance(GovernanceCliArgs),
374    /// v0.7.0 L1-3 — external verifier for reflection chains
375    /// (procurement-grade audit tool). Walks `reflects_on` edges
376    /// backward from `<memory_id>` to depth 0, verifies each
377    /// Ed25519 signature, and emits a structured chain-integrity
378    /// report. Exit 0 if fully verified; non-zero otherwise.
379    VerifyReflectionChain(VerifyChainArgs),
380    /// v0.7.0 V-4 closeout (#698) — walk the SQL-side `signed_events`
381    /// cross-row hash chain (schema v34) and emit a structured
382    /// report. Distinct from `verify-reflection-chain` (which walks
383    /// reflects_on edges) and from `audit verify` (which walks the
384    /// JSONL audit log). Exit 0 if the chain holds; 1 on chain
385    /// break.
386    VerifySignedEventsChain(VerifySignedEventsChainArgs),
387    /// v0.7.0 L2-5 (issue #670) — export a procurement-grade forensic
388    /// evidence bundle (signed tarball) for a memory and its
389    /// reflection chain. The OSS surface for the `AgenticMem Attest`
390    /// tier; see [`crate::forensic::bundle`] for the bundle layout.
391    ExportForensicBundle(crate::forensic::bundle::ExportForensicBundleArgs),
392    /// v0.7.0 L2-5 (issue #670) — verify a forensic evidence bundle.
393    /// Re-hashes every file, checks the manifest signature when
394    /// present, and re-verifies every edge signature against the
395    /// bundled `observed_by` public key.
396    VerifyForensicBundle(crate::forensic::bundle::VerifyForensicBundleArgs),
397    /// v0.7.0 QW-1 — write every reflection memory to a file under
398    /// `~/.ai-memory/reflections/<namespace>/<id>.md` (or `.json` with
399    /// `--format json`) so operators can `cat` what the substrate has
400    /// synthesised without learning SQL. The on-disk artefact is
401    /// derived; the SQL row stays canonical.
402    ExportReflections(crate::cli::commands::export_reflections::ExportReflectionsArgs),
403    /// v0.7.0 (issue #1389) — fail-safe recovery of agent context
404    /// from a host's per-turn transcript file when the previous
405    /// session terminated ungracefully (SIGKILL, tmux lockup, host
406    /// crash) between turns. Closes the #1388 substrate failure
407    /// mode. Designed for SessionStart-hook chaining after
408    /// `ai-memory boot`; the in-session counterpart is the
409    /// `memory_recover_previous_session` MCP tool.
410    RecoverPreviousSession(
411        crate::cli::commands::recover_previous_session::RecoverPreviousSessionArgs,
412    ),
413    /// v0.7.0 WT-1-F — operator-side wrapper over the atomisation
414    /// engine ([`crate::atomisation::Atomiser`]). Decomposes one
415    /// long-form memory into atomic propositions; surfaces every
416    /// substrate failure with a stable exit code (see
417    /// [`crate::cli::commands::atomise::exit_code`]).
418    Atomise(crate::cli::commands::atomise::AtomiseArgs),
419    /// v0.7.0 QW-2 — fetch (or regenerate) the Persona artefact for
420    /// an entity. Read-only by default; pass `--regenerate` to run
421    /// the curator and persist a fresh row.
422    Persona(crate::cli::commands::persona::PersonaArgs),
423    /// v0.7.0 Form 5 (issue #758) — calibration driver verbs.
424    /// `ai-memory calibrate confidence --from-shadow` reads
425    /// `confidence_shadow_observations` and emits per-(namespace,
426    /// source) baselines computed over the window.
427    Calibrate(crate::cli::commands::calibrate_confidence::CalibrateArgs),
428    /// v0.7.0 Cluster E API-2 (issue #767) — `ai-memory skill
429    /// <register|list|get|resource|export|promote|compose>` CLI parity
430    /// surface for the 7 L1-5 Agent Skills MCP tools. Dispatches into
431    /// the same substrate handlers (re-exported under
432    /// `crate::mcp::handle_skill_*`); no business logic is duplicated.
433    Skill(crate::cli::commands::skill::SkillArgs),
434    /// v0.7.0 #1095 — `ai-memory share` subcommand. Closes the SR-4
435    /// three-surface-parity gap. Copies a memory into the recipient
436    /// agent's shared namespace `_shared/<from>→<to>/` via the same
437    /// substrate primitive the MCP tool (`memory_share`) and HTTP
438    /// route (`POST /api/v1/share`) consume — guaranteeing byte-equal
439    /// envelopes across the three surfaces.
440    Share(crate::cli::share::ShareArgs),
441    /// v0.7.0 ARCH-3 / FX-12 — `ai-memory kg-query` subcommand.
442    /// Outbound KG traversal from a source memory (<=5 hops). CLI
443    /// parity for the MCP `memory_kg_query` tool.
444    KgQuery(crate::cli::commands::kg_query::KgQueryArgs),
445    /// v0.7.0 ARCH-3 / FX-12 — `ai-memory find-paths` subcommand.
446    /// Enumerate up to N paths through the KG between two memories
447    /// (BFS, `max_depth<=7`). CLI parity for `memory_find_paths`.
448    FindPaths(crate::cli::commands::find_paths::FindPathsArgs),
449    /// v0.7.0 ARCH-3 / FX-12 — `ai-memory recall-observations`
450    /// subcommand. List rows from the recall-consumption ledger
451    /// (#886). CLI parity for `memory_recall_observations`.
452    RecallObservations(crate::cli::commands::recall_observations::RecallObservationsArgs),
453    /// v0.7.0 #1443 — `ai-memory expand` subcommand. LLM query-expansion
454    /// over a free-text query. CLI parity for the MCP
455    /// `memory_expand_query` tool + the `POST /api/v1/expand_query` HTTP
456    /// route — all three share [`crate::mcp::handle_expand_query`]. Lets
457    /// a harness inject expansion as a one-shot without an MCP stdio
458    /// server or HTTP daemon. Requires a configured LLM (any tier via
459    /// `AI_MEMORY_LLM_BACKEND`, or smart/autonomous preset).
460    Expand(crate::cli::commands::expand::ExpandArgs),
461    /// v0.7.0 ARCH-3 / FX-12 — `ai-memory check-duplicate`
462    /// subcommand. Pre-write near-duplicate check via cosine over
463    /// stored embeddings. CLI parity for `memory_check_duplicate`.
464    /// Requires the embedder (semantic tier or above).
465    CheckDuplicate(crate::cli::commands::check_duplicate::CheckDuplicateArgs),
466    /// v0.7.0 #1598 — `ai-memory reembed` subcommand. Full-corpus
467    /// vector-space migration: re-embeds every live memory (optionally
468    /// `--namespace`-filtered) with the resolved embedding
469    /// backend/model and REPLACES the stored vectors (unlike the boot
470    /// backfill, which only fills missing ones). `--dry-run` prints
471    /// the plan; per-row #1595 failure isolation (skip-with-WARN)
472    /// keeps one poison row from stopping the sweep. Resolves the
473    /// embedder via the same `AppConfig::resolve_embeddings()` +
474    /// `Embedder::from_resolved` path as daemon/MCP boot.
475    Reembed(crate::cli::commands::reembed::ReembedArgs),
476    /// v0.7.0 ARCH-3 / FX-12 — `ai-memory replay` subcommand.
477    /// Reconstruct the conversation transcript chain that produced a
478    /// memory. CLI parity for `memory_replay`.
479    Replay(crate::cli::commands::replay::ReplayArgs),
480    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory reflect`. CLI
481    /// parity for `memory_reflect`. CLI dispatcher uses
482    /// `active_keypair=None` / `embedder=None`; operators who need
483    /// signing or LLM dedup drive the daemon via MCP / HTTP.
484    Reflect(crate::cli::commands::reflect::ReflectArgs),
485    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory subscribe`. CLI
486    /// parity for `memory_subscribe`.
487    Subscribe(crate::cli::commands::subscribe::SubscribeArgs),
488    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory unsubscribe`. CLI
489    /// parity for `memory_unsubscribe`.
490    Unsubscribe(crate::cli::commands::unsubscribe::UnsubscribeArgs),
491    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory list-subscriptions`.
492    /// CLI parity for `memory_list_subscriptions`.
493    ListSubscriptions(crate::cli::commands::list_subscriptions::ListSubscriptionsArgs),
494    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory subscription-replay`.
495    /// CLI parity for `memory_subscription_replay`.
496    SubscriptionReplay(crate::cli::commands::subscription_replay::SubscriptionReplayArgs),
497    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory subscription-dlq-list`.
498    /// CLI parity for `memory_subscription_dlq_list`.
499    SubscriptionDlqList(crate::cli::commands::subscription_dlq_list::SubscriptionDlqListArgs),
    // Name clash to keep in mind: this file also imports
    // `tokio::sync::Notify`. Refer to the variant as `Command::Notify`.
500    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory notify`. CLI
501    /// parity for `memory_notify`.
502    Notify(crate::cli::commands::notify::NotifyArgs),
503    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory inbox`. CLI
504    /// parity for `memory_inbox`.
505    Inbox(crate::cli::commands::inbox::InboxArgs),
506    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory ingest-multistep`.
507    /// CLI parity for `memory_ingest_multistep`. CLI dispatcher passes
508    /// `handler=None`; tier-locked advisory returns on every tier
509    /// because the CLI does not own the LLM dispatch.
510    IngestMultistep(crate::cli::commands::ingest_multistep::IngestMultistepArgs),
511    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory kg-invalidate`.
512    /// CLI parity for `memory_kg_invalidate`.
513    KgInvalidate(crate::cli::commands::kg_invalidate::KgInvalidateArgs),
514    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory kg-timeline`. CLI
515    /// parity for `memory_kg_timeline`.
516    KgTimeline(crate::cli::commands::kg_timeline::KgTimelineArgs),
517    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory entity-register`.
518    /// CLI parity for `memory_entity_register`.
519    EntityRegister(crate::cli::commands::entity_register::EntityRegisterArgs),
520    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory entity-get-by-alias`.
521    /// CLI parity for `memory_entity_get_by_alias`.
522    EntityGetByAlias(crate::cli::commands::entity_get_by_alias::EntityGetByAliasArgs),
523    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory dependents-of-invalidated`.
524    /// CLI parity for `memory_dependents_of_invalidated`.
525    DependentsOfInvalidated(
526        crate::cli::commands::dependents_of_invalidated::DependentsOfInvalidatedArgs,
527    ),
528    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory reflection-origin`.
529    /// CLI parity for `memory_reflection_origin`.
530    ReflectionOrigin(crate::cli::commands::reflection_origin::ReflectionOriginArgs),
531    /// v0.7.0 ARCH-3 / FX-C3 (batch2) — `ai-memory quota-status`. CLI
532    /// parity for `memory_quota_status`.
533    QuotaStatus(crate::cli::commands::quota_status::QuotaStatusArgs),
534}
535
536/// `ai-memory governance` parent argument struct.
// Wrapped by the `Command::Governance` variant. It carries no flags of its
// own; its only job is to hold the nested verb.
537#[derive(Args)]
538pub struct GovernanceCliArgs {
    // The chosen `governance` verb; see `GovernanceAction`.
539    #[command(subcommand)]
540    pub action: GovernanceAction,
541}
542
543/// `ai-memory governance` sub-subcommands. K11 migrator + 7th-form
544/// `install-defaults` (issue #760) bulk-activator for seed rules
545/// R001-R004 live here; future K-track work may add more verbs
546/// (`lint`, `explain`, …) so the surface is shaped as an enum from
547/// day one.
// Uses the fully qualified `clap::Subcommand` derive; it is the same derive
// that `Command` uses through the file-level `use clap::{…, Subcommand}`.
// Each variant wraps an args struct defined in its own `crate::cli` module;
// the variant `///` comments double as clap help text.
548#[derive(clap::Subcommand)]
549pub enum GovernanceAction {
550    /// Translate legacy [governance] policies to v0.7
551    /// [[permissions.rules]] (K9 format).
552    MigrateToPermissions(crate::cli::governance_migrate::MigrateToPermissionsArgs),
553    /// v0.7.0 7th-form closeout (issue #760) — flip the seeded
554    /// operator hard rules R001-R004 (migration
555    /// `0024_v07_governance_rules.sql`) to `enabled = 1`. Interactive
556    /// confirmation by default; `--yes` overrides for CI/scripts.
557    InstallDefaults(crate::cli::governance_install_defaults::InstallDefaultsArgs),
558    /// v0.7.0 issue #863 — shell-side parity for the MCP tool
559    /// `memory_check_agent_action`. Dry-run a substrate agent-action
560    /// rule (R001-R004 plus any operator-added rule) and emit the
561    /// Allow / Refuse / Warn verdict.
562    CheckAction(crate::cli::governance_check_action::CheckActionArgs),
563}
564
565/// Arguments for the `doctor` subcommand. Lives next to `Cli` so clap
566/// derives them automatically; the actual report logic lives in
567/// `cli::doctor::run`.
// Wrapped by the `Command::Doctor` variant. All flags are plain `--long`
// options; no relationship between them (`requires`, `conflicts_with`) is
// declared here, so the "combined with" / "implies" behaviour described in
// the help text below must be implemented by the consumer.
568#[derive(Args)]
569pub struct DoctorCliArgs {
570    /// Query a remote ai-memory daemon's HTTP capabilities + stats
571    /// endpoints instead of opening the local DB. Sections that need
572    /// raw SQL access render as N/A in this mode.
573    #[arg(long, value_name = "URL")]
574    pub remote: Option<String>,
575    /// Emit the report as JSON instead of human-readable text. Useful
576    /// for CI consumers and for `jq`-style filtering.
    // NOTE(review): `Cli` also declares a `--json` flag with
    // `global = true`, so `doctor` sees two definitions sharing the same
    // long name. Confirm which value `cli::doctor::run` actually reads and
    // that clap accepts the overlap.
577    #[arg(long)]
578    pub json: bool,
579    /// Exit 1 when at least one section is at WARN severity. Without
580    /// this flag, warnings keep exit 0; criticals always exit 2.
581    #[arg(long)]
582    pub fail_on_warn: bool,
583    /// v0.6.4-004 — print per-tool, per-family, and per-profile token
584    /// costs (`cl100k_base`) instead of the regular health report.
585    /// Combined with `--json` returns a structured payload for CI.
586    /// Combined with `--profile <name>` reports the cost under that
587    /// hypothetical profile in addition to the active default.
588    #[arg(long)]
589    pub tokens: bool,
590    /// v0.6.4-004 — when used with `--tokens`, evaluate cost under this
591    /// hypothetical profile. Defaults to `core` (the v0.6.4 default).
592    /// Accepts the same vocabulary as `ai-memory mcp --profile`.
    // `None` when the flag is absent: the `core` default mentioned above is
    // not encoded as a clap `default_value`, so it is presumably applied by
    // the consumer — confirm.
593    #[arg(long, value_name = "PROFILE")]
594    pub profile: Option<String>,
595    /// v0.6.4-004 — dump the full per-tool size table as JSON. Implies
596    /// `--tokens`. Used by CI and benchmarks to capture the source-of-
597    /// truth size data without parsing the rendered report.
598    #[arg(long)]
599    pub raw_table: bool,
600    /// v0.7-G3 — emit hook-executor backpressure metrics
601    /// (`events_fired`, `events_dropped`, `mean_latency_us`)
602    /// per loaded hook. Routed through the same reporter bucket
603    /// as `--tokens`. The runtime registry isn't reachable from
604    /// the CLI process, so this surface reports the loaded
605    /// `hooks.toml` shape + zeroed metric placeholders until
606    /// G7-G11 wires the executor into the running daemon's
607    /// snapshot.
608    #[arg(long)]
609    pub hooks: bool,
610}
611
// Flags for the benchmark subcommand (presumably `ai-memory bench`; the
// help text below refers to `bench --json`). Defaults come from constants
// in the `bench` module.
//
// NOTE(review): every "Clamped to [...]" statement in the help text is
// documentation only at this layer. No clap `value_parser` range is
// declared, so out-of-range values parse successfully and the clamping
// is presumably applied by the consumer (TODO confirm).
#[derive(Args)]
pub struct BenchArgs {
    /// Measured iterations per operation. Clamped to `[1, 100_000]`.
    #[arg(long, default_value_t = bench::DEFAULT_ITERATIONS)]
    pub iterations: usize,
    /// Warmup iterations discarded from the percentile sample.
    /// Clamped to `[0, 10_000]`.
    #[arg(long, default_value_t = bench::DEFAULT_WARMUP)]
    pub warmup: usize,
    /// Emit results as JSON instead of the human-readable table.
    #[arg(long)]
    pub json: bool,
    // NOTE(review): typed as `Option<String>` although it holds a path,
    // whereas `history` below uses `Option<PathBuf>`. Left as-is because
    // callers depend on the field type.
    /// Path to a previous `bench --json` payload. When supplied, the
    /// fresh run is compared per-operation against this baseline and
    /// the process exits non-zero if any measured p95 exceeds the
    /// baseline by more than `--regression-threshold` percent.
    /// Independent of the absolute-budget guard.
    #[arg(long, value_name = "PATH")]
    pub baseline: Option<String>,
    // No `requires = "baseline"` is declared, so the flag is accepted on
    // its own and simply has no effect, as the help text states.
    /// Allowed p95 growth (percent) over the `--baseline` reading
    /// before a row is flagged as a regression. Clamped to
    /// `[0.0, 1000.0]`. Has no effect without `--baseline`.
    #[arg(long, default_value_t = bench::DEFAULT_REGRESSION_THRESHOLD_PCT)]
    pub regression_threshold: f64,
    /// Append this run to a JSONL history file (one self-describing
    /// JSON object per line). Creates the file and any missing parent
    /// directories on first call. Each entry carries `captured_at`
    /// (RFC3339), `iterations`, `warmup`, and the same `results` array
    /// `--json` emits — long-running campaigns can build a regression
    /// dataset to feed downstream tooling. The CLI table / JSON output
    /// still prints; this flag only adds the append side effect.
    #[arg(long, value_name = "PATH")]
    pub history: Option<PathBuf>,
    // `None` (flag omitted) selects the legacy workload described in the
    // help text; there is no numeric default.
    /// #1579 B8 — seed a scratch corpus of N rows before running the
    /// workload and gate the verdict against the per-scale budget
    /// table in `PERFORMANCE.md` §"Corpus-scale budgets". Omitting the
    /// flag keeps the legacy ~500-row workload and legacy budgets.
    /// Clamped to `[1, 1_000_000]`.
    #[arg(long, value_name = "ROWS")]
    pub scale: Option<usize>,
}
653
/// Value used for the `--batch` flag of `ai-memory migrate` when the
/// operator does not supply one.
///
/// At present the flag is only an API-compatibility hint; the `MAX_ROWS`
/// note in `src/migrate.rs::migrate` has the details.
#[cfg(feature = "sal")]
const MIGRATE_BATCH_DEFAULT: usize = 1_000;
659
// Flags for `ai-memory migrate`. Only compiled when the `sal` feature is
// enabled, matching the gate on `MIGRATE_BATCH_DEFAULT`.
#[cfg(feature = "sal")]
#[derive(Args)]
pub struct MigrateArgs {
    // `from` and `to` are plain `String`s with no default, so clap treats
    // both flags as required. The URL scheme is not validated here; any
    // string parses.
    /// Source URL. `sqlite:///path/to/file.db` or
    /// `postgres://user:pass@host:port/dbname`.
    #[arg(long)]
    pub from: String,
    /// Destination URL. Same URL shape as `--from`.
    #[arg(long)]
    pub to: String,
    // The default is taken from `MIGRATE_BATCH_DEFAULT`; the "Default
    // 1000" in the help text has to be kept in step with that constant
    // by hand.
    /// Page-size hint. Default 1000. Retained for API compatibility —
    /// the current migrator reads one page capped at `MAX_ROWS`
    /// (1,000,000) and refuses loudly past it; see `src/migrate.rs`.
    #[arg(long, default_value_t = MIGRATE_BATCH_DEFAULT)]
    pub batch: usize,
    /// Only migrate memories in this namespace.
    #[arg(long)]
    pub namespace: Option<String>,
    /// Emit the report but do NOT write to the destination.
    #[arg(long)]
    pub dry_run: bool,
    /// Emit the report as JSON rather than human-readable text.
    #[arg(long)]
    pub json: bool,
}
685
686#[derive(Args)]
687pub struct ServeArgs {
688    #[arg(long, default_value = "127.0.0.1")]
689    pub host: String,
690    #[arg(long, default_value_t = DEFAULT_PORT)]
691    pub port: u16,
692    /// Path to PEM-encoded TLS certificate (may include the full chain).
693    /// Passing both `--tls-cert` and `--tls-key` switches `serve` to
694    /// HTTPS. rustls under the hood — no OpenSSL dep. Absent both
695    /// flags = plain HTTP (same as every previous release).
696    #[arg(long, requires = "tls_key")]
697    pub tls_cert: Option<PathBuf>,
698    /// Path to PEM-encoded TLS private key (PKCS#8 or RSA).
699    #[arg(long, requires = "tls_cert")]
700    pub tls_key: Option<PathBuf>,
701    /// Path to a file containing SHA-256 fingerprints of trusted client
702    /// certificates, one per line (case-insensitive hex, optionally with
703    /// `:` separators; comments start with `#`). When set, `serve`
704    /// demands client-cert mTLS on every connection and refuses any peer
705    /// whose cert fingerprint is not on the list. Requires `--tls-cert`
706    /// and `--tls-key`. This is the peer-mesh identity gate — a peer
707    /// without an authorised cert can't even open a TCP connection, let
708    /// alone hit `/sync/push`. Layer 2 of the peer-mesh crypto stack;
709    /// attested `agent_id` extraction (Layer 2b) lands post-v0.6.0.
710    #[arg(long, requires = "tls_cert")]
711    pub mtls_allowlist: Option<PathBuf>,
712    /// Seconds to wait for in-flight requests to complete on graceful
713    /// shutdown (SIGINT). Default 30. Bumped from 10 in v0.6.0 because
714    /// large `/sync/push` batches can take longer than 10s under load
715    /// (red-team #233).
716    #[arg(long, default_value_t = 30)]
717    pub shutdown_grace_secs: u64,
718
719    // -------- v0.7 federation (ADR-0001) ---------------------------
720    /// W-of-N write quorum. When >=1 and `--quorum-peers` is non-empty,
721    /// every HTTP write fans out to every peer and returns OK only
722    /// after the local commit + W-1 peer acks land within
723    /// `--quorum-timeout-ms`. Default 0 = federation disabled, daemon
724    /// behaves exactly like v0.6.0.
725    #[arg(long, default_value_t = 0)]
726    pub quorum_writes: usize,
727    /// Comma-separated list of peer base URLs. Each peer is assumed to
728    /// expose `POST /api/v1/sync/push` — the same endpoint the
729    /// sync-daemon already uses.
730    #[arg(long, value_delimiter = ',')]
731    pub quorum_peers: Vec<String>,
732    /// Deadline for quorum-ack collection. After this many ms the
733    /// write returns 503 `quorum_not_met`. Default 2000 assumes
734    /// same-DC peers; cross-region (WAN) meshes need 5000-10000 —
735    /// the do-1461 reference deployment uses 8000. See
736    /// docs/federation.md for sizing guidance. (#1565)
737    #[arg(long, default_value_t = 2000)]
738    pub quorum_timeout_ms: u64,
739    /// Optional mTLS client cert for outbound federation POSTs. Same
740    /// cert material the sync-daemon's `--client-cert` accepts.
741    #[arg(long)]
742    pub quorum_client_cert: Option<PathBuf>,
743    /// Optional mTLS client key for outbound federation POSTs.
744    #[arg(long)]
745    pub quorum_client_key: Option<PathBuf>,
746    /// Optional root CA cert to trust for outbound federation HTTPS.
747    /// Required whenever peers present a cert NOT rooted in Mozilla's
748    /// `webpki-roots` bundle (self-signed, private CA, ephemeral test
749    /// CA, etc.) — without this, the reqwest rustls-tls client rejects
750    /// peer certs and every quorum write times out as `quorum_not_met`.
751    /// See #333.
752    #[arg(long)]
753    pub quorum_ca_cert: Option<PathBuf>,
754    /// v0.6.0.1 (#320) — how often, in seconds, the daemon pulls peers
755    /// for any updates it missed while offline or partitioned. 0 disables
756    /// the catchup loop entirely. Default 30s keeps a post-partition
757    /// node convergent within one interval after resume.
758    #[arg(long, default_value_t = 30)]
759    pub catchup_interval_secs: u64,
760    /// v0.7.0 epic (ADR-001) — the federation identity this node signs and
761    /// presents as (`sender_agent_id`). Precedence-2 source, below the
762    /// `AI_MEMORY_FED_IDENTITY` env override and above the historical
763    /// `host:<hostname>` default. Set this to a stable, trust-domain-scoped
764    /// id (e.g. `region/nyc/node-7`) so a node's identity survives a
765    /// hostname change. Unset = keep the hostname default.
766    #[arg(long)]
767    pub federation_identity: Option<String>,
768
769    // -------- v0.7.0 Wave-3 — adapter selection --------------------
770    /// v0.7.0 Wave-3 — full SAL store URL. When set, the daemon binds
771    /// its [`MemoryStore`] handle to the URL-resolved adapter instead
772    /// of the default SQLite path derived from `--db`.
773    ///
774    /// Accepted shapes:
775    ///
776    /// - `sqlite:///absolute/path/to/file.db` — SQLite adapter (same
777    ///   semantics as `--db`).
778    /// - `postgres://user:pass@host:port/dbname` — Postgres adapter.
779    /// - `postgresql://...` — alias for the Postgres scheme.
780    ///
781    /// `--db` and `--store-url` are mutually exclusive: passing both
782    /// is rejected at startup with a clear error.
783    ///
784    /// Postgres-backed daemons require `--features sal,sal-postgres`
785    /// at build time; otherwise the URL is rejected at startup. See
786    /// `docs/postgres-age-guide.md` for the operator workflow.
787    ///
788    /// [`MemoryStore`]: crate::store::MemoryStore
789    #[cfg(feature = "sal")]
790    #[arg(long, value_name = "URL")]
791    pub store_url: Option<String>,
792}
793
794#[derive(Args)]
795pub struct CompletionsArgs {
796    pub shell: Shell,
797}
798
799// ---------------------------------------------------------------------------
800// Top-level dispatch
801// ---------------------------------------------------------------------------
802
803/// Top-level CLI dispatch. Called from `main()` after `Cli::parse()`.
804///
805/// Handles:
806/// - `--db-passphrase-file` → exports `AI_MEMORY_DB_PASSPHRASE`.
807/// - `is_write_command` → conditional post-run WAL checkpoint.
808/// - The match arm for every `Command` variant.
809#[allow(clippy::too_many_lines)]
810pub async fn run(cli: Cli, app_config: &AppConfig) -> Result<()> {
811    // v0.6.0.0: read the SQLCipher passphrase from a file and export it as
812    // AI_MEMORY_DB_PASSPHRASE for the duration of the process. File path
813    // comes from the --db-passphrase-file flag (global). No-op on standard
814    // SQLite builds (the env var is ignored unless the binary was built
815    // with --features sqlcipher).
816    if let Some(path) = &cli.db_passphrase_file {
817        let passphrase = passphrase_from_file(path)?;
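        // SAFETY: executed once at startup, before this function spawns
        // any task or thread of its own.
        // NOTE(review): `run` is `async` and is entered from
        // `#[tokio::main]`, so the runtime's worker threads already exist
        // at this point — confirm nothing reads the environment
        // concurrently.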
819        unsafe { std::env::set_var("AI_MEMORY_DB_PASSPHRASE", passphrase) };
820    }
821    let db_path = app_config.effective_db(&cli.db);
822    // Seed the process-wide per-agent quota defaults from the resolved
823    // `[limits]` config (env `AI_MEMORY_MAX_*` > `[limits]` > compiled
824    // default). `ensure_row` / the Postgres quota-row auto-inserts read
825    // these when stamping a fresh `agent_quotas` row, so every
826    // subcommand path (serve / mcp / CLI writes) charges the same
827    // operator-tuned daily caps. Idempotent — first writer wins; later
828    // calls are no-ops.
829    {
830        let limits = app_config.resolve_limits();
831        crate::quotas::set_quota_defaults(crate::quotas::QuotaDefaults {
832            max_memories_per_day: limits.max_memories_per_day,
833            max_storage_bytes: limits.max_storage_bytes,
834            max_links_per_day: limits.max_links_per_day,
835        });
836    }
837    // #1579 B7 — seed the process-wide sqlite `PRAGMA mmap_size` from
838    // the resolved `[storage]` config (env `AI_MEMORY_DB_MMAP_SIZE` >
839    // `[storage].db_mmap_size_bytes` > compiled 256 MiB default).
840    // Every subsequent `db::open` on any subcommand path (serve / mcp /
841    // CLI) applies it. Idempotent — first writer wins, same as the
842    // quota seeding above.
843    let resolved_storage = app_config.resolve_storage();
844    crate::storage::set_db_mmap_size(resolved_storage.db_mmap_size_bytes);
845    // #1604 — seed the process-wide rerank input-sequence cap from the
846    // resolved `[reranker]` config (env `AI_MEMORY_RERANK_MAX_SEQ` >
847    // `[reranker].max_seq_tokens` > compiled default). Every subsequent
848    // batched cross-encoder rerank forward on any subcommand path
849    // (serve / mcp / CLI) applies it. Idempotent — first writer wins,
850    // same as the mmap seeding above.
851    crate::reranker::set_rerank_max_seq(app_config.resolve_reranker().max_seq_tokens);
852    // #1590 — seed the process-wide operator-configured default
853    // namespace (Some ONLY when `[storage].default_namespace` — or the
854    // legacy flat field — was explicitly set). Every write surface
855    // (MCP `memory_store`, HTTP `POST /api/v1/memories`, the CLI
856    // namespace ladder) consults this; unconfigured deployments keep
857    // their historical per-surface defaults.
858    crate::config::set_configured_default_namespace(
859        resolved_storage
860            .explicit_default_namespace()
861            .map(str::to_string),
862    );
863    let j = cli.json;
864    let cli_agent_id: Option<String> = cli.agent_id.clone();
865    // Track whether command writes to DB (for WAL checkpoint)
866    let needs_checkpoint = is_write_command(&cli.command);
867    let db_path_for_checkpoint = if needs_checkpoint {
868        Some(db_path.clone())
869    } else {
870        None
871    };
872
873    let result = match cli.command {
874        Command::Serve(a) => {
875            // v0.7.0 Wave-3 — `--db` and `--store-url` are mutually
876            // exclusive when both are explicitly supplied. clap can't
877            // express this conflict cross-struct (the global `--db`
878            // lives on `Cli`, the new `--store-url` lives on
879            // `ServeArgs`), so the check happens here at runtime.
880            //
881            // `--db` carries a non-`None` `default_value`, so we can't
882            // tell from the parsed value alone whether the operator
883            // typed it on the command line. We approximate explicit
884            // intent through the `AI_MEMORY_DB` env var (which clap
885            // resolves into the same field) and a non-default path.
            // When either signal indicates `--db` was deliberate AND
            // `--store-url` is set, refuse to start.
888            #[cfg(feature = "sal")]
889            if let Some(ref url) = a.store_url {
890                let db_was_explicit =
891                    std::env::var("AI_MEMORY_DB").is_ok() || db_path != PathBuf::from(DEFAULT_DB);
892                if db_was_explicit {
893                    // #1579 A3 (SECURITY) — redact the URL credential
894                    // before it lands in the error output.
895                    anyhow::bail!(
896                        "--db and --store-url are mutually exclusive. \
897                         Pass exactly one. Got --db={} and --store-url={}",
898                        db_path.display(),
899                        crate::logging::redact_url_password(url),
900                    );
901                }
902            }
903            serve(db_path, a, app_config).await
904        }
905        Command::Mcp { tier, profile } => {
906            let feature_tier = app_config.effective_tier(Some(&tier));
907            // v0.6.4-001 — resolve profile (CLI/env > config > default core).
908            // Surface parse errors to stderr with the diagnostic that
909            // ProfileParseError already produces (lists valid profiles +
910            // valid families) before exiting.
911            let resolved_profile = match app_config.effective_profile(profile.as_deref()) {
912                Ok(p) => p,
913                Err(e) => {
914                    eprintln!("ai-memory mcp: invalid profile: {e}");
915                    std::process::exit(2);
916                }
917            };
918            // v0.7.0 F6 — `mcp::run_mcp_server` is a synchronous
919            // stdin-reading loop that internally calls
920            // `reqwest::blocking::Client` for every LLM-backed tool
921            // (`memory_consolidate`, `memory_expand_query`,
922            // `memory_auto_tag`, `memory_detect_contradiction`).
923            // Running that on a tokio worker thread directly does
924            // two bad things at once:
925            //   1. Pegs a worker thread on a synchronous read and
926            //      keeps the multi-threaded runtime spinning on
927            //      the remaining workers (the 99.3% CPU
928            //      `clock_gettime` / `mach_absolute_time` poll loop
929            //      observed in Round-2 sample profiling).
930            //   2. Calls `reqwest::blocking::Client::send()` from
931            //      within an active tokio runtime context, which
932            //      either panics ("Cannot start a runtime from
933            //      within a runtime") or silently fails the chat
934            //      RPC ("Failed to send chat request") — the
935            //      proximate cause of the four LLM-backed tools
936            //      returning errors while ollama itself was healthy.
937            // Routing the entire MCP loop through `spawn_blocking`
938            // gives it its own dedicated thread with no tokio
939            // runtime context, so the blocking reqwest calls inside
940            // `OllamaClient::generate` are issued cleanly.
941            let db_path_owned = db_path.clone();
942            let app_config_owned = app_config.clone();
943            tokio::task::spawn_blocking(move || {
944                mcp::run_mcp_server(
945                    &db_path_owned,
946                    feature_tier,
947                    &app_config_owned,
948                    &resolved_profile,
949                )
950            })
951            .await
952            .map_err(|e| anyhow::anyhow!("mcp join: {e}"))??;
953            Ok(())
954        }
955        Command::Store(a) => {
956            let stdout = std::io::stdout();
957            let stderr = std::io::stderr();
958            let mut so = stdout.lock();
959            let mut se = stderr.lock();
960            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
961            cli::store::run(
962                &db_path,
963                a,
964                j,
965                app_config,
966                cli_agent_id.as_deref(),
967                &mut out,
968            )
969        }
970        Command::Update(a) => {
971            let stdout = std::io::stdout();
972            let stderr = std::io::stderr();
973            let mut so = stdout.lock();
974            let mut se = stderr.lock();
975            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
976            cli::update::run(&db_path, &a, j, &mut out)
977        }
978        Command::Recall(a) => {
979            let stdout = std::io::stdout();
980            let stderr = std::io::stderr();
981            let mut so = stdout.lock();
982            let mut se = stderr.lock();
983            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
984            cli::recall::run(&db_path, &a, j, app_config, &mut out)
985        }
986        Command::Search(a) => {
987            let stdout = std::io::stdout();
988            let stderr = std::io::stderr();
989            let mut so = stdout.lock();
990            let mut se = stderr.lock();
991            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
992            cli::search::run(&db_path, &a, j, &mut out)
993        }
994        Command::Get(a) => {
995            let stdout = std::io::stdout();
996            let stderr = std::io::stderr();
997            let mut so = stdout.lock();
998            let mut se = stderr.lock();
999            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1000            cli::crud::cmd_get(&db_path, &a, j, &mut out)
1001        }
1002        Command::List(a) => {
1003            let stdout = std::io::stdout();
1004            let stderr = std::io::stderr();
1005            let mut so = stdout.lock();
1006            let mut se = stderr.lock();
1007            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1008            cli::crud::cmd_list(&db_path, &a, j, app_config, &mut out)
1009        }
1010        Command::Delete(a) => {
1011            let stdout = std::io::stdout();
1012            let stderr = std::io::stderr();
1013            let mut so = stdout.lock();
1014            let mut se = stderr.lock();
1015            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1016            cli::crud::cmd_delete(&db_path, &a, j, cli_agent_id.as_deref(), &mut out)
1017        }
1018        Command::Promote(a) => {
1019            let stdout = std::io::stdout();
1020            let stderr = std::io::stderr();
1021            let mut so = stdout.lock();
1022            let mut se = stderr.lock();
1023            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1024            cli::promote::cmd_promote(&db_path, &a, j, cli_agent_id.as_deref(), &mut out)
1025        }
1026        Command::Forget(a) => {
1027            let stdout = std::io::stdout();
1028            let stderr = std::io::stderr();
1029            let mut so = stdout.lock();
1030            let mut se = stderr.lock();
1031            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1032            cli::forget::cmd_forget(&db_path, &a, j, &mut out)
1033        }
1034        Command::Link(a) => {
1035            let stdout = std::io::stdout();
1036            let stderr = std::io::stderr();
1037            let mut so = stdout.lock();
1038            let mut se = stderr.lock();
1039            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1040            cli::link::cmd_link(&db_path, &a, j, &mut out)
1041        }
1042        Command::Consolidate(a) => {
1043            let stdout = std::io::stdout();
1044            let stderr = std::io::stderr();
1045            let mut so = stdout.lock();
1046            let mut se = stderr.lock();
1047            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1048            cli::consolidate::run(&db_path, a, j, cli_agent_id.as_deref(), &mut out)
1049        }
1050        Command::Resolve(a) => {
1051            let stdout = std::io::stdout();
1052            let stderr = std::io::stderr();
1053            let mut so = stdout.lock();
1054            let mut se = stderr.lock();
1055            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1056            cli::link::cmd_resolve(&db_path, &a, j, &mut out)
1057        }
1058        Command::Shell => cli::shell::run(&db_path),
1059        Command::Sync(a) => {
1060            let stdout = std::io::stdout();
1061            let stderr = std::io::stderr();
1062            let mut so = stdout.lock();
1063            let mut se = stderr.lock();
1064            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1065            cli::sync::run(&db_path, &a, j, cli_agent_id.as_deref(), &mut out)
1066        }
1067        Command::SyncDaemon(a) => cli::sync::run_daemon(&db_path, a, cli_agent_id.as_deref()).await,
1068        Command::AutoConsolidate(a) => {
1069            let stdout = std::io::stdout();
1070            let stderr = std::io::stderr();
1071            let mut so = stdout.lock();
1072            let mut se = stderr.lock();
1073            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1074            cli::consolidate::run_auto(&db_path, &a, j, cli_agent_id.as_deref(), &mut out)
1075        }
1076        Command::Gc => {
1077            let stdout = std::io::stdout();
1078            let stderr = std::io::stderr();
1079            let mut so = stdout.lock();
1080            let mut se = stderr.lock();
1081            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1082            cli::gc::run_gc(&db_path, j, app_config, &mut out)
1083        }
1084        Command::Stats => {
1085            let stdout = std::io::stdout();
1086            let stderr = std::io::stderr();
1087            let mut so = stdout.lock();
1088            let mut se = stderr.lock();
1089            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1090            cli::gc::run_stats(&db_path, j, &mut out)
1091        }
1092        Command::Namespaces => {
1093            let stdout = std::io::stdout();
1094            let stderr = std::io::stderr();
1095            let mut so = stdout.lock();
1096            let mut se = stderr.lock();
1097            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1098            cli::gc::run_namespaces(&db_path, j, &mut out)
1099        }
1100        Command::Namespace(a) => {
1101            // v0.7.0 (issue #800) — Batman Mode Crack 1. First-class CLI
1102            // wrapper around the MCP `memory_namespace_set_standard` /
1103            // `_get_standard` / `_clear_standard` tools so operators
1104            // don't need to drop into MCP-stdio JSON-RPC just to bind
1105            // a `GovernancePolicy` to a namespace.
1106            let stdout = std::io::stdout();
1107            let stderr = std::io::stderr();
1108            let mut so = stdout.lock();
1109            let mut se = stderr.lock();
1110            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1111            cli::namespace::run(&db_path, a, j, &mut out)
1112        }
1113        Command::Config(a) => {
1114            // v0.7.x (#1146) — enterprise configuration tooling.
1115            // `ai-memory config migrate` rewrites a legacy v1
1116            // (flat-field) `config.toml` to the v2 sectioned shape.
1117            let stdout = std::io::stdout();
1118            let stderr = std::io::stderr();
1119            let mut so = stdout.lock();
1120            let mut se = stderr.lock();
1121            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1122            match cli::commands::config::run(&db_path, a, &mut out)? {
1123                0 => Ok(()),
1124                code => std::process::exit(code),
1125            }
1126        }
1127        Command::Export => {
1128            let stdout = std::io::stdout();
1129            let stderr = std::io::stderr();
1130            let mut so = stdout.lock();
1131            let mut se = stderr.lock();
1132            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1133            cli::io::export(&db_path, &mut out)
1134        }
1135        Command::Import(a) => {
1136            let stdout = std::io::stdout();
1137            let stderr = std::io::stderr();
1138            let mut so = stdout.lock();
1139            let mut se = stderr.lock();
1140            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1141            cli::io::import(&db_path, &a, j, cli_agent_id.as_deref(), &mut out)
1142        }
1143        Command::Completions(a) => {
1144            generate(
1145                a.shell,
1146                &mut Cli::command(),
1147                "ai-memory",
1148                &mut std::io::stdout(),
1149            );
1150            Ok(())
1151        }
1152        Command::Man => {
1153            let cmd = Cli::command();
1154            let man = clap_mangen::Man::new(cmd);
1155            man.render(&mut std::io::stdout())?;
1156            Ok(())
1157        }
1158        Command::Mine(a) => {
1159            let stdout = std::io::stdout();
1160            let stderr = std::io::stderr();
1161            let mut so = stdout.lock();
1162            let mut se = stderr.lock();
1163            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1164            cli::io::mine(
1165                &db_path,
1166                a,
1167                j,
1168                app_config,
1169                cli_agent_id.as_deref(),
1170                &mut out,
1171            )
1172        }
1173        Command::Archive(a) => {
1174            let stdout = std::io::stdout();
1175            let stderr = std::io::stderr();
1176            let mut so = stdout.lock();
1177            let mut se = stderr.lock();
1178            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1179            cli::archive::run(&db_path, a, j, &mut out)
1180        }
1181        Command::Agents(a) => {
1182            let stdout = std::io::stdout();
1183            let stderr = std::io::stderr();
1184            let mut so = stdout.lock();
1185            let mut se = stderr.lock();
1186            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1187            cli::agents::run_agents(&db_path, a, j, &mut out)
1188        }
1189        Command::Identity(a) => {
1190            // v0.7 H1 — keypair lifecycle is DB-free. The handler
1191            // resolves the key directory itself (via --key-dir or the
1192            // default <config>/ai-memory/keys).
1193            let stdout = std::io::stdout();
1194            let stderr = std::io::stderr();
1195            let mut so = stdout.lock();
1196            let mut se = stderr.lock();
1197            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1198            cli::identity::run(a, j, &mut out)
1199        }
1200        Command::Offload(a) => {
1201            // v0.7.0 QW-3 — context-offload substrate primitive.
1202            // Reads `--file` (or `-` stdin), writes a row into
1203            // `offloaded_blobs`, returns the `ref_id`. The full
1204            // short-term-context-compression pattern (Mermaid canvas
1205            // + auto-cadence + node_id integration) targets v0.8.0.
1206            let stdout = std::io::stdout();
1207            let stderr = std::io::stderr();
1208            let mut so = stdout.lock();
1209            let mut se = stderr.lock();
1210            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1211            cli::offload::run_offload(&db_path, &a, &mut out)
1212        }
1213        Command::Deref(a) => {
1214            // v0.7.0 QW-3 — dereference a `ref_id` produced by
1215            // `ai-memory offload`. Refuses tampered rows.
1216            let stdout = std::io::stdout();
1217            let stderr = std::io::stderr();
1218            let mut so = stdout.lock();
1219            let mut se = stderr.lock();
1220            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1221            cli::offload::run_deref(&db_path, &a, &mut out)
1222        }
1223        Command::Rules(a) => {
1224            // v0.7.0 (issue #691) — substrate-level agent-action rules
1225            // engine. Mutation verbs require the operator key on disk;
1226            // read verbs (list / check) work without it.
1227            let stdout = std::io::stdout();
1228            let stderr = std::io::stderr();
1229            let mut so = stdout.lock();
1230            let mut se = stderr.lock();
1231            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1232            cli::rules::run(&db_path, a, j, &mut out)
1233        }
1234        Command::Pending(a) => {
1235            let stdout = std::io::stdout();
1236            let stderr = std::io::stderr();
1237            let mut so = stdout.lock();
1238            let mut se = stderr.lock();
1239            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1240            cli::agents::run_pending(&db_path, a, j, cli_agent_id.as_deref(), &mut out)
1241        }
1242        Command::Backup(a) => {
1243            let stdout = std::io::stdout();
1244            let stderr = std::io::stderr();
1245            let mut so = stdout.lock();
1246            let mut se = stderr.lock();
1247            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1248            cli::backup::run_backup(&db_path, &a, j, &mut out)
1249        }
1250        Command::Restore(a) => {
1251            let stdout = std::io::stdout();
1252            let stderr = std::io::stderr();
1253            let mut so = stdout.lock();
1254            let mut se = stderr.lock();
1255            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1256            cli::backup::run_restore(&db_path, &a, j, &mut out)
1257        }
1258        Command::Curator(a) => {
1259            // v0.7.0 #1548 — `--db` and `--store-url` are mutually
1260            // exclusive when both are explicitly supplied, mirroring the
1261            // `serve` arm above. The global `--db` carries a non-`None`
1262            // `default_value`, so we approximate explicit operator
1263            // intent through the `AI_MEMORY_DB` env var (which clap
1264            // resolves into the same field) or a non-default path.
1265            #[cfg(feature = "sal")]
1266            if let Some(ref url) = a.store_url {
1267                let db_was_explicit =
1268                    std::env::var("AI_MEMORY_DB").is_ok() || db_path != PathBuf::from(DEFAULT_DB);
1269                if db_was_explicit {
1270                    // #1579 A3 (SECURITY) — redact the URL credential
1271                    // before it lands in the error output.
1272                    anyhow::bail!(
1273                        "--db and --store-url are mutually exclusive. \
1274                         Pass exactly one. Got --db={} and --store-url={}",
1275                        db_path.display(),
1276                        crate::logging::redact_url_password(url),
1277                    );
1278                }
1279            }
1280            // Initialize the tracing subscriber so the daemon-start
1281            // banner and per-cycle `tracing::info!` lines in
1282            // `curator::run_daemon` actually emit. Previously only the
1283            // HTTP `serve` path called `init_tracing()`, leaving the
1284            // curator path silent regardless of `RUST_LOG`. `try_init`
1285            // inside `init_tracing` makes this safe to call even when
1286            // another subscriber is already installed.
1287            init_tracing();
1288            // Daemon mode runs indefinitely on a `spawn_blocking` worker
1289            // that itself calls `tracing::info!`. If the dispatch held
1290            // the process-wide `Stdout::lock()` while the daemon ran,
1291            // the blocking thread's tracing write would deadlock on the
1292            // ReentrantMutex (same-thread re-entry is fine; cross-thread
1293            // contention isn't). `--daemon` doesn't write to `out`
1294            // anyway, so route it to `io::sink()` and only lock the
1295            // real stdout/stderr for the modes that actually emit CLI
1296            // output (`--once`, `--reflect`, `--rollback`).
1297            if a.daemon {
1298                let mut so = std::io::sink();
1299                let mut se = std::io::sink();
1300                let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1301                cli::curator::run(&db_path, &a, app_config, &mut out).await
1302            } else {
1303                let stdout = std::io::stdout();
1304                let stderr = std::io::stderr();
1305                let mut so = stdout.lock();
1306                let mut se = stderr.lock();
1307                let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1308                cli::curator::run(&db_path, &a, app_config, &mut out).await
1309            }
1310        }
1311        Command::Bench(a) => cmd_bench(&a),
1312        #[cfg(feature = "sal")]
1313        Command::Migrate(a) => cmd_migrate(&a).await,
1314        #[cfg(feature = "sal")]
1315        Command::SchemaInit(a) => {
1316            let stdout = std::io::stdout();
1317            let stderr = std::io::stderr();
1318            let mut so = stdout.lock();
1319            let mut se = stderr.lock();
1320            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1321            cli::schema_init::run(&a, &mut out).await
1322        }
1323        Command::Doctor(a) => {
1324            // P7 / R7. The doctor is read-only; it never sets
1325            // `needs_checkpoint`. We compute the exit code from the
1326            // overall severity and propagate it via the process-exit
1327            // path below so callers (CI, ops scripts) can branch on it.
1328            //
1329            // The remote mode uses `reqwest::blocking::Client` which
1330            // panics when dropped on a tokio runtime thread, so the
1331            // entire doctor pass runs inside `spawn_blocking`.
1332            let db_path_doctor = db_path.clone();
1333            // v0.6.4-004 — `--tokens` (and its alias `--raw-table`) bypass
1334            // the regular health pass. Routes to a dedicated tokens
1335            // reporter that consumes `crate::sizes::tool_sizes()` and
1336            // `crate::profile::Family::for_tool` to roll up cost.
1337            if a.tokens || a.raw_table {
1338                let stdout = std::io::stdout();
1339                let stderr = std::io::stderr();
1340                let mut so = stdout.lock();
1341                let mut se = stderr.lock();
1342                let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1343                let exit = cli::doctor::run_tokens(
1344                    cli::doctor::TokensArgs {
1345                        json: a.json,
1346                        raw_table: a.raw_table,
1347                        profile: a.profile,
1348                        hooks: a.hooks,
1349                    },
1350                    &mut out,
1351                )?;
1352                std::process::exit(exit);
1353            }
1354            // v0.7-G3 — `--hooks` standalone routes to the hook
1355            // executor metrics reporter. Same dispatch shape as
1356            // `--tokens` so both share the "tokens reporter
1357            // bucket" the G3 prompt called out.
1358            if a.hooks {
1359                let stdout = std::io::stdout();
1360                let stderr = std::io::stderr();
1361                let mut so = stdout.lock();
1362                let mut se = stderr.lock();
1363                let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1364                let exit = cli::doctor::run_hooks(
1365                    cli::doctor::HooksReportArgs { json: a.json },
1366                    &mut out,
1367                )?;
1368                std::process::exit(exit);
1369            }
1370            let args = cli::doctor::DoctorArgs {
1371                remote: a.remote,
1372                json: a.json,
1373                fail_on_warn: a.fail_on_warn,
1374            };
1375            let join = tokio::task::spawn_blocking(move || {
1376                let stdout = std::io::stdout();
1377                let stderr = std::io::stderr();
1378                let mut so = stdout.lock();
1379                let mut se = stderr.lock();
1380                let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1381                cli::doctor::run(&db_path_doctor, &args, &mut out)
1382            })
1383            .await;
1384            match join {
1385                Ok(Ok(0)) => Ok(()),
1386                Ok(Ok(code)) => std::process::exit(code),
1387                Ok(Err(e)) => Err(e),
1388                Err(e) => Err(anyhow::anyhow!("doctor task join failed: {e}")),
1389            }
1390        }
1391        Command::Boot(a) => {
1392            // Issue #487. Read-only, fast, no embedder, no daemon. Suitable
1393            // for invocation from any AI-agent integration (Claude Code
1394            // SessionStart hook, Cursor / Cline / Continue / Windsurf
1395            // system-message, programmatic prepend in Claude Agent SDK /
1396            // OpenAI Apps SDK / Codex CLI, OpenClaw built-in, local models
1397            // via LM Studio / Ollama / vLLM).
1398            let stdout = std::io::stdout();
1399            let stderr = std::io::stderr();
1400            let mut so = stdout.lock();
1401            let mut se = stderr.lock();
1402            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1403            // PR-5: a `boot` invocation is itself an audit-worthy event.
1404            // Emission is a no-op when audit is disabled.
1405            crate::audit::emit(crate::audit::EventBuilder::new(
1406                crate::audit::AuditAction::SessionBoot,
1407                crate::audit::actor(
1408                    cli_agent_id.as_deref().unwrap_or("anonymous"),
1409                    "explicit_or_default",
1410                    None,
1411                ),
1412                crate::audit::target_sweep(a.namespace.as_deref().unwrap_or("auto")),
1413            ));
1414            cli::boot::run(&db_path, &a, app_config, &mut out)
1415        }
1416        Command::Install(a) => {
1417            // Issue #487 PR-2. Read-only filesystem op against the agent's
1418            // config file (NOT the ai-memory DB). Default is dry-run; --apply
1419            // is opt-in and writes a backup before mutating anything.
1420            let stdout = std::io::stdout();
1421            let stderr = std::io::stderr();
1422            let mut so = stdout.lock();
1423            let mut se = stderr.lock();
1424            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1425            cli::install::run(&a, &mut out)
1426        }
1427        Command::Wrap(a) => {
1428            // Issue #487 PR-6. Pure-Rust cross-platform replacement for
1429            // the bash / PowerShell wrappers PR-1 shipped in the
1430            // integration recipes. Runs boot in-process, builds the
1431            // system message, spawns the wrapped agent, and propagates
1432            // the agent's exit code via std::process::exit.
1433            let stdout = std::io::stdout();
1434            let stderr = std::io::stderr();
1435            let mut so = stdout.lock();
1436            let mut se = stderr.lock();
1437            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1438            let code = cli::wrap::run(&db_path, &a, app_config, &mut out)?;
1439            // Drop the locks/output before exit so any pending writes
1440            // get flushed by the OS on process teardown.
1441            drop(out);
1442            drop(so);
1443            drop(se);
1444            if code == 0 {
1445                Ok(())
1446            } else {
1447                std::process::exit(code);
1448            }
1449        }
1450        Command::Logs(a) => {
1451            let stdout = std::io::stdout();
1452            let stderr = std::io::stderr();
1453            let mut so = stdout.lock();
1454            let mut se = stderr.lock();
1455            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1456            cli::logs::run(a, app_config, &mut out)
1457        }
1458        Command::Audit(a) => {
1459            let stdout = std::io::stdout();
1460            let stderr = std::io::stderr();
1461            let mut so = stdout.lock();
1462            let mut se = stderr.lock();
1463            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1464            match cli::audit::run(a, app_config, &mut out)? {
1465                0 => Ok(()),
1466                code => std::process::exit(code),
1467            }
1468        }
1469        Command::Governance(a) => {
1470            let stdout = std::io::stdout();
1471            let stderr = std::io::stderr();
1472            let mut so = stdout.lock();
1473            let mut se = stderr.lock();
1474            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1475            match a.action {
1476                GovernanceAction::MigrateToPermissions(args) => {
1477                    cli::governance_migrate::run(args, &mut out)
1478                }
1479                GovernanceAction::InstallDefaults(args) => {
1480                    cli::governance_install_defaults::run(&db_path, args, &mut out)
1481                }
1482                GovernanceAction::CheckAction(args) => {
1483                    cli::governance_check_action::run(&db_path, &args, &mut out)
1484                }
1485            }
1486        }
1487        Command::VerifyReflectionChain(a) => {
1488            let stdout = std::io::stdout();
1489            let stderr = std::io::stderr();
1490            let mut so = stdout.lock();
1491            let mut se = stderr.lock();
1492            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1493            match cli::verify::run(&db_path, &a, &mut out)? {
1494                0 => Ok(()),
1495                code => std::process::exit(code),
1496            }
1497        }
1498        Command::VerifySignedEventsChain(a) => {
1499            let stdout = std::io::stdout();
1500            let stderr = std::io::stderr();
1501            let mut so = stdout.lock();
1502            let mut se = stderr.lock();
1503            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1504            match cli::verify_signed_events::run(&db_path, &a, &mut out)? {
1505                0 => Ok(()),
1506                code => std::process::exit(code),
1507            }
1508        }
1509        Command::ExportForensicBundle(a) => {
1510            let stdout = std::io::stdout();
1511            let stderr = std::io::stderr();
1512            let mut so = stdout.lock();
1513            let mut se = stderr.lock();
1514            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1515            match cli::export::export(&db_path, &a, &mut out)? {
1516                0 => Ok(()),
1517                code => std::process::exit(code),
1518            }
1519        }
1520        Command::VerifyForensicBundle(a) => {
1521            let stdout = std::io::stdout();
1522            let stderr = std::io::stderr();
1523            let mut so = stdout.lock();
1524            let mut se = stderr.lock();
1525            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1526            match cli::export::verify(&a, &mut out)? {
1527                0 => Ok(()),
1528                code => std::process::exit(code),
1529            }
1530        }
1531        Command::ExportReflections(a) => {
1532            let stdout = std::io::stdout();
1533            let stderr = std::io::stderr();
1534            let mut so = stdout.lock();
1535            let mut se = stderr.lock();
1536            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1537            match cli::commands::export_reflections::run(&db_path, &a, &mut out)? {
1538                0 => Ok(()),
1539                code => std::process::exit(code),
1540            }
1541        }
1542        Command::RecoverPreviousSession(a) => {
1543            // Issue #1389 — fail-safe recovery from host transcripts.
1544            // Graceful by design: the SessionStart-hook chain MUST
1545            // NOT wedge the agent boot, so per-line parse errors
1546            // surface in the report rather than as Err.
1547            let stdout = std::io::stdout();
1548            let stderr = std::io::stderr();
1549            let mut so = stdout.lock();
1550            let mut se = stderr.lock();
1551            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1552            match cli::commands::recover_previous_session::run(&db_path, &a, &mut out)? {
1553                0 => Ok(()),
1554                code => std::process::exit(code),
1555            }
1556        }
1557        Command::Atomise(a) => {
1558            let stdout = std::io::stdout();
1559            let stderr = std::io::stderr();
1560            let mut so = stdout.lock();
1561            let mut se = stderr.lock();
1562            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1563            match cli::commands::atomise::run(
1564                &db_path,
1565                &a,
1566                app_config,
1567                cli_agent_id.as_deref(),
1568                &mut out,
1569            )? {
1570                0 => Ok(()),
1571                code => std::process::exit(code),
1572            }
1573        }
1574        Command::Persona(a) => {
1575            let stdout = std::io::stdout();
1576            let stderr = std::io::stderr();
1577            let mut so = stdout.lock();
1578            let mut se = stderr.lock();
1579            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1580            // v0.7.0 QW-2 — the CLI deliberately runs WITHOUT a live
1581            // LLM client. `--regenerate` requires one; we surface the
1582            // documented "install Ollama" hint via exit code 2 rather
1583            // than spinning up a transient OllamaClient here. Operators
1584            // who want the regenerate path call `memory_persona_generate`
1585            // through MCP (where the daemon already owns the LLM).
1586            match cli::commands::persona::run(&db_path, &a, None, None, &mut out)? {
1587                0 => Ok(()),
1588                code => std::process::exit(code),
1589            }
1590        }
1591        Command::Calibrate(a) => {
1592            let stdout = std::io::stdout();
1593            let stderr = std::io::stderr();
1594            let mut so = stdout.lock();
1595            let mut se = stderr.lock();
1596            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1597            // v0.7.0 Form 5 (issue #758) — calibration driver.
1598            // Currently dispatches `calibrate confidence`; future
1599            // subcommands (e.g. `calibrate recall`) layer on alongside.
1600            match a.subcommand {
1601                cli::commands::calibrate_confidence::CalibrateSubcommand::Confidence(ref conf) => {
1602                    match cli::commands::calibrate_confidence::run(&db_path, conf, &mut out)? {
1603                        0 => Ok(()),
1604                        code => std::process::exit(code),
1605                    }
1606                }
1607            }
1608        }
1609        Command::Skill(a) => {
1610            let stdout = std::io::stdout();
1611            let stderr = std::io::stderr();
1612            let mut so = stdout.lock();
1613            let mut se = stderr.lock();
1614            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1615            // v0.7.0 Cluster E API-2 (issue #767) — `ai-memory skill
1616            // <subcommand>`. The CLI dispatches with `active_keypair =
1617            // None` to match the existing CLI convention (Persona /
1618            // Calibrate also run without daemon-side ambient state).
1619            // Operators who want signed skill registers/exports/promotes
1620            // hit the MCP / HTTP surface where the daemon owns the
1621            // keypair; the CLI surface stays unsigned by design so
1622            // shell scripts can drive skills without re-implementing
1623            // the keypair-load ceremony.
1624            match cli::commands::skill::run(&db_path, &a, None, &mut out)? {
1625                0 => Ok(()),
1626                code => std::process::exit(code),
1627            }
1628        }
1629        Command::Share(a) => {
1630            let stdout = std::io::stdout();
1631            let stderr = std::io::stderr();
1632            let mut so = stdout.lock();
1633            let mut se = stderr.lock();
1634            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1635            // v0.7.0 #1095 — `ai-memory share`. Wraps the same substrate
1636            // primitive (`mcp::tools::share::handle_share`) the MCP +
1637            // HTTP surfaces consume; wire envelope is byte-equal across
1638            // the three.
1639            cli::share::cmd_share(&db_path, &a, &mut out)
1640        }
1641        // v0.7.0 ARCH-3 / FX-12 — MCP/CLI parity build-out. Each
1642        // dispatch arm wraps the same substrate primitive the MCP tool
1643        // consumes; wire envelope is byte-equal across MCP / HTTP /
1644        // CLI. See `docs/v0.7.0/arch-3-mcp-cli-parity-audit.md`.
1645        Command::KgQuery(a) => {
1646            let stdout = std::io::stdout();
1647            let stderr = std::io::stderr();
1648            let mut so = stdout.lock();
1649            let mut se = stderr.lock();
1650            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1651            cli::commands::kg_query::cmd_kg_query(&db_path, &a, &mut out)
1652        }
1653        Command::FindPaths(a) => {
1654            let stdout = std::io::stdout();
1655            let stderr = std::io::stderr();
1656            let mut so = stdout.lock();
1657            let mut se = stderr.lock();
1658            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1659            cli::commands::find_paths::cmd_find_paths(&db_path, &a, &mut out)
1660        }
1661        Command::RecallObservations(a) => {
1662            let stdout = std::io::stdout();
1663            let stderr = std::io::stderr();
1664            let mut so = stdout.lock();
1665            let mut se = stderr.lock();
1666            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1667            cli::commands::recall_observations::cmd_recall_observations(&db_path, &a, &mut out)
1668        }
1669        Command::CheckDuplicate(a) => {
1670            let stdout = std::io::stdout();
1671            let stderr = std::io::stderr();
1672            let mut so = stdout.lock();
1673            let mut se = stderr.lock();
1674            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1675            cli::commands::check_duplicate::cmd_check_duplicate(&db_path, &a, app_config, &mut out)
1676                .await
1677        }
1678        Command::Expand(a) => {
1679            let stdout = std::io::stdout();
1680            let stderr = std::io::stderr();
1681            let mut so = stdout.lock();
1682            let mut se = stderr.lock();
1683            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1684            match cli::commands::expand::cmd_expand(&a, app_config, &mut out).await? {
1685                0 => Ok(()),
1686                code => std::process::exit(code),
1687            }
1688        }
1689        Command::Reembed(a) => {
1690            let stdout = std::io::stdout();
1691            let stderr = std::io::stderr();
1692            let mut so = stdout.lock();
1693            let mut se = stderr.lock();
1694            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1695            // v0.7.0 #1598 — full-corpus vector-space migration.
1696            // Non-zero exit codes map configuration outcomes
1697            // (no-embedder / init-failed) like `ai-memory expand`.
1698            match cli::commands::reembed::cmd_reembed(&db_path, &a, app_config, &mut out).await? {
1699                0 => Ok(()),
1700                code => std::process::exit(code),
1701            }
1702        }
1703        Command::Replay(a) => {
1704            let stdout = std::io::stdout();
1705            let stderr = std::io::stderr();
1706            let mut so = stdout.lock();
1707            let mut se = stderr.lock();
1708            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1709            cli::commands::replay::cmd_replay(&db_path, &a, &mut out)
1710        }
1711        // v0.7.0 ARCH-3 / FX-C3 (batch2) — 16 additional CLI parity
1712        // dispatch arms. Each wraps the same substrate primitive the
1713        // MCP tool consumes; wire envelope is byte-equal across MCP /
1714        // HTTP / CLI. See
1715        // `docs/v0.7.0/arch-3-mcp-cli-parity-audit.md` §"Added in
1716        // fix/arch3-mcp-cli-parity-batch2".
1717        Command::Reflect(a) => {
1718            let stdout = std::io::stdout();
1719            let stderr = std::io::stderr();
1720            let mut so = stdout.lock();
1721            let mut se = stderr.lock();
1722            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1723            cli::commands::reflect::cmd_reflect(&db_path, &a, &mut out)
1724        }
1725        Command::Subscribe(a) => {
1726            let stdout = std::io::stdout();
1727            let stderr = std::io::stderr();
1728            let mut so = stdout.lock();
1729            let mut se = stderr.lock();
1730            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1731            cli::commands::subscribe::cmd_subscribe(&db_path, &a, &mut out)
1732        }
1733        Command::Unsubscribe(a) => {
1734            let stdout = std::io::stdout();
1735            let stderr = std::io::stderr();
1736            let mut so = stdout.lock();
1737            let mut se = stderr.lock();
1738            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1739            cli::commands::unsubscribe::cmd_unsubscribe(&db_path, &a, &mut out)
1740        }
1741        Command::ListSubscriptions(a) => {
1742            let stdout = std::io::stdout();
1743            let stderr = std::io::stderr();
1744            let mut so = stdout.lock();
1745            let mut se = stderr.lock();
1746            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1747            cli::commands::list_subscriptions::cmd_list_subscriptions(&db_path, &a, &mut out)
1748        }
1749        Command::SubscriptionReplay(a) => {
1750            let stdout = std::io::stdout();
1751            let stderr = std::io::stderr();
1752            let mut so = stdout.lock();
1753            let mut se = stderr.lock();
1754            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1755            cli::commands::subscription_replay::cmd_subscription_replay(&db_path, &a, &mut out)
1756        }
1757        Command::SubscriptionDlqList(a) => {
1758            let stdout = std::io::stdout();
1759            let stderr = std::io::stderr();
1760            let mut so = stdout.lock();
1761            let mut se = stderr.lock();
1762            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1763            cli::commands::subscription_dlq_list::cmd_subscription_dlq_list(&db_path, &a, &mut out)
1764        }
1765        Command::Notify(a) => {
1766            let stdout = std::io::stdout();
1767            let stderr = std::io::stderr();
1768            let mut so = stdout.lock();
1769            let mut se = stderr.lock();
1770            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1771            cli::commands::notify::cmd_notify(&db_path, &a, app_config, &mut out)
1772        }
1773        Command::Inbox(a) => {
1774            let stdout = std::io::stdout();
1775            let stderr = std::io::stderr();
1776            let mut so = stdout.lock();
1777            let mut se = stderr.lock();
1778            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1779            cli::commands::inbox::cmd_inbox(&db_path, &a, &mut out)
1780        }
1781        Command::IngestMultistep(a) => {
1782            let stdout = std::io::stdout();
1783            let stderr = std::io::stderr();
1784            let mut so = stdout.lock();
1785            let mut se = stderr.lock();
1786            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1787            cli::commands::ingest_multistep::cmd_ingest_multistep(&a, app_config, &mut out)
1788        }
1789        Command::KgInvalidate(a) => {
1790            let stdout = std::io::stdout();
1791            let stderr = std::io::stderr();
1792            let mut so = stdout.lock();
1793            let mut se = stderr.lock();
1794            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1795            cli::commands::kg_invalidate::cmd_kg_invalidate(&db_path, &a, &mut out)
1796        }
1797        Command::KgTimeline(a) => {
1798            let stdout = std::io::stdout();
1799            let stderr = std::io::stderr();
1800            let mut so = stdout.lock();
1801            let mut se = stderr.lock();
1802            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1803            cli::commands::kg_timeline::cmd_kg_timeline(&db_path, &a, &mut out)
1804        }
1805        Command::EntityRegister(a) => {
1806            let stdout = std::io::stdout();
1807            let stderr = std::io::stderr();
1808            let mut so = stdout.lock();
1809            let mut se = stderr.lock();
1810            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1811            cli::commands::entity_register::cmd_entity_register(&db_path, &a, &mut out)
1812        }
1813        Command::EntityGetByAlias(a) => {
1814            let stdout = std::io::stdout();
1815            let stderr = std::io::stderr();
1816            let mut so = stdout.lock();
1817            let mut se = stderr.lock();
1818            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1819            cli::commands::entity_get_by_alias::cmd_entity_get_by_alias(&db_path, &a, &mut out)
1820        }
1821        Command::DependentsOfInvalidated(a) => {
1822            let stdout = std::io::stdout();
1823            let stderr = std::io::stderr();
1824            let mut so = stdout.lock();
1825            let mut se = stderr.lock();
1826            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1827            cli::commands::dependents_of_invalidated::cmd_dependents_of_invalidated(
1828                &db_path, &a, &mut out,
1829            )
1830        }
1831        Command::ReflectionOrigin(a) => {
1832            let stdout = std::io::stdout();
1833            let stderr = std::io::stderr();
1834            let mut so = stdout.lock();
1835            let mut se = stderr.lock();
1836            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1837            cli::commands::reflection_origin::cmd_reflection_origin(&db_path, &a, &mut out)
1838        }
1839        Command::QuotaStatus(a) => {
1840            let stdout = std::io::stdout();
1841            let stderr = std::io::stderr();
1842            let mut so = stdout.lock();
1843            let mut se = stderr.lock();
1844            let mut out = cli::CliOutput::from_std(&mut so, &mut se);
1845            cli::commands::quota_status::cmd_quota_status(&db_path, &a, &mut out)
1846        }
1847    };
1848
1849    // WAL checkpoint after write commands to prevent unbounded WAL growth
1850    if result.is_ok()
1851        && let Some(cp_path) = db_path_for_checkpoint
1852        && let Ok(conn) = db::open(&cp_path)
1853    {
1854        let _ = db::checkpoint(&conn);
1855    }
1856
1857    result
1858}
1859
1860// ---------------------------------------------------------------------------
1861// is_write_command — predicate for the post-run WAL checkpoint.
1862// ---------------------------------------------------------------------------
1863
1864/// Returns true if `cmd` is a write-class subcommand. The post-run WAL
1865/// checkpoint in [`run`] runs only when this returns `true`.
1866#[must_use]
1867pub fn is_write_command(cmd: &Command) -> bool {
1868    matches!(
1869        cmd,
1870        Command::Store(_)
1871            | Command::Update(_)
1872            | Command::Delete(_)
1873            | Command::Promote(_)
1874            | Command::Forget(_)
1875            | Command::Link(_)
1876            | Command::Consolidate(_)
1877            | Command::Resolve(_)
1878            | Command::Sync(_)
1879            | Command::SyncDaemon(_)
1880            | Command::Import(_)
1881            | Command::AutoConsolidate(_)
1882            | Command::Gc
1883            | Command::Atomise(_)
1884            // v0.7.0 Cluster E API-2 (issue #767) — register / export /
1885            // promote write to the `skills` and `signed_events` tables.
1886            // List / get / resource / compose are read-only but classify
1887            // the whole verb family as write-class so the post-run WAL
1888            // checkpoint keeps the long-lived sqlite file from growing
1889            // unbounded under register-heavy workloads.
1890            | Command::Skill(_)
1891            // v0.7.0 Batman Mode (issue #800) — `namespace set-standard`
1892            // and `clear-standard` write to `namespace_meta`. The
1893            // `get-standard` and `batman-policy` verbs are read-only
1894            // but we classify the whole family as write-class so the
1895            // post-run WAL checkpoint runs.
1896            | Command::Namespace(_)
1897            // v0.7.0 #1095 — `ai-memory share` copies a row into the
1898            // recipient agent's `_shared/<from>→<to>/` namespace, so
1899            // it must trip the post-run WAL checkpoint.
1900            | Command::Share(_)
1901            // v0.7.0 ARCH-3 / FX-C3 (batch2) — write-class verbs in
1902            // the new parity batch. The reads (list-subscriptions /
1903            // subscription-replay / subscription-dlq-list / inbox /
1904            // kg-timeline / entity-get-by-alias / dependents-of-
1905            // invalidated / reflection-origin / quota-status) are
1906            // omitted from this list.
1907            | Command::Reflect(_)
1908            | Command::Subscribe(_)
1909            | Command::Unsubscribe(_)
1910            | Command::Notify(_)
1911            | Command::IngestMultistep(_)
1912            | Command::KgInvalidate(_)
1913            | Command::EntityRegister(_)
1914    )
1915}
1916
1917// ---------------------------------------------------------------------------
1918// Startup helpers (passphrase, anonymize default)
1919// ---------------------------------------------------------------------------
1920
1921/// Read the `SQLCipher` passphrase from `path`. Strips a single trailing
1922/// newline / CRLF; rejects an empty passphrase (post-strip) with an error;
1923/// preserves all other internal whitespace.
1924///
1925/// v0.7.0 #1055 (Agent-2 #5) — on Unix, the function rejects the
1926/// passphrase file when its mode allows ANY group or world access
1927/// (`mode & 0o077 != 0`). Pre-#1055 the function accepted
1928/// world-readable / group-readable files even though CLAUDE.md and
1929/// the doc comment at `src/storage/connection.rs:139-141` promise the
1930/// passphrase file is mode 0400. Any local user with read access to
1931/// the configured path could read the `SQLCipher` passphrase and
1932/// decrypt the on-disk DB offline. Operators with a legitimate need
1933/// for the legacy permissive posture (shared-container deploys where
1934/// the secret is already gated upstream by the orchestrator) can opt
1935/// back in via `AI_MEMORY_PASSPHRASE_FILE_ALLOW_LAX_PERMS=1`. The
1936/// unsafe override is logged at WARN on every fire.
1937///
1938/// # Errors
1939///
1940/// - The file cannot be read (e.g. missing, permission denied).
1941/// - The passphrase, after stripping the trailing newline, is empty.
1942/// - (Unix only, post-#1055) the file's mode allows group or world
1943///   access without the env-var escape hatch.
1944pub fn passphrase_from_file(path: &Path) -> Result<String> {
1945    // v0.7.0 #1055 — Unix permission check. We use the `mode & 0o077`
1946    // bitmask which fires on any group or world rwx bit. Windows
1947    // has no equivalent file-mode ACL primitive; the check is
1948    // compile-conditional so the function still works on cross-
1949    // platform builds.
1950    #[cfg(unix)]
1951    {
1952        use std::os::unix::fs::PermissionsExt;
1953        let meta = std::fs::metadata(path).with_context(|| {
1954            format!(
1955                "stat passphrase file {} for permission check (#1055)",
1956                path.display()
1957            )
1958        })?;
1959        let mode = meta.permissions().mode();
1960        let lax_bits = mode & 0o077;
1961        if lax_bits != 0 {
1962            let fail_open = std::env::var("AI_MEMORY_PASSPHRASE_FILE_ALLOW_LAX_PERMS")
1963                .map(|v| v == "1" || v.eq_ignore_ascii_case("true"))
1964                .unwrap_or(false);
1965            if fail_open {
1966                tracing::warn!(
1967                    target: "ai_memory::daemon_runtime",
1968                    path = %path.display(),
1969                    mode = format!("{:o}", mode & 0o777),
1970                    "passphrase_from_file: file is group/world-readable; \
1971                     AI_MEMORY_PASSPHRASE_FILE_ALLOW_LAX_PERMS=1 — accepting \
1972                     (UNSAFE, legacy posture). Tighten with `chmod 0400 <path>` \
1973                     and clear the env var."
1974                );
1975            } else {
1976                anyhow::bail!(
1977                    "passphrase file {} has lax permissions (mode {:o}, group/world bits set); \
1978                     tighten with `chmod 0400 {}` OR set \
1979                     AI_MEMORY_PASSPHRASE_FILE_ALLOW_LAX_PERMS=1 to opt out (#1055)",
1980                    path.display(),
1981                    mode & 0o777,
1982                    path.display(),
1983                );
1984            }
1985        }
1986    }
1987    let mut raw = std::fs::read_to_string(path)
1988        .with_context(|| format!("reading passphrase file {}", path.display()))?;
1989    let passphrase = raw.trim_end_matches(['\n', '\r']).to_string();
1990    // #1258 — zeroize the intermediate `raw` buffer so the secret bytes
1991    // do not linger on the heap after we hand the trimmed copy to the
1992    // caller. The caller is responsible for zeroizing the returned
1993    // `passphrase` when it falls out of scope (typically passed
1994    // straight into `AI_MEMORY_DB_PASSPHRASE`).
1995    {
1996        use zeroize::Zeroize;
1997        raw.zeroize();
1998    }
1999    if passphrase.is_empty() {
2000        anyhow::bail!("passphrase file {} is empty", path.display());
2001    }
2002    Ok(passphrase)
2003}
2004
2005/// Apply the configured `anonymize_default` to the runtime env: when the
2006/// config asks for anonymization but the user hasn't already set
2007/// `AI_MEMORY_ANONYMIZE`, set it to `"1"`. Idempotent — repeated calls are
2008/// a no-op once the env var is set.
2009///
2010/// Note: this writes to the process environment; callers must invoke it
2011/// from the single-threaded startup region (before any worker threads are
2012/// spawned). The production binary calls it from `main()` for that reason.
2013pub fn apply_anonymize_default(app_config: &AppConfig) {
2014    // #198: config → env mapping for agent_id anonymization. Env var already
2015    // set by the caller wins; config is only applied when the env is unset.
2016    if app_config.effective_anonymize_default()
2017        && std::env::var(crate::identity::ENV_ANONYMIZE).is_err()
2018    {
2019        // SAFETY: single-threaded startup before any worker threads spawn.
2020        unsafe { std::env::set_var(crate::identity::ENV_ANONYMIZE, "1") };
2021    }
2022}
2023
2024/// #976 (2026-05-20) — resolve the admin-allowlist with env-var
2025/// precedence over the config-file `[admin].agent_ids` block.
2026///
2027/// `AI_MEMORY_ADMIN_AGENT_IDS` is a comma-separated list of agent_ids.
2028/// The wildcard `*` is honoured (every authenticated caller becomes
2029/// admin — appropriate for test daemons + container deploys that
2030/// receive the admin allowlist from orchestration secrets instead of a
2031/// shipped config.toml). Same `validate_agent_id` filter as the config
2032/// path; malformed entries are dropped with a `warn` log so a single
2033/// typo cannot lock the operator out.
2034///
2035/// Returns the config-file allowlist when the env var is absent or
2036/// empty; returns an empty Vec when neither source provides agent_ids
2037/// (closes every admin-class endpoint by default — the secure
2038/// posture per the post-#946 NHI contract).
2039#[must_use]
2040pub fn resolve_admin_agent_ids(admin_cfg: Option<&crate::config::AdminConfig>) -> Vec<String> {
2041    if let Ok(raw) = std::env::var("AI_MEMORY_ADMIN_AGENT_IDS")
2042        && !raw.trim().is_empty()
2043    {
2044        let mut out = Vec::new();
2045        for entry in raw.split(',') {
2046            let id = entry.trim();
2047            if id.is_empty() {
2048                continue;
2049            }
2050            // #980 (2026-05-20) — the `AI_MEMORY_ADMIN_AGENT_IDS=*`
2051            // wildcard carve-out is REMOVED. Pre-#980 the env var
2052            // accepted `"*"` as an explicit "admit every caller"
2053            // sentinel; combined with the `is_admin_caller` wildcard
2054            // arm (also closed in #980), an operator who set the
2055            // env var (intentionally or via a copy-paste mishap)
2056            // opened every admin endpoint. Operators wanting a
2057            // permissive admin posture must now enumerate the agent
2058            // ids explicitly (e.g. comma-separated list of NHI
2059            // principals); the wildcard entry is rejected by
2060            // `validate_agent_id` (shape: `*` is not in the allowed
2061            // char class) and dropped with a WARN. The previous
2062            // explicit-test-only path lives behind `#[cfg(test)]` in
2063            // `is_admin_caller`; production deployments cannot reach
2064            // it regardless of how the allowlist is populated.
2065            match crate::validate::validate_agent_id(id) {
2066                Ok(()) => out.push(id.to_string()),
2067                Err(e) => {
2068                    tracing::warn!(
2069                        "AI_MEMORY_ADMIN_AGENT_IDS entry '{id}' rejected: {e}; dropping"
2070                    );
2071                }
2072            }
2073        }
2074        return out;
2075    }
2076    admin_cfg
2077        .map(crate::config::AdminConfig::validated_agent_ids)
2078        .unwrap_or_default()
2079}
2080
2081// ---------------------------------------------------------------------------
2082// Embedder / vector-index canonical builders
2083// ---------------------------------------------------------------------------
2084
2085/// #1521 — resolve the daemon embedder model under the canonical
2086/// precedence ladder, mirroring the [`AppConfig::resolve_embeddings`]
2087/// layering for the model dimension:
2088///
2089///   1. `[embeddings].model` (sectioned v2 config, #1146)
2090///   2. legacy flat `embedding_model` (deprecated)
2091///   3. tier-preset `embedding_model`
2092///   4. `None` (keyword-only / embeddings disabled)
2093///
2094/// The model is read from the explicit section/flat fields rather than
2095/// `ResolvedEmbeddings.model` (which defaults to nomic whenever ANY
2096/// `[embeddings]` key is present), so a url-only section on the semantic
2097/// tier still keeps the tier-preset MiniLM model. A configured id the
2098/// 2-model daemon embedder cannot construct (or an unparseable one)
2099/// degrades to the tier preset — the operator picked a pin, not
2100/// keyword-only. Pure: no network I/O, so the precedence is unit-testable
2101/// without an HF-Hub fetch (`build_embedder` does the construction).
2102#[allow(deprecated)]
2103pub(crate) fn resolve_embedder_model(
2104    tier_config: &crate::config::TierConfig,
2105    app_config: &AppConfig,
2106) -> Option<crate::config::EmbeddingModel> {
2107    let preset = tier_config.embedding_model;
2108    let preset_label = preset
2109        .map(|m| m.hf_model_id().to_string())
2110        .unwrap_or_else(|| "none".to_string());
2111
2112    let configured = app_config
2113        .embeddings
2114        .as_ref()
2115        .and_then(|section| section.model.clone())
2116        .filter(|raw| !raw.trim().is_empty())
2117        .map(|raw| (raw, "[embeddings].model"))
2118        .or_else(|| {
2119            app_config
2120                .embedding_model
2121                .clone()
2122                .filter(|raw| !raw.trim().is_empty())
2123                .map(|raw| (raw, "legacy embedding_model"))
2124        });
2125
2126    let Some((raw, origin)) = configured else {
2127        return preset;
2128    };
2129    match crate::config::EmbeddingModel::from_canonical_id(&raw) {
2130        Some(model) => {
2131            tracing::info!(
2132                "embedder: using configured model {} from {origin} (tier-preset would have been {})",
2133                model.hf_model_id(),
2134                preset_label
2135            );
2136            Some(model)
2137        }
2138        None => {
2139            tracing::warn!(
2140                "embedder: configured model {raw:?} (from {origin}) is not constructible by the \
2141                 daemon embedder (supported: nomic-embed-text-v1.5, all-MiniLM-L6-v2); \
2142                 falling back to tier-preset {preset_label}"
2143            );
2144            preset
2145        }
2146    }
2147}
2148
2149/// Construct the [`Embedder`] for a given tier. Returns `None` for the
2150/// keyword tier (no embedder requested) and on load failure (caller
2151/// degrades to keyword fallback). On failure the diagnostic is emitted
2152/// via `tracing::error!` so operators see it in `journalctl`.
2153///
2154/// This is the single canonical embedder builder used by both `serve()`
2155/// (HTTP daemon) and `cli::recall::run` (offline recall). Prior to W6
2156/// each call site had its own copy, with subtly different fallback
2157/// shapes — the bug at issue #322 was a direct consequence.
2158#[allow(deprecated)]
2159pub async fn build_embedder(feature_tier: FeatureTier, app_config: &AppConfig) -> Option<Embedder> {
2160    let tier_config = feature_tier.config();
2161    // #1521: consume the canonical embeddings resolver so the sectioned
2162    // `[embeddings]` block (#1146) drives the daemon embedder, not just
2163    // the deprecated flat fields.
2164    //
2165    // #1598 — construction is delegated to the single shared boot
2166    // entry `Embedder::from_resolved` (also used by the MCP stdio
2167    // init). For the local/ollama backend the model is resolved by
2168    // the pure `resolve_embedder_model` helper (precedence:
2169    // `[embeddings].model` section > legacy flat `embedding_model` >
2170    // tier preset); for API backends the operator's `model` id is
2171    // wired verbatim by the resolver and the tier preset only gates
2172    // whether embeddings are enabled at all (Some vs None).
2173    let resolved_embeddings = app_config.resolve_embeddings();
2174    let tier_model = if crate::config::is_api_embed_backend(&resolved_embeddings.backend) {
2175        tier_config.embedding_model
2176    } else {
2177        resolve_embedder_model(&tier_config, app_config)
2178    };
2179    let Some(emb_model) = tier_model else {
2180        tracing::info!(
2181            "embedder disabled — tier={} keyword-only (FTS5); semantic recall not wired",
2182            feature_tier.as_str()
2183        );
2184        return None;
2185    };
2186    // The HF-Hub sync API and candle model-load are blocking CPU work that
2187    // internally spin their own tokio runtime. Running them directly in this
2188    // async context panics with "Cannot drop a runtime in a context where
2189    // blocking is not allowed." Move the whole construction onto the blocking
2190    // pool so the inner runtime is owned by a dedicated thread.
2191    let resolved_for_build = resolved_embeddings.clone();
2192    let build = match tokio::task::spawn_blocking(move || {
2193        embeddings::Embedder::from_resolved(&resolved_for_build, Some(emb_model))
2194    })
2195    .await
2196    {
2197        Ok(b) => b,
2198        Err(e) => {
2199            tracing::error!("embedder spawn_blocking join failed: {e}");
2200            return None;
2201        }
2202    };
2203    match build {
2204        Ok(Some(emb)) => {
2205            tracing::info!(
2206                "embedder loaded ({}) — tier={} semantic recall enabled",
2207                emb.model_description(),
2208                feature_tier.as_str()
2209            );
2210            Some(emb)
2211        }
2212        // Unreachable with `Some(emb_model)` threaded above; kept
2213        // explicit so the keyword-tier contract of `from_resolved`
2214        // stays loud here (#1598).
2215        Ok(None) => None,
2216        Err(e) => {
2217            // v0.6.2 (#327): make embedder load failures loud. The
2218            // prior WARN level was easy to miss in DO droplet logs,
2219            // which led to scenario-18 black-holing (semantic recall
2220            // falling back to keyword-only without the operator
2221            // noticing). An ERROR-level log with an obvious marker
2222            // surfaces this immediately in `journalctl -u ai-memory`
2223            // or tail -f /var/log/ai-memory-serve.log.
2224            tracing::error!(
2225                "EMBEDDER LOAD FAILED — tier={} requested semantic features, \
2226                 but embedder init errored: {e:#}. Semantic recall DEGRADED to \
2227                 keyword (#1593/#1598 fail-closed; the chat LLM client is NEVER \
2228                 reused for embeddings). Semantic recall, sync_push embedding \
2229                 refresh (#322), and HNSW index will be NO-OPS. For local \
2230                 backends check network egress to HuggingFace Hub + available \
2231                 memory for model weights; for API backends check the resolved \
2232                 base URL / API key (`ai-memory doctor`). To force keyword-only \
2233                 explicitly (silences this error), set `tier = \"keyword\"` in \
2234                 config.toml.",
2235                feature_tier.as_str()
2236            );
2237            None
2238        }
2239    }
2240}
2241
2242/// v0.7.0 L5 — construct the LLM [`OllamaClient`] for autonomy-hook
2243/// capable feature tiers (`smart` / `autonomous`). Returns `None` for
2244/// the `keyword` / `semantic` tiers (no `llm_model` declared in the
2245/// [`TierConfig`]) and on Ollama unreachability (caller degrades to
2246/// non-LLM behaviour). On failure the diagnostic is emitted via
2247/// `tracing::warn!` so operators see it in `journalctl` without
2248/// killing the daemon — autonomy hooks are best-effort and the
2249/// store path must keep working when Ollama is offline.
2250///
2251/// **FX-D1 (v0.7.0, 2026-05-27).** Pre-FX-D1 this function wrapped
2252/// the sync [`llm::OllamaClient::build_from_resolved`] in
2253/// `tokio::task::spawn_blocking`. The sync constructor went through
2254/// `block_on_local`, whose FX-C1 design panicked on the current-thread
2255/// arm. Production tests that defaulted to `#[tokio::test]`
2256/// (current-thread) hit the panic — `spawn_blocking`'s blocking-pool
2257/// thread inherits the outer runtime handle, so `Handle::try_current()`
2258/// resolved to a `CurrentThread` flavor and tripped the panic. The
2259/// log line was: `task 294 panicked with message "OllamaClient sync
2260/// wrapper called from inside a current-thread tokio runtime."`.
2261///
2262/// The surgical fix is to call the async constructor
2263/// [`llm::OllamaClient::build_from_resolved_async`] directly — no
2264/// `spawn_blocking`, no `block_on_local`, no sync→async bridge — so
2265/// the construction runs on whichever tokio runtime the caller
2266/// brought. The defensive fix in `block_on_local` (replace the panic
2267/// with a fresh-OS-thread bridge) catches every other unknown
2268/// callsite that might hit the same shape; this surgical fix is the
2269/// optimal path at this known callsite.
2270pub async fn build_llm_client(
2271    feature_tier: FeatureTier,
2272    app_config: &AppConfig,
2273) -> Option<llm::OllamaClient> {
2274    // v0.7.x (#1146) — single canonical entry through the resolver.
2275    // The resolver folds CLI flags (none here — `ai-memory serve`
2276    // exposes no CLI LLM override), AI_MEMORY_LLM_* env vars, the
2277    // [llm] config section, the legacy llm_model/ollama_url flat
2278    // fields, and the compiled tier preset. The provenance fields
2279    // surface via the tracing log line so RUST_LOG=ai_memory=debug
2280    // shows which precedence layer won.
2281    let resolved = app_config.resolve_llm(None, None, None);
2282
2283    // No-preset-tier short-circuit: when the tier has no compiled
2284    // `llm_model` preset (Keyword + Semantic at v0.7.0) AND there is
2285    // no explicit operator intent (resolver `source == CompiledDefault`),
2286    // the resolver's Ollama-default-fallback should NOT pull a client
2287    // into existence. This matches pre-#1146 v0.6.x behaviour and
2288    // avoids paying a blocking reqwest call to a (likely-absent)
2289    // Ollama under tokio test contexts. Operators who explicitly
2290    // want an LLM on Keyword/Semantic set AI_MEMORY_LLM_BACKEND or
2291    // write a [llm] section, which moves `source` off the
2292    // CompiledDefault arm.
2293    if feature_tier.config().llm_model.is_none()
2294        && matches!(
2295            resolved.source,
2296            crate::config::ConfigSource::CompiledDefault
2297        )
2298    {
2299        tracing::debug!(
2300            "L5: llm client disabled — tier={} has no llm_model preset AND no \
2301             operator LLM config; set AI_MEMORY_LLM_BACKEND or [llm] section to enable",
2302            feature_tier.as_str()
2303        );
2304        return None;
2305    }
2306
2307    let backend = resolved.backend.clone();
2308    let model = resolved.model.clone();
2309    let source = resolved.source.as_str().to_string();
2310    let key_source = resolved.api_key_source.as_str().to_string();
2311    let tier_str = feature_tier.as_str().to_string();
2312
2313    // FX-D1 (2026-05-27): call the async constructor directly. The
2314    // pre-FX-D1 `spawn_blocking` wrapper drove the sync constructor
2315    // through `block_on_local`, which panicked on the current-thread
2316    // tokio arm (the default `#[tokio::test]` flavor). The async
2317    // path skips the sync→async bridge entirely so the construction
2318    // runs on whichever tokio runtime the caller brought, with no
2319    // re-entry hazard.
2320    let build = llm::OllamaClient::build_from_resolved_async(&resolved).await;
2321
2322    match build {
2323        Ok(Some(client)) => {
2324            tracing::info!(
2325                "L5: llm client ready — tier={tier_str} backend={backend} \
2326                 model={model} source={source} key_source={key_source} \
2327                 — auto_tag/expand_query/contradiction-detection/reflection \
2328                 hooks armed (#1146 resolver path)"
2329            );
2330            Some(client)
2331        }
2332        Ok(None) => {
2333            tracing::warn!(
2334                "L5: llm client disabled — resolver returned no client \
2335                 (tier={tier_str} backend={backend} source={source}); \
2336                 LLM-powered hooks are no-ops"
2337            );
2338            None
2339        }
2340        Err(e) => {
2341            tracing::warn!(
2342                "L5: llm client init failed (tier={tier_str} backend={backend} \
2343                 source={source}); LLM-powered hooks are no-ops: {e}"
2344            );
2345            None
2346        }
2347    }
2348}
2349
2350/// Build the in-memory [`VectorIndex`] from `conn`. When `embedder_present`
2351/// is false, returns `None` (the keyword-only path doesn't need an index).
2352/// When the embedder is present but the DB is empty (or query errors),
2353/// returns `Some(VectorIndex::empty())` so write paths can populate it
2354/// in-place.
2355#[must_use]
2356pub fn build_vector_index(conn: &Connection, embedder_present: bool) -> Option<VectorIndex> {
2357    if !embedder_present {
2358        return None;
2359    }
2360    match db::get_all_embeddings(conn) {
2361        Ok(entries) if !entries.is_empty() => Some(hnsw::VectorIndex::build(entries)),
2362        _ => Some(hnsw::VectorIndex::empty()),
2363    }
2364}
2365
2366/// #1579 B3 — read the boot warm-up entry set (every stored
2367/// embedding) over a private connection. Opened fresh so the boot
2368/// loader thread never touches the request-serving connection;
2369/// failures degrade to "no warm-up" with a WARN (the daemon keeps
2370/// serving keyword/FTS recall — the pre-#1579 failure posture).
2371pub(crate) fn load_boot_index_entries(db_path: &Path) -> Option<Vec<(String, Vec<f32>)>> {
2372    let conn = match db::open(db_path) {
2373        Ok(c) => c,
2374        Err(e) => {
2375            tracing::warn!(
2376                db_path = %db_path.display(),
2377                err = %e,
2378                "HNSW boot warm-up: could not open DB; semantic index stays cold (#1579 B3)"
2379            );
2380            return None;
2381        }
2382    };
2383    match db::get_all_embeddings(&conn) {
2384        Ok(entries) => Some(entries),
2385        Err(e) => {
2386            tracing::warn!(
2387                err = %e,
2388                "HNSW boot warm-up: get_all_embeddings failed; semantic index stays cold (#1579 B3)"
2389            );
2390            None
2391        }
2392    }
2393}
2394
2395/// #1579 B3 — async boot HNSW warm-up for `serve`.
2396///
2397/// Pre-#1579 the daemon built the HNSW graph SYNCHRONOUSLY at boot
2398/// (`get_all_embeddings` + `VectorIndex::build` on the startup path):
2399/// P1 measured spawn→initialize at 40 s for a 10k-vector corpus and
2400/// >28 min at 100k. This loader moves the whole load+build off the
2401/// startup path onto a background thread, reusing the #968
2402/// double-buffer rebuild machinery: the daemon binds and answers
2403/// immediately with an EMPTY index; semantic recall degrades to its
2404/// keyword/FTS blend until the warmed graph swaps in (the #519
2405/// proactive conflict check routes to its bounded-scan fallback for
2406/// the same window via [`hnsw::VectorIndex::is_fully_searchable`]).
2407///
2408/// Locking discipline: the `AppState.vector_index` outer mutex is
2409/// held only for microsecond-scale steps (seed-extend, schedule,
2410/// swap) — NEVER across the graph build, which runs detached on the
2411/// #968 rebuild thread. Request handlers therefore keep making
2412/// progress throughout the warm-up.
2413///
2414/// Emits one INFO line when the swap lands so operators can see
2415/// time-to-semantic-ready in the daemon log.
2416pub fn spawn_vector_index_boot_load(
2417    db_path: std::path::PathBuf,
2418    vector_index: Arc<tokio::sync::Mutex<Option<VectorIndex>>>,
2419) -> std::thread::JoinHandle<()> {
2420    std::thread::spawn(move || {
2421        let started = std::time::Instant::now();
2422        let Some(entries) = load_boot_index_entries(&db_path) else {
2423            return;
2424        };
2425        if entries.is_empty() {
2426            tracing::info!(
2427                "HNSW boot warm-up: no stored embeddings — index starts empty (#1579 B3)"
2428            );
2429            return;
2430        }
2431        let total = entries.len();
2432        // Step 1 — seed + schedule the background build under a BRIEF
2433        // outer lock. The returned handle is detached from the borrow
2434        // (the rebuild thread captures Arc'd internals, not `&self`),
2435        // so we can join it after dropping the guard.
2436        let build_handle = {
2437            let guard = vector_index.blocking_lock();
2438            let Some(idx) = guard.as_ref() else {
2439                return;
2440            };
2441            idx.seed_and_rebuild_async(entries)
2442        };
2443        let _ = build_handle.join();
2444        // Step 2 — swap the warmed graph in; loop covers the
2445        // rebuild-CAS race with any routine 200-overflow rebuild that
2446        // was scheduled by boot-window writes (see
2447        // `VectorIndex::warm_boot` for the same contract).
2448        loop {
2449            let pending = {
2450                let guard = vector_index.blocking_lock();
2451                let Some(idx) = guard.as_ref() else {
2452                    return;
2453                };
2454                if idx.is_fully_searchable() {
2455                    None
2456                } else {
2457                    Some(idx.rebuild_async())
2458                }
2459            };
2460            match pending {
2461                None => break,
2462                Some(handle) => {
2463                    let _ = handle.join();
2464                    // A no-op handle (rebuild CAS busy) joins
2465                    // instantly — pace the retry so the loop doesn't
2466                    // spin while the in-flight build finishes.
2467                    std::thread::sleep(crate::hnsw::REBUILD_WAIT_POLL_INTERVAL);
2468                }
2469            }
2470        }
2471        #[allow(clippy::cast_possible_truncation)]
2472        let elapsed_ms = started.elapsed().as_millis() as u64;
2473        tracing::info!(
2474            entries = total,
2475            elapsed_ms,
2476            "HNSW index warm (#1579 B3): async boot build swapped in; \
2477             semantic recall is now index-backed"
2478        );
2479    })
2480}
2481
2482// ---------------------------------------------------------------------------
2483// v0.7 Track H — H2 active keypair loading
2484// ---------------------------------------------------------------------------
2485
2486// Round-3 F12 — the daemon's fixed signing-key label. Canonical const
2487// (with the full F12 rationale) now lives at
2488// `crate::identity::keypair::DAEMON_KEYPAIR_LABEL` (#1558).
2489use crate::identity::keypair::DAEMON_KEYPAIR_LABEL;
2490
2491/// Round-3 F12 — ensure the daemon's signing keypair exists on disk and
2492/// load it for the serve [`AppState`]. Returns the in-memory keypair
2493/// (if any) plus the lifecycle outcome (Generated/AlreadyExists/
2494/// SkippedDisabled/None) so the startup banner can surface the
2495/// auto-gen line.
2496///
2497/// Resolution:
2498///   1. Resolve the default key directory
2499///      ([`crate::identity::keypair::default_key_dir`]).
2500///   2. Call [`crate::identity::keypair::ensure_keypair`] under the
2501///      stable [`DAEMON_KEYPAIR_LABEL`]. Idempotent: a daemon restart
2502///      never overwrites an existing keypair (which would silently
2503///      invalidate every prior signed link).
2504///   3. Load the keypair from disk and return it.
2505///
2506/// Failure at any step degrades the daemon to unsigned-link mode (the
2507/// pre-v0.7 posture) without aborting startup. Log lines describe
2508/// which path was taken so an operator inspecting daemon logs sees
2509/// the cause.
2510fn ensure_and_load_daemon_keypair() -> (
2511    Option<crate::identity::keypair::AgentKeypair>,
2512    Option<crate::identity::keypair::EnsureOutcome>,
2513) {
2514    let dir = match crate::identity::keypair::default_key_dir() {
2515        Ok(d) => d,
2516        Err(e) => {
2517            tracing::info!("identity: no default key dir available, link signing disabled: {e}");
2518            return (None, None);
2519        }
2520    };
2521    // The `[identity].disabled` config field is not yet wired in
2522    // v0.7.0; pass `false` so the helper auto-generates unless the
2523    // operator pre-staged a keypair. A future config field can opt
2524    // out without changing this call site.
2525    let outcome = match crate::identity::keypair::ensure_keypair(DAEMON_KEYPAIR_LABEL, &dir, false)
2526    {
2527        Ok(o) => o,
2528        Err(e) => {
2529            tracing::warn!("identity: keypair auto-gen failed: {e:#}");
2530            return (None, None);
2531        }
2532    };
2533    if matches!(
2534        outcome,
2535        crate::identity::keypair::EnsureOutcome::SkippedDisabled
2536    ) {
2537        return (None, Some(outcome));
2538    }
2539    let kp = match crate::identity::keypair::load(DAEMON_KEYPAIR_LABEL, &dir) {
2540        Ok(kp) if kp.can_sign() => {
2541            tracing::info!(
2542                "identity: loaded signing keypair for {DAEMON_KEYPAIR_LABEL} from {}",
2543                dir.display()
2544            );
2545            Some(kp)
2546        }
2547        Ok(_) => {
2548            tracing::info!(
2549                "identity: only public key on disk for {DAEMON_KEYPAIR_LABEL}; link signing disabled"
2550            );
2551            None
2552        }
2553        Err(e) => {
2554            tracing::warn!(
2555                "identity: keypair load failed for {DAEMON_KEYPAIR_LABEL}: {e:#}; link signing disabled"
2556            );
2557            None
2558        }
2559    };
2560    (kp, Some(outcome))
2561}
2562
2563// ---------------------------------------------------------------------------
2564// Background tasks (GC, WAL checkpoint)
2565// ---------------------------------------------------------------------------
2566
2567/// Spawn the periodic GC loop. Sleeps `interval`, then runs `db::gc`,
2568/// `db::auto_purge_archive`, and (Cluster G, #767) the shadow-
2569/// observation retention sweep against the daemon's shared connection.
2570/// The returned [`JoinHandle`] is owned by the caller; `serve()` aborts
2571/// it on shutdown.
2572///
2573/// `shadow_retention_days` honors the operator-tunable
2574/// `[confidence] shadow_retention_days` from `config.toml`, falling
2575/// back to [`crate::confidence::shadow::DEFAULT_SHADOW_RETENTION_DAYS`]
2576/// (30) when unset. `<= 0` disables the sweep (matches the
2577/// `archive_max_days` convention).
2578#[must_use]
2579pub fn spawn_gc_loop(
2580    state: Db,
2581    archive_max_days: Option<i64>,
2582    interval: Duration,
2583) -> JoinHandle<()> {
2584    spawn_gc_loop_with_shadow_retention(
2585        state,
2586        archive_max_days,
2587        crate::confidence::shadow::DEFAULT_SHADOW_RETENTION_DAYS,
2588        interval,
2589    )
2590}
2591
2592/// Cluster G (#767) — `spawn_gc_loop` variant that takes an explicit
2593/// shadow-observation retention window. Used by `bootstrap_serve` so
2594/// the operator-tunable `[confidence] shadow_retention_days` from
2595/// `config.toml` flows through. `spawn_gc_loop` is the no-arg wrapper
2596/// that picks the compiled default for legacy call sites (tests).
2597#[must_use]
2598pub fn spawn_gc_loop_with_shadow_retention(
2599    state: Db,
2600    archive_max_days: Option<i64>,
2601    shadow_retention_days: i64,
2602    interval: Duration,
2603) -> JoinHandle<()> {
2604    tokio::spawn(async move {
2605        loop {
2606            tokio::time::sleep(interval).await;
2607            let lock = state.lock().await;
2608            match db::gc(&lock.0, lock.3) {
2609                Ok(n) if n > 0 => tracing::info!("gc: expired {n} memories"),
2610                _ => {}
2611            }
2612            // Auto-purge old archives if configured
2613            match db::auto_purge_archive(&lock.0, archive_max_days) {
2614                Ok(n) if n > 0 => tracing::info!("gc: purged {n} old archived memories"),
2615                _ => {}
2616            }
2617            // Cluster G (#767, PERF-4) — shadow-mode observation
2618            // retention sweep. `<= 0` is a no-op (operator opt-out).
2619            match crate::confidence::shadow::gc_observations(&lock.0, shadow_retention_days) {
2620                Ok(n) if n > 0 => tracing::info!(
2621                    "gc: purged {n} shadow observations older than {shadow_retention_days}d"
2622                ),
2623                Ok(_) => {}
2624                Err(e) => tracing::warn!("shadow observation gc failed: {e}"),
2625            }
2626        }
2627    })
2628}
2629
2630/// v0.7.0 K2 — spawn the periodic `pending_actions` timeout sweeper.
2631///
2632/// Sleeps `interval`, then calls [`db::sweep_pending_action_timeouts`]
2633/// against the daemon's shared connection. Per-row
2634/// `default_timeout_seconds` overrides the global `default_secs` when
2635/// non-NULL. A non-positive `default_secs` disables the sweeper.
2636///
2637/// Returned [`JoinHandle`] is owned by the caller; `serve()` aborts it
2638/// on shutdown — same lifecycle as [`spawn_gc_loop`].
2639///
2640/// Closes the v0.6.3.1 honest-Capabilities-v2 disclosure that the
2641/// `default_timeout_seconds` field was advertised but unused.
2642#[must_use]
2643pub fn spawn_pending_timeout_sweep_loop(
2644    state: Db,
2645    db_path: PathBuf,
2646    default_secs: i64,
2647    interval: Duration,
2648) -> JoinHandle<()> {
2649    tokio::spawn(async move {
2650        loop {
2651            tokio::time::sleep(interval).await;
2652            // Hold the lock just long enough for the sweep call. The
2653            // expired ids returned by the sweeper are dispatched to
2654            // subscribers AFTER the lock drops so a slow webhook can
2655            // never starve write traffic.
2656            let expired = {
2657                let lock = state.lock().await;
2658                match db::sweep_pending_action_timeouts(&lock.0, default_secs) {
2659                    Ok(rows) => rows,
2660                    Err(e) => {
2661                        tracing::warn!("pending_actions sweep failed: {e}");
2662                        Vec::new()
2663                    }
2664                }
2665            };
2666            if expired.is_empty() {
2667                continue;
2668            }
2669            tracing::info!(
2670                "pending_actions sweep: marked {} row(s) expired",
2671                expired.len()
2672            );
2673            // Best-effort fan-out via the existing subscription
2674            // dispatcher. K2 piggybacks on the lifecycle event
2675            // shape — the namespace + id are enough for downstream
2676            // webhook consumers to look the row up. The full
2677            // approval-event surface (typed payloads, retry, DLQ)
2678            // arrives in K4 / K7.
2679            for (id, namespace) in expired {
2680                let lock = state.lock().await;
2681                crate::subscriptions::dispatch_event(
2682                    &lock.0,
2683                    "pending_action_expired",
2684                    &id,
2685                    &namespace,
2686                    None,
2687                    &db_path,
2688                );
2689            }
2690        }
2691    })
2692}
2693
2694/// v0.7.0 I3 — spawn the periodic transcript archive→prune sweeper.
2695///
2696/// Sleeps `interval`, then calls
2697/// [`crate::transcripts::sweep_transcript_lifecycle`] against the
2698/// daemon's shared connection. The per-namespace TTL configuration
2699/// is captured by `cfg` once at spawn time (operators editing
2700/// `[transcripts]` in `config.toml` after boot must restart the
2701/// daemon — same model as the K2 pending sweeper).
2702///
2703/// The returned [`JoinHandle`] is owned by the caller; `serve()`
2704/// aborts it on shutdown — same lifecycle as
2705/// [`spawn_pending_timeout_sweep_loop`].
2706#[must_use]
2707pub fn spawn_transcript_lifecycle_sweep_loop(
2708    state: Db,
2709    cfg: crate::config::TranscriptsConfig,
2710    interval: Duration,
2711) -> JoinHandle<()> {
2712    tokio::spawn(async move {
2713        loop {
2714            tokio::time::sleep(interval).await;
2715            // Hold the connection lock for the whole sweep: the
2716            // archive + prune phases share one `now` and the
2717            // archive-then-prune semantics require sequential
2718            // execution against the same view of the table. A 10-
2719            // minute cadence means the lock window is at most a few
2720            // ms even on busy databases.
2721            let report = {
2722                let lock = state.lock().await;
2723                match crate::transcripts::sweep_transcript_lifecycle(&lock.0, &cfg) {
2724                    Ok(r) => r,
2725                    Err(e) => {
2726                        tracing::warn!("transcript lifecycle sweep failed: {e}");
2727                        continue;
2728                    }
2729                }
2730            };
2731            if report.archived > 0 || report.pruned > 0 || report.errors > 0 {
2732                tracing::info!(
2733                    "transcript lifecycle sweep: archived={} pruned={} errors={}",
2734                    report.archived,
2735                    report.pruned,
2736                    report.errors,
2737                );
2738            }
2739        }
2740    })
2741}
2742
2743/// v0.7.0 K8 — spawn the periodic agent-quota daily-counter reset
2744/// sweeper.
2745///
2746/// Sleeps `interval`, then calls [`crate::quotas::reset_daily`] against
2747/// the daemon's shared connection. The SQL statement zeros
2748/// `current_memories_today` + `current_links_today` for every row
2749/// whose `day_started_at` is not the current UTC date — touched rows
2750/// equal "agents that crossed midnight since the last sweep tick"
2751/// which is at most one row per registered agent per 24h.
2752///
2753/// The returned [`JoinHandle`] is owned by the caller; `serve()`
2754/// aborts it on shutdown — same lifecycle as
2755/// [`spawn_pending_timeout_sweep_loop`].
2756#[must_use]
2757pub fn spawn_agent_quota_reset_loop(state: Db, interval: Duration) -> JoinHandle<()> {
2758    tokio::spawn(async move {
2759        loop {
2760            tokio::time::sleep(interval).await;
2761            let reset_count = {
2762                let lock = state.lock().await;
2763                match crate::quotas::reset_daily(&lock.0) {
2764                    Ok(n) => n,
2765                    Err(e) => {
2766                        tracing::warn!("agent_quotas daily reset failed: {e}");
2767                        continue;
2768                    }
2769                }
2770            };
2771            if reset_count > 0 {
2772                tracing::info!("agent_quotas daily reset: {reset_count} row(s) zeroed");
2773            }
2774        }
2775    })
2776}
2777
2778/// Spawn the periodic WAL checkpoint loop. First checkpoint runs
2779/// `interval / 2` after start (staggered from the GC loop to avoid
2780/// lock-contention bursts on cold start), then on a fixed cadence.
2781#[must_use]
2782pub fn spawn_wal_checkpoint_loop(state: Db, interval: Duration) -> JoinHandle<()> {
2783    let half = interval / 2;
2784    tokio::spawn(async move {
2785        // First checkpoint runs halfway through the interval so the two
2786        // long-running maintenance tasks never overlap on cold start.
2787        tokio::time::sleep(half).await;
2788        loop {
2789            {
2790                let lock = state.lock().await;
2791                match db::checkpoint(&lock.0) {
2792                    Ok(()) => tracing::debug!("wal checkpoint: ok"),
2793                    Err(e) => tracing::warn!("wal checkpoint failed: {e}"),
2794                }
2795            }
2796            tokio::time::sleep(interval).await;
2797        }
2798    })
2799}
2800
2801// ---------------------------------------------------------------------------
2802// Router composition
2803// ---------------------------------------------------------------------------
2804
/// Compose the production HTTP router. Thin wrapper around
/// [`crate::build_router`] (the W3-vintage source of truth for the
/// route table). `daemon_runtime::build_router` exists so test code in
/// this module can build the router without naming `crate::build_router`
/// directly, and so future router-composition logic (e.g. middleware
/// reorder, custom layers) lives in one place.
///
/// Note the parameter order: this wrapper takes `app_state` first,
/// whereas `crate::build_router` takes the API-key state first. The
/// arguments are swapped on the way through.
#[must_use]
pub fn build_router(app_state: AppState, api_key_state: ApiKeyState) -> Router {
    // Argument order is intentionally reversed relative to this
    // function's own signature — see the doc comment above.
    crate::build_router(api_key_state, app_state)
}
2815
2816// ---------------------------------------------------------------------------
2817// serve() — the HTTP daemon body, post-W6 split.
2818// ---------------------------------------------------------------------------
2819
/// Aggregated state produced by [`bootstrap_serve`].
///
/// Bundles everything `serve` needs after bootstrap: the router inputs,
/// the shared database handle, the handles of spawned background tasks,
/// and a few resolved settings.
pub struct ServeBootstrap {
    /// Application state; the same type [`build_router`] takes as its
    /// first argument.
    pub app_state: AppState,
    /// API-key state; the same type [`build_router`] takes as its
    /// second argument.
    pub api_key_state: ApiKeyState,
    /// Shared database handle — the same `Db` type the background
    /// loops in this module lock on every cycle.
    pub db_state: Db,
    /// Archive retention setting, same type as the `archive_max_days`
    /// argument of [`spawn_gc_loop`].
    pub archive_max_days: Option<i64>,
    /// Join handles of spawned background tasks. NOTE(review):
    /// presumably the handles `serve()` aborts on shutdown, per the
    /// `spawn_*` helper docs — confirm against `serve`.
    pub task_handles: Vec<JoinHandle<()>>,
    /// Round-3 F12 — lifecycle outcome of the daemon's signing-keypair
    /// auto-gen path, captured by [`ensure_and_load_daemon_keypair`].
    /// Read by [`serve`] when composing the F8/F12 startup banner so
    /// operators see whether a fresh key was created on first boot.
    pub daemon_keypair_outcome: Option<crate::identity::keypair::EnsureOutcome>,
    /// v0.7.0 H7 (round-2) — resolved per-request HTTP timeout. The
    /// `serve` path passes this to [`crate::build_router_with_timeout`]
    /// so the timeout middleware is wired with the operator's
    /// `request_timeout_secs` (default 60 s).
    pub request_timeout: std::time::Duration,
    /// v0.7.0 Policy-Engine Item 3 — shared atomic metrics handle for the
    /// deferred-audit drainer. `serve` polls these on the shutdown path
    /// (after the HTTP server has quiesced) to wait for every submitted
    /// refusal to flush into `signed_events` before the WAL checkpoint +
    /// process exit. The producer-side queue itself lives on `AppState`
    /// and inside the process-wide governance-hook `OnceLock`s, so this
    /// metrics handle is the only drain-observability surface `serve`
    /// retains after the queue is moved into `AppState`.
    pub deferred_audit_metrics: crate::governance::deferred_audit::DeferredAuditMetrics,
}
2847
/// v0.7.x (issue #1169) — resolve the configured embedder dim for the
/// postgres-schema bootstrap (used by [`build_store_handle`]).
///
/// Resolution ladder (first arm wins):
///
/// 1. [`crate::config::AppConfig::resolve_embeddings`] returns
///    `ResolvedEmbeddings.embedding_dim` populated by the canonical
///    [`crate::config::canonical_embedding_dim`] lookup table when the
///    operator-picked model id is in [`crate::config::KNOWN_EMBEDDING_DIMS`].
/// 2. Legacy flat-field path: parse `app_config.embedding_model` as the
///    2-family [`crate::config::EmbeddingModel`] enum and pull its
///    compile-time `dim()` (`nomic_embed_v15` / `mini_lm_l6_v2`).
/// 3. Tier-preset fallback when neither resolver nor legacy parses
///    yields a dim — the historical pre-#1169 behaviour, retained as
///    the last-resort default.
///
/// Returns `None` only when no embedder is configured at all
/// (`tier_config.embedding_model.is_none()` AND no operator override) —
/// i.e. the keyword-only tier. The postgres bootstrap then falls back
/// to `DEFAULT_EMBEDDING_DIM` per `build_store_handle`'s
/// `configured_embedding_dim` parameter comment.
#[cfg(feature = "sal")]
#[must_use]
#[allow(deprecated)]
fn resolve_configured_embedding_dim(
    app_config: &crate::config::AppConfig,
    tier_config: &crate::config::TierConfig,
) -> Option<u32> {
    // Used only if a model's `dim()` does not fit in a `u32`. 384 is
    // the MiniLM dimension that `build_store_handle` documents as the
    // legacy default.
    const DIM_CONVERSION_FALLBACK: u32 = 384;

    let preset = tier_config.embedding_model;
    let resolved = app_config.resolve_embeddings();
    resolved
        .embedding_dim
        .or_else(|| {
            app_config
                .embedding_model
                .as_deref()
                .and_then(|raw| raw.parse::<crate::config::EmbeddingModel>().ok())
                .map(|m| u32::try_from(m.dim()).unwrap_or(DIM_CONVERSION_FALLBACK))
        })
        .or_else(|| preset.map(|m| u32::try_from(m.dim()).unwrap_or(DIM_CONVERSION_FALLBACK)))
}

/// v0.7.0 #1548 — resolve the curator's SAL store handle from the same
/// URL-scheme dispatch the HTTP `serve` path uses. When `store_url` is
/// `Some`, the adapter is bound to the URL-resolved backend (SQLite *or*
/// Postgres); when `None`, it falls through to a SQLite store at the
/// `--db` path. The embedder dim + Postgres pool sizing are resolved
/// from `app_config` exactly as in `serve` so a postgres-backed curator
/// bootstraps an identically-shaped schema/pool to the HTTP daemon
/// pointed at the same federated store.
///
/// Returns only the `Arc<dyn MemoryStore>` — the curator passes do not
/// need the [`crate::handlers::StorageBackend`] tag the HTTP daemon
/// threads into its `AppState`.
///
/// # Errors
///
/// Propagates any [`build_store_handle`] failure, wrapped with the
/// context "build SAL store handle for curator".
#[cfg(feature = "sal")]
pub(crate) async fn build_curator_store(
    store_url: Option<&str>,
    db_path: &Path,
    app_config: &crate::config::AppConfig,
) -> Result<Arc<dyn crate::store::MemoryStore>> {
    let tier_config = app_config.effective_tier(None).config();
    let configured_embedding_dim = resolve_configured_embedding_dim(app_config, &tier_config);
    let (_backend, store) = build_store_handle(
        store_url,
        db_path,
        app_config.postgres_statement_timeout_secs,
        configured_embedding_dim,
        app_config.resolve_pg_pool(),
    )
    .await
    .context("build SAL store handle for curator")?;
    Ok(store)
}

/// v0.7.0 Wave-3 — resolve a [`MemoryStore`] handle from the operator's
/// `--store-url` (when set) or fall back to a [`SqliteStore`] wrapping
/// the on-disk database `--db` already opened.
///
/// Returns the resolved [`StorageBackend`] tag plus the polymorphic
/// `Arc<dyn MemoryStore>` so the caller can wire both fields onto
/// `AppState` and have downstream handlers branch on the tag without
/// dynamic-dispatch probes.
///
/// URL precedence:
///
/// - `Some("postgres://...")` or `Some("postgresql://...")` →
///   [`PostgresStore::connect`]; resolves to
///   [`StorageBackend::Postgres`]. Requires `--features sal-postgres`
///   at build time; the URL is rejected at runtime under a sal-only
///   build with a clear error.
/// - `Some("sqlite:///path")` → [`SqliteStore::open`]; resolves to
///   [`StorageBackend::Sqlite`]. The on-disk path may or may not be
///   the same file `--db` already opened — both views see the same
///   rows when they coincide; the SQLite file-locking layer arbitrates
///   any cross-connection contention.
/// - `None` → [`SqliteStore::open`] against `db_path`; resolves to
///   [`StorageBackend::Sqlite`]. The default behaviour preserved
///   for every operator who has not opted in to `--store-url`.
///
/// Anything else fails with the same "unrecognised store URL"
/// diagnostic [`crate::migrate::open_store`] returns (with any URL
/// password redacted), keeping the surface area consistent across
/// `serve`, `migrate`, and `schema-init`.
///
/// # Errors
///
/// Returns an error when the URL scheme is unrecognised, when a
/// Postgres URL is given to a binary built without `sal-postgres`, or
/// when opening/connecting the selected adapter fails.
///
/// [`MemoryStore`]: crate::store::MemoryStore
/// [`SqliteStore`]: crate::store::sqlite::SqliteStore
/// [`PostgresStore::connect`]: crate::store::postgres::PostgresStore::connect
/// [`SqliteStore::open`]: crate::store::sqlite::SqliteStore::open
/// [`StorageBackend`]: crate::handlers::StorageBackend
/// [`StorageBackend::Postgres`]: crate::handlers::StorageBackend::Postgres
/// [`StorageBackend::Sqlite`]: crate::handlers::StorageBackend::Sqlite
#[cfg(feature = "sal")]
async fn build_store_handle(
    store_url: Option<&str>,
    db_path: &Path,
    postgres_statement_timeout_secs: Option<u64>,
    // Issue #877: configured embedder dim. `None` keeps the legacy
    // `DEFAULT_EMBEDDING_DIM` (384, MiniLM) behaviour for callers that
    // explicitly do not load an embedder (keyword-only deployments).
    // When `Some(dim)` is passed, the postgres adapter takes the
    // auto-migrate path so a fresh-container schema bootstrapped at the
    // default 384 is converted in-place to match the configured
    // embedder's actual dimension (e.g. 768 for `nomic_embed_v15`).
    configured_embedding_dim: Option<u32>,
    // Resolved Postgres connection-pool sizing (`AI_MEMORY_PG_POOL_MAX` /
    // `_MIN` / `_ACQUIRE_TIMEOUT_SECS` > config.toml > compiled default),
    // produced by `AppConfig::resolve_pg_pool`. Threaded into the sqlx
    // `PgPoolOptions` build; inert on the sqlite path.
    pool: crate::store::PoolConfig,
) -> Result<(
    crate::handlers::StorageBackend,
    Arc<dyn crate::store::MemoryStore>,
)> {
    use crate::handlers::StorageBackend;

    match store_url {
        Some(url) => {
            // Scheme detection for Postgres is case-insensitive: the
            // predicate sees a lowercased copy, the adapter the original.
            let lowered = url.to_ascii_lowercase();
            if crate::migrate::is_postgres_url(&lowered) {
                #[cfg(feature = "sal-postgres")]
                {
                    let timeout = postgres_statement_timeout_secs
                        .unwrap_or(crate::store::postgres::DEFAULT_STATEMENT_TIMEOUT_SECS);
                    // Issue #877: route through the auto-migrate entry
                    // point when the daemon resolved a configured
                    // embedder dim. Bootstrap goes via `connect_with_dim`
                    // so the *fresh* schema lands `vector(<dim>)` from
                    // the very first INIT; the auto-migrate then handles
                    // the pre-existing-schema-at-wrong-dim case.
                    // #1579 A3 (SECURITY) — log the password-redacted
                    // URL. Pre-fix this line shipped the full
                    // `--store-url` (credential included) to journald
                    // at INFO.
                    let display_url = crate::logging::redact_url_password(url);
                    let store = if let Some(dim) = configured_embedding_dim {
                        tracing::info!(
                            "Wave-3 (issue #877): opening Postgres SAL store at {display_url} \
                             (statement_timeout={timeout}s, embedding_dim={dim}, auto_migrate=on, \
                             pool_max={}, pool_min={}, acquire_timeout={}s)",
                            pool.max_connections,
                            pool.min_connections,
                            pool.acquire_timeout_secs
                        );
                        crate::store::postgres::PostgresStore::connect_with_dim_and_timeout_auto_migrate(
                            url, dim, timeout, pool,
                        )
                        .await
                        .context("connect postgres adapter (auto-migrate dim)")?
                    } else {
                        tracing::info!(
                            "Wave-3: opening Postgres SAL store at {display_url} \
                             (statement_timeout={timeout}s, no embedder configured, \
                             pool_max={}, pool_min={}, acquire_timeout={}s)",
                            pool.max_connections,
                            pool.min_connections,
                            pool.acquire_timeout_secs
                        );
                        crate::store::postgres::PostgresStore::connect_with_dim_and_timeout(
                            url,
                            crate::store::postgres::DEFAULT_EMBEDDING_DIM,
                            timeout,
                            pool,
                        )
                        .await
                        .context("connect postgres adapter")?
                    };
                    Ok((StorageBackend::Postgres, Arc::new(store)))
                }
                #[cfg(not(feature = "sal-postgres"))]
                {
                    // Postgres-only inputs are unused in a sal-only
                    // build; consume them to keep the build warning-free.
                    let _ = url;
                    let _ = postgres_statement_timeout_secs;
                    let _ = configured_embedding_dim;
                    let _ = pool;
                    anyhow::bail!(
                        "--store-url postgres:// requires the binary to be built with \
                         --features sal-postgres; this binary was built with --features sal only"
                    );
                }
            } else if let Some(path) = url
                .strip_prefix("sqlite://")
                .or_else(|| url.strip_prefix("SQLITE://"))
            {
                // `sqlite:///abs` leaves `/abs` (kept as-is);
                // `sqlite:////abs` leaves `//abs`, which is collapsed to
                // `/abs`; a path with no leading slash is kept verbatim.
                let clean = path
                    .strip_prefix('/')
                    .map_or(path, |p| if p.starts_with('/') { p } else { path });
                tracing::info!("Wave-3: opening SQLite SAL store at {clean} (--store-url)");
                let store = crate::store::sqlite::SqliteStore::open(clean)
                    .map_err(|e| anyhow::anyhow!("open sqlite adapter: {e}"))?;
                Ok((StorageBackend::Sqlite, Arc::new(store)))
            } else {
                // #1579 A3 (SECURITY) — a mistyped scheme can still
                // carry credentials; redact before echoing.
                anyhow::bail!(
                    "unrecognised --store-url: {} (expected sqlite:///path or postgres://...)",
                    crate::logging::redact_url_password(url)
                )
            }
        }
        None => {
            // Postgres-only inputs are inert on the default SQLite path.
            let _ = postgres_statement_timeout_secs;
            let _ = configured_embedding_dim;
            let _ = pool;
            tracing::debug!("Wave-3: --store-url absent; opening SQLite SAL store at --db path");
            let store = crate::store::sqlite::SqliteStore::open(db_path)
                .map_err(|e| anyhow::anyhow!("open sqlite adapter: {e}"))?;
            Ok((StorageBackend::Sqlite, Arc::new(store)))
        }
    }
}
3079
/// Actor/queue label for wire-action governance consultations.
const WIRE_ACTION_ACTOR: &str = "daemon:wire_action";

/// v0.7.0 #1455 — `true` when the operator opted into the legacy
/// permissive governance posture via
/// `AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR` (`1` / `true`). Default
/// `false` keeps the fail-CLOSED secure default. Shared by the storage
/// pre-write hook and the wire-check hook so the two read the same
/// override identically.
///
/// Accepted values are exactly `1` or `true` in any ASCII case, with
/// no whitespace trimming. An unset variable, a non-Unicode value, or
/// any other value yields `false`.
fn governance_fail_open_on_error() -> bool {
    std::env::var(ENV_GOVERNANCE_FAIL_OPEN)
        .is_ok_and(|v| v == "1" || v.eq_ignore_ascii_case("true"))
}

/// #1455 legacy fail-open opt-out env var — one spelling shared by the
/// reader above and the operator-facing log hints below (#1558).
const ENV_GOVERNANCE_FAIL_OPEN: &str = "AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR";
3098
3099/// #1583 (SEC, MED) — install the substrate `GOVERNANCE_PRE_WRITE`
3100/// storage hook (the L1-6 agent-action `memory_write` gate). Extracted
3101/// from `bootstrap_serve` so every LONG-LIVED write surface installs
3102/// the SAME closure: the HTTP daemon (`serve`) AND the MCP stdio server
3103/// (`run_mcp_server`). Pre-#1583 only `serve` installed it, so
3104/// operator-configured agent-action rules were silently bypassed for
3105/// every MCP-driven write — the primary NHI agent interface.
3106///
3107/// CLI one-shot binaries (`ai-memory store …`) intentionally do NOT
3108/// call this (the L1-6 E operator-as-actor exemption — see
3109/// `src/storage/mod.rs` §hook doc + `cli_one_shot_does_not_install_hook`);
3110/// the operator's direct substrate ops stay unimpeded by design.
3111///
3112/// `hook_consultation_conn` MUST be a connection distinct from the
3113/// caller's main write connection (the hook fires synchronously from
3114/// inside `storage::insert`, which holds the main connection). When it
3115/// is `None` (open failed at install time) the hook fails CLOSED per
3116/// #1455.
3117pub(crate) fn install_governance_pre_write_hook(
3118    db_path: &Path,
3119    deferred_audit_queue: &crate::governance::deferred_audit::DeferredAuditQueue,
3120    rule_cache: &Arc<crate::governance::rule_cache::RuleCache>,
3121    hook_consultation_conn: Option<Arc<std::sync::Mutex<rusqlite::Connection>>>,
3122) {
3123    use crate::governance::agent_action::{
3124        AgentAction, Decision as RuleDecision, check_agent_action_deferred_cached,
3125    };
3126    let rules_db_path = db_path.to_path_buf();
3127    let queue_for_hook = deferred_audit_queue.clone();
3128    let cache_for_hook = Arc::clone(rule_cache);
3129    let conn_for_hook = hook_consultation_conn;
3130    let install_result = crate::storage::GOVERNANCE_PRE_WRITE.set(Box::new(
3131        move |mem: &crate::models::Memory| -> std::result::Result<(), String> {
3132            let action = AgentAction::Custom {
3133                custom_kind: "memory_write".to_string(),
3134                payload: serde_json::json!({
3135                    "namespace": mem.namespace,
3136                    "tier": mem.tier.as_str(),
3137                    (field_names::MEMORY_KIND): mem.memory_kind.as_str(),
3138                    "title": mem.title,
3139                }),
3140            };
3141            // Resolve the agent_id from the memory's metadata
3142            // (every substrate-written memory carries it under
3143            // `metadata.agent_id` — see CLAUDE.md §"Agent
3144            // Identity"). Fall back to a stable hook-source tag
3145            // when the metadata key is missing so the audit row
3146            // still attributes the refusal.
3147            let agent_id = mem
3148                .metadata
3149                .get("agent_id")
3150                .and_then(|v| v.as_str())
3151                .unwrap_or("substrate:pre_write_hook")
3152                .to_string();
3153            let Some(conn_arc) = conn_for_hook.as_ref() else {
3154                // v0.7.0 #1455 (SEC, MED) — FAIL-CLOSED when the hook
3155                // consultation connection could not be opened at
3156                // install time. The pre-#1455 posture degraded to
3157                // ALLOW, which meant a daemon that lost its rules DB
3158                // at boot (permissions flip, disk pressure, an
3159                // attacker who can make `db::open` fail) silently
3160                // disabled the entire substrate write-gate while
3161                // continuing to accept writes. That is the same
3162                // bypass class #1054 closed for consultation ERRORS;
3163                // an unavailable connection is just a permanent
3164                // consultation failure and gets the same secure
3165                // default + the same operator escape hatch.
3166                return governance_consultation_unavailable(
3167                    &queue_for_hook,
3168                    &agent_id,
3169                    &action,
3170                    &rules_db_path,
3171                    "L1-6 governance pre-write",
3172                );
3173            };
3174            let conn_guard = match conn_arc.lock() {
3175                Ok(g) => g,
3176                Err(poisoned) => {
3177                    tracing::warn!(
3178                        "L1-6 governance pre-write: consultation connection mutex poisoned; \
3179                             recovering inner connection and continuing"
3180                    );
3181                    poisoned.into_inner()
3182                }
3183            };
3184            let conn_for_check: &rusqlite::Connection = &conn_guard;
3185            match check_agent_action_deferred_cached(
3186                conn_for_check,
3187                Some(&cache_for_hook),
3188                &agent_id,
3189                &action,
3190                &queue_for_hook,
3191            ) {
3192                Ok(RuleDecision::Allow | RuleDecision::Warn { .. }) => Ok(()),
3193                Ok(RuleDecision::Refuse { rule_id, reason }) => {
3194                    tracing::info!(
3195                        "L1-6 governance pre-write refused namespace={:?} rule_id={} \
3196                             reason={} (chain-logged via deferred audit queue)",
3197                        mem.namespace,
3198                        rule_id,
3199                        reason
3200                    );
3201                    Err(reason)
3202                }
3203                Err(e) => {
3204                    // v0.7.0 #1054 (Agent-2 #4) — fail-CLOSED on
3205                    // rule-consultation error and chain-log the
3206                    // refusal so an attacker who can induce
3207                    // consultation errors (concurrent PRAGMA
3208                    // wal_checkpoint, ATTACH-as-readonly
3209                    // contention, etc.) cannot race a refused
3210                    // write through the gate. The pre-#1054
3211                    // posture degraded to ALLOW, which made the
3212                    // gate dependent on the rule consultation
3213                    // never erroring — a fragile invariant.
3214                    //
3215                    // Operators with a legitimate need for the
3216                    // legacy fail-open posture (e.g. during a
3217                    // chaos-test window where transient SQL
3218                    // pressure is expected) can opt back in via
3219                    // `AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR=1`.
3220                    // The unsafe override is logged at WARN on
3221                    // every fire and counts toward the
3222                    // governance posture surface so an audit can
3223                    // detect the legacy-permissive mode.
3224                    let reason = format!("governance:consultation_failed: {e}");
3225                    let fail_open = std::env::var("AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR")
3226                        .map(|v| v == "1" || v.eq_ignore_ascii_case("true"))
3227                        .unwrap_or(false);
3228                    // Emit a governance.refusal-shaped row to the
3229                    // deferred audit queue regardless of the
3230                    // open/closed decision so the audit chain
3231                    // captures the consultation failure either
3232                    // way. The synthetic Decision::Refuse uses
3233                    // rule_id=`governance:consultation_failed` so
3234                    // a downstream auditor can distinguish
3235                    // "no rule fired" from "consultation broke".
3236                    let synthetic_refusal = RuleDecision::Refuse {
3237                        rule_id: "governance:consultation_failed".to_string(),
3238                        reason: reason.clone(),
3239                    };
3240                    queue_for_hook.submit_refusal(&agent_id, &action, &synthetic_refusal);
3241                    if fail_open {
3242                        tracing::warn!(
3243                            "L1-6 governance pre-write: rule consultation failed: {}; \
3244                                 AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR=1 — \
3245                                 degrading to ALLOW (UNSAFE, legacy posture)",
3246                            e
3247                        );
3248                        Ok(())
3249                    } else {
3250                        tracing::warn!(
3251                            "L1-6 governance pre-write: rule consultation failed: {}; \
3252                                 failing CLOSED (post-#1054 secure default — \
3253                                 set AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR=1 to revert)",
3254                            e
3255                        );
3256                        Err(reason)
3257                    }
3258                }
3259            }
3260        },
3261    ));
3262    if install_result.is_err() {
3263        // Already installed — happens if the same process boots a
3264        // write surface twice (test reuse via `bootstrap_serve`, or a
3265        // process that runs both `serve` and `mcp`). The OnceLock
3266        // contract guarantees the FIRST installed closure wins; we log
3267        // and proceed rather than abort.
3268        tracing::debug!(
3269            "L1-6 governance pre-write hook already installed (process-wide OnceLock); \
3270             the existing hook remains active for this process"
3271        );
3272    } else {
3273        tracing::info!(
3274            "L1-6 governance pre-write hook installed (substrate-authoritative \
3275             memory_write gate active + deferred chain-log on refusal)"
3276        );
3277    }
3278}
3279
3280/// v0.7.0 #1455 (SEC, MED) — shared fail-CLOSED handler for the case
3281/// where a governance hook's rule-consultation connection could not be
3282/// opened at install time. Chain-logs a synthetic
3283/// `governance:consultation_unavailable` refusal, then returns the
3284/// fail-CLOSED verdict (`Err`) unless the operator opted into the
3285/// legacy permissive posture. Reads the env override exactly once and
3286/// delegates the verdict to [`governance_consultation_unavailable_inner`]
3287/// so the decision is unit-testable without env mutation.
3288fn governance_consultation_unavailable(
3289    queue: &crate::governance::deferred_audit::DeferredAuditQueue,
3290    agent_id: &str,
3291    action: &crate::governance::agent_action::AgentAction,
3292    rules_db_path: &Path,
3293    surface: &str,
3294) -> std::result::Result<(), String> {
3295    governance_consultation_unavailable_inner(
3296        queue,
3297        agent_id,
3298        action,
3299        rules_db_path,
3300        surface,
3301        governance_fail_open_on_error(),
3302    )
3303}
3304
3305/// Pure inner of [`governance_consultation_unavailable`] — `fail_open`
3306/// is passed explicitly so tests can pin both the secure default
3307/// (`fail_open = false` ⇒ `Err`, the security contract) and the
3308/// operator-override path (`fail_open = true` ⇒ `Ok`) without touching
3309/// process env.
3310fn governance_consultation_unavailable_inner(
3311    queue: &crate::governance::deferred_audit::DeferredAuditQueue,
3312    agent_id: &str,
3313    action: &crate::governance::agent_action::AgentAction,
3314    rules_db_path: &Path,
3315    surface: &str,
3316    fail_open: bool,
3317) -> std::result::Result<(), String> {
3318    use crate::governance::agent_action::Decision as RuleDecision;
3319    let reason = format!(
3320        "governance:consultation_unavailable: rules DB at {} could not be opened at hook install",
3321        rules_db_path.display(),
3322    );
3323    // Chain-log the consultation failure regardless of the open/closed
3324    // decision so an audit can detect that the gate ran degraded.
3325    let synthetic_refusal = RuleDecision::Refuse {
3326        rule_id: "governance:consultation_unavailable".to_string(),
3327        reason: reason.clone(),
3328    };
3329    queue.submit_refusal(agent_id, action, &synthetic_refusal);
3330    if fail_open {
3331        tracing::warn!(
3332            "{surface}: hook consultation connection unavailable (rules DB at {}); \
3333             {ENV_GOVERNANCE_FAIL_OPEN}=1 — degrading to ALLOW (UNSAFE, legacy posture)",
3334            rules_db_path.display(),
3335        );
3336        Ok(())
3337    } else {
3338        tracing::warn!(
3339            "{surface}: hook consultation connection unavailable (rules DB at {}); failing CLOSED \
3340             (#1455 secure default — set {ENV_GOVERNANCE_FAIL_OPEN}=1 to revert)",
3341            rules_db_path.display(),
3342        );
3343        Err(reason)
3344    }
3345}
3346
/// #1458 (SEC, MED) — reports whether the operator opted into the
/// "API key on every bind" posture via `AI_MEMORY_REQUIRE_API_KEY`.
///
/// Returns `true` only when the variable is set to exactly `1` or to
/// `true` in any ASCII case. An unset variable, a value that is not
/// valid Unicode, or any other value yields `false`.
///
/// When this returns `true` the daemon hard-refuses to start without an
/// `api_key` on ANY bind host, loopback included. That is the hardened
/// choice for deployments fronted by a reverse proxy, a `--network=host`
/// container, or a `socat` forward: the loopback host string the daemon
/// sees says nothing about off-host reachability, so a loopback check
/// alone cannot protect them.
fn require_api_key_strict() -> bool {
    match std::env::var("AI_MEMORY_REQUIRE_API_KEY") {
        Ok(raw) => raw == "1" || raw.eq_ignore_ascii_case("true"),
        // Unset or non-Unicode: treat as "not opted in".
        Err(_) => false,
    }
}
3359
/// #1458 (SEC, MED) — decide whether the daemon may bind given the
/// configured api_key, the bind `host`, and the `strict` opt-in.
///
/// Parameters:
///   - `api_key_present` — whether an `api_key` is configured;
///   - `host` — the bind host exactly as configured (IP literal,
///     optionally bracketed IPv6, or the name `localhost`);
///   - `strict` — the `AI_MEMORY_REQUIRE_API_KEY` opt-in.
///
/// Returns:
///   - `Ok(None)` — safe to bind silently (api_key is set);
///   - `Ok(Some(warning))` — bind permitted but emit `warning` (keyless
///     loopback, default single-tenant posture);
///   - `Err(reason)` — refuse to bind (keyless non-loopback, or keyless
///     under the `strict` opt-in).
///
/// Loopback classification is by address semantics, not by exact
/// spelling: any IPv4 literal in 127.0.0.0/8, any textual form of the
/// IPv6 loopback `::1` (bare or `[bracketed]`), and `localhost` in any
/// ASCII case count as loopback. Anything that does not parse as one of
/// those — other hostnames, unspecified addresses, IPv4-mapped IPv6 —
/// is treated as non-loopback, i.e. the guard stays fail-closed.
///
/// Pulled out of `bootstrap_serve` so all three outcomes are unit
/// testable without standing up a daemon.
fn api_key_bind_guard(
    api_key_present: bool,
    host: &str,
    strict: bool,
) -> std::result::Result<Option<String>, String> {
    // Classifies a bind host string as loopback. Nested so it cannot
    // collide with any other helper in the module.
    fn is_loopback_host(host: &str) -> bool {
        if host.eq_ignore_ascii_case("localhost") {
            return true;
        }
        // Brackets are only meaningful around IPv6 literals, so a
        // bracketed host is parsed strictly as IPv6.
        let bracketed_v6 = host.strip_prefix('[').and_then(|h| h.strip_suffix(']'));
        match bracketed_v6 {
            Some(inner) => inner
                .parse::<std::net::Ipv6Addr>()
                .is_ok_and(|ip| ip.is_loopback()),
            None => host
                .parse::<std::net::IpAddr>()
                .is_ok_and(|ip| ip.is_loopback()),
        }
    }

    if api_key_present {
        return Ok(None);
    }
    // Strict mode is checked before the loopback test on purpose: under
    // the opt-in, a loopback host string is not accepted as evidence
    // that the listener is unreachable from off-host.
    if strict {
        return Err(format!(
            "refusing to start without an API key: AI_MEMORY_REQUIRE_API_KEY is set, which \
             mandates `api_key` on every bind (requested host {host:?}). A reverse proxy, \
             --network=host container, or socat forward can present loopback to the daemon \
             while exposing it off-host, so the loopback guard alone is insufficient. \
             Set top-level `api_key = \"...\"` in config (or --api-key on the CLI), or unset \
             AI_MEMORY_REQUIRE_API_KEY to fall back to the loopback-only default. (#1458)"
        ));
    }
    if !is_loopback_host(host) {
        return Err(format!(
            "refusing to bind to non-loopback address {host:?} without an API key: \
             the daemon's api_key is unset (default-off auth would expose every \
             privileged endpoint to any caller that can reach the bind address). \
             Either set top-level `api_key = \"...\"` in config (or --api-key on the CLI) and rebind, \
             or rebind to 127.0.0.1 / ::1 / localhost for a single-tenant deployment. \
             (v0.7.0 fix campaign S5-C1, 2026-05-13. Note: api_key is a TOP-LEVEL \
             AppConfig field per src/config.rs:2283; [api] subsection is silently ignored by serde.)"
        ));
    }
    // Keyless loopback: allowed, but the operator is told what that
    // exposes and how to harden it.
    Ok(Some(format!(
        "API key NOT configured — daemon bound to loopback {host:?}. \
         Privileged endpoints (POST /memories, /links, /agents, /subscriptions) \
         accept any caller that reaches this listener. #1458: a reverse proxy, \
         --network=host container, or socat forward presents loopback to the daemon \
         while exposing it off-host, re-opening this keyless write surface — set \
         top-level `api_key = \"...\"` (or AI_MEMORY_REQUIRE_API_KEY=1 to hard-require it) \
         for any deployment that is not strictly single-tenant on this host. \
         /approve and /reject remain HMAC-gated regardless."
    )))
}
3417
3418/// Build all daemon state and spawn background tasks. Returns the
3419/// aggregated state without binding any sockets — testable in isolation.
3420///
3421/// DOC-6: this function reads several legacy `AppConfig` fields
3422/// (`auto_tag_model`, `llm_model`, `ollama_url`) directly for v0.7.x
3423/// backward compat; the `#[allow(deprecated)]` carves out the legacy
3424/// reads while keeping the deprecation warning live for external
3425/// consumers.
3426#[allow(deprecated)]
3427pub async fn bootstrap_serve(
3428    db_path: &Path,
3429    args: &ServeArgs,
3430    app_config: &AppConfig,
3431) -> Result<ServeBootstrap> {
3432    // S5-C1 (v0.7.0 fix campaign 2026-05-13): refuse default-off auth
3433    // on non-loopback binds. When `api_key` is unset, the `api_key_auth`
3434    // middleware is a pass-through — every privileged endpoint (write,
3435    // approve, reject, governance state) is reachable by any caller
3436    // that can open a TCP connection. The K10 SSE/approval path is
3437    // HMAC-gated and the legacy /approve + /reject paths are now also
3438    // HMAC-gated (see `handlers::approve_pending` and
3439    // `handlers::reject_pending`), but the broader write surface
3440    // (POST /api/v1/memories, /links, /agents, /subscriptions, …)
3441    // still rides on `api_key_auth`. Refusing to bind to a routable
3442    // address with no API key configured is the safe default;
3443    // operators who *intentionally* run a public daemon must set
3444    // `[api] api_key` (or `--api-key` on the CLI) explicitly.
3445    match api_key_bind_guard(
3446        app_config.api_key.is_some(),
3447        args.host.as_str(),
3448        require_api_key_strict(),
3449    ) {
3450        Ok(None) => {}
3451        Ok(Some(warning)) => tracing::warn!("{warning}"),
3452        Err(reason) => anyhow::bail!("{reason}"),
3453    }
3454
3455    let resolved_ttl = app_config.effective_ttl();
3456    let archive_on_gc = app_config.effective_archive_on_gc();
3457    let conn = db::open(db_path)?;
3458
3459    // v0.7.0 SEC-2 (Cluster D, issue #767) — fail-OPEN diagnostic + the
3460    // operator-opt-in fail-CLOSED knob. When `governance_rules` has any
3461    // `enabled = 1` row AND no operator pubkey is resolved, the L1-6
3462    // loader honours every enabled row without signature verification
3463    // (pre-L1-6 compat mode). A SQL-write gadget that mutates
3464    // `governance_rules` can therefore install / flip rules without
3465    // operator consent.
3466    //
3467    // Default: surface a once-per-process `tracing::error!` so the
3468    // operator sees the fail-OPEN posture on every daemon start.
3469    //
3470    // Operator opt-in: `[governance] require_operator_pubkey = true`
3471    // promotes the diagnostic to a hard refusal — `bootstrap_serve`
3472    // returns an `anyhow::Error` and the daemon does NOT start. This
3473    // is the right posture for hardened deployments that want strict
3474    // enforcement BEFORE the pubkey lands.
3475    let enabled_rule_count =
3476        crate::governance::rules_store::count_enabled_rules(&conn).unwrap_or(0);
3477    let pubkey_resolved = crate::governance::rules_store::resolve_operator_pubkey().is_some();
3478    if enabled_rule_count > 0 && !pubkey_resolved {
3479        crate::governance::rules_store::log_missing_operator_pubkey_once(enabled_rule_count);
3480        if app_config
3481            .governance
3482            .as_ref()
3483            .is_some_and(|g| g.require_operator_pubkey)
3484        {
3485            anyhow::bail!(
3486                "SEC-2 fail-closed: `[governance] require_operator_pubkey = true` is set but \
3487                 `governance_rules` contains {enabled_rule_count} enabled row(s) AND no \
3488                 operator pubkey is resolved (AI_MEMORY_OPERATOR_PUBKEY unset AND \
3489                 ~/.config/ai-memory/operator.key.pub absent). Refusing to start: a fail-OPEN \
3490                 L1-6 loader would honour every enabled rule without signature verification. \
3491                 Run `ai-memory rules keygen` + `ai-memory rules sign-seed` to activate L1-6, \
3492                 or unset `require_operator_pubkey` to accept the pre-L1-6 posture."
3493            );
3494        }
3495    }
3496
3497    // v0.7.0 L1-6 Deliverable E (issue #691) — install the substrate
3498    // governance pre-write hook BEFORE any write paths come live. The
3499    // hook consults the operator-signed `governance_rules` table for
3500    // a refusal verdict at every `storage::insert*` callsite; a
3501    // refusal short-circuits the SQL `INSERT` cleanly (no row
3502    // written, MemoryError::RefusedByGovernance bubbled).
3503    //
3504    // Layering: the hook is a `OnceLock<Box<Fn>>` in `src/storage/mod.rs`
3505    // — installation is one-shot for the process lifetime. CLI
3506    // one-shot binaries (`ai-memory store`, `ai-memory mine`, …)
3507    // never reach this codepath and so leave the hook empty by
3508    // design (operator standing directive: rules gate AGENT writes,
3509    // not the operator's direct CLI ops).
3510    //
3511    // The closure opens a fresh `Connection` per call (via
3512    // `db::open` against the same db_path) so it does NOT contend
3513    // with the substrate writer's lock held during `storage::insert`.
3514    // SQLite WAL mode allows the rule-read to proceed in parallel.
3515    // Failure to open the rule-consultation connection degrades to
3516    // ALLOW with a WARN: a transient FS issue must not wedge the
3517    // write surface, and the operator can detect the degradation
3518    // from the log surface.
3519    //
3520    // v0.7.0 Policy-Engine Item 3 (2026-05-14) — the hook now also
3521    // submits every refusal to the process-wide deferred-audit
3522    // queue via `check_agent_action_deferred`. The queue's
3523    // background drainer task chain-logs each refusal as a
3524    // `governance.refusal` row in `signed_events` AFTER the
3525    // in-flight `storage::insert` transaction has released its
3526    // lock. This closes the cryptographic-log gap that the prior
3527    // `_no_audit` variant left open (refusals were typed but not
3528    // chain-logged; the deadlock-avoidance came at the cost of
3529    // breaking the bypass-impossibility audit story for storage
3530    // writes).
3531    let (deferred_audit_queue, deferred_audit_supervisor) =
3532        crate::governance::deferred_audit::install_deferred_audit_drainer(db_path);
3533    // Capture the shared atomic metrics handle BEFORE the queue is cloned
3534    // into the governance hooks + moved onto `AppState`. `serve` polls
3535    // these on shutdown to drain the queue before the WAL checkpoint.
3536    let deferred_audit_metrics = deferred_audit_queue.metrics();
3537    tracing::info!(
3538        "policy-engine item 3: deferred-audit drainer spawned (chain-logs \
3539         storage refusals as `governance.refusal` rows in signed_events)"
3540    );
3541
3542    // v0.7.0 #991 — per-instance rule cache shared by the substrate
3543    // `GOVERNANCE_PRE_WRITE` storage hook (below), the
3544    // `wire_check::GOVERNANCE_PRE_ACTION` action hook (below), and the
3545    // `AppState.rule_cache` field (HTTP handler call sites). Cloning
3546    // the `Arc<RuleCache>` into each captures-by-reference; the cache
3547    // is dropped when the last reference (AppState + the two hooks)
3548    // goes away on daemon shutdown. Per-instance means multi-daemon
3549    // test fixtures don't cross-pollute (the contract that the #990
3550    // revert restored after #983 shipped a process-wide singleton).
3551    let rule_cache: Arc<crate::governance::rule_cache::RuleCache> =
3552        Arc::new(crate::governance::rule_cache::RuleCache::new());
3553
3554    // v0.7.0 #1017 (Agent-1 #3) — long-lived consultation connection
3555    // shared between the storage `GOVERNANCE_PRE_WRITE` hook and the
3556    // `wire_check::GOVERNANCE_PRE_ACTION` action hook. Pre-#1017 each
3557    // hook invocation called `db::open(&rules_db_path)` which runs
3558    // 4 PRAGMAs + SCHEMA execute_batch + migrate() + trigger probe —
3559    // ~1-2ms per write that paid the cost unconditionally even on
3560    // RuleCache hits. The #991 rule cache made the OPEN overhead the
3561    // dominant remaining hot-path cost; #1017 closes the gap by
3562    // opening the connection ONCE at install time and reusing it
3563    // across all hook invocations. The connection is wrapped in
3564    // `std::sync::Mutex` because hooks fire from both sync paths
3565    // (`storage::insert` is sync; wire-check is consulted from sync
3566    // `governance::wire_check::check` regardless of caller context).
3567    //
3568    // If `db::open` fails at install time, we install hooks that
3569    // degrade to ALLOW on every call with a WARN — same posture as
3570    // the pre-#1017 per-call open-failure leg. The operator sees the
3571    // diagnostic in daemon logs and can re-attempt.
3572    let hook_consultation_conn: Option<Arc<std::sync::Mutex<rusqlite::Connection>>> =
3573        match db::open(db_path) {
3574            Ok(c) => Some(Arc::new(std::sync::Mutex::new(c))),
3575            Err(e) => {
3576                tracing::warn!(
3577                    target: "ai_memory::daemon_runtime",
3578                    "v0.7.0 #1017: failed to open hook consultation connection at {}: {}; \
3579                     governance hooks will degrade to ALLOW on every invocation",
3580                    db_path.display(),
3581                    e,
3582                );
3583                None
3584            }
3585        };
3586
3587    // #1582/#1583 (SEC) — the substrate pre-write gate is installed via
3588    // the shared helper so EVERY long-lived write surface installs the
3589    // SAME closure. `serve` (here) and `mcp` (`run_mcp_server`) both call
3590    // it; CLI one-shot binaries intentionally do NOT (the L1-6 E
3591    // operator-as-actor exemption — see the helper's doc).
3592    install_governance_pre_write_hook(
3593        db_path,
3594        &deferred_audit_queue,
3595        &rule_cache,
3596        hook_consultation_conn.clone(),
3597    );
3598
3599    // v0.7.0 (issue #691 fold-1) — install the universal AgentAction
3600    // wire-point hook BEFORE any daemon-side write/network/spawn paths
3601    // come live. Mirrors the L1-6 E pattern above but covers the FOUR
3602    // agent-EXTERNAL action variants (Bash, FilesystemWrite,
3603    // NetworkRequest, ProcessSpawn) consulted by skill_export,
3604    // federation::sync, hooks::executor, and the LLM client. CLI
3605    // one-shot binaries never reach this path so the hook stays empty
3606    // for direct operator ops (L1-6 E operator-as-actor exemption).
3607    //
3608    // v0.7.0 #1034 (Agent-6 #2) — wire-check refusals now flow into the
3609    // SAME deferred-audit queue the substrate pre-write hook uses, so
3610    // every refusal — storage AND wire — chain-logs a `governance.refusal`
3611    // row in `signed_events`. Pre-#1034 the wire-check refusals only
3612    // emitted to the forensic JSONL log; the cryptographic-audit chain
3613    // missed them, breaking the bypass-impossibility audit story for the
3614    // four agent-EXTERNAL action variants. The closure uses the stable
3615    // `daemon:wire_action` tag for `agent_id` attribution because the
3616    // wire-check fires inside daemon-internal subsystems (federation,
3617    // hooks, LLM, skill_export) where there is no per-request agent
3618    // identity bound to the action; the storage hook's
3619    // `substrate:pre_write_hook` fallback uses the same shape.
3620    {
3621        use crate::governance::agent_action::{
3622            AgentAction, Decision as RuleDecision, check_agent_action_deferred_cached,
3623        };
3624        let rules_db_path = db_path.to_path_buf();
3625        let cache_for_wire_check = Arc::clone(&rule_cache);
3626        let queue_for_wire_check = deferred_audit_queue.clone();
3627        // v0.7.0 #1017 — share the same long-lived consultation
3628        // connection introduced above. Hook installs are serial so
3629        // there's no race on the Arc clone.
3630        let conn_for_wire_check = hook_consultation_conn.clone();
3631        let install_result = crate::governance::wire_check::GOVERNANCE_PRE_ACTION.set(Box::new(
3632            move |action: &AgentAction| -> std::result::Result<(), String> {
3633                let Some(conn_arc) = conn_for_wire_check.as_ref() else {
3634                    // v0.7.0 #1455 (SEC, MED) — FAIL-CLOSED when the
3635                    // consultation connection is unavailable, mirroring
3636                    // the storage hook above. A daemon-internal wire
3637                    // action (federation push, hooks spawn, LLM call,
3638                    // skill_export filesystem write) is HIGHER-stakes
3639                    // than a storage write, so degrading to ALLOW on a
3640                    // missing rules DB would be the worst place to fail
3641                    // open. Same secure default + escape hatch.
3642                    return governance_consultation_unavailable(
3643                        &queue_for_wire_check,
3644                        WIRE_ACTION_ACTOR,
3645                        action,
3646                        &rules_db_path,
3647                        "wire_check",
3648                    );
3649                };
3650                let conn_guard = match conn_arc.lock() {
3651                    Ok(g) => g,
3652                    Err(poisoned) => {
3653                        tracing::warn!(
3654                            "wire_check: consultation connection mutex poisoned; \
3655                             recovering inner connection and continuing"
3656                        );
3657                        poisoned.into_inner()
3658                    }
3659                };
3660                let conn_for_check: &rusqlite::Connection = &conn_guard;
3661                match check_agent_action_deferred_cached(
3662                    conn_for_check,
3663                    Some(&cache_for_wire_check),
3664                    WIRE_ACTION_ACTOR,
3665                    action,
3666                    &queue_for_wire_check,
3667                ) {
3668                    Ok(RuleDecision::Allow | RuleDecision::Warn { .. }) => Ok(()),
3669                    Ok(RuleDecision::Refuse { rule_id, reason }) => {
3670                        tracing::info!(
3671                            "wire_check refused action kind={} rule_id={} reason={} \
3672                             (chain-logged via deferred audit queue)",
3673                            action.kind(),
3674                            rule_id,
3675                            reason,
3676                        );
3677                        Err(reason)
3678                    }
3679                    Err(e) => {
3680                        // v0.7.0 #1054 (Agent-2 #4) — same fail-CLOSED
3681                        // posture as the storage hook above. Wire-check
3682                        // refusals for daemon-internal actions
3683                        // (federation push, hooks spawn, LLM call,
3684                        // skill_export) are higher-stakes than storage
3685                        // refusals — fail-open here would let a
3686                        // consultation race smuggle a refused
3687                        // network/filesystem/process action through
3688                        // the gate. Same env-var escape hatch:
3689                        // `AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR=1`.
3690                        let reason = format!("governance:consultation_failed: {e}");
3691                        let fail_open = std::env::var("AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR")
3692                            .map(|v| v == "1" || v.eq_ignore_ascii_case("true"))
3693                            .unwrap_or(false);
3694                        let synthetic_refusal = RuleDecision::Refuse {
3695                            rule_id: "governance:consultation_failed".to_string(),
3696                            reason: reason.clone(),
3697                        };
3698                        queue_for_wire_check.submit_refusal(
3699                            WIRE_ACTION_ACTOR,
3700                            action,
3701                            &synthetic_refusal,
3702                        );
3703                        if fail_open {
3704                            tracing::warn!(
3705                                "wire_check: rule consultation failed: {}; \
3706                                 AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR=1 — \
3707                                 degrading to ALLOW for this action ({}) (UNSAFE, legacy posture)",
3708                                e,
3709                                action.kind(),
3710                            );
3711                            Ok(())
3712                        } else {
3713                            tracing::warn!(
3714                                "wire_check: rule consultation failed: {}; failing CLOSED \
3715                                 for this action ({}) (post-#1054 secure default — set \
3716                                 AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR=1 to revert)",
3717                                e,
3718                                action.kind(),
3719                            );
3720                            Err(reason)
3721                        }
3722                    }
3723                }
3724            },
3725        ));
3726        if install_result.is_err() {
3727            tracing::debug!(
3728                "wire_check pre-action hook already installed (process-wide OnceLock); \
3729                 the existing hook remains active for this daemon"
3730            );
3731        } else {
3732            tracing::info!(
3733                "wire_check pre-action hook installed (agent-action gate active for \
3734                 FilesystemWrite/NetworkRequest/ProcessSpawn/Bash/Custom)"
3735            );
3736        }
3737    }
3738
3739    // Issue #219: build the embedder + HNSW index up front so HTTP write
3740    // paths can populate them. Previously the daemon never constructed an
3741    // embedder, silently excluding every HTTP-authored memory from semantic
3742    // recall. Build only when the configured feature tier enables it —
3743    // keyword-only deployments keep their zero-dep, zero-RAM profile.
3744    // Daemon has no per-invocation tier override; honour the config tier.
3745    let feature_tier = app_config.effective_tier(None);
3746    let tier_config = feature_tier.config();
3747    let embedder = build_embedder(feature_tier, app_config).await;
3748    // #1579 B3 — async boot HNSW. The daemon binds with an EMPTY
3749    // index and becomes ready immediately; a background loader
3750    // (`spawn_vector_index_boot_load`) reads the stored embeddings
3751    // over its own connection, builds the graph on the #968 rebuild
3752    // thread, and swaps it in (INFO line on swap). Until then,
3753    // semantic recall serves its keyword/FTS blend and the #519
3754    // proactive conflict check uses its bounded-scan fallback. The
3755    // pre-#1579 synchronous build held boot for 40 s at 10k vectors
3756    // and >28 min at 100k (P1 audit).
3757    let vector_index_state: Arc<Mutex<Option<VectorIndex>>> = Arc::new(Mutex::new(
3758        embedder.is_some().then(hnsw::VectorIndex::empty),
3759    ));
3760    if embedder.is_some() {
3761        let _boot_index_loader =
3762            spawn_vector_index_boot_load(db_path.to_path_buf(), Arc::clone(&vector_index_state));
3763    }
3764
3765    // v0.7.0 L5 — build the LLM client for autonomy-hook capable tiers
3766    // (smart/autonomous). The HTTP `create_memory` handler reaches for
3767    // `app.llm` to call `auto_tag` (mirroring the auto-tag block in MCP's
3768    // `crate::mcp::handle_store`). When the configured tier has no
3769    // `llm_model` (keyword/semantic) or the Ollama endpoint is
3770    // unreachable, the client stays `None` and the hook silently
3771    // degrades to operator-supplied tags only.
3772    let llm = build_llm_client(feature_tier, app_config).await;
3773
3774    let db_state: Db = Arc::new(Mutex::new((
3775        conn,
3776        db_path.to_path_buf(),
3777        resolved_ttl,
3778        archive_on_gc,
3779    )));
3780
3781    // Federation: parsed from --quorum-writes / --quorum-peers. Disabled
3782    // entirely when either is absent — daemon behaves exactly like
3783    // v0.6.0 in that case.
3784    // #[cfg_attr] keeps the `mut` only when DLQ wire-up below is
3785    // active — under default-features the binding is read-only.
3786    #[cfg_attr(not(feature = "sal"), allow(unused_mut))]
3787    let mut federation = federation::FederationConfig::build(
3788        args.quorum_writes,
3789        &args.quorum_peers,
3790        std::time::Duration::from_millis(args.quorum_timeout_ms),
3791        args.quorum_client_cert.as_deref(),
3792        args.quorum_client_key.as_deref(),
3793        args.quorum_ca_cert.as_deref(),
3794        // v0.7.0 epic (ADR-001) — federation identity is resolved, not
3795        // hardcoded. Precedence: AI_MEMORY_FED_IDENTITY env >
3796        // `--federation-identity` operator config > the historical
3797        // `host:<hostname>` default. A blank flag is skipped by the
3798        // resolver, so it can never collapse the identity to empty.
3799        federation::identity::resolve_federation_identity(args.federation_identity.as_deref()),
3800        // v0.7.0 fold-A2A1.4 (#702) — thread the operator-configured
3801        // `[api] api_key` into federation outbound so peer POSTs carry
3802        // `x-api-key`. Without this, cross-host federation BREAKS when
3803        // any peer runs with api-key auth (peer returns 401 → quorum
3804        // never converges). `None` keeps the prior behaviour unchanged.
3805        app_config.api_key.clone(),
3806    )
3807    .context("federation config")?;
3808
3809    let mut task_handles: Vec<JoinHandle<()>> = Vec::new();
3810
3811    if let Some(ref fed) = federation {
3812        tracing::info!(
3813            "federation enabled: W={} over {} peer(s), timeout {}ms",
3814            fed.policy.w,
3815            fed.peer_count(),
3816            args.quorum_timeout_ms,
3817        );
3818        // v0.6.0.1 (#320) — post-partition catchup poller. Closes the gap
3819        // where a rejoining node only sees post-resume writes.
3820        //
3821        // v0.7.0 M3 — the catchup loop now plumbs the SAL store handle
3822        // through (instead of `db::insert_if_newer`) so postgres-backed
3823        // daemons route peer pushes to postgres. The actual spawn is
3824        // deferred until after `build_store_handle` resolves the
3825        // `Arc<dyn MemoryStore>` — see the post-store-build block below.
3826        if args.catchup_interval_secs > 0 {
3827            tracing::info!(
3828                "catchup loop enabled: polling {} peer(s) every {}s",
3829                fed.peer_count(),
3830                args.catchup_interval_secs,
3831            );
3832        } else {
3833            tracing::info!("catchup loop disabled (--catchup-interval-secs=0)");
3834        }
3835    }
3836
3837    // v0.7.0 A5 — resolve the effective MCP tool profile for the HTTP
3838    // path so `/capabilities` v3 reports honest loaded/total counts.
3839    // Mirrors the MCP-mode resolution at src/daemon_runtime.rs:501;
3840    // unresolvable profile (e.g., bad config.toml) falls back to
3841    // Profile::core() rather than blocking HTTP boot.
3842    let resolved_profile = app_config
3843        .effective_profile(None)
3844        .unwrap_or_else(|_| crate::profile::Profile::core());
3845    let mcp_config_for_http = app_config.mcp.clone();
3846    // v0.7 Track H — H2 + Round-3 F12: ensure-and-load the daemon's
3847    // outbound-link signing keypair. The helper auto-generates the
3848    // well-known `daemon` keypair under `~/.config/ai-memory/keys/` on
3849    // first start (idempotent — a restart never overwrites an existing
3850    // keypair) and returns it for the AppState. The lifecycle outcome
3851    // is captured separately so the startup banner can surface the
3852    // auto-gen path. Failure at any step degrades to unsigned-link
3853    // mode without aborting startup.
3854    let (active_keypair, daemon_keypair_outcome) = ensure_and_load_daemon_keypair();
3855
3856    // v0.7.0 B3-fix2 — gate the family-descriptor embedding precompute
3857    // behind `AI_MEMORY_PRECOMPUTE_FAMILY_EMBEDDINGS=1`, default OFF.
3858    //
3859    // ## Why default-OFF
3860    //
3861    // The B3 precompute is forward-infrastructure for B2's
3862    // `memory_smart_load(intent)`, which is not yet wired into any HTTP
3863    // or MCP handler — `best_family_match` is dead code in production
3864    // today (only one unit test calls it). Running 8 detached embeds at
3865    // boot therefore buys nothing for current callers but does compete
3866    // for the embedder's `std::sync::Mutex<BertModel>` against every
3867    // request that needs to embed (notify content, sync_push row
3868    // refresh, recall query, single-row create_memory).
3869    //
3870    // Under heavy parallel `cargo test` load (every integration test
3871    // spawns its own `ai-memory serve` subprocess, saturating CPU),
3872    // that contention pushes federation-quorum windows over the 5 s
3873    // ack budget — observed locally as `http_notify_fans_out_…` 503s
3874    // and `test_serve_mtls_…` POST timeouts that did not occur on
3875    // `origin/main` and disappear when the precompute is gated off.
3876    // Even the prior B3-fix's "detached spawn_blocking" form does not
3877    // help: the contention is on the embedder mutex inside `embed()`,
3878    // not on the tokio scheduler.
3879    //
3880    // ## Cell semantics preserved
3881    //
3882    // `AppState::family_embeddings` stays `Arc<RwLock<Option<…>>>` so
3883    // B2 can flip the env var on (or remove the gate entirely) the
3884    // day the smart loader actually consumes the cache, without an
3885    // `AppState` field-shape change. `None` continues to mean "not
3886    // yet populated" and `best_family_match` already short-circuits
3887    // to its non-embedding fallback in that state.
3888    let family_embeddings: Arc<
3889        tokio::sync::RwLock<Option<Vec<(crate::profile::Family, Vec<f32>)>>>,
3890    > = Arc::new(tokio::sync::RwLock::new(None));
3891    let embedder_arc = Arc::new(embedder);
3892    if std::env::var("AI_MEMORY_PRECOMPUTE_FAMILY_EMBEDDINGS")
3893        .ok()
3894        .as_deref()
3895        == Some("1")
3896    {
3897        let cache = family_embeddings.clone();
3898        let embedder_for_task = embedder_arc.clone();
3899        task_handles.push(tokio::spawn(async move {
3900            // ----------------------------------------------------------------
3901            // H1 (v0.7.0 round-2) — lock-discipline for the family-embedding
3902            // precompute:
3903            //
3904            //   1. The slow `Embedder::embed(descriptor)` calls run inside a
3905            //      `spawn_blocking` closure that holds NO lock on
3906            //      `family_embeddings`. Each (Family, Vec<f32>) pair is
3907            //      collected into a local `Vec` owned by the blocking task.
3908            //   2. Only AFTER the entire batch is computed do we take
3909            //      `family_embeddings.write().await` exactly ONCE to swap
3910            //      the populated `Some(Vec)` into the cache.
3911            //
3912            // Why: the prior shape that acquired the write lock before each
3913            // embed call would have parked every concurrent `try_read()`
3914            // reader for the duration of an ML inference round trip — up
3915            // to seconds on a cold runner. Concurrent recall handlers that
3916            // call `AppState::best_family_match` would be forced into the
3917            // no-cache fallback even when the embedder was fully operational.
3918            //
3919            // The two-phase shape below is the canonical "compute outside,
3920            // commit inside" lock pattern: readers see either `None`
3921            // (precompute not yet finished) or the fully-populated
3922            // `Some(Vec)` — never a half-built vector.
3923            // ----------------------------------------------------------------
3924            let computed = tokio::task::spawn_blocking(move || {
3925                // No lock held during embed calls — pairs are accumulated
3926                // into a local Vec returned to the async caller below.
3927                AppState::precompute_family_embeddings(
3928                    embedder_for_task
3929                        .as_ref()
3930                        .as_ref()
3931                        .map(|e| e as &dyn crate::embeddings::Embed),
3932                )
3933            })
3934            .await
3935            .unwrap_or_else(|e| {
3936                tracing::warn!(
3937                    error = %e,
3938                    "B3: family-descriptor precompute task panicked; \
3939                     family_embeddings will stay empty",
3940                );
3941                Vec::new()
3942            });
3943            if !computed.is_empty() {
3944                tracing::info!(
3945                    "B3: pre-computed {} family-descriptor embeddings (async)",
3946                    computed.len(),
3947                );
3948            }
3949            // Single-shot commit: write lock acquired ONCE here and
3950            // released immediately after the swap. No embedder calls run
3951            // under this lock.
3952            *cache.write().await = Some(computed);
3953        }));
3954    } else {
3955        tracing::debug!(
3956            "B3: family-descriptor precompute disabled \
3957             (AI_MEMORY_PRECOMPUTE_FAMILY_EMBEDDINGS != 1); \
3958             best_family_match will return None until B2 wires \
3959             the smart loader and the gate is flipped on"
3960        );
3961    }
3962
3963    // v0.7.0 Wave-3 — resolve the polymorphic `MemoryStore` handle from
3964    // the operator's `--store-url` (when set) or build a `SqliteStore`
3965    // wrapping the same on-disk database `--db` already opened. Both
3966    // branches end with a populated `Arc<dyn MemoryStore>` so handlers
3967    // can dispatch through the SAL unconditionally on `--features sal`
3968    // builds. The `storage_backend` flag below records which adapter
3969    // resolved so handlers can branch + the `/capabilities` payload can
3970    // surface it for operators.
3971    //
3972    // Standard builds (no `--features sal`) skip the trait wiring
3973    // entirely — the daemon stays a pure SQLite-on-disk deployment with
3974    // zero behavioural drift versus pre-Wave-3.
3975    // Issue #877: resolve the configured embedder dim from the same
3976    // resolution ladder `build_embedder` uses — app_config override wins,
3977    // then tier preset, then None. We re-derive it here (instead of
3978    // pulling from the materialised `embedder` handle) because the
3979    // embedder load itself can fail (network egress to HF Hub, OOM,
3980    // etc.) and we still need the *configured* dim to inform the
3981    // postgres bootstrap, otherwise a transient embedder load failure
3982    // would leave the schema mis-dimensioned silently. Falls back to
3983    // `None` only when no embedder model is configured at all
3984    // (keyword-only).
3985    //
3986    // v0.7.x (issue #1169): the resolution ladder now prefers the
3987    // resolver-side canonical dim lookup
3988    // ([`crate::config::canonical_embedding_dim`]) so an operator
3989    // pick of `[embeddings].model = "bge-large-en"` (or any other
3990    // model id outside the 2-family [`EmbeddingModel`] enum) bootstraps
3991    // the postgres schema at the live 1024-dim instead of silently
3992    // dropping to the tier-preset's 768-dim. The enum-parse arm
3993    // remains as the back-compat path for legacy flat-field configs
3994    // (`embedding_model = "nomic_embed_v15"`), and the tier preset is
3995    // the last-resort fallback. The pre-#1169 path lost the resolver
3996    // signal entirely — schema dim wrong on every non-enum operator
3997    // pick, with no log signal because the parse arm silently fell
3998    // through to the preset.
3999    #[cfg(feature = "sal")]
4000    let configured_embedding_dim: Option<u32> =
4001        resolve_configured_embedding_dim(app_config, &tier_config);
4002    #[cfg(feature = "sal")]
4003    let (storage_backend, store_handle) = build_store_handle(
4004        args.store_url.as_deref(),
4005        db_path,
4006        app_config.postgres_statement_timeout_secs,
4007        configured_embedding_dim,
4008        app_config.resolve_pg_pool(),
4009    )
4010    .await
4011    .context("build SAL store handle")?;
4012    #[cfg(not(feature = "sal"))]
4013    let storage_backend = crate::handlers::StorageBackend::Sqlite;
4014
4015    // v0.7.0 Track D #933 — federation push DLQ sink. Resolved here
4016    // (after `build_store_handle` returns the typed store) so the
4017    // `broadcast_store_quorum` fanout can land DLQ rows on per-peer
4018    // failure. Sqlite-backed daemons get the shared `Db` mutex sink;
4019    // postgres-backed daemons get the pool-backed sink. The chosen
4020    // sink is also handed to the `replay_federation_push_dlq` worker
4021    // spawned below so the same DLQ rows the broadcast wrote are the
4022    // ones the worker drains.
4023    //
4024    // Feature-gated to `--features sal` — the DLQ trait surface
4025    // requires `async-trait` which is a SAL-only dep. Default
4026    // (sqlite-only) builds preserve pre-#933 behaviour.
4027    #[cfg(feature = "sal")]
4028    if let Some(ref mut fed) = federation {
4029        let sink: std::sync::Arc<dyn federation::FederationDlqSink> = match storage_backend {
4030            #[cfg(feature = "sal-postgres")]
4031            crate::handlers::StorageBackend::Postgres => {
4032                // Recover the typed PostgresStore via the generic
4033                // `as_any` downcast hatch (renamed from
4034                // `as_any_for_postgres` per ARCH-15, FX-C4-batch2) so
4035                // the sink can issue raw SQL through
4036                // `PostgresStore::pool()`. Falls back to the sqlite
4037                // sink (which would error on every INSERT because the
4038                // postgres DB has no sqlite connection) when the
4039                // downcast fails — unreachable in practice because the
4040                // only backend returning `StorageBackend::Postgres` IS
4041                // PostgresStore.
4042                if let Some(pg) = store_handle
4043                    .as_any()
4044                    .downcast_ref::<crate::store::postgres::PostgresStore>()
4045                {
4046                    std::sync::Arc::new(federation::push_dlq::PostgresDlqSink::new(
4047                        std::sync::Arc::new(pg.clone()),
4048                    ))
4049                } else {
4050                    tracing::warn!(
4051                        "federation push DLQ: PostgresStore downcast failed; \
4052                             falling back to sqlite sink (DLQ writes WILL error \
4053                             on postgres-backed daemons until the cast is restored)"
4054                    );
4055                    std::sync::Arc::new(federation::push_dlq::SqliteDlqSink::new(db_state.clone()))
4056                }
4057            }
4058            _ => std::sync::Arc::new(federation::push_dlq::SqliteDlqSink::new(db_state.clone())),
4059        };
4060        fed.dlq_sink = Some(sink);
4061    }
4062
4063    // v0.7.0 M3 — spawn the federation catchup loop now that the SAL
4064    // store handle has resolved. The loop dispatches each peer-pulled
4065    // memory through `store.apply_remote_memory` (postgres-aware) on
4066    // `--features sal` builds; legacy builds fall back to the
4067    // `db::insert_if_newer` sqlite path.
4068    if let Some(ref fed) = federation
4069        && args.catchup_interval_secs > 0
4070    {
4071        let interval = std::time::Duration::from_secs(args.catchup_interval_secs);
4072        #[cfg(feature = "sal")]
4073        {
4074            federation::spawn_catchup_loop_with_store(
4075                fed.clone(),
4076                db_state.clone(),
4077                Some(store_handle.clone()),
4078                interval,
4079            );
4080        }
4081        #[cfg(not(feature = "sal"))]
4082        {
4083            federation::spawn_catchup_loop(fed.clone(), db_state.clone(), interval);
4084        }
4085
4086        // v0.7.0 Track D #933 — federation push DLQ replay worker.
4087        // Polls the DLQ at the same cadence as the catchup loop and
4088        // re-attempts `post_once` against each peer until the row
4089        // Acks. The worker maintains the
4090        // `ai_memory_federation_push_dlq_depth` Prometheus gauge.
4091        #[cfg(feature = "sal")]
4092        if let Some(sink) = fed.dlq_sink.clone() {
4093            let _replay_handle =
4094                federation::spawn_replay_federation_push_dlq(fed.clone(), sink, interval);
4095            tracing::info!(
4096                "federation push DLQ replay worker enabled: polling every {}s",
4097                args.catchup_interval_secs,
4098            );
4099        }
4100    }
4101
4102    // #1579 A4 — serve-boot embedding-backfill sweep over the SAL
4103    // store. The legacy backfill (`crate::mcp::run_embedding_backfill*`)
4104    // is rusqlite-`Connection`-bound and runs ONLY at MCP stdio boot,
4105    // so postgres-backed daemons (which exist exclusively behind
4106    // `serve --store-url postgres://…`) never re-embedded the rows the
4107    // v29 embedding-dim migration NULLed — fleet semantic recall was
4108    // dead (P3 audit: 37/7,994 rows embedded, 0 backfill journal
4109    // lines). This sweep drains `MemoryStore::list_unembedded` in
4110    // bounded `[embeddings].backfill_batch` chunks through the daemon
4111    // embedder. SQLite-backed serve daemons are a structural no-op
4112    // (the sqlite adapter inherits the empty `list_unembedded`
4113    // default — its side-table embeddings are backfilled by the MCP
4114    // boot path), so this changes nothing for them. Detached task:
4115    // boot readiness never blocks on the sweep.
4116    #[cfg(feature = "sal")]
4117    if embedder_arc.is_some() {
4118        let backfill_store = store_handle.clone();
4119        let backfill_embedder = embedder_arc.clone();
4120        let backfill_batch = usize::try_from(app_config.resolve_embeddings().backfill_batch)
4121            .unwrap_or(crate::mcp::DEFAULT_EMBED_BACKFILL_BATCH_SIZE);
4122        task_handles.push(tokio::spawn(async move {
4123            let Some(emb) = backfill_embedder.as_ref() else {
4124                return;
4125            };
4126            // Operator-level maintenance path: must see (and re-embed)
4127            // every row regardless of metadata.scope — same posture as
4128            // the federation catchup loop. Sentinel principal, not a
4129            // literal, per the #1558 identity-sentinel SSOT.
4130            let ctx = crate::store::CallerContext::for_admin(
4131                crate::identity::sentinels::EMBEDDING_BACKFILL,
4132            );
4133            let written = crate::store::run_embedding_backfill_on_store(
4134                backfill_store.as_ref(),
4135                &ctx,
4136                emb,
4137                backfill_batch,
4138            )
4139            .await;
4140            if written > 0 {
4141                tracing::info!(
4142                    "embedding backfill (serve boot, #1579 A4): {written} row(s) embedded"
4143                );
4144            }
4145        }));
4146    }
4147
4148    // FED-P3b — outbound credential renewal worker. When this node holds a
4149    // CA-issued credential file (`AI_MEMORY_FED_CRED_PATH`), keep it fresh:
4150    // an external issuer rewrites the short-lived credential on renewal and
4151    // this worker swaps it into the live send path without a daemon
4152    // restart. Independent of the catchup interval; a no-op (not spawned)
4153    // when no credential path is configured.
4154    if federation.is_some()
4155        && std::env::var(federation::identity::credential::FED_CREDENTIAL_PATH_ENV).is_ok()
4156    {
4157        let renewal_interval = Duration::from_secs(
4158            federation::identity::renewal::DEFAULT_RENEWAL_INTERVAL_SECS.unsigned_abs(),
4159        );
4160        let _renewal_handle = federation::identity::renewal::spawn_refresh_outbound_credential(
4161            db_state.clone(),
4162            renewal_interval,
4163        );
4164        tracing::info!(
4165            "federation outbound credential renewal worker enabled: refreshing every {}s",
4166            renewal_interval.as_secs(),
4167        );
4168    }
4169
4170    if matches!(storage_backend, crate::handlers::StorageBackend::Postgres) {
4171        tracing::warn!(
4172            "v0.7.0 Wave-3: postgres-backed daemon — handlers that have not \
4173             yet migrated to the SAL trait surface 501 Not Implemented. See \
4174             docs/postgres-age-guide.md for the supported endpoint inventory."
4175        );
4176    }
4177
4178    let app_state = AppState {
4179        db: db_state.clone(),
4180        embedder: embedder_arc,
4181        vector_index: vector_index_state,
4182        federation: Arc::new(federation),
4183        tier_config: Arc::new(tier_config),
4184        scoring: Arc::new(app_config.effective_scoring()),
4185        profile: Arc::new(resolved_profile),
4186        mcp_config: Arc::new(mcp_config_for_http),
4187        active_keypair: Arc::new(active_keypair),
4188        family_embeddings,
4189        storage_backend,
4190        #[cfg(feature = "sal")]
4191        store: store_handle,
4192        llm: Arc::new(llm),
4193        // v0.7.0 L15 — dedicated auto_tag model from config.toml.
4194        auto_tag_model: Arc::new(app_config.auto_tag_model.clone()),
4195        // v0.7.0 H8 (round-2) — per-LLM-call timeout (default 30s).
4196        llm_call_timeout: Duration::from_secs(app_config.effective_llm_call_timeout_secs()),
4197        // v0.7.0 H5 (round-2) — fresh per-process replay cache + the
4198        // resolved `[verify] require_nonce` toggle. Default `false`
4199        // preserves verify-anytime semantics for unmigrated clients;
4200        // operators opt into strict mode via `config.toml`.
4201        replay_cache: Arc::new(crate::identity::replay::ReplayCache::new()),
4202        verify_require_nonce: app_config.verify.as_ref().is_some_and(|v| v.require_nonce),
4203        // #1255 (MED, 2026-05-25) — persistence-enabled federation
4204        // nonce cache. Rehydrates from disk on boot so a daemon
4205        // restart does NOT re-open the replay window for any
4206        // captured `(body, sig, nonce)` tuple. Falls back to the
4207        // in-memory-only constructor with a WARN log if persistence
4208        // open fails (e.g. disk pressure, locked file) — the daemon
4209        // continues to boot at the pre-#1255 posture rather than
4210        // crash-looping on a transient sqlite issue.
4211        federation_nonce_cache: Arc::new(
4212            match crate::identity::replay::FederationNonceCache::new_with_db_persistence(db_path) {
4213                Ok(c) => c,
4214                Err(e) => {
4215                    tracing::warn!(
4216                        target: "ai_memory::identity::replay",
4217                        db_path = %db_path.display(),
4218                        err = %e,
4219                        "#1255: FederationNonceCache persistence open failed; falling back to \
4220                         in-memory cache. Daemon restarts will reopen the replay window until \
4221                         operators resolve the underlying sqlite issue."
4222                    );
4223                    crate::identity::replay::FederationNonceCache::new()
4224                }
4225            },
4226        ),
4227        // v0.7.0 (issue #519) — resolved autonomous_hooks flag for the
4228        // HTTP create_memory path's proactive conflict-detection
4229        // helper. Falls back to false when unset (preserves v0.6.x
4230        // post-hoc-only contradiction surface).
4231        autonomous_hooks: app_config.effective_autonomous_hooks(),
4232        // v0.7.0 (issue #518) — resolved recall_scope defaults from
4233        // `[agents.defaults.recall_scope]`. None preserves v0.6.x
4234        // recall semantics (no splice on session_default=true).
4235        recall_scope: Arc::new(app_config.effective_recall_scope().cloned()),
4236        // v0.7.0 Policy-Engine Item 3 — deferred-audit producer handle.
4237        // Always Some on bootstrap_serve (the drainer was spawned
4238        // above, before the storage hook was installed). Wrapped in
4239        // Arc<Option<...>> per the AppState clone-cheap idiom.
4240        deferred_audit_queue: Arc::new(Some(deferred_audit_queue)),
4241        // v0.7.0 SHIP cluster (#946 / #957 / #960 / #961, 2026-05-20)
4242        // — operator-configured `[admin] agent_ids = [...]` allowlist.
4243        // `validated_agent_ids()` drops malformed entries with a
4244        // `warn` log so a single typo cannot lock the operator out;
4245        // an absent `[admin]` block resolves to an empty Vec which
4246        // closes every admin-class endpoint by default.
4247        //
4248        // #976 (2026-05-20): `AI_MEMORY_ADMIN_AGENT_IDS` env var
4249        // overrides the config-file allowlist. Comma-separated list of
4250        // agent_ids; `*` is the wildcard (everyone is admin —
4251        // appropriate for test daemons + container deploys where the
4252        // allowlist comes from orchestration secrets, not config.toml).
4253        // Same `validate_agent_id` filter applies; malformed entries
4254        // warn + drop. Precedence: env var > `[admin]` config block.
4255        admin_agent_ids: Arc::new(resolve_admin_agent_ids(app_config.admin.as_ref())),
4256        // v0.7.0 #991 — share the per-instance rule cache constructed
4257        // above (and already wired into both hook closures) with the
4258        // HTTP handler entry points. One cache per daemon lifetime.
4259        rule_cache: Arc::clone(&rule_cache),
4260        // v0.7.x (issue #1168) — operator-resolved LLM / embeddings /
4261        // reranker triple. Threaded into the HTTP `/api/v1/capabilities`
4262        // handler so the wire-reported `models.*` block mirrors the
4263        // running daemon's actual model wiring (matching the boot
4264        // banner + the live LLM client), NOT the compiled tier preset.
4265        // The resolver folds CLI / env / `[llm]` / legacy / compiled-
4266        // default precedence and the resulting triple is process-stable.
4267        resolved_models: Arc::new(app_config.resolve_models()),
4268        runtime: crate::runtime_context::RuntimeContext::global_arc(),
4269        // Operator-resolved `[limits].max_page_size` (env
4270        // `AI_MEMORY_MAX_PAGE_SIZE`) — per-request page / bulk
4271        // materialization bound for list / search / bulk-create /
4272        // federation-sync handlers. Falls back to the compiled
4273        // `MAX_BULK_SIZE` default when unset.
4274        max_page_size: app_config.resolve_limits().max_page_size,
4275    };
4276
4277    // v0.7.0 Policy-Engine Item 3 — register the deferred-audit
4278    // supervisor task with the task_handles vec so `serve()` aborts
4279    // it on shutdown. The supervisor wraps the drainer with panic
4280    // recovery + graceful drain of buffered events when the queue is
4281    // closed. Because the handle is counted in `task_handles`, the
4282    // assertion in `test_bootstrap_serve_keyword_tier_no_embedder`
4283    // must keep its expected handle count in sync with this push.
4284    task_handles.push(deferred_audit_supervisor);
4285
4286    // Automatic GC. Cluster G (#767) — pass through the operator-
4287    // tunable `[confidence] shadow_retention_days` so the periodic
4288    // sweep on `confidence_shadow_observations` runs at the configured
4289    // window (default 30 days).
4290    let shadow_retention_days = app_config.confidence.as_ref().map_or(
4291        crate::confidence::shadow::DEFAULT_SHADOW_RETENTION_DAYS,
4292        crate::config::ConfidenceConfig::effective_shadow_retention_days,
4293    );
4294    task_handles.push(spawn_gc_loop_with_shadow_retention(
4295        db_state.clone(),
4296        app_config.archive_max_days,
4297        shadow_retention_days,
4298        Duration::from_secs(GC_INTERVAL_SECS),
4299    ));
4300
4301    // v0.6.0 GA: periodic WAL checkpoint. Under continuous writes the WAL
4302    // file grows until SQLite's auto-checkpoint fires (every 1000 pages by
4303    // default) — which is inconsistent timing and can leave the file at
4304    // hundreds of MB between auto-checkpoints. A dedicated task running on
4305    // a fixed cadence keeps the WAL bounded and makes operational storage
4306    // behaviour predictable. We stagger from GC to avoid lock-contention
4307    // bursts. See docs/ARCHITECTURAL_LIMITS.md for why this workaround is
4308    // necessary in a single-connection daemon.
4309    task_handles.push(spawn_wal_checkpoint_loop(
4310        db_state.clone(),
4311        Duration::from_secs(WAL_CHECKPOINT_INTERVAL_SECS),
4312    ));
4313
4314    // v0.7.0 K2: pending_actions timeout sweeper. Closes the v0.6.3.1
4315    // honest-Capabilities-v2 disclosure that `default_timeout_seconds`
4316    // was advertised in v1 but unused. 60-second cadence; per-row
4317    // override via the `default_timeout_seconds` column. The global
4318    // default below is the fall-through when the per-row column is
4319    // NULL — matches the `doctor_oldest_pending_age_secs` 24h CRIT
4320    // window so a row that would already be flagged red also expires.
4321    task_handles.push(spawn_pending_timeout_sweep_loop(
4322        db_state.clone(),
4323        db_path.to_path_buf(),
4324        PENDING_TIMEOUT_DEFAULT_SECS,
4325        Duration::from_secs(PENDING_TIMEOUT_SWEEP_INTERVAL_SECS),
4326    ));
4327
4328    // v0.7.0 I3: transcript archive→prune lifecycle sweeper. Resolves
4329    // per-namespace TTL + grace from `[transcripts]` in config.toml
4330    // (compiled defaults: 30-day TTL, 7-day grace) and runs every 10
4331    // minutes — heavier than K2's 60s scan because phase 1 walks the
4332    // I2 join table per candidate. Companion to the K2 sweeper above:
4333    // both follow the same spawn-per-interval shape so shutdown +
4334    // observability behave identically.
4335    task_handles.push(spawn_transcript_lifecycle_sweep_loop(
4336        db_state.clone(),
4337        app_config.effective_transcripts(),
4338        Duration::from_secs(TRANSCRIPT_LIFECYCLE_SWEEP_INTERVAL_SECS),
4339    ));
4340
4341    // v0.7.0 K8: agent-quota daily-counter reset sweeper. Resets
4342    // `current_memories_today` + `current_links_today` for every row
4343    // whose `day_started_at` predates the current UTC date. 60-second
4344    // cadence — same shape as the K2 pending sweeper above. The
4345    // inline-roll branch in `crate::quotas::check_quota` /
4346    // `crate::quotas::record_op` is the per-write fallback so the
4347    // substrate stays honest even if this sweep is delayed.
4348    task_handles.push(spawn_agent_quota_reset_loop(
4349        db_state.clone(),
4350        Duration::from_secs(AGENT_QUOTA_RESET_INTERVAL_SECS),
4351    ));
4352
4353    // v0.7.0 fold-A2A1.4 (#702) — mtls_enforced is true when the
4354    // operator configured the full TLS+mTLS stack (cert+key+allowlist).
4355    // The api_key_auth middleware uses this to bypass the `x-api-key`
4356    // requirement on `/api/v1/sync/*` paths, because rustls has already
4357    // verified the client cert against the operator-pinned allowlist
4358    // — adding a shared-secret check on top is redundant and breaks
4359    // cross-host federation when the peer doesn't carry the secret.
4360    let mtls_enforced =
4361        args.tls_cert.is_some() && args.tls_key.is_some() && args.mtls_allowlist.is_some();
4362    let api_key_state = ApiKeyState {
4363        key: app_config.api_key.clone(),
4364        mtls_enforced,
4365    };
4366    if api_key_state.key.is_some() {
4367        if mtls_enforced {
4368            tracing::info!(
4369                "API key authentication enabled — federation endpoints (/api/v1/sync/*) \
4370                 bypass api-key check because mTLS allowlist is configured"
4371            );
4372        } else {
4373            tracing::info!("API key authentication enabled");
4374        }
4375    }
4376
4377    // #1570 (H6) — record whether request authentication is configured
4378    // so the shared admin-role gate can refuse to mint admin from a
4379    // bare self-asserted `X-Agent-Id` header on unauthenticated
4380    // deployments. Boot-time WARN when the operator configured admin
4381    // ids but the gate will refuse them all (no api_key, trust flag
4382    // off) — names the escape hatch so the remediation is one search
4383    // away. Mirrors the #1455 fail-closed convention.
4384    crate::handlers::admin_role::mark_request_authn_configured(api_key_state.key.is_some());
4385    if !app_state.admin_agent_ids.is_empty()
4386        && api_key_state.key.is_none()
4387        && !crate::handlers::admin_role::admin_header_trust_enabled()
4388    {
4389        tracing::warn!(
4390            "[admin].agent_ids is configured but no api_key is set: the X-Agent-Id header is \
4391             self-asserted, so admin-role requests will be REFUSED (403) until you either \
4392             configure an api_key or explicitly opt into the legacy header-trust posture with \
4393             {}=1 (#1570 secure default)",
4394            crate::handlers::admin_role::ENV_ADMIN_HEADER_TRUST,
4395        );
4396    }
4397
4398    Ok(ServeBootstrap {
4399        app_state,
4400        api_key_state,
4401        db_state,
4402        archive_max_days: app_config.archive_max_days,
4403        task_handles,
4404        daemon_keypair_outcome,
4405        // H7 (v0.7.0 round-2) — per-request HTTP timeout (default 60s).
4406        request_timeout: Duration::from_secs(app_config.effective_request_timeout_secs()),
4407        deferred_audit_metrics,
4408    })
4409}
4410
4411/// Init the tracing subscriber for the HTTP daemon. Idempotent at the
4412/// `tracing-subscriber` level — repeated calls log a warning and no-op
4413/// rather than panic. Split out from `serve()` so test code can opt out.
4414fn init_tracing() {
4415    let _ = tracing_subscriber::fmt()
4416        .with_env_filter(
4417            EnvFilter::from_default_env()
4418                .add_directive(crate::logging::DEFAULT_LOG_DIRECTIVE.parse().unwrap())
4419                .add_directive("tower_http=info".parse().unwrap()),
4420        )
4421        .try_init();
4422}
4423
4424/// Run the HTTP memory daemon. Loads TLS state, builds `AppState`, spawns
4425/// the GC + WAL-checkpoint loops, and binds a listener (TLS or plain HTTP).
4426///
4427/// Behaviour is preserved from the pre-W6 inline `main::serve` body — only
4428/// the structure has changed.
4429#[allow(clippy::too_many_lines)]
4430pub async fn serve(db_path: PathBuf, args: ServeArgs, app_config: &AppConfig) -> Result<()> {
4431    init_tracing();
4432
4433    let bootstrap = bootstrap_serve(&db_path, &args, app_config).await?;
4434
4435    // Round-2 F8 + Round-3 F12 — startup banner. Surfaces the effective
4436    // permissions mode (and the v0.7.0 enforce-default migration warning
4437    // when the operator has no `[permissions]` block in config) plus the
4438    // F12 keypair-autogen result captured by `ensure_and_load_daemon_keypair`
4439    // earlier in this fn.
4440    let banner_inputs = crate::cli::serve_banner::BannerInputs {
4441        // B4 (S5-M3) — `.and_then` (not `.map`) so a partial
4442        // `[permissions]` block without `mode = ` collapses to `None`
4443        // and the banner's migration WARN fires, matching
4444        // `AppConfig::effective_permissions_mode` semantics.
4445        configured_permissions_mode: app_config.permissions.as_ref().and_then(|p| p.mode),
4446        auto_generated_keypair_path: bootstrap.daemon_keypair_outcome.as_ref().and_then(
4447            |o| match o {
4448                crate::identity::keypair::EnsureOutcome::Generated { pub_path } => {
4449                    Some(pub_path.display().to_string())
4450                }
4451                _ => None,
4452            },
4453        ),
4454        identity_disabled: matches!(
4455            bootstrap.daemon_keypair_outcome,
4456            Some(crate::identity::keypair::EnsureOutcome::SkippedDisabled)
4457        ),
4458    };
4459    for line in crate::cli::serve_banner::compose_banner(&banner_inputs) {
4460        if line.is_warn() {
4461            tracing::warn!("{}", line.message());
4462        } else {
4463            tracing::info!("{}", line.message());
4464        }
4465    }
4466
4467    let addr = format!("{}:{}", args.host, args.port);
4468    tracing::info!("database: {}", db_path.display());
4469
4470    // Graceful shutdown. The signal future only waits for ctrl_c and
4471    // then resolves, which tells axum to begin graceful shutdown of
4472    // in-flight requests. The deferred-audit drain + WAL checkpoint run
4473    // AFTER the server has fully quiesced (below `serve`), so:
4474    //   1. no refusal submitted by an in-flight request is lost, and
4475    //   2. the final checkpoint captures every write — including the
4476    //      drainer's `signed_events` appends, which share the same WAL
4477    //      file even though the drainer holds its own connection.
4478    // v0.7.0 Policy-Engine Item 3 (audit-log-loss-on-shutdown fix): the
4479    // checkpoint used to live inside this future, firing at signal time
4480    // before in-flight requests (and the audit drainer) had quiesced —
4481    // so refusal rows submitted during graceful shutdown could be lost.
4482    let checkpoint_state = bootstrap.db_state.clone();
4483    let drain_metrics = bootstrap.deferred_audit_metrics.clone();
4484    let shutdown = async move {
4485        let _ = tokio::signal::ctrl_c().await;
4486        tracing::info!("shutting down — draining deferred-audit queue then checkpointing WAL");
4487    };
4488
4489    // Native TLS (Layer 1): if both --tls-cert and --tls-key are provided,
4490    // bind via axum-server + rustls. Plain HTTP otherwise — backward
4491    // compatible with every prior release. The `requires = …` clap
4492    // attributes prevent the half-configured case.
4493    if let (Some(cert), Some(key)) = (&args.tls_cert, &args.tls_key) {
4494        // rustls 0.23 needs an explicit CryptoProvider; install ring
4495        // before any TLS setup. Idempotent — second install is a
4496        // harmless no-op via ignore.
4497        let _ = rustls::crypto::ring::default_provider().install_default();
4498        // Load TLS / mTLS config BEFORE printing the "listening" log
4499        // so a misconfigured cert / key / allowlist surfaces the error
4500        // first (red-team #248).
4501        let tls_config = if let Some(allowlist_path) = &args.mtls_allowlist {
4502            tracing::info!(
4503                "mTLS enabled — client certs required. Allowlist: {}",
4504                allowlist_path.display()
4505            );
4506            tls::load_mtls_rustls_config(cert, key, allowlist_path).await?
4507        } else {
4508            tracing::warn!(
4509                "TLS enabled but mTLS NOT configured — sync endpoints \
4510                 (/api/v1/sync/push, /api/v1/sync/since) accept any client. \
4511                 Set --mtls-allowlist for production peer-mesh deployments \
4512                 (red-team #231)."
4513            );
4514            tls::load_rustls_config(cert, key).await?
4515        };
4516        let app = crate::build_router_with_timeout(
4517            bootstrap.api_key_state,
4518            bootstrap.app_state,
4519            bootstrap.request_timeout,
4520        );
4521        tracing::info!("ai-memory listening on https://{addr}");
4522        let socket_addr: std::net::SocketAddr = addr.parse()?;
4523        // axum-server doesn't have a direct graceful-shutdown on the
4524        // TLS builder yet; spawn the signal listener on the Handle
4525        // instead so ctrl_c triggers a graceful shutdown. Window is
4526        // operator-configurable via --shutdown-grace-secs (default 30,
4527        // bumped from 10 in v0.6.0 — red-team #233).
4528        let grace = std::time::Duration::from_secs(args.shutdown_grace_secs);
4529        let handle = axum_server::Handle::new();
4530        let handle_clone = handle.clone();
4531        tokio::spawn(async move {
4532            shutdown.await;
4533            handle_clone.graceful_shutdown(Some(grace));
4534        });
4535        // v0.7.0 #1581 — bind with the NoDelayAcceptor-wrapped rustls
4536        // acceptor instead of `bind_rustls` (whose DefaultAcceptor never
4537        // sets TCP_NODELAY). Without it, Nagle + the client's delayed-ACK
4538        // timer added a fixed ~40 ms to the FIRST request of every fresh
4539        // (m)TLS connection — the #1579 P3 fleet finding. Verifier chain
4540        // and accept/reject semantics are unchanged; see
4541        // `tls::serve_rustls_acceptor` + tests/mtls_nodelay_acceptor.rs.
4542        axum_server::bind(socket_addr)
4543            .acceptor(tls::serve_rustls_acceptor(&tls_config))
4544            .handle(handle)
4545            .serve(app.into_make_service())
4546            .await?;
4547    } else {
4548        tracing::warn!(
4549            "TLS NOT enabled — sync endpoints (/api/v1/sync/push, \
4550             /api/v1/sync/since) accept any caller over plain HTTP. \
4551             Set --tls-cert + --tls-key + --mtls-allowlist for production \
4552             peer-mesh deployments (red-team #231)."
4553        );
4554        tracing::info!("ai-memory listening on http://{addr}");
4555        // Wave 3 (v0.6.3): the non-TLS path delegates to
4556        // `daemon_runtime::serve_http_with_shutdown_future`, which is the
4557        // same `build_router` + `TcpListener::bind` + `axum::serve` body
4558        // the integration tests drive in-process. Production threads its
4559        // WAL-checkpoint-on-shutdown future in directly so the cleanup
4560        // semantic is preserved verbatim.
4561        serve_http_with_shutdown_future_and_timeout(
4562            &addr,
4563            bootstrap.api_key_state,
4564            bootstrap.app_state,
4565            bootstrap.request_timeout,
4566            shutdown,
4567        )
4568        .await?;
4569    }
4570
4571    // v0.7.0 Policy-Engine Item 3 — the HTTP server has now fully
4572    // quiesced (graceful shutdown complete; no in-flight request can
4573    // submit another refusal), so `submitted` is final. Drain the
4574    // deferred-audit queue before exit so every refusal captured during
4575    // the daemon's life lands in `signed_events`. We can NOT use
4576    // `close_and_flush` here: the governance hooks
4577    // (`storage::GOVERNANCE_PRE_WRITE`, `wire_check::GOVERNANCE_PRE_ACTION`)
4578    // hold sender clones inside process-wide `OnceLock`s that never drop,
4579    // so the channel never closes and awaiting the supervisor would block
4580    // forever. `drain_pending` instead polls the shared atomic metrics
4581    // until the drainer has caught up to the submitted count.
4582    let drained = crate::governance::deferred_audit::drain_pending(
4583        &drain_metrics,
4584        crate::governance::deferred_audit::DEFAULT_SHUTDOWN_DRAIN_TIMEOUT,
4585    )
4586    .await;
4587    if drained {
4588        tracing::info!(
4589            "deferred-audit queue drained ({} refusals accounted) — checkpointing WAL",
4590            drain_metrics.submitted_count()
4591        );
4592    } else {
4593        tracing::warn!(
4594            "deferred-audit drain timed out after {:?}: {} submitted but only {} accounted — \
4595             some refusal audit rows may not have flushed before exit",
4596            crate::governance::deferred_audit::DEFAULT_SHUTDOWN_DRAIN_TIMEOUT,
4597            drain_metrics.submitted_count(),
4598            drain_metrics.appended_count()
4599                + drain_metrics.append_failure_count()
4600                + drain_metrics.send_failure_count(),
4601        );
4602    }
4603
4604    // Final WAL checkpoint now that every writer (HTTP handlers + the
4605    // deferred-audit drainer) has quiesced. The drainer's appends share
4606    // this database's WAL file, so this single checkpoint folds them in
4607    // even though the drainer holds its own connection.
4608    {
4609        let lock = checkpoint_state.lock().await;
4610        let _ = db::checkpoint(&lock.0);
4611    }
4612
4613    Ok(())
4614}
4615
4616// ---------------------------------------------------------------------------
4617// cmd_bench / cmd_migrate (no-op for non-sal builds)
4618// ---------------------------------------------------------------------------
4619
4620fn cmd_bench(args: &BenchArgs) -> Result<()> {
4621    let iterations = args.iterations.clamp(1, crate::bench::MAX_ITERATIONS);
4622    let warmup = args.warmup.min(crate::bench::MAX_WARMUP);
4623    let regression_threshold = args
4624        .regression_threshold
4625        .clamp(0.0, crate::bench::MAX_REGRESSION_THRESHOLD_PCT);
4626    // Bench always seeds a disposable in-memory DB so the operator's
4627    // main DB (and disk) are untouched. SQLite's `:memory:` URL and
4628    // WAL-less mode keep the workload bounded by RAM and CPU.
4629    let conn = db::open(Path::new(":memory:"))?;
4630    // #1579 B8 — corpus scale (None = legacy default workload).
4631    let scale = args.scale.map(|s| s.clamp(1, crate::bench::MAX_SCALE));
4632    let config = bench::BenchConfig {
4633        iterations,
4634        warmup,
4635        namespace: bench::BENCH_NAMESPACE.to_string(),
4636        scale,
4637    };
4638    let results = bench::run(&conn, &config)?;
4639
4640    let regressions = if let Some(path) = &args.baseline {
4641        let baseline = bench::load_baseline(Path::new(path))?;
4642        Some(bench::compare_against_baseline(
4643            &results,
4644            &baseline,
4645            regression_threshold,
4646        ))
4647    } else {
4648        None
4649    };
4650
4651    if args.json {
4652        println!(
4653            "{}",
4654            serde_json::to_string_pretty(&serde_json::json!({
4655                "iterations": iterations,
4656                "warmup": warmup,
4657                "scale": scale,
4658                "results": results,
4659                "regressions": regressions,
4660            }))?
4661        );
4662    } else {
4663        print!("{}", bench::render_table(&results));
4664        if let Some(rows) = &regressions {
4665            println!();
4666            print!("{}", bench::render_regression_table(rows));
4667        }
4668    }
4669
4670    if let Some(history_path) = &args.history {
4671        let captured_at = chrono::Utc::now().to_rfc3339();
4672        bench::append_history(
4673            history_path,
4674            &captured_at,
4675            iterations,
4676            warmup,
4677            scale,
4678            &results,
4679        )?;
4680        let mut stderr = std::io::stderr().lock();
4681        let _ = writeln!(
4682            stderr,
4683            "bench: appended run to history file {}",
4684            history_path.display()
4685        );
4686    }
4687
4688    let budget_failed = results
4689        .iter()
4690        .any(|r| matches!(r.status, bench::Status::Fail));
4691    let regression_failed = regressions
4692        .as_ref()
4693        .is_some_and(|rows| rows.iter().any(|r| r.regressed));
4694
4695    if budget_failed && regression_failed {
4696        anyhow::bail!(
4697            "bench: at least one operation exceeded its p95 budget by >10% AND regressed >{regression_threshold:.1}% vs baseline"
4698        );
4699    }
4700    if budget_failed {
4701        anyhow::bail!("bench: at least one operation exceeded its p95 budget by >10%");
4702    }
4703    if regression_failed {
4704        anyhow::bail!(
4705            "bench: at least one operation regressed >{regression_threshold:.1}% vs baseline"
4706        );
4707    }
4708    Ok(())
4709}
4710
/// Copy memories from the `args.from` store to the `args.to` store and
/// print a report (JSON when `args.json` is set, plain text otherwise).
///
/// Both store URLs are passed through `redact_url_password` before they
/// are printed, so credentials embedded in the URL never reach stdout.
///
/// # Errors
///
/// Fails if either store cannot be opened, if the JSON report cannot be
/// serialized, or — after the report has been printed — if the
/// migration recorded one or more errors.
#[cfg(feature = "sal")]
async fn cmd_migrate(args: &MigrateArgs) -> Result<()> {
    let source = migrate::open_store(&args.from)
        .await
        .context("open source store")?;
    let target = migrate::open_store(&args.to)
        .await
        .context("open destination store")?;

    let outcome = migrate::migrate(
        source.as_ref(),
        target.as_ref(),
        args.batch,
        args.namespace.clone(),
        args.dry_run,
    )
    .await;

    // #1579 A3 (SECURITY) — the report echoes both store URLs; mask the
    // userinfo password so credentials never land in stdout / captured
    // CI logs.
    let from_display = crate::logging::redact_url_password(&args.from);
    let to_display = crate::logging::redact_url_password(&args.to);
    let error_count = outcome.errors.len();

    if args.json {
        let rendered = serde_json::to_string_pretty(&serde_json::json!({
            "from_url": from_display,
            "to_url": to_display,
            "memories_read": outcome.memories_read,
            "memories_written": outcome.memories_written,
            "batches": outcome.batches,
            "errors": outcome.errors,
            "dry_run": outcome.dry_run,
        }))?;
        println!("{rendered}");
    } else {
        println!("migration report");
        println!("  from:              {from_display}");
        println!("  to:                {to_display}");
        println!("  memories_read:     {}", outcome.memories_read);
        println!("  memories_written:  {}", outcome.memories_written);
        println!("  batches:           {}", outcome.batches);
        println!("  dry_run:           {}", outcome.dry_run);
        println!("  errors:            {error_count}");
        for e in &outcome.errors {
            println!("    - {e}");
        }
    }

    // The report is always printed first; only then does a non-empty
    // error list turn into a failing exit.
    if error_count > 0 {
        anyhow::bail!("migration completed with {} error(s)", error_count);
    }
    Ok(())
}
4761
4762// ---------------------------------------------------------------------------
4763// Pre-W6 helpers — in-process HTTP harness, sync-daemon body, curator-daemon body.
4764// ---------------------------------------------------------------------------
4765
4766/// Run the HTTP daemon (plain HTTP, no TLS) with a programmable shutdown.
4767///
4768/// Mirrors the `else` branch of `serve()` in pre-W6 `main.rs` (the non-TLS
4769/// path). Builds the production `Router` via `build_router`, binds a
4770/// `TcpListener` to `addr`, and runs `axum::serve` with a graceful-shutdown
4771/// future that resolves when `shutdown.notify_one()` is called.
4772///
4773/// Tests pass a known port (pick one via `free_port()` and pass
4774/// `127.0.0.1:<port>`). The function returns when shutdown completes;
4775/// callers can `tokio::spawn` it and `notify` to stop.
4776pub async fn serve_http_with_shutdown(
4777    addr: &str,
4778    api_key_state: ApiKeyState,
4779    app_state: AppState,
4780    shutdown: Arc<Notify>,
4781) -> Result<()> {
4782    serve_http_with_shutdown_future(addr, api_key_state, app_state, async move {
4783        shutdown.notified().await;
4784    })
4785    .await
4786}
4787
4788/// Variant of [`serve_http_with_shutdown`] that takes an arbitrary
4789/// shutdown future. The production `serve()` needs to run a WAL
4790/// checkpoint after the OS signal but before tearing down the listener;
4791/// that cleanup work is awkward to express through a `Notify` alone.
4792/// Accepting a `Future` lets the caller embed any async cleanup into the
4793/// shutdown future itself, while the helper keeps the `build_router` +
4794/// `TcpListener::bind` + `axum::serve` body it already owns.
4795pub async fn serve_http_with_shutdown_future<F>(
4796    addr: &str,
4797    api_key_state: ApiKeyState,
4798    app_state: AppState,
4799    shutdown: F,
4800) -> Result<()>
4801where
4802    F: std::future::Future<Output = ()> + Send + 'static,
4803{
4804    serve_http_with_shutdown_future_and_timeout(
4805        addr,
4806        api_key_state,
4807        app_state,
4808        Duration::from_secs(crate::config::DEFAULT_REQUEST_TIMEOUT_SECS),
4809        shutdown,
4810    )
4811    .await
4812}
4813
4814/// v0.7.0 H7 (round-2) — variant of [`serve_http_with_shutdown_future`]
4815/// that accepts an explicit per-request timeout. Used by tests to
4816/// drive the slow-POST edge directly.
4817pub async fn serve_http_with_shutdown_future_and_timeout<F>(
4818    addr: &str,
4819    api_key_state: ApiKeyState,
4820    app_state: AppState,
4821    request_timeout: Duration,
4822    shutdown: F,
4823) -> Result<()>
4824where
4825    F: std::future::Future<Output = ()> + Send + 'static,
4826{
4827    let app = crate::build_router_with_timeout(api_key_state, app_state, request_timeout);
4828    let listener = tokio::net::TcpListener::bind(addr)
4829        .await
4830        .with_context(|| format!("bind {addr}"))?;
4831    axum::serve(listener, app)
4832        .with_graceful_shutdown(shutdown)
4833        .await
4834        .context("axum::serve")?;
4835    Ok(())
4836}
4837
4838/// Run a single sync cycle against one peer — pull then push.
4839///
4840/// Lifted verbatim (modulo path-of-Path-vs-PathBuf) from the pre-W6
4841/// `main.rs::sync_cycle_once` so the integration sync-daemon test can
4842/// drive it without subprocess. The signature matches the private
4843/// main.rs helper 1:1 to keep call sites identical.
4844pub async fn sync_cycle_once(
4845    client: &reqwest::Client,
4846    db_path: &Path,
4847    local_agent_id: &str,
4848    peer_url: &str,
4849    api_key: Option<&str>,
4850    batch_size: usize,
4851) -> Result<()> {
4852    let peer_url = peer_url.trim_end_matches('/');
4853
4854    // --- PULL --------------------------------------------------------
4855    let since = {
4856        let conn = db::open(db_path)?;
4857        db::sync_state_load(&conn, local_agent_id)?
4858            .entries
4859            .get(peer_url)
4860            .cloned()
4861    };
4862
4863    let mut pull_url = format!(
4864        "{peer_url}/api/v1/sync/since?limit={batch_size}&peer={}",
4865        urlencoding_minimal(local_agent_id)
4866    );
4867    if let Some(ref s) = since {
4868        pull_url.push_str("&since=");
4869        pull_url.push_str(&urlencoding_minimal(s));
4870    }
4871
4872    // v0.7.0 #238/#239 — attach `x-peer-id` so the peer's
4873    // attestation + scope-allowlist substrate sees our self-claim.
4874    let mut req = client
4875        .get(&pull_url)
4876        .header(crate::HEADER_AGENT_ID, local_agent_id)
4877        .header(
4878            crate::federation::peer_attestation::PEER_ID_HEADER,
4879            local_agent_id,
4880        );
4881    if let Some(key) = api_key {
4882        req = req.header(crate::HEADER_API_KEY, key);
4883    }
4884    let resp = req.send().await?;
4885    if !resp.status().is_success() {
4886        anyhow::bail!("sync-daemon: pull status {}", resp.status());
4887    }
4888    let pulled: SyncSinceResponse = resp.json().await?;
4889    let pull_count = pulled.memories.len();
4890    let latest_pulled = pulled.memories.last().map(|m| m.updated_at.clone());
4891
4892    {
4893        let conn = db::open(db_path)?;
4894        for mem in &pulled.memories {
4895            if crate::validate::RequestValidator::validate_memory(mem).is_ok() {
4896                let _ = db::insert_if_newer(&conn, mem);
4897            }
4898        }
4899        if let Some(ref at) = latest_pulled {
4900            db::sync_state_observe(&conn, local_agent_id, peer_url, at)?;
4901        }
4902    }
4903
4904    // --- PUSH --------------------------------------------------------
4905    let last_pushed = {
4906        let conn = db::open(db_path)?;
4907        db::sync_state_last_pushed(&conn, local_agent_id, peer_url)
4908    };
4909    let outgoing = {
4910        let conn = db::open(db_path)?;
4911        db::memories_updated_since(&conn, last_pushed.as_deref(), batch_size)?
4912    };
4913    let push_count = outgoing.len();
4914    let latest_pushed = outgoing.last().map(|m| m.updated_at.clone());
4915
4916    if !outgoing.is_empty() {
4917        let body = serde_json::json!({
4918            (field_names::SENDER_AGENT_ID): local_agent_id,
4919            "sender_clock": { "entries": {} },
4920            "memories": outgoing,
4921            "dry_run": false,
4922        });
4923        // v0.7.0 #238 — attach `x-peer-id` so the receiver attests
4924        // body.sender_agent_id against our wire-level peer identity.
4925        let mut req = client
4926            .post(format!("{peer_url}/api/v1/sync/push"))
4927            .header(crate::HEADER_AGENT_ID, local_agent_id)
4928            .header(
4929                crate::federation::peer_attestation::PEER_ID_HEADER,
4930                local_agent_id,
4931            )
4932            .header(crate::HEADER_CONTENT_TYPE, crate::MIME_JSON)
4933            .json(&body);
4934        if let Some(key) = api_key {
4935            req = req.header(crate::HEADER_API_KEY, key);
4936        }
4937        let resp = req.send().await?;
4938        if !resp.status().is_success() {
4939            anyhow::bail!("sync-daemon: push status {}", resp.status());
4940        }
4941        if let Some(at) = latest_pushed {
4942            let conn = db::open(db_path)?;
4943            db::sync_state_record_push(&conn, local_agent_id, peer_url, &at)?;
4944        }
4945    }
4946
4947    tracing::info!("sync-daemon: peer={peer_url} pulled={pull_count} pushed={push_count}");
4948    Ok(())
4949}
4950
4951/// Run the sync-daemon main loop with a programmable shutdown.
4952///
4953/// Mirrors the body of the pre-W6 `cmd_sync_daemon()` in `main.rs`: for
4954/// each cycle, fan out a `JoinSet` across `peers`, then race a sleep
4955/// against the shutdown notify. Returns when the notify fires. The
4956/// integration test can build a one-cycle test by setting `interval_secs=1`
4957/// and notifying after a short tokio sleep.
4958pub async fn run_sync_daemon_with_shutdown(
4959    db_path: PathBuf,
4960    local_agent_id: String,
4961    peers: Vec<String>,
4962    api_key: Option<String>,
4963    interval_secs: u64,
4964    batch_size: usize,
4965    shutdown: Arc<Notify>,
4966) -> Result<()> {
4967    let client = reqwest::Client::builder()
4968        .timeout(Duration::from_secs(30))
4969        .build()?;
4970    run_sync_daemon_with_shutdown_using_client(
4971        client,
4972        db_path,
4973        local_agent_id,
4974        peers,
4975        api_key,
4976        interval_secs,
4977        batch_size,
4978        shutdown,
4979    )
4980    .await
4981}
4982
4983/// Variant of [`run_sync_daemon_with_shutdown`] that takes a caller-built
4984/// `reqwest::Client`. The production `cmd_sync_daemon()` constructs an
4985/// mTLS-aware client (via `build_rustls_client_config`) and threads it
4986/// in here so the helper drives the same loop body the test version
4987/// drives — keeping `daemon_runtime` as the single source of truth for
4988/// the sync-daemon loop while preserving the production TLS contract.
4989pub async fn run_sync_daemon_with_shutdown_using_client(
4990    client: reqwest::Client,
4991    db_path: PathBuf,
4992    local_agent_id: String,
4993    peers: Vec<String>,
4994    api_key: Option<String>,
4995    interval_secs: u64,
4996    batch_size: usize,
4997    shutdown: Arc<Notify>,
4998) -> Result<()> {
4999    let interval = interval_secs.max(1);
5000    let batch_size = batch_size.max(1);
5001
5002    let db_path_owned: Arc<Path> = Arc::from(db_path.as_path());
5003    let local_agent_id_arc: Arc<str> = Arc::from(local_agent_id.as_str());
5004    let api_key_arc: Option<Arc<str>> = api_key.as_deref().map(Arc::from);
5005    let peers_arc: Vec<Arc<str>> = peers.iter().map(|s| Arc::from(s.as_str())).collect();
5006    loop {
5007        let mut set: tokio::task::JoinSet<()> = tokio::task::JoinSet::new();
5008        for peer_url in &peers_arc {
5009            let client = client.clone();
5010            let db_path = db_path_owned.clone();
5011            let local_agent_id = local_agent_id_arc.clone();
5012            let peer_url = peer_url.clone();
5013            let api_key = api_key_arc.clone();
5014            set.spawn(async move {
5015                if let Err(e) = sync_cycle_once(
5016                    &client,
5017                    &db_path,
5018                    &local_agent_id,
5019                    &peer_url,
5020                    api_key.as_deref(),
5021                    batch_size,
5022                )
5023                .await
5024                {
5025                    tracing::warn!("sync-daemon: peer {peer_url} cycle failed: {e}");
5026                }
5027            });
5028        }
5029        while set.join_next().await.is_some() {}
5030
5031        tokio::select! {
5032            () = tokio::time::sleep(Duration::from_secs(interval)) => {}
5033            () = shutdown.notified() => {
5034                tracing::info!("sync-daemon: shutdown signal received");
5035                return Ok(());
5036            }
5037        }
5038    }
5039}
5040
5041/// Run the curator daemon with a programmable shutdown.
5042///
5043/// Mirrors the daemon arm of the pre-W6 `cmd_curator()`. The inner work is
5044/// `curator::run_daemon` (a blocking, tight-loop-with-`AtomicBool` already
5045/// in lib code), which we drive from a `spawn_blocking`. Tests fire the
5046/// `Notify` to set the shutdown bool and the blocking task observes it
5047/// within ~500ms (`run_daemon`'s sleep tick).
5048pub async fn run_curator_daemon_with_shutdown(
5049    db_path: PathBuf,
5050    cfg: crate::curator::CuratorConfig,
5051    shutdown: Arc<Notify>,
5052) -> Result<()> {
5053    let shutdown_flag = Arc::new(AtomicBool::new(false));
5054    let shutdown_flag_for_signal = shutdown_flag.clone();
5055    tokio::spawn(async move {
5056        shutdown.notified().await;
5057        shutdown_flag_for_signal.store(true, Ordering::Relaxed);
5058    });
5059
5060    let llm_arc: Option<Arc<crate::llm::OllamaClient>> = None;
5061    // Issue #816 — load the daemon signing keypair so the curator's
5062    // auto-persona sweep can produce signed persona rows. `None`
5063    // (no key on disk + auto-gen disabled) leaves the sweep no-op,
5064    // matching the pre-#816 behaviour.
5065    let (kp_opt, _outcome) = ensure_and_load_daemon_keypair();
5066    let active_keypair = kp_opt.map(Arc::new);
5067    let db_owned = db_path;
5068    tokio::task::spawn_blocking(move || {
5069        crate::curator::run_daemon(db_owned, llm_arc, cfg, shutdown_flag, active_keypair);
5070    })
5071    .await
5072    .map_err(|e| anyhow::anyhow!("curator daemon join: {e}"))?;
5073    Ok(())
5074}
5075
5076/// Curator-daemon loop body, primitive-arg flavour for the binary.
5077///
5078/// The caller supplies the already-resolved LLM client (built via
5079/// `build_curator_llm` so the `--daemon` path shares the identical
5080/// #1146-resolver result with the `--once` path — see #1440). `None`
5081/// disables the LLM, leaving keyword-only curation.
5082#[allow(clippy::too_many_arguments)]
5083pub async fn run_curator_daemon_with_primitives(
5084    db_path: PathBuf,
5085    interval_secs: u64,
5086    max_ops_per_cycle: usize,
5087    dry_run: bool,
5088    include_namespaces: Vec<String>,
5089    exclude_namespaces: Vec<String>,
5090    llm: Option<Arc<crate::llm::OllamaClient>>,
5091    shutdown: Arc<Notify>,
5092) -> Result<()> {
5093    let cfg = crate::curator::CuratorConfig {
5094        interval_secs,
5095        max_ops_per_cycle,
5096        dry_run,
5097        include_namespaces,
5098        exclude_namespaces,
5099        compaction: crate::curator::CompactionConfig::default(),
5100    };
5101
5102    let shutdown_flag = Arc::new(AtomicBool::new(false));
5103    let shutdown_flag_for_signal = shutdown_flag.clone();
5104    tokio::spawn(async move {
5105        shutdown.notified().await;
5106        shutdown_flag_for_signal.store(true, Ordering::Relaxed);
5107    });
5108
5109    // Issue #816 — load the daemon signing keypair for the auto-persona
5110    // sweep. Mirrors the load in `run_curator_daemon_with_shutdown`;
5111    // both daemon entry-points need the same keypair resolution so the
5112    // CLI (`ai-memory curator --daemon`) and the test-driven shutdown
5113    // flow both honour the same on-disk state.
5114    let (kp_opt, _outcome) = ensure_and_load_daemon_keypair();
5115    let active_keypair = kp_opt.map(Arc::new);
5116
5117    tokio::task::spawn_blocking(move || {
5118        crate::curator::run_daemon(db_path, llm, cfg, shutdown_flag, active_keypair);
5119    })
5120    .await
5121    .map_err(|e| anyhow::anyhow!("curator daemon join: {e}"))?;
5122    Ok(())
5123}
5124
5125// -----------------------------------------------------------------------
5126// helpers
5127// -----------------------------------------------------------------------
5128
/// Percent-encode `s` for use as a URL component.
///
/// ASCII letters, digits and `-`, `_`, `.`, `~` are copied through
/// unchanged; every other byte of the UTF-8 encoding is emitted as `%XX`
/// with two uppercase hex digits. That covers what the sync-daemon
/// queries emit (RFC3339 timestamps with `:` and `+`, and agent ids with
/// `:`/`@`/`/`). Mirror of the pre-W6 `main.rs::urlencoding_minimal`.
fn urlencoding_minimal(s: &str) -> String {
    const HEX_UPPER: &[u8; 16] = b"0123456789ABCDEF";

    let mut encoded = String::with_capacity(s.len());
    for byte in s.bytes() {
        let passes_through =
            byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_' | b'.' | b'~');
        if passes_through {
            encoded.push(char::from(byte));
        } else {
            // High nibble first, then low nibble.
            encoded.push('%');
            encoded.push(char::from(HEX_UPPER[usize::from(byte >> 4)]));
            encoded.push(char::from(HEX_UPPER[usize::from(byte & 0x0F)]));
        }
    }
    encoded
}
5148
5149/// Mirrors the pre-W6 `main.rs::SyncSinceResponse` — the fields we
5150/// deserialize from the peer's `/api/v1/sync/since` body. `count` and
5151/// `limit` are present in the wire payload but unused on the receive
5152/// side; allowed to be dead so `clippy::pedantic` doesn't trip.
5153#[derive(serde::Deserialize)]
5154struct SyncSinceResponse {
5155    #[allow(dead_code)]
5156    count: usize,
5157    #[allow(dead_code)]
5158    limit: usize,
5159    memories: Vec<crate::models::Memory>,
5160}
5161
/// No-op that names `Instant` and `Duration` in its signature.
///
/// It re-exports nothing and has an empty body; presumably it exists so
/// the file-level `std::time` imports always have a use site. Kept
/// private — internal to this module.
#[allow(dead_code)]
fn _imports_in_use(_instant: Instant, _duration: Duration) {}
5166
5167// ===========================================================================
5168// Tests
5169// ===========================================================================
5170
5171#[cfg(test)]
5172#[allow(deprecated)] // DOC-6: tests intentionally exercise legacy AppConfig flat fields
5173mod tests {
5174    use super::*;
5175    use crate::cli::test_utils::TestEnv;
5176    use crate::config::ResolvedTtl;
5177    use axum::body::Body;
5178    use axum::http::{Request, StatusCode};
5179    use tower::ServiceExt as _;
5180
5181    /// #1579 A3 (SECURITY) — regression pin: the Postgres SAL boot
5182    /// path must log the REDACTED store URL. Pre-fix,
5183    /// `build_store_handle` interpolated the raw `--store-url`
5184    /// (password included) into the INFO boot line, shipping the
5185    /// credential to journald / any log sink. The INFO line fires
5186    /// before the connect attempt, so an unreachable port (`:1`)
5187    /// still exercises the log site; the connect error itself is
5188    /// expected and asserted as `Err`.
5189    #[cfg(feature = "sal-postgres")]
5190    #[tokio::test]
5191    async fn issue_1579_a3_boot_log_redacts_store_url_password() {
5192        use std::sync::{Arc, Mutex};
5193
5194        #[derive(Clone, Default)]
5195        struct SharedBuf(Arc<Mutex<Vec<u8>>>);
5196        impl std::io::Write for SharedBuf {
5197            fn write(&mut self, b: &[u8]) -> std::io::Result<usize> {
5198                self.0.lock().expect("buf lock").extend_from_slice(b);
5199                Ok(b.len())
5200            }
5201            fn flush(&mut self) -> std::io::Result<()> {
5202                Ok(())
5203            }
5204        }
5205
5206        let buf = SharedBuf::default();
5207        let writer_buf = buf.clone();
5208        let subscriber = tracing_subscriber::fmt()
5209            .with_max_level(tracing::Level::INFO)
5210            .with_ansi(false)
5211            .with_writer(move || writer_buf.clone())
5212            .finish();
5213        // Thread-local default — `#[tokio::test]` runs the future on
5214        // the current thread, so every log the boot path emits during
5215        // the await lands in `buf`.
5216        let _guard = tracing::subscriber::set_default(subscriber);
5217
5218        let secret = "sup3r-s3cret-pw";
5219        let url = format!("postgres://ai_memory:{secret}@127.0.0.1:1/ai_memory");
5220        let dir = tempfile::tempdir().expect("tempdir");
5221        let db_path = dir.path().join("unused.db");
5222        let res = build_store_handle(
5223            Some(&url),
5224            &db_path,
5225            None,
5226            Some(384),
5227            crate::store::PoolConfig::default(),
5228        )
5229        .await;
5230        assert!(res.is_err(), "port 1 must refuse the connection");
5231
5232        let logs = String::from_utf8_lossy(&buf.0.lock().expect("buf lock")).to_string();
5233        assert!(
5234            logs.contains("opening Postgres SAL store at postgres://ai_memory:****@127.0.0.1:1"),
5235            "boot line must log the redacted URL; got:\n{logs}"
5236        );
5237        assert!(
5238            !logs.contains(secret),
5239            "store-URL password leaked into the boot log:\n{logs}"
5240        );
5241    }
5242
5243    /// #1455 (SEC, MED) — when a governance hook's rule-consultation
5244    /// connection could not be opened at install time, the gate MUST
5245    /// fail CLOSED by default (return `Err`), and only degrade to ALLOW
5246    /// when the operator explicitly opts into the legacy permissive
5247    /// posture. The pre-#1455 behaviour silently degraded to ALLOW,
5248    /// disabling the entire substrate write-gate whenever `db::open`
5249    /// failed at boot.
5250    #[test]
5251    fn governance_consultation_unavailable_fails_closed_by_default_1455() {
5252        use crate::governance::agent_action::AgentAction;
5253        use crate::governance::deferred_audit::DeferredAuditQueue;
5254
5255        // Keep the receiver alive so the audit submit doesn't trip the
5256        // closed-receiver WARN path (cosmetic; not under test here).
5257        let (queue, _rx) = DeferredAuditQueue::new();
5258        let action = AgentAction::Custom {
5259            custom_kind: "memory_write".to_string(),
5260            payload: serde_json::json!({ "namespace": "ns", "tier": "long" }),
5261        };
5262        let path = Path::new("/nonexistent/rules.db");
5263
5264        // Secure default: no operator override ⇒ fail CLOSED.
5265        let closed = governance_consultation_unavailable_inner(
5266            &queue,
5267            "agent:test",
5268            &action,
5269            path,
5270            "test-surface",
5271            false,
5272        );
5273        let reason = closed.expect_err("missing consultation conn MUST fail CLOSED");
5274        assert!(
5275            reason.contains("consultation_unavailable"),
5276            "fail-closed reason must name the cause: {reason}"
5277        );
5278
5279        // Operator override ⇒ legacy permissive ALLOW.
5280        let opened = governance_consultation_unavailable_inner(
5281            &queue,
5282            "agent:test",
5283            &action,
5284            path,
5285            "test-surface",
5286            true,
5287        );
5288        assert!(
5289            opened.is_ok(),
5290            "fail_open override MUST degrade to ALLOW (legacy posture)"
5291        );
5292    }
5293
5294    /// #1455 — the env-reading wrapper honours the documented
5295    /// `AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR` truthy values and
5296    /// defaults to `false` (fail-closed) when unset.
5297    #[test]
5298    fn governance_fail_open_on_error_env_parse_1455() {
5299        // Unset → secure default.
5300        unsafe { std::env::remove_var("AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR") };
5301        assert!(!governance_fail_open_on_error());
5302        // Truthy forms → permissive.
5303        unsafe { std::env::set_var("AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR", "1") };
5304        assert!(governance_fail_open_on_error());
5305        unsafe { std::env::set_var("AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR", "TRUE") };
5306        assert!(governance_fail_open_on_error());
5307        // Falsy / junk → secure default.
5308        unsafe { std::env::set_var("AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR", "0") };
5309        assert!(!governance_fail_open_on_error());
5310        unsafe { std::env::remove_var("AI_MEMORY_GOVERNANCE_FAIL_OPEN_ON_ERROR") };
5311    }
5312
5313    // ---- #1458 (SEC, MED): api_key bind guard ------------------------------
5314
5315    /// With an api_key configured the guard permits any bind silently.
5316    #[test]
5317    fn api_key_bind_guard_present_binds_silently_1458() {
5318        assert_eq!(api_key_bind_guard(true, "0.0.0.0", false).unwrap(), None);
5319        assert_eq!(api_key_bind_guard(true, "127.0.0.1", true).unwrap(), None);
5320    }
5321
5322    /// Keyless loopback bind is permitted but MUST warn about the
5323    /// reverse-proxy/host-network re-exposure hazard.
5324    #[test]
5325    fn api_key_bind_guard_keyless_loopback_warns_1458() {
5326        for host in ["127.0.0.1", "::1", "localhost", "[::1]", "0:0:0:0:0:0:0:1"] {
5327            let warning = api_key_bind_guard(false, host, false)
5328                .unwrap()
5329                .unwrap_or_else(|| panic!("keyless loopback {host} must warn, not bind silently"));
5330            assert!(
5331                warning.contains("reverse proxy") && warning.contains("off-host"),
5332                "warning must name the proxy hazard for {host}: {warning}"
5333            );
5334        }
5335    }
5336
5337    /// Keyless non-loopback bind is refused outright.
5338    #[test]
5339    fn api_key_bind_guard_keyless_non_loopback_refuses_1458() {
5340        let err = api_key_bind_guard(false, "0.0.0.0", false)
5341            .expect_err("keyless non-loopback bind MUST be refused");
5342        assert!(err.contains("refusing to bind to non-loopback"), "{err}");
5343    }
5344
5345    /// The strict opt-in refuses a keyless start even on loopback,
5346    /// because the loopback host string cannot see a fronting proxy.
5347    #[test]
5348    fn api_key_bind_guard_strict_refuses_keyless_loopback_1458() {
5349        let err = api_key_bind_guard(false, "127.0.0.1", true)
5350            .expect_err("strict mode MUST refuse keyless loopback bind");
5351        assert!(
5352            err.contains("AI_MEMORY_REQUIRE_API_KEY"),
5353            "strict refusal must name the knob: {err}"
5354        );
5355        // Strict is moot when a key IS present.
5356        assert_eq!(api_key_bind_guard(true, "127.0.0.1", true).unwrap(), None);
5357    }
5358
5359    /// The strict-mode env parser honours truthy forms and defaults off.
5360    #[test]
5361    fn require_api_key_strict_env_parse_1458() {
5362        unsafe { std::env::remove_var("AI_MEMORY_REQUIRE_API_KEY") };
5363        assert!(!require_api_key_strict());
5364        unsafe { std::env::set_var("AI_MEMORY_REQUIRE_API_KEY", "1") };
5365        assert!(require_api_key_strict());
5366        unsafe { std::env::set_var("AI_MEMORY_REQUIRE_API_KEY", "TRUE") };
5367        assert!(require_api_key_strict());
5368        unsafe { std::env::set_var("AI_MEMORY_REQUIRE_API_KEY", "0") };
5369        assert!(!require_api_key_strict());
5370        unsafe { std::env::remove_var("AI_MEMORY_REQUIRE_API_KEY") };
5371    }
5372
5373    // ----- helpers -------------------------------------------------------
5374
5375    fn args_with_db(_db: &Path) -> ServeArgs {
5376        ServeArgs {
5377            host: "127.0.0.1".to_string(),
5378            port: 0,
5379            tls_cert: None,
5380            tls_key: None,
5381            mtls_allowlist: None,
5382            shutdown_grace_secs: 30,
5383            quorum_writes: 0,
5384            quorum_peers: vec![],
5385            quorum_timeout_ms: 2000,
5386            quorum_client_cert: None,
5387            quorum_client_key: None,
5388            quorum_ca_cert: None,
5389            catchup_interval_secs: 0,
5390            federation_identity: None,
5391            #[cfg(feature = "sal")]
5392            store_url: None,
5393        }
5394    }
5395
5396    fn keyword_app_state(db_path: &Path) -> AppState {
5397        let conn = db::open(db_path).unwrap();
5398        let db_state: Db = Arc::new(Mutex::new((
5399            conn,
5400            db_path.to_path_buf(),
5401            ResolvedTtl::default(),
5402            true,
5403        )));
5404        AppState {
5405            db: db_state,
5406            embedder: Arc::new(None),
5407            vector_index: Arc::new(Mutex::new(None)),
5408            federation: Arc::new(None),
5409            tier_config: Arc::new(FeatureTier::Keyword.config()),
5410            scoring: Arc::new(crate::config::ResolvedScoring::default()),
5411            profile: Arc::new(crate::profile::Profile::core()),
5412            mcp_config: Arc::new(None),
5413            active_keypair: Arc::new(None),
5414            family_embeddings: Arc::new(tokio::sync::RwLock::new(Some(Vec::new()))),
5415            storage_backend: crate::handlers::StorageBackend::Sqlite,
5416            #[cfg(feature = "sal")]
5417            store: {
5418                let s = crate::store::sqlite::SqliteStore::open(db_path)
5419                    .expect("open SqliteStore for keyword_app_state");
5420                Arc::new(s)
5421            },
5422            llm: Arc::new(None),
5423            auto_tag_model: Arc::new(None),
5424            llm_call_timeout: Duration::from_secs(crate::config::DEFAULT_LLM_CALL_TIMEOUT_SECS),
5425            replay_cache: Arc::new(crate::identity::replay::ReplayCache::new()),
5426            verify_require_nonce: false,
5427            federation_nonce_cache: Arc::new(crate::identity::replay::FederationNonceCache::new()),
5428            autonomous_hooks: false,
5429            recall_scope: Arc::new(None),
5430            deferred_audit_queue: Arc::new(None),
5431            admin_agent_ids: Arc::new(Vec::new()),
5432            // v0.7.0 #991 — fresh per-test cache. No invalidation
5433            // required: tests don't share this AppState across rule
5434            // writes (each test that mutates rules opens its own
5435            // `fresh_conn()`).
5436            rule_cache: Arc::new(crate::governance::rule_cache::RuleCache::new()),
5437            resolved_models: Arc::new(crate::config::ResolvedModels::default()),
5438            runtime: crate::runtime_context::RuntimeContext::global_arc(),
5439            max_page_size: crate::handlers::MAX_BULK_SIZE,
5440        }
5441    }
5442
5443    /// Mutex env-var guard. Tests that flip env vars must serialize to
5444    /// avoid clobbering each other; `cargo test --test-threads=2` is the
5445    /// upstream gate but a per-test mutex keeps the tests honest.
5446    fn env_var_lock() -> std::sync::MutexGuard<'static, ()> {
5447        use std::sync::OnceLock;
5448        static LOCK: OnceLock<std::sync::Mutex<()>> = OnceLock::new();
5449        LOCK.get_or_init(|| std::sync::Mutex::new(()))
5450            .lock()
5451            .unwrap_or_else(|e| e.into_inner())
5452    }
5453
5454    // ----- is_write_command ---------------------------------------------
5455
5456    #[test]
5457    fn test_is_write_command_all_variants() {
5458        // Use clap's parser to build every Command variant. This avoids
5459        // having to know each Args struct's required-field set by name —
5460        // we just feed the same argv form an operator would use, and
5461        // assert the predicate returns the right answer.
5462        //
5463        // Writes (post-run WAL checkpoint expected):
5464        let writes: &[&[&str]] = &[
5465            &["ai-memory", "store", "title", "content"],
5466            &["ai-memory", "update", "id123", "--title", "t"],
5467            &["ai-memory", "delete", "id123"],
5468            &["ai-memory", "promote", "id123"],
5469            &["ai-memory", "forget", "pattern"],
5470            &["ai-memory", "link", "a", "b"],
5471            &["ai-memory", "consolidate", "ids"],
5472            &["ai-memory", "resolve", "a", "b"],
5473            &["ai-memory", "sync", "--peer", "/tmp/peer.db"],
5474            &[
5475                "ai-memory",
5476                "sync-daemon",
5477                "--peers",
5478                "http://x",
5479                "--interval-secs",
5480                "60",
5481            ],
5482            &["ai-memory", "import"],
5483            &["ai-memory", "auto-consolidate"],
5484            &["ai-memory", "gc"],
5485        ];
5486        let mut writes_checked = 0;
5487        for argv in writes {
5488            // Skip a variant whose required-field set our argv doesn't
5489            // match (clap will reject it). We still get coverage from the
5490            // variants that parse cleanly, which is the bulk.
5491            if let Ok(cli) = Cli::try_parse_from(*argv) {
5492                assert!(
5493                    is_write_command(&cli.command),
5494                    "expected write for {argv:?}"
5495                );
5496                writes_checked += 1;
5497            }
5498        }
5499        assert!(
5500            writes_checked >= 5,
5501            "expected at least 5 write variants checked, got {writes_checked}"
5502        );
5503
5504        // Reads / no-ops (no checkpoint expected):
5505        let reads: &[&[&str]] = &[
5506            &["ai-memory", "mcp"],
5507            &["ai-memory", "recall", "context"],
5508            &["ai-memory", "search", "query"],
5509            &["ai-memory", "get", "id"],
5510            &["ai-memory", "list"],
5511            &["ai-memory", "stats"],
5512            &["ai-memory", "namespaces"],
5513            &["ai-memory", "export"],
5514            &["ai-memory", "shell"],
5515            &["ai-memory", "man"],
5516            &["ai-memory", "completions", "bash"],
5517            &["ai-memory", "archive", "list"],
5518            &["ai-memory", "agents", "list"],
5519            &["ai-memory", "pending", "list"],
5520            &["ai-memory", "bench"],
5521            &["ai-memory", "serve", "--host", "127.0.0.1", "--port", "0"],
5522        ];
5523        let mut reads_checked = 0;
5524        for argv in reads {
5525            if let Ok(cli) = Cli::try_parse_from(*argv) {
5526                assert!(
5527                    !is_write_command(&cli.command),
5528                    "expected read for {argv:?}"
5529                );
5530                reads_checked += 1;
5531            }
5532        }
5533        assert!(
5534            reads_checked >= 8,
5535            "expected at least 8 read variants checked, got {reads_checked}"
5536        );
5537
5538        // Direct construction of the Args-less variants (10 variants
5539        // covered programmatically by clap above; pin the no-Args ones
5540        // here too for explicitness):
5541        assert!(is_write_command(&Command::Gc));
5542        assert!(!is_write_command(&Command::Stats));
5543        assert!(!is_write_command(&Command::Namespaces));
5544        assert!(!is_write_command(&Command::Export));
5545        assert!(!is_write_command(&Command::Shell));
5546        assert!(!is_write_command(&Command::Man));
5547        assert!(!is_write_command(&Command::Mcp {
5548            tier: "keyword".to_string(),
5549            profile: None,
5550        }));
5551    }
5552
5553    // ----- build_router via lib::build_router ---------------------------
5554
5555    #[tokio::test]
5556    async fn test_router_has_health_endpoint() {
5557        let env = TestEnv::fresh();
5558        let app_state = keyword_app_state(&env.db_path);
5559        let api_key_state = ApiKeyState {
5560            key: None,
5561            mtls_enforced: false,
5562        };
5563        let router = build_router(app_state, api_key_state);
5564        let resp = router
5565            .oneshot(
5566                Request::builder()
5567                    .method("GET")
5568                    .uri("/api/v1/health")
5569                    .body(Body::empty())
5570                    .unwrap(),
5571            )
5572            .await
5573            .unwrap();
5574        assert_eq!(resp.status(), StatusCode::OK);
5575    }
5576
5577    #[tokio::test]
5578    async fn test_router_has_metrics_at_both_paths() {
5579        let env = TestEnv::fresh();
5580        let app_state = keyword_app_state(&env.db_path);
5581        let api_key_state = ApiKeyState {
5582            key: None,
5583            mtls_enforced: false,
5584        };
5585        // /metrics
5586        let r1 = build_router(app_state.clone(), api_key_state.clone())
5587            .oneshot(
5588                Request::builder()
5589                    .method("GET")
5590                    .uri("/metrics")
5591                    .body(Body::empty())
5592                    .unwrap(),
5593            )
5594            .await
5595            .unwrap();
5596        assert_eq!(r1.status(), StatusCode::OK);
5597        // /api/v1/metrics
5598        let r2 = build_router(app_state, api_key_state)
5599            .oneshot(
5600                Request::builder()
5601                    .method("GET")
5602                    .uri("/api/v1/metrics")
5603                    .body(Body::empty())
5604                    .unwrap(),
5605            )
5606            .await
5607            .unwrap();
5608        assert_eq!(r2.status(), StatusCode::OK);
5609    }
5610
5611    #[tokio::test]
5612    async fn test_router_lists_all_v1_memory_routes() {
5613        let env = TestEnv::fresh();
5614        let app_state = keyword_app_state(&env.db_path);
5615        let api_key_state = ApiKeyState {
5616            key: None,
5617            mtls_enforced: false,
5618        };
5619        let router = build_router(app_state, api_key_state);
5620        let resp = router
5621            .oneshot(
5622                Request::builder()
5623                    .method("GET")
5624                    .uri("/api/v1/memories")
5625                    .body(Body::empty())
5626                    .unwrap(),
5627            )
5628            .await
5629            .unwrap();
5630        // Empty DB returns 200 with an empty list — anything non-error
5631        // proves the route is wired in.
5632        assert!(resp.status().is_success(), "got {}", resp.status());
5633    }
5634
5635    #[tokio::test]
5636    async fn test_router_applies_api_key_middleware_when_key_set() {
5637        let env = TestEnv::fresh();
5638        let app_state = keyword_app_state(&env.db_path);
5639        let api_key_state = ApiKeyState {
5640            key: Some("s3cret".to_string()),
5641            mtls_enforced: false,
5642        };
5643        let router = build_router(app_state, api_key_state);
5644        let resp = router
5645            .oneshot(
5646                Request::builder()
5647                    .method("GET")
5648                    .uri("/api/v1/memories")
5649                    .body(Body::empty())
5650                    .unwrap(),
5651            )
5652            .await
5653            .unwrap();
5654        assert_eq!(resp.status(), StatusCode::UNAUTHORIZED);
5655    }
5656
5657    #[tokio::test]
5658    async fn test_router_skips_api_key_middleware_when_key_none() {
5659        let env = TestEnv::fresh();
5660        let app_state = keyword_app_state(&env.db_path);
5661        let api_key_state = ApiKeyState {
5662            key: None,
5663            mtls_enforced: false,
5664        };
5665        let router = build_router(app_state, api_key_state);
5666        let resp = router
5667            .oneshot(
5668                Request::builder()
5669                    .method("GET")
5670                    .uri("/api/v1/memories")
5671                    .body(Body::empty())
5672                    .unwrap(),
5673            )
5674            .await
5675            .unwrap();
5676        assert_eq!(resp.status(), StatusCode::OK);
5677    }
5678
5679    // ----- build_embedder ------------------------------------------------
5680
5681    #[tokio::test]
5682    async fn test_build_embedder_keyword_tier_returns_none() {
5683        let cfg = AppConfig::default();
5684        let emb = build_embedder(FeatureTier::Keyword, &cfg).await;
5685        assert!(emb.is_none());
5686    }
5687
5688    #[tokio::test]
5689    async fn test_build_embedder_load_failure_returns_none() {
5690        // Can't easily induce a load failure without network — skip here.
5691        // Keyword tier covers the None branch; the ERROR-level fallback
5692        // path requires a live HF-hub-style mock, which is out of scope
5693        // for a unit test. The semantic-tier success/failure path is
5694        // exercised under `feature = "test-with-models"` in the
5695        // recall integration tests.
5696        // This test stays as a smoke check — it doesn't attempt to load.
5697    }
5698
5699    /// Issue #840 coverage — exercise the `app_config.embedding_model`
5700    /// override branch in `build_embedder` (daemon_runtime.rs L1504-1523).
5701    /// The keyword tier has no tier-preset model, so when the override is
5702    /// unparseable the resolution ladder falls through to `None` without
5703    /// attempting an HF-hub fetch. This pins the parse-failure log path
5704    /// and the `None` fallback that the L2 comment documents.
5705    #[tokio::test]
5706    async fn test_build_embedder_invalid_override_falls_back_to_preset() {
5707        let mut cfg = AppConfig::default();
5708        cfg.embedding_model = Some("not-a-real-embedding-model-2026".to_string());
5709        // Keyword tier preset is None; override parse fails → falls back
5710        // to preset None → returns None without touching HF-hub.
5711        let emb = build_embedder(FeatureTier::Keyword, &cfg).await;
5712        assert!(
5713            emb.is_none(),
5714            "unparseable override + keyword tier must return None"
5715        );
5716    }
5717
5718    // ----- resolve_embedder_model (#1521 precedence) --------------------
5719
5720    /// #1521 — the sectioned `[embeddings].model` block must beat the
5721    /// tier preset. Semantic tier presets MiniLM; a section pinning nomic
5722    /// must win. This is the core regression the issue describes (the
5723    /// section was silently dropped in favour of the preset).
5724    #[test]
5725    fn resolve_embedder_model_section_beats_tier_preset() {
5726        let mut cfg = AppConfig::default();
5727        cfg.embeddings = Some(crate::config::EmbeddingsSection {
5728            model: Some("nomic_embed_v15".to_string()),
5729            ..crate::config::EmbeddingsSection::default()
5730        });
5731        let tier = FeatureTier::Semantic.config();
5732        assert_eq!(
5733            resolve_embedder_model(&tier, &cfg),
5734            Some(crate::config::EmbeddingModel::NomicEmbedV15),
5735            "[embeddings].model must override the Semantic tier MiniLM preset"
5736        );
5737    }
5738
5739    /// #1521 — the deprecated flat `embedding_model` field must still be
5740    /// honored when no section is present (backward compat).
5741    #[test]
5742    fn resolve_embedder_model_legacy_flat_still_honored() {
5743        let mut cfg = AppConfig::default();
5744        cfg.embedding_model = Some("nomic_embed_v15".to_string());
5745        let tier = FeatureTier::Semantic.config();
5746        assert_eq!(
5747            resolve_embedder_model(&tier, &cfg),
5748            Some(crate::config::EmbeddingModel::NomicEmbedV15),
5749            "legacy flat embedding_model must still override the preset"
5750        );
5751    }
5752
5753    /// #1521 — when BOTH are set the section wins over the legacy flat
5754    /// field (precedence ladder ordering).
5755    #[test]
5756    fn resolve_embedder_model_section_beats_legacy_flat() {
5757        let mut cfg = AppConfig::default();
5758        cfg.embedding_model = Some("nomic_embed_v15".to_string());
5759        cfg.embeddings = Some(crate::config::EmbeddingsSection {
5760            model: Some("mini_lm_l6_v2".to_string()),
5761            ..crate::config::EmbeddingsSection::default()
5762        });
5763        let tier = FeatureTier::Semantic.config();
5764        assert_eq!(
5765            resolve_embedder_model(&tier, &cfg),
5766            Some(crate::config::EmbeddingModel::MiniLmL6V2),
5767            "[embeddings].model must win over legacy flat embedding_model"
5768        );
5769    }
5770
5771    /// #1521 — a url-only section (no model key) must NOT force a model;
5772    /// the tier preset is kept. Guards against keying the model decision
5773    /// off `ResolvedEmbeddings.model` (which defaults to nomic whenever
5774    /// any `[embeddings]` key is present).
5775    #[test]
5776    fn resolve_embedder_model_url_only_section_keeps_preset() {
5777        let mut cfg = AppConfig::default();
5778        cfg.embeddings = Some(crate::config::EmbeddingsSection {
5779            url: Some("http://127.0.0.1:11435".to_string()),
5780            ..crate::config::EmbeddingsSection::default()
5781        });
5782        let tier = FeatureTier::Semantic.config();
5783        assert_eq!(
5784            resolve_embedder_model(&tier, &cfg),
5785            Some(crate::config::EmbeddingModel::MiniLmL6V2),
5786            "url-only section must keep the Semantic MiniLM preset"
5787        );
5788    }
5789
5790    /// #1521 — a configured model the 2-model daemon embedder cannot
5791    /// construct degrades to the tier preset rather than disabling.
5792    #[test]
5793    fn resolve_embedder_model_unsupported_id_falls_back_to_preset() {
5794        let mut cfg = AppConfig::default();
5795        cfg.embeddings = Some(crate::config::EmbeddingsSection {
5796            model: Some("bge-large-en".to_string()),
5797            ..crate::config::EmbeddingsSection::default()
5798        });
5799        let tier = FeatureTier::Semantic.config();
5800        assert_eq!(
5801            resolve_embedder_model(&tier, &cfg),
5802            Some(crate::config::EmbeddingModel::MiniLmL6V2),
5803            "unsupported model id must fall back to the tier preset"
5804        );
5805    }
5806
5807    /// #1521 — nothing configured at any layer: keyword tier (no preset)
5808    /// yields None; semantic tier yields its MiniLM preset.
5809    #[test]
5810    fn resolve_embedder_model_unconfigured_uses_tier_preset() {
5811        let cfg = AppConfig::default();
5812        assert_eq!(
5813            resolve_embedder_model(&FeatureTier::Keyword.config(), &cfg),
5814            None,
5815            "keyword tier has no preset → None"
5816        );
5817        assert_eq!(
5818            resolve_embedder_model(&FeatureTier::Semantic.config(), &cfg),
5819            Some(crate::config::EmbeddingModel::MiniLmL6V2),
5820            "semantic tier preset is MiniLM"
5821        );
5822    }
5823
5824    // ----- build_vector_index -------------------------------------------
5825
5826    #[test]
5827    fn test_build_vector_index_no_embedder_returns_none() {
5828        let env = TestEnv::fresh();
5829        let conn = db::open(&env.db_path).unwrap();
5830        assert!(build_vector_index(&conn, false).is_none());
5831    }
5832
5833    #[test]
5834    fn test_build_vector_index_empty_db_returns_empty_index() {
5835        let env = TestEnv::fresh();
5836        let conn = db::open(&env.db_path).unwrap();
5837        let idx = build_vector_index(&conn, true);
5838        assert!(
5839            idx.is_some(),
5840            "empty DB with embedder must yield empty index"
5841        );
5842        assert_eq!(idx.unwrap().len(), 0);
5843    }
5844
5845    // ----- spawn_gc_loop / spawn_wal_checkpoint_loop --------------------
5846
5847    #[tokio::test(start_paused = true)]
5848    async fn test_spawn_gc_loop_runs_and_can_be_aborted() {
5849        let env = TestEnv::fresh();
5850        let conn = db::open(&env.db_path).unwrap();
5851        let state: Db = Arc::new(Mutex::new((
5852            conn,
5853            env.db_path.clone(),
5854            ResolvedTtl::default(),
5855            true,
5856        )));
5857        let h = spawn_gc_loop(state, None, Duration::from_secs(60));
5858        // Advance past the first sleep — the loop should now have ticked at
5859        // least once (its sleep arm has resolved). We can't easily observe
5860        // a side effect on an empty DB, so just abort and confirm the
5861        // handle is well-behaved.
5862        tokio::time::advance(Duration::from_secs(61)).await;
5863        // Yield once so the background task can see the tick.
5864        tokio::task::yield_now().await;
5865        h.abort();
5866        // Joining an aborted handle returns `JoinError` with cancelled() == true.
5867        let err = h.await.unwrap_err();
5868        assert!(err.is_cancelled());
5869    }
5870
5871    #[tokio::test(start_paused = true)]
5872    async fn test_spawn_wal_checkpoint_loop_runs_and_can_be_aborted() {
5873        let env = TestEnv::fresh();
5874        let conn = db::open(&env.db_path).unwrap();
5875        let state: Db = Arc::new(Mutex::new((
5876            conn,
5877            env.db_path.clone(),
5878            ResolvedTtl::default(),
5879            true,
5880        )));
5881        let h = spawn_wal_checkpoint_loop(state, Duration::from_secs(60));
5882        // First sleep is interval/2 = 30s. Advance past that + one full
5883        // interval to ensure at least one checkpoint cycle ran.
5884        tokio::time::advance(Duration::from_secs(31)).await;
5885        tokio::task::yield_now().await;
5886        tokio::time::advance(Duration::from_secs(60)).await;
5887        tokio::task::yield_now().await;
5888        h.abort();
5889        let err = h.await.unwrap_err();
5890        assert!(err.is_cancelled());
5891    }
5892
5893    // v0.7.0 K2 — pending_actions timeout sweeper integration test.
5894    //
5895    // Pre-seed a stale `pending_actions` row, spawn the sweep loop with
5896    // a very short interval, await long enough for at least one tick to
5897    // run on the real runtime, and assert the row was transitioned to
5898    // `status='expired'`. This is the daemon-side end-to-end check that
5899    // complements the per-function unit tests in `db::tests`. We use a
5900    // real (non-paused) runtime here because the SQL sweep query
5901    // (`julianday('now')`) consults the OS wall clock, not tokio's
5902    // virtual time — a `start_paused=true` test never observes ticks
5903    // against a back-dated row.
5904    #[tokio::test]
5905    async fn test_spawn_pending_timeout_sweep_loop_marks_stale_expired() {
5906        let env = TestEnv::fresh();
5907        let conn = db::open(&env.db_path).unwrap();
5908        // Seed a 2-hour-old pending row.
5909        let two_h_ago = (chrono::Utc::now() - chrono::Duration::hours(2)).to_rfc3339();
5910        conn.execute(
5911            "INSERT INTO pending_actions
5912             (id, action_type, namespace, payload, requested_by, requested_at,
5913              status)
5914             VALUES ('sweeper-1', 'store', 'ns/a', '{}', 'tester', ?1, 'pending')",
5915            rusqlite::params![two_h_ago],
5916        )
5917        .unwrap();
5918        let state: Db = Arc::new(Mutex::new((
5919            conn,
5920            env.db_path.clone(),
5921            ResolvedTtl::default(),
5922            true,
5923        )));
5924        // 1-hour global default; the seeded 2h-old row is stale.
5925        // Tick every 50ms so the test wraps in well under a second.
5926        let h = spawn_pending_timeout_sweep_loop(
5927            state.clone(),
5928            env.db_path.clone(),
5929            crate::SECS_PER_HOUR,
5930            Duration::from_millis(50),
5931        );
5932        // Poll the row up to 2s; succeed as soon as the sweep flips it.
5933        let mut flipped = false;
5934        for _ in 0..40 {
5935            tokio::time::sleep(Duration::from_millis(50)).await;
5936            let lock = state.lock().await;
5937            let status: String = lock
5938                .0
5939                .query_row(
5940                    "SELECT status FROM pending_actions WHERE id = 'sweeper-1'",
5941                    [],
5942                    |r| r.get(0),
5943                )
5944                .unwrap();
5945            if status == "expired" {
5946                flipped = true;
5947                break;
5948            }
5949        }
5950        h.abort();
5951        let _ = h.await;
5952        assert!(
5953            flipped,
5954            "sweeper must transition the stale row to 'expired' within 2s"
5955        );
5956    }
5957
5958    // ----- passphrase_from_file -----------------------------------------
5959
5960    /// v0.7.0 #1055 helper — write a passphrase file with mode 0400
5961    /// so the post-#1055 permission check accepts it. Tests calling
5962    /// the unhardened `std::fs::write` would inherit the OS default
5963    /// umask (typically 0644 on macOS, group/world-readable) which
5964    /// the production gate now rejects.
5965    #[cfg(unix)]
5966    fn write_passphrase_strict(path: &std::path::Path, body: &str) {
5967        use std::os::unix::fs::PermissionsExt;
5968        std::fs::write(path, body).unwrap();
5969        std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o400)).unwrap();
5970    }
5971    #[cfg(not(unix))]
5972    fn write_passphrase_strict(path: &std::path::Path, body: &str) {
5973        std::fs::write(path, body).unwrap();
5974    }
5975
5976    #[test]
5977    fn test_passphrase_strips_trailing_newline() {
5978        let dir = tempfile::tempdir().unwrap();
5979        let p = dir.path().join("pass");
5980        write_passphrase_strict(&p, "secret\n");
5981        assert_eq!(passphrase_from_file(&p).unwrap(), "secret");
5982    }
5983
5984    #[test]
5985    fn test_passphrase_strips_trailing_crlf() {
5986        let dir = tempfile::tempdir().unwrap();
5987        let p = dir.path().join("pass");
5988        write_passphrase_strict(&p, "secret\r\n");
5989        assert_eq!(passphrase_from_file(&p).unwrap(), "secret");
5990    }
5991
5992    #[test]
5993    fn test_passphrase_empty_file_errors() {
5994        let dir = tempfile::tempdir().unwrap();
5995        let p = dir.path().join("empty");
5996        write_passphrase_strict(&p, "");
5997        let err = passphrase_from_file(&p).unwrap_err();
5998        assert!(
5999            err.to_string().contains("empty"),
6000            "expected 'empty' error, got: {err}"
6001        );
6002    }
6003
6004    #[test]
6005    fn test_passphrase_empty_after_trim_errors() {
6006        // File contains only whitespace lines — after trim_end_matches
6007        // it remains "  \t" (internal whitespace preserved). Only "\n"
6008        // / "\r" alone would trigger the empty-after-strip case.
6009        let dir = tempfile::tempdir().unwrap();
6010        let p = dir.path().join("nl-only");
6011        write_passphrase_strict(&p, "\n");
6012        let err = passphrase_from_file(&p).unwrap_err();
6013        assert!(err.to_string().contains("empty"));
6014    }
6015
6016    #[test]
6017    fn test_passphrase_nonexistent_file_errors() {
6018        let dir = tempfile::tempdir().unwrap();
6019        let p = dir.path().join("does-not-exist");
6020        let err = passphrase_from_file(&p).unwrap_err();
6021        assert!(
6022            err.to_string().contains("reading passphrase file")
6023                || err.to_string().contains("stat passphrase file")
6024                || err.chain().any(|e| e.to_string().contains("No such file"))
6025                || err.chain().any(|e| e.to_string().contains("cannot find")),
6026            "got: {err:#}"
6027        );
6028    }
6029
6030    #[test]
6031    fn test_passphrase_preserves_internal_whitespace() {
6032        let dir = tempfile::tempdir().unwrap();
6033        let p = dir.path().join("pass");
6034        write_passphrase_strict(&p, "my pass phrase\n");
6035        assert_eq!(passphrase_from_file(&p).unwrap(), "my pass phrase");
6036    }
6037
6038    #[cfg(unix)]
6039    #[test]
6040    fn test_passphrase_rejects_lax_permissions_1055() {
6041        // v0.7.0 #1055 — file with mode 0644 (group/world readable)
6042        // is rejected by the permission gate. Pre-#1055 the function
6043        // accepted any readable file regardless of mode.
6044        //
6045        // Serialise on the shared `env_var_lock` so the sibling
6046        // `test_passphrase_lax_perms_env_overrides_1055` test can't
6047        // race the `AI_MEMORY_PASSPHRASE_FILE_ALLOW_LAX_PERMS` env
6048        // var into a state that bypasses the rejection.
6049        use std::os::unix::fs::PermissionsExt;
6050        let _g = env_var_lock();
6051        // SAFETY: serialised via env_var_lock; clear any stale state
6052        // from a sibling test that exited mid-test.
6053        unsafe { std::env::remove_var("AI_MEMORY_PASSPHRASE_FILE_ALLOW_LAX_PERMS") };
6054        let dir = tempfile::tempdir().unwrap();
6055        let p = dir.path().join("lax");
6056        std::fs::write(&p, "secret\n").unwrap();
6057        std::fs::set_permissions(&p, std::fs::Permissions::from_mode(0o644)).unwrap();
6058        let err = passphrase_from_file(&p).unwrap_err();
6059        let msg = err.to_string();
6060        assert!(
6061            msg.contains("lax permissions") && msg.contains("0400"),
6062            "#1055: expected lax-permission rejection with chmod 0400 hint; got: {msg}"
6063        );
6064        assert!(
6065            msg.contains("AI_MEMORY_PASSPHRASE_FILE_ALLOW_LAX_PERMS"),
6066            "#1055: failure message MUST reference the env-var escape hatch; got: {msg}"
6067        );
6068    }
6069
6070    #[cfg(unix)]
6071    #[test]
6072    fn test_passphrase_lax_perms_env_overrides_1055() {
6073        // v0.7.0 #1055 — operators can opt back into the legacy
6074        // permissive posture via
6075        // `AI_MEMORY_PASSPHRASE_FILE_ALLOW_LAX_PERMS=1`.
6076        use std::os::unix::fs::PermissionsExt;
6077        let _g = env_var_lock();
6078        let dir = tempfile::tempdir().unwrap();
6079        let p = dir.path().join("lax-with-env");
6080        std::fs::write(&p, "secret\n").unwrap();
6081        std::fs::set_permissions(&p, std::fs::Permissions::from_mode(0o644)).unwrap();
6082        // SAFETY: serialised via env_var_lock; the lock guard's
6083        // lifetime brackets the set + remove pair so no sibling
6084        // test observes the intermediate state.
6085        unsafe {
6086            std::env::set_var("AI_MEMORY_PASSPHRASE_FILE_ALLOW_LAX_PERMS", "1");
6087        }
6088        let result = passphrase_from_file(&p);
6089        unsafe {
6090            std::env::remove_var("AI_MEMORY_PASSPHRASE_FILE_ALLOW_LAX_PERMS");
6091        }
6092        assert_eq!(
6093            result.unwrap(),
6094            "secret",
6095            "#1055: env-var escape hatch MUST restore legacy permissive posture"
6096        );
6097    }
6098
6099    // ----- apply_anonymize_default --------------------------------------
6100
6101    #[test]
6102    fn test_anonymize_set_when_config_true_and_env_unset() {
6103        let _g = env_var_lock();
6104        // SAFETY: serialized via env_var_lock.
6105        unsafe { std::env::remove_var("AI_MEMORY_ANONYMIZE") };
6106        let mut cfg = AppConfig::default();
6107        cfg.identity = Some(crate::config::IdentityConfig {
6108            anonymize_default: true,
6109        });
6110        apply_anonymize_default(&cfg);
6111        assert_eq!(std::env::var("AI_MEMORY_ANONYMIZE").unwrap(), "1");
6112        // SAFETY: serialized via env_var_lock.
6113        unsafe { std::env::remove_var("AI_MEMORY_ANONYMIZE") };
6114    }
6115
6116    #[test]
6117    fn test_anonymize_unchanged_when_env_already_set() {
6118        let _g = env_var_lock();
6119        // SAFETY: serialized via env_var_lock.
6120        unsafe { std::env::set_var("AI_MEMORY_ANONYMIZE", "0") };
6121        let mut cfg = AppConfig::default();
6122        cfg.identity = Some(crate::config::IdentityConfig {
6123            anonymize_default: true,
6124        });
6125        apply_anonymize_default(&cfg);
6126        // Env var is left alone — caller-set value wins.
6127        assert_eq!(std::env::var("AI_MEMORY_ANONYMIZE").unwrap(), "0");
6128        // SAFETY: serialized via env_var_lock.
6129        unsafe { std::env::remove_var("AI_MEMORY_ANONYMIZE") };
6130    }
6131
6132    #[test]
6133    fn test_anonymize_unchanged_when_config_false() {
6134        let _g = env_var_lock();
6135        // SAFETY: serialized via env_var_lock.
6136        unsafe { std::env::remove_var("AI_MEMORY_ANONYMIZE") };
6137        let cfg = AppConfig::default();
6138        // Default config is false / None for identity.anonymize_default.
6139        apply_anonymize_default(&cfg);
6140        assert!(std::env::var("AI_MEMORY_ANONYMIZE").is_err());
6141    }
6142
6143    // ----- bootstrap_serve ----------------------------------------------
6144
6145    #[tokio::test]
6146    async fn test_bootstrap_serve_keyword_tier_no_embedder() {
6147        let env = TestEnv::fresh();
6148        let mut cfg = AppConfig::default();
6149        cfg.tier = Some("keyword".to_string());
6150        let args = args_with_db(&env.db_path);
6151        let bs = bootstrap_serve(&env.db_path, &args, &cfg).await.unwrap();
6152        // Keyword tier => no embedder, no vector index.
6153        assert!(bs.app_state.embedder.is_none());
6154        let vi = bs.app_state.vector_index.lock().await;
6155        assert!(vi.is_none());
6156        // Six task handles spawned (v0.7 policy-engine item 3 added
6157        // the deferred-audit supervisor + gc + wal_checkpoint +
6158        // v0.7 K2 pending_actions timeout sweep + v0.7 I3 transcript
6159        // archive→prune lifecycle sweep + v0.7 K8 agent_quotas
6160        // daily-counter reset sweep). v0.7 B3-fix2 gates the
6161        // family-descriptor embedding precompute behind
6162        // `AI_MEMORY_PRECOMPUTE_FAMILY_EMBEDDINGS=1` (default OFF) so
6163        // it does not contend with HTTP request-path embeds under
6164        // parallel CI load — see the gate site in `bootstrap_serve`
6165        // for the rationale. The task count reverts to six when the
6166        // env var is unset.
6167        assert_eq!(bs.task_handles.len(), 6);
6168        // Cleanly abort the spawned tasks so they don't leak across tests.
6169        for h in bs.task_handles {
6170            h.abort();
6171        }
6172    }
6173
6174    #[tokio::test]
6175    async fn test_bootstrap_serve_with_api_key_logs_enabled() {
6176        let env = TestEnv::fresh();
6177        let mut cfg = AppConfig::default();
6178        cfg.tier = Some("keyword".to_string());
6179        cfg.api_key = Some("test-key".to_string());
6180        let args = args_with_db(&env.db_path);
6181        let bs = bootstrap_serve(&env.db_path, &args, &cfg).await.unwrap();
6182        assert_eq!(bs.api_key_state.key.as_deref(), Some("test-key"));
6183        for h in bs.task_handles {
6184            h.abort();
6185        }
6186    }
6187
6188    #[tokio::test]
6189    async fn test_bootstrap_serve_federation_disabled_when_quorum_zero() {
6190        let env = TestEnv::fresh();
6191        let mut cfg = AppConfig::default();
6192        cfg.tier = Some("keyword".to_string());
6193        let args = args_with_db(&env.db_path);
6194        let bs = bootstrap_serve(&env.db_path, &args, &cfg).await.unwrap();
6195        assert!(bs.app_state.federation.is_none());
6196        for h in bs.task_handles {
6197            h.abort();
6198        }
6199    }
6200
6201    // ----- W12-F: deeper coverage --------------------------------------
6202    //
6203    // Targets the gaps left after W6 + W7 + D6: `bootstrap_serve` variants
6204    // that require a populated DB or federation, the `run` dispatch arms
6205    // not yet exercised, `cmd_bench` end-to-end with a tiny workload,
6206    // `cmd_migrate` (sal feature), `urlencoding_minimal` direct test,
6207    // and the gc / wal-checkpoint loop bodies executing through one
6208    // tick with a measurable side effect.
6209
6210    // ----- bootstrap_serve federation enabled ---------------------------
6211
6212    #[tokio::test]
6213    async fn test_bootstrap_serve_federation_enabled_attaches_config() {
6214        // quorum_writes=1 + one peer → FederationConfig::build returns
6215        // Some, so app_state.federation is wired in. Catchup loop is
6216        // disabled (catchup_interval_secs=0) — the spawn-catchup branch
6217        // is exercised by federation tests; we only verify wiring here.
6218        let env = TestEnv::fresh();
6219        let mut cfg = AppConfig::default();
6220        cfg.tier = Some("keyword".to_string());
6221        let mut args = args_with_db(&env.db_path);
6222        args.quorum_writes = 1;
6223        args.quorum_peers = vec!["http://127.0.0.1:65530".to_string()];
6224        args.quorum_timeout_ms = 100;
6225        args.catchup_interval_secs = 0;
6226        let bs = bootstrap_serve(&env.db_path, &args, &cfg).await.unwrap();
6227        assert!(bs.app_state.federation.is_some());
6228        for h in bs.task_handles {
6229            h.abort();
6230        }
6231    }
6232
6233    #[tokio::test]
6234    async fn test_bootstrap_serve_federation_enabled_with_catchup_loop() {
6235        // catchup_interval_secs > 0 → spawn_catchup_loop is invoked.
6236        // We can't directly observe the catchup loop's internal handle
6237        // (federation::spawn_catchup_loop returns a JoinHandle owned
6238        // privately by the federation module), but the side branch
6239        // "catchup loop enabled" runs and the bootstrap completes.
6240        let env = TestEnv::fresh();
6241        let mut cfg = AppConfig::default();
6242        cfg.tier = Some("keyword".to_string());
6243        let mut args = args_with_db(&env.db_path);
6244        args.quorum_writes = 1;
6245        args.quorum_peers = vec!["http://127.0.0.1:65531".to_string()];
6246        args.quorum_timeout_ms = 100;
6247        args.catchup_interval_secs = crate::SECS_PER_HOUR as u64; // long enough not to fire
6248        let bs = bootstrap_serve(&env.db_path, &args, &cfg).await.unwrap();
6249        assert!(bs.app_state.federation.is_some());
6250        for h in bs.task_handles {
6251            h.abort();
6252        }
6253    }
6254
6255    #[tokio::test]
6256    async fn test_bootstrap_serve_federation_invalid_peer_errors() {
6257        // FederationConfig::build returns Err on duplicate peer URLs
6258        // (#341). The bootstrap_serve `.context("federation config")`
6259        // wrap turns it into a daemon-startup error.
6260        let env = TestEnv::fresh();
6261        let mut cfg = AppConfig::default();
6262        cfg.tier = Some("keyword".to_string());
6263        let mut args = args_with_db(&env.db_path);
6264        args.quorum_writes = 1;
6265        args.quorum_peers = vec![
6266            "http://127.0.0.1:65532".to_string(),
6267            "http://127.0.0.1:65532/".to_string(), // duplicate after trim
6268        ];
6269        let res = bootstrap_serve(&env.db_path, &args, &cfg).await;
6270        let err = match res {
6271            Ok(_) => panic!("expected error from duplicate peer URLs"),
6272            Err(e) => e,
6273        };
6274        let s = format!("{err:#}");
6275        assert!(
6276            s.contains("federation") || s.contains("duplicate"),
6277            "got: {s}"
6278        );
6279    }
6280
6281    // ----- build_vector_index populated DB ------------------------------
6282
6283    #[test]
6284    fn test_build_vector_index_populated_db_returns_built_index() {
6285        // When the DB has stored embeddings AND the embedder is present,
6286        // `build_vector_index` should return Some(VectorIndex) populated
6287        // with those embeddings rather than an empty one.
6288        let env = TestEnv::fresh();
6289        let conn = db::open(&env.db_path).unwrap();
6290        // Insert one memory + an embedding via the public db helpers.
6291        let now = chrono::Utc::now().to_rfc3339();
6292        let mem = crate::models::Memory {
6293            id: uuid::Uuid::new_v4().to_string(),
6294            tier: crate::models::Tier::Mid,
6295            namespace: "ns".to_string(),
6296            title: "t".to_string(),
6297            content: "c".to_string(),
6298            tags: vec![],
6299            priority: 5,
6300            confidence: 1.0,
6301            source: "test".to_string(),
6302            access_count: 0,
6303            created_at: now.clone(),
6304            updated_at: now,
6305            last_accessed_at: None,
6306            expires_at: None,
6307            metadata: crate::models::default_metadata(),
6308            reflection_depth: 0,
6309            memory_kind: crate::models::MemoryKind::Observation,
6310            entity_id: None,
6311            persona_version: None,
6312            citations: Vec::new(),
6313            source_uri: None,
6314            source_span: None,
6315            confidence_source: crate::models::ConfidenceSource::CallerProvided,
6316            confidence_signals: None,
6317            confidence_decayed_at: None,
6318            version: 1,
6319        };
6320        let id = db::insert(&conn, &mem).unwrap();
6321        db::set_embedding(&conn, &id, &[1.0, 0.0, 0.0]).unwrap();
6322        let idx = build_vector_index(&conn, true).expect("populated index");
6323        assert!(
6324            idx.len() >= 1,
6325            "expected non-empty index, got len={}",
6326            idx.len()
6327        );
6328    }
6329
6330    // ----- #1579 B3: async boot HNSW loader ------------------------------
6331
6332    /// Boot-readiness contract: `spawn_vector_index_boot_load` returns
6333    /// immediately (the daemon can serve requests with the EMPTY
6334    /// index), the outer mutex stays responsive throughout the warm-up,
6335    /// and after the loader finishes the index covers every stored
6336    /// embedding and reports fully-searchable.
6337    #[tokio::test]
6338    async fn b3_1579_boot_loader_warms_index_off_the_startup_path() {
6339        let env = TestEnv::fresh();
6340        let conn = db::open(&env.db_path).unwrap();
6341        let now = chrono::Utc::now().to_rfc3339();
6342        let mut expected_ids = Vec::new();
6343        for i in 0..3 {
6344            let mem = crate::models::Memory {
6345                id: uuid::Uuid::new_v4().to_string(),
6346                tier: crate::models::Tier::Long,
6347                namespace: "ns-b3".to_string(),
6348                title: format!("warm-{i}"),
6349                content: format!("warm body {i}"),
6350                tags: vec![],
6351                priority: 5,
6352                confidence: 1.0,
6353                source: "test".to_string(),
6354                access_count: 0,
6355                created_at: now.clone(),
6356                updated_at: now.clone(),
6357                last_accessed_at: None,
6358                expires_at: None,
6359                metadata: crate::models::default_metadata(),
6360                reflection_depth: 0,
6361                memory_kind: crate::models::MemoryKind::Observation,
6362                entity_id: None,
6363                persona_version: None,
6364                citations: Vec::new(),
6365                source_uri: None,
6366                source_span: None,
6367                confidence_source: crate::models::ConfidenceSource::CallerProvided,
6368                confidence_signals: None,
6369                confidence_decayed_at: None,
6370                version: 1,
6371            };
6372            let id = db::insert(&conn, &mem).unwrap();
6373            let mut v = [0.0_f32; 3];
6374            v[i] = 1.0;
6375            db::set_embedding(&conn, &id, &v).unwrap();
6376            expected_ids.push(id);
6377        }
6378        drop(conn);
6379
6380        // The daemon-shaped state: empty index behind the AppState
6381        // mutex — exactly what `serve` now constructs before binding.
6382        let state: Arc<Mutex<Option<VectorIndex>>> =
6383            Arc::new(Mutex::new(Some(hnsw::VectorIndex::empty())));
6384        let handle = spawn_vector_index_boot_load(env.db_path.clone(), Arc::clone(&state));
6385
6386        // Readiness: the state is immediately lockable (no long-held
6387        // guard) — a request-path access during warm-up must not
6388        // deadlock or block on the graph build.
6389        {
6390            let guard = state.lock().await;
6391            assert!(
6392                guard.is_some(),
6393                "index present (possibly cold) during warm-up"
6394            );
6395        }
6396
6397        tokio::task::spawn_blocking(move || handle.join().expect("loader thread"))
6398            .await
6399            .expect("join task");
6400
6401        let guard = state.lock().await;
6402        let idx = guard.as_ref().expect("index");
6403        assert_eq!(idx.len(), 3, "every stored embedding seeded");
6404        assert!(
6405            idx.is_fully_searchable(),
6406            "loader must drive the #968 rebuild to a swapped-in graph"
6407        );
6408        let hits = idx.search(&[1.0, 0.0, 0.0], 1);
6409        assert_eq!(
6410            hits.first().map(|h| h.id.as_str()),
6411            Some(expected_ids[0].as_str()),
6412            "warmed index serves the seeded rows"
6413        );
6414    }
6415
6416    // ----- gc loop with non-empty side effect ---------------------------
6417    //
6418    // The existing `test_spawn_gc_loop_runs_and_can_be_aborted` only
6419    // covers the empty-DB path where db::gc returns 0. Seeding an expired
6420    // memory and pointing the gc loop at it lets the `Ok(n) if n > 0`
6421    // arm fire.
6422
6423    #[tokio::test(start_paused = true)]
6424    async fn test_spawn_gc_loop_purges_expired_memories() {
6425        let env = TestEnv::fresh();
6426        let conn = db::open(&env.db_path).unwrap();
6427        // Insert an expired memory (expires_at in the past).
6428        let past = (chrono::Utc::now() - chrono::Duration::days(1)).to_rfc3339();
6429        let now = chrono::Utc::now().to_rfc3339();
6430        let mem = crate::models::Memory {
6431            id: uuid::Uuid::new_v4().to_string(),
6432            tier: crate::models::Tier::Short,
6433            namespace: "ns-gc".to_string(),
6434            title: "stale".to_string(),
6435            content: "stale".to_string(),
6436            tags: vec![],
6437            priority: 1,
6438            confidence: 1.0,
6439            source: "test".to_string(),
6440            access_count: 0,
6441            created_at: now.clone(),
6442            updated_at: now,
6443            last_accessed_at: None,
6444            expires_at: Some(past),
6445            metadata: crate::models::default_metadata(),
6446            reflection_depth: 0,
6447            memory_kind: crate::models::MemoryKind::Observation,
6448            entity_id: None,
6449            persona_version: None,
6450            citations: Vec::new(),
6451            source_uri: None,
6452            source_span: None,
6453            confidence_source: crate::models::ConfidenceSource::CallerProvided,
6454            confidence_signals: None,
6455            confidence_decayed_at: None,
6456            version: 1,
6457        };
6458        db::insert(&conn, &mem).unwrap();
6459        drop(conn);
6460
6461        let conn = db::open(&env.db_path).unwrap();
6462        let state: Db = Arc::new(Mutex::new((
6463            conn,
6464            env.db_path.clone(),
6465            ResolvedTtl::default(),
6466            true,
6467        )));
6468        // archive_max_days=Some(1) lets the auto_purge_archive arm
6469        // execute too (covers the second match in the loop body).
6470        let h = spawn_gc_loop(state.clone(), Some(1), Duration::from_secs(60));
6471        // Advance past two full intervals to give both branches multiple
6472        // chances to log under paused time.
6473        tokio::time::advance(Duration::from_secs(61)).await;
6474        tokio::task::yield_now().await;
6475        tokio::time::advance(Duration::from_secs(61)).await;
6476        tokio::task::yield_now().await;
6477        h.abort();
6478        let _ = h.await;
6479    }
6480
6481    // ----- WAL checkpoint loop with measurable cycle --------------------
6482
6483    #[tokio::test(start_paused = true)]
6484    async fn test_spawn_wal_checkpoint_loop_runs_multiple_cycles() {
6485        let env = TestEnv::fresh();
6486        let conn = db::open(&env.db_path).unwrap();
6487        let state: Db = Arc::new(Mutex::new((
6488            conn,
6489            env.db_path.clone(),
6490            ResolvedTtl::default(),
6491            true,
6492        )));
6493        let h = spawn_wal_checkpoint_loop(state, Duration::from_secs(2));
6494        // First sleep is 1s (interval/2), then 2s per cycle. Advance
6495        // past three cycles.
6496        for _ in 0..4 {
6497            tokio::time::advance(Duration::from_secs(2)).await;
6498            tokio::task::yield_now().await;
6499        }
6500        h.abort();
6501        let _ = h.await;
6502    }
6503
6504    // ----- urlencoding_minimal -----------------------------------------
6505
6506    #[test]
6507    fn test_urlencoding_minimal_round_trip() {
6508        // Unreserved characters pass through unchanged.
6509        assert_eq!(urlencoding_minimal("abcXYZ-_.~"), "abcXYZ-_.~");
6510        assert_eq!(urlencoding_minimal("0123456789"), "0123456789");
6511        // Reserved / unsafe characters are percent-encoded.
6512        assert_eq!(urlencoding_minimal("a:b"), "a%3Ab");
6513        assert_eq!(urlencoding_minimal("a/b"), "a%2Fb");
6514        assert_eq!(urlencoding_minimal("a@b"), "a%40b");
6515        assert_eq!(urlencoding_minimal("a+b"), "a%2Bb");
6516        assert_eq!(urlencoding_minimal(" "), "%20");
6517        // Empty string is empty.
6518        assert_eq!(urlencoding_minimal(""), "");
6519        // RFC3339 timestamp shape (sync-daemon real input).
6520        assert_eq!(
6521            urlencoding_minimal("2024-01-02T03:04:05+00:00"),
6522            "2024-01-02T03%3A04%3A05%2B00%3A00"
6523        );
6524    }
6525
6526    // ----- run() dispatch for read-only commands ------------------------
6527    //
6528    // Each test parses a CLI argv via clap, hands the resulting `Cli`
6529    // to `daemon_runtime::run`, and asserts the dispatch path returned
6530    // Ok. We don't assert on stdout because run() writes to the
6531    // process stdout directly — what we care about for coverage is
6532    // that the match arm executed and the inner cli handler returned.
6533
    /// Serialization guard for the `run()` dispatch tests.
    ///
    /// Returns the guard produced by `env_var_lock()`; callers keep it
    /// alive (`let _g = no_config_env();`) for the duration of the test.
    fn no_config_env() -> std::sync::MutexGuard<'static, ()> {
        // run() reads `AI_MEMORY_NO_CONFIG` indirectly via the AppConfig
        // we pass. We don't rely on the env directly here, but holding
        // env_var_lock keeps run() tests serialized so they don't race
        // on stdout / global subscribers.
        env_var_lock()
    }
6541
6542    #[tokio::test]
6543    async fn test_run_dispatch_stats_command() {
6544        let _g = no_config_env();
6545        let env = TestEnv::fresh();
6546        let cfg = AppConfig::default();
6547        let cli =
6548            Cli::try_parse_from(["ai-memory", "--db", env.db_path.to_str().unwrap(), "stats"])
6549                .unwrap();
6550        run(cli, &cfg).await.unwrap();
6551    }
6552
6553    #[tokio::test]
6554    async fn test_run_dispatch_namespaces_command() {
6555        let _g = no_config_env();
6556        let env = TestEnv::fresh();
6557        let cfg = AppConfig::default();
6558        let cli = Cli::try_parse_from([
6559            "ai-memory",
6560            "--db",
6561            env.db_path.to_str().unwrap(),
6562            "namespaces",
6563        ])
6564        .unwrap();
6565        run(cli, &cfg).await.unwrap();
6566    }
6567
6568    #[tokio::test]
6569    async fn test_run_dispatch_export_command() {
6570        let _g = no_config_env();
6571        let env = TestEnv::fresh();
6572        let cfg = AppConfig::default();
6573        let cli =
6574            Cli::try_parse_from(["ai-memory", "--db", env.db_path.to_str().unwrap(), "export"])
6575                .unwrap();
6576        run(cli, &cfg).await.unwrap();
6577    }
6578
6579    #[tokio::test]
6580    async fn test_run_dispatch_list_command() {
6581        let _g = no_config_env();
6582        let env = TestEnv::fresh();
6583        let cfg = AppConfig::default();
6584        let cli = Cli::try_parse_from(["ai-memory", "--db", env.db_path.to_str().unwrap(), "list"])
6585            .unwrap();
6586        run(cli, &cfg).await.unwrap();
6587    }
6588
6589    #[tokio::test]
6590    async fn test_run_dispatch_search_command() {
6591        let _g = no_config_env();
6592        let env = TestEnv::fresh();
6593        let cfg = AppConfig::default();
6594        let cli = Cli::try_parse_from([
6595            "ai-memory",
6596            "--db",
6597            env.db_path.to_str().unwrap(),
6598            "search",
6599            "anyq",
6600        ])
6601        .unwrap();
6602        run(cli, &cfg).await.unwrap();
6603    }
6604
6605    #[tokio::test]
6606    async fn test_run_dispatch_archive_list_command() {
6607        let _g = no_config_env();
6608        let env = TestEnv::fresh();
6609        let cfg = AppConfig::default();
6610        let cli = Cli::try_parse_from([
6611            "ai-memory",
6612            "--db",
6613            env.db_path.to_str().unwrap(),
6614            "archive",
6615            "list",
6616        ])
6617        .unwrap();
6618        run(cli, &cfg).await.unwrap();
6619    }
6620
6621    #[tokio::test]
6622    async fn test_run_dispatch_agents_list_command() {
6623        let _g = no_config_env();
6624        let env = TestEnv::fresh();
6625        let cfg = AppConfig::default();
6626        let cli = Cli::try_parse_from([
6627            "ai-memory",
6628            "--db",
6629            env.db_path.to_str().unwrap(),
6630            "agents",
6631            "list",
6632        ])
6633        .unwrap();
6634        run(cli, &cfg).await.unwrap();
6635    }
6636
6637    #[tokio::test]
6638    async fn test_run_dispatch_pending_list_command() {
6639        let _g = no_config_env();
6640        let env = TestEnv::fresh();
6641        let cfg = AppConfig::default();
6642        let cli = Cli::try_parse_from([
6643            "ai-memory",
6644            "--db",
6645            env.db_path.to_str().unwrap(),
6646            "pending",
6647            "list",
6648        ])
6649        .unwrap();
6650        run(cli, &cfg).await.unwrap();
6651    }
6652
6653    #[tokio::test]
6654    async fn test_run_dispatch_completions_command() {
6655        let _g = no_config_env();
6656        let env = TestEnv::fresh();
6657        let cfg = AppConfig::default();
6658        let cli = Cli::try_parse_from([
6659            "ai-memory",
6660            "--db",
6661            env.db_path.to_str().unwrap(),
6662            "completions",
6663            "bash",
6664        ])
6665        .unwrap();
6666        run(cli, &cfg).await.unwrap();
6667    }
6668
6669    #[tokio::test]
6670    async fn test_run_dispatch_man_command() {
6671        let _g = no_config_env();
6672        let env = TestEnv::fresh();
6673        let cfg = AppConfig::default();
6674        let cli = Cli::try_parse_from(["ai-memory", "--db", env.db_path.to_str().unwrap(), "man"])
6675            .unwrap();
6676        run(cli, &cfg).await.unwrap();
6677    }
6678
6679    #[tokio::test]
6680    async fn test_run_dispatch_gc_triggers_post_run_checkpoint() {
6681        // `Gc` is in is_write_command, so result.is_ok() && Some path
6682        // takes the post-run WAL checkpoint branch (lines 638-644).
6683        let _g = no_config_env();
6684        let env = TestEnv::fresh();
6685        let cfg = AppConfig::default();
6686        let cli = Cli::try_parse_from(["ai-memory", "--db", env.db_path.to_str().unwrap(), "gc"])
6687            .unwrap();
6688        run(cli, &cfg).await.unwrap();
6689    }
6690
6691    #[tokio::test]
6692    async fn test_run_dispatch_resolve_command() {
6693        // Seed two memories, then resolve one as superseding the other.
6694        let _g = no_config_env();
6695        let env = TestEnv::fresh();
6696        let id_a = crate::cli::test_utils::seed_memory(&env.db_path, "ns", "old", "old fact");
6697        let id_b = crate::cli::test_utils::seed_memory(&env.db_path, "ns", "new", "new fact");
6698        let cfg = AppConfig::default();
6699        let cli = Cli::try_parse_from([
6700            "ai-memory",
6701            "--db",
6702            env.db_path.to_str().unwrap(),
6703            "resolve",
6704            &id_a,
6705            &id_b,
6706        ])
6707        .unwrap();
6708        run(cli, &cfg).await.unwrap();
6709    }
6710
6711    #[tokio::test]
6712    async fn test_run_dispatch_get_command() {
6713        let _g = no_config_env();
6714        let env = TestEnv::fresh();
6715        let id = crate::cli::test_utils::seed_memory(&env.db_path, "ns", "t", "c");
6716        let cfg = AppConfig::default();
6717        let cli = Cli::try_parse_from([
6718            "ai-memory",
6719            "--db",
6720            env.db_path.to_str().unwrap(),
6721            "get",
6722            &id,
6723        ])
6724        .unwrap();
6725        run(cli, &cfg).await.unwrap();
6726    }
6727
6728    /// v0.7.0 V-4 closeout (#698) — dispatch coverage for the new
6729    /// `verify-signed-events-chain` subcommand. We don't tamper here
6730    /// (the lib-side test suite owns that property); the goal is to
6731    /// exercise the dispatch arm so a `cargo llvm-cov` pass over the
6732    /// daemon_runtime module sees it. On an empty DB the chain holds
6733    /// vacuously and the subcommand exits 0, so `run()` returns
6734    /// Ok(()).
6735    #[tokio::test]
6736    async fn test_run_dispatch_verify_signed_events_chain_command() {
6737        let _g = no_config_env();
6738        let env = TestEnv::fresh();
6739        let cfg = AppConfig::default();
6740        let cli = Cli::try_parse_from([
6741            "ai-memory",
6742            "--db",
6743            env.db_path.to_str().unwrap(),
6744            "verify-signed-events-chain",
6745        ])
6746        .unwrap();
6747        run(cli, &cfg).await.unwrap();
6748    }
6749
6750    #[tokio::test]
6751    async fn test_run_dispatch_promote_triggers_write_checkpoint() {
6752        // `Promote` is in is_write_command — covers the post-run
6753        // checkpoint branch on a different command.
6754        let _g = no_config_env();
6755        let env = TestEnv::fresh();
6756        let id = crate::cli::test_utils::seed_memory(&env.db_path, "ns", "t", "c");
6757        let cfg = AppConfig::default();
6758        let cli = Cli::try_parse_from([
6759            "ai-memory",
6760            "--db",
6761            env.db_path.to_str().unwrap(),
6762            "promote",
6763            &id,
6764        ])
6765        .unwrap();
6766        run(cli, &cfg).await.unwrap();
6767    }
6768
6769    // ----- run() dispatch for bench (cmd_bench end-to-end) --------------
6770
6771    #[tokio::test]
6772    async fn test_run_dispatch_bench_smoke_runs_one_iteration() {
6773        // iterations=1, warmup=0 keeps the workload tiny. The bench
6774        // body builds an in-memory DB internally — no on-disk side
6775        // effects. Covers cmd_bench from top to bottom on the
6776        // human-readable, no-baseline, no-history path.
6777        let _g = no_config_env();
6778        let env = TestEnv::fresh();
6779        let cfg = AppConfig::default();
6780        let cli = Cli::try_parse_from([
6781            "ai-memory",
6782            "--db",
6783            env.db_path.to_str().unwrap(),
6784            "bench",
6785            "--iterations",
6786            "1",
6787            "--warmup",
6788            "0",
6789        ])
6790        .unwrap();
6791        // Bench may fail the budget on a paused-time iter=1 run; we
6792        // accept either Ok or Err here — coverage is the goal.
6793        let _ = run(cli, &cfg).await;
6794    }
6795
6796    #[tokio::test]
6797    async fn test_run_dispatch_bench_json_with_history() {
6798        // Covers --json branch + --history append branch of cmd_bench.
6799        let _g = no_config_env();
6800        let env = TestEnv::fresh();
6801        let history = env.db_path.with_file_name("hist.jsonl");
6802        let cfg = AppConfig::default();
6803        let cli = Cli::try_parse_from([
6804            "ai-memory",
6805            "--db",
6806            env.db_path.to_str().unwrap(),
6807            "bench",
6808            "--iterations",
6809            "1",
6810            "--warmup",
6811            "0",
6812            "--json",
6813            "--history",
6814            history.to_str().unwrap(),
6815        ])
6816        .unwrap();
6817        let _ = run(cli, &cfg).await;
6818        // History file should now exist with at least one line.
6819        if history.exists() {
6820            let content = std::fs::read_to_string(&history).unwrap();
6821            assert!(content.contains("captured_at") || !content.is_empty());
6822        }
6823    }
6824
6825    // ----- run() dispatch for migrate (sal feature) --------------------
6826
6827    #[cfg(feature = "sal")]
6828    #[tokio::test]
6829    async fn test_run_dispatch_migrate_sqlite_to_sqlite_dry_run() {
6830        // Covers cmd_migrate happy path + dry-run / human-output branch.
6831        let _g = no_config_env();
6832        let src_env = TestEnv::fresh();
6833        let dst_env = TestEnv::fresh();
6834        // Seed source so migrate has work to do.
6835        crate::cli::test_utils::seed_memory(&src_env.db_path, "ns-mig", "t", "c");
6836        let from = format!("sqlite://{}", src_env.db_path.display());
6837        let to = format!("sqlite://{}", dst_env.db_path.display());
6838        let cfg = AppConfig::default();
6839        let cli = Cli::try_parse_from([
6840            "ai-memory",
6841            "--db",
6842            src_env.db_path.to_str().unwrap(),
6843            "migrate",
6844            "--from",
6845            &from,
6846            "--to",
6847            &to,
6848            "--dry-run",
6849        ])
6850        .unwrap();
6851        run(cli, &cfg).await.unwrap();
6852    }
6853
6854    #[cfg(feature = "sal")]
6855    #[tokio::test]
6856    async fn test_run_dispatch_migrate_json_output() {
6857        // Covers cmd_migrate --json branch.
6858        let _g = no_config_env();
6859        let src_env = TestEnv::fresh();
6860        let dst_env = TestEnv::fresh();
6861        crate::cli::test_utils::seed_memory(&src_env.db_path, "ns-mig", "t", "c");
6862        let from = format!("sqlite://{}", src_env.db_path.display());
6863        let to = format!("sqlite://{}", dst_env.db_path.display());
6864        let cfg = AppConfig::default();
6865        let cli = Cli::try_parse_from([
6866            "ai-memory",
6867            "--db",
6868            src_env.db_path.to_str().unwrap(),
6869            "migrate",
6870            "--from",
6871            &from,
6872            "--to",
6873            &to,
6874            "--json",
6875        ])
6876        .unwrap();
6877        run(cli, &cfg).await.unwrap();
6878    }
6879
    // ----- run() with passphrase file (--db-passphrase-file branch) ----
6881
6882    #[tokio::test]
6883    async fn test_run_with_db_passphrase_file_exports_env() {
6884        // Covers the `--db-passphrase-file` branch in run() (lines
6885        // 371-375) which calls passphrase_from_file then sets
6886        // AI_MEMORY_DB_PASSPHRASE in the environment.
6887        let _g = env_var_lock();
6888        // SAFETY: serialized via env_var_lock.
6889        unsafe { std::env::remove_var("AI_MEMORY_DB_PASSPHRASE") };
6890        let env = TestEnv::fresh();
6891        let pass_path = env.db_path.with_file_name("pass");
6892        std::fs::write(&pass_path, "test-passphrase\n").unwrap();
6893        // v0.7.0 #1055 — the production `passphrase_from_file` gate
6894        // rejects group/world-readable passphrase files; mirror the
6895        // operator-side 0400 mode here.
6896        #[cfg(unix)]
6897        {
6898            use std::os::unix::fs::PermissionsExt;
6899            std::fs::set_permissions(&pass_path, std::fs::Permissions::from_mode(0o400)).unwrap();
6900        }
6901        let cfg = AppConfig::default();
6902        let cli = Cli::try_parse_from([
6903            "ai-memory",
6904            "--db",
6905            env.db_path.to_str().unwrap(),
6906            "--db-passphrase-file",
6907            pass_path.to_str().unwrap(),
6908            "stats",
6909        ])
6910        .unwrap();
6911        run(cli, &cfg).await.unwrap();
6912        // Env var is now set.
6913        assert_eq!(
6914            std::env::var("AI_MEMORY_DB_PASSPHRASE").unwrap(),
6915            "test-passphrase"
6916        );
6917        // SAFETY: serialized via env_var_lock.
6918        unsafe { std::env::remove_var("AI_MEMORY_DB_PASSPHRASE") };
6919    }
6920
6921    // ----- init_tracing idempotence ------------------------------------
6922
6923    #[test]
6924    fn test_init_tracing_is_idempotent() {
6925        // Covers init_tracing — second call is a harmless no-op
6926        // (try_init returns Err which we ignore). Calling twice from
6927        // the same test exercises the second-call path on a process
6928        // that may or may not already have a global subscriber.
6929        init_tracing();
6930        init_tracing();
6931    }
6932
6933    // ----- serve_http_with_shutdown_future smoke -----------------------
6934    //
6935    // The non-TLS branch of `serve()` delegates here; cover the body
6936    // by binding to a free port, requesting /health, then shutting
6937    // down. This also covers the production code path that
6938    // `daemon_runtime::serve()` uses for the non-TLS case.
6939
6940    #[tokio::test]
6941    async fn test_serve_http_with_shutdown_future_serves_then_stops() {
6942        let env = TestEnv::fresh();
6943        let app_state = keyword_app_state(&env.db_path);
6944        let api_key_state = ApiKeyState {
6945            key: None,
6946            mtls_enforced: false,
6947        };
6948        // Pick a free port via a transient bind.
6949        let port = {
6950            let l = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
6951            let p = l.local_addr().unwrap().port();
6952            drop(l);
6953            p
6954        };
6955        let addr = format!("127.0.0.1:{port}");
6956        let shutdown = Arc::new(Notify::new());
6957        let shutdown_clone = shutdown.clone();
6958        let handle = tokio::spawn(async move {
6959            serve_http_with_shutdown_future(&addr, api_key_state, app_state, async move {
6960                shutdown_clone.notified().await;
6961            })
6962            .await
6963        });
6964        // Give the server a moment to bind, then poke /health.
6965        for _ in 0..40 {
6966            if let Ok(client) = reqwest::Client::builder()
6967                .timeout(Duration::from_millis(200))
6968                .build()
6969                && client
6970                    .get(format!("http://127.0.0.1:{port}/api/v1/health"))
6971                    .send()
6972                    .await
6973                    .is_ok()
6974            {
6975                break;
6976            }
6977            tokio::time::sleep(Duration::from_millis(50)).await;
6978        }
6979        shutdown.notify_one();
6980        let res = handle.await.unwrap();
6981        assert!(res.is_ok(), "serve future returned: {res:?}");
6982    }
6983
6984    // ----- bind error surfacing ----------------------------------------
6985
6986    #[tokio::test]
6987    async fn test_serve_http_with_shutdown_future_bind_failure_errors() {
6988        // An unbindable address (port 1 on Linux/macOS without root)
6989        // should return an Err with the bind context. This covers the
6990        // `with_context` path on the TcpListener::bind line.
6991        let env = TestEnv::fresh();
6992        let app_state = keyword_app_state(&env.db_path);
6993        let api_key_state = ApiKeyState {
6994            key: None,
6995            mtls_enforced: false,
6996        };
6997        // 0.0.0.0:0 succeeds; we want a guaranteed failure. Bind to
6998        // port 1 which requires privileged perms — except on macOS in
6999        // some configs that may succeed. Use a clearly invalid address
7000        // form instead to force a bind-time error.
7001        let res = serve_http_with_shutdown_future(
7002            "definitely-not-an-address:99999",
7003            api_key_state,
7004            app_state,
7005            async {},
7006        )
7007        .await;
7008        assert!(res.is_err(), "expected bind error, got: {res:?}");
7009    }
7010
7011    // ----- v0.7.0 coverage close: dispatch arms for identity/rules/governance ---
7012    //
7013    // The grand-slam integration cascade lifted coverage uniformly except
7014    // for a handful of CLI dispatch arms in `run()` that no run-dispatch
7015    // test had ever entered: `Command::Identity`, `Command::Rules`,
7016    // `Command::Governance`. Each arm is just the stdout/stderr-lock
7017    // boilerplate + a one-line hand-off to the relevant `cli::*::run`
7018    // handler — those handlers already have their own unit tests under
7019    // `src/cli/identity.rs`, `src/cli/rules.rs`,
7020    // `src/cli/governance_migrate.rs`. The missing piece was the dispatch
7021    // boilerplate itself. These three tests exercise the read-only
7022    // (mutation-free, hermetic) verb of each arm so coverage closes
7023    // without adding any production semantics.
7024
7025    #[tokio::test]
7026    async fn test_run_dispatch_identity_list_command() {
7027        // Covers daemon_runtime::run dispatch arm `Command::Identity(a)`:
7028        // exercises the stdout/stderr lock + `cli::identity::run` hand-off.
7029        // `identity list` is read-only and DB-free; passing an empty
7030        // tempdir as --key-dir keeps the test hermetic (no HOME deps).
7031        let _g = no_config_env();
7032        let env = TestEnv::fresh();
7033        let key_dir = env.db_path.parent().unwrap().join("keys");
7034        std::fs::create_dir_all(&key_dir).unwrap();
7035        let cfg = AppConfig::default();
7036        let cli = Cli::try_parse_from([
7037            "ai-memory",
7038            "--db",
7039            env.db_path.to_str().unwrap(),
7040            "identity",
7041            "--key-dir",
7042            key_dir.to_str().unwrap(),
7043            "list",
7044        ])
7045        .unwrap();
7046        run(cli, &cfg).await.unwrap();
7047    }
7048
7049    #[tokio::test]
7050    async fn test_run_dispatch_rules_list_command() {
7051        // Covers daemon_runtime::run dispatch arm `Command::Rules(a)`:
7052        // exercises the stdout/stderr lock + `cli::rules::run` hand-off.
7053        // `rules list` is the documented read-only verb (no operator key
7054        // required per the module-level docstring of src/cli/rules.rs).
7055        // We open the DB once via `db::open` to materialize the full
7056        // schema (including the `governance_rules` table that migration
7057        // 0024 creates + seeds), then let the run() dispatch open its
7058        // own raw rusqlite connection against the same file.
7059        let _g = no_config_env();
7060        let env = TestEnv::fresh();
7061        drop(crate::db::open(&env.db_path).expect("db::open"));
7062        let key_dir = env.db_path.parent().unwrap().join("keys");
7063        std::fs::create_dir_all(&key_dir).unwrap();
7064        let cfg = AppConfig::default();
7065        let cli = Cli::try_parse_from([
7066            "ai-memory",
7067            "--db",
7068            env.db_path.to_str().unwrap(),
7069            "rules",
7070            "--key-dir",
7071            key_dir.to_str().unwrap(),
7072            "list",
7073        ])
7074        .unwrap();
7075        run(cli, &cfg).await.unwrap();
7076    }
7077
7078    #[tokio::test]
7079    async fn test_run_dispatch_governance_migrate_command() {
7080        // Covers daemon_runtime::run dispatch arm `Command::Governance(a)`
7081        // (including the inner `GovernanceAction::MigrateToPermissions`
7082        // match arm): exercises the stdout/stderr lock +
7083        // `cli::governance_migrate::run` hand-off. Dry-run is the
7084        // documented default, so we omit --config-out; the migrator
7085        // reads --config-in, parses the legacy `[governance]` block,
7086        // renders the v0.7 `[[permissions.rules]]` to stdout, and
7087        // returns Ok. No filesystem mutation outside the tempdir.
7088        let _g = no_config_env();
7089        let env = TestEnv::fresh();
7090        let cfg_path = env.db_path.parent().unwrap().join("legacy_cfg.toml");
7091        std::fs::write(
7092            &cfg_path,
7093            r#"
7094[governance]
7095
7096[[governance.policy]]
7097scope = "team/eng/*"
7098action = "write"
7099role = "engineer"
7100decision = "allow"
7101"#,
7102        )
7103        .unwrap();
7104        let cfg = AppConfig::default();
7105        let cli = Cli::try_parse_from([
7106            "ai-memory",
7107            "--db",
7108            env.db_path.to_str().unwrap(),
7109            "governance",
7110            "migrate-to-permissions",
7111            "--config-in",
7112            cfg_path.to_str().unwrap(),
7113        ])
7114        .unwrap();
7115        run(cli, &cfg).await.unwrap();
7116    }
7117
7118    // ----- v0.7.0 coverage close: fold-A2A1.4 mTLS bypass on /sync/* ----
7119    //
7120    // The grand-slam cascade landed `e188503` (fold-A2A1.4) which added 61
7121    // lines to `daemon_runtime.rs`: the `mtls_enforced` computation in
7122    // `bootstrap_serve` (true iff all of `--tls-cert`, `--tls-key`, and
7123    // `--mtls-allowlist` are set), the threaded api-key into
7124    // `FederationConfig::build`, and the differentiated tracing message
7125    // when api-key auth is enabled alongside mTLS. The post-cascade
7126    // coverage gate (run 25892100734) caught the regression at 85.60% on
7127    // `daemon_runtime.rs` — below the 86 floor — because the new
7128    // mtls_enforced=true branch + the bypass exit path through the
7129    // router were never entered by an existing test.
7130    //
7131    // The tests below close the gap by:
7132    //   1. Bootstrapping with all three TLS args set + api_key set so the
7133    //      `if mtls_enforced { tracing::info!(...federation endpoints...) }`
7134    //      branch executes and `api_key_state.mtls_enforced` is observed
7135    //      as true on the returned `ServeBootstrap`.
7136    //   2. Bootstrapping with the half-configured cases (cert+key, no
7137    //      allowlist; allowlist alone) to pin the AND-short-circuit on
7138    //      the `mtls_enforced` predicate.
7139    //   3. Driving the `build_router`-wired `api_key_auth` middleware
7140    //      through `daemon_runtime::build_router` with
7141    //      `mtls_enforced=true` so the `/api/v1/sync/...` bypass path is
7142    //      exercised, and asserting a non-`/sync/` path still 401s
7143    //      without the header.
7144    //
7145    // All hermetic: bootstrap_serve does NOT load the TLS cert / key /
7146    // allowlist files (that happens in `serve()` at the rustls config
7147    // site, after this struct is built), so passing non-existent paths
7148    // is sufficient to flip `mtls_enforced` to true without writing
7149    // real certificates.
7150
7151    #[tokio::test]
7152    async fn test_bootstrap_serve_mtls_enforced_true_with_all_three_tls_args() {
7153        // Covers `let mtls_enforced = ... && ... && ...` with the all-Some
7154        // case (true branch). Paired with `api_key = Some(...)` so the
7155        // outer `if api_key_state.key.is_some()` also fires and the
7156        // `if mtls_enforced { ... } else { ... }` chooses the
7157        // federation-bypass log message.
7158        let env = TestEnv::fresh();
7159        let mut cfg = AppConfig::default();
7160        cfg.tier = Some("keyword".to_string());
7161        cfg.api_key = Some("s3cret".to_string());
7162        let mut args = args_with_db(&env.db_path);
7163        // Paths don't need to exist — bootstrap_serve only inspects
7164        // Option presence to compute `mtls_enforced`. The rustls config
7165        // load that would actually read these files lives in `serve()`,
7166        // which we are NOT calling here.
7167        let cert_path = env.db_path.parent().unwrap().join("cert.pem");
7168        let key_path = env.db_path.parent().unwrap().join("key.pem");
7169        let allowlist_path = env.db_path.parent().unwrap().join("allowlist.json");
7170        args.tls_cert = Some(cert_path);
7171        args.tls_key = Some(key_path);
7172        args.mtls_allowlist = Some(allowlist_path);
7173        let bs = bootstrap_serve(&env.db_path, &args, &cfg).await.unwrap();
7174        assert!(
7175            bs.api_key_state.mtls_enforced,
7176            "mtls_enforced should be true when cert+key+allowlist all set"
7177        );
7178        assert_eq!(bs.api_key_state.key.as_deref(), Some("s3cret"));
7179        for h in bs.task_handles {
7180            h.abort();
7181        }
7182    }
7183
7184    #[tokio::test]
7185    async fn test_bootstrap_serve_mtls_enforced_false_when_allowlist_absent() {
7186        // Covers the AND short-circuit: cert+key set, allowlist None →
7187        // `mtls_enforced = false`. This is the TLS-but-no-mTLS
7188        // half-configured case (the `tracing::warn!("TLS enabled but
7189        // mTLS NOT configured …")` path in `serve()`). Bootstrap_serve
7190        // itself just records the flag as false; the `else` arm of the
7191        // api-key log fires.
7192        let env = TestEnv::fresh();
7193        let mut cfg = AppConfig::default();
7194        cfg.tier = Some("keyword".to_string());
7195        cfg.api_key = Some("only-tls".to_string());
7196        let mut args = args_with_db(&env.db_path);
7197        args.tls_cert = Some(env.db_path.parent().unwrap().join("cert.pem"));
7198        args.tls_key = Some(env.db_path.parent().unwrap().join("key.pem"));
7199        // mtls_allowlist intentionally left None.
7200        let bs = bootstrap_serve(&env.db_path, &args, &cfg).await.unwrap();
7201        assert!(
7202            !bs.api_key_state.mtls_enforced,
7203            "mtls_enforced should be false without --mtls-allowlist"
7204        );
7205        assert_eq!(bs.api_key_state.key.as_deref(), Some("only-tls"));
7206        for h in bs.task_handles {
7207            h.abort();
7208        }
7209    }
7210
7211    #[tokio::test]
7212    async fn test_bootstrap_serve_mtls_enforced_false_when_only_allowlist_set() {
7213        // Covers the AND short-circuit: cert/key None, allowlist Some →
7214        // false. (clap's `requires = "tls_cert"` would block this combo
7215        // at the CLI surface, but we're constructing `ServeArgs`
7216        // directly here so the inner predicate is the only gate. This
7217        // pins the predicate behaviour even if a refactor moves the
7218        // validation back to the call site.)
7219        let env = TestEnv::fresh();
7220        let mut cfg = AppConfig::default();
7221        cfg.tier = Some("keyword".to_string());
7222        let mut args = args_with_db(&env.db_path);
7223        args.mtls_allowlist = Some(env.db_path.parent().unwrap().join("allowlist.json"));
7224        // tls_cert and tls_key intentionally None.
7225        let bs = bootstrap_serve(&env.db_path, &args, &cfg).await.unwrap();
7226        assert!(
7227            !bs.api_key_state.mtls_enforced,
7228            "mtls_enforced should be false without --tls-cert"
7229        );
7230        for h in bs.task_handles {
7231            h.abort();
7232        }
7233    }
7234
7235    #[tokio::test]
7236    async fn test_bootstrap_serve_mtls_enforced_with_federation_threads_api_key() {
7237        // Joint exercise of the two fold-A2A1.4 surfaces in one
7238        // bootstrap: federation outbound carries the configured
7239        // `[api] api_key` (line ~2155, `app_config.api_key.clone()` into
7240        // `FederationConfig::build`) AND `mtls_enforced` is true.
7241        // Confirms both the api_key thread-through and the new tracing
7242        // message are activated together — the exact procurement-grade
7243        // deployment shape #702 was filed for.
7244        let env = TestEnv::fresh();
7245        let mut cfg = AppConfig::default();
7246        cfg.tier = Some("keyword".to_string());
7247        cfg.api_key = Some("fed-key".to_string());
7248        let mut args = args_with_db(&env.db_path);
7249        args.tls_cert = Some(env.db_path.parent().unwrap().join("cert.pem"));
7250        args.tls_key = Some(env.db_path.parent().unwrap().join("key.pem"));
7251        args.mtls_allowlist = Some(env.db_path.parent().unwrap().join("allowlist.json"));
7252        args.quorum_writes = 1;
7253        args.quorum_peers = vec!["http://127.0.0.1:65520".to_string()];
7254        args.quorum_timeout_ms = 100;
7255        let bs = bootstrap_serve(&env.db_path, &args, &cfg).await.unwrap();
7256        assert!(bs.api_key_state.mtls_enforced);
7257        assert_eq!(bs.api_key_state.key.as_deref(), Some("fed-key"));
7258        assert!(
7259            bs.app_state.federation.is_some(),
7260            "federation should be wired when quorum_writes>0 and peers nonempty"
7261        );
7262        for h in bs.task_handles {
7263            h.abort();
7264        }
7265    }
7266
7267    // ----- v0.7.0 coverage close: api_key_auth bypass through build_router ---
7268    //
7269    // Drives the `api_key_auth` middleware path with `mtls_enforced=true`
7270    // and a configured key. Two probes:
7271    //   - `/api/v1/sync/push` without `x-api-key` should be admitted to
7272    //     the handler stack (the federation-bypass arm). The handler
7273    //     itself rejects on payload shape, but the status is not 401 —
7274    //     proving the bypass fired.
7275    //   - `/api/v1/memories` without `x-api-key` should still 401, since
7276    //     the bypass is scoped to `/api/v1/sync/*`.
7277
7278    #[tokio::test]
7279    async fn test_build_router_with_mtls_enforced_allows_sync_without_api_key() {
7280        let env = TestEnv::fresh();
7281        let app_state = keyword_app_state(&env.db_path);
7282        let api_key_state = ApiKeyState {
7283            key: Some("s3cret".to_string()),
7284            mtls_enforced: true,
7285        };
7286        let router = build_router(app_state, api_key_state);
7287        // POST /api/v1/sync/push with empty body — the api_key_auth
7288        // middleware should NOT 401 (bypass scope hit). The downstream
7289        // handler will likely return 400/415/422 for a malformed body;
7290        // anything other than 401 proves the bypass executed.
7291        let resp = router
7292            .oneshot(
7293                Request::builder()
7294                    .method("POST")
7295                    .uri("/api/v1/sync/push")
7296                    .header(crate::HEADER_CONTENT_TYPE, crate::MIME_JSON)
7297                    .body(Body::from("{}"))
7298                    .unwrap(),
7299            )
7300            .await
7301            .unwrap();
7302        assert_ne!(
7303            resp.status(),
7304            StatusCode::UNAUTHORIZED,
7305            "expected /sync/* to bypass api-key with mtls_enforced=true, got 401"
7306        );
7307    }
7308
7309    #[tokio::test]
7310    async fn test_build_router_with_mtls_enforced_still_requires_key_on_non_sync() {
7311        let env = TestEnv::fresh();
7312        let app_state = keyword_app_state(&env.db_path);
7313        let api_key_state = ApiKeyState {
7314            key: Some("s3cret".to_string()),
7315            mtls_enforced: true,
7316        };
7317        let router = build_router(app_state, api_key_state);
7318        // GET /api/v1/memories without x-api-key — bypass is scoped to
7319        // /api/v1/sync/*, so this should still 401.
7320        let resp = router
7321            .oneshot(
7322                Request::builder()
7323                    .method("GET")
7324                    .uri("/api/v1/memories")
7325                    .body(Body::empty())
7326                    .unwrap(),
7327            )
7328            .await
7329            .unwrap();
7330        assert_eq!(
7331            resp.status(),
7332            StatusCode::UNAUTHORIZED,
7333            "non-/sync/ path must still demand x-api-key even with mtls_enforced"
7334        );
7335    }
7336
7337    #[tokio::test]
7338    async fn test_build_router_with_mtls_off_does_not_bypass_sync() {
7339        // Pins the negative: mtls_enforced=false → /sync/* WITHOUT the
7340        // header still gets 401. This is the v0.6.x backward-compatible
7341        // posture (api-key required on every path when set, no bypass).
7342        let env = TestEnv::fresh();
7343        let app_state = keyword_app_state(&env.db_path);
7344        let api_key_state = ApiKeyState {
7345            key: Some("s3cret".to_string()),
7346            mtls_enforced: false,
7347        };
7348        let router = build_router(app_state, api_key_state);
7349        let resp = router
7350            .oneshot(
7351                Request::builder()
7352                    .method("POST")
7353                    .uri("/api/v1/sync/push")
7354                    .header(crate::HEADER_CONTENT_TYPE, crate::MIME_JSON)
7355                    .body(Body::from("{}"))
7356                    .unwrap(),
7357            )
7358            .await
7359            .unwrap();
7360        assert_eq!(
7361            resp.status(),
7362            StatusCode::UNAUTHORIZED,
7363            "without mtls_enforced, /sync/* must still demand x-api-key"
7364        );
7365    }
7366
7367    #[tokio::test]
7368    async fn test_build_router_with_mtls_enforced_accepts_valid_key_on_non_sync() {
7369        // Defense-in-depth: even with mtls_enforced=true, supplying the
7370        // correct key on a non-/sync/ path still succeeds. Pins that
7371        // the bypass branch does not steal requests that legitimately
7372        // carry the header.
7373        let env = TestEnv::fresh();
7374        let app_state = keyword_app_state(&env.db_path);
7375        let api_key_state = ApiKeyState {
7376            key: Some("s3cret".to_string()),
7377            mtls_enforced: true,
7378        };
7379        let router = build_router(app_state, api_key_state);
7380        let resp = router
7381            .oneshot(
7382                Request::builder()
7383                    .method("GET")
7384                    .uri("/api/v1/memories")
7385                    .header("x-api-key", "s3cret")
7386                    .body(Body::empty())
7387                    .unwrap(),
7388            )
7389            .await
7390            .unwrap();
7391        assert!(
7392            resp.status().is_success(),
7393            "valid api-key on non-/sync/ path should succeed, got {}",
7394            resp.status()
7395        );
7396    }
7397
7398    // -----------------------------------------------------------------
7399    // v0.7-polish coverage recovery (issue #767) — Cluster D + G wires:
7400    // spawn_gc_loop_with_shadow_retention, spawn_transcript_lifecycle_
7401    // sweep_loop, spawn_agent_quota_reset_loop. Smoke-tests that prove
7402    // the loops spawn, abort cleanly, and tolerate a clean state.
7403    // -----------------------------------------------------------------
7404
7405    #[tokio::test]
7406    async fn test_spawn_gc_loop_with_shadow_retention_runs_and_can_be_aborted() {
7407        let env = TestEnv::fresh();
7408        let conn = db::open(&env.db_path).unwrap();
7409        let state: Db = Arc::new(Mutex::new((
7410            conn,
7411            env.db_path.clone(),
7412            ResolvedTtl::default(),
7413            true,
7414        )));
7415        // Long interval — we just want the spawn + abort cycle.
7416        let h = spawn_gc_loop_with_shadow_retention(state, Some(30), 7, Duration::from_secs(60));
7417        // Give it a brief moment to enter the loop body.
7418        tokio::time::sleep(Duration::from_millis(20)).await;
7419        h.abort();
7420        let _ = h.await;
7421    }
7422
7423    #[tokio::test]
7424    async fn test_spawn_gc_loop_with_shadow_retention_zero_days_is_opt_out() {
7425        // shadow_retention_days <= 0 should be tolerated — the shadow
7426        // gc helper short-circuits without touching the table.
7427        let env = TestEnv::fresh();
7428        let conn = db::open(&env.db_path).unwrap();
7429        let state: Db = Arc::new(Mutex::new((
7430            conn,
7431            env.db_path.clone(),
7432            ResolvedTtl::default(),
7433            true,
7434        )));
7435        let h = spawn_gc_loop_with_shadow_retention(
7436            state,
7437            None,
7438            0, // operator opt-out
7439            Duration::from_secs(60),
7440        );
7441        tokio::time::sleep(Duration::from_millis(20)).await;
7442        h.abort();
7443        let _ = h.await;
7444    }
7445
7446    #[tokio::test]
7447    async fn test_spawn_transcript_lifecycle_sweep_loop_runs_and_can_be_aborted() {
7448        let env = TestEnv::fresh();
7449        let conn = db::open(&env.db_path).unwrap();
7450        let state: Db = Arc::new(Mutex::new((
7451            conn,
7452            env.db_path.clone(),
7453            ResolvedTtl::default(),
7454            true,
7455        )));
7456        let cfg = crate::config::TranscriptsConfig::default();
7457        let h = spawn_transcript_lifecycle_sweep_loop(state, cfg, Duration::from_secs(60));
7458        tokio::time::sleep(Duration::from_millis(20)).await;
7459        h.abort();
7460        let _ = h.await;
7461    }
7462
7463    #[tokio::test]
7464    async fn test_spawn_agent_quota_reset_loop_runs_and_can_be_aborted() {
7465        let env = TestEnv::fresh();
7466        let conn = db::open(&env.db_path).unwrap();
7467        let state: Db = Arc::new(Mutex::new((
7468            conn,
7469            env.db_path.clone(),
7470            ResolvedTtl::default(),
7471            true,
7472        )));
7473        let h = spawn_agent_quota_reset_loop(state, Duration::from_secs(60));
7474        tokio::time::sleep(Duration::from_millis(20)).await;
7475        h.abort();
7476        let _ = h.await;
7477    }
7478
7479    #[tokio::test]
7480    async fn test_bootstrap_serve_sec2_fail_closed_when_pubkey_missing_and_rules_enabled() {
7481        // v0.7.0 SEC-2 (Cluster D) — when `[governance]
7482        // require_operator_pubkey = true` AND `governance_rules` has
7483        // any `enabled = 1` row AND no operator pubkey is resolved,
7484        // bootstrap_serve MUST refuse to start. This pins the
7485        // fail-closed posture documented at lines 2118-2153 in
7486        // bootstrap_serve.
7487        //
7488        // Dev-host hermeticity (issue #1370, 2026-05-27). The test
7489        // pre-#1370 cleared `AI_MEMORY_OPERATOR_PUBKEY` but did not
7490        // engage the `ForceNoOperatorPubkeyGuard` escape hatch added
7491        // under issue #819. `resolve_operator_pubkey()` checks TWO
7492        // sources — the env var AND `~/.config/ai-memory/operator.key.pub`
7493        // on disk (via `dirs::config_dir()`). On a dev host that has
7494        // staged a real operator pubkey at the platform config dir
7495        // (e.g. `~/Library/Application Support/ai-memory/` on macOS),
7496        // the on-disk lookup wins, `pubkey_resolved = true`, and the
7497        // SEC-2 fail-closed bail at `bootstrap_serve` never fires.
7498        // CI passes on clean-HOME runners; local fails. The guard
7499        // below forces `resolve_operator_pubkey()` to return None
7500        // for the test scope, matching the CI posture deterministically.
7501        let _no_pubkey_guard = crate::governance::rules_store::force_no_operator_pubkey_for_test();
7502        let _gate = env_var_lock();
7503        let env = TestEnv::fresh();
7504        let conn = db::open(&env.db_path).unwrap();
7505        // Create the governance_rules table + insert one enabled row.
7506        conn.execute_batch(
7507            "CREATE TABLE IF NOT EXISTS governance_rules (
7508                 id TEXT PRIMARY KEY,
7509                 kind TEXT NOT NULL,
7510                 matcher TEXT NOT NULL,
7511                 severity TEXT NOT NULL CHECK (severity IN ('refuse','warn','log')),
7512                 reason TEXT NOT NULL,
7513                 namespace TEXT NOT NULL DEFAULT '_global',
7514                 created_by TEXT NOT NULL,
7515                 created_at INTEGER NOT NULL,
7516                 enabled INTEGER NOT NULL DEFAULT 1,
7517                 signature BLOB,
7518                 attest_level TEXT NOT NULL DEFAULT 'unsigned'
7519             );",
7520        )
7521        .unwrap();
7522        conn.execute(
7523            "INSERT INTO governance_rules (id, kind, matcher, severity, reason, created_by, created_at)
7524             VALUES ('R1', 'bash', '{\"k\":\"v\"}', 'refuse', 'test', 'tester', 100)",
7525            [],
7526        )
7527        .unwrap();
7528        drop(conn);
7529        // Build cfg with require_operator_pubkey = true.
7530        let mut cfg = AppConfig::default();
7531        cfg.tier = Some("keyword".to_string());
7532        cfg.governance = Some(crate::config::GovernanceConfig {
7533            require_operator_pubkey: true,
7534        });
7535        // Ensure no pubkey is resolved by clearing the env var.
7536        let prior = std::env::var("AI_MEMORY_OPERATOR_PUBKEY").ok();
7537        unsafe { std::env::remove_var("AI_MEMORY_OPERATOR_PUBKEY") };
7538
7539        let args = args_with_db(&env.db_path);
7540        let res = bootstrap_serve(&env.db_path, &args, &cfg).await;
7541        // Restore env.
7542        if let Some(v) = prior {
7543            unsafe { std::env::set_var("AI_MEMORY_OPERATOR_PUBKEY", v) };
7544        }
7545        let err = match res {
7546            Err(e) => format!("{e:#}"),
7547            Ok(_) => panic!("expected SEC-2 fail-closed refusal"),
7548        };
7549        assert!(
7550            err.contains("SEC-2 fail-closed") || err.contains("require_operator_pubkey"),
7551            "got: {err}"
7552        );
7553    }
7554
7555    #[tokio::test]
7556    async fn test_build_llm_client_returns_none_for_keyword_tier() {
7557        // FeatureTier::Keyword has no llm_model, so the early-return
7558        // path fires without spawning any blocking work.
7559        // FX-F1: hold the env-guard so concurrent tests can't flip
7560        // AI_MEMORY_LLM_BACKEND under us mid-resolve.
7561        let _guard = env_var_lock();
7562        fx_f1_clear_llm_env();
7563        let cfg = AppConfig::default();
7564        let res = build_llm_client(FeatureTier::Keyword, &cfg).await;
7565        assert!(res.is_none(), "keyword tier must not build an LLM client");
7566    }
7567
7568    #[tokio::test]
7569    async fn test_build_llm_client_returns_none_when_ollama_unreachable() {
7570        // Smart tier requires LLM, but pointing at an unreachable URL
7571        // exercises the constructor-error path (final Err arm).
7572        let _guard = env_var_lock();
7573        fx_f1_clear_llm_env();
7574        let mut cfg = AppConfig::default();
7575        cfg.ollama_url = Some("http://127.0.0.1:1".to_string());
7576        let res = build_llm_client(FeatureTier::Smart, &cfg).await;
7577        // Either Some (constructor still returns Ok if it doesn't ping)
7578        // or None — both are valid: the assert proves the function does
7579        // not panic on an unreachable URL.
7580        let _ = res;
7581    }
7582
7583    #[test]
7584    fn test_build_vector_index_returns_some_when_embedder_present_and_db_empty() {
7585        // The else-branch of build_vector_index — when the embedder is
7586        // present and no rows exist, the helper still returns Some
7587        // (empty index). Already pinned by an existing test; this one
7588        // pins the explicit "some-non-empty" path by inserting a memory
7589        // with an embedding first.
7590        let env = TestEnv::fresh();
7591        let conn = db::open(&env.db_path).unwrap();
7592        let mem = crate::models::Memory {
7593            id: "vi-1".to_string(),
7594            tier: crate::models::Tier::Mid,
7595            namespace: "test".to_string(),
7596            title: "t".to_string(),
7597            content: "c".to_string(),
7598            tags: vec![],
7599            priority: 5,
7600            confidence: 1.0,
7601            source: "test".to_string(),
7602            access_count: 0,
7603            created_at: chrono::Utc::now().to_rfc3339(),
7604            updated_at: chrono::Utc::now().to_rfc3339(),
7605            last_accessed_at: None,
7606            expires_at: None,
7607            metadata: crate::models::default_metadata(),
7608            reflection_depth: 0,
7609            memory_kind: crate::models::MemoryKind::Observation,
7610            entity_id: None,
7611            persona_version: None,
7612            citations: Vec::new(),
7613            source_uri: None,
7614            source_span: None,
7615            confidence_source: crate::models::ConfidenceSource::CallerProvided,
7616            confidence_signals: None,
7617            confidence_decayed_at: None,
7618            version: 1,
7619        };
7620        let inserted_id = db::insert(&conn, &mem).unwrap();
7621        // Write a real-length embedding (384 dims of f32).
7622        let vec_data: Vec<f32> = (0..384).map(|i| i as f32 * 0.001).collect();
7623        db::set_embedding(&conn, &inserted_id, &vec_data).unwrap();
7624        let idx = build_vector_index(&conn, true);
7625        assert!(idx.is_some());
7626    }
7627
7628    // ===========================================================================
7629    // Issue #1169 — resolve_configured_embedding_dim resolution ladder
7630    // ===========================================================================
7631    //
7632    // These tests exercise the helper extracted from the postgres-bootstrap
7633    // path so the new code lands within the daemon_runtime.rs coverage floor.
7634    // The three resolution-ladder arms (resolver, legacy enum, tier preset)
7635    // are each pinned independently.
7636
7637    /// v0.7.x (#1169) — operator picks a model that's in
7638    /// [`crate::config::KNOWN_EMBEDDING_DIMS`]. The first arm of the
7639    /// ladder (resolver) wins and returns the canonical dim.
7640    #[cfg(feature = "sal")]
7641    #[test]
7642    fn resolve_configured_embedding_dim_resolver_arm_wins_for_known_model() {
7643        use crate::config::{AppConfig, EmbeddingsSection, FeatureTier};
7644
7645        let cfg = AppConfig {
7646            embeddings: Some(EmbeddingsSection {
7647                backend: Some("ollama".to_string()),
7648                model: Some("bge-large-en".to_string()),
7649                ..EmbeddingsSection::default()
7650            }),
7651            ..AppConfig::default()
7652        };
7653        let tier_config = FeatureTier::Autonomous.config();
7654        let dim = resolve_configured_embedding_dim(&cfg, &tier_config);
7655        assert_eq!(
7656            dim,
7657            Some(1024),
7658            "bge-large-en is in KNOWN_EMBEDDING_DIMS at 1024-dim; resolver wins"
7659        );
7660    }
7661
7662    /// v0.7.x (#1169) — operator leaves the new `[embeddings]` section
7663    /// unset AND has the legacy flat field `embedding_model =
7664    /// "nomic_embed_v15"`. The first arm returns the canonicalised
7665    /// resolver dim (the canonicaliser maps `nomic_embed_v15` to
7666    /// `nomic-embed-text-v1.5` which IS in the table) — so the
7667    /// resolver arm still wins, validating that the legacy alias path
7668    /// composes cleanly with the resolver.
7669    #[cfg(feature = "sal")]
7670    #[test]
7671    fn resolve_configured_embedding_dim_handles_legacy_alias_via_resolver() {
7672        use crate::config::{AppConfig, FeatureTier};
7673
7674        let cfg = AppConfig {
7675            embedding_model: Some("nomic_embed_v15".to_string()),
7676            ..AppConfig::default()
7677        };
7678        let tier_config = FeatureTier::Autonomous.config();
7679        let dim = resolve_configured_embedding_dim(&cfg, &tier_config);
7680        assert_eq!(
7681            dim,
7682            Some(768),
7683            "legacy alias nomic_embed_v15 canonicalises to nomic-embed-text-v1.5 (768)"
7684        );
7685    }
7686
7687    /// v0.7.x (#1169) — operator hasn't configured embeddings at all
7688    /// AND the tier preset has an embedder family — the tier-preset
7689    /// arm is the last-resort fallback.
7690    #[cfg(feature = "sal")]
7691    #[test]
7692    fn resolve_configured_embedding_dim_falls_back_to_tier_preset_when_no_override() {
7693        use crate::config::{AppConfig, FeatureTier};
7694
7695        let cfg = AppConfig::default();
7696        let tier_config = FeatureTier::Autonomous.config();
7697        let dim = resolve_configured_embedding_dim(&cfg, &tier_config);
7698        // Autonomous tier preset is NomicEmbedV15 (768). The resolver
7699        // also defaults to nomic-embed-text-v1.5 → 768 via the
7700        // KNOWN_EMBEDDING_DIMS table, so either arm gives the same
7701        // answer for the no-config case.
7702        assert_eq!(dim, Some(768));
7703    }
7704
7705    /// v0.7.x (#1169) — keyword tier has no embedder; resolver returns
7706    /// `None` (and the postgres bootstrap then uses its hardcoded
7707    /// `DEFAULT_EMBEDDING_DIM` fallback per the
7708    /// `configured_embedding_dim` doc comment on `build_store_handle`).
7709    #[cfg(feature = "sal")]
7710    #[test]
7711    fn resolve_configured_embedding_dim_returns_none_for_keyword_tier() {
7712        use crate::config::{AppConfig, FeatureTier};
7713
7714        let cfg = AppConfig::default();
7715        let tier_config = FeatureTier::Keyword.config();
7716        let dim = resolve_configured_embedding_dim(&cfg, &tier_config);
7717        // Keyword tier preset has `embedding_model = None`. The
7718        // resolver still returns `Some(768)` from the
7719        // canonical-default model id — that's the correct behavior
7720        // because the operator can ALWAYS use an embedder regardless
7721        // of tier preset; the tier preset only controls reranker /
7722        // synthesis primitives. The keyword-tier-disabled-embedder
7723        // posture is enforced at the `build_embedder` site, NOT
7724        // here. This test pins that subtlety: when the operator's
7725        // config has no [embeddings] block AND no legacy flat field
7726        // AND the tier preset disables embeddings, the resolver
7727        // still defaults to "nomic-embed-text-v1.5" (the wire-side
7728        // default at `resolve_embeddings`) — which IS in the table
7729        // — so the function returns `Some(768)` even on keyword
7730        // tier. The postgres-bootstrap caller treats that as the
7731        // configured dim regardless; pre-loading an unused 768-dim
7732        // pgvector column is operationally cheap.
7733        assert_eq!(dim, Some(768));
7734    }
7735
7736    /// v0.7.x (#1169) — operator picks a model that's NOT in
7737    /// [`crate::config::KNOWN_EMBEDDING_DIMS`] AND uses the new
7738    /// `[embeddings]` block (so the legacy flat field is absent).
7739    /// The resolver returns `None`; the legacy arm can't parse the
7740    /// model into the enum; the tier-preset arm wins as the final
7741    /// fallback. Pins the back-compat invariant for unrecognised
7742    /// model ids: pre-#1169 callers who relied on a number being
7743    /// present continue to see one.
7744    #[cfg(feature = "sal")]
7745    #[test]
7746    fn resolve_configured_embedding_dim_unknown_model_falls_to_tier_preset() {
7747        use crate::config::{AppConfig, EmbeddingsSection, FeatureTier};
7748
7749        let cfg = AppConfig {
7750            embeddings: Some(EmbeddingsSection {
7751                backend: Some("ollama".to_string()),
7752                model: Some("my-private-fork-v0.1".to_string()),
7753                ..EmbeddingsSection::default()
7754            }),
7755            ..AppConfig::default()
7756        };
7757        let tier_config = FeatureTier::Autonomous.config();
7758        let dim = resolve_configured_embedding_dim(&cfg, &tier_config);
7759        // Autonomous tier preset (NomicEmbedV15) → 768.
7760        assert_eq!(dim, Some(768));
7761    }
7762
7763    // ===========================================================================
7764    // FX-F1 (2026-05-27) — coverage uplift for the FX-D1 `build_llm_client`
7765    // overhaul. The pre-FX-F1 surface had two thin async tests
7766    // (Keyword early-return + Smart unreachable URL). FX-F1 adds the
7767    // missing branches: explicit operator-intent (Legacy / Config /
7768    // Env source via `ollama_url` or `llm.backend`), the Semantic
7769    // early-return path, every LLM backend's no-key Err arm, and an
7770    // Ollama happy-path through `build_from_resolved_async` against a
7771    // wiremock-backed `/api/tags` endpoint. Target floor for the file:
7772    // 85% (was 83.83% pre-FX-F1 per FX-F1 dispatch — the +1.17pp gap
7773    // closes by exercising the async ladder end-to-end).
7774    //
7775    // The env-mutating tests below serialise on the module-canonical
7776    // `env_var_lock()` defined above (line 4505) — the same mutex the
7777    // pre-existing env-touching tests (`test_anonymize_unchanged_when_env_already_set`,
7778    // `test_anonymize_unchanged_when_config_false`, etc.) already hold.
7779    // FX-F1 first added a parallel `FX_F1_ENV_GUARD` mutex for these
7780    // tests; that turned out to race the pre-existing tests because
7781    // independent mutexes don't serialise against each other (issue
7782    // surfaced by the QC pass on the FX-F1 patch, 2026-05-27).
7783
7784    /// SAFETY: env-var mutation is unsynchronised across threads at
7785    /// the OS level. `env_var_lock` serialises mutation across this
7786    /// test region so the unsafe is sound for the duration of each
7787    /// test that holds the guard. The cleared keys match every
7788    /// resolver ingress that `build_llm_client` and
7789    /// `build_from_resolved_async` consult.
7790    fn fx_f1_clear_llm_env() {
7791        for k in [
7792            "AI_MEMORY_LLM_BACKEND",
7793            "AI_MEMORY_LLM_MODEL",
7794            "AI_MEMORY_LLM_BASE_URL",
7795            "AI_MEMORY_LLM_API_KEY",
7796            "OLLAMA_BASE_URL",
7797            "XAI_API_KEY",
7798            "OPENAI_API_KEY",
7799            "ANTHROPIC_API_KEY",
7800            "GEMINI_API_KEY",
7801            "GOOGLE_API_KEY",
7802            "DEEPSEEK_API_KEY",
7803            "MOONSHOT_API_KEY",
7804            "KIMI_API_KEY",
7805            "DASHSCOPE_API_KEY",
7806            "QWEN_API_KEY",
7807            "MISTRAL_API_KEY",
7808            "GROQ_API_KEY",
7809            "TOGETHER_API_KEY",
7810            "CEREBRAS_API_KEY",
7811            "OPENROUTER_API_KEY",
7812            "FIREWORKS_API_KEY",
7813        ] {
7814            // SAFETY: guarded by env_var_lock at call sites.
7815            unsafe { std::env::remove_var(k) };
7816        }
7817    }
7818    // ===========================================================================
7819
7820    /// FX-F1 — Semantic tier has `llm_model = None` (per tier preset),
7821    /// so when `source = CompiledDefault` the early-return arm fires.
7822    /// Pins the second of the two "tier has no llm_model + no operator
7823    /// intent" arms; the Keyword variant is pinned above.
7824    #[tokio::test]
7825    async fn test_build_llm_client_semantic_tier_compiled_default_returns_none() {
7826        let _guard = env_var_lock();
7827        fx_f1_clear_llm_env();
7828        let cfg = AppConfig::default();
7829        let res = build_llm_client(FeatureTier::Semantic, &cfg).await;
7830        assert!(
7831            res.is_none(),
7832            "semantic tier with no operator config must short-circuit to None"
7833        );
7834    }
7835
7836    /// FX-F1 — Autonomous tier with no operator config and unreachable
7837    /// Ollama URL → resolver winds up with `Legacy` source (because
7838    /// `ollama_url` is set), bypasses the early-return arm, and falls
7839    /// through to the async constructor which returns Err (treated as
7840    /// None). Exercises the `Err(_)` match arm of `build_llm_client`.
7841    #[tokio::test]
7842    async fn test_build_llm_client_autonomous_tier_unreachable_ollama_returns_none() {
7843        let _guard = env_var_lock();
7844        fx_f1_clear_llm_env();
7845        let mut cfg = AppConfig::default();
7846        cfg.ollama_url = Some("http://127.0.0.1:1".to_string());
7847        let res = build_llm_client(FeatureTier::Autonomous, &cfg).await;
7848        // Unreachable endpoint → Err from new_with_url_async → None.
7849        assert!(
7850            res.is_none(),
7851            "autonomous tier against unreachable ollama must surface as None"
7852        );
7853    }
7854
7855    /// FX-F1 — Smart tier with an `llm.backend = "xai"` config section
7856    /// (no API key available) drives the resolver to `Config` source
7857    /// → bypasses the early-return → `build_from_resolved_async`
7858    /// returns the missing-API-key Err → mapped to None. Pins the
7859    /// non-Ollama-no-key path in build_llm_client.
7860    #[tokio::test]
7861    async fn test_build_llm_client_xai_backend_without_api_key_returns_none() {
7862        let _guard = env_var_lock();
7863        fx_f1_clear_llm_env();
7864        use crate::config::LlmSection;
7865        let mut cfg = AppConfig::default();
7866        cfg.llm = Some(LlmSection {
7867            backend: Some("xai".to_string()),
7868            model: Some("grok-4.3".to_string()),
7869            api_key_env: Some("AI_MEMORY_FX_F1_NEVER_SET_XAI_KEY".to_string()),
7870            ..LlmSection::default()
7871        });
7872        let res = build_llm_client(FeatureTier::Smart, &cfg).await;
7873        assert!(
7874            res.is_none(),
7875            "xai backend without API key MUST map to None (Err path)"
7876        );
7877    }
7878
7879    /// FX-F1 — Happy-path: Smart tier with `ollama_url` pointed at a
7880    /// wiremock-backed `/api/tags` endpoint. Resolver lands on the
7881    /// `Legacy` source (operator set `ollama_url`), bypasses the
7882    /// early-return, calls `build_from_resolved_async` which calls
7883    /// `new_with_url_async` against the mock — the health probe
7884    /// returns 200, so the constructor returns Ok(Some). The
7885    /// `Ok(Some(_))` arm of build_llm_client is exercised.
7886    #[tokio::test(flavor = "multi_thread")]
7887    async fn test_build_llm_client_ollama_happy_path_against_wiremock() {
7888        let _guard = env_var_lock();
7889        fx_f1_clear_llm_env();
7890        use wiremock::matchers::{method, path};
7891        use wiremock::{Mock, MockServer, ResponseTemplate};
7892        let server = MockServer::start().await;
7893        Mock::given(method("GET"))
7894            .and(path("/api/tags"))
7895            .respond_with(ResponseTemplate::new(200).set_body_string(r#"{"models":[]}"#))
7896            .mount(&server)
7897            .await;
7898        let mut cfg = AppConfig::default();
7899        cfg.ollama_url = Some(server.uri());
7900        cfg.llm_model = Some("test-model".to_string());
7901        let res = build_llm_client(FeatureTier::Smart, &cfg).await;
7902        assert!(
7903            res.is_some(),
7904            "wiremock-backed /api/tags must drive build_llm_client to Some"
7905        );
7906    }
7907
7908    /// FX-F1 — `build_from_resolved_async` Ollama arm directly. Mirrors
7909    /// the sync test in `llm::tests::*` but exercises the FX-D1 async
7910    /// sibling against a wiremock-backed endpoint. Pins the happy path.
7911    #[tokio::test(flavor = "multi_thread")]
7912    async fn test_build_from_resolved_async_ollama_happy_path() {
7913        let _guard = env_var_lock();
7914        fx_f1_clear_llm_env();
7915        use wiremock::matchers::{method, path};
7916        use wiremock::{Mock, MockServer, ResponseTemplate};
7917        let server = MockServer::start().await;
7918        Mock::given(method("GET"))
7919            .and(path("/api/tags"))
7920            .respond_with(ResponseTemplate::new(200).set_body_string(r#"{"models":[]}"#))
7921            .mount(&server)
7922            .await;
7923        let mut cfg = AppConfig::default();
7924        cfg.ollama_url = Some(server.uri());
7925        cfg.llm_model = Some("test-model".to_string());
7926        let resolved = cfg.resolve_llm(None, None, None);
7927        let client = crate::llm::OllamaClient::build_from_resolved_async(&resolved)
7928            .await
7929            .expect("build_from_resolved_async must succeed against healthy /api/tags");
7930        assert!(client.is_some());
7931        assert!(client.unwrap().is_ollama_native());
7932    }
7933
7934    /// FX-F1 — `build_from_resolved_async` Ollama arm against an
7935    /// unreachable URL (TCP RST). Pins the Err return path so the
7936    /// caller's `Ok(Some)/Ok(None)/Err` match still routes the failure
7937    /// without a panic.
7938    #[tokio::test(flavor = "multi_thread")]
7939    async fn test_build_from_resolved_async_ollama_unreachable_errs() {
7940        let _guard = env_var_lock();
7941        fx_f1_clear_llm_env();
7942        use std::net::TcpListener;
7943        let listener = TcpListener::bind("127.0.0.1:0").unwrap();
7944        let port = listener.local_addr().unwrap().port();
7945        drop(listener);
7946        let mut cfg = AppConfig::default();
7947        cfg.ollama_url = Some(format!("http://127.0.0.1:{port}"));
7948        cfg.llm_model = Some("test-model".to_string());
7949        let resolved = cfg.resolve_llm(None, None, None);
7950        let res = crate::llm::OllamaClient::build_from_resolved_async(&resolved).await;
7951        assert!(
7952            res.is_err(),
7953            "unreachable Ollama endpoint MUST surface as Err"
7954        );
7955    }
7956
7957    /// FX-F1 — `build_from_resolved_async` non-Ollama branch where the
7958    /// resolver could not produce an API key. Pins the missing-key Err
7959    /// arm with the canonical error-message pattern.
7960    #[tokio::test(flavor = "multi_thread")]
7961    async fn test_build_from_resolved_async_non_ollama_missing_key_errs() {
7962        let _guard = env_var_lock();
7963        fx_f1_clear_llm_env();
7964        use crate::config::LlmSection;
7965        let mut cfg = AppConfig::default();
7966        cfg.llm = Some(LlmSection {
7967            backend: Some("anthropic".to_string()),
7968            model: Some("claude-opus-4.7".to_string()),
7969            api_key_env: Some("AI_MEMORY_FX_F1_NEVER_SET_ANTHROPIC_KEY".to_string()),
7970            ..LlmSection::default()
7971        });
7972        let resolved = cfg.resolve_llm(None, None, None);
7973        let res = crate::llm::OllamaClient::build_from_resolved_async(&resolved).await;
7974        let err = match res {
7975            Err(e) => e,
7976            Ok(_) => panic!("anthropic backend without API key MUST Err"),
7977        };
7978        let msg = format!("{err}");
7979        assert!(
7980            msg.contains("requires an API key"),
7981            "missing-key error must cite the API key requirement; got: {msg}"
7982        );
7983    }
7984
7985    /// FX-F1 — `build_from_resolved_async` non-Ollama branch with an
7986    /// API key resolves to `Ok(Some)` because
7987    /// `new_openai_compatible` does no I/O at construct time. Pins
7988    /// the happy path on the OpenAI-compatible arm.
7989    #[tokio::test(flavor = "multi_thread")]
7990    async fn test_build_from_resolved_async_non_ollama_with_key_returns_some() {
7991        let _guard = env_var_lock();
7992        fx_f1_clear_llm_env();
7993        use crate::config::LlmSection;
7994        // Use a private env var that no other test touches; set it just
7995        // long enough for the resolver to pick it up, then unset.
7996        let env_name = "AI_MEMORY_FX_F1_OPENAI_KEY";
7997        // SAFETY: env mutation guarded by env_var_lock; restored below.
7998        unsafe { std::env::set_var(env_name, "sk-test-fx-f1-fake-key") };
7999        let mut cfg = AppConfig::default();
8000        cfg.llm = Some(LlmSection {
8001            backend: Some("openai".to_string()),
8002            model: Some("gpt-5".to_string()),
8003            api_key_env: Some(env_name.to_string()),
8004            ..LlmSection::default()
8005        });
8006        let resolved = cfg.resolve_llm(None, None, None);
8007        let res = crate::llm::OllamaClient::build_from_resolved_async(&resolved).await;
8008        unsafe { std::env::remove_var(env_name) };
8009        let client = res.expect("openai backend with key MUST return Ok");
8010        assert!(
8011            client.is_some(),
8012            "build_from_resolved_async with key MUST produce Some(client)"
8013        );
8014        assert!(
8015            !client.unwrap().is_ollama_native(),
8016            "openai backend must NOT report ollama-native"
8017        );
8018    }
8019
8020    /// FX-F1 — exercises the `Env` source bypass of the
8021    /// `build_llm_client` early-return arm: operator sets
8022    /// `AI_MEMORY_LLM_BACKEND=ollama` + `AI_MEMORY_LLM_BASE_URL`
8023    /// pointing at an unreachable endpoint. Resolver source = Env →
8024    /// no early-return → constructor errors → mapped to None
8025    /// (Err→None arm in build_llm_client).
8026    #[tokio::test]
8027    async fn test_build_llm_client_env_backend_unreachable_returns_none() {
8028        let _guard = env_var_lock();
8029        fx_f1_clear_llm_env();
8030        // SAFETY: env mutation guarded by env_var_lock; cleared below.
8031        unsafe {
8032            std::env::set_var("AI_MEMORY_LLM_BACKEND", "ollama");
8033            std::env::set_var("AI_MEMORY_LLM_BASE_URL", "http://127.0.0.1:1");
8034        }
8035        let cfg = AppConfig::default();
8036        let res = build_llm_client(FeatureTier::Keyword, &cfg).await;
8037        unsafe {
8038            std::env::remove_var("AI_MEMORY_LLM_BACKEND");
8039            std::env::remove_var("AI_MEMORY_LLM_BASE_URL");
8040        }
8041        // Env source bypasses the early return → constructor errors on
8042        // unreachable endpoint → mapped to None.
8043        assert!(
8044            res.is_none(),
8045            "env-source backend against unreachable URL MUST map to None"
8046        );
8047    }
8048
8049    // ===========================================================================
8050    // FX-F1 — additional helper-function coverage uplift.
8051    // The build_llm_client tests above close the FX-D1 gap; these tests
8052    // pin the smaller helper surfaces (`apply_anonymize_default`,
8053    // `resolve_admin_agent_ids`) that previously had narrow branches
8054    // uncovered. Each closes one or two uncovered lines so the file
8055    // floor (85%) clears comfortably.
8056    // ===========================================================================
8057
8058    /// FX-F1 — `apply_anonymize_default` writes the env var when both
8059    /// (a) the effective default is true AND (b) the env var is
8060    /// unset. Pre-FX-F1 this `unsafe { set_var }` arm was uncovered.
8061    #[test]
8062    fn test_apply_anonymize_default_sets_env_when_unset() {
8063        let _guard = env_var_lock();
8064        // SAFETY: serialised through env_var_lock.
8065        let prev = std::env::var("AI_MEMORY_ANONYMIZE").ok();
8066        unsafe { std::env::remove_var("AI_MEMORY_ANONYMIZE") };
8067        let mut cfg = AppConfig::default();
8068        cfg.identity = Some(crate::config::IdentityConfig {
8069            anonymize_default: true,
8070            ..crate::config::IdentityConfig::default()
8071        });
8072        apply_anonymize_default(&cfg);
8073        let got = std::env::var("AI_MEMORY_ANONYMIZE").ok();
8074        // Restore env before asserting so a failure doesn't leak.
8075        match prev {
8076            Some(v) => unsafe { std::env::set_var("AI_MEMORY_ANONYMIZE", v) },
8077            None => unsafe { std::env::remove_var("AI_MEMORY_ANONYMIZE") },
8078        }
8079        assert_eq!(
8080            got.as_deref(),
8081            Some("1"),
8082            "anonymize_default=true with env unset MUST set AI_MEMORY_ANONYMIZE=1"
8083        );
8084    }
8085
8086    /// FX-F1 — `apply_anonymize_default` is a no-op when the env var
8087    /// is already set. Mirrors the existing test gap on the "env wins
8088    /// over config" precedence rule.
8089    #[test]
8090    fn test_apply_anonymize_default_preserves_existing_env() {
8091        let _guard = env_var_lock();
8092        let prev = std::env::var("AI_MEMORY_ANONYMIZE").ok();
8093        unsafe { std::env::set_var("AI_MEMORY_ANONYMIZE", "0") };
8094        let mut cfg = AppConfig::default();
8095        cfg.identity = Some(crate::config::IdentityConfig {
8096            anonymize_default: true,
8097            ..crate::config::IdentityConfig::default()
8098        });
8099        apply_anonymize_default(&cfg);
8100        let got = std::env::var("AI_MEMORY_ANONYMIZE").ok();
8101        match prev {
8102            Some(v) => unsafe { std::env::set_var("AI_MEMORY_ANONYMIZE", v) },
8103            None => unsafe { std::env::remove_var("AI_MEMORY_ANONYMIZE") },
8104        }
8105        assert_eq!(
8106            got.as_deref(),
8107            Some("0"),
8108            "env-var precedence: pre-set AI_MEMORY_ANONYMIZE MUST survive apply_anonymize_default"
8109        );
8110    }
8111
8112    /// FX-F1 — `resolve_admin_agent_ids` empty-entry handling.
8113    /// `AI_MEMORY_ADMIN_AGENT_IDS="alice,,bob"` should drop the empty
8114    /// entry without erroring. Pins the `continue` branch on line
8115    /// 1882 of the env-csv walker.
8116    #[test]
8117    fn test_resolve_admin_agent_ids_skips_empty_entries() {
8118        let _guard = env_var_lock();
8119        let prev = std::env::var("AI_MEMORY_ADMIN_AGENT_IDS").ok();
8120        unsafe { std::env::set_var("AI_MEMORY_ADMIN_AGENT_IDS", "alice,,bob,,") };
8121        let ids = resolve_admin_agent_ids(None);
8122        match prev {
8123            Some(v) => unsafe { std::env::set_var("AI_MEMORY_ADMIN_AGENT_IDS", v) },
8124            None => unsafe { std::env::remove_var("AI_MEMORY_ADMIN_AGENT_IDS") },
8125        }
8126        assert_eq!(
8127            ids,
8128            vec!["alice".to_string(), "bob".to_string()],
8129            "empty entries between commas MUST be skipped, not surface as agent_ids"
8130        );
8131    }
8132
8133    /// FX-F1 — `resolve_admin_agent_ids` rejects malformed entries
8134    /// with a warn-log, preserving the valid ones. Pins the Err arm
8135    /// of `validate_agent_id` on line 1901-1905.
8136    #[test]
8137    fn test_resolve_admin_agent_ids_drops_malformed_entries() {
8138        let _guard = env_var_lock();
8139        let prev = std::env::var("AI_MEMORY_ADMIN_AGENT_IDS").ok();
8140        // `bad id with spaces` fails `validate_agent_id`'s shape
8141        // check; `alice` passes; `*` is the post-#980 reject.
8142        unsafe { std::env::set_var("AI_MEMORY_ADMIN_AGENT_IDS", "alice,bad id,*,bob") };
8143        let ids = resolve_admin_agent_ids(None);
8144        match prev {
8145            Some(v) => unsafe { std::env::set_var("AI_MEMORY_ADMIN_AGENT_IDS", v) },
8146            None => unsafe { std::env::remove_var("AI_MEMORY_ADMIN_AGENT_IDS") },
8147        }
8148        assert!(ids.contains(&"alice".to_string()));
8149        assert!(ids.contains(&"bob".to_string()));
8150        assert!(
8151            !ids.iter().any(|s| s.contains(' ')),
8152            "malformed entries MUST be dropped"
8153        );
8154        assert!(
8155            !ids.contains(&"*".to_string()),
8156            "wildcard `*` MUST be dropped (post-#980)"
8157        );
8158    }
8159
8160    /// FX-F1 — `resolve_admin_agent_ids` falls through to the config
8161    /// when the env var is unset/empty. Pins the
8162    /// `admin_cfg.map(...).unwrap_or_default()` tail.
8163    #[test]
8164    fn test_resolve_admin_agent_ids_falls_back_to_config() {
8165        let _guard = env_var_lock();
8166        let prev = std::env::var("AI_MEMORY_ADMIN_AGENT_IDS").ok();
8167        unsafe { std::env::remove_var("AI_MEMORY_ADMIN_AGENT_IDS") };
8168        // Empty env → fall through to config.
8169        let ids = resolve_admin_agent_ids(None);
8170        // Restore env before asserting.
8171        if let Some(v) = prev {
8172            unsafe { std::env::set_var("AI_MEMORY_ADMIN_AGENT_IDS", v) };
8173        }
8174        assert!(
8175            ids.is_empty(),
8176            "no env + no config MUST resolve to empty allowlist (secure default)"
8177        );
8178    }
8179
8180    /// FX-F1 — `resolve_admin_agent_ids` honours a whitespace-only
8181    /// `AI_MEMORY_ADMIN_AGENT_IDS` value as "unset" (the
8182    /// `!raw.trim().is_empty()` guard). Pins the guard arm.
8183    #[test]
8184    fn test_resolve_admin_agent_ids_whitespace_env_falls_to_config() {
8185        let _guard = env_var_lock();
8186        let prev = std::env::var("AI_MEMORY_ADMIN_AGENT_IDS").ok();
8187        unsafe { std::env::set_var("AI_MEMORY_ADMIN_AGENT_IDS", "   ") };
8188        let ids = resolve_admin_agent_ids(None);
8189        match prev {
8190            Some(v) => unsafe { std::env::set_var("AI_MEMORY_ADMIN_AGENT_IDS", v) },
8191            None => unsafe { std::env::remove_var("AI_MEMORY_ADMIN_AGENT_IDS") },
8192        }
8193        assert!(
8194            ids.is_empty(),
8195            "whitespace-only env MUST be treated as unset"
8196        );
8197    }
8198
8199    // ===========================================================================
8200    // FX-F2 (coverage, #1432) — close the daemon_runtime.rs floor regression
8201    // observed on the Per-Module Coverage Thresholds CI gate after the
8202    // post-FX-F1 churn (HEADER_AGENT_ID SSOT migration #19eddac9, L1-L4
8203    // capture-turn #49e04daf, etc.) shifted branch-hit counts and dropped
8204    // measured coverage from 85.00% (pinned by 197640745) to 84.89% (-0.11pp).
8205    // These tests cover the `build_store_handle` URL-scheme dispatch arms
8206    // and `resolve_configured_embedding_dim` resolution-ladder arms — every
8207    // branch in both helpers is exercised under `cfg(feature = "sal")` test
8208    // builds with no live Postgres needed.
8209    // ===========================================================================
8210
8211    /// FX-F2 — `build_store_handle` accepts a `sqlite:///path` URL and
8212    /// routes through the SqliteStore adapter (not the `--db` fallback).
8213    /// Pins the `strip_prefix("sqlite://")` arm + the SqliteStore
8214    /// `Ok(...)` tail at lines 2691-2701.
8215    #[cfg(feature = "sal")]
8216    #[tokio::test]
8217    async fn fx_f2_build_store_handle_sqlite_url_scheme() {
8218        let dir = tempfile::tempdir().unwrap();
8219        let db = dir.path().join("scheme.db");
8220        let url = format!("sqlite:///{}", db.display());
8221        let (backend, store) = build_store_handle(
8222            Some(&url),
8223            &db,
8224            None,
8225            None,
8226            crate::store::PoolConfig::default(),
8227        )
8228        .await
8229        .expect("sqlite:// URL must dispatch to SqliteStore");
8230        // Backend tag must reflect the SQLite path.
8231        assert!(
8232            matches!(backend, crate::handlers::StorageBackend::Sqlite),
8233            "sqlite:// URL MUST resolve to StorageBackend::Sqlite"
8234        );
8235        // Smoke-check that the store is usable (the SAL trait `Arc` is live).
8236        drop(store);
8237    }
8238
8239    /// FX-F2 — `build_store_handle` rejects an unrecognised URL scheme
8240    /// with the canonical bail message. Pins the `else { bail!(...) }`
8241    /// arm at lines 2702-2706 — the lone uncovered Err path on the
8242    /// sal-feature build.
8243    #[cfg(feature = "sal")]
8244    #[tokio::test]
8245    async fn fx_f2_build_store_handle_unknown_scheme_errors() {
8246        let dir = tempfile::tempdir().unwrap();
8247        let db = dir.path().join("ignored.db");
8248        let result = build_store_handle(
8249            Some("mysql://host/db"),
8250            &db,
8251            None,
8252            None,
8253            crate::store::PoolConfig::default(),
8254        )
8255        .await;
8256        let err = match result {
8257            Ok(_) => panic!("unrecognised scheme MUST bail; got Ok"),
8258            Err(e) => e,
8259        };
8260        let msg = format!("{err:#}");
8261        assert!(
8262            msg.contains("unrecognised --store-url"),
8263            "bail message MUST include the canonical prefix; got: {msg}"
8264        );
8265    }
8266
8267    /// FX-F2 — `build_store_handle` defaults to SqliteStore at the
8268    /// `--db` path when `--store-url` is absent. Pins the `None` arm
8269    /// at lines 2708-2715.
8270    #[cfg(feature = "sal")]
8271    #[tokio::test]
8272    async fn fx_f2_build_store_handle_no_url_falls_through_to_db_path() {
8273        let dir = tempfile::tempdir().unwrap();
8274        let db = dir.path().join("fallthrough.db");
8275        let (backend, _store) =
8276            build_store_handle(None, &db, None, None, crate::store::PoolConfig::default())
8277                .await
8278                .expect("absent --store-url MUST resolve to SqliteStore via --db");
8279        assert!(matches!(backend, crate::handlers::StorageBackend::Sqlite));
8280    }
8281
8282    /// FX-F2 — `resolve_configured_embedding_dim` returns the canonical
8283    /// dim from the resolver when the model id is in
8284    /// `KNOWN_EMBEDDING_DIMS`. Pins the first arm of the resolution
8285    /// ladder (line 2615-2616).
8286    #[cfg(feature = "sal")]
8287    #[test]
8288    fn fx_f2_resolve_configured_embedding_dim_canonical_lookup_wins() {
8289        let _g = env_var_lock();
8290        let mut cfg = AppConfig::default();
8291        // `nomic-embed-text-v1.5` is in KNOWN_EMBEDDING_DIMS at 768.
8292        cfg.embeddings = Some(crate::config::EmbeddingsSection {
8293            model: Some("nomic-embed-text-v1.5".to_string()),
8294            ..crate::config::EmbeddingsSection::default()
8295        });
8296        let tier_cfg = FeatureTier::Semantic.config();
8297        let dim = resolve_configured_embedding_dim(&cfg, &tier_cfg);
8298        assert!(
8299            matches!(dim, Some(d) if d == 768),
8300            "canonical lookup MUST return 768 for nomic-embed-text-v1.5; got: {dim:?}"
8301        );
8302    }
8303
8304    /// FX-F2 — `resolve_configured_embedding_dim` falls through to the
8305    /// legacy flat-field arm when the resolver yields no dim. Pins the
8306    /// `or_else(|| app_config.embedding_model...)` arm (line 2617-2623).
8307    /// The legacy `EmbeddingModel::from_str` accepts the underscore
8308    /// variant `mini_lm_l6_v2`; canonical lookup goes through the
8309    /// `[embeddings]` section, which we omit here so the resolver
8310    /// returns `embedding_dim = None` and the legacy parse arm fires.
8311    #[cfg(feature = "sal")]
8312    #[test]
8313    fn fx_f2_resolve_configured_embedding_dim_legacy_flat_field_path() {
8314        let _g = env_var_lock();
8315        let mut cfg = AppConfig::default();
8316        // No [embeddings] section → resolver returns None for dim.
8317        // Legacy flat-field `embedding_model` parses as the 2-family enum.
8318        cfg.embedding_model = Some("mini_lm_l6_v2".to_string());
8319        let tier_cfg = FeatureTier::Semantic.config();
8320        let dim = resolve_configured_embedding_dim(&cfg, &tier_cfg);
8321        assert!(
8322            matches!(dim, Some(d) if d == 384),
8323            "legacy flat-field path MUST resolve mini_lm_l6_v2 to 384; got: {dim:?}"
8324        );
8325    }
8326
8327    /// FX-F2 — `resolve_configured_embedding_dim` falls all the way
8328    /// through to the tier-preset arm when neither resolver nor legacy
8329    /// flat-field yields a dim. Pins the final `or_else(|| preset...)`
8330    /// arm (line 2624).
8331    #[cfg(feature = "sal")]
8332    #[test]
8333    fn fx_f2_resolve_configured_embedding_dim_preset_fallback() {
8334        let _g = env_var_lock();
8335        let cfg = AppConfig::default();
8336        // Default config: no [embeddings] section + no legacy
8337        // embedding_model field. Semantic tier preset HAS an embedding
8338        // model so the preset arm fires (Some(_)). Keyword tier preset
8339        // is None so we'd get None — but Semantic is the load-bearing
8340        // case for the postgres-schema-bootstrap path documented at the
8341        // function comment.
8342        let tier_cfg = FeatureTier::Semantic.config();
8343        let dim = resolve_configured_embedding_dim(&cfg, &tier_cfg);
8344        assert!(
8345            dim.is_some(),
8346            "Semantic tier preset MUST yield a dim via the fallback arm"
8347        );
8348    }
8349
8350    /// FX-F2 — `resolve_configured_embedding_dim` passes a parse-error
8351    /// in the legacy flat-field arm through to the next arm
8352    /// (`.and_then(|raw| raw.parse(...).ok())`). The function returns
8353    /// the resolver-supplied dim (whatever
8354    /// `AppConfig::resolve_embeddings()` produced from defaults) when
8355    /// the operator's malformed flat-field is dropped. Pins the
8356    /// `.and_then(..., .ok())` None-on-parse-fail arm at line 2621.
8357    #[cfg(feature = "sal")]
8358    #[test]
8359    fn fx_f2_resolve_configured_embedding_dim_malformed_legacy_drops_silently() {
8360        let _g = env_var_lock();
8361        let mut cfg = AppConfig::default();
8362        // Unparseable value — `EmbeddingModel::from_str` rejects it
8363        // and the `.ok()` swallows the error, falling through to the
8364        // preset arm.
8365        cfg.embedding_model = Some("not-a-real-model".to_string());
8366        let tier_cfg = FeatureTier::Semantic.config();
8367        let dim = resolve_configured_embedding_dim(&cfg, &tier_cfg);
8368        // The resolver+preset combination still yields a Some (default
8369        // semantic tier has an embedding model preset). The test pins
8370        // the silent-drop behaviour: the function does NOT panic /
8371        // bail on an unparseable legacy override.
8372        assert!(
8373            dim.is_some(),
8374            "unparseable legacy embedding_model MUST be dropped silently \
8375             (the .ok() arm), preset fallback fires"
8376        );
8377    }
8378
8379    // -----------------------------------------------------------------
8380    // FUPC — body-exercising sweep-loop tests. The pre-existing
8381    // spawn-and-abort smoke tests use a 60s interval, so the loop body
8382    // (the actual db::gc / sweep / checkpoint calls + their info-log
8383    // branches) never fires inside the 20ms abort window. These drive a
8384    // 1ms interval against seeded state so the body runs at least once.
8385    // -----------------------------------------------------------------
8386
8387    /// `spawn_gc_loop` body actually runs and archives an expired memory
8388    /// (the `Ok(n) if n > 0` info-log arm fires).
8389    #[tokio::test]
8390    async fn fupc_spawn_gc_loop_body_archives_expired() {
8391        use crate::models::{Memory, MemoryKind, Tier};
8392        let env = TestEnv::fresh();
8393        let conn = db::open(&env.db_path).unwrap();
8394        // Seed a memory already past its expiry so the gc sweep archives it.
8395        let mem = Memory {
8396            id: uuid::Uuid::new_v4().to_string(),
8397            tier: Tier::Short,
8398            namespace: "gc-ns".to_string(),
8399            title: "expired".to_string(),
8400            content: "stale".to_string(),
8401            priority: 5,
8402            confidence: 1.0,
8403            source: "test".to_string(),
8404            created_at: "2000-01-01T00:00:00Z".to_string(),
8405            updated_at: "2000-01-01T00:00:00Z".to_string(),
8406            expires_at: Some("2000-01-01T01:00:00Z".to_string()),
8407            memory_kind: MemoryKind::Observation,
8408            ..Memory::default()
8409        };
8410        db::insert(&conn, &mem).unwrap();
8411        let state: Db = Arc::new(Mutex::new((
8412            conn,
8413            env.db_path.clone(),
8414            ResolvedTtl::default(),
8415            true, // archive_on_gc
8416        )));
8417        let h = spawn_gc_loop(state.clone(), Some(30), Duration::from_millis(1));
8418        // Let several sweep ticks fire.
8419        tokio::time::sleep(Duration::from_millis(40)).await;
8420        h.abort();
8421        let _ = h.await;
8422        // The expired row must be gone from `memories` (archived + deleted).
8423        let lock = state.lock().await;
8424        let remaining: i64 = lock
8425            .0
8426            .query_row(
8427                "SELECT COUNT(*) FROM memories WHERE namespace = 'gc-ns'",
8428                [],
8429                |r| r.get(0),
8430            )
8431            .unwrap();
8432        assert_eq!(
8433            remaining, 0,
8434            "gc loop body must have archived the expired row"
8435        );
8436    }
8437
8438    /// `spawn_wal_checkpoint_loop` body actually runs (no panic, clean
8439    /// abort) against a live WAL-mode db.
8440    #[tokio::test]
8441    async fn fupc_spawn_wal_checkpoint_loop_body_runs() {
8442        let env = TestEnv::fresh();
8443        let conn = db::open(&env.db_path).unwrap();
8444        let state: Db = Arc::new(Mutex::new((
8445            conn,
8446            env.db_path.clone(),
8447            ResolvedTtl::default(),
8448            true,
8449        )));
8450        let h = spawn_wal_checkpoint_loop(state, Duration::from_millis(1));
8451        tokio::time::sleep(Duration::from_millis(30)).await;
8452        h.abort();
8453        let _ = h.await;
8454    }
8455
8456    /// `spawn_transcript_lifecycle_sweep_loop` body runs at a 1ms cadence
8457    /// against a clean db (the `Ok(r)` arm with a zero-count report — no
8458    /// info-log, no panic, clean abort).
8459    #[tokio::test]
8460    async fn fupc_spawn_transcript_lifecycle_sweep_body_runs_clean() {
8461        let env = TestEnv::fresh();
8462        let conn = db::open(&env.db_path).unwrap();
8463        let state: Db = Arc::new(Mutex::new((
8464            conn,
8465            env.db_path.clone(),
8466            ResolvedTtl::default(),
8467            true,
8468        )));
8469        let h = spawn_transcript_lifecycle_sweep_loop(
8470            state,
8471            crate::config::TranscriptsConfig::default(),
8472            Duration::from_millis(1),
8473        );
8474        tokio::time::sleep(Duration::from_millis(30)).await;
8475        h.abort();
8476        let _ = h.await;
8477    }
8478
8479    /// `spawn_agent_quota_reset_loop` body runs at a 1ms cadence against
8480    /// a clean db (the reset SQL touches zero rows, no panic, clean
8481    /// abort).
8482    #[tokio::test]
8483    async fn fupc_spawn_agent_quota_reset_body_runs_clean() {
8484        let env = TestEnv::fresh();
8485        let conn = db::open(&env.db_path).unwrap();
8486        let state: Db = Arc::new(Mutex::new((
8487            conn,
8488            env.db_path.clone(),
8489            ResolvedTtl::default(),
8490            true,
8491        )));
8492        let h = spawn_agent_quota_reset_loop(state, Duration::from_millis(1));
8493        tokio::time::sleep(Duration::from_millis(30)).await;
8494        h.abort();
8495        let _ = h.await;
8496    }
8497}