Skip to main content

fathomdb_engine/
admin.rs

1use std::fmt::Write as _;
2use std::fs;
3use std::io::{self, Read, Write};
4use std::path::{Path, PathBuf};
5use std::process::{Command, Stdio};
6use std::sync::Arc;
7use std::sync::mpsc;
8use std::thread;
9use std::time::{Duration, Instant, SystemTime};
10
11use fathomdb_schema::{SchemaError, SchemaManager};
12use rusqlite::{DatabaseName, OptionalExtension, TransactionBehavior};
13use serde::{Deserialize, Serialize};
14use sha2::{Digest, Sha256};
15
16use crate::{
17    EngineError, ProjectionRepairReport, ProjectionService, executable_trust,
18    ids::new_id,
19    operational::{
20        OperationalCollectionKind, OperationalCollectionRecord, OperationalCompactionReport,
21        OperationalCurrentRow, OperationalFilterClause, OperationalFilterField,
22        OperationalFilterFieldType, OperationalFilterMode, OperationalFilterValue,
23        OperationalHistoryValidationIssue, OperationalHistoryValidationReport,
24        OperationalMutationRow, OperationalPurgeReport, OperationalReadReport,
25        OperationalReadRequest, OperationalRegisterRequest, OperationalRepairReport,
26        OperationalRetentionActionKind, OperationalRetentionPlanItem,
27        OperationalRetentionPlanReport, OperationalRetentionRunItem, OperationalRetentionRunReport,
28        OperationalSecondaryIndexDefinition, OperationalSecondaryIndexRebuildReport,
29        OperationalTraceReport, extract_secondary_index_entries_for_current,
30        extract_secondary_index_entries_for_mutation, parse_operational_secondary_indexes_json,
31        parse_operational_validation_contract, validate_operational_payload_against_contract,
32    },
33    projection::ProjectionTarget,
34    sqlite,
35};
36
/// Results of a physical and structural integrity check on the database.
///
/// Built by `AdminService::check_integrity`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct IntegrityReport {
    /// True when `PRAGMA integrity_check` reported `ok`.
    pub physical_ok: bool,
    /// True when `pragma_foreign_key_check` returned no rows.
    pub foreign_keys_ok: bool,
    /// Chunks belonging to an active node that have no row in `fts_nodes`.
    pub missing_fts_rows: usize,
    /// Number of `logical_id`s with more than one active (non-superseded) node row.
    pub duplicate_active_logical_ids: usize,
    /// Operational mutation rows plus current rows whose collection is not registered.
    pub operational_missing_collections: usize,
    /// Operational current rows whose `last_mutation_id` matches no mutation row.
    pub operational_missing_last_mutations: usize,
    /// One message per non-zero counter above.
    pub warnings: Vec<String>,
}
48
/// Options controlling how a safe database export is performed.
///
/// The `Default` implementation enables `force_checkpoint`.
#[derive(Clone, Copy, Debug)]
pub struct SafeExportOptions {
    /// When true, runs `PRAGMA wal_checkpoint(FULL)` before copying and fails if
    /// any WAL frames could not be applied (busy != 0). Set to false only in
    /// tests that seed a database without WAL mode.
    pub force_checkpoint: bool,
}
57
impl Default for SafeExportOptions {
    /// Default export options: the WAL checkpoint is forced before copying.
    fn default() -> Self {
        Self {
            force_checkpoint: true,
        }
    }
}
65
// Must match PROTOCOL_VERSION in fathomdb-admin-bridge.rs.
// NOTE(review): presumably the value written to `SafeExportManifest::protocol_version`;
// the use site is outside this excerpt — confirm.
const EXPORT_PROTOCOL_VERSION: u32 = 1;
68
/// Manifest describing a completed safe export.
///
/// Serializable with `serde` (derives `Serialize`).
#[derive(Clone, Debug, Serialize)]
pub struct SafeExportManifest {
    /// Unix timestamp (seconds since epoch) when the export was created.
    pub exported_at: u64,
    /// SHA-256 hex digest of the exported database file.
    pub sha256: String,
    /// Schema version recorded in `fathom_schema_migrations` at export time.
    pub schema_version: u32,
    /// Bridge protocol version compiled into this binary.
    /// NOTE(review): presumably `EXPORT_PROTOCOL_VERSION` — confirm at the construction site.
    pub protocol_version: u32,
    /// Number of `SQLite` pages in the exported database file.
    pub page_count: u64,
}
83
/// Report from tracing all rows associated with a given `source_ref`.
///
/// NOTE(review): the code that fills this report is outside this excerpt; the
/// field notes below are read off the field names and should be confirmed.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct TraceReport {
    /// The `source_ref` that was traced.
    pub source_ref: String,
    /// Presumably the number of node rows carrying this `source_ref`.
    pub node_rows: usize,
    /// Presumably the number of edge rows carrying this `source_ref`.
    pub edge_rows: usize,
    /// Presumably the number of action rows carrying this `source_ref`.
    pub action_rows: usize,
    /// Presumably the number of operational mutation rows carrying this `source_ref`.
    pub operational_mutation_rows: usize,
    /// Logical IDs of the matched nodes (presumably).
    pub node_logical_ids: Vec<String>,
    /// IDs of the matched actions (presumably).
    pub action_ids: Vec<String>,
    /// IDs of the matched operational mutations (presumably).
    pub operational_mutation_ids: Vec<String>,
}
96
/// An edge that was skipped during a restore because an endpoint is missing.
///
/// Carried in `LogicalRestoreReport::skipped_edges`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct SkippedEdge {
    /// Logical ID of the edge that was not restored.
    pub edge_logical_id: String,
    /// Identifies the endpoint that was missing.
    /// NOTE(review): presumably a node logical ID — confirm against the restore code.
    pub missing_endpoint: String,
}
103
/// Report from restoring a retired logical ID back to active state.
///
/// NOTE(review): the restore implementation is outside this excerpt; the field
/// notes are read off the field names and should be confirmed against it.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct LogicalRestoreReport {
    /// The logical ID the restore was requested for.
    pub logical_id: String,
    /// Presumably true when the restore found nothing to change.
    pub was_noop: bool,
    /// Per-table counts of restored rows (nodes, edges, chunks, FTS, vec).
    pub restored_node_rows: usize,
    pub restored_edge_rows: usize,
    pub restored_chunk_rows: usize,
    pub restored_fts_rows: usize,
    pub restored_vec_rows: usize,
    /// Edges that were not restored because an endpoint is missing.
    pub skipped_edges: Vec<SkippedEdge>,
    /// Free-form remarks attached to the report.
    pub notes: Vec<String>,
}
117
/// Report from permanently purging all rows for a logical ID.
///
/// NOTE(review): the purge implementation is outside this excerpt; the field
/// notes are read off the field names and should be confirmed against it.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct LogicalPurgeReport {
    /// The logical ID the purge was requested for.
    pub logical_id: String,
    /// Presumably true when the purge found nothing to delete.
    pub was_noop: bool,
    /// Per-table counts of deleted rows (nodes, edges, chunks, FTS, vec).
    pub deleted_node_rows: usize,
    pub deleted_edge_rows: usize,
    pub deleted_chunk_rows: usize,
    pub deleted_fts_rows: usize,
    pub deleted_vec_rows: usize,
    /// Free-form remarks attached to the report.
    pub notes: Vec<String>,
}
130
/// Options controlling provenance event purging behavior.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ProvenancePurgeOptions {
    /// NOTE(review): presumably reports what would be purged without deleting — confirm.
    pub dry_run: bool,
    /// Event types to keep. Defaults to an empty list when absent from the
    /// deserialized input.
    #[serde(default)]
    pub preserve_event_types: Vec<String>,
}
138
/// Report from a provenance event purge operation.
///
/// NOTE(review): the purge implementation is outside this excerpt; confirm the
/// field notes against it.
#[derive(Clone, Debug, Serialize)]
pub struct ProvenancePurgeReport {
    /// Number of provenance events deleted.
    pub events_deleted: u64,
    /// Number of provenance events kept.
    pub events_preserved: u64,
    /// Presumably the timestamp of the oldest event still stored; `None` when
    /// there is none. Units are not visible here — confirm.
    pub oldest_remaining: Option<i64>,
}
146
/// Service providing administrative operations (integrity checks, exports, restores, purges).
#[derive(Debug)]
pub struct AdminService {
    /// Path of the database file each operation opens a connection to.
    database_path: PathBuf,
    /// Bootstraps the schema on every connection opened by `connect`.
    schema_manager: Arc<SchemaManager>,
    /// Projection service constructed over the same path and schema manager.
    projections: ProjectionService,
}
154
/// Results of a semantic consistency check on the graph data.
///
/// Built by `AdminService::check_semantics`; every counter is the result of one
/// `count(*)` query.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct SemanticReport {
    /// Chunks whose `node_logical_id` matches no node row at all (active or superseded).
    pub orphaned_chunks: usize,
    /// Active nodes with a NULL `source_ref` (loss of provenance).
    pub null_source_ref_nodes: usize,
    /// Steps referencing a `run_id` that does not exist in the runs table.
    pub broken_step_fk: usize,
    /// Actions referencing a `step_id` that does not exist in the steps table.
    pub broken_action_fk: usize,
    /// FTS rows whose `chunk_id` does not exist in the chunks table.
    pub stale_fts_rows: usize,
    /// FTS rows whose `node_logical_id` has no active node row (no row with
    /// `superseded_at` IS NULL).
    pub fts_rows_for_superseded_nodes: usize,
    /// Active edges where at least one endpoint has no active node.
    pub dangling_edges: usize,
    /// `logical_ids` where every version has been superseded (no active row).
    pub orphaned_supersession_chains: usize,
    /// Vec rows whose backing chunk no longer exists in the chunks table.
    /// Always 0 without the `sqlite-vec` feature or when the vec table is absent.
    pub stale_vec_rows: usize,
    /// Compatibility counter for vec rows whose chunk points at missing node history.
    /// Always 0 without the `sqlite-vec` feature or when the vec table is absent.
    pub vec_rows_for_superseded_nodes: usize,
    /// Latest-state keys whose latest mutation is a `put` but no current row exists.
    pub missing_operational_current_rows: usize,
    /// Current rows that do not match the latest mutation state.
    pub stale_operational_current_rows: usize,
    /// Mutations written after the owning collection was disabled.
    pub disabled_collection_mutations: usize,
    /// Access metadata rows whose `logical_id` no longer has any node history.
    pub orphaned_last_access_metadata_rows: usize,
    /// One message per non-zero counter above.
    pub warnings: Vec<String>,
}
188
/// Configuration for regenerating vector embeddings via an external generator command.
///
/// NOTE(review): the code that consumes this configuration is outside this
/// excerpt; the field notes are read off the field names — confirm.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct VectorRegenerationConfig {
    /// Name of the vector profile being regenerated.
    pub profile: String,
    /// Name of the table that holds the vectors.
    pub table_name: String,
    /// Identity and version of the embedding model.
    pub model_identity: String,
    pub model_version: String,
    /// Embedding dimension.
    pub dimension: usize,
    /// Policy identifiers recorded with the configuration.
    pub normalization_policy: String,
    pub chunking_policy: String,
    pub preprocessing_policy: String,
    /// External generator command; presumably executable first, then arguments.
    pub generator_command: Vec<String>,
}
203
/// Report from a vector embedding regeneration run.
///
/// NOTE(review): the regeneration code is outside this excerpt; the field
/// notes are read off the field names — confirm.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct VectorRegenerationReport {
    /// Profile, table and dimension the run operated on.
    pub profile: String,
    pub table_name: String,
    pub dimension: usize,
    /// Number of chunks considered by the run.
    pub total_chunks: usize,
    /// Number of vector rows regenerated.
    pub regenerated_rows: usize,
    /// Whether the vector contract was persisted.
    pub contract_persisted: bool,
    /// Free-form remarks attached to the report.
    pub notes: Vec<String>,
}
215
/// Security and resource policy for the external vector generator subprocess.
///
/// The `Default` implementation uses a 300 000 ms timeout, 64 MiB stdout and
/// input limits, a 1 MiB stderr limit, 1 000 000 chunks, both executable checks
/// enabled, and empty root / environment lists.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct VectorGeneratorPolicy {
    /// Time limit for the generator, in milliseconds.
    pub timeout_ms: u64,
    /// Byte limits for the generator's stdout, stderr and input.
    pub max_stdout_bytes: usize,
    pub max_stderr_bytes: usize,
    pub max_input_bytes: usize,
    /// Upper bound on the number of chunks.
    pub max_chunks: usize,
    /// Deserializes to `true` when the field is absent.
    #[serde(default = "default_require_absolute_executable")]
    pub require_absolute_executable: bool,
    /// Deserializes to `true` when the field is absent.
    #[serde(default = "default_reject_world_writable_executable")]
    pub reject_world_writable_executable: bool,
    /// Deserializes to an empty list when absent.
    /// NOTE(review): presumably directories the executable must live under — confirm.
    #[serde(default)]
    pub allowed_executable_roots: Vec<String>,
    /// Deserializes to an empty list when absent.
    /// NOTE(review): presumably environment variables passed through to the subprocess — confirm.
    #[serde(default)]
    pub preserve_env_vars: Vec<String>,
}
234
235impl Default for VectorGeneratorPolicy {
236    fn default() -> Self {
237        Self {
238            timeout_ms: 300_000,
239            max_stdout_bytes: 64 * 1024 * 1024,
240            max_stderr_bytes: 1024 * 1024,
241            max_input_bytes: 64 * 1024 * 1024,
242            max_chunks: 1_000_000,
243            require_absolute_executable: true,
244            reject_world_writable_executable: true,
245            allowed_executable_roots: vec![],
246            preserve_env_vars: vec![],
247        }
248    }
249}
250
/// Serde default for `VectorGeneratorPolicy::require_absolute_executable`: `true`.
const fn default_require_absolute_executable() -> bool {
    true
}
254
/// Serde default for `VectorGeneratorPolicy::reject_world_writable_executable`: `true`.
const fn default_reject_world_writable_executable() -> bool {
    true
}
258
// Format version and size limits for the vector regeneration contract, plus the
// operational read limits.
// NOTE(review): the code enforcing these values is outside this excerpt; the
// grouping below is inferred from the constant names — confirm units and use sites.

// Vector contract format version.
const CURRENT_VECTOR_CONTRACT_FORMAT_VERSION: i64 = 1;
// Length caps, presumably for the corresponding `VectorRegenerationConfig` string fields.
const MAX_PROFILE_LEN: usize = 128;
const MAX_MODEL_IDENTITY_LEN: usize = 256;
const MAX_MODEL_VERSION_LEN: usize = 128;
const MAX_POLICY_LEN: usize = 128;
// Caps on a single generator command argument and on the command as a whole.
const MAX_GENERATOR_COMMAND_ARG_LEN: usize = 4096;
const MAX_GENERATOR_COMMAND_TOTAL_LEN: usize = 16 * 1024;
// Byte caps on serialized contract JSON and audit metadata.
const MAX_CONTRACT_JSON_BYTES: usize = 32 * 1024;
const MAX_AUDIT_METADATA_BYTES: usize = 2048;
// Default and maximum row limits for operational reads.
const DEFAULT_OPERATIONAL_READ_LIMIT: usize = 100;
const MAX_OPERATIONAL_READ_LIMIT: usize = 1000;
270
/// Thread-safe handle to the shared [`AdminService`].
///
/// Cloning the handle clones the inner `Arc`, so all clones refer to the same service.
#[derive(Clone, Debug)]
pub struct AdminHandle {
    /// Shared ownership of the wrapped service.
    inner: Arc<AdminService>,
}
276
impl AdminHandle {
    /// Wrap an [`AdminService`] in a shared handle.
    ///
    /// Takes ownership of `service` and places it behind an `Arc`.
    #[must_use]
    pub fn new(service: AdminService) -> Self {
        Self {
            inner: Arc::new(service),
        }
    }

    /// Clone the inner `Arc` to the [`AdminService`].
    ///
    /// The returned `Arc` points at the same service instance as this handle.
    #[must_use]
    pub fn service(&self) -> Arc<AdminService> {
        Arc::clone(&self.inner)
    }
}
292
293impl AdminService {
294    /// Create a new admin service for the database at the given path.
295    #[must_use]
296    pub fn new(path: impl AsRef<Path>, schema_manager: Arc<SchemaManager>) -> Self {
297        let database_path = path.as_ref().to_path_buf();
298        let projections = ProjectionService::new(&database_path, Arc::clone(&schema_manager));
299        Self {
300            database_path,
301            schema_manager,
302            projections,
303        }
304    }
305
    /// Open a connection to `database_path` and bootstrap the schema on it.
    ///
    /// With the `sqlite-vec` feature enabled the connection is opened through
    /// `sqlite::open_connection_with_vec`; otherwise through `sqlite::open_connection`.
    ///
    /// # Errors
    /// Returns [`EngineError`] if opening the connection or bootstrapping the schema fails.
    fn connect(&self) -> Result<rusqlite::Connection, EngineError> {
        // Exactly one of the two `conn` bindings below is compiled in.
        #[cfg(feature = "sqlite-vec")]
        let conn = sqlite::open_connection_with_vec(&self.database_path)?;
        #[cfg(not(feature = "sqlite-vec"))]
        let conn = sqlite::open_connection(&self.database_path)?;
        self.schema_manager.bootstrap(&conn)?;
        Ok(conn)
    }
314
315    /// # Errors
316    /// Returns [`EngineError`] if the database connection fails or any SQL query fails.
317    pub fn check_integrity(&self) -> Result<IntegrityReport, EngineError> {
318        let conn = self.connect()?;
319
320        let physical_result: String =
321            conn.query_row("PRAGMA integrity_check", [], |row| row.get(0))?;
322        let foreign_key_count: i64 =
323            conn.query_row("SELECT count(*) FROM pragma_foreign_key_check", [], |row| {
324                row.get(0)
325            })?;
326        let missing_fts_rows: i64 = conn.query_row(
327            r"
328            SELECT count(*)
329            FROM chunks c
330            JOIN nodes n
331              ON n.logical_id = c.node_logical_id
332             AND n.superseded_at IS NULL
333            WHERE NOT EXISTS (
334                SELECT 1
335                FROM fts_nodes f
336                WHERE f.chunk_id = c.id
337            )
338            ",
339            [],
340            |row| row.get(0),
341        )?;
342        let duplicate_active: i64 = conn.query_row(
343            r"
344            SELECT count(*)
345            FROM (
346                SELECT logical_id
347                FROM nodes
348                WHERE superseded_at IS NULL
349                GROUP BY logical_id
350                HAVING count(*) > 1
351            )
352            ",
353            [],
354            |row| row.get(0),
355        )?;
356        let operational_missing_collections: i64 = conn.query_row(
357            r"
358            SELECT (
359                SELECT count(*)
360                FROM operational_mutations m
361                LEFT JOIN operational_collections c ON c.name = m.collection_name
362                WHERE c.name IS NULL
363            ) + (
364                SELECT count(*)
365                FROM operational_current oc
366                LEFT JOIN operational_collections c ON c.name = oc.collection_name
367                WHERE c.name IS NULL
368            )
369            ",
370            [],
371            |row| row.get(0),
372        )?;
373        let operational_missing_last_mutations: i64 = conn.query_row(
374            r"
375            SELECT count(*)
376            FROM operational_current oc
377            LEFT JOIN operational_mutations m ON m.id = oc.last_mutation_id
378            WHERE m.id IS NULL
379            ",
380            [],
381            |row| row.get(0),
382        )?;
383
384        let mut warnings = Vec::new();
385        if missing_fts_rows > 0 {
386            warnings.push("missing FTS projections detected".to_owned());
387        }
388        if duplicate_active > 0 {
389            warnings.push("duplicate active logical_ids detected".to_owned());
390        }
391        if operational_missing_collections > 0 {
392            warnings.push("operational rows reference missing collections".to_owned());
393        }
394        if operational_missing_last_mutations > 0 {
395            warnings.push("operational current rows reference missing last mutations".to_owned());
396        }
397
398        // FIX(review): was `as usize` — unsound on 32-bit targets, wraps negatives silently.
399        // Options: (A) try_from().unwrap_or(0) — masks corruption, (B) try_from().expect() —
400        // panics on corruption, (C) propagate error. Chose (B) here: a negative count(*)
401        // signals data corruption, and the integrity report would be meaningless anyway.
402        Ok(IntegrityReport {
403            physical_ok: physical_result == "ok",
404            foreign_keys_ok: foreign_key_count == 0,
405            missing_fts_rows: i64_to_usize(missing_fts_rows),
406            duplicate_active_logical_ids: i64_to_usize(duplicate_active),
407            operational_missing_collections: i64_to_usize(operational_missing_collections),
408            operational_missing_last_mutations: i64_to_usize(operational_missing_last_mutations),
409            warnings,
410        })
411    }
412
413    /// # Errors
414    /// Returns [`EngineError`] if the database connection fails or any SQL query fails.
415    #[allow(clippy::too_many_lines)]
416    pub fn check_semantics(&self) -> Result<SemanticReport, EngineError> {
417        let conn = self.connect()?;
418
419        let orphaned_chunks: i64 = conn.query_row(
420            r"
421            SELECT count(*)
422            FROM chunks c
423            WHERE NOT EXISTS (
424                SELECT 1 FROM nodes n
425                WHERE n.logical_id = c.node_logical_id
426            )
427            ",
428            [],
429            |row| row.get(0),
430        )?;
431
432        let null_source_ref_nodes: i64 = conn.query_row(
433            "SELECT count(*) FROM nodes WHERE source_ref IS NULL AND superseded_at IS NULL",
434            [],
435            |row| row.get(0),
436        )?;
437
438        let broken_step_fk: i64 = conn.query_row(
439            r"
440            SELECT count(*) FROM steps s
441            WHERE NOT EXISTS (SELECT 1 FROM runs r WHERE r.id = s.run_id)
442            ",
443            [],
444            |row| row.get(0),
445        )?;
446
447        let broken_action_fk: i64 = conn.query_row(
448            r"
449            SELECT count(*) FROM actions a
450            WHERE NOT EXISTS (SELECT 1 FROM steps s WHERE s.id = a.step_id)
451            ",
452            [],
453            |row| row.get(0),
454        )?;
455
456        let stale_fts_rows: i64 = conn.query_row(
457            r"
458            SELECT count(*) FROM fts_nodes f
459            WHERE NOT EXISTS (SELECT 1 FROM chunks c WHERE c.id = f.chunk_id)
460            ",
461            [],
462            |row| row.get(0),
463        )?;
464
465        let fts_rows_for_superseded_nodes: i64 = conn.query_row(
466            r"
467            SELECT count(*) FROM fts_nodes f
468            WHERE NOT EXISTS (
469                SELECT 1 FROM nodes n
470                WHERE n.logical_id = f.node_logical_id AND n.superseded_at IS NULL
471            )
472            ",
473            [],
474            |row| row.get(0),
475        )?;
476
477        let dangling_edges: i64 = conn.query_row(
478            r"
479            SELECT count(*) FROM edges e
480            WHERE e.superseded_at IS NULL AND (
481                NOT EXISTS (SELECT 1 FROM nodes n WHERE n.logical_id = e.source_logical_id AND n.superseded_at IS NULL)
482                OR
483                NOT EXISTS (SELECT 1 FROM nodes n WHERE n.logical_id = e.target_logical_id AND n.superseded_at IS NULL)
484            )
485            ",
486            [],
487            |row| row.get(0),
488        )?;
489
490        let orphaned_supersession_chains: i64 = conn.query_row(
491            r"
492            SELECT count(*) FROM (
493                SELECT logical_id FROM nodes
494                GROUP BY logical_id
495                HAVING count(*) > 0 AND sum(CASE WHEN superseded_at IS NULL THEN 1 ELSE 0 END) = 0
496            )
497            ",
498            [],
499            |row| row.get(0),
500        )?;
501
502        // Vec stale row detection — degrades to 0 when the vec profile is absent.
503        #[cfg(feature = "sqlite-vec")]
504        let stale_vec_rows: i64 = match conn.query_row(
505            r"
506            SELECT count(*) FROM vec_nodes_active v
507            WHERE NOT EXISTS (SELECT 1 FROM chunks c WHERE c.id = v.chunk_id)
508            ",
509            [],
510            |row| row.get(0),
511        ) {
512            Ok(n) => n,
513            Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
514                if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
515            {
516                0
517            }
518            Err(e) => return Err(EngineError::Sqlite(e)),
519        };
520        #[cfg(not(feature = "sqlite-vec"))]
521        let stale_vec_rows: i64 = 0;
522
523        #[cfg(feature = "sqlite-vec")]
524        let vec_rows_for_superseded_nodes: i64 = match conn.query_row(
525            r"
526            SELECT count(*) FROM vec_nodes_active v
527            JOIN chunks c ON c.id = v.chunk_id
528            WHERE NOT EXISTS (
529                SELECT 1 FROM nodes n
530                WHERE n.logical_id = c.node_logical_id
531            )
532            ",
533            [],
534            |row| row.get(0),
535        ) {
536            Ok(n) => n,
537            Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
538                if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
539            {
540                0
541            }
542            Err(e) => return Err(EngineError::Sqlite(e)),
543        };
544        #[cfg(not(feature = "sqlite-vec"))]
545        let vec_rows_for_superseded_nodes: i64 = 0;
546        let missing_operational_current_rows: i64 = conn.query_row(
547            r"
548            SELECT count(*)
549            FROM operational_mutations m
550            JOIN operational_collections c
551              ON c.name = m.collection_name
552             AND c.kind = 'latest_state'
553            WHERE m.op_kind = 'put'
554              AND NOT EXISTS (
555                    SELECT 1
556                    FROM operational_mutations newer
557                    WHERE newer.collection_name = m.collection_name
558                      AND newer.record_key = m.record_key
559                      AND newer.mutation_order > m.mutation_order
560                )
561              AND NOT EXISTS (
562                    SELECT 1
563                    FROM operational_current oc
564                    WHERE oc.collection_name = m.collection_name
565                      AND oc.record_key = m.record_key
566                )
567            ",
568            [],
569            |row| row.get(0),
570        )?;
571        let stale_operational_current_rows: i64 = conn.query_row(
572            r"
573            SELECT count(*)
574            FROM operational_current oc
575            JOIN operational_collections c
576              ON c.name = oc.collection_name
577             AND c.kind = 'latest_state'
578            LEFT JOIN operational_mutations m ON m.id = oc.last_mutation_id
579            WHERE m.id IS NULL
580               OR m.collection_name != oc.collection_name
581               OR m.record_key != oc.record_key
582               OR m.op_kind != 'put'
583               OR m.payload_json != oc.payload_json
584               OR EXISTS (
585                    SELECT 1
586                    FROM operational_mutations newer
587                    WHERE newer.collection_name = oc.collection_name
588                      AND newer.record_key = oc.record_key
589                      AND newer.mutation_order > m.mutation_order
590                )
591            ",
592            [],
593            |row| row.get(0),
594        )?;
595        let disabled_collection_mutations: i64 = conn.query_row(
596            r"
597            SELECT count(*)
598            FROM operational_mutations m
599            JOIN operational_collections c ON c.name = m.collection_name
600            WHERE c.disabled_at IS NOT NULL AND m.created_at > c.disabled_at
601            ",
602            [],
603            |row| row.get(0),
604        )?;
605        let orphaned_last_access_metadata_rows: i64 = conn.query_row(
606            r"
607            SELECT count(*)
608            FROM node_access_metadata am
609            WHERE NOT EXISTS (
610                SELECT 1 FROM nodes n WHERE n.logical_id = am.logical_id
611            )
612            ",
613            [],
614            |row| row.get(0),
615        )?;
616
617        let mut warnings = Vec::new();
618        if orphaned_chunks > 0 {
619            warnings.push(format!(
620                "{orphaned_chunks} orphaned chunk(s) with no surviving node history"
621            ));
622        }
623        if null_source_ref_nodes > 0 {
624            warnings.push(format!(
625                "{null_source_ref_nodes} active node(s) with null source_ref"
626            ));
627        }
628        if broken_step_fk > 0 {
629            warnings.push(format!(
630                "{broken_step_fk} step(s) referencing non-existent run"
631            ));
632        }
633        if broken_action_fk > 0 {
634            warnings.push(format!(
635                "{broken_action_fk} action(s) referencing non-existent step"
636            ));
637        }
638        if stale_fts_rows > 0 {
639            warnings.push(format!(
640                "{stale_fts_rows} stale FTS row(s) referencing missing chunk"
641            ));
642        }
643        if fts_rows_for_superseded_nodes > 0 {
644            warnings.push(format!(
645                "{fts_rows_for_superseded_nodes} FTS row(s) for superseded node(s)"
646            ));
647        }
648        if dangling_edges > 0 {
649            warnings.push(format!(
650                "{dangling_edges} active edge(s) with missing endpoint node"
651            ));
652        }
653        if orphaned_supersession_chains > 0 {
654            warnings.push(format!(
655                "{orphaned_supersession_chains} logical_id(s) with all versions superseded"
656            ));
657        }
658        if stale_vec_rows > 0 {
659            warnings.push(format!(
660                "{stale_vec_rows} stale vec row(s) referencing missing chunk"
661            ));
662        }
663        if vec_rows_for_superseded_nodes > 0 {
664            warnings.push(format!(
665                "{vec_rows_for_superseded_nodes} vec row(s) whose node history is missing"
666            ));
667        }
668        if missing_operational_current_rows > 0 {
669            warnings.push(format!(
670                "{missing_operational_current_rows} latest-state key(s) missing operational_current rows"
671            ));
672        }
673        if stale_operational_current_rows > 0 {
674            warnings.push(format!(
675                "{stale_operational_current_rows} stale operational_current row(s)"
676            ));
677        }
678        if disabled_collection_mutations > 0 {
679            warnings.push(format!(
680                "{disabled_collection_mutations} mutation(s) were written after collection disable"
681            ));
682        }
683        if orphaned_last_access_metadata_rows > 0 {
684            warnings.push(format!(
685                "{orphaned_last_access_metadata_rows} last_access metadata row(s) reference missing node history"
686            ));
687        }
688
689        Ok(SemanticReport {
690            orphaned_chunks: i64_to_usize(orphaned_chunks),
691            null_source_ref_nodes: i64_to_usize(null_source_ref_nodes),
692            broken_step_fk: i64_to_usize(broken_step_fk),
693            broken_action_fk: i64_to_usize(broken_action_fk),
694            stale_fts_rows: i64_to_usize(stale_fts_rows),
695            fts_rows_for_superseded_nodes: i64_to_usize(fts_rows_for_superseded_nodes),
696            dangling_edges: i64_to_usize(dangling_edges),
697            orphaned_supersession_chains: i64_to_usize(orphaned_supersession_chains),
698            stale_vec_rows: i64_to_usize(stale_vec_rows),
699            vec_rows_for_superseded_nodes: i64_to_usize(vec_rows_for_superseded_nodes),
700            missing_operational_current_rows: i64_to_usize(missing_operational_current_rows),
701            stale_operational_current_rows: i64_to_usize(stale_operational_current_rows),
702            disabled_collection_mutations: i64_to_usize(disabled_collection_mutations),
703            orphaned_last_access_metadata_rows: i64_to_usize(orphaned_last_access_metadata_rows),
704            warnings,
705        })
706    }
707
708    /// # Errors
709    /// Returns [`EngineError`] if the collection metadata is invalid or the insert fails.
710    pub fn register_operational_collection(
711        &self,
712        request: &OperationalRegisterRequest,
713    ) -> Result<OperationalCollectionRecord, EngineError> {
714        if request.name.trim().is_empty() {
715            return Err(EngineError::InvalidWrite(
716                "operational collection name must not be empty".to_owned(),
717            ));
718        }
719        if request.schema_json.is_empty() {
720            return Err(EngineError::InvalidWrite(
721                "operational collection schema_json must not be empty".to_owned(),
722            ));
723        }
724        if request.retention_json.is_empty() {
725            return Err(EngineError::InvalidWrite(
726                "operational collection retention_json must not be empty".to_owned(),
727            ));
728        }
729        if request.filter_fields_json.is_empty() {
730            return Err(EngineError::InvalidWrite(
731                "operational collection filter_fields_json must not be empty".to_owned(),
732            ));
733        }
734        parse_operational_validation_contract(&request.validation_json)
735            .map_err(EngineError::InvalidWrite)?;
736        parse_operational_secondary_indexes_json(&request.secondary_indexes_json, request.kind)
737            .map_err(EngineError::InvalidWrite)?;
738        if request.format_version <= 0 {
739            return Err(EngineError::InvalidWrite(
740                "operational collection format_version must be positive".to_owned(),
741            ));
742        }
743        parse_operational_filter_fields(&request.filter_fields_json)
744            .map_err(EngineError::InvalidWrite)?;
745
746        let mut conn = self.connect()?;
747        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
748        tx.execute(
749            "INSERT INTO operational_collections \
750             (name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at) \
751             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, unixepoch())",
752            rusqlite::params![
753                request.name.as_str(),
754                request.kind.as_str(),
755                request.schema_json.as_str(),
756                request.retention_json.as_str(),
757                request.filter_fields_json.as_str(),
758                request.validation_json.as_str(),
759                request.secondary_indexes_json.as_str(),
760                request.format_version,
761            ],
762        )?;
763        persist_simple_provenance_event(
764            &tx,
765            "operational_collection_registered",
766            request.name.as_str(),
767            Some(serde_json::json!({
768                "kind": request.kind.as_str(),
769                "format_version": request.format_version,
770            })),
771        )?;
772        tx.commit()?;
773
774        self.describe_operational_collection(&request.name)?
775            .ok_or_else(|| {
776                EngineError::Bridge("registered collection missing after commit".to_owned())
777            })
778    }
779
780    /// # Errors
781    /// Returns [`EngineError`] if the database query fails.
782    pub fn describe_operational_collection(
783        &self,
784        name: &str,
785    ) -> Result<Option<OperationalCollectionRecord>, EngineError> {
786        let conn = self.connect()?;
787        load_operational_collection_record(&conn, name)
788    }
789
790    /// # Errors
791    /// Returns [`EngineError`] if the collection is missing, the filter contract is invalid,
792    /// or existing mutation backfill fails.
793    pub fn update_operational_collection_filters(
794        &self,
795        name: &str,
796        filter_fields_json: &str,
797    ) -> Result<OperationalCollectionRecord, EngineError> {
798        if filter_fields_json.is_empty() {
799            return Err(EngineError::InvalidWrite(
800                "operational collection filter_fields_json must not be empty".to_owned(),
801            ));
802        }
803        let declared_fields = parse_operational_filter_fields(filter_fields_json)
804            .map_err(EngineError::InvalidWrite)?;
805
806        let mut conn = self.connect()?;
807        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
808        load_operational_collection_record(&tx, name)?.ok_or_else(|| {
809            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
810        })?;
811        tx.execute(
812            "UPDATE operational_collections SET filter_fields_json = ?2 WHERE name = ?1",
813            rusqlite::params![name, filter_fields_json],
814        )?;
815        tx.execute(
816            "DELETE FROM operational_filter_values WHERE collection_name = ?1",
817            [name],
818        )?;
819
820        let mut mutation_stmt = tx.prepare(
821            "SELECT id, payload_json FROM operational_mutations \
822             WHERE collection_name = ?1 ORDER BY mutation_order",
823        )?;
824        let mutations = mutation_stmt
825            .query_map([name], |row| {
826                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
827            })?
828            .collect::<Result<Vec<_>, _>>()?;
829        drop(mutation_stmt);
830
831        let mut insert_filter_value = tx.prepare_cached(
832            "INSERT INTO operational_filter_values \
833             (mutation_id, collection_name, field_name, string_value, integer_value) \
834             VALUES (?1, ?2, ?3, ?4, ?5)",
835        )?;
836        let mut inserted_values = 0usize;
837        for (mutation_id, payload_json) in &mutations {
838            for filter_value in
839                extract_operational_filter_values(&declared_fields, payload_json.as_str())
840            {
841                insert_filter_value.execute(rusqlite::params![
842                    mutation_id,
843                    name,
844                    filter_value.field_name,
845                    filter_value.string_value,
846                    filter_value.integer_value,
847                ])?;
848                inserted_values += 1;
849            }
850        }
851        drop(insert_filter_value);
852
853        persist_simple_provenance_event(
854            &tx,
855            "operational_collection_filter_fields_updated",
856            name,
857            Some(serde_json::json!({
858                "field_count": declared_fields.len(),
859                "mutations_backfilled": mutations.len(),
860                "inserted_filter_values": inserted_values,
861            })),
862        )?;
863        let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
864            EngineError::Bridge("operational collection missing after filter update".to_owned())
865        })?;
866        tx.commit()?;
867        Ok(updated)
868    }
869
870    /// # Errors
871    /// Returns [`EngineError`] if the collection is missing or the validation contract is invalid.
872    pub fn update_operational_collection_validation(
873        &self,
874        name: &str,
875        validation_json: &str,
876    ) -> Result<OperationalCollectionRecord, EngineError> {
877        parse_operational_validation_contract(validation_json)
878            .map_err(EngineError::InvalidWrite)?;
879
880        let mut conn = self.connect()?;
881        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
882        load_operational_collection_record(&tx, name)?.ok_or_else(|| {
883            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
884        })?;
885        tx.execute(
886            "UPDATE operational_collections SET validation_json = ?2 WHERE name = ?1",
887            rusqlite::params![name, validation_json],
888        )?;
889        persist_simple_provenance_event(
890            &tx,
891            "operational_collection_validation_updated",
892            name,
893            Some(serde_json::json!({
894                "has_validation": !validation_json.is_empty(),
895            })),
896        )?;
897        let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
898            EngineError::Bridge("operational collection missing after validation update".to_owned())
899        })?;
900        tx.commit()?;
901        Ok(updated)
902    }
903
904    /// # Errors
905    /// Returns [`EngineError`] if the collection is missing, the contract is invalid,
906    /// or derived index rebuild fails.
907    pub fn update_operational_collection_secondary_indexes(
908        &self,
909        name: &str,
910        secondary_indexes_json: &str,
911    ) -> Result<OperationalCollectionRecord, EngineError> {
912        let mut conn = self.connect()?;
913        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
914        let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
915            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
916        })?;
917        let indexes = parse_operational_secondary_indexes_json(secondary_indexes_json, record.kind)
918            .map_err(EngineError::InvalidWrite)?;
919        tx.execute(
920            "UPDATE operational_collections SET secondary_indexes_json = ?2 WHERE name = ?1",
921            rusqlite::params![name, secondary_indexes_json],
922        )?;
923        let (mutation_entries_rebuilt, current_entries_rebuilt) =
924            rebuild_operational_secondary_index_entries(&tx, &record.name, record.kind, &indexes)?;
925        persist_simple_provenance_event(
926            &tx,
927            "operational_collection_secondary_indexes_updated",
928            name,
929            Some(serde_json::json!({
930                "index_count": indexes.len(),
931                "mutation_entries_rebuilt": mutation_entries_rebuilt,
932                "current_entries_rebuilt": current_entries_rebuilt,
933            })),
934        )?;
935        let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
936            EngineError::Bridge(
937                "operational collection missing after secondary index update".to_owned(),
938            )
939        })?;
940        tx.commit()?;
941        Ok(updated)
942    }
943
944    /// # Errors
945    /// Returns [`EngineError`] if the collection is missing or rebuild fails.
946    pub fn rebuild_operational_secondary_indexes(
947        &self,
948        name: &str,
949    ) -> Result<OperationalSecondaryIndexRebuildReport, EngineError> {
950        let mut conn = self.connect()?;
951        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
952        let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
953            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
954        })?;
955        let indexes =
956            parse_operational_secondary_indexes_json(&record.secondary_indexes_json, record.kind)
957                .map_err(EngineError::InvalidWrite)?;
958        let (mutation_entries_rebuilt, current_entries_rebuilt) =
959            rebuild_operational_secondary_index_entries(&tx, &record.name, record.kind, &indexes)?;
960        persist_simple_provenance_event(
961            &tx,
962            "operational_secondary_indexes_rebuilt",
963            name,
964            Some(serde_json::json!({
965                "index_count": indexes.len(),
966                "mutation_entries_rebuilt": mutation_entries_rebuilt,
967                "current_entries_rebuilt": current_entries_rebuilt,
968            })),
969        )?;
970        tx.commit()?;
971        Ok(OperationalSecondaryIndexRebuildReport {
972            collection_name: name.to_owned(),
973            mutation_entries_rebuilt,
974            current_entries_rebuilt,
975        })
976    }
977
978    /// # Errors
979    /// Returns [`EngineError`] if the collection is missing or its validation contract is invalid.
980    pub fn validate_operational_collection_history(
981        &self,
982        name: &str,
983    ) -> Result<OperationalHistoryValidationReport, EngineError> {
984        let conn = self.connect()?;
985        let record = load_operational_collection_record(&conn, name)?.ok_or_else(|| {
986            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
987        })?;
988        let Some(contract) = parse_operational_validation_contract(&record.validation_json)
989            .map_err(EngineError::InvalidWrite)?
990        else {
991            return Err(EngineError::InvalidWrite(format!(
992                "operational collection '{name}' has no validation_json configured"
993            )));
994        };
995
996        let mut stmt = conn.prepare(
997            "SELECT id, record_key, op_kind, payload_json FROM operational_mutations \
998             WHERE collection_name = ?1 ORDER BY mutation_order",
999        )?;
1000        let rows = stmt
1001            .query_map([name], |row| {
1002                Ok((
1003                    row.get::<_, String>(0)?,
1004                    row.get::<_, String>(1)?,
1005                    row.get::<_, String>(2)?,
1006                    row.get::<_, String>(3)?,
1007                ))
1008            })?
1009            .collect::<Result<Vec<_>, _>>()?;
1010        drop(stmt);
1011
1012        let mut checked_rows = 0usize;
1013        let mut issues = Vec::new();
1014        for (mutation_id, record_key, op_kind, payload_json) in rows {
1015            if op_kind == "delete" {
1016                continue;
1017            }
1018            checked_rows += 1;
1019            if let Err(message) =
1020                validate_operational_payload_against_contract(&contract, payload_json.as_str())
1021            {
1022                issues.push(OperationalHistoryValidationIssue {
1023                    mutation_id,
1024                    record_key,
1025                    op_kind,
1026                    message,
1027                });
1028            }
1029        }
1030
1031        Ok(OperationalHistoryValidationReport {
1032            collection_name: name.to_owned(),
1033            checked_rows,
1034            invalid_row_count: issues.len(),
1035            issues,
1036        })
1037    }
1038
1039    /// # Errors
1040    /// Returns [`EngineError`] if the database query fails.
1041    pub fn disable_operational_collection(
1042        &self,
1043        name: &str,
1044    ) -> Result<OperationalCollectionRecord, EngineError> {
1045        let mut conn = self.connect()?;
1046        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1047        let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1048            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1049        })?;
1050        let changed = if record.disabled_at.is_none() {
1051            tx.execute(
1052                "UPDATE operational_collections SET disabled_at = unixepoch() WHERE name = ?1",
1053                [name],
1054            )?;
1055            true
1056        } else {
1057            false
1058        };
1059        let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1060            EngineError::Bridge("operational collection missing after disable".to_owned())
1061        })?;
1062        persist_simple_provenance_event(
1063            &tx,
1064            "operational_collection_disabled",
1065            name,
1066            Some(serde_json::json!({
1067                "disabled_at": record.disabled_at,
1068                "changed": changed,
1069            })),
1070        )?;
1071        tx.commit()?;
1072        Ok(record)
1073    }
1074
1075    /// # Errors
1076    /// Returns [`EngineError`] if the database query fails.
1077    pub fn compact_operational_collection(
1078        &self,
1079        name: &str,
1080        dry_run: bool,
1081    ) -> Result<OperationalCompactionReport, EngineError> {
1082        let mut conn = self.connect()?;
1083        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1084        let collection = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1085            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1086        })?;
1087        validate_append_only_operational_collection(&collection, "compact")?;
1088        let (mutation_ids, before_timestamp) =
1089            operational_compaction_candidates(&tx, &collection.retention_json, name)?;
1090        if dry_run {
1091            drop(tx);
1092            return Ok(OperationalCompactionReport {
1093                collection_name: name.to_owned(),
1094                deleted_mutations: mutation_ids.len(),
1095                dry_run: true,
1096                before_timestamp,
1097            });
1098        }
1099        let mut delete_stmt =
1100            tx.prepare_cached("DELETE FROM operational_mutations WHERE id = ?1")?;
1101        for mutation_id in &mutation_ids {
1102            delete_stmt.execute([mutation_id.as_str()])?;
1103        }
1104        drop(delete_stmt);
1105        persist_simple_provenance_event(
1106            &tx,
1107            "operational_collection_compacted",
1108            name,
1109            Some(serde_json::json!({
1110                "deleted_mutations": mutation_ids.len(),
1111                "before_timestamp": before_timestamp,
1112            })),
1113        )?;
1114        tx.commit()?;
1115        Ok(OperationalCompactionReport {
1116            collection_name: name.to_owned(),
1117            deleted_mutations: mutation_ids.len(),
1118            dry_run: false,
1119            before_timestamp,
1120        })
1121    }
1122
1123    /// # Errors
1124    /// Returns [`EngineError`] if the database query fails.
1125    pub fn purge_operational_collection(
1126        &self,
1127        name: &str,
1128        before_timestamp: i64,
1129    ) -> Result<OperationalPurgeReport, EngineError> {
1130        let mut conn = self.connect()?;
1131        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1132        let collection = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1133            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1134        })?;
1135        validate_append_only_operational_collection(&collection, "purge")?;
1136        let deleted_mutations = tx.execute(
1137            "DELETE FROM operational_mutations WHERE collection_name = ?1 AND created_at < ?2",
1138            rusqlite::params![name, before_timestamp],
1139        )?;
1140        persist_simple_provenance_event(
1141            &tx,
1142            "operational_collection_purged",
1143            name,
1144            Some(serde_json::json!({
1145                "deleted_mutations": deleted_mutations,
1146                "before_timestamp": before_timestamp,
1147            })),
1148        )?;
1149        tx.commit()?;
1150        Ok(OperationalPurgeReport {
1151            collection_name: name.to_owned(),
1152            deleted_mutations,
1153            before_timestamp,
1154        })
1155    }
1156
1157    /// # Errors
1158    /// Returns [`EngineError`] if collection selection or policy parsing fails.
1159    pub fn plan_operational_retention(
1160        &self,
1161        now_timestamp: i64,
1162        collection_names: Option<&[String]>,
1163        max_collections: Option<usize>,
1164    ) -> Result<OperationalRetentionPlanReport, EngineError> {
1165        let conn = self.connect()?;
1166        let records = load_operational_retention_records(&conn, collection_names, max_collections)?;
1167        let mut items = Vec::with_capacity(records.len());
1168        for record in records {
1169            items.push(plan_operational_retention_item(
1170                &conn,
1171                &record,
1172                now_timestamp,
1173            )?);
1174        }
1175        Ok(OperationalRetentionPlanReport {
1176            planned_at: now_timestamp,
1177            collections_examined: items.len(),
1178            items,
1179        })
1180    }
1181
1182    /// # Errors
1183    /// Returns [`EngineError`] if collection selection, policy parsing, or execution fails.
1184    pub fn run_operational_retention(
1185        &self,
1186        now_timestamp: i64,
1187        collection_names: Option<&[String]>,
1188        max_collections: Option<usize>,
1189        dry_run: bool,
1190    ) -> Result<OperationalRetentionRunReport, EngineError> {
1191        let mut conn = self.connect()?;
1192        let records = load_operational_retention_records(&conn, collection_names, max_collections)?;
1193        let mut items = Vec::with_capacity(records.len());
1194        let mut collections_acted_on = 0usize;
1195
1196        for record in records {
1197            let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1198            let item = run_operational_retention_item(&tx, &record, now_timestamp, dry_run)?;
1199            if item.deleted_mutations > 0 {
1200                collections_acted_on += 1;
1201            }
1202            if dry_run || item.action_kind == OperationalRetentionActionKind::Noop {
1203                drop(tx);
1204            } else {
1205                tx.commit()?;
1206            }
1207            items.push(item);
1208        }
1209
1210        Ok(OperationalRetentionRunReport {
1211            executed_at: now_timestamp,
1212            collections_examined: items.len(),
1213            collections_acted_on,
1214            dry_run,
1215            items,
1216        })
1217    }
1218
1219    /// # Errors
1220    /// Returns [`EngineError`] if the database query fails.
1221    pub fn trace_operational_collection(
1222        &self,
1223        collection_name: &str,
1224        record_key: Option<&str>,
1225    ) -> Result<OperationalTraceReport, EngineError> {
1226        let conn = self.connect()?;
1227        ensure_operational_collection_registered(&conn, collection_name)?;
1228        let mutations = if let Some(record_key) = record_key {
1229            let mut stmt = conn.prepare(
1230                "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
1231                 FROM operational_mutations \
1232                 WHERE collection_name = ?1 AND record_key = ?2 \
1233                 ORDER BY mutation_order",
1234            )?;
1235            stmt.query_map([collection_name, record_key], map_operational_mutation_row)?
1236                .collect::<Result<Vec<_>, _>>()?
1237        } else {
1238            let mut stmt = conn.prepare(
1239                "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
1240                 FROM operational_mutations \
1241                 WHERE collection_name = ?1 \
1242                 ORDER BY mutation_order",
1243            )?;
1244            stmt.query_map([collection_name], map_operational_mutation_row)?
1245                .collect::<Result<Vec<_>, _>>()?
1246        };
1247        let current_rows = if let Some(record_key) = record_key {
1248            let mut stmt = conn.prepare(
1249                "SELECT collection_name, record_key, payload_json, updated_at, last_mutation_id \
1250                 FROM operational_current \
1251                 WHERE collection_name = ?1 AND record_key = ?2 \
1252                 ORDER BY updated_at, record_key",
1253            )?;
1254            stmt.query_map([collection_name, record_key], map_operational_current_row)?
1255                .collect::<Result<Vec<_>, _>>()?
1256        } else {
1257            let mut stmt = conn.prepare(
1258                "SELECT collection_name, record_key, payload_json, updated_at, last_mutation_id \
1259                 FROM operational_current \
1260                 WHERE collection_name = ?1 \
1261                 ORDER BY updated_at, record_key",
1262            )?;
1263            stmt.query_map([collection_name], map_operational_current_row)?
1264                .collect::<Result<Vec<_>, _>>()?
1265        };
1266
1267        Ok(OperationalTraceReport {
1268            collection_name: collection_name.to_owned(),
1269            record_key: record_key.map(str::to_owned),
1270            mutation_count: mutations.len(),
1271            current_count: current_rows.len(),
1272            mutations,
1273            current_rows,
1274        })
1275    }
1276
1277    /// # Errors
1278    /// Returns [`EngineError`] if the collection contract is invalid or the filtered read fails.
1279    pub fn read_operational_collection(
1280        &self,
1281        request: &OperationalReadRequest,
1282    ) -> Result<OperationalReadReport, EngineError> {
1283        if request.collection_name.trim().is_empty() {
1284            return Err(EngineError::InvalidWrite(
1285                "operational read collection_name must not be empty".to_owned(),
1286            ));
1287        }
1288        if request.filters.is_empty() {
1289            return Err(EngineError::InvalidWrite(
1290                "operational read requires at least one filter clause".to_owned(),
1291            ));
1292        }
1293
1294        let conn = self.connect()?;
1295        let record = load_operational_collection_record(&conn, &request.collection_name)?
1296            .ok_or_else(|| {
1297                EngineError::InvalidWrite(format!(
1298                    "operational collection '{}' is not registered",
1299                    request.collection_name
1300                ))
1301            })?;
1302        validate_append_only_operational_collection(&record, "read")?;
1303        let declared_fields = parse_operational_filter_fields(&record.filter_fields_json)
1304            .map_err(EngineError::InvalidWrite)?;
1305        let secondary_indexes =
1306            parse_operational_secondary_indexes_json(&record.secondary_indexes_json, record.kind)
1307                .map_err(EngineError::InvalidWrite)?;
1308        let applied_limit = operational_read_limit(request.limit)?;
1309        let filters = compile_operational_read_filters(&request.filters, &declared_fields)?;
1310        if let Some(report) = execute_operational_secondary_index_read(
1311            &conn,
1312            &request.collection_name,
1313            &filters,
1314            &secondary_indexes,
1315            applied_limit,
1316        )? {
1317            return Ok(report);
1318        }
1319        execute_operational_filtered_read(&conn, &request.collection_name, &filters, applied_limit)
1320    }
1321
1322    /// # Errors
1323    /// Returns [`EngineError`] if the database query fails or collection validation fails.
1324    pub fn rebuild_operational_current(
1325        &self,
1326        collection_name: Option<&str>,
1327    ) -> Result<OperationalRepairReport, EngineError> {
1328        let mut conn = self.connect()?;
1329        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1330        let collections = if let Some(name) = collection_name {
1331            let maybe_kind: Option<String> = tx
1332                .query_row(
1333                    "SELECT kind FROM operational_collections WHERE name = ?1",
1334                    [name],
1335                    |row| row.get(0),
1336                )
1337                .optional()?;
1338            let Some(kind) = maybe_kind else {
1339                return Err(EngineError::InvalidWrite(format!(
1340                    "operational collection '{name}' is not registered"
1341                )));
1342            };
1343            if kind != OperationalCollectionKind::LatestState.as_str() {
1344                return Err(EngineError::InvalidWrite(format!(
1345                    "operational collection '{name}' is not latest_state"
1346                )));
1347            }
1348            vec![name.to_owned()]
1349        } else {
1350            let mut stmt = tx.prepare(
1351                "SELECT name FROM operational_collections WHERE kind = 'latest_state' ORDER BY name",
1352            )?;
1353            stmt.query_map([], |row| row.get::<_, String>(0))?
1354                .collect::<Result<Vec<_>, _>>()?
1355        };
1356
1357        let rebuilt_rows = rebuild_operational_current_rows(&tx, &collections)?;
1358        for collection in &collections {
1359            let record = load_operational_collection_record(&tx, collection)?.ok_or_else(|| {
1360                EngineError::Bridge(format!(
1361                    "operational collection '{collection}' missing during current rebuild"
1362                ))
1363            })?;
1364            let indexes = parse_operational_secondary_indexes_json(
1365                &record.secondary_indexes_json,
1366                record.kind,
1367            )
1368            .map_err(EngineError::InvalidWrite)?;
1369            if !indexes.is_empty() {
1370                rebuild_operational_secondary_index_entries(
1371                    &tx,
1372                    &record.name,
1373                    record.kind,
1374                    &indexes,
1375                )?;
1376            }
1377        }
1378
1379        persist_simple_provenance_event(
1380            &tx,
1381            "operational_current_rebuilt",
1382            collection_name.unwrap_or("*"),
1383            Some(serde_json::json!({
1384                "collections_rebuilt": collections.len(),
1385                "current_rows_rebuilt": rebuilt_rows,
1386            })),
1387        )?;
1388        tx.commit()?;
1389
1390        Ok(OperationalRepairReport {
1391            collections_rebuilt: collections.len(),
1392            current_rows_rebuilt: rebuilt_rows,
1393        })
1394    }
1395
1396    /// # Errors
1397    /// Returns [`EngineError`] if the database connection fails or the projection rebuild fails.
1398    pub fn rebuild_projections(
1399        &self,
1400        target: ProjectionTarget,
1401    ) -> Result<ProjectionRepairReport, EngineError> {
1402        self.projections.rebuild_projections(target)
1403    }
1404
1405    /// # Errors
1406    /// Returns [`EngineError`] if the database connection fails or the projection rebuild fails.
1407    pub fn rebuild_missing_projections(&self) -> Result<ProjectionRepairReport, EngineError> {
1408        self.projections.rebuild_missing_projections()
1409    }
1410
1411    /// Recreate enabled vector profiles from persisted `vector_profiles` metadata.
1412    ///
1413    /// # Errors
1414    /// Returns [`EngineError`] if the database connection fails, reading metadata fails,
1415    /// or sqlite-vec support is unavailable while enabled profiles are present.
1416    pub fn restore_vector_profiles(&self) -> Result<ProjectionRepairReport, EngineError> {
1417        let conn = self.connect()?;
1418        let profiles: Vec<(String, String, i64)> = {
1419            let mut stmt = conn.prepare(
1420                "SELECT profile, table_name, dimension \
1421                 FROM vector_profiles WHERE enabled = 1 ORDER BY profile",
1422            )?;
1423            stmt.query_map([], |row| {
1424                Ok((
1425                    row.get::<_, String>(0)?,
1426                    row.get::<_, String>(1)?,
1427                    row.get::<_, i64>(2)?,
1428                ))
1429            })?
1430            .collect::<Result<Vec<_>, _>>()?
1431        };
1432
1433        for (profile, table_name, dimension) in &profiles {
1434            let dimension = usize::try_from(*dimension).map_err(|_| {
1435                EngineError::Bridge(format!("invalid vector profile dimension: {dimension}"))
1436            })?;
1437            self.schema_manager
1438                .ensure_vector_profile(&conn, profile, table_name, dimension)?;
1439        }
1440
1441        Ok(ProjectionRepairReport {
1442            targets: vec![ProjectionTarget::Vec],
1443            rebuilt_rows: profiles.len(),
1444            notes: vec![],
1445        })
1446    }
1447
1448    /// Rebuild vector embeddings using an application-supplied regeneration
1449    /// contract and generator command.
1450    ///
1451    /// The config is persisted in `vector_embedding_contracts` so the metadata
1452    /// required for recovery survives future repair runs.
1453    ///
1454    /// # Errors
1455    /// Returns [`EngineError`] if the database connection fails, the config is
1456    /// invalid, the generator command fails, or the regenerated embeddings are
1457    /// malformed.
1458    #[allow(clippy::too_many_lines)]
1459    pub fn regenerate_vector_embeddings(
1460        &self,
1461        config: &VectorRegenerationConfig,
1462    ) -> Result<VectorRegenerationReport, EngineError> {
1463        self.regenerate_vector_embeddings_with_policy(config, &VectorGeneratorPolicy::default())
1464    }
1465
1466    /// # Errors
1467    /// Returns [`EngineError`] if the database connection fails, the config is
1468    /// invalid, the generator command fails, or the regenerated embeddings are
1469    /// malformed.
1470    #[allow(clippy::too_many_lines)]
1471    pub fn regenerate_vector_embeddings_with_policy(
1472        &self,
1473        config: &VectorRegenerationConfig,
1474        policy: &VectorGeneratorPolicy,
1475    ) -> Result<VectorRegenerationReport, EngineError> {
1476        let conn = self.connect()?;
1477        let config = validate_vector_regeneration_config(&conn, config, policy)
1478            .map_err(|failure| failure.to_engine_error())?;
1479        let chunks = collect_regeneration_chunks(&conn)?;
1480        let payload = build_regeneration_input(&config, chunks.clone());
1481        let snapshot_hash = compute_snapshot_hash(&payload)?;
1482        let audit_metadata = VectorRegenerationAuditMetadata {
1483            profile: config.profile.clone(),
1484            model_identity: config.model_identity.clone(),
1485            model_version: config.model_version.clone(),
1486            chunk_count: chunks.len(),
1487            snapshot_hash: snapshot_hash.clone(),
1488            failure_class: None,
1489        };
1490        persist_vector_regeneration_event(
1491            &conn,
1492            "vector_regeneration_requested",
1493            &config.profile,
1494            &audit_metadata,
1495        )?;
1496        let notes = generator_policy_notes(policy);
1497        let generated = match run_vector_generator_bounded(&config, &payload, policy) {
1498            Ok(generated) => generated,
1499            Err(failure) => {
1500                self.persist_vector_regeneration_failure_best_effort(
1501                    &config.profile,
1502                    &audit_metadata,
1503                    &failure,
1504                );
1505                return Err(failure.to_engine_error());
1506            }
1507        };
1508        let mut embedding_map = match validate_generated_embeddings(&config, &chunks, generated) {
1509            Ok(embedding_map) => embedding_map,
1510            Err(failure) => {
1511                self.persist_vector_regeneration_failure_best_effort(
1512                    &config.profile,
1513                    &audit_metadata,
1514                    &failure,
1515                );
1516                return Err(failure.to_engine_error());
1517            }
1518        };
1519
1520        let mut conn = conn;
1521        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1522        match self.schema_manager.ensure_vector_profile(
1523            &tx,
1524            &config.profile,
1525            &config.table_name,
1526            config.dimension,
1527        ) {
1528            Ok(()) => {}
1529            Err(SchemaError::MissingCapability(message)) => {
1530                let failure = VectorRegenerationFailure::new(
1531                    VectorRegenerationFailureClass::UnsupportedVecCapability,
1532                    message,
1533                );
1534                drop(tx);
1535                self.persist_vector_regeneration_failure_best_effort(
1536                    &config.profile,
1537                    &audit_metadata,
1538                    &failure,
1539                );
1540                return Err(failure.to_engine_error());
1541            }
1542            Err(error) => return Err(EngineError::Schema(error)),
1543        }
1544        let apply_chunks = collect_regeneration_chunks(&tx)?;
1545        let apply_payload = build_regeneration_input(&config, apply_chunks.clone());
1546        let apply_hash = compute_snapshot_hash(&apply_payload)?;
1547        if apply_hash != snapshot_hash {
1548            let failure = VectorRegenerationFailure::new(
1549                VectorRegenerationFailureClass::SnapshotDrift,
1550                "chunk snapshot changed during generation; retry".to_owned(),
1551            );
1552            drop(tx);
1553            self.persist_vector_regeneration_failure_best_effort(
1554                &config.profile,
1555                &audit_metadata,
1556                &failure,
1557            );
1558            return Err(failure.to_engine_error());
1559        }
1560        persist_vector_contract(&tx, &config, &snapshot_hash)?;
1561        tx.execute("DELETE FROM vec_nodes_active", [])?;
1562        let mut stmt = tx
1563            .prepare_cached("INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES (?1, ?2)")?;
1564        let mut regenerated_rows = 0usize;
1565        for chunk in &apply_chunks {
1566            let Some(embedding) = embedding_map.remove(&chunk.chunk_id) else {
1567                drop(stmt);
1568                drop(tx);
1569                let failure = VectorRegenerationFailure::new(
1570                    VectorRegenerationFailureClass::MalformedGeneratorJson,
1571                    format!(
1572                        "generator did not return embedding for chunk '{}'",
1573                        chunk.chunk_id
1574                    ),
1575                );
1576                self.persist_vector_regeneration_failure_best_effort(
1577                    &config.profile,
1578                    &audit_metadata,
1579                    &failure,
1580                );
1581                return Err(failure.to_engine_error());
1582            };
1583            stmt.execute(rusqlite::params![chunk.chunk_id.as_str(), embedding])?;
1584            regenerated_rows += 1;
1585        }
1586        drop(stmt);
1587        persist_vector_regeneration_event(
1588            &tx,
1589            "vector_regeneration_apply",
1590            &config.profile,
1591            &audit_metadata,
1592        )?;
1593        tx.commit()?;
1594
1595        Ok(VectorRegenerationReport {
1596            profile: config.profile.clone(),
1597            table_name: config.table_name.clone(),
1598            dimension: config.dimension,
1599            total_chunks: chunks.len(),
1600            regenerated_rows,
1601            contract_persisted: true,
1602            notes,
1603        })
1604    }
1605
1606    fn persist_vector_regeneration_failure_best_effort(
1607        &self,
1608        profile: &str,
1609        metadata: &VectorRegenerationAuditMetadata,
1610        failure: &VectorRegenerationFailure,
1611    ) {
1612        let Ok(conn) = self.connect() else {
1613            return;
1614        };
1615        let failure_metadata = VectorRegenerationAuditMetadata {
1616            profile: metadata.profile.clone(),
1617            model_identity: metadata.model_identity.clone(),
1618            model_version: metadata.model_version.clone(),
1619            chunk_count: metadata.chunk_count,
1620            snapshot_hash: metadata.snapshot_hash.clone(),
1621            failure_class: Some(failure.failure_class_label().to_owned()),
1622        };
1623        let _ = persist_vector_regeneration_event(
1624            &conn,
1625            "vector_regeneration_failed",
1626            profile,
1627            &failure_metadata,
1628        );
1629    }
1630
1631    /// # Errors
1632    /// Returns [`EngineError`] if the database connection fails or any SQL query fails.
1633    pub fn trace_source(&self, source_ref: &str) -> Result<TraceReport, EngineError> {
1634        let conn = self.connect()?;
1635
1636        let node_logical_ids = collect_strings(
1637            &conn,
1638            "SELECT logical_id FROM nodes WHERE source_ref = ?1 ORDER BY created_at",
1639            source_ref,
1640        )?;
1641        let action_ids = collect_strings(
1642            &conn,
1643            "SELECT id FROM actions WHERE source_ref = ?1 ORDER BY created_at",
1644            source_ref,
1645        )?;
1646        let operational_mutation_ids = collect_strings(
1647            &conn,
1648            "SELECT id FROM operational_mutations WHERE source_ref = ?1 ORDER BY mutation_order",
1649            source_ref,
1650        )?;
1651
1652        Ok(TraceReport {
1653            source_ref: source_ref.to_owned(),
1654            node_rows: count_source_ref(&conn, "nodes", source_ref)?,
1655            edge_rows: count_source_ref(&conn, "edges", source_ref)?,
1656            action_rows: count_source_ref(&conn, "actions", source_ref)?,
1657            operational_mutation_rows: count_source_ref(
1658                &conn,
1659                "operational_mutations",
1660                source_ref,
1661            )?,
1662            node_logical_ids,
1663            action_ids,
1664            operational_mutation_ids,
1665        })
1666    }
1667
1668    /// # Errors
1669    /// Returns [`EngineError`] if the database connection fails, the transaction cannot be
1670    /// started, or lifecycle restoration prerequisites are missing.
1671    #[allow(clippy::too_many_lines)]
1672    pub fn restore_logical_id(
1673        &self,
1674        logical_id: &str,
1675    ) -> Result<LogicalRestoreReport, EngineError> {
1676        let mut conn = self.connect()?;
1677        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1678
1679        let active_count: i64 = tx.query_row(
1680            "SELECT count(*) FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
1681            [logical_id],
1682            |row| row.get(0),
1683        )?;
1684        if active_count > 0 {
1685            return Ok(LogicalRestoreReport {
1686                logical_id: logical_id.to_owned(),
1687                was_noop: true,
1688                restored_node_rows: 0,
1689                restored_edge_rows: 0,
1690                restored_chunk_rows: 0,
1691                restored_fts_rows: 0,
1692                restored_vec_rows: 0,
1693                skipped_edges: Vec::new(),
1694                notes: vec!["logical_id already active".to_owned()],
1695            });
1696        }
1697
1698        let restored_node: Option<(String, String)> = tx
1699            .query_row(
1700                "SELECT row_id, kind FROM nodes \
1701                 WHERE logical_id = ?1 AND superseded_at IS NOT NULL \
1702                 ORDER BY superseded_at DESC, created_at DESC, rowid DESC LIMIT 1",
1703                [logical_id],
1704                |row| Ok((row.get(0)?, row.get(1)?)),
1705            )
1706            .optional()?;
1707        let (restored_node_row_id, restored_kind) = restored_node.ok_or_else(|| {
1708            EngineError::InvalidWrite(format!("logical_id '{logical_id}' is not retired"))
1709        })?;
1710
1711        tx.execute(
1712            "UPDATE nodes SET superseded_at = NULL WHERE row_id = ?1",
1713            [restored_node_row_id.as_str()],
1714        )?;
1715
1716        let retire_scope: Option<(i64, Option<String>, i64)> = tx
1717            .query_row(
1718                "SELECT rowid, source_ref, created_at FROM provenance_events \
1719                 WHERE event_type = 'node_retire' AND subject = ?1 \
1720                 ORDER BY created_at DESC, rowid DESC LIMIT 1",
1721                [logical_id],
1722                |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
1723            )
1724            .optional()?;
1725        let (restored_edge_rows, skipped_edges) = if let Some((
1726            retire_event_rowid,
1727            retire_source_ref,
1728            retire_created_at,
1729        )) = retire_scope
1730        {
1731            restore_validated_edges(
1732                &tx,
1733                logical_id,
1734                retire_source_ref.as_deref(),
1735                retire_created_at,
1736                retire_event_rowid,
1737            )?
1738        } else {
1739            (0, Vec::new())
1740        };
1741
1742        let restored_chunk_rows: usize = tx
1743            .query_row(
1744                "SELECT count(*) FROM chunks WHERE node_logical_id = ?1",
1745                [logical_id],
1746                |row| row.get::<_, i64>(0),
1747            )
1748            .map(i64_to_usize)?;
1749        tx.execute(
1750            "DELETE FROM fts_nodes WHERE node_logical_id = ?1",
1751            [logical_id],
1752        )?;
1753        let restored_fts_rows = tx.execute(
1754            "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
1755             SELECT id, node_logical_id, ?2, text_content \
1756             FROM chunks WHERE node_logical_id = ?1",
1757            rusqlite::params![logical_id, restored_kind],
1758        )?;
1759        let restored_vec_rows = count_vec_rows_for_logical_id(&tx, logical_id)?;
1760
1761        persist_simple_provenance_event(
1762            &tx,
1763            "restore_logical_id",
1764            logical_id,
1765            Some(serde_json::json!({
1766                "restored_node_rows": 1,
1767                "restored_edge_rows": restored_edge_rows,
1768                "restored_chunk_rows": restored_chunk_rows,
1769                "restored_fts_rows": restored_fts_rows,
1770                "restored_vec_rows": restored_vec_rows,
1771            })),
1772        )?;
1773        tx.commit()?;
1774
1775        Ok(LogicalRestoreReport {
1776            logical_id: logical_id.to_owned(),
1777            was_noop: false,
1778            restored_node_rows: 1,
1779            restored_edge_rows,
1780            restored_chunk_rows,
1781            restored_fts_rows,
1782            restored_vec_rows,
1783            skipped_edges,
1784            notes: Vec::new(),
1785        })
1786    }
1787
1788    /// # Errors
1789    /// Returns [`EngineError`] if the database connection fails, the transaction cannot be
1790    /// started, or the purge mutation fails.
1791    pub fn purge_logical_id(&self, logical_id: &str) -> Result<LogicalPurgeReport, EngineError> {
1792        let mut conn = self.connect()?;
1793        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1794
1795        let active_count: i64 = tx.query_row(
1796            "SELECT count(*) FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
1797            [logical_id],
1798            |row| row.get(0),
1799        )?;
1800        if active_count > 0 {
1801            return Ok(LogicalPurgeReport {
1802                logical_id: logical_id.to_owned(),
1803                was_noop: true,
1804                deleted_node_rows: 0,
1805                deleted_edge_rows: 0,
1806                deleted_chunk_rows: 0,
1807                deleted_fts_rows: 0,
1808                deleted_vec_rows: 0,
1809                notes: vec!["logical_id is active; purge skipped".to_owned()],
1810            });
1811        }
1812
1813        let node_rows: i64 = tx.query_row(
1814            "SELECT count(*) FROM nodes WHERE logical_id = ?1",
1815            [logical_id],
1816            |row| row.get(0),
1817        )?;
1818        if node_rows == 0 {
1819            return Err(EngineError::InvalidWrite(format!(
1820                "logical_id '{logical_id}' does not exist"
1821            )));
1822        }
1823
1824        let deleted_vec_rows = delete_vec_rows_for_logical_id(&tx, logical_id)?;
1825        let deleted_fts_rows = tx.execute(
1826            "DELETE FROM fts_nodes WHERE node_logical_id = ?1",
1827            [logical_id],
1828        )?;
1829        let deleted_edge_rows = tx.execute(
1830            "DELETE FROM edges WHERE source_logical_id = ?1 OR target_logical_id = ?1",
1831            [logical_id],
1832        )?;
1833        let deleted_chunk_rows = tx.execute(
1834            "DELETE FROM chunks WHERE node_logical_id = ?1",
1835            [logical_id],
1836        )?;
1837        let deleted_node_rows =
1838            tx.execute("DELETE FROM nodes WHERE logical_id = ?1", [logical_id])?;
1839        tx.execute(
1840            "DELETE FROM node_access_metadata WHERE logical_id = ?1",
1841            [logical_id],
1842        )?;
1843
1844        persist_simple_provenance_event(
1845            &tx,
1846            "purge_logical_id",
1847            logical_id,
1848            Some(serde_json::json!({
1849                "deleted_node_rows": deleted_node_rows,
1850                "deleted_edge_rows": deleted_edge_rows,
1851                "deleted_chunk_rows": deleted_chunk_rows,
1852                "deleted_fts_rows": deleted_fts_rows,
1853                "deleted_vec_rows": deleted_vec_rows,
1854            })),
1855        )?;
1856        tx.commit()?;
1857
1858        Ok(LogicalPurgeReport {
1859            logical_id: logical_id.to_owned(),
1860            was_noop: false,
1861            deleted_node_rows,
1862            deleted_edge_rows,
1863            deleted_chunk_rows,
1864            deleted_fts_rows,
1865            deleted_vec_rows,
1866            notes: Vec::new(),
1867        })
1868    }
1869
1870    /// Purge provenance events older than `before_timestamp`.
1871    ///
1872    /// By default, `excise` and `purge_logical_id` event types are preserved so that
1873    /// data-deletion audit trails survive. Pass an explicit
1874    /// `preserve_event_types` list to override this default.
1875    ///
1876    /// # Errors
1877    /// Returns [`EngineError`] if the database connection fails, the transaction
1878    /// cannot be started, or any SQL statement fails.
1879    pub fn purge_provenance_events(
1880        &self,
1881        before_timestamp: i64,
1882        options: &ProvenancePurgeOptions,
1883    ) -> Result<ProvenancePurgeReport, EngineError> {
1884        let mut conn = self.connect()?;
1885        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1886
1887        let preserved_types: Vec<&str> = if options.preserve_event_types.is_empty() {
1888            vec!["excise", "purge_logical_id"]
1889        } else {
1890            options
1891                .preserve_event_types
1892                .iter()
1893                .map(String::as_str)
1894                .collect()
1895        };
1896
1897        // Build the NOT IN clause dynamically based on preserved types.
1898        let placeholders: String = (0..preserved_types.len())
1899            .map(|i| format!("?{}", i + 2))
1900            .collect::<Vec<_>>()
1901            .join(", ");
1902        let count_query = format!(
1903            "SELECT count(*) FROM provenance_events \
1904             WHERE created_at < ?1 AND event_type NOT IN ({placeholders})"
1905        );
1906        let delete_query = format!(
1907            "DELETE FROM provenance_events WHERE rowid IN (\
1908             SELECT rowid FROM provenance_events \
1909             WHERE created_at < ?1 AND event_type NOT IN ({placeholders}) \
1910             LIMIT 10000)"
1911        );
1912
1913        let bind_params = |stmt: &mut rusqlite::Statement<'_>| -> Result<(), rusqlite::Error> {
1914            stmt.raw_bind_parameter(1, before_timestamp)?;
1915            for (i, event_type) in preserved_types.iter().enumerate() {
1916                stmt.raw_bind_parameter(i + 2, *event_type)?;
1917            }
1918            Ok(())
1919        };
1920
1921        let events_deleted = if options.dry_run {
1922            let mut stmt = tx.prepare(&count_query)?;
1923            bind_params(&mut stmt)?;
1924            stmt.raw_query()
1925                .next()?
1926                .map_or(0, |row| row.get::<_, u64>(0).unwrap_or(0))
1927        } else {
1928            let mut total_deleted: u64 = 0;
1929            loop {
1930                let mut stmt = tx.prepare(&delete_query)?;
1931                bind_params(&mut stmt)?;
1932                let deleted = stmt.raw_execute()?;
1933                if deleted == 0 {
1934                    break;
1935                }
1936                total_deleted += deleted as u64;
1937            }
1938            total_deleted
1939        };
1940
1941        let total_after: u64 =
1942            tx.query_row("SELECT count(*) FROM provenance_events", [], |row| {
1943                row.get(0)
1944            })?;
1945
1946        let oldest_remaining: Option<i64> = tx
1947            .query_row("SELECT MIN(created_at) FROM provenance_events", [], |row| {
1948                row.get(0)
1949            })
1950            .optional()?
1951            .flatten();
1952
1953        if !options.dry_run {
1954            tx.commit()?;
1955        }
1956
1957        // In dry_run mode nothing was deleted, so total_after includes the
1958        // would-be-deleted rows; subtract to get the preserved count.
1959        let events_preserved = if options.dry_run {
1960            total_after - events_deleted
1961        } else {
1962            total_after
1963        };
1964
1965        Ok(ProvenancePurgeReport {
1966            events_deleted,
1967            events_preserved,
1968            oldest_remaining,
1969        })
1970    }
1971
1972    /// # Errors
1973    /// Returns [`EngineError`] if the database connection fails, the transaction cannot be
1974    /// started, or any SQL statement fails.
1975    #[allow(clippy::too_many_lines)]
1976    pub fn excise_source(&self, source_ref: &str) -> Result<TraceReport, EngineError> {
1977        let mut conn = self.connect()?;
1978
1979        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1980        let affected_operational_collections = collect_strings_tx(
1981            &tx,
1982            "SELECT DISTINCT m.collection_name \
1983             FROM operational_mutations m \
1984             JOIN operational_collections c ON c.name = m.collection_name \
1985             WHERE m.source_ref = ?1 AND c.kind = 'latest_state' \
1986             ORDER BY m.collection_name",
1987            source_ref,
1988        )?;
1989
1990        // Collect (row_id, logical_id) for active rows that will be excised.
1991        let pairs: Vec<(String, String)> = {
1992            let mut stmt = tx.prepare(
1993                "SELECT row_id, logical_id FROM nodes \
1994                 WHERE source_ref = ?1 AND superseded_at IS NULL",
1995            )?;
1996            stmt.query_map([source_ref], |row| {
1997                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1998            })?
1999            .collect::<Result<Vec<_>, _>>()?
2000        };
2001        let affected_logical_ids: Vec<String> = pairs
2002            .iter()
2003            .map(|(_, logical_id)| logical_id.clone())
2004            .collect();
2005
2006        // Supersede bad rows in all tables.
2007        tx.execute(
2008            "UPDATE nodes SET superseded_at = unixepoch() \
2009             WHERE source_ref = ?1 AND superseded_at IS NULL",
2010            [source_ref],
2011        )?;
2012        tx.execute(
2013            "UPDATE edges SET superseded_at = unixepoch() \
2014             WHERE source_ref = ?1 AND superseded_at IS NULL",
2015            [source_ref],
2016        )?;
2017        tx.execute(
2018            "UPDATE actions SET superseded_at = unixepoch() \
2019             WHERE source_ref = ?1 AND superseded_at IS NULL",
2020            [source_ref],
2021        )?;
2022        clear_operational_current_rows(&tx, &affected_operational_collections)?;
2023        tx.execute(
2024            "DELETE FROM operational_mutations WHERE source_ref = ?1",
2025            [source_ref],
2026        )?;
2027        for logical_id in &affected_logical_ids {
2028            delete_vec_rows_for_logical_id(&tx, logical_id)?;
2029            tx.execute(
2030                "DELETE FROM chunks WHERE node_logical_id = ?1",
2031                [logical_id.as_str()],
2032            )?;
2033        }
2034
2035        // Restore the most recent prior version for each affected logical_id.
2036        for (excised_row_id, logical_id) in &pairs {
2037            let prior: Option<String> = tx
2038                .query_row(
2039                    "SELECT row_id FROM nodes \
2040                     WHERE logical_id = ?1 AND row_id != ?2 \
2041                     ORDER BY created_at DESC LIMIT 1",
2042                    [logical_id.as_str(), excised_row_id.as_str()],
2043                    |row| row.get(0),
2044                )
2045                .optional()?;
2046            if let Some(prior_id) = prior {
2047                tx.execute(
2048                    "UPDATE nodes SET superseded_at = NULL WHERE row_id = ?1",
2049                    [prior_id.as_str()],
2050                )?;
2051            }
2052        }
2053
2054        for logical_id in &affected_logical_ids {
2055            let has_active_node = tx
2056                .query_row(
2057                    "SELECT 1 FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL LIMIT 1",
2058                    [logical_id.as_str()],
2059                    |row| row.get::<_, i64>(0),
2060                )
2061                .optional()?
2062                .is_some();
2063            if !has_active_node {
2064                tx.execute(
2065                    "DELETE FROM node_access_metadata WHERE logical_id = ?1",
2066                    [logical_id.as_str()],
2067                )?;
2068            }
2069        }
2070
2071        rebuild_operational_current_rows(&tx, &affected_operational_collections)?;
2072
2073        // Rebuild FTS atomically within the same transaction so readers never
2074        // observe a post-excise node state with a stale FTS index.
2075        tx.execute("DELETE FROM fts_nodes", [])?;
2076        tx.execute(
2077            r"
2078            INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content)
2079            SELECT c.id, n.logical_id, n.kind, c.text_content
2080            FROM chunks c
2081            JOIN nodes n
2082              ON n.logical_id = c.node_logical_id
2083             AND n.superseded_at IS NULL
2084            ",
2085            [],
2086        )?;
2087
2088        // Record the audit event inside the same transaction so the excision and its
2089        // audit record are committed atomically — no window where the excision is
2090        // durable but unaudited.
2091        tx.execute(
2092            "INSERT INTO provenance_events (id, event_type, subject, source_ref) \
2093             VALUES (?1, 'excise_source', ?2, ?2)",
2094            rusqlite::params![new_id(), source_ref],
2095        )?;
2096
2097        tx.commit()?;
2098
2099        self.trace_source(source_ref)
2100    }
2101
2102    /// # Errors
2103    /// Returns [`EngineError`] if the WAL checkpoint fails, the `SQLite` backup fails,
2104    /// the SHA-256 digest cannot be computed, or the manifest file cannot be written.
2105    pub fn safe_export(
2106        &self,
2107        destination_path: impl AsRef<Path>,
2108        options: SafeExportOptions,
2109    ) -> Result<SafeExportManifest, EngineError> {
2110        let destination_path = destination_path.as_ref();
2111
2112        // 1. Optionally checkpoint WAL before exporting. This keeps the on-disk file tidy for
2113        // callers that want a fully checkpointed export, but export correctness does not depend
2114        // on it because the backup API copies from the live SQLite connection state.
2115        let conn = self.connect()?;
2116
2117        if options.force_checkpoint {
2118            trace_info!("safe_export: wal checkpoint started");
2119            let (busy, log, checkpointed): (i64, i64, i64) =
2120                conn.query_row("PRAGMA wal_checkpoint(FULL)", [], |row| {
2121                    Ok((row.get(0)?, row.get(1)?, row.get(2)?))
2122                })?;
2123            if busy != 0 {
2124                trace_warn!(
2125                    busy,
2126                    log_frames = log,
2127                    checkpointed_frames = checkpointed,
2128                    "safe_export: wal checkpoint blocked by active readers"
2129                );
2130                return Err(EngineError::Bridge(format!(
2131                    "WAL checkpoint blocked: {busy} active reader(s) prevented a full checkpoint; \
2132                     log frames={log}, checkpointed={checkpointed}; \
2133                     retry export when no readers are active"
2134                )));
2135            }
2136            trace_info!(
2137                log_frames = log,
2138                checkpointed_frames = checkpointed,
2139                "safe_export: wal checkpoint completed"
2140            );
2141        }
2142
2143        let schema_version: u32 = conn
2144            .query_row(
2145                "SELECT COALESCE(MAX(version), 0) FROM fathom_schema_migrations",
2146                [],
2147                |row| row.get(0),
2148            )
2149            .unwrap_or(0);
2150
2151        // 2. Export the database through SQLite's online backup API so committed data in the WAL
2152        // is included even when `force_checkpoint` is false.
2153        if let Some(parent) = destination_path.parent() {
2154            fs::create_dir_all(parent)?;
2155        }
2156        conn.backup(DatabaseName::Main, destination_path, None)?;
2157
2158        drop(conn);
2159
2160        // 2b. Query page_count from the EXPORTED file so the manifest reflects what was
2161        // actually backed up, not the source (which may have changed between the PRAGMA
2162        // and the backup call).
2163        let page_count: u64 = {
2164            let export_conn = rusqlite::Connection::open_with_flags(
2165                destination_path,
2166                rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY
2167                    | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX,
2168            )?;
2169            export_conn.query_row("PRAGMA page_count", [], |row| row.get(0))?
2170        };
2171
2172        // 3. Compute SHA-256 of the exported file.
2173        // FIX(review): was fs::read loading entire DB into memory; use streaming hash.
2174        let sha256 = {
2175            let mut file = fs::File::open(destination_path)?;
2176            let mut hasher = Sha256::new();
2177            io::copy(&mut file, &mut hasher)?;
2178            format!("{:x}", hasher.finalize())
2179        };
2180
2181        // 4. Record when the export was created.
2182        let exported_at = SystemTime::now()
2183            .duration_since(SystemTime::UNIX_EPOCH)
2184            .map_err(|e| EngineError::Bridge(format!("system clock error: {e}")))?
2185            .as_secs();
2186
2187        let manifest = SafeExportManifest {
2188            exported_at,
2189            sha256,
2190            schema_version,
2191            protocol_version: EXPORT_PROTOCOL_VERSION,
2192            page_count,
2193        };
2194
2195        // 5. Write manifest alongside the exported file, using Path API for the name.
2196        let manifest_path = {
2197            let mut p = destination_path.to_path_buf();
2198            let stem = p
2199                .file_name()
2200                .map(|n| format!("{}.export-manifest.json", n.to_string_lossy()))
2201                .ok_or_else(|| {
2202                    EngineError::Bridge("destination path has no filename".to_owned())
2203                })?;
2204            p.set_file_name(stem);
2205            p
2206        };
2207        let manifest_json =
2208            serde_json::to_string(&manifest).map_err(|e| EngineError::Bridge(e.to_string()))?;
2209
2210        // Atomic manifest write: write to a temp file then rename so readers never
2211        // observe a partially-written manifest.
2212        let manifest_tmp = manifest_path.with_extension("json.tmp");
2213        if let Err(e) = fs::write(&manifest_tmp, &manifest_json)
2214            .and_then(|()| fs::rename(&manifest_tmp, &manifest_path))
2215        {
2216            let _ = fs::remove_file(&manifest_tmp);
2217            return Err(e.into());
2218        }
2219
2220        Ok(manifest)
2221    }
2222}
2223
/// Serializable record describing a vector embedding contract: the profile and table
/// it applies to, the model and policy settings, the generator command, and the
/// snapshot it was applied against.
///
/// NOTE(review): presumably mirrors what `persist_vector_contract` stores; the
/// `dead_code` allowance suggests it is not constructed everywhere — confirm.
2224#[allow(dead_code)]
2225#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
2226struct VectorEmbeddingContractRecord {
2227    profile: String,
2228    table_name: String,
2229    model_identity: String,
2230    model_version: String,
2231    dimension: usize,
2232    normalization_policy: String,
2233    chunking_policy: String,
2234    preprocessing_policy: String,
    // Generator command stored as a JSON string (per the field name).
2235    generator_command_json: String,
    // presumably a Unix timestamp in seconds — TODO confirm
2236    applied_at: i64,
2237    snapshot_hash: String,
2238    contract_format_version: i64,
2239}
2240
/// One chunk entry in the `chunks` list of [`VectorRegenerationInput`].
///
/// Serialize-only: the type derives `Serialize` but not `Deserialize`.
2241#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
2242struct VectorRegenerationInputChunk {
2243    chunk_id: String,
2244    node_logical_id: String,
2245    kind: String,
2246    text_content: String,
    // presumably byte offsets of the chunk within its source text — TODO confirm
2247    byte_start: Option<i64>,
2248    byte_end: Option<i64>,
2249    source_ref: Option<String>,
2250    created_at: i64,
2251}
2252
/// Serializable regeneration payload: the contract settings (profile, table, model,
/// dimension and policies) together with the chunks to embed.
///
/// NOTE(review): presumably the value built by `build_regeneration_input` and hashed
/// by `compute_snapshot_hash` — confirm against those helpers.
2253#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
2254struct VectorRegenerationInput {
2255    profile: String,
2256    table_name: String,
2257    model_identity: String,
2258    model_version: String,
2259    dimension: usize,
2260    normalization_policy: String,
2261    chunking_policy: String,
2262    preprocessing_policy: String,
2263    chunks: Vec<VectorRegenerationInputChunk>,
2264}
2265
/// An embedding vector paired with the id of the chunk it belongs to.
///
/// `Eq` is not derived because the vector holds `f32` values.
2266#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
2267struct GeneratedEmbedding {
2268    chunk_id: String,
2269    embedding: Vec<f32>,
2270}
2271
/// Envelope holding a list of [`GeneratedEmbedding`] values under the `embeddings`
/// field.
///
/// NOTE(review): presumably the shape of the generator's output document — confirm
/// against the code that parses generator output.
2272#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
2273struct GeneratedEmbeddings {
2274    embeddings: Vec<GeneratedEmbedding>,
2275}
2276
/// Categories of vector regeneration failure.
///
/// Each class has a fixed human-readable label; only `SnapshotDrift` is considered
/// retryable.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum VectorRegenerationFailureClass {
    InvalidContract,
    PayloadTooLarge,
    GeneratorTimeout,
    GeneratorStdoutOverflow,
    GeneratorStderrOverflow,
    GeneratorNonzeroExit,
    MalformedGeneratorJson,
    SnapshotDrift,
    UnsupportedVecCapability,
}

impl VectorRegenerationFailureClass {
    /// Lower-case, space-separated label for this class.
    fn label(self) -> &'static str {
        match self {
            // Contract and payload problems.
            Self::InvalidContract => "invalid contract",
            Self::PayloadTooLarge => "payload too large",
            // Problems running the generator.
            Self::GeneratorTimeout => "generator timeout",
            Self::GeneratorStdoutOverflow => "generator stdout overflow",
            Self::GeneratorStderrOverflow => "generator stderr overflow",
            Self::GeneratorNonzeroExit => "generator nonzero exit",
            Self::MalformedGeneratorJson => "malformed generator json",
            // Problems detected while applying the result.
            Self::SnapshotDrift => "snapshot drift",
            Self::UnsupportedVecCapability => "unsupported vec capability",
        }
    }

    /// Whether a failure of this class may succeed on retry; true only for
    /// `SnapshotDrift`.
    fn retryable(self) -> bool {
        self == Self::SnapshotDrift
    }
}
2309
2310#[derive(Clone, Debug, PartialEq, Eq)]
2311pub(crate) struct VectorRegenerationFailure {
2312    class: VectorRegenerationFailureClass,
2313    detail: String,
2314}
2315
2316impl VectorRegenerationFailure {
2317    pub(crate) fn new(class: VectorRegenerationFailureClass, detail: impl Into<String>) -> Self {
2318        Self {
2319            class,
2320            detail: detail.into(),
2321        }
2322    }
2323
2324    fn to_engine_error(&self) -> EngineError {
2325        let retry_suffix = if self.class.retryable() {
2326            " [retryable]"
2327        } else {
2328            ""
2329        };
2330        EngineError::Bridge(format!(
2331            "vector regeneration {}: {}{}",
2332            self.class.label(),
2333            self.detail,
2334            retry_suffix
2335        ))
2336    }
2337
2338    fn failure_class_label(&self) -> &'static str {
2339        self.class.label()
2340    }
2341}
2342
2343#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
2344struct VectorRegenerationAuditMetadata {
2345    profile: String,
2346    model_identity: String,
2347    model_version: String,
2348    chunk_count: usize,
2349    snapshot_hash: String,
2350    #[serde(skip_serializing_if = "Option::is_none")]
2351    failure_class: Option<String>,
2352}
2353
2354#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize)]
2355#[serde(tag = "mode", rename_all = "snake_case")]
2356enum OperationalRetentionPolicy {
2357    KeepAll,
2358    PurgeBeforeSeconds { max_age_seconds: i64 },
2359    KeepLast { max_rows: usize },
2360}
2361
2362/// # Errors
2363/// Returns [`EngineError`] if the file cannot be read or the config is invalid.
2364pub fn load_vector_regeneration_config(
2365    path: impl AsRef<Path>,
2366) -> Result<VectorRegenerationConfig, EngineError> {
2367    let path = path.as_ref();
2368    let raw = fs::read_to_string(path)?;
2369    match path.extension().and_then(|ext| ext.to_str()) {
2370        Some("toml") => {
2371            toml::from_str(&raw).map_err(|error| EngineError::Bridge(error.to_string()))
2372        }
2373        Some("json") | None => {
2374            serde_json::from_str(&raw).map_err(|error| EngineError::Bridge(error.to_string()))
2375        }
2376        Some(other) => Err(EngineError::Bridge(format!(
2377            "unsupported vector regeneration config extension: {other}"
2378        ))),
2379    }
2380}
2381
2382fn validate_vector_regeneration_config(
2383    conn: &rusqlite::Connection,
2384    config: &VectorRegenerationConfig,
2385    policy: &VectorGeneratorPolicy,
2386) -> Result<VectorRegenerationConfig, VectorRegenerationFailure> {
2387    let profile = validate_bounded_text("profile", &config.profile, MAX_PROFILE_LEN)?;
2388    let table_name = validate_bounded_text("table_name", &config.table_name, MAX_PROFILE_LEN)?;
2389    if table_name != "vec_nodes_active" {
2390        return Err(VectorRegenerationFailure::new(
2391            VectorRegenerationFailureClass::InvalidContract,
2392            format!("table_name must be vec_nodes_active, got '{table_name}'"),
2393        ));
2394    }
2395    let model_identity = validate_bounded_text(
2396        "model_identity",
2397        &config.model_identity,
2398        MAX_MODEL_IDENTITY_LEN,
2399    )?;
2400    let model_version = validate_bounded_text(
2401        "model_version",
2402        &config.model_version,
2403        MAX_MODEL_VERSION_LEN,
2404    )?;
2405    if config.dimension == 0 {
2406        return Err(VectorRegenerationFailure::new(
2407            VectorRegenerationFailureClass::InvalidContract,
2408            "dimension must be greater than zero".to_owned(),
2409        ));
2410    }
2411    let normalization_policy = validate_bounded_text(
2412        "normalization_policy",
2413        &config.normalization_policy,
2414        MAX_POLICY_LEN,
2415    )?;
2416    let chunking_policy =
2417        validate_bounded_text("chunking_policy", &config.chunking_policy, MAX_POLICY_LEN)?;
2418    let preprocessing_policy = validate_bounded_text(
2419        "preprocessing_policy",
2420        &config.preprocessing_policy,
2421        MAX_POLICY_LEN,
2422    )?;
2423    let generator_command = validate_generator_command(&config.generator_command, policy)?;
2424
2425    if let Some(existing_dimension) = current_vector_profile_dimension(conn, &profile)?
2426        && existing_dimension != config.dimension
2427    {
2428        return Err(VectorRegenerationFailure::new(
2429            VectorRegenerationFailureClass::InvalidContract,
2430            format!(
2431                "dimension {} does not match existing vector profile dimension {}",
2432                config.dimension, existing_dimension
2433            ),
2434        ));
2435    }
2436
2437    validate_existing_contract_version(conn, &profile)?;
2438
2439    let normalized = VectorRegenerationConfig {
2440        profile,
2441        table_name,
2442        model_identity,
2443        model_version,
2444        dimension: config.dimension,
2445        normalization_policy,
2446        chunking_policy,
2447        preprocessing_policy,
2448        generator_command,
2449    };
2450    let serialized = serde_json::to_vec(&normalized).map_err(|error| {
2451        VectorRegenerationFailure::new(
2452            VectorRegenerationFailureClass::InvalidContract,
2453            error.to_string(),
2454        )
2455    })?;
2456    if serialized.len() > MAX_CONTRACT_JSON_BYTES {
2457        return Err(VectorRegenerationFailure::new(
2458            VectorRegenerationFailureClass::InvalidContract,
2459            format!("serialized contract exceeds {MAX_CONTRACT_JSON_BYTES} bytes"),
2460        ));
2461    }
2462
2463    Ok(normalized)
2464}
2465
2466#[allow(clippy::cast_possible_wrap)]
2467fn persist_vector_contract(
2468    conn: &rusqlite::Connection,
2469    config: &VectorRegenerationConfig,
2470    snapshot_hash: &str,
2471) -> Result<(), EngineError> {
2472    let generator_command_json = serde_json::to_string(&config.generator_command)
2473        .map_err(|error| EngineError::Bridge(error.to_string()))?;
2474    conn.execute(
2475        r"
2476        INSERT OR REPLACE INTO vector_embedding_contracts (
2477            profile,
2478            table_name,
2479            model_identity,
2480            model_version,
2481            dimension,
2482            normalization_policy,
2483            chunking_policy,
2484            preprocessing_policy,
2485            generator_command_json,
2486            applied_at,
2487            snapshot_hash,
2488            contract_format_version,
2489            updated_at
2490        ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, unixepoch(), ?10, ?11, unixepoch())
2491        ",
2492        rusqlite::params![
2493            config.profile.as_str(),
2494            config.table_name.as_str(),
2495            config.model_identity.as_str(),
2496            config.model_version.as_str(),
2497            config.dimension as i64,
2498            config.normalization_policy.as_str(),
2499            config.chunking_policy.as_str(),
2500            config.preprocessing_policy.as_str(),
2501            generator_command_json,
2502            snapshot_hash,
2503            CURRENT_VECTOR_CONTRACT_FORMAT_VERSION,
2504        ],
2505    )?;
2506    Ok(())
2507}
2508
2509fn persist_vector_regeneration_event(
2510    conn: &rusqlite::Connection,
2511    event_type: &str,
2512    subject: &str,
2513    metadata: &VectorRegenerationAuditMetadata,
2514) -> Result<(), EngineError> {
2515    let metadata_json = serialize_audit_metadata(metadata)?;
2516    conn.execute(
2517        "INSERT INTO provenance_events (id, event_type, subject, metadata_json) VALUES (?1, ?2, ?3, ?4)",
2518        rusqlite::params![new_id(), event_type, subject, metadata_json],
2519    )?;
2520    Ok(())
2521}
2522
2523fn persist_simple_provenance_event(
2524    conn: &rusqlite::Connection,
2525    event_type: &str,
2526    subject: &str,
2527    metadata: Option<serde_json::Value>,
2528) -> Result<(), EngineError> {
2529    let metadata_json = metadata.map(|value| value.to_string()).unwrap_or_default();
2530    conn.execute(
2531        "INSERT INTO provenance_events (id, event_type, subject, metadata_json) VALUES (?1, ?2, ?3, ?4)",
2532        rusqlite::params![new_id(), event_type, subject, metadata_json],
2533    )?;
2534    Ok(())
2535}
2536
2537fn build_regeneration_input(
2538    config: &VectorRegenerationConfig,
2539    chunks: Vec<VectorRegenerationInputChunk>,
2540) -> VectorRegenerationInput {
2541    VectorRegenerationInput {
2542        profile: config.profile.clone(),
2543        table_name: config.table_name.clone(),
2544        model_identity: config.model_identity.clone(),
2545        model_version: config.model_version.clone(),
2546        dimension: config.dimension,
2547        normalization_policy: config.normalization_policy.clone(),
2548        chunking_policy: config.chunking_policy.clone(),
2549        preprocessing_policy: config.preprocessing_policy.clone(),
2550        chunks,
2551    }
2552}
2553
2554fn compute_snapshot_hash(payload: &VectorRegenerationInput) -> Result<String, EngineError> {
2555    let bytes =
2556        serde_json::to_vec(payload).map_err(|error| EngineError::Bridge(error.to_string()))?;
2557    let mut hasher = Sha256::new();
2558    hasher.update(bytes);
2559    Ok(format!("{:x}", hasher.finalize()))
2560}
2561
2562fn collect_regeneration_chunks(
2563    conn: &rusqlite::Connection,
2564) -> Result<Vec<VectorRegenerationInputChunk>, EngineError> {
2565    let mut stmt = conn.prepare(
2566        r"
2567        SELECT c.id, c.node_logical_id, n.kind, c.text_content, c.byte_start, c.byte_end, n.source_ref, c.created_at
2568        FROM chunks c
2569        JOIN nodes n
2570          ON n.logical_id = c.node_logical_id
2571         AND n.superseded_at IS NULL
2572        ORDER BY c.created_at, c.id
2573        ",
2574    )?;
2575    let chunks = stmt
2576        .query_map([], |row| {
2577            Ok(VectorRegenerationInputChunk {
2578                chunk_id: row.get(0)?,
2579                node_logical_id: row.get(1)?,
2580                kind: row.get(2)?,
2581                text_content: row.get(3)?,
2582                byte_start: row.get(4)?,
2583                byte_end: row.get(5)?,
2584                source_ref: row.get(6)?,
2585                created_at: row.get(7)?,
2586            })
2587        })?
2588        .collect::<Result<Vec<_>, _>>()?;
2589    Ok(chunks)
2590}
2591
2592fn validate_generated_embeddings(
2593    config: &VectorRegenerationConfig,
2594    chunks: &[VectorRegenerationInputChunk],
2595    generated: GeneratedEmbeddings,
2596) -> Result<std::collections::HashMap<String, Vec<u8>>, VectorRegenerationFailure> {
2597    if generated.embeddings.len() != chunks.len() {
2598        return Err(VectorRegenerationFailure::new(
2599            VectorRegenerationFailureClass::MalformedGeneratorJson,
2600            format!(
2601                "generator returned {} embedding(s) for {} chunk(s)",
2602                generated.embeddings.len(),
2603                chunks.len()
2604            ),
2605        ));
2606    }
2607
2608    let mut embedding_map = std::collections::HashMap::new();
2609    for embedding in generated.embeddings {
2610        if embedding.embedding.len() != config.dimension {
2611            return Err(VectorRegenerationFailure::new(
2612                VectorRegenerationFailureClass::MalformedGeneratorJson,
2613                format!(
2614                    "embedding for chunk '{}' has dimension {}, expected {}",
2615                    embedding.chunk_id,
2616                    embedding.embedding.len(),
2617                    config.dimension
2618                ),
2619            ));
2620        }
2621        if embedding.embedding.iter().any(|value| !value.is_finite()) {
2622            return Err(VectorRegenerationFailure::new(
2623                VectorRegenerationFailureClass::MalformedGeneratorJson,
2624                format!(
2625                    "embedding for chunk '{}' contains non-finite values",
2626                    embedding.chunk_id
2627                ),
2628            ));
2629        }
2630        let bytes: Vec<u8> = embedding
2631            .embedding
2632            .iter()
2633            .flat_map(|value| value.to_le_bytes())
2634            .collect();
2635        if embedding_map
2636            .insert(embedding.chunk_id.clone(), bytes)
2637            .is_some()
2638        {
2639            return Err(VectorRegenerationFailure::new(
2640                VectorRegenerationFailureClass::MalformedGeneratorJson,
2641                format!(
2642                    "duplicate embedding returned for chunk '{}'",
2643                    embedding.chunk_id
2644                ),
2645            ));
2646        }
2647    }
2648
2649    Ok(embedding_map)
2650}
2651
2652fn generator_policy_notes(policy: &VectorGeneratorPolicy) -> Vec<String> {
2653    let mut notes = vec!["vector embeddings regenerated from application contract".to_owned()];
2654    if !policy.allowed_executable_roots.is_empty() {
2655        notes.push("generator executable roots enforced by operator policy".to_owned());
2656    }
2657    if !policy.preserve_env_vars.is_empty() {
2658        notes.push("generator environment reduced to preserved variables".to_owned());
2659    }
2660    notes
2661}
2662
/// Identifies which of the generator's output pipes a reader result came from.
enum GeneratorStream {
    /// The generator's standard output.
    Stdout,
    /// The generator's standard error.
    Stderr,
}
2667
/// Final outcome reported by a capped pipe reader.
enum StreamReadResult {
    /// The stream reached end-of-file; carries every byte that was read.
    Complete(Vec<u8>),
    /// The stream produced more bytes than the configured cap.
    Overflow,
    /// Reading from the stream failed.
    Io(io::Error),
}
2673
2674fn validate_bounded_text(
2675    field: &str,
2676    value: &str,
2677    max_len: usize,
2678) -> Result<String, VectorRegenerationFailure> {
2679    let trimmed = value.trim();
2680    if trimmed.is_empty() {
2681        return Err(VectorRegenerationFailure::new(
2682            VectorRegenerationFailureClass::InvalidContract,
2683            format!("{field} must not be empty"),
2684        ));
2685    }
2686    if trimmed.len() > max_len {
2687        return Err(VectorRegenerationFailure::new(
2688            VectorRegenerationFailureClass::InvalidContract,
2689            format!("{field} exceeds max length {max_len}"),
2690        ));
2691    }
2692    Ok(trimmed.to_owned())
2693}
2694
2695fn validate_generator_command(
2696    command: &[String],
2697    policy: &VectorGeneratorPolicy,
2698) -> Result<Vec<String>, VectorRegenerationFailure> {
2699    if command.is_empty() {
2700        return Err(VectorRegenerationFailure::new(
2701            VectorRegenerationFailureClass::InvalidContract,
2702            "generator_command must contain at least one element".to_owned(),
2703        ));
2704    }
2705    let mut total_len = 0usize;
2706    for argument in command {
2707        if argument.is_empty() {
2708            return Err(VectorRegenerationFailure::new(
2709                VectorRegenerationFailureClass::InvalidContract,
2710                "generator_command entries must not be empty".to_owned(),
2711            ));
2712        }
2713        if argument.len() > MAX_GENERATOR_COMMAND_ARG_LEN {
2714            return Err(VectorRegenerationFailure::new(
2715                VectorRegenerationFailureClass::InvalidContract,
2716                format!(
2717                    "generator_command argument exceeds max length {MAX_GENERATOR_COMMAND_ARG_LEN}"
2718                ),
2719            ));
2720        }
2721        total_len += argument.len();
2722    }
2723    if total_len > MAX_GENERATOR_COMMAND_TOTAL_LEN {
2724        return Err(VectorRegenerationFailure::new(
2725            VectorRegenerationFailureClass::InvalidContract,
2726            format!(
2727                "generator_command exceeds max serialized length {MAX_GENERATOR_COMMAND_TOTAL_LEN}"
2728            ),
2729        ));
2730    }
2731    executable_trust::validate_generator_executable(&command[0], policy)?;
2732    Ok(command.to_vec())
2733}
2734
2735fn current_vector_profile_dimension(
2736    conn: &rusqlite::Connection,
2737    profile: &str,
2738) -> Result<Option<usize>, VectorRegenerationFailure> {
2739    let dimension: Option<i64> = conn
2740        .query_row(
2741            "SELECT dimension FROM vector_profiles WHERE profile = ?1 AND enabled = 1",
2742            [profile],
2743            |row| row.get(0),
2744        )
2745        .optional()
2746        .map_err(|error| {
2747            VectorRegenerationFailure::new(
2748                VectorRegenerationFailureClass::InvalidContract,
2749                error.to_string(),
2750            )
2751        })?;
2752    dimension
2753        .map(|value| {
2754            usize::try_from(value).map_err(|_| {
2755                VectorRegenerationFailure::new(
2756                    VectorRegenerationFailureClass::InvalidContract,
2757                    format!("stored vector profile dimension is invalid: {value}"),
2758                )
2759            })
2760        })
2761        .transpose()
2762}
2763
2764fn validate_existing_contract_version(
2765    conn: &rusqlite::Connection,
2766    profile: &str,
2767) -> Result<(), VectorRegenerationFailure> {
2768    let version: Option<i64> = conn
2769        .query_row(
2770            "SELECT contract_format_version FROM vector_embedding_contracts WHERE profile = ?1",
2771            [profile],
2772            |row| row.get(0),
2773        )
2774        .optional()
2775        .map_err(|error| {
2776            VectorRegenerationFailure::new(
2777                VectorRegenerationFailureClass::InvalidContract,
2778                error.to_string(),
2779            )
2780        })?;
2781    if let Some(version) = version
2782        && version > CURRENT_VECTOR_CONTRACT_FORMAT_VERSION
2783    {
2784        return Err(VectorRegenerationFailure::new(
2785            VectorRegenerationFailureClass::InvalidContract,
2786            format!(
2787                "persisted contract format version {version} is unsupported; supported version is {CURRENT_VECTOR_CONTRACT_FORMAT_VERSION}"
2788            ),
2789        ));
2790    }
2791    Ok(())
2792}
2793
2794fn serialize_audit_metadata(
2795    metadata: &VectorRegenerationAuditMetadata,
2796) -> Result<String, EngineError> {
2797    let json =
2798        serde_json::to_string(metadata).map_err(|error| EngineError::Bridge(error.to_string()))?;
2799    if json.len() > MAX_AUDIT_METADATA_BYTES {
2800        return Err(VectorRegenerationFailure::new(
2801            VectorRegenerationFailureClass::InvalidContract,
2802            format!("audit metadata exceeds {MAX_AUDIT_METADATA_BYTES} bytes"),
2803        )
2804        .to_engine_error());
2805    }
2806    Ok(json)
2807}
2808
2809#[allow(clippy::too_many_lines)]
2810fn run_vector_generator_bounded(
2811    config: &VectorRegenerationConfig,
2812    payload: &VectorRegenerationInput,
2813    policy: &VectorGeneratorPolicy,
2814) -> Result<GeneratedEmbeddings, VectorRegenerationFailure> {
2815    if payload.chunks.len() > policy.max_chunks {
2816        return Err(VectorRegenerationFailure::new(
2817            VectorRegenerationFailureClass::PayloadTooLarge,
2818            format!(
2819                "chunk count {} exceeds max_chunks {}",
2820                payload.chunks.len(),
2821                policy.max_chunks
2822            ),
2823        ));
2824    }
2825
2826    let input = serde_json::to_vec(payload).map_err(|error| {
2827        VectorRegenerationFailure::new(
2828            VectorRegenerationFailureClass::MalformedGeneratorJson,
2829            error.to_string(),
2830        )
2831    })?;
2832    if input.len() > policy.max_input_bytes {
2833        return Err(VectorRegenerationFailure::new(
2834            VectorRegenerationFailureClass::PayloadTooLarge,
2835            format!(
2836                "serialized input {} bytes exceeds max_input_bytes {}",
2837                input.len(),
2838                policy.max_input_bytes
2839            ),
2840        ));
2841    }
2842
2843    let mut command = Command::new(config.generator_command.first().ok_or_else(|| {
2844        VectorRegenerationFailure::new(
2845            VectorRegenerationFailureClass::InvalidContract,
2846            "missing generator executable",
2847        )
2848    })?);
2849    command.args(config.generator_command.iter().skip(1));
2850    command.stdin(Stdio::piped());
2851    command.stdout(Stdio::piped());
2852    command.stderr(Stdio::piped());
2853    command.env_clear();
2854    for env_var in &policy.preserve_env_vars {
2855        if let Some(value) = std::env::var_os(env_var) {
2856            command.env(env_var, value);
2857        }
2858    }
2859
2860    let mut child = command.spawn().map_err(|error| {
2861        VectorRegenerationFailure::new(
2862            VectorRegenerationFailureClass::GeneratorNonzeroExit,
2863            format!("failed to spawn generator: {error}"),
2864        )
2865    })?;
2866    if let Some(mut stdin) = child.stdin.take() {
2867        stdin.write_all(&input).map_err(|error| {
2868            VectorRegenerationFailure::new(
2869                VectorRegenerationFailureClass::GeneratorNonzeroExit,
2870                format!("failed to write generator stdin: {error}"),
2871            )
2872        })?;
2873    } else {
2874        return Err(VectorRegenerationFailure::new(
2875            VectorRegenerationFailureClass::GeneratorNonzeroExit,
2876            "failed to open generator stdin",
2877        ));
2878    }
2879
2880    let stdout = child.stdout.take().ok_or_else(|| {
2881        VectorRegenerationFailure::new(
2882            VectorRegenerationFailureClass::GeneratorNonzeroExit,
2883            "failed to open generator stdout",
2884        )
2885    })?;
2886    let stderr = child.stderr.take().ok_or_else(|| {
2887        VectorRegenerationFailure::new(
2888            VectorRegenerationFailureClass::GeneratorNonzeroExit,
2889            "failed to open generator stderr",
2890        )
2891    })?;
2892
2893    let (tx, rx) = mpsc::channel();
2894    let stdout_handle = spawn_capped_reader(
2895        stdout,
2896        policy.max_stdout_bytes,
2897        GeneratorStream::Stdout,
2898        tx.clone(),
2899    );
2900    let stderr_handle =
2901        spawn_capped_reader(stderr, policy.max_stderr_bytes, GeneratorStream::Stderr, tx);
2902
2903    let start = Instant::now();
2904    let timeout = Duration::from_millis(policy.timeout_ms);
2905    let mut stdout_bytes: Option<Vec<u8>> = None;
2906    let mut stderr_bytes: Option<Vec<u8>> = None;
2907    let mut status = None;
2908    let mut stream_error: Option<VectorRegenerationFailure> = None;
2909
2910    while status.is_none() && stream_error.is_none() {
2911        while let Ok((stream, result)) = rx.try_recv() {
2912            match (stream, result) {
2913                (GeneratorStream::Stdout, StreamReadResult::Complete(bytes)) => {
2914                    stdout_bytes = Some(bytes);
2915                }
2916                (GeneratorStream::Stderr, StreamReadResult::Complete(bytes)) => {
2917                    stderr_bytes = Some(bytes);
2918                }
2919                (GeneratorStream::Stdout, StreamReadResult::Overflow) => {
2920                    stream_error = Some(VectorRegenerationFailure::new(
2921                        VectorRegenerationFailureClass::GeneratorStdoutOverflow,
2922                        format!(
2923                            "stdout exceeded max_stdout_bytes {}",
2924                            policy.max_stdout_bytes
2925                        ),
2926                    ));
2927                }
2928                (GeneratorStream::Stderr, StreamReadResult::Overflow) => {
2929                    stream_error = Some(VectorRegenerationFailure::new(
2930                        VectorRegenerationFailureClass::GeneratorStderrOverflow,
2931                        format!(
2932                            "stderr exceeded max_stderr_bytes {}",
2933                            policy.max_stderr_bytes
2934                        ),
2935                    ));
2936                }
2937                (_, StreamReadResult::Io(error)) => {
2938                    stream_error = Some(VectorRegenerationFailure::new(
2939                        VectorRegenerationFailureClass::GeneratorNonzeroExit,
2940                        format!("failed to read generator stream: {error}"),
2941                    ));
2942                }
2943            }
2944        }
2945
2946        if stream_error.is_some() {
2947            let _ = child.kill();
2948            break;
2949        }
2950        if start.elapsed() > timeout {
2951            let _ = child.kill();
2952            stream_error = Some(VectorRegenerationFailure::new(
2953                VectorRegenerationFailureClass::GeneratorTimeout,
2954                format!("generator exceeded timeout after {}ms", policy.timeout_ms),
2955            ));
2956            break;
2957        }
2958        status = child.try_wait().map_err(|error| {
2959            VectorRegenerationFailure::new(
2960                VectorRegenerationFailureClass::GeneratorNonzeroExit,
2961                format!("failed to poll generator status: {error}"),
2962            )
2963        })?;
2964        if status.is_none() {
2965            thread::sleep(Duration::from_millis(10));
2966        }
2967    }
2968
2969    let _ = child.wait();
2970    let _ = stdout_handle.join();
2971    let _ = stderr_handle.join();
2972
2973    while let Ok((stream, result)) = rx.try_recv() {
2974        match (stream, result) {
2975            (GeneratorStream::Stdout, StreamReadResult::Complete(bytes)) => {
2976                stdout_bytes = Some(bytes);
2977            }
2978            (GeneratorStream::Stderr, StreamReadResult::Complete(bytes)) => {
2979                stderr_bytes = Some(bytes);
2980            }
2981            (GeneratorStream::Stdout, StreamReadResult::Overflow) => {
2982                stream_error = Some(VectorRegenerationFailure::new(
2983                    VectorRegenerationFailureClass::GeneratorStdoutOverflow,
2984                    format!(
2985                        "stdout exceeded max_stdout_bytes {}",
2986                        policy.max_stdout_bytes
2987                    ),
2988                ));
2989            }
2990            (GeneratorStream::Stderr, StreamReadResult::Overflow) => {
2991                stream_error = Some(VectorRegenerationFailure::new(
2992                    VectorRegenerationFailureClass::GeneratorStderrOverflow,
2993                    format!(
2994                        "stderr exceeded max_stderr_bytes {}",
2995                        policy.max_stderr_bytes
2996                    ),
2997                ));
2998            }
2999            (_, StreamReadResult::Io(error)) => {
3000                stream_error = Some(VectorRegenerationFailure::new(
3001                    VectorRegenerationFailureClass::GeneratorNonzeroExit,
3002                    format!("failed to read generator stream: {error}"),
3003                ));
3004            }
3005        }
3006    }
3007
3008    if let Some(error) = stream_error {
3009        return Err(error);
3010    }
3011
3012    let status = status.ok_or_else(|| {
3013        VectorRegenerationFailure::new(
3014            VectorRegenerationFailureClass::GeneratorNonzeroExit,
3015            "vector generator exited without a status",
3016        )
3017    })?;
3018    if !status.success() {
3019        let stderr =
3020            truncate_error_text(&stderr_bytes.unwrap_or_default(), policy.max_stderr_bytes);
3021        return Err(VectorRegenerationFailure::new(
3022            VectorRegenerationFailureClass::GeneratorNonzeroExit,
3023            stderr,
3024        ));
3025    }
3026
3027    let stdout = stdout_bytes.unwrap_or_default();
3028    serde_json::from_slice(&stdout).map_err(|error| {
3029        VectorRegenerationFailure::new(
3030            VectorRegenerationFailureClass::MalformedGeneratorJson,
3031            format!("decode generator output: {error}"),
3032        )
3033    })
3034}
3035
3036fn spawn_capped_reader<R: Read + Send + 'static>(
3037    mut reader: R,
3038    max_bytes: usize,
3039    stream: GeneratorStream,
3040    tx: mpsc::Sender<(GeneratorStream, StreamReadResult)>,
3041) -> thread::JoinHandle<()> {
3042    thread::spawn(move || {
3043        let mut buffer = Vec::new();
3044        let mut chunk = [0u8; 8192];
3045        loop {
3046            match reader.read(&mut chunk) {
3047                Ok(0) => {
3048                    let _ = tx.send((stream, StreamReadResult::Complete(buffer)));
3049                    break;
3050                }
3051                Ok(read_bytes) => {
3052                    if buffer.len() + read_bytes > max_bytes {
3053                        let _ = tx.send((stream, StreamReadResult::Overflow));
3054                        break;
3055                    }
3056                    buffer.extend_from_slice(&chunk[..read_bytes]);
3057                }
3058                Err(error) => {
3059                    let _ = tx.send((stream, StreamReadResult::Io(error)));
3060                    break;
3061                }
3062            }
3063        }
3064    })
3065}
3066
/// Renders `bytes` as text, keeping at most `max_bytes` bytes of input.
///
/// Input that fits within `max_bytes` is converted in full. Longer input is
/// cut to its first `max_bytes` bytes and the marker ` [truncated]` is
/// appended. Conversion is lossy: invalid UTF-8, including a multi-byte
/// character split by the cut, becomes U+FFFD.
fn truncate_error_text(bytes: &[u8], max_bytes: usize) -> String {
    if bytes.len() <= max_bytes {
        return String::from_utf8_lossy(bytes).into_owned();
    }
    let mut text = String::from_utf8_lossy(&bytes[..max_bytes]).into_owned();
    text.push_str(" [truncated]");
    text
}
3074
3075fn count_source_ref(
3076    conn: &rusqlite::Connection,
3077    table: &str,
3078    source_ref: &str,
3079) -> Result<usize, EngineError> {
3080    let sql = match table {
3081        "nodes" => "SELECT count(*) FROM nodes WHERE source_ref = ?1",
3082        "edges" => "SELECT count(*) FROM edges WHERE source_ref = ?1",
3083        "actions" => "SELECT count(*) FROM actions WHERE source_ref = ?1",
3084        "operational_mutations" => {
3085            "SELECT count(*) FROM operational_mutations WHERE source_ref = ?1"
3086        }
3087        other => return Err(EngineError::Bridge(format!("unknown table: {other}"))),
3088    };
3089    let count: i64 = conn.query_row(sql, [source_ref], |row| row.get(0))?;
3090    // FIX(review): was `count as usize` — unsound cast.
3091    // Chose option (C) here: propagate error since this is a user-facing helper.
3092    usize::try_from(count)
3093        .map_err(|_| EngineError::Bridge(format!("count overflow for table {table}: {count}")))
3094}
3095
3096fn rebuild_operational_current_rows(
3097    tx: &rusqlite::Transaction<'_>,
3098    collections: &[String],
3099) -> Result<usize, EngineError> {
3100    let mut rebuilt_rows = 0usize;
3101    clear_operational_current_rows(tx, collections)?;
3102    let mut ins_current = tx.prepare_cached(
3103        "INSERT INTO operational_current \
3104         (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
3105         VALUES (?1, ?2, ?3, ?4, ?5)",
3106    )?;
3107
3108    for collection in collections {
3109        let mut stmt = tx.prepare(
3110            "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
3111             FROM operational_mutations \
3112             WHERE collection_name = ?1 \
3113             ORDER BY record_key, mutation_order",
3114        )?;
3115        let mut latest_by_key: std::collections::HashMap<String, Option<(String, i64, String)>> =
3116            std::collections::HashMap::new();
3117        let rows = stmt.query_map([collection], map_operational_mutation_row)?;
3118        for row in rows {
3119            let mutation = row?;
3120            match mutation.op_kind.as_str() {
3121                "put" => {
3122                    latest_by_key.insert(
3123                        mutation.record_key,
3124                        Some((mutation.payload_json, mutation.created_at, mutation.id)),
3125                    );
3126                }
3127                "delete" => {
3128                    latest_by_key.insert(mutation.record_key, None);
3129                }
3130                _ => {}
3131            }
3132        }
3133
3134        for (record_key, state) in latest_by_key {
3135            if let Some((payload_json, updated_at, last_mutation_id)) = state {
3136                ins_current.execute(rusqlite::params![
3137                    collection,
3138                    record_key,
3139                    payload_json,
3140                    updated_at,
3141                    last_mutation_id,
3142                ])?;
3143                rebuilt_rows += 1;
3144            }
3145        }
3146    }
3147
3148    drop(ins_current);
3149    Ok(rebuilt_rows)
3150}
3151
3152fn clear_operational_current_rows(
3153    tx: &rusqlite::Transaction<'_>,
3154    collections: &[String],
3155) -> Result<(), EngineError> {
3156    let mut delete_current =
3157        tx.prepare_cached("DELETE FROM operational_current WHERE collection_name = ?1")?;
3158    let mut delete_secondary_current = tx.prepare_cached(
3159        "DELETE FROM operational_secondary_index_entries \
3160         WHERE collection_name = ?1 AND subject_kind = 'current'",
3161    )?;
3162    for collection in collections {
3163        delete_secondary_current.execute([collection])?;
3164        delete_current.execute([collection])?;
3165    }
3166    drop(delete_secondary_current);
3167    drop(delete_current);
3168    Ok(())
3169}
3170
3171fn clear_operational_secondary_index_entries(
3172    tx: &rusqlite::Transaction<'_>,
3173    collection_name: &str,
3174) -> Result<(), EngineError> {
3175    tx.execute(
3176        "DELETE FROM operational_secondary_index_entries WHERE collection_name = ?1",
3177        [collection_name],
3178    )?;
3179    Ok(())
3180}
3181
3182fn insert_operational_secondary_index_entry(
3183    tx: &rusqlite::Transaction<'_>,
3184    collection_name: &str,
3185    subject_kind: &str,
3186    mutation_id: &str,
3187    record_key: &str,
3188    entry: &crate::operational::OperationalSecondaryIndexEntry,
3189) -> Result<(), EngineError> {
3190    tx.execute(
3191        "INSERT INTO operational_secondary_index_entries \
3192         (collection_name, index_name, subject_kind, mutation_id, record_key, sort_timestamp, \
3193          slot1_text, slot1_integer, slot2_text, slot2_integer, slot3_text, slot3_integer) \
3194         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
3195        rusqlite::params![
3196            collection_name,
3197            entry.index_name,
3198            subject_kind,
3199            mutation_id,
3200            record_key,
3201            entry.sort_timestamp,
3202            entry.slot1_text,
3203            entry.slot1_integer,
3204            entry.slot2_text,
3205            entry.slot2_integer,
3206            entry.slot3_text,
3207            entry.slot3_integer,
3208        ],
3209    )?;
3210    Ok(())
3211}
3212
3213fn rebuild_operational_secondary_index_entries(
3214    tx: &rusqlite::Transaction<'_>,
3215    collection_name: &str,
3216    collection_kind: OperationalCollectionKind,
3217    indexes: &[OperationalSecondaryIndexDefinition],
3218) -> Result<(usize, usize), EngineError> {
3219    clear_operational_secondary_index_entries(tx, collection_name)?;
3220
3221    let mut mutation_entries_rebuilt = 0usize;
3222    if collection_kind == OperationalCollectionKind::AppendOnlyLog {
3223        let mut stmt = tx.prepare(
3224            "SELECT id, record_key, payload_json FROM operational_mutations \
3225             WHERE collection_name = ?1 ORDER BY mutation_order",
3226        )?;
3227        let rows = stmt
3228            .query_map([collection_name], |row| {
3229                Ok((
3230                    row.get::<_, String>(0)?,
3231                    row.get::<_, String>(1)?,
3232                    row.get::<_, String>(2)?,
3233                ))
3234            })?
3235            .collect::<Result<Vec<_>, _>>()?;
3236        drop(stmt);
3237        for (mutation_id, record_key, payload_json) in rows {
3238            for entry in extract_secondary_index_entries_for_mutation(indexes, &payload_json) {
3239                insert_operational_secondary_index_entry(
3240                    tx,
3241                    collection_name,
3242                    "mutation",
3243                    &mutation_id,
3244                    &record_key,
3245                    &entry,
3246                )?;
3247                mutation_entries_rebuilt += 1;
3248            }
3249        }
3250    }
3251
3252    let mut current_entries_rebuilt = 0usize;
3253    if collection_kind == OperationalCollectionKind::LatestState {
3254        let mut stmt = tx.prepare(
3255            "SELECT record_key, payload_json, updated_at, last_mutation_id FROM operational_current \
3256             WHERE collection_name = ?1 ORDER BY updated_at DESC, record_key",
3257        )?;
3258        let rows = stmt
3259            .query_map([collection_name], |row| {
3260                Ok((
3261                    row.get::<_, String>(0)?,
3262                    row.get::<_, String>(1)?,
3263                    row.get::<_, i64>(2)?,
3264                    row.get::<_, String>(3)?,
3265                ))
3266            })?
3267            .collect::<Result<Vec<_>, _>>()?;
3268        drop(stmt);
3269        for (record_key, payload_json, updated_at, last_mutation_id) in rows {
3270            for entry in
3271                extract_secondary_index_entries_for_current(indexes, &payload_json, updated_at)
3272            {
3273                insert_operational_secondary_index_entry(
3274                    tx,
3275                    collection_name,
3276                    "current",
3277                    &last_mutation_id,
3278                    &record_key,
3279                    &entry,
3280                )?;
3281                current_entries_rebuilt += 1;
3282            }
3283        }
3284    }
3285
3286    Ok((mutation_entries_rebuilt, current_entries_rebuilt))
3287}
3288
3289fn collect_strings_tx(
3290    tx: &rusqlite::Transaction<'_>,
3291    sql: &str,
3292    value: &str,
3293) -> Result<Vec<String>, EngineError> {
3294    let mut stmt = tx.prepare(sql)?;
3295    let rows = stmt.query_map([value], |row| row.get::<_, String>(0))?;
3296    rows.collect::<Result<Vec<_>, _>>()
3297        .map_err(EngineError::from)
3298}
3299
/// Converts an `i64` row count into a `usize`.
///
/// # Panics
///
/// Panics if `val` does not fit in a `usize`. For a `count(*)` result that
/// means a negative value, which would indicate data corruption.
#[allow(clippy::expect_used)]
fn i64_to_usize(val: i64) -> usize {
    let converted = usize::try_from(val);
    converted.expect("count(*) must be non-negative")
}
3306
3307/// Runs a parameterized query and collects the first column as strings.
3308///
3309/// NOTE(review): sql parameter must be a hardcoded query string, never user input.
3310/// Options: (A) doc comment, (B) whitelist refactor like `count_source_ref`, (C) leave as-is.
3311/// Chose (A): function is private, only called with hardcoded SQL from `trace_source`.
3312/// Whitelist refactor not practical — queries have different SELECT/ORDER BY per table.
3313fn collect_strings(
3314    conn: &rusqlite::Connection,
3315    sql: &str,
3316    param: &str,
3317) -> Result<Vec<String>, EngineError> {
3318    let mut stmt = conn.prepare(sql)?;
3319    let values = stmt
3320        .query_map([param], |row| row.get::<_, String>(0))?
3321        .collect::<Result<Vec<_>, _>>()?;
3322    Ok(values)
3323}
3324
3325fn collect_edge_logical_ids_for_restore(
3326    tx: &rusqlite::Transaction<'_>,
3327    logical_id: &str,
3328    retire_source_ref: Option<&str>,
3329    retire_created_at: i64,
3330    retire_event_rowid: i64,
3331) -> Result<Vec<String>, EngineError> {
3332    let mut stmt = tx.prepare(
3333        "SELECT DISTINCT e.logical_id \
3334         FROM edges e \
3335         JOIN provenance_events p \
3336           ON p.subject = e.logical_id \
3337          AND p.event_type = 'edge_retire' \
3338          AND ( \
3339                p.created_at > ?3 \
3340                OR (p.created_at = ?3 AND p.rowid >= ?4) \
3341          ) \
3342          AND ((?2 IS NULL AND p.source_ref IS NULL) OR p.source_ref = ?2) \
3343         WHERE e.superseded_at IS NOT NULL \
3344           AND (e.source_logical_id = ?1 OR e.target_logical_id = ?1) \
3345           AND NOT EXISTS ( \
3346                SELECT 1 FROM edges active \
3347                WHERE active.logical_id = e.logical_id \
3348                  AND active.superseded_at IS NULL \
3349           ) \
3350         ORDER BY e.logical_id",
3351    )?;
3352    let edge_ids = stmt
3353        .query_map(
3354            rusqlite::params![
3355                logical_id,
3356                retire_source_ref,
3357                retire_created_at,
3358                retire_event_rowid
3359            ],
3360            |row| row.get::<_, String>(0),
3361        )?
3362        .collect::<Result<Vec<_>, _>>()?;
3363    Ok(edge_ids)
3364}
3365
3366/// Restores edges for a node being restored, skipping any whose counterpart
3367/// endpoint is not active (e.g. still retired or purged).
3368fn restore_validated_edges(
3369    tx: &rusqlite::Transaction<'_>,
3370    logical_id: &str,
3371    retire_source_ref: Option<&str>,
3372    retire_created_at: i64,
3373    retire_event_rowid: i64,
3374) -> Result<(usize, Vec<SkippedEdge>), EngineError> {
3375    let edge_logical_ids = collect_edge_logical_ids_for_restore(
3376        tx,
3377        logical_id,
3378        retire_source_ref,
3379        retire_created_at,
3380        retire_event_rowid,
3381    )?;
3382    let mut restored = 0usize;
3383    let mut skipped = Vec::new();
3384    for edge_logical_id in &edge_logical_ids {
3385        let edge_detail: Option<(String, String, String)> = tx
3386            .query_row(
3387                "SELECT row_id, source_logical_id, target_logical_id FROM edges \
3388                 WHERE logical_id = ?1 AND superseded_at IS NOT NULL \
3389                 ORDER BY superseded_at DESC, created_at DESC, rowid DESC LIMIT 1",
3390                [edge_logical_id.as_str()],
3391                |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
3392            )
3393            .optional()?;
3394        let Some((edge_row_id, source_lid, target_lid)) = edge_detail else {
3395            continue;
3396        };
3397        let other_endpoint = if source_lid == logical_id {
3398            &target_lid
3399        } else {
3400            &source_lid
3401        };
3402        let endpoint_active: bool = tx
3403            .query_row(
3404                "SELECT 1 FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL LIMIT 1",
3405                [other_endpoint.as_str()],
3406                |_| Ok(true),
3407            )
3408            .optional()?
3409            .unwrap_or(false);
3410        if !endpoint_active {
3411            skipped.push(SkippedEdge {
3412                edge_logical_id: edge_logical_id.clone(),
3413                missing_endpoint: other_endpoint.clone(),
3414            });
3415            continue;
3416        }
3417        restored += tx.execute(
3418            "UPDATE edges SET superseded_at = NULL WHERE row_id = ?1",
3419            [edge_row_id.as_str()],
3420        )?;
3421    }
3422    Ok((restored, skipped))
3423}
3424
/// Counts the `vec_nodes_active` rows attached to chunks of the node
/// `logical_id`.
///
/// A SQLite failure whose message mentions `vec_nodes_active` or
/// `no such module: vec0` is treated as "no vector rows" and reported as `0`.
///
/// # Errors
///
/// Returns [`EngineError::Sqlite`] for any other database error.
#[cfg(feature = "sqlite-vec")]
fn count_vec_rows_for_logical_id(
    tx: &rusqlite::Transaction<'_>,
    logical_id: &str,
) -> Result<usize, EngineError> {
    let counted = tx.query_row(
        "SELECT count(*) FROM vec_nodes_active v \
         JOIN chunks c ON c.id = v.chunk_id \
         WHERE c.node_logical_id = ?1",
        [logical_id],
        |row| row.get::<_, i64>(0),
    );
    let error = match counted {
        Ok(count) => return Ok(i64_to_usize(count)),
        Err(error) => error,
    };
    let vec_table_unavailable = matches!(
        &error,
        rusqlite::Error::SqliteFailure(_, Some(msg))
            if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0")
    );
    if vec_table_unavailable {
        Ok(0)
    } else {
        Err(EngineError::Sqlite(error))
    }
}
3446
3447#[cfg(not(feature = "sqlite-vec"))]
3448#[allow(clippy::unnecessary_wraps)]
3449fn count_vec_rows_for_logical_id(
3450    _tx: &rusqlite::Transaction<'_>,
3451    _logical_id: &str,
3452) -> Result<usize, EngineError> {
3453    Ok(0)
3454}
3455
/// Deletes the `vec_nodes_active` rows attached to chunks of the node
/// `logical_id` and returns how many rows were removed.
///
/// A SQLite failure whose message mentions `vec_nodes_active` or
/// `no such module: vec0` is treated as "nothing to delete" and reported as
/// `0`.
///
/// # Errors
///
/// Returns [`EngineError::Sqlite`] for any other database error.
#[cfg(feature = "sqlite-vec")]
fn delete_vec_rows_for_logical_id(
    tx: &rusqlite::Transaction<'_>,
    logical_id: &str,
) -> Result<usize, EngineError> {
    tx.execute(
        "DELETE FROM vec_nodes_active \
         WHERE chunk_id IN (SELECT id FROM chunks WHERE node_logical_id = ?1)",
        [logical_id],
    )
    .or_else(|error| match error {
        rusqlite::Error::SqliteFailure(_, Some(ref msg))
            if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
        {
            Ok(0)
        }
        other => Err(EngineError::Sqlite(other)),
    })
}
3475
3476#[cfg(not(feature = "sqlite-vec"))]
3477#[allow(clippy::unnecessary_wraps)]
3478fn delete_vec_rows_for_logical_id(
3479    _tx: &rusqlite::Transaction<'_>,
3480    _logical_id: &str,
3481) -> Result<usize, EngineError> {
3482    Ok(0)
3483}
3484
3485fn ensure_operational_collection_registered(
3486    conn: &rusqlite::Connection,
3487    collection_name: &str,
3488) -> Result<(), EngineError> {
3489    if load_operational_collection_record(conn, collection_name)?.is_none() {
3490        return Err(EngineError::InvalidWrite(format!(
3491            "operational collection '{collection_name}' is not registered"
3492        )));
3493    }
3494    Ok(())
3495}
3496
3497fn load_operational_collection_record(
3498    conn: &rusqlite::Connection,
3499    name: &str,
3500) -> Result<Option<OperationalCollectionRecord>, EngineError> {
3501    conn.query_row(
3502        "SELECT name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at, disabled_at \
3503         FROM operational_collections WHERE name = ?1",
3504        [name],
3505        map_operational_collection_row,
3506    )
3507    .optional()
3508    .map_err(EngineError::Sqlite)
3509}
3510
3511fn validate_append_only_operational_collection(
3512    record: &OperationalCollectionRecord,
3513    operation: &str,
3514) -> Result<(), EngineError> {
3515    if record.kind != OperationalCollectionKind::AppendOnlyLog {
3516        return Err(EngineError::InvalidWrite(format!(
3517            "operational collection '{}' must be append_only_log to {operation}",
3518            record.name
3519        )));
3520    }
3521    Ok(())
3522}
3523
/// A read filter that has been validated against a collection's declared
/// filter fields, as produced by `compile_operational_read_filters`.
#[derive(Clone, Debug, PartialEq, Eq)]
struct CompiledOperationalReadFilter {
    /// Name of the declared filter field the condition applies to.
    field: String,
    /// Typed predicate to evaluate against that field.
    condition: OperationalReadCondition,
}
3529
/// Outcome of `match_append_only_secondary_index_read`: an append-only
/// field/time secondary index that can serve a set of compiled filters.
#[derive(Clone, Debug)]
struct MatchedAppendOnlySecondaryIndexRead<'a> {
    /// Name of the matched secondary index.
    index_name: &'a str,
    /// The exact or prefix filter on the index's value field.
    value_filter: &'a CompiledOperationalReadFilter,
    /// Optional range filter on the index's time field.
    time_range: Option<&'a CompiledOperationalReadFilter>,
}
3536
/// Typed predicate of a compiled operational read filter.
#[derive(Clone, Debug, PartialEq, Eq)]
enum OperationalReadCondition {
    /// The field's string value must equal the given string.
    ExactString(String),
    /// The field's integer value must equal the given integer.
    ExactInteger(i64),
    /// The field's string value must start with the given prefix.
    Prefix(String),
    /// The field's integer value must lie within the given bounds; the read
    /// queries in this file apply them inclusively (`>=` / `<=`), and a `None`
    /// bound leaves that side open.
    Range {
        lower: Option<i64>,
        upper: Option<i64>,
    },
}
3547
3548fn operational_read_limit(limit: Option<usize>) -> Result<usize, EngineError> {
3549    let applied_limit = limit.unwrap_or(DEFAULT_OPERATIONAL_READ_LIMIT);
3550    if applied_limit == 0 {
3551        return Err(EngineError::InvalidWrite(
3552            "operational read limit must be greater than zero".to_owned(),
3553        ));
3554    }
3555    Ok(applied_limit.min(MAX_OPERATIONAL_READ_LIMIT))
3556}
3557
3558fn parse_operational_filter_fields(
3559    filter_fields_json: &str,
3560) -> Result<Vec<OperationalFilterField>, String> {
3561    let fields: Vec<OperationalFilterField> = serde_json::from_str(filter_fields_json)
3562        .map_err(|error| format!("invalid filter_fields_json: {error}"))?;
3563    let mut seen = std::collections::HashSet::new();
3564    for field in &fields {
3565        if field.name.trim().is_empty() {
3566            return Err("filter_fields_json field names must not be empty".to_owned());
3567        }
3568        if !seen.insert(field.name.as_str()) {
3569            return Err(format!(
3570                "filter_fields_json contains duplicate field '{}'",
3571                field.name
3572            ));
3573        }
3574        if field.modes.is_empty() {
3575            return Err(format!(
3576                "filter_fields_json field '{}' must declare at least one mode",
3577                field.name
3578            ));
3579        }
3580        if field.modes.contains(&OperationalFilterMode::Prefix)
3581            && field.field_type != OperationalFilterFieldType::String
3582        {
3583            return Err(format!(
3584                "filter field '{}' only supports prefix for string types",
3585                field.name
3586            ));
3587        }
3588    }
3589    Ok(fields)
3590}
3591
3592fn compile_operational_read_filters(
3593    filters: &[OperationalFilterClause],
3594    declared_fields: &[OperationalFilterField],
3595) -> Result<Vec<CompiledOperationalReadFilter>, EngineError> {
3596    let field_map = declared_fields
3597        .iter()
3598        .map(|field| (field.name.as_str(), field))
3599        .collect::<std::collections::HashMap<_, _>>();
3600    filters
3601        .iter()
3602        .map(|filter| match filter {
3603            OperationalFilterClause::Exact { field, value } => {
3604                let declared = field_map.get(field.as_str()).ok_or_else(|| {
3605                    EngineError::InvalidWrite(format!(
3606                        "operational read filter uses undeclared field '{field}'"
3607                    ))
3608                })?;
3609                if !declared.modes.contains(&OperationalFilterMode::Exact) {
3610                    return Err(EngineError::InvalidWrite(format!(
3611                        "operational read field '{field}' does not allow exact filters"
3612                    )));
3613                }
3614                let condition = match (declared.field_type, value) {
3615                    (OperationalFilterFieldType::String, OperationalFilterValue::String(value)) => {
3616                        OperationalReadCondition::ExactString(value.clone())
3617                    }
3618                    (
3619                        OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp,
3620                        OperationalFilterValue::Integer(value),
3621                    ) => OperationalReadCondition::ExactInteger(*value),
3622                    _ => {
3623                        return Err(EngineError::InvalidWrite(format!(
3624                            "operational read field '{field}' received a value with the wrong type"
3625                        )));
3626                    }
3627                };
3628                Ok(CompiledOperationalReadFilter {
3629                    field: field.clone(),
3630                    condition,
3631                })
3632            }
3633            OperationalFilterClause::Prefix { field, value } => {
3634                let declared = field_map.get(field.as_str()).ok_or_else(|| {
3635                    EngineError::InvalidWrite(format!(
3636                        "operational read filter uses undeclared field '{field}'"
3637                    ))
3638                })?;
3639                if !declared.modes.contains(&OperationalFilterMode::Prefix) {
3640                    return Err(EngineError::InvalidWrite(format!(
3641                        "operational read field '{field}' does not allow prefix filters"
3642                    )));
3643                }
3644                if declared.field_type != OperationalFilterFieldType::String {
3645                    return Err(EngineError::InvalidWrite(format!(
3646                        "operational read field '{field}' only supports prefix filters for strings"
3647                    )));
3648                }
3649                Ok(CompiledOperationalReadFilter {
3650                    field: field.clone(),
3651                    condition: OperationalReadCondition::Prefix(value.clone()),
3652                })
3653            }
3654            OperationalFilterClause::Range {
3655                field,
3656                lower,
3657                upper,
3658            } => {
3659                let declared = field_map.get(field.as_str()).ok_or_else(|| {
3660                    EngineError::InvalidWrite(format!(
3661                        "operational read filter uses undeclared field '{field}'"
3662                    ))
3663                })?;
3664                if !declared.modes.contains(&OperationalFilterMode::Range) {
3665                    return Err(EngineError::InvalidWrite(format!(
3666                        "operational read field '{field}' does not allow range filters"
3667                    )));
3668                }
3669                if !matches!(
3670                    declared.field_type,
3671                    OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp
3672                ) {
3673                    return Err(EngineError::InvalidWrite(format!(
3674                        "operational read field '{field}' only supports range filters for integer/timestamp fields"
3675                    )));
3676                }
3677                if lower.is_none() && upper.is_none() {
3678                    return Err(EngineError::InvalidWrite(format!(
3679                        "operational read range filter for '{field}' must specify a lower or upper bound"
3680                    )));
3681                }
3682                Ok(CompiledOperationalReadFilter {
3683                    field: field.clone(),
3684                    condition: OperationalReadCondition::Range {
3685                        lower: *lower,
3686                        upper: *upper,
3687                    },
3688                })
3689            }
3690        })
3691        .collect()
3692}
3693
3694fn match_append_only_secondary_index_read<'a>(
3695    filters: &'a [CompiledOperationalReadFilter],
3696    indexes: &'a [OperationalSecondaryIndexDefinition],
3697) -> Option<MatchedAppendOnlySecondaryIndexRead<'a>> {
3698    indexes.iter().find_map(|index| {
3699        let OperationalSecondaryIndexDefinition::AppendOnlyFieldTime {
3700            name,
3701            field,
3702            value_type,
3703            time_field,
3704        } = index
3705        else {
3706            return None;
3707        };
3708        if !(1..=2).contains(&filters.len()) {
3709            return None;
3710        }
3711
3712        let mut value_filter = None;
3713        let mut time_range = None;
3714        for filter in filters {
3715            if filter.field == *field {
3716                let supported = matches!(
3717                    (&filter.condition, value_type),
3718                    (
3719                        OperationalReadCondition::ExactString(_)
3720                            | OperationalReadCondition::Prefix(_),
3721                        crate::operational::OperationalSecondaryIndexValueType::String
3722                    ) | (
3723                        OperationalReadCondition::ExactInteger(_),
3724                        crate::operational::OperationalSecondaryIndexValueType::Integer
3725                            | crate::operational::OperationalSecondaryIndexValueType::Timestamp
3726                    )
3727                );
3728                if !supported || value_filter.is_some() {
3729                    return None;
3730                }
3731                value_filter = Some(filter);
3732                continue;
3733            }
3734            if filter.field == *time_field {
3735                if !matches!(filter.condition, OperationalReadCondition::Range { .. })
3736                    || time_range.is_some()
3737                {
3738                    return None;
3739                }
3740                time_range = Some(filter);
3741                continue;
3742            }
3743            return None;
3744        }
3745
3746        value_filter.map(|value_filter| MatchedAppendOnlySecondaryIndexRead {
3747            index_name: name.as_str(),
3748            value_filter,
3749            time_range,
3750        })
3751    })
3752}
3753
3754fn execute_operational_secondary_index_read(
3755    conn: &rusqlite::Connection,
3756    collection_name: &str,
3757    filters: &[CompiledOperationalReadFilter],
3758    indexes: &[OperationalSecondaryIndexDefinition],
3759    applied_limit: usize,
3760) -> Result<Option<OperationalReadReport>, EngineError> {
3761    use rusqlite::types::Value;
3762
3763    let Some(matched) = match_append_only_secondary_index_read(filters, indexes) else {
3764        return Ok(None);
3765    };
3766
3767    let mut sql = String::from(
3768        "SELECT m.id, m.collection_name, m.record_key, m.op_kind, m.payload_json, m.source_ref, m.created_at \
3769         FROM operational_secondary_index_entries s \
3770         JOIN operational_mutations m ON m.id = s.mutation_id \
3771         WHERE s.collection_name = ?1 AND s.index_name = ?2 AND s.subject_kind = 'mutation' ",
3772    );
3773    let mut params = vec![
3774        Value::from(collection_name.to_owned()),
3775        Value::from(matched.index_name.to_owned()),
3776    ];
3777
3778    match &matched.value_filter.condition {
3779        OperationalReadCondition::ExactString(value) => {
3780            let _ = write!(sql, "AND s.slot1_text = ?{} ", params.len() + 1);
3781            params.push(Value::from(value.clone()));
3782        }
3783        OperationalReadCondition::Prefix(value) => {
3784            let _ = write!(sql, "AND s.slot1_text GLOB ?{} ", params.len() + 1);
3785            params.push(Value::from(glob_prefix_pattern(value)));
3786        }
3787        OperationalReadCondition::ExactInteger(value) => {
3788            let _ = write!(sql, "AND s.slot1_integer = ?{} ", params.len() + 1);
3789            params.push(Value::from(*value));
3790        }
3791        OperationalReadCondition::Range { .. } => return Ok(None),
3792    }
3793
3794    if let Some(time_range) = matched.time_range
3795        && let OperationalReadCondition::Range { lower, upper } = &time_range.condition
3796    {
3797        if let Some(lower) = lower {
3798            let _ = write!(sql, "AND s.sort_timestamp >= ?{} ", params.len() + 1);
3799            params.push(Value::from(*lower));
3800        }
3801        if let Some(upper) = upper {
3802            let _ = write!(sql, "AND s.sort_timestamp <= ?{} ", params.len() + 1);
3803            params.push(Value::from(*upper));
3804        }
3805    }
3806
3807    let _ = write!(
3808        sql,
3809        "ORDER BY s.sort_timestamp DESC, m.mutation_order DESC LIMIT ?{}",
3810        params.len() + 1
3811    );
3812    params.push(Value::from(i64::try_from(applied_limit + 1).map_err(
3813        |_| EngineError::Bridge("operational read limit overflow".to_owned()),
3814    )?));
3815
3816    let mut stmt = conn.prepare(&sql)?;
3817    let mut rows = stmt
3818        .query_map(
3819            rusqlite::params_from_iter(params),
3820            map_operational_mutation_row,
3821        )?
3822        .collect::<Result<Vec<_>, _>>()?;
3823    let was_limited = rows.len() > applied_limit;
3824    if was_limited {
3825        rows.truncate(applied_limit);
3826    }
3827
3828    Ok(Some(OperationalReadReport {
3829        collection_name: collection_name.to_owned(),
3830        row_count: rows.len(),
3831        applied_limit,
3832        was_limited,
3833        rows,
3834    }))
3835}
3836
3837fn execute_operational_filtered_read(
3838    conn: &rusqlite::Connection,
3839    collection_name: &str,
3840    filters: &[CompiledOperationalReadFilter],
3841    applied_limit: usize,
3842) -> Result<OperationalReadReport, EngineError> {
3843    use rusqlite::types::Value;
3844
3845    let mut sql = String::from(
3846        "SELECT m.id, m.collection_name, m.record_key, m.op_kind, m.payload_json, m.source_ref, m.created_at \
3847         FROM operational_mutations m ",
3848    );
3849    let mut params = vec![Value::from(collection_name.to_owned())];
3850    for (index, filter) in filters.iter().enumerate() {
3851        let _ = write!(
3852            sql,
3853            "JOIN operational_filter_values f{index} \
3854             ON f{index}.mutation_id = m.id \
3855            AND f{index}.collection_name = m.collection_name "
3856        );
3857        match &filter.condition {
3858            OperationalReadCondition::ExactString(value) => {
3859                let _ = write!(
3860                    sql,
3861                    "AND f{index}.field_name = ?{} AND f{index}.string_value = ?{} ",
3862                    params.len() + 1,
3863                    params.len() + 2
3864                );
3865                params.push(Value::from(filter.field.clone()));
3866                params.push(Value::from(value.clone()));
3867            }
3868            OperationalReadCondition::ExactInteger(value) => {
3869                let _ = write!(
3870                    sql,
3871                    "AND f{index}.field_name = ?{} AND f{index}.integer_value = ?{} ",
3872                    params.len() + 1,
3873                    params.len() + 2
3874                );
3875                params.push(Value::from(filter.field.clone()));
3876                params.push(Value::from(*value));
3877            }
3878            OperationalReadCondition::Prefix(value) => {
3879                let _ = write!(
3880                    sql,
3881                    "AND f{index}.field_name = ?{} AND f{index}.string_value GLOB ?{} ",
3882                    params.len() + 1,
3883                    params.len() + 2
3884                );
3885                params.push(Value::from(filter.field.clone()));
3886                params.push(Value::from(glob_prefix_pattern(value)));
3887            }
3888            OperationalReadCondition::Range { lower, upper } => {
3889                let _ = write!(sql, "AND f{index}.field_name = ?{} ", params.len() + 1);
3890                params.push(Value::from(filter.field.clone()));
3891                if let Some(lower) = lower {
3892                    let _ = write!(sql, "AND f{index}.integer_value >= ?{} ", params.len() + 1);
3893                    params.push(Value::from(*lower));
3894                }
3895                if let Some(upper) = upper {
3896                    let _ = write!(sql, "AND f{index}.integer_value <= ?{} ", params.len() + 1);
3897                    params.push(Value::from(*upper));
3898                }
3899            }
3900        }
3901    }
3902    let _ = write!(
3903        sql,
3904        "WHERE m.collection_name = ?1 ORDER BY m.mutation_order DESC LIMIT ?{}",
3905        params.len() + 1
3906    );
3907    params.push(Value::from(i64::try_from(applied_limit + 1).map_err(
3908        |_| EngineError::Bridge("operational read limit overflow".to_owned()),
3909    )?));
3910
3911    let mut stmt = conn.prepare(&sql)?;
3912    let mut rows = stmt
3913        .query_map(
3914            rusqlite::params_from_iter(params),
3915            map_operational_mutation_row,
3916        )?
3917        .collect::<Result<Vec<_>, _>>()?;
3918    let was_limited = rows.len() > applied_limit;
3919    if was_limited {
3920        rows.truncate(applied_limit);
3921    }
3922    Ok(OperationalReadReport {
3923        collection_name: collection_name.to_owned(),
3924        row_count: rows.len(),
3925        applied_limit,
3926        was_limited,
3927        rows,
3928    })
3929}
3930
/// Builds a SQLite `GLOB` pattern matching every string that starts with `value`.
///
/// The GLOB metacharacters `*`, `?` and `[` are each wrapped in a one-character
/// bracket class so they match literally; a trailing `*` wildcard is then
/// appended to turn the escaped text into a prefix match.
fn glob_prefix_pattern(value: &str) -> String {
    let mut escaped = value.chars().fold(
        String::with_capacity(value.len() + 1),
        |mut acc, symbol| {
            if matches!(symbol, '*' | '?' | '[') {
                // `[x]` is a bracket class containing only `x`, i.e. a literal `x`.
                acc.push('[');
                acc.push(symbol);
                acc.push(']');
            } else {
                acc.push(symbol);
            }
            acc
        },
    );
    escaped.push('*');
    escaped
}
3944
/// One filter-field value pulled out of an operational payload.
///
/// As populated by `extract_operational_filter_values`, exactly one of
/// `string_value` / `integer_value` is `Some`, depending on the declared type
/// of the filter field.
#[derive(Clone, Debug, PartialEq, Eq)]
struct ExtractedOperationalFilterValue {
    /// Name of the filter field the value was read from.
    field_name: String,
    /// The value when the field is declared as a string; `None` otherwise.
    string_value: Option<String>,
    /// The value when the field is declared as an integer or timestamp; `None` otherwise.
    integer_value: Option<i64>,
}
3951
3952fn extract_operational_filter_values(
3953    filter_fields: &[OperationalFilterField],
3954    payload_json: &str,
3955) -> Vec<ExtractedOperationalFilterValue> {
3956    let Ok(parsed) = serde_json::from_str::<serde_json::Value>(payload_json) else {
3957        return Vec::new();
3958    };
3959    let Some(object) = parsed.as_object() else {
3960        return Vec::new();
3961    };
3962
3963    filter_fields
3964        .iter()
3965        .filter_map(|field| {
3966            let value = object.get(&field.name)?;
3967            match field.field_type {
3968                OperationalFilterFieldType::String => {
3969                    value
3970                        .as_str()
3971                        .map(|string_value| ExtractedOperationalFilterValue {
3972                            field_name: field.name.clone(),
3973                            string_value: Some(string_value.to_owned()),
3974                            integer_value: None,
3975                        })
3976                }
3977                OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp => {
3978                    value
3979                        .as_i64()
3980                        .map(|integer_value| ExtractedOperationalFilterValue {
3981                            field_name: field.name.clone(),
3982                            string_value: None,
3983                            integer_value: Some(integer_value),
3984                        })
3985                }
3986            }
3987        })
3988        .collect()
3989}
3990
3991fn operational_compaction_candidates(
3992    conn: &rusqlite::Connection,
3993    retention_json: &str,
3994    collection_name: &str,
3995) -> Result<(Vec<String>, Option<i64>), EngineError> {
3996    operational_compaction_candidates_at(
3997        conn,
3998        retention_json,
3999        collection_name,
4000        current_unix_timestamp()?,
4001    )
4002}
4003
4004fn operational_compaction_candidates_at(
4005    conn: &rusqlite::Connection,
4006    retention_json: &str,
4007    collection_name: &str,
4008    now_timestamp: i64,
4009) -> Result<(Vec<String>, Option<i64>), EngineError> {
4010    let policy = parse_operational_retention_policy(retention_json)?;
4011    match policy {
4012        OperationalRetentionPolicy::KeepAll => Ok((Vec::new(), None)),
4013        OperationalRetentionPolicy::PurgeBeforeSeconds { max_age_seconds } => {
4014            let before_timestamp = now_timestamp - max_age_seconds;
4015            let mut stmt = conn.prepare(
4016                "SELECT id FROM operational_mutations \
4017                 WHERE collection_name = ?1 AND created_at < ?2 \
4018                 ORDER BY mutation_order",
4019            )?;
4020            let mutation_ids = stmt
4021                .query_map(
4022                    rusqlite::params![collection_name, before_timestamp],
4023                    |row| row.get::<_, String>(0),
4024                )?
4025                .collect::<Result<Vec<_>, _>>()?;
4026            Ok((mutation_ids, Some(before_timestamp)))
4027        }
4028        OperationalRetentionPolicy::KeepLast { max_rows } => {
4029            let mut stmt = conn.prepare(
4030                "SELECT id FROM operational_mutations \
4031                 WHERE collection_name = ?1 \
4032                 ORDER BY mutation_order DESC",
4033            )?;
4034            let ordered_ids = stmt
4035                .query_map([collection_name], |row| row.get::<_, String>(0))?
4036                .collect::<Result<Vec<_>, _>>()?;
4037            Ok((ordered_ids.into_iter().skip(max_rows).collect(), None))
4038        }
4039    }
4040}
4041
4042fn parse_operational_retention_policy(
4043    retention_json: &str,
4044) -> Result<OperationalRetentionPolicy, EngineError> {
4045    let policy: OperationalRetentionPolicy = serde_json::from_str(retention_json)
4046        .map_err(|error| EngineError::InvalidWrite(format!("invalid retention_json: {error}")))?;
4047    match policy {
4048        OperationalRetentionPolicy::KeepAll => Ok(policy),
4049        OperationalRetentionPolicy::PurgeBeforeSeconds { max_age_seconds } => {
4050            if max_age_seconds <= 0 {
4051                return Err(EngineError::InvalidWrite(
4052                    "retention_json max_age_seconds must be greater than zero".to_owned(),
4053                ));
4054            }
4055            Ok(policy)
4056        }
4057        OperationalRetentionPolicy::KeepLast { max_rows } => {
4058            if max_rows == 0 {
4059                return Err(EngineError::InvalidWrite(
4060                    "retention_json max_rows must be greater than zero".to_owned(),
4061                ));
4062            }
4063            Ok(policy)
4064        }
4065    }
4066}
4067
4068fn load_operational_retention_records(
4069    conn: &rusqlite::Connection,
4070    collection_names: Option<&[String]>,
4071    max_collections: Option<usize>,
4072) -> Result<Vec<OperationalCollectionRecord>, EngineError> {
4073    let limit = max_collections.unwrap_or(usize::MAX);
4074    if limit == 0 {
4075        return Err(EngineError::InvalidWrite(
4076            "max_collections must be greater than zero".to_owned(),
4077        ));
4078    }
4079
4080    let mut records = Vec::new();
4081    if let Some(collection_names) = collection_names {
4082        for name in collection_names.iter().take(limit) {
4083            let record = load_operational_collection_record(conn, name)?.ok_or_else(|| {
4084                EngineError::InvalidWrite(format!(
4085                    "operational collection '{name}' is not registered"
4086                ))
4087            })?;
4088            records.push(record);
4089        }
4090        return Ok(records);
4091    }
4092
4093    let mut stmt = conn.prepare(
4094        "SELECT name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at, disabled_at \
4095         FROM operational_collections ORDER BY name",
4096    )?;
4097    let rows = stmt
4098        .query_map([], map_operational_collection_row)?
4099        .take(limit)
4100        .collect::<Result<Vec<_>, _>>()?;
4101    Ok(rows)
4102}
4103
4104fn last_operational_retention_run_at(
4105    conn: &rusqlite::Connection,
4106    collection_name: &str,
4107) -> Result<Option<i64>, EngineError> {
4108    conn.query_row(
4109        "SELECT MAX(executed_at) FROM operational_retention_runs WHERE collection_name = ?1",
4110        [collection_name],
4111        |row| row.get(0),
4112    )
4113    .optional()
4114    .map_err(EngineError::Sqlite)
4115    .map(Option::flatten)
4116}
4117
4118fn count_operational_mutations_for_collection(
4119    conn: &rusqlite::Connection,
4120    collection_name: &str,
4121) -> Result<usize, EngineError> {
4122    let count: i64 = conn.query_row(
4123        "SELECT count(*) FROM operational_mutations WHERE collection_name = ?1",
4124        [collection_name],
4125        |row| row.get(0),
4126    )?;
4127    usize::try_from(count).map_err(|_| {
4128        EngineError::Bridge(format!("count overflow for collection {collection_name}"))
4129    })
4130}
4131
4132fn retention_action_kind_and_limit(
4133    policy: &OperationalRetentionPolicy,
4134) -> (OperationalRetentionActionKind, Option<usize>) {
4135    match policy {
4136        OperationalRetentionPolicy::KeepAll => (OperationalRetentionActionKind::Noop, None),
4137        OperationalRetentionPolicy::PurgeBeforeSeconds { .. } => {
4138            (OperationalRetentionActionKind::PurgeBeforeSeconds, None)
4139        }
4140        OperationalRetentionPolicy::KeepLast { max_rows } => {
4141            (OperationalRetentionActionKind::KeepLast, Some(*max_rows))
4142        }
4143    }
4144}
4145
4146fn plan_operational_retention_item(
4147    conn: &rusqlite::Connection,
4148    record: &OperationalCollectionRecord,
4149    now_timestamp: i64,
4150) -> Result<OperationalRetentionPlanItem, EngineError> {
4151    let last_run_at = last_operational_retention_run_at(conn, &record.name)?;
4152    if record.kind != OperationalCollectionKind::AppendOnlyLog {
4153        return Ok(OperationalRetentionPlanItem {
4154            collection_name: record.name.clone(),
4155            action_kind: OperationalRetentionActionKind::Noop,
4156            candidate_deletions: 0,
4157            before_timestamp: None,
4158            max_rows: None,
4159            last_run_at,
4160        });
4161    }
4162    let policy = parse_operational_retention_policy(&record.retention_json)?;
4163    let (action_kind, max_rows) = retention_action_kind_and_limit(&policy);
4164    let (candidate_ids, before_timestamp) = operational_compaction_candidates_at(
4165        conn,
4166        &record.retention_json,
4167        &record.name,
4168        now_timestamp,
4169    )?;
4170    Ok(OperationalRetentionPlanItem {
4171        collection_name: record.name.clone(),
4172        action_kind,
4173        candidate_deletions: candidate_ids.len(),
4174        before_timestamp,
4175        max_rows,
4176        last_run_at,
4177    })
4178}
4179
/// Plans retention for one collection and, unless `dry_run` is set, applies it
/// inside the transaction `tx`.
///
/// When the collection is an append-only log, the plan is not a no-op, it has
/// candidates and this is not a dry run, the candidate mutations are deleted
/// one by one and an `operational_retention_run` provenance event is persisted.
/// Independently of whether anything was deleted, a non-dry-run with a
/// non-`Noop` action inserts a row into `operational_retention_runs`.
///
/// The returned item reports the rows actually deleted, or for a dry run the
/// number that would be deleted, together with the resulting row count.
///
/// # Errors
/// Propagates errors from planning, the candidate query, the deletes, the
/// provenance write, the row count and the audit insert.
fn run_operational_retention_item(
    tx: &rusqlite::Transaction<'_>,
    record: &OperationalCollectionRecord,
    now_timestamp: i64,
    dry_run: bool,
) -> Result<OperationalRetentionRunItem, EngineError> {
    let plan = plan_operational_retention_item(tx, record, now_timestamp)?;
    let mut deleted_mutations = 0usize;
    if record.kind == OperationalCollectionKind::AppendOnlyLog
        && plan.action_kind != OperationalRetentionActionKind::Noop
        && plan.candidate_deletions > 0
        && !dry_run
    {
        // The plan only carries the candidate count, so the ids are queried again here.
        let (candidate_ids, _) = operational_compaction_candidates_at(
            tx,
            &record.retention_json,
            &record.name,
            now_timestamp,
        )?;
        let mut delete_stmt =
            tx.prepare_cached("DELETE FROM operational_mutations WHERE id = ?1")?;
        for mutation_id in &candidate_ids {
            delete_stmt.execute([mutation_id.as_str()])?;
            // Counts every executed delete; the affected-row count is not inspected.
            deleted_mutations += 1;
        }
        // Release the cached statement before issuing further statements on `tx`.
        drop(delete_stmt);

        persist_simple_provenance_event(
            tx,
            "operational_retention_run",
            &record.name,
            Some(serde_json::json!({
                "action_kind": plan.action_kind,
                "deleted_mutations": deleted_mutations,
                "before_timestamp": plan.before_timestamp,
                "max_rows": plan.max_rows,
                "executed_at": now_timestamp,
            })),
        )?;
    }

    // Counted after any deletes above, so for a real run this is already the final figure.
    let live_rows_remaining = count_operational_mutations_for_collection(tx, &record.name)?;
    let effective_deleted_mutations = if dry_run {
        plan.candidate_deletions
    } else {
        deleted_mutations
    };
    // A dry run deleted nothing, so project the remaining rows from the plan instead.
    let rows_remaining = if dry_run {
        live_rows_remaining.saturating_sub(effective_deleted_mutations)
    } else {
        live_rows_remaining
    };
    if !dry_run && plan.action_kind != OperationalRetentionActionKind::Noop {
        tx.execute(
            "INSERT INTO operational_retention_runs \
             (id, collection_name, executed_at, action_kind, dry_run, deleted_mutations, rows_remaining, metadata_json) \
             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
            rusqlite::params![
                new_id(),
                record.name,
                now_timestamp,
                // JSON-serialize the action kind and strip surrounding double quotes so a
                // bare string is stored; falls back to "noop" if serialization fails.
                serde_json::to_string(&plan.action_kind)
                    .unwrap_or_else(|_| "\"noop\"".to_owned())
                    .trim_matches('"')
                    .to_owned(),
                // Always 0 here: this branch only runs when `dry_run` is false.
                i32::from(dry_run),
                deleted_mutations,
                rows_remaining,
                serde_json::json!({
                    "before_timestamp": plan.before_timestamp,
                    "max_rows": plan.max_rows,
                })
                .to_string(),
            ],
        )?;
    }

    Ok(OperationalRetentionRunItem {
        collection_name: plan.collection_name,
        action_kind: plan.action_kind,
        deleted_mutations: effective_deleted_mutations,
        before_timestamp: plan.before_timestamp,
        max_rows: plan.max_rows,
        rows_remaining,
    })
}
4266
4267fn current_unix_timestamp() -> Result<i64, EngineError> {
4268    let now = SystemTime::now()
4269        .duration_since(SystemTime::UNIX_EPOCH)
4270        .map_err(|error| EngineError::Bridge(format!("system clock error: {error}")))?;
4271    i64::try_from(now.as_secs())
4272        .map_err(|_| EngineError::Bridge("unix timestamp overflow".to_owned()))
4273}
4274
4275fn map_operational_collection_row(
4276    row: &rusqlite::Row<'_>,
4277) -> Result<OperationalCollectionRecord, rusqlite::Error> {
4278    let kind_text: String = row.get(1)?;
4279    let kind = OperationalCollectionKind::try_from(kind_text.as_str()).map_err(|message| {
4280        rusqlite::Error::FromSqlConversionFailure(
4281            1,
4282            rusqlite::types::Type::Text,
4283            Box::new(io::Error::new(io::ErrorKind::InvalidData, message)),
4284        )
4285    })?;
4286    Ok(OperationalCollectionRecord {
4287        name: row.get(0)?,
4288        kind,
4289        schema_json: row.get(2)?,
4290        retention_json: row.get(3)?,
4291        filter_fields_json: row.get(4)?,
4292        validation_json: row.get(5)?,
4293        secondary_indexes_json: row.get(6)?,
4294        format_version: row.get(7)?,
4295        created_at: row.get(8)?,
4296        disabled_at: row.get(9)?,
4297    })
4298}
4299
4300fn map_operational_mutation_row(
4301    row: &rusqlite::Row<'_>,
4302) -> Result<OperationalMutationRow, rusqlite::Error> {
4303    Ok(OperationalMutationRow {
4304        id: row.get(0)?,
4305        collection_name: row.get(1)?,
4306        record_key: row.get(2)?,
4307        op_kind: row.get(3)?,
4308        payload_json: row.get(4)?,
4309        source_ref: row.get(5)?,
4310        created_at: row.get(6)?,
4311    })
4312}
4313
4314fn map_operational_current_row(
4315    row: &rusqlite::Row<'_>,
4316) -> Result<OperationalCurrentRow, rusqlite::Error> {
4317    Ok(OperationalCurrentRow {
4318        collection_name: row.get(0)?,
4319        record_key: row.get(1)?,
4320        payload_json: row.get(2)?,
4321        updated_at: row.get(3)?,
4322        last_mutation_id: row.get(4)?,
4323    })
4324}
4325
4326#[cfg(test)]
4327#[allow(clippy::expect_used)]
4328mod tests {
4329    use std::fs;
4330    use std::sync::Arc;
4331
4332    use fathomdb_schema::SchemaManager;
4333    use tempfile::NamedTempFile;
4334
4335    use super::{AdminService, SafeExportOptions, VectorRegenerationConfig};
4336    use crate::sqlite;
4337    use crate::{EngineError, OperationalCollectionKind, OperationalRegisterRequest};
4338
4339    #[cfg(feature = "sqlite-vec")]
4340    use fathomdb_query::QueryBuilder;
4341
4342    #[cfg(feature = "sqlite-vec")]
4343    use super::{VectorGeneratorPolicy, load_vector_regeneration_config};
4344
4345    #[cfg(feature = "sqlite-vec")]
4346    use crate::ExecutionCoordinator;
4347
4348    #[cfg(feature = "sqlite-vec")]
4349    use crate::TelemetryCounters;
4350
4351    #[allow(dead_code)]
4352    #[cfg(unix)]
4353    fn set_file_mode(path: &std::path::Path, mode: u32) {
4354        use std::os::unix::fs::PermissionsExt;
4355
4356        let mut permissions = fs::metadata(path).expect("script metadata").permissions();
4357        permissions.set_mode(mode);
4358        fs::set_permissions(path, permissions).expect("chmod");
4359    }
4360
4361    #[allow(dead_code)]
4362    #[cfg(not(unix))]
4363    fn set_file_mode(_path: &std::path::Path, _mode: u32) {}
4364
4365    fn setup() -> (NamedTempFile, AdminService) {
4366        let db = NamedTempFile::new().expect("temp file");
4367        let schema = Arc::new(SchemaManager::new());
4368        {
4369            let conn = sqlite::open_connection(db.path()).expect("connection");
4370            schema.bootstrap(&conn).expect("bootstrap");
4371        }
4372        let service = AdminService::new(db.path(), Arc::clone(&schema));
4373        (db, service)
4374    }
4375
4376    #[test]
4377    fn check_integrity_includes_active_uniqueness_count() {
4378        let (_db, service) = setup();
4379        let report = service.check_integrity().expect("integrity check");
4380        assert_eq!(report.duplicate_active_logical_ids, 0);
4381        assert_eq!(report.operational_missing_collections, 0);
4382        assert_eq!(report.operational_missing_last_mutations, 0);
4383    }
4384
4385    #[test]
4386    fn trace_source_returns_node_logical_ids() {
4387        let (db, service) = setup();
4388        {
4389            let conn = sqlite::open_connection(db.path()).expect("conn");
4390            conn.execute(
4391                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
4392                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 'source-1')",
4393                [],
4394            )
4395            .expect("insert node");
4396        }
4397        let report = service.trace_source("source-1").expect("trace");
4398        assert_eq!(report.node_rows, 1);
4399        assert_eq!(report.node_logical_ids, vec!["lg1"]);
4400    }
4401
4402    #[test]
4403    fn trace_source_includes_operational_mutations() {
4404        let (db, service) = setup();
4405        {
4406            let conn = sqlite::open_connection(db.path()).expect("conn");
4407            conn.execute(
4408                "INSERT INTO operational_collections \
4409                 (name, kind, schema_json, retention_json, format_version, created_at) \
4410                 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
4411                [],
4412            )
4413            .expect("insert collection");
4414            conn.execute(
4415                "INSERT INTO operational_mutations \
4416                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
4417                 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"ok\"}', 'source-1', 100, 1)",
4418                [],
4419            )
4420            .expect("insert mutation");
4421        }
4422
4423        let report = service.trace_source("source-1").expect("trace");
4424        assert_eq!(report.operational_mutation_rows, 1);
4425        assert_eq!(report.operational_mutation_ids, vec!["m1"]);
4426    }
4427
4428    #[test]
4429    fn excise_source_restores_prior_active_node() {
4430        let (db, service) = setup();
4431        {
4432            let conn = sqlite::open_connection(db.path()).expect("conn");
4433            conn.execute(
4434                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
4435                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
4436                [],
4437            )
4438            .expect("insert v1 superseded");
4439            conn.execute(
4440                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
4441                 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
4442                [],
4443            )
4444            .expect("insert v2 active");
4445        }
4446        service.excise_source("source-2").expect("excise");
4447        {
4448            let conn = sqlite::open_connection(db.path()).expect("conn");
4449            let active_row_id: String = conn
4450                .query_row(
4451                    "SELECT row_id FROM nodes WHERE logical_id = 'lg1' AND superseded_at IS NULL",
4452                    [],
4453                    |row| row.get(0),
4454                )
4455                .expect("active row exists after excise");
4456            assert_eq!(active_row_id, "r1");
4457        }
4458    }
4459
4460    #[test]
4461    fn excise_source_deletes_operational_mutations_and_repairs_latest_state_current() {
4462        let (db, service) = setup();
4463        {
4464            let conn = sqlite::open_connection(db.path()).expect("conn");
4465            conn.execute(
4466                "INSERT INTO operational_collections \
4467                 (name, kind, schema_json, retention_json, format_version, created_at) \
4468                 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
4469                [],
4470            )
4471            .expect("insert collection");
4472            conn.execute(
4473                "INSERT INTO operational_mutations \
4474                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
4475                 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"old\"}', 'source-1', 100, 1)",
4476                [],
4477            )
4478            .expect("insert prior mutation");
4479            conn.execute(
4480                "INSERT INTO operational_mutations \
4481                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
4482                 VALUES ('m2', 'connector_health', 'gmail', 'put', '{\"status\":\"new\"}', 'source-2', 200, 2)",
4483                [],
4484            )
4485            .expect("insert excised mutation");
4486            conn.execute(
4487                "INSERT INTO operational_current \
4488                 (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
4489                 VALUES ('connector_health', 'gmail', '{\"status\":\"new\"}', 200, 'm2')",
4490                [],
4491            )
4492            .expect("insert current row");
4493        }
4494
4495        let traced = service
4496            .trace_source("source-2")
4497            .expect("trace before excise");
4498        assert_eq!(traced.operational_mutation_rows, 1);
4499        assert_eq!(traced.operational_mutation_ids, vec!["m2"]);
4500
4501        let excised = service.excise_source("source-2").expect("excise");
4502        assert_eq!(excised.operational_mutation_rows, 0);
4503        assert!(excised.operational_mutation_ids.is_empty());
4504
4505        {
4506            let conn = sqlite::open_connection(db.path()).expect("conn");
4507            let remaining: i64 = conn
4508                .query_row(
4509                    "SELECT count(*) FROM operational_mutations WHERE source_ref = 'source-2'",
4510                    [],
4511                    |row| row.get(0),
4512                )
4513                .expect("remaining count");
4514            assert_eq!(remaining, 0);
4515
4516            let current: (String, String) = conn
4517                .query_row(
4518                    "SELECT payload_json, last_mutation_id FROM operational_current \
4519                     WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
4520                    [],
4521                    |row| Ok((row.get(0)?, row.get(1)?)),
4522                )
4523                .expect("rebuilt current row");
4524            assert_eq!(current.0, "{\"status\":\"old\"}");
4525            assert_eq!(current.1, "m1");
4526        }
4527    }
4528
4529    #[test]
4530    fn restore_logical_id_reestablishes_last_pre_retire_content_and_attached_edges() {
4531        let (db, service) = setup();
4532        {
4533            let conn = sqlite::open_connection(db.path()).expect("conn");
4534            conn.execute(
4535                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
4536                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
4537                [],
4538            )
4539            .expect("insert node");
4540            conn.execute(
4541                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
4542                 VALUES ('node-row-topic', 'topic-1', 'Topic', '{}', 100, 'seed')",
4543                [],
4544            )
4545            .expect("insert target node");
4546            conn.execute(
4547                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
4548                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
4549                [],
4550            )
4551            .expect("insert chunk");
4552            conn.execute(
4553                "INSERT INTO edges \
4554                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
4555                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 'seed')",
4556                [],
4557            )
4558            .expect("insert edge");
4559            conn.execute(
4560                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
4561                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
4562                [],
4563            )
4564            .expect("insert node retire event");
4565            conn.execute(
4566                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
4567                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
4568                [],
4569            )
4570            .expect("insert edge retire event");
4571            conn.execute(
4572                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
4573                [],
4574            )
4575            .expect("retire node");
4576            conn.execute(
4577                "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
4578                [],
4579            )
4580            .expect("retire edge");
4581            conn.execute("DELETE FROM fts_nodes", [])
4582                .expect("clear fts");
4583        }
4584
4585        let report = service.restore_logical_id("doc-1").expect("restore");
4586        assert_eq!(report.logical_id, "doc-1");
4587        assert!(!report.was_noop);
4588        assert_eq!(report.restored_node_rows, 1);
4589        assert_eq!(report.restored_edge_rows, 1);
4590        assert_eq!(report.restored_chunk_rows, 1);
4591        assert_eq!(report.restored_fts_rows, 1);
4592
4593        let conn = sqlite::open_connection(db.path()).expect("conn");
4594        let active_node_count: i64 = conn
4595            .query_row(
4596                "SELECT count(*) FROM nodes WHERE logical_id = 'doc-1' AND superseded_at IS NULL",
4597                [],
4598                |row| row.get(0),
4599            )
4600            .expect("active node count");
4601        assert_eq!(active_node_count, 1);
4602        let active_edge_count: i64 = conn
4603            .query_row(
4604                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
4605                [],
4606                |row| row.get(0),
4607            )
4608            .expect("active edge count");
4609        assert_eq!(active_edge_count, 1);
4610        let fts_count: i64 = conn
4611            .query_row(
4612                "SELECT count(*) FROM fts_nodes WHERE chunk_id = 'chunk-1'",
4613                [],
4614                |row| row.get(0),
4615            )
4616            .expect("fts count");
4617        assert_eq!(fts_count, 1);
4618    }
4619
4620    #[test]
4621    fn restore_logical_id_restores_edges_retired_after_the_node_retire_event() {
4622        let (db, service) = setup();
4623        {
4624            let conn = sqlite::open_connection(db.path()).expect("conn");
4625            conn.execute(
4626                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
4627                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
4628                [],
4629            )
4630            .expect("insert node");
4631            conn.execute(
4632                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
4633                 VALUES ('node-row-topic', 'topic-1', 'Topic', '{}', 100, 'seed')",
4634                [],
4635            )
4636            .expect("insert target node");
4637            conn.execute(
4638                "INSERT INTO edges \
4639                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
4640                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 'seed')",
4641                [],
4642            )
4643            .expect("insert edge");
4644            conn.execute(
4645                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
4646                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
4647                [],
4648            )
4649            .expect("insert node retire event");
4650            conn.execute(
4651                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
4652                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 201, '')",
4653                [],
4654            )
4655            .expect("insert edge retire event");
4656            conn.execute(
4657                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
4658                [],
4659            )
4660            .expect("retire node");
4661            conn.execute(
4662                "UPDATE edges SET superseded_at = 201 WHERE logical_id = 'edge-1'",
4663                [],
4664            )
4665            .expect("retire edge");
4666        }
4667
4668        let report = service.restore_logical_id("doc-1").expect("restore");
4669        assert_eq!(report.restored_edge_rows, 1);
4670
4671        let conn = sqlite::open_connection(db.path()).expect("conn");
4672        let active_edge_count: i64 = conn
4673            .query_row(
4674                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
4675                [],
4676                |row| row.get(0),
4677            )
4678            .expect("active edge count");
4679        assert_eq!(active_edge_count, 1);
4680    }
4681
4682    #[test]
4683    fn restore_logical_id_prefers_latest_retired_revision_when_timestamps_tie() {
4684        let (db, service) = setup();
4685        {
4686            let conn = sqlite::open_connection(db.path()).expect("conn");
4687            conn.execute(
4688                "INSERT INTO nodes \
4689                 (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
4690                 VALUES ('node-row-older', 'doc-1', 'Document', '{\"title\":\"older\"}', 100, 200, 'forget-1')",
4691                [],
4692            )
4693            .expect("insert older retired node");
4694            conn.execute(
4695                "INSERT INTO nodes \
4696                 (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
4697                 VALUES ('node-row-newer', 'doc-1', 'Document', '{\"title\":\"newer\"}', 100, 200, 'forget-1')",
4698                [],
4699            )
4700            .expect("insert newer retired node");
4701            conn.execute(
4702                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
4703                 VALUES ('evt-retire-older', 'node_retire', 'doc-1', 'forget-1', 200, '')",
4704                [],
4705            )
4706            .expect("insert older retire event");
4707            conn.execute(
4708                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
4709                 VALUES ('evt-retire-newer', 'node_retire', 'doc-1', 'forget-1', 200, '')",
4710                [],
4711            )
4712            .expect("insert newer retire event");
4713        }
4714
4715        let report = service.restore_logical_id("doc-1").expect("restore");
4716
4717        assert!(!report.was_noop);
4718        let conn = sqlite::open_connection(db.path()).expect("conn");
4719        let active_row: (String, String) = conn
4720            .query_row(
4721                "SELECT row_id, properties FROM nodes \
4722                 WHERE logical_id = 'doc-1' AND superseded_at IS NULL",
4723                [],
4724                |row| Ok((row.get(0)?, row.get(1)?)),
4725            )
4726            .expect("restored active row");
4727        assert_eq!(active_row.0, "node-row-newer");
4728        assert_eq!(active_row.1, "{\"title\":\"newer\"}");
4729    }
4730
4731    #[test]
4732    fn purge_logical_id_removes_retired_content_and_records_tombstone() {
4733        let (db, service) = setup();
4734        {
4735            let conn = sqlite::open_connection(db.path()).expect("conn");
4736            conn.execute(
4737                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
4738                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
4739                [],
4740            )
4741            .expect("insert retired node");
4742            conn.execute(
4743                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
4744                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
4745                [],
4746            )
4747            .expect("insert chunk");
4748            conn.execute(
4749                "INSERT INTO edges \
4750                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, superseded_at, source_ref) \
4751                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 200, 'seed')",
4752                [],
4753            )
4754            .expect("insert retired edge");
4755            conn.execute(
4756                "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
4757                 VALUES ('chunk-1', 'doc-1', 'Document', 'budget narrative')",
4758                [],
4759            )
4760            .expect("insert fts");
4761        }
4762
4763        let report = service.purge_logical_id("doc-1").expect("purge");
4764        assert_eq!(report.logical_id, "doc-1");
4765        assert!(!report.was_noop);
4766        assert_eq!(report.deleted_node_rows, 1);
4767        assert_eq!(report.deleted_edge_rows, 1);
4768        assert_eq!(report.deleted_chunk_rows, 1);
4769        assert_eq!(report.deleted_fts_rows, 1);
4770
4771        let conn = sqlite::open_connection(db.path()).expect("conn");
4772        let remaining_nodes: i64 = conn
4773            .query_row(
4774                "SELECT count(*) FROM nodes WHERE logical_id = 'doc-1'",
4775                [],
4776                |row| row.get(0),
4777            )
4778            .expect("remaining nodes");
4779        assert_eq!(remaining_nodes, 0);
4780        let remaining_edges: i64 = conn
4781            .query_row(
4782                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1'",
4783                [],
4784                |row| row.get(0),
4785            )
4786            .expect("remaining edges");
4787        assert_eq!(remaining_edges, 0);
4788        let remaining_chunks: i64 = conn
4789            .query_row(
4790                "SELECT count(*) FROM chunks WHERE id = 'chunk-1'",
4791                [],
4792                |row| row.get(0),
4793            )
4794            .expect("remaining chunks");
4795        assert_eq!(remaining_chunks, 0);
4796        let purge_events: i64 = conn
4797            .query_row(
4798                "SELECT count(*) FROM provenance_events WHERE event_type = 'purge_logical_id' AND subject = 'doc-1'",
4799                [],
4800                |row| row.get(0),
4801            )
4802            .expect("purge events");
4803        assert_eq!(purge_events, 1);
4804    }
4805
4806    #[test]
4807    fn check_semantics_accepts_preserved_retired_chunks() {
4808        let (db, service) = setup();
4809        {
4810            let conn = sqlite::open_connection(db.path()).expect("conn");
4811            conn.execute(
4812                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
4813                 VALUES ('node-row-1', 'doc-1', 'Document', '{}', 100, 200, 'seed')",
4814                [],
4815            )
4816            .expect("insert retired node");
4817            conn.execute(
4818                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
4819                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
4820                [],
4821            )
4822            .expect("insert chunk");
4823        }
4824
4825        let report = service.check_semantics().expect("semantics");
4826        assert_eq!(report.orphaned_chunks, 0);
4827    }
4828
4829    #[test]
4830    fn check_semantics_detects_missing_retired_node_history_for_preserved_chunks() {
4831        let (db, service) = setup();
4832        {
4833            let conn = sqlite::open_connection(db.path()).expect("conn");
4834            conn.execute(
4835                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
4836                 VALUES ('chunk-1', 'ghost-doc', 'budget narrative', 100)",
4837                [],
4838            )
4839            .expect("insert orphaned chunk");
4840        }
4841
4842        let report = service.check_semantics().expect("semantics");
4843        assert_eq!(report.orphaned_chunks, 1);
4844    }
4845
4846    #[cfg(feature = "sqlite-vec")]
4847    #[test]
4848    fn check_semantics_detects_missing_retired_node_history_for_preserved_vec_rows() {
4849        let (db, service) = setup();
4850        {
4851            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
4852            service
4853                .schema_manager
4854                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
4855                .expect("ensure vec profile");
4856            conn.execute(
4857                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
4858                 VALUES ('chunk-1', 'ghost-doc', 'budget narrative', 100)",
4859                [],
4860            )
4861            .expect("insert orphaned chunk");
4862            conn.execute(
4863                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
4864                [],
4865            )
4866            .expect("insert vec row");
4867        }
4868
4869        let report = service.check_semantics().expect("semantics");
4870        assert_eq!(report.orphaned_chunks, 1);
4871        assert_eq!(report.vec_rows_for_superseded_nodes, 1);
4872    }
4873
4874    #[cfg(feature = "sqlite-vec")]
4875    #[test]
4876    fn restore_logical_id_reestablishes_vector_search_without_reingest() {
4877        let (db, service) = setup();
4878        {
4879            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
4880            service
4881                .schema_manager
4882                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
4883                .expect("ensure vec profile");
4884            conn.execute(
4885                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
4886                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
4887                [],
4888            )
4889            .expect("insert retired node");
4890            conn.execute(
4891                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
4892                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
4893                [],
4894            )
4895            .expect("insert chunk");
4896            conn.execute(
4897                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
4898                [],
4899            )
4900            .expect("insert vec row");
4901            conn.execute(
4902                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
4903                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
4904                [],
4905            )
4906            .expect("insert retire event");
4907        }
4908
4909        let report = service.restore_logical_id("doc-1").expect("restore");
4910        assert_eq!(report.restored_vec_rows, 1);
4911
4912        let coordinator = ExecutionCoordinator::open(
4913            db.path(),
4914            Arc::new(SchemaManager::new()),
4915            Some(4),
4916            1,
4917            Arc::new(TelemetryCounters::default()),
4918        )
4919        .expect("coordinator");
4920        let compiled = QueryBuilder::nodes("Document")
4921            .vector_search("[0.0, 0.0, 0.0, 0.0]", 5)
4922            .compile()
4923            .expect("compile");
4924        let rows = coordinator
4925            .execute_compiled_read(&compiled)
4926            .expect("vector read");
4927        assert!(
4928            rows.nodes.iter().any(|row| row.logical_id == "doc-1"),
4929            "restore should make the preserved vec row visible again without re-ingest"
4930        );
4931    }
4932
4933    #[cfg(feature = "sqlite-vec")]
4934    #[test]
4935    fn purge_logical_id_deletes_vec_rows_for_retired_content() {
4936        let (db, service) = setup();
4937        {
4938            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
4939            service
4940                .schema_manager
4941                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
4942                .expect("ensure vec profile");
4943            conn.execute(
4944                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
4945                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
4946                [],
4947            )
4948            .expect("insert retired node");
4949            conn.execute(
4950                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
4951                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
4952                [],
4953            )
4954            .expect("insert chunk");
4955            conn.execute(
4956                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
4957                [],
4958            )
4959            .expect("insert vec row");
4960        }
4961
4962        let report = service.purge_logical_id("doc-1").expect("purge");
4963        assert_eq!(report.deleted_vec_rows, 1);
4964
4965        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
4966        let vec_count: i64 = conn
4967            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
4968                row.get(0)
4969            })
4970            .expect("vec count");
4971        assert_eq!(vec_count, 0);
4972    }
4973
4974    #[cfg(feature = "sqlite-vec")]
4975    #[test]
4976    fn restore_logical_id_restores_visibility_of_regenerated_vectors() {
4977        let (db, service) = setup();
4978        let temp_dir = tempfile::tempdir().expect("temp dir");
4979        let script_path = temp_dir.path().join("vector-generator-restore.sh");
4980        fs::write(
4981            &script_path,
4982            r#"#!/usr/bin/env bash
4983set -euo pipefail
4984python3 -c 'import json, sys
4985payload = json.load(sys.stdin)
4986json.dump({"embeddings": [{"chunk_id": payload["chunks"][0]["chunk_id"], "embedding": [0.0, 0.0, 0.0, 0.0]}]}, sys.stdout)'
4987"#,
4988        )
4989        .expect("write script");
4990        set_file_mode(&script_path, 0o755);
4991
4992        {
4993            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
4994            service
4995                .schema_manager
4996                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
4997                .expect("ensure vec profile");
4998            conn.execute(
4999                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
5000                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
5001                [],
5002            )
5003            .expect("insert node");
5004            conn.execute(
5005                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5006                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5007                [],
5008            )
5009            .expect("insert chunk");
5010        }
5011
5012        service
5013            .regenerate_vector_embeddings(&VectorRegenerationConfig {
5014                profile: "default".to_owned(),
5015                table_name: "vec_nodes_active".to_owned(),
5016                model_identity: "model".to_owned(),
5017                model_version: "1.0.0".to_owned(),
5018                dimension: 4,
5019                normalization_policy: "l2".to_owned(),
5020                chunking_policy: "per_chunk".to_owned(),
5021                preprocessing_policy: "trim".to_owned(),
5022                generator_command: vec![script_path.to_string_lossy().to_string()],
5023            })
5024            .expect("regenerate");
5025
5026        {
5027            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5028            conn.execute(
5029                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5030                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
5031                [],
5032            )
5033            .expect("insert retire event");
5034            conn.execute(
5035                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
5036                [],
5037            )
5038            .expect("retire node");
5039        }
5040
5041        let report = service.restore_logical_id("doc-1").expect("restore");
5042        assert_eq!(report.restored_vec_rows, 1);
5043
5044        let coordinator = ExecutionCoordinator::open(
5045            db.path(),
5046            Arc::new(SchemaManager::new()),
5047            Some(4),
5048            1,
5049            Arc::new(TelemetryCounters::default()),
5050        )
5051        .expect("coordinator");
5052        let compiled = QueryBuilder::nodes("Document")
5053            .vector_search("[0.0, 0.0, 0.0, 0.0]", 5)
5054            .compile()
5055            .expect("compile");
5056        let rows = coordinator
5057            .execute_compiled_read(&compiled)
5058            .expect("vector read");
5059        assert!(
5060            rows.nodes.iter().any(|row| row.logical_id == "doc-1"),
5061            "restored logical_id should become visible through regenerated vectors"
5062        );
5063    }
5064
5065    #[test]
5066    fn check_semantics_clean_db_returns_zeros() {
5067        let (_db, service) = setup();
5068        let report = service.check_semantics().expect("semantics check");
5069        assert_eq!(report.orphaned_chunks, 0);
5070        assert_eq!(report.null_source_ref_nodes, 0);
5071        assert_eq!(report.broken_step_fk, 0);
5072        assert_eq!(report.broken_action_fk, 0);
5073        assert_eq!(report.stale_fts_rows, 0);
5074        assert_eq!(report.fts_rows_for_superseded_nodes, 0);
5075        assert_eq!(report.dangling_edges, 0);
5076        assert_eq!(report.orphaned_supersession_chains, 0);
5077        assert_eq!(report.stale_vec_rows, 0);
5078        assert_eq!(report.vec_rows_for_superseded_nodes, 0);
5079        assert_eq!(report.missing_operational_current_rows, 0);
5080        assert_eq!(report.stale_operational_current_rows, 0);
5081        assert_eq!(report.disabled_collection_mutations, 0);
5082        assert!(report.warnings.is_empty());
5083    }
5084
5085    #[test]
5086    fn register_operational_collection_persists_and_emits_provenance() {
5087        let (db, service) = setup();
5088        let record = service
5089            .register_operational_collection(&OperationalRegisterRequest {
5090                name: "connector_health".to_owned(),
5091                kind: OperationalCollectionKind::LatestState,
5092                schema_json: "{}".to_owned(),
5093                retention_json: "{}".to_owned(),
5094                filter_fields_json: "[]".to_owned(),
5095                validation_json: String::new(),
5096                secondary_indexes_json: "[]".to_owned(),
5097                format_version: 1,
5098            })
5099            .expect("register collection");
5100
5101        assert_eq!(record.name, "connector_health");
5102        assert_eq!(record.kind, OperationalCollectionKind::LatestState);
5103        assert_eq!(record.schema_json, "{}");
5104        assert_eq!(record.retention_json, "{}");
5105        assert_eq!(record.filter_fields_json, "[]");
5106        assert!(record.created_at > 0);
5107        assert_eq!(record.disabled_at, None);
5108
5109        let described = service
5110            .describe_operational_collection("connector_health")
5111            .expect("describe collection")
5112            .expect("collection exists");
5113        assert_eq!(described, record);
5114
5115        let conn = sqlite::open_connection(db.path()).expect("conn");
5116        let provenance_count: i64 = conn
5117            .query_row(
5118                "SELECT count(*) FROM provenance_events \
5119                 WHERE event_type = 'operational_collection_registered' AND subject = 'connector_health'",
5120                [],
5121                |row| row.get(0),
5122            )
5123            .expect("provenance count");
5124        assert_eq!(provenance_count, 1);
5125    }
5126
5127    #[test]
5128    fn register_and_update_operational_collection_validation_round_trip() {
5129        let (db, service) = setup();
5130        let record = service
5131            .register_operational_collection(&OperationalRegisterRequest {
5132                name: "connector_health".to_owned(),
5133                kind: OperationalCollectionKind::LatestState,
5134                schema_json: "{}".to_owned(),
5135                retention_json: "{}".to_owned(),
5136                filter_fields_json: "[]".to_owned(),
5137                validation_json: String::new(),
5138                secondary_indexes_json: "[]".to_owned(),
5139                format_version: 1,
5140            })
5141            .expect("register collection");
5142        assert_eq!(record.validation_json, "");
5143
5144        let validation_json = r#"{"format_version":1,"mode":"enforce","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#;
5145        let updated = service
5146            .update_operational_collection_validation("connector_health", validation_json)
5147            .expect("update validation");
5148        assert_eq!(updated.validation_json, validation_json);
5149
5150        let described = service
5151            .describe_operational_collection("connector_health")
5152            .expect("describe collection")
5153            .expect("collection exists");
5154        assert_eq!(described.validation_json, validation_json);
5155
5156        let conn = sqlite::open_connection(db.path()).expect("conn");
5157        let provenance_count: i64 = conn
5158            .query_row(
5159                "SELECT count(*) FROM provenance_events \
5160                 WHERE event_type = 'operational_collection_validation_updated' \
5161                   AND subject = 'connector_health'",
5162                [],
5163                |row| row.get(0),
5164            )
5165            .expect("provenance count");
5166        assert_eq!(provenance_count, 1);
5167    }
5168
5169    #[test]
5170    fn register_update_and_rebuild_operational_secondary_indexes_round_trip() {
5171        let (db, service) = setup();
5172        let record = service
5173            .register_operational_collection(&OperationalRegisterRequest {
5174                name: "audit_log".to_owned(),
5175                kind: OperationalCollectionKind::AppendOnlyLog,
5176                schema_json: "{}".to_owned(),
5177                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
5178                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
5179                validation_json: String::new(),
5180                secondary_indexes_json: "[]".to_owned(),
5181                format_version: 1,
5182            })
5183            .expect("register collection");
5184        assert_eq!(record.secondary_indexes_json, "[]");
5185
5186        {
5187            let writer = crate::WriterActor::start(
5188                db.path(),
5189                Arc::new(SchemaManager::new()),
5190                crate::ProvenanceMode::Warn,
5191                Arc::new(crate::TelemetryCounters::default()),
5192            )
5193            .expect("writer");
5194            writer
5195                .submit(crate::WriteRequest {
5196                    label: "secondary-index-seed".to_owned(),
5197                    nodes: vec![],
5198                    node_retires: vec![],
5199                    edges: vec![],
5200                    edge_retires: vec![],
5201                    chunks: vec![],
5202                    runs: vec![],
5203                    steps: vec![],
5204                    actions: vec![],
5205                    optional_backfills: vec![],
5206                    vec_inserts: vec![],
5207                    operational_writes: vec![
5208                        crate::OperationalWrite::Append {
5209                            collection: "audit_log".to_owned(),
5210                            record_key: "evt-1".to_owned(),
5211                            payload_json: r#"{"actor":"alice","ts":100}"#.to_owned(),
5212                            source_ref: Some("src-1".to_owned()),
5213                        },
5214                        crate::OperationalWrite::Append {
5215                            collection: "audit_log".to_owned(),
5216                            record_key: "evt-2".to_owned(),
5217                            payload_json: r#"{"actor":"bob","ts":200}"#.to_owned(),
5218                            source_ref: Some("src-2".to_owned()),
5219                        },
5220                    ],
5221                })
5222                .expect("seed writes");
5223        }
5224
5225        let secondary_indexes_json = r#"[{"name":"actor_ts","kind":"append_only_field_time","field":"actor","value_type":"string","time_field":"ts"}]"#;
5226        let updated = service
5227            .update_operational_collection_secondary_indexes("audit_log", secondary_indexes_json)
5228            .expect("update secondary indexes");
5229        assert_eq!(updated.secondary_indexes_json, secondary_indexes_json);
5230
5231        let conn = sqlite::open_connection(db.path()).expect("conn");
5232        let entry_count: i64 = conn
5233            .query_row(
5234                "SELECT count(*) FROM operational_secondary_index_entries \
5235                 WHERE collection_name = 'audit_log' AND index_name = 'actor_ts'",
5236                [],
5237                |row| row.get(0),
5238            )
5239            .expect("secondary index count");
5240        assert_eq!(entry_count, 2);
5241        conn.execute(
5242            "DELETE FROM operational_secondary_index_entries WHERE collection_name = 'audit_log'",
5243            [],
5244        )
5245        .expect("clear index entries");
5246        drop(conn);
5247
5248        let rebuild = service
5249            .rebuild_operational_secondary_indexes("audit_log")
5250            .expect("rebuild secondary indexes");
5251        assert_eq!(rebuild.collection_name, "audit_log");
5252        assert_eq!(rebuild.mutation_entries_rebuilt, 2);
5253        assert_eq!(rebuild.current_entries_rebuilt, 0);
5254    }
5255
5256    #[test]
5257    fn register_operational_collection_rejects_invalid_validation_contract() {
5258        let (_db, service) = setup();
5259
5260        let error = service
5261            .register_operational_collection(&OperationalRegisterRequest {
5262                name: "connector_health".to_owned(),
5263                kind: OperationalCollectionKind::LatestState,
5264                schema_json: "{}".to_owned(),
5265                retention_json: "{}".to_owned(),
5266                filter_fields_json: "[]".to_owned(),
5267                validation_json: r#"{"format_version":1,"mode":"enforce","fields":[{"name":"status","type":"string","minimum":0}]}"#
5268                    .to_owned(),
5269                secondary_indexes_json: "[]".to_owned(),
5270                format_version: 1,
5271            })
5272            .expect_err("invalid validation contract should reject");
5273
5274        assert!(matches!(error, EngineError::InvalidWrite(_)));
5275        assert!(error.to_string().contains("minimum/maximum"));
5276    }
5277
5278    #[test]
5279    fn validate_operational_collection_history_reports_invalid_rows_without_mutation() {
5280        let (db, service) = setup();
5281        service
5282            .register_operational_collection(&OperationalRegisterRequest {
5283                name: "audit_log".to_owned(),
5284                kind: OperationalCollectionKind::AppendOnlyLog,
5285                schema_json: "{}".to_owned(),
5286                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
5287                filter_fields_json: "[]".to_owned(),
5288                validation_json: r#"{"format_version":1,"mode":"disabled","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#
5289                    .to_owned(),
5290                secondary_indexes_json: "[]".to_owned(),
5291                format_version: 1,
5292            })
5293            .expect("register collection");
5294        {
5295            let writer = crate::WriterActor::start(
5296                db.path(),
5297                Arc::new(SchemaManager::new()),
5298                crate::ProvenanceMode::Warn,
5299                Arc::new(crate::TelemetryCounters::default()),
5300            )
5301            .expect("writer");
5302            writer
5303                .submit(crate::WriteRequest {
5304                    label: "history-validation".to_owned(),
5305                    nodes: vec![],
5306                    node_retires: vec![],
5307                    edges: vec![],
5308                    edge_retires: vec![],
5309                    chunks: vec![],
5310                    runs: vec![],
5311                    steps: vec![],
5312                    actions: vec![],
5313                    optional_backfills: vec![],
5314                    vec_inserts: vec![],
5315                    operational_writes: vec![
5316                        crate::OperationalWrite::Append {
5317                            collection: "audit_log".to_owned(),
5318                            record_key: "evt-1".to_owned(),
5319                            payload_json: r#"{"status":"ok"}"#.to_owned(),
5320                            source_ref: Some("src-1".to_owned()),
5321                        },
5322                        crate::OperationalWrite::Append {
5323                            collection: "audit_log".to_owned(),
5324                            record_key: "evt-2".to_owned(),
5325                            payload_json: r#"{"status":"bogus"}"#.to_owned(),
5326                            source_ref: Some("src-2".to_owned()),
5327                        },
5328                    ],
5329                })
5330                .expect("write");
5331        }
5332
5333        let report = service
5334            .validate_operational_collection_history("audit_log")
5335            .expect("validate history");
5336        assert_eq!(report.collection_name, "audit_log");
5337        assert_eq!(report.checked_rows, 2);
5338        assert_eq!(report.invalid_row_count, 1);
5339        assert_eq!(report.issues.len(), 1);
5340        assert_eq!(report.issues[0].record_key, "evt-2");
5341        assert!(report.issues[0].message.contains("must be one of"));
5342
5343        let trace = service
5344            .trace_operational_collection("audit_log", None)
5345            .expect("trace");
5346        assert_eq!(trace.mutation_count, 2);
5347
5348        let conn = sqlite::open_connection(db.path()).expect("conn");
5349        let provenance_count: i64 = conn
5350            .query_row(
5351                "SELECT count(*) FROM provenance_events \
5352                 WHERE event_type = 'operational_collection_history_validated' \
5353                   AND subject = 'audit_log'",
5354                [],
5355                |row| row.get(0),
5356            )
5357            .expect("provenance count");
5358        assert_eq!(provenance_count, 0);
5359    }
5360
5361    #[test]
5362    fn trace_operational_collection_returns_mutations_and_current_rows() {
5363        let (db, service) = setup();
5364        service
5365            .register_operational_collection(&OperationalRegisterRequest {
5366                name: "connector_health".to_owned(),
5367                kind: OperationalCollectionKind::LatestState,
5368                schema_json: "{}".to_owned(),
5369                retention_json: "{}".to_owned(),
5370                filter_fields_json: "[]".to_owned(),
5371                validation_json: String::new(),
5372                secondary_indexes_json: "[]".to_owned(),
5373                format_version: 1,
5374            })
5375            .expect("register collection");
5376        {
5377            let writer = crate::WriterActor::start(
5378                db.path(),
5379                Arc::new(SchemaManager::new()),
5380                crate::ProvenanceMode::Warn,
5381                Arc::new(crate::TelemetryCounters::default()),
5382            )
5383            .expect("writer");
5384            writer
5385                .submit(crate::WriteRequest {
5386                    label: "operational".to_owned(),
5387                    nodes: vec![],
5388                    node_retires: vec![],
5389                    edges: vec![],
5390                    edge_retires: vec![],
5391                    chunks: vec![],
5392                    runs: vec![],
5393                    steps: vec![],
5394                    actions: vec![],
5395                    optional_backfills: vec![],
5396                    vec_inserts: vec![],
5397                    operational_writes: vec![crate::OperationalWrite::Put {
5398                        collection: "connector_health".to_owned(),
5399                        record_key: "gmail".to_owned(),
5400                        payload_json: r#"{"status":"ok"}"#.to_owned(),
5401                        source_ref: Some("src-1".to_owned()),
5402                    }],
5403                })
5404                .expect("write");
5405        }
5406
5407        let report = service
5408            .trace_operational_collection("connector_health", Some("gmail"))
5409            .expect("trace");
5410        assert_eq!(report.collection_name, "connector_health");
5411        assert_eq!(report.record_key.as_deref(), Some("gmail"));
5412        assert_eq!(report.mutation_count, 1);
5413        assert_eq!(report.current_count, 1);
5414        assert_eq!(report.mutations[0].op_kind, "put");
5415        assert_eq!(report.current_rows[0].payload_json, r#"{"status":"ok"}"#);
5416    }
5417
5418    #[test]
5419    fn trace_operational_collection_rejects_unknown_collection() {
5420        let (_db, service) = setup();
5421
5422        let error = service
5423            .trace_operational_collection("missing_collection", None)
5424            .expect_err("unknown collection should fail");
5425
5426        assert!(matches!(error, EngineError::InvalidWrite(_)));
5427        assert!(error.to_string().contains("is not registered"));
5428    }
5429
5430    #[test]
5431    fn rebuild_operational_current_repairs_missing_latest_state_rows() {
5432        let (db, service) = setup();
5433        service
5434            .register_operational_collection(&OperationalRegisterRequest {
5435                name: "connector_health".to_owned(),
5436                kind: OperationalCollectionKind::LatestState,
5437                schema_json: "{}".to_owned(),
5438                retention_json: "{}".to_owned(),
5439                filter_fields_json: "[]".to_owned(),
5440                validation_json: String::new(),
5441                secondary_indexes_json: "[]".to_owned(),
5442                format_version: 1,
5443            })
5444            .expect("register collection");
5445        {
5446            let writer = crate::WriterActor::start(
5447                db.path(),
5448                Arc::new(SchemaManager::new()),
5449                crate::ProvenanceMode::Warn,
5450                Arc::new(crate::TelemetryCounters::default()),
5451            )
5452            .expect("writer");
5453            writer
5454                .submit(crate::WriteRequest {
5455                    label: "operational".to_owned(),
5456                    nodes: vec![],
5457                    node_retires: vec![],
5458                    edges: vec![],
5459                    edge_retires: vec![],
5460                    chunks: vec![],
5461                    runs: vec![],
5462                    steps: vec![],
5463                    actions: vec![],
5464                    optional_backfills: vec![],
5465                    vec_inserts: vec![],
5466                    operational_writes: vec![crate::OperationalWrite::Put {
5467                        collection: "connector_health".to_owned(),
5468                        record_key: "gmail".to_owned(),
5469                        payload_json: r#"{"status":"ok"}"#.to_owned(),
5470                        source_ref: Some("src-1".to_owned()),
5471                    }],
5472                })
5473                .expect("write");
5474        }
5475        {
5476            let conn = sqlite::open_connection(db.path()).expect("conn");
5477            conn.execute(
5478                "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
5479                [],
5480            )
5481            .expect("delete current row");
5482        }
5483
5484        let before = service.check_semantics().expect("semantics before rebuild");
5485        assert_eq!(before.missing_operational_current_rows, 1);
5486
5487        let repair = service
5488            .rebuild_operational_current(Some("connector_health"))
5489            .expect("rebuild current");
5490        assert_eq!(repair.collections_rebuilt, 1);
5491        assert_eq!(repair.current_rows_rebuilt, 1);
5492
5493        let after = service.check_semantics().expect("semantics after rebuild");
5494        assert_eq!(after.missing_operational_current_rows, 0);
5495
5496        let conn = sqlite::open_connection(db.path()).expect("conn");
5497        let payload: String = conn
5498            .query_row(
5499                "SELECT payload_json FROM operational_current \
5500                 WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
5501                [],
5502                |row| row.get(0),
5503            )
5504            .expect("restored payload");
5505        assert_eq!(payload, r#"{"status":"ok"}"#);
5506    }
5507
5508    #[test]
5509    fn rebuild_operational_current_restores_latest_state_secondary_index_entries() {
5510        let (db, service) = setup();
5511        service
5512            .register_operational_collection(&OperationalRegisterRequest {
5513                name: "connector_health".to_owned(),
5514                kind: OperationalCollectionKind::LatestState,
5515                schema_json: "{}".to_owned(),
5516                retention_json: "{}".to_owned(),
5517                filter_fields_json: "[]".to_owned(),
5518                validation_json: String::new(),
5519                secondary_indexes_json: r#"[{"name":"status_current","kind":"latest_state_field","field":"status","value_type":"string"}]"#.to_owned(),
5520                format_version: 1,
5521            })
5522            .expect("register collection");
5523        {
5524            let writer = crate::WriterActor::start(
5525                db.path(),
5526                Arc::new(SchemaManager::new()),
5527                crate::ProvenanceMode::Warn,
5528                Arc::new(crate::TelemetryCounters::default()),
5529            )
5530            .expect("writer");
5531            writer
5532                .submit(crate::WriteRequest {
5533                    label: "operational".to_owned(),
5534                    nodes: vec![],
5535                    node_retires: vec![],
5536                    edges: vec![],
5537                    edge_retires: vec![],
5538                    chunks: vec![],
5539                    runs: vec![],
5540                    steps: vec![],
5541                    actions: vec![],
5542                    optional_backfills: vec![],
5543                    vec_inserts: vec![],
5544                    operational_writes: vec![crate::OperationalWrite::Put {
5545                        collection: "connector_health".to_owned(),
5546                        record_key: "gmail".to_owned(),
5547                        payload_json: r#"{"status":"ok"}"#.to_owned(),
5548                        source_ref: Some("src-1".to_owned()),
5549                    }],
5550                })
5551                .expect("write");
5552        }
5553        {
5554            let conn = sqlite::open_connection(db.path()).expect("conn");
5555            let entry_count: i64 = conn
5556                .query_row(
5557                    "SELECT count(*) FROM operational_secondary_index_entries \
5558                     WHERE collection_name = 'connector_health' AND subject_kind = 'current'",
5559                    [],
5560                    |row| row.get(0),
5561                )
5562                .expect("secondary index count before repair");
5563            assert_eq!(entry_count, 1);
5564            conn.execute(
5565                "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
5566                [],
5567            )
5568            .expect("delete current row");
5569        }
5570
5571        service
5572            .rebuild_operational_current(Some("connector_health"))
5573            .expect("rebuild current");
5574
5575        let conn = sqlite::open_connection(db.path()).expect("conn");
5576        let entry_count: i64 = conn
5577            .query_row(
5578                "SELECT count(*) FROM operational_secondary_index_entries \
5579                 WHERE collection_name = 'connector_health' AND subject_kind = 'current'",
5580                [],
5581                |row| row.get(0),
5582            )
5583            .expect("secondary index count after repair");
5584        assert_eq!(entry_count, 1);
5585    }
5586
5587    #[test]
5588    fn operational_current_semantics_and_rebuild_follow_mutation_order() {
5589        let (db, service) = setup();
5590        {
5591            let conn = sqlite::open_connection(db.path()).expect("conn");
5592            conn.execute(
5593                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
5594                 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
5595                [],
5596            )
5597            .expect("seed collection");
5598            conn.execute(
5599                "INSERT INTO operational_mutations \
5600                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
5601                 VALUES ('m3', 'connector_health', 'gmail', 'put', '{\"status\":\"old\"}', 'src-1', 100, 1)",
5602                [],
5603            )
5604            .expect("seed first put");
5605            conn.execute(
5606                "INSERT INTO operational_mutations \
5607                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
5608                 VALUES ('m2', 'connector_health', 'gmail', 'delete', '', 'src-2', 100, 2)",
5609                [],
5610            )
5611            .expect("seed delete");
5612            conn.execute(
5613                "INSERT INTO operational_mutations \
5614                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
5615                 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"new\"}', 'src-3', 100, 3)",
5616                [],
5617            )
5618            .expect("seed final put");
5619            conn.execute(
5620                "INSERT INTO operational_current \
5621                 (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
5622                 VALUES ('connector_health', 'gmail', '{\"status\":\"new\"}', 100, 'm1')",
5623                [],
5624            )
5625            .expect("seed current");
5626        }
5627
5628        let before = service.check_semantics().expect("semantics before rebuild");
5629        assert_eq!(before.missing_operational_current_rows, 0);
5630        assert_eq!(before.stale_operational_current_rows, 0);
5631
5632        {
5633            let conn = sqlite::open_connection(db.path()).expect("conn");
5634            conn.execute(
5635                "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
5636                [],
5637            )
5638            .expect("delete current row");
5639        }
5640
5641        let missing = service.check_semantics().expect("semantics after delete");
5642        assert_eq!(missing.missing_operational_current_rows, 1);
5643        assert_eq!(missing.stale_operational_current_rows, 0);
5644
5645        service
5646            .rebuild_operational_current(Some("connector_health"))
5647            .expect("rebuild current");
5648
5649        let after = service.check_semantics().expect("semantics after rebuild");
5650        assert_eq!(after.missing_operational_current_rows, 0);
5651        assert_eq!(after.stale_operational_current_rows, 0);
5652
5653        let conn = sqlite::open_connection(db.path()).expect("conn");
5654        let payload: String = conn
5655            .query_row(
5656                "SELECT payload_json FROM operational_current \
5657                 WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
5658                [],
5659                |row| row.get(0),
5660            )
5661            .expect("restored payload");
5662        assert_eq!(payload, r#"{"status":"new"}"#);
5663    }
5664
5665    #[test]
5666    fn disable_operational_collection_sets_disabled_at_and_emits_provenance() {
5667        let (db, service) = setup();
5668        service
5669            .register_operational_collection(&OperationalRegisterRequest {
5670                name: "audit_log".to_owned(),
5671                kind: OperationalCollectionKind::AppendOnlyLog,
5672                schema_json: "{}".to_owned(),
5673                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
5674                filter_fields_json: "[]".to_owned(),
5675                validation_json: String::new(),
5676                secondary_indexes_json: "[]".to_owned(),
5677                format_version: 1,
5678            })
5679            .expect("register collection");
5680
5681        let record = service
5682            .disable_operational_collection("audit_log")
5683            .expect("disable collection");
5684        assert_eq!(record.name, "audit_log");
5685        assert!(record.disabled_at.is_some());
5686
5687        let disabled_at = record.disabled_at.expect("disabled_at");
5688        let described = service
5689            .describe_operational_collection("audit_log")
5690            .expect("describe collection")
5691            .expect("collection exists");
5692        assert_eq!(described.disabled_at, Some(disabled_at));
5693
5694        let writer = crate::WriterActor::start(
5695            db.path(),
5696            Arc::new(SchemaManager::new()),
5697            crate::ProvenanceMode::Warn,
5698            Arc::new(crate::TelemetryCounters::default()),
5699        )
5700        .expect("writer");
5701        let error = writer
5702            .submit(crate::WriteRequest {
5703                label: "disabled-operational".to_owned(),
5704                nodes: vec![],
5705                node_retires: vec![],
5706                edges: vec![],
5707                edge_retires: vec![],
5708                chunks: vec![],
5709                runs: vec![],
5710                steps: vec![],
5711                actions: vec![],
5712                optional_backfills: vec![],
5713                vec_inserts: vec![],
5714                operational_writes: vec![crate::OperationalWrite::Append {
5715                    collection: "audit_log".to_owned(),
5716                    record_key: "evt-1".to_owned(),
5717                    payload_json: r#"{"type":"sync"}"#.to_owned(),
5718                    source_ref: Some("src-1".to_owned()),
5719                }],
5720            })
5721            .expect_err("disabled collection should reject writes");
5722        assert!(matches!(error, EngineError::InvalidWrite(_)));
5723        assert!(error.to_string().contains("is disabled"));
5724
5725        let conn = sqlite::open_connection(db.path()).expect("conn");
5726        let provenance_count: i64 = conn
5727            .query_row(
5728                "SELECT count(*) FROM provenance_events \
5729                 WHERE event_type = 'operational_collection_disabled' AND subject = 'audit_log'",
5730                [],
5731                |row| row.get(0),
5732            )
5733            .expect("provenance count");
5734        assert_eq!(provenance_count, 1);
5735    }
5736
5737    #[test]
5738    fn purge_operational_collection_deletes_append_only_rows_before_cutoff() {
5739        let (db, service) = setup();
5740        {
5741            let conn = sqlite::open_connection(db.path()).expect("conn");
5742            conn.execute(
5743                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
5744                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_all\"}', 1, 100)",
5745                [],
5746            )
5747            .expect("seed collection");
5748            conn.execute(
5749                "INSERT INTO operational_mutations \
5750                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
5751                 VALUES ('evt-1', 'audit_log', 'evt-1', 'append', '{\"seq\":1}', 'src-1', 100, 1)",
5752                [],
5753            )
5754            .expect("seed event 1");
5755            conn.execute(
5756                "INSERT INTO operational_mutations \
5757                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
5758                 VALUES ('evt-2', 'audit_log', 'evt-2', 'append', '{\"seq\":2}', 'src-2', 200, 2)",
5759                [],
5760            )
5761            .expect("seed event 2");
5762            conn.execute(
5763                "INSERT INTO operational_mutations \
5764                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
5765                 VALUES ('evt-3', 'audit_log', 'evt-3', 'append', '{\"seq\":3}', 'src-3', 300, 3)",
5766                [],
5767            )
5768            .expect("seed event 3");
5769        }
5770
5771        let report = service
5772            .purge_operational_collection("audit_log", 250)
5773            .expect("purge collection");
5774        assert_eq!(report.collection_name, "audit_log");
5775        assert_eq!(report.deleted_mutations, 2);
5776        assert_eq!(report.before_timestamp, 250);
5777
5778        let conn = sqlite::open_connection(db.path()).expect("conn");
5779        let remaining: Vec<String> = {
5780            let mut stmt = conn
5781                .prepare(
5782                    "SELECT id FROM operational_mutations \
5783                     WHERE collection_name = 'audit_log' ORDER BY mutation_order",
5784                )
5785                .expect("stmt");
5786            stmt.query_map([], |row| row.get(0))
5787                .expect("rows")
5788                .collect::<Result<_, _>>()
5789                .expect("collect")
5790        };
5791        assert_eq!(remaining, vec!["evt-3".to_owned()]);
5792        let provenance_count: i64 = conn
5793            .query_row(
5794                "SELECT count(*) FROM provenance_events \
5795                 WHERE event_type = 'operational_collection_purged' AND subject = 'audit_log'",
5796                [],
5797                |row| row.get(0),
5798            )
5799            .expect("provenance count");
5800        assert_eq!(provenance_count, 1);
5801    }
5802
5803    #[test]
5804    fn compact_operational_collection_dry_run_reports_without_mutation() {
5805        let (db, service) = setup();
5806        {
5807            let conn = sqlite::open_connection(db.path()).expect("conn");
5808            conn.execute(
5809                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
5810                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
5811                [],
5812            )
5813            .expect("seed collection");
5814            for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
5815                conn.execute(
5816                    "INSERT INTO operational_mutations \
5817                     (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
5818                     VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
5819                    rusqlite::params![
5820                        format!("evt-{index}"),
5821                        format!("{{\"seq\":{index}}}"),
5822                        created_at,
5823                        index,
5824                    ],
5825                )
5826                .expect("seed event");
5827            }
5828        }
5829
5830        let report = service
5831            .compact_operational_collection("audit_log", true)
5832            .expect("compact collection");
5833        assert_eq!(report.collection_name, "audit_log");
5834        assert_eq!(report.deleted_mutations, 1);
5835        assert!(report.dry_run);
5836        assert_eq!(report.before_timestamp, None);
5837
5838        let conn = sqlite::open_connection(db.path()).expect("conn");
5839        let remaining_count: i64 = conn
5840            .query_row(
5841                "SELECT count(*) FROM operational_mutations WHERE collection_name = 'audit_log'",
5842                [],
5843                |row| row.get(0),
5844            )
5845            .expect("remaining count");
5846        assert_eq!(remaining_count, 3);
5847        let provenance_count: i64 = conn
5848            .query_row(
5849                "SELECT count(*) FROM provenance_events \
5850                 WHERE event_type = 'operational_collection_compacted' AND subject = 'audit_log'",
5851                [],
5852                |row| row.get(0),
5853            )
5854            .expect("provenance count");
5855        assert_eq!(provenance_count, 0);
5856    }
5857
5858    #[test]
5859    fn compact_operational_collection_keep_last_deletes_oldest_rows() {
5860        let (db, service) = setup();
5861        {
5862            let conn = sqlite::open_connection(db.path()).expect("conn");
5863            conn.execute(
5864                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
5865                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
5866                [],
5867            )
5868            .expect("seed collection");
5869            for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
5870                conn.execute(
5871                    "INSERT INTO operational_mutations \
5872                     (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
5873                     VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
5874                    rusqlite::params![
5875                        format!("evt-{index}"),
5876                        format!("{{\"seq\":{index}}}"),
5877                        created_at,
5878                        index,
5879                    ],
5880                )
5881                .expect("seed event");
5882            }
5883        }
5884
5885        let report = service
5886            .compact_operational_collection("audit_log", false)
5887            .expect("compact collection");
5888        assert_eq!(report.deleted_mutations, 1);
5889        assert!(!report.dry_run);
5890
5891        let conn = sqlite::open_connection(db.path()).expect("conn");
5892        let remaining: Vec<String> = {
5893            let mut stmt = conn
5894                .prepare(
5895                    "SELECT id FROM operational_mutations \
5896                     WHERE collection_name = 'audit_log' ORDER BY mutation_order",
5897                )
5898                .expect("stmt");
5899            stmt.query_map([], |row| row.get(0))
5900                .expect("rows")
5901                .collect::<Result<_, _>>()
5902                .expect("collect")
5903        };
5904        assert_eq!(remaining, vec!["evt-2".to_owned(), "evt-3".to_owned()]);
5905        let provenance_count: i64 = conn
5906            .query_row(
5907                "SELECT count(*) FROM provenance_events \
5908                 WHERE event_type = 'operational_collection_compacted' AND subject = 'audit_log'",
5909                [],
5910                |row| row.get(0),
5911            )
5912            .expect("provenance count");
5913        assert_eq!(provenance_count, 1);
5914    }
5915
5916    #[test]
5917    fn plan_and_run_operational_retention_keep_last() {
5918        let (db, service) = setup();
5919        {
5920            let conn = sqlite::open_connection(db.path()).expect("conn");
5921            conn.execute(
5922                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
5923                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
5924                [],
5925            )
5926            .expect("seed collection");
5927            for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
5928                conn.execute(
5929                    "INSERT INTO operational_mutations \
5930                     (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
5931                     VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
5932                    rusqlite::params![
5933                        format!("evt-{index}"),
5934                        format!("{{\"seq\":{index}}}"),
5935                        created_at,
5936                        index,
5937                    ],
5938                )
5939                .expect("seed event");
5940            }
5941        }
5942
5943        let plan = service
5944            .plan_operational_retention(1_000, None, Some(10))
5945            .expect("plan retention");
5946        assert_eq!(plan.collections_examined, 1);
5947        assert_eq!(plan.items[0].collection_name, "audit_log");
5948        assert_eq!(
5949            plan.items[0].action_kind,
5950            crate::operational::OperationalRetentionActionKind::KeepLast
5951        );
5952        assert_eq!(plan.items[0].candidate_deletions, 1);
5953        assert_eq!(plan.items[0].max_rows, Some(2));
5954        assert_eq!(plan.items[0].last_run_at, None);
5955
5956        let dry_run = service
5957            .run_operational_retention(1_000, None, Some(10), true)
5958            .expect("dry-run retention");
5959        assert!(dry_run.dry_run);
5960        assert_eq!(dry_run.collections_acted_on, 1);
5961        assert_eq!(dry_run.items[0].deleted_mutations, 1);
5962        assert_eq!(dry_run.items[0].rows_remaining, 2);
5963
5964        let conn = sqlite::open_connection(db.path()).expect("conn");
5965        let remaining_count: i64 = conn
5966            .query_row(
5967                "SELECT count(*) FROM operational_mutations WHERE collection_name = 'audit_log'",
5968                [],
5969                |row| row.get(0),
5970            )
5971            .expect("remaining count after dry run");
5972        assert_eq!(remaining_count, 3);
5973        let retention_run_count: i64 = conn
5974            .query_row(
5975                "SELECT count(*) FROM operational_retention_runs WHERE collection_name = 'audit_log'",
5976                [],
5977                |row| row.get(0),
5978            )
5979            .expect("retention run count");
5980        assert_eq!(retention_run_count, 0);
5981        drop(conn);
5982
5983        let executed = service
5984            .run_operational_retention(1_000, None, Some(10), false)
5985            .expect("execute retention");
5986        assert_eq!(executed.collections_acted_on, 1);
5987        assert_eq!(executed.items[0].deleted_mutations, 1);
5988        assert_eq!(executed.items[0].rows_remaining, 2);
5989
5990        let conn = sqlite::open_connection(db.path()).expect("conn");
5991        let remaining: Vec<String> = {
5992            let mut stmt = conn
5993                .prepare(
5994                    "SELECT id FROM operational_mutations \
5995                     WHERE collection_name = 'audit_log' ORDER BY mutation_order",
5996                )
5997                .expect("stmt");
5998            stmt.query_map([], |row| row.get(0))
5999                .expect("rows")
6000                .collect::<Result<_, _>>()
6001                .expect("collect")
6002        };
6003        assert_eq!(remaining, vec!["evt-2".to_owned(), "evt-3".to_owned()]);
6004        let last_run_at: i64 = conn
6005            .query_row(
6006                "SELECT executed_at FROM operational_retention_runs \
6007                 WHERE collection_name = 'audit_log' ORDER BY executed_at DESC LIMIT 1",
6008                [],
6009                |row| row.get(0),
6010            )
6011            .expect("last run at");
6012        assert_eq!(last_run_at, 1_000);
6013    }
6014
6015    #[test]
6016    fn dry_run_operational_retention_does_not_mark_noop_collection_as_acted_on() {
6017        let (db, service) = setup();
6018        let conn = sqlite::open_connection(db.path()).expect("conn");
6019        conn.execute(
6020            "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6021             VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
6022            [],
6023        )
6024        .expect("seed collection");
6025        for (index, created_at) in [(1_i64, 100_i64), (2, 200)] {
6026            conn.execute(
6027                "INSERT INTO operational_mutations \
6028                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6029                 VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
6030                rusqlite::params![
6031                    format!("evt-{index}"),
6032                    format!("{{\"seq\":{index}}}"),
6033                    created_at,
6034                    index,
6035                ],
6036            )
6037            .expect("seed event");
6038        }
6039        drop(conn);
6040
6041        let dry_run = service
6042            .run_operational_retention(1_000, None, Some(10), true)
6043            .expect("dry-run retention");
6044        assert!(dry_run.dry_run);
6045        assert_eq!(dry_run.collections_acted_on, 0);
6046        assert_eq!(dry_run.items[0].deleted_mutations, 0);
6047        assert_eq!(dry_run.items[0].rows_remaining, 2);
6048    }
6049
6050    #[test]
6051    fn compact_operational_collection_rejects_latest_state() {
6052        let (_db, service) = setup();
6053        service
6054            .register_operational_collection(&OperationalRegisterRequest {
6055                name: "connector_health".to_owned(),
6056                kind: OperationalCollectionKind::LatestState,
6057                schema_json: "{}".to_owned(),
6058                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6059                filter_fields_json: "[]".to_owned(),
6060                validation_json: String::new(),
6061                secondary_indexes_json: "[]".to_owned(),
6062                format_version: 1,
6063            })
6064            .expect("register collection");
6065
6066        let error = service
6067            .compact_operational_collection("connector_health", false)
6068            .expect_err("latest_state compaction should be rejected");
6069        assert!(matches!(error, EngineError::InvalidWrite(_)));
6070        assert!(error.to_string().contains("append_only_log"));
6071    }
6072
6073    #[test]
6074    fn register_operational_collection_persists_filter_fields_json() {
6075        let (_db, service) = setup();
6076
6077        let record = service
6078            .register_operational_collection(&OperationalRegisterRequest {
6079                name: "audit_log".to_owned(),
6080                kind: OperationalCollectionKind::AppendOnlyLog,
6081                schema_json: "{}".to_owned(),
6082                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6083                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
6084                validation_json: String::new(),
6085                secondary_indexes_json: "[]".to_owned(),
6086                format_version: 1,
6087            })
6088            .expect("register collection");
6089
6090        assert_eq!(
6091            record.filter_fields_json,
6092            r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#
6093        );
6094    }
6095
6096    #[test]
6097    fn read_operational_collection_filters_append_only_rows_by_declared_fields() {
6098        let (db, service) = setup();
6099        service
6100            .register_operational_collection(&OperationalRegisterRequest {
6101                name: "audit_log".to_owned(),
6102                kind: OperationalCollectionKind::AppendOnlyLog,
6103                schema_json: "{}".to_owned(),
6104                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6105                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"seq","type":"integer","modes":["exact","range"]},{"name":"ts","type":"timestamp","modes":["exact","range"]}]"#.to_owned(),
6106                validation_json: String::new(),
6107                secondary_indexes_json: "[]".to_owned(),
6108                format_version: 1,
6109            })
6110            .expect("register collection");
6111        {
6112            let writer = crate::WriterActor::start(
6113                db.path(),
6114                Arc::new(SchemaManager::new()),
6115                crate::ProvenanceMode::Warn,
6116                Arc::new(crate::TelemetryCounters::default()),
6117            )
6118            .expect("writer");
6119            writer
6120                .submit(crate::WriteRequest {
6121                    label: "operational".to_owned(),
6122                    nodes: vec![],
6123                    node_retires: vec![],
6124                    edges: vec![],
6125                    edge_retires: vec![],
6126                    chunks: vec![],
6127                    runs: vec![],
6128                    steps: vec![],
6129                    actions: vec![],
6130                    optional_backfills: vec![],
6131                    vec_inserts: vec![],
6132                    operational_writes: vec![
6133                        crate::OperationalWrite::Append {
6134                            collection: "audit_log".to_owned(),
6135                            record_key: "evt-1".to_owned(),
6136                            payload_json: r#"{"actor":"alice","seq":1,"ts":100}"#.to_owned(),
6137                            source_ref: Some("src-1".to_owned()),
6138                        },
6139                        crate::OperationalWrite::Append {
6140                            collection: "audit_log".to_owned(),
6141                            record_key: "evt-2".to_owned(),
6142                            payload_json: r#"{"actor":"alice-admin","seq":2,"ts":200}"#.to_owned(),
6143                            source_ref: Some("src-2".to_owned()),
6144                        },
6145                        crate::OperationalWrite::Append {
6146                            collection: "audit_log".to_owned(),
6147                            record_key: "evt-3".to_owned(),
6148                            payload_json: r#"{"actor":"bob","seq":3,"ts":300}"#.to_owned(),
6149                            source_ref: Some("src-3".to_owned()),
6150                        },
6151                    ],
6152                })
6153                .expect("write");
6154        }
6155
6156        let report = service
6157            .read_operational_collection(&crate::operational::OperationalReadRequest {
6158                collection_name: "audit_log".to_owned(),
6159                filters: vec![
6160                    crate::operational::OperationalFilterClause::Prefix {
6161                        field: "actor".to_owned(),
6162                        value: "alice".to_owned(),
6163                    },
6164                    crate::operational::OperationalFilterClause::Range {
6165                        field: "ts".to_owned(),
6166                        lower: Some(150),
6167                        upper: Some(250),
6168                    },
6169                ],
6170                limit: Some(10),
6171            })
6172            .expect("filtered read");
6173
6174        assert_eq!(report.collection_name, "audit_log");
6175        assert_eq!(report.row_count, 1);
6176        assert!(!report.was_limited);
6177        assert_eq!(report.rows.len(), 1);
6178        assert_eq!(report.rows[0].record_key, "evt-2");
6179        assert_eq!(
6180            report.rows[0].payload_json,
6181            r#"{"actor":"alice-admin","seq":2,"ts":200}"#
6182        );
6183    }
6184
6185    #[test]
6186    fn read_operational_collection_uses_secondary_index_when_filter_values_are_missing() {
6187        let (db, service) = setup();
6188        service
6189            .register_operational_collection(&OperationalRegisterRequest {
6190                name: "audit_log".to_owned(),
6191                kind: OperationalCollectionKind::AppendOnlyLog,
6192                schema_json: "{}".to_owned(),
6193                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6194                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
6195                validation_json: String::new(),
6196                secondary_indexes_json: r#"[{"name":"actor_ts","kind":"append_only_field_time","field":"actor","value_type":"string","time_field":"ts"}]"#.to_owned(),
6197                format_version: 1,
6198            })
6199            .expect("register collection");
6200        {
6201            let writer = crate::WriterActor::start(
6202                db.path(),
6203                Arc::new(SchemaManager::new()),
6204                crate::ProvenanceMode::Warn,
6205                Arc::new(crate::TelemetryCounters::default()),
6206            )
6207            .expect("writer");
6208            writer
6209                .submit(crate::WriteRequest {
6210                    label: "operational".to_owned(),
6211                    nodes: vec![],
6212                    node_retires: vec![],
6213                    edges: vec![],
6214                    edge_retires: vec![],
6215                    chunks: vec![],
6216                    runs: vec![],
6217                    steps: vec![],
6218                    actions: vec![],
6219                    optional_backfills: vec![],
6220                    vec_inserts: vec![],
6221                    operational_writes: vec![
6222                        crate::OperationalWrite::Append {
6223                            collection: "audit_log".to_owned(),
6224                            record_key: "evt-1".to_owned(),
6225                            payload_json: r#"{"actor":"alice","ts":100}"#.to_owned(),
6226                            source_ref: Some("src-1".to_owned()),
6227                        },
6228                        crate::OperationalWrite::Append {
6229                            collection: "audit_log".to_owned(),
6230                            record_key: "evt-2".to_owned(),
6231                            payload_json: r#"{"actor":"alice-admin","ts":200}"#.to_owned(),
6232                            source_ref: Some("src-2".to_owned()),
6233                        },
6234                    ],
6235                })
6236                .expect("write");
6237        }
6238        let conn = sqlite::open_connection(db.path()).expect("conn");
6239        conn.execute(
6240            "DELETE FROM operational_filter_values WHERE collection_name = 'audit_log'",
6241            [],
6242        )
6243        .expect("clear filter values");
6244        drop(conn);
6245
6246        let report = service
6247            .read_operational_collection(&crate::operational::OperationalReadRequest {
6248                collection_name: "audit_log".to_owned(),
6249                filters: vec![
6250                    crate::operational::OperationalFilterClause::Prefix {
6251                        field: "actor".to_owned(),
6252                        value: "alice".to_owned(),
6253                    },
6254                    crate::operational::OperationalFilterClause::Range {
6255                        field: "ts".to_owned(),
6256                        lower: Some(150),
6257                        upper: Some(250),
6258                    },
6259                ],
6260                limit: Some(10),
6261            })
6262            .expect("secondary-index read");
6263
6264        assert_eq!(report.row_count, 1);
6265        assert_eq!(report.rows[0].record_key, "evt-2");
6266    }
6267
6268    #[test]
6269    fn read_operational_collection_rejects_undeclared_fields_and_latest_state_collections() {
6270        let (_db, service) = setup();
6271        service
6272            .register_operational_collection(&OperationalRegisterRequest {
6273                name: "connector_health".to_owned(),
6274                kind: OperationalCollectionKind::LatestState,
6275                schema_json: "{}".to_owned(),
6276                retention_json: "{}".to_owned(),
6277                filter_fields_json: r#"[{"name":"status","type":"string","modes":["exact"]}]"#
6278                    .to_owned(),
6279                validation_json: String::new(),
6280                secondary_indexes_json: "[]".to_owned(),
6281                format_version: 1,
6282            })
6283            .expect("register collection");
6284
6285        let latest_state_error = service
6286            .read_operational_collection(&crate::operational::OperationalReadRequest {
6287                collection_name: "connector_health".to_owned(),
6288                filters: vec![crate::operational::OperationalFilterClause::Exact {
6289                    field: "status".to_owned(),
6290                    value: crate::operational::OperationalFilterValue::String("ok".to_owned()),
6291                }],
6292                limit: Some(10),
6293            })
6294            .expect_err("latest_state filtered reads should be rejected");
6295        assert!(latest_state_error.to_string().contains("append_only_log"));
6296
6297        service
6298            .register_operational_collection(&OperationalRegisterRequest {
6299                name: "audit_log".to_owned(),
6300                kind: OperationalCollectionKind::AppendOnlyLog,
6301                schema_json: "{}".to_owned(),
6302                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6303                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact"]}]"#
6304                    .to_owned(),
6305                validation_json: String::new(),
6306                secondary_indexes_json: "[]".to_owned(),
6307                format_version: 1,
6308            })
6309            .expect("register append-only collection");
6310
6311        let undeclared_error = service
6312            .read_operational_collection(&crate::operational::OperationalReadRequest {
6313                collection_name: "audit_log".to_owned(),
6314                filters: vec![crate::operational::OperationalFilterClause::Exact {
6315                    field: "missing".to_owned(),
6316                    value: crate::operational::OperationalFilterValue::String("x".to_owned()),
6317                }],
6318                limit: Some(10),
6319            })
6320            .expect_err("undeclared field should be rejected");
6321        assert!(undeclared_error.to_string().contains("undeclared"));
6322    }
6323
6324    #[test]
6325    fn read_operational_collection_applies_limit_and_reports_truncation() {
6326        let (db, service) = setup();
6327        service
6328            .register_operational_collection(&OperationalRegisterRequest {
6329                name: "audit_log".to_owned(),
6330                kind: OperationalCollectionKind::AppendOnlyLog,
6331                schema_json: "{}".to_owned(),
6332                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6333                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["prefix"]}]"#
6334                    .to_owned(),
6335                validation_json: String::new(),
6336                secondary_indexes_json: "[]".to_owned(),
6337                format_version: 1,
6338            })
6339            .expect("register collection");
6340        {
6341            let writer = crate::WriterActor::start(
6342                db.path(),
6343                Arc::new(SchemaManager::new()),
6344                crate::ProvenanceMode::Warn,
6345                Arc::new(crate::TelemetryCounters::default()),
6346            )
6347            .expect("writer");
6348            writer
6349                .submit(crate::WriteRequest {
6350                    label: "operational".to_owned(),
6351                    nodes: vec![],
6352                    node_retires: vec![],
6353                    edges: vec![],
6354                    edge_retires: vec![],
6355                    chunks: vec![],
6356                    runs: vec![],
6357                    steps: vec![],
6358                    actions: vec![],
6359                    optional_backfills: vec![],
6360                    vec_inserts: vec![],
6361                    operational_writes: vec![
6362                        crate::OperationalWrite::Append {
6363                            collection: "audit_log".to_owned(),
6364                            record_key: "evt-1".to_owned(),
6365                            payload_json: r#"{"actor":"alice-1"}"#.to_owned(),
6366                            source_ref: Some("src-1".to_owned()),
6367                        },
6368                        crate::OperationalWrite::Append {
6369                            collection: "audit_log".to_owned(),
6370                            record_key: "evt-2".to_owned(),
6371                            payload_json: r#"{"actor":"alice-2"}"#.to_owned(),
6372                            source_ref: Some("src-2".to_owned()),
6373                        },
6374                    ],
6375                })
6376                .expect("write");
6377        }
6378
6379        let report = service
6380            .read_operational_collection(&crate::operational::OperationalReadRequest {
6381                collection_name: "audit_log".to_owned(),
6382                filters: vec![crate::operational::OperationalFilterClause::Prefix {
6383                    field: "actor".to_owned(),
6384                    value: "alice".to_owned(),
6385                }],
6386                limit: Some(1),
6387            })
6388            .expect("limited read");
6389
6390        assert_eq!(report.row_count, 1);
6391        assert_eq!(report.applied_limit, 1);
6392        assert!(report.was_limited);
6393        assert_eq!(report.rows[0].record_key, "evt-2");
6394    }
6395
6396    #[test]
6397    fn preexisting_operational_collection_can_gain_filter_contract_after_upgrade() {
6398        let db = NamedTempFile::new().expect("temp db");
6399        let conn = sqlite::open_connection(db.path()).expect("conn");
6400        conn.execute_batch(
6401            r#"
6402            CREATE TABLE operational_collections (
6403                name TEXT PRIMARY KEY,
6404                kind TEXT NOT NULL,
6405                schema_json TEXT NOT NULL,
6406                retention_json TEXT NOT NULL,
6407                format_version INTEGER NOT NULL DEFAULT 1,
6408                created_at INTEGER NOT NULL DEFAULT 100,
6409                disabled_at INTEGER
6410            );
6411            CREATE TABLE operational_mutations (
6412                id TEXT PRIMARY KEY,
6413                collection_name TEXT NOT NULL,
6414                record_key TEXT NOT NULL,
6415                op_kind TEXT NOT NULL,
6416                payload_json TEXT NOT NULL,
6417                source_ref TEXT,
6418                created_at INTEGER NOT NULL DEFAULT 100,
6419                mutation_order INTEGER NOT NULL DEFAULT 1
6420            );
6421            INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at)
6422            VALUES ('audit_log', 'append_only_log', '{}', '{"mode":"keep_all"}', 1, 100);
6423            INSERT INTO operational_mutations
6424                (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order)
6425            VALUES
6426                ('evt-1', 'audit_log', 'evt-1', 'append', '{"actor":"alice","ts":0}', 'src-1', 100, 1);
6427            "#,
6428        )
6429        .expect("seed pre-v10 schema");
6430        drop(conn);
6431
6432        let service = AdminService::new(db.path(), Arc::new(SchemaManager::new()));
6433        let pre_update = service
6434            .read_operational_collection(&crate::operational::OperationalReadRequest {
6435                collection_name: "audit_log".to_owned(),
6436                filters: vec![crate::operational::OperationalFilterClause::Exact {
6437                    field: "actor".to_owned(),
6438                    value: crate::operational::OperationalFilterValue::String("alice".to_owned()),
6439                }],
6440                limit: Some(10),
6441            })
6442            .expect_err("read should reject undeclared fields before migration update");
6443        assert!(pre_update.to_string().contains("undeclared"));
6444
6445        let updated = service
6446            .update_operational_collection_filters(
6447                "audit_log",
6448                r#"[{"name":"actor","type":"string","modes":["exact"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#,
6449            )
6450            .expect("update filter contract");
6451        assert!(updated.filter_fields_json.contains("\"actor\""));
6452
6453        let report = service
6454            .read_operational_collection(&crate::operational::OperationalReadRequest {
6455                collection_name: "audit_log".to_owned(),
6456                filters: vec![crate::operational::OperationalFilterClause::Range {
6457                    field: "ts".to_owned(),
6458                    lower: Some(0),
6459                    upper: Some(0),
6460                }],
6461                limit: Some(10),
6462            })
6463            .expect("read after explicit filter update");
6464        assert_eq!(report.row_count, 1);
6465        assert_eq!(report.rows[0].record_key, "evt-1");
6466    }
6467
6468    #[cfg(feature = "sqlite-vec")]
6469    #[test]
6470    fn check_semantics_detects_stale_vec_rows() {
6471        use crate::sqlite::open_connection_with_vec;
6472
6473        let db = NamedTempFile::new().expect("temp file");
6474        let schema = Arc::new(SchemaManager::new());
6475        {
6476            let conn = open_connection_with_vec(db.path()).expect("vec conn");
6477            schema.bootstrap(&conn).expect("bootstrap");
6478            schema
6479                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 3)
6480                .expect("vec profile");
6481            // Insert a vec row whose chunk does not exist.
6482            let bytes: Vec<u8> = [0.1f32, 0.2f32, 0.3f32]
6483                .iter()
6484                .flat_map(|f| f.to_le_bytes())
6485                .collect();
6486            conn.execute(
6487                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('ghost-chunk', ?1)",
6488                rusqlite::params![bytes],
6489            )
6490            .expect("insert stale vec row");
6491        }
6492        let service = AdminService::new(db.path(), Arc::clone(&schema));
6493        let report = service.check_semantics().expect("semantics check");
6494        assert_eq!(report.stale_vec_rows, 1);
6495        assert!(
6496            report.warnings.iter().any(|w| w.contains("stale vec")),
6497            "warning must mention stale vec"
6498        );
6499    }
6500
6501    #[cfg(feature = "sqlite-vec")]
6502    #[test]
6503    fn restore_vector_profiles_recreates_vec_table_from_metadata() {
6504        let db = NamedTempFile::new().expect("temp file");
6505        let schema = Arc::new(SchemaManager::new());
6506        {
6507            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
6508            schema.bootstrap(&conn).expect("bootstrap");
6509            conn.execute(
6510                "INSERT INTO vector_profiles (profile, table_name, dimension, enabled) \
6511                 VALUES ('default', 'vec_nodes_active', 3, 1)",
6512                [],
6513            )
6514            .expect("insert vector profile");
6515        }
6516
6517        let service = AdminService::new(db.path(), Arc::clone(&schema));
6518        let report = service
6519            .restore_vector_profiles()
6520            .expect("restore vector profiles");
6521        assert_eq!(
6522            report.targets,
6523            vec![crate::projection::ProjectionTarget::Vec]
6524        );
6525        assert_eq!(report.rebuilt_rows, 1);
6526
6527        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
6528        let count: i64 = conn
6529            .query_row(
6530                "SELECT count(*) FROM sqlite_schema WHERE name = 'vec_nodes_active'",
6531                [],
6532                |row| row.get(0),
6533            )
6534            .expect("vec schema count");
6535        assert_eq!(count, 1, "vec table should exist after restore");
6536    }
6537
6538    #[cfg(feature = "sqlite-vec")]
6539    #[test]
6540    fn load_vector_regeneration_config_supports_json_and_toml() {
6541        let dir = tempfile::tempdir().expect("temp dir");
6542        let json_path = dir.path().join("regen.json");
6543        let toml_path = dir.path().join("regen.toml");
6544
6545        let config = VectorRegenerationConfig {
6546            profile: "default".to_owned(),
6547            table_name: "vec_nodes_active".to_owned(),
6548            model_identity: "model-a".to_owned(),
6549            model_version: "1.0".to_owned(),
6550            dimension: 4,
6551            normalization_policy: "l2".to_owned(),
6552            chunking_policy: "per_chunk".to_owned(),
6553            preprocessing_policy: "trim".to_owned(),
6554            generator_command: vec!["/bin/echo".to_owned()],
6555        };
6556
6557        fs::write(&json_path, serde_json::to_string(&config).expect("json")).expect("write json");
6558        fs::write(&toml_path, toml::to_string(&config).expect("toml")).expect("write toml");
6559
6560        let parsed_json = load_vector_regeneration_config(&json_path).expect("json parse");
6561        let parsed_toml = load_vector_regeneration_config(&toml_path).expect("toml parse");
6562
6563        assert_eq!(parsed_json, config);
6564        assert_eq!(parsed_toml, config);
6565    }
6566
6567    #[cfg(all(not(feature = "sqlite-vec"), unix))]
6568    #[test]
6569    fn regenerate_vector_embeddings_unsupported_vec_capability_writes_request_and_failed_audit() {
6570        let db = NamedTempFile::new().expect("temp file");
6571        let schema = Arc::new(SchemaManager::new());
6572        let temp_dir = tempfile::tempdir().expect("temp dir");
6573        let script_path = temp_dir.path().join("vector-generator-no-vec.sh");
6574
6575        fs::write(
6576            &script_path,
6577            r#"#!/usr/bin/env bash
6578set -euo pipefail
6579python3 -c 'import json, sys
6580payload = json.load(sys.stdin)
6581embeddings = [{"chunk_id": chunk["chunk_id"], "embedding": [1.0, 0.0, 0.0, 0.0]} for chunk in payload["chunks"]]
6582json.dump({"embeddings": embeddings}, sys.stdout)'
6583"#,
6584        )
6585        .expect("write generator script");
6586        set_file_mode(&script_path, 0o755);
6587
6588        {
6589            let conn = sqlite::open_connection(db.path()).expect("connection");
6590            schema.bootstrap(&conn).expect("bootstrap");
6591            conn.execute(
6592                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
6593                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
6594                [],
6595            )
6596            .expect("insert node");
6597            conn.execute(
6598                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
6599                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
6600                [],
6601            )
6602            .expect("insert chunk");
6603        }
6604
6605        let service = AdminService::new(db.path(), Arc::clone(&schema));
6606        let error = service
6607            .regenerate_vector_embeddings(&VectorRegenerationConfig {
6608                profile: "default".to_owned(),
6609                table_name: "vec_nodes_active".to_owned(),
6610                model_identity: "test-model".to_owned(),
6611                model_version: "1.0.0".to_owned(),
6612                dimension: 4,
6613                normalization_policy: "l2".to_owned(),
6614                chunking_policy: "per_chunk".to_owned(),
6615                preprocessing_policy: "trim".to_owned(),
6616                generator_command: vec![script_path.to_string_lossy().to_string()],
6617            })
6618            .expect_err("sqlite-vec capability should be required");
6619
6620        assert!(error.to_string().contains("unsupported vec capability"));
6621
6622        let conn = sqlite::open_connection(db.path()).expect("connection");
6623        let request_count: i64 = conn
6624            .query_row(
6625                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_requested' AND subject = 'default'",
6626                [],
6627                |row| row.get(0),
6628            )
6629            .expect("request count");
6630        assert_eq!(request_count, 1);
6631        let failed_count: i64 = conn
6632            .query_row(
6633                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
6634                [],
6635                |row| row.get(0),
6636            )
6637            .expect("failed count");
6638        assert_eq!(failed_count, 1);
6639        let metadata_json: String = conn
6640            .query_row(
6641                "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
6642                [],
6643                |row| row.get(0),
6644            )
6645            .expect("failed metadata");
6646        assert!(metadata_json.contains("\"failure_class\":\"unsupported vec capability\""));
6647    }
6648
6649    #[cfg(feature = "sqlite-vec")]
6650    #[test]
6651    fn regenerate_vector_embeddings_rebuilds_embeddings_from_generator() {
6652        let db = NamedTempFile::new().expect("temp file");
6653        let schema = Arc::new(SchemaManager::new());
6654        let temp_dir = tempfile::tempdir().expect("temp dir");
6655        let script_path = temp_dir.path().join("vector-generator.sh");
6656
6657        fs::write(
6658            &script_path,
6659            r#"#!/usr/bin/env bash
6660set -euo pipefail
6661python3 -c 'import json, sys
6662payload = json.load(sys.stdin)
6663embeddings = []
6664for chunk in payload["chunks"]:
6665    text = chunk["text_content"].lower()
6666    if "budget" in text:
6667        embedding = [1.0, 0.0, 0.0, 0.0]
6668    else:
6669        embedding = [0.0, 1.0, 0.0, 0.0]
6670    embeddings.append({"chunk_id": chunk["chunk_id"], "embedding": embedding})
6671json.dump({"embeddings": embeddings}, sys.stdout)'
6672"#,
6673        )
6674        .expect("write generator script");
6675        set_file_mode(&script_path, 0o755);
6676
6677        {
6678            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
6679            schema.bootstrap(&conn).expect("bootstrap");
6680            conn.execute(
6681                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
6682                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
6683                [],
6684            )
6685            .expect("insert node");
6686            conn.execute(
6687                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
6688                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
6689                [],
6690            )
6691            .expect("insert chunk 1");
6692            conn.execute(
6693                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
6694                 VALUES ('chunk-2', 'doc-1', 'travel plan', 101)",
6695                [],
6696            )
6697            .expect("insert chunk 2");
6698        }
6699
6700        let service = AdminService::new(db.path(), Arc::clone(&schema));
6701        let report = service
6702            .regenerate_vector_embeddings(&VectorRegenerationConfig {
6703                profile: "default".to_owned(),
6704                table_name: "vec_nodes_active".to_owned(),
6705                model_identity: "test-model".to_owned(),
6706                model_version: "1.0.0".to_owned(),
6707                dimension: 4,
6708                normalization_policy: "l2".to_owned(),
6709                chunking_policy: "per_chunk".to_owned(),
6710                preprocessing_policy: "trim".to_owned(),
6711                generator_command: vec![script_path.to_string_lossy().to_string()],
6712            })
6713            .expect("regenerate vectors");
6714
6715        assert_eq!(report.profile, "default");
6716        assert_eq!(report.table_name, "vec_nodes_active");
6717        assert_eq!(report.dimension, 4);
6718        assert_eq!(report.total_chunks, 2);
6719        assert_eq!(report.regenerated_rows, 2);
6720        assert!(report.contract_persisted);
6721
6722        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
6723        let vec_count: i64 = conn
6724            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
6725                row.get(0)
6726            })
6727            .expect("vec count");
6728        assert_eq!(vec_count, 2);
6729
6730        let contract_count: i64 = conn
6731            .query_row(
6732                "SELECT count(*) FROM vector_embedding_contracts WHERE profile = 'default'",
6733                [],
6734                |row| row.get(0),
6735            )
6736            .expect("contract count");
6737        assert_eq!(contract_count, 1);
6738        let applied_at: i64 = conn
6739            .query_row(
6740                "SELECT applied_at FROM vector_embedding_contracts WHERE profile = 'default'",
6741                [],
6742                |row| row.get(0),
6743            )
6744            .expect("applied_at");
6745        assert!(applied_at > 0);
6746        let snapshot_hash: String = conn
6747            .query_row(
6748                "SELECT snapshot_hash FROM vector_embedding_contracts WHERE profile = 'default'",
6749                [],
6750                |row| row.get(0),
6751            )
6752            .expect("snapshot_hash");
6753        assert!(!snapshot_hash.is_empty());
6754        let contract_format_version: i64 = conn
6755            .query_row(
6756                "SELECT contract_format_version FROM vector_embedding_contracts WHERE profile = 'default'",
6757                [],
6758                |row| row.get(0),
6759            )
6760            .expect("contract_format_version");
6761        assert_eq!(contract_format_version, 1);
6762        let request_count: i64 = conn
6763            .query_row(
6764                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_requested' AND subject = 'default'",
6765                [],
6766                |row| row.get(0),
6767            )
6768            .expect("request audit count");
6769        assert_eq!(request_count, 1);
6770        let apply_count: i64 = conn
6771            .query_row(
6772                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_apply' AND subject = 'default'",
6773                [],
6774                |row| row.get(0),
6775            )
6776            .expect("apply audit count");
6777        assert_eq!(apply_count, 1);
6778        let apply_metadata: String = conn
6779            .query_row(
6780                "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_apply' AND subject = 'default'",
6781                [],
6782                |row| row.get(0),
6783            )
6784            .expect("apply metadata");
6785        assert!(apply_metadata.contains("\"profile\":\"default\""));
6786        assert!(apply_metadata.contains("\"snapshot_hash\":"));
6787    }
6788
6789    #[cfg(feature = "sqlite-vec")]
6790    #[test]
6791    fn regenerate_vector_embeddings_failure_leaves_contract_and_vec_rows_unchanged() {
6792        let db = NamedTempFile::new().expect("temp file");
6793        let schema = Arc::new(SchemaManager::new());
6794        let temp_dir = tempfile::tempdir().expect("temp dir");
6795        let script_path = temp_dir.path().join("vector-generator-fail.sh");
6796
6797        fs::write(
6798            &script_path,
6799            "#!/usr/bin/env bash\nset -euo pipefail\necho 'generator boom' >&2\nexit 17\n",
6800        )
6801        .expect("write failing script");
6802        set_file_mode(&script_path, 0o755);
6803
6804        {
6805            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
6806            schema.bootstrap(&conn).expect("bootstrap");
6807            conn.execute(
6808                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
6809                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
6810                [],
6811            )
6812            .expect("insert node");
6813            conn.execute(
6814                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
6815                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
6816                [],
6817            )
6818            .expect("insert chunk");
6819            schema
6820                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
6821                .expect("ensure vec profile");
6822            conn.execute(
6823                r"
6824                INSERT INTO vector_embedding_contracts (
6825                    profile,
6826                    table_name,
6827                    model_identity,
6828                    model_version,
6829                    dimension,
6830                    normalization_policy,
6831                    chunking_policy,
6832                    preprocessing_policy,
6833                    generator_command_json,
6834                    applied_at,
6835                    snapshot_hash
6836                ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)
6837                ",
6838                rusqlite::params![
6839                    "default",
6840                    "vec_nodes_active",
6841                    "old-model",
6842                    "0.9.0",
6843                    4,
6844                    "l2",
6845                    "per_chunk",
6846                    "trim",
6847                    "[\"/bin/echo\"]",
6848                    111,
6849                    "old-snapshot"
6850                ],
6851            )
6852            .expect("seed contract");
6853            conn.execute(
6854                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
6855                [],
6856            )
6857            .expect("seed vec row");
6858        }
6859
6860        let service = AdminService::new(db.path(), Arc::clone(&schema));
6861        let error = service
6862            .regenerate_vector_embeddings_with_policy(
6863                &VectorRegenerationConfig {
6864                    profile: "default".to_owned(),
6865                    table_name: "vec_nodes_active".to_owned(),
6866                    model_identity: "new-model".to_owned(),
6867                    model_version: "1.0.0".to_owned(),
6868                    dimension: 4,
6869                    normalization_policy: "l2".to_owned(),
6870                    chunking_policy: "per_chunk".to_owned(),
6871                    preprocessing_policy: "trim".to_owned(),
6872                    generator_command: vec![script_path.to_string_lossy().to_string()],
6873                },
6874                &VectorGeneratorPolicy::default(),
6875            )
6876            .expect_err("generator should fail");
6877
6878        assert!(error.to_string().contains("generator nonzero exit"));
6879
6880        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
6881        let model_identity: String = conn
6882            .query_row(
6883                "SELECT model_identity FROM vector_embedding_contracts WHERE profile = 'default'",
6884                [],
6885                |row| row.get(0),
6886            )
6887            .expect("model identity");
6888        assert_eq!(model_identity, "old-model");
6889        let snapshot_hash: String = conn
6890            .query_row(
6891                "SELECT snapshot_hash FROM vector_embedding_contracts WHERE profile = 'default'",
6892                [],
6893                |row| row.get(0),
6894            )
6895            .expect("snapshot hash");
6896        assert_eq!(snapshot_hash, "old-snapshot");
6897        let vec_count: i64 = conn
6898            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
6899                row.get(0)
6900            })
6901            .expect("vec count");
6902        assert_eq!(vec_count, 1);
6903        let failure_count: i64 = conn
6904            .query_row(
6905                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
6906                [],
6907                |row| row.get(0),
6908            )
6909            .expect("failure count");
6910        assert_eq!(failure_count, 1);
6911        let failure_metadata: String = conn
6912            .query_row(
6913                "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
6914                [],
6915                |row| row.get(0),
6916            )
6917            .expect("failure metadata");
6918        assert!(failure_metadata.contains("\"failure_class\":\"generator nonzero exit\""));
6919    }
6920
6921    #[cfg(feature = "sqlite-vec")]
6922    #[test]
6923    fn regenerate_vector_embeddings_snapshot_drift_is_retryable_and_non_mutating() {
6924        let db = NamedTempFile::new().expect("temp file");
6925        let schema = Arc::new(SchemaManager::new());
6926        let temp_dir = tempfile::tempdir().expect("temp dir");
6927        let script_path = temp_dir.path().join("vector-generator-drift.sh");
6928        let db_path = db.path().to_string_lossy().to_string();
6929
6930        fs::write(
6931            &script_path,
6932            format!(
6933                r#"#!/usr/bin/env bash
6934set -euo pipefail
6935python3 -c 'import json, sqlite3, sys
6936payload = json.load(sys.stdin)
6937conn = sqlite3.connect({db_path:?})
6938conn.execute("INSERT INTO chunks (id, node_logical_id, text_content, created_at) VALUES (?, ?, ?, ?)", ("chunk-2", "doc-1", "late arriving text", 101))
6939conn.commit()
6940conn.close()
6941embeddings = [{{"chunk_id": chunk["chunk_id"], "embedding": [1.0, 0.0, 0.0, 0.0]}} for chunk in payload["chunks"]]
6942json.dump({{"embeddings": embeddings}}, sys.stdout)'
6943"#,
6944            ),
6945        )
6946        .expect("write drift script");
6947        set_file_mode(&script_path, 0o755);
6948
6949        {
6950            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
6951            schema.bootstrap(&conn).expect("bootstrap");
6952            conn.execute(
6953                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
6954                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
6955                [],
6956            )
6957            .expect("insert node");
6958            conn.execute(
6959                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
6960                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
6961                [],
6962            )
6963            .expect("insert chunk");
6964            schema
6965                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
6966                .expect("ensure vec profile");
6967        }
6968
6969        let service = AdminService::new(db.path(), Arc::clone(&schema));
6970        let error = service
6971            .regenerate_vector_embeddings_with_policy(
6972                &VectorRegenerationConfig {
6973                    profile: "default".to_owned(),
6974                    table_name: "vec_nodes_active".to_owned(),
6975                    model_identity: "test-model".to_owned(),
6976                    model_version: "1.0.0".to_owned(),
6977                    dimension: 4,
6978                    normalization_policy: "l2".to_owned(),
6979                    chunking_policy: "per_chunk".to_owned(),
6980                    preprocessing_policy: "trim".to_owned(),
6981                    generator_command: vec![script_path.to_string_lossy().to_string()],
6982                },
6983                &VectorGeneratorPolicy::default(),
6984            )
6985            .expect_err("snapshot drift should fail");
6986
6987        assert!(
6988            error
6989                .to_string()
6990                .contains("vector regeneration snapshot drift:")
6991        );
6992        assert!(error.to_string().contains("[retryable]"));
6993
6994        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
6995        let contract_count: i64 = conn
6996            .query_row(
6997                "SELECT count(*) FROM vector_embedding_contracts",
6998                [],
6999                |row| row.get(0),
7000            )
7001            .expect("contract count");
7002        assert_eq!(contract_count, 0);
7003        let vec_count: i64 = conn
7004            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
7005                row.get(0)
7006            })
7007            .expect("vec count");
7008        assert_eq!(vec_count, 0);
7009        let failure_count: i64 = conn
7010            .query_row(
7011                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
7012                [],
7013                |row| row.get(0),
7014            )
7015            .expect("failure count");
7016        assert_eq!(failure_count, 1);
7017    }
7018
7019    #[cfg(feature = "sqlite-vec")]
7020    #[test]
7021    fn regenerate_vector_embeddings_times_out_and_kills_generator() {
7022        let (_db, service) = setup();
7023        let temp_dir = tempfile::tempdir().expect("temp dir");
7024        let script_path = temp_dir.path().join("vector-generator-timeout.sh");
7025
7026        fs::write(
7027            &script_path,
7028            "#!/usr/bin/env bash\nset -euo pipefail\nsleep 1\nprintf '{\"embeddings\":[]}'\n",
7029        )
7030        .expect("write timeout script");
7031        set_file_mode(&script_path, 0o755);
7032
7033        let error = service
7034            .regenerate_vector_embeddings_with_policy(
7035                &VectorRegenerationConfig {
7036                    profile: "default".to_owned(),
7037                    table_name: "vec_nodes_active".to_owned(),
7038                    model_identity: "model".to_owned(),
7039                    model_version: "1.0.0".to_owned(),
7040                    dimension: 4,
7041                    normalization_policy: "l2".to_owned(),
7042                    chunking_policy: "per_chunk".to_owned(),
7043                    preprocessing_policy: "trim".to_owned(),
7044                    generator_command: vec![script_path.to_string_lossy().to_string()],
7045                },
7046                &VectorGeneratorPolicy {
7047                    timeout_ms: 50,
7048                    max_stdout_bytes: 1024,
7049                    max_stderr_bytes: 1024,
7050                    max_input_bytes: 1024,
7051                    max_chunks: 10,
7052                    require_absolute_executable: true,
7053                    reject_world_writable_executable: true,
7054                    allowed_executable_roots: vec![],
7055                    preserve_env_vars: vec![],
7056                },
7057            )
7058            .expect_err("generator should time out");
7059        assert!(error.to_string().contains("generator timeout"));
7060    }
7061
7062    #[cfg(feature = "sqlite-vec")]
7063    #[test]
7064    fn regenerate_vector_embeddings_rejects_oversized_stdout() {
7065        let (_db, service) = setup();
7066        let temp_dir = tempfile::tempdir().expect("temp dir");
7067        let script_path = temp_dir.path().join("vector-generator-stdout.sh");
7068
7069        fs::write(
7070            &script_path,
7071            "#!/usr/bin/env bash\nset -euo pipefail\npython3 -c 'import sys; sys.stdout.write(\"x\" * 5000)'\n",
7072        )
7073        .expect("write stdout script");
7074        set_file_mode(&script_path, 0o755);
7075
7076        let error = service
7077            .regenerate_vector_embeddings_with_policy(
7078                &VectorRegenerationConfig {
7079                    profile: "default".to_owned(),
7080                    table_name: "vec_nodes_active".to_owned(),
7081                    model_identity: "model".to_owned(),
7082                    model_version: "1.0.0".to_owned(),
7083                    dimension: 4,
7084                    normalization_policy: "l2".to_owned(),
7085                    chunking_policy: "per_chunk".to_owned(),
7086                    preprocessing_policy: "trim".to_owned(),
7087                    generator_command: vec![script_path.to_string_lossy().to_string()],
7088                },
7089                &VectorGeneratorPolicy {
7090                    timeout_ms: 1000,
7091                    max_stdout_bytes: 128,
7092                    max_stderr_bytes: 1024,
7093                    max_input_bytes: 1024,
7094                    max_chunks: 10,
7095                    require_absolute_executable: true,
7096                    reject_world_writable_executable: true,
7097                    allowed_executable_roots: vec![],
7098                    preserve_env_vars: vec![],
7099                },
7100            )
7101            .expect_err("generator stdout should overflow");
7102        assert!(error.to_string().contains("stdout overflow"));
7103    }
7104
7105    #[cfg(feature = "sqlite-vec")]
7106    #[test]
7107    fn regenerate_vector_embeddings_rejects_oversized_stderr() {
7108        let (_db, service) = setup();
7109        let temp_dir = tempfile::tempdir().expect("temp dir");
7110        let script_path = temp_dir.path().join("vector-generator-stderr.sh");
7111
7112        fs::write(
7113            &script_path,
7114            "#!/usr/bin/env bash\nset -euo pipefail\npython3 -c 'import sys; sys.stderr.write(\"e\" * 5000); sys.exit(7)'\n",
7115        )
7116        .expect("write stderr script");
7117        set_file_mode(&script_path, 0o755);
7118
7119        let error = service
7120            .regenerate_vector_embeddings_with_policy(
7121                &VectorRegenerationConfig {
7122                    profile: "default".to_owned(),
7123                    table_name: "vec_nodes_active".to_owned(),
7124                    model_identity: "model".to_owned(),
7125                    model_version: "1.0.0".to_owned(),
7126                    dimension: 4,
7127                    normalization_policy: "l2".to_owned(),
7128                    chunking_policy: "per_chunk".to_owned(),
7129                    preprocessing_policy: "trim".to_owned(),
7130                    generator_command: vec![script_path.to_string_lossy().to_string()],
7131                },
7132                &VectorGeneratorPolicy {
7133                    timeout_ms: 1000,
7134                    max_stdout_bytes: 1024,
7135                    max_stderr_bytes: 128,
7136                    max_input_bytes: 1024,
7137                    max_chunks: 10,
7138                    require_absolute_executable: true,
7139                    reject_world_writable_executable: true,
7140                    allowed_executable_roots: vec![],
7141                    preserve_env_vars: vec![],
7142                },
7143            )
7144            .expect_err("generator stderr should overflow");
7145        assert!(error.to_string().contains("stderr overflow"));
7146    }
7147
7148    #[cfg(feature = "sqlite-vec")]
7149    #[test]
7150    fn regenerate_vector_embeddings_rejects_oversized_input_before_spawn() {
7151        let db = NamedTempFile::new().expect("temp file");
7152        let schema = Arc::new(SchemaManager::new());
7153        {
7154            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7155            schema.bootstrap(&conn).expect("bootstrap");
7156            conn.execute(
7157                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7158                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7159                [],
7160            )
7161            .expect("insert node");
7162            conn.execute(
7163                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7164                 VALUES ('chunk-1', 'doc-1', 'this chunk is intentionally long to exceed the configured input limit', 100)",
7165                [],
7166            )
7167            .expect("insert chunk");
7168        }
7169
7170        let service = AdminService::new(db.path(), Arc::clone(&schema));
7171        let error = service
7172            .regenerate_vector_embeddings_with_policy(
7173                &VectorRegenerationConfig {
7174                    profile: "default".to_owned(),
7175                    table_name: "vec_nodes_active".to_owned(),
7176                    model_identity: "model".to_owned(),
7177                    model_version: "1.0.0".to_owned(),
7178                    dimension: 4,
7179                    normalization_policy: "l2".to_owned(),
7180                    chunking_policy: "per_chunk".to_owned(),
7181                    preprocessing_policy: "trim".to_owned(),
7182                    generator_command: vec!["/bin/echo".to_owned()],
7183                },
7184                &VectorGeneratorPolicy {
7185                    timeout_ms: 1000,
7186                    max_stdout_bytes: 1024,
7187                    max_stderr_bytes: 1024,
7188                    max_input_bytes: 32,
7189                    max_chunks: 10,
7190                    require_absolute_executable: true,
7191                    reject_world_writable_executable: true,
7192                    allowed_executable_roots: vec![],
7193                    preserve_env_vars: vec![],
7194                },
7195            )
7196            .expect_err("input size should be rejected before spawn");
7197        assert!(error.to_string().contains("payload too large"));
7198    }
7199
7200    #[cfg(feature = "sqlite-vec")]
7201    #[test]
7202    fn regenerate_vector_embeddings_rejects_excessive_chunk_count_before_spawn() {
7203        let db = NamedTempFile::new().expect("temp file");
7204        let schema = Arc::new(SchemaManager::new());
7205        {
7206            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7207            schema.bootstrap(&conn).expect("bootstrap");
7208            conn.execute(
7209                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7210                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7211                [],
7212            )
7213            .expect("insert node");
7214            conn.execute(
7215                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) VALUES ('chunk-1', 'doc-1', 'a', 100)",
7216                [],
7217            )
7218            .expect("insert chunk 1");
7219            conn.execute(
7220                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) VALUES ('chunk-2', 'doc-1', 'b', 101)",
7221                [],
7222            )
7223            .expect("insert chunk 2");
7224        }
7225
7226        let service = AdminService::new(db.path(), Arc::clone(&schema));
7227        let error = service
7228            .regenerate_vector_embeddings_with_policy(
7229                &VectorRegenerationConfig {
7230                    profile: "default".to_owned(),
7231                    table_name: "vec_nodes_active".to_owned(),
7232                    model_identity: "model".to_owned(),
7233                    model_version: "1.0.0".to_owned(),
7234                    dimension: 4,
7235                    normalization_policy: "l2".to_owned(),
7236                    chunking_policy: "per_chunk".to_owned(),
7237                    preprocessing_policy: "trim".to_owned(),
7238                    generator_command: vec!["/bin/echo".to_owned()],
7239                },
7240                &VectorGeneratorPolicy {
7241                    timeout_ms: 1000,
7242                    max_stdout_bytes: 1024,
7243                    max_stderr_bytes: 1024,
7244                    max_input_bytes: 2048,
7245                    max_chunks: 1,
7246                    require_absolute_executable: true,
7247                    reject_world_writable_executable: true,
7248                    allowed_executable_roots: vec![],
7249                    preserve_env_vars: vec![],
7250                },
7251            )
7252            .expect_err("chunk count should be rejected before spawn");
7253        assert!(error.to_string().contains("payload too large"));
7254    }
7255
7256    #[cfg(feature = "sqlite-vec")]
7257    #[test]
7258    fn regenerate_vector_embeddings_malformed_json_leaves_contract_and_vec_rows_unchanged() {
7259        let db = NamedTempFile::new().expect("temp file");
7260        let schema = Arc::new(SchemaManager::new());
7261        let temp_dir = tempfile::tempdir().expect("temp dir");
7262        let script_path = temp_dir.path().join("vector-generator-bad-json.sh");
7263
7264        fs::write(
7265            &script_path,
7266            "#!/usr/bin/env bash\nset -euo pipefail\nprintf 'not-json'\n",
7267        )
7268        .expect("write bad json script");
7269        set_file_mode(&script_path, 0o755);
7270
7271        {
7272            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7273            schema.bootstrap(&conn).expect("bootstrap");
7274            conn.execute(
7275                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7276                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7277                [],
7278            )
7279            .expect("insert node");
7280            conn.execute(
7281                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7282                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7283                [],
7284            )
7285            .expect("insert chunk");
7286            schema
7287                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
7288                .expect("ensure vec profile");
7289            conn.execute(
7290                r"
7291                INSERT INTO vector_embedding_contracts (
7292                    profile,
7293                    table_name,
7294                    model_identity,
7295                    model_version,
7296                    dimension,
7297                    normalization_policy,
7298                    chunking_policy,
7299                    preprocessing_policy,
7300                    generator_command_json,
7301                    applied_at,
7302                    snapshot_hash
7303                ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)
7304                ",
7305                rusqlite::params![
7306                    "default",
7307                    "vec_nodes_active",
7308                    "old-model",
7309                    "0.9.0",
7310                    4,
7311                    "l2",
7312                    "per_chunk",
7313                    "trim",
7314                    "[\"/bin/echo\"]",
7315                    111,
7316                    "old-snapshot"
7317                ],
7318            )
7319            .expect("seed contract");
7320            conn.execute(
7321                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
7322                [],
7323            )
7324            .expect("seed vec row");
7325        }
7326
7327        let service = AdminService::new(db.path(), Arc::clone(&schema));
7328        let error = service
7329            .regenerate_vector_embeddings_with_policy(
7330                &VectorRegenerationConfig {
7331                    profile: "default".to_owned(),
7332                    table_name: "vec_nodes_active".to_owned(),
7333                    model_identity: "new-model".to_owned(),
7334                    model_version: "1.0.0".to_owned(),
7335                    dimension: 4,
7336                    normalization_policy: "l2".to_owned(),
7337                    chunking_policy: "per_chunk".to_owned(),
7338                    preprocessing_policy: "trim".to_owned(),
7339                    generator_command: vec![script_path.to_string_lossy().to_string()],
7340                },
7341                &VectorGeneratorPolicy::default(),
7342            )
7343            .expect_err("bad json should fail");
7344
7345        assert!(error.to_string().contains("decode generator output"));
7346
7347        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7348        let model_identity: String = conn
7349            .query_row(
7350                "SELECT model_identity FROM vector_embedding_contracts WHERE profile = 'default'",
7351                [],
7352                |row| row.get(0),
7353            )
7354            .expect("model identity");
7355        assert_eq!(model_identity, "old-model");
7356        let vec_count: i64 = conn
7357            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
7358                row.get(0)
7359            })
7360            .expect("vec count");
7361        assert_eq!(vec_count, 1);
7362        let failure_count: i64 = conn
7363            .query_row(
7364                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
7365                [],
7366                |row| row.get(0),
7367            )
7368            .expect("failure count");
7369        assert_eq!(failure_count, 1);
7370    }
7371
7372    #[cfg(feature = "sqlite-vec")]
7373    #[test]
7374    fn regenerate_vector_embeddings_rejects_whitespace_only_profile_before_mutation() {
7375        let db = NamedTempFile::new().expect("temp file");
7376        let schema = Arc::new(SchemaManager::new());
7377        {
7378            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7379            schema.bootstrap(&conn).expect("bootstrap");
7380            conn.execute(
7381                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7382                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7383                [],
7384            )
7385            .expect("insert node");
7386            conn.execute(
7387                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7388                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7389                [],
7390            )
7391            .expect("insert chunk");
7392        }
7393
7394        let service = AdminService::new(db.path(), Arc::clone(&schema));
7395        let error = service
7396            .regenerate_vector_embeddings(&VectorRegenerationConfig {
7397                profile: "   ".to_owned(),
7398                table_name: "vec_nodes_active".to_owned(),
7399                model_identity: "test-model".to_owned(),
7400                model_version: "1.0.0".to_owned(),
7401                dimension: 4,
7402                normalization_policy: "l2".to_owned(),
7403                chunking_policy: "per_chunk".to_owned(),
7404                preprocessing_policy: "trim".to_owned(),
7405                generator_command: vec!["/bin/echo".to_owned()],
7406            })
7407            .expect_err("whitespace profile should be rejected");
7408
7409        assert!(error.to_string().contains("invalid contract"));
7410        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7411        let contract_count: i64 = conn
7412            .query_row(
7413                "SELECT count(*) FROM vector_embedding_contracts",
7414                [],
7415                |row| row.get(0),
7416            )
7417            .expect("contract count");
7418        assert_eq!(contract_count, 0);
7419        let provenance_count: i64 = conn
7420            .query_row("SELECT count(*) FROM provenance_events", [], |row| {
7421                row.get(0)
7422            })
7423            .expect("provenance count");
7424        assert_eq!(provenance_count, 0);
7425    }
7426
7427    #[cfg(feature = "sqlite-vec")]
7428    #[test]
7429    fn regenerate_vector_embeddings_rejects_world_writable_executable_when_policy_requires_it() {
7430        let (_db, service) = setup();
7431        let temp_dir = tempfile::tempdir().expect("temp dir");
7432        let script_path = temp_dir.path().join("vector-generator-world-writable.sh");
7433
7434        fs::write(
7435            &script_path,
7436            "#!/usr/bin/env bash\nset -euo pipefail\nprintf '{\"embeddings\":[]}'\n",
7437        )
7438        .expect("write script");
7439        set_file_mode(&script_path, 0o777);
7440
7441        let error = service
7442            .regenerate_vector_embeddings_with_policy(
7443                &VectorRegenerationConfig {
7444                    profile: "default".to_owned(),
7445                    table_name: "vec_nodes_active".to_owned(),
7446                    model_identity: "model".to_owned(),
7447                    model_version: "1.0.0".to_owned(),
7448                    dimension: 4,
7449                    normalization_policy: "l2".to_owned(),
7450                    chunking_policy: "per_chunk".to_owned(),
7451                    preprocessing_policy: "trim".to_owned(),
7452                    generator_command: vec![script_path.to_string_lossy().to_string()],
7453                },
7454                &VectorGeneratorPolicy::default(),
7455            )
7456            .expect_err("world-writable executable should be rejected");
7457
7458        assert!(error.to_string().contains("world-writable executable"));
7459    }
7460
7461    #[cfg(feature = "sqlite-vec")]
7462    #[test]
7463    fn regenerate_vector_embeddings_rejects_executable_outside_allowlisted_roots() {
7464        let (_db, service) = setup();
7465        let temp_dir = tempfile::tempdir().expect("temp dir");
7466        let allowed_dir = tempfile::tempdir().expect("allowed dir");
7467        let script_path = temp_dir.path().join("vector-generator-outside-root.sh");
7468
7469        fs::write(
7470            &script_path,
7471            "#!/usr/bin/env bash\nset -euo pipefail\nprintf '{\"embeddings\":[]}'\n",
7472        )
7473        .expect("write script");
7474        set_file_mode(&script_path, 0o755);
7475
7476        let error = service
7477            .regenerate_vector_embeddings_with_policy(
7478                &VectorRegenerationConfig {
7479                    profile: "default".to_owned(),
7480                    table_name: "vec_nodes_active".to_owned(),
7481                    model_identity: "model".to_owned(),
7482                    model_version: "1.0.0".to_owned(),
7483                    dimension: 4,
7484                    normalization_policy: "l2".to_owned(),
7485                    chunking_policy: "per_chunk".to_owned(),
7486                    preprocessing_policy: "trim".to_owned(),
7487                    generator_command: vec![script_path.to_string_lossy().to_string()],
7488                },
7489                &VectorGeneratorPolicy {
7490                    timeout_ms: 1000,
7491                    max_stdout_bytes: 1024,
7492                    max_stderr_bytes: 1024,
7493                    max_input_bytes: 1024,
7494                    max_chunks: 10,
7495                    require_absolute_executable: true,
7496                    reject_world_writable_executable: true,
7497                    allowed_executable_roots: vec![
7498                        allowed_dir.path().to_string_lossy().to_string(),
7499                    ],
7500                    preserve_env_vars: vec![],
7501                },
7502            )
7503            .expect_err("disallowed root should be rejected");
7504
7505        assert!(
7506            error
7507                .to_string()
7508                .contains("outside allowed executable roots")
7509        );
7510    }
7511
7512    #[cfg(feature = "sqlite-vec")]
7513    #[test]
7514    fn regenerate_vector_embeddings_rejects_future_contract_format_version() {
7515        let db = NamedTempFile::new().expect("temp file");
7516        let schema = Arc::new(SchemaManager::new());
7517        {
7518            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7519            schema.bootstrap(&conn).expect("bootstrap");
7520            conn.execute(
7521                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7522                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7523                [],
7524            )
7525            .expect("insert node");
7526            conn.execute(
7527                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7528                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7529                [],
7530            )
7531            .expect("insert chunk");
7532            conn.execute(
7533                r"
7534                INSERT INTO vector_embedding_contracts (
7535                    profile,
7536                    table_name,
7537                    model_identity,
7538                    model_version,
7539                    dimension,
7540                    normalization_policy,
7541                    chunking_policy,
7542                    preprocessing_policy,
7543                    generator_command_json,
7544                    applied_at,
7545                    snapshot_hash,
7546                    contract_format_version,
7547                    updated_at
7548                ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)
7549                ",
7550                rusqlite::params![
7551                    "default",
7552                    "vec_nodes_active",
7553                    "old-model",
7554                    "0.9.0",
7555                    4,
7556                    "l2",
7557                    "per_chunk",
7558                    "trim",
7559                    "[\"/bin/echo\"]",
7560                    111,
7561                    "old-snapshot",
7562                    99,
7563                    111,
7564                ],
7565            )
7566            .expect("seed future contract");
7567        }
7568
7569        let service = AdminService::new(db.path(), Arc::clone(&schema));
7570        let error = service
7571            .regenerate_vector_embeddings(&VectorRegenerationConfig {
7572                profile: "default".to_owned(),
7573                table_name: "vec_nodes_active".to_owned(),
7574                model_identity: "test-model".to_owned(),
7575                model_version: "1.0.0".to_owned(),
7576                dimension: 4,
7577                normalization_policy: "l2".to_owned(),
7578                chunking_policy: "per_chunk".to_owned(),
7579                preprocessing_policy: "trim".to_owned(),
7580                generator_command: vec!["/bin/echo".to_owned()],
7581            })
7582            .expect_err("future contract version should be rejected");
7583
7584        assert!(error.to_string().contains("unsupported"));
7585        assert!(error.to_string().contains("format version"));
7586    }
7587
7588    #[cfg(feature = "sqlite-vec")]
7589    #[test]
7590    fn regenerate_vector_embeddings_clears_environment_except_preserved_vars() {
7591        let db = NamedTempFile::new().expect("temp file");
7592        let schema = Arc::new(SchemaManager::new());
7593        let temp_dir = tempfile::tempdir().expect("temp dir");
7594        let script_path = temp_dir.path().join("vector-generator-env.sh");
7595        {
7596            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7597            schema.bootstrap(&conn).expect("bootstrap");
7598            conn.execute(
7599                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7600                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7601                [],
7602            )
7603            .expect("insert node");
7604            conn.execute(
7605                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7606                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7607                [],
7608            )
7609            .expect("insert chunk");
7610        }
7611
7612        fs::write(
7613            &script_path,
7614            r#"#!/usr/bin/env bash
7615set -euo pipefail
7616if [[ "${VECTOR_TEST_SECRET:-}" != "expected" ]]; then
7617  echo "missing secret" >&2
7618  exit 9
7619fi
7620python3 -c 'import json, sys
7621payload = json.load(sys.stdin)
7622json.dump({"embeddings": [{"chunk_id": payload["chunks"][0]["chunk_id"], "embedding": [1.0, 0.0, 0.0, 0.0]}]}, sys.stdout)'
7623"#,
7624        )
7625        .expect("write script");
7626        set_file_mode(&script_path, 0o755);
7627
7628        let service = AdminService::new(db.path(), Arc::clone(&schema));
7629        unsafe {
7630            std::env::set_var("VECTOR_TEST_SECRET", "expected");
7631        }
7632        let missing_env = service
7633            .regenerate_vector_embeddings_with_policy(
7634                &VectorRegenerationConfig {
7635                    profile: "default".to_owned(),
7636                    table_name: "vec_nodes_active".to_owned(),
7637                    model_identity: "model".to_owned(),
7638                    model_version: "1.0.0".to_owned(),
7639                    dimension: 4,
7640                    normalization_policy: "l2".to_owned(),
7641                    chunking_policy: "per_chunk".to_owned(),
7642                    preprocessing_policy: "trim".to_owned(),
7643                    generator_command: vec![script_path.to_string_lossy().to_string()],
7644                },
7645                &VectorGeneratorPolicy::default(),
7646            )
7647            .expect_err("non-preserved env var should be dropped");
7648        assert!(missing_env.to_string().contains("nonzero exit"));
7649
7650        let report = service
7651            .regenerate_vector_embeddings_with_policy(
7652                &VectorRegenerationConfig {
7653                    profile: "default".to_owned(),
7654                    table_name: "vec_nodes_active".to_owned(),
7655                    model_identity: "model".to_owned(),
7656                    model_version: "1.0.0".to_owned(),
7657                    dimension: 4,
7658                    normalization_policy: "l2".to_owned(),
7659                    chunking_policy: "per_chunk".to_owned(),
7660                    preprocessing_policy: "trim".to_owned(),
7661                    generator_command: vec![script_path.to_string_lossy().to_string()],
7662                },
7663                &VectorGeneratorPolicy {
7664                    timeout_ms: 1000,
7665                    max_stdout_bytes: 1024,
7666                    max_stderr_bytes: 1024,
7667                    max_input_bytes: 4096,
7668                    max_chunks: 10,
7669                    require_absolute_executable: true,
7670                    reject_world_writable_executable: true,
7671                    allowed_executable_roots: vec![],
7672                    preserve_env_vars: vec!["VECTOR_TEST_SECRET".to_owned()],
7673                },
7674            )
7675            .expect("preserved env var should allow success");
7676        assert_eq!(report.regenerated_rows, 1);
7677        unsafe {
7678            std::env::remove_var("VECTOR_TEST_SECRET");
7679        }
7680    }
7681
7682    #[test]
7683    fn check_semantics_detects_orphaned_chunk() {
7684        let (db, service) = setup();
7685        {
7686            // Open without FK enforcement to insert chunk with no active node.
7687            let conn = sqlite::open_connection(db.path()).expect("conn");
7688            conn.execute(
7689                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7690                 VALUES ('c1', 'ghost-node', 'text', 100)",
7691                [],
7692            )
7693            .expect("insert orphaned chunk");
7694        }
7695        let report = service.check_semantics().expect("semantics check");
7696        assert_eq!(report.orphaned_chunks, 1);
7697    }
7698
7699    #[test]
7700    fn check_semantics_detects_null_source_ref() {
7701        let (db, service) = setup();
7702        {
7703            let conn = sqlite::open_connection(db.path()).expect("conn");
7704            conn.execute(
7705                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at) \
7706                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100)",
7707                [],
7708            )
7709            .expect("insert node with null source_ref");
7710        }
7711        let report = service.check_semantics().expect("semantics check");
7712        assert_eq!(report.null_source_ref_nodes, 1);
7713    }
7714
7715    #[test]
7716    fn check_semantics_detects_broken_step_fk() {
7717        let (db, service) = setup();
7718        {
7719            // Explicitly disable FK enforcement for this connection so we can insert
7720            // an orphaned step (ghost run_id) to simulate a partial-write failure.
7721            let conn = sqlite::open_connection(db.path()).expect("conn");
7722            conn.execute_batch("PRAGMA foreign_keys = OFF;")
7723                .expect("disable FK");
7724            conn.execute(
7725                "INSERT INTO steps (id, run_id, kind, status, properties, created_at) \
7726                 VALUES ('s1', 'ghost-run', 'llm', 'completed', '{}', 100)",
7727                [],
7728            )
7729            .expect("insert step with ghost run_id");
7730        }
7731        let report = service.check_semantics().expect("semantics check");
7732        assert_eq!(report.broken_step_fk, 1);
7733    }
7734
7735    #[test]
7736    fn check_semantics_detects_broken_action_fk() {
7737        let (db, service) = setup();
7738        {
7739            let conn = sqlite::open_connection(db.path()).expect("conn");
7740            conn.execute_batch("PRAGMA foreign_keys = OFF;")
7741                .expect("disable FK");
7742            conn.execute(
7743                "INSERT INTO actions (id, step_id, kind, status, properties, created_at) \
7744                 VALUES ('a1', 'ghost-step', 'emit', 'completed', '{}', 100)",
7745                [],
7746            )
7747            .expect("insert action with ghost step_id");
7748        }
7749        let report = service.check_semantics().expect("semantics check");
7750        assert_eq!(report.broken_action_fk, 1);
7751    }
7752
7753    #[test]
7754    fn check_semantics_detects_stale_fts_rows() {
7755        let (db, service) = setup();
7756        {
7757            let conn = sqlite::open_connection(db.path()).expect("conn");
7758            // FTS virtual tables have no FK constraints; insert a row referencing
7759            // a chunk_id that does not exist in the chunks table.
7760            conn.execute(
7761                "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
7762                 VALUES ('ghost-chunk', 'any-node', 'Meeting', 'stale content')",
7763                [],
7764            )
7765            .expect("insert stale FTS row");
7766        }
7767        let report = service.check_semantics().expect("semantics check");
7768        assert_eq!(report.stale_fts_rows, 1);
7769    }
7770
7771    #[test]
7772    fn check_semantics_detects_fts_rows_for_superseded_nodes() {
7773        let (db, service) = setup();
7774        {
7775            let conn = sqlite::open_connection(db.path()).expect("conn");
7776            // Insert a node that has been fully superseded (superseded_at IS NOT NULL).
7777            conn.execute(
7778                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
7779                 VALUES ('r1', 'lg-sup', 'Meeting', '{}', 100, 200, 'src-1')",
7780                [],
7781            )
7782            .expect("insert superseded node");
7783            // Insert an FTS row for the superseded node's logical_id.
7784            conn.execute(
7785                "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
7786                 VALUES ('ck-x', 'lg-sup', 'Meeting', 'superseded content')",
7787                [],
7788            )
7789            .expect("insert FTS row for superseded node");
7790        }
7791        let report = service.check_semantics().expect("semantics check");
7792        assert_eq!(report.fts_rows_for_superseded_nodes, 1);
7793    }
7794
7795    #[test]
7796    fn check_semantics_detects_dangling_edges() {
7797        let (db, service) = setup();
7798        {
7799            let conn = sqlite::open_connection(db.path()).expect("conn");
7800            conn.execute_batch("PRAGMA foreign_keys = OFF;")
7801                .expect("disable FK");
7802            // One active node as source; target does not exist — edge is dangling.
7803            conn.execute(
7804                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7805                 VALUES ('r1', 'lg-src', 'Meeting', '{}', 100, 'src-1')",
7806                [],
7807            )
7808            .expect("insert source node");
7809            conn.execute(
7810                "INSERT INTO edges \
7811                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
7812                 VALUES ('e1', 'edge-1', 'lg-src', 'ghost-target', 'LINKS', '{}', 100, 'src-1')",
7813                [],
7814            )
7815            .expect("insert dangling edge");
7816        }
7817        let report = service.check_semantics().expect("semantics check");
7818        assert_eq!(report.dangling_edges, 1);
7819    }
7820
7821    #[test]
7822    fn check_semantics_detects_orphaned_supersession_chains() {
7823        let (db, service) = setup();
7824        {
7825            let conn = sqlite::open_connection(db.path()).expect("conn");
7826            // Every version of this logical_id is superseded — no active row remains.
7827            conn.execute(
7828                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
7829                 VALUES ('r1', 'lg-orphaned', 'Meeting', '{}', 100, 200, 'src-1')",
7830                [],
7831            )
7832            .expect("insert fully superseded node");
7833        }
7834        let report = service.check_semantics().expect("semantics check");
7835        assert_eq!(report.orphaned_supersession_chains, 1);
7836    }
7837
7838    #[test]
7839    fn safe_export_writes_manifest_with_sha256() {
7840        let (_db, service) = setup();
7841        let export_dir = tempfile::TempDir::new().expect("temp dir");
7842        let export_path = export_dir.path().join("backup.db");
7843
7844        let manifest = service
7845            .safe_export(
7846                &export_path,
7847                SafeExportOptions {
7848                    force_checkpoint: false,
7849                },
7850            )
7851            .expect("export");
7852
7853        assert!(export_path.exists(), "exported db should exist");
7854        let manifest_path = export_dir.path().join("backup.db.export-manifest.json");
7855        assert!(
7856            manifest_path.exists(),
7857            "manifest file should exist at {}",
7858            manifest_path.display()
7859        );
7860        assert_eq!(manifest.sha256.len(), 64, "sha256 should be 64 hex chars");
7861        assert!(
7862            manifest.exported_at > 0,
7863            "exported_at should be a unix timestamp"
7864        );
7865        assert_eq!(
7866            manifest.schema_version,
7867            SchemaManager::new().current_version().0,
7868            "schema_version should match the live schema version"
7869        );
7870        assert_eq!(manifest.protocol_version, 1, "protocol_version should be 1");
7871        assert!(manifest.page_count > 0, "page_count should be positive");
7872    }
7873
7874    #[test]
7875    fn safe_export_preserves_operational_validation_contracts() {
7876        let (_db, service) = setup();
7877        let validation_json = r#"{"format_version":1,"mode":"enforce","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#;
7878        service
7879            .register_operational_collection(&OperationalRegisterRequest {
7880                name: "connector_health".to_owned(),
7881                kind: OperationalCollectionKind::LatestState,
7882                schema_json: "{}".to_owned(),
7883                retention_json: "{}".to_owned(),
7884                filter_fields_json: "[]".to_owned(),
7885                validation_json: validation_json.to_owned(),
7886                secondary_indexes_json: "[]".to_owned(),
7887                format_version: 1,
7888            })
7889            .expect("register collection");
7890
7891        let export_dir = tempfile::TempDir::new().expect("temp dir");
7892        let export_path = export_dir.path().join("backup.db");
7893        service
7894            .safe_export(
7895                &export_path,
7896                SafeExportOptions {
7897                    force_checkpoint: false,
7898                },
7899            )
7900            .expect("export");
7901
7902        let exported = sqlite::open_connection(&export_path).expect("exported conn");
7903        let exported_validation_json: String = exported
7904            .query_row(
7905                "SELECT validation_json FROM operational_collections WHERE name = 'connector_health'",
7906                [],
7907                |row| row.get(0),
7908            )
7909            .expect("validation_json");
7910        assert_eq!(exported_validation_json, validation_json);
7911    }
7912
7913    #[test]
7914    fn safe_export_force_checkpoint_false_skips_wal_pragma() {
7915        let (_db, service) = setup();
7916        let export_dir = tempfile::TempDir::new().expect("temp dir");
7917        let export_path = export_dir.path().join("no-wal.db");
7918
7919        // force_checkpoint: false must not error even on a non-WAL database
7920        let manifest = service
7921            .safe_export(
7922                &export_path,
7923                SafeExportOptions {
7924                    force_checkpoint: false,
7925                },
7926            )
7927            .expect("export with no checkpoint");
7928
7929        assert!(
7930            manifest.page_count > 0,
7931            "page_count must be populated regardless of checkpoint mode"
7932        );
7933        assert_eq!(
7934            manifest.schema_version,
7935            SchemaManager::new().current_version().0
7936        );
7937        assert_eq!(manifest.protocol_version, 1);
7938    }
7939
7940    #[test]
7941    fn safe_export_force_checkpoint_false_still_captures_wal_backed_changes() {
7942        let (db, service) = setup();
7943        let conn = sqlite::open_connection(db.path()).expect("conn");
7944        let journal_mode: String = conn
7945            .query_row("PRAGMA journal_mode=WAL", [], |row| row.get(0))
7946            .expect("enable wal");
7947        assert_eq!(journal_mode.to_lowercase(), "wal");
7948        let auto_checkpoint_pages: i64 = conn
7949            .query_row("PRAGMA wal_autocheckpoint=0", [], |row| row.get(0))
7950            .expect("disable auto checkpoint");
7951        assert_eq!(auto_checkpoint_pages, 0);
7952        conn.execute(
7953            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7954             VALUES ('r-wal', 'lg-wal', 'Meeting', '{}', 100, 'src-wal')",
7955            [],
7956        )
7957        .expect("insert wal-backed node");
7958
7959        let export_dir = tempfile::TempDir::new().expect("temp dir");
7960        let export_path = export_dir.path().join("wal-backed.db");
7961        service
7962            .safe_export(
7963                &export_path,
7964                SafeExportOptions {
7965                    force_checkpoint: false,
7966                },
7967            )
7968            .expect("export wal-backed db");
7969
7970        let exported = sqlite::open_connection(&export_path).expect("open exported db");
7971        let exported_count: i64 = exported
7972            .query_row(
7973                "SELECT count(*) FROM nodes WHERE logical_id = 'lg-wal'",
7974                [],
7975                |row| row.get(0),
7976            )
7977            .expect("count exported nodes");
7978        assert_eq!(
7979            exported_count, 1,
7980            "safe_export must include committed rows that are still resident in the WAL"
7981        );
7982    }
7983
7984    #[test]
7985    fn excise_source_removes_searchable_content_after_excision() {
7986        let (db, service) = setup();
7987        {
7988            let conn = sqlite::open_connection(db.path()).expect("conn");
7989            conn.execute(
7990                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
7991                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
7992                [],
7993            )
7994            .expect("insert v1");
7995            conn.execute(
7996                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7997                 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
7998                [],
7999            )
8000            .expect("insert v2");
8001            conn.execute(
8002                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
8003                 VALUES ('ck1', 'lg1', 'hello world', 100)",
8004                [],
8005            )
8006            .expect("insert chunk");
8007        }
8008        service.excise_source("source-2").expect("excise");
8009        {
8010            let conn = sqlite::open_connection(db.path()).expect("conn");
8011            let fts_count: i64 = conn
8012                .query_row(
8013                    "SELECT count(*) FROM fts_nodes WHERE chunk_id = 'ck1'",
8014                    [],
8015                    |row| row.get(0),
8016                )
8017                .expect("fts count");
8018            assert_eq!(
8019                fts_count, 0,
8020                "excised content should not remain searchable after excise"
8021            );
8022        }
8023    }
8024
8025    #[cfg(feature = "sqlite-vec")]
8026    #[test]
8027    fn excise_source_cleans_chunks_and_vec_rows_for_excised_version() {
8028        let (db, service) = setup();
8029        {
8030            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
8031            service
8032                .schema_manager
8033                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
8034                .expect("ensure vec profile");
8035            conn.execute(
8036                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
8037                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
8038                [],
8039            )
8040            .expect("insert v1");
8041            conn.execute(
8042                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8043                 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
8044                [],
8045            )
8046            .expect("insert v2");
8047            conn.execute(
8048                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
8049                 VALUES ('ck1', 'lg1', 'new content', 200)",
8050                [],
8051            )
8052            .expect("insert chunk");
8053            conn.execute(
8054                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('ck1', zeroblob(16))",
8055                [],
8056            )
8057            .expect("insert vec row");
8058        }
8059
8060        service.excise_source("source-2").expect("excise");
8061
8062        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
8063        let active_row: String = conn
8064            .query_row(
8065                "SELECT row_id FROM nodes WHERE logical_id = 'lg1' AND superseded_at IS NULL",
8066                [],
8067                |row| row.get(0),
8068            )
8069            .expect("restored active row");
8070        assert_eq!(active_row, "r1");
8071        let chunk_count: i64 = conn
8072            .query_row(
8073                "SELECT count(*) FROM chunks WHERE node_logical_id = 'lg1'",
8074                [],
8075                |row| row.get(0),
8076            )
8077            .expect("chunk count");
8078        assert_eq!(
8079            chunk_count, 0,
8080            "excised source content must not survive as chunks"
8081        );
8082        let vec_count: i64 = conn
8083            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
8084                row.get(0)
8085            })
8086            .expect("vec count");
8087        assert_eq!(vec_count, 0, "excised source vec rows must be removed");
8088        let fts_count: i64 = conn
8089            .query_row(
8090                "SELECT count(*) FROM fts_nodes WHERE node_logical_id = 'lg1'",
8091                [],
8092                |row| row.get(0),
8093            )
8094            .expect("fts count");
8095        assert_eq!(
8096            fts_count, 0,
8097            "excised source content must not remain searchable"
8098        );
8099    }
8100
8101    #[test]
8102    fn export_page_count_matches_exported_file() {
8103        let (_db, service) = setup();
8104        let export_dir = tempfile::TempDir::new().expect("temp dir");
8105        let export_path = export_dir.path().join("page-count.db");
8106
8107        let manifest = service
8108            .safe_export(
8109                &export_path,
8110                SafeExportOptions {
8111                    force_checkpoint: false,
8112                },
8113            )
8114            .expect("export");
8115
8116        let exported = sqlite::open_connection(&export_path).expect("open exported db");
8117        let actual_page_count: u64 = exported
8118            .query_row("PRAGMA page_count", [], |row| row.get(0))
8119            .expect("page_count from exported file");
8120
8121        assert_eq!(
8122            manifest.page_count, actual_page_count,
8123            "manifest page_count must match the exported file's PRAGMA page_count"
8124        );
8125    }
8126
8127    #[test]
8128    fn no_temp_file_after_successful_export() {
8129        let (_db, service) = setup();
8130        let export_dir = tempfile::TempDir::new().expect("temp dir");
8131        let export_path = export_dir.path().join("no-tmp.db");
8132
8133        service
8134            .safe_export(
8135                &export_path,
8136                SafeExportOptions {
8137                    force_checkpoint: false,
8138                },
8139            )
8140            .expect("export");
8141
8142        let tmp_files: Vec<_> = fs::read_dir(export_dir.path())
8143            .expect("read export dir")
8144            .filter_map(Result::ok)
8145            .filter(|e| e.path().extension().is_some_and(|ext| ext == "tmp"))
8146            .collect();
8147
8148        assert!(
8149            tmp_files.is_empty(),
8150            "no .tmp files should remain after a successful export, found: {tmp_files:?}"
8151        );
8152    }
8153
8154    #[test]
8155    fn export_manifest_is_valid_json() {
8156        let (_db, service) = setup();
8157        let export_dir = tempfile::TempDir::new().expect("temp dir");
8158        let export_path = export_dir.path().join("valid-json.db");
8159
8160        service
8161            .safe_export(
8162                &export_path,
8163                SafeExportOptions {
8164                    force_checkpoint: false,
8165                },
8166            )
8167            .expect("export");
8168
8169        let manifest_path = export_dir.path().join("valid-json.db.export-manifest.json");
8170        let manifest_contents = fs::read_to_string(&manifest_path).expect("read manifest");
8171        let parsed: serde_json::Value =
8172            serde_json::from_str(&manifest_contents).expect("manifest must be valid JSON");
8173
8174        assert!(
8175            parsed.get("exported_at").is_some(),
8176            "manifest must contain exported_at"
8177        );
8178        assert!(
8179            parsed.get("sha256").is_some(),
8180            "manifest must contain sha256"
8181        );
8182        assert!(
8183            parsed.get("schema_version").is_some(),
8184            "manifest must contain schema_version"
8185        );
8186        assert!(
8187            parsed.get("protocol_version").is_some(),
8188            "manifest must contain protocol_version"
8189        );
8190        assert!(
8191            parsed.get("page_count").is_some(),
8192            "manifest must contain page_count"
8193        );
8194    }
8195
8196    #[test]
8197    fn provenance_purge_dry_run_reports_counts() {
8198        let (db, service) = setup();
8199        {
8200            let conn = sqlite::open_connection(db.path()).expect("conn");
8201            conn.execute(
8202                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8203                 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
8204                [],
8205            )
8206            .expect("insert p1");
8207            conn.execute(
8208                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8209                 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 200)",
8210                [],
8211            )
8212            .expect("insert p2");
8213            conn.execute(
8214                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8215                 VALUES ('p3', 'excise', 'lg3', 'src-1', 300)",
8216                [],
8217            )
8218            .expect("insert p3");
8219        }
8220
8221        let options = super::ProvenancePurgeOptions {
8222            dry_run: true,
8223            preserve_event_types: Vec::new(),
8224        };
8225        let report = service
8226            .purge_provenance_events(250, &options)
8227            .expect("dry run purge");
8228
8229        assert_eq!(report.events_deleted, 2);
8230        assert_eq!(report.events_preserved, 1);
8231        assert!(report.oldest_remaining.is_some());
8232
8233        let conn = sqlite::open_connection(db.path()).expect("conn");
8234        let total: i64 = conn
8235            .query_row("SELECT count(*) FROM provenance_events", [], |row| {
8236                row.get(0)
8237            })
8238            .expect("count");
8239        assert_eq!(total, 3, "dry_run must not delete any events");
8240    }
8241
8242    #[test]
8243    fn provenance_purge_deletes_old_events() {
8244        let (db, service) = setup();
8245        {
8246            let conn = sqlite::open_connection(db.path()).expect("conn");
8247            conn.execute(
8248                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8249                 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
8250                [],
8251            )
8252            .expect("insert p1");
8253            conn.execute(
8254                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8255                 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 200)",
8256                [],
8257            )
8258            .expect("insert p2");
8259        }
8260
8261        let options = super::ProvenancePurgeOptions {
8262            dry_run: false,
8263            preserve_event_types: Vec::new(),
8264        };
8265        let report = service
8266            .purge_provenance_events(150, &options)
8267            .expect("purge");
8268
8269        assert_eq!(report.events_deleted, 1);
8270        assert_eq!(report.events_preserved, 1);
8271        assert_eq!(report.oldest_remaining, Some(200));
8272
8273        let conn = sqlite::open_connection(db.path()).expect("conn");
8274        let remaining: i64 = conn
8275            .query_row("SELECT count(*) FROM provenance_events", [], |row| {
8276                row.get(0)
8277            })
8278            .expect("count");
8279        assert_eq!(remaining, 1);
8280    }
8281
8282    #[test]
8283    fn provenance_purge_preserves_specified_types() {
8284        let (db, service) = setup();
8285        {
8286            let conn = sqlite::open_connection(db.path()).expect("conn");
8287            conn.execute(
8288                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8289                 VALUES ('p1', 'excise', 'lg1', 'src-1', 100)",
8290                [],
8291            )
8292            .expect("insert p1");
8293            conn.execute(
8294                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8295                 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 100)",
8296                [],
8297            )
8298            .expect("insert p2");
8299            conn.execute(
8300                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8301                 VALUES ('p3', 'node_insert', 'lg3', 'src-1', 100)",
8302                [],
8303            )
8304            .expect("insert p3");
8305        }
8306
8307        let options = super::ProvenancePurgeOptions {
8308            dry_run: false,
8309            preserve_event_types: Vec::new(),
8310        };
8311        let report = service
8312            .purge_provenance_events(500, &options)
8313            .expect("purge");
8314
8315        assert_eq!(report.events_deleted, 2);
8316        assert_eq!(report.events_preserved, 1);
8317
8318        let conn = sqlite::open_connection(db.path()).expect("conn");
8319        let remaining_type: String = conn
8320            .query_row("SELECT event_type FROM provenance_events", [], |row| {
8321                row.get(0)
8322            })
8323            .expect("remaining event type");
8324        assert_eq!(remaining_type, "excise");
8325    }
8326
8327    #[test]
8328    fn provenance_purge_noop_with_zero_timestamp() {
8329        let (db, service) = setup();
8330        {
8331            let conn = sqlite::open_connection(db.path()).expect("conn");
8332            conn.execute(
8333                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8334                 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
8335                [],
8336            )
8337            .expect("insert p1");
8338        }
8339
8340        let options = super::ProvenancePurgeOptions {
8341            dry_run: false,
8342            preserve_event_types: Vec::new(),
8343        };
8344        let report = service.purge_provenance_events(0, &options).expect("purge");
8345
8346        assert_eq!(report.events_deleted, 0);
8347        assert_eq!(report.events_preserved, 1);
8348        assert_eq!(report.oldest_remaining, Some(100));
8349    }
8350
8351    #[test]
8352    fn restore_skips_edge_when_counterpart_purged() {
8353        let (db, service) = setup();
8354        {
8355            let conn = sqlite::open_connection(db.path()).expect("conn");
8356            // Create node A (doc-1) and node B (doc-2)
8357            conn.execute(
8358                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8359                 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
8360                [],
8361            )
8362            .expect("insert node A");
8363            conn.execute(
8364                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8365                 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
8366                [],
8367            )
8368            .expect("insert node B");
8369            // Create edge between A and B
8370            conn.execute(
8371                "INSERT INTO edges \
8372                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
8373                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
8374                [],
8375            )
8376            .expect("insert edge");
8377            // Retire both A and B, and the edge
8378            conn.execute(
8379                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8380                 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
8381                [],
8382            )
8383            .expect("insert retire event A");
8384            conn.execute(
8385                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8386                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
8387                [],
8388            )
8389            .expect("insert edge retire event");
8390            conn.execute(
8391                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
8392                [],
8393            )
8394            .expect("retire node A");
8395            conn.execute(
8396                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-2'",
8397                [],
8398            )
8399            .expect("retire node B");
8400            conn.execute(
8401                "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
8402                [],
8403            )
8404            .expect("retire edge");
8405            // Simulate purge of B: delete node rows but leave the edge intact
8406            // to reproduce the dangling-edge scenario the validation guards against.
8407            conn.execute("DELETE FROM nodes WHERE logical_id = 'doc-2'", [])
8408                .expect("purge node B rows");
8409        }
8410
8411        // Restore A — the edge should be skipped because B has no active node
8412        let report = service.restore_logical_id("doc-1").expect("restore A");
8413        assert!(!report.was_noop);
8414        assert_eq!(report.restored_node_rows, 1);
8415        assert_eq!(report.restored_edge_rows, 0, "edge should not be restored");
8416        assert_eq!(report.skipped_edges.len(), 1);
8417        assert_eq!(report.skipped_edges[0].edge_logical_id, "edge-1");
8418        assert_eq!(report.skipped_edges[0].missing_endpoint, "doc-2");
8419
8420        // Verify the edge is still retired in the database
8421        let conn = sqlite::open_connection(db.path()).expect("conn");
8422        let active_edge_count: i64 = conn
8423            .query_row(
8424                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
8425                [],
8426                |row| row.get(0),
8427            )
8428            .expect("active edge count");
8429        assert_eq!(active_edge_count, 0, "edge must remain retired");
8430    }
8431
8432    #[test]
8433    fn restore_restores_edges_to_active_nodes() {
8434        let (db, service) = setup();
8435        {
8436            let conn = sqlite::open_connection(db.path()).expect("conn");
8437            // Create node A and node B (B stays active)
8438            conn.execute(
8439                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8440                 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
8441                [],
8442            )
8443            .expect("insert node A");
8444            conn.execute(
8445                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8446                 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
8447                [],
8448            )
8449            .expect("insert node B");
8450            // Create edge between A and B
8451            conn.execute(
8452                "INSERT INTO edges \
8453                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
8454                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
8455                [],
8456            )
8457            .expect("insert edge");
8458            // Retire only A
8459            conn.execute(
8460                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8461                 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
8462                [],
8463            )
8464            .expect("insert retire event A");
8465            conn.execute(
8466                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8467                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
8468                [],
8469            )
8470            .expect("insert edge retire event");
8471            conn.execute(
8472                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
8473                [],
8474            )
8475            .expect("retire node A");
8476            conn.execute(
8477                "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
8478                [],
8479            )
8480            .expect("retire edge");
8481        }
8482
8483        // Restore A — B is active, so the edge should be restored normally
8484        let report = service.restore_logical_id("doc-1").expect("restore A");
8485        assert!(!report.was_noop);
8486        assert_eq!(report.restored_node_rows, 1);
8487        assert!(report.restored_edge_rows > 0, "edge should be restored");
8488        assert!(
8489            report.skipped_edges.is_empty(),
8490            "no edges should be skipped"
8491        );
8492
8493        let conn = sqlite::open_connection(db.path()).expect("conn");
8494        let active_edge_count: i64 = conn
8495            .query_row(
8496                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
8497                [],
8498                |row| row.get(0),
8499            )
8500            .expect("active edge count");
8501        assert_eq!(active_edge_count, 1, "edge must be active");
8502    }
8503
8504    #[test]
8505    fn restore_restores_edges_when_both_restored() {
8506        let (db, service) = setup();
8507        {
8508            let conn = sqlite::open_connection(db.path()).expect("conn");
8509            // Create node A and node B
8510            conn.execute(
8511                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8512                 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
8513                [],
8514            )
8515            .expect("insert node A");
8516            conn.execute(
8517                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8518                 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
8519                [],
8520            )
8521            .expect("insert node B");
8522            // Create edge between A and B
8523            conn.execute(
8524                "INSERT INTO edges \
8525                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
8526                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
8527                [],
8528            )
8529            .expect("insert edge");
8530            // Retire both A and B
8531            conn.execute(
8532                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8533                 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
8534                [],
8535            )
8536            .expect("insert retire event A");
8537            conn.execute(
8538                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8539                 VALUES ('evt-retire-b', 'node_retire', 'doc-2', 'forget-1', 200, '')",
8540                [],
8541            )
8542            .expect("insert retire event B");
8543            conn.execute(
8544                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8545                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
8546                [],
8547            )
8548            .expect("insert edge retire event");
8549            conn.execute(
8550                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
8551                [],
8552            )
8553            .expect("retire node A");
8554            conn.execute(
8555                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-2'",
8556                [],
8557            )
8558            .expect("retire node B");
8559            conn.execute(
8560                "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
8561                [],
8562            )
8563            .expect("retire edge");
8564        }
8565
8566        // Restore B first — edge is skipped because A is still retired
8567        let report_b = service.restore_logical_id("doc-2").expect("restore B");
8568        assert!(!report_b.was_noop);
8569
8570        // Restore A — B is now active, so the edge should be restored
8571        let report_a = service.restore_logical_id("doc-1").expect("restore A");
8572        assert!(!report_a.was_noop);
8573        assert_eq!(report_a.restored_node_rows, 1);
8574        assert!(
8575            report_a.restored_edge_rows > 0,
8576            "edge should be restored when both endpoints active"
8577        );
8578        assert!(
8579            report_a.skipped_edges.is_empty(),
8580            "no edges should be skipped"
8581        );
8582
8583        let conn = sqlite::open_connection(db.path()).expect("conn");
8584        let active_edge_count: i64 = conn
8585            .query_row(
8586                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
8587                [],
8588                |row| row.get(0),
8589            )
8590            .expect("active edge count");
8591        assert_eq!(
8592            active_edge_count, 1,
8593            "edge must be active after both endpoints restored"
8594        );
8595    }
8596}