// fathomdb_engine/admin.rs

use std::fmt::Write as _;
use std::fs;
use std::io;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::sync::mpsc::SyncSender;
use std::time::SystemTime;

use fathomdb_schema::{SchemaError, SchemaManager};
use rusqlite::{DatabaseName, OptionalExtension, TransactionBehavior};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};

use crate::rebuild_actor::{RebuildMode, RebuildRequest, RebuildStateRow};

use crate::{
    EngineError, ProjectionRepairReport, ProjectionService,
    embedder::{QueryEmbedder, QueryEmbedderIdentity},
    ids::new_id,
    operational::{
        OperationalCollectionKind, OperationalCollectionRecord, OperationalCompactionReport,
        OperationalCurrentRow, OperationalFilterClause, OperationalFilterField,
        OperationalFilterFieldType, OperationalFilterMode, OperationalFilterValue,
        OperationalHistoryValidationIssue, OperationalHistoryValidationReport,
        OperationalMutationRow, OperationalPurgeReport, OperationalReadReport,
        OperationalReadRequest, OperationalRegisterRequest, OperationalRepairReport,
        OperationalRetentionActionKind, OperationalRetentionPlanItem,
        OperationalRetentionPlanReport, OperationalRetentionRunItem, OperationalRetentionRunReport,
        OperationalSecondaryIndexDefinition, OperationalSecondaryIndexRebuildReport,
        OperationalTraceReport, extract_secondary_index_entries_for_current,
        extract_secondary_index_entries_for_mutation, parse_operational_secondary_indexes_json,
        parse_operational_validation_contract, validate_operational_payload_against_contract,
    },
    projection::ProjectionTarget,
    sqlite,
};

/// Results of a physical and structural integrity check on the database.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct IntegrityReport {
    pub physical_ok: bool,
    pub foreign_keys_ok: bool,
    pub missing_fts_rows: usize,
    pub missing_property_fts_rows: usize,
    pub duplicate_active_logical_ids: usize,
    pub operational_missing_collections: usize,
    pub operational_missing_last_mutations: usize,
    pub warnings: Vec<String>,
}

/// A registered FTS property projection schema for a node kind.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct FtsPropertySchemaRecord {
    /// The node kind this schema applies to.
    pub kind: String,
    /// Flat display list of registered JSON property paths
    /// (e.g. `["$.name", "$.title"]`). For recursive entries this lists
    /// only the root path; mode information is carried by
    /// [`Self::entries`].
    pub property_paths: Vec<String>,
    /// Full per-entry schema shape with mode
    /// ([`FtsPropertyPathMode::Scalar`] | [`FtsPropertyPathMode::Recursive`]).
    /// Read this field for mode-accurate round-trip of the registered
    /// schema.
    pub entries: Vec<FtsPropertyPathSpec>,
    /// Subtree paths excluded from recursive walks. Empty for
    /// scalar-only schemas or recursive schemas with no exclusions.
    pub exclude_paths: Vec<String>,
    /// Separator used when concatenating extracted values.
    pub separator: String,
    /// Schema format version.
    pub format_version: i64,
}

/// Extraction mode for a single registered FTS property path.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum FtsPropertyPathMode {
    /// Resolve the path and append the scalar value(s). Matches legacy
    /// pre-Phase-4 behaviour.
    #[default]
    Scalar,
    /// Recursively walk every scalar leaf rooted at the path. Each leaf
    /// contributes one entry to the position map.
    Recursive,
}

/// A single registered property-FTS path with its extraction mode.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct FtsPropertyPathSpec {
    /// JSON path to the property (must start with `$.`).
    pub path: String,
    /// Whether to treat this path as a scalar or recursively walk it.
    pub mode: FtsPropertyPathMode,
}

impl FtsPropertyPathSpec {
    #[must_use]
    pub fn scalar(path: impl Into<String>) -> Self {
        Self {
            path: path.into(),
            mode: FtsPropertyPathMode::Scalar,
        }
    }

    #[must_use]
    pub fn recursive(path: impl Into<String>) -> Self {
        Self {
            path: path.into(),
            mode: FtsPropertyPathMode::Recursive,
        }
    }
}
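
// Example (illustrative sketch, not verified against the rest of this crate):
// declaring one scalar path and one recursive path for a property-FTS schema.
// The paths themselves are hypothetical.
//
//     let entries = vec![
//         FtsPropertyPathSpec::scalar("$.title"),
//         FtsPropertyPathSpec::recursive("$.metadata"),
//     ];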

/// Options controlling how a safe database export is performed.
#[derive(Clone, Copy, Debug)]
pub struct SafeExportOptions {
    /// When true, runs `PRAGMA wal_checkpoint(FULL)` before copying and fails if
    /// any WAL frames could not be applied (busy != 0). Set to false only in
    /// tests that seed a database without WAL mode.
    pub force_checkpoint: bool,
}

impl Default for SafeExportOptions {
    fn default() -> Self {
        Self {
            force_checkpoint: true,
        }
    }
}
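
// Example (sketch): tests that seed a database without WAL mode can skip the
// checkpoint step; everything else should rely on the default.
//
//     let test_opts = SafeExportOptions { force_checkpoint: false };
//     assert!(SafeExportOptions::default().force_checkpoint);
//     assert!(!test_opts.force_checkpoint);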

// Must match PROTOCOL_VERSION in fathomdb-admin-bridge.rs
const EXPORT_PROTOCOL_VERSION: u32 = 1;

/// Manifest describing a completed safe export.
#[derive(Clone, Debug, Serialize)]
pub struct SafeExportManifest {
    /// Unix timestamp (seconds since epoch) when the export was created.
    pub exported_at: u64,
    /// SHA-256 hex digest of the exported database file.
    pub sha256: String,
    /// Schema version recorded in `fathom_schema_migrations` at export time.
    pub schema_version: u32,
    /// Bridge protocol version compiled into this binary.
    pub protocol_version: u32,
    /// Number of `SQLite` pages in the exported database file.
    pub page_count: u64,
}

/// Report from tracing all rows associated with a given `source_ref`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct TraceReport {
    pub source_ref: String,
    pub node_rows: usize,
    pub edge_rows: usize,
    pub action_rows: usize,
    pub operational_mutation_rows: usize,
    pub node_logical_ids: Vec<String>,
    pub action_ids: Vec<String>,
    pub operational_mutation_ids: Vec<String>,
}

/// An edge that was skipped during a restore because an endpoint is missing.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct SkippedEdge {
    pub edge_logical_id: String,
    pub missing_endpoint: String,
}

/// Report from restoring a retired logical ID back to active state.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct LogicalRestoreReport {
    pub logical_id: String,
    pub was_noop: bool,
    pub restored_node_rows: usize,
    pub restored_edge_rows: usize,
    pub restored_chunk_rows: usize,
    pub restored_fts_rows: usize,
    pub restored_property_fts_rows: usize,
    pub restored_vec_rows: usize,
    pub skipped_edges: Vec<SkippedEdge>,
    pub notes: Vec<String>,
}

/// Report from permanently purging all rows for a logical ID.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct LogicalPurgeReport {
    pub logical_id: String,
    pub was_noop: bool,
    pub deleted_node_rows: usize,
    pub deleted_edge_rows: usize,
    pub deleted_chunk_rows: usize,
    pub deleted_fts_rows: usize,
    pub deleted_vec_rows: usize,
    pub notes: Vec<String>,
}

/// Options controlling provenance event purging behavior.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ProvenancePurgeOptions {
    pub dry_run: bool,
    #[serde(default)]
    pub preserve_event_types: Vec<String>,
}

/// Report from a provenance event purge operation.
#[derive(Clone, Debug, Serialize)]
pub struct ProvenancePurgeReport {
    pub events_deleted: u64,
    pub events_preserved: u64,
    pub oldest_remaining: Option<i64>,
}

/// Service providing administrative operations (integrity checks, exports, restores, purges).
#[derive(Debug)]
pub struct AdminService {
    database_path: PathBuf,
    schema_manager: Arc<SchemaManager>,
    projections: ProjectionService,
    /// Sender side of the rebuild actor's channel. `None` when the engine
    /// was opened without a rebuild actor (e.g. in tests that use
    /// [`AdminService::new`] directly).
    rebuild_sender: Option<SyncSender<RebuildRequest>>,
}

/// Results of a semantic consistency check on the graph data.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct SemanticReport {
    /// Chunks whose `node_logical_id` has no active node.
    pub orphaned_chunks: usize,
    /// Active nodes with a NULL `source_ref` (loss of provenance).
    pub null_source_ref_nodes: usize,
    /// Steps referencing a `run_id` that does not exist in the runs table.
    pub broken_step_fk: usize,
    /// Actions referencing a `step_id` that does not exist in the steps table.
    pub broken_action_fk: usize,
    /// FTS rows whose `chunk_id` does not exist in the chunks table.
    pub stale_fts_rows: usize,
    /// FTS rows whose node has been superseded (`superseded_at` IS NOT NULL on every remaining version).
    pub fts_rows_for_superseded_nodes: usize,
    /// Property FTS rows whose node has been superseded or does not exist.
    pub stale_property_fts_rows: usize,
    /// Property FTS rows whose kind has no registered FTS property schema.
    pub orphaned_property_fts_rows: usize,
    /// Property FTS rows whose `kind` does not match the active node's actual kind.
    pub mismatched_kind_property_fts_rows: usize,
    /// Active logical IDs with more than one `fts_node_properties` row.
    pub duplicate_property_fts_rows: usize,
    /// Property FTS rows whose `text_content` no longer matches the canonical extraction.
    pub drifted_property_fts_rows: usize,
    /// Active edges where at least one endpoint has no active node.
    pub dangling_edges: usize,
    /// `logical_ids` where every version has been superseded (no active row).
    pub orphaned_supersession_chains: usize,
    /// Vec rows whose backing chunk no longer exists in the chunks table.
    pub stale_vec_rows: usize,
    /// Compatibility counter for vec rows whose chunk points at missing node history.
    pub vec_rows_for_superseded_nodes: usize,
    /// Latest-state keys whose latest mutation is a `put` but no current row exists.
    pub missing_operational_current_rows: usize,
    /// Current rows that do not match the latest mutation state.
    pub stale_operational_current_rows: usize,
    /// Mutations written after the owning collection was disabled.
    pub disabled_collection_mutations: usize,
    /// Access metadata rows whose `logical_id` no longer has any node history.
    pub orphaned_last_access_metadata_rows: usize,
    pub warnings: Vec<String>,
}

/// Configuration for regenerating vector embeddings.
///
/// 0.4.0 architectural invariant: vector identity is the embedder's
/// responsibility, not the regeneration config's. This struct carries only
/// WHERE the vectors live and HOW to chunk/preprocess them — never WHAT
/// model produced them. The embedder supplied at regen-call time is the
/// single source of truth for `model_identity`, `model_version`,
/// `dimension`, and `normalization_policy`; the resulting vector profile
/// is stamped directly from [`QueryEmbedder::identity`].
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct VectorRegenerationConfig {
    pub profile: String,
    pub table_name: String,
    pub chunking_policy: String,
    pub preprocessing_policy: String,
}
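
// Example (sketch, hypothetical values): deserializing a regeneration config.
// Note the deliberate absence of any model fields; those come from the
// `QueryEmbedder` supplied at regeneration time.
//
//     let config: VectorRegenerationConfig = serde_json::from_str(
//         r#"{
//             "profile": "default",
//             "table_name": "vec_nodes_active",
//             "chunking_policy": "paragraph_v1",
//             "preprocessing_policy": "plain_text_v1"
//         }"#,
//     )?;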

/// Report from a vector embedding regeneration run.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct VectorRegenerationReport {
    pub profile: String,
    pub table_name: String,
    pub dimension: usize,
    pub total_chunks: usize,
    pub regenerated_rows: usize,
    pub contract_persisted: bool,
    pub notes: Vec<String>,
}

const CURRENT_VECTOR_CONTRACT_FORMAT_VERSION: i64 = 1;
const MAX_PROFILE_LEN: usize = 128;
const MAX_POLICY_LEN: usize = 128;
const MAX_CONTRACT_JSON_BYTES: usize = 32 * 1024;
const MAX_AUDIT_METADATA_BYTES: usize = 2048;
const DEFAULT_OPERATIONAL_READ_LIMIT: usize = 100;
const MAX_OPERATIONAL_READ_LIMIT: usize = 1000;

/// Thread-safe handle to the shared [`AdminService`].
#[derive(Clone, Debug)]
pub struct AdminHandle {
    inner: Arc<AdminService>,
}

impl AdminHandle {
    /// Wrap an [`AdminService`] in a shared handle.
    #[must_use]
    pub fn new(service: AdminService) -> Self {
        Self {
            inner: Arc::new(service),
        }
    }

    /// Clone the inner `Arc` to the [`AdminService`].
    #[must_use]
    pub fn service(&self) -> Arc<AdminService> {
        Arc::clone(&self.inner)
    }
}
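
// Example (sketch, assuming a `SchemaManager` constructed elsewhere; the path is
// hypothetical): sharing one `AdminService` across threads through an `AdminHandle`.
//
//     let admin = AdminService::new("/tmp/fathom.db", Arc::clone(&schema_manager));
//     let handle = AdminHandle::new(admin);
//     let worker_handle = handle.clone(); // cheap: clones the inner Arc
//     let service: Arc<AdminService> = worker_handle.service();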

impl AdminService {
    /// Create a new admin service for the database at the given path.
    #[must_use]
    pub fn new(path: impl AsRef<Path>, schema_manager: Arc<SchemaManager>) -> Self {
        let database_path = path.as_ref().to_path_buf();
        let projections = ProjectionService::new(&database_path, Arc::clone(&schema_manager));
        Self {
            database_path,
            schema_manager,
            projections,
            rebuild_sender: None,
        }
    }

    /// Create a new admin service wired to the background rebuild actor.
    #[must_use]
    pub fn new_with_rebuild(
        path: impl AsRef<Path>,
        schema_manager: Arc<SchemaManager>,
        rebuild_sender: SyncSender<RebuildRequest>,
    ) -> Self {
        let database_path = path.as_ref().to_path_buf();
        let projections = ProjectionService::new(&database_path, Arc::clone(&schema_manager));
        Self {
            database_path,
            schema_manager,
            projections,
            rebuild_sender: Some(rebuild_sender),
        }
    }

    fn connect(&self) -> Result<rusqlite::Connection, EngineError> {
        #[cfg(feature = "sqlite-vec")]
        let conn = sqlite::open_connection_with_vec(&self.database_path)?;
        #[cfg(not(feature = "sqlite-vec"))]
        let conn = sqlite::open_connection(&self.database_path)?;
        self.schema_manager.bootstrap(&conn)?;
        Ok(conn)
    }

    /// # Errors
    /// Returns [`EngineError`] if the database connection fails or any SQL query fails.
    pub fn check_integrity(&self) -> Result<IntegrityReport, EngineError> {
        let conn = self.connect()?;

        let physical_result: String =
            conn.query_row("PRAGMA integrity_check", [], |row| row.get(0))?;
        let foreign_key_count: i64 =
            conn.query_row("SELECT count(*) FROM pragma_foreign_key_check", [], |row| {
                row.get(0)
            })?;
        let missing_fts_rows: i64 = conn.query_row(
            r"
            SELECT count(*)
            FROM chunks c
            JOIN nodes n
              ON n.logical_id = c.node_logical_id
             AND n.superseded_at IS NULL
            WHERE NOT EXISTS (
                SELECT 1
                FROM fts_nodes f
                WHERE f.chunk_id = c.id
            )
            ",
            [],
            |row| row.get(0),
        )?;
        let duplicate_active: i64 = conn.query_row(
            r"
            SELECT count(*)
            FROM (
                SELECT logical_id
                FROM nodes
                WHERE superseded_at IS NULL
                GROUP BY logical_id
                HAVING count(*) > 1
            )
            ",
            [],
            |row| row.get(0),
        )?;
        let operational_missing_collections: i64 = conn.query_row(
            r"
            SELECT (
                SELECT count(*)
                FROM operational_mutations m
                LEFT JOIN operational_collections c ON c.name = m.collection_name
                WHERE c.name IS NULL
            ) + (
                SELECT count(*)
                FROM operational_current oc
                LEFT JOIN operational_collections c ON c.name = oc.collection_name
                WHERE c.name IS NULL
            )
            ",
            [],
            |row| row.get(0),
        )?;
        let operational_missing_last_mutations: i64 = conn.query_row(
            r"
            SELECT count(*)
            FROM operational_current oc
            LEFT JOIN operational_mutations m ON m.id = oc.last_mutation_id
            WHERE m.id IS NULL
            ",
            [],
            |row| row.get(0),
        )?;

        // Count missing property FTS rows using the same extraction logic as
        // write/rebuild. A pure-SQL check would overcount: nodes whose declared
        // paths legitimately normalize to no values are correct in having no row.
        let missing_property_fts_rows = count_missing_property_fts_rows(&conn)?;

        let mut warnings = Vec::new();
        if missing_fts_rows > 0 {
            warnings.push("missing FTS projections detected".to_owned());
        }
        if missing_property_fts_rows > 0 {
            warnings.push("missing property FTS projections detected".to_owned());
        }
        if duplicate_active > 0 {
            warnings.push("duplicate active logical_ids detected".to_owned());
        }
        if operational_missing_collections > 0 {
            warnings.push("operational rows reference missing collections".to_owned());
        }
        if operational_missing_last_mutations > 0 {
            warnings.push("operational current rows reference missing last mutations".to_owned());
        }

        // FIX(review): was `as usize` — unsound on 32-bit targets, wraps negatives silently.
        // Options: (A) try_from().unwrap_or(0) — masks corruption, (B) try_from().expect() —
        // panics on corruption, (C) propagate error. Chose (B) here: a negative count(*)
        // signals data corruption, and the integrity report would be meaningless anyway.
        Ok(IntegrityReport {
            physical_ok: physical_result == "ok",
            foreign_keys_ok: foreign_key_count == 0,
            missing_fts_rows: i64_to_usize(missing_fts_rows),
            missing_property_fts_rows: i64_to_usize(missing_property_fts_rows),
            duplicate_active_logical_ids: i64_to_usize(duplicate_active),
            operational_missing_collections: i64_to_usize(operational_missing_collections),
            operational_missing_last_mutations: i64_to_usize(operational_missing_last_mutations),
            warnings,
        })
    }
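
    // Example (sketch): running the physical/structural check and surfacing any
    // warnings it raised.
    //
    //     let report = admin.check_integrity()?;
    //     if !report.physical_ok || !report.warnings.is_empty() {
    //         for warning in &report.warnings {
    //             eprintln!("integrity: {warning}");
    //         }
    //     }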

    /// # Errors
    /// Returns [`EngineError`] if the database connection fails or any SQL query fails.
    #[allow(clippy::too_many_lines)]
    pub fn check_semantics(&self) -> Result<SemanticReport, EngineError> {
        let conn = self.connect()?;

        let orphaned_chunks: i64 = conn.query_row(
            r"
            SELECT count(*)
            FROM chunks c
            WHERE NOT EXISTS (
                SELECT 1 FROM nodes n
                WHERE n.logical_id = c.node_logical_id
            )
            ",
            [],
            |row| row.get(0),
        )?;

        let null_source_ref_nodes: i64 = conn.query_row(
            "SELECT count(*) FROM nodes WHERE source_ref IS NULL AND superseded_at IS NULL",
            [],
            |row| row.get(0),
        )?;

        let broken_step_fk: i64 = conn.query_row(
            r"
            SELECT count(*) FROM steps s
            WHERE NOT EXISTS (SELECT 1 FROM runs r WHERE r.id = s.run_id)
            ",
            [],
            |row| row.get(0),
        )?;

        let broken_action_fk: i64 = conn.query_row(
            r"
            SELECT count(*) FROM actions a
            WHERE NOT EXISTS (SELECT 1 FROM steps s WHERE s.id = a.step_id)
            ",
            [],
            |row| row.get(0),
        )?;

        let stale_fts_rows: i64 = conn.query_row(
            r"
            SELECT count(*) FROM fts_nodes f
            WHERE NOT EXISTS (SELECT 1 FROM chunks c WHERE c.id = f.chunk_id)
            ",
            [],
            |row| row.get(0),
        )?;

        let fts_rows_for_superseded_nodes: i64 = conn.query_row(
            r"
            SELECT count(*) FROM fts_nodes f
            WHERE NOT EXISTS (
                SELECT 1 FROM nodes n
                WHERE n.logical_id = f.node_logical_id AND n.superseded_at IS NULL
            )
            ",
            [],
            |row| row.get(0),
        )?;

        let stale_property_fts_rows: i64 = conn.query_row(
            r"
            SELECT count(*) FROM fts_node_properties fp
            WHERE NOT EXISTS (
                SELECT 1 FROM nodes n
                WHERE n.logical_id = fp.node_logical_id AND n.superseded_at IS NULL
            )
            ",
            [],
            |row| row.get(0),
        )?;

        let orphaned_property_fts_rows: i64 = conn.query_row(
            r"
            SELECT count(*) FROM fts_node_properties fp
            WHERE NOT EXISTS (
                SELECT 1 FROM fts_property_schemas s WHERE s.kind = fp.kind
            )
            ",
            [],
            |row| row.get(0),
        )?;

        let mismatched_kind_property_fts_rows: i64 = conn.query_row(
            r"
            SELECT count(*) FROM fts_node_properties fp
            JOIN nodes n ON n.logical_id = fp.node_logical_id AND n.superseded_at IS NULL
            WHERE n.kind != fp.kind
            ",
            [],
            |row| row.get(0),
        )?;

        let duplicate_property_fts_rows: i64 = conn.query_row(
            r"
            SELECT count(*) FROM (
                SELECT node_logical_id FROM fts_node_properties
                GROUP BY node_logical_id
                HAVING count(*) > 1
            )
            ",
            [],
            |row| row.get(0),
        )?;

        let drifted_property_fts_rows = count_drifted_property_fts_rows(&conn)?;

        let dangling_edges: i64 = conn.query_row(
            r"
            SELECT count(*) FROM edges e
            WHERE e.superseded_at IS NULL AND (
                NOT EXISTS (SELECT 1 FROM nodes n WHERE n.logical_id = e.source_logical_id AND n.superseded_at IS NULL)
                OR
                NOT EXISTS (SELECT 1 FROM nodes n WHERE n.logical_id = e.target_logical_id AND n.superseded_at IS NULL)
            )
            ",
            [],
            |row| row.get(0),
        )?;

        let orphaned_supersession_chains: i64 = conn.query_row(
            r"
            SELECT count(*) FROM (
                SELECT logical_id FROM nodes
                GROUP BY logical_id
                HAVING count(*) > 0 AND sum(CASE WHEN superseded_at IS NULL THEN 1 ELSE 0 END) = 0
            )
            ",
            [],
            |row| row.get(0),
        )?;

        // Vec stale row detection — degrades to 0 when the vec profile is absent.
        #[cfg(feature = "sqlite-vec")]
        let stale_vec_rows: i64 = match conn.query_row(
            r"
            SELECT count(*) FROM vec_nodes_active v
            WHERE NOT EXISTS (SELECT 1 FROM chunks c WHERE c.id = v.chunk_id)
            ",
            [],
            |row| row.get(0),
        ) {
            Ok(n) => n,
            Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
                if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
            {
                0
            }
            Err(e) => return Err(EngineError::Sqlite(e)),
        };
        #[cfg(not(feature = "sqlite-vec"))]
        let stale_vec_rows: i64 = 0;

        #[cfg(feature = "sqlite-vec")]
        let vec_rows_for_superseded_nodes: i64 = match conn.query_row(
            r"
            SELECT count(*) FROM vec_nodes_active v
            JOIN chunks c ON c.id = v.chunk_id
            WHERE NOT EXISTS (
                SELECT 1 FROM nodes n
                WHERE n.logical_id = c.node_logical_id
            )
            ",
            [],
            |row| row.get(0),
        ) {
            Ok(n) => n,
            Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
                if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
            {
                0
            }
            Err(e) => return Err(EngineError::Sqlite(e)),
        };
        #[cfg(not(feature = "sqlite-vec"))]
        let vec_rows_for_superseded_nodes: i64 = 0;
        let missing_operational_current_rows: i64 = conn.query_row(
            r"
            SELECT count(*)
            FROM operational_mutations m
            JOIN operational_collections c
              ON c.name = m.collection_name
             AND c.kind = 'latest_state'
            WHERE m.op_kind = 'put'
              AND NOT EXISTS (
                    SELECT 1
                    FROM operational_mutations newer
                    WHERE newer.collection_name = m.collection_name
                      AND newer.record_key = m.record_key
                      AND newer.mutation_order > m.mutation_order
                )
              AND NOT EXISTS (
                    SELECT 1
                    FROM operational_current oc
                    WHERE oc.collection_name = m.collection_name
                      AND oc.record_key = m.record_key
                )
            ",
            [],
            |row| row.get(0),
        )?;
        let stale_operational_current_rows: i64 = conn.query_row(
            r"
            SELECT count(*)
            FROM operational_current oc
            JOIN operational_collections c
              ON c.name = oc.collection_name
             AND c.kind = 'latest_state'
            LEFT JOIN operational_mutations m ON m.id = oc.last_mutation_id
            WHERE m.id IS NULL
               OR m.collection_name != oc.collection_name
               OR m.record_key != oc.record_key
               OR m.op_kind != 'put'
               OR m.payload_json != oc.payload_json
               OR EXISTS (
                    SELECT 1
                    FROM operational_mutations newer
                    WHERE newer.collection_name = oc.collection_name
                      AND newer.record_key = oc.record_key
                      AND newer.mutation_order > m.mutation_order
                )
            ",
            [],
            |row| row.get(0),
        )?;
        let disabled_collection_mutations: i64 = conn.query_row(
            r"
            SELECT count(*)
            FROM operational_mutations m
            JOIN operational_collections c ON c.name = m.collection_name
            WHERE c.disabled_at IS NOT NULL AND m.created_at > c.disabled_at
            ",
            [],
            |row| row.get(0),
        )?;
        let orphaned_last_access_metadata_rows: i64 = conn.query_row(
            r"
            SELECT count(*)
            FROM node_access_metadata am
            WHERE NOT EXISTS (
                SELECT 1 FROM nodes n WHERE n.logical_id = am.logical_id
            )
            ",
            [],
            |row| row.get(0),
        )?;

        let mut warnings = Vec::new();
        if orphaned_chunks > 0 {
            warnings.push(format!(
                "{orphaned_chunks} orphaned chunk(s) with no surviving node history"
            ));
        }
        if null_source_ref_nodes > 0 {
            warnings.push(format!(
                "{null_source_ref_nodes} active node(s) with null source_ref"
            ));
        }
        if broken_step_fk > 0 {
            warnings.push(format!(
                "{broken_step_fk} step(s) referencing non-existent run"
            ));
        }
        if broken_action_fk > 0 {
            warnings.push(format!(
                "{broken_action_fk} action(s) referencing non-existent step"
            ));
        }
        if stale_fts_rows > 0 {
            warnings.push(format!(
                "{stale_fts_rows} stale FTS row(s) referencing missing chunk"
            ));
        }
        if fts_rows_for_superseded_nodes > 0 {
            warnings.push(format!(
                "{fts_rows_for_superseded_nodes} FTS row(s) for superseded node(s)"
            ));
        }
        if stale_property_fts_rows > 0 {
            warnings.push(format!(
                "{stale_property_fts_rows} stale property FTS row(s) for superseded/missing node(s)"
            ));
        }
        if orphaned_property_fts_rows > 0 {
            warnings.push(format!(
                "{orphaned_property_fts_rows} orphaned property FTS row(s) for unregistered kind(s)"
            ));
        }
        if mismatched_kind_property_fts_rows > 0 {
            warnings.push(format!(
                "{mismatched_kind_property_fts_rows} property FTS row(s) whose kind does not match the active node"
            ));
        }
        if duplicate_property_fts_rows > 0 {
            warnings.push(format!(
                "{duplicate_property_fts_rows} active logical ID(s) with duplicate property FTS rows"
            ));
        }
        if drifted_property_fts_rows > 0 {
            warnings.push(format!(
                "{drifted_property_fts_rows} property FTS row(s) with stale text_content"
            ));
        }
        if dangling_edges > 0 {
            warnings.push(format!(
                "{dangling_edges} active edge(s) with missing endpoint node"
            ));
        }
        if orphaned_supersession_chains > 0 {
            warnings.push(format!(
                "{orphaned_supersession_chains} logical_id(s) with all versions superseded"
            ));
        }
        if stale_vec_rows > 0 {
            warnings.push(format!(
                "{stale_vec_rows} stale vec row(s) referencing missing chunk"
            ));
        }
        if vec_rows_for_superseded_nodes > 0 {
            warnings.push(format!(
                "{vec_rows_for_superseded_nodes} vec row(s) whose node history is missing"
            ));
        }
        if missing_operational_current_rows > 0 {
            warnings.push(format!(
                "{missing_operational_current_rows} latest-state key(s) missing operational_current rows"
            ));
        }
        if stale_operational_current_rows > 0 {
            warnings.push(format!(
                "{stale_operational_current_rows} stale operational_current row(s)"
            ));
        }
        if disabled_collection_mutations > 0 {
            warnings.push(format!(
                "{disabled_collection_mutations} mutation(s) were written after collection disable"
            ));
        }
        if orphaned_last_access_metadata_rows > 0 {
            warnings.push(format!(
                "{orphaned_last_access_metadata_rows} last_access metadata row(s) reference missing node history"
            ));
        }

        Ok(SemanticReport {
            orphaned_chunks: i64_to_usize(orphaned_chunks),
            null_source_ref_nodes: i64_to_usize(null_source_ref_nodes),
            broken_step_fk: i64_to_usize(broken_step_fk),
            broken_action_fk: i64_to_usize(broken_action_fk),
            stale_fts_rows: i64_to_usize(stale_fts_rows),
            fts_rows_for_superseded_nodes: i64_to_usize(fts_rows_for_superseded_nodes),
            stale_property_fts_rows: i64_to_usize(stale_property_fts_rows),
            orphaned_property_fts_rows: i64_to_usize(orphaned_property_fts_rows),
            mismatched_kind_property_fts_rows: i64_to_usize(mismatched_kind_property_fts_rows),
            duplicate_property_fts_rows: i64_to_usize(duplicate_property_fts_rows),
            drifted_property_fts_rows: i64_to_usize(drifted_property_fts_rows),
            dangling_edges: i64_to_usize(dangling_edges),
            orphaned_supersession_chains: i64_to_usize(orphaned_supersession_chains),
            stale_vec_rows: i64_to_usize(stale_vec_rows),
            vec_rows_for_superseded_nodes: i64_to_usize(vec_rows_for_superseded_nodes),
            missing_operational_current_rows: i64_to_usize(missing_operational_current_rows),
            stale_operational_current_rows: i64_to_usize(stale_operational_current_rows),
            disabled_collection_mutations: i64_to_usize(disabled_collection_mutations),
            orphaned_last_access_metadata_rows: i64_to_usize(orphaned_last_access_metadata_rows),
            warnings,
        })
    }

    /// # Errors
    /// Returns [`EngineError`] if the collection metadata is invalid or the insert fails.
    pub fn register_operational_collection(
        &self,
        request: &OperationalRegisterRequest,
    ) -> Result<OperationalCollectionRecord, EngineError> {
        if request.name.trim().is_empty() {
            return Err(EngineError::InvalidWrite(
                "operational collection name must not be empty".to_owned(),
            ));
        }
        if request.schema_json.is_empty() {
            return Err(EngineError::InvalidWrite(
                "operational collection schema_json must not be empty".to_owned(),
            ));
        }
        if request.retention_json.is_empty() {
            return Err(EngineError::InvalidWrite(
                "operational collection retention_json must not be empty".to_owned(),
            ));
        }
        if request.filter_fields_json.is_empty() {
            return Err(EngineError::InvalidWrite(
                "operational collection filter_fields_json must not be empty".to_owned(),
            ));
        }
        parse_operational_validation_contract(&request.validation_json)
            .map_err(EngineError::InvalidWrite)?;
        parse_operational_secondary_indexes_json(&request.secondary_indexes_json, request.kind)
            .map_err(EngineError::InvalidWrite)?;
        if request.format_version <= 0 {
            return Err(EngineError::InvalidWrite(
                "operational collection format_version must be positive".to_owned(),
            ));
        }
        parse_operational_filter_fields(&request.filter_fields_json)
            .map_err(EngineError::InvalidWrite)?;

        let mut conn = self.connect()?;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
        tx.execute(
            "INSERT INTO operational_collections \
             (name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at) \
             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, unixepoch())",
            rusqlite::params![
                request.name.as_str(),
                request.kind.as_str(),
                request.schema_json.as_str(),
                request.retention_json.as_str(),
                request.filter_fields_json.as_str(),
                request.validation_json.as_str(),
                request.secondary_indexes_json.as_str(),
                request.format_version,
            ],
        )?;
        persist_simple_provenance_event(
            &tx,
            "operational_collection_registered",
            request.name.as_str(),
            Some(serde_json::json!({
                "kind": request.kind.as_str(),
                "format_version": request.format_version,
            })),
        )?;
        tx.commit()?;

        self.describe_operational_collection(&request.name)?
            .ok_or_else(|| {
                EngineError::Bridge("registered collection missing after commit".to_owned())
            })
    }

    /// # Errors
    /// Returns [`EngineError`] if the database query fails.
    pub fn describe_operational_collection(
        &self,
        name: &str,
    ) -> Result<Option<OperationalCollectionRecord>, EngineError> {
        let conn = self.connect()?;
        load_operational_collection_record(&conn, name)
    }

    /// # Errors
    /// Returns [`EngineError`] if the collection is missing, the filter contract is invalid,
    /// or existing mutation backfill fails.
    pub fn update_operational_collection_filters(
        &self,
        name: &str,
        filter_fields_json: &str,
    ) -> Result<OperationalCollectionRecord, EngineError> {
        if filter_fields_json.is_empty() {
            return Err(EngineError::InvalidWrite(
                "operational collection filter_fields_json must not be empty".to_owned(),
            ));
        }
        let declared_fields = parse_operational_filter_fields(filter_fields_json)
            .map_err(EngineError::InvalidWrite)?;

        let mut conn = self.connect()?;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
        load_operational_collection_record(&tx, name)?.ok_or_else(|| {
            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
        })?;
        tx.execute(
            "UPDATE operational_collections SET filter_fields_json = ?2 WHERE name = ?1",
            rusqlite::params![name, filter_fields_json],
        )?;
        tx.execute(
            "DELETE FROM operational_filter_values WHERE collection_name = ?1",
            [name],
        )?;

        let mut mutation_stmt = tx.prepare(
            "SELECT id, payload_json FROM operational_mutations \
             WHERE collection_name = ?1 ORDER BY mutation_order",
        )?;
        let mutations = mutation_stmt
            .query_map([name], |row| {
                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
            })?
            .collect::<Result<Vec<_>, _>>()?;
        drop(mutation_stmt);

        let mut insert_filter_value = tx.prepare_cached(
            "INSERT INTO operational_filter_values \
             (mutation_id, collection_name, field_name, string_value, integer_value) \
             VALUES (?1, ?2, ?3, ?4, ?5)",
        )?;
        let mut inserted_values = 0usize;
        for (mutation_id, payload_json) in &mutations {
            for filter_value in
                extract_operational_filter_values(&declared_fields, payload_json.as_str())
            {
                insert_filter_value.execute(rusqlite::params![
                    mutation_id,
                    name,
                    filter_value.field_name,
                    filter_value.string_value,
                    filter_value.integer_value,
                ])?;
                inserted_values += 1;
            }
        }
        drop(insert_filter_value);

        persist_simple_provenance_event(
            &tx,
            "operational_collection_filter_fields_updated",
            name,
            Some(serde_json::json!({
                "field_count": declared_fields.len(),
                "mutations_backfilled": mutations.len(),
                "inserted_filter_values": inserted_values,
            })),
        )?;
        let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
            EngineError::Bridge("operational collection missing after filter update".to_owned())
        })?;
        tx.commit()?;
        Ok(updated)
    }

    /// # Errors
    /// Returns [`EngineError`] if the collection is missing or the validation contract is invalid.
    pub fn update_operational_collection_validation(
        &self,
        name: &str,
        validation_json: &str,
    ) -> Result<OperationalCollectionRecord, EngineError> {
        parse_operational_validation_contract(validation_json)
            .map_err(EngineError::InvalidWrite)?;

        let mut conn = self.connect()?;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
        load_operational_collection_record(&tx, name)?.ok_or_else(|| {
            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
        })?;
        tx.execute(
            "UPDATE operational_collections SET validation_json = ?2 WHERE name = ?1",
            rusqlite::params![name, validation_json],
        )?;
        persist_simple_provenance_event(
            &tx,
            "operational_collection_validation_updated",
            name,
            Some(serde_json::json!({
                "has_validation": !validation_json.is_empty(),
            })),
        )?;
        let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
            EngineError::Bridge("operational collection missing after validation update".to_owned())
        })?;
        tx.commit()?;
        Ok(updated)
    }

    /// # Errors
    /// Returns [`EngineError`] if the collection is missing, the contract is invalid,
    /// or derived index rebuild fails.
    pub fn update_operational_collection_secondary_indexes(
        &self,
        name: &str,
        secondary_indexes_json: &str,
    ) -> Result<OperationalCollectionRecord, EngineError> {
        let mut conn = self.connect()?;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
        let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
        })?;
        let indexes = parse_operational_secondary_indexes_json(secondary_indexes_json, record.kind)
            .map_err(EngineError::InvalidWrite)?;
        tx.execute(
            "UPDATE operational_collections SET secondary_indexes_json = ?2 WHERE name = ?1",
            rusqlite::params![name, secondary_indexes_json],
        )?;
        let (mutation_entries_rebuilt, current_entries_rebuilt) =
            rebuild_operational_secondary_index_entries(&tx, &record.name, record.kind, &indexes)?;
        persist_simple_provenance_event(
            &tx,
            "operational_collection_secondary_indexes_updated",
            name,
            Some(serde_json::json!({
                "index_count": indexes.len(),
                "mutation_entries_rebuilt": mutation_entries_rebuilt,
                "current_entries_rebuilt": current_entries_rebuilt,
            })),
        )?;
        let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
            EngineError::Bridge(
                "operational collection missing after secondary index update".to_owned(),
            )
        })?;
        tx.commit()?;
        Ok(updated)
    }

    /// # Errors
    /// Returns [`EngineError`] if the collection is missing or rebuild fails.
    pub fn rebuild_operational_secondary_indexes(
        &self,
        name: &str,
    ) -> Result<OperationalSecondaryIndexRebuildReport, EngineError> {
        let mut conn = self.connect()?;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
        let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
        })?;
        let indexes =
            parse_operational_secondary_indexes_json(&record.secondary_indexes_json, record.kind)
                .map_err(EngineError::InvalidWrite)?;
        let (mutation_entries_rebuilt, current_entries_rebuilt) =
            rebuild_operational_secondary_index_entries(&tx, &record.name, record.kind, &indexes)?;
        persist_simple_provenance_event(
            &tx,
            "operational_secondary_indexes_rebuilt",
            name,
            Some(serde_json::json!({
                "index_count": indexes.len(),
                "mutation_entries_rebuilt": mutation_entries_rebuilt,
                "current_entries_rebuilt": current_entries_rebuilt,
            })),
        )?;
        tx.commit()?;
        Ok(OperationalSecondaryIndexRebuildReport {
            collection_name: name.to_owned(),
            mutation_entries_rebuilt,
            current_entries_rebuilt,
        })
    }

    /// # Errors
    /// Returns [`EngineError`] if the collection is missing or its validation contract is invalid.
    pub fn validate_operational_collection_history(
        &self,
        name: &str,
    ) -> Result<OperationalHistoryValidationReport, EngineError> {
        let conn = self.connect()?;
        let record = load_operational_collection_record(&conn, name)?.ok_or_else(|| {
            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
        })?;
        let Some(contract) = parse_operational_validation_contract(&record.validation_json)
            .map_err(EngineError::InvalidWrite)?
        else {
            return Err(EngineError::InvalidWrite(format!(
                "operational collection '{name}' has no validation_json configured"
            )));
        };

        let mut stmt = conn.prepare(
            "SELECT id, record_key, op_kind, payload_json FROM operational_mutations \
             WHERE collection_name = ?1 ORDER BY mutation_order",
        )?;
        let rows = stmt
            .query_map([name], |row| {
                Ok((
                    row.get::<_, String>(0)?,
                    row.get::<_, String>(1)?,
                    row.get::<_, String>(2)?,
                    row.get::<_, String>(3)?,
                ))
            })?
            .collect::<Result<Vec<_>, _>>()?;
        drop(stmt);

        let mut checked_rows = 0usize;
        let mut issues = Vec::new();
        for (mutation_id, record_key, op_kind, payload_json) in rows {
            if op_kind == "delete" {
                continue;
            }
            checked_rows += 1;
            if let Err(message) =
                validate_operational_payload_against_contract(&contract, payload_json.as_str())
            {
                issues.push(OperationalHistoryValidationIssue {
                    mutation_id,
                    record_key,
                    op_kind,
                    message,
                });
            }
        }

        Ok(OperationalHistoryValidationReport {
            collection_name: name.to_owned(),
            checked_rows,
            invalid_row_count: issues.len(),
            issues,
        })
    }

    /// # Errors
    /// Returns [`EngineError`] if the database query fails.
    pub fn disable_operational_collection(
        &self,
        name: &str,
    ) -> Result<OperationalCollectionRecord, EngineError> {
        let mut conn = self.connect()?;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
        let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
        })?;
        let changed = if record.disabled_at.is_none() {
            tx.execute(
                "UPDATE operational_collections SET disabled_at = unixepoch() WHERE name = ?1",
                [name],
            )?;
            true
        } else {
            false
        };
        let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
            EngineError::Bridge("operational collection missing after disable".to_owned())
        })?;
        persist_simple_provenance_event(
            &tx,
            "operational_collection_disabled",
            name,
            Some(serde_json::json!({
                "disabled_at": record.disabled_at,
                "changed": changed,
            })),
        )?;
        tx.commit()?;
        Ok(record)
    }

    /// # Errors
    /// Returns [`EngineError`] if the database query fails.
    pub fn compact_operational_collection(
        &self,
        name: &str,
        dry_run: bool,
    ) -> Result<OperationalCompactionReport, EngineError> {
        let mut conn = self.connect()?;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
        let collection = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
        })?;
        validate_append_only_operational_collection(&collection, "compact")?;
        let (mutation_ids, before_timestamp) =
            operational_compaction_candidates(&tx, &collection.retention_json, name)?;
        if dry_run {
            drop(tx);
            return Ok(OperationalCompactionReport {
                collection_name: name.to_owned(),
                deleted_mutations: mutation_ids.len(),
                dry_run: true,
                before_timestamp,
            });
        }
        let mut delete_stmt =
            tx.prepare_cached("DELETE FROM operational_mutations WHERE id = ?1")?;
        for mutation_id in &mutation_ids {
            delete_stmt.execute([mutation_id.as_str()])?;
        }
        drop(delete_stmt);
        persist_simple_provenance_event(
            &tx,
            "operational_collection_compacted",
            name,
            Some(serde_json::json!({
                "deleted_mutations": mutation_ids.len(),
                "before_timestamp": before_timestamp,
            })),
        )?;
        tx.commit()?;
        Ok(OperationalCompactionReport {
            collection_name: name.to_owned(),
            deleted_mutations: mutation_ids.len(),
            dry_run: false,
            before_timestamp,
        })
    }
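
    // Example (sketch, hypothetical collection name): preview a compaction with a
    // dry run before actually deleting mutation history.
    //
    //     let preview = admin.compact_operational_collection("agent_events", true)?;
    //     if preview.deleted_mutations > 0 {
    //         let applied = admin.compact_operational_collection("agent_events", false)?;
    //         assert!(!applied.dry_run);
    //     }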

    /// # Errors
    /// Returns [`EngineError`] if the database query fails.
    pub fn purge_operational_collection(
        &self,
        name: &str,
        before_timestamp: i64,
    ) -> Result<OperationalPurgeReport, EngineError> {
        let mut conn = self.connect()?;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
        let collection = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
        })?;
        validate_append_only_operational_collection(&collection, "purge")?;
        let deleted_mutations = tx.execute(
            "DELETE FROM operational_mutations WHERE collection_name = ?1 AND created_at < ?2",
            rusqlite::params![name, before_timestamp],
        )?;
        persist_simple_provenance_event(
            &tx,
            "operational_collection_purged",
            name,
            Some(serde_json::json!({
                "deleted_mutations": deleted_mutations,
                "before_timestamp": before_timestamp,
            })),
        )?;
        tx.commit()?;
        Ok(OperationalPurgeReport {
            collection_name: name.to_owned(),
            deleted_mutations,
            before_timestamp,
        })
    }

    /// # Errors
    /// Returns [`EngineError`] if collection selection or policy parsing fails.
    pub fn plan_operational_retention(
        &self,
        now_timestamp: i64,
        collection_names: Option<&[String]>,
        max_collections: Option<usize>,
    ) -> Result<OperationalRetentionPlanReport, EngineError> {
        let conn = self.connect()?;
        let records = load_operational_retention_records(&conn, collection_names, max_collections)?;
        let mut items = Vec::with_capacity(records.len());
        for record in records {
            items.push(plan_operational_retention_item(
                &conn,
                &record,
                now_timestamp,
            )?);
        }
        Ok(OperationalRetentionPlanReport {
            planned_at: now_timestamp,
            collections_examined: items.len(),
            items,
        })
    }

    /// # Errors
    /// Returns [`EngineError`] if collection selection, policy parsing, or execution fails.
    pub fn run_operational_retention(
        &self,
        now_timestamp: i64,
        collection_names: Option<&[String]>,
        max_collections: Option<usize>,
        dry_run: bool,
    ) -> Result<OperationalRetentionRunReport, EngineError> {
        let mut conn = self.connect()?;
        let records = load_operational_retention_records(&conn, collection_names, max_collections)?;
        let mut items = Vec::with_capacity(records.len());
        let mut collections_acted_on = 0usize;

        for record in records {
            let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
            let item = run_operational_retention_item(&tx, &record, now_timestamp, dry_run)?;
            if item.deleted_mutations > 0 {
                collections_acted_on += 1;
            }
            if dry_run || item.action_kind == OperationalRetentionActionKind::Noop {
                drop(tx);
            } else {
                tx.commit()?;
            }
            items.push(item);
        }

        Ok(OperationalRetentionRunReport {
            executed_at: now_timestamp,
            collections_examined: items.len(),
            collections_acted_on,
            dry_run,
            items,
        })
    }
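
    // Example (sketch): plan retention across all registered collections, then
    // execute it as a dry run first. The timestamp is hypothetical.
    //
    //     let now = 1_700_000_000; // unix seconds
    //     let plan = admin.plan_operational_retention(now, None, None)?;
    //     println!("{} collection(s) examined", plan.collections_examined);
    //     let run = admin.run_operational_retention(now, None, None, true)?;
    //     assert!(run.dry_run);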

    /// # Errors
    /// Returns [`EngineError`] if the database query fails.
    pub fn trace_operational_collection(
        &self,
        collection_name: &str,
        record_key: Option<&str>,
    ) -> Result<OperationalTraceReport, EngineError> {
        let conn = self.connect()?;
        ensure_operational_collection_registered(&conn, collection_name)?;
        let mutations = if let Some(record_key) = record_key {
            let mut stmt = conn.prepare(
                "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
                 FROM operational_mutations \
                 WHERE collection_name = ?1 AND record_key = ?2 \
                 ORDER BY mutation_order",
            )?;
            stmt.query_map([collection_name, record_key], map_operational_mutation_row)?
                .collect::<Result<Vec<_>, _>>()?
        } else {
            let mut stmt = conn.prepare(
                "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
                 FROM operational_mutations \
                 WHERE collection_name = ?1 \
                 ORDER BY mutation_order",
            )?;
            stmt.query_map([collection_name], map_operational_mutation_row)?
                .collect::<Result<Vec<_>, _>>()?
        };
        let current_rows = if let Some(record_key) = record_key {
            let mut stmt = conn.prepare(
                "SELECT collection_name, record_key, payload_json, updated_at, last_mutation_id \
                 FROM operational_current \
                 WHERE collection_name = ?1 AND record_key = ?2 \
                 ORDER BY updated_at, record_key",
            )?;
            stmt.query_map([collection_name, record_key], map_operational_current_row)?
                .collect::<Result<Vec<_>, _>>()?
        } else {
            let mut stmt = conn.prepare(
                "SELECT collection_name, record_key, payload_json, updated_at, last_mutation_id \
                 FROM operational_current \
                 WHERE collection_name = ?1 \
                 ORDER BY updated_at, record_key",
            )?;
            stmt.query_map([collection_name], map_operational_current_row)?
                .collect::<Result<Vec<_>, _>>()?
        };

        Ok(OperationalTraceReport {
            collection_name: collection_name.to_owned(),
            record_key: record_key.map(str::to_owned),
            mutation_count: mutations.len(),
            current_count: current_rows.len(),
            mutations,
            current_rows,
        })
    }
1417
1418    /// # Errors
1419    /// Returns [`EngineError`] if the collection contract is invalid or the filtered read fails.
1420    pub fn read_operational_collection(
1421        &self,
1422        request: &OperationalReadRequest,
1423    ) -> Result<OperationalReadReport, EngineError> {
1424        if request.collection_name.trim().is_empty() {
1425            return Err(EngineError::InvalidWrite(
1426                "operational read collection_name must not be empty".to_owned(),
1427            ));
1428        }
1429        if request.filters.is_empty() {
1430            return Err(EngineError::InvalidWrite(
1431                "operational read requires at least one filter clause".to_owned(),
1432            ));
1433        }
1434
1435        let conn = self.connect()?;
1436        let record = load_operational_collection_record(&conn, &request.collection_name)?
1437            .ok_or_else(|| {
1438                EngineError::InvalidWrite(format!(
1439                    "operational collection '{}' is not registered",
1440                    request.collection_name
1441                ))
1442            })?;
1443        validate_append_only_operational_collection(&record, "read")?;
1444        let declared_fields = parse_operational_filter_fields(&record.filter_fields_json)
1445            .map_err(EngineError::InvalidWrite)?;
1446        let secondary_indexes =
1447            parse_operational_secondary_indexes_json(&record.secondary_indexes_json, record.kind)
1448                .map_err(EngineError::InvalidWrite)?;
1449        let applied_limit = operational_read_limit(request.limit)?;
1450        let filters = compile_operational_read_filters(&request.filters, &declared_fields)?;
1451        if let Some(report) = execute_operational_secondary_index_read(
1452            &conn,
1453            &request.collection_name,
1454            &filters,
1455            &secondary_indexes,
1456            applied_limit,
1457        )? {
1458            return Ok(report);
1459        }
1460        execute_operational_filtered_read(&conn, &request.collection_name, &filters, applied_limit)
1461    }
1462
1463    /// # Errors
1464    /// Returns [`EngineError`] if the database query fails or collection validation fails.
1465    pub fn rebuild_operational_current(
1466        &self,
1467        collection_name: Option<&str>,
1468    ) -> Result<OperationalRepairReport, EngineError> {
1469        let mut conn = self.connect()?;
1470        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1471        let collections = if let Some(name) = collection_name {
1472            let maybe_kind: Option<String> = tx
1473                .query_row(
1474                    "SELECT kind FROM operational_collections WHERE name = ?1",
1475                    [name],
1476                    |row| row.get(0),
1477                )
1478                .optional()?;
1479            let Some(kind) = maybe_kind else {
1480                return Err(EngineError::InvalidWrite(format!(
1481                    "operational collection '{name}' is not registered"
1482                )));
1483            };
1484            if kind != OperationalCollectionKind::LatestState.as_str() {
1485                return Err(EngineError::InvalidWrite(format!(
1486                    "operational collection '{name}' is not latest_state"
1487                )));
1488            }
1489            vec![name.to_owned()]
1490        } else {
1491            let mut stmt = tx.prepare(
1492                "SELECT name FROM operational_collections WHERE kind = 'latest_state' ORDER BY name",
1493            )?;
1494            stmt.query_map([], |row| row.get::<_, String>(0))?
1495                .collect::<Result<Vec<_>, _>>()?
1496        };
1497
1498        let rebuilt_rows = rebuild_operational_current_rows(&tx, &collections)?;
1499        for collection in &collections {
1500            let record = load_operational_collection_record(&tx, collection)?.ok_or_else(|| {
1501                EngineError::Bridge(format!(
1502                    "operational collection '{collection}' missing during current rebuild"
1503                ))
1504            })?;
1505            let indexes = parse_operational_secondary_indexes_json(
1506                &record.secondary_indexes_json,
1507                record.kind,
1508            )
1509            .map_err(EngineError::InvalidWrite)?;
1510            if !indexes.is_empty() {
1511                rebuild_operational_secondary_index_entries(
1512                    &tx,
1513                    &record.name,
1514                    record.kind,
1515                    &indexes,
1516                )?;
1517            }
1518        }
1519
1520        persist_simple_provenance_event(
1521            &tx,
1522            "operational_current_rebuilt",
1523            collection_name.unwrap_or("*"),
1524            Some(serde_json::json!({
1525                "collections_rebuilt": collections.len(),
1526                "current_rows_rebuilt": rebuilt_rows,
1527            })),
1528        )?;
1529        tx.commit()?;
1530
1531        Ok(OperationalRepairReport {
1532            collections_rebuilt: collections.len(),
1533            current_rows_rebuilt: rebuilt_rows,
1534        })
1535    }
1536
1537    /// # Errors
1538    /// Returns [`EngineError`] if the database connection fails or the projection rebuild fails.
1539    pub fn rebuild_projections(
1540        &self,
1541        target: ProjectionTarget,
1542    ) -> Result<ProjectionRepairReport, EngineError> {
1543        self.projections.rebuild_projections(target)
1544    }
1545
1546    /// # Errors
1547    /// Returns [`EngineError`] if the database connection fails or the projection rebuild fails.
1548    pub fn rebuild_missing_projections(&self) -> Result<ProjectionRepairReport, EngineError> {
1549        self.projections.rebuild_missing_projections()
1550    }
1551
1552    /// Register (or update) an FTS property projection schema for the given node kind.
1553    ///
1554    /// After registration, any node of this kind will have the declared JSON property
1555    /// paths extracted, concatenated, and indexed in the `fts_node_properties` FTS5 table.
1556    ///
1557    /// # Errors
1558    /// Returns [`EngineError`] if `property_paths` is empty, contains duplicates,
1559    /// or if the database write fails.
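    ///
    /// # Example
    ///
    /// A minimal sketch. The `"document"` kind, the property paths, and the
    /// engine handle `engine` are illustrative assumptions rather than values
    /// defined by this crate.
    ///
    /// ```ignore
    /// let record = engine.register_fts_property_schema(
    ///     "document",
    ///     &["$.title".to_owned(), "$.summary".to_owned()],
    ///     Some(" "), // separator between concatenated extracted values
    /// )?;
    /// assert_eq!(record.kind, "document");
    /// assert_eq!(record.property_paths.len(), 2);
    /// ```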
1560    pub fn register_fts_property_schema(
1561        &self,
1562        kind: &str,
1563        property_paths: &[String],
1564        separator: Option<&str>,
1565    ) -> Result<FtsPropertySchemaRecord, EngineError> {
1566        let specs: Vec<FtsPropertyPathSpec> = property_paths
1567            .iter()
1568            .map(|p| FtsPropertyPathSpec::scalar(p.clone()))
1569            .collect();
1570        self.register_fts_property_schema_with_entries(
1571            kind,
1572            &specs,
1573            separator,
1574            &[],
1575            RebuildMode::Eager,
1576        )
1577    }
1578
1579    /// Register (or update) an FTS property projection schema with
1580    /// per-path modes and optional exclude paths.
1581    ///
1582    /// Under `RebuildMode::Eager` (the legacy mode), the full rebuild runs
1583    /// inside the registration transaction — same behavior as before Pack 7.
1584    ///
1585    /// Under `RebuildMode::Async` (the 0.4.1 default), the schema row is
1586    /// persisted in a short IMMEDIATE transaction, a rebuild-state row is
1587    /// upserted, and the actual rebuild is handed off to the background
1588    /// `RebuildActor`.  The register call returns in <100ms even for large
1589    /// kinds.
1590    ///
1591    /// # Errors
1592    /// Returns [`EngineError`] if the paths are invalid, the JSON
1593    /// serialization fails, or the (schema-persist / rebuild) transaction fails.
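    ///
    /// # Example
    ///
    /// An async-registration sketch using scalar entries only; `engine` and
    /// the `"ticket"` kind are illustrative assumptions. Recursive entries
    /// ([`FtsPropertyPathMode::Recursive`]) and `exclude_paths` follow the
    /// same call shape.
    ///
    /// ```ignore
    /// let entries = vec![
    ///     FtsPropertyPathSpec::scalar("$.subject".to_owned()),
    ///     FtsPropertyPathSpec::scalar("$.body".to_owned()),
    /// ];
    /// let record = engine.register_fts_property_schema_with_entries(
    ///     "ticket",
    ///     &entries,
    ///     None,               // default separator (" ")
    ///     &[],                // no excluded subtrees
    ///     RebuildMode::Async, // rebuild handed to the background RebuildActor
    /// )?;
    /// // Poll rebuild progress via `get_property_fts_rebuild_state("ticket")`.
    /// ```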
1594    pub fn register_fts_property_schema_with_entries(
1595        &self,
1596        kind: &str,
1597        entries: &[FtsPropertyPathSpec],
1598        separator: Option<&str>,
1599        exclude_paths: &[String],
1600        mode: RebuildMode,
1601    ) -> Result<FtsPropertySchemaRecord, EngineError> {
1602        let paths: Vec<String> = entries.iter().map(|e| e.path.clone()).collect();
1603        validate_fts_property_paths(&paths)?;
1604        for p in exclude_paths {
1605            if !p.starts_with("$.") {
1606                return Err(EngineError::InvalidWrite(format!(
1607                    "exclude_paths entries must start with '$.' but got: {p}"
1608                )));
1609            }
1610        }
1611        let separator = separator.unwrap_or(" ");
1612        let paths_json = serialize_property_paths_json(entries, exclude_paths)?;
1613
1614        match mode {
1615            RebuildMode::Eager => self.register_fts_property_schema_eager(
1616                kind,
1617                entries,
1618                separator,
1619                exclude_paths,
1620                &paths,
1621                &paths_json,
1622            ),
1623            RebuildMode::Async => {
1624                self.register_fts_property_schema_async(kind, separator, &paths, &paths_json)
1625            }
1626        }
1627    }
1628
1629    /// Eager path: existing transactional behavior unchanged.
1630    fn register_fts_property_schema_eager(
1631        &self,
1632        kind: &str,
1633        entries: &[FtsPropertyPathSpec],
1634        separator: &str,
1635        exclude_paths: &[String],
1636        paths: &[String],
1637        paths_json: &str,
1638    ) -> Result<FtsPropertySchemaRecord, EngineError> {
1639        let mut conn = self.connect()?;
1640        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1641
1642        // Determine whether the registration introduces a recursive path
1643        // that was not present in the previously-registered schema for
1644        // this kind. If so, we must eagerly rebuild property FTS rows and
1645        // position map for every active node of this kind within the same
1646        // transaction.
1647        let previous_row: Option<(String, String)> = tx
1648            .query_row(
1649                "SELECT property_paths_json, separator FROM fts_property_schemas WHERE kind = ?1",
1650                [kind],
1651                |row| {
1652                    let json: String = row.get(0)?;
1653                    let sep: String = row.get(1)?;
1654                    Ok((json, sep))
1655                },
1656            )
1657            .optional()?;
1658        let had_previous_schema = previous_row.is_some();
1659        let previous_recursive_paths: Vec<String> = previous_row
1660            .map(|(json, sep)| crate::writer::parse_property_schema_json(&json, &sep))
1661            .map_or(Vec::new(), |schema| {
1662                schema
1663                    .paths
1664                    .into_iter()
1665                    .filter(|p| p.mode == crate::writer::PropertyPathMode::Recursive)
1666                    .map(|p| p.path)
1667                    .collect()
1668            });
1669        let new_recursive_paths: Vec<&str> = entries
1670            .iter()
1671            .filter(|e| e.mode == FtsPropertyPathMode::Recursive)
1672            .map(|e| e.path.as_str())
1673            .collect();
1674        let introduces_new_recursive = new_recursive_paths
1675            .iter()
1676            .any(|p| !previous_recursive_paths.iter().any(|prev| prev == p));
1677
1678        tx.execute(
1679            "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
1680             VALUES (?1, ?2, ?3) \
1681             ON CONFLICT(kind) DO UPDATE SET property_paths_json = ?2, separator = ?3",
1682            rusqlite::params![kind, paths_json, separator],
1683        )?;
1684
1685        // Eager transactional rebuild: fire whenever the schema row already
1686        // existed (any update), or when the registration introduces a new
1687        // recursive path. Updates trigger a rebuild unconditionally: this
1688        // covers recursive-path additions AND scalar-only re-registrations
1689        // where only the path or separator changed; without a rebuild the
1690        // existing rows would retain stale scalar-derived text. First-time
1691        // registrations with no recursive paths skip the rebuild. (P4-P2-1)
1692        let needs_rebuild = introduces_new_recursive || had_previous_schema;
1693        if needs_rebuild {
1694            tx.execute("DELETE FROM fts_node_properties WHERE kind = ?1", [kind])?;
1695            tx.execute(
1696                "DELETE FROM fts_node_property_positions WHERE kind = ?1",
1697                [kind],
1698            )?;
1699            // Scope the rebuild to `kind` only. The multi-kind
1700            // `insert_property_fts_rows` iterates over every registered
1701            // schema and would re-insert rows for siblings that were not
1702            // deleted above, duplicating their FTS entries.
1703            crate::projection::insert_property_fts_rows_for_kind(&tx, kind)?;
1704        }
1705
1706        persist_simple_provenance_event(
1707            &tx,
1708            "fts_property_schema_registered",
1709            kind,
1710            Some(serde_json::json!({
1711                "property_paths": paths,
1712                "separator": separator,
1713                "exclude_paths": exclude_paths,
1714                "eager_rebuild": needs_rebuild,
1715            })),
1716        )?;
1717        tx.commit()?;
1718
1719        self.describe_fts_property_schema(kind)?.ok_or_else(|| {
1720            EngineError::Bridge("registered FTS property schema missing after commit".to_owned())
1721        })
1722    }
1723
1724    /// Async path: schema persisted in a short tx; rebuild handed to actor.
1725    fn register_fts_property_schema_async(
1726        &self,
1727        kind: &str,
1728        separator: &str,
1729        paths: &[String],
1730        paths_json: &str,
1731    ) -> Result<FtsPropertySchemaRecord, EngineError> {
1732        let mut conn = self.connect()?;
1733        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1734
1735        // Detect first-registration vs re-registration.
1736        let had_previous_schema: bool = tx
1737            .query_row(
1738                "SELECT count(*) FROM fts_property_schemas WHERE kind = ?1",
1739                rusqlite::params![kind],
1740                |r| r.get::<_, i64>(0),
1741            )
1742            .unwrap_or(0)
1743            > 0;
1744
1745        // Upsert schema row (fast — just a metadata write).
1746        tx.execute(
1747            "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
1748             VALUES (?1, ?2, ?3) \
1749             ON CONFLICT(kind) DO UPDATE SET property_paths_json = ?2, separator = ?3",
1750            rusqlite::params![kind, paths_json, separator],
1751        )?;
1752
1753        // Retrieve the rowid of the schema row as schema_id.
1754        let schema_id: i64 = tx.query_row(
1755            "SELECT rowid FROM fts_property_schemas WHERE kind = ?1",
1756            rusqlite::params![kind],
1757            |r| r.get(0),
1758        )?;
1759
1760        let now_ms = crate::rebuild_actor::now_unix_ms_pub();
1761        let is_first = i64::from(!had_previous_schema);
1762
1763        // Upsert rebuild state row.
1764        tx.execute(
1765            "INSERT INTO fts_property_rebuild_state \
1766             (kind, schema_id, state, rows_done, started_at, is_first_registration) \
1767             VALUES (?1, ?2, 'PENDING', 0, ?3, ?4) \
1768             ON CONFLICT(kind) DO UPDATE SET \
1769                 schema_id = excluded.schema_id, \
1770                 state = 'PENDING', \
1771                 rows_total = NULL, \
1772                 rows_done = 0, \
1773                 started_at = excluded.started_at, \
1774                 last_progress_at = NULL, \
1775                 error_message = NULL, \
1776                 is_first_registration = excluded.is_first_registration",
1777            rusqlite::params![kind, schema_id, now_ms, is_first],
1778        )?;
1779
1780        persist_simple_provenance_event(
1781            &tx,
1782            "fts_property_schema_registered",
1783            kind,
1784            Some(serde_json::json!({
1785                "property_paths": paths,
1786                "separator": separator,
1787                "mode": "async",
1788            })),
1789        )?;
1790        tx.commit()?;
1791
1792        // Enqueue the rebuild request if the actor is available.
1793        // try_send is non-blocking: if the channel is full (capacity 64), the
1794        // request is dropped. The state row stays PENDING and the caller can
1795        // observe this via get_property_fts_rebuild_state. No automatic retry
1796        // in 0.4.1 — caller must re-invoke register to re-enqueue.
1797        if let Some(sender) = &self.rebuild_sender
1798            && sender
1799                .try_send(RebuildRequest {
1800                    kind: kind.to_owned(),
1801                    schema_id,
1802                })
1803                .is_err()
1804        {
1805            trace_warn!(
1806                kind = %kind,
1807                "rebuild channel full; rebuild request dropped — state remains PENDING"
1808            );
1809        }
1810
1811        self.describe_fts_property_schema(kind)?.ok_or_else(|| {
1812            EngineError::Bridge("registered FTS property schema missing after commit".to_owned())
1813        })
1814    }
1815
1816    /// Return the rebuild state row for a kind, if one exists.
1817    ///
1818    /// # Errors
1819    /// Returns [`EngineError`] if the database query fails.
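    ///
    /// # Example
    ///
    /// A polling sketch after an async registration; `engine` and the
    /// `"ticket"` kind are illustrative assumptions.
    ///
    /// ```ignore
    /// if let Some(row) = engine.get_property_fts_rebuild_state("ticket")? {
    ///     // `state` starts as "PENDING" until the RebuildActor picks it up.
    ///     println!(
    ///         "{}: {} ({} / {:?} rows)",
    ///         row.kind, row.state, row.rows_done, row.rows_total
    ///     );
    /// }
    /// ```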
1820    pub fn get_property_fts_rebuild_state(
1821        &self,
1822        kind: &str,
1823    ) -> Result<Option<RebuildStateRow>, EngineError> {
1824        let conn = self.connect()?;
1825        let row = conn
1826            .query_row(
1827                "SELECT kind, schema_id, state, rows_total, rows_done, \
1828                 started_at, is_first_registration, error_message \
1829                 FROM fts_property_rebuild_state WHERE kind = ?1",
1830                rusqlite::params![kind],
1831                |r| {
1832                    Ok(RebuildStateRow {
1833                        kind: r.get(0)?,
1834                        schema_id: r.get(1)?,
1835                        state: r.get(2)?,
1836                        rows_total: r.get(3)?,
1837                        rows_done: r.get(4)?,
1838                        started_at: r.get(5)?,
1839                        is_first_registration: r.get::<_, i64>(6)? != 0,
1840                        error_message: r.get(7)?,
1841                    })
1842                },
1843            )
1844            .optional()?;
1845        Ok(row)
1846    }
1847
1848    /// Return the count of rows in `fts_property_rebuild_staging` for a kind.
1849    /// Used by tests to verify the staging table was populated.
1850    ///
1851    /// # Errors
1852    /// Returns [`EngineError`] if the database query fails.
1853    pub fn count_staging_rows(&self, kind: &str) -> Result<i64, EngineError> {
1854        let conn = self.connect()?;
1855        let count: i64 = conn.query_row(
1856            "SELECT count(*) FROM fts_property_rebuild_staging WHERE kind = ?1",
1857            rusqlite::params![kind],
1858            |r| r.get(0),
1859        )?;
1860        Ok(count)
1861    }
1862
1863    /// Return whether a specific node is present in `fts_property_rebuild_staging`.
1864    /// Used by tests to verify the double-write path.
1865    ///
1866    /// # Errors
1867    /// Returns [`EngineError`] if the database query fails.
1868    pub fn staging_row_exists(
1869        &self,
1870        kind: &str,
1871        node_logical_id: &str,
1872    ) -> Result<bool, EngineError> {
1873        let conn = self.connect()?;
1874        let count: i64 = conn.query_row(
1875            "SELECT count(*) FROM fts_property_rebuild_staging WHERE kind = ?1 AND node_logical_id = ?2",
1876            rusqlite::params![kind, node_logical_id],
1877            |r| r.get(0),
1878        )?;
1879        Ok(count > 0)
1880    }
1881
1882    /// Return the FTS property schema for a single node kind, if registered.
1883    ///
1884    /// # Errors
1885    /// Returns [`EngineError`] if the database query fails.
1886    pub fn describe_fts_property_schema(
1887        &self,
1888        kind: &str,
1889    ) -> Result<Option<FtsPropertySchemaRecord>, EngineError> {
1890        let conn = self.connect()?;
1891        load_fts_property_schema_record(&conn, kind)
1892    }
1893
1894    /// Return all registered FTS property schemas.
1895    ///
1896    /// # Errors
1897    /// Returns [`EngineError`] if the database query fails.
1898    pub fn list_fts_property_schemas(&self) -> Result<Vec<FtsPropertySchemaRecord>, EngineError> {
1899        let conn = self.connect()?;
1900        let mut stmt = conn.prepare(
1901            "SELECT kind, property_paths_json, separator, format_version \
1902             FROM fts_property_schemas ORDER BY kind",
1903        )?;
1904        let records = stmt
1905            .query_map([], |row| {
1906                let kind: String = row.get(0)?;
1907                let paths_json: String = row.get(1)?;
1908                let separator: String = row.get(2)?;
1909                let format_version: i64 = row.get(3)?;
1910                Ok(build_fts_property_schema_record(
1911                    kind,
1912                    &paths_json,
1913                    separator,
1914                    format_version,
1915                ))
1916            })?
1917            .collect::<Result<Vec<_>, _>>()?;
1918        Ok(records)
1919    }
1920
1921    /// Remove the FTS property schema for a node kind.
1922    ///
1923    /// This does **not** delete existing `fts_node_properties` rows for this kind;
1924    /// call `rebuild_projections(Fts)` to clean up stale rows.
1925    ///
1926    /// # Errors
1927    /// Returns [`EngineError`] if the kind is not registered or the delete fails.
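    ///
    /// # Example
    ///
    /// A sketch pairing removal with the cleanup rebuild suggested above;
    /// `engine` and the `"ticket"` kind are illustrative assumptions.
    ///
    /// ```ignore
    /// engine.remove_fts_property_schema("ticket")?;
    /// // Stale fts_node_properties rows for this kind remain until a rebuild.
    /// engine.rebuild_projections(ProjectionTarget::Fts)?;
    /// ```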
1928    pub fn remove_fts_property_schema(&self, kind: &str) -> Result<(), EngineError> {
1929        let mut conn = self.connect()?;
1930        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1931        let deleted = tx.execute("DELETE FROM fts_property_schemas WHERE kind = ?1", [kind])?;
1932        if deleted == 0 {
1933            return Err(EngineError::InvalidWrite(format!(
1934                "FTS property schema for kind '{kind}' is not registered"
1935            )));
1936        }
1937        persist_simple_provenance_event(&tx, "fts_property_schema_removed", kind, None)?;
1938        tx.commit()?;
1939        Ok(())
1940    }
1941
1942    /// Recreate enabled vector profiles from persisted `vector_profiles` metadata.
1943    ///
1944    /// # Errors
1945    /// Returns [`EngineError`] if the database connection fails, reading metadata fails,
1946    /// or sqlite-vec support is unavailable while enabled profiles are present.
1947    pub fn restore_vector_profiles(&self) -> Result<ProjectionRepairReport, EngineError> {
1948        let conn = self.connect()?;
1949        let profiles: Vec<(String, String, i64)> = {
1950            let mut stmt = conn.prepare(
1951                "SELECT profile, table_name, dimension \
1952                 FROM vector_profiles WHERE enabled = 1 ORDER BY profile",
1953            )?;
1954            stmt.query_map([], |row| {
1955                Ok((
1956                    row.get::<_, String>(0)?,
1957                    row.get::<_, String>(1)?,
1958                    row.get::<_, i64>(2)?,
1959                ))
1960            })?
1961            .collect::<Result<Vec<_>, _>>()?
1962        };
1963
1964        for (profile, table_name, dimension) in &profiles {
1965            let dimension = usize::try_from(*dimension).map_err(|_| {
1966                EngineError::Bridge(format!("invalid vector profile dimension: {dimension}"))
1967            })?;
1968            self.schema_manager
1969                .ensure_vector_profile(&conn, profile, table_name, dimension)?;
1970        }
1971
1972        Ok(ProjectionRepairReport {
1973            targets: vec![ProjectionTarget::Vec],
1974            rebuilt_rows: profiles.len(),
1975            notes: vec![],
1976        })
1977    }
1978
1979    /// Rebuild vector embeddings using an application-supplied regeneration
1980    /// contract and generator command.
1981    ///
1982    /// The config is persisted in `vector_embedding_contracts` so the metadata
1983    /// required for recovery survives future repair runs.
1984    ///
1985    /// Vector identity is stamped from [`QueryEmbedder::identity`] — the
1986    /// caller supplies the embedder and cannot override its identity. This
1987    /// makes drift between the read-path and write-path identity stories
1988    /// structurally impossible.
1989    ///
1990    /// # Errors
1991    /// Returns [`EngineError`] if the database connection fails, the config is
1992    /// invalid, the embedder fails, or the regenerated embeddings are
1993    /// malformed.
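    ///
    /// # Example
    ///
    /// A sketch only: `my_embedder` stands in for an application type
    /// implementing [`QueryEmbedder`], and `config` is assumed to have been
    /// built elsewhere with at least a `profile` and `table_name`.
    ///
    /// ```ignore
    /// let report = engine.regenerate_vector_embeddings(&my_embedder, &config)?;
    /// println!(
    ///     "profile {}: regenerated {} of {} chunks",
    ///     report.profile, report.regenerated_rows, report.total_chunks
    /// );
    /// ```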
1994    #[allow(clippy::too_many_lines)]
1995    pub fn regenerate_vector_embeddings(
1996        &self,
1997        embedder: &dyn QueryEmbedder,
1998        config: &VectorRegenerationConfig,
1999    ) -> Result<VectorRegenerationReport, EngineError> {
2000        let conn = self.connect()?;
2001        let identity = embedder.identity();
2002        let config = validate_vector_regeneration_config(&conn, config, &identity)
2003            .map_err(|failure| failure.to_engine_error())?;
2004        let chunks = collect_regeneration_chunks(&conn)?;
2005        let payload = build_regeneration_input(&config, &identity, chunks.clone());
2006        let snapshot_hash = compute_snapshot_hash(&payload)?;
2007        let audit_metadata = VectorRegenerationAuditMetadata {
2008            profile: config.profile.clone(),
2009            model_identity: identity.model_identity.clone(),
2010            model_version: identity.model_version.clone(),
2011            chunk_count: chunks.len(),
2012            snapshot_hash: snapshot_hash.clone(),
2013            failure_class: None,
2014        };
2015        persist_vector_regeneration_event(
2016            &conn,
2017            "vector_regeneration_requested",
2018            &config.profile,
2019            &audit_metadata,
2020        )?;
2021        let notes = vec!["vector embeddings regenerated via configured embedder".to_owned()];
2022
2023        let mut embedding_map: std::collections::HashMap<String, Vec<u8>> =
2024            std::collections::HashMap::with_capacity(chunks.len());
2025        for chunk in &chunks {
2026            let vector = match embedder.embed_query(&chunk.text_content) {
2027                Ok(vector) => vector,
2028                Err(error) => {
2029                    let failure = VectorRegenerationFailure::new(
2030                        VectorRegenerationFailureClass::EmbedderFailure,
2031                        format!("embedder failed for chunk '{}': {error}", chunk.chunk_id),
2032                    );
2033                    self.persist_vector_regeneration_failure_best_effort(
2034                        &config.profile,
2035                        &audit_metadata,
2036                        &failure,
2037                    );
2038                    return Err(failure.to_engine_error());
2039                }
2040            };
2041            if vector.len() != identity.dimension {
2042                let failure = VectorRegenerationFailure::new(
2043                    VectorRegenerationFailureClass::InvalidEmbedderOutput,
2044                    format!(
2045                        "embedder produced {} values for chunk '{}', expected {}",
2046                        vector.len(),
2047                        chunk.chunk_id,
2048                        identity.dimension
2049                    ),
2050                );
2051                self.persist_vector_regeneration_failure_best_effort(
2052                    &config.profile,
2053                    &audit_metadata,
2054                    &failure,
2055                );
2056                return Err(failure.to_engine_error());
2057            }
2058            if vector.iter().any(|value| !value.is_finite()) {
2059                let failure = VectorRegenerationFailure::new(
2060                    VectorRegenerationFailureClass::InvalidEmbedderOutput,
2061                    format!(
2062                        "embedder returned non-finite values for chunk '{}'",
2063                        chunk.chunk_id
2064                    ),
2065                );
2066                self.persist_vector_regeneration_failure_best_effort(
2067                    &config.profile,
2068                    &audit_metadata,
2069                    &failure,
2070                );
2071                return Err(failure.to_engine_error());
2072            }
2073            let bytes: Vec<u8> = vector
2074                .iter()
2075                .flat_map(|value| value.to_le_bytes())
2076                .collect();
2077            embedding_map.insert(chunk.chunk_id.clone(), bytes);
2078        }
2079
2080        let mut conn = conn;
2081        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2082        match self.schema_manager.ensure_vector_profile(
2083            &tx,
2084            &config.profile,
2085            &config.table_name,
2086            identity.dimension,
2087        ) {
2088            Ok(()) => {}
2089            Err(SchemaError::MissingCapability(message)) => {
2090                let failure = VectorRegenerationFailure::new(
2091                    VectorRegenerationFailureClass::UnsupportedVecCapability,
2092                    message,
2093                );
2094                drop(tx);
2095                self.persist_vector_regeneration_failure_best_effort(
2096                    &config.profile,
2097                    &audit_metadata,
2098                    &failure,
2099                );
2100                return Err(failure.to_engine_error());
2101            }
2102            Err(error) => return Err(EngineError::Schema(error)),
2103        }
2104        let apply_chunks = collect_regeneration_chunks(&tx)?;
2105        let apply_payload = build_regeneration_input(&config, &identity, apply_chunks.clone());
2106        let apply_hash = compute_snapshot_hash(&apply_payload)?;
2107        if apply_hash != snapshot_hash {
2108            let failure = VectorRegenerationFailure::new(
2109                VectorRegenerationFailureClass::SnapshotDrift,
2110                "chunk snapshot changed during generation; retry".to_owned(),
2111            );
2112            drop(tx);
2113            self.persist_vector_regeneration_failure_best_effort(
2114                &config.profile,
2115                &audit_metadata,
2116                &failure,
2117            );
2118            return Err(failure.to_engine_error());
2119        }
2120        persist_vector_contract(&tx, &config, &identity, &snapshot_hash)?;
2121        tx.execute("DELETE FROM vec_nodes_active", [])?;
2122        let mut stmt = tx
2123            .prepare_cached("INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES (?1, ?2)")?;
2124        let mut regenerated_rows = 0usize;
2125        for chunk in &apply_chunks {
2126            let Some(embedding) = embedding_map.remove(&chunk.chunk_id) else {
2127                drop(stmt);
2128                drop(tx);
2129                let failure = VectorRegenerationFailure::new(
2130                    VectorRegenerationFailureClass::InvalidEmbedderOutput,
2131                    format!(
2132                        "embedder did not produce a vector for chunk '{}'",
2133                        chunk.chunk_id
2134                    ),
2135                );
2136                self.persist_vector_regeneration_failure_best_effort(
2137                    &config.profile,
2138                    &audit_metadata,
2139                    &failure,
2140                );
2141                return Err(failure.to_engine_error());
2142            };
2143            stmt.execute(rusqlite::params![chunk.chunk_id.as_str(), embedding])?;
2144            regenerated_rows += 1;
2145        }
2146        drop(stmt);
2147        persist_vector_regeneration_event(
2148            &tx,
2149            "vector_regeneration_apply",
2150            &config.profile,
2151            &audit_metadata,
2152        )?;
2153        tx.commit()?;
2154
2155        Ok(VectorRegenerationReport {
2156            profile: config.profile.clone(),
2157            table_name: config.table_name.clone(),
2158            dimension: identity.dimension,
2159            total_chunks: chunks.len(),
2160            regenerated_rows,
2161            contract_persisted: true,
2162            notes,
2163        })
2164    }
2165
2166    fn persist_vector_regeneration_failure_best_effort(
2167        &self,
2168        profile: &str,
2169        metadata: &VectorRegenerationAuditMetadata,
2170        failure: &VectorRegenerationFailure,
2171    ) {
2172        let Ok(conn) = self.connect() else {
2173            return;
2174        };
2175        let failure_metadata = VectorRegenerationAuditMetadata {
2176            profile: metadata.profile.clone(),
2177            model_identity: metadata.model_identity.clone(),
2178            model_version: metadata.model_version.clone(),
2179            chunk_count: metadata.chunk_count,
2180            snapshot_hash: metadata.snapshot_hash.clone(),
2181            failure_class: Some(failure.failure_class_label().to_owned()),
2182        };
2183        let _ = persist_vector_regeneration_event(
2184            &conn,
2185            "vector_regeneration_failed",
2186            profile,
2187            &failure_metadata,
2188        );
2189    }
2190
2191    /// # Errors
2192    /// Returns [`EngineError`] if the database connection fails or any SQL query fails.
2193    pub fn trace_source(&self, source_ref: &str) -> Result<TraceReport, EngineError> {
2194        let conn = self.connect()?;
2195
2196        let node_logical_ids = collect_strings(
2197            &conn,
2198            "SELECT logical_id FROM nodes WHERE source_ref = ?1 ORDER BY created_at",
2199            source_ref,
2200        )?;
2201        let action_ids = collect_strings(
2202            &conn,
2203            "SELECT id FROM actions WHERE source_ref = ?1 ORDER BY created_at",
2204            source_ref,
2205        )?;
2206        let operational_mutation_ids = collect_strings(
2207            &conn,
2208            "SELECT id FROM operational_mutations WHERE source_ref = ?1 ORDER BY mutation_order",
2209            source_ref,
2210        )?;
2211
2212        Ok(TraceReport {
2213            source_ref: source_ref.to_owned(),
2214            node_rows: count_source_ref(&conn, "nodes", source_ref)?,
2215            edge_rows: count_source_ref(&conn, "edges", source_ref)?,
2216            action_rows: count_source_ref(&conn, "actions", source_ref)?,
2217            operational_mutation_rows: count_source_ref(
2218                &conn,
2219                "operational_mutations",
2220                source_ref,
2221            )?,
2222            node_logical_ids,
2223            action_ids,
2224            operational_mutation_ids,
2225        })
2226    }
2227
2228    /// # Errors
2229    /// Returns [`EngineError`] if the database connection fails, the transaction cannot be
2230    /// started, or lifecycle restoration prerequisites are missing.
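    ///
    /// # Example
    ///
    /// A sketch; `engine` and the logical id are illustrative assumptions.
    ///
    /// ```ignore
    /// let report = engine.restore_logical_id("invoice-42")?;
    /// if report.was_noop {
    ///     println!("already active: {:?}", report.notes);
    /// } else {
    ///     println!("restored {} edge rows", report.restored_edge_rows);
    /// }
    /// ```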
2231    #[allow(clippy::too_many_lines)]
2232    pub fn restore_logical_id(
2233        &self,
2234        logical_id: &str,
2235    ) -> Result<LogicalRestoreReport, EngineError> {
2236        let mut conn = self.connect()?;
2237        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2238
2239        let active_count: i64 = tx.query_row(
2240            "SELECT count(*) FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
2241            [logical_id],
2242            |row| row.get(0),
2243        )?;
2244        if active_count > 0 {
2245            return Ok(LogicalRestoreReport {
2246                logical_id: logical_id.to_owned(),
2247                was_noop: true,
2248                restored_node_rows: 0,
2249                restored_edge_rows: 0,
2250                restored_chunk_rows: 0,
2251                restored_fts_rows: 0,
2252                restored_property_fts_rows: 0,
2253                restored_vec_rows: 0,
2254                skipped_edges: Vec::new(),
2255                notes: vec!["logical_id already active".to_owned()],
2256            });
2257        }
2258
2259        let restored_node: Option<(String, String)> = tx
2260            .query_row(
2261                "SELECT row_id, kind FROM nodes \
2262                 WHERE logical_id = ?1 AND superseded_at IS NOT NULL \
2263                 ORDER BY superseded_at DESC, created_at DESC, rowid DESC LIMIT 1",
2264                [logical_id],
2265                |row| Ok((row.get(0)?, row.get(1)?)),
2266            )
2267            .optional()?;
2268        let (restored_node_row_id, restored_kind) = restored_node.ok_or_else(|| {
2269            EngineError::InvalidWrite(format!("logical_id '{logical_id}' is not retired"))
2270        })?;
2271
2272        tx.execute(
2273            "UPDATE nodes SET superseded_at = NULL WHERE row_id = ?1",
2274            [restored_node_row_id.as_str()],
2275        )?;
2276
2277        let retire_scope: Option<(i64, Option<String>, i64)> = tx
2278            .query_row(
2279                "SELECT rowid, source_ref, created_at FROM provenance_events \
2280                 WHERE event_type = 'node_retire' AND subject = ?1 \
2281                 ORDER BY created_at DESC, rowid DESC LIMIT 1",
2282                [logical_id],
2283                |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
2284            )
2285            .optional()?;
2286        let (restored_edge_rows, skipped_edges) = if let Some((
2287            retire_event_rowid,
2288            retire_source_ref,
2289            retire_created_at,
2290        )) = retire_scope
2291        {
2292            restore_validated_edges(
2293                &tx,
2294                logical_id,
2295                retire_source_ref.as_deref(),
2296                retire_created_at,
2297                retire_event_rowid,
2298            )?
2299        } else {
2300            (0, Vec::new())
2301        };
2302
2303        let restored_chunk_rows: usize = tx
2304            .query_row(
2305                "SELECT count(*) FROM chunks WHERE node_logical_id = ?1",
2306                [logical_id],
2307                |row| row.get::<_, i64>(0),
2308            )
2309            .map(i64_to_usize)?;
2310        tx.execute(
2311            "DELETE FROM fts_nodes WHERE node_logical_id = ?1",
2312            [logical_id],
2313        )?;
2314        let restored_fts_rows = tx.execute(
2315            "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
2316             SELECT id, node_logical_id, ?2, text_content \
2317             FROM chunks WHERE node_logical_id = ?1",
2318            rusqlite::params![logical_id, restored_kind],
2319        )?;
2320        let restored_vec_rows = count_vec_rows_for_logical_id(&tx, logical_id)?;
2321
2322        // Rebuild property FTS for the restored node.
2323        tx.execute(
2324            "DELETE FROM fts_node_properties WHERE node_logical_id = ?1",
2325            [logical_id],
2326        )?;
2327        let restored_property_fts_rows =
2328            rebuild_single_node_property_fts(&tx, logical_id, &restored_kind)?;
2329
2330        persist_simple_provenance_event(
2331            &tx,
2332            "restore_logical_id",
2333            logical_id,
2334            Some(serde_json::json!({
2335                "restored_node_rows": 1,
2336                "restored_edge_rows": restored_edge_rows,
2337                "restored_chunk_rows": restored_chunk_rows,
2338                "restored_fts_rows": restored_fts_rows,
2339                "restored_property_fts_rows": restored_property_fts_rows,
2340                "restored_vec_rows": restored_vec_rows,
2341            })),
2342        )?;
2343        tx.commit()?;
2344
2345        Ok(LogicalRestoreReport {
2346            logical_id: logical_id.to_owned(),
2347            was_noop: false,
2348            restored_node_rows: 1,
2349            restored_edge_rows,
2350            restored_chunk_rows,
2351            restored_fts_rows,
2352            restored_property_fts_rows,
2353            restored_vec_rows,
2354            skipped_edges,
2355            notes: Vec::new(),
2356        })
2357    }
2358
2359    /// # Errors
2360    /// Returns [`EngineError`] if the database connection fails, the transaction cannot be
2361    /// started, or the purge mutation fails.
2362    pub fn purge_logical_id(&self, logical_id: &str) -> Result<LogicalPurgeReport, EngineError> {
2363        let mut conn = self.connect()?;
2364        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2365
2366        let active_count: i64 = tx.query_row(
2367            "SELECT count(*) FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
2368            [logical_id],
2369            |row| row.get(0),
2370        )?;
2371        if active_count > 0 {
2372            return Ok(LogicalPurgeReport {
2373                logical_id: logical_id.to_owned(),
2374                was_noop: true,
2375                deleted_node_rows: 0,
2376                deleted_edge_rows: 0,
2377                deleted_chunk_rows: 0,
2378                deleted_fts_rows: 0,
2379                deleted_vec_rows: 0,
2380                notes: vec!["logical_id is active; purge skipped".to_owned()],
2381            });
2382        }
2383
2384        let node_rows: i64 = tx.query_row(
2385            "SELECT count(*) FROM nodes WHERE logical_id = ?1",
2386            [logical_id],
2387            |row| row.get(0),
2388        )?;
2389        if node_rows == 0 {
2390            return Err(EngineError::InvalidWrite(format!(
2391                "logical_id '{logical_id}' does not exist"
2392            )));
2393        }
2394
2395        let deleted_vec_rows = delete_vec_rows_for_logical_id(&tx, logical_id)?;
2396        let deleted_fts_rows = tx.execute(
2397            "DELETE FROM fts_nodes WHERE node_logical_id = ?1",
2398            [logical_id],
2399        )?;
2400        let deleted_edge_rows = tx.execute(
2401            "DELETE FROM edges WHERE source_logical_id = ?1 OR target_logical_id = ?1",
2402            [logical_id],
2403        )?;
2404        let deleted_chunk_rows = tx.execute(
2405            "DELETE FROM chunks WHERE node_logical_id = ?1",
2406            [logical_id],
2407        )?;
2408        let deleted_node_rows =
2409            tx.execute("DELETE FROM nodes WHERE logical_id = ?1", [logical_id])?;
2410        tx.execute(
2411            "DELETE FROM node_access_metadata WHERE logical_id = ?1",
2412            [logical_id],
2413        )?;
2414
2415        persist_simple_provenance_event(
2416            &tx,
2417            "purge_logical_id",
2418            logical_id,
2419            Some(serde_json::json!({
2420                "deleted_node_rows": deleted_node_rows,
2421                "deleted_edge_rows": deleted_edge_rows,
2422                "deleted_chunk_rows": deleted_chunk_rows,
2423                "deleted_fts_rows": deleted_fts_rows,
2424                "deleted_vec_rows": deleted_vec_rows,
2425            })),
2426        )?;
2427        tx.commit()?;
2428
2429        Ok(LogicalPurgeReport {
2430            logical_id: logical_id.to_owned(),
2431            was_noop: false,
2432            deleted_node_rows,
2433            deleted_edge_rows,
2434            deleted_chunk_rows,
2435            deleted_fts_rows,
2436            deleted_vec_rows,
2437            notes: Vec::new(),
2438        })
2439    }
2440
2441    /// Purge provenance events older than `before_timestamp`.
2442    ///
2443    /// By default, `excise` and `purge_logical_id` event types are preserved so that
2444    /// data-deletion audit trails survive. Pass an explicit
2445    /// `preserve_event_types` list to override this default.
2446    ///
2447    /// # Errors
2448    /// Returns [`EngineError`] if the database connection fails, the transaction
2449    /// cannot be started, or any SQL statement fails.
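    ///
    /// # Example
    ///
    /// A dry-run sketch. `engine`, the cutoff, and the options literal are
    /// illustrative; only the fields this method reads
    /// (`preserve_event_types`, `dry_run`) are shown.
    ///
    /// ```ignore
    /// let cutoff = 1_650_000_000_i64; // purge events older than this
    /// let options = ProvenancePurgeOptions {
    ///     preserve_event_types: Vec::new(), // empty => default preservation list
    ///     dry_run: true,                    // count only, commit nothing
    /// };
    /// let report = engine.purge_provenance_events(cutoff, &options)?;
    /// println!("would delete {} events", report.events_deleted);
    /// ```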
2450    pub fn purge_provenance_events(
2451        &self,
2452        before_timestamp: i64,
2453        options: &ProvenancePurgeOptions,
2454    ) -> Result<ProvenancePurgeReport, EngineError> {
2455        let mut conn = self.connect()?;
2456        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2457
2458        let preserved_types: Vec<&str> = if options.preserve_event_types.is_empty() {
2459            vec!["excise", "purge_logical_id"]
2460        } else {
2461            options
2462                .preserve_event_types
2463                .iter()
2464                .map(String::as_str)
2465                .collect()
2466        };
2467
2468        // Build the NOT IN clause dynamically based on preserved types.
2469        let placeholders: String = (0..preserved_types.len())
2470            .map(|i| format!("?{}", i + 2))
2471            .collect::<Vec<_>>()
2472            .join(", ");
2473        let count_query = format!(
2474            "SELECT count(*) FROM provenance_events \
2475             WHERE created_at < ?1 AND event_type NOT IN ({placeholders})"
2476        );
2477        let delete_query = format!(
2478            "DELETE FROM provenance_events WHERE rowid IN (\
2479             SELECT rowid FROM provenance_events \
2480             WHERE created_at < ?1 AND event_type NOT IN ({placeholders}) \
2481             LIMIT 10000)"
2482        );
2483
2484        let bind_params = |stmt: &mut rusqlite::Statement<'_>| -> Result<(), rusqlite::Error> {
2485            stmt.raw_bind_parameter(1, before_timestamp)?;
2486            for (i, event_type) in preserved_types.iter().enumerate() {
2487                stmt.raw_bind_parameter(i + 2, *event_type)?;
2488            }
2489            Ok(())
2490        };
2491
2492        let events_deleted = if options.dry_run {
2493            let mut stmt = tx.prepare(&count_query)?;
2494            bind_params(&mut stmt)?;
2495            stmt.raw_query()
2496                .next()?
2497                .map_or(0, |row| row.get::<_, u64>(0).unwrap_or(0))
2498        } else {
2499            let mut total_deleted: u64 = 0;
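            // Delete in batches (LIMIT 10000 in delete_query above) until no
            // rows match, keeping each statement's work bounded while staying
            // inside the single IMMEDIATE transaction.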
2500            loop {
2501                let mut stmt = tx.prepare(&delete_query)?;
2502                bind_params(&mut stmt)?;
2503                let deleted = stmt.raw_execute()?;
2504                if deleted == 0 {
2505                    break;
2506                }
2507                total_deleted += deleted as u64;
2508            }
2509            total_deleted
2510        };
2511
2512        let total_after: u64 =
2513            tx.query_row("SELECT count(*) FROM provenance_events", [], |row| {
2514                row.get(0)
2515            })?;
2516
2517        let oldest_remaining: Option<i64> = tx
2518            .query_row("SELECT MIN(created_at) FROM provenance_events", [], |row| {
2519                row.get(0)
2520            })
2521            .optional()?
2522            .flatten();
2523
2524        if !options.dry_run {
2525            tx.commit()?;
2526        }
2527
2528        // In dry_run mode nothing was deleted, so total_after includes the
2529        // would-be-deleted rows; subtract to get the preserved count.
2530        let events_preserved = if options.dry_run {
2531            total_after - events_deleted
2532        } else {
2533            total_after
2534        };
2535
2536        Ok(ProvenancePurgeReport {
2537            events_deleted,
2538            events_preserved,
2539            oldest_remaining,
2540        })
2541    }
2542
2543    /// # Errors
2544    /// Returns [`EngineError`] if the database connection fails, the transaction cannot be
2545    /// started, or any SQL statement fails.
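    ///
    /// # Example
    ///
    /// A sketch; `engine` and the source ref are illustrative assumptions.
    /// The call returns the same report shape as [`Self::trace_source`],
    /// evaluated after the excision commits.
    ///
    /// ```ignore
    /// let after = engine.excise_source("import:2024-06-01")?;
    /// // Operational mutations for the ref are physically deleted.
    /// assert_eq!(after.operational_mutation_rows, 0);
    /// ```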
2546    #[allow(clippy::too_many_lines)]
2547    pub fn excise_source(&self, source_ref: &str) -> Result<TraceReport, EngineError> {
2548        let mut conn = self.connect()?;
2549
2550        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2551        let affected_operational_collections = collect_strings_tx(
2552            &tx,
2553            "SELECT DISTINCT m.collection_name \
2554             FROM operational_mutations m \
2555             JOIN operational_collections c ON c.name = m.collection_name \
2556             WHERE m.source_ref = ?1 AND c.kind = 'latest_state' \
2557             ORDER BY m.collection_name",
2558            source_ref,
2559        )?;
2560
2561        // Collect (row_id, logical_id) for active rows that will be excised.
2562        let pairs: Vec<(String, String)> = {
2563            let mut stmt = tx.prepare(
2564                "SELECT row_id, logical_id FROM nodes \
2565                 WHERE source_ref = ?1 AND superseded_at IS NULL",
2566            )?;
2567            stmt.query_map([source_ref], |row| {
2568                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
2569            })?
2570            .collect::<Result<Vec<_>, _>>()?
2571        };
2572        let affected_logical_ids: Vec<String> = pairs
2573            .iter()
2574            .map(|(_, logical_id)| logical_id.clone())
2575            .collect();
2576
2577        // Supersede bad rows in all tables.
2578        tx.execute(
2579            "UPDATE nodes SET superseded_at = unixepoch() \
2580             WHERE source_ref = ?1 AND superseded_at IS NULL",
2581            [source_ref],
2582        )?;
2583        tx.execute(
2584            "UPDATE edges SET superseded_at = unixepoch() \
2585             WHERE source_ref = ?1 AND superseded_at IS NULL",
2586            [source_ref],
2587        )?;
2588        tx.execute(
2589            "UPDATE actions SET superseded_at = unixepoch() \
2590             WHERE source_ref = ?1 AND superseded_at IS NULL",
2591            [source_ref],
2592        )?;
2593        clear_operational_current_rows(&tx, &affected_operational_collections)?;
2594        tx.execute(
2595            "DELETE FROM operational_mutations WHERE source_ref = ?1",
2596            [source_ref],
2597        )?;
2598        for logical_id in &affected_logical_ids {
2599            delete_vec_rows_for_logical_id(&tx, logical_id)?;
2600            tx.execute(
2601                "DELETE FROM chunks WHERE node_logical_id = ?1",
2602                [logical_id.as_str()],
2603            )?;
2604        }
2605
2606        // Restore the most recent prior version for each affected logical_id.
2607        for (excised_row_id, logical_id) in &pairs {
2608            let prior: Option<String> = tx
2609                .query_row(
2610                    "SELECT row_id FROM nodes \
2611                     WHERE logical_id = ?1 AND row_id != ?2 \
2612                     ORDER BY created_at DESC LIMIT 1",
2613                    [logical_id.as_str(), excised_row_id.as_str()],
2614                    |row| row.get(0),
2615                )
2616                .optional()?;
2617            if let Some(prior_id) = prior {
2618                tx.execute(
2619                    "UPDATE nodes SET superseded_at = NULL WHERE row_id = ?1",
2620                    [prior_id.as_str()],
2621                )?;
2622            }
2623        }
2624
2625        for logical_id in &affected_logical_ids {
2626            let has_active_node = tx
2627                .query_row(
2628                    "SELECT 1 FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL LIMIT 1",
2629                    [logical_id.as_str()],
2630                    |row| row.get::<_, i64>(0),
2631                )
2632                .optional()?
2633                .is_some();
2634            if !has_active_node {
2635                tx.execute(
2636                    "DELETE FROM node_access_metadata WHERE logical_id = ?1",
2637                    [logical_id.as_str()],
2638                )?;
2639            }
2640        }
2641
2642        rebuild_operational_current_rows(&tx, &affected_operational_collections)?;
2643
2644        // Rebuild FTS atomically within the same transaction so readers never
2645        // observe a post-excise node state with a stale FTS index.
2646        tx.execute("DELETE FROM fts_nodes", [])?;
2647        tx.execute(
2648            r"
2649            INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content)
2650            SELECT c.id, n.logical_id, n.kind, c.text_content
2651            FROM chunks c
2652            JOIN nodes n
2653              ON n.logical_id = c.node_logical_id
2654             AND n.superseded_at IS NULL
2655            ",
2656            [],
2657        )?;
2658
2659        // Rebuild property FTS in the same transaction.
2660        rebuild_property_fts_in_tx(&tx)?;
2661
2662        // Record the audit event inside the same transaction so the excision and its
2663        // audit record are committed atomically — no window where the excision is
2664        // durable but unaudited.
2665        tx.execute(
2666            "INSERT INTO provenance_events (id, event_type, subject, source_ref) \
2667             VALUES (?1, 'excise_source', ?2, ?2)",
2668            rusqlite::params![new_id(), source_ref],
2669        )?;
2670
2671        tx.commit()?;
2672
2673        self.trace_source(source_ref)
2674    }
2675
2676    /// # Errors
2677    /// Returns [`EngineError`] if the WAL checkpoint fails, the `SQLite` backup fails,
2678    /// the SHA-256 digest cannot be computed, or the manifest file cannot be written.
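    ///
    /// # Example
    ///
    /// A sketch; `engine`, the destination path, and the options literal are
    /// illustrative, with only the `force_checkpoint` field (the one this
    /// method reads) shown.
    ///
    /// ```ignore
    /// let manifest = engine.safe_export(
    ///     "/backups/fathomdb-export.sqlite",
    ///     SafeExportOptions { force_checkpoint: false },
    /// )?;
    /// println!("exported, sha256 = {}", manifest.sha256);
    /// ```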
2679    pub fn safe_export(
2680        &self,
2681        destination_path: impl AsRef<Path>,
2682        options: SafeExportOptions,
2683    ) -> Result<SafeExportManifest, EngineError> {
2684        let destination_path = destination_path.as_ref();
2685
2686        // 1. Optionally checkpoint WAL before exporting. This keeps the on-disk file tidy for
2687        // callers that want a fully checkpointed export, but export correctness does not depend
2688        // on it because the backup API copies from the live SQLite connection state.
2689        let conn = self.connect()?;
2690
2691        if options.force_checkpoint {
2692            trace_info!("safe_export: wal checkpoint started");
2693            let (busy, log, checkpointed): (i64, i64, i64) =
2694                conn.query_row("PRAGMA wal_checkpoint(FULL)", [], |row| {
2695                    Ok((row.get(0)?, row.get(1)?, row.get(2)?))
2696                })?;
2697            if busy != 0 {
2698                trace_warn!(
2699                    busy,
2700                    log_frames = log,
2701                    checkpointed_frames = checkpointed,
2702                    "safe_export: wal checkpoint blocked by active readers"
2703                );
2704                return Err(EngineError::Bridge(format!(
2705                    "WAL checkpoint blocked: active readers prevented a full checkpoint (busy={busy}); \
2706                     log frames={log}, checkpointed={checkpointed}; \
2707                     retry export when no readers are active"
2708                )));
2709            }
2710            trace_info!(
2711                log_frames = log,
2712                checkpointed_frames = checkpointed,
2713                "safe_export: wal checkpoint completed"
2714            );
2715        }
2716
2717        let schema_version: u32 = conn
2718            .query_row(
2719                "SELECT COALESCE(MAX(version), 0) FROM fathom_schema_migrations",
2720                [],
2721                |row| row.get(0),
2722            )
2723            .unwrap_or(0);
2724
2725        // 2. Export the database through SQLite's online backup API so committed data in the WAL
2726        // is included even when `force_checkpoint` is false.
2727        if let Some(parent) = destination_path.parent() {
2728            fs::create_dir_all(parent)?;
2729        }
2730        conn.backup(DatabaseName::Main, destination_path, None)?;
2731
2732        drop(conn);
2733
2734        // 2b. Query page_count from the EXPORTED file so the manifest reflects what was
2735        // actually backed up, not the source (which may have changed between the PRAGMA
2736        // and the backup call).
2737        let page_count: u64 = {
2738            let export_conn = rusqlite::Connection::open_with_flags(
2739                destination_path,
2740                rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY
2741                    | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX,
2742            )?;
2743            export_conn.query_row("PRAGMA page_count", [], |row| row.get(0))?
2744        };
2745
2746        // 3. Compute SHA-256 of the exported file.
2747        // FIX(review): was fs::read, which loaded the entire DB into memory; use a streaming hash instead.
2748        let sha256 = {
2749            let mut file = fs::File::open(destination_path)?;
2750            let mut hasher = Sha256::new();
2751            io::copy(&mut file, &mut hasher)?;
2752            format!("{:x}", hasher.finalize())
2753        };
2754
2755        // 4. Record when the export was created.
2756        let exported_at = SystemTime::now()
2757            .duration_since(SystemTime::UNIX_EPOCH)
2758            .map_err(|e| EngineError::Bridge(format!("system clock error: {e}")))?
2759            .as_secs();
2760
2761        let manifest = SafeExportManifest {
2762            exported_at,
2763            sha256,
2764            schema_version,
2765            protocol_version: EXPORT_PROTOCOL_VERSION,
2766            page_count,
2767        };
2768
2769        // 5. Write manifest alongside the exported file, using Path API for the name.
2770        let manifest_path = {
2771            let mut p = destination_path.to_path_buf();
2772            let stem = p
2773                .file_name()
2774                .map(|n| format!("{}.export-manifest.json", n.to_string_lossy()))
2775                .ok_or_else(|| {
2776                    EngineError::Bridge("destination path has no filename".to_owned())
2777                })?;
2778            p.set_file_name(stem);
2779            p
2780        };
2781        let manifest_json =
2782            serde_json::to_string(&manifest).map_err(|e| EngineError::Bridge(e.to_string()))?;
2783
2784        // Atomic manifest write: write to a temp file then rename so readers never
2785        // observe a partially-written manifest.
2786        let manifest_tmp = manifest_path.with_extension("json.tmp");
2787        if let Err(e) = fs::write(&manifest_tmp, &manifest_json)
2788            .and_then(|()| fs::rename(&manifest_tmp, &manifest_path))
2789        {
2790            let _ = fs::remove_file(&manifest_tmp);
2791            return Err(e.into());
2792        }
2793
2794        Ok(manifest)
2795    }
2796}
2797
2798#[allow(dead_code)]
2799#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
2800struct VectorEmbeddingContractRecord {
2801    profile: String,
2802    table_name: String,
2803    model_identity: String,
2804    model_version: String,
2805    dimension: usize,
2806    normalization_policy: String,
2807    chunking_policy: String,
2808    preprocessing_policy: String,
2809    generator_command_json: String,
2810    applied_at: i64,
2811    snapshot_hash: String,
2812    contract_format_version: i64,
2813}
2814
2815#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
2816struct VectorRegenerationInputChunk {
2817    chunk_id: String,
2818    node_logical_id: String,
2819    kind: String,
2820    text_content: String,
2821    byte_start: Option<i64>,
2822    byte_end: Option<i64>,
2823    source_ref: Option<String>,
2824    created_at: i64,
2825}
2826
2827#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
2828struct VectorRegenerationInput {
2829    profile: String,
2830    table_name: String,
2831    model_identity: String,
2832    model_version: String,
2833    dimension: usize,
2834    normalization_policy: String,
2835    chunking_policy: String,
2836    preprocessing_policy: String,
2837    chunks: Vec<VectorRegenerationInputChunk>,
2838}
2839
2840#[derive(Clone, Copy, Debug, PartialEq, Eq)]
2841pub(crate) enum VectorRegenerationFailureClass {
2842    InvalidContract,
2843    EmbedderFailure,
2844    InvalidEmbedderOutput,
2845    SnapshotDrift,
2846    UnsupportedVecCapability,
2847}
2848
2849impl VectorRegenerationFailureClass {
2850    fn label(self) -> &'static str {
2851        match self {
2852            Self::InvalidContract => "invalid contract",
2853            Self::EmbedderFailure => "embedder failure",
2854            Self::InvalidEmbedderOutput => "invalid embedder output",
2855            Self::SnapshotDrift => "snapshot drift",
2856            Self::UnsupportedVecCapability => "unsupported vec capability",
2857        }
2858    }
2859
2860    fn retryable(self) -> bool {
2861        matches!(self, Self::SnapshotDrift)
2862    }
2863}
2864
2865#[derive(Clone, Debug, PartialEq, Eq)]
2866pub(crate) struct VectorRegenerationFailure {
2867    class: VectorRegenerationFailureClass,
2868    detail: String,
2869}
2870
2871impl VectorRegenerationFailure {
2872    pub(crate) fn new(class: VectorRegenerationFailureClass, detail: impl Into<String>) -> Self {
2873        Self {
2874            class,
2875            detail: detail.into(),
2876        }
2877    }
2878
2879    fn to_engine_error(&self) -> EngineError {
2880        let retry_suffix = if self.class.retryable() {
2881            " [retryable]"
2882        } else {
2883            ""
2884        };
2885        EngineError::Bridge(format!(
2886            "vector regeneration {}: {}{}",
2887            self.class.label(),
2888            self.detail,
2889            retry_suffix
2890        ))
2891    }
2892
2893    fn failure_class_label(&self) -> &'static str {
2894        self.class.label()
2895    }
2896}
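// Illustration of the rendering above (a sketch, not captured output): a snapshot-drift
// failure with the hypothetical detail "chunk set changed during regeneration" surfaces
// through `to_engine_error` as
//   "vector regeneration snapshot drift: chunk set changed during regeneration [retryable]"
// while every other class renders the same way minus the " [retryable]" suffix.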
2897
2898#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
2899struct VectorRegenerationAuditMetadata {
2900    profile: String,
2901    model_identity: String,
2902    model_version: String,
2903    chunk_count: usize,
2904    snapshot_hash: String,
2905    #[serde(skip_serializing_if = "Option::is_none")]
2906    failure_class: Option<String>,
2907}
2908
2909#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize)]
2910#[serde(tag = "mode", rename_all = "snake_case")]
2911enum OperationalRetentionPolicy {
2912    KeepAll,
2913    PurgeBeforeSeconds { max_age_seconds: i64 },
2914    KeepLast { max_rows: usize },
2915}
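// With the internally tagged representation above (`tag = "mode"`, snake_case variant
// names), the accepted JSON shapes look like the following; the numeric values are
// illustrative only:
//   {"mode": "keep_all"}
//   {"mode": "purge_before_seconds", "max_age_seconds": 604800}
//   {"mode": "keep_last", "max_rows": 1000}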
2916
2917/// # Errors
2918/// Returns [`EngineError`] if the file cannot be read or the config is invalid.
2919pub fn load_vector_regeneration_config(
2920    path: impl AsRef<Path>,
2921) -> Result<VectorRegenerationConfig, EngineError> {
2922    let path = path.as_ref();
2923    let raw = fs::read_to_string(path)?;
2924    match path.extension().and_then(|ext| ext.to_str()) {
2925        Some("toml") => {
2926            toml::from_str(&raw).map_err(|error| EngineError::Bridge(error.to_string()))
2927        }
2928        Some("json") | None => {
2929            serde_json::from_str(&raw).map_err(|error| EngineError::Bridge(error.to_string()))
2930        }
2931        Some(other) => Err(EngineError::Bridge(format!(
2932            "unsupported vector regeneration config extension: {other}"
2933        ))),
2934    }
2935}
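// A minimal config sketch for this loader, assuming the on-disk shape mirrors the four
// fields assembled in `validate_vector_regeneration_config`; the policy values are
// placeholders, not defaults shipped by this crate:
//
//   {
//     "profile": "default",
//     "table_name": "vec_nodes_active",
//     "chunking_policy": "paragraph-v1",
//     "preprocessing_policy": "lowercase-strip-v1"
//   }
//
// The same keys in TOML form are accepted for `.toml` files; files with no extension
// are parsed as JSON.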
2936
2937fn validate_vector_regeneration_config(
2938    conn: &rusqlite::Connection,
2939    config: &VectorRegenerationConfig,
2940    identity: &QueryEmbedderIdentity,
2941) -> Result<VectorRegenerationConfig, VectorRegenerationFailure> {
2942    let profile = validate_bounded_text("profile", &config.profile, MAX_PROFILE_LEN)?;
2943    let table_name = validate_bounded_text("table_name", &config.table_name, MAX_PROFILE_LEN)?;
2944    if table_name != "vec_nodes_active" {
2945        return Err(VectorRegenerationFailure::new(
2946            VectorRegenerationFailureClass::InvalidContract,
2947            format!("table_name must be vec_nodes_active, got '{table_name}'"),
2948        ));
2949    }
2950    if identity.dimension == 0 {
2951        return Err(VectorRegenerationFailure::new(
2952            VectorRegenerationFailureClass::InvalidContract,
2953            "embedder reports dimension 0".to_owned(),
2954        ));
2955    }
2956    let chunking_policy =
2957        validate_bounded_text("chunking_policy", &config.chunking_policy, MAX_POLICY_LEN)?;
2958    let preprocessing_policy = validate_bounded_text(
2959        "preprocessing_policy",
2960        &config.preprocessing_policy,
2961        MAX_POLICY_LEN,
2962    )?;
2963
2964    if let Some(existing_dimension) = current_vector_profile_dimension(conn, &profile)?
2965        && existing_dimension != identity.dimension
2966    {
2967        return Err(VectorRegenerationFailure::new(
2968            VectorRegenerationFailureClass::InvalidContract,
2969            format!(
2970                "embedder dimension {} does not match existing vector profile dimension {}",
2971                identity.dimension, existing_dimension
2972            ),
2973        ));
2974    }
2975
2976    validate_existing_contract_version(conn, &profile)?;
2977
2978    let normalized = VectorRegenerationConfig {
2979        profile,
2980        table_name,
2981        chunking_policy,
2982        preprocessing_policy,
2983    };
2984    let serialized = serde_json::to_vec(&normalized).map_err(|error| {
2985        VectorRegenerationFailure::new(
2986            VectorRegenerationFailureClass::InvalidContract,
2987            error.to_string(),
2988        )
2989    })?;
2990    if serialized.len() > MAX_CONTRACT_JSON_BYTES {
2991        return Err(VectorRegenerationFailure::new(
2992            VectorRegenerationFailureClass::InvalidContract,
2993            format!("serialized contract exceeds {MAX_CONTRACT_JSON_BYTES} bytes"),
2994        ));
2995    }
2996
2997    Ok(normalized)
2998}
2999
3000#[allow(clippy::cast_possible_wrap)]
3001fn persist_vector_contract(
3002    conn: &rusqlite::Connection,
3003    config: &VectorRegenerationConfig,
3004    identity: &QueryEmbedderIdentity,
3005    snapshot_hash: &str,
3006) -> Result<(), EngineError> {
3007    conn.execute(
3008        r"
3009        INSERT OR REPLACE INTO vector_embedding_contracts (
3010            profile,
3011            table_name,
3012            model_identity,
3013            model_version,
3014            dimension,
3015            normalization_policy,
3016            chunking_policy,
3017            preprocessing_policy,
3018            generator_command_json,
3019            applied_at,
3020            snapshot_hash,
3021            contract_format_version,
3022            updated_at
3023        ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, unixepoch(), ?10, ?11, unixepoch())
3024        ",
3025        rusqlite::params![
3026            config.profile.as_str(),
3027            config.table_name.as_str(),
3028            identity.model_identity.as_str(),
3029            identity.model_version.as_str(),
3030            identity.dimension as i64,
3031            identity.normalization_policy.as_str(),
3032            config.chunking_policy.as_str(),
3033            config.preprocessing_policy.as_str(),
3034            "[]",
3035            snapshot_hash,
3036            CURRENT_VECTOR_CONTRACT_FORMAT_VERSION,
3037        ],
3038    )?;
3039    Ok(())
3040}
3041
3042fn persist_vector_regeneration_event(
3043    conn: &rusqlite::Connection,
3044    event_type: &str,
3045    subject: &str,
3046    metadata: &VectorRegenerationAuditMetadata,
3047) -> Result<(), EngineError> {
3048    let metadata_json = serialize_audit_metadata(metadata)?;
3049    conn.execute(
3050        "INSERT INTO provenance_events (id, event_type, subject, metadata_json) VALUES (?1, ?2, ?3, ?4)",
3051        rusqlite::params![new_id(), event_type, subject, metadata_json],
3052    )?;
3053    Ok(())
3054}
3055
3056fn persist_simple_provenance_event(
3057    conn: &rusqlite::Connection,
3058    event_type: &str,
3059    subject: &str,
3060    metadata: Option<serde_json::Value>,
3061) -> Result<(), EngineError> {
3062    let metadata_json = metadata.map(|value| value.to_string()).unwrap_or_default();
3063    conn.execute(
3064        "INSERT INTO provenance_events (id, event_type, subject, metadata_json) VALUES (?1, ?2, ?3, ?4)",
3065        rusqlite::params![new_id(), event_type, subject, metadata_json],
3066    )?;
3067    Ok(())
3068}
3069
3070/// Count active nodes that should have a property FTS row (extraction yields a value)
3071/// but don't. Uses the same extraction logic as write/rebuild to avoid false positives
3072/// for nodes whose declared paths legitimately normalize to no values.
3073fn count_missing_property_fts_rows(conn: &rusqlite::Connection) -> Result<i64, EngineError> {
3074    let schemas = crate::writer::load_fts_property_schemas(conn)?;
3075    if schemas.is_empty() {
3076        return Ok(0);
3077    }
3078
3079    let mut missing = 0i64;
3080    for (kind, schema) in &schemas {
3081        let mut stmt = conn.prepare(
3082            "SELECT n.logical_id, n.properties FROM nodes n \
3083             WHERE n.kind = ?1 AND n.superseded_at IS NULL \
3084               AND NOT EXISTS (SELECT 1 FROM fts_node_properties fp WHERE fp.node_logical_id = n.logical_id)",
3085        )?;
3086        let rows = stmt.query_map([kind.as_str()], |row| {
3087            Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
3088        })?;
3089        for row in rows {
3090            let (_logical_id, properties_str) = row?;
3091            let props: serde_json::Value =
3092                serde_json::from_str(&properties_str).unwrap_or_default();
3093            if crate::writer::extract_property_fts(&props, schema)
3094                .0
3095                .is_some()
3096            {
3097                missing += 1;
3098            }
3099        }
3100    }
3101    Ok(missing)
3102}
3103
3104/// Count property FTS rows whose `text_content` has drifted from the current canonical
3105/// value computed by `compute_property_fts_text(...)`. This catches:
3106/// - rows whose text no longer matches the current node properties and schema
3107/// - rows that should have been removed (extraction now yields no value)
3108fn count_drifted_property_fts_rows(conn: &rusqlite::Connection) -> Result<i64, EngineError> {
3109    let schemas = crate::writer::load_fts_property_schemas(conn)?;
3110    if schemas.is_empty() {
3111        return Ok(0);
3112    }
3113
3114    let mut drifted = 0i64;
3115    for (kind, schema) in &schemas {
3116        let mut stmt = conn.prepare(
3117            "SELECT fp.node_logical_id, fp.text_content, n.properties \
3118             FROM fts_node_properties fp \
3119             JOIN nodes n ON n.logical_id = fp.node_logical_id AND n.superseded_at IS NULL \
3120             WHERE fp.kind = ?1 AND n.kind = ?1",
3121        )?;
3122        let rows = stmt.query_map([kind.as_str()], |row| {
3123            Ok((
3124                row.get::<_, String>(0)?,
3125                row.get::<_, String>(1)?,
3126                row.get::<_, String>(2)?,
3127            ))
3128        })?;
3129        for row in rows {
3130            let (_logical_id, stored_text, properties_str) = row?;
3131            let props: serde_json::Value =
3132                serde_json::from_str(&properties_str).unwrap_or_default();
3133            let (expected, _positions, _stats) =
3134                crate::writer::extract_property_fts(&props, schema);
3135            match expected {
3136                Some(text) if text == stored_text => {}
3137                _ => drifted += 1,
3138            }
3139        }
3140    }
3141    Ok(drifted)
3142}
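// Drift example (hypothetical values, with "|" standing in for whatever separator the
// schema declares): a row storing "Alice|Engineer" counts as drifted once the node's
// properties recompute to "Alice|Manager", and so does a row whose extraction now
// yields no value at all, since that row should no longer exist.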
3143
3144/// Rebuild property FTS rows from canonical state within an existing transaction.
3145fn rebuild_property_fts_in_tx(conn: &rusqlite::Connection) -> Result<usize, EngineError> {
3146    conn.execute("DELETE FROM fts_node_properties", [])?;
3147    conn.execute("DELETE FROM fts_node_property_positions", [])?;
3148    let inserted = crate::projection::insert_property_fts_rows(
3149        conn,
3150        "SELECT logical_id, properties FROM nodes WHERE kind = ?1 AND superseded_at IS NULL",
3151    )?;
3152    Ok(inserted)
3153}
3154
3155/// Rebuild property FTS for a single node. Returns 1 if a row was inserted, 0 otherwise.
3156/// The caller must delete any existing `fts_node_properties` row for this node first.
3157fn rebuild_single_node_property_fts(
3158    conn: &rusqlite::Connection,
3159    logical_id: &str,
3160    kind: &str,
3161) -> Result<usize, EngineError> {
3162    let schema: Option<(String, String)> = conn
3163        .query_row(
3164            "SELECT property_paths_json, separator FROM fts_property_schemas WHERE kind = ?1",
3165            [kind],
3166            |row| {
3167                let paths_json: String = row.get(0)?;
3168                let separator: String = row.get(1)?;
3169                Ok((paths_json, separator))
3170            },
3171        )
3172        .optional()?;
3173    let Some((paths_json, separator)) = schema else {
3174        return Ok(0);
3175    };
3176    let parsed = crate::writer::parse_property_schema_json(&paths_json, &separator);
3177    let properties_str: Option<String> = conn
3178        .query_row(
3179            "SELECT properties FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
3180            [logical_id],
3181            |row| row.get(0),
3182        )
3183        .optional()?;
3184    let Some(properties_str) = properties_str else {
3185        return Ok(0);
3186    };
3187    let props: serde_json::Value = serde_json::from_str(&properties_str).unwrap_or_default();
3188    let (text, positions, _stats) = crate::writer::extract_property_fts(&props, &parsed);
3189    let Some(text) = text else {
3190        return Ok(0);
3191    };
3192    conn.execute(
3193        "DELETE FROM fts_node_property_positions WHERE node_logical_id = ?1",
3194        rusqlite::params![logical_id],
3195    )?;
3196    conn.execute(
3197        "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) VALUES (?1, ?2, ?3)",
3198        rusqlite::params![logical_id, kind, text],
3199    )?;
3200    for pos in &positions {
3201        conn.execute(
3202            "INSERT INTO fts_node_property_positions \
3203             (node_logical_id, kind, start_offset, end_offset, leaf_path) \
3204             VALUES (?1, ?2, ?3, ?4, ?5)",
3205            rusqlite::params![
3206                logical_id,
3207                kind,
3208                i64::try_from(pos.start_offset).unwrap_or(i64::MAX),
3209                i64::try_from(pos.end_offset).unwrap_or(i64::MAX),
3210                pos.leaf_path,
3211            ],
3212        )?;
3213    }
3214    Ok(1)
3215}
3216
3217fn serialize_property_paths_json(
3218    entries: &[FtsPropertyPathSpec],
3219    exclude_paths: &[String],
3220) -> Result<String, EngineError> {
3221    // Scalar-only schemas with no exclude_paths are serialized in the
3222    // legacy shape (bare array of strings) for full backwards
3223    // compatibility with earlier schema versions.
3224    let all_scalar = entries
3225        .iter()
3226        .all(|e| e.mode == FtsPropertyPathMode::Scalar);
3227    if all_scalar && exclude_paths.is_empty() {
3228        let paths: Vec<&str> = entries.iter().map(|e| e.path.as_str()).collect();
3229        return serde_json::to_string(&paths).map_err(|e| {
3230            EngineError::InvalidWrite(format!("failed to serialize property paths: {e}"))
3231        });
3232    }
3233
3234    let mut obj = serde_json::Map::new();
3235    let paths_json: Vec<serde_json::Value> = entries
3236        .iter()
3237        .map(|e| {
3238            let mode_str = match e.mode {
3239                FtsPropertyPathMode::Scalar => "scalar",
3240                FtsPropertyPathMode::Recursive => "recursive",
3241            };
3242            serde_json::json!({ "path": e.path, "mode": mode_str })
3243        })
3244        .collect();
3245    obj.insert("paths".to_owned(), serde_json::Value::Array(paths_json));
3246    if !exclude_paths.is_empty() {
3247        obj.insert("exclude_paths".to_owned(), serde_json::json!(exclude_paths));
3248    }
3249    serde_json::to_string(&serde_json::Value::Object(obj))
3250        .map_err(|e| EngineError::InvalidWrite(format!("failed to serialize property paths: {e}")))
3251}
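// Illustration of the two shapes this function emits (the paths are examples):
//   scalar-only, no exclusions (legacy bare array):
//     ["$.name", "$.title"]
//   any recursive entry or exclusions present (object envelope):
//     {"paths": [{"path": "$.name", "mode": "scalar"},
//                {"path": "$.body", "mode": "recursive"}],
//      "exclude_paths": ["$.body.internal"]}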
3252
3253fn validate_fts_property_paths(paths: &[String]) -> Result<(), EngineError> {
3254    if paths.is_empty() {
3255        return Err(EngineError::InvalidWrite(
3256            "FTS property paths must not be empty".to_owned(),
3257        ));
3258    }
3259    let mut seen = std::collections::HashSet::new();
3260    for path in paths {
3261        if !path.starts_with("$.") {
3262            return Err(EngineError::InvalidWrite(format!(
3263                "FTS property path must start with '$.' but got: {path}"
3264            )));
3265        }
3266        let after_prefix = &path[2..]; // safe: already validated "$." prefix
3267        let segments: Vec<&str> = after_prefix.split('.').collect();
3268        if segments.is_empty() || segments.iter().any(|s| s.is_empty()) {
3269            return Err(EngineError::InvalidWrite(format!(
3270                "FTS property path has empty segment(s): {path}"
3271            )));
3272        }
3273        for seg in &segments {
3274            if !seg.chars().all(|c| c.is_alphanumeric() || c == '_') {
3275                return Err(EngineError::InvalidWrite(format!(
3276                    "FTS property path segment contains invalid characters: {path}"
3277                )));
3278            }
3279        }
3280        if !seen.insert(path) {
3281            return Err(EngineError::InvalidWrite(format!(
3282                "duplicate FTS property path: {path}"
3283            )));
3284        }
3285    }
3286    Ok(())
3287}
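// Examples of the rules enforced above (illustrative paths only):
//   accepted: "$.name", "$.meta.display_title"
//   rejected: "name"       (missing the "$." prefix)
//             "$.a..b"     (empty segment)
//             "$.tags[0]"  (segment contains characters other than alphanumerics or '_')
//             any path listed twice (duplicate)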
3288
3289fn load_fts_property_schema_record(
3290    conn: &rusqlite::Connection,
3291    kind: &str,
3292) -> Result<Option<FtsPropertySchemaRecord>, EngineError> {
3293    let row = conn
3294        .query_row(
3295            "SELECT kind, property_paths_json, separator, format_version \
3296             FROM fts_property_schemas WHERE kind = ?1",
3297            [kind],
3298            |row| {
3299                let kind: String = row.get(0)?;
3300                let paths_json: String = row.get(1)?;
3301                let separator: String = row.get(2)?;
3302                let format_version: i64 = row.get(3)?;
3303                Ok(build_fts_property_schema_record(
3304                    kind,
3305                    &paths_json,
3306                    separator,
3307                    format_version,
3308                ))
3309            },
3310        )
3311        .optional()?;
3312    Ok(row)
3313}
3314
3315/// Build an [`FtsPropertySchemaRecord`] from a raw
3316/// `fts_property_schemas` row. Delegates JSON parsing to
3317/// [`crate::writer::parse_property_schema_json`] — the same parser the
3318/// recursive walker uses at rebuild time — so both the legacy bare-array
3319/// shape and the Phase 4 object-shaped envelope round-trip correctly.
3320fn build_fts_property_schema_record(
3321    kind: String,
3322    paths_json: &str,
3323    separator: String,
3324    format_version: i64,
3325) -> FtsPropertySchemaRecord {
3326    let schema = crate::writer::parse_property_schema_json(paths_json, &separator);
3327    let entries: Vec<FtsPropertyPathSpec> = schema
3328        .paths
3329        .into_iter()
3330        .map(|entry| FtsPropertyPathSpec {
3331            path: entry.path,
3332            mode: match entry.mode {
3333                crate::writer::PropertyPathMode::Scalar => FtsPropertyPathMode::Scalar,
3334                crate::writer::PropertyPathMode::Recursive => FtsPropertyPathMode::Recursive,
3335            },
3336        })
3337        .collect();
3338    let property_paths: Vec<String> = entries.iter().map(|e| e.path.clone()).collect();
3339    FtsPropertySchemaRecord {
3340        kind,
3341        property_paths,
3342        entries,
3343        exclude_paths: schema.exclude_paths,
3344        separator,
3345        format_version,
3346    }
3347}
3348
3349fn build_regeneration_input(
3350    config: &VectorRegenerationConfig,
3351    identity: &QueryEmbedderIdentity,
3352    chunks: Vec<VectorRegenerationInputChunk>,
3353) -> VectorRegenerationInput {
3354    VectorRegenerationInput {
3355        profile: config.profile.clone(),
3356        table_name: config.table_name.clone(),
3357        model_identity: identity.model_identity.clone(),
3358        model_version: identity.model_version.clone(),
3359        dimension: identity.dimension,
3360        normalization_policy: identity.normalization_policy.clone(),
3361        chunking_policy: config.chunking_policy.clone(),
3362        preprocessing_policy: config.preprocessing_policy.clone(),
3363        chunks,
3364    }
3365}
3366
3367fn compute_snapshot_hash(payload: &VectorRegenerationInput) -> Result<String, EngineError> {
3368    let bytes =
3369        serde_json::to_vec(payload).map_err(|error| EngineError::Bridge(error.to_string()))?;
3370    let mut hasher = Sha256::new();
3371    hasher.update(bytes);
3372    Ok(format!("{:x}", hasher.finalize()))
3373}
3374
3375fn collect_regeneration_chunks(
3376    conn: &rusqlite::Connection,
3377) -> Result<Vec<VectorRegenerationInputChunk>, EngineError> {
3378    let mut stmt = conn.prepare(
3379        r"
3380        SELECT c.id, c.node_logical_id, n.kind, c.text_content, c.byte_start, c.byte_end, n.source_ref, c.created_at
3381        FROM chunks c
3382        JOIN nodes n
3383          ON n.logical_id = c.node_logical_id
3384         AND n.superseded_at IS NULL
3385        ORDER BY c.created_at, c.id
3386        ",
3387    )?;
3388    let chunks = stmt
3389        .query_map([], |row| {
3390            Ok(VectorRegenerationInputChunk {
3391                chunk_id: row.get(0)?,
3392                node_logical_id: row.get(1)?,
3393                kind: row.get(2)?,
3394                text_content: row.get(3)?,
3395                byte_start: row.get(4)?,
3396                byte_end: row.get(5)?,
3397                source_ref: row.get(6)?,
3398                created_at: row.get(7)?,
3399            })
3400        })?
3401        .collect::<Result<Vec<_>, _>>()?;
3402    Ok(chunks)
3403}
3404
3405fn validate_bounded_text(
3406    field: &str,
3407    value: &str,
3408    max_len: usize,
3409) -> Result<String, VectorRegenerationFailure> {
3410    let trimmed = value.trim();
3411    if trimmed.is_empty() {
3412        return Err(VectorRegenerationFailure::new(
3413            VectorRegenerationFailureClass::InvalidContract,
3414            format!("{field} must not be empty"),
3415        ));
3416    }
3417    if trimmed.len() > max_len {
3418        return Err(VectorRegenerationFailure::new(
3419            VectorRegenerationFailureClass::InvalidContract,
3420            format!("{field} exceeds max length {max_len}"),
3421        ));
3422    }
3423    Ok(trimmed.to_owned())
3424}
3425
3426fn current_vector_profile_dimension(
3427    conn: &rusqlite::Connection,
3428    profile: &str,
3429) -> Result<Option<usize>, VectorRegenerationFailure> {
3430    let dimension: Option<i64> = conn
3431        .query_row(
3432            "SELECT dimension FROM vector_profiles WHERE profile = ?1 AND enabled = 1",
3433            [profile],
3434            |row| row.get(0),
3435        )
3436        .optional()
3437        .map_err(|error| {
3438            VectorRegenerationFailure::new(
3439                VectorRegenerationFailureClass::InvalidContract,
3440                error.to_string(),
3441            )
3442        })?;
3443    dimension
3444        .map(|value| {
3445            usize::try_from(value).map_err(|_| {
3446                VectorRegenerationFailure::new(
3447                    VectorRegenerationFailureClass::InvalidContract,
3448                    format!("stored vector profile dimension is invalid: {value}"),
3449                )
3450            })
3451        })
3452        .transpose()
3453}
3454
3455fn validate_existing_contract_version(
3456    conn: &rusqlite::Connection,
3457    profile: &str,
3458) -> Result<(), VectorRegenerationFailure> {
3459    let version: Option<i64> = conn
3460        .query_row(
3461            "SELECT contract_format_version FROM vector_embedding_contracts WHERE profile = ?1",
3462            [profile],
3463            |row| row.get(0),
3464        )
3465        .optional()
3466        .map_err(|error| {
3467            VectorRegenerationFailure::new(
3468                VectorRegenerationFailureClass::InvalidContract,
3469                error.to_string(),
3470            )
3471        })?;
3472    if let Some(version) = version
3473        && version > CURRENT_VECTOR_CONTRACT_FORMAT_VERSION
3474    {
3475        return Err(VectorRegenerationFailure::new(
3476            VectorRegenerationFailureClass::InvalidContract,
3477            format!(
3478                "persisted contract format version {version} is unsupported; supported version is {CURRENT_VECTOR_CONTRACT_FORMAT_VERSION}"
3479            ),
3480        ));
3481    }
3482    Ok(())
3483}
3484
3485fn serialize_audit_metadata(
3486    metadata: &VectorRegenerationAuditMetadata,
3487) -> Result<String, EngineError> {
3488    let json =
3489        serde_json::to_string(metadata).map_err(|error| EngineError::Bridge(error.to_string()))?;
3490    if json.len() > MAX_AUDIT_METADATA_BYTES {
3491        return Err(VectorRegenerationFailure::new(
3492            VectorRegenerationFailureClass::InvalidContract,
3493            format!("audit metadata exceeds {MAX_AUDIT_METADATA_BYTES} bytes"),
3494        )
3495        .to_engine_error());
3496    }
3497    Ok(json)
3498}
3499
3500fn count_source_ref(
3501    conn: &rusqlite::Connection,
3502    table: &str,
3503    source_ref: &str,
3504) -> Result<usize, EngineError> {
3505    let sql = match table {
3506        "nodes" => "SELECT count(*) FROM nodes WHERE source_ref = ?1",
3507        "edges" => "SELECT count(*) FROM edges WHERE source_ref = ?1",
3508        "actions" => "SELECT count(*) FROM actions WHERE source_ref = ?1",
3509        "operational_mutations" => {
3510            "SELECT count(*) FROM operational_mutations WHERE source_ref = ?1"
3511        }
3512        other => return Err(EngineError::Bridge(format!("unknown table: {other}"))),
3513    };
3514    let count: i64 = conn.query_row(sql, [source_ref], |row| row.get(0))?;
3515    // FIX(review): was `count as usize`, a lossy cast that silently wraps negative counts.
3516    // Chose option (C) here: propagate an error, since this is a user-facing helper.
3517    usize::try_from(count)
3518        .map_err(|_| EngineError::Bridge(format!("count overflow for table {table}: {count}")))
3519}
3520
3521fn rebuild_operational_current_rows(
3522    tx: &rusqlite::Transaction<'_>,
3523    collections: &[String],
3524) -> Result<usize, EngineError> {
3525    let mut rebuilt_rows = 0usize;
3526    clear_operational_current_rows(tx, collections)?;
3527    let mut ins_current = tx.prepare_cached(
3528        "INSERT INTO operational_current \
3529         (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
3530         VALUES (?1, ?2, ?3, ?4, ?5)",
3531    )?;
3532
3533    for collection in collections {
3534        let mut stmt = tx.prepare(
3535            "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
3536             FROM operational_mutations \
3537             WHERE collection_name = ?1 \
3538             ORDER BY record_key, mutation_order",
3539        )?;
3540        let mut latest_by_key: std::collections::HashMap<String, Option<(String, i64, String)>> =
3541            std::collections::HashMap::new();
3542        let rows = stmt.query_map([collection], map_operational_mutation_row)?;
3543        for row in rows {
3544            let mutation = row?;
3545            match mutation.op_kind.as_str() {
3546                "put" => {
3547                    latest_by_key.insert(
3548                        mutation.record_key,
3549                        Some((mutation.payload_json, mutation.created_at, mutation.id)),
3550                    );
3551                }
3552                "delete" => {
3553                    latest_by_key.insert(mutation.record_key, None);
3554                }
3555                _ => {}
3556            }
3557        }
3558
3559        for (record_key, state) in latest_by_key {
3560            if let Some((payload_json, updated_at, last_mutation_id)) = state {
3561                ins_current.execute(rusqlite::params![
3562                    collection,
3563                    record_key,
3564                    payload_json,
3565                    updated_at,
3566                    last_mutation_id,
3567                ])?;
3568                rebuilt_rows += 1;
3569            }
3570        }
3571    }
3572
3573    drop(ins_current);
3574    Ok(rebuilt_rows)
3575}
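// Worked example of the replay above, for a hypothetical mutation stream in one
// collection (shown in mutation_order): put(k1, v1), put(k2, v2), delete(k1),
// put(k2, v3). The rebuilt operational_current rows are just {k2 -> v3}: k1 ends on a
// delete so no row is written for it, and only the last put per record_key survives.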
3576
3577fn clear_operational_current_rows(
3578    tx: &rusqlite::Transaction<'_>,
3579    collections: &[String],
3580) -> Result<(), EngineError> {
3581    let mut delete_current =
3582        tx.prepare_cached("DELETE FROM operational_current WHERE collection_name = ?1")?;
3583    let mut delete_secondary_current = tx.prepare_cached(
3584        "DELETE FROM operational_secondary_index_entries \
3585         WHERE collection_name = ?1 AND subject_kind = 'current'",
3586    )?;
3587    for collection in collections {
3588        delete_secondary_current.execute([collection])?;
3589        delete_current.execute([collection])?;
3590    }
3591    drop(delete_secondary_current);
3592    drop(delete_current);
3593    Ok(())
3594}
3595
3596fn clear_operational_secondary_index_entries(
3597    tx: &rusqlite::Transaction<'_>,
3598    collection_name: &str,
3599) -> Result<(), EngineError> {
3600    tx.execute(
3601        "DELETE FROM operational_secondary_index_entries WHERE collection_name = ?1",
3602        [collection_name],
3603    )?;
3604    Ok(())
3605}
3606
3607fn insert_operational_secondary_index_entry(
3608    tx: &rusqlite::Transaction<'_>,
3609    collection_name: &str,
3610    subject_kind: &str,
3611    mutation_id: &str,
3612    record_key: &str,
3613    entry: &crate::operational::OperationalSecondaryIndexEntry,
3614) -> Result<(), EngineError> {
3615    tx.execute(
3616        "INSERT INTO operational_secondary_index_entries \
3617         (collection_name, index_name, subject_kind, mutation_id, record_key, sort_timestamp, \
3618          slot1_text, slot1_integer, slot2_text, slot2_integer, slot3_text, slot3_integer) \
3619         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
3620        rusqlite::params![
3621            collection_name,
3622            entry.index_name,
3623            subject_kind,
3624            mutation_id,
3625            record_key,
3626            entry.sort_timestamp,
3627            entry.slot1_text,
3628            entry.slot1_integer,
3629            entry.slot2_text,
3630            entry.slot2_integer,
3631            entry.slot3_text,
3632            entry.slot3_integer,
3633        ],
3634    )?;
3635    Ok(())
3636}
3637
3638fn rebuild_operational_secondary_index_entries(
3639    tx: &rusqlite::Transaction<'_>,
3640    collection_name: &str,
3641    collection_kind: OperationalCollectionKind,
3642    indexes: &[OperationalSecondaryIndexDefinition],
3643) -> Result<(usize, usize), EngineError> {
3644    clear_operational_secondary_index_entries(tx, collection_name)?;
3645
3646    let mut mutation_entries_rebuilt = 0usize;
3647    if collection_kind == OperationalCollectionKind::AppendOnlyLog {
3648        let mut stmt = tx.prepare(
3649            "SELECT id, record_key, payload_json FROM operational_mutations \
3650             WHERE collection_name = ?1 ORDER BY mutation_order",
3651        )?;
3652        let rows = stmt
3653            .query_map([collection_name], |row| {
3654                Ok((
3655                    row.get::<_, String>(0)?,
3656                    row.get::<_, String>(1)?,
3657                    row.get::<_, String>(2)?,
3658                ))
3659            })?
3660            .collect::<Result<Vec<_>, _>>()?;
3661        drop(stmt);
3662        for (mutation_id, record_key, payload_json) in rows {
3663            for entry in extract_secondary_index_entries_for_mutation(indexes, &payload_json) {
3664                insert_operational_secondary_index_entry(
3665                    tx,
3666                    collection_name,
3667                    "mutation",
3668                    &mutation_id,
3669                    &record_key,
3670                    &entry,
3671                )?;
3672                mutation_entries_rebuilt += 1;
3673            }
3674        }
3675    }
3676
3677    let mut current_entries_rebuilt = 0usize;
3678    if collection_kind == OperationalCollectionKind::LatestState {
3679        let mut stmt = tx.prepare(
3680            "SELECT record_key, payload_json, updated_at, last_mutation_id FROM operational_current \
3681             WHERE collection_name = ?1 ORDER BY updated_at DESC, record_key",
3682        )?;
3683        let rows = stmt
3684            .query_map([collection_name], |row| {
3685                Ok((
3686                    row.get::<_, String>(0)?,
3687                    row.get::<_, String>(1)?,
3688                    row.get::<_, i64>(2)?,
3689                    row.get::<_, String>(3)?,
3690                ))
3691            })?
3692            .collect::<Result<Vec<_>, _>>()?;
3693        drop(stmt);
3694        for (record_key, payload_json, updated_at, last_mutation_id) in rows {
3695            for entry in
3696                extract_secondary_index_entries_for_current(indexes, &payload_json, updated_at)
3697            {
3698                insert_operational_secondary_index_entry(
3699                    tx,
3700                    collection_name,
3701                    "current",
3702                    &last_mutation_id,
3703                    &record_key,
3704                    &entry,
3705                )?;
3706                current_entries_rebuilt += 1;
3707            }
3708        }
3709    }
3710
3711    Ok((mutation_entries_rebuilt, current_entries_rebuilt))
3712}
3713
3714fn collect_strings_tx(
3715    tx: &rusqlite::Transaction<'_>,
3716    sql: &str,
3717    value: &str,
3718) -> Result<Vec<String>, EngineError> {
3719    let mut stmt = tx.prepare(sql)?;
3720    let rows = stmt.query_map([value], |row| row.get::<_, String>(0))?;
3721    rows.collect::<Result<Vec<_>, _>>()
3722        .map_err(EngineError::from)
3723}
3724
3725/// Convert an i64 count that should be non-negative to usize, panicking on a
3726/// negative value, which would indicate data corruption.
3727#[allow(clippy::expect_used)]
3728fn i64_to_usize(val: i64) -> usize {
3729    usize::try_from(val).expect("count(*) must be non-negative")
3730}
3731
3732/// Runs a parameterized query and collects the first column as strings.
3733///
3734/// NOTE(review): sql parameter must be a hardcoded query string, never user input.
3735/// Options: (A) doc comment, (B) whitelist refactor like `count_source_ref`, (C) leave as-is.
3736/// Chose (A): function is private, only called with hardcoded SQL from `trace_source`.
3737/// Whitelist refactor not practical — queries have different SELECT/ORDER BY per table.
3738fn collect_strings(
3739    conn: &rusqlite::Connection,
3740    sql: &str,
3741    param: &str,
3742) -> Result<Vec<String>, EngineError> {
3743    let mut stmt = conn.prepare(sql)?;
3744    let values = stmt
3745        .query_map([param], |row| row.get::<_, String>(0))?
3746        .collect::<Result<Vec<_>, _>>()?;
3747    Ok(values)
3748}
3749
3750fn collect_edge_logical_ids_for_restore(
3751    tx: &rusqlite::Transaction<'_>,
3752    logical_id: &str,
3753    retire_source_ref: Option<&str>,
3754    retire_created_at: i64,
3755    retire_event_rowid: i64,
3756) -> Result<Vec<String>, EngineError> {
3757    let mut stmt = tx.prepare(
3758        "SELECT DISTINCT e.logical_id \
3759         FROM edges e \
3760         JOIN provenance_events p \
3761           ON p.subject = e.logical_id \
3762          AND p.event_type = 'edge_retire' \
3763          AND ( \
3764                p.created_at > ?3 \
3765                OR (p.created_at = ?3 AND p.rowid >= ?4) \
3766          ) \
3767          AND ((?2 IS NULL AND p.source_ref IS NULL) OR p.source_ref = ?2) \
3768         WHERE e.superseded_at IS NOT NULL \
3769           AND (e.source_logical_id = ?1 OR e.target_logical_id = ?1) \
3770           AND NOT EXISTS ( \
3771                SELECT 1 FROM edges active \
3772                WHERE active.logical_id = e.logical_id \
3773                  AND active.superseded_at IS NULL \
3774           ) \
3775         ORDER BY e.logical_id",
3776    )?;
3777    let edge_ids = stmt
3778        .query_map(
3779            rusqlite::params![
3780                logical_id,
3781                retire_source_ref,
3782                retire_created_at,
3783                retire_event_rowid
3784            ],
3785            |row| row.get::<_, String>(0),
3786        )?
3787        .collect::<Result<Vec<_>, _>>()?;
3788    Ok(edge_ids)
3789}
3790
3791/// Restores edges for a node being restored, skipping any whose counterpart
3792/// endpoint is not active (e.g. still retired or purged).
3793fn restore_validated_edges(
3794    tx: &rusqlite::Transaction<'_>,
3795    logical_id: &str,
3796    retire_source_ref: Option<&str>,
3797    retire_created_at: i64,
3798    retire_event_rowid: i64,
3799) -> Result<(usize, Vec<SkippedEdge>), EngineError> {
3800    let edge_logical_ids = collect_edge_logical_ids_for_restore(
3801        tx,
3802        logical_id,
3803        retire_source_ref,
3804        retire_created_at,
3805        retire_event_rowid,
3806    )?;
3807    let mut restored = 0usize;
3808    let mut skipped = Vec::new();
3809    for edge_logical_id in &edge_logical_ids {
3810        let edge_detail: Option<(String, String, String)> = tx
3811            .query_row(
3812                "SELECT row_id, source_logical_id, target_logical_id FROM edges \
3813                 WHERE logical_id = ?1 AND superseded_at IS NOT NULL \
3814                 ORDER BY superseded_at DESC, created_at DESC, rowid DESC LIMIT 1",
3815                [edge_logical_id.as_str()],
3816                |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
3817            )
3818            .optional()?;
3819        let Some((edge_row_id, source_lid, target_lid)) = edge_detail else {
3820            continue;
3821        };
3822        let other_endpoint = if source_lid == logical_id {
3823            &target_lid
3824        } else {
3825            &source_lid
3826        };
3827        let endpoint_active: bool = tx
3828            .query_row(
3829                "SELECT 1 FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL LIMIT 1",
3830                [other_endpoint.as_str()],
3831                |_| Ok(true),
3832            )
3833            .optional()?
3834            .unwrap_or(false);
3835        if !endpoint_active {
3836            skipped.push(SkippedEdge {
3837                edge_logical_id: edge_logical_id.clone(),
3838                missing_endpoint: other_endpoint.clone(),
3839            });
3840            continue;
3841        }
3842        restored += tx.execute(
3843            "UPDATE edges SET superseded_at = NULL WHERE row_id = ?1",
3844            [edge_row_id.as_str()],
3845        )?;
3846    }
3847    Ok((restored, skipped))
3848}
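// Example of the endpoint check above (hypothetical graph): restoring node A considers
// a retired edge A -> B. If B still has an active row, the edge's superseded_at is
// cleared and it counts toward `restored`; if B is itself retired or purged, the edge
// stays retired and is reported in `skipped` with `missing_endpoint` set to B.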
3849
3850#[cfg(feature = "sqlite-vec")]
3851fn count_vec_rows_for_logical_id(
3852    tx: &rusqlite::Transaction<'_>,
3853    logical_id: &str,
3854) -> Result<usize, EngineError> {
3855    match tx.query_row(
3856        "SELECT count(*) FROM vec_nodes_active v \
3857         JOIN chunks c ON c.id = v.chunk_id \
3858         WHERE c.node_logical_id = ?1",
3859        [logical_id],
3860        |row| row.get::<_, i64>(0),
3861    ) {
3862        Ok(count) => Ok(i64_to_usize(count)),
3863        Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
3864            if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
3865        {
3866            Ok(0)
3867        }
3868        Err(error) => Err(EngineError::Sqlite(error)),
3869    }
3870}
3871
3872#[cfg(not(feature = "sqlite-vec"))]
3873#[allow(clippy::unnecessary_wraps)]
3874fn count_vec_rows_for_logical_id(
3875    _tx: &rusqlite::Transaction<'_>,
3876    _logical_id: &str,
3877) -> Result<usize, EngineError> {
3878    Ok(0)
3879}
3880
3881#[cfg(feature = "sqlite-vec")]
3882fn delete_vec_rows_for_logical_id(
3883    tx: &rusqlite::Transaction<'_>,
3884    logical_id: &str,
3885) -> Result<usize, EngineError> {
3886    match tx.execute(
3887        "DELETE FROM vec_nodes_active \
3888         WHERE chunk_id IN (SELECT id FROM chunks WHERE node_logical_id = ?1)",
3889        [logical_id],
3890    ) {
3891        Ok(count) => Ok(count),
3892        Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
3893            if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
3894        {
3895            Ok(0)
3896        }
3897        Err(error) => Err(EngineError::Sqlite(error)),
3898    }
3899}
3900
3901#[cfg(not(feature = "sqlite-vec"))]
3902#[allow(clippy::unnecessary_wraps)]
3903fn delete_vec_rows_for_logical_id(
3904    _tx: &rusqlite::Transaction<'_>,
3905    _logical_id: &str,
3906) -> Result<usize, EngineError> {
3907    Ok(0)
3908}
3909
3910fn ensure_operational_collection_registered(
3911    conn: &rusqlite::Connection,
3912    collection_name: &str,
3913) -> Result<(), EngineError> {
3914    if load_operational_collection_record(conn, collection_name)?.is_none() {
3915        return Err(EngineError::InvalidWrite(format!(
3916            "operational collection '{collection_name}' is not registered"
3917        )));
3918    }
3919    Ok(())
3920}
3921
3922fn load_operational_collection_record(
3923    conn: &rusqlite::Connection,
3924    name: &str,
3925) -> Result<Option<OperationalCollectionRecord>, EngineError> {
3926    conn.query_row(
3927        "SELECT name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at, disabled_at \
3928         FROM operational_collections WHERE name = ?1",
3929        [name],
3930        map_operational_collection_row,
3931    )
3932    .optional()
3933    .map_err(EngineError::Sqlite)
3934}
3935
3936fn validate_append_only_operational_collection(
3937    record: &OperationalCollectionRecord,
3938    operation: &str,
3939) -> Result<(), EngineError> {
3940    if record.kind != OperationalCollectionKind::AppendOnlyLog {
3941        return Err(EngineError::InvalidWrite(format!(
3942            "operational collection '{}' must be append_only_log to {operation}",
3943            record.name
3944        )));
3945    }
3946    Ok(())
3947}
3948
3949#[derive(Clone, Debug, PartialEq, Eq)]
3950struct CompiledOperationalReadFilter {
3951    field: String,
3952    condition: OperationalReadCondition,
3953}
3954
3955#[derive(Clone, Debug)]
3956struct MatchedAppendOnlySecondaryIndexRead<'a> {
3957    index_name: &'a str,
3958    value_filter: &'a CompiledOperationalReadFilter,
3959    time_range: Option<&'a CompiledOperationalReadFilter>,
3960}
3961
3962#[derive(Clone, Debug, PartialEq, Eq)]
3963enum OperationalReadCondition {
3964    ExactString(String),
3965    ExactInteger(i64),
3966    Prefix(String),
3967    Range {
3968        lower: Option<i64>,
3969        upper: Option<i64>,
3970    },
3971}
3972
3973fn operational_read_limit(limit: Option<usize>) -> Result<usize, EngineError> {
3974    let applied_limit = limit.unwrap_or(DEFAULT_OPERATIONAL_READ_LIMIT);
3975    if applied_limit == 0 {
3976        return Err(EngineError::InvalidWrite(
3977            "operational read limit must be greater than zero".to_owned(),
3978        ));
3979    }
3980    Ok(applied_limit.min(MAX_OPERATIONAL_READ_LIMIT))
3981}
3982
3983fn parse_operational_filter_fields(
3984    filter_fields_json: &str,
3985) -> Result<Vec<OperationalFilterField>, String> {
3986    let fields: Vec<OperationalFilterField> = serde_json::from_str(filter_fields_json)
3987        .map_err(|error| format!("invalid filter_fields_json: {error}"))?;
3988    let mut seen = std::collections::HashSet::new();
3989    for field in &fields {
3990        if field.name.trim().is_empty() {
3991            return Err("filter_fields_json field names must not be empty".to_owned());
3992        }
3993        if !seen.insert(field.name.as_str()) {
3994            return Err(format!(
3995                "filter_fields_json contains duplicate field '{}'",
3996                field.name
3997            ));
3998        }
3999        if field.modes.is_empty() {
4000            return Err(format!(
4001                "filter_fields_json field '{}' must declare at least one mode",
4002                field.name
4003            ));
4004        }
4005        if field.modes.contains(&OperationalFilterMode::Prefix)
4006            && field.field_type != OperationalFilterFieldType::String
4007        {
4008            return Err(format!(
4009                "filter field '{}' only supports prefix for string types",
4010                field.name
4011            ));
4012        }
4013    }
4014    Ok(fields)
4015}
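// Example of declarations this accepts, described rather than shown as raw JSON because
// the exact key spelling depends on the serde attributes on `OperationalFilterField` in
// operational.rs: a string field "status" with modes exact + prefix and a timestamp
// field "created_at" with modes exact + range both pass; an empty mode list, a duplicate
// field name, or a prefix mode on a non-string field is rejected.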
4016
4017fn compile_operational_read_filters(
4018    filters: &[OperationalFilterClause],
4019    declared_fields: &[OperationalFilterField],
4020) -> Result<Vec<CompiledOperationalReadFilter>, EngineError> {
4021    let field_map = declared_fields
4022        .iter()
4023        .map(|field| (field.name.as_str(), field))
4024        .collect::<std::collections::HashMap<_, _>>();
4025    filters
4026        .iter()
4027        .map(|filter| match filter {
4028            OperationalFilterClause::Exact { field, value } => {
4029                let declared = field_map.get(field.as_str()).ok_or_else(|| {
4030                    EngineError::InvalidWrite(format!(
4031                        "operational read filter uses undeclared field '{field}'"
4032                    ))
4033                })?;
4034                if !declared.modes.contains(&OperationalFilterMode::Exact) {
4035                    return Err(EngineError::InvalidWrite(format!(
4036                        "operational read field '{field}' does not allow exact filters"
4037                    )));
4038                }
4039                let condition = match (declared.field_type, value) {
4040                    (OperationalFilterFieldType::String, OperationalFilterValue::String(value)) => {
4041                        OperationalReadCondition::ExactString(value.clone())
4042                    }
4043                    (
4044                        OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp,
4045                        OperationalFilterValue::Integer(value),
4046                    ) => OperationalReadCondition::ExactInteger(*value),
4047                    _ => {
4048                        return Err(EngineError::InvalidWrite(format!(
4049                            "operational read field '{field}' received a value with the wrong type"
4050                        )));
4051                    }
4052                };
4053                Ok(CompiledOperationalReadFilter {
4054                    field: field.clone(),
4055                    condition,
4056                })
4057            }
4058            OperationalFilterClause::Prefix { field, value } => {
4059                let declared = field_map.get(field.as_str()).ok_or_else(|| {
4060                    EngineError::InvalidWrite(format!(
4061                        "operational read filter uses undeclared field '{field}'"
4062                    ))
4063                })?;
4064                if !declared.modes.contains(&OperationalFilterMode::Prefix) {
4065                    return Err(EngineError::InvalidWrite(format!(
4066                        "operational read field '{field}' does not allow prefix filters"
4067                    )));
4068                }
4069                if declared.field_type != OperationalFilterFieldType::String {
4070                    return Err(EngineError::InvalidWrite(format!(
4071                        "operational read field '{field}' only supports prefix filters for strings"
4072                    )));
4073                }
4074                Ok(CompiledOperationalReadFilter {
4075                    field: field.clone(),
4076                    condition: OperationalReadCondition::Prefix(value.clone()),
4077                })
4078            }
4079            OperationalFilterClause::Range {
4080                field,
4081                lower,
4082                upper,
4083            } => {
4084                let declared = field_map.get(field.as_str()).ok_or_else(|| {
4085                    EngineError::InvalidWrite(format!(
4086                        "operational read filter uses undeclared field '{field}'"
4087                    ))
4088                })?;
4089                if !declared.modes.contains(&OperationalFilterMode::Range) {
4090                    return Err(EngineError::InvalidWrite(format!(
4091                        "operational read field '{field}' does not allow range filters"
4092                    )));
4093                }
4094                if !matches!(
4095                    declared.field_type,
4096                    OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp
4097                ) {
4098                    return Err(EngineError::InvalidWrite(format!(
4099                        "operational read field '{field}' only supports range filters for integer/timestamp fields"
4100                    )));
4101                }
4102                if lower.is_none() && upper.is_none() {
4103                    return Err(EngineError::InvalidWrite(format!(
4104                        "operational read range filter for '{field}' must specify a lower or upper bound"
4105                    )));
4106                }
4107                Ok(CompiledOperationalReadFilter {
4108                    field: field.clone(),
4109                    condition: OperationalReadCondition::Range {
4110                        lower: *lower,
4111                        upper: *upper,
4112                    },
4113                })
4114            }
4115        })
4116        .collect()
4117}
4118
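/// Attempts to match the compiled filters against a registered
/// `AppendOnlyFieldTime` secondary index. A match requires exactly one
/// supported filter on the indexed value field, optionally accompanied by a
/// single range filter on the index's time field; any other combination
/// returns `None` so the caller falls back to the generic read path.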
4119fn match_append_only_secondary_index_read<'a>(
4120    filters: &'a [CompiledOperationalReadFilter],
4121    indexes: &'a [OperationalSecondaryIndexDefinition],
4122) -> Option<MatchedAppendOnlySecondaryIndexRead<'a>> {
4123    indexes.iter().find_map(|index| {
4124        let OperationalSecondaryIndexDefinition::AppendOnlyFieldTime {
4125            name,
4126            field,
4127            value_type,
4128            time_field,
4129        } = index
4130        else {
4131            return None;
4132        };
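        // At most two clauses can be served by the index: one on the indexed
        // value field and, optionally, one range on its time field.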
4133        if !(1..=2).contains(&filters.len()) {
4134            return None;
4135        }
4136
4137        let mut value_filter = None;
4138        let mut time_range = None;
4139        for filter in filters {
4140            if filter.field == *field {
4141                let supported = matches!(
4142                    (&filter.condition, value_type),
4143                    (
4144                        OperationalReadCondition::ExactString(_)
4145                            | OperationalReadCondition::Prefix(_),
4146                        crate::operational::OperationalSecondaryIndexValueType::String
4147                    ) | (
4148                        OperationalReadCondition::ExactInteger(_),
4149                        crate::operational::OperationalSecondaryIndexValueType::Integer
4150                            | crate::operational::OperationalSecondaryIndexValueType::Timestamp
4151                    )
4152                );
4153                if !supported || value_filter.is_some() {
4154                    return None;
4155                }
4156                value_filter = Some(filter);
4157                continue;
4158            }
4159            if filter.field == *time_field {
4160                if !matches!(filter.condition, OperationalReadCondition::Range { .. })
4161                    || time_range.is_some()
4162                {
4163                    return None;
4164                }
4165                time_range = Some(filter);
4166                continue;
4167            }
4168            return None;
4169        }
4170
4171        value_filter.map(|value_filter| MatchedAppendOnlySecondaryIndexRead {
4172            index_name: name.as_str(),
4173            value_filter,
4174            time_range,
4175        })
4176    })
4177}
4178
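/// Serves a filtered read via `operational_secondary_index_entries` when a
/// registered append-only index covers the compiled filters. Returns
/// `Ok(None)` when no index matches, letting the caller fall back to the
/// generic filtered-read path.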
4179fn execute_operational_secondary_index_read(
4180    conn: &rusqlite::Connection,
4181    collection_name: &str,
4182    filters: &[CompiledOperationalReadFilter],
4183    indexes: &[OperationalSecondaryIndexDefinition],
4184    applied_limit: usize,
4185) -> Result<Option<OperationalReadReport>, EngineError> {
4186    use rusqlite::types::Value;
4187
4188    let Some(matched) = match_append_only_secondary_index_read(filters, indexes) else {
4189        return Ok(None);
4190    };
4191
4192    let mut sql = String::from(
4193        "SELECT m.id, m.collection_name, m.record_key, m.op_kind, m.payload_json, m.source_ref, m.created_at \
4194         FROM operational_secondary_index_entries s \
4195         JOIN operational_mutations m ON m.id = s.mutation_id \
4196         WHERE s.collection_name = ?1 AND s.index_name = ?2 AND s.subject_kind = 'mutation' ",
4197    );
4198    let mut params = vec![
4199        Value::from(collection_name.to_owned()),
4200        Value::from(matched.index_name.to_owned()),
4201    ];
4202
4203    match &matched.value_filter.condition {
4204        OperationalReadCondition::ExactString(value) => {
4205            let _ = write!(sql, "AND s.slot1_text = ?{} ", params.len() + 1);
4206            params.push(Value::from(value.clone()));
4207        }
4208        OperationalReadCondition::Prefix(value) => {
4209            let _ = write!(sql, "AND s.slot1_text GLOB ?{} ", params.len() + 1);
4210            params.push(Value::from(glob_prefix_pattern(value)));
4211        }
4212        OperationalReadCondition::ExactInteger(value) => {
4213            let _ = write!(sql, "AND s.slot1_integer = ?{} ", params.len() + 1);
4214            params.push(Value::from(*value));
4215        }
4216        OperationalReadCondition::Range { .. } => return Ok(None),
4217    }
4218
4219    if let Some(time_range) = matched.time_range
4220        && let OperationalReadCondition::Range { lower, upper } = &time_range.condition
4221    {
4222        if let Some(lower) = lower {
4223            let _ = write!(sql, "AND s.sort_timestamp >= ?{} ", params.len() + 1);
4224            params.push(Value::from(*lower));
4225        }
4226        if let Some(upper) = upper {
4227            let _ = write!(sql, "AND s.sort_timestamp <= ?{} ", params.len() + 1);
4228            params.push(Value::from(*upper));
4229        }
4230    }
4231
4232    let _ = write!(
4233        sql,
4234        "ORDER BY s.sort_timestamp DESC, m.mutation_order DESC LIMIT ?{}",
4235        params.len() + 1
4236    );
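    // Fetch one row beyond the limit so truncation can be detected without a
    // separate COUNT query.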
4237    params.push(Value::from(i64::try_from(applied_limit + 1).map_err(
4238        |_| EngineError::Bridge("operational read limit overflow".to_owned()),
4239    )?));
4240
4241    let mut stmt = conn.prepare(&sql)?;
4242    let mut rows = stmt
4243        .query_map(
4244            rusqlite::params_from_iter(params),
4245            map_operational_mutation_row,
4246        )?
4247        .collect::<Result<Vec<_>, _>>()?;
4248    let was_limited = rows.len() > applied_limit;
4249    if was_limited {
4250        rows.truncate(applied_limit);
4251    }
4252
4253    Ok(Some(OperationalReadReport {
4254        collection_name: collection_name.to_owned(),
4255        row_count: rows.len(),
4256        applied_limit,
4257        was_limited,
4258        rows,
4259    }))
4260}
4261
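/// Generic filtered read over `operational_mutations`, joining
/// `operational_filter_values` once per compiled filter clause and fetching
/// one row beyond `applied_limit` to detect truncation.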
4262fn execute_operational_filtered_read(
4263    conn: &rusqlite::Connection,
4264    collection_name: &str,
4265    filters: &[CompiledOperationalReadFilter],
4266    applied_limit: usize,
4267) -> Result<OperationalReadReport, EngineError> {
4268    use rusqlite::types::Value;
4269
4270    let mut sql = String::from(
4271        "SELECT m.id, m.collection_name, m.record_key, m.op_kind, m.payload_json, m.source_ref, m.created_at \
4272         FROM operational_mutations m ",
4273    );
4274    let mut params = vec![Value::from(collection_name.to_owned())];
4275    for (index, filter) in filters.iter().enumerate() {
4276        let _ = write!(
4277            sql,
4278            "JOIN operational_filter_values f{index} \
4279             ON f{index}.mutation_id = m.id \
4280            AND f{index}.collection_name = m.collection_name "
4281        );
4282        match &filter.condition {
4283            OperationalReadCondition::ExactString(value) => {
4284                let _ = write!(
4285                    sql,
4286                    "AND f{index}.field_name = ?{} AND f{index}.string_value = ?{} ",
4287                    params.len() + 1,
4288                    params.len() + 2
4289                );
4290                params.push(Value::from(filter.field.clone()));
4291                params.push(Value::from(value.clone()));
4292            }
4293            OperationalReadCondition::ExactInteger(value) => {
4294                let _ = write!(
4295                    sql,
4296                    "AND f{index}.field_name = ?{} AND f{index}.integer_value = ?{} ",
4297                    params.len() + 1,
4298                    params.len() + 2
4299                );
4300                params.push(Value::from(filter.field.clone()));
4301                params.push(Value::from(*value));
4302            }
4303            OperationalReadCondition::Prefix(value) => {
4304                let _ = write!(
4305                    sql,
4306                    "AND f{index}.field_name = ?{} AND f{index}.string_value GLOB ?{} ",
4307                    params.len() + 1,
4308                    params.len() + 2
4309                );
4310                params.push(Value::from(filter.field.clone()));
4311                params.push(Value::from(glob_prefix_pattern(value)));
4312            }
4313            OperationalReadCondition::Range { lower, upper } => {
4314                let _ = write!(sql, "AND f{index}.field_name = ?{} ", params.len() + 1);
4315                params.push(Value::from(filter.field.clone()));
4316                if let Some(lower) = lower {
4317                    let _ = write!(sql, "AND f{index}.integer_value >= ?{} ", params.len() + 1);
4318                    params.push(Value::from(*lower));
4319                }
4320                if let Some(upper) = upper {
4321                    let _ = write!(sql, "AND f{index}.integer_value <= ?{} ", params.len() + 1);
4322                    params.push(Value::from(*upper));
4323                }
4324            }
4325        }
4326    }
4327    let _ = write!(
4328        sql,
4329        "WHERE m.collection_name = ?1 ORDER BY m.mutation_order DESC LIMIT ?{}",
4330        params.len() + 1
4331    );
4332    params.push(Value::from(i64::try_from(applied_limit + 1).map_err(
4333        |_| EngineError::Bridge("operational read limit overflow".to_owned()),
4334    )?));
4335
4336    let mut stmt = conn.prepare(&sql)?;
4337    let mut rows = stmt
4338        .query_map(
4339            rusqlite::params_from_iter(params),
4340            map_operational_mutation_row,
4341        )?
4342        .collect::<Result<Vec<_>, _>>()?;
4343    let was_limited = rows.len() > applied_limit;
4344    if was_limited {
4345        rows.truncate(applied_limit);
4346    }
4347    Ok(OperationalReadReport {
4348        collection_name: collection_name.to_owned(),
4349        row_count: rows.len(),
4350        applied_limit,
4351        was_limited,
4352        rows,
4353    })
4354}
4355
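/// Builds a SQLite GLOB pattern matching strings that start with `value`,
/// escaping the GLOB metacharacters `*`, `?`, and `[` by wrapping each in
/// brackets. For example, `glob_prefix_pattern("ab*c")` yields `ab[*]c*`.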
4356fn glob_prefix_pattern(value: &str) -> String {
4357    let mut pattern = String::with_capacity(value.len() + 1);
4358    for ch in value.chars() {
4359        match ch {
4360            '*' => pattern.push_str("[*]"),
4361            '?' => pattern.push_str("[?]"),
4362            '[' => pattern.push_str("[[]"),
4363            _ => pattern.push(ch),
4364        }
4365    }
4366    pattern.push('*');
4367    pattern
4368}
4369
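/// A single filterable value extracted from a mutation payload; exactly one
/// of `string_value` / `integer_value` is populated, according to the
/// declared field type.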
4370#[derive(Clone, Debug, PartialEq, Eq)]
4371struct ExtractedOperationalFilterValue {
4372    field_name: String,
4373    string_value: Option<String>,
4374    integer_value: Option<i64>,
4375}
4376
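/// Extracts declared filter-field values from a mutation payload. Only
/// top-level object keys are considered; payloads that are not JSON objects,
/// missing fields, and values whose JSON type does not match the declared
/// field type are silently skipped.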
4377fn extract_operational_filter_values(
4378    filter_fields: &[OperationalFilterField],
4379    payload_json: &str,
4380) -> Vec<ExtractedOperationalFilterValue> {
4381    let Ok(parsed) = serde_json::from_str::<serde_json::Value>(payload_json) else {
4382        return Vec::new();
4383    };
4384    let Some(object) = parsed.as_object() else {
4385        return Vec::new();
4386    };
4387
4388    filter_fields
4389        .iter()
4390        .filter_map(|field| {
4391            let value = object.get(&field.name)?;
4392            match field.field_type {
4393                OperationalFilterFieldType::String => {
4394                    value
4395                        .as_str()
4396                        .map(|string_value| ExtractedOperationalFilterValue {
4397                            field_name: field.name.clone(),
4398                            string_value: Some(string_value.to_owned()),
4399                            integer_value: None,
4400                        })
4401                }
4402                OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp => {
4403                    value
4404                        .as_i64()
4405                        .map(|integer_value| ExtractedOperationalFilterValue {
4406                            field_name: field.name.clone(),
4407                            string_value: None,
4408                            integer_value: Some(integer_value),
4409                        })
4410                }
4411            }
4412        })
4413        .collect()
4414}
4415
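/// Convenience wrapper over [`operational_compaction_candidates_at`] that
/// evaluates the retention policy against the current wall-clock time.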
4416fn operational_compaction_candidates(
4417    conn: &rusqlite::Connection,
4418    retention_json: &str,
4419    collection_name: &str,
4420) -> Result<(Vec<String>, Option<i64>), EngineError> {
4421    operational_compaction_candidates_at(
4422        conn,
4423        retention_json,
4424        collection_name,
4425        current_unix_timestamp()?,
4426    )
4427}
4428
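/// Computes the mutation ids eligible for deletion under the collection's
/// retention policy as of `now_timestamp`. The second tuple element is the
/// cutoff timestamp for `PurgeBeforeSeconds` and `None` for the other
/// policies.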
4429fn operational_compaction_candidates_at(
4430    conn: &rusqlite::Connection,
4431    retention_json: &str,
4432    collection_name: &str,
4433    now_timestamp: i64,
4434) -> Result<(Vec<String>, Option<i64>), EngineError> {
4435    let policy = parse_operational_retention_policy(retention_json)?;
4436    match policy {
4437        OperationalRetentionPolicy::KeepAll => Ok((Vec::new(), None)),
4438        OperationalRetentionPolicy::PurgeBeforeSeconds { max_age_seconds } => {
4439            let before_timestamp = now_timestamp - max_age_seconds;
4440            let mut stmt = conn.prepare(
4441                "SELECT id FROM operational_mutations \
4442                 WHERE collection_name = ?1 AND created_at < ?2 \
4443                 ORDER BY mutation_order",
4444            )?;
4445            let mutation_ids = stmt
4446                .query_map(
4447                    rusqlite::params![collection_name, before_timestamp],
4448                    |row| row.get::<_, String>(0),
4449                )?
4450                .collect::<Result<Vec<_>, _>>()?;
4451            Ok((mutation_ids, Some(before_timestamp)))
4452        }
4453        OperationalRetentionPolicy::KeepLast { max_rows } => {
4454            let mut stmt = conn.prepare(
4455                "SELECT id FROM operational_mutations \
4456                 WHERE collection_name = ?1 \
4457                 ORDER BY mutation_order DESC",
4458            )?;
4459            let ordered_ids = stmt
4460                .query_map([collection_name], |row| row.get::<_, String>(0))?
4461                .collect::<Result<Vec<_>, _>>()?;
4462            Ok((ordered_ids.into_iter().skip(max_rows).collect(), None))
4463        }
4464    }
4465}
4466
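/// Parses and validates `retention_json`: `PurgeBeforeSeconds` requires a
/// positive `max_age_seconds` and `KeepLast` a non-zero `max_rows`. The JSON
/// shape itself is whatever serde derives for `OperationalRetentionPolicy`.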
4467fn parse_operational_retention_policy(
4468    retention_json: &str,
4469) -> Result<OperationalRetentionPolicy, EngineError> {
4470    let policy: OperationalRetentionPolicy = serde_json::from_str(retention_json)
4471        .map_err(|error| EngineError::InvalidWrite(format!("invalid retention_json: {error}")))?;
4472    match policy {
4473        OperationalRetentionPolicy::KeepAll => Ok(policy),
4474        OperationalRetentionPolicy::PurgeBeforeSeconds { max_age_seconds } => {
4475            if max_age_seconds <= 0 {
4476                return Err(EngineError::InvalidWrite(
4477                    "retention_json max_age_seconds must be greater than zero".to_owned(),
4478                ));
4479            }
4480            Ok(policy)
4481        }
4482        OperationalRetentionPolicy::KeepLast { max_rows } => {
4483            if max_rows == 0 {
4484                return Err(EngineError::InvalidWrite(
4485                    "retention_json max_rows must be greater than zero".to_owned(),
4486                ));
4487            }
4488            Ok(policy)
4489        }
4490    }
4491}
4492
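/// Loads the collection records that a retention plan or run should cover:
/// either the explicitly named collections (each of which must be registered)
/// or, when no names are given, every registered collection ordered by name,
/// capped at `max_collections`.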
4493fn load_operational_retention_records(
4494    conn: &rusqlite::Connection,
4495    collection_names: Option<&[String]>,
4496    max_collections: Option<usize>,
4497) -> Result<Vec<OperationalCollectionRecord>, EngineError> {
4498    let limit = max_collections.unwrap_or(usize::MAX);
4499    if limit == 0 {
4500        return Err(EngineError::InvalidWrite(
4501            "max_collections must be greater than zero".to_owned(),
4502        ));
4503    }
4504
4505    let mut records = Vec::new();
4506    if let Some(collection_names) = collection_names {
4507        for name in collection_names.iter().take(limit) {
4508            let record = load_operational_collection_record(conn, name)?.ok_or_else(|| {
4509                EngineError::InvalidWrite(format!(
4510                    "operational collection '{name}' is not registered"
4511                ))
4512            })?;
4513            records.push(record);
4514        }
4515        return Ok(records);
4516    }
4517
4518    let mut stmt = conn.prepare(
4519        "SELECT name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at, disabled_at \
4520         FROM operational_collections ORDER BY name",
4521    )?;
4522    let rows = stmt
4523        .query_map([], map_operational_collection_row)?
4524        .take(limit)
4525        .collect::<Result<Vec<_>, _>>()?;
4526    Ok(rows)
4527}
4528
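/// Returns the `executed_at` of the most recent retention run recorded for
/// the collection, or `None` when none has been recorded.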
4529fn last_operational_retention_run_at(
4530    conn: &rusqlite::Connection,
4531    collection_name: &str,
4532) -> Result<Option<i64>, EngineError> {
4533    conn.query_row(
4534        "SELECT MAX(executed_at) FROM operational_retention_runs WHERE collection_name = ?1",
4535        [collection_name],
4536        |row| row.get(0),
4537    )
4538    .optional()
4539    .map_err(EngineError::Sqlite)
4540    .map(Option::flatten)
4541}
4542
4543fn count_operational_mutations_for_collection(
4544    conn: &rusqlite::Connection,
4545    collection_name: &str,
4546) -> Result<usize, EngineError> {
4547    let count: i64 = conn.query_row(
4548        "SELECT count(*) FROM operational_mutations WHERE collection_name = ?1",
4549        [collection_name],
4550        |row| row.get(0),
4551    )?;
4552    usize::try_from(count).map_err(|_| {
4553        EngineError::Bridge(format!("count overflow for collection {collection_name}"))
4554    })
4555}
4556
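/// Maps a retention policy onto the report-facing action kind plus the
/// `max_rows` limit, which is only meaningful for `KeepLast`.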
4557fn retention_action_kind_and_limit(
4558    policy: &OperationalRetentionPolicy,
4559) -> (OperationalRetentionActionKind, Option<usize>) {
4560    match policy {
4561        OperationalRetentionPolicy::KeepAll => (OperationalRetentionActionKind::Noop, None),
4562        OperationalRetentionPolicy::PurgeBeforeSeconds { .. } => {
4563            (OperationalRetentionActionKind::PurgeBeforeSeconds, None)
4564        }
4565        OperationalRetentionPolicy::KeepLast { max_rows } => {
4566            (OperationalRetentionActionKind::KeepLast, Some(*max_rows))
4567        }
4568    }
4569}
4570
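/// Builds the per-collection retention plan: non-append-only collections
/// always plan a no-op, while append-only logs report how many mutations the
/// configured policy would delete right now.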
4571fn plan_operational_retention_item(
4572    conn: &rusqlite::Connection,
4573    record: &OperationalCollectionRecord,
4574    now_timestamp: i64,
4575) -> Result<OperationalRetentionPlanItem, EngineError> {
4576    let last_run_at = last_operational_retention_run_at(conn, &record.name)?;
4577    if record.kind != OperationalCollectionKind::AppendOnlyLog {
4578        return Ok(OperationalRetentionPlanItem {
4579            collection_name: record.name.clone(),
4580            action_kind: OperationalRetentionActionKind::Noop,
4581            candidate_deletions: 0,
4582            before_timestamp: None,
4583            max_rows: None,
4584            last_run_at,
4585        });
4586    }
4587    let policy = parse_operational_retention_policy(&record.retention_json)?;
4588    let (action_kind, max_rows) = retention_action_kind_and_limit(&policy);
4589    let (candidate_ids, before_timestamp) = operational_compaction_candidates_at(
4590        conn,
4591        &record.retention_json,
4592        &record.name,
4593        now_timestamp,
4594    )?;
4595    Ok(OperationalRetentionPlanItem {
4596        collection_name: record.name.clone(),
4597        action_kind,
4598        candidate_deletions: candidate_ids.len(),
4599        before_timestamp,
4600        max_rows,
4601        last_run_at,
4602    })
4603}
4604
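/// Applies retention for one collection inside the caller's transaction.
/// When `dry_run` is set nothing is deleted and the plan's candidate count is
/// reported instead; otherwise candidate mutations are deleted, a provenance
/// event is written when anything was removed, and the run is recorded in
/// `operational_retention_runs` for non-noop policies.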
4605fn run_operational_retention_item(
4606    tx: &rusqlite::Transaction<'_>,
4607    record: &OperationalCollectionRecord,
4608    now_timestamp: i64,
4609    dry_run: bool,
4610) -> Result<OperationalRetentionRunItem, EngineError> {
4611    let plan = plan_operational_retention_item(tx, record, now_timestamp)?;
4612    let mut deleted_mutations = 0usize;
4613    if record.kind == OperationalCollectionKind::AppendOnlyLog
4614        && plan.action_kind != OperationalRetentionActionKind::Noop
4615        && plan.candidate_deletions > 0
4616        && !dry_run
4617    {
4618        let (candidate_ids, _) = operational_compaction_candidates_at(
4619            tx,
4620            &record.retention_json,
4621            &record.name,
4622            now_timestamp,
4623        )?;
4624        let mut delete_stmt =
4625            tx.prepare_cached("DELETE FROM operational_mutations WHERE id = ?1")?;
4626        for mutation_id in &candidate_ids {
4627            delete_stmt.execute([mutation_id.as_str()])?;
4628            deleted_mutations += 1;
4629        }
4630        drop(delete_stmt);
4631
4632        persist_simple_provenance_event(
4633            tx,
4634            "operational_retention_run",
4635            &record.name,
4636            Some(serde_json::json!({
4637                "action_kind": plan.action_kind,
4638                "deleted_mutations": deleted_mutations,
4639                "before_timestamp": plan.before_timestamp,
4640                "max_rows": plan.max_rows,
4641                "executed_at": now_timestamp,
4642            })),
4643        )?;
4644    }
4645
4646    let live_rows_remaining = count_operational_mutations_for_collection(tx, &record.name)?;
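    // Dry runs delete nothing, so report the plan's candidate count and
    // subtract it from the live row count to show what a real run would
    // leave behind.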
4647    let effective_deleted_mutations = if dry_run {
4648        plan.candidate_deletions
4649    } else {
4650        deleted_mutations
4651    };
4652    let rows_remaining = if dry_run {
4653        live_rows_remaining.saturating_sub(effective_deleted_mutations)
4654    } else {
4655        live_rows_remaining
4656    };
4657    if !dry_run && plan.action_kind != OperationalRetentionActionKind::Noop {
4658        tx.execute(
4659            "INSERT INTO operational_retention_runs \
4660             (id, collection_name, executed_at, action_kind, dry_run, deleted_mutations, rows_remaining, metadata_json) \
4661             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
4662            rusqlite::params![
4663                new_id(),
4664                record.name,
4665                now_timestamp,
4666                serde_json::to_string(&plan.action_kind)
4667                    .unwrap_or_else(|_| "\"noop\"".to_owned())
4668                    .trim_matches('"')
4669                    .to_owned(),
4670                i32::from(dry_run),
4671                deleted_mutations,
4672                rows_remaining,
4673                serde_json::json!({
4674                    "before_timestamp": plan.before_timestamp,
4675                    "max_rows": plan.max_rows,
4676                })
4677                .to_string(),
4678            ],
4679        )?;
4680    }
4681
4682    Ok(OperationalRetentionRunItem {
4683        collection_name: plan.collection_name,
4684        action_kind: plan.action_kind,
4685        deleted_mutations: effective_deleted_mutations,
4686        before_timestamp: plan.before_timestamp,
4687        max_rows: plan.max_rows,
4688        rows_remaining,
4689    })
4690}
4691
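/// Current Unix time in whole seconds, surfaced as an engine error if the
/// system clock reads before the epoch or the value overflows `i64`.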
4692fn current_unix_timestamp() -> Result<i64, EngineError> {
4693    let now = SystemTime::now()
4694        .duration_since(SystemTime::UNIX_EPOCH)
4695        .map_err(|error| EngineError::Bridge(format!("system clock error: {error}")))?;
4696    i64::try_from(now.as_secs())
4697        .map_err(|_| EngineError::Bridge("unix timestamp overflow".to_owned()))
4698}
4699
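/// Row mapper for `operational_collections`; unknown `kind` values are
/// reported as a SQLite conversion failure so they surface through the
/// normal error path.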
4700fn map_operational_collection_row(
4701    row: &rusqlite::Row<'_>,
4702) -> Result<OperationalCollectionRecord, rusqlite::Error> {
4703    let kind_text: String = row.get(1)?;
4704    let kind = OperationalCollectionKind::try_from(kind_text.as_str()).map_err(|message| {
4705        rusqlite::Error::FromSqlConversionFailure(
4706            1,
4707            rusqlite::types::Type::Text,
4708            Box::new(io::Error::new(io::ErrorKind::InvalidData, message)),
4709        )
4710    })?;
4711    Ok(OperationalCollectionRecord {
4712        name: row.get(0)?,
4713        kind,
4714        schema_json: row.get(2)?,
4715        retention_json: row.get(3)?,
4716        filter_fields_json: row.get(4)?,
4717        validation_json: row.get(5)?,
4718        secondary_indexes_json: row.get(6)?,
4719        format_version: row.get(7)?,
4720        created_at: row.get(8)?,
4721        disabled_at: row.get(9)?,
4722    })
4723}
4724
4725fn map_operational_mutation_row(
4726    row: &rusqlite::Row<'_>,
4727) -> Result<OperationalMutationRow, rusqlite::Error> {
4728    Ok(OperationalMutationRow {
4729        id: row.get(0)?,
4730        collection_name: row.get(1)?,
4731        record_key: row.get(2)?,
4732        op_kind: row.get(3)?,
4733        payload_json: row.get(4)?,
4734        source_ref: row.get(5)?,
4735        created_at: row.get(6)?,
4736    })
4737}
4738
4739fn map_operational_current_row(
4740    row: &rusqlite::Row<'_>,
4741) -> Result<OperationalCurrentRow, rusqlite::Error> {
4742    Ok(OperationalCurrentRow {
4743        collection_name: row.get(0)?,
4744        record_key: row.get(1)?,
4745        payload_json: row.get(2)?,
4746        updated_at: row.get(3)?,
4747        last_mutation_id: row.get(4)?,
4748    })
4749}
4750
4751#[cfg(test)]
4752#[allow(clippy::expect_used)]
4753mod tests {
4754    use std::fs;
4755    use std::sync::Arc;
4756
4757    use fathomdb_schema::SchemaManager;
4758    use tempfile::NamedTempFile;
4759
4760    use super::{
4761        AdminService, FtsPropertyPathMode, FtsPropertyPathSpec, SafeExportOptions,
4762        VectorRegenerationConfig,
4763    };
4764    use crate::embedder::{EmbedderError, QueryEmbedder, QueryEmbedderIdentity};
4765    use crate::projection::ProjectionTarget;
4766    use crate::sqlite;
4767    use crate::{
4768        EngineError, ExecutionCoordinator, OperationalCollectionKind, OperationalRegisterRequest,
4769        TelemetryCounters,
4770    };
4771
4772    use fathomdb_query::QueryBuilder;
4773
4774    #[cfg(feature = "sqlite-vec")]
4775    use super::load_vector_regeneration_config;
4776
4777    /// In-process embedder used by the regeneration test suite. The
4778    /// vector is parameterized so individual tests can distinguish which
4779    /// embedder produced which profile row.
4780    #[derive(Debug)]
4781    #[allow(dead_code)]
4782    struct TestEmbedder {
4783        identity: QueryEmbedderIdentity,
4784        vector: Vec<f32>,
4785    }
4786
4787    #[allow(dead_code)]
4788    impl TestEmbedder {
4789        fn new(model: &str, dimension: usize) -> Self {
4790            Self {
4791                identity: QueryEmbedderIdentity {
4792                    model_identity: model.to_owned(),
4793                    model_version: "1.0.0".to_owned(),
4794                    dimension,
4795                    normalization_policy: "l2".to_owned(),
4796                },
4797                vector: vec![1.0; dimension],
4798            }
4799        }
4800    }
4801
4802    impl QueryEmbedder for TestEmbedder {
4803        fn embed_query(&self, _text: &str) -> Result<Vec<f32>, EmbedderError> {
4804            Ok(self.vector.clone())
4805        }
4806        fn identity(&self) -> QueryEmbedderIdentity {
4807            self.identity.clone()
4808        }
4809    }
4810
4811    /// Embedder that always fails; used to exercise the post-request
4812    /// failure audit path without the complexity of subprocess machinery.
4813    #[derive(Debug)]
4814    #[allow(dead_code)]
4815    struct FailingEmbedder {
4816        identity: QueryEmbedderIdentity,
4817    }
4818
4819    impl QueryEmbedder for FailingEmbedder {
4820        fn embed_query(&self, _text: &str) -> Result<Vec<f32>, EmbedderError> {
4821            Err(EmbedderError::Failed("test failure".to_owned()))
4822        }
4823        fn identity(&self) -> QueryEmbedderIdentity {
4824            self.identity.clone()
4825        }
4826    }
4827
4828    #[allow(dead_code)]
4829    #[cfg(unix)]
4830    fn set_file_mode(path: &std::path::Path, mode: u32) {
4831        use std::os::unix::fs::PermissionsExt;
4832
4833        let mut permissions = fs::metadata(path).expect("script metadata").permissions();
4834        permissions.set_mode(mode);
4835        fs::set_permissions(path, permissions).expect("chmod");
4836    }
4837
4838    #[allow(dead_code)]
4839    #[cfg(not(unix))]
4840    fn set_file_mode(_path: &std::path::Path, _mode: u32) {}
4841
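    /// Creates a bootstrapped temporary database and an `AdminService`
    /// pointed at it; the `NamedTempFile` is returned so the file outlives
    /// the test body.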
4842    fn setup() -> (NamedTempFile, AdminService) {
4843        let db = NamedTempFile::new().expect("temp file");
4844        let schema = Arc::new(SchemaManager::new());
4845        {
4846            let conn = sqlite::open_connection(db.path()).expect("connection");
4847            schema.bootstrap(&conn).expect("bootstrap");
4848        }
4849        let service = AdminService::new(db.path(), Arc::clone(&schema));
4850        (db, service)
4851    }
4852
4853    #[test]
4854    fn check_integrity_includes_active_uniqueness_count() {
4855        let (_db, service) = setup();
4856        let report = service.check_integrity().expect("integrity check");
4857        assert_eq!(report.duplicate_active_logical_ids, 0);
4858        assert_eq!(report.operational_missing_collections, 0);
4859        assert_eq!(report.operational_missing_last_mutations, 0);
4860    }
4861
4862    #[test]
4863    fn trace_source_returns_node_logical_ids() {
4864        let (db, service) = setup();
4865        {
4866            let conn = sqlite::open_connection(db.path()).expect("conn");
4867            conn.execute(
4868                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
4869                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 'source-1')",
4870                [],
4871            )
4872            .expect("insert node");
4873        }
4874        let report = service.trace_source("source-1").expect("trace");
4875        assert_eq!(report.node_rows, 1);
4876        assert_eq!(report.node_logical_ids, vec!["lg1"]);
4877    }
4878
4879    #[test]
4880    fn trace_source_includes_operational_mutations() {
4881        let (db, service) = setup();
4882        {
4883            let conn = sqlite::open_connection(db.path()).expect("conn");
4884            conn.execute(
4885                "INSERT INTO operational_collections \
4886                 (name, kind, schema_json, retention_json, format_version, created_at) \
4887                 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
4888                [],
4889            )
4890            .expect("insert collection");
4891            conn.execute(
4892                "INSERT INTO operational_mutations \
4893                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
4894                 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"ok\"}', 'source-1', 100, 1)",
4895                [],
4896            )
4897            .expect("insert mutation");
4898        }
4899
4900        let report = service.trace_source("source-1").expect("trace");
4901        assert_eq!(report.operational_mutation_rows, 1);
4902        assert_eq!(report.operational_mutation_ids, vec!["m1"]);
4903    }
4904
4905    #[test]
4906    fn excise_source_restores_prior_active_node() {
4907        let (db, service) = setup();
4908        {
4909            let conn = sqlite::open_connection(db.path()).expect("conn");
4910            conn.execute(
4911                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
4912                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
4913                [],
4914            )
4915            .expect("insert v1 superseded");
4916            conn.execute(
4917                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
4918                 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
4919                [],
4920            )
4921            .expect("insert v2 active");
4922        }
4923        service.excise_source("source-2").expect("excise");
4924        {
4925            let conn = sqlite::open_connection(db.path()).expect("conn");
4926            let active_row_id: String = conn
4927                .query_row(
4928                    "SELECT row_id FROM nodes WHERE logical_id = 'lg1' AND superseded_at IS NULL",
4929                    [],
4930                    |row| row.get(0),
4931                )
4932                .expect("active row exists after excise");
4933            assert_eq!(active_row_id, "r1");
4934        }
4935    }
4936
4937    #[test]
4938    fn excise_source_deletes_operational_mutations_and_repairs_latest_state_current() {
4939        let (db, service) = setup();
4940        {
4941            let conn = sqlite::open_connection(db.path()).expect("conn");
4942            conn.execute(
4943                "INSERT INTO operational_collections \
4944                 (name, kind, schema_json, retention_json, format_version, created_at) \
4945                 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
4946                [],
4947            )
4948            .expect("insert collection");
4949            conn.execute(
4950                "INSERT INTO operational_mutations \
4951                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
4952                 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"old\"}', 'source-1', 100, 1)",
4953                [],
4954            )
4955            .expect("insert prior mutation");
4956            conn.execute(
4957                "INSERT INTO operational_mutations \
4958                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
4959                 VALUES ('m2', 'connector_health', 'gmail', 'put', '{\"status\":\"new\"}', 'source-2', 200, 2)",
4960                [],
4961            )
4962            .expect("insert excised mutation");
4963            conn.execute(
4964                "INSERT INTO operational_current \
4965                 (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
4966                 VALUES ('connector_health', 'gmail', '{\"status\":\"new\"}', 200, 'm2')",
4967                [],
4968            )
4969            .expect("insert current row");
4970        }
4971
4972        let traced = service
4973            .trace_source("source-2")
4974            .expect("trace before excise");
4975        assert_eq!(traced.operational_mutation_rows, 1);
4976        assert_eq!(traced.operational_mutation_ids, vec!["m2"]);
4977
4978        let excised = service.excise_source("source-2").expect("excise");
4979        assert_eq!(excised.operational_mutation_rows, 0);
4980        assert!(excised.operational_mutation_ids.is_empty());
4981
4982        {
4983            let conn = sqlite::open_connection(db.path()).expect("conn");
4984            let remaining: i64 = conn
4985                .query_row(
4986                    "SELECT count(*) FROM operational_mutations WHERE source_ref = 'source-2'",
4987                    [],
4988                    |row| row.get(0),
4989                )
4990                .expect("remaining count");
4991            assert_eq!(remaining, 0);
4992
4993            let current: (String, String) = conn
4994                .query_row(
4995                    "SELECT payload_json, last_mutation_id FROM operational_current \
4996                     WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
4997                    [],
4998                    |row| Ok((row.get(0)?, row.get(1)?)),
4999                )
5000                .expect("rebuilt current row");
5001            assert_eq!(current.0, "{\"status\":\"old\"}");
5002            assert_eq!(current.1, "m1");
5003        }
5004    }
5005
5006    #[test]
5007    fn restore_logical_id_reestablishes_last_pre_retire_content_and_attached_edges() {
5008        let (db, service) = setup();
5009        {
5010            let conn = sqlite::open_connection(db.path()).expect("conn");
5011            conn.execute(
5012                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
5013                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
5014                [],
5015            )
5016            .expect("insert node");
5017            conn.execute(
5018                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
5019                 VALUES ('node-row-topic', 'topic-1', 'Topic', '{}', 100, 'seed')",
5020                [],
5021            )
5022            .expect("insert target node");
5023            conn.execute(
5024                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5025                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5026                [],
5027            )
5028            .expect("insert chunk");
5029            conn.execute(
5030                "INSERT INTO edges \
5031                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
5032                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 'seed')",
5033                [],
5034            )
5035            .expect("insert edge");
5036            conn.execute(
5037                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5038                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
5039                [],
5040            )
5041            .expect("insert node retire event");
5042            conn.execute(
5043                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5044                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
5045                [],
5046            )
5047            .expect("insert edge retire event");
5048            conn.execute(
5049                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
5050                [],
5051            )
5052            .expect("retire node");
5053            conn.execute(
5054                "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
5055                [],
5056            )
5057            .expect("retire edge");
5058            conn.execute("DELETE FROM fts_nodes", [])
5059                .expect("clear fts");
5060        }
5061
5062        let report = service.restore_logical_id("doc-1").expect("restore");
5063        assert_eq!(report.logical_id, "doc-1");
5064        assert!(!report.was_noop);
5065        assert_eq!(report.restored_node_rows, 1);
5066        assert_eq!(report.restored_edge_rows, 1);
5067        assert_eq!(report.restored_chunk_rows, 1);
5068        assert_eq!(report.restored_fts_rows, 1);
5069
5070        let conn = sqlite::open_connection(db.path()).expect("conn");
5071        let active_node_count: i64 = conn
5072            .query_row(
5073                "SELECT count(*) FROM nodes WHERE logical_id = 'doc-1' AND superseded_at IS NULL",
5074                [],
5075                |row| row.get(0),
5076            )
5077            .expect("active node count");
5078        assert_eq!(active_node_count, 1);
5079        let active_edge_count: i64 = conn
5080            .query_row(
5081                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
5082                [],
5083                |row| row.get(0),
5084            )
5085            .expect("active edge count");
5086        assert_eq!(active_edge_count, 1);
5087        let fts_count: i64 = conn
5088            .query_row(
5089                "SELECT count(*) FROM fts_nodes WHERE chunk_id = 'chunk-1'",
5090                [],
5091                |row| row.get(0),
5092            )
5093            .expect("fts count");
5094        assert_eq!(fts_count, 1);
5095    }
5096
5097    #[test]
5098    fn restore_logical_id_restores_edges_retired_after_the_node_retire_event() {
5099        let (db, service) = setup();
5100        {
5101            let conn = sqlite::open_connection(db.path()).expect("conn");
5102            conn.execute(
5103                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
5104                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
5105                [],
5106            )
5107            .expect("insert node");
5108            conn.execute(
5109                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
5110                 VALUES ('node-row-topic', 'topic-1', 'Topic', '{}', 100, 'seed')",
5111                [],
5112            )
5113            .expect("insert target node");
5114            conn.execute(
5115                "INSERT INTO edges \
5116                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
5117                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 'seed')",
5118                [],
5119            )
5120            .expect("insert edge");
5121            conn.execute(
5122                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5123                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
5124                [],
5125            )
5126            .expect("insert node retire event");
5127            conn.execute(
5128                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5129                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 201, '')",
5130                [],
5131            )
5132            .expect("insert edge retire event");
5133            conn.execute(
5134                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
5135                [],
5136            )
5137            .expect("retire node");
5138            conn.execute(
5139                "UPDATE edges SET superseded_at = 201 WHERE logical_id = 'edge-1'",
5140                [],
5141            )
5142            .expect("retire edge");
5143        }
5144
5145        let report = service.restore_logical_id("doc-1").expect("restore");
5146        assert_eq!(report.restored_edge_rows, 1);
5147
5148        let conn = sqlite::open_connection(db.path()).expect("conn");
5149        let active_edge_count: i64 = conn
5150            .query_row(
5151                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
5152                [],
5153                |row| row.get(0),
5154            )
5155            .expect("active edge count");
5156        assert_eq!(active_edge_count, 1);
5157    }
5158
5159    #[test]
5160    fn restore_logical_id_prefers_latest_retired_revision_when_timestamps_tie() {
5161        let (db, service) = setup();
5162        {
5163            let conn = sqlite::open_connection(db.path()).expect("conn");
5164            conn.execute(
5165                "INSERT INTO nodes \
5166                 (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5167                 VALUES ('node-row-older', 'doc-1', 'Document', '{\"title\":\"older\"}', 100, 200, 'forget-1')",
5168                [],
5169            )
5170            .expect("insert older retired node");
5171            conn.execute(
5172                "INSERT INTO nodes \
5173                 (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5174                 VALUES ('node-row-newer', 'doc-1', 'Document', '{\"title\":\"newer\"}', 100, 200, 'forget-1')",
5175                [],
5176            )
5177            .expect("insert newer retired node");
5178            conn.execute(
5179                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5180                 VALUES ('evt-retire-older', 'node_retire', 'doc-1', 'forget-1', 200, '')",
5181                [],
5182            )
5183            .expect("insert older retire event");
5184            conn.execute(
5185                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5186                 VALUES ('evt-retire-newer', 'node_retire', 'doc-1', 'forget-1', 200, '')",
5187                [],
5188            )
5189            .expect("insert newer retire event");
5190        }
5191
5192        let report = service.restore_logical_id("doc-1").expect("restore");
5193
5194        assert!(!report.was_noop);
5195        let conn = sqlite::open_connection(db.path()).expect("conn");
5196        let active_row: (String, String) = conn
5197            .query_row(
5198                "SELECT row_id, properties FROM nodes \
5199                 WHERE logical_id = 'doc-1' AND superseded_at IS NULL",
5200                [],
5201                |row| Ok((row.get(0)?, row.get(1)?)),
5202            )
5203            .expect("restored active row");
5204        assert_eq!(active_row.0, "node-row-newer");
5205        assert_eq!(active_row.1, "{\"title\":\"newer\"}");
5206    }
5207
5208    #[test]
5209    fn purge_logical_id_removes_retired_content_and_records_tombstone() {
5210        let (db, service) = setup();
5211        {
5212            let conn = sqlite::open_connection(db.path()).expect("conn");
5213            conn.execute(
5214                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5215                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
5216                [],
5217            )
5218            .expect("insert retired node");
5219            conn.execute(
5220                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5221                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5222                [],
5223            )
5224            .expect("insert chunk");
5225            conn.execute(
5226                "INSERT INTO edges \
5227                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, superseded_at, source_ref) \
5228                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 200, 'seed')",
5229                [],
5230            )
5231            .expect("insert retired edge");
5232            conn.execute(
5233                "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
5234                 VALUES ('chunk-1', 'doc-1', 'Document', 'budget narrative')",
5235                [],
5236            )
5237            .expect("insert fts");
5238        }
5239
5240        let report = service.purge_logical_id("doc-1").expect("purge");
5241        assert_eq!(report.logical_id, "doc-1");
5242        assert!(!report.was_noop);
5243        assert_eq!(report.deleted_node_rows, 1);
5244        assert_eq!(report.deleted_edge_rows, 1);
5245        assert_eq!(report.deleted_chunk_rows, 1);
5246        assert_eq!(report.deleted_fts_rows, 1);
5247
5248        let conn = sqlite::open_connection(db.path()).expect("conn");
5249        let remaining_nodes: i64 = conn
5250            .query_row(
5251                "SELECT count(*) FROM nodes WHERE logical_id = 'doc-1'",
5252                [],
5253                |row| row.get(0),
5254            )
5255            .expect("remaining nodes");
5256        assert_eq!(remaining_nodes, 0);
5257        let remaining_edges: i64 = conn
5258            .query_row(
5259                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1'",
5260                [],
5261                |row| row.get(0),
5262            )
5263            .expect("remaining edges");
5264        assert_eq!(remaining_edges, 0);
5265        let remaining_chunks: i64 = conn
5266            .query_row(
5267                "SELECT count(*) FROM chunks WHERE id = 'chunk-1'",
5268                [],
5269                |row| row.get(0),
5270            )
5271            .expect("remaining chunks");
5272        assert_eq!(remaining_chunks, 0);
5273        let purge_events: i64 = conn
5274            .query_row(
5275                "SELECT count(*) FROM provenance_events WHERE event_type = 'purge_logical_id' AND subject = 'doc-1'",
5276                [],
5277                |row| row.get(0),
5278            )
5279            .expect("purge events");
5280        assert_eq!(purge_events, 1);
5281    }
5282
5283    #[test]
5284    fn check_semantics_accepts_preserved_retired_chunks() {
5285        let (db, service) = setup();
5286        {
5287            let conn = sqlite::open_connection(db.path()).expect("conn");
5288            conn.execute(
5289                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5290                 VALUES ('node-row-1', 'doc-1', 'Document', '{}', 100, 200, 'seed')",
5291                [],
5292            )
5293            .expect("insert retired node");
5294            conn.execute(
5295                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5296                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5297                [],
5298            )
5299            .expect("insert chunk");
5300        }
5301
5302        let report = service.check_semantics().expect("semantics");
5303        assert_eq!(report.orphaned_chunks, 0);
5304    }
5305
5306    #[test]
5307    fn check_semantics_detects_missing_retired_node_history_for_preserved_chunks() {
5308        let (db, service) = setup();
5309        {
5310            let conn = sqlite::open_connection(db.path()).expect("conn");
5311            conn.execute(
5312                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5313                 VALUES ('chunk-1', 'ghost-doc', 'budget narrative', 100)",
5314                [],
5315            )
5316            .expect("insert orphaned chunk");
5317        }
5318
5319        let report = service.check_semantics().expect("semantics");
5320        assert_eq!(report.orphaned_chunks, 1);
5321    }
5322
5323    #[cfg(feature = "sqlite-vec")]
5324    #[test]
5325    fn check_semantics_detects_missing_retired_node_history_for_preserved_vec_rows() {
5326        let (db, service) = setup();
5327        {
5328            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5329            service
5330                .schema_manager
5331                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
5332                .expect("ensure vec profile");
5333            conn.execute(
5334                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5335                 VALUES ('chunk-1', 'ghost-doc', 'budget narrative', 100)",
5336                [],
5337            )
5338            .expect("insert orphaned chunk");
5339            conn.execute(
5340                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
5341                [],
5342            )
5343            .expect("insert vec row");
5344        }
5345
5346        let report = service.check_semantics().expect("semantics");
5347        assert_eq!(report.orphaned_chunks, 1);
5348        assert_eq!(report.vec_rows_for_superseded_nodes, 1);
5349    }
5350
5351    #[cfg(feature = "sqlite-vec")]
5352    #[test]
5353    fn restore_logical_id_reestablishes_vector_search_without_reingest() {
5354        let (db, service) = setup();
5355        {
5356            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5357            service
5358                .schema_manager
5359                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
5360                .expect("ensure vec profile");
5361            conn.execute(
5362                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5363                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
5364                [],
5365            )
5366            .expect("insert retired node");
5367            conn.execute(
5368                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5369                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5370                [],
5371            )
5372            .expect("insert chunk");
5373            conn.execute(
5374                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
5375                [],
5376            )
5377            .expect("insert vec row");
5378            conn.execute(
5379                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5380                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
5381                [],
5382            )
5383            .expect("insert retire event");
5384        }
5385
5386        let report = service.restore_logical_id("doc-1").expect("restore");
5387        assert_eq!(report.restored_vec_rows, 1);
5388
5389        let coordinator = ExecutionCoordinator::open(
5390            db.path(),
5391            Arc::new(SchemaManager::new()),
5392            Some(4),
5393            1,
5394            Arc::new(TelemetryCounters::default()),
5395            None,
5396        )
5397        .expect("coordinator");
5398        let compiled = QueryBuilder::nodes("Document")
5399            .vector_search("[0.0, 0.0, 0.0, 0.0]", 5)
5400            .compile()
5401            .expect("compile");
5402        let rows = coordinator
5403            .execute_compiled_read(&compiled)
5404            .expect("vector read");
5405        assert!(
5406            rows.nodes.iter().any(|row| row.logical_id == "doc-1"),
5407            "restore should make the preserved vec row visible again without re-ingest"
5408        );
5409    }
5410
5411    #[cfg(feature = "sqlite-vec")]
5412    #[test]
5413    fn purge_logical_id_deletes_vec_rows_for_retired_content() {
5414        let (db, service) = setup();
5415        {
5416            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5417            service
5418                .schema_manager
5419                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
5420                .expect("ensure vec profile");
5421            conn.execute(
5422                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5423                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
5424                [],
5425            )
5426            .expect("insert retired node");
5427            conn.execute(
5428                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5429                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5430                [],
5431            )
5432            .expect("insert chunk");
5433            conn.execute(
5434                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
5435                [],
5436            )
5437            .expect("insert vec row");
5438        }
5439
5440        let report = service.purge_logical_id("doc-1").expect("purge");
5441        assert_eq!(report.deleted_vec_rows, 1);
5442
5443        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5444        let vec_count: i64 = conn
5445            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
5446                row.get(0)
5447            })
5448            .expect("vec count");
5449        assert_eq!(vec_count, 0);
5450    }
5451
5452    #[cfg(feature = "sqlite-vec")]
5453    #[test]
5454    fn restore_logical_id_restores_visibility_of_regenerated_vectors() {
5455        let (db, service) = setup();
5456
5457        {
5458            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5459            service
5460                .schema_manager
5461                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
5462                .expect("ensure vec profile");
5463            conn.execute(
5464                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
5465                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
5466                [],
5467            )
5468            .expect("insert node");
5469            conn.execute(
5470                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5471                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5472                [],
5473            )
5474            .expect("insert chunk");
5475        }
5476
5477        let embedder = TestEmbedder::new("test-model", 4);
5478        service
5479            .regenerate_vector_embeddings(
5480                &embedder,
5481                &VectorRegenerationConfig {
5482                    profile: "default".to_owned(),
5483                    table_name: "vec_nodes_active".to_owned(),
5484                    chunking_policy: "per_chunk".to_owned(),
5485                    preprocessing_policy: "trim".to_owned(),
5486                },
5487            )
5488            .expect("regenerate");
5489
5490        {
5491            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5492            conn.execute(
5493                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5494                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
5495                [],
5496            )
5497            .expect("insert retire event");
5498            conn.execute(
5499                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
5500                [],
5501            )
5502            .expect("retire node");
5503        }
5504
5505        let report = service.restore_logical_id("doc-1").expect("restore");
5506        assert_eq!(report.restored_vec_rows, 1);
5507
5508        let coordinator = ExecutionCoordinator::open(
5509            db.path(),
5510            Arc::new(SchemaManager::new()),
5511            Some(4),
5512            1,
5513            Arc::new(TelemetryCounters::default()),
5514            None,
5515        )
5516        .expect("coordinator");
5517        let compiled = QueryBuilder::nodes("Document")
5518            .vector_search("[0.0, 0.0, 0.0, 0.0]", 5)
5519            .compile()
5520            .expect("compile");
5521        let rows = coordinator
5522            .execute_compiled_read(&compiled)
5523            .expect("vector read");
5524        assert!(
5525            rows.nodes.iter().any(|row| row.logical_id == "doc-1"),
5526            "restored logical_id should become visible through regenerated vectors"
5527        );
5528    }
5529
5530    #[test]
5531    fn check_semantics_clean_db_returns_zeros() {
5532        let (_db, service) = setup();
5533        let report = service.check_semantics().expect("semantics check");
5534        assert_eq!(report.orphaned_chunks, 0);
5535        assert_eq!(report.null_source_ref_nodes, 0);
5536        assert_eq!(report.broken_step_fk, 0);
5537        assert_eq!(report.broken_action_fk, 0);
5538        assert_eq!(report.stale_fts_rows, 0);
5539        assert_eq!(report.fts_rows_for_superseded_nodes, 0);
5540        assert_eq!(report.dangling_edges, 0);
5541        assert_eq!(report.orphaned_supersession_chains, 0);
5542        assert_eq!(report.stale_vec_rows, 0);
5543        assert_eq!(report.vec_rows_for_superseded_nodes, 0);
5544        assert_eq!(report.missing_operational_current_rows, 0);
5545        assert_eq!(report.stale_operational_current_rows, 0);
5546        assert_eq!(report.disabled_collection_mutations, 0);
5547        assert_eq!(report.mismatched_kind_property_fts_rows, 0);
5548        assert_eq!(report.duplicate_property_fts_rows, 0);
5549        assert_eq!(report.drifted_property_fts_rows, 0);
5550        assert!(report.warnings.is_empty());
5551    }
5552
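    // Registration should persist the collection metadata verbatim, make it
    // retrievable via describe_operational_collection, and record exactly one
    // 'operational_collection_registered' provenance event.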
5553    #[test]
5554    fn register_operational_collection_persists_and_emits_provenance() {
5555        let (db, service) = setup();
5556        let record = service
5557            .register_operational_collection(&OperationalRegisterRequest {
5558                name: "connector_health".to_owned(),
5559                kind: OperationalCollectionKind::LatestState,
5560                schema_json: "{}".to_owned(),
5561                retention_json: "{}".to_owned(),
5562                filter_fields_json: "[]".to_owned(),
5563                validation_json: String::new(),
5564                secondary_indexes_json: "[]".to_owned(),
5565                format_version: 1,
5566            })
5567            .expect("register collection");
5568
5569        assert_eq!(record.name, "connector_health");
5570        assert_eq!(record.kind, OperationalCollectionKind::LatestState);
5571        assert_eq!(record.schema_json, "{}");
5572        assert_eq!(record.retention_json, "{}");
5573        assert_eq!(record.filter_fields_json, "[]");
5574        assert!(record.created_at > 0);
5575        assert_eq!(record.disabled_at, None);
5576
5577        let described = service
5578            .describe_operational_collection("connector_health")
5579            .expect("describe collection")
5580            .expect("collection exists");
5581        assert_eq!(described, record);
5582
5583        let conn = sqlite::open_connection(db.path()).expect("conn");
5584        let provenance_count: i64 = conn
5585            .query_row(
5586                "SELECT count(*) FROM provenance_events \
5587                 WHERE event_type = 'operational_collection_registered' AND subject = 'connector_health'",
5588                [],
5589                |row| row.get(0),
5590            )
5591            .expect("provenance count");
5592        assert_eq!(provenance_count, 1);
5593    }
5594
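    // Updating the validation contract after registration should round-trip
    // through describe_operational_collection and emit its own
    // 'operational_collection_validation_updated' provenance event.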
5595    #[test]
5596    fn register_and_update_operational_collection_validation_round_trip() {
5597        let (db, service) = setup();
5598        let record = service
5599            .register_operational_collection(&OperationalRegisterRequest {
5600                name: "connector_health".to_owned(),
5601                kind: OperationalCollectionKind::LatestState,
5602                schema_json: "{}".to_owned(),
5603                retention_json: "{}".to_owned(),
5604                filter_fields_json: "[]".to_owned(),
5605                validation_json: String::new(),
5606                secondary_indexes_json: "[]".to_owned(),
5607                format_version: 1,
5608            })
5609            .expect("register collection");
5610        assert_eq!(record.validation_json, "");
5611
5612        let validation_json = r#"{"format_version":1,"mode":"enforce","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#;
5613        let updated = service
5614            .update_operational_collection_validation("connector_health", validation_json)
5615            .expect("update validation");
5616        assert_eq!(updated.validation_json, validation_json);
5617
5618        let described = service
5619            .describe_operational_collection("connector_health")
5620            .expect("describe collection")
5621            .expect("collection exists");
5622        assert_eq!(described.validation_json, validation_json);
5623
5624        let conn = sqlite::open_connection(db.path()).expect("conn");
5625        let provenance_count: i64 = conn
5626            .query_row(
5627                "SELECT count(*) FROM provenance_events \
5628                 WHERE event_type = 'operational_collection_validation_updated' \
5629                   AND subject = 'connector_health'",
5630                [],
5631                |row| row.get(0),
5632            )
5633            .expect("provenance count");
5634        assert_eq!(provenance_count, 1);
5635    }
5636
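    // Declaring a secondary index after data already exists should backfill
    // index entries from the stored mutations; after the entries are wiped by
    // hand, rebuild_operational_secondary_indexes should re-create them,
    // reporting two mutation entries and no current entries for this
    // append-only collection.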
5637    #[test]
5638    fn register_update_and_rebuild_operational_secondary_indexes_round_trip() {
5639        let (db, service) = setup();
5640        let record = service
5641            .register_operational_collection(&OperationalRegisterRequest {
5642                name: "audit_log".to_owned(),
5643                kind: OperationalCollectionKind::AppendOnlyLog,
5644                schema_json: "{}".to_owned(),
5645                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
5646                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
5647                validation_json: String::new(),
5648                secondary_indexes_json: "[]".to_owned(),
5649                format_version: 1,
5650            })
5651            .expect("register collection");
5652        assert_eq!(record.secondary_indexes_json, "[]");
5653
5654        {
5655            let writer = crate::WriterActor::start(
5656                db.path(),
5657                Arc::new(SchemaManager::new()),
5658                crate::ProvenanceMode::Warn,
5659                Arc::new(crate::TelemetryCounters::default()),
5660            )
5661            .expect("writer");
5662            writer
5663                .submit(crate::WriteRequest {
5664                    label: "secondary-index-seed".to_owned(),
5665                    nodes: vec![],
5666                    node_retires: vec![],
5667                    edges: vec![],
5668                    edge_retires: vec![],
5669                    chunks: vec![],
5670                    runs: vec![],
5671                    steps: vec![],
5672                    actions: vec![],
5673                    optional_backfills: vec![],
5674                    vec_inserts: vec![],
5675                    operational_writes: vec![
5676                        crate::OperationalWrite::Append {
5677                            collection: "audit_log".to_owned(),
5678                            record_key: "evt-1".to_owned(),
5679                            payload_json: r#"{"actor":"alice","ts":100}"#.to_owned(),
5680                            source_ref: Some("src-1".to_owned()),
5681                        },
5682                        crate::OperationalWrite::Append {
5683                            collection: "audit_log".to_owned(),
5684                            record_key: "evt-2".to_owned(),
5685                            payload_json: r#"{"actor":"bob","ts":200}"#.to_owned(),
5686                            source_ref: Some("src-2".to_owned()),
5687                        },
5688                    ],
5689                })
5690                .expect("seed writes");
5691        }
5692
5693        let secondary_indexes_json = r#"[{"name":"actor_ts","kind":"append_only_field_time","field":"actor","value_type":"string","time_field":"ts"}]"#;
5694        let updated = service
5695            .update_operational_collection_secondary_indexes("audit_log", secondary_indexes_json)
5696            .expect("update secondary indexes");
5697        assert_eq!(updated.secondary_indexes_json, secondary_indexes_json);
5698
5699        let conn = sqlite::open_connection(db.path()).expect("conn");
5700        let entry_count: i64 = conn
5701            .query_row(
5702                "SELECT count(*) FROM operational_secondary_index_entries \
5703                 WHERE collection_name = 'audit_log' AND index_name = 'actor_ts'",
5704                [],
5705                |row| row.get(0),
5706            )
5707            .expect("secondary index count");
5708        assert_eq!(entry_count, 2);
5709        conn.execute(
5710            "DELETE FROM operational_secondary_index_entries WHERE collection_name = 'audit_log'",
5711            [],
5712        )
5713        .expect("clear index entries");
5714        drop(conn);
5715
5716        let rebuild = service
5717            .rebuild_operational_secondary_indexes("audit_log")
5718            .expect("rebuild secondary indexes");
5719        assert_eq!(rebuild.collection_name, "audit_log");
5720        assert_eq!(rebuild.mutation_entries_rebuilt, 2);
5721        assert_eq!(rebuild.current_entries_rebuilt, 0);
5722    }
5723
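    // A string field carrying a numeric 'minimum' bound is an invalid
    // contract, so registration should fail with InvalidWrite and mention
    // 'minimum/maximum' in the error message.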
5724    #[test]
5725    fn register_operational_collection_rejects_invalid_validation_contract() {
5726        let (_db, service) = setup();
5727
5728        let error = service
5729            .register_operational_collection(&OperationalRegisterRequest {
5730                name: "connector_health".to_owned(),
5731                kind: OperationalCollectionKind::LatestState,
5732                schema_json: "{}".to_owned(),
5733                retention_json: "{}".to_owned(),
5734                filter_fields_json: "[]".to_owned(),
5735                validation_json: r#"{"format_version":1,"mode":"enforce","fields":[{"name":"status","type":"string","minimum":0}]}"#
5736                    .to_owned(),
5737                secondary_indexes_json: "[]".to_owned(),
5738                format_version: 1,
5739            })
5740            .expect_err("invalid validation contract should be rejected");
5741
5742        assert!(matches!(error, EngineError::InvalidWrite(_)));
5743        assert!(error.to_string().contains("minimum/maximum"));
5744    }
5745
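    // The contract here is registered in 'disabled' mode, so the bogus status
    // is accepted at write time; validate_operational_collection_history
    // should still flag evt-2 as invalid while leaving the stored mutations in
    // place and emitting no 'operational_collection_history_validated'
    // provenance event.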
5746    #[test]
5747    fn validate_operational_collection_history_reports_invalid_rows_without_mutation() {
5748        let (db, service) = setup();
5749        service
5750            .register_operational_collection(&OperationalRegisterRequest {
5751                name: "audit_log".to_owned(),
5752                kind: OperationalCollectionKind::AppendOnlyLog,
5753                schema_json: "{}".to_owned(),
5754                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
5755                filter_fields_json: "[]".to_owned(),
5756                validation_json: r#"{"format_version":1,"mode":"disabled","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#
5757                    .to_owned(),
5758                secondary_indexes_json: "[]".to_owned(),
5759                format_version: 1,
5760            })
5761            .expect("register collection");
5762        {
5763            let writer = crate::WriterActor::start(
5764                db.path(),
5765                Arc::new(SchemaManager::new()),
5766                crate::ProvenanceMode::Warn,
5767                Arc::new(crate::TelemetryCounters::default()),
5768            )
5769            .expect("writer");
5770            writer
5771                .submit(crate::WriteRequest {
5772                    label: "history-validation".to_owned(),
5773                    nodes: vec![],
5774                    node_retires: vec![],
5775                    edges: vec![],
5776                    edge_retires: vec![],
5777                    chunks: vec![],
5778                    runs: vec![],
5779                    steps: vec![],
5780                    actions: vec![],
5781                    optional_backfills: vec![],
5782                    vec_inserts: vec![],
5783                    operational_writes: vec![
5784                        crate::OperationalWrite::Append {
5785                            collection: "audit_log".to_owned(),
5786                            record_key: "evt-1".to_owned(),
5787                            payload_json: r#"{"status":"ok"}"#.to_owned(),
5788                            source_ref: Some("src-1".to_owned()),
5789                        },
5790                        crate::OperationalWrite::Append {
5791                            collection: "audit_log".to_owned(),
5792                            record_key: "evt-2".to_owned(),
5793                            payload_json: r#"{"status":"bogus"}"#.to_owned(),
5794                            source_ref: Some("src-2".to_owned()),
5795                        },
5796                    ],
5797                })
5798                .expect("write");
5799        }
5800
5801        let report = service
5802            .validate_operational_collection_history("audit_log")
5803            .expect("validate history");
5804        assert_eq!(report.collection_name, "audit_log");
5805        assert_eq!(report.checked_rows, 2);
5806        assert_eq!(report.invalid_row_count, 1);
5807        assert_eq!(report.issues.len(), 1);
5808        assert_eq!(report.issues[0].record_key, "evt-2");
5809        assert!(report.issues[0].message.contains("must be one of"));
5810
5811        let trace = service
5812            .trace_operational_collection("audit_log", None)
5813            .expect("trace");
5814        assert_eq!(trace.mutation_count, 2);
5815
5816        let conn = sqlite::open_connection(db.path()).expect("conn");
5817        let provenance_count: i64 = conn
5818            .query_row(
5819                "SELECT count(*) FROM provenance_events \
5820                 WHERE event_type = 'operational_collection_history_validated' \
5821                   AND subject = 'audit_log'",
5822                [],
5823                |row| row.get(0),
5824            )
5825            .expect("provenance count");
5826        assert_eq!(provenance_count, 0);
5827    }
5828
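    // Tracing a latest_state collection for a specific record_key should
    // return both its mutation history and its current row.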
5829    #[test]
5830    fn trace_operational_collection_returns_mutations_and_current_rows() {
5831        let (db, service) = setup();
5832        service
5833            .register_operational_collection(&OperationalRegisterRequest {
5834                name: "connector_health".to_owned(),
5835                kind: OperationalCollectionKind::LatestState,
5836                schema_json: "{}".to_owned(),
5837                retention_json: "{}".to_owned(),
5838                filter_fields_json: "[]".to_owned(),
5839                validation_json: String::new(),
5840                secondary_indexes_json: "[]".to_owned(),
5841                format_version: 1,
5842            })
5843            .expect("register collection");
5844        {
5845            let writer = crate::WriterActor::start(
5846                db.path(),
5847                Arc::new(SchemaManager::new()),
5848                crate::ProvenanceMode::Warn,
5849                Arc::new(crate::TelemetryCounters::default()),
5850            )
5851            .expect("writer");
5852            writer
5853                .submit(crate::WriteRequest {
5854                    label: "operational".to_owned(),
5855                    nodes: vec![],
5856                    node_retires: vec![],
5857                    edges: vec![],
5858                    edge_retires: vec![],
5859                    chunks: vec![],
5860                    runs: vec![],
5861                    steps: vec![],
5862                    actions: vec![],
5863                    optional_backfills: vec![],
5864                    vec_inserts: vec![],
5865                    operational_writes: vec![crate::OperationalWrite::Put {
5866                        collection: "connector_health".to_owned(),
5867                        record_key: "gmail".to_owned(),
5868                        payload_json: r#"{"status":"ok"}"#.to_owned(),
5869                        source_ref: Some("src-1".to_owned()),
5870                    }],
5871                })
5872                .expect("write");
5873        }
5874
5875        let report = service
5876            .trace_operational_collection("connector_health", Some("gmail"))
5877            .expect("trace");
5878        assert_eq!(report.collection_name, "connector_health");
5879        assert_eq!(report.record_key.as_deref(), Some("gmail"));
5880        assert_eq!(report.mutation_count, 1);
5881        assert_eq!(report.current_count, 1);
5882        assert_eq!(report.mutations[0].op_kind, "put");
5883        assert_eq!(report.current_rows[0].payload_json, r#"{"status":"ok"}"#);
5884    }
5885
5886    #[test]
5887    fn trace_operational_collection_rejects_unknown_collection() {
5888        let (_db, service) = setup();
5889
5890        let error = service
5891            .trace_operational_collection("missing_collection", None)
5892            .expect_err("unknown collection should fail");
5893
5894        assert!(matches!(error, EngineError::InvalidWrite(_)));
5895        assert!(error.to_string().contains("is not registered"));
5896    }
5897
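    // Deleting an operational_current row by hand should show up as a missing
    // row in check_semantics, and rebuild_operational_current should restore
    // it from the mutation history with the original payload.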
5898    #[test]
5899    fn rebuild_operational_current_repairs_missing_latest_state_rows() {
5900        let (db, service) = setup();
5901        service
5902            .register_operational_collection(&OperationalRegisterRequest {
5903                name: "connector_health".to_owned(),
5904                kind: OperationalCollectionKind::LatestState,
5905                schema_json: "{}".to_owned(),
5906                retention_json: "{}".to_owned(),
5907                filter_fields_json: "[]".to_owned(),
5908                validation_json: String::new(),
5909                secondary_indexes_json: "[]".to_owned(),
5910                format_version: 1,
5911            })
5912            .expect("register collection");
5913        {
5914            let writer = crate::WriterActor::start(
5915                db.path(),
5916                Arc::new(SchemaManager::new()),
5917                crate::ProvenanceMode::Warn,
5918                Arc::new(crate::TelemetryCounters::default()),
5919            )
5920            .expect("writer");
5921            writer
5922                .submit(crate::WriteRequest {
5923                    label: "operational".to_owned(),
5924                    nodes: vec![],
5925                    node_retires: vec![],
5926                    edges: vec![],
5927                    edge_retires: vec![],
5928                    chunks: vec![],
5929                    runs: vec![],
5930                    steps: vec![],
5931                    actions: vec![],
5932                    optional_backfills: vec![],
5933                    vec_inserts: vec![],
5934                    operational_writes: vec![crate::OperationalWrite::Put {
5935                        collection: "connector_health".to_owned(),
5936                        record_key: "gmail".to_owned(),
5937                        payload_json: r#"{"status":"ok"}"#.to_owned(),
5938                        source_ref: Some("src-1".to_owned()),
5939                    }],
5940                })
5941                .expect("write");
5942        }
5943        {
5944            let conn = sqlite::open_connection(db.path()).expect("conn");
5945            conn.execute(
5946                "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
5947                [],
5948            )
5949            .expect("delete current row");
5950        }
5951
5952        let before = service.check_semantics().expect("semantics before rebuild");
5953        assert_eq!(before.missing_operational_current_rows, 1);
5954
5955        let repair = service
5956            .rebuild_operational_current(Some("connector_health"))
5957            .expect("rebuild current");
5958        assert_eq!(repair.collections_rebuilt, 1);
5959        assert_eq!(repair.current_rows_rebuilt, 1);
5960
5961        let after = service.check_semantics().expect("semantics after rebuild");
5962        assert_eq!(after.missing_operational_current_rows, 0);
5963
5964        let conn = sqlite::open_connection(db.path()).expect("conn");
5965        let payload: String = conn
5966            .query_row(
5967                "SELECT payload_json FROM operational_current \
5968                 WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
5969                [],
5970                |row| row.get(0),
5971            )
5972            .expect("restored payload");
5973        assert_eq!(payload, r#"{"status":"ok"}"#);
5974    }
5975
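    // Rebuilding the current table should also re-derive the latest_state
    // secondary-index entries that were attached to the deleted current row.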
5976    #[test]
5977    fn rebuild_operational_current_restores_latest_state_secondary_index_entries() {
5978        let (db, service) = setup();
5979        service
5980            .register_operational_collection(&OperationalRegisterRequest {
5981                name: "connector_health".to_owned(),
5982                kind: OperationalCollectionKind::LatestState,
5983                schema_json: "{}".to_owned(),
5984                retention_json: "{}".to_owned(),
5985                filter_fields_json: "[]".to_owned(),
5986                validation_json: String::new(),
5987                secondary_indexes_json: r#"[{"name":"status_current","kind":"latest_state_field","field":"status","value_type":"string"}]"#.to_owned(),
5988                format_version: 1,
5989            })
5990            .expect("register collection");
5991        {
5992            let writer = crate::WriterActor::start(
5993                db.path(),
5994                Arc::new(SchemaManager::new()),
5995                crate::ProvenanceMode::Warn,
5996                Arc::new(crate::TelemetryCounters::default()),
5997            )
5998            .expect("writer");
5999            writer
6000                .submit(crate::WriteRequest {
6001                    label: "operational".to_owned(),
6002                    nodes: vec![],
6003                    node_retires: vec![],
6004                    edges: vec![],
6005                    edge_retires: vec![],
6006                    chunks: vec![],
6007                    runs: vec![],
6008                    steps: vec![],
6009                    actions: vec![],
6010                    optional_backfills: vec![],
6011                    vec_inserts: vec![],
6012                    operational_writes: vec![crate::OperationalWrite::Put {
6013                        collection: "connector_health".to_owned(),
6014                        record_key: "gmail".to_owned(),
6015                        payload_json: r#"{"status":"ok"}"#.to_owned(),
6016                        source_ref: Some("src-1".to_owned()),
6017                    }],
6018                })
6019                .expect("write");
6020        }
6021        {
6022            let conn = sqlite::open_connection(db.path()).expect("conn");
6023            let entry_count: i64 = conn
6024                .query_row(
6025                    "SELECT count(*) FROM operational_secondary_index_entries \
6026                     WHERE collection_name = 'connector_health' AND subject_kind = 'current'",
6027                    [],
6028                    |row| row.get(0),
6029                )
6030                .expect("secondary index count before repair");
6031            assert_eq!(entry_count, 1);
6032            conn.execute(
6033                "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
6034                [],
6035            )
6036            .expect("delete current row");
6037        }
6038
6039        service
6040            .rebuild_operational_current(Some("connector_health"))
6041            .expect("rebuild current");
6042
6043        let conn = sqlite::open_connection(db.path()).expect("conn");
6044        let entry_count: i64 = conn
6045            .query_row(
6046                "SELECT count(*) FROM operational_secondary_index_entries \
6047                 WHERE collection_name = 'connector_health' AND subject_kind = 'current'",
6048                [],
6049                |row| row.get(0),
6050            )
6051            .expect("secondary index count after repair");
6052        assert_eq!(entry_count, 1);
6053    }
6054
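    // The seeded ids (m3, m2, m1) run opposite to mutation_order, so a rebuild
    // keyed on id would end on the stale "old" put; both check_semantics and
    // rebuild_operational_current must follow mutation_order so the final
    // payload stays {"status":"new"}.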
6055    #[test]
6056    fn operational_current_semantics_and_rebuild_follow_mutation_order() {
6057        let (db, service) = setup();
6058        {
6059            let conn = sqlite::open_connection(db.path()).expect("conn");
6060            conn.execute(
6061                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6062                 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
6063                [],
6064            )
6065            .expect("seed collection");
6066            conn.execute(
6067                "INSERT INTO operational_mutations \
6068                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6069                 VALUES ('m3', 'connector_health', 'gmail', 'put', '{\"status\":\"old\"}', 'src-1', 100, 1)",
6070                [],
6071            )
6072            .expect("seed first put");
6073            conn.execute(
6074                "INSERT INTO operational_mutations \
6075                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6076                 VALUES ('m2', 'connector_health', 'gmail', 'delete', '', 'src-2', 100, 2)",
6077                [],
6078            )
6079            .expect("seed delete");
6080            conn.execute(
6081                "INSERT INTO operational_mutations \
6082                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6083                 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"new\"}', 'src-3', 100, 3)",
6084                [],
6085            )
6086            .expect("seed final put");
6087            conn.execute(
6088                "INSERT INTO operational_current \
6089                 (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
6090                 VALUES ('connector_health', 'gmail', '{\"status\":\"new\"}', 100, 'm1')",
6091                [],
6092            )
6093            .expect("seed current");
6094        }
6095
6096        let before = service.check_semantics().expect("semantics before rebuild");
6097        assert_eq!(before.missing_operational_current_rows, 0);
6098        assert_eq!(before.stale_operational_current_rows, 0);
6099
6100        {
6101            let conn = sqlite::open_connection(db.path()).expect("conn");
6102            conn.execute(
6103                "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
6104                [],
6105            )
6106            .expect("delete current row");
6107        }
6108
6109        let missing = service.check_semantics().expect("semantics after delete");
6110        assert_eq!(missing.missing_operational_current_rows, 1);
6111        assert_eq!(missing.stale_operational_current_rows, 0);
6112
6113        service
6114            .rebuild_operational_current(Some("connector_health"))
6115            .expect("rebuild current");
6116
6117        let after = service.check_semantics().expect("semantics after rebuild");
6118        assert_eq!(after.missing_operational_current_rows, 0);
6119        assert_eq!(after.stale_operational_current_rows, 0);
6120
6121        let conn = sqlite::open_connection(db.path()).expect("conn");
6122        let payload: String = conn
6123            .query_row(
6124                "SELECT payload_json FROM operational_current \
6125                 WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
6126                [],
6127                |row| row.get(0),
6128            )
6129            .expect("restored payload");
6130        assert_eq!(payload, r#"{"status":"new"}"#);
6131    }
6132
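    // Disabling a collection should stamp disabled_at, reject later writes
    // with an "is disabled" InvalidWrite error, and emit an
    // 'operational_collection_disabled' provenance event.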
6133    #[test]
6134    fn disable_operational_collection_sets_disabled_at_and_emits_provenance() {
6135        let (db, service) = setup();
6136        service
6137            .register_operational_collection(&OperationalRegisterRequest {
6138                name: "audit_log".to_owned(),
6139                kind: OperationalCollectionKind::AppendOnlyLog,
6140                schema_json: "{}".to_owned(),
6141                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6142                filter_fields_json: "[]".to_owned(),
6143                validation_json: String::new(),
6144                secondary_indexes_json: "[]".to_owned(),
6145                format_version: 1,
6146            })
6147            .expect("register collection");
6148
6149        let record = service
6150            .disable_operational_collection("audit_log")
6151            .expect("disable collection");
6152        assert_eq!(record.name, "audit_log");
6153        assert!(record.disabled_at.is_some());
6154
6155        let disabled_at = record.disabled_at.expect("disabled_at");
6156        let described = service
6157            .describe_operational_collection("audit_log")
6158            .expect("describe collection")
6159            .expect("collection exists");
6160        assert_eq!(described.disabled_at, Some(disabled_at));
6161
6162        let writer = crate::WriterActor::start(
6163            db.path(),
6164            Arc::new(SchemaManager::new()),
6165            crate::ProvenanceMode::Warn,
6166            Arc::new(crate::TelemetryCounters::default()),
6167        )
6168        .expect("writer");
6169        let error = writer
6170            .submit(crate::WriteRequest {
6171                label: "disabled-operational".to_owned(),
6172                nodes: vec![],
6173                node_retires: vec![],
6174                edges: vec![],
6175                edge_retires: vec![],
6176                chunks: vec![],
6177                runs: vec![],
6178                steps: vec![],
6179                actions: vec![],
6180                optional_backfills: vec![],
6181                vec_inserts: vec![],
6182                operational_writes: vec![crate::OperationalWrite::Append {
6183                    collection: "audit_log".to_owned(),
6184                    record_key: "evt-1".to_owned(),
6185                    payload_json: r#"{"type":"sync"}"#.to_owned(),
6186                    source_ref: Some("src-1".to_owned()),
6187                }],
6188            })
6189            .expect_err("disabled collection should reject writes");
6190        assert!(matches!(error, EngineError::InvalidWrite(_)));
6191        assert!(error.to_string().contains("is disabled"));
6192
6193        let conn = sqlite::open_connection(db.path()).expect("conn");
6194        let provenance_count: i64 = conn
6195            .query_row(
6196                "SELECT count(*) FROM provenance_events \
6197                 WHERE event_type = 'operational_collection_disabled' AND subject = 'audit_log'",
6198                [],
6199                |row| row.get(0),
6200            )
6201            .expect("provenance count");
6202        assert_eq!(provenance_count, 1);
6203    }
6204
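    // Purging an append-only collection with cutoff 250 should delete the
    // mutations created at 100 and 200, keep the one created at 300, and emit
    // an 'operational_collection_purged' provenance event.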
6205    #[test]
6206    fn purge_operational_collection_deletes_append_only_rows_before_cutoff() {
6207        let (db, service) = setup();
6208        {
6209            let conn = sqlite::open_connection(db.path()).expect("conn");
6210            conn.execute(
6211                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6212                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_all\"}', 1, 100)",
6213                [],
6214            )
6215            .expect("seed collection");
6216            conn.execute(
6217                "INSERT INTO operational_mutations \
6218                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6219                 VALUES ('evt-1', 'audit_log', 'evt-1', 'append', '{\"seq\":1}', 'src-1', 100, 1)",
6220                [],
6221            )
6222            .expect("seed event 1");
6223            conn.execute(
6224                "INSERT INTO operational_mutations \
6225                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6226                 VALUES ('evt-2', 'audit_log', 'evt-2', 'append', '{\"seq\":2}', 'src-2', 200, 2)",
6227                [],
6228            )
6229            .expect("seed event 2");
6230            conn.execute(
6231                "INSERT INTO operational_mutations \
6232                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6233                 VALUES ('evt-3', 'audit_log', 'evt-3', 'append', '{\"seq\":3}', 'src-3', 300, 3)",
6234                [],
6235            )
6236            .expect("seed event 3");
6237        }
6238
6239        let report = service
6240            .purge_operational_collection("audit_log", 250)
6241            .expect("purge collection");
6242        assert_eq!(report.collection_name, "audit_log");
6243        assert_eq!(report.deleted_mutations, 2);
6244        assert_eq!(report.before_timestamp, 250);
6245
6246        let conn = sqlite::open_connection(db.path()).expect("conn");
6247        let remaining: Vec<String> = {
6248            let mut stmt = conn
6249                .prepare(
6250                    "SELECT id FROM operational_mutations \
6251                     WHERE collection_name = 'audit_log' ORDER BY mutation_order",
6252                )
6253                .expect("stmt");
6254            stmt.query_map([], |row| row.get(0))
6255                .expect("rows")
6256                .collect::<Result<_, _>>()
6257                .expect("collect")
6258        };
6259        assert_eq!(remaining, vec!["evt-3".to_owned()]);
6260        let provenance_count: i64 = conn
6261            .query_row(
6262                "SELECT count(*) FROM provenance_events \
6263                 WHERE event_type = 'operational_collection_purged' AND subject = 'audit_log'",
6264                [],
6265                |row| row.get(0),
6266            )
6267            .expect("provenance count");
6268        assert_eq!(provenance_count, 1);
6269    }
6270
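    // A dry-run compaction should report how many rows keep_last would drop
    // while leaving all three mutations in place and emitting no
    // 'operational_collection_compacted' provenance event.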
6271    #[test]
6272    fn compact_operational_collection_dry_run_reports_without_mutation() {
6273        let (db, service) = setup();
6274        {
6275            let conn = sqlite::open_connection(db.path()).expect("conn");
6276            conn.execute(
6277                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6278                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
6279                [],
6280            )
6281            .expect("seed collection");
6282            for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
6283                conn.execute(
6284                    "INSERT INTO operational_mutations \
6285                     (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6286                     VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
6287                    rusqlite::params![
6288                        format!("evt-{index}"),
6289                        format!("{{\"seq\":{index}}}"),
6290                        created_at,
6291                        index,
6292                    ],
6293                )
6294                .expect("seed event");
6295            }
6296        }
6297
6298        let report = service
6299            .compact_operational_collection("audit_log", true)
6300            .expect("compact collection");
6301        assert_eq!(report.collection_name, "audit_log");
6302        assert_eq!(report.deleted_mutations, 1);
6303        assert!(report.dry_run);
6304        assert_eq!(report.before_timestamp, None);
6305
6306        let conn = sqlite::open_connection(db.path()).expect("conn");
6307        let remaining_count: i64 = conn
6308            .query_row(
6309                "SELECT count(*) FROM operational_mutations WHERE collection_name = 'audit_log'",
6310                [],
6311                |row| row.get(0),
6312            )
6313            .expect("remaining count");
6314        assert_eq!(remaining_count, 3);
6315        let provenance_count: i64 = conn
6316            .query_row(
6317                "SELECT count(*) FROM provenance_events \
6318                 WHERE event_type = 'operational_collection_compacted' AND subject = 'audit_log'",
6319                [],
6320                |row| row.get(0),
6321            )
6322            .expect("provenance count");
6323        assert_eq!(provenance_count, 0);
6324    }
6325
6326    #[test]
6327    fn compact_operational_collection_keep_last_deletes_oldest_rows() {
6328        let (db, service) = setup();
6329        {
6330            let conn = sqlite::open_connection(db.path()).expect("conn");
6331            conn.execute(
6332                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6333                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
6334                [],
6335            )
6336            .expect("seed collection");
6337            for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
6338                conn.execute(
6339                    "INSERT INTO operational_mutations \
6340                     (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6341                     VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
6342                    rusqlite::params![
6343                        format!("evt-{index}"),
6344                        format!("{{\"seq\":{index}}}"),
6345                        created_at,
6346                        index,
6347                    ],
6348                )
6349                .expect("seed event");
6350            }
6351        }
6352
6353        let report = service
6354            .compact_operational_collection("audit_log", false)
6355            .expect("compact collection");
6356        assert_eq!(report.deleted_mutations, 1);
6357        assert!(!report.dry_run);
6358
6359        let conn = sqlite::open_connection(db.path()).expect("conn");
6360        let remaining: Vec<String> = {
6361            let mut stmt = conn
6362                .prepare(
6363                    "SELECT id FROM operational_mutations \
6364                     WHERE collection_name = 'audit_log' ORDER BY mutation_order",
6365                )
6366                .expect("stmt");
6367            stmt.query_map([], |row| row.get(0))
6368                .expect("rows")
6369                .collect::<Result<_, _>>()
6370                .expect("collect")
6371        };
6372        assert_eq!(remaining, vec!["evt-2".to_owned(), "evt-3".to_owned()]);
6373        let provenance_count: i64 = conn
6374            .query_row(
6375                "SELECT count(*) FROM provenance_events \
6376                 WHERE event_type = 'operational_collection_compacted' AND subject = 'audit_log'",
6377                [],
6378                |row| row.get(0),
6379            )
6380            .expect("provenance count");
6381        assert_eq!(provenance_count, 1);
6382    }
6383
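    // Retention lifecycle for keep_last: planning reports one candidate
    // deletion, a dry run leaves both the mutations and the
    // operational_retention_runs log untouched, and a real run deletes the
    // oldest row and records the run with the supplied timestamp.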
6384    #[test]
6385    fn plan_and_run_operational_retention_keep_last() {
6386        let (db, service) = setup();
6387        {
6388            let conn = sqlite::open_connection(db.path()).expect("conn");
6389            conn.execute(
6390                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6391                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
6392                [],
6393            )
6394            .expect("seed collection");
6395            for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
6396                conn.execute(
6397                    "INSERT INTO operational_mutations \
6398                     (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6399                     VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
6400                    rusqlite::params![
6401                        format!("evt-{index}"),
6402                        format!("{{\"seq\":{index}}}"),
6403                        created_at,
6404                        index,
6405                    ],
6406                )
6407                .expect("seed event");
6408            }
6409        }
6410
6411        let plan = service
6412            .plan_operational_retention(1_000, None, Some(10))
6413            .expect("plan retention");
6414        assert_eq!(plan.collections_examined, 1);
6415        assert_eq!(plan.items[0].collection_name, "audit_log");
6416        assert_eq!(
6417            plan.items[0].action_kind,
6418            crate::operational::OperationalRetentionActionKind::KeepLast
6419        );
6420        assert_eq!(plan.items[0].candidate_deletions, 1);
6421        assert_eq!(plan.items[0].max_rows, Some(2));
6422        assert_eq!(plan.items[0].last_run_at, None);
6423
6424        let dry_run = service
6425            .run_operational_retention(1_000, None, Some(10), true)
6426            .expect("dry-run retention");
6427        assert!(dry_run.dry_run);
6428        assert_eq!(dry_run.collections_acted_on, 1);
6429        assert_eq!(dry_run.items[0].deleted_mutations, 1);
6430        assert_eq!(dry_run.items[0].rows_remaining, 2);
6431
6432        let conn = sqlite::open_connection(db.path()).expect("conn");
6433        let remaining_count: i64 = conn
6434            .query_row(
6435                "SELECT count(*) FROM operational_mutations WHERE collection_name = 'audit_log'",
6436                [],
6437                |row| row.get(0),
6438            )
6439            .expect("remaining count after dry run");
6440        assert_eq!(remaining_count, 3);
6441        let retention_run_count: i64 = conn
6442            .query_row(
6443                "SELECT count(*) FROM operational_retention_runs WHERE collection_name = 'audit_log'",
6444                [],
6445                |row| row.get(0),
6446            )
6447            .expect("retention run count");
6448        assert_eq!(retention_run_count, 0);
6449        drop(conn);
6450
6451        let executed = service
6452            .run_operational_retention(1_000, None, Some(10), false)
6453            .expect("execute retention");
6454        assert_eq!(executed.collections_acted_on, 1);
6455        assert_eq!(executed.items[0].deleted_mutations, 1);
6456        assert_eq!(executed.items[0].rows_remaining, 2);
6457
6458        let conn = sqlite::open_connection(db.path()).expect("conn");
6459        let remaining: Vec<String> = {
6460            let mut stmt = conn
6461                .prepare(
6462                    "SELECT id FROM operational_mutations \
6463                     WHERE collection_name = 'audit_log' ORDER BY mutation_order",
6464                )
6465                .expect("stmt");
6466            stmt.query_map([], |row| row.get(0))
6467                .expect("rows")
6468                .collect::<Result<_, _>>()
6469                .expect("collect")
6470        };
6471        assert_eq!(remaining, vec!["evt-2".to_owned(), "evt-3".to_owned()]);
6472        let last_run_at: i64 = conn
6473            .query_row(
6474                "SELECT executed_at FROM operational_retention_runs \
6475                 WHERE collection_name = 'audit_log' ORDER BY executed_at DESC LIMIT 1",
6476                [],
6477                |row| row.get(0),
6478            )
6479            .expect("last run at");
6480        assert_eq!(last_run_at, 1_000);
6481    }
6482
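    // A collection already within its keep_last budget should not be counted
    // as acted on, even though it still appears in the dry-run report items.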
6483    #[test]
6484    fn dry_run_operational_retention_does_not_mark_noop_collection_as_acted_on() {
6485        let (db, service) = setup();
6486        let conn = sqlite::open_connection(db.path()).expect("conn");
6487        conn.execute(
6488            "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6489             VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
6490            [],
6491        )
6492        .expect("seed collection");
6493        for (index, created_at) in [(1_i64, 100_i64), (2, 200)] {
6494            conn.execute(
6495                "INSERT INTO operational_mutations \
6496                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6497                 VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
6498                rusqlite::params![
6499                    format!("evt-{index}"),
6500                    format!("{{\"seq\":{index}}}"),
6501                    created_at,
6502                    index,
6503                ],
6504            )
6505            .expect("seed event");
6506        }
6507        drop(conn);
6508
6509        let dry_run = service
6510            .run_operational_retention(1_000, None, Some(10), true)
6511            .expect("dry-run retention");
6512        assert!(dry_run.dry_run);
6513        assert_eq!(dry_run.collections_acted_on, 0);
6514        assert_eq!(dry_run.items[0].deleted_mutations, 0);
6515        assert_eq!(dry_run.items[0].rows_remaining, 2);
6516    }
6517
6518    #[test]
6519    fn compact_operational_collection_rejects_latest_state() {
6520        let (_db, service) = setup();
6521        service
6522            .register_operational_collection(&OperationalRegisterRequest {
6523                name: "connector_health".to_owned(),
6524                kind: OperationalCollectionKind::LatestState,
6525                schema_json: "{}".to_owned(),
6526                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6527                filter_fields_json: "[]".to_owned(),
6528                validation_json: String::new(),
6529                secondary_indexes_json: "[]".to_owned(),
6530                format_version: 1,
6531            })
6532            .expect("register collection");
6533
6534        let error = service
6535            .compact_operational_collection("connector_health", false)
6536            .expect_err("latest_state compaction should be rejected");
6537        assert!(matches!(error, EngineError::InvalidWrite(_)));
6538        assert!(error.to_string().contains("append_only_log"));
6539    }
6540
6541    #[test]
6542    fn register_operational_collection_persists_filter_fields_json() {
6543        let (_db, service) = setup();
6544
6545        let record = service
6546            .register_operational_collection(&OperationalRegisterRequest {
6547                name: "audit_log".to_owned(),
6548                kind: OperationalCollectionKind::AppendOnlyLog,
6549                schema_json: "{}".to_owned(),
6550                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6551                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
6552                validation_json: String::new(),
6553                secondary_indexes_json: "[]".to_owned(),
6554                format_version: 1,
6555            })
6556            .expect("register collection");
6557
6558        assert_eq!(
6559            record.filter_fields_json,
6560            r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#
6561        );
6562    }
6563
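    // Combined filtering: the 'alice' prefix matches the actors of evt-1 and
    // evt-2, and the ts range between 150 and 250 narrows the result to evt-2
    // only.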
6564    #[test]
6565    fn read_operational_collection_filters_append_only_rows_by_declared_fields() {
6566        let (db, service) = setup();
6567        service
6568            .register_operational_collection(&OperationalRegisterRequest {
6569                name: "audit_log".to_owned(),
6570                kind: OperationalCollectionKind::AppendOnlyLog,
6571                schema_json: "{}".to_owned(),
6572                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6573                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"seq","type":"integer","modes":["exact","range"]},{"name":"ts","type":"timestamp","modes":["exact","range"]}]"#.to_owned(),
6574                validation_json: String::new(),
6575                secondary_indexes_json: "[]".to_owned(),
6576                format_version: 1,
6577            })
6578            .expect("register collection");
6579        {
6580            let writer = crate::WriterActor::start(
6581                db.path(),
6582                Arc::new(SchemaManager::new()),
6583                crate::ProvenanceMode::Warn,
6584                Arc::new(crate::TelemetryCounters::default()),
6585            )
6586            .expect("writer");
6587            writer
6588                .submit(crate::WriteRequest {
6589                    label: "operational".to_owned(),
6590                    nodes: vec![],
6591                    node_retires: vec![],
6592                    edges: vec![],
6593                    edge_retires: vec![],
6594                    chunks: vec![],
6595                    runs: vec![],
6596                    steps: vec![],
6597                    actions: vec![],
6598                    optional_backfills: vec![],
6599                    vec_inserts: vec![],
6600                    operational_writes: vec![
6601                        crate::OperationalWrite::Append {
6602                            collection: "audit_log".to_owned(),
6603                            record_key: "evt-1".to_owned(),
6604                            payload_json: r#"{"actor":"alice","seq":1,"ts":100}"#.to_owned(),
6605                            source_ref: Some("src-1".to_owned()),
6606                        },
6607                        crate::OperationalWrite::Append {
6608                            collection: "audit_log".to_owned(),
6609                            record_key: "evt-2".to_owned(),
6610                            payload_json: r#"{"actor":"alice-admin","seq":2,"ts":200}"#.to_owned(),
6611                            source_ref: Some("src-2".to_owned()),
6612                        },
6613                        crate::OperationalWrite::Append {
6614                            collection: "audit_log".to_owned(),
6615                            record_key: "evt-3".to_owned(),
6616                            payload_json: r#"{"actor":"bob","seq":3,"ts":300}"#.to_owned(),
6617                            source_ref: Some("src-3".to_owned()),
6618                        },
6619                    ],
6620                })
6621                .expect("write");
6622        }
6623
6624        let report = service
6625            .read_operational_collection(&crate::operational::OperationalReadRequest {
6626                collection_name: "audit_log".to_owned(),
6627                filters: vec![
6628                    crate::operational::OperationalFilterClause::Prefix {
6629                        field: "actor".to_owned(),
6630                        value: "alice".to_owned(),
6631                    },
6632                    crate::operational::OperationalFilterClause::Range {
6633                        field: "ts".to_owned(),
6634                        lower: Some(150),
6635                        upper: Some(250),
6636                    },
6637                ],
6638                limit: Some(10),
6639            })
6640            .expect("filtered read");
6641
6642        assert_eq!(report.collection_name, "audit_log");
6643        assert_eq!(report.row_count, 1);
6644        assert!(!report.was_limited);
6645        assert_eq!(report.rows.len(), 1);
6646        assert_eq!(report.rows[0].record_key, "evt-2");
6647        assert_eq!(
6648            report.rows[0].payload_json,
6649            r#"{"actor":"alice-admin","seq":2,"ts":200}"#
6650        );
6651    }
6652
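    // Deleting the operational_filter_values rows forces the read to fall back
    // to the declared append_only_field_time secondary index, which should
    // still resolve the same filtered result.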
6653    #[test]
6654    fn read_operational_collection_uses_secondary_index_when_filter_values_are_missing() {
6655        let (db, service) = setup();
6656        service
6657            .register_operational_collection(&OperationalRegisterRequest {
6658                name: "audit_log".to_owned(),
6659                kind: OperationalCollectionKind::AppendOnlyLog,
6660                schema_json: "{}".to_owned(),
6661                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6662                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
6663                validation_json: String::new(),
6664                secondary_indexes_json: r#"[{"name":"actor_ts","kind":"append_only_field_time","field":"actor","value_type":"string","time_field":"ts"}]"#.to_owned(),
6665                format_version: 1,
6666            })
6667            .expect("register collection");
6668        {
6669            let writer = crate::WriterActor::start(
6670                db.path(),
6671                Arc::new(SchemaManager::new()),
6672                crate::ProvenanceMode::Warn,
6673                Arc::new(crate::TelemetryCounters::default()),
6674            )
6675            .expect("writer");
6676            writer
6677                .submit(crate::WriteRequest {
6678                    label: "operational".to_owned(),
6679                    nodes: vec![],
6680                    node_retires: vec![],
6681                    edges: vec![],
6682                    edge_retires: vec![],
6683                    chunks: vec![],
6684                    runs: vec![],
6685                    steps: vec![],
6686                    actions: vec![],
6687                    optional_backfills: vec![],
6688                    vec_inserts: vec![],
6689                    operational_writes: vec![
6690                        crate::OperationalWrite::Append {
6691                            collection: "audit_log".to_owned(),
6692                            record_key: "evt-1".to_owned(),
6693                            payload_json: r#"{"actor":"alice","ts":100}"#.to_owned(),
6694                            source_ref: Some("src-1".to_owned()),
6695                        },
6696                        crate::OperationalWrite::Append {
6697                            collection: "audit_log".to_owned(),
6698                            record_key: "evt-2".to_owned(),
6699                            payload_json: r#"{"actor":"alice-admin","ts":200}"#.to_owned(),
6700                            source_ref: Some("src-2".to_owned()),
6701                        },
6702                    ],
6703                })
6704                .expect("write");
6705        }
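        // Wipe the projected filter values so the read path cannot use them
        // and must fall back to the registered actor_ts secondary index.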
6706        let conn = sqlite::open_connection(db.path()).expect("conn");
6707        conn.execute(
6708            "DELETE FROM operational_filter_values WHERE collection_name = 'audit_log'",
6709            [],
6710        )
6711        .expect("clear filter values");
6712        drop(conn);
6713
6714        let report = service
6715            .read_operational_collection(&crate::operational::OperationalReadRequest {
6716                collection_name: "audit_log".to_owned(),
6717                filters: vec![
6718                    crate::operational::OperationalFilterClause::Prefix {
6719                        field: "actor".to_owned(),
6720                        value: "alice".to_owned(),
6721                    },
6722                    crate::operational::OperationalFilterClause::Range {
6723                        field: "ts".to_owned(),
6724                        lower: Some(150),
6725                        upper: Some(250),
6726                    },
6727                ],
6728                limit: Some(10),
6729            })
6730            .expect("secondary-index read");
6731
6732        assert_eq!(report.row_count, 1);
6733        assert_eq!(report.rows[0].record_key, "evt-2");
6734    }
6735
6736    #[test]
6737    fn read_operational_collection_rejects_undeclared_fields_and_latest_state_collections() {
6738        let (_db, service) = setup();
6739        service
6740            .register_operational_collection(&OperationalRegisterRequest {
6741                name: "connector_health".to_owned(),
6742                kind: OperationalCollectionKind::LatestState,
6743                schema_json: "{}".to_owned(),
6744                retention_json: "{}".to_owned(),
6745                filter_fields_json: r#"[{"name":"status","type":"string","modes":["exact"]}]"#
6746                    .to_owned(),
6747                validation_json: String::new(),
6748                secondary_indexes_json: "[]".to_owned(),
6749                format_version: 1,
6750            })
6751            .expect("register collection");
6752
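        // Filtered reads are defined only for append-only logs; a latest_state
        // collection must be rejected outright.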
6753        let latest_state_error = service
6754            .read_operational_collection(&crate::operational::OperationalReadRequest {
6755                collection_name: "connector_health".to_owned(),
6756                filters: vec![crate::operational::OperationalFilterClause::Exact {
6757                    field: "status".to_owned(),
6758                    value: crate::operational::OperationalFilterValue::String("ok".to_owned()),
6759                }],
6760                limit: Some(10),
6761            })
6762            .expect_err("latest_state filtered reads should be rejected");
6763        assert!(latest_state_error.to_string().contains("append_only_log"));
6764
6765        service
6766            .register_operational_collection(&OperationalRegisterRequest {
6767                name: "audit_log".to_owned(),
6768                kind: OperationalCollectionKind::AppendOnlyLog,
6769                schema_json: "{}".to_owned(),
6770                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6771                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact"]}]"#
6772                    .to_owned(),
6773                validation_json: String::new(),
6774                secondary_indexes_json: "[]".to_owned(),
6775                format_version: 1,
6776            })
6777            .expect("register append-only collection");
6778
6779        let undeclared_error = service
6780            .read_operational_collection(&crate::operational::OperationalReadRequest {
6781                collection_name: "audit_log".to_owned(),
6782                filters: vec![crate::operational::OperationalFilterClause::Exact {
6783                    field: "missing".to_owned(),
6784                    value: crate::operational::OperationalFilterValue::String("x".to_owned()),
6785                }],
6786                limit: Some(10),
6787            })
6788            .expect_err("undeclared field should be rejected");
6789        assert!(undeclared_error.to_string().contains("undeclared"));
6790    }
6791
6792    #[test]
6793    fn read_operational_collection_applies_limit_and_reports_truncation() {
6794        let (db, service) = setup();
6795        service
6796            .register_operational_collection(&OperationalRegisterRequest {
6797                name: "audit_log".to_owned(),
6798                kind: OperationalCollectionKind::AppendOnlyLog,
6799                schema_json: "{}".to_owned(),
6800                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6801                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["prefix"]}]"#
6802                    .to_owned(),
6803                validation_json: String::new(),
6804                secondary_indexes_json: "[]".to_owned(),
6805                format_version: 1,
6806            })
6807            .expect("register collection");
6808        {
6809            let writer = crate::WriterActor::start(
6810                db.path(),
6811                Arc::new(SchemaManager::new()),
6812                crate::ProvenanceMode::Warn,
6813                Arc::new(crate::TelemetryCounters::default()),
6814            )
6815            .expect("writer");
6816            writer
6817                .submit(crate::WriteRequest {
6818                    label: "operational".to_owned(),
6819                    nodes: vec![],
6820                    node_retires: vec![],
6821                    edges: vec![],
6822                    edge_retires: vec![],
6823                    chunks: vec![],
6824                    runs: vec![],
6825                    steps: vec![],
6826                    actions: vec![],
6827                    optional_backfills: vec![],
6828                    vec_inserts: vec![],
6829                    operational_writes: vec![
6830                        crate::OperationalWrite::Append {
6831                            collection: "audit_log".to_owned(),
6832                            record_key: "evt-1".to_owned(),
6833                            payload_json: r#"{"actor":"alice-1"}"#.to_owned(),
6834                            source_ref: Some("src-1".to_owned()),
6835                        },
6836                        crate::OperationalWrite::Append {
6837                            collection: "audit_log".to_owned(),
6838                            record_key: "evt-2".to_owned(),
6839                            payload_json: r#"{"actor":"alice-2"}"#.to_owned(),
6840                            source_ref: Some("src-2".to_owned()),
6841                        },
6842                    ],
6843                })
6844                .expect("write");
6845        }
6846
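        // Both rows match the "alice" prefix; limit 1 truncates the result and
        // the report must flag the truncation via was_limited.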
6847        let report = service
6848            .read_operational_collection(&crate::operational::OperationalReadRequest {
6849                collection_name: "audit_log".to_owned(),
6850                filters: vec![crate::operational::OperationalFilterClause::Prefix {
6851                    field: "actor".to_owned(),
6852                    value: "alice".to_owned(),
6853                }],
6854                limit: Some(1),
6855            })
6856            .expect("limited read");
6857
6858        assert_eq!(report.row_count, 1);
6859        assert_eq!(report.applied_limit, 1);
6860        assert!(report.was_limited);
6861        assert_eq!(report.rows[0].record_key, "evt-2");
6862    }
6863
6864    #[test]
6865    fn preexisting_operational_collection_can_gain_filter_contract_after_upgrade() {
6866        let db = NamedTempFile::new().expect("temp db");
6867        let conn = sqlite::open_connection(db.path()).expect("conn");
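        // Hand-create the legacy (pre-v10) operational tables, which lack the
        // filter_fields_json contract column, and seed one collection plus one
        // mutation so the upgrade path below has data to read.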
6868        conn.execute_batch(
6869            r#"
6870            CREATE TABLE operational_collections (
6871                name TEXT PRIMARY KEY,
6872                kind TEXT NOT NULL,
6873                schema_json TEXT NOT NULL,
6874                retention_json TEXT NOT NULL,
6875                format_version INTEGER NOT NULL DEFAULT 1,
6876                created_at INTEGER NOT NULL DEFAULT 100,
6877                disabled_at INTEGER
6878            );
6879            CREATE TABLE operational_mutations (
6880                id TEXT PRIMARY KEY,
6881                collection_name TEXT NOT NULL,
6882                record_key TEXT NOT NULL,
6883                op_kind TEXT NOT NULL,
6884                payload_json TEXT NOT NULL,
6885                source_ref TEXT,
6886                created_at INTEGER NOT NULL DEFAULT 100,
6887                mutation_order INTEGER NOT NULL DEFAULT 1
6888            );
6889            INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at)
6890            VALUES ('audit_log', 'append_only_log', '{}', '{"mode":"keep_all"}', 1, 100);
6891            INSERT INTO operational_mutations
6892                (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order)
6893            VALUES
6894                ('evt-1', 'audit_log', 'evt-1', 'append', '{"actor":"alice","ts":0}', 'src-1', 100, 1);
6895            "#,
6896        )
6897        .expect("seed pre-v10 schema");
6898        drop(conn);
6899
6900        let service = AdminService::new(db.path(), Arc::new(SchemaManager::new()));
6901        let pre_update = service
6902            .read_operational_collection(&crate::operational::OperationalReadRequest {
6903                collection_name: "audit_log".to_owned(),
6904                filters: vec![crate::operational::OperationalFilterClause::Exact {
6905                    field: "actor".to_owned(),
6906                    value: crate::operational::OperationalFilterValue::String("alice".to_owned()),
6907                }],
6908                limit: Some(10),
6909            })
6910            .expect_err("read should reject undeclared fields before migration update");
6911        assert!(pre_update.to_string().contains("undeclared"));
6912
6913        let updated = service
6914            .update_operational_collection_filters(
6915                "audit_log",
6916                r#"[{"name":"actor","type":"string","modes":["exact"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#,
6917            )
6918            .expect("update filter contract");
6919        assert!(updated.filter_fields_json.contains("\"actor\""));
6920
6921        let report = service
6922            .read_operational_collection(&crate::operational::OperationalReadRequest {
6923                collection_name: "audit_log".to_owned(),
6924                filters: vec![crate::operational::OperationalFilterClause::Range {
6925                    field: "ts".to_owned(),
6926                    lower: Some(0),
6927                    upper: Some(0),
6928                }],
6929                limit: Some(10),
6930            })
6931            .expect("read after explicit filter update");
6932        assert_eq!(report.row_count, 1);
6933        assert_eq!(report.rows[0].record_key, "evt-1");
6934    }
6935
6936    #[cfg(feature = "sqlite-vec")]
6937    #[test]
6938    fn check_semantics_detects_stale_vec_rows() {
6939        use crate::sqlite::open_connection_with_vec;
6940
6941        let db = NamedTempFile::new().expect("temp file");
6942        let schema = Arc::new(SchemaManager::new());
6943        {
6944            let conn = open_connection_with_vec(db.path()).expect("vec conn");
6945            schema.bootstrap(&conn).expect("bootstrap");
6946            schema
6947                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 3)
6948                .expect("vec profile");
6949            // Insert a vec row whose chunk does not exist.
6950            let bytes: Vec<u8> = [0.1f32, 0.2f32, 0.3f32]
6951                .iter()
6952                .flat_map(|f| f.to_le_bytes())
6953                .collect();
6954            conn.execute(
6955                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('ghost-chunk', ?1)",
6956                rusqlite::params![bytes],
6957            )
6958            .expect("insert stale vec row");
6959        }
6960        let service = AdminService::new(db.path(), Arc::clone(&schema));
6961        let report = service.check_semantics().expect("semantics check");
6962        assert_eq!(report.stale_vec_rows, 1);
6963        assert!(
6964            report.warnings.iter().any(|w| w.contains("stale vec")),
6965            "warning must mention stale vec"
6966        );
6967    }
6968
6969    #[cfg(feature = "sqlite-vec")]
6970    #[test]
6971    fn restore_vector_profiles_recreates_vec_table_from_metadata() {
6972        let db = NamedTempFile::new().expect("temp file");
6973        let schema = Arc::new(SchemaManager::new());
6974        {
6975            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
6976            schema.bootstrap(&conn).expect("bootstrap");
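            // Register the profile metadata only; the vec table itself is not
            // created here, so the restore step must rebuild it from this row.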
6977            conn.execute(
6978                "INSERT INTO vector_profiles (profile, table_name, dimension, enabled) \
6979                 VALUES ('default', 'vec_nodes_active', 3, 1)",
6980                [],
6981            )
6982            .expect("insert vector profile");
6983        }
6984
6985        let service = AdminService::new(db.path(), Arc::clone(&schema));
6986        let report = service
6987            .restore_vector_profiles()
6988            .expect("restore vector profiles");
6989        assert_eq!(
6990            report.targets,
6991            vec![crate::projection::ProjectionTarget::Vec]
6992        );
6993        assert_eq!(report.rebuilt_rows, 1);
6994
6995        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
6996        let count: i64 = conn
6997            .query_row(
6998                "SELECT count(*) FROM sqlite_schema WHERE name = 'vec_nodes_active'",
6999                [],
7000                |row| row.get(0),
7001            )
7002            .expect("vec schema count");
7003        assert_eq!(count, 1, "vec table should exist after restore");
7004    }
7005
7006    #[cfg(feature = "sqlite-vec")]
7007    #[test]
7008    fn load_vector_regeneration_config_supports_json_and_toml() {
7009        let dir = tempfile::tempdir().expect("temp dir");
7010        let json_path = dir.path().join("regen.json");
7011        let toml_path = dir.path().join("regen.toml");
7012
7013        let config = VectorRegenerationConfig {
7014            profile: "default".to_owned(),
7015            table_name: "vec_nodes_active".to_owned(),
7016            chunking_policy: "per_chunk".to_owned(),
7017            preprocessing_policy: "trim".to_owned(),
7018        };
7019
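        // The same config serialized as JSON and as TOML must parse back to
        // identical values.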
7020        fs::write(&json_path, serde_json::to_string(&config).expect("json")).expect("write json");
7021        fs::write(&toml_path, toml::to_string(&config).expect("toml")).expect("write toml");
7022
7023        let parsed_json = load_vector_regeneration_config(&json_path).expect("json parse");
7024        let parsed_toml = load_vector_regeneration_config(&toml_path).expect("toml parse");
7025
7026        assert_eq!(parsed_json, config);
7027        assert_eq!(parsed_toml, config);
7028    }
7029
7030    /// The 0.4.0 rewrite removed the identity fields from the config.
7031    /// Any client that still serializes the pre-0.4 fields must be
7032    /// rejected at the serde boundary with a clear error; it must
7033    /// never be silently accepted.
7034    #[test]
7035    fn regenerate_vector_embeddings_config_rejects_old_identity_fields() {
7036        let legacy_json = r#"{
7037            "profile": "default",
7038            "table_name": "vec_nodes_active",
7039            "model_identity": "old-model",
7040            "model_version": "1.0",
7041            "dimension": 4,
7042            "normalization_policy": "l2",
7043            "chunking_policy": "per_chunk",
7044            "preprocessing_policy": "trim",
7045            "generator_command": ["/bin/echo"]
7046        }"#;
7047        let result: Result<VectorRegenerationConfig, _> = serde_json::from_str(legacy_json);
7048        assert!(
7049            result.is_err(),
7050            "legacy identity fields must be rejected at deserialization"
7051        );
7052    }
7053
7054    #[cfg(all(not(feature = "sqlite-vec"), unix))]
7055    #[test]
7056    fn regenerate_vector_embeddings_unsupported_vec_capability_writes_request_and_failed_audit() {
7057        let db = NamedTempFile::new().expect("temp file");
7058        let schema = Arc::new(SchemaManager::new());
7059
7060        {
7061            let conn = sqlite::open_connection(db.path()).expect("connection");
7062            schema.bootstrap(&conn).expect("bootstrap");
7063            conn.execute(
7064                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7065                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7066                [],
7067            )
7068            .expect("insert node");
7069            conn.execute(
7070                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7071                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7072                [],
7073            )
7074            .expect("insert chunk");
7075        }
7076
7077        let service = AdminService::new(db.path(), Arc::clone(&schema));
7078        let embedder = TestEmbedder::new("test-model", 4);
7079        let error = service
7080            .regenerate_vector_embeddings(
7081                &embedder,
7082                &VectorRegenerationConfig {
7083                    profile: "default".to_owned(),
7084                    table_name: "vec_nodes_active".to_owned(),
7085                    chunking_policy: "per_chunk".to_owned(),
7086                    preprocessing_policy: "trim".to_owned(),
7087                },
7088            )
7089            .expect_err("sqlite-vec capability should be required");
7090
7091        assert!(error.to_string().contains("unsupported vec capability"));
7092
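        // Even though regeneration aborted, the audit trail must record both
        // the request and the failure, with the failure class in the metadata.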
7093        let conn = sqlite::open_connection(db.path()).expect("connection");
7094        let request_count: i64 = conn
7095            .query_row(
7096                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_requested' AND subject = 'default'",
7097                [],
7098                |row| row.get(0),
7099            )
7100            .expect("request count");
7101        assert_eq!(request_count, 1);
7102        let failed_count: i64 = conn
7103            .query_row(
7104                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
7105                [],
7106                |row| row.get(0),
7107            )
7108            .expect("failed count");
7109        assert_eq!(failed_count, 1);
7110        let metadata_json: String = conn
7111            .query_row(
7112                "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
7113                [],
7114                |row| row.get(0),
7115            )
7116            .expect("failed metadata");
7117        assert!(metadata_json.contains("\"failure_class\":\"unsupported vec capability\""));
7118    }
7119
7120    #[cfg(feature = "sqlite-vec")]
7121    #[test]
7122    #[allow(clippy::too_many_lines)]
7123    fn regenerate_vector_embeddings_rebuilds_embeddings_via_embedder() {
7124        let db = NamedTempFile::new().expect("temp file");
7125        let schema = Arc::new(SchemaManager::new());
7126
7127        {
7128            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7129            schema.bootstrap(&conn).expect("bootstrap");
7130            conn.execute(
7131                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7132                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7133                [],
7134            )
7135            .expect("insert node");
7136            conn.execute(
7137                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7138                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7139                [],
7140            )
7141            .expect("insert chunk 1");
7142            conn.execute(
7143                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7144                 VALUES ('chunk-2', 'doc-1', 'travel plan', 101)",
7145                [],
7146            )
7147            .expect("insert chunk 2");
7148        }
7149
7150        let service = AdminService::new(db.path(), Arc::clone(&schema));
7151        let embedder = TestEmbedder::new("test-model", 4);
7152        let report = service
7153            .regenerate_vector_embeddings(
7154                &embedder,
7155                &VectorRegenerationConfig {
7156                    profile: "default".to_owned(),
7157                    table_name: "vec_nodes_active".to_owned(),
7158                    chunking_policy: "per_chunk".to_owned(),
7159                    preprocessing_policy: "trim".to_owned(),
7160                },
7161            )
7162            .expect("regenerate vectors");
7163
7164        assert_eq!(report.profile, "default");
7165        assert_eq!(report.table_name, "vec_nodes_active");
7166        assert_eq!(report.dimension, 4);
7167        assert_eq!(report.total_chunks, 2);
7168        assert_eq!(report.regenerated_rows, 2);
7169        assert!(report.contract_persisted);
7170
7171        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7172        let vec_count: i64 = conn
7173            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
7174                row.get(0)
7175            })
7176            .expect("vec count");
7177        assert_eq!(vec_count, 2);
7178
7179        // The persisted vector contract must reflect the embedder's own
7180        // identity: the config no longer carries identity fields, so
7181        // there is nothing caller-supplied to echo back.
7182        let (model_identity, model_version, dimension, normalization_policy): (
7183            String,
7184            String,
7185            i64,
7186            String,
7187        ) = conn
7188            .query_row(
7189                "SELECT model_identity, model_version, dimension, normalization_policy \
7190                 FROM vector_embedding_contracts WHERE profile = 'default'",
7191                [],
7192                |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)),
7193            )
7194            .expect("contract row");
7195        assert_eq!(model_identity, "test-model");
7196        assert_eq!(model_version, "1.0.0");
7197        assert_eq!(dimension, 4);
7198        assert_eq!(normalization_policy, "l2");
7199
7200        let contract_format_version: i64 = conn
7201            .query_row(
7202                "SELECT contract_format_version FROM vector_embedding_contracts WHERE profile = 'default'",
7203                [],
7204                |row| row.get(0),
7205            )
7206            .expect("contract_format_version");
7207        assert_eq!(contract_format_version, 1);
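        // A successful run leaves a paired audit trail: one request event and
        // one apply event whose metadata carries the profile, snapshot hash,
        // and embedder identity.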
7208        let request_count: i64 = conn
7209            .query_row(
7210                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_requested' AND subject = 'default'",
7211                [],
7212                |row| row.get(0),
7213            )
7214            .expect("request audit count");
7215        assert_eq!(request_count, 1);
7216        let apply_count: i64 = conn
7217            .query_row(
7218                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_apply' AND subject = 'default'",
7219                [],
7220                |row| row.get(0),
7221            )
7222            .expect("apply audit count");
7223        assert_eq!(apply_count, 1);
7224        let apply_metadata: String = conn
7225            .query_row(
7226                "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_apply' AND subject = 'default'",
7227                [],
7228                |row| row.get(0),
7229            )
7230            .expect("apply metadata");
7231        assert!(apply_metadata.contains("\"profile\":\"default\""));
7232        assert!(apply_metadata.contains("\"snapshot_hash\":"));
7233        assert!(apply_metadata.contains("\"model_identity\":\"test-model\""));
7234    }
7235
7236    #[cfg(feature = "sqlite-vec")]
7237    #[test]
7238    #[allow(clippy::too_many_lines)]
7239    fn regenerate_vector_embeddings_embedder_failure_leaves_contract_and_vec_rows_unchanged() {
7240        let db = NamedTempFile::new().expect("temp file");
7241        let schema = Arc::new(SchemaManager::new());
7242
7243        {
7244            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7245            schema.bootstrap(&conn).expect("bootstrap");
7246            conn.execute(
7247                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7248                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7249                [],
7250            )
7251            .expect("insert node");
7252            conn.execute(
7253                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7254                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7255                [],
7256            )
7257            .expect("insert chunk");
7258            schema
7259                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
7260                .expect("ensure vec profile");
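            // Seed an existing contract and one vec row so the failure path
            // below can be shown to leave both completely untouched.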
7261            conn.execute(
7262                r"
7263                INSERT INTO vector_embedding_contracts (
7264                    profile,
7265                    table_name,
7266                    model_identity,
7267                    model_version,
7268                    dimension,
7269                    normalization_policy,
7270                    chunking_policy,
7271                    preprocessing_policy,
7272                    generator_command_json,
7273                    applied_at,
7274                    snapshot_hash
7275                ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)
7276                ",
7277                rusqlite::params![
7278                    "default",
7279                    "vec_nodes_active",
7280                    "old-model",
7281                    "0.9.0",
7282                    4,
7283                    "l2",
7284                    "per_chunk",
7285                    "trim",
7286                    "[]",
7287                    111,
7288                    "old-snapshot"
7289                ],
7290            )
7291            .expect("seed contract");
7292            conn.execute(
7293                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
7294                [],
7295            )
7296            .expect("seed vec row");
7297        }
7298
7299        let service = AdminService::new(db.path(), Arc::clone(&schema));
7300        let failing = FailingEmbedder {
7301            identity: QueryEmbedderIdentity {
7302                model_identity: "new-model".to_owned(),
7303                model_version: "1.0.0".to_owned(),
7304                dimension: 4,
7305                normalization_policy: "l2".to_owned(),
7306            },
7307        };
7308        let error = service
7309            .regenerate_vector_embeddings(
7310                &failing,
7311                &VectorRegenerationConfig {
7312                    profile: "default".to_owned(),
7313                    table_name: "vec_nodes_active".to_owned(),
7314                    chunking_policy: "per_chunk".to_owned(),
7315                    preprocessing_policy: "trim".to_owned(),
7316                },
7317            )
7318            .expect_err("embedder should fail");
7319
7320        assert!(error.to_string().contains("embedder failure"));
7321
7322        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7323        let model_identity: String = conn
7324            .query_row(
7325                "SELECT model_identity FROM vector_embedding_contracts WHERE profile = 'default'",
7326                [],
7327                |row| row.get(0),
7328            )
7329            .expect("model identity");
7330        assert_eq!(model_identity, "old-model");
7331        let snapshot_hash: String = conn
7332            .query_row(
7333                "SELECT snapshot_hash FROM vector_embedding_contracts WHERE profile = 'default'",
7334                [],
7335                |row| row.get(0),
7336            )
7337            .expect("snapshot hash");
7338        assert_eq!(snapshot_hash, "old-snapshot");
7339        let vec_count: i64 = conn
7340            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
7341                row.get(0)
7342            })
7343            .expect("vec count");
7344        assert_eq!(vec_count, 1);
7345        let failure_count: i64 = conn
7346            .query_row(
7347                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
7348                [],
7349                |row| row.get(0),
7350            )
7351            .expect("failure count");
7352        assert_eq!(failure_count, 1);
7353        let failure_metadata: String = conn
7354            .query_row(
7355                "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
7356                [],
7357                |row| row.get(0),
7358            )
7359            .expect("failure metadata");
7360        assert!(failure_metadata.contains("\"failure_class\":\"embedder failure\""));
7361    }
7362
7363    // Subprocess generator tests (snapshot-drift-via-concurrent-writer,
7364    // timeout, stdout/stderr overflow, oversized input, excessive chunk
7365    // count, malformed JSON, world-writable executable, disallowed
7366    // executable root, environment preservation) were removed in 0.4.0
7367    // along with the subprocess generator pattern itself. The failure
7368    // modes they exercised belong to the deleted
7369    // `run_vector_generator_bounded` pipeline and have no equivalent in
7370    // the direct-embedder path. See
7371    // `.claude/memory/project_vector_identity_invariant.md`.
7372
7373    #[cfg(feature = "sqlite-vec")]
7374    #[test]
7375    fn regenerate_vector_embeddings_rejects_whitespace_only_profile_before_mutation() {
7376        let db = NamedTempFile::new().expect("temp file");
7377        let schema = Arc::new(SchemaManager::new());
7378        {
7379            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7380            schema.bootstrap(&conn).expect("bootstrap");
7381            conn.execute(
7382                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7383                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7384                [],
7385            )
7386            .expect("insert node");
7387            conn.execute(
7388                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7389                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7390                [],
7391            )
7392            .expect("insert chunk");
7393        }
7394
7395        let service = AdminService::new(db.path(), Arc::clone(&schema));
7396        let embedder = TestEmbedder::new("test-model", 4);
7397        let error = service
7398            .regenerate_vector_embeddings(
7399                &embedder,
7400                &VectorRegenerationConfig {
7401                    profile: "   ".to_owned(),
7402                    table_name: "vec_nodes_active".to_owned(),
7403                    chunking_policy: "per_chunk".to_owned(),
7404                    preprocessing_policy: "trim".to_owned(),
7405                },
7406            )
7407            .expect_err("whitespace profile should be rejected");
7408
7409        assert!(error.to_string().contains("invalid contract"));
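        // Validation fails before any mutation, so no contract row and no
        // provenance event may have been written.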
7410        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7411        let contract_count: i64 = conn
7412            .query_row(
7413                "SELECT count(*) FROM vector_embedding_contracts",
7414                [],
7415                |row| row.get(0),
7416            )
7417            .expect("contract count");
7418        assert_eq!(contract_count, 0);
7419        let provenance_count: i64 = conn
7420            .query_row("SELECT count(*) FROM provenance_events", [], |row| {
7421                row.get(0)
7422            })
7423            .expect("provenance count");
7424        assert_eq!(provenance_count, 0);
7425    }
7426
7427    #[cfg(feature = "sqlite-vec")]
7428    #[test]
7429    fn regenerate_vector_embeddings_rejects_future_contract_format_version() {
7430        let db = NamedTempFile::new().expect("temp file");
7431        let schema = Arc::new(SchemaManager::new());
7432        {
7433            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7434            schema.bootstrap(&conn).expect("bootstrap");
7435            conn.execute(
7436                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7437                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7438                [],
7439            )
7440            .expect("insert node");
7441            conn.execute(
7442                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7443                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7444                [],
7445            )
7446            .expect("insert chunk");
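            // Seed a contract that claims format version 99, newer than
            // anything this build understands.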
7447            conn.execute(
7448                r"
7449                INSERT INTO vector_embedding_contracts (
7450                    profile,
7451                    table_name,
7452                    model_identity,
7453                    model_version,
7454                    dimension,
7455                    normalization_policy,
7456                    chunking_policy,
7457                    preprocessing_policy,
7458                    generator_command_json,
7459                    applied_at,
7460                    snapshot_hash,
7461                    contract_format_version,
7462                    updated_at
7463                ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)
7464                ",
7465                rusqlite::params![
7466                    "default",
7467                    "vec_nodes_active",
7468                    "old-model",
7469                    "0.9.0",
7470                    4,
7471                    "l2",
7472                    "per_chunk",
7473                    "trim",
7474                    "[]",
7475                    111,
7476                    "old-snapshot",
7477                    99,
7478                    111,
7479                ],
7480            )
7481            .expect("seed future contract");
7482        }
7483
7484        let service = AdminService::new(db.path(), Arc::clone(&schema));
7485        let embedder = TestEmbedder::new("test-model", 4);
7486        let error = service
7487            .regenerate_vector_embeddings(
7488                &embedder,
7489                &VectorRegenerationConfig {
7490                    profile: "default".to_owned(),
7491                    table_name: "vec_nodes_active".to_owned(),
7492                    chunking_policy: "per_chunk".to_owned(),
7493                    preprocessing_policy: "trim".to_owned(),
7494                },
7495            )
7496            .expect_err("future contract version should be rejected");
7497
7498        assert!(error.to_string().contains("unsupported"));
7499        assert!(error.to_string().contains("format version"));
7500    }
7501
7502    #[test]
7503    fn check_semantics_detects_orphaned_chunk() {
7504        let (db, service) = setup();
7505        {
7506            // Open without FK enforcement to insert chunk with no active node.
7507            let conn = sqlite::open_connection(db.path()).expect("conn");
7508            conn.execute(
7509                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7510                 VALUES ('c1', 'ghost-node', 'text', 100)",
7511                [],
7512            )
7513            .expect("insert orphaned chunk");
7514        }
7515        let report = service.check_semantics().expect("semantics check");
7516        assert_eq!(report.orphaned_chunks, 1);
7517    }
7518
7519    #[test]
7520    fn check_semantics_detects_null_source_ref() {
7521        let (db, service) = setup();
7522        {
7523            let conn = sqlite::open_connection(db.path()).expect("conn");
7524            conn.execute(
7525                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at) \
7526                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100)",
7527                [],
7528            )
7529            .expect("insert node with null source_ref");
7530        }
7531        let report = service.check_semantics().expect("semantics check");
7532        assert_eq!(report.null_source_ref_nodes, 1);
7533    }
7534
7535    #[test]
7536    fn check_semantics_detects_broken_step_fk() {
7537        let (db, service) = setup();
7538        {
7539            // Explicitly disable FK enforcement for this connection so we can insert
7540            // an orphaned step (ghost run_id) to simulate a partial-write failure.
7541            let conn = sqlite::open_connection(db.path()).expect("conn");
7542            conn.execute_batch("PRAGMA foreign_keys = OFF;")
7543                .expect("disable FK");
7544            conn.execute(
7545                "INSERT INTO steps (id, run_id, kind, status, properties, created_at) \
7546                 VALUES ('s1', 'ghost-run', 'llm', 'completed', '{}', 100)",
7547                [],
7548            )
7549            .expect("insert step with ghost run_id");
7550        }
7551        let report = service.check_semantics().expect("semantics check");
7552        assert_eq!(report.broken_step_fk, 1);
7553    }
7554
7555    #[test]
7556    fn check_semantics_detects_broken_action_fk() {
7557        let (db, service) = setup();
7558        {
7559            let conn = sqlite::open_connection(db.path()).expect("conn");
7560            conn.execute_batch("PRAGMA foreign_keys = OFF;")
7561                .expect("disable FK");
7562            conn.execute(
7563                "INSERT INTO actions (id, step_id, kind, status, properties, created_at) \
7564                 VALUES ('a1', 'ghost-step', 'emit', 'completed', '{}', 100)",
7565                [],
7566            )
7567            .expect("insert action with ghost step_id");
7568        }
7569        let report = service.check_semantics().expect("semantics check");
7570        assert_eq!(report.broken_action_fk, 1);
7571    }
7572
7573    #[test]
7574    fn check_semantics_detects_stale_fts_rows() {
7575        let (db, service) = setup();
7576        {
7577            let conn = sqlite::open_connection(db.path()).expect("conn");
7578            // FTS virtual tables have no FK constraints; insert a row referencing
7579            // a chunk_id that does not exist in the chunks table.
7580            conn.execute(
7581                "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
7582                 VALUES ('ghost-chunk', 'any-node', 'Meeting', 'stale content')",
7583                [],
7584            )
7585            .expect("insert stale FTS row");
7586        }
7587        let report = service.check_semantics().expect("semantics check");
7588        assert_eq!(report.stale_fts_rows, 1);
7589    }
7590
7591    #[test]
7592    fn check_semantics_detects_fts_rows_for_superseded_nodes() {
7593        let (db, service) = setup();
7594        {
7595            let conn = sqlite::open_connection(db.path()).expect("conn");
7596            // Insert a node that has been fully superseded (superseded_at IS NOT NULL).
7597            conn.execute(
7598                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
7599                 VALUES ('r1', 'lg-sup', 'Meeting', '{}', 100, 200, 'src-1')",
7600                [],
7601            )
7602            .expect("insert superseded node");
7603            // Insert an FTS row for the superseded node's logical_id.
7604            conn.execute(
7605                "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
7606                 VALUES ('ck-x', 'lg-sup', 'Meeting', 'superseded content')",
7607                [],
7608            )
7609            .expect("insert FTS row for superseded node");
7610        }
7611        let report = service.check_semantics().expect("semantics check");
7612        assert_eq!(report.fts_rows_for_superseded_nodes, 1);
7613    }
7614
7615    #[test]
7616    fn check_semantics_detects_dangling_edges() {
7617        let (db, service) = setup();
7618        {
7619            let conn = sqlite::open_connection(db.path()).expect("conn");
7620            conn.execute_batch("PRAGMA foreign_keys = OFF;")
7621                .expect("disable FK");
7622            // One active node as source; target does not exist — edge is dangling.
7623            conn.execute(
7624                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7625                 VALUES ('r1', 'lg-src', 'Meeting', '{}', 100, 'src-1')",
7626                [],
7627            )
7628            .expect("insert source node");
7629            conn.execute(
7630                "INSERT INTO edges \
7631                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
7632                 VALUES ('e1', 'edge-1', 'lg-src', 'ghost-target', 'LINKS', '{}', 100, 'src-1')",
7633                [],
7634            )
7635            .expect("insert dangling edge");
7636        }
7637        let report = service.check_semantics().expect("semantics check");
7638        assert_eq!(report.dangling_edges, 1);
7639    }
7640
7641    #[test]
7642    fn check_semantics_detects_orphaned_supersession_chains() {
7643        let (db, service) = setup();
7644        {
7645            let conn = sqlite::open_connection(db.path()).expect("conn");
7646            // Every version of this logical_id is superseded — no active row remains.
7647            conn.execute(
7648                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
7649                 VALUES ('r1', 'lg-orphaned', 'Meeting', '{}', 100, 200, 'src-1')",
7650                [],
7651            )
7652            .expect("insert fully superseded node");
7653        }
7654        let report = service.check_semantics().expect("semantics check");
7655        assert_eq!(report.orphaned_supersession_chains, 1);
7656    }
7657
7658    #[test]
7659    fn check_semantics_detects_mismatched_kind_property_fts_rows() {
7660        let (db, service) = setup();
7661        {
7662            let conn = sqlite::open_connection(db.path()).expect("conn");
7663            // Insert an active node with kind "Goal".
7664            conn.execute(
7665                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7666                 VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'src-1')",
7667                [],
7668            )
7669            .expect("insert node");
7670            // Insert a property FTS row with a DIFFERENT kind than the node.
7671            conn.execute(
7672                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
7673                 VALUES ('goal-1', 'WrongKind', 'Ship v2')",
7674                [],
7675            )
7676            .expect("insert mismatched property FTS row");
7677        }
7678        let report = service.check_semantics().expect("semantics check");
7679        assert_eq!(report.mismatched_kind_property_fts_rows, 1);
7680    }
7681
7682    #[test]
7683    fn check_semantics_detects_duplicate_property_fts_rows() {
7684        let (db, service) = setup();
7685        {
7686            let conn = sqlite::open_connection(db.path()).expect("conn");
7687            conn.execute(
7688                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7689                 VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'src-1')",
7690                [],
7691            )
7692            .expect("insert node");
7693            // Insert two property FTS rows for the same logical ID.
7694            conn.execute(
7695                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
7696                 VALUES ('goal-1', 'Goal', 'Ship v2')",
7697                [],
7698            )
7699            .expect("insert first property FTS row");
7700            conn.execute(
7701                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
7702                 VALUES ('goal-1', 'Goal', 'Ship v2 duplicate')",
7703                [],
7704            )
7705            .expect("insert duplicate property FTS row");
7706        }
7707        let report = service.check_semantics().expect("semantics check");
7708        assert_eq!(report.duplicate_property_fts_rows, 1);
7709    }
7710
7711    #[test]
7712    fn check_semantics_detects_drifted_property_fts_text() {
7713        let (db, service) = setup();
7714        {
7715            let conn = sqlite::open_connection(db.path()).expect("conn");
7716            conn.execute(
7717                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
7718                 VALUES ('Goal', '[\"$.name\"]', ' ')",
7719                [],
7720            )
7721            .expect("register schema");
7722            conn.execute(
7723                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7724                 VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Current name\"}', 100, 'src-1')",
7725                [],
7726            )
7727            .expect("insert node");
7728            // Insert a property FTS row with outdated text content.
7729            conn.execute(
7730                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
7731                 VALUES ('goal-1', 'Goal', 'Old stale name')",
7732                [],
7733            )
7734            .expect("insert stale property FTS row");
7735        }
7736        let report = service.check_semantics().expect("semantics check");
7737        assert_eq!(report.drifted_property_fts_rows, 1);
7738    }
7739
7740    #[test]
7741    fn check_semantics_detects_property_fts_row_that_should_not_exist() {
7742        let (db, service) = setup();
7743        {
7744            let conn = sqlite::open_connection(db.path()).expect("conn");
7745            conn.execute(
7746                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
7747                 VALUES ('Goal', '[\"$.searchable\"]', ' ')",
7748                [],
7749            )
7750            .expect("register schema");
7751            // Node does NOT have $.searchable — extraction yields no value.
7752            conn.execute(
7753                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7754                 VALUES ('r1', 'goal-1', 'Goal', '{\"other\":\"field\"}', 100, 'src-1')",
7755                [],
7756            )
7757            .expect("insert node");
7758            // But a property FTS row exists anyway.
7759            conn.execute(
7760                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
7761                 VALUES ('goal-1', 'Goal', 'phantom text')",
7762                [],
7763            )
7764            .expect("insert phantom property FTS row");
7765        }
7766        let report = service.check_semantics().expect("semantics check");
7767        assert_eq!(
7768            report.drifted_property_fts_rows, 1,
7769            "row that should not exist must be counted as drifted"
7770        );
7771    }
7772
7773    #[test]
7774    fn safe_export_writes_manifest_with_sha256() {
7775        let (_db, service) = setup();
7776        let export_dir = tempfile::TempDir::new().expect("temp dir");
7777        let export_path = export_dir.path().join("backup.db");
7778
7779        let manifest = service
7780            .safe_export(
7781                &export_path,
7782                SafeExportOptions {
7783                    force_checkpoint: false,
7784                },
7785            )
7786            .expect("export");
7787
7788        assert!(export_path.exists(), "exported db should exist");
7789        let manifest_path = export_dir.path().join("backup.db.export-manifest.json");
7790        assert!(
7791            manifest_path.exists(),
7792            "manifest file should exist at {}",
7793            manifest_path.display()
7794        );
7795        assert_eq!(manifest.sha256.len(), 64, "sha256 should be 64 hex chars");
7796        assert!(
7797            manifest.exported_at > 0,
7798            "exported_at should be a unix timestamp"
7799        );
7800        assert_eq!(
7801            manifest.schema_version,
7802            SchemaManager::new().current_version().0,
7803            "schema_version should match the live schema version"
7804        );
7805        assert_eq!(manifest.protocol_version, 1, "protocol_version should be 1");
7806        assert!(manifest.page_count > 0, "page_count should be positive");
7807    }
7808
7809    #[test]
7810    fn safe_export_preserves_operational_validation_contracts() {
7811        let (_db, service) = setup();
7812        let validation_json = r#"{"format_version":1,"mode":"enforce","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#;
7813        service
7814            .register_operational_collection(&OperationalRegisterRequest {
7815                name: "connector_health".to_owned(),
7816                kind: OperationalCollectionKind::LatestState,
7817                schema_json: "{}".to_owned(),
7818                retention_json: "{}".to_owned(),
7819                filter_fields_json: "[]".to_owned(),
7820                validation_json: validation_json.to_owned(),
7821                secondary_indexes_json: "[]".to_owned(),
7822                format_version: 1,
7823            })
7824            .expect("register collection");
7825
7826        let export_dir = tempfile::TempDir::new().expect("temp dir");
7827        let export_path = export_dir.path().join("backup.db");
7828        service
7829            .safe_export(
7830                &export_path,
7831                SafeExportOptions {
7832                    force_checkpoint: false,
7833                },
7834            )
7835            .expect("export");
7836
7837        let exported = sqlite::open_connection(&export_path).expect("exported conn");
7838        let exported_validation_json: String = exported
7839            .query_row(
7840                "SELECT validation_json FROM operational_collections WHERE name = 'connector_health'",
7841                [],
7842                |row| row.get(0),
7843            )
7844            .expect("validation_json");
7845        assert_eq!(exported_validation_json, validation_json);
7846    }
7847
7848    #[test]
7849    fn safe_export_force_checkpoint_false_skips_wal_pragma() {
7850        let (_db, service) = setup();
7851        let export_dir = tempfile::TempDir::new().expect("temp dir");
7852        let export_path = export_dir.path().join("no-wal.db");
7853
7854        // force_checkpoint: false must not error even on a non-WAL database
7855        let manifest = service
7856            .safe_export(
7857                &export_path,
7858                SafeExportOptions {
7859                    force_checkpoint: false,
7860                },
7861            )
7862            .expect("export with no checkpoint");
7863
7864        assert!(
7865            manifest.page_count > 0,
7866            "page_count must be populated regardless of checkpoint mode"
7867        );
7868        assert_eq!(
7869            manifest.schema_version,
7870            SchemaManager::new().current_version().0
7871        );
7872        assert_eq!(manifest.protocol_version, 1);
7873    }
7874
7875    #[test]
7876    fn safe_export_force_checkpoint_false_still_captures_wal_backed_changes() {
7877        let (db, service) = setup();
7878        let conn = sqlite::open_connection(db.path()).expect("conn");
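        // Switch to WAL and disable auto-checkpointing so the committed row
        // below stays resident in the WAL file rather than the main database
        // image.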
7879        let journal_mode: String = conn
7880            .query_row("PRAGMA journal_mode=WAL", [], |row| row.get(0))
7881            .expect("enable wal");
7882        assert_eq!(journal_mode.to_lowercase(), "wal");
7883        let auto_checkpoint_pages: i64 = conn
7884            .query_row("PRAGMA wal_autocheckpoint=0", [], |row| row.get(0))
7885            .expect("disable auto checkpoint");
7886        assert_eq!(auto_checkpoint_pages, 0);
7887        conn.execute(
7888            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7889             VALUES ('r-wal', 'lg-wal', 'Meeting', '{}', 100, 'src-wal')",
7890            [],
7891        )
7892        .expect("insert wal-backed node");
7893
7894        let export_dir = tempfile::TempDir::new().expect("temp dir");
7895        let export_path = export_dir.path().join("wal-backed.db");
7896        service
7897            .safe_export(
7898                &export_path,
7899                SafeExportOptions {
7900                    force_checkpoint: false,
7901                },
7902            )
7903            .expect("export wal-backed db");
7904
7905        let exported = sqlite::open_connection(&export_path).expect("open exported db");
7906        let exported_count: i64 = exported
7907            .query_row(
7908                "SELECT count(*) FROM nodes WHERE logical_id = 'lg-wal'",
7909                [],
7910                |row| row.get(0),
7911            )
7912            .expect("count exported nodes");
7913        assert_eq!(
7914            exported_count, 1,
7915            "safe_export must include committed rows that are still resident in the WAL"
7916        );
7917    }
7918
7919    #[test]
7920    fn excise_source_removes_searchable_content_after_excision() {
7921        let (db, service) = setup();
7922        {
7923            let conn = sqlite::open_connection(db.path()).expect("conn");
7924            conn.execute(
7925                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
7926                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
7927                [],
7928            )
7929            .expect("insert v1");
7930            conn.execute(
7931                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7932                 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
7933                [],
7934            )
7935            .expect("insert v2");
7936            conn.execute(
7937                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7938                 VALUES ('ck1', 'lg1', 'hello world', 100)",
7939                [],
7940            )
7941            .expect("insert chunk");
7942        }
7943        service.excise_source("source-2").expect("excise");
7944        {
7945            let conn = sqlite::open_connection(db.path()).expect("conn");
7946            let fts_count: i64 = conn
7947                .query_row(
7948                    "SELECT count(*) FROM fts_nodes WHERE chunk_id = 'ck1'",
7949                    [],
7950                    |row| row.get(0),
7951                )
7952                .expect("fts count");
7953            assert_eq!(
7954                fts_count, 0,
7955                "excised content should not remain searchable after excise"
7956            );
7957        }
7958    }
7959
7960    #[cfg(feature = "sqlite-vec")]
7961    #[test]
7962    fn excise_source_cleans_chunks_and_vec_rows_for_excised_version() {
7963        let (db, service) = setup();
7964        {
7965            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7966            service
7967                .schema_manager
7968                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
7969                .expect("ensure vec profile");
7970            conn.execute(
7971                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
7972                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
7973                [],
7974            )
7975            .expect("insert v1");
7976            conn.execute(
7977                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7978                 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
7979                [],
7980            )
7981            .expect("insert v2");
7982            conn.execute(
7983                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7984                 VALUES ('ck1', 'lg1', 'new content', 200)",
7985                [],
7986            )
7987            .expect("insert chunk");
7988            conn.execute(
7989                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('ck1', zeroblob(16))",
7990                [],
7991            )
7992            .expect("insert vec row");
7993        }
7994
7995        service.excise_source("source-2").expect("excise");
7996
7997        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7998        let active_row: String = conn
7999            .query_row(
8000                "SELECT row_id FROM nodes WHERE logical_id = 'lg1' AND superseded_at IS NULL",
8001                [],
8002                |row| row.get(0),
8003            )
8004            .expect("restored active row");
8005        assert_eq!(active_row, "r1");
8006        let chunk_count: i64 = conn
8007            .query_row(
8008                "SELECT count(*) FROM chunks WHERE node_logical_id = 'lg1'",
8009                [],
8010                |row| row.get(0),
8011            )
8012            .expect("chunk count");
8013        assert_eq!(
8014            chunk_count, 0,
8015            "excised source content must not survive as chunks"
8016        );
8017        let vec_count: i64 = conn
8018            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
8019                row.get(0)
8020            })
8021            .expect("vec count");
8022        assert_eq!(vec_count, 0, "excised source vec rows must be removed");
8023        let fts_count: i64 = conn
8024            .query_row(
8025                "SELECT count(*) FROM fts_nodes WHERE node_logical_id = 'lg1'",
8026                [],
8027                |row| row.get(0),
8028            )
8029            .expect("fts count");
8030        assert_eq!(
8031            fts_count, 0,
8032            "excised source content must not remain searchable"
8033        );
8034    }
8035
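    // The manifest's page_count must describe the exported file itself (PRAGMA page_count), not the live database.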
8036    #[test]
8037    fn export_page_count_matches_exported_file() {
8038        let (_db, service) = setup();
8039        let export_dir = tempfile::TempDir::new().expect("temp dir");
8040        let export_path = export_dir.path().join("page-count.db");
8041
8042        let manifest = service
8043            .safe_export(
8044                &export_path,
8045                SafeExportOptions {
8046                    force_checkpoint: false,
8047                },
8048            )
8049            .expect("export");
8050
8051        let exported = sqlite::open_connection(&export_path).expect("open exported db");
8052        let actual_page_count: u64 = exported
8053            .query_row("PRAGMA page_count", [], |row| row.get(0))
8054            .expect("page_count from exported file");
8055
8056        assert_eq!(
8057            manifest.page_count, actual_page_count,
8058            "manifest page_count must match the exported file's PRAGMA page_count"
8059        );
8060    }
8061
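    // A successful export must clean up after itself: no leftover .tmp files in the target directory.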
8062    #[test]
8063    fn no_temp_file_after_successful_export() {
8064        let (_db, service) = setup();
8065        let export_dir = tempfile::TempDir::new().expect("temp dir");
8066        let export_path = export_dir.path().join("no-tmp.db");
8067
8068        service
8069            .safe_export(
8070                &export_path,
8071                SafeExportOptions {
8072                    force_checkpoint: false,
8073                },
8074            )
8075            .expect("export");
8076
8077        let tmp_files: Vec<_> = fs::read_dir(export_dir.path())
8078            .expect("read export dir")
8079            .filter_map(Result::ok)
8080            .filter(|e| e.path().extension().is_some_and(|ext| ext == "tmp"))
8081            .collect();
8082
8083        assert!(
8084            tmp_files.is_empty(),
8085            "no .tmp files should remain after a successful export, found: {tmp_files:?}"
8086        );
8087    }
8088
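    // The sidecar manifest must be valid JSON and contain the expected top-level fields.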
8089    #[test]
8090    fn export_manifest_is_valid_json() {
8091        let (_db, service) = setup();
8092        let export_dir = tempfile::TempDir::new().expect("temp dir");
8093        let export_path = export_dir.path().join("valid-json.db");
8094
8095        service
8096            .safe_export(
8097                &export_path,
8098                SafeExportOptions {
8099                    force_checkpoint: false,
8100                },
8101            )
8102            .expect("export");
8103
8104        let manifest_path = export_dir.path().join("valid-json.db.export-manifest.json");
8105        let manifest_contents = fs::read_to_string(&manifest_path).expect("read manifest");
8106        let parsed: serde_json::Value =
8107            serde_json::from_str(&manifest_contents).expect("manifest must be valid JSON");
8108
8109        assert!(
8110            parsed.get("exported_at").is_some(),
8111            "manifest must contain exported_at"
8112        );
8113        assert!(
8114            parsed.get("sha256").is_some(),
8115            "manifest must contain sha256"
8116        );
8117        assert!(
8118            parsed.get("schema_version").is_some(),
8119            "manifest must contain schema_version"
8120        );
8121        assert!(
8122            parsed.get("protocol_version").is_some(),
8123            "manifest must contain protocol_version"
8124        );
8125        assert!(
8126            parsed.get("page_count").is_some(),
8127            "manifest must contain page_count"
8128        );
8129    }
8130
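    // A dry-run purge must report the would-be deletions without removing any rows.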
8131    #[test]
8132    fn provenance_purge_dry_run_reports_counts() {
8133        let (db, service) = setup();
8134        {
8135            let conn = sqlite::open_connection(db.path()).expect("conn");
8136            conn.execute(
8137                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8138                 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
8139                [],
8140            )
8141            .expect("insert p1");
8142            conn.execute(
8143                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8144                 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 200)",
8145                [],
8146            )
8147            .expect("insert p2");
8148            conn.execute(
8149                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8150                 VALUES ('p3', 'excise', 'lg3', 'src-1', 300)",
8151                [],
8152            )
8153            .expect("insert p3");
8154        }
8155
8156        let options = super::ProvenancePurgeOptions {
8157            dry_run: true,
8158            preserve_event_types: Vec::new(),
8159        };
8160        let report = service
8161            .purge_provenance_events(250, &options)
8162            .expect("dry run purge");
8163
8164        assert_eq!(report.events_deleted, 2);
8165        assert_eq!(report.events_preserved, 1);
8166        assert!(report.oldest_remaining.is_some());
8167
8168        let conn = sqlite::open_connection(db.path()).expect("conn");
8169        let total: i64 = conn
8170            .query_row("SELECT count(*) FROM provenance_events", [], |row| {
8171                row.get(0)
8172            })
8173            .expect("count");
8174        assert_eq!(total, 3, "dry_run must not delete any events");
8175    }
8176
8177    #[test]
8178    fn provenance_purge_deletes_old_events() {
8179        let (db, service) = setup();
8180        {
8181            let conn = sqlite::open_connection(db.path()).expect("conn");
8182            conn.execute(
8183                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8184                 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
8185                [],
8186            )
8187            .expect("insert p1");
8188            conn.execute(
8189                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8190                 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 200)",
8191                [],
8192            )
8193            .expect("insert p2");
8194        }
8195
8196        let options = super::ProvenancePurgeOptions {
8197            dry_run: false,
8198            preserve_event_types: Vec::new(),
8199        };
8200        let report = service
8201            .purge_provenance_events(150, &options)
8202            .expect("purge");
8203
8204        assert_eq!(report.events_deleted, 1);
8205        assert_eq!(report.events_preserved, 1);
8206        assert_eq!(report.oldest_remaining, Some(200));
8207
8208        let conn = sqlite::open_connection(db.path()).expect("conn");
8209        let remaining: i64 = conn
8210            .query_row("SELECT count(*) FROM provenance_events", [], |row| {
8211                row.get(0)
8212            })
8213            .expect("count");
8214        assert_eq!(remaining, 1);
8215    }
8216
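    // Event types listed in preserve_event_types must survive the purge even when older than the cutoff.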
8217    #[test]
8218    fn provenance_purge_preserves_specified_types() {
8219        let (db, service) = setup();
8220        {
8221            let conn = sqlite::open_connection(db.path()).expect("conn");
8222            conn.execute(
8223                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8224                 VALUES ('p1', 'excise', 'lg1', 'src-1', 100)",
8225                [],
8226            )
8227            .expect("insert p1");
8228            conn.execute(
8229                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8230                 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 100)",
8231                [],
8232            )
8233            .expect("insert p2");
8234            conn.execute(
8235                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8236                 VALUES ('p3', 'node_insert', 'lg3', 'src-1', 100)",
8237                [],
8238            )
8239            .expect("insert p3");
8240        }
8241
8242        let options = super::ProvenancePurgeOptions {
8243            dry_run: false,
8244            preserve_event_types: vec!["excise".to_owned()],
8245        };
8246        let report = service
8247            .purge_provenance_events(500, &options)
8248            .expect("purge");
8249
8250        assert_eq!(report.events_deleted, 2);
8251        assert_eq!(report.events_preserved, 1);
8252
8253        let conn = sqlite::open_connection(db.path()).expect("conn");
8254        let remaining_type: String = conn
8255            .query_row("SELECT event_type FROM provenance_events", [], |row| {
8256                row.get(0)
8257            })
8258            .expect("remaining event type");
8259        assert_eq!(remaining_type, "excise");
8260    }
8261
8262    #[test]
8263    fn provenance_purge_noop_with_zero_timestamp() {
8264        let (db, service) = setup();
8265        {
8266            let conn = sqlite::open_connection(db.path()).expect("conn");
8267            conn.execute(
8268                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8269                 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
8270                [],
8271            )
8272            .expect("insert p1");
8273        }
8274
8275        let options = super::ProvenancePurgeOptions {
8276            dry_run: false,
8277            preserve_event_types: Vec::new(),
8278        };
8279        let report = service.purge_provenance_events(0, &options).expect("purge");
8280
8281        assert_eq!(report.events_deleted, 0);
8282        assert_eq!(report.events_preserved, 1);
8283        assert_eq!(report.oldest_remaining, Some(100));
8284    }
8285
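    // Restoring a node must not resurrect an edge whose other endpoint has been purged (dangling-edge guard).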
8286    #[test]
8287    fn restore_skips_edge_when_counterpart_purged() {
8288        let (db, service) = setup();
8289        {
8290            let conn = sqlite::open_connection(db.path()).expect("conn");
8291            // Create node A (doc-1) and node B (doc-2)
8292            conn.execute(
8293                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8294                 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
8295                [],
8296            )
8297            .expect("insert node A");
8298            conn.execute(
8299                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8300                 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
8301                [],
8302            )
8303            .expect("insert node B");
8304            // Create edge between A and B
8305            conn.execute(
8306                "INSERT INTO edges \
8307                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
8308                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
8309                [],
8310            )
8311            .expect("insert edge");
8312            // Retire both A and B, and the edge
8313            conn.execute(
8314                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8315                 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
8316                [],
8317            )
8318            .expect("insert retire event A");
8319            conn.execute(
8320                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8321                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
8322                [],
8323            )
8324            .expect("insert edge retire event");
8325            conn.execute(
8326                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
8327                [],
8328            )
8329            .expect("retire node A");
8330            conn.execute(
8331                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-2'",
8332                [],
8333            )
8334            .expect("retire node B");
8335            conn.execute(
8336                "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
8337                [],
8338            )
8339            .expect("retire edge");
8340            // Simulate purge of B: delete node rows but leave the edge intact
8341            // to reproduce the dangling-edge scenario the validation guards against.
8342            conn.execute("DELETE FROM nodes WHERE logical_id = 'doc-2'", [])
8343                .expect("purge node B rows");
8344        }
8345
8346        // Restore A — the edge should be skipped because B has no active node
8347        let report = service.restore_logical_id("doc-1").expect("restore A");
8348        assert!(!report.was_noop);
8349        assert_eq!(report.restored_node_rows, 1);
8350        assert_eq!(report.restored_edge_rows, 0, "edge should not be restored");
8351        assert_eq!(report.skipped_edges.len(), 1);
8352        assert_eq!(report.skipped_edges[0].edge_logical_id, "edge-1");
8353        assert_eq!(report.skipped_edges[0].missing_endpoint, "doc-2");
8354
8355        // Verify the edge is still retired in the database
8356        let conn = sqlite::open_connection(db.path()).expect("conn");
8357        let active_edge_count: i64 = conn
8358            .query_row(
8359                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
8360                [],
8361                |row| row.get(0),
8362            )
8363            .expect("active edge count");
8364        assert_eq!(active_edge_count, 0, "edge must remain retired");
8365    }
8366
8367    #[test]
8368    fn restore_restores_edges_to_active_nodes() {
8369        let (db, service) = setup();
8370        {
8371            let conn = sqlite::open_connection(db.path()).expect("conn");
8372            // Create node A and node B (B stays active)
8373            conn.execute(
8374                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8375                 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
8376                [],
8377            )
8378            .expect("insert node A");
8379            conn.execute(
8380                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8381                 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
8382                [],
8383            )
8384            .expect("insert node B");
8385            // Create edge between A and B
8386            conn.execute(
8387                "INSERT INTO edges \
8388                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
8389                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
8390                [],
8391            )
8392            .expect("insert edge");
8393            // Retire only A
8394            conn.execute(
8395                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8396                 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
8397                [],
8398            )
8399            .expect("insert retire event A");
8400            conn.execute(
8401                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8402                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
8403                [],
8404            )
8405            .expect("insert edge retire event");
8406            conn.execute(
8407                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
8408                [],
8409            )
8410            .expect("retire node A");
8411            conn.execute(
8412                "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
8413                [],
8414            )
8415            .expect("retire edge");
8416        }
8417
8418        // Restore A — B is active, so the edge should be restored normally
8419        let report = service.restore_logical_id("doc-1").expect("restore A");
8420        assert!(!report.was_noop);
8421        assert_eq!(report.restored_node_rows, 1);
8422        assert!(report.restored_edge_rows > 0, "edge should be restored");
8423        assert!(
8424            report.skipped_edges.is_empty(),
8425            "no edges should be skipped"
8426        );
8427
8428        let conn = sqlite::open_connection(db.path()).expect("conn");
8429        let active_edge_count: i64 = conn
8430            .query_row(
8431                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
8432                [],
8433                |row| row.get(0),
8434            )
8435            .expect("active edge count");
8436        assert_eq!(active_edge_count, 1, "edge must be active");
8437    }
8438
8439    #[test]
8440    fn restore_restores_edges_when_both_restored() {
8441        let (db, service) = setup();
8442        {
8443            let conn = sqlite::open_connection(db.path()).expect("conn");
8444            // Create node A and node B
8445            conn.execute(
8446                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8447                 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
8448                [],
8449            )
8450            .expect("insert node A");
8451            conn.execute(
8452                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8453                 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
8454                [],
8455            )
8456            .expect("insert node B");
8457            // Create edge between A and B
8458            conn.execute(
8459                "INSERT INTO edges \
8460                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
8461                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
8462                [],
8463            )
8464            .expect("insert edge");
8465            // Retire both A and B
8466            conn.execute(
8467                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8468                 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
8469                [],
8470            )
8471            .expect("insert retire event A");
8472            conn.execute(
8473                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8474                 VALUES ('evt-retire-b', 'node_retire', 'doc-2', 'forget-1', 200, '')",
8475                [],
8476            )
8477            .expect("insert retire event B");
8478            conn.execute(
8479                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8480                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
8481                [],
8482            )
8483            .expect("insert edge retire event");
8484            conn.execute(
8485                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
8486                [],
8487            )
8488            .expect("retire node A");
8489            conn.execute(
8490                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-2'",
8491                [],
8492            )
8493            .expect("retire node B");
8494            conn.execute(
8495                "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
8496                [],
8497            )
8498            .expect("retire edge");
8499        }
8500
8501        // Restore B first — edge is skipped because A is still retired
8502        let report_b = service.restore_logical_id("doc-2").expect("restore B");
8503        assert!(!report_b.was_noop);
8504
8505        // Restore A — B is now active, so the edge should be restored
8506        let report_a = service.restore_logical_id("doc-1").expect("restore A");
8507        assert!(!report_a.was_noop);
8508        assert_eq!(report_a.restored_node_rows, 1);
8509        assert!(
8510            report_a.restored_edge_rows > 0,
8511            "edge should be restored when both endpoints active"
8512        );
8513        assert!(
8514            report_a.skipped_edges.is_empty(),
8515            "no edges should be skipped"
8516        );
8517
8518        let conn = sqlite::open_connection(db.path()).expect("conn");
8519        let active_edge_count: i64 = conn
8520            .query_row(
8521                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
8522                [],
8523                |row| row.get(0),
8524            )
8525            .expect("active edge count");
8526        assert_eq!(
8527            active_edge_count, 1,
8528            "edge must be active after both endpoints restored"
8529        );
8530    }
8531
8532    // ── FTS property schema end-to-end tests ──────────────────────────
8533
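    // Register / describe / list / update / remove round-trip for a scalar property schema.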
8534    #[test]
8535    fn fts_property_schema_crud_round_trip() {
8536        let (_db, service) = setup();
8537
8538        // Register
8539        let record = service
8540            .register_fts_property_schema(
8541                "Meeting",
8542                &["$.title".to_owned(), "$.summary".to_owned()],
8543                None,
8544            )
8545            .expect("register");
8546        assert_eq!(record.kind, "Meeting");
8547        assert_eq!(record.property_paths, vec!["$.title", "$.summary"]);
8548        assert_eq!(record.separator, " ");
8549        assert_eq!(record.format_version, 1);
8550
8551        // Describe
8552        let described = service
8553            .describe_fts_property_schema("Meeting")
8554            .expect("describe")
8555            .expect("should exist");
8556        assert_eq!(described, record);
8557
8558        // Describe missing kind
8559        let missing = service
8560            .describe_fts_property_schema("NoSuchKind")
8561            .expect("describe missing");
8562        assert!(missing.is_none());
8563
8564        // List
8565        let list = service.list_fts_property_schemas().expect("list");
8566        assert_eq!(list.len(), 1);
8567        assert_eq!(list[0].kind, "Meeting");
8568
8569        // Update (idempotent upsert)
8570        let updated = service
8571            .register_fts_property_schema(
8572                "Meeting",
8573                &["$.title".to_owned(), "$.notes".to_owned()],
8574                Some("\n"),
8575            )
8576            .expect("update");
8577        assert_eq!(updated.property_paths, vec!["$.title", "$.notes"]);
8578        assert_eq!(updated.separator, "\n");
8579
8580        // Remove
8581        service
8582            .remove_fts_property_schema("Meeting")
8583            .expect("remove");
8584        let after_remove = service
8585            .describe_fts_property_schema("Meeting")
8586            .expect("describe after remove");
8587        assert!(after_remove.is_none());
8588
8589        // Removing a non-existent schema is an error
8590        let err = service.remove_fts_property_schema("Meeting");
8591        assert!(err.is_err());
8592    }
8593
8594    #[test]
8595    fn describe_fts_property_schema_round_trips_recursive_entries() {
8596        let (_db, service) = setup();
8597
8598        let entries = vec![
8599            FtsPropertyPathSpec::scalar("$.title"),
8600            FtsPropertyPathSpec::recursive("$.payload"),
8601        ];
8602        let exclude = vec!["$.payload.private".to_owned()];
8603        let registered = service
8604            .register_fts_property_schema_with_entries(
8605                "KnowledgeItem",
8606                &entries,
8607                Some(" "),
8608                &exclude,
8609                crate::rebuild_actor::RebuildMode::Eager,
8610            )
8611            .expect("register recursive");
8612
8613        // The register entry point echoes back the fully-populated
8614        // record via the same load helper used by describe/list.
8615        assert_eq!(registered.entries, entries);
8616        assert_eq!(registered.exclude_paths, exclude);
8617        assert_eq!(registered.property_paths, vec!["$.title", "$.payload"]);
8618
8619        let described = service
8620            .describe_fts_property_schema("KnowledgeItem")
8621            .expect("describe")
8622            .expect("should exist");
8623        assert_eq!(described.kind, "KnowledgeItem");
8624        assert_eq!(described.entries, entries);
8625        assert_eq!(described.exclude_paths, exclude);
8626        assert_eq!(described.property_paths, vec!["$.title", "$.payload"]);
8627        assert_eq!(described.separator, " ");
8628        assert_eq!(described.format_version, 1);
8629    }
8630
8631    #[test]
8632    fn list_fts_property_schemas_round_trips_recursive_entries() {
8633        let (_db, service) = setup();
8634
8635        let entries = vec![
8636            FtsPropertyPathSpec::scalar("$.title"),
8637            FtsPropertyPathSpec::recursive("$.payload"),
8638        ];
8639        let exclude = vec!["$.payload.secret".to_owned()];
8640        service
8641            .register_fts_property_schema_with_entries(
8642                "KnowledgeItem",
8643                &entries,
8644                Some(" "),
8645                &exclude,
8646                crate::rebuild_actor::RebuildMode::Eager,
8647            )
8648            .expect("register recursive");
8649
8650        let listed = service.list_fts_property_schemas().expect("list");
8651        assert_eq!(listed.len(), 1);
8652        let record = &listed[0];
8653        assert_eq!(record.kind, "KnowledgeItem");
8654        assert_eq!(record.entries, entries);
8655        assert_eq!(record.exclude_paths, exclude);
8656        assert_eq!(record.property_paths, vec!["$.title", "$.payload"]);
8657    }
8658
8659    #[test]
8660    fn describe_fts_property_schema_round_trips_scalar_only_entries() {
8661        let (_db, service) = setup();
8662
8663        service
8664            .register_fts_property_schema(
8665                "Meeting",
8666                &["$.title".to_owned(), "$.summary".to_owned()],
8667                None,
8668            )
8669            .expect("register scalar");
8670
8671        let described = service
8672            .describe_fts_property_schema("Meeting")
8673            .expect("describe")
8674            .expect("should exist");
8675        assert_eq!(described.property_paths, vec!["$.title", "$.summary"]);
8676        assert_eq!(described.entries.len(), 2);
8677        for entry in &described.entries {
8678            assert_eq!(
8679                entry.mode,
8680                FtsPropertyPathMode::Scalar,
8681                "scalar-only schema should deserialize every entry as Scalar"
8682            );
8683        }
8684        assert!(described.exclude_paths.is_empty());
8685    }
8686
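    // Restore must repopulate the property FTS row from the registered schema and the node's canonical properties.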
8687    #[test]
8688    fn restore_reestablishes_property_fts_visibility() {
8689        let (db, service) = setup();
8690        {
8691            let conn = sqlite::open_connection(db.path()).expect("conn");
8692            // Register a property schema for Document kind.
8693            conn.execute(
8694                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
8695                 VALUES ('Document', '[\"$.title\", \"$.body\"]', ' ')",
8696                [],
8697            )
8698            .expect("register schema");
8699            // Insert an active node with extractable properties.
8700            conn.execute(
8701                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8702                 VALUES ('row-1', 'doc-1', 'Document', '{\"title\":\"Budget\",\"body\":\"Q3 forecast\"}', 100, 'seed')",
8703                [],
8704            )
8705            .expect("insert node");
8706            // Insert a chunk so restore has something to work with for FTS.
8707            conn.execute(
8708                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
8709                 VALUES ('chunk-1', 'doc-1', 'budget text', 100)",
8710                [],
8711            )
8712            .expect("insert chunk");
8713            // Insert property FTS row (as write path would).
8714            conn.execute(
8715                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
8716                 VALUES ('doc-1', 'Document', 'Budget Q3 forecast')",
8717                [],
8718            )
8719            .expect("insert property fts");
8720            // Simulate retire: supersede node, clear FTS.
8721            conn.execute(
8722                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8723                 VALUES ('evt-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
8724                [],
8725            )
8726            .expect("retire event");
8727            conn.execute(
8728                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
8729                [],
8730            )
8731            .expect("supersede");
8732            conn.execute("DELETE FROM fts_nodes", [])
8733                .expect("clear chunk fts");
8734            conn.execute("DELETE FROM fts_node_properties", [])
8735                .expect("clear property fts");
8736        }
8737
8738        let report = service.restore_logical_id("doc-1").expect("restore");
8739        assert_eq!(report.restored_property_fts_rows, 1);
8740
8741        // Verify the property FTS row was recreated.
8742        let conn = sqlite::open_connection(db.path()).expect("conn");
8743        let prop_fts_count: i64 = conn
8744            .query_row(
8745                "SELECT count(*) FROM fts_node_properties WHERE node_logical_id = 'doc-1'",
8746                [],
8747                |row| row.get(0),
8748            )
8749            .expect("prop fts count");
8750        assert_eq!(prop_fts_count, 1, "property FTS must be restored");
8751
8752        let text: String = conn
8753            .query_row(
8754                "SELECT text_content FROM fts_node_properties WHERE node_logical_id = 'doc-1'",
8755                [],
8756                |row| row.get(0),
8757            )
8758            .expect("prop fts text");
8759        assert_eq!(text, "Budget Q3 forecast");
8760    }
8761
8762    #[test]
8763    fn safe_export_preserves_fts_property_schemas() {
8764        let (_db, service) = setup();
8765        service
8766            .register_fts_property_schema(
8767                "Goal",
8768                &["$.name".to_owned(), "$.rationale".to_owned()],
8769                None,
8770            )
8771            .expect("register schema");
8772
8773        let export_dir = tempfile::TempDir::new().expect("temp dir");
8774        let export_path = export_dir.path().join("backup.db");
8775        service
8776            .safe_export(
8777                &export_path,
8778                SafeExportOptions {
8779                    force_checkpoint: false,
8780                },
8781            )
8782            .expect("export");
8783
8784        // Open the exported DB and verify the schema survived.
8785        let exported_conn = rusqlite::Connection::open(&export_path).expect("open exported db");
8786        let kind: String = exported_conn
8787            .query_row(
8788                "SELECT kind FROM fts_property_schemas WHERE kind = 'Goal'",
8789                [],
8790                |row| row.get(0),
8791            )
8792            .expect("schema must exist in export");
8793        assert_eq!(kind, "Goal");
8794        let paths_json: String = exported_conn
8795            .query_row(
8796                "SELECT property_paths_json FROM fts_property_schemas WHERE kind = 'Goal'",
8797                [],
8798                |row| row.get(0),
8799            )
8800            .expect("paths must exist");
8801        let paths: Vec<String> = serde_json::from_str(&paths_json).expect("valid json");
8802        assert_eq!(paths, vec!["$.name", "$.rationale"]);
8803    }
8804
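    // End-to-end recovery: export, corrupt and delete derived property FTS rows in the copy,
    // then rebuild projections and verify search, integrity, and semantics are clean again.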
8805    #[test]
8806    #[allow(clippy::too_many_lines)]
8807    fn export_recovery_rebuilds_property_fts_from_canonical_state() {
8808        let (db, service) = setup();
8809        // Register a schema and insert two nodes with extractable properties.
8810        service
8811            .register_fts_property_schema("Goal", &["$.name".to_owned()], None)
8812            .expect("register");
8813        {
8814            let conn = sqlite::open_connection(db.path()).expect("conn");
8815            conn.execute(
8816                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8817                 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
8818                [],
8819            )
8820            .expect("insert node 1");
8821            conn.execute(
8822                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
8823                 VALUES ('goal-1', 'Goal', 'Ship v2')",
8824                [],
8825            )
8826            .expect("insert property FTS row 1");
8827            conn.execute(
8828                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8829                 VALUES ('row-2', 'goal-2', 'Goal', '{\"name\":\"Launch redesign\"}', 100, 'seed')",
8830                [],
8831            )
8832            .expect("insert node 2");
8833            conn.execute(
8834                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
8835                 VALUES ('goal-2', 'Goal', 'Launch redesign')",
8836                [],
8837            )
8838            .expect("insert property FTS row 2");
8839        }
8840
8841        // Export.
8842        let export_dir = tempfile::TempDir::new().expect("temp dir");
8843        let export_path = export_dir.path().join("backup.db");
8844        service
8845            .safe_export(
8846                &export_path,
8847                SafeExportOptions {
8848                    force_checkpoint: false,
8849                },
8850            )
8851            .expect("export");
8852
8853        // Corrupt the derived rows: replace correct text with wrong text for
8854        // goal-1, and delete the row for goal-2 entirely. This exercises both
8855        // corrupted-but-present rows and missing rows in the same recovery.
8856        {
8857            let conn = rusqlite::Connection::open(&export_path).expect("open export");
8858            conn.execute(
8859                "DELETE FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
8860                [],
8861            )
8862            .expect("delete old row");
8863            conn.execute(
8864                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
8865                 VALUES ('goal-1', 'Goal', 'completely wrong stale text')",
8866                [],
8867            )
8868            .expect("insert corrupted row");
8869            conn.execute(
8870                "DELETE FROM fts_node_properties WHERE node_logical_id = 'goal-2'",
8871                [],
8872            )
8873            .expect("delete goal-2 row");
8874        }
8875
8876        // Open the exported DB and rebuild projections from canonical state.
8877        let schema = Arc::new(SchemaManager::new());
8878        let exported_service = AdminService::new(&export_path, Arc::clone(&schema));
8879        exported_service
8880            .rebuild_projections(ProjectionTarget::Fts)
8881            .expect("rebuild");
8882
8883        // Verify text_search(...) returns the correct result for goal-1's
8884        // canonical property ("Ship") — not the corrupted text.
8885        let coordinator = ExecutionCoordinator::open(
8886            &export_path,
8887            Arc::clone(&schema),
8888            None,
8889            1,
8890            Arc::new(TelemetryCounters::default()),
8891            None,
8892        )
8893        .expect("coordinator");
8894
8895        let compiled = QueryBuilder::nodes("Goal")
8896            .text_search("Ship", 10)
8897            .limit(10)
8898            .compile()
8899            .expect("compile");
8900        let rows = coordinator
8901            .execute_compiled_read(&compiled)
8902            .expect("execute read");
8903        assert_eq!(rows.nodes.len(), 1);
8904        assert_eq!(rows.nodes[0].logical_id, "goal-1");
8905
8906        // Verify text_search(...) recovers the previously missing goal-2 row.
8907        let compiled2 = QueryBuilder::nodes("Goal")
8908            .text_search("redesign", 10)
8909            .limit(10)
8910            .compile()
8911            .expect("compile");
8912        let rows2 = coordinator
8913            .execute_compiled_read(&compiled2)
8914            .expect("execute read");
8915        assert_eq!(rows2.nodes.len(), 1);
8916        assert_eq!(rows2.nodes[0].logical_id, "goal-2");
8917
8918        // The corrupted text must not be searchable after recovery.
8919        let compiled3 = QueryBuilder::nodes("Goal")
8920            .text_search("stale", 10)
8921            .limit(10)
8922            .compile()
8923            .expect("compile");
8924        let rows3 = coordinator
8925            .execute_compiled_read(&compiled3)
8926            .expect("execute read");
8927        assert_eq!(
8928            rows3.nodes.len(),
8929            0,
8930            "corrupted text must not appear in search after rebuild"
8931        );
8932
8933        // Verify integrity and semantics are clean after recovery.
8934        let integrity = exported_service.check_integrity().expect("integrity");
8935        assert_eq!(integrity.missing_property_fts_rows, 0);
8936        let semantics = exported_service.check_semantics().expect("semantics");
8937        assert_eq!(semantics.drifted_property_fts_rows, 0);
8938        assert_eq!(semantics.orphaned_property_fts_rows, 0);
8939        assert_eq!(semantics.duplicate_property_fts_rows, 0);
8940    }
8941
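    // A registered path that extracts nothing from a node's properties must not be reported as a missing FTS row.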
8942    #[test]
8943    fn check_integrity_no_false_positives_for_empty_extraction() {
8944        let (db, service) = setup();
8945        {
8946            let conn = sqlite::open_connection(db.path()).expect("conn");
8947            // Register a schema that looks for $.searchable
8948            conn.execute(
8949                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
8950                 VALUES ('Ticket', '[\"$.searchable\"]', ' ')",
8951                [],
8952            )
8953            .expect("register schema");
8954            // Insert a node whose properties do NOT contain $.searchable,
8955            // so it correctly has no property FTS row.
8956            conn.execute(
8957                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8958                 VALUES ('row-1', 'ticket-1', 'Ticket', '{\"status\":\"open\"}', 100, 'seed')",
8959                [],
8960            )
8961            .expect("insert node");
8962        }
8963
8964        let report = service.check_integrity().expect("integrity");
8965        assert_eq!(
8966            report.missing_property_fts_rows, 0,
8967            "node with no extractable values must not be counted as missing"
8968        );
8969    }
8970
8971    #[test]
8972    fn check_integrity_detects_genuinely_missing_property_fts_rows() {
8973        let (db, service) = setup();
8974        {
8975            let conn = sqlite::open_connection(db.path()).expect("conn");
8976            conn.execute(
8977                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
8978                 VALUES ('Ticket', '[\"$.title\"]', ' ')",
8979                [],
8980            )
8981            .expect("register schema");
8982            // Insert a node WITH an extractable $.title but no property FTS row.
8983            conn.execute(
8984                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8985                 VALUES ('row-1', 'ticket-1', 'Ticket', '{\"title\":\"fix login bug\"}', 100, 'seed')",
8986                [],
8987            )
8988            .expect("insert node");
8989        }
8990
8991        let report = service.check_integrity().expect("integrity");
8992        assert_eq!(
8993            report.missing_property_fts_rows, 1,
8994            "node with extractable values but no property FTS row must be detected"
8995        );
8996    }
8997
8998    #[test]
8999    fn rebuild_projections_fts_restores_missing_property_fts_rows() {
9000        let (db, service) = setup();
9001        {
9002            let conn = sqlite::open_connection(db.path()).expect("conn");
9003            conn.execute(
9004                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
9005                 VALUES ('Goal', '[\"$.name\"]', ' ')",
9006                [],
9007            )
9008            .expect("register schema");
9009            conn.execute(
9010                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9011                 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
9012                [],
9013            )
9014            .expect("insert node");
9015            // Deliberately do NOT insert a property FTS row.
9016        }
9017
9018        let report = service
9019            .rebuild_projections(ProjectionTarget::Fts)
9020            .expect("rebuild");
9021        assert!(
9022            report.rebuilt_rows >= 1,
9023            "rebuild must insert at least one property FTS row"
9024        );
9025
9026        let conn = sqlite::open_connection(db.path()).expect("conn");
9027        let text: String = conn
9028            .query_row(
9029                "SELECT text_content FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
9030                [],
9031                |row| row.get(0),
9032            )
9033            .expect("property FTS row must exist after rebuild");
9034        assert_eq!(text, "Ship v2");
9035    }
9036
9037    #[test]
9038    fn rebuild_missing_projections_fills_gap_for_deleted_property_fts_row() {
9039        let (db, service) = setup();
9040        {
9041            let conn = sqlite::open_connection(db.path()).expect("conn");
9042            conn.execute(
9043                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
9044                 VALUES ('Goal', '[\"$.name\"]', ' ')",
9045                [],
9046            )
9047            .expect("register schema");
9048            conn.execute(
9049                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9050                 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
9051                [],
9052            )
9053            .expect("insert node");
9054            // Insert and then delete the property FTS row to simulate corruption.
9055            conn.execute(
9056                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
9057                 VALUES ('goal-1', 'Goal', 'Ship v2')",
9058                [],
9059            )
9060            .expect("insert property fts");
9061            conn.execute(
9062                "DELETE FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
9063                [],
9064            )
9065            .expect("delete property fts");
9066        }
9067
9068        let report = service
9069            .rebuild_missing_projections()
9070            .expect("rebuild missing");
9071        assert!(
9072            report.rebuilt_rows >= 1,
9073            "missing rebuild must insert the gap-fill row"
9074        );
9075
9076        let conn = sqlite::open_connection(db.path()).expect("conn");
9077        let count: i64 = conn
9078            .query_row(
9079                "SELECT count(*) FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
9080                [],
9081                |row| row.get(0),
9082            )
9083            .expect("count");
9084        assert_eq!(
9085            count, 1,
9086            "gap-fill must restore exactly one property FTS row"
9087        );
9088    }
9089
9090    #[test]
9091    fn remove_schema_then_rebuild_cleans_stale_property_fts_rows() {
9092        let (db, service) = setup();
9093        service
9094            .register_fts_property_schema("Goal", &["$.name".to_owned()], None)
9095            .expect("register");
9096        {
9097            let conn = sqlite::open_connection(db.path()).expect("conn");
9098            conn.execute(
9099                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9100                 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
9101                [],
9102            )
9103            .expect("insert node");
9104            // Manually insert a property FTS row (simulating the write path).
9105            conn.execute(
9106                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
9107                 VALUES ('goal-1', 'Goal', 'Ship v2')",
9108                [],
9109            )
9110            .expect("insert property fts");
9111        }
9112
9113        // Remove the schema — stale rows now exist.
9114        service.remove_fts_property_schema("Goal").expect("remove");
9115
9116        // Verify stale rows are detected.
9117        let semantics = service.check_semantics().expect("semantics");
9118        assert_eq!(
9119            semantics.orphaned_property_fts_rows, 1,
9120            "stale property FTS rows must be detected after schema removal"
9121        );
9122
9123        // Full rebuild should clean them.
9124        service
9125            .rebuild_projections(ProjectionTarget::Fts)
9126            .expect("rebuild");
9127
9128        let conn = sqlite::open_connection(db.path()).expect("conn");
9129        let count: i64 = conn
9130            .query_row(
9131                "SELECT count(*) FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
9132                [],
9133                |row| row.get(0),
9134            )
9135            .expect("count");
9136        assert_eq!(
9137            count, 0,
9138            "rebuild after schema removal must delete stale property FTS rows"
9139        );
9140    }
9141
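    // Unit tests for validate_fts_property_paths: accepted path shapes and rejected malformed input.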
9142    mod validate_fts_property_paths_tests {
9143        use super::super::validate_fts_property_paths;
9144
9145        #[test]
9146        fn valid_simple_path() {
9147            assert!(validate_fts_property_paths(&["$.name".to_owned()]).is_ok());
9148        }
9149
9150        #[test]
9151        fn valid_nested_path() {
9152            assert!(validate_fts_property_paths(&["$.address.city".to_owned()]).is_ok());
9153        }
9154
9155        #[test]
9156        fn valid_underscore_segment() {
9157            assert!(validate_fts_property_paths(&["$.a_b".to_owned()]).is_ok());
9158        }
9159
9160        #[test]
9161        fn rejects_bare_prefix() {
9162            let result = validate_fts_property_paths(&["$.".to_owned()]);
9163            assert!(result.is_err(), "path '$.' must be rejected");
9164        }
9165
9166        #[test]
9167        fn rejects_double_dot() {
9168            let result = validate_fts_property_paths(&["$..x".to_owned()]);
9169            assert!(result.is_err(), "path '$..x' must be rejected");
9170        }
9171
9172        #[test]
9173        fn rejects_trailing_dot() {
9174            let result = validate_fts_property_paths(&["$.foo.".to_owned()]);
9175            assert!(result.is_err(), "path '$.foo.' must be rejected");
9176        }
9177
9178        #[test]
9179        fn rejects_space_in_segment() {
9180            let result = validate_fts_property_paths(&["$.foo bar".to_owned()]);
9181            assert!(result.is_err(), "path '$.foo bar' must be rejected");
9182        }
9183
9184        #[test]
9185        fn rejects_bracket_syntax() {
9186            let result = validate_fts_property_paths(&["$.foo[0]".to_owned()]);
9187            assert!(result.is_err(), "path '$.foo[0]' must be rejected");
9188        }
9189
9190        #[test]
9191        fn rejects_duplicates() {
9192            let result = validate_fts_property_paths(&["$.name".to_owned(), "$.name".to_owned()]);
9193            assert!(result.is_err(), "duplicate paths must be rejected");
9194        }
9195
9196        #[test]
9197        fn rejects_empty_list() {
9198            let result = validate_fts_property_paths(&[]);
9199            assert!(result.is_err(), "empty path list must be rejected");
9200        }
9201    }
9202}