//! `fathomdb_engine/admin.rs` — administrative operations for the engine
//! (integrity checks, exports, restores, purges).

1use std::fmt::Write as _;
2use std::fs;
3use std::io;
4use std::path::{Path, PathBuf};
5use std::sync::Arc;
6use std::time::SystemTime;
7
8use fathomdb_schema::{SchemaError, SchemaManager};
9use rusqlite::{DatabaseName, OptionalExtension, TransactionBehavior};
10use serde::{Deserialize, Serialize};
11use sha2::{Digest, Sha256};
12
13use crate::{
14    EngineError, ProjectionRepairReport, ProjectionService,
15    embedder::{QueryEmbedder, QueryEmbedderIdentity},
16    ids::new_id,
17    operational::{
18        OperationalCollectionKind, OperationalCollectionRecord, OperationalCompactionReport,
19        OperationalCurrentRow, OperationalFilterClause, OperationalFilterField,
20        OperationalFilterFieldType, OperationalFilterMode, OperationalFilterValue,
21        OperationalHistoryValidationIssue, OperationalHistoryValidationReport,
22        OperationalMutationRow, OperationalPurgeReport, OperationalReadReport,
23        OperationalReadRequest, OperationalRegisterRequest, OperationalRepairReport,
24        OperationalRetentionActionKind, OperationalRetentionPlanItem,
25        OperationalRetentionPlanReport, OperationalRetentionRunItem, OperationalRetentionRunReport,
26        OperationalSecondaryIndexDefinition, OperationalSecondaryIndexRebuildReport,
27        OperationalTraceReport, extract_secondary_index_entries_for_current,
28        extract_secondary_index_entries_for_mutation, parse_operational_secondary_indexes_json,
29        parse_operational_validation_contract, validate_operational_payload_against_contract,
30    },
31    projection::ProjectionTarget,
32    sqlite,
33};
34
35/// Results of a physical and structural integrity check on the database.
36#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
37pub struct IntegrityReport {
38    pub physical_ok: bool,
39    pub foreign_keys_ok: bool,
40    pub missing_fts_rows: usize,
41    pub missing_property_fts_rows: usize,
42    pub duplicate_active_logical_ids: usize,
43    pub operational_missing_collections: usize,
44    pub operational_missing_last_mutations: usize,
45    pub warnings: Vec<String>,
46}
47
48/// A registered FTS property projection schema for a node kind.
49#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
50pub struct FtsPropertySchemaRecord {
51    /// The node kind this schema applies to.
52    pub kind: String,
53    /// Flat display list of registered JSON property paths
54    /// (e.g. `["$.name", "$.title"]`). For recursive entries this lists
55    /// only the root path; mode information is carried by
56    /// [`Self::entries`].
57    pub property_paths: Vec<String>,
58    /// Full per-entry schema shape with mode
59    /// ([`FtsPropertyPathMode::Scalar`] | [`FtsPropertyPathMode::Recursive`]).
60    /// Read this field for mode-accurate round-trip of the registered
61    /// schema.
62    pub entries: Vec<FtsPropertyPathSpec>,
63    /// Subtree paths excluded from recursive walks. Empty for
64    /// scalar-only schemas or recursive schemas with no exclusions.
65    pub exclude_paths: Vec<String>,
66    /// Separator used when concatenating extracted values.
67    pub separator: String,
68    /// Schema format version.
69    pub format_version: i64,
70}
71
72/// Extraction mode for a single registered FTS property path.
73#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize)]
74#[serde(rename_all = "snake_case")]
75pub enum FtsPropertyPathMode {
76    /// Resolve the path and append the scalar value(s). Matches legacy
77    /// pre-Phase-4 behaviour.
78    #[default]
79    Scalar,
80    /// Recursively walk every scalar leaf rooted at the path. Each leaf
81    /// contributes one entry to the position map.
82    Recursive,
83}
84
85/// A single registered property-FTS path with its extraction mode.
86#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
87pub struct FtsPropertyPathSpec {
88    /// JSON path to the property (must start with `$.`).
89    pub path: String,
90    /// Whether to treat this path as a scalar or recursively walk it.
91    pub mode: FtsPropertyPathMode,
92}
93
94impl FtsPropertyPathSpec {
95    #[must_use]
96    pub fn scalar(path: impl Into<String>) -> Self {
97        Self {
98            path: path.into(),
99            mode: FtsPropertyPathMode::Scalar,
100        }
101    }
102
103    #[must_use]
104    pub fn recursive(path: impl Into<String>) -> Self {
105        Self {
106            path: path.into(),
107            mode: FtsPropertyPathMode::Recursive,
108        }
109    }
110}
111
/// Options controlling how a safe database export is performed.
#[derive(Clone, Copy, Debug)]
pub struct SafeExportOptions {
    /// When true, runs `PRAGMA wal_checkpoint(FULL)` before copying and fails if
    /// any WAL frames could not be applied (busy != 0). Set to false only in
    /// tests that seed a database without WAL mode.
    pub force_checkpoint: bool,
}

impl Default for SafeExportOptions {
    /// Defaults to forcing a checkpoint, the safe choice for real exports.
    fn default() -> Self {
        Self {
            force_checkpoint: true,
        }
    }
}
128
/// Bridge protocol version stamped into export manifests.
///
/// Must match `PROTOCOL_VERSION` in `fathomdb-admin-bridge.rs`.
const EXPORT_PROTOCOL_VERSION: u32 = 1;
131
132/// Manifest describing a completed safe export.
133#[derive(Clone, Debug, Serialize)]
134pub struct SafeExportManifest {
135    /// Unix timestamp (seconds since epoch) when the export was created.
136    pub exported_at: u64,
137    /// SHA-256 hex digest of the exported database file.
138    pub sha256: String,
139    /// Schema version recorded in `fathom_schema_migrations` at export time.
140    pub schema_version: u32,
141    /// Bridge protocol version compiled into this binary.
142    pub protocol_version: u32,
143    /// Number of `SQLite` pages in the exported database file.
144    pub page_count: u64,
145}
146
147/// Report from tracing all rows associated with a given `source_ref`.
148#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
149pub struct TraceReport {
150    pub source_ref: String,
151    pub node_rows: usize,
152    pub edge_rows: usize,
153    pub action_rows: usize,
154    pub operational_mutation_rows: usize,
155    pub node_logical_ids: Vec<String>,
156    pub action_ids: Vec<String>,
157    pub operational_mutation_ids: Vec<String>,
158}
159
160/// An edge that was skipped during a restore because an endpoint is missing.
161#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
162pub struct SkippedEdge {
163    pub edge_logical_id: String,
164    pub missing_endpoint: String,
165}
166
167/// Report from restoring a retired logical ID back to active state.
168#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
169pub struct LogicalRestoreReport {
170    pub logical_id: String,
171    pub was_noop: bool,
172    pub restored_node_rows: usize,
173    pub restored_edge_rows: usize,
174    pub restored_chunk_rows: usize,
175    pub restored_fts_rows: usize,
176    pub restored_property_fts_rows: usize,
177    pub restored_vec_rows: usize,
178    pub skipped_edges: Vec<SkippedEdge>,
179    pub notes: Vec<String>,
180}
181
182/// Report from permanently purging all rows for a logical ID.
183#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
184pub struct LogicalPurgeReport {
185    pub logical_id: String,
186    pub was_noop: bool,
187    pub deleted_node_rows: usize,
188    pub deleted_edge_rows: usize,
189    pub deleted_chunk_rows: usize,
190    pub deleted_fts_rows: usize,
191    pub deleted_vec_rows: usize,
192    pub notes: Vec<String>,
193}
194
195/// Options controlling provenance event purging behavior.
196#[derive(Clone, Debug, Serialize, Deserialize)]
197pub struct ProvenancePurgeOptions {
198    pub dry_run: bool,
199    #[serde(default)]
200    pub preserve_event_types: Vec<String>,
201}
202
203/// Report from a provenance event purge operation.
204#[derive(Clone, Debug, Serialize)]
205pub struct ProvenancePurgeReport {
206    pub events_deleted: u64,
207    pub events_preserved: u64,
208    pub oldest_remaining: Option<i64>,
209}
210
211/// Service providing administrative operations (integrity checks, exports, restores, purges).
212#[derive(Debug)]
213pub struct AdminService {
214    database_path: PathBuf,
215    schema_manager: Arc<SchemaManager>,
216    projections: ProjectionService,
217}
218
219/// Results of a semantic consistency check on the graph data.
220#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
221pub struct SemanticReport {
222    /// Chunks whose `node_logical_id` has no active node.
223    pub orphaned_chunks: usize,
224    /// Active nodes with a NULL `source_ref` (loss of provenance).
225    pub null_source_ref_nodes: usize,
226    /// Steps referencing a `run_id` that does not exist in the runs table.
227    pub broken_step_fk: usize,
228    /// Actions referencing a `step_id` that does not exist in the steps table.
229    pub broken_action_fk: usize,
230    /// FTS rows whose `chunk_id` does not exist in the chunks table.
231    pub stale_fts_rows: usize,
232    /// FTS rows whose node has been superseded (`superseded_at` IS NOT NULL on all active rows).
233    pub fts_rows_for_superseded_nodes: usize,
234    /// Property FTS rows whose node has been superseded or does not exist.
235    pub stale_property_fts_rows: usize,
236    /// Property FTS rows whose kind has no registered FTS property schema.
237    pub orphaned_property_fts_rows: usize,
238    /// Property FTS rows whose `kind` does not match the active node's actual kind.
239    pub mismatched_kind_property_fts_rows: usize,
240    /// Active logical IDs with more than one `fts_node_properties` row.
241    pub duplicate_property_fts_rows: usize,
242    /// Property FTS rows whose `text_content` no longer matches the canonical extraction.
243    pub drifted_property_fts_rows: usize,
244    /// Active edges where at least one endpoint has no active node.
245    pub dangling_edges: usize,
246    /// `logical_ids` where every version has been superseded (no active row).
247    pub orphaned_supersession_chains: usize,
248    /// Vec rows whose backing chunk no longer exists in the chunks table.
249    pub stale_vec_rows: usize,
250    /// Compatibility counter for vec rows whose chunk points at missing node history.
251    pub vec_rows_for_superseded_nodes: usize,
252    /// Latest-state keys whose latest mutation is a `put` but no current row exists.
253    pub missing_operational_current_rows: usize,
254    /// Current rows that do not match the latest mutation state.
255    pub stale_operational_current_rows: usize,
256    /// Mutations written after the owning collection was disabled.
257    pub disabled_collection_mutations: usize,
258    /// Access metadata rows whose `logical_id` no longer has any node history.
259    pub orphaned_last_access_metadata_rows: usize,
260    pub warnings: Vec<String>,
261}
262
263/// Configuration for regenerating vector embeddings.
264///
265/// 0.4.0 architectural invariant: vector identity is the embedder's
266/// responsibility, not the regeneration config's. This struct carries only
267/// WHERE the vectors live and HOW to chunk/preprocess them — never WHAT
268/// model produced them. The embedder supplied at regen-call time is the
269/// single source of truth for `model_identity`, `model_version`,
270/// `dimension`, and `normalization_policy`; the resulting vector profile
271/// is stamped directly from [`QueryEmbedder::identity`].
272#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
273#[serde(rename_all = "snake_case", deny_unknown_fields)]
274pub struct VectorRegenerationConfig {
275    pub profile: String,
276    pub table_name: String,
277    pub chunking_policy: String,
278    pub preprocessing_policy: String,
279}
280
281/// Report from a vector embedding regeneration run.
282#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
283pub struct VectorRegenerationReport {
284    pub profile: String,
285    pub table_name: String,
286    pub dimension: usize,
287    pub total_chunks: usize,
288    pub regenerated_rows: usize,
289    pub contract_persisted: bool,
290    pub notes: Vec<String>,
291}
292
// Limits and format versions for the admin operations in this file.
// NOTE(review): the enforcing code is not visible in this part of the file; the
// one-line notes below are read off the constant names — confirm at the use sites.

/// Current vector contract format version.
const CURRENT_VECTOR_CONTRACT_FORMAT_VERSION: i64 = 1;
/// Upper bound on a profile name's length.
const MAX_PROFILE_LEN: usize = 128;
/// Upper bound on a policy string's length.
const MAX_POLICY_LEN: usize = 128;
/// Upper bound on serialized contract JSON size (32 KiB).
const MAX_CONTRACT_JSON_BYTES: usize = 32 * 1024;
/// Upper bound on audit metadata size in bytes.
const MAX_AUDIT_METADATA_BYTES: usize = 2048;
/// Default row limit for operational reads.
const DEFAULT_OPERATIONAL_READ_LIMIT: usize = 100;
/// Maximum row limit for operational reads.
const MAX_OPERATIONAL_READ_LIMIT: usize = 1000;
300
301/// Thread-safe handle to the shared [`AdminService`].
302#[derive(Clone, Debug)]
303pub struct AdminHandle {
304    inner: Arc<AdminService>,
305}
306
307impl AdminHandle {
308    /// Wrap an [`AdminService`] in a shared handle.
309    #[must_use]
310    pub fn new(service: AdminService) -> Self {
311        Self {
312            inner: Arc::new(service),
313        }
314    }
315
316    /// Clone the inner `Arc` to the [`AdminService`].
317    #[must_use]
318    pub fn service(&self) -> Arc<AdminService> {
319        Arc::clone(&self.inner)
320    }
321}
322
323impl AdminService {
324    /// Create a new admin service for the database at the given path.
325    #[must_use]
326    pub fn new(path: impl AsRef<Path>, schema_manager: Arc<SchemaManager>) -> Self {
327        let database_path = path.as_ref().to_path_buf();
328        let projections = ProjectionService::new(&database_path, Arc::clone(&schema_manager));
329        Self {
330            database_path,
331            schema_manager,
332            projections,
333        }
334    }
335
336    fn connect(&self) -> Result<rusqlite::Connection, EngineError> {
337        #[cfg(feature = "sqlite-vec")]
338        let conn = sqlite::open_connection_with_vec(&self.database_path)?;
339        #[cfg(not(feature = "sqlite-vec"))]
340        let conn = sqlite::open_connection(&self.database_path)?;
341        self.schema_manager.bootstrap(&conn)?;
342        Ok(conn)
343    }
344
345    /// # Errors
346    /// Returns [`EngineError`] if the database connection fails or any SQL query fails.
347    pub fn check_integrity(&self) -> Result<IntegrityReport, EngineError> {
348        let conn = self.connect()?;
349
350        let physical_result: String =
351            conn.query_row("PRAGMA integrity_check", [], |row| row.get(0))?;
352        let foreign_key_count: i64 =
353            conn.query_row("SELECT count(*) FROM pragma_foreign_key_check", [], |row| {
354                row.get(0)
355            })?;
356        let missing_fts_rows: i64 = conn.query_row(
357            r"
358            SELECT count(*)
359            FROM chunks c
360            JOIN nodes n
361              ON n.logical_id = c.node_logical_id
362             AND n.superseded_at IS NULL
363            WHERE NOT EXISTS (
364                SELECT 1
365                FROM fts_nodes f
366                WHERE f.chunk_id = c.id
367            )
368            ",
369            [],
370            |row| row.get(0),
371        )?;
372        let duplicate_active: i64 = conn.query_row(
373            r"
374            SELECT count(*)
375            FROM (
376                SELECT logical_id
377                FROM nodes
378                WHERE superseded_at IS NULL
379                GROUP BY logical_id
380                HAVING count(*) > 1
381            )
382            ",
383            [],
384            |row| row.get(0),
385        )?;
386        let operational_missing_collections: i64 = conn.query_row(
387            r"
388            SELECT (
389                SELECT count(*)
390                FROM operational_mutations m
391                LEFT JOIN operational_collections c ON c.name = m.collection_name
392                WHERE c.name IS NULL
393            ) + (
394                SELECT count(*)
395                FROM operational_current oc
396                LEFT JOIN operational_collections c ON c.name = oc.collection_name
397                WHERE c.name IS NULL
398            )
399            ",
400            [],
401            |row| row.get(0),
402        )?;
403        let operational_missing_last_mutations: i64 = conn.query_row(
404            r"
405            SELECT count(*)
406            FROM operational_current oc
407            LEFT JOIN operational_mutations m ON m.id = oc.last_mutation_id
408            WHERE m.id IS NULL
409            ",
410            [],
411            |row| row.get(0),
412        )?;
413
414        // Count missing property FTS rows using the same extraction logic as
415        // write/rebuild. A pure-SQL check would overcount: nodes whose declared
416        // paths legitimately normalize to no values correctly have no row.
417        let missing_property_fts_rows = count_missing_property_fts_rows(&conn)?;
418
419        let mut warnings = Vec::new();
420        if missing_fts_rows > 0 {
421            warnings.push("missing FTS projections detected".to_owned());
422        }
423        if missing_property_fts_rows > 0 {
424            warnings.push("missing property FTS projections detected".to_owned());
425        }
426        if duplicate_active > 0 {
427            warnings.push("duplicate active logical_ids detected".to_owned());
428        }
429        if operational_missing_collections > 0 {
430            warnings.push("operational rows reference missing collections".to_owned());
431        }
432        if operational_missing_last_mutations > 0 {
433            warnings.push("operational current rows reference missing last mutations".to_owned());
434        }
435
436        // FIX(review): was `as usize` — unsound on 32-bit targets, wraps negatives silently.
437        // Options: (A) try_from().unwrap_or(0) — masks corruption, (B) try_from().expect() —
438        // panics on corruption, (C) propagate error. Chose (B) here: a negative count(*)
439        // signals data corruption, and the integrity report would be meaningless anyway.
440        Ok(IntegrityReport {
441            physical_ok: physical_result == "ok",
442            foreign_keys_ok: foreign_key_count == 0,
443            missing_fts_rows: i64_to_usize(missing_fts_rows),
444            missing_property_fts_rows: i64_to_usize(missing_property_fts_rows),
445            duplicate_active_logical_ids: i64_to_usize(duplicate_active),
446            operational_missing_collections: i64_to_usize(operational_missing_collections),
447            operational_missing_last_mutations: i64_to_usize(operational_missing_last_mutations),
448            warnings,
449        })
450    }
451
452    /// # Errors
453    /// Returns [`EngineError`] if the database connection fails or any SQL query fails.
454    #[allow(clippy::too_many_lines)]
455    pub fn check_semantics(&self) -> Result<SemanticReport, EngineError> {
456        let conn = self.connect()?;
457
458        let orphaned_chunks: i64 = conn.query_row(
459            r"
460            SELECT count(*)
461            FROM chunks c
462            WHERE NOT EXISTS (
463                SELECT 1 FROM nodes n
464                WHERE n.logical_id = c.node_logical_id
465            )
466            ",
467            [],
468            |row| row.get(0),
469        )?;
470
471        let null_source_ref_nodes: i64 = conn.query_row(
472            "SELECT count(*) FROM nodes WHERE source_ref IS NULL AND superseded_at IS NULL",
473            [],
474            |row| row.get(0),
475        )?;
476
477        let broken_step_fk: i64 = conn.query_row(
478            r"
479            SELECT count(*) FROM steps s
480            WHERE NOT EXISTS (SELECT 1 FROM runs r WHERE r.id = s.run_id)
481            ",
482            [],
483            |row| row.get(0),
484        )?;
485
486        let broken_action_fk: i64 = conn.query_row(
487            r"
488            SELECT count(*) FROM actions a
489            WHERE NOT EXISTS (SELECT 1 FROM steps s WHERE s.id = a.step_id)
490            ",
491            [],
492            |row| row.get(0),
493        )?;
494
495        let stale_fts_rows: i64 = conn.query_row(
496            r"
497            SELECT count(*) FROM fts_nodes f
498            WHERE NOT EXISTS (SELECT 1 FROM chunks c WHERE c.id = f.chunk_id)
499            ",
500            [],
501            |row| row.get(0),
502        )?;
503
504        let fts_rows_for_superseded_nodes: i64 = conn.query_row(
505            r"
506            SELECT count(*) FROM fts_nodes f
507            WHERE NOT EXISTS (
508                SELECT 1 FROM nodes n
509                WHERE n.logical_id = f.node_logical_id AND n.superseded_at IS NULL
510            )
511            ",
512            [],
513            |row| row.get(0),
514        )?;
515
516        let stale_property_fts_rows: i64 = conn.query_row(
517            r"
518            SELECT count(*) FROM fts_node_properties fp
519            WHERE NOT EXISTS (
520                SELECT 1 FROM nodes n
521                WHERE n.logical_id = fp.node_logical_id AND n.superseded_at IS NULL
522            )
523            ",
524            [],
525            |row| row.get(0),
526        )?;
527
528        let orphaned_property_fts_rows: i64 = conn.query_row(
529            r"
530            SELECT count(*) FROM fts_node_properties fp
531            WHERE NOT EXISTS (
532                SELECT 1 FROM fts_property_schemas s WHERE s.kind = fp.kind
533            )
534            ",
535            [],
536            |row| row.get(0),
537        )?;
538
539        let mismatched_kind_property_fts_rows: i64 = conn.query_row(
540            r"
541            SELECT count(*) FROM fts_node_properties fp
542            JOIN nodes n ON n.logical_id = fp.node_logical_id AND n.superseded_at IS NULL
543            WHERE n.kind != fp.kind
544            ",
545            [],
546            |row| row.get(0),
547        )?;
548
549        let duplicate_property_fts_rows: i64 = conn.query_row(
550            r"
551            SELECT count(*) FROM (
552                SELECT node_logical_id FROM fts_node_properties
553                GROUP BY node_logical_id
554                HAVING count(*) > 1
555            )
556            ",
557            [],
558            |row| row.get(0),
559        )?;
560
561        let drifted_property_fts_rows = count_drifted_property_fts_rows(&conn)?;
562
563        let dangling_edges: i64 = conn.query_row(
564            r"
565            SELECT count(*) FROM edges e
566            WHERE e.superseded_at IS NULL AND (
567                NOT EXISTS (SELECT 1 FROM nodes n WHERE n.logical_id = e.source_logical_id AND n.superseded_at IS NULL)
568                OR
569                NOT EXISTS (SELECT 1 FROM nodes n WHERE n.logical_id = e.target_logical_id AND n.superseded_at IS NULL)
570            )
571            ",
572            [],
573            |row| row.get(0),
574        )?;
575
576        let orphaned_supersession_chains: i64 = conn.query_row(
577            r"
578            SELECT count(*) FROM (
579                SELECT logical_id FROM nodes
580                GROUP BY logical_id
581                HAVING count(*) > 0 AND sum(CASE WHEN superseded_at IS NULL THEN 1 ELSE 0 END) = 0
582            )
583            ",
584            [],
585            |row| row.get(0),
586        )?;
587
588        // Vec stale row detection — degrades to 0 when the vec profile is absent.
589        #[cfg(feature = "sqlite-vec")]
590        let stale_vec_rows: i64 = match conn.query_row(
591            r"
592            SELECT count(*) FROM vec_nodes_active v
593            WHERE NOT EXISTS (SELECT 1 FROM chunks c WHERE c.id = v.chunk_id)
594            ",
595            [],
596            |row| row.get(0),
597        ) {
598            Ok(n) => n,
599            Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
600                if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
601            {
602                0
603            }
604            Err(e) => return Err(EngineError::Sqlite(e)),
605        };
606        #[cfg(not(feature = "sqlite-vec"))]
607        let stale_vec_rows: i64 = 0;
608
609        #[cfg(feature = "sqlite-vec")]
610        let vec_rows_for_superseded_nodes: i64 = match conn.query_row(
611            r"
612            SELECT count(*) FROM vec_nodes_active v
613            JOIN chunks c ON c.id = v.chunk_id
614            WHERE NOT EXISTS (
615                SELECT 1 FROM nodes n
616                WHERE n.logical_id = c.node_logical_id
617            )
618            ",
619            [],
620            |row| row.get(0),
621        ) {
622            Ok(n) => n,
623            Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
624                if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
625            {
626                0
627            }
628            Err(e) => return Err(EngineError::Sqlite(e)),
629        };
630        #[cfg(not(feature = "sqlite-vec"))]
631        let vec_rows_for_superseded_nodes: i64 = 0;
632        let missing_operational_current_rows: i64 = conn.query_row(
633            r"
634            SELECT count(*)
635            FROM operational_mutations m
636            JOIN operational_collections c
637              ON c.name = m.collection_name
638             AND c.kind = 'latest_state'
639            WHERE m.op_kind = 'put'
640              AND NOT EXISTS (
641                    SELECT 1
642                    FROM operational_mutations newer
643                    WHERE newer.collection_name = m.collection_name
644                      AND newer.record_key = m.record_key
645                      AND newer.mutation_order > m.mutation_order
646                )
647              AND NOT EXISTS (
648                    SELECT 1
649                    FROM operational_current oc
650                    WHERE oc.collection_name = m.collection_name
651                      AND oc.record_key = m.record_key
652                )
653            ",
654            [],
655            |row| row.get(0),
656        )?;
657        let stale_operational_current_rows: i64 = conn.query_row(
658            r"
659            SELECT count(*)
660            FROM operational_current oc
661            JOIN operational_collections c
662              ON c.name = oc.collection_name
663             AND c.kind = 'latest_state'
664            LEFT JOIN operational_mutations m ON m.id = oc.last_mutation_id
665            WHERE m.id IS NULL
666               OR m.collection_name != oc.collection_name
667               OR m.record_key != oc.record_key
668               OR m.op_kind != 'put'
669               OR m.payload_json != oc.payload_json
670               OR EXISTS (
671                    SELECT 1
672                    FROM operational_mutations newer
673                    WHERE newer.collection_name = oc.collection_name
674                      AND newer.record_key = oc.record_key
675                      AND newer.mutation_order > m.mutation_order
676                )
677            ",
678            [],
679            |row| row.get(0),
680        )?;
681        let disabled_collection_mutations: i64 = conn.query_row(
682            r"
683            SELECT count(*)
684            FROM operational_mutations m
685            JOIN operational_collections c ON c.name = m.collection_name
686            WHERE c.disabled_at IS NOT NULL AND m.created_at > c.disabled_at
687            ",
688            [],
689            |row| row.get(0),
690        )?;
691        let orphaned_last_access_metadata_rows: i64 = conn.query_row(
692            r"
693            SELECT count(*)
694            FROM node_access_metadata am
695            WHERE NOT EXISTS (
696                SELECT 1 FROM nodes n WHERE n.logical_id = am.logical_id
697            )
698            ",
699            [],
700            |row| row.get(0),
701        )?;
702
703        let mut warnings = Vec::new();
704        if orphaned_chunks > 0 {
705            warnings.push(format!(
706                "{orphaned_chunks} orphaned chunk(s) with no surviving node history"
707            ));
708        }
709        if null_source_ref_nodes > 0 {
710            warnings.push(format!(
711                "{null_source_ref_nodes} active node(s) with null source_ref"
712            ));
713        }
714        if broken_step_fk > 0 {
715            warnings.push(format!(
716                "{broken_step_fk} step(s) referencing non-existent run"
717            ));
718        }
719        if broken_action_fk > 0 {
720            warnings.push(format!(
721                "{broken_action_fk} action(s) referencing non-existent step"
722            ));
723        }
724        if stale_fts_rows > 0 {
725            warnings.push(format!(
726                "{stale_fts_rows} stale FTS row(s) referencing missing chunk"
727            ));
728        }
729        if fts_rows_for_superseded_nodes > 0 {
730            warnings.push(format!(
731                "{fts_rows_for_superseded_nodes} FTS row(s) for superseded node(s)"
732            ));
733        }
734        if stale_property_fts_rows > 0 {
735            warnings.push(format!(
736                "{stale_property_fts_rows} stale property FTS row(s) for superseded/missing node(s)"
737            ));
738        }
739        if orphaned_property_fts_rows > 0 {
740            warnings.push(format!(
741                "{orphaned_property_fts_rows} orphaned property FTS row(s) for unregistered kind(s)"
742            ));
743        }
744        if mismatched_kind_property_fts_rows > 0 {
745            warnings.push(format!(
746                "{mismatched_kind_property_fts_rows} property FTS row(s) whose kind does not match the active node"
747            ));
748        }
749        if duplicate_property_fts_rows > 0 {
750            warnings.push(format!(
751                "{duplicate_property_fts_rows} active logical ID(s) with duplicate property FTS rows"
752            ));
753        }
754        if drifted_property_fts_rows > 0 {
755            warnings.push(format!(
756                "{drifted_property_fts_rows} property FTS row(s) with stale text_content"
757            ));
758        }
759        if dangling_edges > 0 {
760            warnings.push(format!(
761                "{dangling_edges} active edge(s) with missing endpoint node"
762            ));
763        }
764        if orphaned_supersession_chains > 0 {
765            warnings.push(format!(
766                "{orphaned_supersession_chains} logical_id(s) with all versions superseded"
767            ));
768        }
769        if stale_vec_rows > 0 {
770            warnings.push(format!(
771                "{stale_vec_rows} stale vec row(s) referencing missing chunk"
772            ));
773        }
774        if vec_rows_for_superseded_nodes > 0 {
775            warnings.push(format!(
776                "{vec_rows_for_superseded_nodes} vec row(s) whose node history is missing"
777            ));
778        }
779        if missing_operational_current_rows > 0 {
780            warnings.push(format!(
781                "{missing_operational_current_rows} latest-state key(s) missing operational_current rows"
782            ));
783        }
784        if stale_operational_current_rows > 0 {
785            warnings.push(format!(
786                "{stale_operational_current_rows} stale operational_current row(s)"
787            ));
788        }
789        if disabled_collection_mutations > 0 {
790            warnings.push(format!(
791                "{disabled_collection_mutations} mutation(s) were written after collection disable"
792            ));
793        }
794        if orphaned_last_access_metadata_rows > 0 {
795            warnings.push(format!(
796                "{orphaned_last_access_metadata_rows} last_access metadata row(s) reference missing node history"
797            ));
798        }
799
800        Ok(SemanticReport {
801            orphaned_chunks: i64_to_usize(orphaned_chunks),
802            null_source_ref_nodes: i64_to_usize(null_source_ref_nodes),
803            broken_step_fk: i64_to_usize(broken_step_fk),
804            broken_action_fk: i64_to_usize(broken_action_fk),
805            stale_fts_rows: i64_to_usize(stale_fts_rows),
806            fts_rows_for_superseded_nodes: i64_to_usize(fts_rows_for_superseded_nodes),
807            stale_property_fts_rows: i64_to_usize(stale_property_fts_rows),
808            orphaned_property_fts_rows: i64_to_usize(orphaned_property_fts_rows),
809            mismatched_kind_property_fts_rows: i64_to_usize(mismatched_kind_property_fts_rows),
810            duplicate_property_fts_rows: i64_to_usize(duplicate_property_fts_rows),
811            drifted_property_fts_rows: i64_to_usize(drifted_property_fts_rows),
812            dangling_edges: i64_to_usize(dangling_edges),
813            orphaned_supersession_chains: i64_to_usize(orphaned_supersession_chains),
814            stale_vec_rows: i64_to_usize(stale_vec_rows),
815            vec_rows_for_superseded_nodes: i64_to_usize(vec_rows_for_superseded_nodes),
816            missing_operational_current_rows: i64_to_usize(missing_operational_current_rows),
817            stale_operational_current_rows: i64_to_usize(stale_operational_current_rows),
818            disabled_collection_mutations: i64_to_usize(disabled_collection_mutations),
819            orphaned_last_access_metadata_rows: i64_to_usize(orphaned_last_access_metadata_rows),
820            warnings,
821        })
822    }
823
824    /// # Errors
825    /// Returns [`EngineError`] if the collection metadata is invalid or the insert fails.
826    pub fn register_operational_collection(
827        &self,
828        request: &OperationalRegisterRequest,
829    ) -> Result<OperationalCollectionRecord, EngineError> {
830        if request.name.trim().is_empty() {
831            return Err(EngineError::InvalidWrite(
832                "operational collection name must not be empty".to_owned(),
833            ));
834        }
835        if request.schema_json.is_empty() {
836            return Err(EngineError::InvalidWrite(
837                "operational collection schema_json must not be empty".to_owned(),
838            ));
839        }
840        if request.retention_json.is_empty() {
841            return Err(EngineError::InvalidWrite(
842                "operational collection retention_json must not be empty".to_owned(),
843            ));
844        }
845        if request.filter_fields_json.is_empty() {
846            return Err(EngineError::InvalidWrite(
847                "operational collection filter_fields_json must not be empty".to_owned(),
848            ));
849        }
850        parse_operational_validation_contract(&request.validation_json)
851            .map_err(EngineError::InvalidWrite)?;
852        parse_operational_secondary_indexes_json(&request.secondary_indexes_json, request.kind)
853            .map_err(EngineError::InvalidWrite)?;
854        if request.format_version <= 0 {
855            return Err(EngineError::InvalidWrite(
856                "operational collection format_version must be positive".to_owned(),
857            ));
858        }
859        parse_operational_filter_fields(&request.filter_fields_json)
860            .map_err(EngineError::InvalidWrite)?;
861
862        let mut conn = self.connect()?;
863        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
864        tx.execute(
865            "INSERT INTO operational_collections \
866             (name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at) \
867             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, unixepoch())",
868            rusqlite::params![
869                request.name.as_str(),
870                request.kind.as_str(),
871                request.schema_json.as_str(),
872                request.retention_json.as_str(),
873                request.filter_fields_json.as_str(),
874                request.validation_json.as_str(),
875                request.secondary_indexes_json.as_str(),
876                request.format_version,
877            ],
878        )?;
879        persist_simple_provenance_event(
880            &tx,
881            "operational_collection_registered",
882            request.name.as_str(),
883            Some(serde_json::json!({
884                "kind": request.kind.as_str(),
885                "format_version": request.format_version,
886            })),
887        )?;
888        tx.commit()?;
889
890        self.describe_operational_collection(&request.name)?
891            .ok_or_else(|| {
892                EngineError::Bridge("registered collection missing after commit".to_owned())
893            })
894    }
895
896    /// # Errors
897    /// Returns [`EngineError`] if the database query fails.
898    pub fn describe_operational_collection(
899        &self,
900        name: &str,
901    ) -> Result<Option<OperationalCollectionRecord>, EngineError> {
902        let conn = self.connect()?;
903        load_operational_collection_record(&conn, name)
904    }
905
906    /// # Errors
907    /// Returns [`EngineError`] if the collection is missing, the filter contract is invalid,
908    /// or existing mutation backfill fails.
909    pub fn update_operational_collection_filters(
910        &self,
911        name: &str,
912        filter_fields_json: &str,
913    ) -> Result<OperationalCollectionRecord, EngineError> {
914        if filter_fields_json.is_empty() {
915            return Err(EngineError::InvalidWrite(
916                "operational collection filter_fields_json must not be empty".to_owned(),
917            ));
918        }
919        let declared_fields = parse_operational_filter_fields(filter_fields_json)
920            .map_err(EngineError::InvalidWrite)?;
921
922        let mut conn = self.connect()?;
923        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
924        load_operational_collection_record(&tx, name)?.ok_or_else(|| {
925            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
926        })?;
927        tx.execute(
928            "UPDATE operational_collections SET filter_fields_json = ?2 WHERE name = ?1",
929            rusqlite::params![name, filter_fields_json],
930        )?;
931        tx.execute(
932            "DELETE FROM operational_filter_values WHERE collection_name = ?1",
933            [name],
934        )?;
935
936        let mut mutation_stmt = tx.prepare(
937            "SELECT id, payload_json FROM operational_mutations \
938             WHERE collection_name = ?1 ORDER BY mutation_order",
939        )?;
940        let mutations = mutation_stmt
941            .query_map([name], |row| {
942                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
943            })?
944            .collect::<Result<Vec<_>, _>>()?;
945        drop(mutation_stmt);
946
947        let mut insert_filter_value = tx.prepare_cached(
948            "INSERT INTO operational_filter_values \
949             (mutation_id, collection_name, field_name, string_value, integer_value) \
950             VALUES (?1, ?2, ?3, ?4, ?5)",
951        )?;
952        let mut inserted_values = 0usize;
953        for (mutation_id, payload_json) in &mutations {
954            for filter_value in
955                extract_operational_filter_values(&declared_fields, payload_json.as_str())
956            {
957                insert_filter_value.execute(rusqlite::params![
958                    mutation_id,
959                    name,
960                    filter_value.field_name,
961                    filter_value.string_value,
962                    filter_value.integer_value,
963                ])?;
964                inserted_values += 1;
965            }
966        }
967        drop(insert_filter_value);
968
969        persist_simple_provenance_event(
970            &tx,
971            "operational_collection_filter_fields_updated",
972            name,
973            Some(serde_json::json!({
974                "field_count": declared_fields.len(),
975                "mutations_backfilled": mutations.len(),
976                "inserted_filter_values": inserted_values,
977            })),
978        )?;
979        let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
980            EngineError::Bridge("operational collection missing after filter update".to_owned())
981        })?;
982        tx.commit()?;
983        Ok(updated)
984    }
985
986    /// # Errors
987    /// Returns [`EngineError`] if the collection is missing or the validation contract is invalid.
988    pub fn update_operational_collection_validation(
989        &self,
990        name: &str,
991        validation_json: &str,
992    ) -> Result<OperationalCollectionRecord, EngineError> {
993        parse_operational_validation_contract(validation_json)
994            .map_err(EngineError::InvalidWrite)?;
995
996        let mut conn = self.connect()?;
997        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
998        load_operational_collection_record(&tx, name)?.ok_or_else(|| {
999            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1000        })?;
1001        tx.execute(
1002            "UPDATE operational_collections SET validation_json = ?2 WHERE name = ?1",
1003            rusqlite::params![name, validation_json],
1004        )?;
1005        persist_simple_provenance_event(
1006            &tx,
1007            "operational_collection_validation_updated",
1008            name,
1009            Some(serde_json::json!({
1010                "has_validation": !validation_json.is_empty(),
1011            })),
1012        )?;
1013        let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1014            EngineError::Bridge("operational collection missing after validation update".to_owned())
1015        })?;
1016        tx.commit()?;
1017        Ok(updated)
1018    }
1019
1020    /// # Errors
1021    /// Returns [`EngineError`] if the collection is missing, the contract is invalid,
1022    /// or derived index rebuild fails.
1023    pub fn update_operational_collection_secondary_indexes(
1024        &self,
1025        name: &str,
1026        secondary_indexes_json: &str,
1027    ) -> Result<OperationalCollectionRecord, EngineError> {
1028        let mut conn = self.connect()?;
1029        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1030        let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1031            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1032        })?;
1033        let indexes = parse_operational_secondary_indexes_json(secondary_indexes_json, record.kind)
1034            .map_err(EngineError::InvalidWrite)?;
1035        tx.execute(
1036            "UPDATE operational_collections SET secondary_indexes_json = ?2 WHERE name = ?1",
1037            rusqlite::params![name, secondary_indexes_json],
1038        )?;
1039        let (mutation_entries_rebuilt, current_entries_rebuilt) =
1040            rebuild_operational_secondary_index_entries(&tx, &record.name, record.kind, &indexes)?;
1041        persist_simple_provenance_event(
1042            &tx,
1043            "operational_collection_secondary_indexes_updated",
1044            name,
1045            Some(serde_json::json!({
1046                "index_count": indexes.len(),
1047                "mutation_entries_rebuilt": mutation_entries_rebuilt,
1048                "current_entries_rebuilt": current_entries_rebuilt,
1049            })),
1050        )?;
1051        let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1052            EngineError::Bridge(
1053                "operational collection missing after secondary index update".to_owned(),
1054            )
1055        })?;
1056        tx.commit()?;
1057        Ok(updated)
1058    }
1059
1060    /// # Errors
1061    /// Returns [`EngineError`] if the collection is missing or rebuild fails.
1062    pub fn rebuild_operational_secondary_indexes(
1063        &self,
1064        name: &str,
1065    ) -> Result<OperationalSecondaryIndexRebuildReport, EngineError> {
1066        let mut conn = self.connect()?;
1067        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1068        let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1069            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1070        })?;
1071        let indexes =
1072            parse_operational_secondary_indexes_json(&record.secondary_indexes_json, record.kind)
1073                .map_err(EngineError::InvalidWrite)?;
1074        let (mutation_entries_rebuilt, current_entries_rebuilt) =
1075            rebuild_operational_secondary_index_entries(&tx, &record.name, record.kind, &indexes)?;
1076        persist_simple_provenance_event(
1077            &tx,
1078            "operational_secondary_indexes_rebuilt",
1079            name,
1080            Some(serde_json::json!({
1081                "index_count": indexes.len(),
1082                "mutation_entries_rebuilt": mutation_entries_rebuilt,
1083                "current_entries_rebuilt": current_entries_rebuilt,
1084            })),
1085        )?;
1086        tx.commit()?;
1087        Ok(OperationalSecondaryIndexRebuildReport {
1088            collection_name: name.to_owned(),
1089            mutation_entries_rebuilt,
1090            current_entries_rebuilt,
1091        })
1092    }
1093
1094    /// # Errors
1095    /// Returns [`EngineError`] if the collection is missing or its validation contract is invalid.
1096    pub fn validate_operational_collection_history(
1097        &self,
1098        name: &str,
1099    ) -> Result<OperationalHistoryValidationReport, EngineError> {
1100        let conn = self.connect()?;
1101        let record = load_operational_collection_record(&conn, name)?.ok_or_else(|| {
1102            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1103        })?;
1104        let Some(contract) = parse_operational_validation_contract(&record.validation_json)
1105            .map_err(EngineError::InvalidWrite)?
1106        else {
1107            return Err(EngineError::InvalidWrite(format!(
1108                "operational collection '{name}' has no validation_json configured"
1109            )));
1110        };
1111
1112        let mut stmt = conn.prepare(
1113            "SELECT id, record_key, op_kind, payload_json FROM operational_mutations \
1114             WHERE collection_name = ?1 ORDER BY mutation_order",
1115        )?;
1116        let rows = stmt
1117            .query_map([name], |row| {
1118                Ok((
1119                    row.get::<_, String>(0)?,
1120                    row.get::<_, String>(1)?,
1121                    row.get::<_, String>(2)?,
1122                    row.get::<_, String>(3)?,
1123                ))
1124            })?
1125            .collect::<Result<Vec<_>, _>>()?;
1126        drop(stmt);
1127
1128        let mut checked_rows = 0usize;
1129        let mut issues = Vec::new();
1130        for (mutation_id, record_key, op_kind, payload_json) in rows {
1131            if op_kind == "delete" {
1132                continue;
1133            }
1134            checked_rows += 1;
1135            if let Err(message) =
1136                validate_operational_payload_against_contract(&contract, payload_json.as_str())
1137            {
1138                issues.push(OperationalHistoryValidationIssue {
1139                    mutation_id,
1140                    record_key,
1141                    op_kind,
1142                    message,
1143                });
1144            }
1145        }
1146
1147        Ok(OperationalHistoryValidationReport {
1148            collection_name: name.to_owned(),
1149            checked_rows,
1150            invalid_row_count: issues.len(),
1151            issues,
1152        })
1153    }
1154
1155    /// # Errors
1156    /// Returns [`EngineError`] if the database query fails.
1157    pub fn disable_operational_collection(
1158        &self,
1159        name: &str,
1160    ) -> Result<OperationalCollectionRecord, EngineError> {
1161        let mut conn = self.connect()?;
1162        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1163        let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1164            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1165        })?;
1166        let changed = if record.disabled_at.is_none() {
1167            tx.execute(
1168                "UPDATE operational_collections SET disabled_at = unixepoch() WHERE name = ?1",
1169                [name],
1170            )?;
1171            true
1172        } else {
1173            false
1174        };
1175        let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1176            EngineError::Bridge("operational collection missing after disable".to_owned())
1177        })?;
1178        persist_simple_provenance_event(
1179            &tx,
1180            "operational_collection_disabled",
1181            name,
1182            Some(serde_json::json!({
1183                "disabled_at": record.disabled_at,
1184                "changed": changed,
1185            })),
1186        )?;
1187        tx.commit()?;
1188        Ok(record)
1189    }
1190
1191    /// # Errors
1192    /// Returns [`EngineError`] if the database query fails.
1193    pub fn compact_operational_collection(
1194        &self,
1195        name: &str,
1196        dry_run: bool,
1197    ) -> Result<OperationalCompactionReport, EngineError> {
1198        let mut conn = self.connect()?;
1199        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1200        let collection = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1201            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1202        })?;
1203        validate_append_only_operational_collection(&collection, "compact")?;
1204        let (mutation_ids, before_timestamp) =
1205            operational_compaction_candidates(&tx, &collection.retention_json, name)?;
1206        if dry_run {
1207            drop(tx);
1208            return Ok(OperationalCompactionReport {
1209                collection_name: name.to_owned(),
1210                deleted_mutations: mutation_ids.len(),
1211                dry_run: true,
1212                before_timestamp,
1213            });
1214        }
1215        let mut delete_stmt =
1216            tx.prepare_cached("DELETE FROM operational_mutations WHERE id = ?1")?;
1217        for mutation_id in &mutation_ids {
1218            delete_stmt.execute([mutation_id.as_str()])?;
1219        }
1220        drop(delete_stmt);
1221        persist_simple_provenance_event(
1222            &tx,
1223            "operational_collection_compacted",
1224            name,
1225            Some(serde_json::json!({
1226                "deleted_mutations": mutation_ids.len(),
1227                "before_timestamp": before_timestamp,
1228            })),
1229        )?;
1230        tx.commit()?;
1231        Ok(OperationalCompactionReport {
1232            collection_name: name.to_owned(),
1233            deleted_mutations: mutation_ids.len(),
1234            dry_run: false,
1235            before_timestamp,
1236        })
1237    }
1238
1239    /// # Errors
1240    /// Returns [`EngineError`] if the database query fails.
1241    pub fn purge_operational_collection(
1242        &self,
1243        name: &str,
1244        before_timestamp: i64,
1245    ) -> Result<OperationalPurgeReport, EngineError> {
1246        let mut conn = self.connect()?;
1247        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1248        let collection = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1249            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1250        })?;
1251        validate_append_only_operational_collection(&collection, "purge")?;
1252        let deleted_mutations = tx.execute(
1253            "DELETE FROM operational_mutations WHERE collection_name = ?1 AND created_at < ?2",
1254            rusqlite::params![name, before_timestamp],
1255        )?;
1256        persist_simple_provenance_event(
1257            &tx,
1258            "operational_collection_purged",
1259            name,
1260            Some(serde_json::json!({
1261                "deleted_mutations": deleted_mutations,
1262                "before_timestamp": before_timestamp,
1263            })),
1264        )?;
1265        tx.commit()?;
1266        Ok(OperationalPurgeReport {
1267            collection_name: name.to_owned(),
1268            deleted_mutations,
1269            before_timestamp,
1270        })
1271    }
1272
1273    /// # Errors
1274    /// Returns [`EngineError`] if collection selection or policy parsing fails.
1275    pub fn plan_operational_retention(
1276        &self,
1277        now_timestamp: i64,
1278        collection_names: Option<&[String]>,
1279        max_collections: Option<usize>,
1280    ) -> Result<OperationalRetentionPlanReport, EngineError> {
1281        let conn = self.connect()?;
1282        let records = load_operational_retention_records(&conn, collection_names, max_collections)?;
1283        let mut items = Vec::with_capacity(records.len());
1284        for record in records {
1285            items.push(plan_operational_retention_item(
1286                &conn,
1287                &record,
1288                now_timestamp,
1289            )?);
1290        }
1291        Ok(OperationalRetentionPlanReport {
1292            planned_at: now_timestamp,
1293            collections_examined: items.len(),
1294            items,
1295        })
1296    }
1297
1298    /// # Errors
1299    /// Returns [`EngineError`] if collection selection, policy parsing, or execution fails.
1300    pub fn run_operational_retention(
1301        &self,
1302        now_timestamp: i64,
1303        collection_names: Option<&[String]>,
1304        max_collections: Option<usize>,
1305        dry_run: bool,
1306    ) -> Result<OperationalRetentionRunReport, EngineError> {
1307        let mut conn = self.connect()?;
1308        let records = load_operational_retention_records(&conn, collection_names, max_collections)?;
1309        let mut items = Vec::with_capacity(records.len());
1310        let mut collections_acted_on = 0usize;
1311
1312        for record in records {
1313            let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1314            let item = run_operational_retention_item(&tx, &record, now_timestamp, dry_run)?;
1315            if item.deleted_mutations > 0 {
1316                collections_acted_on += 1;
1317            }
1318            if dry_run || item.action_kind == OperationalRetentionActionKind::Noop {
1319                drop(tx);
1320            } else {
1321                tx.commit()?;
1322            }
1323            items.push(item);
1324        }
1325
1326        Ok(OperationalRetentionRunReport {
1327            executed_at: now_timestamp,
1328            collections_examined: items.len(),
1329            collections_acted_on,
1330            dry_run,
1331            items,
1332        })
1333    }
1334
1335    /// # Errors
1336    /// Returns [`EngineError`] if the database query fails.
1337    pub fn trace_operational_collection(
1338        &self,
1339        collection_name: &str,
1340        record_key: Option<&str>,
1341    ) -> Result<OperationalTraceReport, EngineError> {
1342        let conn = self.connect()?;
1343        ensure_operational_collection_registered(&conn, collection_name)?;
1344        let mutations = if let Some(record_key) = record_key {
1345            let mut stmt = conn.prepare(
1346                "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
1347                 FROM operational_mutations \
1348                 WHERE collection_name = ?1 AND record_key = ?2 \
1349                 ORDER BY mutation_order",
1350            )?;
1351            stmt.query_map([collection_name, record_key], map_operational_mutation_row)?
1352                .collect::<Result<Vec<_>, _>>()?
1353        } else {
1354            let mut stmt = conn.prepare(
1355                "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
1356                 FROM operational_mutations \
1357                 WHERE collection_name = ?1 \
1358                 ORDER BY mutation_order",
1359            )?;
1360            stmt.query_map([collection_name], map_operational_mutation_row)?
1361                .collect::<Result<Vec<_>, _>>()?
1362        };
1363        let current_rows = if let Some(record_key) = record_key {
1364            let mut stmt = conn.prepare(
1365                "SELECT collection_name, record_key, payload_json, updated_at, last_mutation_id \
1366                 FROM operational_current \
1367                 WHERE collection_name = ?1 AND record_key = ?2 \
1368                 ORDER BY updated_at, record_key",
1369            )?;
1370            stmt.query_map([collection_name, record_key], map_operational_current_row)?
1371                .collect::<Result<Vec<_>, _>>()?
1372        } else {
1373            let mut stmt = conn.prepare(
1374                "SELECT collection_name, record_key, payload_json, updated_at, last_mutation_id \
1375                 FROM operational_current \
1376                 WHERE collection_name = ?1 \
1377                 ORDER BY updated_at, record_key",
1378            )?;
1379            stmt.query_map([collection_name], map_operational_current_row)?
1380                .collect::<Result<Vec<_>, _>>()?
1381        };
1382
1383        Ok(OperationalTraceReport {
1384            collection_name: collection_name.to_owned(),
1385            record_key: record_key.map(str::to_owned),
1386            mutation_count: mutations.len(),
1387            current_count: current_rows.len(),
1388            mutations,
1389            current_rows,
1390        })
1391    }
1392
1393    /// # Errors
1394    /// Returns [`EngineError`] if the collection contract is invalid or the filtered read fails.
1395    pub fn read_operational_collection(
1396        &self,
1397        request: &OperationalReadRequest,
1398    ) -> Result<OperationalReadReport, EngineError> {
1399        if request.collection_name.trim().is_empty() {
1400            return Err(EngineError::InvalidWrite(
1401                "operational read collection_name must not be empty".to_owned(),
1402            ));
1403        }
1404        if request.filters.is_empty() {
1405            return Err(EngineError::InvalidWrite(
1406                "operational read requires at least one filter clause".to_owned(),
1407            ));
1408        }
1409
1410        let conn = self.connect()?;
1411        let record = load_operational_collection_record(&conn, &request.collection_name)?
1412            .ok_or_else(|| {
1413                EngineError::InvalidWrite(format!(
1414                    "operational collection '{}' is not registered",
1415                    request.collection_name
1416                ))
1417            })?;
1418        validate_append_only_operational_collection(&record, "read")?;
1419        let declared_fields = parse_operational_filter_fields(&record.filter_fields_json)
1420            .map_err(EngineError::InvalidWrite)?;
1421        let secondary_indexes =
1422            parse_operational_secondary_indexes_json(&record.secondary_indexes_json, record.kind)
1423                .map_err(EngineError::InvalidWrite)?;
1424        let applied_limit = operational_read_limit(request.limit)?;
1425        let filters = compile_operational_read_filters(&request.filters, &declared_fields)?;
1426        if let Some(report) = execute_operational_secondary_index_read(
1427            &conn,
1428            &request.collection_name,
1429            &filters,
1430            &secondary_indexes,
1431            applied_limit,
1432        )? {
1433            return Ok(report);
1434        }
1435        execute_operational_filtered_read(&conn, &request.collection_name, &filters, applied_limit)
1436    }
1437
1438    /// # Errors
1439    /// Returns [`EngineError`] if the database query fails or collection validation fails.
1440    pub fn rebuild_operational_current(
1441        &self,
1442        collection_name: Option<&str>,
1443    ) -> Result<OperationalRepairReport, EngineError> {
1444        let mut conn = self.connect()?;
1445        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1446        let collections = if let Some(name) = collection_name {
1447            let maybe_kind: Option<String> = tx
1448                .query_row(
1449                    "SELECT kind FROM operational_collections WHERE name = ?1",
1450                    [name],
1451                    |row| row.get(0),
1452                )
1453                .optional()?;
1454            let Some(kind) = maybe_kind else {
1455                return Err(EngineError::InvalidWrite(format!(
1456                    "operational collection '{name}' is not registered"
1457                )));
1458            };
1459            if kind != OperationalCollectionKind::LatestState.as_str() {
1460                return Err(EngineError::InvalidWrite(format!(
1461                    "operational collection '{name}' is not latest_state"
1462                )));
1463            }
1464            vec![name.to_owned()]
1465        } else {
1466            let mut stmt = tx.prepare(
1467                "SELECT name FROM operational_collections WHERE kind = 'latest_state' ORDER BY name",
1468            )?;
1469            stmt.query_map([], |row| row.get::<_, String>(0))?
1470                .collect::<Result<Vec<_>, _>>()?
1471        };
1472
1473        let rebuilt_rows = rebuild_operational_current_rows(&tx, &collections)?;
1474        for collection in &collections {
1475            let record = load_operational_collection_record(&tx, collection)?.ok_or_else(|| {
1476                EngineError::Bridge(format!(
1477                    "operational collection '{collection}' missing during current rebuild"
1478                ))
1479            })?;
1480            let indexes = parse_operational_secondary_indexes_json(
1481                &record.secondary_indexes_json,
1482                record.kind,
1483            )
1484            .map_err(EngineError::InvalidWrite)?;
1485            if !indexes.is_empty() {
1486                rebuild_operational_secondary_index_entries(
1487                    &tx,
1488                    &record.name,
1489                    record.kind,
1490                    &indexes,
1491                )?;
1492            }
1493        }
1494
1495        persist_simple_provenance_event(
1496            &tx,
1497            "operational_current_rebuilt",
1498            collection_name.unwrap_or("*"),
1499            Some(serde_json::json!({
1500                "collections_rebuilt": collections.len(),
1501                "current_rows_rebuilt": rebuilt_rows,
1502            })),
1503        )?;
1504        tx.commit()?;
1505
1506        Ok(OperationalRepairReport {
1507            collections_rebuilt: collections.len(),
1508            current_rows_rebuilt: rebuilt_rows,
1509        })
1510    }
1511
    /// Rebuilds the projections selected by `target`.
    ///
    /// This is a direct delegation to `self.projections.rebuild_projections`;
    /// the report it returns is passed through unchanged.
    ///
    /// # Errors
    /// Returns [`EngineError`] if the database connection fails or the projection rebuild fails.
    pub fn rebuild_projections(
        &self,
        target: ProjectionTarget,
    ) -> Result<ProjectionRepairReport, EngineError> {
        self.projections.rebuild_projections(target)
    }
1520
1521    /// # Errors
1522    /// Returns [`EngineError`] if the database connection fails or the projection rebuild fails.
1523    pub fn rebuild_missing_projections(&self) -> Result<ProjectionRepairReport, EngineError> {
1524        self.projections.rebuild_missing_projections()
1525    }
1526
1527    /// Register (or update) an FTS property projection schema for the given node kind.
1528    ///
1529    /// After registration, any node of this kind will have the declared JSON property
1530    /// paths extracted, concatenated, and indexed in the `fts_node_properties` FTS5 table.
1531    ///
1532    /// # Errors
1533    /// Returns [`EngineError`] if `property_paths` is empty, contains duplicates,
1534    /// or if the database write fails.
1535    pub fn register_fts_property_schema(
1536        &self,
1537        kind: &str,
1538        property_paths: &[String],
1539        separator: Option<&str>,
1540    ) -> Result<FtsPropertySchemaRecord, EngineError> {
1541        let specs: Vec<FtsPropertyPathSpec> = property_paths
1542            .iter()
1543            .map(|p| FtsPropertyPathSpec::scalar(p.clone()))
1544            .collect();
1545        self.register_fts_property_schema_with_entries(kind, &specs, separator, &[])
1546    }
1547
1548    /// Register (or update) an FTS property projection schema with
1549    /// per-path modes and optional exclude paths. When the registered
1550    /// schema introduces a new recursive-mode path for this kind, this
1551    /// method eagerly rebuilds `fts_node_properties` and
1552    /// `fts_node_property_positions` for every active node of that kind,
1553    /// all in the same transaction as the schema row update.
1554    ///
1555    /// # Errors
1556    /// Returns [`EngineError`] if the paths are invalid, the JSON
1557    /// serialization fails, or the rebuild transaction fails.
1558    pub fn register_fts_property_schema_with_entries(
1559        &self,
1560        kind: &str,
1561        entries: &[FtsPropertyPathSpec],
1562        separator: Option<&str>,
1563        exclude_paths: &[String],
1564    ) -> Result<FtsPropertySchemaRecord, EngineError> {
1565        let paths: Vec<String> = entries.iter().map(|e| e.path.clone()).collect();
1566        validate_fts_property_paths(&paths)?;
1567        for p in exclude_paths {
1568            if !p.starts_with("$.") {
1569                return Err(EngineError::InvalidWrite(format!(
1570                    "exclude_paths entries must start with '$.' but got: {p}"
1571                )));
1572            }
1573        }
1574        let separator = separator.unwrap_or(" ");
1575        let paths_json = serialize_property_paths_json(entries, exclude_paths)?;
1576
1577        let mut conn = self.connect()?;
1578        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1579
1580        // Determine whether the registration introduces a recursive path
1581        // that was not present in the previously-registered schema for
1582        // this kind. If so, we must eagerly rebuild property FTS rows and
1583        // position map for every active node of this kind within the same
1584        // transaction.
1585        let previous_row: Option<(String, String)> = tx
1586            .query_row(
1587                "SELECT property_paths_json, separator FROM fts_property_schemas WHERE kind = ?1",
1588                [kind],
1589                |row| {
1590                    let json: String = row.get(0)?;
1591                    let sep: String = row.get(1)?;
1592                    Ok((json, sep))
1593                },
1594            )
1595            .optional()?;
1596        let had_previous_schema = previous_row.is_some();
1597        let previous_recursive_paths: Vec<String> = previous_row
1598            .map(|(json, sep)| crate::writer::parse_property_schema_json(&json, &sep))
1599            .map_or(Vec::new(), |schema| {
1600                schema
1601                    .paths
1602                    .into_iter()
1603                    .filter(|p| p.mode == crate::writer::PropertyPathMode::Recursive)
1604                    .map(|p| p.path)
1605                    .collect()
1606            });
1607        let new_recursive_paths: Vec<&str> = entries
1608            .iter()
1609            .filter(|e| e.mode == FtsPropertyPathMode::Recursive)
1610            .map(|e| e.path.as_str())
1611            .collect();
1612        let introduces_new_recursive = new_recursive_paths
1613            .iter()
1614            .any(|p| !previous_recursive_paths.iter().any(|prev| prev == p));
1615
1616        tx.execute(
1617            "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
1618             VALUES (?1, ?2, ?3) \
1619             ON CONFLICT(kind) DO UPDATE SET property_paths_json = ?2, separator = ?3",
1620            rusqlite::params![kind, paths_json, separator],
1621        )?;
1622
1623        // Eager transactional rebuild: always fire on any update (i.e.
1624        // whenever the row already existed). First-time registrations never
1625        // have a previous schema, so they cost nothing; updates trigger a
1626        // rebuild unconditionally. This covers recursive-path additions
1627        // AND scalar-only re-registrations where only the path or
1628        // separator changed — without a rebuild the existing rows would
1629        // retain stale scalar-derived text. (P4-P2-1)
1630        let needs_rebuild = introduces_new_recursive || had_previous_schema;
1631        if needs_rebuild {
1632            tx.execute("DELETE FROM fts_node_properties WHERE kind = ?1", [kind])?;
1633            tx.execute(
1634                "DELETE FROM fts_node_property_positions WHERE kind = ?1",
1635                [kind],
1636            )?;
1637            // Scope the rebuild to `kind` only. The multi-kind
1638            // `insert_property_fts_rows` iterates over every registered
1639            // schema and would re-insert rows for siblings that were not
1640            // deleted above, duplicating their FTS entries.
1641            crate::projection::insert_property_fts_rows_for_kind(&tx, kind)?;
1642        }
1643
1644        persist_simple_provenance_event(
1645            &tx,
1646            "fts_property_schema_registered",
1647            kind,
1648            Some(serde_json::json!({
1649                "property_paths": paths,
1650                "separator": separator,
1651                "exclude_paths": exclude_paths,
1652                "eager_rebuild": needs_rebuild,
1653            })),
1654        )?;
1655        tx.commit()?;
1656
1657        self.describe_fts_property_schema(kind)?.ok_or_else(|| {
1658            EngineError::Bridge("registered FTS property schema missing after commit".to_owned())
1659        })
1660    }
1661
1662    /// Return the FTS property schema for a single node kind, if registered.
1663    ///
1664    /// # Errors
1665    /// Returns [`EngineError`] if the database query fails.
1666    pub fn describe_fts_property_schema(
1667        &self,
1668        kind: &str,
1669    ) -> Result<Option<FtsPropertySchemaRecord>, EngineError> {
1670        let conn = self.connect()?;
1671        load_fts_property_schema_record(&conn, kind)
1672    }
1673
1674    /// Return all registered FTS property schemas.
1675    ///
1676    /// # Errors
1677    /// Returns [`EngineError`] if the database query fails.
1678    pub fn list_fts_property_schemas(&self) -> Result<Vec<FtsPropertySchemaRecord>, EngineError> {
1679        let conn = self.connect()?;
1680        let mut stmt = conn.prepare(
1681            "SELECT kind, property_paths_json, separator, format_version \
1682             FROM fts_property_schemas ORDER BY kind",
1683        )?;
1684        let records = stmt
1685            .query_map([], |row| {
1686                let kind: String = row.get(0)?;
1687                let paths_json: String = row.get(1)?;
1688                let separator: String = row.get(2)?;
1689                let format_version: i64 = row.get(3)?;
1690                Ok(build_fts_property_schema_record(
1691                    kind,
1692                    &paths_json,
1693                    separator,
1694                    format_version,
1695                ))
1696            })?
1697            .collect::<Result<Vec<_>, _>>()?;
1698        Ok(records)
1699    }
1700
1701    /// Remove the FTS property schema for a node kind.
1702    ///
1703    /// This does **not** delete existing `fts_node_properties` rows for this kind;
1704    /// call `rebuild_projections(Fts)` to clean up stale rows.
1705    ///
1706    /// # Errors
1707    /// Returns [`EngineError`] if the kind is not registered or the delete fails.
1708    pub fn remove_fts_property_schema(&self, kind: &str) -> Result<(), EngineError> {
1709        let mut conn = self.connect()?;
1710        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1711        let deleted = tx.execute("DELETE FROM fts_property_schemas WHERE kind = ?1", [kind])?;
1712        if deleted == 0 {
1713            return Err(EngineError::InvalidWrite(format!(
1714                "FTS property schema for kind '{kind}' is not registered"
1715            )));
1716        }
1717        persist_simple_provenance_event(&tx, "fts_property_schema_removed", kind, None)?;
1718        tx.commit()?;
1719        Ok(())
1720    }
1721
1722    /// Recreate enabled vector profiles from persisted `vector_profiles` metadata.
1723    ///
1724    /// # Errors
1725    /// Returns [`EngineError`] if the database connection fails, reading metadata fails,
1726    /// or sqlite-vec support is unavailable while enabled profiles are present.
1727    pub fn restore_vector_profiles(&self) -> Result<ProjectionRepairReport, EngineError> {
1728        let conn = self.connect()?;
1729        let profiles: Vec<(String, String, i64)> = {
1730            let mut stmt = conn.prepare(
1731                "SELECT profile, table_name, dimension \
1732                 FROM vector_profiles WHERE enabled = 1 ORDER BY profile",
1733            )?;
1734            stmt.query_map([], |row| {
1735                Ok((
1736                    row.get::<_, String>(0)?,
1737                    row.get::<_, String>(1)?,
1738                    row.get::<_, i64>(2)?,
1739                ))
1740            })?
1741            .collect::<Result<Vec<_>, _>>()?
1742        };
1743
1744        for (profile, table_name, dimension) in &profiles {
1745            let dimension = usize::try_from(*dimension).map_err(|_| {
1746                EngineError::Bridge(format!("invalid vector profile dimension: {dimension}"))
1747            })?;
1748            self.schema_manager
1749                .ensure_vector_profile(&conn, profile, table_name, dimension)?;
1750        }
1751
1752        Ok(ProjectionRepairReport {
1753            targets: vec![ProjectionTarget::Vec],
1754            rebuilt_rows: profiles.len(),
1755            notes: vec![],
1756        })
1757    }
1758
1759    /// Rebuild vector embeddings using an application-supplied regeneration
1760    /// contract and generator command.
1761    ///
1762    /// The config is persisted in `vector_embedding_contracts` so the metadata
1763    /// required for recovery survives future repair runs.
1764    ///
1765    /// Vector identity is stamped from [`QueryEmbedder::identity`] — the
1766    /// caller supplies the embedder and cannot override its identity. This
1767    /// makes drift between the read-path and write-path identity stories
1768    /// structurally impossible.
1769    ///
1770    /// # Errors
1771    /// Returns [`EngineError`] if the database connection fails, the config is
1772    /// invalid, the embedder fails, or the regenerated embeddings are
1773    /// malformed.
1774    #[allow(clippy::too_many_lines)]
1775    pub fn regenerate_vector_embeddings(
1776        &self,
1777        embedder: &dyn QueryEmbedder,
1778        config: &VectorRegenerationConfig,
1779    ) -> Result<VectorRegenerationReport, EngineError> {
1780        let conn = self.connect()?;
1781        let identity = embedder.identity();
1782        let config = validate_vector_regeneration_config(&conn, config, &identity)
1783            .map_err(|failure| failure.to_engine_error())?;
1784        let chunks = collect_regeneration_chunks(&conn)?;
1785        let payload = build_regeneration_input(&config, &identity, chunks.clone());
1786        let snapshot_hash = compute_snapshot_hash(&payload)?;
1787        let audit_metadata = VectorRegenerationAuditMetadata {
1788            profile: config.profile.clone(),
1789            model_identity: identity.model_identity.clone(),
1790            model_version: identity.model_version.clone(),
1791            chunk_count: chunks.len(),
1792            snapshot_hash: snapshot_hash.clone(),
1793            failure_class: None,
1794        };
1795        persist_vector_regeneration_event(
1796            &conn,
1797            "vector_regeneration_requested",
1798            &config.profile,
1799            &audit_metadata,
1800        )?;
1801        let notes = vec!["vector embeddings regenerated via configured embedder".to_owned()];
1802
1803        let mut embedding_map: std::collections::HashMap<String, Vec<u8>> =
1804            std::collections::HashMap::with_capacity(chunks.len());
1805        for chunk in &chunks {
1806            let vector = match embedder.embed_query(&chunk.text_content) {
1807                Ok(vector) => vector,
1808                Err(error) => {
1809                    let failure = VectorRegenerationFailure::new(
1810                        VectorRegenerationFailureClass::EmbedderFailure,
1811                        format!("embedder failed for chunk '{}': {error}", chunk.chunk_id),
1812                    );
1813                    self.persist_vector_regeneration_failure_best_effort(
1814                        &config.profile,
1815                        &audit_metadata,
1816                        &failure,
1817                    );
1818                    return Err(failure.to_engine_error());
1819                }
1820            };
1821            if vector.len() != identity.dimension {
1822                let failure = VectorRegenerationFailure::new(
1823                    VectorRegenerationFailureClass::InvalidEmbedderOutput,
1824                    format!(
1825                        "embedder produced {} values for chunk '{}', expected {}",
1826                        vector.len(),
1827                        chunk.chunk_id,
1828                        identity.dimension
1829                    ),
1830                );
1831                self.persist_vector_regeneration_failure_best_effort(
1832                    &config.profile,
1833                    &audit_metadata,
1834                    &failure,
1835                );
1836                return Err(failure.to_engine_error());
1837            }
1838            if vector.iter().any(|value| !value.is_finite()) {
1839                let failure = VectorRegenerationFailure::new(
1840                    VectorRegenerationFailureClass::InvalidEmbedderOutput,
1841                    format!(
1842                        "embedder returned non-finite values for chunk '{}'",
1843                        chunk.chunk_id
1844                    ),
1845                );
1846                self.persist_vector_regeneration_failure_best_effort(
1847                    &config.profile,
1848                    &audit_metadata,
1849                    &failure,
1850                );
1851                return Err(failure.to_engine_error());
1852            }
1853            let bytes: Vec<u8> = vector
1854                .iter()
1855                .flat_map(|value| value.to_le_bytes())
1856                .collect();
1857            embedding_map.insert(chunk.chunk_id.clone(), bytes);
1858        }
1859
1860        let mut conn = conn;
1861        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1862        match self.schema_manager.ensure_vector_profile(
1863            &tx,
1864            &config.profile,
1865            &config.table_name,
1866            identity.dimension,
1867        ) {
1868            Ok(()) => {}
1869            Err(SchemaError::MissingCapability(message)) => {
1870                let failure = VectorRegenerationFailure::new(
1871                    VectorRegenerationFailureClass::UnsupportedVecCapability,
1872                    message,
1873                );
1874                drop(tx);
1875                self.persist_vector_regeneration_failure_best_effort(
1876                    &config.profile,
1877                    &audit_metadata,
1878                    &failure,
1879                );
1880                return Err(failure.to_engine_error());
1881            }
1882            Err(error) => return Err(EngineError::Schema(error)),
1883        }
1884        let apply_chunks = collect_regeneration_chunks(&tx)?;
1885        let apply_payload = build_regeneration_input(&config, &identity, apply_chunks.clone());
1886        let apply_hash = compute_snapshot_hash(&apply_payload)?;
1887        if apply_hash != snapshot_hash {
1888            let failure = VectorRegenerationFailure::new(
1889                VectorRegenerationFailureClass::SnapshotDrift,
1890                "chunk snapshot changed during generation; retry".to_owned(),
1891            );
1892            drop(tx);
1893            self.persist_vector_regeneration_failure_best_effort(
1894                &config.profile,
1895                &audit_metadata,
1896                &failure,
1897            );
1898            return Err(failure.to_engine_error());
1899        }
1900        persist_vector_contract(&tx, &config, &identity, &snapshot_hash)?;
1901        tx.execute("DELETE FROM vec_nodes_active", [])?;
1902        let mut stmt = tx
1903            .prepare_cached("INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES (?1, ?2)")?;
1904        let mut regenerated_rows = 0usize;
1905        for chunk in &apply_chunks {
1906            let Some(embedding) = embedding_map.remove(&chunk.chunk_id) else {
1907                drop(stmt);
1908                drop(tx);
1909                let failure = VectorRegenerationFailure::new(
1910                    VectorRegenerationFailureClass::InvalidEmbedderOutput,
1911                    format!(
1912                        "embedder did not produce a vector for chunk '{}'",
1913                        chunk.chunk_id
1914                    ),
1915                );
1916                self.persist_vector_regeneration_failure_best_effort(
1917                    &config.profile,
1918                    &audit_metadata,
1919                    &failure,
1920                );
1921                return Err(failure.to_engine_error());
1922            };
1923            stmt.execute(rusqlite::params![chunk.chunk_id.as_str(), embedding])?;
1924            regenerated_rows += 1;
1925        }
1926        drop(stmt);
1927        persist_vector_regeneration_event(
1928            &tx,
1929            "vector_regeneration_apply",
1930            &config.profile,
1931            &audit_metadata,
1932        )?;
1933        tx.commit()?;
1934
1935        Ok(VectorRegenerationReport {
1936            profile: config.profile.clone(),
1937            table_name: config.table_name.clone(),
1938            dimension: identity.dimension,
1939            total_chunks: chunks.len(),
1940            regenerated_rows,
1941            contract_persisted: true,
1942            notes,
1943        })
1944    }
1945
1946    fn persist_vector_regeneration_failure_best_effort(
1947        &self,
1948        profile: &str,
1949        metadata: &VectorRegenerationAuditMetadata,
1950        failure: &VectorRegenerationFailure,
1951    ) {
1952        let Ok(conn) = self.connect() else {
1953            return;
1954        };
1955        let failure_metadata = VectorRegenerationAuditMetadata {
1956            profile: metadata.profile.clone(),
1957            model_identity: metadata.model_identity.clone(),
1958            model_version: metadata.model_version.clone(),
1959            chunk_count: metadata.chunk_count,
1960            snapshot_hash: metadata.snapshot_hash.clone(),
1961            failure_class: Some(failure.failure_class_label().to_owned()),
1962        };
1963        let _ = persist_vector_regeneration_event(
1964            &conn,
1965            "vector_regeneration_failed",
1966            profile,
1967            &failure_metadata,
1968        );
1969    }
1970
1971    /// # Errors
1972    /// Returns [`EngineError`] if the database connection fails or any SQL query fails.
1973    pub fn trace_source(&self, source_ref: &str) -> Result<TraceReport, EngineError> {
1974        let conn = self.connect()?;
1975
1976        let node_logical_ids = collect_strings(
1977            &conn,
1978            "SELECT logical_id FROM nodes WHERE source_ref = ?1 ORDER BY created_at",
1979            source_ref,
1980        )?;
1981        let action_ids = collect_strings(
1982            &conn,
1983            "SELECT id FROM actions WHERE source_ref = ?1 ORDER BY created_at",
1984            source_ref,
1985        )?;
1986        let operational_mutation_ids = collect_strings(
1987            &conn,
1988            "SELECT id FROM operational_mutations WHERE source_ref = ?1 ORDER BY mutation_order",
1989            source_ref,
1990        )?;
1991
1992        Ok(TraceReport {
1993            source_ref: source_ref.to_owned(),
1994            node_rows: count_source_ref(&conn, "nodes", source_ref)?,
1995            edge_rows: count_source_ref(&conn, "edges", source_ref)?,
1996            action_rows: count_source_ref(&conn, "actions", source_ref)?,
1997            operational_mutation_rows: count_source_ref(
1998                &conn,
1999                "operational_mutations",
2000                source_ref,
2001            )?,
2002            node_logical_ids,
2003            action_ids,
2004            operational_mutation_ids,
2005        })
2006    }
2007
2008    /// # Errors
2009    /// Returns [`EngineError`] if the database connection fails, the transaction cannot be
2010    /// started, or lifecycle restoration prerequisites are missing.
2011    #[allow(clippy::too_many_lines)]
2012    pub fn restore_logical_id(
2013        &self,
2014        logical_id: &str,
2015    ) -> Result<LogicalRestoreReport, EngineError> {
2016        let mut conn = self.connect()?;
2017        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2018
2019        let active_count: i64 = tx.query_row(
2020            "SELECT count(*) FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
2021            [logical_id],
2022            |row| row.get(0),
2023        )?;
2024        if active_count > 0 {
2025            return Ok(LogicalRestoreReport {
2026                logical_id: logical_id.to_owned(),
2027                was_noop: true,
2028                restored_node_rows: 0,
2029                restored_edge_rows: 0,
2030                restored_chunk_rows: 0,
2031                restored_fts_rows: 0,
2032                restored_property_fts_rows: 0,
2033                restored_vec_rows: 0,
2034                skipped_edges: Vec::new(),
2035                notes: vec!["logical_id already active".to_owned()],
2036            });
2037        }
2038
2039        let restored_node: Option<(String, String)> = tx
2040            .query_row(
2041                "SELECT row_id, kind FROM nodes \
2042                 WHERE logical_id = ?1 AND superseded_at IS NOT NULL \
2043                 ORDER BY superseded_at DESC, created_at DESC, rowid DESC LIMIT 1",
2044                [logical_id],
2045                |row| Ok((row.get(0)?, row.get(1)?)),
2046            )
2047            .optional()?;
2048        let (restored_node_row_id, restored_kind) = restored_node.ok_or_else(|| {
2049            EngineError::InvalidWrite(format!("logical_id '{logical_id}' is not retired"))
2050        })?;
2051
2052        tx.execute(
2053            "UPDATE nodes SET superseded_at = NULL WHERE row_id = ?1",
2054            [restored_node_row_id.as_str()],
2055        )?;
2056
2057        let retire_scope: Option<(i64, Option<String>, i64)> = tx
2058            .query_row(
2059                "SELECT rowid, source_ref, created_at FROM provenance_events \
2060                 WHERE event_type = 'node_retire' AND subject = ?1 \
2061                 ORDER BY created_at DESC, rowid DESC LIMIT 1",
2062                [logical_id],
2063                |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
2064            )
2065            .optional()?;
2066        let (restored_edge_rows, skipped_edges) = if let Some((
2067            retire_event_rowid,
2068            retire_source_ref,
2069            retire_created_at,
2070        )) = retire_scope
2071        {
2072            restore_validated_edges(
2073                &tx,
2074                logical_id,
2075                retire_source_ref.as_deref(),
2076                retire_created_at,
2077                retire_event_rowid,
2078            )?
2079        } else {
2080            (0, Vec::new())
2081        };
2082
2083        let restored_chunk_rows: usize = tx
2084            .query_row(
2085                "SELECT count(*) FROM chunks WHERE node_logical_id = ?1",
2086                [logical_id],
2087                |row| row.get::<_, i64>(0),
2088            )
2089            .map(i64_to_usize)?;
2090        tx.execute(
2091            "DELETE FROM fts_nodes WHERE node_logical_id = ?1",
2092            [logical_id],
2093        )?;
2094        let restored_fts_rows = tx.execute(
2095            "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
2096             SELECT id, node_logical_id, ?2, text_content \
2097             FROM chunks WHERE node_logical_id = ?1",
2098            rusqlite::params![logical_id, restored_kind],
2099        )?;
2100        let restored_vec_rows = count_vec_rows_for_logical_id(&tx, logical_id)?;
2101
2102        // Rebuild property FTS for the restored node.
2103        tx.execute(
2104            "DELETE FROM fts_node_properties WHERE node_logical_id = ?1",
2105            [logical_id],
2106        )?;
2107        let restored_property_fts_rows =
2108            rebuild_single_node_property_fts(&tx, logical_id, &restored_kind)?;
2109
2110        persist_simple_provenance_event(
2111            &tx,
2112            "restore_logical_id",
2113            logical_id,
2114            Some(serde_json::json!({
2115                "restored_node_rows": 1,
2116                "restored_edge_rows": restored_edge_rows,
2117                "restored_chunk_rows": restored_chunk_rows,
2118                "restored_fts_rows": restored_fts_rows,
2119                "restored_property_fts_rows": restored_property_fts_rows,
2120                "restored_vec_rows": restored_vec_rows,
2121            })),
2122        )?;
2123        tx.commit()?;
2124
2125        Ok(LogicalRestoreReport {
2126            logical_id: logical_id.to_owned(),
2127            was_noop: false,
2128            restored_node_rows: 1,
2129            restored_edge_rows,
2130            restored_chunk_rows,
2131            restored_fts_rows,
2132            restored_property_fts_rows,
2133            restored_vec_rows,
2134            skipped_edges,
2135            notes: Vec::new(),
2136        })
2137    }
2138
2139    /// # Errors
2140    /// Returns [`EngineError`] if the database connection fails, the transaction cannot be
2141    /// started, or the purge mutation fails.
2142    pub fn purge_logical_id(&self, logical_id: &str) -> Result<LogicalPurgeReport, EngineError> {
2143        let mut conn = self.connect()?;
2144        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2145
2146        let active_count: i64 = tx.query_row(
2147            "SELECT count(*) FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
2148            [logical_id],
2149            |row| row.get(0),
2150        )?;
2151        if active_count > 0 {
2152            return Ok(LogicalPurgeReport {
2153                logical_id: logical_id.to_owned(),
2154                was_noop: true,
2155                deleted_node_rows: 0,
2156                deleted_edge_rows: 0,
2157                deleted_chunk_rows: 0,
2158                deleted_fts_rows: 0,
2159                deleted_vec_rows: 0,
2160                notes: vec!["logical_id is active; purge skipped".to_owned()],
2161            });
2162        }
2163
2164        let node_rows: i64 = tx.query_row(
2165            "SELECT count(*) FROM nodes WHERE logical_id = ?1",
2166            [logical_id],
2167            |row| row.get(0),
2168        )?;
2169        if node_rows == 0 {
2170            return Err(EngineError::InvalidWrite(format!(
2171                "logical_id '{logical_id}' does not exist"
2172            )));
2173        }
2174
2175        let deleted_vec_rows = delete_vec_rows_for_logical_id(&tx, logical_id)?;
2176        let deleted_fts_rows = tx.execute(
2177            "DELETE FROM fts_nodes WHERE node_logical_id = ?1",
2178            [logical_id],
2179        )?;
2180        let deleted_edge_rows = tx.execute(
2181            "DELETE FROM edges WHERE source_logical_id = ?1 OR target_logical_id = ?1",
2182            [logical_id],
2183        )?;
2184        let deleted_chunk_rows = tx.execute(
2185            "DELETE FROM chunks WHERE node_logical_id = ?1",
2186            [logical_id],
2187        )?;
2188        let deleted_node_rows =
2189            tx.execute("DELETE FROM nodes WHERE logical_id = ?1", [logical_id])?;
2190        tx.execute(
2191            "DELETE FROM node_access_metadata WHERE logical_id = ?1",
2192            [logical_id],
2193        )?;
2194
2195        persist_simple_provenance_event(
2196            &tx,
2197            "purge_logical_id",
2198            logical_id,
2199            Some(serde_json::json!({
2200                "deleted_node_rows": deleted_node_rows,
2201                "deleted_edge_rows": deleted_edge_rows,
2202                "deleted_chunk_rows": deleted_chunk_rows,
2203                "deleted_fts_rows": deleted_fts_rows,
2204                "deleted_vec_rows": deleted_vec_rows,
2205            })),
2206        )?;
2207        tx.commit()?;
2208
2209        Ok(LogicalPurgeReport {
2210            logical_id: logical_id.to_owned(),
2211            was_noop: false,
2212            deleted_node_rows,
2213            deleted_edge_rows,
2214            deleted_chunk_rows,
2215            deleted_fts_rows,
2216            deleted_vec_rows,
2217            notes: Vec::new(),
2218        })
2219    }
2220
2221    /// Purge provenance events older than `before_timestamp`.
2222    ///
2223    /// By default, `excise` and `purge_logical_id` event types are preserved so that
2224    /// data-deletion audit trails survive. Pass an explicit
2225    /// `preserve_event_types` list to override this default.
2226    ///
2227    /// # Errors
2228    /// Returns [`EngineError`] if the database connection fails, the transaction
2229    /// cannot be started, or any SQL statement fails.
2230    pub fn purge_provenance_events(
2231        &self,
2232        before_timestamp: i64,
2233        options: &ProvenancePurgeOptions,
2234    ) -> Result<ProvenancePurgeReport, EngineError> {
2235        let mut conn = self.connect()?;
2236        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2237
2238        let preserved_types: Vec<&str> = if options.preserve_event_types.is_empty() {
2239            vec!["excise", "purge_logical_id"]
2240        } else {
2241            options
2242                .preserve_event_types
2243                .iter()
2244                .map(String::as_str)
2245                .collect()
2246        };
2247
2248        // Build the NOT IN clause dynamically based on preserved types.
2249        let placeholders: String = (0..preserved_types.len())
2250            .map(|i| format!("?{}", i + 2))
2251            .collect::<Vec<_>>()
2252            .join(", ");
2253        let count_query = format!(
2254            "SELECT count(*) FROM provenance_events \
2255             WHERE created_at < ?1 AND event_type NOT IN ({placeholders})"
2256        );
2257        let delete_query = format!(
2258            "DELETE FROM provenance_events WHERE rowid IN (\
2259             SELECT rowid FROM provenance_events \
2260             WHERE created_at < ?1 AND event_type NOT IN ({placeholders}) \
2261             LIMIT 10000)"
2262        );
2263
2264        let bind_params = |stmt: &mut rusqlite::Statement<'_>| -> Result<(), rusqlite::Error> {
2265            stmt.raw_bind_parameter(1, before_timestamp)?;
2266            for (i, event_type) in preserved_types.iter().enumerate() {
2267                stmt.raw_bind_parameter(i + 2, *event_type)?;
2268            }
2269            Ok(())
2270        };
2271
2272        let events_deleted = if options.dry_run {
2273            let mut stmt = tx.prepare(&count_query)?;
2274            bind_params(&mut stmt)?;
2275            stmt.raw_query()
2276                .next()?
2277                .map_or(0, |row| row.get::<_, u64>(0).unwrap_or(0))
2278        } else {
2279            let mut total_deleted: u64 = 0;
2280            loop {
2281                let mut stmt = tx.prepare(&delete_query)?;
2282                bind_params(&mut stmt)?;
2283                let deleted = stmt.raw_execute()?;
2284                if deleted == 0 {
2285                    break;
2286                }
2287                total_deleted += deleted as u64;
2288            }
2289            total_deleted
2290        };
2291
2292        let total_after: u64 =
2293            tx.query_row("SELECT count(*) FROM provenance_events", [], |row| {
2294                row.get(0)
2295            })?;
2296
2297        let oldest_remaining: Option<i64> = tx
2298            .query_row("SELECT MIN(created_at) FROM provenance_events", [], |row| {
2299                row.get(0)
2300            })
2301            .optional()?
2302            .flatten();
2303
2304        if !options.dry_run {
2305            tx.commit()?;
2306        }
2307
2308        // In dry_run mode nothing was deleted, so total_after includes the
2309        // would-be-deleted rows; subtract to get the preserved count.
2310        let events_preserved = if options.dry_run {
2311            total_after - events_deleted
2312        } else {
2313            total_after
2314        };
2315
2316        Ok(ProvenancePurgeReport {
2317            events_deleted,
2318            events_preserved,
2319            oldest_remaining,
2320        })
2321    }
2322
2323    /// # Errors
2324    /// Returns [`EngineError`] if the database connection fails, the transaction cannot be
2325    /// started, or any SQL statement fails.
2326    #[allow(clippy::too_many_lines)]
2327    pub fn excise_source(&self, source_ref: &str) -> Result<TraceReport, EngineError> {
2328        let mut conn = self.connect()?;
2329
2330        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2331        let affected_operational_collections = collect_strings_tx(
2332            &tx,
2333            "SELECT DISTINCT m.collection_name \
2334             FROM operational_mutations m \
2335             JOIN operational_collections c ON c.name = m.collection_name \
2336             WHERE m.source_ref = ?1 AND c.kind = 'latest_state' \
2337             ORDER BY m.collection_name",
2338            source_ref,
2339        )?;
2340
2341        // Collect (row_id, logical_id) for active rows that will be excised.
2342        let pairs: Vec<(String, String)> = {
2343            let mut stmt = tx.prepare(
2344                "SELECT row_id, logical_id FROM nodes \
2345                 WHERE source_ref = ?1 AND superseded_at IS NULL",
2346            )?;
2347            stmt.query_map([source_ref], |row| {
2348                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
2349            })?
2350            .collect::<Result<Vec<_>, _>>()?
2351        };
2352        let affected_logical_ids: Vec<String> = pairs
2353            .iter()
2354            .map(|(_, logical_id)| logical_id.clone())
2355            .collect();
2356
2357        // Supersede bad rows in all tables.
2358        tx.execute(
2359            "UPDATE nodes SET superseded_at = unixepoch() \
2360             WHERE source_ref = ?1 AND superseded_at IS NULL",
2361            [source_ref],
2362        )?;
2363        tx.execute(
2364            "UPDATE edges SET superseded_at = unixepoch() \
2365             WHERE source_ref = ?1 AND superseded_at IS NULL",
2366            [source_ref],
2367        )?;
2368        tx.execute(
2369            "UPDATE actions SET superseded_at = unixepoch() \
2370             WHERE source_ref = ?1 AND superseded_at IS NULL",
2371            [source_ref],
2372        )?;
2373        clear_operational_current_rows(&tx, &affected_operational_collections)?;
2374        tx.execute(
2375            "DELETE FROM operational_mutations WHERE source_ref = ?1",
2376            [source_ref],
2377        )?;
2378        for logical_id in &affected_logical_ids {
2379            delete_vec_rows_for_logical_id(&tx, logical_id)?;
2380            tx.execute(
2381                "DELETE FROM chunks WHERE node_logical_id = ?1",
2382                [logical_id.as_str()],
2383            )?;
2384        }
2385
2386        // Restore the most recent prior version for each affected logical_id.
2387        for (excised_row_id, logical_id) in &pairs {
2388            let prior: Option<String> = tx
2389                .query_row(
2390                    "SELECT row_id FROM nodes \
2391                     WHERE logical_id = ?1 AND row_id != ?2 \
2392                     ORDER BY created_at DESC LIMIT 1",
2393                    [logical_id.as_str(), excised_row_id.as_str()],
2394                    |row| row.get(0),
2395                )
2396                .optional()?;
2397            if let Some(prior_id) = prior {
2398                tx.execute(
2399                    "UPDATE nodes SET superseded_at = NULL WHERE row_id = ?1",
2400                    [prior_id.as_str()],
2401                )?;
2402            }
2403        }
2404
2405        for logical_id in &affected_logical_ids {
2406            let has_active_node = tx
2407                .query_row(
2408                    "SELECT 1 FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL LIMIT 1",
2409                    [logical_id.as_str()],
2410                    |row| row.get::<_, i64>(0),
2411                )
2412                .optional()?
2413                .is_some();
2414            if !has_active_node {
2415                tx.execute(
2416                    "DELETE FROM node_access_metadata WHERE logical_id = ?1",
2417                    [logical_id.as_str()],
2418                )?;
2419            }
2420        }
2421
2422        rebuild_operational_current_rows(&tx, &affected_operational_collections)?;
2423
2424        // Rebuild FTS atomically within the same transaction so readers never
2425        // observe a post-excise node state with a stale FTS index.
2426        tx.execute("DELETE FROM fts_nodes", [])?;
2427        tx.execute(
2428            r"
2429            INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content)
2430            SELECT c.id, n.logical_id, n.kind, c.text_content
2431            FROM chunks c
2432            JOIN nodes n
2433              ON n.logical_id = c.node_logical_id
2434             AND n.superseded_at IS NULL
2435            ",
2436            [],
2437        )?;
2438
2439        // Rebuild property FTS in the same transaction.
2440        rebuild_property_fts_in_tx(&tx)?;
2441
2442        // Record the audit event inside the same transaction so the excision and its
2443        // audit record are committed atomically — no window where the excision is
2444        // durable but unaudited.
2445        tx.execute(
2446            "INSERT INTO provenance_events (id, event_type, subject, source_ref) \
2447             VALUES (?1, 'excise_source', ?2, ?2)",
2448            rusqlite::params![new_id(), source_ref],
2449        )?;
2450
2451        tx.commit()?;
2452
2453        self.trace_source(source_ref)
2454    }
2455
2456    /// # Errors
2457    /// Returns [`EngineError`] if the WAL checkpoint fails, the `SQLite` backup fails,
2458    /// the SHA-256 digest cannot be computed, or the manifest file cannot be written.
2459    pub fn safe_export(
2460        &self,
2461        destination_path: impl AsRef<Path>,
2462        options: SafeExportOptions,
2463    ) -> Result<SafeExportManifest, EngineError> {
2464        let destination_path = destination_path.as_ref();
2465
2466        // 1. Optionally checkpoint WAL before exporting. This keeps the on-disk file tidy for
2467        // callers that want a fully checkpointed export, but export correctness does not depend
2468        // on it because the backup API copies from the live SQLite connection state.
2469        let conn = self.connect()?;
2470
2471        if options.force_checkpoint {
2472            trace_info!("safe_export: wal checkpoint started");
2473            let (busy, log, checkpointed): (i64, i64, i64) =
2474                conn.query_row("PRAGMA wal_checkpoint(FULL)", [], |row| {
2475                    Ok((row.get(0)?, row.get(1)?, row.get(2)?))
2476                })?;
2477            if busy != 0 {
2478                trace_warn!(
2479                    busy,
2480                    log_frames = log,
2481                    checkpointed_frames = checkpointed,
2482                    "safe_export: wal checkpoint blocked by active readers"
2483                );
2484                return Err(EngineError::Bridge(format!(
2485                    "WAL checkpoint blocked: {busy} active reader(s) prevented a full checkpoint; \
2486                     log frames={log}, checkpointed={checkpointed}; \
2487                     retry export when no readers are active"
2488                )));
2489            }
2490            trace_info!(
2491                log_frames = log,
2492                checkpointed_frames = checkpointed,
2493                "safe_export: wal checkpoint completed"
2494            );
2495        }
2496
2497        let schema_version: u32 = conn
2498            .query_row(
2499                "SELECT COALESCE(MAX(version), 0) FROM fathom_schema_migrations",
2500                [],
2501                |row| row.get(0),
2502            )
2503            .unwrap_or(0);
2504
2505        // 2. Export the database through SQLite's online backup API so committed data in the WAL
2506        // is included even when `force_checkpoint` is false.
2507        if let Some(parent) = destination_path.parent() {
2508            fs::create_dir_all(parent)?;
2509        }
2510        conn.backup(DatabaseName::Main, destination_path, None)?;
2511
2512        drop(conn);
2513
2514        // 2b. Query page_count from the EXPORTED file so the manifest reflects what was
2515        // actually backed up, not the source (which may have changed between the PRAGMA
2516        // and the backup call).
2517        let page_count: u64 = {
2518            let export_conn = rusqlite::Connection::open_with_flags(
2519                destination_path,
2520                rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY
2521                    | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX,
2522            )?;
2523            export_conn.query_row("PRAGMA page_count", [], |row| row.get(0))?
2524        };
2525
2526        // 3. Compute SHA-256 of the exported file.
2527        // FIX(review): was fs::read loading entire DB into memory; use streaming hash.
2528        let sha256 = {
2529            let mut file = fs::File::open(destination_path)?;
2530            let mut hasher = Sha256::new();
2531            io::copy(&mut file, &mut hasher)?;
2532            format!("{:x}", hasher.finalize())
2533        };
2534
2535        // 4. Record when the export was created.
2536        let exported_at = SystemTime::now()
2537            .duration_since(SystemTime::UNIX_EPOCH)
2538            .map_err(|e| EngineError::Bridge(format!("system clock error: {e}")))?
2539            .as_secs();
2540
2541        let manifest = SafeExportManifest {
2542            exported_at,
2543            sha256,
2544            schema_version,
2545            protocol_version: EXPORT_PROTOCOL_VERSION,
2546            page_count,
2547        };
2548
2549        // 5. Write manifest alongside the exported file, using Path API for the name.
2550        let manifest_path = {
2551            let mut p = destination_path.to_path_buf();
2552            let stem = p
2553                .file_name()
2554                .map(|n| format!("{}.export-manifest.json", n.to_string_lossy()))
2555                .ok_or_else(|| {
2556                    EngineError::Bridge("destination path has no filename".to_owned())
2557                })?;
2558            p.set_file_name(stem);
2559            p
2560        };
2561        let manifest_json =
2562            serde_json::to_string(&manifest).map_err(|e| EngineError::Bridge(e.to_string()))?;
2563
2564        // Atomic manifest write: write to a temp file then rename so readers never
2565        // observe a partially-written manifest.
2566        let manifest_tmp = manifest_path.with_extension("json.tmp");
2567        if let Err(e) = fs::write(&manifest_tmp, &manifest_json)
2568            .and_then(|()| fs::rename(&manifest_tmp, &manifest_path))
2569        {
2570            let _ = fs::remove_file(&manifest_tmp);
2571            return Err(e.into());
2572        }
2573
2574        Ok(manifest)
2575    }
2576}
2577
/// Record describing one vector embedding contract.
///
/// The field names match the columns that `persist_vector_contract` writes to
/// `vector_embedding_contracts` (all of them except `updated_at`), so this is
/// presumably the row shape of that table — confirm against the schema.
/// `dead_code` is allowed on the struct, so it may currently be unused.
#[allow(dead_code)]
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
struct VectorEmbeddingContractRecord {
    profile: String,
    table_name: String,
    model_identity: String,
    model_version: String,
    dimension: usize,
    normalization_policy: String,
    chunking_policy: String,
    preprocessing_policy: String,
    generator_command_json: String,
    applied_at: i64,
    snapshot_hash: String,
    contract_format_version: i64,
}
2594
/// Serializable description of a single chunk, used as an element of
/// [`VectorRegenerationInput::chunks`].
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
struct VectorRegenerationInputChunk {
    chunk_id: String,
    node_logical_id: String,
    kind: String,
    text_content: String,
    // Optional byte range; presumably offsets into the source text — TODO confirm.
    byte_start: Option<i64>,
    byte_end: Option<i64>,
    source_ref: Option<String>,
    created_at: i64,
}
2606
/// Serializable bundle of the embedding settings (profile, model identity,
/// policies, dimension) together with the chunks they apply to.
///
/// NOTE(review): presumably the payload that is serialized to derive the
/// regeneration snapshot hash — confirm against the regeneration code.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
struct VectorRegenerationInput {
    profile: String,
    table_name: String,
    model_identity: String,
    model_version: String,
    dimension: usize,
    normalization_policy: String,
    chunking_policy: String,
    preprocessing_policy: String,
    chunks: Vec<VectorRegenerationInputChunk>,
}
2619
/// Category of a vector regeneration failure.
///
/// Each variant has a human-readable label (`label`), and only
/// `SnapshotDrift` is reported as retryable (`retryable`).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum VectorRegenerationFailureClass {
    InvalidContract,
    EmbedderFailure,
    InvalidEmbedderOutput,
    SnapshotDrift,
    UnsupportedVecCapability,
}
2628
2629impl VectorRegenerationFailureClass {
2630    fn label(self) -> &'static str {
2631        match self {
2632            Self::InvalidContract => "invalid contract",
2633            Self::EmbedderFailure => "embedder failure",
2634            Self::InvalidEmbedderOutput => "invalid embedder output",
2635            Self::SnapshotDrift => "snapshot drift",
2636            Self::UnsupportedVecCapability => "unsupported vec capability",
2637        }
2638    }
2639
2640    fn retryable(self) -> bool {
2641        matches!(self, Self::SnapshotDrift)
2642    }
2643}
2644
/// A vector regeneration failure: its class plus a free-form detail message.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct VectorRegenerationFailure {
    // Category of the failure; decides the label and the retryable marker.
    class: VectorRegenerationFailureClass,
    // Human-readable explanation appended to the error message.
    detail: String,
}
2650
2651impl VectorRegenerationFailure {
2652    pub(crate) fn new(class: VectorRegenerationFailureClass, detail: impl Into<String>) -> Self {
2653        Self {
2654            class,
2655            detail: detail.into(),
2656        }
2657    }
2658
2659    fn to_engine_error(&self) -> EngineError {
2660        let retry_suffix = if self.class.retryable() {
2661            " [retryable]"
2662        } else {
2663            ""
2664        };
2665        EngineError::Bridge(format!(
2666            "vector regeneration {}: {}{}",
2667            self.class.label(),
2668            self.detail,
2669            retry_suffix
2670        ))
2671    }
2672
2673    fn failure_class_label(&self) -> &'static str {
2674        self.class.label()
2675    }
2676}
2677
/// Metadata attached to a vector regeneration provenance event.
///
/// `persist_vector_regeneration_event` serializes it into the
/// `metadata_json` column of `provenance_events`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
struct VectorRegenerationAuditMetadata {
    profile: String,
    model_identity: String,
    model_version: String,
    chunk_count: usize,
    snapshot_hash: String,
    // Left out of the serialized output entirely when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    failure_class: Option<String>,
}
2688
/// Retention policy of an operational collection.
///
/// Deserialized from an internally tagged object whose `mode` field selects
/// the variant in snake case: `{"mode": "keep_all"}`,
/// `{"mode": "purge_before_seconds", "max_age_seconds": N}` or
/// `{"mode": "keep_last", "max_rows": N}`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize)]
#[serde(tag = "mode", rename_all = "snake_case")]
enum OperationalRetentionPolicy {
    KeepAll,
    PurgeBeforeSeconds { max_age_seconds: i64 },
    KeepLast { max_rows: usize },
}
2696
2697/// # Errors
2698/// Returns [`EngineError`] if the file cannot be read or the config is invalid.
2699pub fn load_vector_regeneration_config(
2700    path: impl AsRef<Path>,
2701) -> Result<VectorRegenerationConfig, EngineError> {
2702    let path = path.as_ref();
2703    let raw = fs::read_to_string(path)?;
2704    match path.extension().and_then(|ext| ext.to_str()) {
2705        Some("toml") => {
2706            toml::from_str(&raw).map_err(|error| EngineError::Bridge(error.to_string()))
2707        }
2708        Some("json") | None => {
2709            serde_json::from_str(&raw).map_err(|error| EngineError::Bridge(error.to_string()))
2710        }
2711        Some(other) => Err(EngineError::Bridge(format!(
2712            "unsupported vector regeneration config extension: {other}"
2713        ))),
2714    }
2715}
2716
2717fn validate_vector_regeneration_config(
2718    conn: &rusqlite::Connection,
2719    config: &VectorRegenerationConfig,
2720    identity: &QueryEmbedderIdentity,
2721) -> Result<VectorRegenerationConfig, VectorRegenerationFailure> {
2722    let profile = validate_bounded_text("profile", &config.profile, MAX_PROFILE_LEN)?;
2723    let table_name = validate_bounded_text("table_name", &config.table_name, MAX_PROFILE_LEN)?;
2724    if table_name != "vec_nodes_active" {
2725        return Err(VectorRegenerationFailure::new(
2726            VectorRegenerationFailureClass::InvalidContract,
2727            format!("table_name must be vec_nodes_active, got '{table_name}'"),
2728        ));
2729    }
2730    if identity.dimension == 0 {
2731        return Err(VectorRegenerationFailure::new(
2732            VectorRegenerationFailureClass::InvalidContract,
2733            "embedder reports dimension 0".to_owned(),
2734        ));
2735    }
2736    let chunking_policy =
2737        validate_bounded_text("chunking_policy", &config.chunking_policy, MAX_POLICY_LEN)?;
2738    let preprocessing_policy = validate_bounded_text(
2739        "preprocessing_policy",
2740        &config.preprocessing_policy,
2741        MAX_POLICY_LEN,
2742    )?;
2743
2744    if let Some(existing_dimension) = current_vector_profile_dimension(conn, &profile)?
2745        && existing_dimension != identity.dimension
2746    {
2747        return Err(VectorRegenerationFailure::new(
2748            VectorRegenerationFailureClass::InvalidContract,
2749            format!(
2750                "embedder dimension {} does not match existing vector profile dimension {}",
2751                identity.dimension, existing_dimension
2752            ),
2753        ));
2754    }
2755
2756    validate_existing_contract_version(conn, &profile)?;
2757
2758    let normalized = VectorRegenerationConfig {
2759        profile,
2760        table_name,
2761        chunking_policy,
2762        preprocessing_policy,
2763    };
2764    let serialized = serde_json::to_vec(&normalized).map_err(|error| {
2765        VectorRegenerationFailure::new(
2766            VectorRegenerationFailureClass::InvalidContract,
2767            error.to_string(),
2768        )
2769    })?;
2770    if serialized.len() > MAX_CONTRACT_JSON_BYTES {
2771        return Err(VectorRegenerationFailure::new(
2772            VectorRegenerationFailureClass::InvalidContract,
2773            format!("serialized contract exceeds {MAX_CONTRACT_JSON_BYTES} bytes"),
2774        ));
2775    }
2776
2777    Ok(normalized)
2778}
2779
2780#[allow(clippy::cast_possible_wrap)]
2781fn persist_vector_contract(
2782    conn: &rusqlite::Connection,
2783    config: &VectorRegenerationConfig,
2784    identity: &QueryEmbedderIdentity,
2785    snapshot_hash: &str,
2786) -> Result<(), EngineError> {
2787    conn.execute(
2788        r"
2789        INSERT OR REPLACE INTO vector_embedding_contracts (
2790            profile,
2791            table_name,
2792            model_identity,
2793            model_version,
2794            dimension,
2795            normalization_policy,
2796            chunking_policy,
2797            preprocessing_policy,
2798            generator_command_json,
2799            applied_at,
2800            snapshot_hash,
2801            contract_format_version,
2802            updated_at
2803        ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, unixepoch(), ?10, ?11, unixepoch())
2804        ",
2805        rusqlite::params![
2806            config.profile.as_str(),
2807            config.table_name.as_str(),
2808            identity.model_identity.as_str(),
2809            identity.model_version.as_str(),
2810            identity.dimension as i64,
2811            identity.normalization_policy.as_str(),
2812            config.chunking_policy.as_str(),
2813            config.preprocessing_policy.as_str(),
2814            "[]",
2815            snapshot_hash,
2816            CURRENT_VECTOR_CONTRACT_FORMAT_VERSION,
2817        ],
2818    )?;
2819    Ok(())
2820}
2821
2822fn persist_vector_regeneration_event(
2823    conn: &rusqlite::Connection,
2824    event_type: &str,
2825    subject: &str,
2826    metadata: &VectorRegenerationAuditMetadata,
2827) -> Result<(), EngineError> {
2828    let metadata_json = serialize_audit_metadata(metadata)?;
2829    conn.execute(
2830        "INSERT INTO provenance_events (id, event_type, subject, metadata_json) VALUES (?1, ?2, ?3, ?4)",
2831        rusqlite::params![new_id(), event_type, subject, metadata_json],
2832    )?;
2833    Ok(())
2834}
2835
2836fn persist_simple_provenance_event(
2837    conn: &rusqlite::Connection,
2838    event_type: &str,
2839    subject: &str,
2840    metadata: Option<serde_json::Value>,
2841) -> Result<(), EngineError> {
2842    let metadata_json = metadata.map(|value| value.to_string()).unwrap_or_default();
2843    conn.execute(
2844        "INSERT INTO provenance_events (id, event_type, subject, metadata_json) VALUES (?1, ?2, ?3, ?4)",
2845        rusqlite::params![new_id(), event_type, subject, metadata_json],
2846    )?;
2847    Ok(())
2848}
2849
2850/// Count active nodes that should have a property FTS row (extraction yields a value)
2851/// but don't. Uses the same extraction logic as write/rebuild to avoid false positives
2852/// for nodes whose declared paths legitimately normalize to no values.
2853fn count_missing_property_fts_rows(conn: &rusqlite::Connection) -> Result<i64, EngineError> {
2854    let schemas = crate::writer::load_fts_property_schemas(conn)?;
2855    if schemas.is_empty() {
2856        return Ok(0);
2857    }
2858
2859    let mut missing = 0i64;
2860    for (kind, schema) in &schemas {
2861        let mut stmt = conn.prepare(
2862            "SELECT n.logical_id, n.properties FROM nodes n \
2863             WHERE n.kind = ?1 AND n.superseded_at IS NULL \
2864               AND NOT EXISTS (SELECT 1 FROM fts_node_properties fp WHERE fp.node_logical_id = n.logical_id)",
2865        )?;
2866        let rows = stmt.query_map([kind.as_str()], |row| {
2867            Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
2868        })?;
2869        for row in rows {
2870            let (_logical_id, properties_str) = row?;
2871            let props: serde_json::Value =
2872                serde_json::from_str(&properties_str).unwrap_or_default();
2873            if crate::writer::extract_property_fts(&props, schema)
2874                .0
2875                .is_some()
2876            {
2877                missing += 1;
2878            }
2879        }
2880    }
2881    Ok(missing)
2882}
2883
2884/// Count property FTS rows whose `text_content` has drifted from the current canonical
2885/// value computed by `compute_property_fts_text(...)`. This catches:
2886/// - rows whose text no longer matches the current node properties and schema
2887/// - rows that should have been removed (extraction now yields no value)
2888fn count_drifted_property_fts_rows(conn: &rusqlite::Connection) -> Result<i64, EngineError> {
2889    let schemas = crate::writer::load_fts_property_schemas(conn)?;
2890    if schemas.is_empty() {
2891        return Ok(0);
2892    }
2893
2894    let mut drifted = 0i64;
2895    for (kind, schema) in &schemas {
2896        let mut stmt = conn.prepare(
2897            "SELECT fp.node_logical_id, fp.text_content, n.properties \
2898             FROM fts_node_properties fp \
2899             JOIN nodes n ON n.logical_id = fp.node_logical_id AND n.superseded_at IS NULL \
2900             WHERE fp.kind = ?1 AND n.kind = ?1",
2901        )?;
2902        let rows = stmt.query_map([kind.as_str()], |row| {
2903            Ok((
2904                row.get::<_, String>(0)?,
2905                row.get::<_, String>(1)?,
2906                row.get::<_, String>(2)?,
2907            ))
2908        })?;
2909        for row in rows {
2910            let (_logical_id, stored_text, properties_str) = row?;
2911            let props: serde_json::Value =
2912                serde_json::from_str(&properties_str).unwrap_or_default();
2913            let (expected, _positions, _stats) =
2914                crate::writer::extract_property_fts(&props, schema);
2915            match expected {
2916                Some(text) if text == stored_text => {}
2917                _ => drifted += 1,
2918            }
2919        }
2920    }
2921    Ok(drifted)
2922}
2923
2924/// Rebuild property FTS rows from canonical state within an existing transaction.
2925fn rebuild_property_fts_in_tx(conn: &rusqlite::Connection) -> Result<usize, EngineError> {
2926    conn.execute("DELETE FROM fts_node_properties", [])?;
2927    conn.execute("DELETE FROM fts_node_property_positions", [])?;
2928    let inserted = crate::projection::insert_property_fts_rows(
2929        conn,
2930        "SELECT logical_id, properties FROM nodes WHERE kind = ?1 AND superseded_at IS NULL",
2931    )?;
2932    Ok(inserted)
2933}
2934
2935/// Rebuild property FTS for a single node. Returns 1 if a row was inserted, 0 otherwise.
2936/// The caller must delete any existing `fts_node_properties` row for this node first.
2937fn rebuild_single_node_property_fts(
2938    conn: &rusqlite::Connection,
2939    logical_id: &str,
2940    kind: &str,
2941) -> Result<usize, EngineError> {
2942    let schema: Option<(String, String)> = conn
2943        .query_row(
2944            "SELECT property_paths_json, separator FROM fts_property_schemas WHERE kind = ?1",
2945            [kind],
2946            |row| {
2947                let paths_json: String = row.get(0)?;
2948                let separator: String = row.get(1)?;
2949                Ok((paths_json, separator))
2950            },
2951        )
2952        .optional()?;
2953    let Some((paths_json, separator)) = schema else {
2954        return Ok(0);
2955    };
2956    let parsed = crate::writer::parse_property_schema_json(&paths_json, &separator);
2957    let properties_str: Option<String> = conn
2958        .query_row(
2959            "SELECT properties FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
2960            [logical_id],
2961            |row| row.get(0),
2962        )
2963        .optional()?;
2964    let Some(properties_str) = properties_str else {
2965        return Ok(0);
2966    };
2967    let props: serde_json::Value = serde_json::from_str(&properties_str).unwrap_or_default();
2968    let (text, positions, _stats) = crate::writer::extract_property_fts(&props, &parsed);
2969    let Some(text) = text else {
2970        return Ok(0);
2971    };
2972    conn.execute(
2973        "DELETE FROM fts_node_property_positions WHERE node_logical_id = ?1",
2974        rusqlite::params![logical_id],
2975    )?;
2976    conn.execute(
2977        "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) VALUES (?1, ?2, ?3)",
2978        rusqlite::params![logical_id, kind, text],
2979    )?;
2980    for pos in &positions {
2981        conn.execute(
2982            "INSERT INTO fts_node_property_positions \
2983             (node_logical_id, kind, start_offset, end_offset, leaf_path) \
2984             VALUES (?1, ?2, ?3, ?4, ?5)",
2985            rusqlite::params![
2986                logical_id,
2987                kind,
2988                i64::try_from(pos.start_offset).unwrap_or(i64::MAX),
2989                i64::try_from(pos.end_offset).unwrap_or(i64::MAX),
2990                pos.leaf_path,
2991            ],
2992        )?;
2993    }
2994    Ok(1)
2995}
2996
2997fn serialize_property_paths_json(
2998    entries: &[FtsPropertyPathSpec],
2999    exclude_paths: &[String],
3000) -> Result<String, EngineError> {
3001    // Scalar-only schemas with no exclude_paths are serialised in the
3002    // legacy shape (bare array of strings) for full backwards
3003    // compatibility with earlier schema versions.
3004    let all_scalar = entries
3005        .iter()
3006        .all(|e| e.mode == FtsPropertyPathMode::Scalar);
3007    if all_scalar && exclude_paths.is_empty() {
3008        let paths: Vec<&str> = entries.iter().map(|e| e.path.as_str()).collect();
3009        return serde_json::to_string(&paths).map_err(|e| {
3010            EngineError::InvalidWrite(format!("failed to serialize property paths: {e}"))
3011        });
3012    }
3013
3014    let mut obj = serde_json::Map::new();
3015    let paths_json: Vec<serde_json::Value> = entries
3016        .iter()
3017        .map(|e| {
3018            let mode_str = match e.mode {
3019                FtsPropertyPathMode::Scalar => "scalar",
3020                FtsPropertyPathMode::Recursive => "recursive",
3021            };
3022            serde_json::json!({ "path": e.path, "mode": mode_str })
3023        })
3024        .collect();
3025    obj.insert("paths".to_owned(), serde_json::Value::Array(paths_json));
3026    if !exclude_paths.is_empty() {
3027        obj.insert("exclude_paths".to_owned(), serde_json::json!(exclude_paths));
3028    }
3029    serde_json::to_string(&serde_json::Value::Object(obj))
3030        .map_err(|e| EngineError::InvalidWrite(format!("failed to serialize property paths: {e}")))
3031}
3032
3033fn validate_fts_property_paths(paths: &[String]) -> Result<(), EngineError> {
3034    if paths.is_empty() {
3035        return Err(EngineError::InvalidWrite(
3036            "FTS property paths must not be empty".to_owned(),
3037        ));
3038    }
3039    let mut seen = std::collections::HashSet::new();
3040    for path in paths {
3041        if !path.starts_with("$.") {
3042            return Err(EngineError::InvalidWrite(format!(
3043                "FTS property path must start with '$.' but got: {path}"
3044            )));
3045        }
3046        let after_prefix = &path[2..]; // safe: already validated "$." prefix
3047        let segments: Vec<&str> = after_prefix.split('.').collect();
3048        if segments.is_empty() || segments.iter().any(|s| s.is_empty()) {
3049            return Err(EngineError::InvalidWrite(format!(
3050                "FTS property path has empty segment(s): {path}"
3051            )));
3052        }
3053        for seg in &segments {
3054            if !seg.chars().all(|c| c.is_alphanumeric() || c == '_') {
3055                return Err(EngineError::InvalidWrite(format!(
3056                    "FTS property path segment contains invalid characters: {path}"
3057                )));
3058            }
3059        }
3060        if !seen.insert(path) {
3061            return Err(EngineError::InvalidWrite(format!(
3062                "duplicate FTS property path: {path}"
3063            )));
3064        }
3065    }
3066    Ok(())
3067}
3068
3069fn load_fts_property_schema_record(
3070    conn: &rusqlite::Connection,
3071    kind: &str,
3072) -> Result<Option<FtsPropertySchemaRecord>, EngineError> {
3073    let row = conn
3074        .query_row(
3075            "SELECT kind, property_paths_json, separator, format_version \
3076             FROM fts_property_schemas WHERE kind = ?1",
3077            [kind],
3078            |row| {
3079                let kind: String = row.get(0)?;
3080                let paths_json: String = row.get(1)?;
3081                let separator: String = row.get(2)?;
3082                let format_version: i64 = row.get(3)?;
3083                Ok(build_fts_property_schema_record(
3084                    kind,
3085                    &paths_json,
3086                    separator,
3087                    format_version,
3088                ))
3089            },
3090        )
3091        .optional()?;
3092    Ok(row)
3093}
3094
3095/// Build an [`FtsPropertySchemaRecord`] from a raw
3096/// `fts_property_schemas` row. Delegates JSON parsing to
3097/// [`crate::writer::parse_property_schema_json`] — the same parser the
3098/// recursive walker uses at rebuild time — so both the legacy bare-array
3099/// shape and the Phase 4 object-shaped envelope round-trip correctly.
3100fn build_fts_property_schema_record(
3101    kind: String,
3102    paths_json: &str,
3103    separator: String,
3104    format_version: i64,
3105) -> FtsPropertySchemaRecord {
3106    let schema = crate::writer::parse_property_schema_json(paths_json, &separator);
3107    let entries: Vec<FtsPropertyPathSpec> = schema
3108        .paths
3109        .into_iter()
3110        .map(|entry| FtsPropertyPathSpec {
3111            path: entry.path,
3112            mode: match entry.mode {
3113                crate::writer::PropertyPathMode::Scalar => FtsPropertyPathMode::Scalar,
3114                crate::writer::PropertyPathMode::Recursive => FtsPropertyPathMode::Recursive,
3115            },
3116        })
3117        .collect();
3118    let property_paths: Vec<String> = entries.iter().map(|e| e.path.clone()).collect();
3119    FtsPropertySchemaRecord {
3120        kind,
3121        property_paths,
3122        entries,
3123        exclude_paths: schema.exclude_paths,
3124        separator,
3125        format_version,
3126    }
3127}
3128
3129fn build_regeneration_input(
3130    config: &VectorRegenerationConfig,
3131    identity: &QueryEmbedderIdentity,
3132    chunks: Vec<VectorRegenerationInputChunk>,
3133) -> VectorRegenerationInput {
3134    VectorRegenerationInput {
3135        profile: config.profile.clone(),
3136        table_name: config.table_name.clone(),
3137        model_identity: identity.model_identity.clone(),
3138        model_version: identity.model_version.clone(),
3139        dimension: identity.dimension,
3140        normalization_policy: identity.normalization_policy.clone(),
3141        chunking_policy: config.chunking_policy.clone(),
3142        preprocessing_policy: config.preprocessing_policy.clone(),
3143        chunks,
3144    }
3145}
3146
3147fn compute_snapshot_hash(payload: &VectorRegenerationInput) -> Result<String, EngineError> {
3148    let bytes =
3149        serde_json::to_vec(payload).map_err(|error| EngineError::Bridge(error.to_string()))?;
3150    let mut hasher = Sha256::new();
3151    hasher.update(bytes);
3152    Ok(format!("{:x}", hasher.finalize()))
3153}
3154
3155fn collect_regeneration_chunks(
3156    conn: &rusqlite::Connection,
3157) -> Result<Vec<VectorRegenerationInputChunk>, EngineError> {
3158    let mut stmt = conn.prepare(
3159        r"
3160        SELECT c.id, c.node_logical_id, n.kind, c.text_content, c.byte_start, c.byte_end, n.source_ref, c.created_at
3161        FROM chunks c
3162        JOIN nodes n
3163          ON n.logical_id = c.node_logical_id
3164         AND n.superseded_at IS NULL
3165        ORDER BY c.created_at, c.id
3166        ",
3167    )?;
3168    let chunks = stmt
3169        .query_map([], |row| {
3170            Ok(VectorRegenerationInputChunk {
3171                chunk_id: row.get(0)?,
3172                node_logical_id: row.get(1)?,
3173                kind: row.get(2)?,
3174                text_content: row.get(3)?,
3175                byte_start: row.get(4)?,
3176                byte_end: row.get(5)?,
3177                source_ref: row.get(6)?,
3178                created_at: row.get(7)?,
3179            })
3180        })?
3181        .collect::<Result<Vec<_>, _>>()?;
3182    Ok(chunks)
3183}
3184
3185fn validate_bounded_text(
3186    field: &str,
3187    value: &str,
3188    max_len: usize,
3189) -> Result<String, VectorRegenerationFailure> {
3190    let trimmed = value.trim();
3191    if trimmed.is_empty() {
3192        return Err(VectorRegenerationFailure::new(
3193            VectorRegenerationFailureClass::InvalidContract,
3194            format!("{field} must not be empty"),
3195        ));
3196    }
3197    if trimmed.len() > max_len {
3198        return Err(VectorRegenerationFailure::new(
3199            VectorRegenerationFailureClass::InvalidContract,
3200            format!("{field} exceeds max length {max_len}"),
3201        ));
3202    }
3203    Ok(trimmed.to_owned())
3204}
3205
3206fn current_vector_profile_dimension(
3207    conn: &rusqlite::Connection,
3208    profile: &str,
3209) -> Result<Option<usize>, VectorRegenerationFailure> {
3210    let dimension: Option<i64> = conn
3211        .query_row(
3212            "SELECT dimension FROM vector_profiles WHERE profile = ?1 AND enabled = 1",
3213            [profile],
3214            |row| row.get(0),
3215        )
3216        .optional()
3217        .map_err(|error| {
3218            VectorRegenerationFailure::new(
3219                VectorRegenerationFailureClass::InvalidContract,
3220                error.to_string(),
3221            )
3222        })?;
3223    dimension
3224        .map(|value| {
3225            usize::try_from(value).map_err(|_| {
3226                VectorRegenerationFailure::new(
3227                    VectorRegenerationFailureClass::InvalidContract,
3228                    format!("stored vector profile dimension is invalid: {value}"),
3229                )
3230            })
3231        })
3232        .transpose()
3233}
3234
3235fn validate_existing_contract_version(
3236    conn: &rusqlite::Connection,
3237    profile: &str,
3238) -> Result<(), VectorRegenerationFailure> {
3239    let version: Option<i64> = conn
3240        .query_row(
3241            "SELECT contract_format_version FROM vector_embedding_contracts WHERE profile = ?1",
3242            [profile],
3243            |row| row.get(0),
3244        )
3245        .optional()
3246        .map_err(|error| {
3247            VectorRegenerationFailure::new(
3248                VectorRegenerationFailureClass::InvalidContract,
3249                error.to_string(),
3250            )
3251        })?;
3252    if let Some(version) = version
3253        && version > CURRENT_VECTOR_CONTRACT_FORMAT_VERSION
3254    {
3255        return Err(VectorRegenerationFailure::new(
3256            VectorRegenerationFailureClass::InvalidContract,
3257            format!(
3258                "persisted contract format version {version} is unsupported; supported version is {CURRENT_VECTOR_CONTRACT_FORMAT_VERSION}"
3259            ),
3260        ));
3261    }
3262    Ok(())
3263}
3264
3265fn serialize_audit_metadata(
3266    metadata: &VectorRegenerationAuditMetadata,
3267) -> Result<String, EngineError> {
3268    let json =
3269        serde_json::to_string(metadata).map_err(|error| EngineError::Bridge(error.to_string()))?;
3270    if json.len() > MAX_AUDIT_METADATA_BYTES {
3271        return Err(VectorRegenerationFailure::new(
3272            VectorRegenerationFailureClass::InvalidContract,
3273            format!("audit metadata exceeds {MAX_AUDIT_METADATA_BYTES} bytes"),
3274        )
3275        .to_engine_error());
3276    }
3277    Ok(json)
3278}
3279
3280fn count_source_ref(
3281    conn: &rusqlite::Connection,
3282    table: &str,
3283    source_ref: &str,
3284) -> Result<usize, EngineError> {
3285    let sql = match table {
3286        "nodes" => "SELECT count(*) FROM nodes WHERE source_ref = ?1",
3287        "edges" => "SELECT count(*) FROM edges WHERE source_ref = ?1",
3288        "actions" => "SELECT count(*) FROM actions WHERE source_ref = ?1",
3289        "operational_mutations" => {
3290            "SELECT count(*) FROM operational_mutations WHERE source_ref = ?1"
3291        }
3292        other => return Err(EngineError::Bridge(format!("unknown table: {other}"))),
3293    };
3294    let count: i64 = conn.query_row(sql, [source_ref], |row| row.get(0))?;
3295    // FIX(review): was `count as usize` — unsound cast.
3296    // Chose option (C) here: propagate error since this is a user-facing helper.
3297    usize::try_from(count)
3298        .map_err(|_| EngineError::Bridge(format!("count overflow for table {table}: {count}")))
3299}
3300
3301fn rebuild_operational_current_rows(
3302    tx: &rusqlite::Transaction<'_>,
3303    collections: &[String],
3304) -> Result<usize, EngineError> {
3305    let mut rebuilt_rows = 0usize;
3306    clear_operational_current_rows(tx, collections)?;
3307    let mut ins_current = tx.prepare_cached(
3308        "INSERT INTO operational_current \
3309         (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
3310         VALUES (?1, ?2, ?3, ?4, ?5)",
3311    )?;
3312
3313    for collection in collections {
3314        let mut stmt = tx.prepare(
3315            "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
3316             FROM operational_mutations \
3317             WHERE collection_name = ?1 \
3318             ORDER BY record_key, mutation_order",
3319        )?;
3320        let mut latest_by_key: std::collections::HashMap<String, Option<(String, i64, String)>> =
3321            std::collections::HashMap::new();
3322        let rows = stmt.query_map([collection], map_operational_mutation_row)?;
3323        for row in rows {
3324            let mutation = row?;
3325            match mutation.op_kind.as_str() {
3326                "put" => {
3327                    latest_by_key.insert(
3328                        mutation.record_key,
3329                        Some((mutation.payload_json, mutation.created_at, mutation.id)),
3330                    );
3331                }
3332                "delete" => {
3333                    latest_by_key.insert(mutation.record_key, None);
3334                }
3335                _ => {}
3336            }
3337        }
3338
3339        for (record_key, state) in latest_by_key {
3340            if let Some((payload_json, updated_at, last_mutation_id)) = state {
3341                ins_current.execute(rusqlite::params![
3342                    collection,
3343                    record_key,
3344                    payload_json,
3345                    updated_at,
3346                    last_mutation_id,
3347                ])?;
3348                rebuilt_rows += 1;
3349            }
3350        }
3351    }
3352
3353    drop(ins_current);
3354    Ok(rebuilt_rows)
3355}
3356
3357fn clear_operational_current_rows(
3358    tx: &rusqlite::Transaction<'_>,
3359    collections: &[String],
3360) -> Result<(), EngineError> {
3361    let mut delete_current =
3362        tx.prepare_cached("DELETE FROM operational_current WHERE collection_name = ?1")?;
3363    let mut delete_secondary_current = tx.prepare_cached(
3364        "DELETE FROM operational_secondary_index_entries \
3365         WHERE collection_name = ?1 AND subject_kind = 'current'",
3366    )?;
3367    for collection in collections {
3368        delete_secondary_current.execute([collection])?;
3369        delete_current.execute([collection])?;
3370    }
3371    drop(delete_secondary_current);
3372    drop(delete_current);
3373    Ok(())
3374}
3375
3376fn clear_operational_secondary_index_entries(
3377    tx: &rusqlite::Transaction<'_>,
3378    collection_name: &str,
3379) -> Result<(), EngineError> {
3380    tx.execute(
3381        "DELETE FROM operational_secondary_index_entries WHERE collection_name = ?1",
3382        [collection_name],
3383    )?;
3384    Ok(())
3385}
3386
3387fn insert_operational_secondary_index_entry(
3388    tx: &rusqlite::Transaction<'_>,
3389    collection_name: &str,
3390    subject_kind: &str,
3391    mutation_id: &str,
3392    record_key: &str,
3393    entry: &crate::operational::OperationalSecondaryIndexEntry,
3394) -> Result<(), EngineError> {
3395    tx.execute(
3396        "INSERT INTO operational_secondary_index_entries \
3397         (collection_name, index_name, subject_kind, mutation_id, record_key, sort_timestamp, \
3398          slot1_text, slot1_integer, slot2_text, slot2_integer, slot3_text, slot3_integer) \
3399         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
3400        rusqlite::params![
3401            collection_name,
3402            entry.index_name,
3403            subject_kind,
3404            mutation_id,
3405            record_key,
3406            entry.sort_timestamp,
3407            entry.slot1_text,
3408            entry.slot1_integer,
3409            entry.slot2_text,
3410            entry.slot2_integer,
3411            entry.slot3_text,
3412            entry.slot3_integer,
3413        ],
3414    )?;
3415    Ok(())
3416}
3417
3418fn rebuild_operational_secondary_index_entries(
3419    tx: &rusqlite::Transaction<'_>,
3420    collection_name: &str,
3421    collection_kind: OperationalCollectionKind,
3422    indexes: &[OperationalSecondaryIndexDefinition],
3423) -> Result<(usize, usize), EngineError> {
3424    clear_operational_secondary_index_entries(tx, collection_name)?;
3425
3426    let mut mutation_entries_rebuilt = 0usize;
3427    if collection_kind == OperationalCollectionKind::AppendOnlyLog {
3428        let mut stmt = tx.prepare(
3429            "SELECT id, record_key, payload_json FROM operational_mutations \
3430             WHERE collection_name = ?1 ORDER BY mutation_order",
3431        )?;
3432        let rows = stmt
3433            .query_map([collection_name], |row| {
3434                Ok((
3435                    row.get::<_, String>(0)?,
3436                    row.get::<_, String>(1)?,
3437                    row.get::<_, String>(2)?,
3438                ))
3439            })?
3440            .collect::<Result<Vec<_>, _>>()?;
3441        drop(stmt);
3442        for (mutation_id, record_key, payload_json) in rows {
3443            for entry in extract_secondary_index_entries_for_mutation(indexes, &payload_json) {
3444                insert_operational_secondary_index_entry(
3445                    tx,
3446                    collection_name,
3447                    "mutation",
3448                    &mutation_id,
3449                    &record_key,
3450                    &entry,
3451                )?;
3452                mutation_entries_rebuilt += 1;
3453            }
3454        }
3455    }
3456
3457    let mut current_entries_rebuilt = 0usize;
3458    if collection_kind == OperationalCollectionKind::LatestState {
3459        let mut stmt = tx.prepare(
3460            "SELECT record_key, payload_json, updated_at, last_mutation_id FROM operational_current \
3461             WHERE collection_name = ?1 ORDER BY updated_at DESC, record_key",
3462        )?;
3463        let rows = stmt
3464            .query_map([collection_name], |row| {
3465                Ok((
3466                    row.get::<_, String>(0)?,
3467                    row.get::<_, String>(1)?,
3468                    row.get::<_, i64>(2)?,
3469                    row.get::<_, String>(3)?,
3470                ))
3471            })?
3472            .collect::<Result<Vec<_>, _>>()?;
3473        drop(stmt);
3474        for (record_key, payload_json, updated_at, last_mutation_id) in rows {
3475            for entry in
3476                extract_secondary_index_entries_for_current(indexes, &payload_json, updated_at)
3477            {
3478                insert_operational_secondary_index_entry(
3479                    tx,
3480                    collection_name,
3481                    "current",
3482                    &last_mutation_id,
3483                    &record_key,
3484                    &entry,
3485                )?;
3486                current_entries_rebuilt += 1;
3487            }
3488        }
3489    }
3490
3491    Ok((mutation_entries_rebuilt, current_entries_rebuilt))
3492}
3493
3494fn collect_strings_tx(
3495    tx: &rusqlite::Transaction<'_>,
3496    sql: &str,
3497    value: &str,
3498) -> Result<Vec<String>, EngineError> {
3499    let mut stmt = tx.prepare(sql)?;
3500    let rows = stmt.query_map([value], |row| row.get::<_, String>(0))?;
3501    rows.collect::<Result<Vec<_>, _>>()
3502        .map_err(EngineError::from)
3503}
3504
/// Converts an `i64` row count into a `usize`.
///
/// # Panics
///
/// Panics when `val` cannot be represented as a `usize` (for example when it
/// is negative), which would indicate data corruption for a `count(*)`.
#[allow(clippy::expect_used)]
fn i64_to_usize(val: i64) -> usize {
    let converted: Result<usize, _> = usize::try_from(val);
    converted.expect("count(*) must be non-negative")
}
3511
3512/// Runs a parameterized query and collects the first column as strings.
3513///
3514/// NOTE(review): sql parameter must be a hardcoded query string, never user input.
3515/// Options: (A) doc comment, (B) whitelist refactor like `count_source_ref`, (C) leave as-is.
3516/// Chose (A): function is private, only called with hardcoded SQL from `trace_source`.
3517/// Whitelist refactor not practical — queries have different SELECT/ORDER BY per table.
3518fn collect_strings(
3519    conn: &rusqlite::Connection,
3520    sql: &str,
3521    param: &str,
3522) -> Result<Vec<String>, EngineError> {
3523    let mut stmt = conn.prepare(sql)?;
3524    let values = stmt
3525        .query_map([param], |row| row.get::<_, String>(0))?
3526        .collect::<Result<Vec<_>, _>>()?;
3527    Ok(values)
3528}
3529
3530fn collect_edge_logical_ids_for_restore(
3531    tx: &rusqlite::Transaction<'_>,
3532    logical_id: &str,
3533    retire_source_ref: Option<&str>,
3534    retire_created_at: i64,
3535    retire_event_rowid: i64,
3536) -> Result<Vec<String>, EngineError> {
3537    let mut stmt = tx.prepare(
3538        "SELECT DISTINCT e.logical_id \
3539         FROM edges e \
3540         JOIN provenance_events p \
3541           ON p.subject = e.logical_id \
3542          AND p.event_type = 'edge_retire' \
3543          AND ( \
3544                p.created_at > ?3 \
3545                OR (p.created_at = ?3 AND p.rowid >= ?4) \
3546          ) \
3547          AND ((?2 IS NULL AND p.source_ref IS NULL) OR p.source_ref = ?2) \
3548         WHERE e.superseded_at IS NOT NULL \
3549           AND (e.source_logical_id = ?1 OR e.target_logical_id = ?1) \
3550           AND NOT EXISTS ( \
3551                SELECT 1 FROM edges active \
3552                WHERE active.logical_id = e.logical_id \
3553                  AND active.superseded_at IS NULL \
3554           ) \
3555         ORDER BY e.logical_id",
3556    )?;
3557    let edge_ids = stmt
3558        .query_map(
3559            rusqlite::params![
3560                logical_id,
3561                retire_source_ref,
3562                retire_created_at,
3563                retire_event_rowid
3564            ],
3565            |row| row.get::<_, String>(0),
3566        )?
3567        .collect::<Result<Vec<_>, _>>()?;
3568    Ok(edge_ids)
3569}
3570
3571/// Restores edges for a node being restored, skipping any whose counterpart
3572/// endpoint is not active (e.g. still retired or purged).
3573fn restore_validated_edges(
3574    tx: &rusqlite::Transaction<'_>,
3575    logical_id: &str,
3576    retire_source_ref: Option<&str>,
3577    retire_created_at: i64,
3578    retire_event_rowid: i64,
3579) -> Result<(usize, Vec<SkippedEdge>), EngineError> {
3580    let edge_logical_ids = collect_edge_logical_ids_for_restore(
3581        tx,
3582        logical_id,
3583        retire_source_ref,
3584        retire_created_at,
3585        retire_event_rowid,
3586    )?;
3587    let mut restored = 0usize;
3588    let mut skipped = Vec::new();
3589    for edge_logical_id in &edge_logical_ids {
3590        let edge_detail: Option<(String, String, String)> = tx
3591            .query_row(
3592                "SELECT row_id, source_logical_id, target_logical_id FROM edges \
3593                 WHERE logical_id = ?1 AND superseded_at IS NOT NULL \
3594                 ORDER BY superseded_at DESC, created_at DESC, rowid DESC LIMIT 1",
3595                [edge_logical_id.as_str()],
3596                |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
3597            )
3598            .optional()?;
3599        let Some((edge_row_id, source_lid, target_lid)) = edge_detail else {
3600            continue;
3601        };
3602        let other_endpoint = if source_lid == logical_id {
3603            &target_lid
3604        } else {
3605            &source_lid
3606        };
3607        let endpoint_active: bool = tx
3608            .query_row(
3609                "SELECT 1 FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL LIMIT 1",
3610                [other_endpoint.as_str()],
3611                |_| Ok(true),
3612            )
3613            .optional()?
3614            .unwrap_or(false);
3615        if !endpoint_active {
3616            skipped.push(SkippedEdge {
3617                edge_logical_id: edge_logical_id.clone(),
3618                missing_endpoint: other_endpoint.clone(),
3619            });
3620            continue;
3621        }
3622        restored += tx.execute(
3623            "UPDATE edges SET superseded_at = NULL WHERE row_id = ?1",
3624            [edge_row_id.as_str()],
3625        )?;
3626    }
3627    Ok((restored, skipped))
3628}
3629
/// Counts the `vec_nodes_active` rows attached to chunks of `logical_id`.
///
/// When SQLite reports a failure whose message mentions `vec_nodes_active`
/// or `no such module: vec0`, the count is reported as zero instead of an
/// error.
///
/// # Errors
///
/// Any other SQLite error is returned as [`EngineError::Sqlite`].
#[cfg(feature = "sqlite-vec")]
fn count_vec_rows_for_logical_id(
    tx: &rusqlite::Transaction<'_>,
    logical_id: &str,
) -> Result<usize, EngineError> {
    let counted = tx.query_row(
        "SELECT count(*) FROM vec_nodes_active v \
         JOIN chunks c ON c.id = v.chunk_id \
         WHERE c.node_logical_id = ?1",
        [logical_id],
        |row| row.get::<_, i64>(0),
    );
    let error = match counted {
        Ok(count) => return Ok(i64_to_usize(count)),
        Err(error) => error,
    };
    let vec_table_unavailable = matches!(
        &error,
        rusqlite::Error::SqliteFailure(_, Some(msg))
            if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0")
    );
    if vec_table_unavailable {
        Ok(0)
    } else {
        Err(EngineError::Sqlite(error))
    }
}
3651
/// Fallback compiled when the `sqlite-vec` feature is disabled: both
/// arguments are ignored and the count is always reported as zero.
///
/// The `Result` return type mirrors the `sqlite-vec` variant so callers
/// compile unchanged under either configuration.
3652#[cfg(not(feature = "sqlite-vec"))]
3653#[allow(clippy::unnecessary_wraps)]
3654fn count_vec_rows_for_logical_id(
3655    _tx: &rusqlite::Transaction<'_>,
3656    _logical_id: &str,
3657) -> Result<usize, EngineError> {
3658    Ok(0)
3659}
3660
/// Deletes the `vec_nodes_active` rows attached to chunks of `logical_id`
/// and returns how many rows were removed.
///
/// When SQLite reports a failure whose message mentions `vec_nodes_active`
/// or `no such module: vec0`, zero is returned instead of an error.
///
/// # Errors
///
/// Any other SQLite error is returned as [`EngineError::Sqlite`].
#[cfg(feature = "sqlite-vec")]
fn delete_vec_rows_for_logical_id(
    tx: &rusqlite::Transaction<'_>,
    logical_id: &str,
) -> Result<usize, EngineError> {
    let deleted = tx.execute(
        "DELETE FROM vec_nodes_active \
         WHERE chunk_id IN (SELECT id FROM chunks WHERE node_logical_id = ?1)",
        [logical_id],
    );
    let error = match deleted {
        Ok(count) => return Ok(count),
        Err(error) => error,
    };
    let vec_table_unavailable = matches!(
        &error,
        rusqlite::Error::SqliteFailure(_, Some(msg))
            if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0")
    );
    if vec_table_unavailable {
        Ok(0)
    } else {
        Err(EngineError::Sqlite(error))
    }
}
3680
/// Fallback compiled when the `sqlite-vec` feature is disabled: both
/// arguments are ignored, nothing is deleted, and zero is returned.
///
/// The `Result` return type mirrors the `sqlite-vec` variant so callers
/// compile unchanged under either configuration.
3681#[cfg(not(feature = "sqlite-vec"))]
3682#[allow(clippy::unnecessary_wraps)]
3683fn delete_vec_rows_for_logical_id(
3684    _tx: &rusqlite::Transaction<'_>,
3685    _logical_id: &str,
3686) -> Result<usize, EngineError> {
3687    Ok(0)
3688}
3689
3690fn ensure_operational_collection_registered(
3691    conn: &rusqlite::Connection,
3692    collection_name: &str,
3693) -> Result<(), EngineError> {
3694    if load_operational_collection_record(conn, collection_name)?.is_none() {
3695        return Err(EngineError::InvalidWrite(format!(
3696            "operational collection '{collection_name}' is not registered"
3697        )));
3698    }
3699    Ok(())
3700}
3701
3702fn load_operational_collection_record(
3703    conn: &rusqlite::Connection,
3704    name: &str,
3705) -> Result<Option<OperationalCollectionRecord>, EngineError> {
3706    conn.query_row(
3707        "SELECT name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at, disabled_at \
3708         FROM operational_collections WHERE name = ?1",
3709        [name],
3710        map_operational_collection_row,
3711    )
3712    .optional()
3713    .map_err(EngineError::Sqlite)
3714}
3715
3716fn validate_append_only_operational_collection(
3717    record: &OperationalCollectionRecord,
3718    operation: &str,
3719) -> Result<(), EngineError> {
3720    if record.kind != OperationalCollectionKind::AppendOnlyLog {
3721        return Err(EngineError::InvalidWrite(format!(
3722            "operational collection '{}' must be append_only_log to {operation}",
3723            record.name
3724        )));
3725    }
3726    Ok(())
3727}
3728
/// A single read filter after validation against a collection's declared
/// filter fields (produced by `compile_operational_read_filters`).
3729#[derive(Clone, Debug, PartialEq, Eq)]
3730struct CompiledOperationalReadFilter {
    /// Name of the declared field the filter applies to.
3731    field: String,
    /// The typed condition to evaluate against that field.
3732    condition: OperationalReadCondition,
3733}
3734
/// Borrowed description of a secondary index paired with the compiled
/// filters it is matched against.
///
/// NOTE(review): presumably the result type of
/// `match_append_only_secondary_index_read`, whose body is not visible from
/// here — confirm against that function.
3735#[derive(Clone, Debug)]
3736struct MatchedAppendOnlySecondaryIndexRead<'a> {
    /// Name of the matched secondary index.
3737    index_name: &'a str,
    /// The compiled filter used as the value condition.
3738    value_filter: &'a CompiledOperationalReadFilter,
    /// Optional additional compiled filter used as a time range.
3739    time_range: Option<&'a CompiledOperationalReadFilter>,
3740}
3741
/// The typed condition carried by a [`CompiledOperationalReadFilter`].
3742#[derive(Clone, Debug, PartialEq, Eq)]
3743enum OperationalReadCondition {
    /// Exact match on a string field.
3744    ExactString(String),
    /// Exact match on an integer or timestamp field.
3745    ExactInteger(i64),
    /// Prefix condition on a string field.
3746    Prefix(String),
    /// Range condition on an integer or timestamp field; at least one bound
    /// is set when built by `compile_operational_read_filters`.
3747    Range {
        /// Optional lower bound.
3748        lower: Option<i64>,
        /// Optional upper bound.
3749        upper: Option<i64>,
3750    },
3751}
3752
3753fn operational_read_limit(limit: Option<usize>) -> Result<usize, EngineError> {
3754    let applied_limit = limit.unwrap_or(DEFAULT_OPERATIONAL_READ_LIMIT);
3755    if applied_limit == 0 {
3756        return Err(EngineError::InvalidWrite(
3757            "operational read limit must be greater than zero".to_owned(),
3758        ));
3759    }
3760    Ok(applied_limit.min(MAX_OPERATIONAL_READ_LIMIT))
3761}
3762
3763fn parse_operational_filter_fields(
3764    filter_fields_json: &str,
3765) -> Result<Vec<OperationalFilterField>, String> {
3766    let fields: Vec<OperationalFilterField> = serde_json::from_str(filter_fields_json)
3767        .map_err(|error| format!("invalid filter_fields_json: {error}"))?;
3768    let mut seen = std::collections::HashSet::new();
3769    for field in &fields {
3770        if field.name.trim().is_empty() {
3771            return Err("filter_fields_json field names must not be empty".to_owned());
3772        }
3773        if !seen.insert(field.name.as_str()) {
3774            return Err(format!(
3775                "filter_fields_json contains duplicate field '{}'",
3776                field.name
3777            ));
3778        }
3779        if field.modes.is_empty() {
3780            return Err(format!(
3781                "filter_fields_json field '{}' must declare at least one mode",
3782                field.name
3783            ));
3784        }
3785        if field.modes.contains(&OperationalFilterMode::Prefix)
3786            && field.field_type != OperationalFilterFieldType::String
3787        {
3788            return Err(format!(
3789                "filter field '{}' only supports prefix for string types",
3790                field.name
3791            ));
3792        }
3793    }
3794    Ok(fields)
3795}
3796
3797fn compile_operational_read_filters(
3798    filters: &[OperationalFilterClause],
3799    declared_fields: &[OperationalFilterField],
3800) -> Result<Vec<CompiledOperationalReadFilter>, EngineError> {
3801    let field_map = declared_fields
3802        .iter()
3803        .map(|field| (field.name.as_str(), field))
3804        .collect::<std::collections::HashMap<_, _>>();
3805    filters
3806        .iter()
3807        .map(|filter| match filter {
3808            OperationalFilterClause::Exact { field, value } => {
3809                let declared = field_map.get(field.as_str()).ok_or_else(|| {
3810                    EngineError::InvalidWrite(format!(
3811                        "operational read filter uses undeclared field '{field}'"
3812                    ))
3813                })?;
3814                if !declared.modes.contains(&OperationalFilterMode::Exact) {
3815                    return Err(EngineError::InvalidWrite(format!(
3816                        "operational read field '{field}' does not allow exact filters"
3817                    )));
3818                }
3819                let condition = match (declared.field_type, value) {
3820                    (OperationalFilterFieldType::String, OperationalFilterValue::String(value)) => {
3821                        OperationalReadCondition::ExactString(value.clone())
3822                    }
3823                    (
3824                        OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp,
3825                        OperationalFilterValue::Integer(value),
3826                    ) => OperationalReadCondition::ExactInteger(*value),
3827                    _ => {
3828                        return Err(EngineError::InvalidWrite(format!(
3829                            "operational read field '{field}' received a value with the wrong type"
3830                        )));
3831                    }
3832                };
3833                Ok(CompiledOperationalReadFilter {
3834                    field: field.clone(),
3835                    condition,
3836                })
3837            }
3838            OperationalFilterClause::Prefix { field, value } => {
3839                let declared = field_map.get(field.as_str()).ok_or_else(|| {
3840                    EngineError::InvalidWrite(format!(
3841                        "operational read filter uses undeclared field '{field}'"
3842                    ))
3843                })?;
3844                if !declared.modes.contains(&OperationalFilterMode::Prefix) {
3845                    return Err(EngineError::InvalidWrite(format!(
3846                        "operational read field '{field}' does not allow prefix filters"
3847                    )));
3848                }
3849                if declared.field_type != OperationalFilterFieldType::String {
3850                    return Err(EngineError::InvalidWrite(format!(
3851                        "operational read field '{field}' only supports prefix filters for strings"
3852                    )));
3853                }
3854                Ok(CompiledOperationalReadFilter {
3855                    field: field.clone(),
3856                    condition: OperationalReadCondition::Prefix(value.clone()),
3857                })
3858            }
3859            OperationalFilterClause::Range {
3860                field,
3861                lower,
3862                upper,
3863            } => {
3864                let declared = field_map.get(field.as_str()).ok_or_else(|| {
3865                    EngineError::InvalidWrite(format!(
3866                        "operational read filter uses undeclared field '{field}'"
3867                    ))
3868                })?;
3869                if !declared.modes.contains(&OperationalFilterMode::Range) {
3870                    return Err(EngineError::InvalidWrite(format!(
3871                        "operational read field '{field}' does not allow range filters"
3872                    )));
3873                }
3874                if !matches!(
3875                    declared.field_type,
3876                    OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp
3877                ) {
3878                    return Err(EngineError::InvalidWrite(format!(
3879                        "operational read field '{field}' only supports range filters for integer/timestamp fields"
3880                    )));
3881                }
3882                if lower.is_none() && upper.is_none() {
3883                    return Err(EngineError::InvalidWrite(format!(
3884                        "operational read range filter for '{field}' must specify a lower or upper bound"
3885                    )));
3886                }
3887                Ok(CompiledOperationalReadFilter {
3888                    field: field.clone(),
3889                    condition: OperationalReadCondition::Range {
3890                        lower: *lower,
3891                        upper: *upper,
3892                    },
3893                })
3894            }
3895        })
3896        .collect()
3897}
3898
3899fn match_append_only_secondary_index_read<'a>(
3900    filters: &'a [CompiledOperationalReadFilter],
3901    indexes: &'a [OperationalSecondaryIndexDefinition],
3902) -> Option<MatchedAppendOnlySecondaryIndexRead<'a>> {
3903    indexes.iter().find_map(|index| {
3904        let OperationalSecondaryIndexDefinition::AppendOnlyFieldTime {
3905            name,
3906            field,
3907            value_type,
3908            time_field,
3909        } = index
3910        else {
3911            return None;
3912        };
3913        if !(1..=2).contains(&filters.len()) {
3914            return None;
3915        }
3916
3917        let mut value_filter = None;
3918        let mut time_range = None;
3919        for filter in filters {
3920            if filter.field == *field {
3921                let supported = matches!(
3922                    (&filter.condition, value_type),
3923                    (
3924                        OperationalReadCondition::ExactString(_)
3925                            | OperationalReadCondition::Prefix(_),
3926                        crate::operational::OperationalSecondaryIndexValueType::String
3927                    ) | (
3928                        OperationalReadCondition::ExactInteger(_),
3929                        crate::operational::OperationalSecondaryIndexValueType::Integer
3930                            | crate::operational::OperationalSecondaryIndexValueType::Timestamp
3931                    )
3932                );
3933                if !supported || value_filter.is_some() {
3934                    return None;
3935                }
3936                value_filter = Some(filter);
3937                continue;
3938            }
3939            if filter.field == *time_field {
3940                if !matches!(filter.condition, OperationalReadCondition::Range { .. })
3941                    || time_range.is_some()
3942                {
3943                    return None;
3944                }
3945                time_range = Some(filter);
3946                continue;
3947            }
3948            return None;
3949        }
3950
3951        value_filter.map(|value_filter| MatchedAppendOnlySecondaryIndexRead {
3952            index_name: name.as_str(),
3953            value_filter,
3954            time_range,
3955        })
3956    })
3957}
3958
3959fn execute_operational_secondary_index_read(
3960    conn: &rusqlite::Connection,
3961    collection_name: &str,
3962    filters: &[CompiledOperationalReadFilter],
3963    indexes: &[OperationalSecondaryIndexDefinition],
3964    applied_limit: usize,
3965) -> Result<Option<OperationalReadReport>, EngineError> {
3966    use rusqlite::types::Value;
3967
3968    let Some(matched) = match_append_only_secondary_index_read(filters, indexes) else {
3969        return Ok(None);
3970    };
3971
3972    let mut sql = String::from(
3973        "SELECT m.id, m.collection_name, m.record_key, m.op_kind, m.payload_json, m.source_ref, m.created_at \
3974         FROM operational_secondary_index_entries s \
3975         JOIN operational_mutations m ON m.id = s.mutation_id \
3976         WHERE s.collection_name = ?1 AND s.index_name = ?2 AND s.subject_kind = 'mutation' ",
3977    );
3978    let mut params = vec![
3979        Value::from(collection_name.to_owned()),
3980        Value::from(matched.index_name.to_owned()),
3981    ];
3982
3983    match &matched.value_filter.condition {
3984        OperationalReadCondition::ExactString(value) => {
3985            let _ = write!(sql, "AND s.slot1_text = ?{} ", params.len() + 1);
3986            params.push(Value::from(value.clone()));
3987        }
3988        OperationalReadCondition::Prefix(value) => {
3989            let _ = write!(sql, "AND s.slot1_text GLOB ?{} ", params.len() + 1);
3990            params.push(Value::from(glob_prefix_pattern(value)));
3991        }
3992        OperationalReadCondition::ExactInteger(value) => {
3993            let _ = write!(sql, "AND s.slot1_integer = ?{} ", params.len() + 1);
3994            params.push(Value::from(*value));
3995        }
3996        OperationalReadCondition::Range { .. } => return Ok(None),
3997    }
3998
3999    if let Some(time_range) = matched.time_range
4000        && let OperationalReadCondition::Range { lower, upper } = &time_range.condition
4001    {
4002        if let Some(lower) = lower {
4003            let _ = write!(sql, "AND s.sort_timestamp >= ?{} ", params.len() + 1);
4004            params.push(Value::from(*lower));
4005        }
4006        if let Some(upper) = upper {
4007            let _ = write!(sql, "AND s.sort_timestamp <= ?{} ", params.len() + 1);
4008            params.push(Value::from(*upper));
4009        }
4010    }
4011
4012    let _ = write!(
4013        sql,
4014        "ORDER BY s.sort_timestamp DESC, m.mutation_order DESC LIMIT ?{}",
4015        params.len() + 1
4016    );
4017    params.push(Value::from(i64::try_from(applied_limit + 1).map_err(
4018        |_| EngineError::Bridge("operational read limit overflow".to_owned()),
4019    )?));
4020
4021    let mut stmt = conn.prepare(&sql)?;
4022    let mut rows = stmt
4023        .query_map(
4024            rusqlite::params_from_iter(params),
4025            map_operational_mutation_row,
4026        )?
4027        .collect::<Result<Vec<_>, _>>()?;
4028    let was_limited = rows.len() > applied_limit;
4029    if was_limited {
4030        rows.truncate(applied_limit);
4031    }
4032
4033    Ok(Some(OperationalReadReport {
4034        collection_name: collection_name.to_owned(),
4035        row_count: rows.len(),
4036        applied_limit,
4037        was_limited,
4038        rows,
4039    }))
4040}
4041
4042fn execute_operational_filtered_read(
4043    conn: &rusqlite::Connection,
4044    collection_name: &str,
4045    filters: &[CompiledOperationalReadFilter],
4046    applied_limit: usize,
4047) -> Result<OperationalReadReport, EngineError> {
4048    use rusqlite::types::Value;
4049
4050    let mut sql = String::from(
4051        "SELECT m.id, m.collection_name, m.record_key, m.op_kind, m.payload_json, m.source_ref, m.created_at \
4052         FROM operational_mutations m ",
4053    );
4054    let mut params = vec![Value::from(collection_name.to_owned())];
4055    for (index, filter) in filters.iter().enumerate() {
4056        let _ = write!(
4057            sql,
4058            "JOIN operational_filter_values f{index} \
4059             ON f{index}.mutation_id = m.id \
4060            AND f{index}.collection_name = m.collection_name "
4061        );
4062        match &filter.condition {
4063            OperationalReadCondition::ExactString(value) => {
4064                let _ = write!(
4065                    sql,
4066                    "AND f{index}.field_name = ?{} AND f{index}.string_value = ?{} ",
4067                    params.len() + 1,
4068                    params.len() + 2
4069                );
4070                params.push(Value::from(filter.field.clone()));
4071                params.push(Value::from(value.clone()));
4072            }
4073            OperationalReadCondition::ExactInteger(value) => {
4074                let _ = write!(
4075                    sql,
4076                    "AND f{index}.field_name = ?{} AND f{index}.integer_value = ?{} ",
4077                    params.len() + 1,
4078                    params.len() + 2
4079                );
4080                params.push(Value::from(filter.field.clone()));
4081                params.push(Value::from(*value));
4082            }
4083            OperationalReadCondition::Prefix(value) => {
4084                let _ = write!(
4085                    sql,
4086                    "AND f{index}.field_name = ?{} AND f{index}.string_value GLOB ?{} ",
4087                    params.len() + 1,
4088                    params.len() + 2
4089                );
4090                params.push(Value::from(filter.field.clone()));
4091                params.push(Value::from(glob_prefix_pattern(value)));
4092            }
4093            OperationalReadCondition::Range { lower, upper } => {
4094                let _ = write!(sql, "AND f{index}.field_name = ?{} ", params.len() + 1);
4095                params.push(Value::from(filter.field.clone()));
4096                if let Some(lower) = lower {
4097                    let _ = write!(sql, "AND f{index}.integer_value >= ?{} ", params.len() + 1);
4098                    params.push(Value::from(*lower));
4099                }
4100                if let Some(upper) = upper {
4101                    let _ = write!(sql, "AND f{index}.integer_value <= ?{} ", params.len() + 1);
4102                    params.push(Value::from(*upper));
4103                }
4104            }
4105        }
4106    }
4107    let _ = write!(
4108        sql,
4109        "WHERE m.collection_name = ?1 ORDER BY m.mutation_order DESC LIMIT ?{}",
4110        params.len() + 1
4111    );
4112    params.push(Value::from(i64::try_from(applied_limit + 1).map_err(
4113        |_| EngineError::Bridge("operational read limit overflow".to_owned()),
4114    )?));
4115
4116    let mut stmt = conn.prepare(&sql)?;
4117    let mut rows = stmt
4118        .query_map(
4119            rusqlite::params_from_iter(params),
4120            map_operational_mutation_row,
4121        )?
4122        .collect::<Result<Vec<_>, _>>()?;
4123    let was_limited = rows.len() > applied_limit;
4124    if was_limited {
4125        rows.truncate(applied_limit);
4126    }
4127    Ok(OperationalReadReport {
4128        collection_name: collection_name.to_owned(),
4129        row_count: rows.len(),
4130        applied_limit,
4131        was_limited,
4132        rows,
4133    })
4134}
4135
/// Builds a SQLite `GLOB` pattern that matches any string starting with `value`.
///
/// The GLOB metacharacters `*`, `?` and `[` occurring in `value` are wrapped
/// in a single-character bracket class so they match literally, and a
/// trailing `*` is appended to accept any suffix.
fn glob_prefix_pattern(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len() + 1);
    for ch in value.chars() {
        if matches!(ch, '*' | '?' | '[') {
            escaped.push('[');
            escaped.push(ch);
            escaped.push(']');
        } else {
            escaped.push(ch);
        }
    }
    escaped.push('*');
    escaped
}
4149
/// One filterable value pulled out of an operational payload by
/// `extract_operational_filter_values`.
///
/// Exactly one of `string_value` / `integer_value` is populated, depending on
/// the declared type of the filter field.
#[derive(Clone, Debug, PartialEq, Eq)]
struct ExtractedOperationalFilterValue {
    /// Name of the declared filter field the value was read from.
    field_name: String,
    /// Set when the field is declared as a string and the payload value is a JSON string.
    string_value: Option<String>,
    /// Set when the field is declared as integer/timestamp and the payload value fits an `i64`.
    integer_value: Option<i64>,
}
4156
4157fn extract_operational_filter_values(
4158    filter_fields: &[OperationalFilterField],
4159    payload_json: &str,
4160) -> Vec<ExtractedOperationalFilterValue> {
4161    let Ok(parsed) = serde_json::from_str::<serde_json::Value>(payload_json) else {
4162        return Vec::new();
4163    };
4164    let Some(object) = parsed.as_object() else {
4165        return Vec::new();
4166    };
4167
4168    filter_fields
4169        .iter()
4170        .filter_map(|field| {
4171            let value = object.get(&field.name)?;
4172            match field.field_type {
4173                OperationalFilterFieldType::String => {
4174                    value
4175                        .as_str()
4176                        .map(|string_value| ExtractedOperationalFilterValue {
4177                            field_name: field.name.clone(),
4178                            string_value: Some(string_value.to_owned()),
4179                            integer_value: None,
4180                        })
4181                }
4182                OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp => {
4183                    value
4184                        .as_i64()
4185                        .map(|integer_value| ExtractedOperationalFilterValue {
4186                            field_name: field.name.clone(),
4187                            string_value: None,
4188                            integer_value: Some(integer_value),
4189                        })
4190                }
4191            }
4192        })
4193        .collect()
4194}
4195
4196fn operational_compaction_candidates(
4197    conn: &rusqlite::Connection,
4198    retention_json: &str,
4199    collection_name: &str,
4200) -> Result<(Vec<String>, Option<i64>), EngineError> {
4201    operational_compaction_candidates_at(
4202        conn,
4203        retention_json,
4204        collection_name,
4205        current_unix_timestamp()?,
4206    )
4207}
4208
4209fn operational_compaction_candidates_at(
4210    conn: &rusqlite::Connection,
4211    retention_json: &str,
4212    collection_name: &str,
4213    now_timestamp: i64,
4214) -> Result<(Vec<String>, Option<i64>), EngineError> {
4215    let policy = parse_operational_retention_policy(retention_json)?;
4216    match policy {
4217        OperationalRetentionPolicy::KeepAll => Ok((Vec::new(), None)),
4218        OperationalRetentionPolicy::PurgeBeforeSeconds { max_age_seconds } => {
4219            let before_timestamp = now_timestamp - max_age_seconds;
4220            let mut stmt = conn.prepare(
4221                "SELECT id FROM operational_mutations \
4222                 WHERE collection_name = ?1 AND created_at < ?2 \
4223                 ORDER BY mutation_order",
4224            )?;
4225            let mutation_ids = stmt
4226                .query_map(
4227                    rusqlite::params![collection_name, before_timestamp],
4228                    |row| row.get::<_, String>(0),
4229                )?
4230                .collect::<Result<Vec<_>, _>>()?;
4231            Ok((mutation_ids, Some(before_timestamp)))
4232        }
4233        OperationalRetentionPolicy::KeepLast { max_rows } => {
4234            let mut stmt = conn.prepare(
4235                "SELECT id FROM operational_mutations \
4236                 WHERE collection_name = ?1 \
4237                 ORDER BY mutation_order DESC",
4238            )?;
4239            let ordered_ids = stmt
4240                .query_map([collection_name], |row| row.get::<_, String>(0))?
4241                .collect::<Result<Vec<_>, _>>()?;
4242            Ok((ordered_ids.into_iter().skip(max_rows).collect(), None))
4243        }
4244    }
4245}
4246
4247fn parse_operational_retention_policy(
4248    retention_json: &str,
4249) -> Result<OperationalRetentionPolicy, EngineError> {
4250    let policy: OperationalRetentionPolicy = serde_json::from_str(retention_json)
4251        .map_err(|error| EngineError::InvalidWrite(format!("invalid retention_json: {error}")))?;
4252    match policy {
4253        OperationalRetentionPolicy::KeepAll => Ok(policy),
4254        OperationalRetentionPolicy::PurgeBeforeSeconds { max_age_seconds } => {
4255            if max_age_seconds <= 0 {
4256                return Err(EngineError::InvalidWrite(
4257                    "retention_json max_age_seconds must be greater than zero".to_owned(),
4258                ));
4259            }
4260            Ok(policy)
4261        }
4262        OperationalRetentionPolicy::KeepLast { max_rows } => {
4263            if max_rows == 0 {
4264                return Err(EngineError::InvalidWrite(
4265                    "retention_json max_rows must be greater than zero".to_owned(),
4266                ));
4267            }
4268            Ok(policy)
4269        }
4270    }
4271}
4272
4273fn load_operational_retention_records(
4274    conn: &rusqlite::Connection,
4275    collection_names: Option<&[String]>,
4276    max_collections: Option<usize>,
4277) -> Result<Vec<OperationalCollectionRecord>, EngineError> {
4278    let limit = max_collections.unwrap_or(usize::MAX);
4279    if limit == 0 {
4280        return Err(EngineError::InvalidWrite(
4281            "max_collections must be greater than zero".to_owned(),
4282        ));
4283    }
4284
4285    let mut records = Vec::new();
4286    if let Some(collection_names) = collection_names {
4287        for name in collection_names.iter().take(limit) {
4288            let record = load_operational_collection_record(conn, name)?.ok_or_else(|| {
4289                EngineError::InvalidWrite(format!(
4290                    "operational collection '{name}' is not registered"
4291                ))
4292            })?;
4293            records.push(record);
4294        }
4295        return Ok(records);
4296    }
4297
4298    let mut stmt = conn.prepare(
4299        "SELECT name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at, disabled_at \
4300         FROM operational_collections ORDER BY name",
4301    )?;
4302    let rows = stmt
4303        .query_map([], map_operational_collection_row)?
4304        .take(limit)
4305        .collect::<Result<Vec<_>, _>>()?;
4306    Ok(rows)
4307}
4308
4309fn last_operational_retention_run_at(
4310    conn: &rusqlite::Connection,
4311    collection_name: &str,
4312) -> Result<Option<i64>, EngineError> {
4313    conn.query_row(
4314        "SELECT MAX(executed_at) FROM operational_retention_runs WHERE collection_name = ?1",
4315        [collection_name],
4316        |row| row.get(0),
4317    )
4318    .optional()
4319    .map_err(EngineError::Sqlite)
4320    .map(Option::flatten)
4321}
4322
4323fn count_operational_mutations_for_collection(
4324    conn: &rusqlite::Connection,
4325    collection_name: &str,
4326) -> Result<usize, EngineError> {
4327    let count: i64 = conn.query_row(
4328        "SELECT count(*) FROM operational_mutations WHERE collection_name = ?1",
4329        [collection_name],
4330        |row| row.get(0),
4331    )?;
4332    usize::try_from(count).map_err(|_| {
4333        EngineError::Bridge(format!("count overflow for collection {collection_name}"))
4334    })
4335}
4336
4337fn retention_action_kind_and_limit(
4338    policy: &OperationalRetentionPolicy,
4339) -> (OperationalRetentionActionKind, Option<usize>) {
4340    match policy {
4341        OperationalRetentionPolicy::KeepAll => (OperationalRetentionActionKind::Noop, None),
4342        OperationalRetentionPolicy::PurgeBeforeSeconds { .. } => {
4343            (OperationalRetentionActionKind::PurgeBeforeSeconds, None)
4344        }
4345        OperationalRetentionPolicy::KeepLast { max_rows } => {
4346            (OperationalRetentionActionKind::KeepLast, Some(*max_rows))
4347        }
4348    }
4349}
4350
4351fn plan_operational_retention_item(
4352    conn: &rusqlite::Connection,
4353    record: &OperationalCollectionRecord,
4354    now_timestamp: i64,
4355) -> Result<OperationalRetentionPlanItem, EngineError> {
4356    let last_run_at = last_operational_retention_run_at(conn, &record.name)?;
4357    if record.kind != OperationalCollectionKind::AppendOnlyLog {
4358        return Ok(OperationalRetentionPlanItem {
4359            collection_name: record.name.clone(),
4360            action_kind: OperationalRetentionActionKind::Noop,
4361            candidate_deletions: 0,
4362            before_timestamp: None,
4363            max_rows: None,
4364            last_run_at,
4365        });
4366    }
4367    let policy = parse_operational_retention_policy(&record.retention_json)?;
4368    let (action_kind, max_rows) = retention_action_kind_and_limit(&policy);
4369    let (candidate_ids, before_timestamp) = operational_compaction_candidates_at(
4370        conn,
4371        &record.retention_json,
4372        &record.name,
4373        now_timestamp,
4374    )?;
4375    Ok(OperationalRetentionPlanItem {
4376        collection_name: record.name.clone(),
4377        action_kind,
4378        candidate_deletions: candidate_ids.len(),
4379        before_timestamp,
4380        max_rows,
4381        last_run_at,
4382    })
4383}
4384
4385fn run_operational_retention_item(
4386    tx: &rusqlite::Transaction<'_>,
4387    record: &OperationalCollectionRecord,
4388    now_timestamp: i64,
4389    dry_run: bool,
4390) -> Result<OperationalRetentionRunItem, EngineError> {
4391    let plan = plan_operational_retention_item(tx, record, now_timestamp)?;
4392    let mut deleted_mutations = 0usize;
4393    if record.kind == OperationalCollectionKind::AppendOnlyLog
4394        && plan.action_kind != OperationalRetentionActionKind::Noop
4395        && plan.candidate_deletions > 0
4396        && !dry_run
4397    {
4398        let (candidate_ids, _) = operational_compaction_candidates_at(
4399            tx,
4400            &record.retention_json,
4401            &record.name,
4402            now_timestamp,
4403        )?;
4404        let mut delete_stmt =
4405            tx.prepare_cached("DELETE FROM operational_mutations WHERE id = ?1")?;
4406        for mutation_id in &candidate_ids {
4407            delete_stmt.execute([mutation_id.as_str()])?;
4408            deleted_mutations += 1;
4409        }
4410        drop(delete_stmt);
4411
4412        persist_simple_provenance_event(
4413            tx,
4414            "operational_retention_run",
4415            &record.name,
4416            Some(serde_json::json!({
4417                "action_kind": plan.action_kind,
4418                "deleted_mutations": deleted_mutations,
4419                "before_timestamp": plan.before_timestamp,
4420                "max_rows": plan.max_rows,
4421                "executed_at": now_timestamp,
4422            })),
4423        )?;
4424    }
4425
4426    let live_rows_remaining = count_operational_mutations_for_collection(tx, &record.name)?;
4427    let effective_deleted_mutations = if dry_run {
4428        plan.candidate_deletions
4429    } else {
4430        deleted_mutations
4431    };
4432    let rows_remaining = if dry_run {
4433        live_rows_remaining.saturating_sub(effective_deleted_mutations)
4434    } else {
4435        live_rows_remaining
4436    };
4437    if !dry_run && plan.action_kind != OperationalRetentionActionKind::Noop {
4438        tx.execute(
4439            "INSERT INTO operational_retention_runs \
4440             (id, collection_name, executed_at, action_kind, dry_run, deleted_mutations, rows_remaining, metadata_json) \
4441             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
4442            rusqlite::params![
4443                new_id(),
4444                record.name,
4445                now_timestamp,
4446                serde_json::to_string(&plan.action_kind)
4447                    .unwrap_or_else(|_| "\"noop\"".to_owned())
4448                    .trim_matches('"')
4449                    .to_owned(),
4450                i32::from(dry_run),
4451                deleted_mutations,
4452                rows_remaining,
4453                serde_json::json!({
4454                    "before_timestamp": plan.before_timestamp,
4455                    "max_rows": plan.max_rows,
4456                })
4457                .to_string(),
4458            ],
4459        )?;
4460    }
4461
4462    Ok(OperationalRetentionRunItem {
4463        collection_name: plan.collection_name,
4464        action_kind: plan.action_kind,
4465        deleted_mutations: effective_deleted_mutations,
4466        before_timestamp: plan.before_timestamp,
4467        max_rows: plan.max_rows,
4468        rows_remaining,
4469    })
4470}
4471
4472fn current_unix_timestamp() -> Result<i64, EngineError> {
4473    let now = SystemTime::now()
4474        .duration_since(SystemTime::UNIX_EPOCH)
4475        .map_err(|error| EngineError::Bridge(format!("system clock error: {error}")))?;
4476    i64::try_from(now.as_secs())
4477        .map_err(|_| EngineError::Bridge("unix timestamp overflow".to_owned()))
4478}
4479
4480fn map_operational_collection_row(
4481    row: &rusqlite::Row<'_>,
4482) -> Result<OperationalCollectionRecord, rusqlite::Error> {
4483    let kind_text: String = row.get(1)?;
4484    let kind = OperationalCollectionKind::try_from(kind_text.as_str()).map_err(|message| {
4485        rusqlite::Error::FromSqlConversionFailure(
4486            1,
4487            rusqlite::types::Type::Text,
4488            Box::new(io::Error::new(io::ErrorKind::InvalidData, message)),
4489        )
4490    })?;
4491    Ok(OperationalCollectionRecord {
4492        name: row.get(0)?,
4493        kind,
4494        schema_json: row.get(2)?,
4495        retention_json: row.get(3)?,
4496        filter_fields_json: row.get(4)?,
4497        validation_json: row.get(5)?,
4498        secondary_indexes_json: row.get(6)?,
4499        format_version: row.get(7)?,
4500        created_at: row.get(8)?,
4501        disabled_at: row.get(9)?,
4502    })
4503}
4504
4505fn map_operational_mutation_row(
4506    row: &rusqlite::Row<'_>,
4507) -> Result<OperationalMutationRow, rusqlite::Error> {
4508    Ok(OperationalMutationRow {
4509        id: row.get(0)?,
4510        collection_name: row.get(1)?,
4511        record_key: row.get(2)?,
4512        op_kind: row.get(3)?,
4513        payload_json: row.get(4)?,
4514        source_ref: row.get(5)?,
4515        created_at: row.get(6)?,
4516    })
4517}
4518
4519fn map_operational_current_row(
4520    row: &rusqlite::Row<'_>,
4521) -> Result<OperationalCurrentRow, rusqlite::Error> {
4522    Ok(OperationalCurrentRow {
4523        collection_name: row.get(0)?,
4524        record_key: row.get(1)?,
4525        payload_json: row.get(2)?,
4526        updated_at: row.get(3)?,
4527        last_mutation_id: row.get(4)?,
4528    })
4529}
4530
4531#[cfg(test)]
4532#[allow(clippy::expect_used)]
4533mod tests {
4534    use std::fs;
4535    use std::sync::Arc;
4536
4537    use fathomdb_schema::SchemaManager;
4538    use tempfile::NamedTempFile;
4539
4540    use super::{
4541        AdminService, FtsPropertyPathMode, FtsPropertyPathSpec, SafeExportOptions,
4542        VectorRegenerationConfig,
4543    };
4544    use crate::embedder::{EmbedderError, QueryEmbedder, QueryEmbedderIdentity};
4545    use crate::projection::ProjectionTarget;
4546    use crate::sqlite;
4547    use crate::{
4548        EngineError, ExecutionCoordinator, OperationalCollectionKind, OperationalRegisterRequest,
4549        TelemetryCounters,
4550    };
4551
4552    use fathomdb_query::QueryBuilder;
4553
4554    #[cfg(feature = "sqlite-vec")]
4555    use super::load_vector_regeneration_config;
4556
4557    /// In-process embedder used by the regeneration test suite. The
4558    /// vector is parameterized so individual tests can distinguish which
4559    /// embedder produced which profile row.
4560    #[derive(Debug)]
4561    #[allow(dead_code)]
4562    struct TestEmbedder {
4563        identity: QueryEmbedderIdentity,
4564        vector: Vec<f32>,
4565    }
4566
4567    #[allow(dead_code)]
4568    impl TestEmbedder {
4569        fn new(model: &str, dimension: usize) -> Self {
4570            Self {
4571                identity: QueryEmbedderIdentity {
4572                    model_identity: model.to_owned(),
4573                    model_version: "1.0.0".to_owned(),
4574                    dimension,
4575                    normalization_policy: "l2".to_owned(),
4576                },
4577                vector: vec![1.0; dimension],
4578            }
4579        }
4580    }
4581
4582    impl QueryEmbedder for TestEmbedder {
4583        fn embed_query(&self, _text: &str) -> Result<Vec<f32>, EmbedderError> {
4584            Ok(self.vector.clone())
4585        }
4586        fn identity(&self) -> QueryEmbedderIdentity {
4587            self.identity.clone()
4588        }
4589    }
4590
4591    /// Embedder that always fails — used to exercise the post-request
4592    /// failure audit path without the complexity of subprocess machinery.
4593    #[derive(Debug)]
4594    #[allow(dead_code)]
4595    struct FailingEmbedder {
4596        identity: QueryEmbedderIdentity,
4597    }
4598
4599    impl QueryEmbedder for FailingEmbedder {
4600        fn embed_query(&self, _text: &str) -> Result<Vec<f32>, EmbedderError> {
4601            Err(EmbedderError::Failed("test failure".to_owned()))
4602        }
4603        fn identity(&self) -> QueryEmbedderIdentity {
4604            self.identity.clone()
4605        }
4606    }
4607
4608    #[allow(dead_code)]
4609    #[cfg(unix)]
4610    fn set_file_mode(path: &std::path::Path, mode: u32) {
4611        use std::os::unix::fs::PermissionsExt;
4612
4613        let mut permissions = fs::metadata(path).expect("script metadata").permissions();
4614        permissions.set_mode(mode);
4615        fs::set_permissions(path, permissions).expect("chmod");
4616    }
4617
4618    #[allow(dead_code)]
4619    #[cfg(not(unix))]
4620    fn set_file_mode(_path: &std::path::Path, _mode: u32) {}
4621
4622    fn setup() -> (NamedTempFile, AdminService) {
4623        let db = NamedTempFile::new().expect("temp file");
4624        let schema = Arc::new(SchemaManager::new());
4625        {
4626            let conn = sqlite::open_connection(db.path()).expect("connection");
4627            schema.bootstrap(&conn).expect("bootstrap");
4628        }
4629        let service = AdminService::new(db.path(), Arc::clone(&schema));
4630        (db, service)
4631    }
4632
4633    #[test]
4634    fn check_integrity_includes_active_uniqueness_count() {
4635        let (_db, service) = setup();
4636        let report = service.check_integrity().expect("integrity check");
4637        assert_eq!(report.duplicate_active_logical_ids, 0);
4638        assert_eq!(report.operational_missing_collections, 0);
4639        assert_eq!(report.operational_missing_last_mutations, 0);
4640    }
4641
4642    #[test]
4643    fn trace_source_returns_node_logical_ids() {
4644        let (db, service) = setup();
4645        {
4646            let conn = sqlite::open_connection(db.path()).expect("conn");
4647            conn.execute(
4648                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
4649                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 'source-1')",
4650                [],
4651            )
4652            .expect("insert node");
4653        }
4654        let report = service.trace_source("source-1").expect("trace");
4655        assert_eq!(report.node_rows, 1);
4656        assert_eq!(report.node_logical_ids, vec!["lg1"]);
4657    }
4658
4659    #[test]
4660    fn trace_source_includes_operational_mutations() {
4661        let (db, service) = setup();
4662        {
4663            let conn = sqlite::open_connection(db.path()).expect("conn");
4664            conn.execute(
4665                "INSERT INTO operational_collections \
4666                 (name, kind, schema_json, retention_json, format_version, created_at) \
4667                 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
4668                [],
4669            )
4670            .expect("insert collection");
4671            conn.execute(
4672                "INSERT INTO operational_mutations \
4673                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
4674                 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"ok\"}', 'source-1', 100, 1)",
4675                [],
4676            )
4677            .expect("insert mutation");
4678        }
4679
4680        let report = service.trace_source("source-1").expect("trace");
4681        assert_eq!(report.operational_mutation_rows, 1);
4682        assert_eq!(report.operational_mutation_ids, vec!["m1"]);
4683    }
4684
4685    #[test]
4686    fn excise_source_restores_prior_active_node() {
4687        let (db, service) = setup();
4688        {
4689            let conn = sqlite::open_connection(db.path()).expect("conn");
4690            conn.execute(
4691                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
4692                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
4693                [],
4694            )
4695            .expect("insert v1 superseded");
4696            conn.execute(
4697                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
4698                 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
4699                [],
4700            )
4701            .expect("insert v2 active");
4702        }
4703        service.excise_source("source-2").expect("excise");
4704        {
4705            let conn = sqlite::open_connection(db.path()).expect("conn");
4706            let active_row_id: String = conn
4707                .query_row(
4708                    "SELECT row_id FROM nodes WHERE logical_id = 'lg1' AND superseded_at IS NULL",
4709                    [],
4710                    |row| row.get(0),
4711                )
4712                .expect("active row exists after excise");
4713            assert_eq!(active_row_id, "r1");
4714        }
4715    }
4716
4717    #[test]
4718    fn excise_source_deletes_operational_mutations_and_repairs_latest_state_current() {
4719        let (db, service) = setup();
4720        {
4721            let conn = sqlite::open_connection(db.path()).expect("conn");
4722            conn.execute(
4723                "INSERT INTO operational_collections \
4724                 (name, kind, schema_json, retention_json, format_version, created_at) \
4725                 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
4726                [],
4727            )
4728            .expect("insert collection");
4729            conn.execute(
4730                "INSERT INTO operational_mutations \
4731                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
4732                 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"old\"}', 'source-1', 100, 1)",
4733                [],
4734            )
4735            .expect("insert prior mutation");
4736            conn.execute(
4737                "INSERT INTO operational_mutations \
4738                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
4739                 VALUES ('m2', 'connector_health', 'gmail', 'put', '{\"status\":\"new\"}', 'source-2', 200, 2)",
4740                [],
4741            )
4742            .expect("insert excised mutation");
4743            conn.execute(
4744                "INSERT INTO operational_current \
4745                 (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
4746                 VALUES ('connector_health', 'gmail', '{\"status\":\"new\"}', 200, 'm2')",
4747                [],
4748            )
4749            .expect("insert current row");
4750        }
4751
4752        let traced = service
4753            .trace_source("source-2")
4754            .expect("trace before excise");
4755        assert_eq!(traced.operational_mutation_rows, 1);
4756        assert_eq!(traced.operational_mutation_ids, vec!["m2"]);
4757
4758        let excised = service.excise_source("source-2").expect("excise");
4759        assert_eq!(excised.operational_mutation_rows, 0);
4760        assert!(excised.operational_mutation_ids.is_empty());
4761
4762        {
4763            let conn = sqlite::open_connection(db.path()).expect("conn");
4764            let remaining: i64 = conn
4765                .query_row(
4766                    "SELECT count(*) FROM operational_mutations WHERE source_ref = 'source-2'",
4767                    [],
4768                    |row| row.get(0),
4769                )
4770                .expect("remaining count");
4771            assert_eq!(remaining, 0);
4772
4773            let current: (String, String) = conn
4774                .query_row(
4775                    "SELECT payload_json, last_mutation_id FROM operational_current \
4776                     WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
4777                    [],
4778                    |row| Ok((row.get(0)?, row.get(1)?)),
4779                )
4780                .expect("rebuilt current row");
4781            assert_eq!(current.0, "{\"status\":\"old\"}");
4782            assert_eq!(current.1, "m1");
4783        }
4784    }
4785
4786    #[test]
4787    fn restore_logical_id_reestablishes_last_pre_retire_content_and_attached_edges() {
4788        let (db, service) = setup();
4789        {
4790            let conn = sqlite::open_connection(db.path()).expect("conn");
4791            conn.execute(
4792                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
4793                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
4794                [],
4795            )
4796            .expect("insert node");
4797            conn.execute(
4798                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
4799                 VALUES ('node-row-topic', 'topic-1', 'Topic', '{}', 100, 'seed')",
4800                [],
4801            )
4802            .expect("insert target node");
4803            conn.execute(
4804                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
4805                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
4806                [],
4807            )
4808            .expect("insert chunk");
4809            conn.execute(
4810                "INSERT INTO edges \
4811                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
4812                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 'seed')",
4813                [],
4814            )
4815            .expect("insert edge");
4816            conn.execute(
4817                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
4818                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
4819                [],
4820            )
4821            .expect("insert node retire event");
4822            conn.execute(
4823                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
4824                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
4825                [],
4826            )
4827            .expect("insert edge retire event");
4828            conn.execute(
4829                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
4830                [],
4831            )
4832            .expect("retire node");
4833            conn.execute(
4834                "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
4835                [],
4836            )
4837            .expect("retire edge");
4838            conn.execute("DELETE FROM fts_nodes", [])
4839                .expect("clear fts");
4840        }
4841
4842        let report = service.restore_logical_id("doc-1").expect("restore");
4843        assert_eq!(report.logical_id, "doc-1");
4844        assert!(!report.was_noop);
4845        assert_eq!(report.restored_node_rows, 1);
4846        assert_eq!(report.restored_edge_rows, 1);
4847        assert_eq!(report.restored_chunk_rows, 1);
4848        assert_eq!(report.restored_fts_rows, 1);
4849
4850        let conn = sqlite::open_connection(db.path()).expect("conn");
4851        let active_node_count: i64 = conn
4852            .query_row(
4853                "SELECT count(*) FROM nodes WHERE logical_id = 'doc-1' AND superseded_at IS NULL",
4854                [],
4855                |row| row.get(0),
4856            )
4857            .expect("active node count");
4858        assert_eq!(active_node_count, 1);
4859        let active_edge_count: i64 = conn
4860            .query_row(
4861                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
4862                [],
4863                |row| row.get(0),
4864            )
4865            .expect("active edge count");
4866        assert_eq!(active_edge_count, 1);
4867        let fts_count: i64 = conn
4868            .query_row(
4869                "SELECT count(*) FROM fts_nodes WHERE chunk_id = 'chunk-1'",
4870                [],
4871                |row| row.get(0),
4872            )
4873            .expect("fts count");
4874        assert_eq!(fts_count, 1);
4875    }
4876
4877    #[test]
4878    fn restore_logical_id_restores_edges_retired_after_the_node_retire_event() {
4879        let (db, service) = setup();
4880        {
4881            let conn = sqlite::open_connection(db.path()).expect("conn");
4882            conn.execute(
4883                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
4884                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
4885                [],
4886            )
4887            .expect("insert node");
4888            conn.execute(
4889                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
4890                 VALUES ('node-row-topic', 'topic-1', 'Topic', '{}', 100, 'seed')",
4891                [],
4892            )
4893            .expect("insert target node");
4894            conn.execute(
4895                "INSERT INTO edges \
4896                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
4897                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 'seed')",
4898                [],
4899            )
4900            .expect("insert edge");
4901            conn.execute(
4902                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
4903                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
4904                [],
4905            )
4906            .expect("insert node retire event");
4907            conn.execute(
4908                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
4909                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 201, '')",
4910                [],
4911            )
4912            .expect("insert edge retire event");
4913            conn.execute(
4914                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
4915                [],
4916            )
4917            .expect("retire node");
4918            conn.execute(
4919                "UPDATE edges SET superseded_at = 201 WHERE logical_id = 'edge-1'",
4920                [],
4921            )
4922            .expect("retire edge");
4923        }
4924
4925        let report = service.restore_logical_id("doc-1").expect("restore");
4926        assert_eq!(report.restored_edge_rows, 1);
4927
4928        let conn = sqlite::open_connection(db.path()).expect("conn");
4929        let active_edge_count: i64 = conn
4930            .query_row(
4931                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
4932                [],
4933                |row| row.get(0),
4934            )
4935            .expect("active edge count");
4936        assert_eq!(active_edge_count, 1);
4937    }
4938
4939    #[test]
4940    fn restore_logical_id_prefers_latest_retired_revision_when_timestamps_tie() {
4941        let (db, service) = setup();
4942        {
4943            let conn = sqlite::open_connection(db.path()).expect("conn");
4944            conn.execute(
4945                "INSERT INTO nodes \
4946                 (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
4947                 VALUES ('node-row-older', 'doc-1', 'Document', '{\"title\":\"older\"}', 100, 200, 'forget-1')",
4948                [],
4949            )
4950            .expect("insert older retired node");
4951            conn.execute(
4952                "INSERT INTO nodes \
4953                 (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
4954                 VALUES ('node-row-newer', 'doc-1', 'Document', '{\"title\":\"newer\"}', 100, 200, 'forget-1')",
4955                [],
4956            )
4957            .expect("insert newer retired node");
4958            conn.execute(
4959                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
4960                 VALUES ('evt-retire-older', 'node_retire', 'doc-1', 'forget-1', 200, '')",
4961                [],
4962            )
4963            .expect("insert older retire event");
4964            conn.execute(
4965                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
4966                 VALUES ('evt-retire-newer', 'node_retire', 'doc-1', 'forget-1', 200, '')",
4967                [],
4968            )
4969            .expect("insert newer retire event");
4970        }
4971
4972        let report = service.restore_logical_id("doc-1").expect("restore");
4973
4974        assert!(!report.was_noop);
4975        let conn = sqlite::open_connection(db.path()).expect("conn");
4976        let active_row: (String, String) = conn
4977            .query_row(
4978                "SELECT row_id, properties FROM nodes \
4979                 WHERE logical_id = 'doc-1' AND superseded_at IS NULL",
4980                [],
4981                |row| Ok((row.get(0)?, row.get(1)?)),
4982            )
4983            .expect("restored active row");
4984        assert_eq!(active_row.0, "node-row-newer");
4985        assert_eq!(active_row.1, "{\"title\":\"newer\"}");
4986    }
4987
4988    #[test]
4989    fn purge_logical_id_removes_retired_content_and_records_tombstone() {
4990        let (db, service) = setup();
4991        {
4992            let conn = sqlite::open_connection(db.path()).expect("conn");
4993            conn.execute(
4994                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
4995                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
4996                [],
4997            )
4998            .expect("insert retired node");
4999            conn.execute(
5000                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5001                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5002                [],
5003            )
5004            .expect("insert chunk");
5005            conn.execute(
5006                "INSERT INTO edges \
5007                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, superseded_at, source_ref) \
5008                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 200, 'seed')",
5009                [],
5010            )
5011            .expect("insert retired edge");
5012            conn.execute(
5013                "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
5014                 VALUES ('chunk-1', 'doc-1', 'Document', 'budget narrative')",
5015                [],
5016            )
5017            .expect("insert fts");
5018        }
5019
5020        let report = service.purge_logical_id("doc-1").expect("purge");
5021        assert_eq!(report.logical_id, "doc-1");
5022        assert!(!report.was_noop);
5023        assert_eq!(report.deleted_node_rows, 1);
5024        assert_eq!(report.deleted_edge_rows, 1);
5025        assert_eq!(report.deleted_chunk_rows, 1);
5026        assert_eq!(report.deleted_fts_rows, 1);
5027
5028        let conn = sqlite::open_connection(db.path()).expect("conn");
5029        let remaining_nodes: i64 = conn
5030            .query_row(
5031                "SELECT count(*) FROM nodes WHERE logical_id = 'doc-1'",
5032                [],
5033                |row| row.get(0),
5034            )
5035            .expect("remaining nodes");
5036        assert_eq!(remaining_nodes, 0);
5037        let remaining_edges: i64 = conn
5038            .query_row(
5039                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1'",
5040                [],
5041                |row| row.get(0),
5042            )
5043            .expect("remaining edges");
5044        assert_eq!(remaining_edges, 0);
5045        let remaining_chunks: i64 = conn
5046            .query_row(
5047                "SELECT count(*) FROM chunks WHERE id = 'chunk-1'",
5048                [],
5049                |row| row.get(0),
5050            )
5051            .expect("remaining chunks");
5052        assert_eq!(remaining_chunks, 0);
5053        let purge_events: i64 = conn
5054            .query_row(
5055                "SELECT count(*) FROM provenance_events WHERE event_type = 'purge_logical_id' AND subject = 'doc-1'",
5056                [],
5057                |row| row.get(0),
5058            )
5059            .expect("purge events");
5060        assert_eq!(purge_events, 1);
5061    }
5062
5063    #[test]
5064    fn check_semantics_accepts_preserved_retired_chunks() {
5065        let (db, service) = setup();
5066        {
5067            let conn = sqlite::open_connection(db.path()).expect("conn");
5068            conn.execute(
5069                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5070                 VALUES ('node-row-1', 'doc-1', 'Document', '{}', 100, 200, 'seed')",
5071                [],
5072            )
5073            .expect("insert retired node");
5074            conn.execute(
5075                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5076                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5077                [],
5078            )
5079            .expect("insert chunk");
5080        }
5081
5082        let report = service.check_semantics().expect("semantics");
5083        assert_eq!(report.orphaned_chunks, 0);
5084    }
5085
5086    #[test]
5087    fn check_semantics_detects_missing_retired_node_history_for_preserved_chunks() {
5088        let (db, service) = setup();
5089        {
5090            let conn = sqlite::open_connection(db.path()).expect("conn");
5091            conn.execute(
5092                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5093                 VALUES ('chunk-1', 'ghost-doc', 'budget narrative', 100)",
5094                [],
5095            )
5096            .expect("insert orphaned chunk");
5097        }
5098
5099        let report = service.check_semantics().expect("semantics");
5100        assert_eq!(report.orphaned_chunks, 1);
5101    }
5102
5103    #[cfg(feature = "sqlite-vec")]
5104    #[test]
5105    fn check_semantics_detects_missing_retired_node_history_for_preserved_vec_rows() {
5106        let (db, service) = setup();
5107        {
5108            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5109            service
5110                .schema_manager
5111                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
5112                .expect("ensure vec profile");
5113            conn.execute(
5114                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5115                 VALUES ('chunk-1', 'ghost-doc', 'budget narrative', 100)",
5116                [],
5117            )
5118            .expect("insert orphaned chunk");
5119            conn.execute(
5120                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
5121                [],
5122            )
5123            .expect("insert vec row");
5124        }
5125
5126        let report = service.check_semantics().expect("semantics");
5127        assert_eq!(report.orphaned_chunks, 1);
5128        assert_eq!(report.vec_rows_for_superseded_nodes, 1);
5129    }
5130
5131    #[cfg(feature = "sqlite-vec")]
5132    #[test]
5133    fn restore_logical_id_reestablishes_vector_search_without_reingest() {
5134        let (db, service) = setup();
5135        {
5136            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5137            service
5138                .schema_manager
5139                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
5140                .expect("ensure vec profile");
5141            conn.execute(
5142                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5143                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
5144                [],
5145            )
5146            .expect("insert retired node");
5147            conn.execute(
5148                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5149                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5150                [],
5151            )
5152            .expect("insert chunk");
5153            conn.execute(
5154                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
5155                [],
5156            )
5157            .expect("insert vec row");
5158            conn.execute(
5159                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5160                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
5161                [],
5162            )
5163            .expect("insert retire event");
5164        }
5165
5166        let report = service.restore_logical_id("doc-1").expect("restore");
5167        assert_eq!(report.restored_vec_rows, 1);
5168
5169        let coordinator = ExecutionCoordinator::open(
5170            db.path(),
5171            Arc::new(SchemaManager::new()),
5172            Some(4),
5173            1,
5174            Arc::new(TelemetryCounters::default()),
5175            None,
5176        )
5177        .expect("coordinator");
5178        let compiled = QueryBuilder::nodes("Document")
5179            .vector_search("[0.0, 0.0, 0.0, 0.0]", 5)
5180            .compile()
5181            .expect("compile");
5182        let rows = coordinator
5183            .execute_compiled_read(&compiled)
5184            .expect("vector read");
5185        assert!(
5186            rows.nodes.iter().any(|row| row.logical_id == "doc-1"),
5187            "restore should make the preserved vec row visible again without re-ingest"
5188        );
5189    }
5190
5191    #[cfg(feature = "sqlite-vec")]
5192    #[test]
5193    fn purge_logical_id_deletes_vec_rows_for_retired_content() {
5194        let (db, service) = setup();
5195        {
5196            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5197            service
5198                .schema_manager
5199                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
5200                .expect("ensure vec profile");
5201            conn.execute(
5202                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5203                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
5204                [],
5205            )
5206            .expect("insert retired node");
5207            conn.execute(
5208                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5209                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5210                [],
5211            )
5212            .expect("insert chunk");
5213            conn.execute(
5214                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
5215                [],
5216            )
5217            .expect("insert vec row");
5218        }
5219
5220        let report = service.purge_logical_id("doc-1").expect("purge");
5221        assert_eq!(report.deleted_vec_rows, 1);
5222
5223        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5224        let vec_count: i64 = conn
5225            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
5226                row.get(0)
5227            })
5228            .expect("vec count");
5229        assert_eq!(vec_count, 0);
5230    }
5231
5232    #[cfg(feature = "sqlite-vec")]
5233    #[test]
5234    fn restore_logical_id_restores_visibility_of_regenerated_vectors() {
5235        let (db, service) = setup();
5236
5237        {
5238            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5239            service
5240                .schema_manager
5241                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
5242                .expect("ensure vec profile");
5243            conn.execute(
5244                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
5245                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
5246                [],
5247            )
5248            .expect("insert node");
5249            conn.execute(
5250                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5251                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5252                [],
5253            )
5254            .expect("insert chunk");
5255        }
5256
5257        let embedder = TestEmbedder::new("test-model", 4);
5258        service
5259            .regenerate_vector_embeddings(
5260                &embedder,
5261                &VectorRegenerationConfig {
5262                    profile: "default".to_owned(),
5263                    table_name: "vec_nodes_active".to_owned(),
5264                    chunking_policy: "per_chunk".to_owned(),
5265                    preprocessing_policy: "trim".to_owned(),
5266                },
5267            )
5268            .expect("regenerate");
5269
5270        {
5271            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5272            conn.execute(
5273                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5274                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
5275                [],
5276            )
5277            .expect("insert retire event");
5278            conn.execute(
5279                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
5280                [],
5281            )
5282            .expect("retire node");
5283        }
5284
5285        let report = service.restore_logical_id("doc-1").expect("restore");
5286        assert_eq!(report.restored_vec_rows, 1);
5287
5288        let coordinator = ExecutionCoordinator::open(
5289            db.path(),
5290            Arc::new(SchemaManager::new()),
5291            Some(4),
5292            1,
5293            Arc::new(TelemetryCounters::default()),
5294            None,
5295        )
5296        .expect("coordinator");
5297        let compiled = QueryBuilder::nodes("Document")
5298            .vector_search("[0.0, 0.0, 0.0, 0.0]", 5)
5299            .compile()
5300            .expect("compile");
5301        let rows = coordinator
5302            .execute_compiled_read(&compiled)
5303            .expect("vector read");
5304        assert!(
5305            rows.nodes.iter().any(|row| row.logical_id == "doc-1"),
5306            "restored logical_id should become visible through regenerated vectors"
5307        );
5308    }
5309
5310    #[test]
5311    fn check_semantics_clean_db_returns_zeros() {
5312        let (_db, service) = setup();
5313        let report = service.check_semantics().expect("semantics check");
5314        assert_eq!(report.orphaned_chunks, 0);
5315        assert_eq!(report.null_source_ref_nodes, 0);
5316        assert_eq!(report.broken_step_fk, 0);
5317        assert_eq!(report.broken_action_fk, 0);
5318        assert_eq!(report.stale_fts_rows, 0);
5319        assert_eq!(report.fts_rows_for_superseded_nodes, 0);
5320        assert_eq!(report.dangling_edges, 0);
5321        assert_eq!(report.orphaned_supersession_chains, 0);
5322        assert_eq!(report.stale_vec_rows, 0);
5323        assert_eq!(report.vec_rows_for_superseded_nodes, 0);
5324        assert_eq!(report.missing_operational_current_rows, 0);
5325        assert_eq!(report.stale_operational_current_rows, 0);
5326        assert_eq!(report.disabled_collection_mutations, 0);
5327        assert_eq!(report.mismatched_kind_property_fts_rows, 0);
5328        assert_eq!(report.duplicate_property_fts_rows, 0);
5329        assert_eq!(report.drifted_property_fts_rows, 0);
5330        assert!(report.warnings.is_empty());
5331    }
5332
5333    #[test]
5334    fn register_operational_collection_persists_and_emits_provenance() {
5335        let (db, service) = setup();
5336        let record = service
5337            .register_operational_collection(&OperationalRegisterRequest {
5338                name: "connector_health".to_owned(),
5339                kind: OperationalCollectionKind::LatestState,
5340                schema_json: "{}".to_owned(),
5341                retention_json: "{}".to_owned(),
5342                filter_fields_json: "[]".to_owned(),
5343                validation_json: String::new(),
5344                secondary_indexes_json: "[]".to_owned(),
5345                format_version: 1,
5346            })
5347            .expect("register collection");
5348
5349        assert_eq!(record.name, "connector_health");
5350        assert_eq!(record.kind, OperationalCollectionKind::LatestState);
5351        assert_eq!(record.schema_json, "{}");
5352        assert_eq!(record.retention_json, "{}");
5353        assert_eq!(record.filter_fields_json, "[]");
5354        assert!(record.created_at > 0);
5355        assert_eq!(record.disabled_at, None);
5356
5357        let described = service
5358            .describe_operational_collection("connector_health")
5359            .expect("describe collection")
5360            .expect("collection exists");
5361        assert_eq!(described, record);
5362
5363        let conn = sqlite::open_connection(db.path()).expect("conn");
5364        let provenance_count: i64 = conn
5365            .query_row(
5366                "SELECT count(*) FROM provenance_events \
5367                 WHERE event_type = 'operational_collection_registered' AND subject = 'connector_health'",
5368                [],
5369                |row| row.get(0),
5370            )
5371            .expect("provenance count");
5372        assert_eq!(provenance_count, 1);
5373    }
5374
5375    #[test]
5376    fn register_and_update_operational_collection_validation_round_trip() {
5377        let (db, service) = setup();
5378        let record = service
5379            .register_operational_collection(&OperationalRegisterRequest {
5380                name: "connector_health".to_owned(),
5381                kind: OperationalCollectionKind::LatestState,
5382                schema_json: "{}".to_owned(),
5383                retention_json: "{}".to_owned(),
5384                filter_fields_json: "[]".to_owned(),
5385                validation_json: String::new(),
5386                secondary_indexes_json: "[]".to_owned(),
5387                format_version: 1,
5388            })
5389            .expect("register collection");
5390        assert_eq!(record.validation_json, "");
5391
5392        let validation_json = r#"{"format_version":1,"mode":"enforce","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#;
5393        let updated = service
5394            .update_operational_collection_validation("connector_health", validation_json)
5395            .expect("update validation");
5396        assert_eq!(updated.validation_json, validation_json);
5397
5398        let described = service
5399            .describe_operational_collection("connector_health")
5400            .expect("describe collection")
5401            .expect("collection exists");
5402        assert_eq!(described.validation_json, validation_json);
5403
5404        let conn = sqlite::open_connection(db.path()).expect("conn");
5405        let provenance_count: i64 = conn
5406            .query_row(
5407                "SELECT count(*) FROM provenance_events \
5408                 WHERE event_type = 'operational_collection_validation_updated' \
5409                   AND subject = 'connector_health'",
5410                [],
5411                |row| row.get(0),
5412            )
5413            .expect("provenance count");
5414        assert_eq!(provenance_count, 1);
5415    }
5416
5417    #[test]
5418    fn register_update_and_rebuild_operational_secondary_indexes_round_trip() {
5419        let (db, service) = setup();
5420        let record = service
5421            .register_operational_collection(&OperationalRegisterRequest {
5422                name: "audit_log".to_owned(),
5423                kind: OperationalCollectionKind::AppendOnlyLog,
5424                schema_json: "{}".to_owned(),
5425                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
5426                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
5427                validation_json: String::new(),
5428                secondary_indexes_json: "[]".to_owned(),
5429                format_version: 1,
5430            })
5431            .expect("register collection");
5432        assert_eq!(record.secondary_indexes_json, "[]");
5433
5434        {
5435            let writer = crate::WriterActor::start(
5436                db.path(),
5437                Arc::new(SchemaManager::new()),
5438                crate::ProvenanceMode::Warn,
5439                Arc::new(crate::TelemetryCounters::default()),
5440            )
5441            .expect("writer");
5442            writer
5443                .submit(crate::WriteRequest {
5444                    label: "secondary-index-seed".to_owned(),
5445                    nodes: vec![],
5446                    node_retires: vec![],
5447                    edges: vec![],
5448                    edge_retires: vec![],
5449                    chunks: vec![],
5450                    runs: vec![],
5451                    steps: vec![],
5452                    actions: vec![],
5453                    optional_backfills: vec![],
5454                    vec_inserts: vec![],
5455                    operational_writes: vec![
5456                        crate::OperationalWrite::Append {
5457                            collection: "audit_log".to_owned(),
5458                            record_key: "evt-1".to_owned(),
5459                            payload_json: r#"{"actor":"alice","ts":100}"#.to_owned(),
5460                            source_ref: Some("src-1".to_owned()),
5461                        },
5462                        crate::OperationalWrite::Append {
5463                            collection: "audit_log".to_owned(),
5464                            record_key: "evt-2".to_owned(),
5465                            payload_json: r#"{"actor":"bob","ts":200}"#.to_owned(),
5466                            source_ref: Some("src-2".to_owned()),
5467                        },
5468                    ],
5469                })
5470                .expect("seed writes");
5471        }
5472
5473        let secondary_indexes_json = r#"[{"name":"actor_ts","kind":"append_only_field_time","field":"actor","value_type":"string","time_field":"ts"}]"#;
5474        let updated = service
5475            .update_operational_collection_secondary_indexes("audit_log", secondary_indexes_json)
5476            .expect("update secondary indexes");
5477        assert_eq!(updated.secondary_indexes_json, secondary_indexes_json);
5478
5479        let conn = sqlite::open_connection(db.path()).expect("conn");
5480        let entry_count: i64 = conn
5481            .query_row(
5482                "SELECT count(*) FROM operational_secondary_index_entries \
5483                 WHERE collection_name = 'audit_log' AND index_name = 'actor_ts'",
5484                [],
5485                |row| row.get(0),
5486            )
5487            .expect("secondary index count");
5488        assert_eq!(entry_count, 2);
5489        conn.execute(
5490            "DELETE FROM operational_secondary_index_entries WHERE collection_name = 'audit_log'",
5491            [],
5492        )
5493        .expect("clear index entries");
5494        drop(conn);
5495
5496        let rebuild = service
5497            .rebuild_operational_secondary_indexes("audit_log")
5498            .expect("rebuild secondary indexes");
5499        assert_eq!(rebuild.collection_name, "audit_log");
5500        assert_eq!(rebuild.mutation_entries_rebuilt, 2);
5501        assert_eq!(rebuild.current_entries_rebuilt, 0);
5502    }
5503
5504    #[test]
5505    fn register_operational_collection_rejects_invalid_validation_contract() {
5506        let (_db, service) = setup();
5507
5508        let error = service
5509            .register_operational_collection(&OperationalRegisterRequest {
5510                name: "connector_health".to_owned(),
5511                kind: OperationalCollectionKind::LatestState,
5512                schema_json: "{}".to_owned(),
5513                retention_json: "{}".to_owned(),
5514                filter_fields_json: "[]".to_owned(),
5515                validation_json: r#"{"format_version":1,"mode":"enforce","fields":[{"name":"status","type":"string","minimum":0}]}"#
5516                    .to_owned(),
5517                secondary_indexes_json: "[]".to_owned(),
5518                format_version: 1,
5519            })
5520            .expect_err("invalid validation contract should reject");
5521
5522        assert!(matches!(error, EngineError::InvalidWrite(_)));
5523        assert!(error.to_string().contains("minimum/maximum"));
5524    }
5525
5526    #[test]
5527    fn validate_operational_collection_history_reports_invalid_rows_without_mutation() {
5528        let (db, service) = setup();
5529        service
5530            .register_operational_collection(&OperationalRegisterRequest {
5531                name: "audit_log".to_owned(),
5532                kind: OperationalCollectionKind::AppendOnlyLog,
5533                schema_json: "{}".to_owned(),
5534                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
5535                filter_fields_json: "[]".to_owned(),
5536                validation_json: r#"{"format_version":1,"mode":"disabled","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#
5537                    .to_owned(),
5538                secondary_indexes_json: "[]".to_owned(),
5539                format_version: 1,
5540            })
5541            .expect("register collection");
5542        {
5543            let writer = crate::WriterActor::start(
5544                db.path(),
5545                Arc::new(SchemaManager::new()),
5546                crate::ProvenanceMode::Warn,
5547                Arc::new(crate::TelemetryCounters::default()),
5548            )
5549            .expect("writer");
5550            writer
5551                .submit(crate::WriteRequest {
5552                    label: "history-validation".to_owned(),
5553                    nodes: vec![],
5554                    node_retires: vec![],
5555                    edges: vec![],
5556                    edge_retires: vec![],
5557                    chunks: vec![],
5558                    runs: vec![],
5559                    steps: vec![],
5560                    actions: vec![],
5561                    optional_backfills: vec![],
5562                    vec_inserts: vec![],
5563                    operational_writes: vec![
5564                        crate::OperationalWrite::Append {
5565                            collection: "audit_log".to_owned(),
5566                            record_key: "evt-1".to_owned(),
5567                            payload_json: r#"{"status":"ok"}"#.to_owned(),
5568                            source_ref: Some("src-1".to_owned()),
5569                        },
5570                        crate::OperationalWrite::Append {
5571                            collection: "audit_log".to_owned(),
5572                            record_key: "evt-2".to_owned(),
5573                            payload_json: r#"{"status":"bogus"}"#.to_owned(),
5574                            source_ref: Some("src-2".to_owned()),
5575                        },
5576                    ],
5577                })
5578                .expect("write");
5579        }
5580
5581        let report = service
5582            .validate_operational_collection_history("audit_log")
5583            .expect("validate history");
5584        assert_eq!(report.collection_name, "audit_log");
5585        assert_eq!(report.checked_rows, 2);
5586        assert_eq!(report.invalid_row_count, 1);
5587        assert_eq!(report.issues.len(), 1);
5588        assert_eq!(report.issues[0].record_key, "evt-2");
5589        assert!(report.issues[0].message.contains("must be one of"));
5590
5591        let trace = service
5592            .trace_operational_collection("audit_log", None)
5593            .expect("trace");
5594        assert_eq!(trace.mutation_count, 2);
5595
5596        let conn = sqlite::open_connection(db.path()).expect("conn");
5597        let provenance_count: i64 = conn
5598            .query_row(
5599                "SELECT count(*) FROM provenance_events \
5600                 WHERE event_type = 'operational_collection_history_validated' \
5601                   AND subject = 'audit_log'",
5602                [],
5603                |row| row.get(0),
5604            )
5605            .expect("provenance count");
5606        assert_eq!(provenance_count, 0);
5607    }
5608
5609    #[test]
5610    fn trace_operational_collection_returns_mutations_and_current_rows() {
5611        let (db, service) = setup();
5612        service
5613            .register_operational_collection(&OperationalRegisterRequest {
5614                name: "connector_health".to_owned(),
5615                kind: OperationalCollectionKind::LatestState,
5616                schema_json: "{}".to_owned(),
5617                retention_json: "{}".to_owned(),
5618                filter_fields_json: "[]".to_owned(),
5619                validation_json: String::new(),
5620                secondary_indexes_json: "[]".to_owned(),
5621                format_version: 1,
5622            })
5623            .expect("register collection");
5624        {
5625            let writer = crate::WriterActor::start(
5626                db.path(),
5627                Arc::new(SchemaManager::new()),
5628                crate::ProvenanceMode::Warn,
5629                Arc::new(crate::TelemetryCounters::default()),
5630            )
5631            .expect("writer");
5632            writer
5633                .submit(crate::WriteRequest {
5634                    label: "operational".to_owned(),
5635                    nodes: vec![],
5636                    node_retires: vec![],
5637                    edges: vec![],
5638                    edge_retires: vec![],
5639                    chunks: vec![],
5640                    runs: vec![],
5641                    steps: vec![],
5642                    actions: vec![],
5643                    optional_backfills: vec![],
5644                    vec_inserts: vec![],
5645                    operational_writes: vec![crate::OperationalWrite::Put {
5646                        collection: "connector_health".to_owned(),
5647                        record_key: "gmail".to_owned(),
5648                        payload_json: r#"{"status":"ok"}"#.to_owned(),
5649                        source_ref: Some("src-1".to_owned()),
5650                    }],
5651                })
5652                .expect("write");
5653        }
5654
5655        let report = service
5656            .trace_operational_collection("connector_health", Some("gmail"))
5657            .expect("trace");
5658        assert_eq!(report.collection_name, "connector_health");
5659        assert_eq!(report.record_key.as_deref(), Some("gmail"));
5660        assert_eq!(report.mutation_count, 1);
5661        assert_eq!(report.current_count, 1);
5662        assert_eq!(report.mutations[0].op_kind, "put");
5663        assert_eq!(report.current_rows[0].payload_json, r#"{"status":"ok"}"#);
5664    }
5665
5666    #[test]
5667    fn trace_operational_collection_rejects_unknown_collection() {
5668        let (_db, service) = setup();
5669
5670        let error = service
5671            .trace_operational_collection("missing_collection", None)
5672            .expect_err("unknown collection should fail");
5673
5674        assert!(matches!(error, EngineError::InvalidWrite(_)));
5675        assert!(error.to_string().contains("is not registered"));
5676    }
5677
5678    #[test]
5679    fn rebuild_operational_current_repairs_missing_latest_state_rows() {
5680        let (db, service) = setup();
5681        service
5682            .register_operational_collection(&OperationalRegisterRequest {
5683                name: "connector_health".to_owned(),
5684                kind: OperationalCollectionKind::LatestState,
5685                schema_json: "{}".to_owned(),
5686                retention_json: "{}".to_owned(),
5687                filter_fields_json: "[]".to_owned(),
5688                validation_json: String::new(),
5689                secondary_indexes_json: "[]".to_owned(),
5690                format_version: 1,
5691            })
5692            .expect("register collection");
5693        {
5694            let writer = crate::WriterActor::start(
5695                db.path(),
5696                Arc::new(SchemaManager::new()),
5697                crate::ProvenanceMode::Warn,
5698                Arc::new(crate::TelemetryCounters::default()),
5699            )
5700            .expect("writer");
5701            writer
5702                .submit(crate::WriteRequest {
5703                    label: "operational".to_owned(),
5704                    nodes: vec![],
5705                    node_retires: vec![],
5706                    edges: vec![],
5707                    edge_retires: vec![],
5708                    chunks: vec![],
5709                    runs: vec![],
5710                    steps: vec![],
5711                    actions: vec![],
5712                    optional_backfills: vec![],
5713                    vec_inserts: vec![],
5714                    operational_writes: vec![crate::OperationalWrite::Put {
5715                        collection: "connector_health".to_owned(),
5716                        record_key: "gmail".to_owned(),
5717                        payload_json: r#"{"status":"ok"}"#.to_owned(),
5718                        source_ref: Some("src-1".to_owned()),
5719                    }],
5720                })
5721                .expect("write");
5722        }
5723        {
5724            let conn = sqlite::open_connection(db.path()).expect("conn");
5725            conn.execute(
5726                "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
5727                [],
5728            )
5729            .expect("delete current row");
5730        }
5731
5732        let before = service.check_semantics().expect("semantics before rebuild");
5733        assert_eq!(before.missing_operational_current_rows, 1);
5734
5735        let repair = service
5736            .rebuild_operational_current(Some("connector_health"))
5737            .expect("rebuild current");
5738        assert_eq!(repair.collections_rebuilt, 1);
5739        assert_eq!(repair.current_rows_rebuilt, 1);
5740
5741        let after = service.check_semantics().expect("semantics after rebuild");
5742        assert_eq!(after.missing_operational_current_rows, 0);
5743
5744        let conn = sqlite::open_connection(db.path()).expect("conn");
5745        let payload: String = conn
5746            .query_row(
5747                "SELECT payload_json FROM operational_current \
5748                 WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
5749                [],
5750                |row| row.get(0),
5751            )
5752            .expect("restored payload");
5753        assert_eq!(payload, r#"{"status":"ok"}"#);
5754    }
5755
5756    #[test]
5757    fn rebuild_operational_current_restores_latest_state_secondary_index_entries() {
5758        let (db, service) = setup();
5759        service
5760            .register_operational_collection(&OperationalRegisterRequest {
5761                name: "connector_health".to_owned(),
5762                kind: OperationalCollectionKind::LatestState,
5763                schema_json: "{}".to_owned(),
5764                retention_json: "{}".to_owned(),
5765                filter_fields_json: "[]".to_owned(),
5766                validation_json: String::new(),
5767                secondary_indexes_json: r#"[{"name":"status_current","kind":"latest_state_field","field":"status","value_type":"string"}]"#.to_owned(),
5768                format_version: 1,
5769            })
5770            .expect("register collection");
5771        {
5772            let writer = crate::WriterActor::start(
5773                db.path(),
5774                Arc::new(SchemaManager::new()),
5775                crate::ProvenanceMode::Warn,
5776                Arc::new(crate::TelemetryCounters::default()),
5777            )
5778            .expect("writer");
5779            writer
5780                .submit(crate::WriteRequest {
5781                    label: "operational".to_owned(),
5782                    nodes: vec![],
5783                    node_retires: vec![],
5784                    edges: vec![],
5785                    edge_retires: vec![],
5786                    chunks: vec![],
5787                    runs: vec![],
5788                    steps: vec![],
5789                    actions: vec![],
5790                    optional_backfills: vec![],
5791                    vec_inserts: vec![],
5792                    operational_writes: vec![crate::OperationalWrite::Put {
5793                        collection: "connector_health".to_owned(),
5794                        record_key: "gmail".to_owned(),
5795                        payload_json: r#"{"status":"ok"}"#.to_owned(),
5796                        source_ref: Some("src-1".to_owned()),
5797                    }],
5798                })
5799                .expect("write");
5800        }
5801        {
5802            let conn = sqlite::open_connection(db.path()).expect("conn");
5803            let entry_count: i64 = conn
5804                .query_row(
5805                    "SELECT count(*) FROM operational_secondary_index_entries \
5806                     WHERE collection_name = 'connector_health' AND subject_kind = 'current'",
5807                    [],
5808                    |row| row.get(0),
5809                )
5810                .expect("secondary index count before repair");
5811            assert_eq!(entry_count, 1);
5812            conn.execute(
5813                "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
5814                [],
5815            )
5816            .expect("delete current row");
5817        }
5818
5819        service
5820            .rebuild_operational_current(Some("connector_health"))
5821            .expect("rebuild current");
5822
5823        let conn = sqlite::open_connection(db.path()).expect("conn");
5824        let entry_count: i64 = conn
5825            .query_row(
5826                "SELECT count(*) FROM operational_secondary_index_entries \
5827                 WHERE collection_name = 'connector_health' AND subject_kind = 'current'",
5828                [],
5829                |row| row.get(0),
5830            )
5831            .expect("secondary index count after repair");
5832        assert_eq!(entry_count, 1);
5833    }
5834
5835    #[test]
5836    fn operational_current_semantics_and_rebuild_follow_mutation_order() {
5837        let (db, service) = setup();
5838        {
5839            let conn = sqlite::open_connection(db.path()).expect("conn");
5840            conn.execute(
5841                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
5842                 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
5843                [],
5844            )
5845            .expect("seed collection");
5846            conn.execute(
5847                "INSERT INTO operational_mutations \
5848                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
5849                 VALUES ('m3', 'connector_health', 'gmail', 'put', '{\"status\":\"old\"}', 'src-1', 100, 1)",
5850                [],
5851            )
5852            .expect("seed first put");
5853            conn.execute(
5854                "INSERT INTO operational_mutations \
5855                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
5856                 VALUES ('m2', 'connector_health', 'gmail', 'delete', '', 'src-2', 100, 2)",
5857                [],
5858            )
5859            .expect("seed delete");
5860            conn.execute(
5861                "INSERT INTO operational_mutations \
5862                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
5863                 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"new\"}', 'src-3', 100, 3)",
5864                [],
5865            )
5866            .expect("seed final put");
5867            conn.execute(
5868                "INSERT INTO operational_current \
5869                 (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
5870                 VALUES ('connector_health', 'gmail', '{\"status\":\"new\"}', 100, 'm1')",
5871                [],
5872            )
5873            .expect("seed current");
5874        }
5875
5876        let before = service.check_semantics().expect("semantics before rebuild");
5877        assert_eq!(before.missing_operational_current_rows, 0);
5878        assert_eq!(before.stale_operational_current_rows, 0);
5879
5880        {
5881            let conn = sqlite::open_connection(db.path()).expect("conn");
5882            conn.execute(
5883                "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
5884                [],
5885            )
5886            .expect("delete current row");
5887        }
5888
5889        let missing = service.check_semantics().expect("semantics after delete");
5890        assert_eq!(missing.missing_operational_current_rows, 1);
5891        assert_eq!(missing.stale_operational_current_rows, 0);
5892
5893        service
5894            .rebuild_operational_current(Some("connector_health"))
5895            .expect("rebuild current");
5896
5897        let after = service.check_semantics().expect("semantics after rebuild");
5898        assert_eq!(after.missing_operational_current_rows, 0);
5899        assert_eq!(after.stale_operational_current_rows, 0);
5900
5901        let conn = sqlite::open_connection(db.path()).expect("conn");
5902        let payload: String = conn
5903            .query_row(
5904                "SELECT payload_json FROM operational_current \
5905                 WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
5906                [],
5907                |row| row.get(0),
5908            )
5909            .expect("restored payload");
5910        assert_eq!(payload, r#"{"status":"new"}"#);
5911    }
5912
5913    #[test]
5914    fn disable_operational_collection_sets_disabled_at_and_emits_provenance() {
5915        let (db, service) = setup();
5916        service
5917            .register_operational_collection(&OperationalRegisterRequest {
5918                name: "audit_log".to_owned(),
5919                kind: OperationalCollectionKind::AppendOnlyLog,
5920                schema_json: "{}".to_owned(),
5921                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
5922                filter_fields_json: "[]".to_owned(),
5923                validation_json: String::new(),
5924                secondary_indexes_json: "[]".to_owned(),
5925                format_version: 1,
5926            })
5927            .expect("register collection");
5928
5929        let record = service
5930            .disable_operational_collection("audit_log")
5931            .expect("disable collection");
5932        assert_eq!(record.name, "audit_log");
5933        assert!(record.disabled_at.is_some());
5934
5935        let disabled_at = record.disabled_at.expect("disabled_at");
5936        let described = service
5937            .describe_operational_collection("audit_log")
5938            .expect("describe collection")
5939            .expect("collection exists");
5940        assert_eq!(described.disabled_at, Some(disabled_at));
5941
5942        let writer = crate::WriterActor::start(
5943            db.path(),
5944            Arc::new(SchemaManager::new()),
5945            crate::ProvenanceMode::Warn,
5946            Arc::new(crate::TelemetryCounters::default()),
5947        )
5948        .expect("writer");
5949        let error = writer
5950            .submit(crate::WriteRequest {
5951                label: "disabled-operational".to_owned(),
5952                nodes: vec![],
5953                node_retires: vec![],
5954                edges: vec![],
5955                edge_retires: vec![],
5956                chunks: vec![],
5957                runs: vec![],
5958                steps: vec![],
5959                actions: vec![],
5960                optional_backfills: vec![],
5961                vec_inserts: vec![],
5962                operational_writes: vec![crate::OperationalWrite::Append {
5963                    collection: "audit_log".to_owned(),
5964                    record_key: "evt-1".to_owned(),
5965                    payload_json: r#"{"type":"sync"}"#.to_owned(),
5966                    source_ref: Some("src-1".to_owned()),
5967                }],
5968            })
5969            .expect_err("disabled collection should reject writes");
5970        assert!(matches!(error, EngineError::InvalidWrite(_)));
5971        assert!(error.to_string().contains("is disabled"));
5972
5973        let conn = sqlite::open_connection(db.path()).expect("conn");
5974        let provenance_count: i64 = conn
5975            .query_row(
5976                "SELECT count(*) FROM provenance_events \
5977                 WHERE event_type = 'operational_collection_disabled' AND subject = 'audit_log'",
5978                [],
5979                |row| row.get(0),
5980            )
5981            .expect("provenance count");
5982        assert_eq!(provenance_count, 1);
5983    }
5984
5985    #[test]
5986    fn purge_operational_collection_deletes_append_only_rows_before_cutoff() {
5987        let (db, service) = setup();
5988        {
5989            let conn = sqlite::open_connection(db.path()).expect("conn");
5990            conn.execute(
5991                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
5992                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_all\"}', 1, 100)",
5993                [],
5994            )
5995            .expect("seed collection");
5996            conn.execute(
5997                "INSERT INTO operational_mutations \
5998                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
5999                 VALUES ('evt-1', 'audit_log', 'evt-1', 'append', '{\"seq\":1}', 'src-1', 100, 1)",
6000                [],
6001            )
6002            .expect("seed event 1");
6003            conn.execute(
6004                "INSERT INTO operational_mutations \
6005                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6006                 VALUES ('evt-2', 'audit_log', 'evt-2', 'append', '{\"seq\":2}', 'src-2', 200, 2)",
6007                [],
6008            )
6009            .expect("seed event 2");
6010            conn.execute(
6011                "INSERT INTO operational_mutations \
6012                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6013                 VALUES ('evt-3', 'audit_log', 'evt-3', 'append', '{\"seq\":3}', 'src-3', 300, 3)",
6014                [],
6015            )
6016            .expect("seed event 3");
6017        }
6018
6019        let report = service
6020            .purge_operational_collection("audit_log", 250)
6021            .expect("purge collection");
6022        assert_eq!(report.collection_name, "audit_log");
6023        assert_eq!(report.deleted_mutations, 2);
6024        assert_eq!(report.before_timestamp, 250);
6025
6026        let conn = sqlite::open_connection(db.path()).expect("conn");
6027        let remaining: Vec<String> = {
6028            let mut stmt = conn
6029                .prepare(
6030                    "SELECT id FROM operational_mutations \
6031                     WHERE collection_name = 'audit_log' ORDER BY mutation_order",
6032                )
6033                .expect("stmt");
6034            stmt.query_map([], |row| row.get(0))
6035                .expect("rows")
6036                .collect::<Result<_, _>>()
6037                .expect("collect")
6038        };
6039        assert_eq!(remaining, vec!["evt-3".to_owned()]);
6040        let provenance_count: i64 = conn
6041            .query_row(
6042                "SELECT count(*) FROM provenance_events \
6043                 WHERE event_type = 'operational_collection_purged' AND subject = 'audit_log'",
6044                [],
6045                |row| row.get(0),
6046            )
6047            .expect("provenance count");
6048        assert_eq!(provenance_count, 1);
6049    }
6050
6051    #[test]
        // Compacting with the second argument `true` must report what would be deleted
        // (one row beyond `max_rows`) while leaving mutations and provenance untouched.
6052    fn compact_operational_collection_dry_run_reports_without_mutation() {
6053        let (db, service) = setup();
            // Seed on a connection confined to this block; it is dropped before the
            // service call below.
6054        {
6055            let conn = sqlite::open_connection(db.path()).expect("conn");
                // Collection row with `keep_last` retention and `max_rows` = 2.
6056            conn.execute(
6057                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6058                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
6059                [],
6060            )
6061            .expect("seed collection");
                // Three append mutations (mutation_order 1, 2, 3): one more than `max_rows`.
6062            for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
6063                conn.execute(
6064                    "INSERT INTO operational_mutations \
6065                     (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6066                     VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
6067                    rusqlite::params![
6068                        format!("evt-{index}"),
6069                        format!("{{\"seq\":{index}}}"),
6070                        created_at,
6071                        index,
6072                    ],
6073                )
6074                .expect("seed event");
6075            }
6076        }
6077
            // NOTE(review): `true` is presumably the dry-run flag, given that the report's
            // `dry_run` field is asserted to be set below.
6078        let report = service
6079            .compact_operational_collection("audit_log", true)
6080            .expect("compact collection");
6081        assert_eq!(report.collection_name, "audit_log");
6082        assert_eq!(report.deleted_mutations, 1);
6083        assert!(report.dry_run);
6084        assert_eq!(report.before_timestamp, None);
6085
            // All three seeded rows must still be present when read on a fresh connection.
6086        let conn = sqlite::open_connection(db.path()).expect("conn");
6087        let remaining_count: i64 = conn
6088            .query_row(
6089                "SELECT count(*) FROM operational_mutations WHERE collection_name = 'audit_log'",
6090                [],
6091                |row| row.get(0),
6092            )
6093            .expect("remaining count");
6094        assert_eq!(remaining_count, 3);
            // No `operational_collection_compacted` provenance event may exist for the collection.
6095        let provenance_count: i64 = conn
6096            .query_row(
6097                "SELECT count(*) FROM provenance_events \
6098                 WHERE event_type = 'operational_collection_compacted' AND subject = 'audit_log'",
6099                [],
6100                |row| row.get(0),
6101            )
6102            .expect("provenance count");
6103        assert_eq!(provenance_count, 0);
6104    }
6105
6106    #[test]
        // Compacting a `keep_last` collection (max_rows = 2) holding three rows must delete
        // the row with the lowest mutation_order and record exactly one provenance event.
6107    fn compact_operational_collection_keep_last_deletes_oldest_rows() {
6108        let (db, service) = setup();
            // Seed on a connection confined to this block; it is dropped before the
            // service call below.
6109        {
6110            let conn = sqlite::open_connection(db.path()).expect("conn");
                // Collection row with `keep_last` retention and `max_rows` = 2.
6111            conn.execute(
6112                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6113                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
6114                [],
6115            )
6116            .expect("seed collection");
                // Three append mutations evt-1..evt-3 with mutation_order 1, 2, 3.
6117            for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
6118                conn.execute(
6119                    "INSERT INTO operational_mutations \
6120                     (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6121                     VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
6122                    rusqlite::params![
6123                        format!("evt-{index}"),
6124                        format!("{{\"seq\":{index}}}"),
6125                        created_at,
6126                        index,
6127                    ],
6128                )
6129                .expect("seed event");
6130            }
6131        }
6132
            // Second argument `false`: the report's `dry_run` field is asserted to be unset.
6133        let report = service
6134            .compact_operational_collection("audit_log", false)
6135            .expect("compact collection");
6136        assert_eq!(report.deleted_mutations, 1);
6137        assert!(!report.dry_run);
6138
            // Read the surviving ids in mutation order: only evt-2 and evt-3 may remain.
6139        let conn = sqlite::open_connection(db.path()).expect("conn");
6140        let remaining: Vec<String> = {
6141            let mut stmt = conn
6142                .prepare(
6143                    "SELECT id FROM operational_mutations \
6144                     WHERE collection_name = 'audit_log' ORDER BY mutation_order",
6145                )
6146                .expect("stmt");
6147            stmt.query_map([], |row| row.get(0))
6148                .expect("rows")
6149                .collect::<Result<_, _>>()
6150                .expect("collect")
6151        };
6152        assert_eq!(remaining, vec!["evt-2".to_owned(), "evt-3".to_owned()]);
            // Exactly one `operational_collection_compacted` provenance event is expected.
6153        let provenance_count: i64 = conn
6154            .query_row(
6155                "SELECT count(*) FROM provenance_events \
6156                 WHERE event_type = 'operational_collection_compacted' AND subject = 'audit_log'",
6157                [],
6158                |row| row.get(0),
6159            )
6160            .expect("provenance count");
6161        assert_eq!(provenance_count, 1);
6162    }
6163
6164    #[test]
        // Walks a `keep_last` collection through planning, a dry run, and a real retention
        // run, checking that only the real run deletes rows and records a retention run.
6165    fn plan_and_run_operational_retention_keep_last() {
6166        let (db, service) = setup();
            // Seed on a connection confined to this block; it is dropped before the
            // service calls below.
6167        {
6168            let conn = sqlite::open_connection(db.path()).expect("conn");
                // Collection row with `keep_last` retention and `max_rows` = 2.
6169            conn.execute(
6170                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6171                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
6172                [],
6173            )
6174            .expect("seed collection");
                // Three append mutations evt-1..evt-3 with mutation_order 1, 2, 3.
6175            for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
6176                conn.execute(
6177                    "INSERT INTO operational_mutations \
6178                     (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6179                     VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
6180                    rusqlite::params![
6181                        format!("evt-{index}"),
6182                        format!("{{\"seq\":{index}}}"),
6183                        created_at,
6184                        index,
6185                    ],
6186                )
6187                .expect("seed event");
6188            }
6189        }
6190
            // Planning: one collection examined, one candidate deletion (three rows against
            // `max_rows` = 2), and no earlier run recorded (`last_run_at` is `None`).
            // NOTE(review): the meaning of the `None` / `Some(10)` arguments is not visible
            // here — confirm against `plan_operational_retention`.
6191        let plan = service
6192            .plan_operational_retention(1_000, None, Some(10))
6193            .expect("plan retention");
6194        assert_eq!(plan.collections_examined, 1);
6195        assert_eq!(plan.items[0].collection_name, "audit_log");
6196        assert_eq!(
6197            plan.items[0].action_kind,
6198            crate::operational::OperationalRetentionActionKind::KeepLast
6199        );
6200        assert_eq!(plan.items[0].candidate_deletions, 1);
6201        assert_eq!(plan.items[0].max_rows, Some(2));
6202        assert_eq!(plan.items[0].last_run_at, None);
6203
            // Dry run (last argument `true`): the report claims one deletion and two rows
            // remaining, but the checks that follow expect nothing to have changed on disk.
6204        let dry_run = service
6205            .run_operational_retention(1_000, None, Some(10), true)
6206            .expect("dry-run retention");
6207        assert!(dry_run.dry_run);
6208        assert_eq!(dry_run.collections_acted_on, 1);
6209        assert_eq!(dry_run.items[0].deleted_mutations, 1);
6210        assert_eq!(dry_run.items[0].rows_remaining, 2);
6211
            // After the dry run: all three mutations are still present and no row exists in
            // `operational_retention_runs` for the collection.
6212        let conn = sqlite::open_connection(db.path()).expect("conn");
6213        let remaining_count: i64 = conn
6214            .query_row(
6215                "SELECT count(*) FROM operational_mutations WHERE collection_name = 'audit_log'",
6216                [],
6217                |row| row.get(0),
6218            )
6219            .expect("remaining count after dry run");
6220        assert_eq!(remaining_count, 3);
6221        let retention_run_count: i64 = conn
6222            .query_row(
6223                "SELECT count(*) FROM operational_retention_runs WHERE collection_name = 'audit_log'",
6224                [],
6225                |row| row.get(0),
6226            )
6227            .expect("retention run count");
6228        assert_eq!(retention_run_count, 0);
            // Close the inspection connection before the real retention run.
6229        drop(conn);
6230
            // Real run (last argument `false`): same counts as the dry run, now applied.
6231        let executed = service
6232            .run_operational_retention(1_000, None, Some(10), false)
6233            .expect("execute retention");
6234        assert_eq!(executed.collections_acted_on, 1);
6235        assert_eq!(executed.items[0].deleted_mutations, 1);
6236        assert_eq!(executed.items[0].rows_remaining, 2);
6237
            // Only evt-2 and evt-3 may remain, read back in mutation order.
6238        let conn = sqlite::open_connection(db.path()).expect("conn");
6239        let remaining: Vec<String> = {
6240            let mut stmt = conn
6241                .prepare(
6242                    "SELECT id FROM operational_mutations \
6243                     WHERE collection_name = 'audit_log' ORDER BY mutation_order",
6244                )
6245                .expect("stmt");
6246            stmt.query_map([], |row| row.get(0))
6247                .expect("rows")
6248                .collect::<Result<_, _>>()
6249                .expect("collect")
6250        };
6251        assert_eq!(remaining, vec!["evt-2".to_owned(), "evt-3".to_owned()]);
            // The newest recorded run's `executed_at` must match the 1_000 passed as the
            // first argument to `run_operational_retention`.
6252        let last_run_at: i64 = conn
6253            .query_row(
6254                "SELECT executed_at FROM operational_retention_runs \
6255                 WHERE collection_name = 'audit_log' ORDER BY executed_at DESC LIMIT 1",
6256                [],
6257                |row| row.get(0),
6258            )
6259            .expect("last run at");
6260        assert_eq!(last_run_at, 1_000);
6261    }
6262
6263    #[test]
        // A dry run over a collection that already fits its `keep_last` limit (two rows,
        // `max_rows` = 2) must report zero deletions and must not count the collection as
        // acted on.
6264    fn dry_run_operational_retention_does_not_mark_noop_collection_as_acted_on() {
6265        let (db, service) = setup();
6266        let conn = sqlite::open_connection(db.path()).expect("conn");
            // Collection row with `keep_last` retention and `max_rows` = 2.
6267        conn.execute(
6268            "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6269             VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
6270            [],
6271        )
6272        .expect("seed collection");
            // Only two mutations are seeded, exactly `max_rows`.
6273        for (index, created_at) in [(1_i64, 100_i64), (2, 200)] {
6274            conn.execute(
6275                "INSERT INTO operational_mutations \
6276                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6277                 VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
6278                rusqlite::params![
6279                    format!("evt-{index}"),
6280                    format!("{{\"seq\":{index}}}"),
6281                    created_at,
6282                    index,
6283                ],
6284            )
6285            .expect("seed event");
6286        }
            // Close the seeding connection before calling into the service.
6287        drop(conn);
6288
            // Last argument `true`: the report's `dry_run` field is asserted to be set.
6289        let dry_run = service
6290            .run_operational_retention(1_000, None, Some(10), true)
6291            .expect("dry-run retention");
6292        assert!(dry_run.dry_run);
            // The collection still appears in `items`, but with nothing to delete it must
            // not be counted in `collections_acted_on`.
6293        assert_eq!(dry_run.collections_acted_on, 0);
6294        assert_eq!(dry_run.items[0].deleted_mutations, 0);
6295        assert_eq!(dry_run.items[0].rows_remaining, 2);
6296    }
6297
6298    #[test]
        // Compacting a `LatestState` collection must fail with `EngineError::InvalidWrite`,
        // and the error text must mention `append_only_log`.
6299    fn compact_operational_collection_rejects_latest_state() {
            // The database handle is unused; only the service is exercised.
6300        let (_db, service) = setup();
            // Register through the service API a collection of the kind that must be rejected.
6301        service
6302            .register_operational_collection(&OperationalRegisterRequest {
6303                name: "connector_health".to_owned(),
6304                kind: OperationalCollectionKind::LatestState,
6305                schema_json: "{}".to_owned(),
6306                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6307                filter_fields_json: "[]".to_owned(),
6308                validation_json: String::new(),
6309                secondary_indexes_json: "[]".to_owned(),
6310                format_version: 1,
6311            })
6312            .expect("register collection");
6313
            // The call must return an error; both its variant and its message are checked.
6314        let error = service
6315            .compact_operational_collection("connector_health", false)
6316            .expect_err("latest_state compaction should be rejected");
6317        assert!(matches!(error, EngineError::InvalidWrite(_)));
6318        assert!(error.to_string().contains("append_only_log"));
6319    }
6320
6321    #[test]
        // Registering a collection must return a record whose `filter_fields_json` is
        // exactly the string supplied in the request.
6322    fn register_operational_collection_persists_filter_fields_json() {
            // The database handle is unused; only the returned record is inspected.
6323        let (_db, service) = setup();
6324
            // Declares two filter fields: `actor` (string; exact and prefix modes) and
            // `ts` (timestamp; range mode).
6325        let record = service
6326            .register_operational_collection(&OperationalRegisterRequest {
6327                name: "audit_log".to_owned(),
6328                kind: OperationalCollectionKind::AppendOnlyLog,
6329                schema_json: "{}".to_owned(),
6330                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6331                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
6332                validation_json: String::new(),
6333                secondary_indexes_json: "[]".to_owned(),
6334                format_version: 1,
6335            })
6336            .expect("register collection");
6337
            // The returned JSON is compared byte-for-byte against the submitted literal.
6338        assert_eq!(
6339            record.filter_fields_json,
6340            r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#
6341        );
6342    }
6343
6344    #[test]
        // Filtered read over an append-only collection: a prefix filter on `actor` combined
        // with a range filter on `ts` must select exactly one of three appended rows.
6345    fn read_operational_collection_filters_append_only_rows_by_declared_fields() {
6346        let (db, service) = setup();
            // Declare `actor`, `seq` and `ts` as filterable fields on the collection.
6347        service
6348            .register_operational_collection(&OperationalRegisterRequest {
6349                name: "audit_log".to_owned(),
6350                kind: OperationalCollectionKind::AppendOnlyLog,
6351                schema_json: "{}".to_owned(),
6352                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6353                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"seq","type":"integer","modes":["exact","range"]},{"name":"ts","type":"timestamp","modes":["exact","range"]}]"#.to_owned(),
6354                validation_json: String::new(),
6355                secondary_indexes_json: "[]".to_owned(),
6356                format_version: 1,
6357            })
6358            .expect("register collection");
            // Append three rows through a `WriterActor` confined to this block; every other
            // part of the write request is left empty.
6359        {
6360            let writer = crate::WriterActor::start(
6361                db.path(),
6362                Arc::new(SchemaManager::new()),
6363                crate::ProvenanceMode::Warn,
6364                Arc::new(crate::TelemetryCounters::default()),
6365            )
6366            .expect("writer");
6367            writer
6368                .submit(crate::WriteRequest {
6369                    label: "operational".to_owned(),
6370                    nodes: vec![],
6371                    node_retires: vec![],
6372                    edges: vec![],
6373                    edge_retires: vec![],
6374                    chunks: vec![],
6375                    runs: vec![],
6376                    steps: vec![],
6377                    actions: vec![],
6378                    optional_backfills: vec![],
6379                    vec_inserts: vec![],
6380                    operational_writes: vec![
6381                        crate::OperationalWrite::Append {
6382                            collection: "audit_log".to_owned(),
6383                            record_key: "evt-1".to_owned(),
6384                            payload_json: r#"{"actor":"alice","seq":1,"ts":100}"#.to_owned(),
6385                            source_ref: Some("src-1".to_owned()),
6386                        },
6387                        crate::OperationalWrite::Append {
6388                            collection: "audit_log".to_owned(),
6389                            record_key: "evt-2".to_owned(),
6390                            payload_json: r#"{"actor":"alice-admin","seq":2,"ts":200}"#.to_owned(),
6391                            source_ref: Some("src-2".to_owned()),
6392                        },
6393                        crate::OperationalWrite::Append {
6394                            collection: "audit_log".to_owned(),
6395                            record_key: "evt-3".to_owned(),
6396                            payload_json: r#"{"actor":"bob","seq":3,"ts":300}"#.to_owned(),
6397                            source_ref: Some("src-3".to_owned()),
6398                        },
6399                    ],
6400                })
6401                .expect("write");
6402        }
6403
            // Of the seeded rows only evt-2 has an actor starting with "alice" and a `ts`
            // (200) between 150 and 250: evt-1 matches the prefix but has ts 100, and evt-3
            // has actor "bob".
6404        let report = service
6405            .read_operational_collection(&crate::operational::OperationalReadRequest {
6406                collection_name: "audit_log".to_owned(),
6407                filters: vec![
6408                    crate::operational::OperationalFilterClause::Prefix {
6409                        field: "actor".to_owned(),
6410                        value: "alice".to_owned(),
6411                    },
6412                    crate::operational::OperationalFilterClause::Range {
6413                        field: "ts".to_owned(),
6414                        lower: Some(150),
6415                        upper: Some(250),
6416                    },
6417                ],
6418                limit: Some(10),
6419            })
6420            .expect("filtered read");
6421
            // One row, not limited (limit 10 exceeds the match count), with its payload
            // returned exactly as it was written.
6422        assert_eq!(report.collection_name, "audit_log");
6423        assert_eq!(report.row_count, 1);
6424        assert!(!report.was_limited);
6425        assert_eq!(report.rows.len(), 1);
6426        assert_eq!(report.rows[0].record_key, "evt-2");
6427        assert_eq!(
6428            report.rows[0].payload_json,
6429            r#"{"actor":"alice-admin","seq":2,"ts":200}"#
6430        );
6431    }
6432
6433    #[test]
        // A filtered read must still return the matching row after every
        // `operational_filter_values` row for the collection has been deleted.
        // NOTE(review): per the test name the read is then served from the `actor_ts`
        // secondary index declared at registration — confirm against the read path.
6434    fn read_operational_collection_uses_secondary_index_when_filter_values_are_missing() {
6435        let (db, service) = setup();
            // Register with both a filter contract and an `append_only_field_time` secondary
            // index over `actor` with time field `ts`.
6436        service
6437            .register_operational_collection(&OperationalRegisterRequest {
6438                name: "audit_log".to_owned(),
6439                kind: OperationalCollectionKind::AppendOnlyLog,
6440                schema_json: "{}".to_owned(),
6441                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6442                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
6443                validation_json: String::new(),
6444                secondary_indexes_json: r#"[{"name":"actor_ts","kind":"append_only_field_time","field":"actor","value_type":"string","time_field":"ts"}]"#.to_owned(),
6445                format_version: 1,
6446            })
6447            .expect("register collection");
            // Append two rows through a `WriterActor` confined to this block.
6448        {
6449            let writer = crate::WriterActor::start(
6450                db.path(),
6451                Arc::new(SchemaManager::new()),
6452                crate::ProvenanceMode::Warn,
6453                Arc::new(crate::TelemetryCounters::default()),
6454            )
6455            .expect("writer");
6456            writer
6457                .submit(crate::WriteRequest {
6458                    label: "operational".to_owned(),
6459                    nodes: vec![],
6460                    node_retires: vec![],
6461                    edges: vec![],
6462                    edge_retires: vec![],
6463                    chunks: vec![],
6464                    runs: vec![],
6465                    steps: vec![],
6466                    actions: vec![],
6467                    optional_backfills: vec![],
6468                    vec_inserts: vec![],
6469                    operational_writes: vec![
6470                        crate::OperationalWrite::Append {
6471                            collection: "audit_log".to_owned(),
6472                            record_key: "evt-1".to_owned(),
6473                            payload_json: r#"{"actor":"alice","ts":100}"#.to_owned(),
6474                            source_ref: Some("src-1".to_owned()),
6475                        },
6476                        crate::OperationalWrite::Append {
6477                            collection: "audit_log".to_owned(),
6478                            record_key: "evt-2".to_owned(),
6479                            payload_json: r#"{"actor":"alice-admin","ts":200}"#.to_owned(),
6480                            source_ref: Some("src-2".to_owned()),
6481                        },
6482                    ],
6483                })
6484                .expect("write");
6485        }
            // Delete the collection's `operational_filter_values` rows directly, then close
            // the connection before reading through the service.
6486        let conn = sqlite::open_connection(db.path()).expect("conn");
6487        conn.execute(
6488            "DELETE FROM operational_filter_values WHERE collection_name = 'audit_log'",
6489            [],
6490        )
6491        .expect("clear filter values");
6492        drop(conn);
6493
            // Same prefix + range filters as the declared-fields read test; only evt-2
            // (actor "alice-admin", ts 200) satisfies both.
6494        let report = service
6495            .read_operational_collection(&crate::operational::OperationalReadRequest {
6496                collection_name: "audit_log".to_owned(),
6497                filters: vec![
6498                    crate::operational::OperationalFilterClause::Prefix {
6499                        field: "actor".to_owned(),
6500                        value: "alice".to_owned(),
6501                    },
6502                    crate::operational::OperationalFilterClause::Range {
6503                        field: "ts".to_owned(),
6504                        lower: Some(150),
6505                        upper: Some(250),
6506                    },
6507                ],
6508                limit: Some(10),
6509            })
6510            .expect("secondary-index read");
6511
6512        assert_eq!(report.row_count, 1);
6513        assert_eq!(report.rows[0].record_key, "evt-2");
6514    }
6515
6516    #[test]
        // Covers two rejection paths of `read_operational_collection`: a filtered read on a
        // `LatestState` collection, and a filter on a field absent from the collection's
        // declared filter fields.
6517    fn read_operational_collection_rejects_undeclared_fields_and_latest_state_collections() {
            // The database handle is unused; only the service is exercised.
6518        let (_db, service) = setup();
            // Case 1: a `LatestState` collection that does declare `status` as a filter field.
6519        service
6520            .register_operational_collection(&OperationalRegisterRequest {
6521                name: "connector_health".to_owned(),
6522                kind: OperationalCollectionKind::LatestState,
6523                schema_json: "{}".to_owned(),
6524                retention_json: "{}".to_owned(),
6525                filter_fields_json: r#"[{"name":"status","type":"string","modes":["exact"]}]"#
6526                    .to_owned(),
6527                validation_json: String::new(),
6528                secondary_indexes_json: "[]".to_owned(),
6529                format_version: 1,
6530            })
6531            .expect("register collection");
6532
            // The filter names a declared field, yet the read must fail with a message
            // that mentions `append_only_log`.
6533        let latest_state_error = service
6534            .read_operational_collection(&crate::operational::OperationalReadRequest {
6535                collection_name: "connector_health".to_owned(),
6536                filters: vec![crate::operational::OperationalFilterClause::Exact {
6537                    field: "status".to_owned(),
6538                    value: crate::operational::OperationalFilterValue::String("ok".to_owned()),
6539                }],
6540                limit: Some(10),
6541            })
6542            .expect_err("latest_state filtered reads should be rejected");
6543        assert!(latest_state_error.to_string().contains("append_only_log"));
6544
            // Case 2: an append-only collection whose only declared filter field is `actor`.
6545        service
6546            .register_operational_collection(&OperationalRegisterRequest {
6547                name: "audit_log".to_owned(),
6548                kind: OperationalCollectionKind::AppendOnlyLog,
6549                schema_json: "{}".to_owned(),
6550                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6551                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact"]}]"#
6552                    .to_owned(),
6553                validation_json: String::new(),
6554                secondary_indexes_json: "[]".to_owned(),
6555                format_version: 1,
6556            })
6557            .expect("register append-only collection");
6558
            // Filtering on `missing`, which was never declared, must fail with a message
            // containing "undeclared".
6559        let undeclared_error = service
6560            .read_operational_collection(&crate::operational::OperationalReadRequest {
6561                collection_name: "audit_log".to_owned(),
6562                filters: vec![crate::operational::OperationalFilterClause::Exact {
6563                    field: "missing".to_owned(),
6564                    value: crate::operational::OperationalFilterValue::String("x".to_owned()),
6565                }],
6566                limit: Some(10),
6567            })
6568            .expect_err("undeclared field should be rejected");
6569        assert!(undeclared_error.to_string().contains("undeclared"));
6570    }
6571
6572    #[test]
        // With two rows matching the filter and `limit` = 1, the report must contain one
        // row, echo the applied limit, and flag that the result was limited.
6573    fn read_operational_collection_applies_limit_and_reports_truncation() {
6574        let (db, service) = setup();
            // Only `actor` is declared as a filter field, in prefix mode.
6575        service
6576            .register_operational_collection(&OperationalRegisterRequest {
6577                name: "audit_log".to_owned(),
6578                kind: OperationalCollectionKind::AppendOnlyLog,
6579                schema_json: "{}".to_owned(),
6580                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6581                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["prefix"]}]"#
6582                    .to_owned(),
6583                validation_json: String::new(),
6584                secondary_indexes_json: "[]".to_owned(),
6585                format_version: 1,
6586            })
6587            .expect("register collection");
            // Append two rows whose actors both start with "alice", through a `WriterActor`
            // confined to this block.
6588        {
6589            let writer = crate::WriterActor::start(
6590                db.path(),
6591                Arc::new(SchemaManager::new()),
6592                crate::ProvenanceMode::Warn,
6593                Arc::new(crate::TelemetryCounters::default()),
6594            )
6595            .expect("writer");
6596            writer
6597                .submit(crate::WriteRequest {
6598                    label: "operational".to_owned(),
6599                    nodes: vec![],
6600                    node_retires: vec![],
6601                    edges: vec![],
6602                    edge_retires: vec![],
6603                    chunks: vec![],
6604                    runs: vec![],
6605                    steps: vec![],
6606                    actions: vec![],
6607                    optional_backfills: vec![],
6608                    vec_inserts: vec![],
6609                    operational_writes: vec![
6610                        crate::OperationalWrite::Append {
6611                            collection: "audit_log".to_owned(),
6612                            record_key: "evt-1".to_owned(),
6613                            payload_json: r#"{"actor":"alice-1"}"#.to_owned(),
6614                            source_ref: Some("src-1".to_owned()),
6615                        },
6616                        crate::OperationalWrite::Append {
6617                            collection: "audit_log".to_owned(),
6618                            record_key: "evt-2".to_owned(),
6619                            payload_json: r#"{"actor":"alice-2"}"#.to_owned(),
6620                            source_ref: Some("src-2".to_owned()),
6621                        },
6622                    ],
6623                })
6624                .expect("write");
6625        }
6626
            // Both rows match the prefix, but the request caps the result at one row.
6627        let report = service
6628            .read_operational_collection(&crate::operational::OperationalReadRequest {
6629                collection_name: "audit_log".to_owned(),
6630                filters: vec![crate::operational::OperationalFilterClause::Prefix {
6631                    field: "actor".to_owned(),
6632                    value: "alice".to_owned(),
6633                }],
6634                limit: Some(1),
6635            })
6636            .expect("limited read");
6637
6638        assert_eq!(report.row_count, 1);
6639        assert_eq!(report.applied_limit, 1);
6640        assert!(report.was_limited);
            // The single row returned is expected to be evt-2, the second of the two
            // appended writes.
6641        assert_eq!(report.rows[0].record_key, "evt-2");
6642    }
6643
6644    #[test]
        // Starts from a hand-built database holding only `operational_collections` and
        // `operational_mutations` (no filter-fields column), then checks that a filter
        // contract can be attached afterwards and used for reads.
6645    fn preexisting_operational_collection_can_gain_filter_contract_after_upgrade() {
            // The database is created by hand instead of through `setup()`; the expect
            // message below labels it a pre-v10 schema.
6646        let db = NamedTempFile::new().expect("temp db");
6647        let conn = sqlite::open_connection(db.path()).expect("conn");
            // Create the two tables and seed one collection plus one mutation whose payload
            // has actor "alice" and ts 0.
6648        conn.execute_batch(
6649            r#"
6650            CREATE TABLE operational_collections (
6651                name TEXT PRIMARY KEY,
6652                kind TEXT NOT NULL,
6653                schema_json TEXT NOT NULL,
6654                retention_json TEXT NOT NULL,
6655                format_version INTEGER NOT NULL DEFAULT 1,
6656                created_at INTEGER NOT NULL DEFAULT 100,
6657                disabled_at INTEGER
6658            );
6659            CREATE TABLE operational_mutations (
6660                id TEXT PRIMARY KEY,
6661                collection_name TEXT NOT NULL,
6662                record_key TEXT NOT NULL,
6663                op_kind TEXT NOT NULL,
6664                payload_json TEXT NOT NULL,
6665                source_ref TEXT,
6666                created_at INTEGER NOT NULL DEFAULT 100,
6667                mutation_order INTEGER NOT NULL DEFAULT 1
6668            );
6669            INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at)
6670            VALUES ('audit_log', 'append_only_log', '{}', '{"mode":"keep_all"}', 1, 100);
6671            INSERT INTO operational_mutations
6672                (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order)
6673            VALUES
6674                ('evt-1', 'audit_log', 'evt-1', 'append', '{"actor":"alice","ts":0}', 'src-1', 100, 1);
6675            "#,
6676        )
6677        .expect("seed pre-v10 schema");
            // Close the seeding connection before the service opens the database.
6678        drop(conn);
6679
            // Before any filter contract exists, filtering on `actor` must be rejected with
            // a message containing "undeclared".
6680        let service = AdminService::new(db.path(), Arc::new(SchemaManager::new()));
6681        let pre_update = service
6682            .read_operational_collection(&crate::operational::OperationalReadRequest {
6683                collection_name: "audit_log".to_owned(),
6684                filters: vec![crate::operational::OperationalFilterClause::Exact {
6685                    field: "actor".to_owned(),
6686                    value: crate::operational::OperationalFilterValue::String("alice".to_owned()),
6687                }],
6688                limit: Some(10),
6689            })
6690            .expect_err("read should reject undeclared fields before migration update");
6691        assert!(pre_update.to_string().contains("undeclared"));
6692
            // Attach a filter contract declaring `actor` (exact) and `ts` (range); the
            // returned record must reflect it.
6693        let updated = service
6694            .update_operational_collection_filters(
6695                "audit_log",
6696                r#"[{"name":"actor","type":"string","modes":["exact"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#,
6697            )
6698            .expect("update filter contract");
6699        assert!(updated.filter_fields_json.contains("\"actor\""));
6700
            // A range with lower == upper == 0 is expected to match the row seeded before
            // the update, whose payload `ts` is 0.
6701        let report = service
6702            .read_operational_collection(&crate::operational::OperationalReadRequest {
6703                collection_name: "audit_log".to_owned(),
6704                filters: vec![crate::operational::OperationalFilterClause::Range {
6705                    field: "ts".to_owned(),
6706                    lower: Some(0),
6707                    upper: Some(0),
6708                }],
6709                limit: Some(10),
6710            })
6711            .expect("read after explicit filter update");
6712        assert_eq!(report.row_count, 1);
6713        assert_eq!(report.rows[0].record_key, "evt-1");
6714    }
6715
6716    #[cfg(feature = "sqlite-vec")]
6717    #[test]
        // Only compiled with the `sqlite-vec` feature. A row in `vec_nodes_active` whose
        // `chunk_id` refers to no existing chunk must be counted in `stale_vec_rows` and
        // must produce a warning containing "stale vec".
6718    fn check_semantics_detects_stale_vec_rows() {
6719        use crate::sqlite::open_connection_with_vec;
6720
6721        let db = NamedTempFile::new().expect("temp file");
            // The schema manager is shared between the seeding block and the service.
6722        let schema = Arc::new(SchemaManager::new());
            // Bootstrap and seed on a vec-enabled connection confined to this block.
6723        {
6724            let conn = open_connection_with_vec(db.path()).expect("vec conn");
6725            schema.bootstrap(&conn).expect("bootstrap");
                // NOTE(review): the trailing `3` is presumably the vector dimension, matching
                // the three-component embedding inserted below — confirm.
6726            schema
6727                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 3)
6728                .expect("vec profile");
6729            // Insert a vec row whose chunk does not exist.
                // The embedding is three f32 values serialized as little-endian bytes.
6730            let bytes: Vec<u8> = [0.1f32, 0.2f32, 0.3f32]
6731                .iter()
6732                .flat_map(|f| f.to_le_bytes())
6733                .collect();
6734            conn.execute(
6735                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('ghost-chunk', ?1)",
6736                rusqlite::params![bytes],
6737            )
6738            .expect("insert stale vec row");
6739        }
            // Run the semantic check through a service built on the same schema manager.
6740        let service = AdminService::new(db.path(), Arc::clone(&schema));
6741        let report = service.check_semantics().expect("semantics check");
6742        assert_eq!(report.stale_vec_rows, 1);
6743        assert!(
6744            report.warnings.iter().any(|w| w.contains("stale vec")),
6745            "warning must mention stale vec"
6746        );
6747    }
6748
6749    #[cfg(feature = "sqlite-vec")]
6750    #[test]
6751    fn restore_vector_profiles_recreates_vec_table_from_metadata() {
6752        let db = NamedTempFile::new().expect("temp file");
6753        let schema = Arc::new(SchemaManager::new());
6754        {
6755            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
6756            schema.bootstrap(&conn).expect("bootstrap");
6757            conn.execute(
6758                "INSERT INTO vector_profiles (profile, table_name, dimension, enabled) \
6759                 VALUES ('default', 'vec_nodes_active', 3, 1)",
6760                [],
6761            )
6762            .expect("insert vector profile");
6763        }
6764
6765        let service = AdminService::new(db.path(), Arc::clone(&schema));
6766        let report = service
6767            .restore_vector_profiles()
6768            .expect("restore vector profiles");
6769        assert_eq!(
6770            report.targets,
6771            vec![crate::projection::ProjectionTarget::Vec]
6772        );
6773        assert_eq!(report.rebuilt_rows, 1);
6774
6775        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
6776        let count: i64 = conn
6777            .query_row(
6778                "SELECT count(*) FROM sqlite_schema WHERE name = 'vec_nodes_active'",
6779                [],
6780                |row| row.get(0),
6781            )
6782            .expect("vec schema count");
6783        assert_eq!(count, 1, "vec table should exist after restore");
6784    }
6785
6786    #[cfg(feature = "sqlite-vec")]
6787    #[test]
6788    fn load_vector_regeneration_config_supports_json_and_toml() {
6789        let dir = tempfile::tempdir().expect("temp dir");
6790        let json_path = dir.path().join("regen.json");
6791        let toml_path = dir.path().join("regen.toml");
6792
6793        let config = VectorRegenerationConfig {
6794            profile: "default".to_owned(),
6795            table_name: "vec_nodes_active".to_owned(),
6796            chunking_policy: "per_chunk".to_owned(),
6797            preprocessing_policy: "trim".to_owned(),
6798        };
6799
6800        fs::write(&json_path, serde_json::to_string(&config).expect("json")).expect("write json");
6801        fs::write(&toml_path, toml::to_string(&config).expect("toml")).expect("write toml");
6802
6803        let parsed_json = load_vector_regeneration_config(&json_path).expect("json parse");
6804        let parsed_toml = load_vector_regeneration_config(&toml_path).expect("toml parse");
6805
6806        assert_eq!(parsed_json, config);
6807        assert_eq!(parsed_toml, config);
6808    }
6809
6810    /// The 0.4.0 rewrite removed the identity fields from the config.
6811    /// Any client that still serializes the pre-0.4 fields must be
6812    /// rejected AT THE SERDE BOUNDARY with a clear error — never
6813    /// silently accepted.
6814    #[test]
6815    fn regenerate_vector_embeddings_config_rejects_old_identity_fields() {
6816        let legacy_json = r#"{
6817            "profile": "default",
6818            "table_name": "vec_nodes_active",
6819            "model_identity": "old-model",
6820            "model_version": "1.0",
6821            "dimension": 4,
6822            "normalization_policy": "l2",
6823            "chunking_policy": "per_chunk",
6824            "preprocessing_policy": "trim",
6825            "generator_command": ["/bin/echo"]
6826        }"#;
6827        let result: Result<VectorRegenerationConfig, _> = serde_json::from_str(legacy_json);
6828        assert!(
6829            result.is_err(),
6830            "legacy identity fields must be rejected at deserialization"
6831        );
6832    }
6833
6834    #[cfg(all(not(feature = "sqlite-vec"), unix))]
6835    #[test]
6836    fn regenerate_vector_embeddings_unsupported_vec_capability_writes_request_and_failed_audit() {
6837        let db = NamedTempFile::new().expect("temp file");
6838        let schema = Arc::new(SchemaManager::new());
6839
6840        {
6841            let conn = sqlite::open_connection(db.path()).expect("connection");
6842            schema.bootstrap(&conn).expect("bootstrap");
6843            conn.execute(
6844                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
6845                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
6846                [],
6847            )
6848            .expect("insert node");
6849            conn.execute(
6850                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
6851                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
6852                [],
6853            )
6854            .expect("insert chunk");
6855        }
6856
6857        let service = AdminService::new(db.path(), Arc::clone(&schema));
6858        let embedder = TestEmbedder::new("test-model", 4);
6859        let error = service
6860            .regenerate_vector_embeddings(
6861                &embedder,
6862                &VectorRegenerationConfig {
6863                    profile: "default".to_owned(),
6864                    table_name: "vec_nodes_active".to_owned(),
6865                    chunking_policy: "per_chunk".to_owned(),
6866                    preprocessing_policy: "trim".to_owned(),
6867                },
6868            )
6869            .expect_err("sqlite-vec capability should be required");
6870
6871        assert!(error.to_string().contains("unsupported vec capability"));
6872
6873        let conn = sqlite::open_connection(db.path()).expect("connection");
6874        let request_count: i64 = conn
6875            .query_row(
6876                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_requested' AND subject = 'default'",
6877                [],
6878                |row| row.get(0),
6879            )
6880            .expect("request count");
6881        assert_eq!(request_count, 1);
6882        let failed_count: i64 = conn
6883            .query_row(
6884                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
6885                [],
6886                |row| row.get(0),
6887            )
6888            .expect("failed count");
6889        assert_eq!(failed_count, 1);
6890        let metadata_json: String = conn
6891            .query_row(
6892                "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
6893                [],
6894                |row| row.get(0),
6895            )
6896            .expect("failed metadata");
6897        assert!(metadata_json.contains("\"failure_class\":\"unsupported vec capability\""));
6898    }
6899
6900    #[cfg(feature = "sqlite-vec")]
6901    #[test]
6902    #[allow(clippy::too_many_lines)]
6903    fn regenerate_vector_embeddings_rebuilds_embeddings_via_embedder() {
6904        let db = NamedTempFile::new().expect("temp file");
6905        let schema = Arc::new(SchemaManager::new());
6906
6907        {
6908            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
6909            schema.bootstrap(&conn).expect("bootstrap");
6910            conn.execute(
6911                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
6912                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
6913                [],
6914            )
6915            .expect("insert node");
6916            conn.execute(
6917                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
6918                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
6919                [],
6920            )
6921            .expect("insert chunk 1");
6922            conn.execute(
6923                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
6924                 VALUES ('chunk-2', 'doc-1', 'travel plan', 101)",
6925                [],
6926            )
6927            .expect("insert chunk 2");
6928        }
6929
6930        let service = AdminService::new(db.path(), Arc::clone(&schema));
6931        let embedder = TestEmbedder::new("test-model", 4);
6932        let report = service
6933            .regenerate_vector_embeddings(
6934                &embedder,
6935                &VectorRegenerationConfig {
6936                    profile: "default".to_owned(),
6937                    table_name: "vec_nodes_active".to_owned(),
6938                    chunking_policy: "per_chunk".to_owned(),
6939                    preprocessing_policy: "trim".to_owned(),
6940                },
6941            )
6942            .expect("regenerate vectors");
6943
6944        assert_eq!(report.profile, "default");
6945        assert_eq!(report.table_name, "vec_nodes_active");
6946        assert_eq!(report.dimension, 4);
6947        assert_eq!(report.total_chunks, 2);
6948        assert_eq!(report.regenerated_rows, 2);
6949        assert!(report.contract_persisted);
6950
6951        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
6952        let vec_count: i64 = conn
6953            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
6954                row.get(0)
6955            })
6956            .expect("vec count");
6957        assert_eq!(vec_count, 2);
6958
6959        // The persisted vector contract must reflect the embedder
6960        // identity — not any string the caller passed in, because the
6961        // caller never passes one.
6962        let (model_identity, model_version, dimension, normalization_policy): (
6963            String,
6964            String,
6965            i64,
6966            String,
6967        ) = conn
6968            .query_row(
6969                "SELECT model_identity, model_version, dimension, normalization_policy \
6970                 FROM vector_embedding_contracts WHERE profile = 'default'",
6971                [],
6972                |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)),
6973            )
6974            .expect("contract row");
6975        assert_eq!(model_identity, "test-model");
6976        assert_eq!(model_version, "1.0.0");
6977        assert_eq!(dimension, 4);
6978        assert_eq!(normalization_policy, "l2");
6979
6980        let contract_format_version: i64 = conn
6981            .query_row(
6982                "SELECT contract_format_version FROM vector_embedding_contracts WHERE profile = 'default'",
6983                [],
6984                |row| row.get(0),
6985            )
6986            .expect("contract_format_version");
6987        assert_eq!(contract_format_version, 1);
6988        let request_count: i64 = conn
6989            .query_row(
6990                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_requested' AND subject = 'default'",
6991                [],
6992                |row| row.get(0),
6993            )
6994            .expect("request audit count");
6995        assert_eq!(request_count, 1);
6996        let apply_count: i64 = conn
6997            .query_row(
6998                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_apply' AND subject = 'default'",
6999                [],
7000                |row| row.get(0),
7001            )
7002            .expect("apply audit count");
7003        assert_eq!(apply_count, 1);
7004        let apply_metadata: String = conn
7005            .query_row(
7006                "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_apply' AND subject = 'default'",
7007                [],
7008                |row| row.get(0),
7009            )
7010            .expect("apply metadata");
7011        assert!(apply_metadata.contains("\"profile\":\"default\""));
7012        assert!(apply_metadata.contains("\"snapshot_hash\":"));
7013        assert!(apply_metadata.contains("\"model_identity\":\"test-model\""));
7014    }
7015
7016    #[cfg(feature = "sqlite-vec")]
7017    #[test]
7018    #[allow(clippy::too_many_lines)]
7019    fn regenerate_vector_embeddings_embedder_failure_leaves_contract_and_vec_rows_unchanged() {
7020        let db = NamedTempFile::new().expect("temp file");
7021        let schema = Arc::new(SchemaManager::new());
7022
7023        {
7024            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7025            schema.bootstrap(&conn).expect("bootstrap");
7026            conn.execute(
7027                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7028                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7029                [],
7030            )
7031            .expect("insert node");
7032            conn.execute(
7033                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7034                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7035                [],
7036            )
7037            .expect("insert chunk");
7038            schema
7039                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
7040                .expect("ensure vec profile");
7041            conn.execute(
7042                r"
7043                INSERT INTO vector_embedding_contracts (
7044                    profile,
7045                    table_name,
7046                    model_identity,
7047                    model_version,
7048                    dimension,
7049                    normalization_policy,
7050                    chunking_policy,
7051                    preprocessing_policy,
7052                    generator_command_json,
7053                    applied_at,
7054                    snapshot_hash
7055                ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)
7056                ",
7057                rusqlite::params![
7058                    "default",
7059                    "vec_nodes_active",
7060                    "old-model",
7061                    "0.9.0",
7062                    4,
7063                    "l2",
7064                    "per_chunk",
7065                    "trim",
7066                    "[]",
7067                    111,
7068                    "old-snapshot"
7069                ],
7070            )
7071            .expect("seed contract");
7072            conn.execute(
7073                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
7074                [],
7075            )
7076            .expect("seed vec row");
7077        }
7078
7079        let service = AdminService::new(db.path(), Arc::clone(&schema));
7080        let failing = FailingEmbedder {
7081            identity: QueryEmbedderIdentity {
7082                model_identity: "new-model".to_owned(),
7083                model_version: "1.0.0".to_owned(),
7084                dimension: 4,
7085                normalization_policy: "l2".to_owned(),
7086            },
7087        };
7088        let error = service
7089            .regenerate_vector_embeddings(
7090                &failing,
7091                &VectorRegenerationConfig {
7092                    profile: "default".to_owned(),
7093                    table_name: "vec_nodes_active".to_owned(),
7094                    chunking_policy: "per_chunk".to_owned(),
7095                    preprocessing_policy: "trim".to_owned(),
7096                },
7097            )
7098            .expect_err("embedder should fail");
7099
7100        assert!(error.to_string().contains("embedder failure"));
7101
7102        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7103        let model_identity: String = conn
7104            .query_row(
7105                "SELECT model_identity FROM vector_embedding_contracts WHERE profile = 'default'",
7106                [],
7107                |row| row.get(0),
7108            )
7109            .expect("model identity");
7110        assert_eq!(model_identity, "old-model");
7111        let snapshot_hash: String = conn
7112            .query_row(
7113                "SELECT snapshot_hash FROM vector_embedding_contracts WHERE profile = 'default'",
7114                [],
7115                |row| row.get(0),
7116            )
7117            .expect("snapshot hash");
7118        assert_eq!(snapshot_hash, "old-snapshot");
7119        let vec_count: i64 = conn
7120            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
7121                row.get(0)
7122            })
7123            .expect("vec count");
7124        assert_eq!(vec_count, 1);
7125        let failure_count: i64 = conn
7126            .query_row(
7127                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
7128                [],
7129                |row| row.get(0),
7130            )
7131            .expect("failure count");
7132        assert_eq!(failure_count, 1);
7133        let failure_metadata: String = conn
7134            .query_row(
7135                "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
7136                [],
7137                |row| row.get(0),
7138            )
7139            .expect("failure metadata");
7140        assert!(failure_metadata.contains("\"failure_class\":\"embedder failure\""));
7141    }
7142
7143    // Subprocess generator tests (snapshot-drift-via-concurrent-writer,
7144    // timeout, stdout/stderr overflow, oversized input, excessive chunk
7145    // count, malformed JSON, world-writable executable, disallowed
7146    // executable root, environment preservation) were removed in 0.4.0
7147    // along with the subprocess generator pattern itself. The failure
7148    // modes they exercised belong to the deleted
7149    // `run_vector_generator_bounded` pipeline and have no equivalent in
7150    // the direct-embedder path. See
7151    // `.claude/memory/project_vector_identity_invariant.md`.
7152
7153    #[cfg(feature = "sqlite-vec")]
7154    #[test]
7155    fn regenerate_vector_embeddings_rejects_whitespace_only_profile_before_mutation() {
7156        let db = NamedTempFile::new().expect("temp file");
7157        let schema = Arc::new(SchemaManager::new());
7158        {
7159            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7160            schema.bootstrap(&conn).expect("bootstrap");
7161            conn.execute(
7162                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7163                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7164                [],
7165            )
7166            .expect("insert node");
7167            conn.execute(
7168                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7169                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7170                [],
7171            )
7172            .expect("insert chunk");
7173        }
7174
7175        let service = AdminService::new(db.path(), Arc::clone(&schema));
7176        let embedder = TestEmbedder::new("test-model", 4);
7177        let error = service
7178            .regenerate_vector_embeddings(
7179                &embedder,
7180                &VectorRegenerationConfig {
7181                    profile: "   ".to_owned(),
7182                    table_name: "vec_nodes_active".to_owned(),
7183                    chunking_policy: "per_chunk".to_owned(),
7184                    preprocessing_policy: "trim".to_owned(),
7185                },
7186            )
7187            .expect_err("whitespace profile should be rejected");
7188
7189        assert!(error.to_string().contains("invalid contract"));
7190        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7191        let contract_count: i64 = conn
7192            .query_row(
7193                "SELECT count(*) FROM vector_embedding_contracts",
7194                [],
7195                |row| row.get(0),
7196            )
7197            .expect("contract count");
7198        assert_eq!(contract_count, 0);
7199        let provenance_count: i64 = conn
7200            .query_row("SELECT count(*) FROM provenance_events", [], |row| {
7201                row.get(0)
7202            })
7203            .expect("provenance count");
7204        assert_eq!(provenance_count, 0);
7205    }
7206
7207    #[cfg(feature = "sqlite-vec")]
7208    #[test]
7209    fn regenerate_vector_embeddings_rejects_future_contract_format_version() {
7210        let db = NamedTempFile::new().expect("temp file");
7211        let schema = Arc::new(SchemaManager::new());
7212        {
7213            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7214            schema.bootstrap(&conn).expect("bootstrap");
7215            conn.execute(
7216                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7217                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7218                [],
7219            )
7220            .expect("insert node");
7221            conn.execute(
7222                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7223                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7224                [],
7225            )
7226            .expect("insert chunk");
7227            conn.execute(
7228                r"
7229                INSERT INTO vector_embedding_contracts (
7230                    profile,
7231                    table_name,
7232                    model_identity,
7233                    model_version,
7234                    dimension,
7235                    normalization_policy,
7236                    chunking_policy,
7237                    preprocessing_policy,
7238                    generator_command_json,
7239                    applied_at,
7240                    snapshot_hash,
7241                    contract_format_version,
7242                    updated_at
7243                ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)
7244                ",
7245                rusqlite::params![
7246                    "default",
7247                    "vec_nodes_active",
7248                    "old-model",
7249                    "0.9.0",
7250                    4,
7251                    "l2",
7252                    "per_chunk",
7253                    "trim",
7254                    "[]",
7255                    111,
7256                    "old-snapshot",
7257                    99,
7258                    111,
7259                ],
7260            )
7261            .expect("seed future contract");
7262        }
7263
7264        let service = AdminService::new(db.path(), Arc::clone(&schema));
7265        let embedder = TestEmbedder::new("test-model", 4);
7266        let error = service
7267            .regenerate_vector_embeddings(
7268                &embedder,
7269                &VectorRegenerationConfig {
7270                    profile: "default".to_owned(),
7271                    table_name: "vec_nodes_active".to_owned(),
7272                    chunking_policy: "per_chunk".to_owned(),
7273                    preprocessing_policy: "trim".to_owned(),
7274                },
7275            )
7276            .expect_err("future contract version should be rejected");
7277
7278        assert!(error.to_string().contains("unsupported"));
7279        assert!(error.to_string().contains("format version"));
7280    }
7281
7282    #[test]
7283    fn check_semantics_detects_orphaned_chunk() {
7284        let (db, service) = setup();
7285        {
7286            // Open without FK enforcement to insert chunk with no active node.
7287            let conn = sqlite::open_connection(db.path()).expect("conn");
7288            conn.execute(
7289                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7290                 VALUES ('c1', 'ghost-node', 'text', 100)",
7291                [],
7292            )
7293            .expect("insert orphaned chunk");
7294        }
7295        let report = service.check_semantics().expect("semantics check");
7296        assert_eq!(report.orphaned_chunks, 1);
7297    }
7298
7299    #[test]
7300    fn check_semantics_detects_null_source_ref() {
7301        let (db, service) = setup();
7302        {
7303            let conn = sqlite::open_connection(db.path()).expect("conn");
7304            conn.execute(
7305                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at) \
7306                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100)",
7307                [],
7308            )
7309            .expect("insert node with null source_ref");
7310        }
7311        let report = service.check_semantics().expect("semantics check");
7312        assert_eq!(report.null_source_ref_nodes, 1);
7313    }
7314
7315    #[test]
7316    fn check_semantics_detects_broken_step_fk() {
7317        let (db, service) = setup();
7318        {
7319            // Explicitly disable FK enforcement for this connection so we can insert
7320            // an orphaned step (ghost run_id) to simulate a partial-write failure.
7321            let conn = sqlite::open_connection(db.path()).expect("conn");
7322            conn.execute_batch("PRAGMA foreign_keys = OFF;")
7323                .expect("disable FK");
7324            conn.execute(
7325                "INSERT INTO steps (id, run_id, kind, status, properties, created_at) \
7326                 VALUES ('s1', 'ghost-run', 'llm', 'completed', '{}', 100)",
7327                [],
7328            )
7329            .expect("insert step with ghost run_id");
7330        }
7331        let report = service.check_semantics().expect("semantics check");
7332        assert_eq!(report.broken_step_fk, 1);
7333    }
7334
7335    #[test]
7336    fn check_semantics_detects_broken_action_fk() {
7337        let (db, service) = setup();
7338        {
7339            let conn = sqlite::open_connection(db.path()).expect("conn");
7340            conn.execute_batch("PRAGMA foreign_keys = OFF;")
7341                .expect("disable FK");
7342            conn.execute(
7343                "INSERT INTO actions (id, step_id, kind, status, properties, created_at) \
7344                 VALUES ('a1', 'ghost-step', 'emit', 'completed', '{}', 100)",
7345                [],
7346            )
7347            .expect("insert action with ghost step_id");
7348        }
7349        let report = service.check_semantics().expect("semantics check");
7350        assert_eq!(report.broken_action_fk, 1);
7351    }
7352
7353    #[test]
7354    fn check_semantics_detects_stale_fts_rows() {
7355        let (db, service) = setup();
7356        {
7357            let conn = sqlite::open_connection(db.path()).expect("conn");
7358            // FTS virtual tables have no FK constraints; insert a row referencing
7359            // a chunk_id that does not exist in the chunks table.
7360            conn.execute(
7361                "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
7362                 VALUES ('ghost-chunk', 'any-node', 'Meeting', 'stale content')",
7363                [],
7364            )
7365            .expect("insert stale FTS row");
7366        }
7367        let report = service.check_semantics().expect("semantics check");
7368        assert_eq!(report.stale_fts_rows, 1);
7369    }
7370
7371    #[test]
7372    fn check_semantics_detects_fts_rows_for_superseded_nodes() {
7373        let (db, service) = setup();
7374        {
7375            let conn = sqlite::open_connection(db.path()).expect("conn");
7376            // Insert a node that has been fully superseded (superseded_at IS NOT NULL).
7377            conn.execute(
7378                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
7379                 VALUES ('r1', 'lg-sup', 'Meeting', '{}', 100, 200, 'src-1')",
7380                [],
7381            )
7382            .expect("insert superseded node");
7383            // Insert an FTS row for the superseded node's logical_id.
7384            conn.execute(
7385                "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
7386                 VALUES ('ck-x', 'lg-sup', 'Meeting', 'superseded content')",
7387                [],
7388            )
7389            .expect("insert FTS row for superseded node");
7390        }
7391        let report = service.check_semantics().expect("semantics check");
7392        assert_eq!(report.fts_rows_for_superseded_nodes, 1);
7393    }
7394
7395    #[test]
7396    fn check_semantics_detects_dangling_edges() {
7397        let (db, service) = setup();
7398        {
7399            let conn = sqlite::open_connection(db.path()).expect("conn");
7400            conn.execute_batch("PRAGMA foreign_keys = OFF;")
7401                .expect("disable FK");
7402            // One active node as source; target does not exist — edge is dangling.
7403            conn.execute(
7404                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7405                 VALUES ('r1', 'lg-src', 'Meeting', '{}', 100, 'src-1')",
7406                [],
7407            )
7408            .expect("insert source node");
7409            conn.execute(
7410                "INSERT INTO edges \
7411                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
7412                 VALUES ('e1', 'edge-1', 'lg-src', 'ghost-target', 'LINKS', '{}', 100, 'src-1')",
7413                [],
7414            )
7415            .expect("insert dangling edge");
7416        }
7417        let report = service.check_semantics().expect("semantics check");
7418        assert_eq!(report.dangling_edges, 1);
7419    }
7420
7421    #[test]
7422    fn check_semantics_detects_orphaned_supersession_chains() {
7423        let (db, service) = setup();
7424        {
7425            let conn = sqlite::open_connection(db.path()).expect("conn");
7426            // Every version of this logical_id is superseded — no active row remains.
7427            conn.execute(
7428                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
7429                 VALUES ('r1', 'lg-orphaned', 'Meeting', '{}', 100, 200, 'src-1')",
7430                [],
7431            )
7432            .expect("insert fully superseded node");
7433        }
7434        let report = service.check_semantics().expect("semantics check");
7435        assert_eq!(report.orphaned_supersession_chains, 1);
7436    }
7437
7438    #[test]
7439    fn check_semantics_detects_mismatched_kind_property_fts_rows() {
7440        let (db, service) = setup();
7441        {
7442            let conn = sqlite::open_connection(db.path()).expect("conn");
7443            // Insert an active node with kind "Goal".
7444            conn.execute(
7445                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7446                 VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'src-1')",
7447                [],
7448            )
7449            .expect("insert node");
7450            // Insert a property FTS row with a DIFFERENT kind than the node.
7451            conn.execute(
7452                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
7453                 VALUES ('goal-1', 'WrongKind', 'Ship v2')",
7454                [],
7455            )
7456            .expect("insert mismatched property FTS row");
7457        }
7458        let report = service.check_semantics().expect("semantics check");
7459        assert_eq!(report.mismatched_kind_property_fts_rows, 1);
7460    }
7461
7462    #[test]
7463    fn check_semantics_detects_duplicate_property_fts_rows() {
7464        let (db, service) = setup();
7465        {
7466            let conn = sqlite::open_connection(db.path()).expect("conn");
7467            conn.execute(
7468                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7469                 VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'src-1')",
7470                [],
7471            )
7472            .expect("insert node");
7473            // Insert two property FTS rows for the same logical ID.
7474            conn.execute(
7475                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
7476                 VALUES ('goal-1', 'Goal', 'Ship v2')",
7477                [],
7478            )
7479            .expect("insert first property FTS row");
7480            conn.execute(
7481                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
7482                 VALUES ('goal-1', 'Goal', 'Ship v2 duplicate')",
7483                [],
7484            )
7485            .expect("insert duplicate property FTS row");
7486        }
7487        let report = service.check_semantics().expect("semantics check");
7488        assert_eq!(report.duplicate_property_fts_rows, 1);
7489    }
7490
7491    #[test]
7492    fn check_semantics_detects_drifted_property_fts_text() {
7493        let (db, service) = setup();
7494        {
7495            let conn = sqlite::open_connection(db.path()).expect("conn");
7496            conn.execute(
7497                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
7498                 VALUES ('Goal', '[\"$.name\"]', ' ')",
7499                [],
7500            )
7501            .expect("register schema");
7502            conn.execute(
7503                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7504                 VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Current name\"}', 100, 'src-1')",
7505                [],
7506            )
7507            .expect("insert node");
7508            // Insert a property FTS row with outdated text content.
7509            conn.execute(
7510                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
7511                 VALUES ('goal-1', 'Goal', 'Old stale name')",
7512                [],
7513            )
7514            .expect("insert stale property FTS row");
7515        }
7516        let report = service.check_semantics().expect("semantics check");
7517        assert_eq!(report.drifted_property_fts_rows, 1);
7518    }
7519
7520    #[test]
7521    fn check_semantics_detects_property_fts_row_that_should_not_exist() {
7522        let (db, service) = setup();
7523        {
7524            let conn = sqlite::open_connection(db.path()).expect("conn");
7525            conn.execute(
7526                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
7527                 VALUES ('Goal', '[\"$.searchable\"]', ' ')",
7528                [],
7529            )
7530            .expect("register schema");
7531            // Node does NOT have $.searchable — extraction yields no value.
7532            conn.execute(
7533                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7534                 VALUES ('r1', 'goal-1', 'Goal', '{\"other\":\"field\"}', 100, 'src-1')",
7535                [],
7536            )
7537            .expect("insert node");
7538            // But a property FTS row exists anyway.
7539            conn.execute(
7540                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
7541                 VALUES ('goal-1', 'Goal', 'phantom text')",
7542                [],
7543            )
7544            .expect("insert phantom property FTS row");
7545        }
7546        let report = service.check_semantics().expect("semantics check");
7547        assert_eq!(
7548            report.drifted_property_fts_rows, 1,
7549            "row that should not exist must be counted as drifted"
7550        );
7551    }
7552
7553    #[test]
7554    fn safe_export_writes_manifest_with_sha256() {
7555        let (_db, service) = setup();
7556        let export_dir = tempfile::TempDir::new().expect("temp dir");
7557        let export_path = export_dir.path().join("backup.db");
7558
7559        let manifest = service
7560            .safe_export(
7561                &export_path,
7562                SafeExportOptions {
7563                    force_checkpoint: false,
7564                },
7565            )
7566            .expect("export");
7567
7568        assert!(export_path.exists(), "exported db should exist");
7569        let manifest_path = export_dir.path().join("backup.db.export-manifest.json");
7570        assert!(
7571            manifest_path.exists(),
7572            "manifest file should exist at {}",
7573            manifest_path.display()
7574        );
7575        assert_eq!(manifest.sha256.len(), 64, "sha256 should be 64 hex chars");
7576        assert!(
7577            manifest.exported_at > 0,
7578            "exported_at should be a unix timestamp"
7579        );
7580        assert_eq!(
7581            manifest.schema_version,
7582            SchemaManager::new().current_version().0,
7583            "schema_version should match the live schema version"
7584        );
7585        assert_eq!(manifest.protocol_version, 1, "protocol_version should be 1");
7586        assert!(manifest.page_count > 0, "page_count should be positive");
7587    }
7588
7589    #[test]
7590    fn safe_export_preserves_operational_validation_contracts() {
7591        let (_db, service) = setup();
7592        let validation_json = r#"{"format_version":1,"mode":"enforce","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#;
7593        service
7594            .register_operational_collection(&OperationalRegisterRequest {
7595                name: "connector_health".to_owned(),
7596                kind: OperationalCollectionKind::LatestState,
7597                schema_json: "{}".to_owned(),
7598                retention_json: "{}".to_owned(),
7599                filter_fields_json: "[]".to_owned(),
7600                validation_json: validation_json.to_owned(),
7601                secondary_indexes_json: "[]".to_owned(),
7602                format_version: 1,
7603            })
7604            .expect("register collection");
7605
7606        let export_dir = tempfile::TempDir::new().expect("temp dir");
7607        let export_path = export_dir.path().join("backup.db");
7608        service
7609            .safe_export(
7610                &export_path,
7611                SafeExportOptions {
7612                    force_checkpoint: false,
7613                },
7614            )
7615            .expect("export");
7616
7617        let exported = sqlite::open_connection(&export_path).expect("exported conn");
7618        let exported_validation_json: String = exported
7619            .query_row(
7620                "SELECT validation_json FROM operational_collections WHERE name = 'connector_health'",
7621                [],
7622                |row| row.get(0),
7623            )
7624            .expect("validation_json");
7625        assert_eq!(exported_validation_json, validation_json);
7626    }
7627
7628    #[test]
7629    fn safe_export_force_checkpoint_false_skips_wal_pragma() {
7630        let (_db, service) = setup();
7631        let export_dir = tempfile::TempDir::new().expect("temp dir");
7632        let export_path = export_dir.path().join("no-wal.db");
7633
7634        // force_checkpoint: false must not error even on a non-WAL database
7635        let manifest = service
7636            .safe_export(
7637                &export_path,
7638                SafeExportOptions {
7639                    force_checkpoint: false,
7640                },
7641            )
7642            .expect("export with no checkpoint");
7643
7644        assert!(
7645            manifest.page_count > 0,
7646            "page_count must be populated regardless of checkpoint mode"
7647        );
7648        assert_eq!(
7649            manifest.schema_version,
7650            SchemaManager::new().current_version().0
7651        );
7652        assert_eq!(manifest.protocol_version, 1);
7653    }
7654
7655    #[test]
7656    fn safe_export_force_checkpoint_false_still_captures_wal_backed_changes() {
7657        let (db, service) = setup();
7658        let conn = sqlite::open_connection(db.path()).expect("conn");
7659        let journal_mode: String = conn
7660            .query_row("PRAGMA journal_mode=WAL", [], |row| row.get(0))
7661            .expect("enable wal");
7662        assert_eq!(journal_mode.to_lowercase(), "wal");
7663        let auto_checkpoint_pages: i64 = conn
7664            .query_row("PRAGMA wal_autocheckpoint=0", [], |row| row.get(0))
7665            .expect("disable auto checkpoint");
7666        assert_eq!(auto_checkpoint_pages, 0);
7667        conn.execute(
7668            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7669             VALUES ('r-wal', 'lg-wal', 'Meeting', '{}', 100, 'src-wal')",
7670            [],
7671        )
7672        .expect("insert wal-backed node");
7673
7674        let export_dir = tempfile::TempDir::new().expect("temp dir");
7675        let export_path = export_dir.path().join("wal-backed.db");
7676        service
7677            .safe_export(
7678                &export_path,
7679                SafeExportOptions {
7680                    force_checkpoint: false,
7681                },
7682            )
7683            .expect("export wal-backed db");
7684
7685        let exported = sqlite::open_connection(&export_path).expect("open exported db");
7686        let exported_count: i64 = exported
7687            .query_row(
7688                "SELECT count(*) FROM nodes WHERE logical_id = 'lg-wal'",
7689                [],
7690                |row| row.get(0),
7691            )
7692            .expect("count exported nodes");
7693        assert_eq!(
7694            exported_count, 1,
7695            "safe_export must include committed rows that are still resident in the WAL"
7696        );
7697    }
7698
7699    #[test]
7700    fn excise_source_removes_searchable_content_after_excision() {
7701        let (db, service) = setup();
7702        {
7703            let conn = sqlite::open_connection(db.path()).expect("conn");
7704            conn.execute(
7705                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
7706                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
7707                [],
7708            )
7709            .expect("insert v1");
7710            conn.execute(
7711                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7712                 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
7713                [],
7714            )
7715            .expect("insert v2");
7716            conn.execute(
7717                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7718                 VALUES ('ck1', 'lg1', 'hello world', 100)",
7719                [],
7720            )
7721            .expect("insert chunk");
7722        }
7723        service.excise_source("source-2").expect("excise");
7724        {
7725            let conn = sqlite::open_connection(db.path()).expect("conn");
7726            let fts_count: i64 = conn
7727                .query_row(
7728                    "SELECT count(*) FROM fts_nodes WHERE chunk_id = 'ck1'",
7729                    [],
7730                    |row| row.get(0),
7731                )
7732                .expect("fts count");
7733            assert_eq!(
7734                fts_count, 0,
7735                "excised content should not remain searchable after excise"
7736            );
7737        }
7738    }
7739
7740    #[cfg(feature = "sqlite-vec")]
7741    #[test]
7742    fn excise_source_cleans_chunks_and_vec_rows_for_excised_version() {
7743        let (db, service) = setup();
7744        {
7745            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7746            service
7747                .schema_manager
7748                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
7749                .expect("ensure vec profile");
7750            conn.execute(
7751                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
7752                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
7753                [],
7754            )
7755            .expect("insert v1");
7756            conn.execute(
7757                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7758                 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
7759                [],
7760            )
7761            .expect("insert v2");
7762            conn.execute(
7763                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7764                 VALUES ('ck1', 'lg1', 'new content', 200)",
7765                [],
7766            )
7767            .expect("insert chunk");
7768            conn.execute(
7769                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('ck1', zeroblob(16))",
7770                [],
7771            )
7772            .expect("insert vec row");
7773        }
7774
7775        service.excise_source("source-2").expect("excise");
7776
7777        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7778        let active_row: String = conn
7779            .query_row(
7780                "SELECT row_id FROM nodes WHERE logical_id = 'lg1' AND superseded_at IS NULL",
7781                [],
7782                |row| row.get(0),
7783            )
7784            .expect("restored active row");
7785        assert_eq!(active_row, "r1");
7786        let chunk_count: i64 = conn
7787            .query_row(
7788                "SELECT count(*) FROM chunks WHERE node_logical_id = 'lg1'",
7789                [],
7790                |row| row.get(0),
7791            )
7792            .expect("chunk count");
7793        assert_eq!(
7794            chunk_count, 0,
7795            "excised source content must not survive as chunks"
7796        );
7797        let vec_count: i64 = conn
7798            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
7799                row.get(0)
7800            })
7801            .expect("vec count");
7802        assert_eq!(vec_count, 0, "excised source vec rows must be removed");
7803        let fts_count: i64 = conn
7804            .query_row(
7805                "SELECT count(*) FROM fts_nodes WHERE node_logical_id = 'lg1'",
7806                [],
7807                |row| row.get(0),
7808            )
7809            .expect("fts count");
7810        assert_eq!(
7811            fts_count, 0,
7812            "excised source content must not remain searchable"
7813        );
7814    }
7815
7816    #[test]
7817    fn export_page_count_matches_exported_file() {
7818        let (_db, service) = setup();
7819        let export_dir = tempfile::TempDir::new().expect("temp dir");
7820        let export_path = export_dir.path().join("page-count.db");
7821
7822        let manifest = service
7823            .safe_export(
7824                &export_path,
7825                SafeExportOptions {
7826                    force_checkpoint: false,
7827                },
7828            )
7829            .expect("export");
7830
7831        let exported = sqlite::open_connection(&export_path).expect("open exported db");
7832        let actual_page_count: u64 = exported
7833            .query_row("PRAGMA page_count", [], |row| row.get(0))
7834            .expect("page_count from exported file");
7835
7836        assert_eq!(
7837            manifest.page_count, actual_page_count,
7838            "manifest page_count must match the exported file's PRAGMA page_count"
7839        );
7840    }
7841
7842    #[test]
7843    fn no_temp_file_after_successful_export() {
7844        let (_db, service) = setup();
7845        let export_dir = tempfile::TempDir::new().expect("temp dir");
7846        let export_path = export_dir.path().join("no-tmp.db");
7847
7848        service
7849            .safe_export(
7850                &export_path,
7851                SafeExportOptions {
7852                    force_checkpoint: false,
7853                },
7854            )
7855            .expect("export");
7856
7857        let tmp_files: Vec<_> = fs::read_dir(export_dir.path())
7858            .expect("read export dir")
7859            .filter_map(Result::ok)
7860            .filter(|e| e.path().extension().is_some_and(|ext| ext == "tmp"))
7861            .collect();
7862
7863        assert!(
7864            tmp_files.is_empty(),
7865            "no .tmp files should remain after a successful export, found: {tmp_files:?}"
7866        );
7867    }
7868
7869    #[test]
7870    fn export_manifest_is_valid_json() {
7871        let (_db, service) = setup();
7872        let export_dir = tempfile::TempDir::new().expect("temp dir");
7873        let export_path = export_dir.path().join("valid-json.db");
7874
7875        service
7876            .safe_export(
7877                &export_path,
7878                SafeExportOptions {
7879                    force_checkpoint: false,
7880                },
7881            )
7882            .expect("export");
7883
7884        let manifest_path = export_dir.path().join("valid-json.db.export-manifest.json");
7885        let manifest_contents = fs::read_to_string(&manifest_path).expect("read manifest");
7886        let parsed: serde_json::Value =
7887            serde_json::from_str(&manifest_contents).expect("manifest must be valid JSON");
7888
7889        assert!(
7890            parsed.get("exported_at").is_some(),
7891            "manifest must contain exported_at"
7892        );
7893        assert!(
7894            parsed.get("sha256").is_some(),
7895            "manifest must contain sha256"
7896        );
7897        assert!(
7898            parsed.get("schema_version").is_some(),
7899            "manifest must contain schema_version"
7900        );
7901        assert!(
7902            parsed.get("protocol_version").is_some(),
7903            "manifest must contain protocol_version"
7904        );
7905        assert!(
7906            parsed.get("page_count").is_some(),
7907            "manifest must contain page_count"
7908        );
7909    }
7910
7911    #[test]
7912    fn provenance_purge_dry_run_reports_counts() {
7913        let (db, service) = setup();
7914        {
7915            let conn = sqlite::open_connection(db.path()).expect("conn");
7916            conn.execute(
7917                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
7918                 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
7919                [],
7920            )
7921            .expect("insert p1");
7922            conn.execute(
7923                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
7924                 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 200)",
7925                [],
7926            )
7927            .expect("insert p2");
7928            conn.execute(
7929                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
7930                 VALUES ('p3', 'excise', 'lg3', 'src-1', 300)",
7931                [],
7932            )
7933            .expect("insert p3");
7934        }
7935
7936        let options = super::ProvenancePurgeOptions {
7937            dry_run: true,
7938            preserve_event_types: Vec::new(),
7939        };
7940        let report = service
7941            .purge_provenance_events(250, &options)
7942            .expect("dry run purge");
7943
7944        assert_eq!(report.events_deleted, 2);
7945        assert_eq!(report.events_preserved, 1);
7946        assert!(report.oldest_remaining.is_some());
7947
7948        let conn = sqlite::open_connection(db.path()).expect("conn");
7949        let total: i64 = conn
7950            .query_row("SELECT count(*) FROM provenance_events", [], |row| {
7951                row.get(0)
7952            })
7953            .expect("count");
7954        assert_eq!(total, 3, "dry_run must not delete any events");
7955    }
7956
7957    #[test]
7958    fn provenance_purge_deletes_old_events() {
7959        let (db, service) = setup();
7960        {
7961            let conn = sqlite::open_connection(db.path()).expect("conn");
7962            conn.execute(
7963                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
7964                 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
7965                [],
7966            )
7967            .expect("insert p1");
7968            conn.execute(
7969                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
7970                 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 200)",
7971                [],
7972            )
7973            .expect("insert p2");
7974        }
7975
7976        let options = super::ProvenancePurgeOptions {
7977            dry_run: false,
7978            preserve_event_types: Vec::new(),
7979        };
7980        let report = service
7981            .purge_provenance_events(150, &options)
7982            .expect("purge");
7983
7984        assert_eq!(report.events_deleted, 1);
7985        assert_eq!(report.events_preserved, 1);
7986        assert_eq!(report.oldest_remaining, Some(200));
7987
7988        let conn = sqlite::open_connection(db.path()).expect("conn");
7989        let remaining: i64 = conn
7990            .query_row("SELECT count(*) FROM provenance_events", [], |row| {
7991                row.get(0)
7992            })
7993            .expect("count");
7994        assert_eq!(remaining, 1);
7995    }
7996
7997    #[test]
7998    fn provenance_purge_preserves_specified_types() {
7999        let (db, service) = setup();
8000        {
8001            let conn = sqlite::open_connection(db.path()).expect("conn");
8002            conn.execute(
8003                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8004                 VALUES ('p1', 'excise', 'lg1', 'src-1', 100)",
8005                [],
8006            )
8007            .expect("insert p1");
8008            conn.execute(
8009                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8010                 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 100)",
8011                [],
8012            )
8013            .expect("insert p2");
8014            conn.execute(
8015                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8016                 VALUES ('p3', 'node_insert', 'lg3', 'src-1', 100)",
8017                [],
8018            )
8019            .expect("insert p3");
8020        }
8021
8022        let options = super::ProvenancePurgeOptions {
8023            dry_run: false,
8024            preserve_event_types: Vec::new(),
8025        };
8026        let report = service
8027            .purge_provenance_events(500, &options)
8028            .expect("purge");
8029
8030        assert_eq!(report.events_deleted, 2);
8031        assert_eq!(report.events_preserved, 1);
8032
8033        let conn = sqlite::open_connection(db.path()).expect("conn");
8034        let remaining_type: String = conn
8035            .query_row("SELECT event_type FROM provenance_events", [], |row| {
8036                row.get(0)
8037            })
8038            .expect("remaining event type");
8039        assert_eq!(remaining_type, "excise");
8040    }
8041
8042    #[test]
8043    fn provenance_purge_noop_with_zero_timestamp() {
8044        let (db, service) = setup();
8045        {
8046            let conn = sqlite::open_connection(db.path()).expect("conn");
8047            conn.execute(
8048                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8049                 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
8050                [],
8051            )
8052            .expect("insert p1");
8053        }
8054
8055        let options = super::ProvenancePurgeOptions {
8056            dry_run: false,
8057            preserve_event_types: Vec::new(),
8058        };
8059        let report = service.purge_provenance_events(0, &options).expect("purge");
8060
8061        assert_eq!(report.events_deleted, 0);
8062        assert_eq!(report.events_preserved, 1);
8063        assert_eq!(report.oldest_remaining, Some(100));
8064    }
8065
8066    #[test]
8067    fn restore_skips_edge_when_counterpart_purged() {
8068        let (db, service) = setup();
8069        {
8070            let conn = sqlite::open_connection(db.path()).expect("conn");
8071            // Create node A (doc-1) and node B (doc-2)
8072            conn.execute(
8073                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8074                 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
8075                [],
8076            )
8077            .expect("insert node A");
8078            conn.execute(
8079                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8080                 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
8081                [],
8082            )
8083            .expect("insert node B");
8084            // Create edge between A and B
8085            conn.execute(
8086                "INSERT INTO edges \
8087                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
8088                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
8089                [],
8090            )
8091            .expect("insert edge");
8092            // Retire both A and B, and the edge
8093            conn.execute(
8094                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8095                 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
8096                [],
8097            )
8098            .expect("insert retire event A");
8099            conn.execute(
8100                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8101                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
8102                [],
8103            )
8104            .expect("insert edge retire event");
8105            conn.execute(
8106                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
8107                [],
8108            )
8109            .expect("retire node A");
8110            conn.execute(
8111                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-2'",
8112                [],
8113            )
8114            .expect("retire node B");
8115            conn.execute(
8116                "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
8117                [],
8118            )
8119            .expect("retire edge");
8120            // Simulate purge of B: delete node rows but leave the edge intact
8121            // to reproduce the dangling-edge scenario the validation guards against.
8122            conn.execute("DELETE FROM nodes WHERE logical_id = 'doc-2'", [])
8123                .expect("purge node B rows");
8124        }
8125
8126        // Restore A — the edge should be skipped because B has no active node
8127        let report = service.restore_logical_id("doc-1").expect("restore A");
8128        assert!(!report.was_noop);
8129        assert_eq!(report.restored_node_rows, 1);
8130        assert_eq!(report.restored_edge_rows, 0, "edge should not be restored");
8131        assert_eq!(report.skipped_edges.len(), 1);
8132        assert_eq!(report.skipped_edges[0].edge_logical_id, "edge-1");
8133        assert_eq!(report.skipped_edges[0].missing_endpoint, "doc-2");
8134
8135        // Verify the edge is still retired in the database
8136        let conn = sqlite::open_connection(db.path()).expect("conn");
8137        let active_edge_count: i64 = conn
8138            .query_row(
8139                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
8140                [],
8141                |row| row.get(0),
8142            )
8143            .expect("active edge count");
8144        assert_eq!(active_edge_count, 0, "edge must remain retired");
8145    }
8146
8147    #[test]
8148    fn restore_restores_edges_to_active_nodes() {
8149        let (db, service) = setup();
8150        {
8151            let conn = sqlite::open_connection(db.path()).expect("conn");
8152            // Create node A and node B (B stays active)
8153            conn.execute(
8154                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8155                 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
8156                [],
8157            )
8158            .expect("insert node A");
8159            conn.execute(
8160                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8161                 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
8162                [],
8163            )
8164            .expect("insert node B");
8165            // Create edge between A and B
8166            conn.execute(
8167                "INSERT INTO edges \
8168                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
8169                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
8170                [],
8171            )
8172            .expect("insert edge");
8173            // Retire only A
8174            conn.execute(
8175                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8176                 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
8177                [],
8178            )
8179            .expect("insert retire event A");
8180            conn.execute(
8181                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8182                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
8183                [],
8184            )
8185            .expect("insert edge retire event");
8186            conn.execute(
8187                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
8188                [],
8189            )
8190            .expect("retire node A");
8191            conn.execute(
8192                "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
8193                [],
8194            )
8195            .expect("retire edge");
8196        }
8197
8198        // Restore A — B is active, so the edge should be restored normally
8199        let report = service.restore_logical_id("doc-1").expect("restore A");
8200        assert!(!report.was_noop);
8201        assert_eq!(report.restored_node_rows, 1);
8202        assert!(report.restored_edge_rows > 0, "edge should be restored");
8203        assert!(
8204            report.skipped_edges.is_empty(),
8205            "no edges should be skipped"
8206        );
8207
8208        let conn = sqlite::open_connection(db.path()).expect("conn");
8209        let active_edge_count: i64 = conn
8210            .query_row(
8211                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
8212                [],
8213                |row| row.get(0),
8214            )
8215            .expect("active edge count");
8216        assert_eq!(active_edge_count, 1, "edge must be active");
8217    }
8218
8219    #[test]
8220    fn restore_restores_edges_when_both_restored() {
8221        let (db, service) = setup();
8222        {
8223            let conn = sqlite::open_connection(db.path()).expect("conn");
8224            // Create node A and node B
8225            conn.execute(
8226                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8227                 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
8228                [],
8229            )
8230            .expect("insert node A");
8231            conn.execute(
8232                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8233                 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
8234                [],
8235            )
8236            .expect("insert node B");
8237            // Create edge between A and B
8238            conn.execute(
8239                "INSERT INTO edges \
8240                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
8241                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
8242                [],
8243            )
8244            .expect("insert edge");
8245            // Retire both A and B
8246            conn.execute(
8247                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8248                 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
8249                [],
8250            )
8251            .expect("insert retire event A");
8252            conn.execute(
8253                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8254                 VALUES ('evt-retire-b', 'node_retire', 'doc-2', 'forget-1', 200, '')",
8255                [],
8256            )
8257            .expect("insert retire event B");
8258            conn.execute(
8259                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8260                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
8261                [],
8262            )
8263            .expect("insert edge retire event");
8264            conn.execute(
8265                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
8266                [],
8267            )
8268            .expect("retire node A");
8269            conn.execute(
8270                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-2'",
8271                [],
8272            )
8273            .expect("retire node B");
8274            conn.execute(
8275                "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
8276                [],
8277            )
8278            .expect("retire edge");
8279        }
8280
8281        // Restore B first — edge is skipped because A is still retired
8282        let report_b = service.restore_logical_id("doc-2").expect("restore B");
8283        assert!(!report_b.was_noop);
8284
8285        // Restore A — B is now active, so the edge should be restored
8286        let report_a = service.restore_logical_id("doc-1").expect("restore A");
8287        assert!(!report_a.was_noop);
8288        assert_eq!(report_a.restored_node_rows, 1);
8289        assert!(
8290            report_a.restored_edge_rows > 0,
8291            "edge should be restored when both endpoints active"
8292        );
8293        assert!(
8294            report_a.skipped_edges.is_empty(),
8295            "no edges should be skipped"
8296        );
8297
8298        let conn = sqlite::open_connection(db.path()).expect("conn");
8299        let active_edge_count: i64 = conn
8300            .query_row(
8301                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
8302                [],
8303                |row| row.get(0),
8304            )
8305            .expect("active edge count");
8306        assert_eq!(
8307            active_edge_count, 1,
8308            "edge must be active after both endpoints restored"
8309        );
8310    }
8311
8312    // ── FTS property schema end-to-end tests ──────────────────────────
8313
8314    #[test]
8315    fn fts_property_schema_crud_round_trip() {
8316        let (_db, service) = setup();
8317
8318        // Register
8319        let record = service
8320            .register_fts_property_schema(
8321                "Meeting",
8322                &["$.title".to_owned(), "$.summary".to_owned()],
8323                None,
8324            )
8325            .expect("register");
8326        assert_eq!(record.kind, "Meeting");
8327        assert_eq!(record.property_paths, vec!["$.title", "$.summary"]);
8328        assert_eq!(record.separator, " ");
8329        assert_eq!(record.format_version, 1);
8330
8331        // Describe
8332        let described = service
8333            .describe_fts_property_schema("Meeting")
8334            .expect("describe")
8335            .expect("should exist");
8336        assert_eq!(described, record);
8337
8338        // Describe missing kind
8339        let missing = service
8340            .describe_fts_property_schema("NoSuchKind")
8341            .expect("describe missing");
8342        assert!(missing.is_none());
8343
8344        // List
8345        let list = service.list_fts_property_schemas().expect("list");
8346        assert_eq!(list.len(), 1);
8347        assert_eq!(list[0].kind, "Meeting");
8348
8349        // Update (idempotent upsert)
8350        let updated = service
8351            .register_fts_property_schema(
8352                "Meeting",
8353                &["$.title".to_owned(), "$.notes".to_owned()],
8354                Some("\n"),
8355            )
8356            .expect("update");
8357        assert_eq!(updated.property_paths, vec!["$.title", "$.notes"]);
8358        assert_eq!(updated.separator, "\n");
8359
8360        // Remove
8361        service
8362            .remove_fts_property_schema("Meeting")
8363            .expect("remove");
8364        let after_remove = service
8365            .describe_fts_property_schema("Meeting")
8366            .expect("describe after remove");
8367        assert!(after_remove.is_none());
8368
8369        // Remove non-existent is an error
8370        let err = service.remove_fts_property_schema("Meeting");
8371        assert!(err.is_err());
8372    }
8373
8374    #[test]
8375    fn describe_fts_property_schema_round_trips_recursive_entries() {
8376        let (_db, service) = setup();
8377
8378        let entries = vec![
8379            FtsPropertyPathSpec::scalar("$.title"),
8380            FtsPropertyPathSpec::recursive("$.payload"),
8381        ];
8382        let exclude = vec!["$.payload.private".to_owned()];
8383        let registered = service
8384            .register_fts_property_schema_with_entries(
8385                "KnowledgeItem",
8386                &entries,
8387                Some(" "),
8388                &exclude,
8389            )
8390            .expect("register recursive");
8391
8392        // The register entry point now echoes back the fully-populated
8393        // record via the same load helper used by describe/list.
8394        assert_eq!(registered.entries, entries);
8395        assert_eq!(registered.exclude_paths, exclude);
8396        assert_eq!(registered.property_paths, vec!["$.title", "$.payload"]);
8397
8398        let described = service
8399            .describe_fts_property_schema("KnowledgeItem")
8400            .expect("describe")
8401            .expect("should exist");
8402        assert_eq!(described.kind, "KnowledgeItem");
8403        assert_eq!(described.entries, entries);
8404        assert_eq!(described.exclude_paths, exclude);
8405        assert_eq!(described.property_paths, vec!["$.title", "$.payload"]);
8406        assert_eq!(described.separator, " ");
8407        assert_eq!(described.format_version, 1);
8408    }
8409
8410    #[test]
8411    fn list_fts_property_schemas_round_trips_recursive_entries() {
8412        let (_db, service) = setup();
8413
8414        let entries = vec![
8415            FtsPropertyPathSpec::scalar("$.title"),
8416            FtsPropertyPathSpec::recursive("$.payload"),
8417        ];
8418        let exclude = vec!["$.payload.secret".to_owned()];
8419        service
8420            .register_fts_property_schema_with_entries(
8421                "KnowledgeItem",
8422                &entries,
8423                Some(" "),
8424                &exclude,
8425            )
8426            .expect("register recursive");
8427
8428        let listed = service.list_fts_property_schemas().expect("list");
8429        assert_eq!(listed.len(), 1);
8430        let record = &listed[0];
8431        assert_eq!(record.kind, "KnowledgeItem");
8432        assert_eq!(record.entries, entries);
8433        assert_eq!(record.exclude_paths, exclude);
8434        assert_eq!(record.property_paths, vec!["$.title", "$.payload"]);
8435    }
8436
8437    #[test]
8438    fn describe_fts_property_schema_round_trips_scalar_only_entries() {
8439        let (_db, service) = setup();
8440
8441        service
8442            .register_fts_property_schema(
8443                "Meeting",
8444                &["$.title".to_owned(), "$.summary".to_owned()],
8445                None,
8446            )
8447            .expect("register scalar");
8448
8449        let described = service
8450            .describe_fts_property_schema("Meeting")
8451            .expect("describe")
8452            .expect("should exist");
8453        assert_eq!(described.property_paths, vec!["$.title", "$.summary"]);
8454        assert_eq!(described.entries.len(), 2);
8455        for entry in &described.entries {
8456            assert_eq!(
8457                entry.mode,
8458                FtsPropertyPathMode::Scalar,
8459                "scalar-only schema should deserialize every entry as Scalar"
8460            );
8461        }
8462        assert!(described.exclude_paths.is_empty());
8463    }
8464
8465    #[test]
8466    fn restore_reestablishes_property_fts_visibility() {
8467        let (db, service) = setup();
8468        {
8469            let conn = sqlite::open_connection(db.path()).expect("conn");
8470            // Register a property schema for Document kind.
8471            conn.execute(
8472                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
8473                 VALUES ('Document', '[\"$.title\", \"$.body\"]', ' ')",
8474                [],
8475            )
8476            .expect("register schema");
8477            // Insert an active node with extractable properties.
8478            conn.execute(
8479                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8480                 VALUES ('row-1', 'doc-1', 'Document', '{\"title\":\"Budget\",\"body\":\"Q3 forecast\"}', 100, 'seed')",
8481                [],
8482            )
8483            .expect("insert node");
8484            // Insert a chunk so restore has something to work with for FTS.
8485            conn.execute(
8486                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
8487                 VALUES ('chunk-1', 'doc-1', 'budget text', 100)",
8488                [],
8489            )
8490            .expect("insert chunk");
8491            // Insert property FTS row (as write path would).
8492            conn.execute(
8493                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
8494                 VALUES ('doc-1', 'Document', 'Budget Q3 forecast')",
8495                [],
8496            )
8497            .expect("insert property fts");
8498            // Simulate retire: supersede node, clear FTS.
8499            conn.execute(
8500                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8501                 VALUES ('evt-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
8502                [],
8503            )
8504            .expect("retire event");
8505            conn.execute(
8506                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
8507                [],
8508            )
8509            .expect("supersede");
8510            conn.execute("DELETE FROM fts_nodes", [])
8511                .expect("clear chunk fts");
8512            conn.execute("DELETE FROM fts_node_properties", [])
8513                .expect("clear property fts");
8514        }
8515
8516        let report = service.restore_logical_id("doc-1").expect("restore");
8517        assert_eq!(report.restored_property_fts_rows, 1);
8518
8519        // Verify the property FTS row was recreated.
8520        let conn = sqlite::open_connection(db.path()).expect("conn");
8521        let prop_fts_count: i64 = conn
8522            .query_row(
8523                "SELECT count(*) FROM fts_node_properties WHERE node_logical_id = 'doc-1'",
8524                [],
8525                |row| row.get(0),
8526            )
8527            .expect("prop fts count");
8528        assert_eq!(prop_fts_count, 1, "property FTS must be restored");
8529
8530        let text: String = conn
8531            .query_row(
8532                "SELECT text_content FROM fts_node_properties WHERE node_logical_id = 'doc-1'",
8533                [],
8534                |row| row.get(0),
8535            )
8536            .expect("prop fts text");
8537        assert_eq!(text, "Budget Q3 forecast");
8538    }
8539
8540    #[test]
8541    fn safe_export_preserves_fts_property_schemas() {
8542        let (_db, service) = setup();
8543        service
8544            .register_fts_property_schema(
8545                "Goal",
8546                &["$.name".to_owned(), "$.rationale".to_owned()],
8547                None,
8548            )
8549            .expect("register schema");
8550
8551        let export_dir = tempfile::TempDir::new().expect("temp dir");
8552        let export_path = export_dir.path().join("backup.db");
8553        service
8554            .safe_export(
8555                &export_path,
8556                SafeExportOptions {
8557                    force_checkpoint: false,
8558                },
8559            )
8560            .expect("export");
8561
8562        // Open the exported DB and verify the schema survived.
8563        let exported_conn = rusqlite::Connection::open(&export_path).expect("open exported db");
8564        let kind: String = exported_conn
8565            .query_row(
8566                "SELECT kind FROM fts_property_schemas WHERE kind = 'Goal'",
8567                [],
8568                |row| row.get(0),
8569            )
8570            .expect("schema must exist in export");
8571        assert_eq!(kind, "Goal");
8572        let paths_json: String = exported_conn
8573            .query_row(
8574                "SELECT property_paths_json FROM fts_property_schemas WHERE kind = 'Goal'",
8575                [],
8576                |row| row.get(0),
8577            )
8578            .expect("paths must exist");
8579        let paths: Vec<String> = serde_json::from_str(&paths_json).expect("valid json");
8580        assert_eq!(paths, vec!["$.name", "$.rationale"]);
8581    }
8582
8583    #[test]
8584    #[allow(clippy::too_many_lines)]
8585    fn export_recovery_rebuilds_property_fts_from_canonical_state() {
8586        let (db, service) = setup();
8587        // Register a schema and insert two nodes with extractable properties.
8588        service
8589            .register_fts_property_schema("Goal", &["$.name".to_owned()], None)
8590            .expect("register");
8591        {
8592            let conn = sqlite::open_connection(db.path()).expect("conn");
8593            conn.execute(
8594                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8595                 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
8596                [],
8597            )
8598            .expect("insert node 1");
8599            conn.execute(
8600                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
8601                 VALUES ('goal-1', 'Goal', 'Ship v2')",
8602                [],
8603            )
8604            .expect("insert property FTS row 1");
8605            conn.execute(
8606                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8607                 VALUES ('row-2', 'goal-2', 'Goal', '{\"name\":\"Launch redesign\"}', 100, 'seed')",
8608                [],
8609            )
8610            .expect("insert node 2");
8611            conn.execute(
8612                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
8613                 VALUES ('goal-2', 'Goal', 'Launch redesign')",
8614                [],
8615            )
8616            .expect("insert property FTS row 2");
8617        }
8618
8619        // Export.
8620        let export_dir = tempfile::TempDir::new().expect("temp dir");
8621        let export_path = export_dir.path().join("backup.db");
8622        service
8623            .safe_export(
8624                &export_path,
8625                SafeExportOptions {
8626                    force_checkpoint: false,
8627                },
8628            )
8629            .expect("export");
8630
8631        // Corrupt the derived rows: replace correct text with wrong text for
8632        // goal-1, and delete the row for goal-2 entirely. This exercises both
8633        // corrupted-but-present rows and missing rows in the same recovery.
8634        {
8635            let conn = rusqlite::Connection::open(&export_path).expect("open export");
8636            conn.execute(
8637                "DELETE FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
8638                [],
8639            )
8640            .expect("delete old row");
8641            conn.execute(
8642                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
8643                 VALUES ('goal-1', 'Goal', 'completely wrong stale text')",
8644                [],
8645            )
8646            .expect("insert corrupted row");
8647            conn.execute(
8648                "DELETE FROM fts_node_properties WHERE node_logical_id = 'goal-2'",
8649                [],
8650            )
8651            .expect("delete goal-2 row");
8652        }
8653
8654        // Open the exported DB and rebuild projections from canonical state.
8655        let schema = Arc::new(SchemaManager::new());
8656        let exported_service = AdminService::new(&export_path, Arc::clone(&schema));
8657        exported_service
8658            .rebuild_projections(ProjectionTarget::Fts)
8659            .expect("rebuild");
8660
8661        // Verify text_search(...) returns the correct result for goal-1's
8662        // canonical property ("Ship") — not the corrupted text.
8663        let coordinator = ExecutionCoordinator::open(
8664            &export_path,
8665            Arc::clone(&schema),
8666            None,
8667            1,
8668            Arc::new(TelemetryCounters::default()),
8669            None,
8670        )
8671        .expect("coordinator");
8672
8673        let compiled = QueryBuilder::nodes("Goal")
8674            .text_search("Ship", 10)
8675            .limit(10)
8676            .compile()
8677            .expect("compile");
8678        let rows = coordinator
8679            .execute_compiled_read(&compiled)
8680            .expect("execute read");
8681        assert_eq!(rows.nodes.len(), 1);
8682        assert_eq!(rows.nodes[0].logical_id, "goal-1");
8683
8684        // Verify text_search(...) recovers the previously missing goal-2 row.
8685        let compiled2 = QueryBuilder::nodes("Goal")
8686            .text_search("redesign", 10)
8687            .limit(10)
8688            .compile()
8689            .expect("compile");
8690        let rows2 = coordinator
8691            .execute_compiled_read(&compiled2)
8692            .expect("execute read");
8693        assert_eq!(rows2.nodes.len(), 1);
8694        assert_eq!(rows2.nodes[0].logical_id, "goal-2");
8695
8696        // The corrupted text must not be searchable after recovery.
8697        let compiled3 = QueryBuilder::nodes("Goal")
8698            .text_search("stale", 10)
8699            .limit(10)
8700            .compile()
8701            .expect("compile");
8702        let rows3 = coordinator
8703            .execute_compiled_read(&compiled3)
8704            .expect("execute read");
8705        assert_eq!(
8706            rows3.nodes.len(),
8707            0,
8708            "corrupted text must not appear in search after rebuild"
8709        );
8710
8711        // Verify integrity and semantics are clean after recovery.
8712        let integrity = exported_service.check_integrity().expect("integrity");
8713        assert_eq!(integrity.missing_property_fts_rows, 0);
8714        let semantics = exported_service.check_semantics().expect("semantics");
8715        assert_eq!(semantics.drifted_property_fts_rows, 0);
8716        assert_eq!(semantics.orphaned_property_fts_rows, 0);
8717        assert_eq!(semantics.duplicate_property_fts_rows, 0);
8718    }
8719
8720    #[test]
8721    fn check_integrity_no_false_positives_for_empty_extraction() {
8722        let (db, service) = setup();
8723        {
8724            let conn = sqlite::open_connection(db.path()).expect("conn");
8725            // Register a schema that looks for $.searchable
8726            conn.execute(
8727                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
8728                 VALUES ('Ticket', '[\"$.searchable\"]', ' ')",
8729                [],
8730            )
8731            .expect("register schema");
8732            // Insert a node whose properties do NOT contain $.searchable —
8733            // correctly has no property FTS row.
8734            conn.execute(
8735                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8736                 VALUES ('row-1', 'ticket-1', 'Ticket', '{\"status\":\"open\"}', 100, 'seed')",
8737                [],
8738            )
8739            .expect("insert node");
8740        }
8741
8742        let report = service.check_integrity().expect("integrity");
8743        assert_eq!(
8744            report.missing_property_fts_rows, 0,
8745            "node with no extractable values must not be counted as missing"
8746        );
8747    }
8748
8749    #[test]
8750    fn check_integrity_detects_genuinely_missing_property_fts_rows() {
8751        let (db, service) = setup();
8752        {
8753            let conn = sqlite::open_connection(db.path()).expect("conn");
8754            conn.execute(
8755                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
8756                 VALUES ('Ticket', '[\"$.title\"]', ' ')",
8757                [],
8758            )
8759            .expect("register schema");
8760            // Insert a node WITH an extractable $.title but no property FTS row.
8761            conn.execute(
8762                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8763                 VALUES ('row-1', 'ticket-1', 'Ticket', '{\"title\":\"fix login bug\"}', 100, 'seed')",
8764                [],
8765            )
8766            .expect("insert node");
8767        }
8768
8769        let report = service.check_integrity().expect("integrity");
8770        assert_eq!(
8771            report.missing_property_fts_rows, 1,
8772            "node with extractable values but no property FTS row must be detected"
8773        );
8774    }
8775
8776    #[test]
8777    fn rebuild_projections_fts_restores_missing_property_fts_rows() {
8778        let (db, service) = setup();
8779        {
8780            let conn = sqlite::open_connection(db.path()).expect("conn");
8781            conn.execute(
8782                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
8783                 VALUES ('Goal', '[\"$.name\"]', ' ')",
8784                [],
8785            )
8786            .expect("register schema");
8787            conn.execute(
8788                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8789                 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
8790                [],
8791            )
8792            .expect("insert node");
8793            // Deliberately do NOT insert a property FTS row.
8794        }
8795
8796        let report = service
8797            .rebuild_projections(ProjectionTarget::Fts)
8798            .expect("rebuild");
8799        assert!(
8800            report.rebuilt_rows >= 1,
8801            "rebuild must insert at least one property FTS row"
8802        );
8803
8804        let conn = sqlite::open_connection(db.path()).expect("conn");
8805        let text: String = conn
8806            .query_row(
8807                "SELECT text_content FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
8808                [],
8809                |row| row.get(0),
8810            )
8811            .expect("property FTS row must exist after rebuild");
8812        assert_eq!(text, "Ship v2");
8813    }
8814
8815    #[test]
8816    fn rebuild_missing_projections_fills_gap_for_deleted_property_fts_row() {
8817        let (db, service) = setup();
8818        {
8819            let conn = sqlite::open_connection(db.path()).expect("conn");
8820            conn.execute(
8821                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
8822                 VALUES ('Goal', '[\"$.name\"]', ' ')",
8823                [],
8824            )
8825            .expect("register schema");
8826            conn.execute(
8827                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8828                 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
8829                [],
8830            )
8831            .expect("insert node");
8832            // Insert and then delete the property FTS row to simulate corruption.
8833            conn.execute(
8834                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
8835                 VALUES ('goal-1', 'Goal', 'Ship v2')",
8836                [],
8837            )
8838            .expect("insert property fts");
8839            conn.execute(
8840                "DELETE FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
8841                [],
8842            )
8843            .expect("delete property fts");
8844        }
8845
8846        let report = service
8847            .rebuild_missing_projections()
8848            .expect("rebuild missing");
8849        assert!(
8850            report.rebuilt_rows >= 1,
8851            "missing rebuild must insert the gap-fill row"
8852        );
8853
8854        let conn = sqlite::open_connection(db.path()).expect("conn");
8855        let count: i64 = conn
8856            .query_row(
8857                "SELECT count(*) FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
8858                [],
8859                |row| row.get(0),
8860            )
8861            .expect("count");
8862        assert_eq!(
8863            count, 1,
8864            "gap-fill must restore exactly one property FTS row"
8865        );
8866    }
8867
8868    #[test]
8869    fn remove_schema_then_rebuild_cleans_stale_property_fts_rows() {
8870        let (db, service) = setup();
8871        service
8872            .register_fts_property_schema("Goal", &["$.name".to_owned()], None)
8873            .expect("register");
8874        {
8875            let conn = sqlite::open_connection(db.path()).expect("conn");
8876            conn.execute(
8877                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8878                 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
8879                [],
8880            )
8881            .expect("insert node");
8882            // Manually insert a property FTS row (simulating the write path).
8883            conn.execute(
8884                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
8885                 VALUES ('goal-1', 'Goal', 'Ship v2')",
8886                [],
8887            )
8888            .expect("insert property fts");
8889        }
8890
8891        // Remove the schema — stale rows now exist.
8892        service.remove_fts_property_schema("Goal").expect("remove");
8893
8894        // Verify stale rows are detected.
8895        let semantics = service.check_semantics().expect("semantics");
8896        assert_eq!(
8897            semantics.orphaned_property_fts_rows, 1,
8898            "stale property FTS rows must be detected after schema removal"
8899        );
8900
8901        // Full rebuild should clean them.
8902        service
8903            .rebuild_projections(ProjectionTarget::Fts)
8904            .expect("rebuild");
8905
8906        let conn = sqlite::open_connection(db.path()).expect("conn");
8907        let count: i64 = conn
8908            .query_row(
8909                "SELECT count(*) FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
8910                [],
8911                |row| row.get(0),
8912            )
8913            .expect("count");
8914        assert_eq!(
8915            count, 0,
8916            "rebuild after schema removal must delete stale property FTS rows"
8917        );
8918    }
8919
8920    mod validate_fts_property_paths_tests {
8921        use super::super::validate_fts_property_paths;
8922
8923        #[test]
8924        fn valid_simple_path() {
8925            assert!(validate_fts_property_paths(&["$.name".to_owned()]).is_ok());
8926        }
8927
8928        #[test]
8929        fn valid_nested_path() {
8930            assert!(validate_fts_property_paths(&["$.address.city".to_owned()]).is_ok());
8931        }
8932
8933        #[test]
8934        fn valid_underscore_segment() {
8935            assert!(validate_fts_property_paths(&["$.a_b".to_owned()]).is_ok());
8936        }
8937
8938        #[test]
8939        fn rejects_bare_prefix() {
8940            let result = validate_fts_property_paths(&["$.".to_owned()]);
8941            assert!(result.is_err(), "path '$.' must be rejected");
8942        }
8943
8944        #[test]
8945        fn rejects_double_dot() {
8946            let result = validate_fts_property_paths(&["$..x".to_owned()]);
8947            assert!(result.is_err(), "path '$..x' must be rejected");
8948        }
8949
8950        #[test]
8951        fn rejects_trailing_dot() {
8952            let result = validate_fts_property_paths(&["$.foo.".to_owned()]);
8953            assert!(result.is_err(), "path '$.foo.' must be rejected");
8954        }
8955
8956        #[test]
8957        fn rejects_space_in_segment() {
8958            let result = validate_fts_property_paths(&["$.foo bar".to_owned()]);
8959            assert!(result.is_err(), "path '$.foo bar' must be rejected");
8960        }
8961
8962        #[test]
8963        fn rejects_bracket_syntax() {
8964            let result = validate_fts_property_paths(&["$.foo[0]".to_owned()]);
8965            assert!(result.is_err(), "path '$.foo[0]' must be rejected");
8966        }
8967
8968        #[test]
8969        fn rejects_duplicates() {
8970            let result = validate_fts_property_paths(&["$.name".to_owned(), "$.name".to_owned()]);
8971            assert!(result.is_err(), "duplicate paths must be rejected");
8972        }
8973
8974        #[test]
8975        fn rejects_empty_list() {
8976            let result = validate_fts_property_paths(&[]);
8977            assert!(result.is_err(), "empty path list must be rejected");
8978        }
8979    }
8980}