Skip to main content

fathomdb_engine/
admin.rs

1use std::fmt::Write as _;
2use std::fs;
3use std::io::{self, Read, Write};
4use std::path::{Path, PathBuf};
5use std::process::{Command, Stdio};
6use std::sync::Arc;
7use std::sync::mpsc;
8use std::thread;
9use std::time::{Duration, Instant, SystemTime};
10
11use fathomdb_schema::{SchemaError, SchemaManager};
12use rusqlite::{DatabaseName, OptionalExtension, TransactionBehavior};
13use serde::{Deserialize, Serialize};
14use sha2::{Digest, Sha256};
15
16use crate::{
17    EngineError, ProjectionRepairReport, ProjectionService, executable_trust,
18    ids::new_id,
19    operational::{
20        OperationalCollectionKind, OperationalCollectionRecord, OperationalCompactionReport,
21        OperationalCurrentRow, OperationalFilterClause, OperationalFilterField,
22        OperationalFilterFieldType, OperationalFilterMode, OperationalFilterValue,
23        OperationalHistoryValidationIssue, OperationalHistoryValidationReport,
24        OperationalMutationRow, OperationalPurgeReport, OperationalReadReport,
25        OperationalReadRequest, OperationalRegisterRequest, OperationalRepairReport,
26        OperationalRetentionActionKind, OperationalRetentionPlanItem,
27        OperationalRetentionPlanReport, OperationalRetentionRunItem, OperationalRetentionRunReport,
28        OperationalSecondaryIndexDefinition, OperationalSecondaryIndexRebuildReport,
29        OperationalTraceReport, extract_secondary_index_entries_for_current,
30        extract_secondary_index_entries_for_mutation, parse_operational_secondary_indexes_json,
31        parse_operational_validation_contract, validate_operational_payload_against_contract,
32    },
33    projection::ProjectionTarget,
34    sqlite,
35};
36
/// Results of a physical and structural integrity check on the database.
///
/// As populated by [`AdminService::check_integrity`], every non-zero counter
/// below is accompanied by one entry in [`Self::warnings`].
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct IntegrityReport {
    /// `true` when `PRAGMA integrity_check` answered with the string `ok`.
    pub physical_ok: bool,
    /// `true` when `pragma_foreign_key_check` returned no rows.
    pub foreign_keys_ok: bool,
    /// Chunks belonging to an active node that have no `fts_nodes` row.
    pub missing_fts_rows: usize,
    /// Missing property FTS rows, counted with the same extraction logic as
    /// write/rebuild rather than with a pure-SQL check.
    pub missing_property_fts_rows: usize,
    /// Logical IDs that have more than one node row with `superseded_at IS NULL`.
    pub duplicate_active_logical_ids: usize,
    /// Rows in `operational_mutations` plus rows in `operational_current`
    /// whose `collection_name` is absent from `operational_collections`.
    pub operational_missing_collections: usize,
    /// `operational_current` rows whose `last_mutation_id` matches no row in
    /// `operational_mutations`.
    pub operational_missing_last_mutations: usize,
    /// Human-readable messages, one per category of problem detected.
    pub warnings: Vec<String>,
}
49
/// A registered FTS property projection schema for a node kind.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct FtsPropertySchemaRecord {
    /// The node kind this schema applies to.
    pub kind: String,
    /// Flat display list of registered JSON property paths
    /// (e.g. `["$.name", "$.title"]`). For recursive entries this lists
    /// only the root path; mode information is carried by
    /// [`Self::entries`].
    pub property_paths: Vec<String>,
    /// Full per-entry schema shape, including each path's mode
    /// ([`FtsPropertyPathMode::Scalar`] | [`FtsPropertyPathMode::Recursive`]).
    /// Read this field, not [`Self::property_paths`], when the registered
    /// schema must round-trip with its modes intact.
    pub entries: Vec<FtsPropertyPathSpec>,
    /// Subtree paths excluded from recursive walks. Empty for
    /// scalar-only schemas or recursive schemas with no exclusions.
    pub exclude_paths: Vec<String>,
    /// Separator used when concatenating extracted values.
    pub separator: String,
    /// Schema format version.
    pub format_version: i64,
}
73
/// Extraction mode for a single registered FTS property path.
///
/// Serialized in `snake_case` (`scalar` / `recursive`); [`Self::Scalar`] is
/// the `Default` variant.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum FtsPropertyPathMode {
    /// Resolve the path and append the scalar value(s). Matches legacy
    /// pre-Phase-4 behaviour.
    #[default]
    Scalar,
    /// Recursively walk every scalar leaf rooted at the path. Each leaf
    /// contributes one entry to the position map.
    Recursive,
}
86
/// A single registered property-FTS path with its extraction mode.
///
/// Construct with [`FtsPropertyPathSpec::scalar`] or
/// [`FtsPropertyPathSpec::recursive`], or fill the public fields directly.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct FtsPropertyPathSpec {
    /// JSON path to the property (must start with `$.`).
    pub path: String,
    /// Whether to treat this path as a scalar or recursively walk it.
    pub mode: FtsPropertyPathMode,
}
95
96impl FtsPropertyPathSpec {
97    #[must_use]
98    pub fn scalar(path: impl Into<String>) -> Self {
99        Self {
100            path: path.into(),
101            mode: FtsPropertyPathMode::Scalar,
102        }
103    }
104
105    #[must_use]
106    pub fn recursive(path: impl Into<String>) -> Self {
107        Self {
108            path: path.into(),
109            mode: FtsPropertyPathMode::Recursive,
110        }
111    }
112}
113
/// Options controlling how a safe database export is performed.
///
/// The `Default` value enables [`Self::force_checkpoint`].
#[derive(Clone, Copy, Debug)]
pub struct SafeExportOptions {
    /// When true, runs `PRAGMA wal_checkpoint(FULL)` before copying and fails if
    /// any WAL frames could not be applied (busy != 0). Set to false only in
    /// tests that seed a database without WAL mode.
    pub force_checkpoint: bool,
}
122
123impl Default for SafeExportOptions {
124    fn default() -> Self {
125        Self {
126            force_checkpoint: true,
127        }
128    }
129}
130
// Must match PROTOCOL_VERSION in fathomdb-admin-bridge.rs; bump both together.
// NOTE(review): presumably the value reported as
// `SafeExportManifest::protocol_version` — the use site is outside this view.
const EXPORT_PROTOCOL_VERSION: u32 = 1;
133
/// Manifest describing a completed safe export.
///
/// Serialize-only: the type derives `Serialize` but not `Deserialize`.
#[derive(Clone, Debug, Serialize)]
pub struct SafeExportManifest {
    /// Unix timestamp (seconds since epoch) when the export was created.
    pub exported_at: u64,
    /// SHA-256 hex digest of the exported database file.
    pub sha256: String,
    /// Schema version recorded in `fathom_schema_migrations` at export time.
    pub schema_version: u32,
    /// Bridge protocol version compiled into this binary.
    pub protocol_version: u32,
    /// Number of `SQLite` pages in the exported database file.
    pub page_count: u64,
}
148
/// Report from tracing all rows associated with a given `source_ref`.
///
/// NOTE(review): the code that fills this report is outside the visible part
/// of the file; the per-field notes are read off the field names and should
/// be confirmed against it.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct TraceReport {
    /// The `source_ref` the trace was run for.
    pub source_ref: String,
    /// Number of node rows found for the `source_ref` (presumably; confirm).
    pub node_rows: usize,
    /// Number of edge rows found for the `source_ref` (presumably; confirm).
    pub edge_rows: usize,
    /// Number of action rows found for the `source_ref` (presumably; confirm).
    pub action_rows: usize,
    /// Number of operational mutation rows found (presumably; confirm).
    pub operational_mutation_rows: usize,
    /// Logical IDs of the traced nodes (presumably; confirm).
    pub node_logical_ids: Vec<String>,
    /// IDs of the traced actions (presumably; confirm).
    pub action_ids: Vec<String>,
    /// IDs of the traced operational mutations (presumably; confirm).
    pub operational_mutation_ids: Vec<String>,
}
161
/// An edge that was skipped during a restore because an endpoint is missing.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct SkippedEdge {
    /// Logical ID of the edge that was not restored.
    pub edge_logical_id: String,
    /// The endpoint that was missing.
    /// NOTE(review): presumably the endpoint node's logical ID — confirm
    /// against the restore implementation.
    pub missing_endpoint: String,
}
168
/// Report from restoring a retired logical ID back to active state.
///
/// NOTE(review): the restore implementation is outside the visible part of
/// the file; the per-field notes are read off the field names and should be
/// confirmed against it.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct LogicalRestoreReport {
    /// The logical ID the restore was requested for.
    pub logical_id: String,
    /// Presumably `true` when the restore had nothing to do — confirm.
    pub was_noop: bool,
    /// Count of node rows restored.
    pub restored_node_rows: usize,
    /// Count of edge rows restored.
    pub restored_edge_rows: usize,
    /// Count of chunk rows restored.
    pub restored_chunk_rows: usize,
    /// Count of FTS rows restored.
    pub restored_fts_rows: usize,
    /// Count of property FTS rows restored.
    pub restored_property_fts_rows: usize,
    /// Count of vec rows restored.
    pub restored_vec_rows: usize,
    /// Edges that were not restored; see [`SkippedEdge`].
    pub skipped_edges: Vec<SkippedEdge>,
    /// Free-form notes attached to the report.
    pub notes: Vec<String>,
}
183
/// Report from permanently purging all rows for a logical ID.
///
/// NOTE(review): the purge implementation is outside the visible part of the
/// file; the per-field notes are read off the field names and should be
/// confirmed against it.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct LogicalPurgeReport {
    /// The logical ID the purge was requested for.
    pub logical_id: String,
    /// Presumably `true` when the purge had nothing to do — confirm.
    pub was_noop: bool,
    /// Count of node rows deleted.
    pub deleted_node_rows: usize,
    /// Count of edge rows deleted.
    pub deleted_edge_rows: usize,
    /// Count of chunk rows deleted.
    pub deleted_chunk_rows: usize,
    /// Count of FTS rows deleted.
    pub deleted_fts_rows: usize,
    /// Count of vec rows deleted.
    pub deleted_vec_rows: usize,
    /// Free-form notes attached to the report.
    pub notes: Vec<String>,
}
196
/// Options controlling provenance event purging behavior.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ProvenancePurgeOptions {
    /// Dry-run switch.
    /// NOTE(review): presumably `true` means "report only, delete nothing" —
    /// confirm against the purge implementation.
    pub dry_run: bool,
    /// Event types to preserve. When the field is absent from deserialized
    /// input it defaults to an empty list.
    #[serde(default)]
    pub preserve_event_types: Vec<String>,
}
204
/// Report from a provenance event purge operation.
///
/// NOTE(review): the purge implementation is outside the visible part of the
/// file; the per-field notes are read off the field names and should be
/// confirmed against it.
#[derive(Clone, Debug, Serialize)]
pub struct ProvenancePurgeReport {
    /// Number of events deleted.
    pub events_deleted: u64,
    /// Number of events preserved.
    pub events_preserved: u64,
    /// Oldest remaining event, if any.
    /// NOTE(review): presumably a timestamp; unit and epoch are not visible
    /// here — confirm.
    pub oldest_remaining: Option<i64>,
}
212
/// Service providing administrative operations (integrity checks, exports, restores, purges).
#[derive(Debug)]
pub struct AdminService {
    /// Path of the database file; each call to `connect` opens a new
    /// connection to it.
    database_path: PathBuf,
    /// Schema manager used to bootstrap every connection opened by `connect`.
    schema_manager: Arc<SchemaManager>,
    /// Projection service constructed from the same path and schema manager.
    projections: ProjectionService,
}
220
/// Results of a semantic consistency check on the graph data.
///
/// As populated by [`AdminService::check_semantics`], every non-zero counter
/// produces one formatted entry in [`Self::warnings`].
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct SemanticReport {
    /// Chunks whose `node_logical_id` matches no node row at all (no
    /// surviving node history, active or superseded).
    pub orphaned_chunks: usize,
    /// Active nodes with a NULL `source_ref` (loss of provenance).
    pub null_source_ref_nodes: usize,
    /// Steps referencing a `run_id` that does not exist in the runs table.
    pub broken_step_fk: usize,
    /// Actions referencing a `step_id` that does not exist in the steps table.
    pub broken_action_fk: usize,
    /// FTS rows whose `chunk_id` does not exist in the chunks table.
    pub stale_fts_rows: usize,
    /// FTS rows whose `node_logical_id` has no active node row
    /// (no row with `superseded_at IS NULL`).
    pub fts_rows_for_superseded_nodes: usize,
    /// Property FTS rows whose node has been superseded or does not exist.
    pub stale_property_fts_rows: usize,
    /// Property FTS rows whose kind has no registered FTS property schema.
    pub orphaned_property_fts_rows: usize,
    /// Property FTS rows whose `kind` does not match the active node's actual kind.
    pub mismatched_kind_property_fts_rows: usize,
    /// Logical IDs with more than one `fts_node_properties` row. The check
    /// groups by `node_logical_id` only; it does not filter on whether the
    /// node is active.
    pub duplicate_property_fts_rows: usize,
    /// Property FTS rows whose `text_content` no longer matches the canonical extraction.
    pub drifted_property_fts_rows: usize,
    /// Active edges where at least one endpoint has no active node.
    pub dangling_edges: usize,
    /// `logical_ids` where every version has been superseded (no active row).
    pub orphaned_supersession_chains: usize,
    /// Vec rows whose backing chunk no longer exists in the chunks table.
    /// Always 0 when built without the `sqlite-vec` feature, and 0 when the
    /// vec table or module is unavailable at runtime.
    pub stale_vec_rows: usize,
    /// Compatibility counter for vec rows whose chunk points at missing node history.
    /// Degrades to 0 under the same conditions as [`Self::stale_vec_rows`].
    pub vec_rows_for_superseded_nodes: usize,
    /// Latest-state keys whose latest mutation is a `put` but no current row exists.
    pub missing_operational_current_rows: usize,
    /// Current rows that do not match the latest mutation state.
    pub stale_operational_current_rows: usize,
    /// Mutations written after the owning collection was disabled.
    pub disabled_collection_mutations: usize,
    /// Access metadata rows whose `logical_id` no longer has any node history.
    pub orphaned_last_access_metadata_rows: usize,
    /// Human-readable messages, one per non-zero counter, each prefixed with
    /// the count.
    pub warnings: Vec<String>,
}
264
/// Configuration for regenerating vector embeddings via an external generator command.
///
/// NOTE(review): the code that validates and consumes this configuration is
/// outside the visible part of the file; the per-field notes are read off the
/// field names and should be confirmed against it.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct VectorRegenerationConfig {
    /// Name of the vector profile being regenerated.
    pub profile: String,
    /// Name of the table holding the vectors.
    pub table_name: String,
    /// Identity of the embedding model.
    pub model_identity: String,
    /// Version of the embedding model.
    pub model_version: String,
    /// Embedding dimension.
    pub dimension: usize,
    /// Identifier of the normalization policy.
    pub normalization_policy: String,
    /// Identifier of the chunking policy.
    pub chunking_policy: String,
    /// Identifier of the preprocessing policy.
    pub preprocessing_policy: String,
    /// The external generator command.
    /// NOTE(review): presumably program followed by its arguments — confirm.
    pub generator_command: Vec<String>,
}
279
/// Report from a vector embedding regeneration run.
///
/// NOTE(review): the regeneration code is outside the visible part of the
/// file; the per-field notes are read off the field names and should be
/// confirmed against it.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct VectorRegenerationReport {
    /// Vector profile that was regenerated.
    pub profile: String,
    /// Table the vectors were written to.
    pub table_name: String,
    /// Embedding dimension used for the run.
    pub dimension: usize,
    /// Total number of chunks considered.
    pub total_chunks: usize,
    /// Number of rows regenerated.
    pub regenerated_rows: usize,
    /// Whether the contract was persisted.
    pub contract_persisted: bool,
    /// Free-form notes attached to the report.
    pub notes: Vec<String>,
}
291
/// Security and resource policy for the external vector generator subprocess.
///
/// The numeric limits have no serde default and must be present in
/// deserialized input; the remaining fields fall back to the defaults noted
/// on each field. The values quoted as "`Default`" are those of this type's
/// `Default` impl.
///
/// NOTE(review): enforcement of these limits happens outside the visible part
/// of the file; the meaning of each limit is read off its name — confirm.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct VectorGeneratorPolicy {
    /// Generator timeout in milliseconds (`Default`: 300 000).
    pub timeout_ms: u64,
    /// Upper bound on generator stdout, in bytes (`Default`: 64 MiB).
    pub max_stdout_bytes: usize,
    /// Upper bound on generator stderr, in bytes (`Default`: 1 MiB).
    pub max_stderr_bytes: usize,
    /// Upper bound on input sent to the generator, in bytes (`Default`: 64 MiB).
    pub max_input_bytes: usize,
    /// Upper bound on the number of chunks (`Default`: 1 000 000).
    pub max_chunks: usize,
    /// Require the generator executable to be given as an absolute path.
    /// Defaults to `true`, including when absent from deserialized input.
    #[serde(default = "default_require_absolute_executable")]
    pub require_absolute_executable: bool,
    /// Reject a generator executable that is world-writable.
    /// Defaults to `true`, including when absent from deserialized input.
    #[serde(default = "default_reject_world_writable_executable")]
    pub reject_world_writable_executable: bool,
    /// Allowed root directories for the executable; defaults to empty.
    /// NOTE(review): whether an empty list means "no restriction" is not
    /// visible here — confirm.
    #[serde(default)]
    pub allowed_executable_roots: Vec<String>,
    /// Environment variables to preserve for the subprocess; defaults to empty.
    #[serde(default)]
    pub preserve_env_vars: Vec<String>,
}
310
311impl Default for VectorGeneratorPolicy {
312    fn default() -> Self {
313        Self {
314            timeout_ms: 300_000,
315            max_stdout_bytes: 64 * 1024 * 1024,
316            max_stderr_bytes: 1024 * 1024,
317            max_input_bytes: 64 * 1024 * 1024,
318            max_chunks: 1_000_000,
319            require_absolute_executable: true,
320            reject_world_writable_executable: true,
321            allowed_executable_roots: vec![],
322            preserve_env_vars: vec![],
323        }
324    }
325}
326
/// Serde default for `VectorGeneratorPolicy::require_absolute_executable`:
/// the check is on unless explicitly disabled.
const fn default_require_absolute_executable() -> bool {
    true
}
330
/// Serde default for `VectorGeneratorPolicy::reject_world_writable_executable`:
/// the check is on unless explicitly disabled.
const fn default_reject_world_writable_executable() -> bool {
    true
}
334
// Format version for the vector contract.
// NOTE(review): where this is written or compared is outside this view.
const CURRENT_VECTOR_CONTRACT_FORMAT_VERSION: i64 = 1;
// Size limits. NOTE(review): presumably applied when validating the fields of
// `VectorRegenerationConfig`; whether lengths are bytes or chars is decided
// at the use sites, which are outside this view — confirm.
const MAX_PROFILE_LEN: usize = 128;
const MAX_MODEL_IDENTITY_LEN: usize = 256;
const MAX_MODEL_VERSION_LEN: usize = 128;
const MAX_POLICY_LEN: usize = 128;
const MAX_GENERATOR_COMMAND_ARG_LEN: usize = 4096;
const MAX_GENERATOR_COMMAND_TOTAL_LEN: usize = 16 * 1024; // 16 KiB
const MAX_CONTRACT_JSON_BYTES: usize = 32 * 1024; // 32 KiB
const MAX_AUDIT_METADATA_BYTES: usize = 2048;
// Row limits for operational reads: the fallback and the ceiling.
// NOTE(review): presumably the default when a request gives no limit and the
// cap on any requested limit — confirm at the use sites.
const DEFAULT_OPERATIONAL_READ_LIMIT: usize = 100;
const MAX_OPERATIONAL_READ_LIMIT: usize = 1000;
346
/// Thread-safe handle to the shared [`AdminService`].
///
/// Cloning the handle clones the inner `Arc`, so all clones refer to the same
/// service instance.
#[derive(Clone, Debug)]
pub struct AdminHandle {
    /// Reference-counted pointer to the single underlying service.
    inner: Arc<AdminService>,
}
352
353impl AdminHandle {
354    /// Wrap an [`AdminService`] in a shared handle.
355    #[must_use]
356    pub fn new(service: AdminService) -> Self {
357        Self {
358            inner: Arc::new(service),
359        }
360    }
361
362    /// Clone the inner `Arc` to the [`AdminService`].
363    #[must_use]
364    pub fn service(&self) -> Arc<AdminService> {
365        Arc::clone(&self.inner)
366    }
367}
368
369impl AdminService {
370    /// Create a new admin service for the database at the given path.
371    #[must_use]
372    pub fn new(path: impl AsRef<Path>, schema_manager: Arc<SchemaManager>) -> Self {
373        let database_path = path.as_ref().to_path_buf();
374        let projections = ProjectionService::new(&database_path, Arc::clone(&schema_manager));
375        Self {
376            database_path,
377            schema_manager,
378            projections,
379        }
380    }
381
382    fn connect(&self) -> Result<rusqlite::Connection, EngineError> {
383        #[cfg(feature = "sqlite-vec")]
384        let conn = sqlite::open_connection_with_vec(&self.database_path)?;
385        #[cfg(not(feature = "sqlite-vec"))]
386        let conn = sqlite::open_connection(&self.database_path)?;
387        self.schema_manager.bootstrap(&conn)?;
388        Ok(conn)
389    }
390
391    /// # Errors
392    /// Returns [`EngineError`] if the database connection fails or any SQL query fails.
393    pub fn check_integrity(&self) -> Result<IntegrityReport, EngineError> {
394        let conn = self.connect()?;
395
396        let physical_result: String =
397            conn.query_row("PRAGMA integrity_check", [], |row| row.get(0))?;
398        let foreign_key_count: i64 =
399            conn.query_row("SELECT count(*) FROM pragma_foreign_key_check", [], |row| {
400                row.get(0)
401            })?;
402        let missing_fts_rows: i64 = conn.query_row(
403            r"
404            SELECT count(*)
405            FROM chunks c
406            JOIN nodes n
407              ON n.logical_id = c.node_logical_id
408             AND n.superseded_at IS NULL
409            WHERE NOT EXISTS (
410                SELECT 1
411                FROM fts_nodes f
412                WHERE f.chunk_id = c.id
413            )
414            ",
415            [],
416            |row| row.get(0),
417        )?;
418        let duplicate_active: i64 = conn.query_row(
419            r"
420            SELECT count(*)
421            FROM (
422                SELECT logical_id
423                FROM nodes
424                WHERE superseded_at IS NULL
425                GROUP BY logical_id
426                HAVING count(*) > 1
427            )
428            ",
429            [],
430            |row| row.get(0),
431        )?;
432        let operational_missing_collections: i64 = conn.query_row(
433            r"
434            SELECT (
435                SELECT count(*)
436                FROM operational_mutations m
437                LEFT JOIN operational_collections c ON c.name = m.collection_name
438                WHERE c.name IS NULL
439            ) + (
440                SELECT count(*)
441                FROM operational_current oc
442                LEFT JOIN operational_collections c ON c.name = oc.collection_name
443                WHERE c.name IS NULL
444            )
445            ",
446            [],
447            |row| row.get(0),
448        )?;
449        let operational_missing_last_mutations: i64 = conn.query_row(
450            r"
451            SELECT count(*)
452            FROM operational_current oc
453            LEFT JOIN operational_mutations m ON m.id = oc.last_mutation_id
454            WHERE m.id IS NULL
455            ",
456            [],
457            |row| row.get(0),
458        )?;
459
460        // Count missing property FTS rows using the same extraction logic as
461        // write/rebuild. A pure-SQL check would overcount: nodes whose declared
462        // paths legitimately normalize to no values correctly have no row.
463        let missing_property_fts_rows = count_missing_property_fts_rows(&conn)?;
464
465        let mut warnings = Vec::new();
466        if missing_fts_rows > 0 {
467            warnings.push("missing FTS projections detected".to_owned());
468        }
469        if missing_property_fts_rows > 0 {
470            warnings.push("missing property FTS projections detected".to_owned());
471        }
472        if duplicate_active > 0 {
473            warnings.push("duplicate active logical_ids detected".to_owned());
474        }
475        if operational_missing_collections > 0 {
476            warnings.push("operational rows reference missing collections".to_owned());
477        }
478        if operational_missing_last_mutations > 0 {
479            warnings.push("operational current rows reference missing last mutations".to_owned());
480        }
481
482        // FIX(review): was `as usize` — unsound on 32-bit targets, wraps negatives silently.
483        // Options: (A) try_from().unwrap_or(0) — masks corruption, (B) try_from().expect() —
484        // panics on corruption, (C) propagate error. Chose (B) here: a negative count(*)
485        // signals data corruption, and the integrity report would be meaningless anyway.
486        Ok(IntegrityReport {
487            physical_ok: physical_result == "ok",
488            foreign_keys_ok: foreign_key_count == 0,
489            missing_fts_rows: i64_to_usize(missing_fts_rows),
490            missing_property_fts_rows: i64_to_usize(missing_property_fts_rows),
491            duplicate_active_logical_ids: i64_to_usize(duplicate_active),
492            operational_missing_collections: i64_to_usize(operational_missing_collections),
493            operational_missing_last_mutations: i64_to_usize(operational_missing_last_mutations),
494            warnings,
495        })
496    }
497
498    /// # Errors
499    /// Returns [`EngineError`] if the database connection fails or any SQL query fails.
500    #[allow(clippy::too_many_lines)]
501    pub fn check_semantics(&self) -> Result<SemanticReport, EngineError> {
502        let conn = self.connect()?;
503
504        let orphaned_chunks: i64 = conn.query_row(
505            r"
506            SELECT count(*)
507            FROM chunks c
508            WHERE NOT EXISTS (
509                SELECT 1 FROM nodes n
510                WHERE n.logical_id = c.node_logical_id
511            )
512            ",
513            [],
514            |row| row.get(0),
515        )?;
516
517        let null_source_ref_nodes: i64 = conn.query_row(
518            "SELECT count(*) FROM nodes WHERE source_ref IS NULL AND superseded_at IS NULL",
519            [],
520            |row| row.get(0),
521        )?;
522
523        let broken_step_fk: i64 = conn.query_row(
524            r"
525            SELECT count(*) FROM steps s
526            WHERE NOT EXISTS (SELECT 1 FROM runs r WHERE r.id = s.run_id)
527            ",
528            [],
529            |row| row.get(0),
530        )?;
531
532        let broken_action_fk: i64 = conn.query_row(
533            r"
534            SELECT count(*) FROM actions a
535            WHERE NOT EXISTS (SELECT 1 FROM steps s WHERE s.id = a.step_id)
536            ",
537            [],
538            |row| row.get(0),
539        )?;
540
541        let stale_fts_rows: i64 = conn.query_row(
542            r"
543            SELECT count(*) FROM fts_nodes f
544            WHERE NOT EXISTS (SELECT 1 FROM chunks c WHERE c.id = f.chunk_id)
545            ",
546            [],
547            |row| row.get(0),
548        )?;
549
550        let fts_rows_for_superseded_nodes: i64 = conn.query_row(
551            r"
552            SELECT count(*) FROM fts_nodes f
553            WHERE NOT EXISTS (
554                SELECT 1 FROM nodes n
555                WHERE n.logical_id = f.node_logical_id AND n.superseded_at IS NULL
556            )
557            ",
558            [],
559            |row| row.get(0),
560        )?;
561
562        let stale_property_fts_rows: i64 = conn.query_row(
563            r"
564            SELECT count(*) FROM fts_node_properties fp
565            WHERE NOT EXISTS (
566                SELECT 1 FROM nodes n
567                WHERE n.logical_id = fp.node_logical_id AND n.superseded_at IS NULL
568            )
569            ",
570            [],
571            |row| row.get(0),
572        )?;
573
574        let orphaned_property_fts_rows: i64 = conn.query_row(
575            r"
576            SELECT count(*) FROM fts_node_properties fp
577            WHERE NOT EXISTS (
578                SELECT 1 FROM fts_property_schemas s WHERE s.kind = fp.kind
579            )
580            ",
581            [],
582            |row| row.get(0),
583        )?;
584
585        let mismatched_kind_property_fts_rows: i64 = conn.query_row(
586            r"
587            SELECT count(*) FROM fts_node_properties fp
588            JOIN nodes n ON n.logical_id = fp.node_logical_id AND n.superseded_at IS NULL
589            WHERE n.kind != fp.kind
590            ",
591            [],
592            |row| row.get(0),
593        )?;
594
595        let duplicate_property_fts_rows: i64 = conn.query_row(
596            r"
597            SELECT count(*) FROM (
598                SELECT node_logical_id FROM fts_node_properties
599                GROUP BY node_logical_id
600                HAVING count(*) > 1
601            )
602            ",
603            [],
604            |row| row.get(0),
605        )?;
606
607        let drifted_property_fts_rows = count_drifted_property_fts_rows(&conn)?;
608
609        let dangling_edges: i64 = conn.query_row(
610            r"
611            SELECT count(*) FROM edges e
612            WHERE e.superseded_at IS NULL AND (
613                NOT EXISTS (SELECT 1 FROM nodes n WHERE n.logical_id = e.source_logical_id AND n.superseded_at IS NULL)
614                OR
615                NOT EXISTS (SELECT 1 FROM nodes n WHERE n.logical_id = e.target_logical_id AND n.superseded_at IS NULL)
616            )
617            ",
618            [],
619            |row| row.get(0),
620        )?;
621
622        let orphaned_supersession_chains: i64 = conn.query_row(
623            r"
624            SELECT count(*) FROM (
625                SELECT logical_id FROM nodes
626                GROUP BY logical_id
627                HAVING count(*) > 0 AND sum(CASE WHEN superseded_at IS NULL THEN 1 ELSE 0 END) = 0
628            )
629            ",
630            [],
631            |row| row.get(0),
632        )?;
633
634        // Vec stale row detection — degrades to 0 when the vec profile is absent.
635        #[cfg(feature = "sqlite-vec")]
636        let stale_vec_rows: i64 = match conn.query_row(
637            r"
638            SELECT count(*) FROM vec_nodes_active v
639            WHERE NOT EXISTS (SELECT 1 FROM chunks c WHERE c.id = v.chunk_id)
640            ",
641            [],
642            |row| row.get(0),
643        ) {
644            Ok(n) => n,
645            Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
646                if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
647            {
648                0
649            }
650            Err(e) => return Err(EngineError::Sqlite(e)),
651        };
652        #[cfg(not(feature = "sqlite-vec"))]
653        let stale_vec_rows: i64 = 0;
654
655        #[cfg(feature = "sqlite-vec")]
656        let vec_rows_for_superseded_nodes: i64 = match conn.query_row(
657            r"
658            SELECT count(*) FROM vec_nodes_active v
659            JOIN chunks c ON c.id = v.chunk_id
660            WHERE NOT EXISTS (
661                SELECT 1 FROM nodes n
662                WHERE n.logical_id = c.node_logical_id
663            )
664            ",
665            [],
666            |row| row.get(0),
667        ) {
668            Ok(n) => n,
669            Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
670                if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
671            {
672                0
673            }
674            Err(e) => return Err(EngineError::Sqlite(e)),
675        };
676        #[cfg(not(feature = "sqlite-vec"))]
677        let vec_rows_for_superseded_nodes: i64 = 0;
678        let missing_operational_current_rows: i64 = conn.query_row(
679            r"
680            SELECT count(*)
681            FROM operational_mutations m
682            JOIN operational_collections c
683              ON c.name = m.collection_name
684             AND c.kind = 'latest_state'
685            WHERE m.op_kind = 'put'
686              AND NOT EXISTS (
687                    SELECT 1
688                    FROM operational_mutations newer
689                    WHERE newer.collection_name = m.collection_name
690                      AND newer.record_key = m.record_key
691                      AND newer.mutation_order > m.mutation_order
692                )
693              AND NOT EXISTS (
694                    SELECT 1
695                    FROM operational_current oc
696                    WHERE oc.collection_name = m.collection_name
697                      AND oc.record_key = m.record_key
698                )
699            ",
700            [],
701            |row| row.get(0),
702        )?;
703        let stale_operational_current_rows: i64 = conn.query_row(
704            r"
705            SELECT count(*)
706            FROM operational_current oc
707            JOIN operational_collections c
708              ON c.name = oc.collection_name
709             AND c.kind = 'latest_state'
710            LEFT JOIN operational_mutations m ON m.id = oc.last_mutation_id
711            WHERE m.id IS NULL
712               OR m.collection_name != oc.collection_name
713               OR m.record_key != oc.record_key
714               OR m.op_kind != 'put'
715               OR m.payload_json != oc.payload_json
716               OR EXISTS (
717                    SELECT 1
718                    FROM operational_mutations newer
719                    WHERE newer.collection_name = oc.collection_name
720                      AND newer.record_key = oc.record_key
721                      AND newer.mutation_order > m.mutation_order
722                )
723            ",
724            [],
725            |row| row.get(0),
726        )?;
727        let disabled_collection_mutations: i64 = conn.query_row(
728            r"
729            SELECT count(*)
730            FROM operational_mutations m
731            JOIN operational_collections c ON c.name = m.collection_name
732            WHERE c.disabled_at IS NOT NULL AND m.created_at > c.disabled_at
733            ",
734            [],
735            |row| row.get(0),
736        )?;
737        let orphaned_last_access_metadata_rows: i64 = conn.query_row(
738            r"
739            SELECT count(*)
740            FROM node_access_metadata am
741            WHERE NOT EXISTS (
742                SELECT 1 FROM nodes n WHERE n.logical_id = am.logical_id
743            )
744            ",
745            [],
746            |row| row.get(0),
747        )?;
748
749        let mut warnings = Vec::new();
750        if orphaned_chunks > 0 {
751            warnings.push(format!(
752                "{orphaned_chunks} orphaned chunk(s) with no surviving node history"
753            ));
754        }
755        if null_source_ref_nodes > 0 {
756            warnings.push(format!(
757                "{null_source_ref_nodes} active node(s) with null source_ref"
758            ));
759        }
760        if broken_step_fk > 0 {
761            warnings.push(format!(
762                "{broken_step_fk} step(s) referencing non-existent run"
763            ));
764        }
765        if broken_action_fk > 0 {
766            warnings.push(format!(
767                "{broken_action_fk} action(s) referencing non-existent step"
768            ));
769        }
770        if stale_fts_rows > 0 {
771            warnings.push(format!(
772                "{stale_fts_rows} stale FTS row(s) referencing missing chunk"
773            ));
774        }
775        if fts_rows_for_superseded_nodes > 0 {
776            warnings.push(format!(
777                "{fts_rows_for_superseded_nodes} FTS row(s) for superseded node(s)"
778            ));
779        }
780        if stale_property_fts_rows > 0 {
781            warnings.push(format!(
782                "{stale_property_fts_rows} stale property FTS row(s) for superseded/missing node(s)"
783            ));
784        }
785        if orphaned_property_fts_rows > 0 {
786            warnings.push(format!(
787                "{orphaned_property_fts_rows} orphaned property FTS row(s) for unregistered kind(s)"
788            ));
789        }
790        if mismatched_kind_property_fts_rows > 0 {
791            warnings.push(format!(
792                "{mismatched_kind_property_fts_rows} property FTS row(s) whose kind does not match the active node"
793            ));
794        }
795        if duplicate_property_fts_rows > 0 {
796            warnings.push(format!(
797                "{duplicate_property_fts_rows} active logical ID(s) with duplicate property FTS rows"
798            ));
799        }
800        if drifted_property_fts_rows > 0 {
801            warnings.push(format!(
802                "{drifted_property_fts_rows} property FTS row(s) with stale text_content"
803            ));
804        }
805        if dangling_edges > 0 {
806            warnings.push(format!(
807                "{dangling_edges} active edge(s) with missing endpoint node"
808            ));
809        }
810        if orphaned_supersession_chains > 0 {
811            warnings.push(format!(
812                "{orphaned_supersession_chains} logical_id(s) with all versions superseded"
813            ));
814        }
815        if stale_vec_rows > 0 {
816            warnings.push(format!(
817                "{stale_vec_rows} stale vec row(s) referencing missing chunk"
818            ));
819        }
820        if vec_rows_for_superseded_nodes > 0 {
821            warnings.push(format!(
822                "{vec_rows_for_superseded_nodes} vec row(s) whose node history is missing"
823            ));
824        }
825        if missing_operational_current_rows > 0 {
826            warnings.push(format!(
827                "{missing_operational_current_rows} latest-state key(s) missing operational_current rows"
828            ));
829        }
830        if stale_operational_current_rows > 0 {
831            warnings.push(format!(
832                "{stale_operational_current_rows} stale operational_current row(s)"
833            ));
834        }
835        if disabled_collection_mutations > 0 {
836            warnings.push(format!(
837                "{disabled_collection_mutations} mutation(s) were written after collection disable"
838            ));
839        }
840        if orphaned_last_access_metadata_rows > 0 {
841            warnings.push(format!(
842                "{orphaned_last_access_metadata_rows} last_access metadata row(s) reference missing node history"
843            ));
844        }
845
846        Ok(SemanticReport {
847            orphaned_chunks: i64_to_usize(orphaned_chunks),
848            null_source_ref_nodes: i64_to_usize(null_source_ref_nodes),
849            broken_step_fk: i64_to_usize(broken_step_fk),
850            broken_action_fk: i64_to_usize(broken_action_fk),
851            stale_fts_rows: i64_to_usize(stale_fts_rows),
852            fts_rows_for_superseded_nodes: i64_to_usize(fts_rows_for_superseded_nodes),
853            stale_property_fts_rows: i64_to_usize(stale_property_fts_rows),
854            orphaned_property_fts_rows: i64_to_usize(orphaned_property_fts_rows),
855            mismatched_kind_property_fts_rows: i64_to_usize(mismatched_kind_property_fts_rows),
856            duplicate_property_fts_rows: i64_to_usize(duplicate_property_fts_rows),
857            drifted_property_fts_rows: i64_to_usize(drifted_property_fts_rows),
858            dangling_edges: i64_to_usize(dangling_edges),
859            orphaned_supersession_chains: i64_to_usize(orphaned_supersession_chains),
860            stale_vec_rows: i64_to_usize(stale_vec_rows),
861            vec_rows_for_superseded_nodes: i64_to_usize(vec_rows_for_superseded_nodes),
862            missing_operational_current_rows: i64_to_usize(missing_operational_current_rows),
863            stale_operational_current_rows: i64_to_usize(stale_operational_current_rows),
864            disabled_collection_mutations: i64_to_usize(disabled_collection_mutations),
865            orphaned_last_access_metadata_rows: i64_to_usize(orphaned_last_access_metadata_rows),
866            warnings,
867        })
868    }
869
870    /// # Errors
871    /// Returns [`EngineError`] if the collection metadata is invalid or the insert fails.
872    pub fn register_operational_collection(
873        &self,
874        request: &OperationalRegisterRequest,
875    ) -> Result<OperationalCollectionRecord, EngineError> {
876        if request.name.trim().is_empty() {
877            return Err(EngineError::InvalidWrite(
878                "operational collection name must not be empty".to_owned(),
879            ));
880        }
881        if request.schema_json.is_empty() {
882            return Err(EngineError::InvalidWrite(
883                "operational collection schema_json must not be empty".to_owned(),
884            ));
885        }
886        if request.retention_json.is_empty() {
887            return Err(EngineError::InvalidWrite(
888                "operational collection retention_json must not be empty".to_owned(),
889            ));
890        }
891        if request.filter_fields_json.is_empty() {
892            return Err(EngineError::InvalidWrite(
893                "operational collection filter_fields_json must not be empty".to_owned(),
894            ));
895        }
896        parse_operational_validation_contract(&request.validation_json)
897            .map_err(EngineError::InvalidWrite)?;
898        parse_operational_secondary_indexes_json(&request.secondary_indexes_json, request.kind)
899            .map_err(EngineError::InvalidWrite)?;
900        if request.format_version <= 0 {
901            return Err(EngineError::InvalidWrite(
902                "operational collection format_version must be positive".to_owned(),
903            ));
904        }
905        parse_operational_filter_fields(&request.filter_fields_json)
906            .map_err(EngineError::InvalidWrite)?;
907
908        let mut conn = self.connect()?;
909        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
910        tx.execute(
911            "INSERT INTO operational_collections \
912             (name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at) \
913             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, unixepoch())",
914            rusqlite::params![
915                request.name.as_str(),
916                request.kind.as_str(),
917                request.schema_json.as_str(),
918                request.retention_json.as_str(),
919                request.filter_fields_json.as_str(),
920                request.validation_json.as_str(),
921                request.secondary_indexes_json.as_str(),
922                request.format_version,
923            ],
924        )?;
925        persist_simple_provenance_event(
926            &tx,
927            "operational_collection_registered",
928            request.name.as_str(),
929            Some(serde_json::json!({
930                "kind": request.kind.as_str(),
931                "format_version": request.format_version,
932            })),
933        )?;
934        tx.commit()?;
935
936        self.describe_operational_collection(&request.name)?
937            .ok_or_else(|| {
938                EngineError::Bridge("registered collection missing after commit".to_owned())
939            })
940    }
941
942    /// # Errors
943    /// Returns [`EngineError`] if the database query fails.
944    pub fn describe_operational_collection(
945        &self,
946        name: &str,
947    ) -> Result<Option<OperationalCollectionRecord>, EngineError> {
948        let conn = self.connect()?;
949        load_operational_collection_record(&conn, name)
950    }
951
952    /// # Errors
953    /// Returns [`EngineError`] if the collection is missing, the filter contract is invalid,
954    /// or existing mutation backfill fails.
955    pub fn update_operational_collection_filters(
956        &self,
957        name: &str,
958        filter_fields_json: &str,
959    ) -> Result<OperationalCollectionRecord, EngineError> {
960        if filter_fields_json.is_empty() {
961            return Err(EngineError::InvalidWrite(
962                "operational collection filter_fields_json must not be empty".to_owned(),
963            ));
964        }
965        let declared_fields = parse_operational_filter_fields(filter_fields_json)
966            .map_err(EngineError::InvalidWrite)?;
967
968        let mut conn = self.connect()?;
969        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
970        load_operational_collection_record(&tx, name)?.ok_or_else(|| {
971            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
972        })?;
973        tx.execute(
974            "UPDATE operational_collections SET filter_fields_json = ?2 WHERE name = ?1",
975            rusqlite::params![name, filter_fields_json],
976        )?;
977        tx.execute(
978            "DELETE FROM operational_filter_values WHERE collection_name = ?1",
979            [name],
980        )?;
981
982        let mut mutation_stmt = tx.prepare(
983            "SELECT id, payload_json FROM operational_mutations \
984             WHERE collection_name = ?1 ORDER BY mutation_order",
985        )?;
986        let mutations = mutation_stmt
987            .query_map([name], |row| {
988                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
989            })?
990            .collect::<Result<Vec<_>, _>>()?;
991        drop(mutation_stmt);
992
993        let mut insert_filter_value = tx.prepare_cached(
994            "INSERT INTO operational_filter_values \
995             (mutation_id, collection_name, field_name, string_value, integer_value) \
996             VALUES (?1, ?2, ?3, ?4, ?5)",
997        )?;
998        let mut inserted_values = 0usize;
999        for (mutation_id, payload_json) in &mutations {
1000            for filter_value in
1001                extract_operational_filter_values(&declared_fields, payload_json.as_str())
1002            {
1003                insert_filter_value.execute(rusqlite::params![
1004                    mutation_id,
1005                    name,
1006                    filter_value.field_name,
1007                    filter_value.string_value,
1008                    filter_value.integer_value,
1009                ])?;
1010                inserted_values += 1;
1011            }
1012        }
1013        drop(insert_filter_value);
1014
1015        persist_simple_provenance_event(
1016            &tx,
1017            "operational_collection_filter_fields_updated",
1018            name,
1019            Some(serde_json::json!({
1020                "field_count": declared_fields.len(),
1021                "mutations_backfilled": mutations.len(),
1022                "inserted_filter_values": inserted_values,
1023            })),
1024        )?;
1025        let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1026            EngineError::Bridge("operational collection missing after filter update".to_owned())
1027        })?;
1028        tx.commit()?;
1029        Ok(updated)
1030    }
1031
1032    /// # Errors
1033    /// Returns [`EngineError`] if the collection is missing or the validation contract is invalid.
1034    pub fn update_operational_collection_validation(
1035        &self,
1036        name: &str,
1037        validation_json: &str,
1038    ) -> Result<OperationalCollectionRecord, EngineError> {
1039        parse_operational_validation_contract(validation_json)
1040            .map_err(EngineError::InvalidWrite)?;
1041
1042        let mut conn = self.connect()?;
1043        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1044        load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1045            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1046        })?;
1047        tx.execute(
1048            "UPDATE operational_collections SET validation_json = ?2 WHERE name = ?1",
1049            rusqlite::params![name, validation_json],
1050        )?;
1051        persist_simple_provenance_event(
1052            &tx,
1053            "operational_collection_validation_updated",
1054            name,
1055            Some(serde_json::json!({
1056                "has_validation": !validation_json.is_empty(),
1057            })),
1058        )?;
1059        let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1060            EngineError::Bridge("operational collection missing after validation update".to_owned())
1061        })?;
1062        tx.commit()?;
1063        Ok(updated)
1064    }
1065
1066    /// # Errors
1067    /// Returns [`EngineError`] if the collection is missing, the contract is invalid,
1068    /// or derived index rebuild fails.
1069    pub fn update_operational_collection_secondary_indexes(
1070        &self,
1071        name: &str,
1072        secondary_indexes_json: &str,
1073    ) -> Result<OperationalCollectionRecord, EngineError> {
1074        let mut conn = self.connect()?;
1075        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1076        let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1077            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1078        })?;
1079        let indexes = parse_operational_secondary_indexes_json(secondary_indexes_json, record.kind)
1080            .map_err(EngineError::InvalidWrite)?;
1081        tx.execute(
1082            "UPDATE operational_collections SET secondary_indexes_json = ?2 WHERE name = ?1",
1083            rusqlite::params![name, secondary_indexes_json],
1084        )?;
1085        let (mutation_entries_rebuilt, current_entries_rebuilt) =
1086            rebuild_operational_secondary_index_entries(&tx, &record.name, record.kind, &indexes)?;
1087        persist_simple_provenance_event(
1088            &tx,
1089            "operational_collection_secondary_indexes_updated",
1090            name,
1091            Some(serde_json::json!({
1092                "index_count": indexes.len(),
1093                "mutation_entries_rebuilt": mutation_entries_rebuilt,
1094                "current_entries_rebuilt": current_entries_rebuilt,
1095            })),
1096        )?;
1097        let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1098            EngineError::Bridge(
1099                "operational collection missing after secondary index update".to_owned(),
1100            )
1101        })?;
1102        tx.commit()?;
1103        Ok(updated)
1104    }
1105
1106    /// # Errors
1107    /// Returns [`EngineError`] if the collection is missing or rebuild fails.
1108    pub fn rebuild_operational_secondary_indexes(
1109        &self,
1110        name: &str,
1111    ) -> Result<OperationalSecondaryIndexRebuildReport, EngineError> {
1112        let mut conn = self.connect()?;
1113        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1114        let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1115            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1116        })?;
1117        let indexes =
1118            parse_operational_secondary_indexes_json(&record.secondary_indexes_json, record.kind)
1119                .map_err(EngineError::InvalidWrite)?;
1120        let (mutation_entries_rebuilt, current_entries_rebuilt) =
1121            rebuild_operational_secondary_index_entries(&tx, &record.name, record.kind, &indexes)?;
1122        persist_simple_provenance_event(
1123            &tx,
1124            "operational_secondary_indexes_rebuilt",
1125            name,
1126            Some(serde_json::json!({
1127                "index_count": indexes.len(),
1128                "mutation_entries_rebuilt": mutation_entries_rebuilt,
1129                "current_entries_rebuilt": current_entries_rebuilt,
1130            })),
1131        )?;
1132        tx.commit()?;
1133        Ok(OperationalSecondaryIndexRebuildReport {
1134            collection_name: name.to_owned(),
1135            mutation_entries_rebuilt,
1136            current_entries_rebuilt,
1137        })
1138    }
1139
1140    /// # Errors
1141    /// Returns [`EngineError`] if the collection is missing or its validation contract is invalid.
1142    pub fn validate_operational_collection_history(
1143        &self,
1144        name: &str,
1145    ) -> Result<OperationalHistoryValidationReport, EngineError> {
1146        let conn = self.connect()?;
1147        let record = load_operational_collection_record(&conn, name)?.ok_or_else(|| {
1148            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1149        })?;
1150        let Some(contract) = parse_operational_validation_contract(&record.validation_json)
1151            .map_err(EngineError::InvalidWrite)?
1152        else {
1153            return Err(EngineError::InvalidWrite(format!(
1154                "operational collection '{name}' has no validation_json configured"
1155            )));
1156        };
1157
1158        let mut stmt = conn.prepare(
1159            "SELECT id, record_key, op_kind, payload_json FROM operational_mutations \
1160             WHERE collection_name = ?1 ORDER BY mutation_order",
1161        )?;
1162        let rows = stmt
1163            .query_map([name], |row| {
1164                Ok((
1165                    row.get::<_, String>(0)?,
1166                    row.get::<_, String>(1)?,
1167                    row.get::<_, String>(2)?,
1168                    row.get::<_, String>(3)?,
1169                ))
1170            })?
1171            .collect::<Result<Vec<_>, _>>()?;
1172        drop(stmt);
1173
1174        let mut checked_rows = 0usize;
1175        let mut issues = Vec::new();
1176        for (mutation_id, record_key, op_kind, payload_json) in rows {
1177            if op_kind == "delete" {
1178                continue;
1179            }
1180            checked_rows += 1;
1181            if let Err(message) =
1182                validate_operational_payload_against_contract(&contract, payload_json.as_str())
1183            {
1184                issues.push(OperationalHistoryValidationIssue {
1185                    mutation_id,
1186                    record_key,
1187                    op_kind,
1188                    message,
1189                });
1190            }
1191        }
1192
1193        Ok(OperationalHistoryValidationReport {
1194            collection_name: name.to_owned(),
1195            checked_rows,
1196            invalid_row_count: issues.len(),
1197            issues,
1198        })
1199    }
1200
1201    /// # Errors
1202    /// Returns [`EngineError`] if the database query fails.
1203    pub fn disable_operational_collection(
1204        &self,
1205        name: &str,
1206    ) -> Result<OperationalCollectionRecord, EngineError> {
1207        let mut conn = self.connect()?;
1208        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1209        let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1210            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1211        })?;
1212        let changed = if record.disabled_at.is_none() {
1213            tx.execute(
1214                "UPDATE operational_collections SET disabled_at = unixepoch() WHERE name = ?1",
1215                [name],
1216            )?;
1217            true
1218        } else {
1219            false
1220        };
1221        let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1222            EngineError::Bridge("operational collection missing after disable".to_owned())
1223        })?;
1224        persist_simple_provenance_event(
1225            &tx,
1226            "operational_collection_disabled",
1227            name,
1228            Some(serde_json::json!({
1229                "disabled_at": record.disabled_at,
1230                "changed": changed,
1231            })),
1232        )?;
1233        tx.commit()?;
1234        Ok(record)
1235    }
1236
1237    /// # Errors
1238    /// Returns [`EngineError`] if the database query fails.
1239    pub fn compact_operational_collection(
1240        &self,
1241        name: &str,
1242        dry_run: bool,
1243    ) -> Result<OperationalCompactionReport, EngineError> {
1244        let mut conn = self.connect()?;
1245        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1246        let collection = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1247            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1248        })?;
1249        validate_append_only_operational_collection(&collection, "compact")?;
1250        let (mutation_ids, before_timestamp) =
1251            operational_compaction_candidates(&tx, &collection.retention_json, name)?;
1252        if dry_run {
1253            drop(tx);
1254            return Ok(OperationalCompactionReport {
1255                collection_name: name.to_owned(),
1256                deleted_mutations: mutation_ids.len(),
1257                dry_run: true,
1258                before_timestamp,
1259            });
1260        }
1261        let mut delete_stmt =
1262            tx.prepare_cached("DELETE FROM operational_mutations WHERE id = ?1")?;
1263        for mutation_id in &mutation_ids {
1264            delete_stmt.execute([mutation_id.as_str()])?;
1265        }
1266        drop(delete_stmt);
1267        persist_simple_provenance_event(
1268            &tx,
1269            "operational_collection_compacted",
1270            name,
1271            Some(serde_json::json!({
1272                "deleted_mutations": mutation_ids.len(),
1273                "before_timestamp": before_timestamp,
1274            })),
1275        )?;
1276        tx.commit()?;
1277        Ok(OperationalCompactionReport {
1278            collection_name: name.to_owned(),
1279            deleted_mutations: mutation_ids.len(),
1280            dry_run: false,
1281            before_timestamp,
1282        })
1283    }
1284
1285    /// # Errors
1286    /// Returns [`EngineError`] if the database query fails.
1287    pub fn purge_operational_collection(
1288        &self,
1289        name: &str,
1290        before_timestamp: i64,
1291    ) -> Result<OperationalPurgeReport, EngineError> {
1292        let mut conn = self.connect()?;
1293        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1294        let collection = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1295            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1296        })?;
1297        validate_append_only_operational_collection(&collection, "purge")?;
1298        let deleted_mutations = tx.execute(
1299            "DELETE FROM operational_mutations WHERE collection_name = ?1 AND created_at < ?2",
1300            rusqlite::params![name, before_timestamp],
1301        )?;
1302        persist_simple_provenance_event(
1303            &tx,
1304            "operational_collection_purged",
1305            name,
1306            Some(serde_json::json!({
1307                "deleted_mutations": deleted_mutations,
1308                "before_timestamp": before_timestamp,
1309            })),
1310        )?;
1311        tx.commit()?;
1312        Ok(OperationalPurgeReport {
1313            collection_name: name.to_owned(),
1314            deleted_mutations,
1315            before_timestamp,
1316        })
1317    }
1318
1319    /// # Errors
1320    /// Returns [`EngineError`] if collection selection or policy parsing fails.
1321    pub fn plan_operational_retention(
1322        &self,
1323        now_timestamp: i64,
1324        collection_names: Option<&[String]>,
1325        max_collections: Option<usize>,
1326    ) -> Result<OperationalRetentionPlanReport, EngineError> {
1327        let conn = self.connect()?;
1328        let records = load_operational_retention_records(&conn, collection_names, max_collections)?;
1329        let mut items = Vec::with_capacity(records.len());
1330        for record in records {
1331            items.push(plan_operational_retention_item(
1332                &conn,
1333                &record,
1334                now_timestamp,
1335            )?);
1336        }
1337        Ok(OperationalRetentionPlanReport {
1338            planned_at: now_timestamp,
1339            collections_examined: items.len(),
1340            items,
1341        })
1342    }
1343
1344    /// # Errors
1345    /// Returns [`EngineError`] if collection selection, policy parsing, or execution fails.
1346    pub fn run_operational_retention(
1347        &self,
1348        now_timestamp: i64,
1349        collection_names: Option<&[String]>,
1350        max_collections: Option<usize>,
1351        dry_run: bool,
1352    ) -> Result<OperationalRetentionRunReport, EngineError> {
1353        let mut conn = self.connect()?;
1354        let records = load_operational_retention_records(&conn, collection_names, max_collections)?;
1355        let mut items = Vec::with_capacity(records.len());
1356        let mut collections_acted_on = 0usize;
1357
1358        for record in records {
1359            let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1360            let item = run_operational_retention_item(&tx, &record, now_timestamp, dry_run)?;
1361            if item.deleted_mutations > 0 {
1362                collections_acted_on += 1;
1363            }
1364            if dry_run || item.action_kind == OperationalRetentionActionKind::Noop {
1365                drop(tx);
1366            } else {
1367                tx.commit()?;
1368            }
1369            items.push(item);
1370        }
1371
1372        Ok(OperationalRetentionRunReport {
1373            executed_at: now_timestamp,
1374            collections_examined: items.len(),
1375            collections_acted_on,
1376            dry_run,
1377            items,
1378        })
1379    }
1380
1381    /// # Errors
1382    /// Returns [`EngineError`] if the database query fails.
1383    pub fn trace_operational_collection(
1384        &self,
1385        collection_name: &str,
1386        record_key: Option<&str>,
1387    ) -> Result<OperationalTraceReport, EngineError> {
1388        let conn = self.connect()?;
1389        ensure_operational_collection_registered(&conn, collection_name)?;
1390        let mutations = if let Some(record_key) = record_key {
1391            let mut stmt = conn.prepare(
1392                "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
1393                 FROM operational_mutations \
1394                 WHERE collection_name = ?1 AND record_key = ?2 \
1395                 ORDER BY mutation_order",
1396            )?;
1397            stmt.query_map([collection_name, record_key], map_operational_mutation_row)?
1398                .collect::<Result<Vec<_>, _>>()?
1399        } else {
1400            let mut stmt = conn.prepare(
1401                "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
1402                 FROM operational_mutations \
1403                 WHERE collection_name = ?1 \
1404                 ORDER BY mutation_order",
1405            )?;
1406            stmt.query_map([collection_name], map_operational_mutation_row)?
1407                .collect::<Result<Vec<_>, _>>()?
1408        };
1409        let current_rows = if let Some(record_key) = record_key {
1410            let mut stmt = conn.prepare(
1411                "SELECT collection_name, record_key, payload_json, updated_at, last_mutation_id \
1412                 FROM operational_current \
1413                 WHERE collection_name = ?1 AND record_key = ?2 \
1414                 ORDER BY updated_at, record_key",
1415            )?;
1416            stmt.query_map([collection_name, record_key], map_operational_current_row)?
1417                .collect::<Result<Vec<_>, _>>()?
1418        } else {
1419            let mut stmt = conn.prepare(
1420                "SELECT collection_name, record_key, payload_json, updated_at, last_mutation_id \
1421                 FROM operational_current \
1422                 WHERE collection_name = ?1 \
1423                 ORDER BY updated_at, record_key",
1424            )?;
1425            stmt.query_map([collection_name], map_operational_current_row)?
1426                .collect::<Result<Vec<_>, _>>()?
1427        };
1428
1429        Ok(OperationalTraceReport {
1430            collection_name: collection_name.to_owned(),
1431            record_key: record_key.map(str::to_owned),
1432            mutation_count: mutations.len(),
1433            current_count: current_rows.len(),
1434            mutations,
1435            current_rows,
1436        })
1437    }
1438
1439    /// # Errors
1440    /// Returns [`EngineError`] if the collection contract is invalid or the filtered read fails.
1441    pub fn read_operational_collection(
1442        &self,
1443        request: &OperationalReadRequest,
1444    ) -> Result<OperationalReadReport, EngineError> {
1445        if request.collection_name.trim().is_empty() {
1446            return Err(EngineError::InvalidWrite(
1447                "operational read collection_name must not be empty".to_owned(),
1448            ));
1449        }
1450        if request.filters.is_empty() {
1451            return Err(EngineError::InvalidWrite(
1452                "operational read requires at least one filter clause".to_owned(),
1453            ));
1454        }
1455
1456        let conn = self.connect()?;
1457        let record = load_operational_collection_record(&conn, &request.collection_name)?
1458            .ok_or_else(|| {
1459                EngineError::InvalidWrite(format!(
1460                    "operational collection '{}' is not registered",
1461                    request.collection_name
1462                ))
1463            })?;
1464        validate_append_only_operational_collection(&record, "read")?;
1465        let declared_fields = parse_operational_filter_fields(&record.filter_fields_json)
1466            .map_err(EngineError::InvalidWrite)?;
1467        let secondary_indexes =
1468            parse_operational_secondary_indexes_json(&record.secondary_indexes_json, record.kind)
1469                .map_err(EngineError::InvalidWrite)?;
1470        let applied_limit = operational_read_limit(request.limit)?;
1471        let filters = compile_operational_read_filters(&request.filters, &declared_fields)?;
1472        if let Some(report) = execute_operational_secondary_index_read(
1473            &conn,
1474            &request.collection_name,
1475            &filters,
1476            &secondary_indexes,
1477            applied_limit,
1478        )? {
1479            return Ok(report);
1480        }
1481        execute_operational_filtered_read(&conn, &request.collection_name, &filters, applied_limit)
1482    }
1483
1484    /// # Errors
1485    /// Returns [`EngineError`] if the database query fails or collection validation fails.
1486    pub fn rebuild_operational_current(
1487        &self,
1488        collection_name: Option<&str>,
1489    ) -> Result<OperationalRepairReport, EngineError> {
1490        let mut conn = self.connect()?;
1491        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1492        let collections = if let Some(name) = collection_name {
1493            let maybe_kind: Option<String> = tx
1494                .query_row(
1495                    "SELECT kind FROM operational_collections WHERE name = ?1",
1496                    [name],
1497                    |row| row.get(0),
1498                )
1499                .optional()?;
1500            let Some(kind) = maybe_kind else {
1501                return Err(EngineError::InvalidWrite(format!(
1502                    "operational collection '{name}' is not registered"
1503                )));
1504            };
1505            if kind != OperationalCollectionKind::LatestState.as_str() {
1506                return Err(EngineError::InvalidWrite(format!(
1507                    "operational collection '{name}' is not latest_state"
1508                )));
1509            }
1510            vec![name.to_owned()]
1511        } else {
1512            let mut stmt = tx.prepare(
1513                "SELECT name FROM operational_collections WHERE kind = 'latest_state' ORDER BY name",
1514            )?;
1515            stmt.query_map([], |row| row.get::<_, String>(0))?
1516                .collect::<Result<Vec<_>, _>>()?
1517        };
1518
1519        let rebuilt_rows = rebuild_operational_current_rows(&tx, &collections)?;
1520        for collection in &collections {
1521            let record = load_operational_collection_record(&tx, collection)?.ok_or_else(|| {
1522                EngineError::Bridge(format!(
1523                    "operational collection '{collection}' missing during current rebuild"
1524                ))
1525            })?;
1526            let indexes = parse_operational_secondary_indexes_json(
1527                &record.secondary_indexes_json,
1528                record.kind,
1529            )
1530            .map_err(EngineError::InvalidWrite)?;
1531            if !indexes.is_empty() {
1532                rebuild_operational_secondary_index_entries(
1533                    &tx,
1534                    &record.name,
1535                    record.kind,
1536                    &indexes,
1537                )?;
1538            }
1539        }
1540
1541        persist_simple_provenance_event(
1542            &tx,
1543            "operational_current_rebuilt",
1544            collection_name.unwrap_or("*"),
1545            Some(serde_json::json!({
1546                "collections_rebuilt": collections.len(),
1547                "current_rows_rebuilt": rebuilt_rows,
1548            })),
1549        )?;
1550        tx.commit()?;
1551
1552        Ok(OperationalRepairReport {
1553            collections_rebuilt: collections.len(),
1554            current_rows_rebuilt: rebuilt_rows,
1555        })
1556    }
1557
1558    /// # Errors
1559    /// Returns [`EngineError`] if the database connection fails or the projection rebuild fails.
1560    pub fn rebuild_projections(
1561        &self,
1562        target: ProjectionTarget,
1563    ) -> Result<ProjectionRepairReport, EngineError> {
1564        self.projections.rebuild_projections(target)
1565    }
1566
1567    /// # Errors
1568    /// Returns [`EngineError`] if the database connection fails or the projection rebuild fails.
1569    pub fn rebuild_missing_projections(&self) -> Result<ProjectionRepairReport, EngineError> {
1570        self.projections.rebuild_missing_projections()
1571    }
1572
1573    /// Register (or update) an FTS property projection schema for the given node kind.
1574    ///
1575    /// After registration, any node of this kind will have the declared JSON property
1576    /// paths extracted, concatenated, and indexed in the `fts_node_properties` FTS5 table.
1577    ///
1578    /// # Errors
1579    /// Returns [`EngineError`] if `property_paths` is empty, contains duplicates,
1580    /// or if the database write fails.
1581    pub fn register_fts_property_schema(
1582        &self,
1583        kind: &str,
1584        property_paths: &[String],
1585        separator: Option<&str>,
1586    ) -> Result<FtsPropertySchemaRecord, EngineError> {
1587        let specs: Vec<FtsPropertyPathSpec> = property_paths
1588            .iter()
1589            .map(|p| FtsPropertyPathSpec::scalar(p.clone()))
1590            .collect();
1591        self.register_fts_property_schema_with_entries(kind, &specs, separator, &[])
1592    }
1593
1594    /// Register (or update) an FTS property projection schema with
1595    /// per-path modes and optional exclude paths. When the registered
1596    /// schema introduces a new recursive-mode path for this kind, this
1597    /// method eagerly rebuilds `fts_node_properties` and
1598    /// `fts_node_property_positions` for every active node of that kind,
1599    /// all in the same transaction as the schema row update.
1600    ///
1601    /// # Errors
1602    /// Returns [`EngineError`] if the paths are invalid, the JSON
1603    /// serialization fails, or the rebuild transaction fails.
1604    pub fn register_fts_property_schema_with_entries(
1605        &self,
1606        kind: &str,
1607        entries: &[FtsPropertyPathSpec],
1608        separator: Option<&str>,
1609        exclude_paths: &[String],
1610    ) -> Result<FtsPropertySchemaRecord, EngineError> {
1611        let paths: Vec<String> = entries.iter().map(|e| e.path.clone()).collect();
1612        validate_fts_property_paths(&paths)?;
1613        for p in exclude_paths {
1614            if !p.starts_with("$.") {
1615                return Err(EngineError::InvalidWrite(format!(
1616                    "exclude_paths entries must start with '$.' but got: {p}"
1617                )));
1618            }
1619        }
1620        let separator = separator.unwrap_or(" ");
1621        let paths_json = serialize_property_paths_json(entries, exclude_paths)?;
1622
1623        let mut conn = self.connect()?;
1624        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1625
1626        // Determine whether the registration introduces a recursive path
1627        // that was not present in the previously-registered schema for
1628        // this kind. If so, we must eagerly rebuild property FTS rows and
1629        // position map for every active node of this kind within the same
1630        // transaction.
1631        let previous_row: Option<(String, String)> = tx
1632            .query_row(
1633                "SELECT property_paths_json, separator FROM fts_property_schemas WHERE kind = ?1",
1634                [kind],
1635                |row| {
1636                    let json: String = row.get(0)?;
1637                    let sep: String = row.get(1)?;
1638                    Ok((json, sep))
1639                },
1640            )
1641            .optional()?;
1642        let had_previous_schema = previous_row.is_some();
1643        let previous_recursive_paths: Vec<String> = previous_row
1644            .map(|(json, sep)| crate::writer::parse_property_schema_json(&json, &sep))
1645            .map_or(Vec::new(), |schema| {
1646                schema
1647                    .paths
1648                    .into_iter()
1649                    .filter(|p| p.mode == crate::writer::PropertyPathMode::Recursive)
1650                    .map(|p| p.path)
1651                    .collect()
1652            });
1653        let new_recursive_paths: Vec<&str> = entries
1654            .iter()
1655            .filter(|e| e.mode == FtsPropertyPathMode::Recursive)
1656            .map(|e| e.path.as_str())
1657            .collect();
1658        let introduces_new_recursive = new_recursive_paths
1659            .iter()
1660            .any(|p| !previous_recursive_paths.iter().any(|prev| prev == p));
1661
1662        tx.execute(
1663            "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
1664             VALUES (?1, ?2, ?3) \
1665             ON CONFLICT(kind) DO UPDATE SET property_paths_json = ?2, separator = ?3",
1666            rusqlite::params![kind, paths_json, separator],
1667        )?;
1668
1669        // Eager transactional rebuild: always fire on any update (i.e.
1670        // whenever the row already existed). First-time registrations never
1671        // have a previous schema, so they cost nothing; updates trigger a
1672        // rebuild unconditionally. This covers recursive-path additions
1673        // AND scalar-only re-registrations where only the path or
1674        // separator changed — without a rebuild the existing rows would
1675        // retain stale scalar-derived text. (P4-P2-1)
1676        let needs_rebuild = introduces_new_recursive || had_previous_schema;
1677        if needs_rebuild {
1678            tx.execute("DELETE FROM fts_node_properties WHERE kind = ?1", [kind])?;
1679            tx.execute(
1680                "DELETE FROM fts_node_property_positions WHERE kind = ?1",
1681                [kind],
1682            )?;
1683            // Scope the rebuild to `kind` only. The multi-kind
1684            // `insert_property_fts_rows` iterates over every registered
1685            // schema and would re-insert rows for siblings that were not
1686            // deleted above, duplicating their FTS entries.
1687            crate::projection::insert_property_fts_rows_for_kind(&tx, kind)?;
1688        }
1689
1690        persist_simple_provenance_event(
1691            &tx,
1692            "fts_property_schema_registered",
1693            kind,
1694            Some(serde_json::json!({
1695                "property_paths": paths,
1696                "separator": separator,
1697                "exclude_paths": exclude_paths,
1698                "eager_rebuild": needs_rebuild,
1699            })),
1700        )?;
1701        tx.commit()?;
1702
1703        self.describe_fts_property_schema(kind)?.ok_or_else(|| {
1704            EngineError::Bridge("registered FTS property schema missing after commit".to_owned())
1705        })
1706    }
1707
1708    /// Return the FTS property schema for a single node kind, if registered.
1709    ///
1710    /// # Errors
1711    /// Returns [`EngineError`] if the database query fails.
1712    pub fn describe_fts_property_schema(
1713        &self,
1714        kind: &str,
1715    ) -> Result<Option<FtsPropertySchemaRecord>, EngineError> {
1716        let conn = self.connect()?;
1717        load_fts_property_schema_record(&conn, kind)
1718    }
1719
1720    /// Return all registered FTS property schemas.
1721    ///
1722    /// # Errors
1723    /// Returns [`EngineError`] if the database query fails.
1724    pub fn list_fts_property_schemas(&self) -> Result<Vec<FtsPropertySchemaRecord>, EngineError> {
1725        let conn = self.connect()?;
1726        let mut stmt = conn.prepare(
1727            "SELECT kind, property_paths_json, separator, format_version \
1728             FROM fts_property_schemas ORDER BY kind",
1729        )?;
1730        let records = stmt
1731            .query_map([], |row| {
1732                let kind: String = row.get(0)?;
1733                let paths_json: String = row.get(1)?;
1734                let separator: String = row.get(2)?;
1735                let format_version: i64 = row.get(3)?;
1736                Ok(build_fts_property_schema_record(
1737                    kind,
1738                    &paths_json,
1739                    separator,
1740                    format_version,
1741                ))
1742            })?
1743            .collect::<Result<Vec<_>, _>>()?;
1744        Ok(records)
1745    }
1746
1747    /// Remove the FTS property schema for a node kind.
1748    ///
1749    /// This does **not** delete existing `fts_node_properties` rows for this kind;
1750    /// call `rebuild_projections(Fts)` to clean up stale rows.
1751    ///
1752    /// # Errors
1753    /// Returns [`EngineError`] if the kind is not registered or the delete fails.
1754    pub fn remove_fts_property_schema(&self, kind: &str) -> Result<(), EngineError> {
1755        let mut conn = self.connect()?;
1756        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1757        let deleted = tx.execute("DELETE FROM fts_property_schemas WHERE kind = ?1", [kind])?;
1758        if deleted == 0 {
1759            return Err(EngineError::InvalidWrite(format!(
1760                "FTS property schema for kind '{kind}' is not registered"
1761            )));
1762        }
1763        persist_simple_provenance_event(&tx, "fts_property_schema_removed", kind, None)?;
1764        tx.commit()?;
1765        Ok(())
1766    }
1767
1768    /// Recreate enabled vector profiles from persisted `vector_profiles` metadata.
1769    ///
1770    /// # Errors
1771    /// Returns [`EngineError`] if the database connection fails, reading metadata fails,
1772    /// or sqlite-vec support is unavailable while enabled profiles are present.
1773    pub fn restore_vector_profiles(&self) -> Result<ProjectionRepairReport, EngineError> {
1774        let conn = self.connect()?;
1775        let profiles: Vec<(String, String, i64)> = {
1776            let mut stmt = conn.prepare(
1777                "SELECT profile, table_name, dimension \
1778                 FROM vector_profiles WHERE enabled = 1 ORDER BY profile",
1779            )?;
1780            stmt.query_map([], |row| {
1781                Ok((
1782                    row.get::<_, String>(0)?,
1783                    row.get::<_, String>(1)?,
1784                    row.get::<_, i64>(2)?,
1785                ))
1786            })?
1787            .collect::<Result<Vec<_>, _>>()?
1788        };
1789
1790        for (profile, table_name, dimension) in &profiles {
1791            let dimension = usize::try_from(*dimension).map_err(|_| {
1792                EngineError::Bridge(format!("invalid vector profile dimension: {dimension}"))
1793            })?;
1794            self.schema_manager
1795                .ensure_vector_profile(&conn, profile, table_name, dimension)?;
1796        }
1797
1798        Ok(ProjectionRepairReport {
1799            targets: vec![ProjectionTarget::Vec],
1800            rebuilt_rows: profiles.len(),
1801            notes: vec![],
1802        })
1803    }
1804
1805    /// Rebuild vector embeddings using an application-supplied regeneration
1806    /// contract and generator command.
1807    ///
1808    /// The config is persisted in `vector_embedding_contracts` so the metadata
1809    /// required for recovery survives future repair runs.
1810    ///
1811    /// # Errors
1812    /// Returns [`EngineError`] if the database connection fails, the config is
1813    /// invalid, the generator command fails, or the regenerated embeddings are
1814    /// malformed.
1815    #[allow(clippy::too_many_lines)]
1816    pub fn regenerate_vector_embeddings(
1817        &self,
1818        config: &VectorRegenerationConfig,
1819    ) -> Result<VectorRegenerationReport, EngineError> {
1820        self.regenerate_vector_embeddings_with_policy(config, &VectorGeneratorPolicy::default())
1821    }
1822
    /// Rebuild vector embeddings with a caller-supplied [`VectorGeneratorPolicy`].
    ///
    /// Flow, as implemented below:
    /// 1. Validate the config, collect the chunks, build the generator input
    ///    and hash it (`snapshot_hash`).
    /// 2. Record a `vector_regeneration_requested` audit event.
    /// 3. Run the generator and validate its output. On failure a
    ///    `vector_regeneration_failed` event is written best-effort and the
    ///    error is returned.
    /// 4. Inside an immediate transaction: ensure the vector profile exists,
    ///    re-collect the chunks and compare hashes (a mismatch is reported as
    ///    snapshot drift), persist the contract, replace the contents of
    ///    `vec_nodes_active`, record `vector_regeneration_apply`, and commit.
    ///
    /// # Errors
    /// Returns [`EngineError`] if the database connection fails, the config is
    /// invalid, the generator command fails, or the regenerated embeddings are
    /// malformed.
    #[allow(clippy::too_many_lines)]
    pub fn regenerate_vector_embeddings_with_policy(
        &self,
        config: &VectorRegenerationConfig,
        policy: &VectorGeneratorPolicy,
    ) -> Result<VectorRegenerationReport, EngineError> {
        let conn = self.connect()?;
        // From here on `config` is the validated value returned by the validator.
        let config = validate_vector_regeneration_config(&conn, config, policy)
            .map_err(|failure| failure.to_engine_error())?;
        let chunks = collect_regeneration_chunks(&conn)?;
        let payload = build_regeneration_input(&config, chunks.clone());
        // Hash of the generator input; compared again inside the apply
        // transaction to detect chunks that changed while the generator ran.
        let snapshot_hash = compute_snapshot_hash(&payload)?;
        let audit_metadata = VectorRegenerationAuditMetadata {
            profile: config.profile.clone(),
            model_identity: config.model_identity.clone(),
            model_version: config.model_version.clone(),
            chunk_count: chunks.len(),
            snapshot_hash: snapshot_hash.clone(),
            failure_class: None,
        };
        // Written on the plain connection, before the apply transaction exists.
        persist_vector_regeneration_event(
            &conn,
            "vector_regeneration_requested",
            &config.profile,
            &audit_metadata,
        )?;
        let notes = generator_policy_notes(policy);
        let generated = match run_vector_generator_bounded(&config, &payload, policy) {
            Ok(generated) => generated,
            Err(failure) => {
                self.persist_vector_regeneration_failure_best_effort(
                    &config.profile,
                    &audit_metadata,
                    &failure,
                );
                return Err(failure.to_engine_error());
            }
        };
        let mut embedding_map = match validate_generated_embeddings(&config, &chunks, generated) {
            Ok(embedding_map) => embedding_map,
            Err(failure) => {
                self.persist_vector_regeneration_failure_best_effort(
                    &config.profile,
                    &audit_metadata,
                    &failure,
                );
                return Err(failure.to_engine_error());
            }
        };

        // Rebind as mutable: opening a transaction needs `&mut Connection`.
        let mut conn = conn;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
        match self.schema_manager.ensure_vector_profile(
            &tx,
            &config.profile,
            &config.table_name,
            config.dimension,
        ) {
            Ok(()) => {}
            Err(SchemaError::MissingCapability(message)) => {
                let failure = VectorRegenerationFailure::new(
                    VectorRegenerationFailureClass::UnsupportedVecCapability,
                    message,
                );
                // The transaction is dropped before the failure event is
                // written; the best-effort writer opens its own connection.
                drop(tx);
                self.persist_vector_regeneration_failure_best_effort(
                    &config.profile,
                    &audit_metadata,
                    &failure,
                );
                return Err(failure.to_engine_error());
            }
            // Other schema errors are returned without a failure audit event.
            Err(error) => return Err(EngineError::Schema(error)),
        }
        // Re-read the chunks inside the transaction and compare hashes.
        let apply_chunks = collect_regeneration_chunks(&tx)?;
        let apply_payload = build_regeneration_input(&config, apply_chunks.clone());
        let apply_hash = compute_snapshot_hash(&apply_payload)?;
        if apply_hash != snapshot_hash {
            let failure = VectorRegenerationFailure::new(
                VectorRegenerationFailureClass::SnapshotDrift,
                "chunk snapshot changed during generation; retry".to_owned(),
            );
            drop(tx);
            self.persist_vector_regeneration_failure_best_effort(
                &config.profile,
                &audit_metadata,
                &failure,
            );
            return Err(failure.to_engine_error());
        }
        persist_vector_contract(&tx, &config, &snapshot_hash)?;
        // NOTE(review): the table name is hard-coded here and in the INSERT
        // below, while `config.table_name` is what was passed to
        // `ensure_vector_profile` — confirm the two are meant to coincide.
        tx.execute("DELETE FROM vec_nodes_active", [])?;
        let mut stmt = tx
            .prepare_cached("INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES (?1, ?2)")?;
        let mut regenerated_rows = 0usize;
        for chunk in &apply_chunks {
            // `remove` takes the embedding out of the map by value.
            let Some(embedding) = embedding_map.remove(&chunk.chunk_id) else {
                // Release the statement, then the transaction, before the
                // failure event is written on a separate connection.
                drop(stmt);
                drop(tx);
                let failure = VectorRegenerationFailure::new(
                    VectorRegenerationFailureClass::MalformedGeneratorJson,
                    format!(
                        "generator did not return embedding for chunk '{}'",
                        chunk.chunk_id
                    ),
                );
                self.persist_vector_regeneration_failure_best_effort(
                    &config.profile,
                    &audit_metadata,
                    &failure,
                );
                return Err(failure.to_engine_error());
            };
            stmt.execute(rusqlite::params![chunk.chunk_id.as_str(), embedding])?;
            regenerated_rows += 1;
        }
        // The statement borrows `tx`; it must be dropped before `tx.commit()`
        // consumes the transaction.
        drop(stmt);
        persist_vector_regeneration_event(
            &tx,
            "vector_regeneration_apply",
            &config.profile,
            &audit_metadata,
        )?;
        tx.commit()?;

        Ok(VectorRegenerationReport {
            profile: config.profile.clone(),
            table_name: config.table_name.clone(),
            dimension: config.dimension,
            // Count from the first (pre-generation) chunk collection.
            total_chunks: chunks.len(),
            regenerated_rows,
            contract_persisted: true,
            notes,
        })
    }
1962
1963    fn persist_vector_regeneration_failure_best_effort(
1964        &self,
1965        profile: &str,
1966        metadata: &VectorRegenerationAuditMetadata,
1967        failure: &VectorRegenerationFailure,
1968    ) {
1969        let Ok(conn) = self.connect() else {
1970            return;
1971        };
1972        let failure_metadata = VectorRegenerationAuditMetadata {
1973            profile: metadata.profile.clone(),
1974            model_identity: metadata.model_identity.clone(),
1975            model_version: metadata.model_version.clone(),
1976            chunk_count: metadata.chunk_count,
1977            snapshot_hash: metadata.snapshot_hash.clone(),
1978            failure_class: Some(failure.failure_class_label().to_owned()),
1979        };
1980        let _ = persist_vector_regeneration_event(
1981            &conn,
1982            "vector_regeneration_failed",
1983            profile,
1984            &failure_metadata,
1985        );
1986    }
1987
1988    /// # Errors
1989    /// Returns [`EngineError`] if the database connection fails or any SQL query fails.
1990    pub fn trace_source(&self, source_ref: &str) -> Result<TraceReport, EngineError> {
1991        let conn = self.connect()?;
1992
1993        let node_logical_ids = collect_strings(
1994            &conn,
1995            "SELECT logical_id FROM nodes WHERE source_ref = ?1 ORDER BY created_at",
1996            source_ref,
1997        )?;
1998        let action_ids = collect_strings(
1999            &conn,
2000            "SELECT id FROM actions WHERE source_ref = ?1 ORDER BY created_at",
2001            source_ref,
2002        )?;
2003        let operational_mutation_ids = collect_strings(
2004            &conn,
2005            "SELECT id FROM operational_mutations WHERE source_ref = ?1 ORDER BY mutation_order",
2006            source_ref,
2007        )?;
2008
2009        Ok(TraceReport {
2010            source_ref: source_ref.to_owned(),
2011            node_rows: count_source_ref(&conn, "nodes", source_ref)?,
2012            edge_rows: count_source_ref(&conn, "edges", source_ref)?,
2013            action_rows: count_source_ref(&conn, "actions", source_ref)?,
2014            operational_mutation_rows: count_source_ref(
2015                &conn,
2016                "operational_mutations",
2017                source_ref,
2018            )?,
2019            node_logical_ids,
2020            action_ids,
2021            operational_mutation_ids,
2022        })
2023    }
2024
2025    /// # Errors
2026    /// Returns [`EngineError`] if the database connection fails, the transaction cannot be
2027    /// started, or lifecycle restoration prerequisites are missing.
2028    #[allow(clippy::too_many_lines)]
2029    pub fn restore_logical_id(
2030        &self,
2031        logical_id: &str,
2032    ) -> Result<LogicalRestoreReport, EngineError> {
2033        let mut conn = self.connect()?;
2034        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2035
2036        let active_count: i64 = tx.query_row(
2037            "SELECT count(*) FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
2038            [logical_id],
2039            |row| row.get(0),
2040        )?;
2041        if active_count > 0 {
2042            return Ok(LogicalRestoreReport {
2043                logical_id: logical_id.to_owned(),
2044                was_noop: true,
2045                restored_node_rows: 0,
2046                restored_edge_rows: 0,
2047                restored_chunk_rows: 0,
2048                restored_fts_rows: 0,
2049                restored_property_fts_rows: 0,
2050                restored_vec_rows: 0,
2051                skipped_edges: Vec::new(),
2052                notes: vec!["logical_id already active".to_owned()],
2053            });
2054        }
2055
2056        let restored_node: Option<(String, String)> = tx
2057            .query_row(
2058                "SELECT row_id, kind FROM nodes \
2059                 WHERE logical_id = ?1 AND superseded_at IS NOT NULL \
2060                 ORDER BY superseded_at DESC, created_at DESC, rowid DESC LIMIT 1",
2061                [logical_id],
2062                |row| Ok((row.get(0)?, row.get(1)?)),
2063            )
2064            .optional()?;
2065        let (restored_node_row_id, restored_kind) = restored_node.ok_or_else(|| {
2066            EngineError::InvalidWrite(format!("logical_id '{logical_id}' is not retired"))
2067        })?;
2068
2069        tx.execute(
2070            "UPDATE nodes SET superseded_at = NULL WHERE row_id = ?1",
2071            [restored_node_row_id.as_str()],
2072        )?;
2073
2074        let retire_scope: Option<(i64, Option<String>, i64)> = tx
2075            .query_row(
2076                "SELECT rowid, source_ref, created_at FROM provenance_events \
2077                 WHERE event_type = 'node_retire' AND subject = ?1 \
2078                 ORDER BY created_at DESC, rowid DESC LIMIT 1",
2079                [logical_id],
2080                |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
2081            )
2082            .optional()?;
2083        let (restored_edge_rows, skipped_edges) = if let Some((
2084            retire_event_rowid,
2085            retire_source_ref,
2086            retire_created_at,
2087        )) = retire_scope
2088        {
2089            restore_validated_edges(
2090                &tx,
2091                logical_id,
2092                retire_source_ref.as_deref(),
2093                retire_created_at,
2094                retire_event_rowid,
2095            )?
2096        } else {
2097            (0, Vec::new())
2098        };
2099
2100        let restored_chunk_rows: usize = tx
2101            .query_row(
2102                "SELECT count(*) FROM chunks WHERE node_logical_id = ?1",
2103                [logical_id],
2104                |row| row.get::<_, i64>(0),
2105            )
2106            .map(i64_to_usize)?;
2107        tx.execute(
2108            "DELETE FROM fts_nodes WHERE node_logical_id = ?1",
2109            [logical_id],
2110        )?;
2111        let restored_fts_rows = tx.execute(
2112            "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
2113             SELECT id, node_logical_id, ?2, text_content \
2114             FROM chunks WHERE node_logical_id = ?1",
2115            rusqlite::params![logical_id, restored_kind],
2116        )?;
2117        let restored_vec_rows = count_vec_rows_for_logical_id(&tx, logical_id)?;
2118
2119        // Rebuild property FTS for the restored node.
2120        tx.execute(
2121            "DELETE FROM fts_node_properties WHERE node_logical_id = ?1",
2122            [logical_id],
2123        )?;
2124        let restored_property_fts_rows =
2125            rebuild_single_node_property_fts(&tx, logical_id, &restored_kind)?;
2126
2127        persist_simple_provenance_event(
2128            &tx,
2129            "restore_logical_id",
2130            logical_id,
2131            Some(serde_json::json!({
2132                "restored_node_rows": 1,
2133                "restored_edge_rows": restored_edge_rows,
2134                "restored_chunk_rows": restored_chunk_rows,
2135                "restored_fts_rows": restored_fts_rows,
2136                "restored_property_fts_rows": restored_property_fts_rows,
2137                "restored_vec_rows": restored_vec_rows,
2138            })),
2139        )?;
2140        tx.commit()?;
2141
2142        Ok(LogicalRestoreReport {
2143            logical_id: logical_id.to_owned(),
2144            was_noop: false,
2145            restored_node_rows: 1,
2146            restored_edge_rows,
2147            restored_chunk_rows,
2148            restored_fts_rows,
2149            restored_property_fts_rows,
2150            restored_vec_rows,
2151            skipped_edges,
2152            notes: Vec::new(),
2153        })
2154    }
2155
2156    /// # Errors
2157    /// Returns [`EngineError`] if the database connection fails, the transaction cannot be
2158    /// started, or the purge mutation fails.
2159    pub fn purge_logical_id(&self, logical_id: &str) -> Result<LogicalPurgeReport, EngineError> {
2160        let mut conn = self.connect()?;
2161        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2162
2163        let active_count: i64 = tx.query_row(
2164            "SELECT count(*) FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
2165            [logical_id],
2166            |row| row.get(0),
2167        )?;
2168        if active_count > 0 {
2169            return Ok(LogicalPurgeReport {
2170                logical_id: logical_id.to_owned(),
2171                was_noop: true,
2172                deleted_node_rows: 0,
2173                deleted_edge_rows: 0,
2174                deleted_chunk_rows: 0,
2175                deleted_fts_rows: 0,
2176                deleted_vec_rows: 0,
2177                notes: vec!["logical_id is active; purge skipped".to_owned()],
2178            });
2179        }
2180
2181        let node_rows: i64 = tx.query_row(
2182            "SELECT count(*) FROM nodes WHERE logical_id = ?1",
2183            [logical_id],
2184            |row| row.get(0),
2185        )?;
2186        if node_rows == 0 {
2187            return Err(EngineError::InvalidWrite(format!(
2188                "logical_id '{logical_id}' does not exist"
2189            )));
2190        }
2191
2192        let deleted_vec_rows = delete_vec_rows_for_logical_id(&tx, logical_id)?;
2193        let deleted_fts_rows = tx.execute(
2194            "DELETE FROM fts_nodes WHERE node_logical_id = ?1",
2195            [logical_id],
2196        )?;
2197        let deleted_edge_rows = tx.execute(
2198            "DELETE FROM edges WHERE source_logical_id = ?1 OR target_logical_id = ?1",
2199            [logical_id],
2200        )?;
2201        let deleted_chunk_rows = tx.execute(
2202            "DELETE FROM chunks WHERE node_logical_id = ?1",
2203            [logical_id],
2204        )?;
2205        let deleted_node_rows =
2206            tx.execute("DELETE FROM nodes WHERE logical_id = ?1", [logical_id])?;
2207        tx.execute(
2208            "DELETE FROM node_access_metadata WHERE logical_id = ?1",
2209            [logical_id],
2210        )?;
2211
2212        persist_simple_provenance_event(
2213            &tx,
2214            "purge_logical_id",
2215            logical_id,
2216            Some(serde_json::json!({
2217                "deleted_node_rows": deleted_node_rows,
2218                "deleted_edge_rows": deleted_edge_rows,
2219                "deleted_chunk_rows": deleted_chunk_rows,
2220                "deleted_fts_rows": deleted_fts_rows,
2221                "deleted_vec_rows": deleted_vec_rows,
2222            })),
2223        )?;
2224        tx.commit()?;
2225
2226        Ok(LogicalPurgeReport {
2227            logical_id: logical_id.to_owned(),
2228            was_noop: false,
2229            deleted_node_rows,
2230            deleted_edge_rows,
2231            deleted_chunk_rows,
2232            deleted_fts_rows,
2233            deleted_vec_rows,
2234            notes: Vec::new(),
2235        })
2236    }
2237
2238    /// Purge provenance events older than `before_timestamp`.
2239    ///
2240    /// By default, `excise` and `purge_logical_id` event types are preserved so that
2241    /// data-deletion audit trails survive. Pass an explicit
2242    /// `preserve_event_types` list to override this default.
2243    ///
2244    /// # Errors
2245    /// Returns [`EngineError`] if the database connection fails, the transaction
2246    /// cannot be started, or any SQL statement fails.
2247    pub fn purge_provenance_events(
2248        &self,
2249        before_timestamp: i64,
2250        options: &ProvenancePurgeOptions,
2251    ) -> Result<ProvenancePurgeReport, EngineError> {
2252        let mut conn = self.connect()?;
2253        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2254
2255        let preserved_types: Vec<&str> = if options.preserve_event_types.is_empty() {
2256            vec!["excise", "purge_logical_id"]
2257        } else {
2258            options
2259                .preserve_event_types
2260                .iter()
2261                .map(String::as_str)
2262                .collect()
2263        };
2264
2265        // Build the NOT IN clause dynamically based on preserved types.
2266        let placeholders: String = (0..preserved_types.len())
2267            .map(|i| format!("?{}", i + 2))
2268            .collect::<Vec<_>>()
2269            .join(", ");
2270        let count_query = format!(
2271            "SELECT count(*) FROM provenance_events \
2272             WHERE created_at < ?1 AND event_type NOT IN ({placeholders})"
2273        );
2274        let delete_query = format!(
2275            "DELETE FROM provenance_events WHERE rowid IN (\
2276             SELECT rowid FROM provenance_events \
2277             WHERE created_at < ?1 AND event_type NOT IN ({placeholders}) \
2278             LIMIT 10000)"
2279        );
2280
2281        let bind_params = |stmt: &mut rusqlite::Statement<'_>| -> Result<(), rusqlite::Error> {
2282            stmt.raw_bind_parameter(1, before_timestamp)?;
2283            for (i, event_type) in preserved_types.iter().enumerate() {
2284                stmt.raw_bind_parameter(i + 2, *event_type)?;
2285            }
2286            Ok(())
2287        };
2288
2289        let events_deleted = if options.dry_run {
2290            let mut stmt = tx.prepare(&count_query)?;
2291            bind_params(&mut stmt)?;
2292            stmt.raw_query()
2293                .next()?
2294                .map_or(0, |row| row.get::<_, u64>(0).unwrap_or(0))
2295        } else {
2296            let mut total_deleted: u64 = 0;
2297            loop {
2298                let mut stmt = tx.prepare(&delete_query)?;
2299                bind_params(&mut stmt)?;
2300                let deleted = stmt.raw_execute()?;
2301                if deleted == 0 {
2302                    break;
2303                }
2304                total_deleted += deleted as u64;
2305            }
2306            total_deleted
2307        };
2308
2309        let total_after: u64 =
2310            tx.query_row("SELECT count(*) FROM provenance_events", [], |row| {
2311                row.get(0)
2312            })?;
2313
2314        let oldest_remaining: Option<i64> = tx
2315            .query_row("SELECT MIN(created_at) FROM provenance_events", [], |row| {
2316                row.get(0)
2317            })
2318            .optional()?
2319            .flatten();
2320
2321        if !options.dry_run {
2322            tx.commit()?;
2323        }
2324
2325        // In dry_run mode nothing was deleted, so total_after includes the
2326        // would-be-deleted rows; subtract to get the preserved count.
2327        let events_preserved = if options.dry_run {
2328            total_after - events_deleted
2329        } else {
2330            total_after
2331        };
2332
2333        Ok(ProvenancePurgeReport {
2334            events_deleted,
2335            events_preserved,
2336            oldest_remaining,
2337        })
2338    }
2339
2340    /// # Errors
2341    /// Returns [`EngineError`] if the database connection fails, the transaction cannot be
2342    /// started, or any SQL statement fails.
2343    #[allow(clippy::too_many_lines)]
2344    pub fn excise_source(&self, source_ref: &str) -> Result<TraceReport, EngineError> {
2345        let mut conn = self.connect()?;
2346
2347        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2348        let affected_operational_collections = collect_strings_tx(
2349            &tx,
2350            "SELECT DISTINCT m.collection_name \
2351             FROM operational_mutations m \
2352             JOIN operational_collections c ON c.name = m.collection_name \
2353             WHERE m.source_ref = ?1 AND c.kind = 'latest_state' \
2354             ORDER BY m.collection_name",
2355            source_ref,
2356        )?;
2357
2358        // Collect (row_id, logical_id) for active rows that will be excised.
2359        let pairs: Vec<(String, String)> = {
2360            let mut stmt = tx.prepare(
2361                "SELECT row_id, logical_id FROM nodes \
2362                 WHERE source_ref = ?1 AND superseded_at IS NULL",
2363            )?;
2364            stmt.query_map([source_ref], |row| {
2365                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
2366            })?
2367            .collect::<Result<Vec<_>, _>>()?
2368        };
2369        let affected_logical_ids: Vec<String> = pairs
2370            .iter()
2371            .map(|(_, logical_id)| logical_id.clone())
2372            .collect();
2373
2374        // Supersede bad rows in all tables.
2375        tx.execute(
2376            "UPDATE nodes SET superseded_at = unixepoch() \
2377             WHERE source_ref = ?1 AND superseded_at IS NULL",
2378            [source_ref],
2379        )?;
2380        tx.execute(
2381            "UPDATE edges SET superseded_at = unixepoch() \
2382             WHERE source_ref = ?1 AND superseded_at IS NULL",
2383            [source_ref],
2384        )?;
2385        tx.execute(
2386            "UPDATE actions SET superseded_at = unixepoch() \
2387             WHERE source_ref = ?1 AND superseded_at IS NULL",
2388            [source_ref],
2389        )?;
2390        clear_operational_current_rows(&tx, &affected_operational_collections)?;
2391        tx.execute(
2392            "DELETE FROM operational_mutations WHERE source_ref = ?1",
2393            [source_ref],
2394        )?;
2395        for logical_id in &affected_logical_ids {
2396            delete_vec_rows_for_logical_id(&tx, logical_id)?;
2397            tx.execute(
2398                "DELETE FROM chunks WHERE node_logical_id = ?1",
2399                [logical_id.as_str()],
2400            )?;
2401        }
2402
2403        // Restore the most recent prior version for each affected logical_id.
2404        for (excised_row_id, logical_id) in &pairs {
2405            let prior: Option<String> = tx
2406                .query_row(
2407                    "SELECT row_id FROM nodes \
2408                     WHERE logical_id = ?1 AND row_id != ?2 \
2409                     ORDER BY created_at DESC LIMIT 1",
2410                    [logical_id.as_str(), excised_row_id.as_str()],
2411                    |row| row.get(0),
2412                )
2413                .optional()?;
2414            if let Some(prior_id) = prior {
2415                tx.execute(
2416                    "UPDATE nodes SET superseded_at = NULL WHERE row_id = ?1",
2417                    [prior_id.as_str()],
2418                )?;
2419            }
2420        }
2421
2422        for logical_id in &affected_logical_ids {
2423            let has_active_node = tx
2424                .query_row(
2425                    "SELECT 1 FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL LIMIT 1",
2426                    [logical_id.as_str()],
2427                    |row| row.get::<_, i64>(0),
2428                )
2429                .optional()?
2430                .is_some();
2431            if !has_active_node {
2432                tx.execute(
2433                    "DELETE FROM node_access_metadata WHERE logical_id = ?1",
2434                    [logical_id.as_str()],
2435                )?;
2436            }
2437        }
2438
2439        rebuild_operational_current_rows(&tx, &affected_operational_collections)?;
2440
2441        // Rebuild FTS atomically within the same transaction so readers never
2442        // observe a post-excise node state with a stale FTS index.
2443        tx.execute("DELETE FROM fts_nodes", [])?;
2444        tx.execute(
2445            r"
2446            INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content)
2447            SELECT c.id, n.logical_id, n.kind, c.text_content
2448            FROM chunks c
2449            JOIN nodes n
2450              ON n.logical_id = c.node_logical_id
2451             AND n.superseded_at IS NULL
2452            ",
2453            [],
2454        )?;
2455
2456        // Rebuild property FTS in the same transaction.
2457        rebuild_property_fts_in_tx(&tx)?;
2458
2459        // Record the audit event inside the same transaction so the excision and its
2460        // audit record are committed atomically — no window where the excision is
2461        // durable but unaudited.
2462        tx.execute(
2463            "INSERT INTO provenance_events (id, event_type, subject, source_ref) \
2464             VALUES (?1, 'excise_source', ?2, ?2)",
2465            rusqlite::params![new_id(), source_ref],
2466        )?;
2467
2468        tx.commit()?;
2469
2470        self.trace_source(source_ref)
2471    }
2472
2473    /// # Errors
2474    /// Returns [`EngineError`] if the WAL checkpoint fails, the `SQLite` backup fails,
2475    /// the SHA-256 digest cannot be computed, or the manifest file cannot be written.
2476    pub fn safe_export(
2477        &self,
2478        destination_path: impl AsRef<Path>,
2479        options: SafeExportOptions,
2480    ) -> Result<SafeExportManifest, EngineError> {
2481        let destination_path = destination_path.as_ref();
2482
2483        // 1. Optionally checkpoint WAL before exporting. This keeps the on-disk file tidy for
2484        // callers that want a fully checkpointed export, but export correctness does not depend
2485        // on it because the backup API copies from the live SQLite connection state.
2486        let conn = self.connect()?;
2487
2488        if options.force_checkpoint {
2489            trace_info!("safe_export: wal checkpoint started");
2490            let (busy, log, checkpointed): (i64, i64, i64) =
2491                conn.query_row("PRAGMA wal_checkpoint(FULL)", [], |row| {
2492                    Ok((row.get(0)?, row.get(1)?, row.get(2)?))
2493                })?;
2494            if busy != 0 {
2495                trace_warn!(
2496                    busy,
2497                    log_frames = log,
2498                    checkpointed_frames = checkpointed,
2499                    "safe_export: wal checkpoint blocked by active readers"
2500                );
2501                return Err(EngineError::Bridge(format!(
2502                    "WAL checkpoint blocked: {busy} active reader(s) prevented a full checkpoint; \
2503                     log frames={log}, checkpointed={checkpointed}; \
2504                     retry export when no readers are active"
2505                )));
2506            }
2507            trace_info!(
2508                log_frames = log,
2509                checkpointed_frames = checkpointed,
2510                "safe_export: wal checkpoint completed"
2511            );
2512        }
2513
2514        let schema_version: u32 = conn
2515            .query_row(
2516                "SELECT COALESCE(MAX(version), 0) FROM fathom_schema_migrations",
2517                [],
2518                |row| row.get(0),
2519            )
2520            .unwrap_or(0);
2521
2522        // 2. Export the database through SQLite's online backup API so committed data in the WAL
2523        // is included even when `force_checkpoint` is false.
2524        if let Some(parent) = destination_path.parent() {
2525            fs::create_dir_all(parent)?;
2526        }
2527        conn.backup(DatabaseName::Main, destination_path, None)?;
2528
2529        drop(conn);
2530
2531        // 2b. Query page_count from the EXPORTED file so the manifest reflects what was
2532        // actually backed up, not the source (which may have changed between the PRAGMA
2533        // and the backup call).
2534        let page_count: u64 = {
2535            let export_conn = rusqlite::Connection::open_with_flags(
2536                destination_path,
2537                rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY
2538                    | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX,
2539            )?;
2540            export_conn.query_row("PRAGMA page_count", [], |row| row.get(0))?
2541        };
2542
2543        // 3. Compute SHA-256 of the exported file.
2544        // FIX(review): was fs::read loading entire DB into memory; use streaming hash.
2545        let sha256 = {
2546            let mut file = fs::File::open(destination_path)?;
2547            let mut hasher = Sha256::new();
2548            io::copy(&mut file, &mut hasher)?;
2549            format!("{:x}", hasher.finalize())
2550        };
2551
2552        // 4. Record when the export was created.
2553        let exported_at = SystemTime::now()
2554            .duration_since(SystemTime::UNIX_EPOCH)
2555            .map_err(|e| EngineError::Bridge(format!("system clock error: {e}")))?
2556            .as_secs();
2557
2558        let manifest = SafeExportManifest {
2559            exported_at,
2560            sha256,
2561            schema_version,
2562            protocol_version: EXPORT_PROTOCOL_VERSION,
2563            page_count,
2564        };
2565
2566        // 5. Write manifest alongside the exported file, using Path API for the name.
2567        let manifest_path = {
2568            let mut p = destination_path.to_path_buf();
2569            let stem = p
2570                .file_name()
2571                .map(|n| format!("{}.export-manifest.json", n.to_string_lossy()))
2572                .ok_or_else(|| {
2573                    EngineError::Bridge("destination path has no filename".to_owned())
2574                })?;
2575            p.set_file_name(stem);
2576            p
2577        };
2578        let manifest_json =
2579            serde_json::to_string(&manifest).map_err(|e| EngineError::Bridge(e.to_string()))?;
2580
2581        // Atomic manifest write: write to a temp file then rename so readers never
2582        // observe a partially-written manifest.
2583        let manifest_tmp = manifest_path.with_extension("json.tmp");
2584        if let Err(e) = fs::write(&manifest_tmp, &manifest_json)
2585            .and_then(|()| fs::rename(&manifest_tmp, &manifest_path))
2586        {
2587            let _ = fs::remove_file(&manifest_tmp);
2588            return Err(e.into());
2589        }
2590
2591        Ok(manifest)
2592    }
2593}
2594
2595#[allow(dead_code)]
2596#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
2597struct VectorEmbeddingContractRecord {
2598    profile: String,
2599    table_name: String,
2600    model_identity: String,
2601    model_version: String,
2602    dimension: usize,
2603    normalization_policy: String,
2604    chunking_policy: String,
2605    preprocessing_policy: String,
2606    generator_command_json: String,
2607    applied_at: i64,
2608    snapshot_hash: String,
2609    contract_format_version: i64,
2610}
2611
2612#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
2613struct VectorRegenerationInputChunk {
2614    chunk_id: String,
2615    node_logical_id: String,
2616    kind: String,
2617    text_content: String,
2618    byte_start: Option<i64>,
2619    byte_end: Option<i64>,
2620    source_ref: Option<String>,
2621    created_at: i64,
2622}
2623
2624#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
2625struct VectorRegenerationInput {
2626    profile: String,
2627    table_name: String,
2628    model_identity: String,
2629    model_version: String,
2630    dimension: usize,
2631    normalization_policy: String,
2632    chunking_policy: String,
2633    preprocessing_policy: String,
2634    chunks: Vec<VectorRegenerationInputChunk>,
2635}
2636
2637#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
2638struct GeneratedEmbedding {
2639    chunk_id: String,
2640    embedding: Vec<f32>,
2641}
2642
2643#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
2644struct GeneratedEmbeddings {
2645    embeddings: Vec<GeneratedEmbedding>,
2646}
2647
/// Classification of a vector regeneration failure.
///
/// Each class has a fixed human-readable label (see `label`) and a
/// retryability flag (see `retryable`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum VectorRegenerationFailureClass {
    InvalidContract,
    PayloadTooLarge,
    GeneratorTimeout,
    GeneratorStdoutOverflow,
    GeneratorStderrOverflow,
    GeneratorNonzeroExit,
    MalformedGeneratorJson,
    SnapshotDrift,
    UnsupportedVecCapability,
}

impl VectorRegenerationFailureClass {
    /// Lower-case label used when the failure is rendered into messages.
    fn label(self) -> &'static str {
        match self {
            Self::UnsupportedVecCapability => "unsupported vec capability",
            Self::SnapshotDrift => "snapshot drift",
            Self::MalformedGeneratorJson => "malformed generator json",
            Self::GeneratorNonzeroExit => "generator nonzero exit",
            Self::GeneratorStderrOverflow => "generator stderr overflow",
            Self::GeneratorStdoutOverflow => "generator stdout overflow",
            Self::GeneratorTimeout => "generator timeout",
            Self::PayloadTooLarge => "payload too large",
            Self::InvalidContract => "invalid contract",
        }
    }

    /// Whether the failure is marked as retryable; only `SnapshotDrift` is.
    ///
    /// Written as an exhaustive match so adding a variant forces a decision here.
    fn retryable(self) -> bool {
        match self {
            Self::SnapshotDrift => true,
            Self::InvalidContract
            | Self::PayloadTooLarge
            | Self::GeneratorTimeout
            | Self::GeneratorStdoutOverflow
            | Self::GeneratorStderrOverflow
            | Self::GeneratorNonzeroExit
            | Self::MalformedGeneratorJson
            | Self::UnsupportedVecCapability => false,
        }
    }
}
2680
2681#[derive(Clone, Debug, PartialEq, Eq)]
2682pub(crate) struct VectorRegenerationFailure {
2683    class: VectorRegenerationFailureClass,
2684    detail: String,
2685}
2686
2687impl VectorRegenerationFailure {
2688    pub(crate) fn new(class: VectorRegenerationFailureClass, detail: impl Into<String>) -> Self {
2689        Self {
2690            class,
2691            detail: detail.into(),
2692        }
2693    }
2694
2695    fn to_engine_error(&self) -> EngineError {
2696        let retry_suffix = if self.class.retryable() {
2697            " [retryable]"
2698        } else {
2699            ""
2700        };
2701        EngineError::Bridge(format!(
2702            "vector regeneration {}: {}{}",
2703            self.class.label(),
2704            self.detail,
2705            retry_suffix
2706        ))
2707    }
2708
2709    fn failure_class_label(&self) -> &'static str {
2710        self.class.label()
2711    }
2712}
2713
2714#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
2715struct VectorRegenerationAuditMetadata {
2716    profile: String,
2717    model_identity: String,
2718    model_version: String,
2719    chunk_count: usize,
2720    snapshot_hash: String,
2721    #[serde(skip_serializing_if = "Option::is_none")]
2722    failure_class: Option<String>,
2723}
2724
2725#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize)]
2726#[serde(tag = "mode", rename_all = "snake_case")]
2727enum OperationalRetentionPolicy {
2728    KeepAll,
2729    PurgeBeforeSeconds { max_age_seconds: i64 },
2730    KeepLast { max_rows: usize },
2731}
2732
2733/// # Errors
2734/// Returns [`EngineError`] if the file cannot be read or the config is invalid.
2735pub fn load_vector_regeneration_config(
2736    path: impl AsRef<Path>,
2737) -> Result<VectorRegenerationConfig, EngineError> {
2738    let path = path.as_ref();
2739    let raw = fs::read_to_string(path)?;
2740    match path.extension().and_then(|ext| ext.to_str()) {
2741        Some("toml") => {
2742            toml::from_str(&raw).map_err(|error| EngineError::Bridge(error.to_string()))
2743        }
2744        Some("json") | None => {
2745            serde_json::from_str(&raw).map_err(|error| EngineError::Bridge(error.to_string()))
2746        }
2747        Some(other) => Err(EngineError::Bridge(format!(
2748            "unsupported vector regeneration config extension: {other}"
2749        ))),
2750    }
2751}
2752
2753fn validate_vector_regeneration_config(
2754    conn: &rusqlite::Connection,
2755    config: &VectorRegenerationConfig,
2756    policy: &VectorGeneratorPolicy,
2757) -> Result<VectorRegenerationConfig, VectorRegenerationFailure> {
2758    let profile = validate_bounded_text("profile", &config.profile, MAX_PROFILE_LEN)?;
2759    let table_name = validate_bounded_text("table_name", &config.table_name, MAX_PROFILE_LEN)?;
2760    if table_name != "vec_nodes_active" {
2761        return Err(VectorRegenerationFailure::new(
2762            VectorRegenerationFailureClass::InvalidContract,
2763            format!("table_name must be vec_nodes_active, got '{table_name}'"),
2764        ));
2765    }
2766    let model_identity = validate_bounded_text(
2767        "model_identity",
2768        &config.model_identity,
2769        MAX_MODEL_IDENTITY_LEN,
2770    )?;
2771    let model_version = validate_bounded_text(
2772        "model_version",
2773        &config.model_version,
2774        MAX_MODEL_VERSION_LEN,
2775    )?;
2776    if config.dimension == 0 {
2777        return Err(VectorRegenerationFailure::new(
2778            VectorRegenerationFailureClass::InvalidContract,
2779            "dimension must be greater than zero".to_owned(),
2780        ));
2781    }
2782    let normalization_policy = validate_bounded_text(
2783        "normalization_policy",
2784        &config.normalization_policy,
2785        MAX_POLICY_LEN,
2786    )?;
2787    let chunking_policy =
2788        validate_bounded_text("chunking_policy", &config.chunking_policy, MAX_POLICY_LEN)?;
2789    let preprocessing_policy = validate_bounded_text(
2790        "preprocessing_policy",
2791        &config.preprocessing_policy,
2792        MAX_POLICY_LEN,
2793    )?;
2794    let generator_command = validate_generator_command(&config.generator_command, policy)?;
2795
2796    if let Some(existing_dimension) = current_vector_profile_dimension(conn, &profile)?
2797        && existing_dimension != config.dimension
2798    {
2799        return Err(VectorRegenerationFailure::new(
2800            VectorRegenerationFailureClass::InvalidContract,
2801            format!(
2802                "dimension {} does not match existing vector profile dimension {}",
2803                config.dimension, existing_dimension
2804            ),
2805        ));
2806    }
2807
2808    validate_existing_contract_version(conn, &profile)?;
2809
2810    let normalized = VectorRegenerationConfig {
2811        profile,
2812        table_name,
2813        model_identity,
2814        model_version,
2815        dimension: config.dimension,
2816        normalization_policy,
2817        chunking_policy,
2818        preprocessing_policy,
2819        generator_command,
2820    };
2821    let serialized = serde_json::to_vec(&normalized).map_err(|error| {
2822        VectorRegenerationFailure::new(
2823            VectorRegenerationFailureClass::InvalidContract,
2824            error.to_string(),
2825        )
2826    })?;
2827    if serialized.len() > MAX_CONTRACT_JSON_BYTES {
2828        return Err(VectorRegenerationFailure::new(
2829            VectorRegenerationFailureClass::InvalidContract,
2830            format!("serialized contract exceeds {MAX_CONTRACT_JSON_BYTES} bytes"),
2831        ));
2832    }
2833
2834    Ok(normalized)
2835}
2836
2837#[allow(clippy::cast_possible_wrap)]
2838fn persist_vector_contract(
2839    conn: &rusqlite::Connection,
2840    config: &VectorRegenerationConfig,
2841    snapshot_hash: &str,
2842) -> Result<(), EngineError> {
2843    let generator_command_json = serde_json::to_string(&config.generator_command)
2844        .map_err(|error| EngineError::Bridge(error.to_string()))?;
2845    conn.execute(
2846        r"
2847        INSERT OR REPLACE INTO vector_embedding_contracts (
2848            profile,
2849            table_name,
2850            model_identity,
2851            model_version,
2852            dimension,
2853            normalization_policy,
2854            chunking_policy,
2855            preprocessing_policy,
2856            generator_command_json,
2857            applied_at,
2858            snapshot_hash,
2859            contract_format_version,
2860            updated_at
2861        ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, unixepoch(), ?10, ?11, unixepoch())
2862        ",
2863        rusqlite::params![
2864            config.profile.as_str(),
2865            config.table_name.as_str(),
2866            config.model_identity.as_str(),
2867            config.model_version.as_str(),
2868            config.dimension as i64,
2869            config.normalization_policy.as_str(),
2870            config.chunking_policy.as_str(),
2871            config.preprocessing_policy.as_str(),
2872            generator_command_json,
2873            snapshot_hash,
2874            CURRENT_VECTOR_CONTRACT_FORMAT_VERSION,
2875        ],
2876    )?;
2877    Ok(())
2878}
2879
2880fn persist_vector_regeneration_event(
2881    conn: &rusqlite::Connection,
2882    event_type: &str,
2883    subject: &str,
2884    metadata: &VectorRegenerationAuditMetadata,
2885) -> Result<(), EngineError> {
2886    let metadata_json = serialize_audit_metadata(metadata)?;
2887    conn.execute(
2888        "INSERT INTO provenance_events (id, event_type, subject, metadata_json) VALUES (?1, ?2, ?3, ?4)",
2889        rusqlite::params![new_id(), event_type, subject, metadata_json],
2890    )?;
2891    Ok(())
2892}
2893
2894fn persist_simple_provenance_event(
2895    conn: &rusqlite::Connection,
2896    event_type: &str,
2897    subject: &str,
2898    metadata: Option<serde_json::Value>,
2899) -> Result<(), EngineError> {
2900    let metadata_json = metadata.map(|value| value.to_string()).unwrap_or_default();
2901    conn.execute(
2902        "INSERT INTO provenance_events (id, event_type, subject, metadata_json) VALUES (?1, ?2, ?3, ?4)",
2903        rusqlite::params![new_id(), event_type, subject, metadata_json],
2904    )?;
2905    Ok(())
2906}
2907
2908/// Count active nodes that should have a property FTS row (extraction yields a value)
2909/// but don't. Uses the same extraction logic as write/rebuild to avoid false positives
2910/// for nodes whose declared paths legitimately normalize to no values.
2911fn count_missing_property_fts_rows(conn: &rusqlite::Connection) -> Result<i64, EngineError> {
2912    let schemas = crate::writer::load_fts_property_schemas(conn)?;
2913    if schemas.is_empty() {
2914        return Ok(0);
2915    }
2916
2917    let mut missing = 0i64;
2918    for (kind, schema) in &schemas {
2919        let mut stmt = conn.prepare(
2920            "SELECT n.logical_id, n.properties FROM nodes n \
2921             WHERE n.kind = ?1 AND n.superseded_at IS NULL \
2922               AND NOT EXISTS (SELECT 1 FROM fts_node_properties fp WHERE fp.node_logical_id = n.logical_id)",
2923        )?;
2924        let rows = stmt.query_map([kind.as_str()], |row| {
2925            Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
2926        })?;
2927        for row in rows {
2928            let (_logical_id, properties_str) = row?;
2929            let props: serde_json::Value =
2930                serde_json::from_str(&properties_str).unwrap_or_default();
2931            if crate::writer::extract_property_fts(&props, schema)
2932                .0
2933                .is_some()
2934            {
2935                missing += 1;
2936            }
2937        }
2938    }
2939    Ok(missing)
2940}
2941
2942/// Count property FTS rows whose `text_content` has drifted from the current canonical
2943/// value computed by `compute_property_fts_text(...)`. This catches:
2944/// - rows whose text no longer matches the current node properties and schema
2945/// - rows that should have been removed (extraction now yields no value)
2946fn count_drifted_property_fts_rows(conn: &rusqlite::Connection) -> Result<i64, EngineError> {
2947    let schemas = crate::writer::load_fts_property_schemas(conn)?;
2948    if schemas.is_empty() {
2949        return Ok(0);
2950    }
2951
2952    let mut drifted = 0i64;
2953    for (kind, schema) in &schemas {
2954        let mut stmt = conn.prepare(
2955            "SELECT fp.node_logical_id, fp.text_content, n.properties \
2956             FROM fts_node_properties fp \
2957             JOIN nodes n ON n.logical_id = fp.node_logical_id AND n.superseded_at IS NULL \
2958             WHERE fp.kind = ?1 AND n.kind = ?1",
2959        )?;
2960        let rows = stmt.query_map([kind.as_str()], |row| {
2961            Ok((
2962                row.get::<_, String>(0)?,
2963                row.get::<_, String>(1)?,
2964                row.get::<_, String>(2)?,
2965            ))
2966        })?;
2967        for row in rows {
2968            let (_logical_id, stored_text, properties_str) = row?;
2969            let props: serde_json::Value =
2970                serde_json::from_str(&properties_str).unwrap_or_default();
2971            let (expected, _positions, _stats) =
2972                crate::writer::extract_property_fts(&props, schema);
2973            match expected {
2974                Some(text) if text == stored_text => {}
2975                _ => drifted += 1,
2976            }
2977        }
2978    }
2979    Ok(drifted)
2980}
2981
2982/// Rebuild property FTS rows from canonical state within an existing transaction.
2983fn rebuild_property_fts_in_tx(conn: &rusqlite::Connection) -> Result<usize, EngineError> {
2984    conn.execute("DELETE FROM fts_node_properties", [])?;
2985    conn.execute("DELETE FROM fts_node_property_positions", [])?;
2986    let inserted = crate::projection::insert_property_fts_rows(
2987        conn,
2988        "SELECT logical_id, properties FROM nodes WHERE kind = ?1 AND superseded_at IS NULL",
2989    )?;
2990    Ok(inserted)
2991}
2992
2993/// Rebuild property FTS for a single node. Returns 1 if a row was inserted, 0 otherwise.
2994/// The caller must delete any existing `fts_node_properties` row for this node first.
2995fn rebuild_single_node_property_fts(
2996    conn: &rusqlite::Connection,
2997    logical_id: &str,
2998    kind: &str,
2999) -> Result<usize, EngineError> {
3000    let schema: Option<(String, String)> = conn
3001        .query_row(
3002            "SELECT property_paths_json, separator FROM fts_property_schemas WHERE kind = ?1",
3003            [kind],
3004            |row| {
3005                let paths_json: String = row.get(0)?;
3006                let separator: String = row.get(1)?;
3007                Ok((paths_json, separator))
3008            },
3009        )
3010        .optional()?;
3011    let Some((paths_json, separator)) = schema else {
3012        return Ok(0);
3013    };
3014    let parsed = crate::writer::parse_property_schema_json(&paths_json, &separator);
3015    let properties_str: Option<String> = conn
3016        .query_row(
3017            "SELECT properties FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
3018            [logical_id],
3019            |row| row.get(0),
3020        )
3021        .optional()?;
3022    let Some(properties_str) = properties_str else {
3023        return Ok(0);
3024    };
3025    let props: serde_json::Value = serde_json::from_str(&properties_str).unwrap_or_default();
3026    let (text, positions, _stats) = crate::writer::extract_property_fts(&props, &parsed);
3027    let Some(text) = text else {
3028        return Ok(0);
3029    };
3030    conn.execute(
3031        "DELETE FROM fts_node_property_positions WHERE node_logical_id = ?1",
3032        rusqlite::params![logical_id],
3033    )?;
3034    conn.execute(
3035        "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) VALUES (?1, ?2, ?3)",
3036        rusqlite::params![logical_id, kind, text],
3037    )?;
3038    for pos in &positions {
3039        conn.execute(
3040            "INSERT INTO fts_node_property_positions \
3041             (node_logical_id, kind, start_offset, end_offset, leaf_path) \
3042             VALUES (?1, ?2, ?3, ?4, ?5)",
3043            rusqlite::params![
3044                logical_id,
3045                kind,
3046                i64::try_from(pos.start_offset).unwrap_or(i64::MAX),
3047                i64::try_from(pos.end_offset).unwrap_or(i64::MAX),
3048                pos.leaf_path,
3049            ],
3050        )?;
3051    }
3052    Ok(1)
3053}
3054
3055fn serialize_property_paths_json(
3056    entries: &[FtsPropertyPathSpec],
3057    exclude_paths: &[String],
3058) -> Result<String, EngineError> {
3059    // Scalar-only schemas with no exclude_paths are serialised in the
3060    // legacy shape (bare array of strings) for full backwards
3061    // compatibility with earlier schema versions.
3062    let all_scalar = entries
3063        .iter()
3064        .all(|e| e.mode == FtsPropertyPathMode::Scalar);
3065    if all_scalar && exclude_paths.is_empty() {
3066        let paths: Vec<&str> = entries.iter().map(|e| e.path.as_str()).collect();
3067        return serde_json::to_string(&paths).map_err(|e| {
3068            EngineError::InvalidWrite(format!("failed to serialize property paths: {e}"))
3069        });
3070    }
3071
3072    let mut obj = serde_json::Map::new();
3073    let paths_json: Vec<serde_json::Value> = entries
3074        .iter()
3075        .map(|e| {
3076            let mode_str = match e.mode {
3077                FtsPropertyPathMode::Scalar => "scalar",
3078                FtsPropertyPathMode::Recursive => "recursive",
3079            };
3080            serde_json::json!({ "path": e.path, "mode": mode_str })
3081        })
3082        .collect();
3083    obj.insert("paths".to_owned(), serde_json::Value::Array(paths_json));
3084    if !exclude_paths.is_empty() {
3085        obj.insert("exclude_paths".to_owned(), serde_json::json!(exclude_paths));
3086    }
3087    serde_json::to_string(&serde_json::Value::Object(obj))
3088        .map_err(|e| EngineError::InvalidWrite(format!("failed to serialize property paths: {e}")))
3089}
3090
3091fn validate_fts_property_paths(paths: &[String]) -> Result<(), EngineError> {
3092    if paths.is_empty() {
3093        return Err(EngineError::InvalidWrite(
3094            "FTS property paths must not be empty".to_owned(),
3095        ));
3096    }
3097    let mut seen = std::collections::HashSet::new();
3098    for path in paths {
3099        if !path.starts_with("$.") {
3100            return Err(EngineError::InvalidWrite(format!(
3101                "FTS property path must start with '$.' but got: {path}"
3102            )));
3103        }
3104        let after_prefix = &path[2..]; // safe: already validated "$." prefix
3105        let segments: Vec<&str> = after_prefix.split('.').collect();
3106        if segments.is_empty() || segments.iter().any(|s| s.is_empty()) {
3107            return Err(EngineError::InvalidWrite(format!(
3108                "FTS property path has empty segment(s): {path}"
3109            )));
3110        }
3111        for seg in &segments {
3112            if !seg.chars().all(|c| c.is_alphanumeric() || c == '_') {
3113                return Err(EngineError::InvalidWrite(format!(
3114                    "FTS property path segment contains invalid characters: {path}"
3115                )));
3116            }
3117        }
3118        if !seen.insert(path) {
3119            return Err(EngineError::InvalidWrite(format!(
3120                "duplicate FTS property path: {path}"
3121            )));
3122        }
3123    }
3124    Ok(())
3125}
3126
3127fn load_fts_property_schema_record(
3128    conn: &rusqlite::Connection,
3129    kind: &str,
3130) -> Result<Option<FtsPropertySchemaRecord>, EngineError> {
3131    let row = conn
3132        .query_row(
3133            "SELECT kind, property_paths_json, separator, format_version \
3134             FROM fts_property_schemas WHERE kind = ?1",
3135            [kind],
3136            |row| {
3137                let kind: String = row.get(0)?;
3138                let paths_json: String = row.get(1)?;
3139                let separator: String = row.get(2)?;
3140                let format_version: i64 = row.get(3)?;
3141                Ok(build_fts_property_schema_record(
3142                    kind,
3143                    &paths_json,
3144                    separator,
3145                    format_version,
3146                ))
3147            },
3148        )
3149        .optional()?;
3150    Ok(row)
3151}
3152
3153/// Build an [`FtsPropertySchemaRecord`] from a raw
3154/// `fts_property_schemas` row. Delegates JSON parsing to
3155/// [`crate::writer::parse_property_schema_json`] — the same parser the
3156/// recursive walker uses at rebuild time — so both the legacy bare-array
3157/// shape and the Phase 4 object-shaped envelope round-trip correctly.
3158fn build_fts_property_schema_record(
3159    kind: String,
3160    paths_json: &str,
3161    separator: String,
3162    format_version: i64,
3163) -> FtsPropertySchemaRecord {
3164    let schema = crate::writer::parse_property_schema_json(paths_json, &separator);
3165    let entries: Vec<FtsPropertyPathSpec> = schema
3166        .paths
3167        .into_iter()
3168        .map(|entry| FtsPropertyPathSpec {
3169            path: entry.path,
3170            mode: match entry.mode {
3171                crate::writer::PropertyPathMode::Scalar => FtsPropertyPathMode::Scalar,
3172                crate::writer::PropertyPathMode::Recursive => FtsPropertyPathMode::Recursive,
3173            },
3174        })
3175        .collect();
3176    let property_paths: Vec<String> = entries.iter().map(|e| e.path.clone()).collect();
3177    FtsPropertySchemaRecord {
3178        kind,
3179        property_paths,
3180        entries,
3181        exclude_paths: schema.exclude_paths,
3182        separator,
3183        format_version,
3184    }
3185}
3186
3187fn build_regeneration_input(
3188    config: &VectorRegenerationConfig,
3189    chunks: Vec<VectorRegenerationInputChunk>,
3190) -> VectorRegenerationInput {
3191    VectorRegenerationInput {
3192        profile: config.profile.clone(),
3193        table_name: config.table_name.clone(),
3194        model_identity: config.model_identity.clone(),
3195        model_version: config.model_version.clone(),
3196        dimension: config.dimension,
3197        normalization_policy: config.normalization_policy.clone(),
3198        chunking_policy: config.chunking_policy.clone(),
3199        preprocessing_policy: config.preprocessing_policy.clone(),
3200        chunks,
3201    }
3202}
3203
3204fn compute_snapshot_hash(payload: &VectorRegenerationInput) -> Result<String, EngineError> {
3205    let bytes =
3206        serde_json::to_vec(payload).map_err(|error| EngineError::Bridge(error.to_string()))?;
3207    let mut hasher = Sha256::new();
3208    hasher.update(bytes);
3209    Ok(format!("{:x}", hasher.finalize()))
3210}
3211
3212fn collect_regeneration_chunks(
3213    conn: &rusqlite::Connection,
3214) -> Result<Vec<VectorRegenerationInputChunk>, EngineError> {
3215    let mut stmt = conn.prepare(
3216        r"
3217        SELECT c.id, c.node_logical_id, n.kind, c.text_content, c.byte_start, c.byte_end, n.source_ref, c.created_at
3218        FROM chunks c
3219        JOIN nodes n
3220          ON n.logical_id = c.node_logical_id
3221         AND n.superseded_at IS NULL
3222        ORDER BY c.created_at, c.id
3223        ",
3224    )?;
3225    let chunks = stmt
3226        .query_map([], |row| {
3227            Ok(VectorRegenerationInputChunk {
3228                chunk_id: row.get(0)?,
3229                node_logical_id: row.get(1)?,
3230                kind: row.get(2)?,
3231                text_content: row.get(3)?,
3232                byte_start: row.get(4)?,
3233                byte_end: row.get(5)?,
3234                source_ref: row.get(6)?,
3235                created_at: row.get(7)?,
3236            })
3237        })?
3238        .collect::<Result<Vec<_>, _>>()?;
3239    Ok(chunks)
3240}
3241
3242fn validate_generated_embeddings(
3243    config: &VectorRegenerationConfig,
3244    chunks: &[VectorRegenerationInputChunk],
3245    generated: GeneratedEmbeddings,
3246) -> Result<std::collections::HashMap<String, Vec<u8>>, VectorRegenerationFailure> {
3247    if generated.embeddings.len() != chunks.len() {
3248        return Err(VectorRegenerationFailure::new(
3249            VectorRegenerationFailureClass::MalformedGeneratorJson,
3250            format!(
3251                "generator returned {} embedding(s) for {} chunk(s)",
3252                generated.embeddings.len(),
3253                chunks.len()
3254            ),
3255        ));
3256    }
3257
3258    let mut embedding_map = std::collections::HashMap::new();
3259    for embedding in generated.embeddings {
3260        if embedding.embedding.len() != config.dimension {
3261            return Err(VectorRegenerationFailure::new(
3262                VectorRegenerationFailureClass::MalformedGeneratorJson,
3263                format!(
3264                    "embedding for chunk '{}' has dimension {}, expected {}",
3265                    embedding.chunk_id,
3266                    embedding.embedding.len(),
3267                    config.dimension
3268                ),
3269            ));
3270        }
3271        if embedding.embedding.iter().any(|value| !value.is_finite()) {
3272            return Err(VectorRegenerationFailure::new(
3273                VectorRegenerationFailureClass::MalformedGeneratorJson,
3274                format!(
3275                    "embedding for chunk '{}' contains non-finite values",
3276                    embedding.chunk_id
3277                ),
3278            ));
3279        }
3280        let bytes: Vec<u8> = embedding
3281            .embedding
3282            .iter()
3283            .flat_map(|value| value.to_le_bytes())
3284            .collect();
3285        if embedding_map
3286            .insert(embedding.chunk_id.clone(), bytes)
3287            .is_some()
3288        {
3289            return Err(VectorRegenerationFailure::new(
3290                VectorRegenerationFailureClass::MalformedGeneratorJson,
3291                format!(
3292                    "duplicate embedding returned for chunk '{}'",
3293                    embedding.chunk_id
3294                ),
3295            ));
3296        }
3297    }
3298
3299    Ok(embedding_map)
3300}
3301
3302fn generator_policy_notes(policy: &VectorGeneratorPolicy) -> Vec<String> {
3303    let mut notes = vec!["vector embeddings regenerated from application contract".to_owned()];
3304    if !policy.allowed_executable_roots.is_empty() {
3305        notes.push("generator executable roots enforced by operator policy".to_owned());
3306    }
3307    if !policy.preserve_env_vars.is_empty() {
3308        notes.push("generator environment reduced to preserved variables".to_owned());
3309    }
3310    notes
3311}
3312
/// Identifies which of the generator child's output pipes a reader-thread
/// message refers to.
enum GeneratorStream {
    /// The child's standard output.
    Stdout,
    /// The child's standard error.
    Stderr,
}
3317
/// Final outcome reported by a capped stream reader (see
/// `spawn_capped_reader`).
enum StreamReadResult {
    /// End of stream was reached; carries every byte that was read.
    Complete(Vec<u8>),
    /// Reading stopped because the next chunk would have pushed the total
    /// past the configured byte cap.
    Overflow,
    /// Reading stopped because the underlying read returned an error.
    Io(io::Error),
}
3323
3324fn validate_bounded_text(
3325    field: &str,
3326    value: &str,
3327    max_len: usize,
3328) -> Result<String, VectorRegenerationFailure> {
3329    let trimmed = value.trim();
3330    if trimmed.is_empty() {
3331        return Err(VectorRegenerationFailure::new(
3332            VectorRegenerationFailureClass::InvalidContract,
3333            format!("{field} must not be empty"),
3334        ));
3335    }
3336    if trimmed.len() > max_len {
3337        return Err(VectorRegenerationFailure::new(
3338            VectorRegenerationFailureClass::InvalidContract,
3339            format!("{field} exceeds max length {max_len}"),
3340        ));
3341    }
3342    Ok(trimmed.to_owned())
3343}
3344
3345fn validate_generator_command(
3346    command: &[String],
3347    policy: &VectorGeneratorPolicy,
3348) -> Result<Vec<String>, VectorRegenerationFailure> {
3349    if command.is_empty() {
3350        return Err(VectorRegenerationFailure::new(
3351            VectorRegenerationFailureClass::InvalidContract,
3352            "generator_command must contain at least one element".to_owned(),
3353        ));
3354    }
3355    let mut total_len = 0usize;
3356    for argument in command {
3357        if argument.is_empty() {
3358            return Err(VectorRegenerationFailure::new(
3359                VectorRegenerationFailureClass::InvalidContract,
3360                "generator_command entries must not be empty".to_owned(),
3361            ));
3362        }
3363        if argument.len() > MAX_GENERATOR_COMMAND_ARG_LEN {
3364            return Err(VectorRegenerationFailure::new(
3365                VectorRegenerationFailureClass::InvalidContract,
3366                format!(
3367                    "generator_command argument exceeds max length {MAX_GENERATOR_COMMAND_ARG_LEN}"
3368                ),
3369            ));
3370        }
3371        total_len += argument.len();
3372    }
3373    if total_len > MAX_GENERATOR_COMMAND_TOTAL_LEN {
3374        return Err(VectorRegenerationFailure::new(
3375            VectorRegenerationFailureClass::InvalidContract,
3376            format!(
3377                "generator_command exceeds max serialized length {MAX_GENERATOR_COMMAND_TOTAL_LEN}"
3378            ),
3379        ));
3380    }
3381    executable_trust::validate_generator_executable(&command[0], policy)?;
3382    Ok(command.to_vec())
3383}
3384
3385fn current_vector_profile_dimension(
3386    conn: &rusqlite::Connection,
3387    profile: &str,
3388) -> Result<Option<usize>, VectorRegenerationFailure> {
3389    let dimension: Option<i64> = conn
3390        .query_row(
3391            "SELECT dimension FROM vector_profiles WHERE profile = ?1 AND enabled = 1",
3392            [profile],
3393            |row| row.get(0),
3394        )
3395        .optional()
3396        .map_err(|error| {
3397            VectorRegenerationFailure::new(
3398                VectorRegenerationFailureClass::InvalidContract,
3399                error.to_string(),
3400            )
3401        })?;
3402    dimension
3403        .map(|value| {
3404            usize::try_from(value).map_err(|_| {
3405                VectorRegenerationFailure::new(
3406                    VectorRegenerationFailureClass::InvalidContract,
3407                    format!("stored vector profile dimension is invalid: {value}"),
3408                )
3409            })
3410        })
3411        .transpose()
3412}
3413
3414fn validate_existing_contract_version(
3415    conn: &rusqlite::Connection,
3416    profile: &str,
3417) -> Result<(), VectorRegenerationFailure> {
3418    let version: Option<i64> = conn
3419        .query_row(
3420            "SELECT contract_format_version FROM vector_embedding_contracts WHERE profile = ?1",
3421            [profile],
3422            |row| row.get(0),
3423        )
3424        .optional()
3425        .map_err(|error| {
3426            VectorRegenerationFailure::new(
3427                VectorRegenerationFailureClass::InvalidContract,
3428                error.to_string(),
3429            )
3430        })?;
3431    if let Some(version) = version
3432        && version > CURRENT_VECTOR_CONTRACT_FORMAT_VERSION
3433    {
3434        return Err(VectorRegenerationFailure::new(
3435            VectorRegenerationFailureClass::InvalidContract,
3436            format!(
3437                "persisted contract format version {version} is unsupported; supported version is {CURRENT_VECTOR_CONTRACT_FORMAT_VERSION}"
3438            ),
3439        ));
3440    }
3441    Ok(())
3442}
3443
3444fn serialize_audit_metadata(
3445    metadata: &VectorRegenerationAuditMetadata,
3446) -> Result<String, EngineError> {
3447    let json =
3448        serde_json::to_string(metadata).map_err(|error| EngineError::Bridge(error.to_string()))?;
3449    if json.len() > MAX_AUDIT_METADATA_BYTES {
3450        return Err(VectorRegenerationFailure::new(
3451            VectorRegenerationFailureClass::InvalidContract,
3452            format!("audit metadata exceeds {MAX_AUDIT_METADATA_BYTES} bytes"),
3453        )
3454        .to_engine_error());
3455    }
3456    Ok(json)
3457}
3458
3459#[allow(clippy::too_many_lines)]
3460fn run_vector_generator_bounded(
3461    config: &VectorRegenerationConfig,
3462    payload: &VectorRegenerationInput,
3463    policy: &VectorGeneratorPolicy,
3464) -> Result<GeneratedEmbeddings, VectorRegenerationFailure> {
3465    if payload.chunks.len() > policy.max_chunks {
3466        return Err(VectorRegenerationFailure::new(
3467            VectorRegenerationFailureClass::PayloadTooLarge,
3468            format!(
3469                "chunk count {} exceeds max_chunks {}",
3470                payload.chunks.len(),
3471                policy.max_chunks
3472            ),
3473        ));
3474    }
3475
3476    let input = serde_json::to_vec(payload).map_err(|error| {
3477        VectorRegenerationFailure::new(
3478            VectorRegenerationFailureClass::MalformedGeneratorJson,
3479            error.to_string(),
3480        )
3481    })?;
3482    if input.len() > policy.max_input_bytes {
3483        return Err(VectorRegenerationFailure::new(
3484            VectorRegenerationFailureClass::PayloadTooLarge,
3485            format!(
3486                "serialized input {} bytes exceeds max_input_bytes {}",
3487                input.len(),
3488                policy.max_input_bytes
3489            ),
3490        ));
3491    }
3492
3493    let mut command = Command::new(config.generator_command.first().ok_or_else(|| {
3494        VectorRegenerationFailure::new(
3495            VectorRegenerationFailureClass::InvalidContract,
3496            "missing generator executable",
3497        )
3498    })?);
3499    command.args(config.generator_command.iter().skip(1));
3500    command.stdin(Stdio::piped());
3501    command.stdout(Stdio::piped());
3502    command.stderr(Stdio::piped());
3503    command.env_clear();
3504    for env_var in &policy.preserve_env_vars {
3505        if let Some(value) = std::env::var_os(env_var) {
3506            command.env(env_var, value);
3507        }
3508    }
3509
3510    let mut child = command.spawn().map_err(|error| {
3511        VectorRegenerationFailure::new(
3512            VectorRegenerationFailureClass::GeneratorNonzeroExit,
3513            format!("failed to spawn generator: {error}"),
3514        )
3515    })?;
3516    if let Some(mut stdin) = child.stdin.take() {
3517        stdin.write_all(&input).map_err(|error| {
3518            VectorRegenerationFailure::new(
3519                VectorRegenerationFailureClass::GeneratorNonzeroExit,
3520                format!("failed to write generator stdin: {error}"),
3521            )
3522        })?;
3523    } else {
3524        return Err(VectorRegenerationFailure::new(
3525            VectorRegenerationFailureClass::GeneratorNonzeroExit,
3526            "failed to open generator stdin",
3527        ));
3528    }
3529
3530    let stdout = child.stdout.take().ok_or_else(|| {
3531        VectorRegenerationFailure::new(
3532            VectorRegenerationFailureClass::GeneratorNonzeroExit,
3533            "failed to open generator stdout",
3534        )
3535    })?;
3536    let stderr = child.stderr.take().ok_or_else(|| {
3537        VectorRegenerationFailure::new(
3538            VectorRegenerationFailureClass::GeneratorNonzeroExit,
3539            "failed to open generator stderr",
3540        )
3541    })?;
3542
3543    let (tx, rx) = mpsc::channel();
3544    let stdout_handle = spawn_capped_reader(
3545        stdout,
3546        policy.max_stdout_bytes,
3547        GeneratorStream::Stdout,
3548        tx.clone(),
3549    );
3550    let stderr_handle =
3551        spawn_capped_reader(stderr, policy.max_stderr_bytes, GeneratorStream::Stderr, tx);
3552
3553    let start = Instant::now();
3554    let timeout = Duration::from_millis(policy.timeout_ms);
3555    let mut stdout_bytes: Option<Vec<u8>> = None;
3556    let mut stderr_bytes: Option<Vec<u8>> = None;
3557    let mut status = None;
3558    let mut stream_error: Option<VectorRegenerationFailure> = None;
3559
3560    while status.is_none() && stream_error.is_none() {
3561        while let Ok((stream, result)) = rx.try_recv() {
3562            match (stream, result) {
3563                (GeneratorStream::Stdout, StreamReadResult::Complete(bytes)) => {
3564                    stdout_bytes = Some(bytes);
3565                }
3566                (GeneratorStream::Stderr, StreamReadResult::Complete(bytes)) => {
3567                    stderr_bytes = Some(bytes);
3568                }
3569                (GeneratorStream::Stdout, StreamReadResult::Overflow) => {
3570                    stream_error = Some(VectorRegenerationFailure::new(
3571                        VectorRegenerationFailureClass::GeneratorStdoutOverflow,
3572                        format!(
3573                            "stdout exceeded max_stdout_bytes {}",
3574                            policy.max_stdout_bytes
3575                        ),
3576                    ));
3577                }
3578                (GeneratorStream::Stderr, StreamReadResult::Overflow) => {
3579                    stream_error = Some(VectorRegenerationFailure::new(
3580                        VectorRegenerationFailureClass::GeneratorStderrOverflow,
3581                        format!(
3582                            "stderr exceeded max_stderr_bytes {}",
3583                            policy.max_stderr_bytes
3584                        ),
3585                    ));
3586                }
3587                (_, StreamReadResult::Io(error)) => {
3588                    stream_error = Some(VectorRegenerationFailure::new(
3589                        VectorRegenerationFailureClass::GeneratorNonzeroExit,
3590                        format!("failed to read generator stream: {error}"),
3591                    ));
3592                }
3593            }
3594        }
3595
3596        if stream_error.is_some() {
3597            let _ = child.kill();
3598            break;
3599        }
3600        if start.elapsed() > timeout {
3601            let _ = child.kill();
3602            stream_error = Some(VectorRegenerationFailure::new(
3603                VectorRegenerationFailureClass::GeneratorTimeout,
3604                format!("generator exceeded timeout after {}ms", policy.timeout_ms),
3605            ));
3606            break;
3607        }
3608        status = child.try_wait().map_err(|error| {
3609            VectorRegenerationFailure::new(
3610                VectorRegenerationFailureClass::GeneratorNonzeroExit,
3611                format!("failed to poll generator status: {error}"),
3612            )
3613        })?;
3614        if status.is_none() {
3615            thread::sleep(Duration::from_millis(10));
3616        }
3617    }
3618
3619    let _ = child.wait();
3620    let _ = stdout_handle.join();
3621    let _ = stderr_handle.join();
3622
3623    while let Ok((stream, result)) = rx.try_recv() {
3624        match (stream, result) {
3625            (GeneratorStream::Stdout, StreamReadResult::Complete(bytes)) => {
3626                stdout_bytes = Some(bytes);
3627            }
3628            (GeneratorStream::Stderr, StreamReadResult::Complete(bytes)) => {
3629                stderr_bytes = Some(bytes);
3630            }
3631            (GeneratorStream::Stdout, StreamReadResult::Overflow) => {
3632                stream_error = Some(VectorRegenerationFailure::new(
3633                    VectorRegenerationFailureClass::GeneratorStdoutOverflow,
3634                    format!(
3635                        "stdout exceeded max_stdout_bytes {}",
3636                        policy.max_stdout_bytes
3637                    ),
3638                ));
3639            }
3640            (GeneratorStream::Stderr, StreamReadResult::Overflow) => {
3641                stream_error = Some(VectorRegenerationFailure::new(
3642                    VectorRegenerationFailureClass::GeneratorStderrOverflow,
3643                    format!(
3644                        "stderr exceeded max_stderr_bytes {}",
3645                        policy.max_stderr_bytes
3646                    ),
3647                ));
3648            }
3649            (_, StreamReadResult::Io(error)) => {
3650                stream_error = Some(VectorRegenerationFailure::new(
3651                    VectorRegenerationFailureClass::GeneratorNonzeroExit,
3652                    format!("failed to read generator stream: {error}"),
3653                ));
3654            }
3655        }
3656    }
3657
3658    if let Some(error) = stream_error {
3659        return Err(error);
3660    }
3661
3662    let status = status.ok_or_else(|| {
3663        VectorRegenerationFailure::new(
3664            VectorRegenerationFailureClass::GeneratorNonzeroExit,
3665            "vector generator exited without a status",
3666        )
3667    })?;
3668    if !status.success() {
3669        let stderr =
3670            truncate_error_text(&stderr_bytes.unwrap_or_default(), policy.max_stderr_bytes);
3671        return Err(VectorRegenerationFailure::new(
3672            VectorRegenerationFailureClass::GeneratorNonzeroExit,
3673            stderr,
3674        ));
3675    }
3676
3677    let stdout = stdout_bytes.unwrap_or_default();
3678    serde_json::from_slice(&stdout).map_err(|error| {
3679        VectorRegenerationFailure::new(
3680            VectorRegenerationFailureClass::MalformedGeneratorJson,
3681            format!("decode generator output: {error}"),
3682        )
3683    })
3684}
3685
3686fn spawn_capped_reader<R: Read + Send + 'static>(
3687    mut reader: R,
3688    max_bytes: usize,
3689    stream: GeneratorStream,
3690    tx: mpsc::Sender<(GeneratorStream, StreamReadResult)>,
3691) -> thread::JoinHandle<()> {
3692    thread::spawn(move || {
3693        let mut buffer = Vec::new();
3694        let mut chunk = [0u8; 8192];
3695        loop {
3696            match reader.read(&mut chunk) {
3697                Ok(0) => {
3698                    let _ = tx.send((stream, StreamReadResult::Complete(buffer)));
3699                    break;
3700                }
3701                Ok(read_bytes) => {
3702                    if buffer.len() + read_bytes > max_bytes {
3703                        let _ = tx.send((stream, StreamReadResult::Overflow));
3704                        break;
3705                    }
3706                    buffer.extend_from_slice(&chunk[..read_bytes]);
3707                }
3708                Err(error) => {
3709                    let _ = tx.send((stream, StreamReadResult::Io(error)));
3710                    break;
3711                }
3712            }
3713        }
3714    })
3715}
3716
/// Renders captured process output as text for an error message, keeping at
/// most `max_bytes` bytes of `bytes`.
///
/// Input within the limit is returned unchanged (lossily decoded as UTF-8).
/// Longer input is cut to its first `max_bytes` bytes and suffixed with
/// `" [truncated]"`. A multi-byte character split by the cut is rendered as
/// the Unicode replacement character by the lossy decoder.
fn truncate_error_text(bytes: &[u8], max_bytes: usize) -> String {
    let truncated = bytes.len() > max_bytes;
    let kept = if truncated { &bytes[..max_bytes] } else { bytes };
    let mut text = String::from_utf8_lossy(kept).into_owned();
    if truncated {
        text.push_str(" [truncated]");
    }
    text
}
3724
3725fn count_source_ref(
3726    conn: &rusqlite::Connection,
3727    table: &str,
3728    source_ref: &str,
3729) -> Result<usize, EngineError> {
3730    let sql = match table {
3731        "nodes" => "SELECT count(*) FROM nodes WHERE source_ref = ?1",
3732        "edges" => "SELECT count(*) FROM edges WHERE source_ref = ?1",
3733        "actions" => "SELECT count(*) FROM actions WHERE source_ref = ?1",
3734        "operational_mutations" => {
3735            "SELECT count(*) FROM operational_mutations WHERE source_ref = ?1"
3736        }
3737        other => return Err(EngineError::Bridge(format!("unknown table: {other}"))),
3738    };
3739    let count: i64 = conn.query_row(sql, [source_ref], |row| row.get(0))?;
3740    // FIX(review): was `count as usize` — unsound cast.
3741    // Chose option (C) here: propagate error since this is a user-facing helper.
3742    usize::try_from(count)
3743        .map_err(|_| EngineError::Bridge(format!("count overflow for table {table}: {count}")))
3744}
3745
3746fn rebuild_operational_current_rows(
3747    tx: &rusqlite::Transaction<'_>,
3748    collections: &[String],
3749) -> Result<usize, EngineError> {
3750    let mut rebuilt_rows = 0usize;
3751    clear_operational_current_rows(tx, collections)?;
3752    let mut ins_current = tx.prepare_cached(
3753        "INSERT INTO operational_current \
3754         (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
3755         VALUES (?1, ?2, ?3, ?4, ?5)",
3756    )?;
3757
3758    for collection in collections {
3759        let mut stmt = tx.prepare(
3760            "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
3761             FROM operational_mutations \
3762             WHERE collection_name = ?1 \
3763             ORDER BY record_key, mutation_order",
3764        )?;
3765        let mut latest_by_key: std::collections::HashMap<String, Option<(String, i64, String)>> =
3766            std::collections::HashMap::new();
3767        let rows = stmt.query_map([collection], map_operational_mutation_row)?;
3768        for row in rows {
3769            let mutation = row?;
3770            match mutation.op_kind.as_str() {
3771                "put" => {
3772                    latest_by_key.insert(
3773                        mutation.record_key,
3774                        Some((mutation.payload_json, mutation.created_at, mutation.id)),
3775                    );
3776                }
3777                "delete" => {
3778                    latest_by_key.insert(mutation.record_key, None);
3779                }
3780                _ => {}
3781            }
3782        }
3783
3784        for (record_key, state) in latest_by_key {
3785            if let Some((payload_json, updated_at, last_mutation_id)) = state {
3786                ins_current.execute(rusqlite::params![
3787                    collection,
3788                    record_key,
3789                    payload_json,
3790                    updated_at,
3791                    last_mutation_id,
3792                ])?;
3793                rebuilt_rows += 1;
3794            }
3795        }
3796    }
3797
3798    drop(ins_current);
3799    Ok(rebuilt_rows)
3800}
3801
3802fn clear_operational_current_rows(
3803    tx: &rusqlite::Transaction<'_>,
3804    collections: &[String],
3805) -> Result<(), EngineError> {
3806    let mut delete_current =
3807        tx.prepare_cached("DELETE FROM operational_current WHERE collection_name = ?1")?;
3808    let mut delete_secondary_current = tx.prepare_cached(
3809        "DELETE FROM operational_secondary_index_entries \
3810         WHERE collection_name = ?1 AND subject_kind = 'current'",
3811    )?;
3812    for collection in collections {
3813        delete_secondary_current.execute([collection])?;
3814        delete_current.execute([collection])?;
3815    }
3816    drop(delete_secondary_current);
3817    drop(delete_current);
3818    Ok(())
3819}
3820
3821fn clear_operational_secondary_index_entries(
3822    tx: &rusqlite::Transaction<'_>,
3823    collection_name: &str,
3824) -> Result<(), EngineError> {
3825    tx.execute(
3826        "DELETE FROM operational_secondary_index_entries WHERE collection_name = ?1",
3827        [collection_name],
3828    )?;
3829    Ok(())
3830}
3831
3832fn insert_operational_secondary_index_entry(
3833    tx: &rusqlite::Transaction<'_>,
3834    collection_name: &str,
3835    subject_kind: &str,
3836    mutation_id: &str,
3837    record_key: &str,
3838    entry: &crate::operational::OperationalSecondaryIndexEntry,
3839) -> Result<(), EngineError> {
3840    tx.execute(
3841        "INSERT INTO operational_secondary_index_entries \
3842         (collection_name, index_name, subject_kind, mutation_id, record_key, sort_timestamp, \
3843          slot1_text, slot1_integer, slot2_text, slot2_integer, slot3_text, slot3_integer) \
3844         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
3845        rusqlite::params![
3846            collection_name,
3847            entry.index_name,
3848            subject_kind,
3849            mutation_id,
3850            record_key,
3851            entry.sort_timestamp,
3852            entry.slot1_text,
3853            entry.slot1_integer,
3854            entry.slot2_text,
3855            entry.slot2_integer,
3856            entry.slot3_text,
3857            entry.slot3_integer,
3858        ],
3859    )?;
3860    Ok(())
3861}
3862
3863fn rebuild_operational_secondary_index_entries(
3864    tx: &rusqlite::Transaction<'_>,
3865    collection_name: &str,
3866    collection_kind: OperationalCollectionKind,
3867    indexes: &[OperationalSecondaryIndexDefinition],
3868) -> Result<(usize, usize), EngineError> {
3869    clear_operational_secondary_index_entries(tx, collection_name)?;
3870
3871    let mut mutation_entries_rebuilt = 0usize;
3872    if collection_kind == OperationalCollectionKind::AppendOnlyLog {
3873        let mut stmt = tx.prepare(
3874            "SELECT id, record_key, payload_json FROM operational_mutations \
3875             WHERE collection_name = ?1 ORDER BY mutation_order",
3876        )?;
3877        let rows = stmt
3878            .query_map([collection_name], |row| {
3879                Ok((
3880                    row.get::<_, String>(0)?,
3881                    row.get::<_, String>(1)?,
3882                    row.get::<_, String>(2)?,
3883                ))
3884            })?
3885            .collect::<Result<Vec<_>, _>>()?;
3886        drop(stmt);
3887        for (mutation_id, record_key, payload_json) in rows {
3888            for entry in extract_secondary_index_entries_for_mutation(indexes, &payload_json) {
3889                insert_operational_secondary_index_entry(
3890                    tx,
3891                    collection_name,
3892                    "mutation",
3893                    &mutation_id,
3894                    &record_key,
3895                    &entry,
3896                )?;
3897                mutation_entries_rebuilt += 1;
3898            }
3899        }
3900    }
3901
3902    let mut current_entries_rebuilt = 0usize;
3903    if collection_kind == OperationalCollectionKind::LatestState {
3904        let mut stmt = tx.prepare(
3905            "SELECT record_key, payload_json, updated_at, last_mutation_id FROM operational_current \
3906             WHERE collection_name = ?1 ORDER BY updated_at DESC, record_key",
3907        )?;
3908        let rows = stmt
3909            .query_map([collection_name], |row| {
3910                Ok((
3911                    row.get::<_, String>(0)?,
3912                    row.get::<_, String>(1)?,
3913                    row.get::<_, i64>(2)?,
3914                    row.get::<_, String>(3)?,
3915                ))
3916            })?
3917            .collect::<Result<Vec<_>, _>>()?;
3918        drop(stmt);
3919        for (record_key, payload_json, updated_at, last_mutation_id) in rows {
3920            for entry in
3921                extract_secondary_index_entries_for_current(indexes, &payload_json, updated_at)
3922            {
3923                insert_operational_secondary_index_entry(
3924                    tx,
3925                    collection_name,
3926                    "current",
3927                    &last_mutation_id,
3928                    &record_key,
3929                    &entry,
3930                )?;
3931                current_entries_rebuilt += 1;
3932            }
3933        }
3934    }
3935
3936    Ok((mutation_entries_rebuilt, current_entries_rebuilt))
3937}
3938
3939fn collect_strings_tx(
3940    tx: &rusqlite::Transaction<'_>,
3941    sql: &str,
3942    value: &str,
3943) -> Result<Vec<String>, EngineError> {
3944    let mut stmt = tx.prepare(sql)?;
3945    let rows = stmt.query_map([value], |row| row.get::<_, String>(0))?;
3946    rows.collect::<Result<Vec<_>, _>>()
3947        .map_err(EngineError::from)
3948}
3949
/// Converts an `i64` row count into a `usize`.
///
/// # Panics
/// Panics when `val` cannot be represented as a `usize` (for example when it
/// is negative), which would indicate data corruption for a `count(*)` result.
#[allow(clippy::expect_used)]
fn i64_to_usize(val: i64) -> usize {
    let converted = usize::try_from(val);
    converted.expect("count(*) must be non-negative")
}
3956
3957/// Runs a parameterized query and collects the first column as strings.
3958///
3959/// NOTE(review): sql parameter must be a hardcoded query string, never user input.
3960/// Options: (A) doc comment, (B) whitelist refactor like `count_source_ref`, (C) leave as-is.
3961/// Chose (A): function is private, only called with hardcoded SQL from `trace_source`.
3962/// Whitelist refactor not practical — queries have different SELECT/ORDER BY per table.
3963fn collect_strings(
3964    conn: &rusqlite::Connection,
3965    sql: &str,
3966    param: &str,
3967) -> Result<Vec<String>, EngineError> {
3968    let mut stmt = conn.prepare(sql)?;
3969    let values = stmt
3970        .query_map([param], |row| row.get::<_, String>(0))?
3971        .collect::<Result<Vec<_>, _>>()?;
3972    Ok(values)
3973}
3974
3975fn collect_edge_logical_ids_for_restore(
3976    tx: &rusqlite::Transaction<'_>,
3977    logical_id: &str,
3978    retire_source_ref: Option<&str>,
3979    retire_created_at: i64,
3980    retire_event_rowid: i64,
3981) -> Result<Vec<String>, EngineError> {
3982    let mut stmt = tx.prepare(
3983        "SELECT DISTINCT e.logical_id \
3984         FROM edges e \
3985         JOIN provenance_events p \
3986           ON p.subject = e.logical_id \
3987          AND p.event_type = 'edge_retire' \
3988          AND ( \
3989                p.created_at > ?3 \
3990                OR (p.created_at = ?3 AND p.rowid >= ?4) \
3991          ) \
3992          AND ((?2 IS NULL AND p.source_ref IS NULL) OR p.source_ref = ?2) \
3993         WHERE e.superseded_at IS NOT NULL \
3994           AND (e.source_logical_id = ?1 OR e.target_logical_id = ?1) \
3995           AND NOT EXISTS ( \
3996                SELECT 1 FROM edges active \
3997                WHERE active.logical_id = e.logical_id \
3998                  AND active.superseded_at IS NULL \
3999           ) \
4000         ORDER BY e.logical_id",
4001    )?;
4002    let edge_ids = stmt
4003        .query_map(
4004            rusqlite::params![
4005                logical_id,
4006                retire_source_ref,
4007                retire_created_at,
4008                retire_event_rowid
4009            ],
4010            |row| row.get::<_, String>(0),
4011        )?
4012        .collect::<Result<Vec<_>, _>>()?;
4013    Ok(edge_ids)
4014}
4015
4016/// Restores edges for a node being restored, skipping any whose counterpart
4017/// endpoint is not active (e.g. still retired or purged).
4018fn restore_validated_edges(
4019    tx: &rusqlite::Transaction<'_>,
4020    logical_id: &str,
4021    retire_source_ref: Option<&str>,
4022    retire_created_at: i64,
4023    retire_event_rowid: i64,
4024) -> Result<(usize, Vec<SkippedEdge>), EngineError> {
4025    let edge_logical_ids = collect_edge_logical_ids_for_restore(
4026        tx,
4027        logical_id,
4028        retire_source_ref,
4029        retire_created_at,
4030        retire_event_rowid,
4031    )?;
4032    let mut restored = 0usize;
4033    let mut skipped = Vec::new();
4034    for edge_logical_id in &edge_logical_ids {
4035        let edge_detail: Option<(String, String, String)> = tx
4036            .query_row(
4037                "SELECT row_id, source_logical_id, target_logical_id FROM edges \
4038                 WHERE logical_id = ?1 AND superseded_at IS NOT NULL \
4039                 ORDER BY superseded_at DESC, created_at DESC, rowid DESC LIMIT 1",
4040                [edge_logical_id.as_str()],
4041                |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
4042            )
4043            .optional()?;
4044        let Some((edge_row_id, source_lid, target_lid)) = edge_detail else {
4045            continue;
4046        };
4047        let other_endpoint = if source_lid == logical_id {
4048            &target_lid
4049        } else {
4050            &source_lid
4051        };
4052        let endpoint_active: bool = tx
4053            .query_row(
4054                "SELECT 1 FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL LIMIT 1",
4055                [other_endpoint.as_str()],
4056                |_| Ok(true),
4057            )
4058            .optional()?
4059            .unwrap_or(false);
4060        if !endpoint_active {
4061            skipped.push(SkippedEdge {
4062                edge_logical_id: edge_logical_id.clone(),
4063                missing_endpoint: other_endpoint.clone(),
4064            });
4065            continue;
4066        }
4067        restored += tx.execute(
4068            "UPDATE edges SET superseded_at = NULL WHERE row_id = ?1",
4069            [edge_row_id.as_str()],
4070        )?;
4071    }
4072    Ok((restored, skipped))
4073}
4074
/// Counts the `vec_nodes_active` rows whose chunk belongs to `logical_id`.
///
/// A SQLite failure whose message mentions `vec_nodes_active` or
/// `no such module: vec0` is reported as zero rows instead of an error
/// (presumably the vector table or extension is not available — confirm).
///
/// # Errors
/// Returns [`EngineError::Sqlite`] for any other query failure.
#[cfg(feature = "sqlite-vec")]
fn count_vec_rows_for_logical_id(
    tx: &rusqlite::Transaction<'_>,
    logical_id: &str,
) -> Result<usize, EngineError> {
    let outcome = tx.query_row(
        "SELECT count(*) FROM vec_nodes_active v \
         JOIN chunks c ON c.id = v.chunk_id \
         WHERE c.node_logical_id = ?1",
        [logical_id],
        |row| row.get::<_, i64>(0),
    );
    match outcome {
        Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
            if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
        {
            Ok(0)
        }
        Err(error) => Err(EngineError::Sqlite(error)),
        Ok(count) => Ok(i64_to_usize(count)),
    }
}
4096
4097#[cfg(not(feature = "sqlite-vec"))]
4098#[allow(clippy::unnecessary_wraps)]
4099fn count_vec_rows_for_logical_id(
4100    _tx: &rusqlite::Transaction<'_>,
4101    _logical_id: &str,
4102) -> Result<usize, EngineError> {
4103    Ok(0)
4104}
4105
/// Deletes the `vec_nodes_active` rows whose chunk belongs to `logical_id`
/// and returns how many rows were removed.
///
/// A SQLite failure whose message mentions `vec_nodes_active` or
/// `no such module: vec0` is reported as zero deletions instead of an error
/// (presumably the vector table or extension is not available — confirm).
///
/// # Errors
/// Returns [`EngineError::Sqlite`] for any other failure.
#[cfg(feature = "sqlite-vec")]
fn delete_vec_rows_for_logical_id(
    tx: &rusqlite::Transaction<'_>,
    logical_id: &str,
) -> Result<usize, EngineError> {
    let outcome = tx.execute(
        "DELETE FROM vec_nodes_active \
         WHERE chunk_id IN (SELECT id FROM chunks WHERE node_logical_id = ?1)",
        [logical_id],
    );
    match outcome {
        Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
            if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
        {
            Ok(0)
        }
        Err(error) => Err(EngineError::Sqlite(error)),
        Ok(deleted) => Ok(deleted),
    }
}
4125
4126#[cfg(not(feature = "sqlite-vec"))]
4127#[allow(clippy::unnecessary_wraps)]
4128fn delete_vec_rows_for_logical_id(
4129    _tx: &rusqlite::Transaction<'_>,
4130    _logical_id: &str,
4131) -> Result<usize, EngineError> {
4132    Ok(0)
4133}
4134
4135fn ensure_operational_collection_registered(
4136    conn: &rusqlite::Connection,
4137    collection_name: &str,
4138) -> Result<(), EngineError> {
4139    if load_operational_collection_record(conn, collection_name)?.is_none() {
4140        return Err(EngineError::InvalidWrite(format!(
4141            "operational collection '{collection_name}' is not registered"
4142        )));
4143    }
4144    Ok(())
4145}
4146
4147fn load_operational_collection_record(
4148    conn: &rusqlite::Connection,
4149    name: &str,
4150) -> Result<Option<OperationalCollectionRecord>, EngineError> {
4151    conn.query_row(
4152        "SELECT name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at, disabled_at \
4153         FROM operational_collections WHERE name = ?1",
4154        [name],
4155        map_operational_collection_row,
4156    )
4157    .optional()
4158    .map_err(EngineError::Sqlite)
4159}
4160
4161fn validate_append_only_operational_collection(
4162    record: &OperationalCollectionRecord,
4163    operation: &str,
4164) -> Result<(), EngineError> {
4165    if record.kind != OperationalCollectionKind::AppendOnlyLog {
4166        return Err(EngineError::InvalidWrite(format!(
4167            "operational collection '{}' must be append_only_log to {operation}",
4168            record.name
4169        )));
4170    }
4171    Ok(())
4172}
4173
/// A single read filter after it has been checked against the collection's
/// declared filter fields (see `compile_operational_read_filters`).
#[derive(Clone, Debug, PartialEq, Eq)]
struct CompiledOperationalReadFilter {
    /// Name of the declared filter field this filter applies to.
    field: String,
    /// The comparison to apply to that field's value.
    condition: OperationalReadCondition,
}
4179
/// The outcome of `match_append_only_secondary_index_read`: a secondary index
/// that can serve a set of compiled filters, plus the filters mapped onto it.
#[derive(Clone, Debug)]
struct MatchedAppendOnlySecondaryIndexRead<'a> {
    /// Name of the matching secondary index definition.
    index_name: &'a str,
    /// The filter on the index's value field (exact or prefix).
    value_filter: &'a CompiledOperationalReadFilter,
    /// Optional range filter on the index's time field.
    time_range: Option<&'a CompiledOperationalReadFilter>,
}
4186
/// The comparison carried by a compiled operational read filter.
#[derive(Clone, Debug, PartialEq, Eq)]
enum OperationalReadCondition {
    /// The string value must equal the given string.
    ExactString(String),
    /// The integer value must equal the given integer.
    ExactInteger(i64),
    /// The string value must start with the given prefix (evaluated via GLOB).
    Prefix(String),
    /// The integer value must lie within the given bounds; each bound is
    /// optional and, where present, is compared inclusively (`>=` / `<=`).
    Range {
        lower: Option<i64>,
        upper: Option<i64>,
    },
}
4197
4198fn operational_read_limit(limit: Option<usize>) -> Result<usize, EngineError> {
4199    let applied_limit = limit.unwrap_or(DEFAULT_OPERATIONAL_READ_LIMIT);
4200    if applied_limit == 0 {
4201        return Err(EngineError::InvalidWrite(
4202            "operational read limit must be greater than zero".to_owned(),
4203        ));
4204    }
4205    Ok(applied_limit.min(MAX_OPERATIONAL_READ_LIMIT))
4206}
4207
4208fn parse_operational_filter_fields(
4209    filter_fields_json: &str,
4210) -> Result<Vec<OperationalFilterField>, String> {
4211    let fields: Vec<OperationalFilterField> = serde_json::from_str(filter_fields_json)
4212        .map_err(|error| format!("invalid filter_fields_json: {error}"))?;
4213    let mut seen = std::collections::HashSet::new();
4214    for field in &fields {
4215        if field.name.trim().is_empty() {
4216            return Err("filter_fields_json field names must not be empty".to_owned());
4217        }
4218        if !seen.insert(field.name.as_str()) {
4219            return Err(format!(
4220                "filter_fields_json contains duplicate field '{}'",
4221                field.name
4222            ));
4223        }
4224        if field.modes.is_empty() {
4225            return Err(format!(
4226                "filter_fields_json field '{}' must declare at least one mode",
4227                field.name
4228            ));
4229        }
4230        if field.modes.contains(&OperationalFilterMode::Prefix)
4231            && field.field_type != OperationalFilterFieldType::String
4232        {
4233            return Err(format!(
4234                "filter field '{}' only supports prefix for string types",
4235                field.name
4236            ));
4237        }
4238    }
4239    Ok(fields)
4240}
4241
4242fn compile_operational_read_filters(
4243    filters: &[OperationalFilterClause],
4244    declared_fields: &[OperationalFilterField],
4245) -> Result<Vec<CompiledOperationalReadFilter>, EngineError> {
4246    let field_map = declared_fields
4247        .iter()
4248        .map(|field| (field.name.as_str(), field))
4249        .collect::<std::collections::HashMap<_, _>>();
4250    filters
4251        .iter()
4252        .map(|filter| match filter {
4253            OperationalFilterClause::Exact { field, value } => {
4254                let declared = field_map.get(field.as_str()).ok_or_else(|| {
4255                    EngineError::InvalidWrite(format!(
4256                        "operational read filter uses undeclared field '{field}'"
4257                    ))
4258                })?;
4259                if !declared.modes.contains(&OperationalFilterMode::Exact) {
4260                    return Err(EngineError::InvalidWrite(format!(
4261                        "operational read field '{field}' does not allow exact filters"
4262                    )));
4263                }
4264                let condition = match (declared.field_type, value) {
4265                    (OperationalFilterFieldType::String, OperationalFilterValue::String(value)) => {
4266                        OperationalReadCondition::ExactString(value.clone())
4267                    }
4268                    (
4269                        OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp,
4270                        OperationalFilterValue::Integer(value),
4271                    ) => OperationalReadCondition::ExactInteger(*value),
4272                    _ => {
4273                        return Err(EngineError::InvalidWrite(format!(
4274                            "operational read field '{field}' received a value with the wrong type"
4275                        )));
4276                    }
4277                };
4278                Ok(CompiledOperationalReadFilter {
4279                    field: field.clone(),
4280                    condition,
4281                })
4282            }
4283            OperationalFilterClause::Prefix { field, value } => {
4284                let declared = field_map.get(field.as_str()).ok_or_else(|| {
4285                    EngineError::InvalidWrite(format!(
4286                        "operational read filter uses undeclared field '{field}'"
4287                    ))
4288                })?;
4289                if !declared.modes.contains(&OperationalFilterMode::Prefix) {
4290                    return Err(EngineError::InvalidWrite(format!(
4291                        "operational read field '{field}' does not allow prefix filters"
4292                    )));
4293                }
4294                if declared.field_type != OperationalFilterFieldType::String {
4295                    return Err(EngineError::InvalidWrite(format!(
4296                        "operational read field '{field}' only supports prefix filters for strings"
4297                    )));
4298                }
4299                Ok(CompiledOperationalReadFilter {
4300                    field: field.clone(),
4301                    condition: OperationalReadCondition::Prefix(value.clone()),
4302                })
4303            }
4304            OperationalFilterClause::Range {
4305                field,
4306                lower,
4307                upper,
4308            } => {
4309                let declared = field_map.get(field.as_str()).ok_or_else(|| {
4310                    EngineError::InvalidWrite(format!(
4311                        "operational read filter uses undeclared field '{field}'"
4312                    ))
4313                })?;
4314                if !declared.modes.contains(&OperationalFilterMode::Range) {
4315                    return Err(EngineError::InvalidWrite(format!(
4316                        "operational read field '{field}' does not allow range filters"
4317                    )));
4318                }
4319                if !matches!(
4320                    declared.field_type,
4321                    OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp
4322                ) {
4323                    return Err(EngineError::InvalidWrite(format!(
4324                        "operational read field '{field}' only supports range filters for integer/timestamp fields"
4325                    )));
4326                }
4327                if lower.is_none() && upper.is_none() {
4328                    return Err(EngineError::InvalidWrite(format!(
4329                        "operational read range filter for '{field}' must specify a lower or upper bound"
4330                    )));
4331                }
4332                Ok(CompiledOperationalReadFilter {
4333                    field: field.clone(),
4334                    condition: OperationalReadCondition::Range {
4335                        lower: *lower,
4336                        upper: *upper,
4337                    },
4338                })
4339            }
4340        })
4341        .collect()
4342}
4343
4344fn match_append_only_secondary_index_read<'a>(
4345    filters: &'a [CompiledOperationalReadFilter],
4346    indexes: &'a [OperationalSecondaryIndexDefinition],
4347) -> Option<MatchedAppendOnlySecondaryIndexRead<'a>> {
4348    indexes.iter().find_map(|index| {
4349        let OperationalSecondaryIndexDefinition::AppendOnlyFieldTime {
4350            name,
4351            field,
4352            value_type,
4353            time_field,
4354        } = index
4355        else {
4356            return None;
4357        };
4358        if !(1..=2).contains(&filters.len()) {
4359            return None;
4360        }
4361
4362        let mut value_filter = None;
4363        let mut time_range = None;
4364        for filter in filters {
4365            if filter.field == *field {
4366                let supported = matches!(
4367                    (&filter.condition, value_type),
4368                    (
4369                        OperationalReadCondition::ExactString(_)
4370                            | OperationalReadCondition::Prefix(_),
4371                        crate::operational::OperationalSecondaryIndexValueType::String
4372                    ) | (
4373                        OperationalReadCondition::ExactInteger(_),
4374                        crate::operational::OperationalSecondaryIndexValueType::Integer
4375                            | crate::operational::OperationalSecondaryIndexValueType::Timestamp
4376                    )
4377                );
4378                if !supported || value_filter.is_some() {
4379                    return None;
4380                }
4381                value_filter = Some(filter);
4382                continue;
4383            }
4384            if filter.field == *time_field {
4385                if !matches!(filter.condition, OperationalReadCondition::Range { .. })
4386                    || time_range.is_some()
4387                {
4388                    return None;
4389                }
4390                time_range = Some(filter);
4391                continue;
4392            }
4393            return None;
4394        }
4395
4396        value_filter.map(|value_filter| MatchedAppendOnlySecondaryIndexRead {
4397            index_name: name.as_str(),
4398            value_filter,
4399            time_range,
4400        })
4401    })
4402}
4403
4404fn execute_operational_secondary_index_read(
4405    conn: &rusqlite::Connection,
4406    collection_name: &str,
4407    filters: &[CompiledOperationalReadFilter],
4408    indexes: &[OperationalSecondaryIndexDefinition],
4409    applied_limit: usize,
4410) -> Result<Option<OperationalReadReport>, EngineError> {
4411    use rusqlite::types::Value;
4412
4413    let Some(matched) = match_append_only_secondary_index_read(filters, indexes) else {
4414        return Ok(None);
4415    };
4416
4417    let mut sql = String::from(
4418        "SELECT m.id, m.collection_name, m.record_key, m.op_kind, m.payload_json, m.source_ref, m.created_at \
4419         FROM operational_secondary_index_entries s \
4420         JOIN operational_mutations m ON m.id = s.mutation_id \
4421         WHERE s.collection_name = ?1 AND s.index_name = ?2 AND s.subject_kind = 'mutation' ",
4422    );
4423    let mut params = vec![
4424        Value::from(collection_name.to_owned()),
4425        Value::from(matched.index_name.to_owned()),
4426    ];
4427
4428    match &matched.value_filter.condition {
4429        OperationalReadCondition::ExactString(value) => {
4430            let _ = write!(sql, "AND s.slot1_text = ?{} ", params.len() + 1);
4431            params.push(Value::from(value.clone()));
4432        }
4433        OperationalReadCondition::Prefix(value) => {
4434            let _ = write!(sql, "AND s.slot1_text GLOB ?{} ", params.len() + 1);
4435            params.push(Value::from(glob_prefix_pattern(value)));
4436        }
4437        OperationalReadCondition::ExactInteger(value) => {
4438            let _ = write!(sql, "AND s.slot1_integer = ?{} ", params.len() + 1);
4439            params.push(Value::from(*value));
4440        }
4441        OperationalReadCondition::Range { .. } => return Ok(None),
4442    }
4443
4444    if let Some(time_range) = matched.time_range
4445        && let OperationalReadCondition::Range { lower, upper } = &time_range.condition
4446    {
4447        if let Some(lower) = lower {
4448            let _ = write!(sql, "AND s.sort_timestamp >= ?{} ", params.len() + 1);
4449            params.push(Value::from(*lower));
4450        }
4451        if let Some(upper) = upper {
4452            let _ = write!(sql, "AND s.sort_timestamp <= ?{} ", params.len() + 1);
4453            params.push(Value::from(*upper));
4454        }
4455    }
4456
4457    let _ = write!(
4458        sql,
4459        "ORDER BY s.sort_timestamp DESC, m.mutation_order DESC LIMIT ?{}",
4460        params.len() + 1
4461    );
4462    params.push(Value::from(i64::try_from(applied_limit + 1).map_err(
4463        |_| EngineError::Bridge("operational read limit overflow".to_owned()),
4464    )?));
4465
4466    let mut stmt = conn.prepare(&sql)?;
4467    let mut rows = stmt
4468        .query_map(
4469            rusqlite::params_from_iter(params),
4470            map_operational_mutation_row,
4471        )?
4472        .collect::<Result<Vec<_>, _>>()?;
4473    let was_limited = rows.len() > applied_limit;
4474    if was_limited {
4475        rows.truncate(applied_limit);
4476    }
4477
4478    Ok(Some(OperationalReadReport {
4479        collection_name: collection_name.to_owned(),
4480        row_count: rows.len(),
4481        applied_limit,
4482        was_limited,
4483        rows,
4484    }))
4485}
4486
4487fn execute_operational_filtered_read(
4488    conn: &rusqlite::Connection,
4489    collection_name: &str,
4490    filters: &[CompiledOperationalReadFilter],
4491    applied_limit: usize,
4492) -> Result<OperationalReadReport, EngineError> {
4493    use rusqlite::types::Value;
4494
4495    let mut sql = String::from(
4496        "SELECT m.id, m.collection_name, m.record_key, m.op_kind, m.payload_json, m.source_ref, m.created_at \
4497         FROM operational_mutations m ",
4498    );
4499    let mut params = vec![Value::from(collection_name.to_owned())];
4500    for (index, filter) in filters.iter().enumerate() {
4501        let _ = write!(
4502            sql,
4503            "JOIN operational_filter_values f{index} \
4504             ON f{index}.mutation_id = m.id \
4505            AND f{index}.collection_name = m.collection_name "
4506        );
4507        match &filter.condition {
4508            OperationalReadCondition::ExactString(value) => {
4509                let _ = write!(
4510                    sql,
4511                    "AND f{index}.field_name = ?{} AND f{index}.string_value = ?{} ",
4512                    params.len() + 1,
4513                    params.len() + 2
4514                );
4515                params.push(Value::from(filter.field.clone()));
4516                params.push(Value::from(value.clone()));
4517            }
4518            OperationalReadCondition::ExactInteger(value) => {
4519                let _ = write!(
4520                    sql,
4521                    "AND f{index}.field_name = ?{} AND f{index}.integer_value = ?{} ",
4522                    params.len() + 1,
4523                    params.len() + 2
4524                );
4525                params.push(Value::from(filter.field.clone()));
4526                params.push(Value::from(*value));
4527            }
4528            OperationalReadCondition::Prefix(value) => {
4529                let _ = write!(
4530                    sql,
4531                    "AND f{index}.field_name = ?{} AND f{index}.string_value GLOB ?{} ",
4532                    params.len() + 1,
4533                    params.len() + 2
4534                );
4535                params.push(Value::from(filter.field.clone()));
4536                params.push(Value::from(glob_prefix_pattern(value)));
4537            }
4538            OperationalReadCondition::Range { lower, upper } => {
4539                let _ = write!(sql, "AND f{index}.field_name = ?{} ", params.len() + 1);
4540                params.push(Value::from(filter.field.clone()));
4541                if let Some(lower) = lower {
4542                    let _ = write!(sql, "AND f{index}.integer_value >= ?{} ", params.len() + 1);
4543                    params.push(Value::from(*lower));
4544                }
4545                if let Some(upper) = upper {
4546                    let _ = write!(sql, "AND f{index}.integer_value <= ?{} ", params.len() + 1);
4547                    params.push(Value::from(*upper));
4548                }
4549            }
4550        }
4551    }
4552    let _ = write!(
4553        sql,
4554        "WHERE m.collection_name = ?1 ORDER BY m.mutation_order DESC LIMIT ?{}",
4555        params.len() + 1
4556    );
4557    params.push(Value::from(i64::try_from(applied_limit + 1).map_err(
4558        |_| EngineError::Bridge("operational read limit overflow".to_owned()),
4559    )?));
4560
4561    let mut stmt = conn.prepare(&sql)?;
4562    let mut rows = stmt
4563        .query_map(
4564            rusqlite::params_from_iter(params),
4565            map_operational_mutation_row,
4566        )?
4567        .collect::<Result<Vec<_>, _>>()?;
4568    let was_limited = rows.len() > applied_limit;
4569    if was_limited {
4570        rows.truncate(applied_limit);
4571    }
4572    Ok(OperationalReadReport {
4573        collection_name: collection_name.to_owned(),
4574        row_count: rows.len(),
4575        applied_limit,
4576        was_limited,
4577        rows,
4578    })
4579}
4580
/// Builds a GLOB pattern that matches any string starting with `value`.
///
/// The GLOB metacharacters `*`, `?` and `[` inside `value` are wrapped in a
/// bracket expression (`[*]`, `[?]`, `[[]`) so they match literally, and a
/// trailing `*` wildcard is appended.
fn glob_prefix_pattern(value: &str) -> String {
    let mut pattern = value.chars().fold(
        String::with_capacity(value.len() + 1),
        |mut escaped, ch| {
            if matches!(ch, '*' | '?' | '[') {
                escaped.push('[');
                escaped.push(ch);
                escaped.push(']');
            } else {
                escaped.push(ch);
            }
            escaped
        },
    );
    pattern.push('*');
    pattern
}
4594
/// A filter-field value pulled out of a mutation payload by
/// `extract_operational_filter_values`.
#[derive(Clone, Debug, PartialEq, Eq)]
struct ExtractedOperationalFilterValue {
    /// Name of the declared filter field the value was read from.
    field_name: String,
    /// Set when the field is declared as a string type.
    string_value: Option<String>,
    /// Set when the field is declared as an integer or timestamp type.
    integer_value: Option<i64>,
}
4601
4602fn extract_operational_filter_values(
4603    filter_fields: &[OperationalFilterField],
4604    payload_json: &str,
4605) -> Vec<ExtractedOperationalFilterValue> {
4606    let Ok(parsed) = serde_json::from_str::<serde_json::Value>(payload_json) else {
4607        return Vec::new();
4608    };
4609    let Some(object) = parsed.as_object() else {
4610        return Vec::new();
4611    };
4612
4613    filter_fields
4614        .iter()
4615        .filter_map(|field| {
4616            let value = object.get(&field.name)?;
4617            match field.field_type {
4618                OperationalFilterFieldType::String => {
4619                    value
4620                        .as_str()
4621                        .map(|string_value| ExtractedOperationalFilterValue {
4622                            field_name: field.name.clone(),
4623                            string_value: Some(string_value.to_owned()),
4624                            integer_value: None,
4625                        })
4626                }
4627                OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp => {
4628                    value
4629                        .as_i64()
4630                        .map(|integer_value| ExtractedOperationalFilterValue {
4631                            field_name: field.name.clone(),
4632                            string_value: None,
4633                            integer_value: Some(integer_value),
4634                        })
4635                }
4636            }
4637        })
4638        .collect()
4639}
4640
4641fn operational_compaction_candidates(
4642    conn: &rusqlite::Connection,
4643    retention_json: &str,
4644    collection_name: &str,
4645) -> Result<(Vec<String>, Option<i64>), EngineError> {
4646    operational_compaction_candidates_at(
4647        conn,
4648        retention_json,
4649        collection_name,
4650        current_unix_timestamp()?,
4651    )
4652}
4653
4654fn operational_compaction_candidates_at(
4655    conn: &rusqlite::Connection,
4656    retention_json: &str,
4657    collection_name: &str,
4658    now_timestamp: i64,
4659) -> Result<(Vec<String>, Option<i64>), EngineError> {
4660    let policy = parse_operational_retention_policy(retention_json)?;
4661    match policy {
4662        OperationalRetentionPolicy::KeepAll => Ok((Vec::new(), None)),
4663        OperationalRetentionPolicy::PurgeBeforeSeconds { max_age_seconds } => {
4664            let before_timestamp = now_timestamp - max_age_seconds;
4665            let mut stmt = conn.prepare(
4666                "SELECT id FROM operational_mutations \
4667                 WHERE collection_name = ?1 AND created_at < ?2 \
4668                 ORDER BY mutation_order",
4669            )?;
4670            let mutation_ids = stmt
4671                .query_map(
4672                    rusqlite::params![collection_name, before_timestamp],
4673                    |row| row.get::<_, String>(0),
4674                )?
4675                .collect::<Result<Vec<_>, _>>()?;
4676            Ok((mutation_ids, Some(before_timestamp)))
4677        }
4678        OperationalRetentionPolicy::KeepLast { max_rows } => {
4679            let mut stmt = conn.prepare(
4680                "SELECT id FROM operational_mutations \
4681                 WHERE collection_name = ?1 \
4682                 ORDER BY mutation_order DESC",
4683            )?;
4684            let ordered_ids = stmt
4685                .query_map([collection_name], |row| row.get::<_, String>(0))?
4686                .collect::<Result<Vec<_>, _>>()?;
4687            Ok((ordered_ids.into_iter().skip(max_rows).collect(), None))
4688        }
4689    }
4690}
4691
4692fn parse_operational_retention_policy(
4693    retention_json: &str,
4694) -> Result<OperationalRetentionPolicy, EngineError> {
4695    let policy: OperationalRetentionPolicy = serde_json::from_str(retention_json)
4696        .map_err(|error| EngineError::InvalidWrite(format!("invalid retention_json: {error}")))?;
4697    match policy {
4698        OperationalRetentionPolicy::KeepAll => Ok(policy),
4699        OperationalRetentionPolicy::PurgeBeforeSeconds { max_age_seconds } => {
4700            if max_age_seconds <= 0 {
4701                return Err(EngineError::InvalidWrite(
4702                    "retention_json max_age_seconds must be greater than zero".to_owned(),
4703                ));
4704            }
4705            Ok(policy)
4706        }
4707        OperationalRetentionPolicy::KeepLast { max_rows } => {
4708            if max_rows == 0 {
4709                return Err(EngineError::InvalidWrite(
4710                    "retention_json max_rows must be greater than zero".to_owned(),
4711                ));
4712            }
4713            Ok(policy)
4714        }
4715    }
4716}
4717
4718fn load_operational_retention_records(
4719    conn: &rusqlite::Connection,
4720    collection_names: Option<&[String]>,
4721    max_collections: Option<usize>,
4722) -> Result<Vec<OperationalCollectionRecord>, EngineError> {
4723    let limit = max_collections.unwrap_or(usize::MAX);
4724    if limit == 0 {
4725        return Err(EngineError::InvalidWrite(
4726            "max_collections must be greater than zero".to_owned(),
4727        ));
4728    }
4729
4730    let mut records = Vec::new();
4731    if let Some(collection_names) = collection_names {
4732        for name in collection_names.iter().take(limit) {
4733            let record = load_operational_collection_record(conn, name)?.ok_or_else(|| {
4734                EngineError::InvalidWrite(format!(
4735                    "operational collection '{name}' is not registered"
4736                ))
4737            })?;
4738            records.push(record);
4739        }
4740        return Ok(records);
4741    }
4742
4743    let mut stmt = conn.prepare(
4744        "SELECT name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at, disabled_at \
4745         FROM operational_collections ORDER BY name",
4746    )?;
4747    let rows = stmt
4748        .query_map([], map_operational_collection_row)?
4749        .take(limit)
4750        .collect::<Result<Vec<_>, _>>()?;
4751    Ok(rows)
4752}
4753
4754fn last_operational_retention_run_at(
4755    conn: &rusqlite::Connection,
4756    collection_name: &str,
4757) -> Result<Option<i64>, EngineError> {
4758    conn.query_row(
4759        "SELECT MAX(executed_at) FROM operational_retention_runs WHERE collection_name = ?1",
4760        [collection_name],
4761        |row| row.get(0),
4762    )
4763    .optional()
4764    .map_err(EngineError::Sqlite)
4765    .map(Option::flatten)
4766}
4767
4768fn count_operational_mutations_for_collection(
4769    conn: &rusqlite::Connection,
4770    collection_name: &str,
4771) -> Result<usize, EngineError> {
4772    let count: i64 = conn.query_row(
4773        "SELECT count(*) FROM operational_mutations WHERE collection_name = ?1",
4774        [collection_name],
4775        |row| row.get(0),
4776    )?;
4777    usize::try_from(count).map_err(|_| {
4778        EngineError::Bridge(format!("count overflow for collection {collection_name}"))
4779    })
4780}
4781
4782fn retention_action_kind_and_limit(
4783    policy: &OperationalRetentionPolicy,
4784) -> (OperationalRetentionActionKind, Option<usize>) {
4785    match policy {
4786        OperationalRetentionPolicy::KeepAll => (OperationalRetentionActionKind::Noop, None),
4787        OperationalRetentionPolicy::PurgeBeforeSeconds { .. } => {
4788            (OperationalRetentionActionKind::PurgeBeforeSeconds, None)
4789        }
4790        OperationalRetentionPolicy::KeepLast { max_rows } => {
4791            (OperationalRetentionActionKind::KeepLast, Some(*max_rows))
4792        }
4793    }
4794}
4795
4796fn plan_operational_retention_item(
4797    conn: &rusqlite::Connection,
4798    record: &OperationalCollectionRecord,
4799    now_timestamp: i64,
4800) -> Result<OperationalRetentionPlanItem, EngineError> {
4801    let last_run_at = last_operational_retention_run_at(conn, &record.name)?;
4802    if record.kind != OperationalCollectionKind::AppendOnlyLog {
4803        return Ok(OperationalRetentionPlanItem {
4804            collection_name: record.name.clone(),
4805            action_kind: OperationalRetentionActionKind::Noop,
4806            candidate_deletions: 0,
4807            before_timestamp: None,
4808            max_rows: None,
4809            last_run_at,
4810        });
4811    }
4812    let policy = parse_operational_retention_policy(&record.retention_json)?;
4813    let (action_kind, max_rows) = retention_action_kind_and_limit(&policy);
4814    let (candidate_ids, before_timestamp) = operational_compaction_candidates_at(
4815        conn,
4816        &record.retention_json,
4817        &record.name,
4818        now_timestamp,
4819    )?;
4820    Ok(OperationalRetentionPlanItem {
4821        collection_name: record.name.clone(),
4822        action_kind,
4823        candidate_deletions: candidate_ids.len(),
4824        before_timestamp,
4825        max_rows,
4826        last_run_at,
4827    })
4828}
4829
4830fn run_operational_retention_item(
4831    tx: &rusqlite::Transaction<'_>,
4832    record: &OperationalCollectionRecord,
4833    now_timestamp: i64,
4834    dry_run: bool,
4835) -> Result<OperationalRetentionRunItem, EngineError> {
4836    let plan = plan_operational_retention_item(tx, record, now_timestamp)?;
4837    let mut deleted_mutations = 0usize;
4838    if record.kind == OperationalCollectionKind::AppendOnlyLog
4839        && plan.action_kind != OperationalRetentionActionKind::Noop
4840        && plan.candidate_deletions > 0
4841        && !dry_run
4842    {
4843        let (candidate_ids, _) = operational_compaction_candidates_at(
4844            tx,
4845            &record.retention_json,
4846            &record.name,
4847            now_timestamp,
4848        )?;
4849        let mut delete_stmt =
4850            tx.prepare_cached("DELETE FROM operational_mutations WHERE id = ?1")?;
4851        for mutation_id in &candidate_ids {
4852            delete_stmt.execute([mutation_id.as_str()])?;
4853            deleted_mutations += 1;
4854        }
4855        drop(delete_stmt);
4856
4857        persist_simple_provenance_event(
4858            tx,
4859            "operational_retention_run",
4860            &record.name,
4861            Some(serde_json::json!({
4862                "action_kind": plan.action_kind,
4863                "deleted_mutations": deleted_mutations,
4864                "before_timestamp": plan.before_timestamp,
4865                "max_rows": plan.max_rows,
4866                "executed_at": now_timestamp,
4867            })),
4868        )?;
4869    }
4870
4871    let live_rows_remaining = count_operational_mutations_for_collection(tx, &record.name)?;
4872    let effective_deleted_mutations = if dry_run {
4873        plan.candidate_deletions
4874    } else {
4875        deleted_mutations
4876    };
4877    let rows_remaining = if dry_run {
4878        live_rows_remaining.saturating_sub(effective_deleted_mutations)
4879    } else {
4880        live_rows_remaining
4881    };
4882    if !dry_run && plan.action_kind != OperationalRetentionActionKind::Noop {
4883        tx.execute(
4884            "INSERT INTO operational_retention_runs \
4885             (id, collection_name, executed_at, action_kind, dry_run, deleted_mutations, rows_remaining, metadata_json) \
4886             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
4887            rusqlite::params![
4888                new_id(),
4889                record.name,
4890                now_timestamp,
4891                serde_json::to_string(&plan.action_kind)
4892                    .unwrap_or_else(|_| "\"noop\"".to_owned())
4893                    .trim_matches('"')
4894                    .to_owned(),
4895                i32::from(dry_run),
4896                deleted_mutations,
4897                rows_remaining,
4898                serde_json::json!({
4899                    "before_timestamp": plan.before_timestamp,
4900                    "max_rows": plan.max_rows,
4901                })
4902                .to_string(),
4903            ],
4904        )?;
4905    }
4906
4907    Ok(OperationalRetentionRunItem {
4908        collection_name: plan.collection_name,
4909        action_kind: plan.action_kind,
4910        deleted_mutations: effective_deleted_mutations,
4911        before_timestamp: plan.before_timestamp,
4912        max_rows: plan.max_rows,
4913        rows_remaining,
4914    })
4915}
4916
4917fn current_unix_timestamp() -> Result<i64, EngineError> {
4918    let now = SystemTime::now()
4919        .duration_since(SystemTime::UNIX_EPOCH)
4920        .map_err(|error| EngineError::Bridge(format!("system clock error: {error}")))?;
4921    i64::try_from(now.as_secs())
4922        .map_err(|_| EngineError::Bridge("unix timestamp overflow".to_owned()))
4923}
4924
4925fn map_operational_collection_row(
4926    row: &rusqlite::Row<'_>,
4927) -> Result<OperationalCollectionRecord, rusqlite::Error> {
4928    let kind_text: String = row.get(1)?;
4929    let kind = OperationalCollectionKind::try_from(kind_text.as_str()).map_err(|message| {
4930        rusqlite::Error::FromSqlConversionFailure(
4931            1,
4932            rusqlite::types::Type::Text,
4933            Box::new(io::Error::new(io::ErrorKind::InvalidData, message)),
4934        )
4935    })?;
4936    Ok(OperationalCollectionRecord {
4937        name: row.get(0)?,
4938        kind,
4939        schema_json: row.get(2)?,
4940        retention_json: row.get(3)?,
4941        filter_fields_json: row.get(4)?,
4942        validation_json: row.get(5)?,
4943        secondary_indexes_json: row.get(6)?,
4944        format_version: row.get(7)?,
4945        created_at: row.get(8)?,
4946        disabled_at: row.get(9)?,
4947    })
4948}
4949
4950fn map_operational_mutation_row(
4951    row: &rusqlite::Row<'_>,
4952) -> Result<OperationalMutationRow, rusqlite::Error> {
4953    Ok(OperationalMutationRow {
4954        id: row.get(0)?,
4955        collection_name: row.get(1)?,
4956        record_key: row.get(2)?,
4957        op_kind: row.get(3)?,
4958        payload_json: row.get(4)?,
4959        source_ref: row.get(5)?,
4960        created_at: row.get(6)?,
4961    })
4962}
4963
4964fn map_operational_current_row(
4965    row: &rusqlite::Row<'_>,
4966) -> Result<OperationalCurrentRow, rusqlite::Error> {
4967    Ok(OperationalCurrentRow {
4968        collection_name: row.get(0)?,
4969        record_key: row.get(1)?,
4970        payload_json: row.get(2)?,
4971        updated_at: row.get(3)?,
4972        last_mutation_id: row.get(4)?,
4973    })
4974}
4975
4976#[cfg(test)]
4977#[allow(clippy::expect_used)]
4978mod tests {
4979    use std::fs;
4980    use std::sync::Arc;
4981
4982    use fathomdb_schema::SchemaManager;
4983    use tempfile::NamedTempFile;
4984
4985    use super::{
4986        AdminService, FtsPropertyPathMode, FtsPropertyPathSpec, SafeExportOptions,
4987        VectorRegenerationConfig,
4988    };
4989    use crate::projection::ProjectionTarget;
4990    use crate::sqlite;
4991    use crate::{
4992        EngineError, ExecutionCoordinator, OperationalCollectionKind, OperationalRegisterRequest,
4993        TelemetryCounters,
4994    };
4995
4996    use fathomdb_query::QueryBuilder;
4997
4998    #[cfg(feature = "sqlite-vec")]
4999    use super::{VectorGeneratorPolicy, load_vector_regeneration_config};
5000
5001    #[allow(dead_code)]
5002    #[cfg(unix)]
5003    fn set_file_mode(path: &std::path::Path, mode: u32) {
5004        use std::os::unix::fs::PermissionsExt;
5005
5006        let mut permissions = fs::metadata(path).expect("script metadata").permissions();
5007        permissions.set_mode(mode);
5008        fs::set_permissions(path, permissions).expect("chmod");
5009    }
5010
5011    #[allow(dead_code)]
5012    #[cfg(not(unix))]
5013    fn set_file_mode(_path: &std::path::Path, _mode: u32) {}
5014
5015    fn setup() -> (NamedTempFile, AdminService) {
5016        let db = NamedTempFile::new().expect("temp file");
5017        let schema = Arc::new(SchemaManager::new());
5018        {
5019            let conn = sqlite::open_connection(db.path()).expect("connection");
5020            schema.bootstrap(&conn).expect("bootstrap");
5021        }
5022        let service = AdminService::new(db.path(), Arc::clone(&schema));
5023        (db, service)
5024    }
5025
5026    #[test]
5027    fn check_integrity_includes_active_uniqueness_count() {
5028        let (_db, service) = setup();
5029        let report = service.check_integrity().expect("integrity check");
5030        assert_eq!(report.duplicate_active_logical_ids, 0);
5031        assert_eq!(report.operational_missing_collections, 0);
5032        assert_eq!(report.operational_missing_last_mutations, 0);
5033    }
5034
5035    #[test]
5036    fn trace_source_returns_node_logical_ids() {
5037        let (db, service) = setup();
5038        {
5039            let conn = sqlite::open_connection(db.path()).expect("conn");
5040            conn.execute(
5041                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
5042                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 'source-1')",
5043                [],
5044            )
5045            .expect("insert node");
5046        }
5047        let report = service.trace_source("source-1").expect("trace");
5048        assert_eq!(report.node_rows, 1);
5049        assert_eq!(report.node_logical_ids, vec!["lg1"]);
5050    }
5051
5052    #[test]
5053    fn trace_source_includes_operational_mutations() {
5054        let (db, service) = setup();
5055        {
5056            let conn = sqlite::open_connection(db.path()).expect("conn");
5057            conn.execute(
5058                "INSERT INTO operational_collections \
5059                 (name, kind, schema_json, retention_json, format_version, created_at) \
5060                 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
5061                [],
5062            )
5063            .expect("insert collection");
5064            conn.execute(
5065                "INSERT INTO operational_mutations \
5066                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
5067                 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"ok\"}', 'source-1', 100, 1)",
5068                [],
5069            )
5070            .expect("insert mutation");
5071        }
5072
5073        let report = service.trace_source("source-1").expect("trace");
5074        assert_eq!(report.operational_mutation_rows, 1);
5075        assert_eq!(report.operational_mutation_ids, vec!["m1"]);
5076    }
5077
5078    #[test]
5079    fn excise_source_restores_prior_active_node() {
5080        let (db, service) = setup();
5081        {
5082            let conn = sqlite::open_connection(db.path()).expect("conn");
5083            conn.execute(
5084                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5085                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
5086                [],
5087            )
5088            .expect("insert v1 superseded");
5089            conn.execute(
5090                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
5091                 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
5092                [],
5093            )
5094            .expect("insert v2 active");
5095        }
5096        service.excise_source("source-2").expect("excise");
5097        {
5098            let conn = sqlite::open_connection(db.path()).expect("conn");
5099            let active_row_id: String = conn
5100                .query_row(
5101                    "SELECT row_id FROM nodes WHERE logical_id = 'lg1' AND superseded_at IS NULL",
5102                    [],
5103                    |row| row.get(0),
5104                )
5105                .expect("active row exists after excise");
5106            assert_eq!(active_row_id, "r1");
5107        }
5108    }
5109
5110    #[test]
5111    fn excise_source_deletes_operational_mutations_and_repairs_latest_state_current() {
5112        let (db, service) = setup();
5113        {
5114            let conn = sqlite::open_connection(db.path()).expect("conn");
5115            conn.execute(
5116                "INSERT INTO operational_collections \
5117                 (name, kind, schema_json, retention_json, format_version, created_at) \
5118                 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
5119                [],
5120            )
5121            .expect("insert collection");
5122            conn.execute(
5123                "INSERT INTO operational_mutations \
5124                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
5125                 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"old\"}', 'source-1', 100, 1)",
5126                [],
5127            )
5128            .expect("insert prior mutation");
5129            conn.execute(
5130                "INSERT INTO operational_mutations \
5131                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
5132                 VALUES ('m2', 'connector_health', 'gmail', 'put', '{\"status\":\"new\"}', 'source-2', 200, 2)",
5133                [],
5134            )
5135            .expect("insert excised mutation");
5136            conn.execute(
5137                "INSERT INTO operational_current \
5138                 (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
5139                 VALUES ('connector_health', 'gmail', '{\"status\":\"new\"}', 200, 'm2')",
5140                [],
5141            )
5142            .expect("insert current row");
5143        }
5144
5145        let traced = service
5146            .trace_source("source-2")
5147            .expect("trace before excise");
5148        assert_eq!(traced.operational_mutation_rows, 1);
5149        assert_eq!(traced.operational_mutation_ids, vec!["m2"]);
5150
5151        let excised = service.excise_source("source-2").expect("excise");
5152        assert_eq!(excised.operational_mutation_rows, 0);
5153        assert!(excised.operational_mutation_ids.is_empty());
5154
5155        {
5156            let conn = sqlite::open_connection(db.path()).expect("conn");
5157            let remaining: i64 = conn
5158                .query_row(
5159                    "SELECT count(*) FROM operational_mutations WHERE source_ref = 'source-2'",
5160                    [],
5161                    |row| row.get(0),
5162                )
5163                .expect("remaining count");
5164            assert_eq!(remaining, 0);
5165
5166            let current: (String, String) = conn
5167                .query_row(
5168                    "SELECT payload_json, last_mutation_id FROM operational_current \
5169                     WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
5170                    [],
5171                    |row| Ok((row.get(0)?, row.get(1)?)),
5172                )
5173                .expect("rebuilt current row");
5174            assert_eq!(current.0, "{\"status\":\"old\"}");
5175            assert_eq!(current.1, "m1");
5176        }
5177    }
5178
5179    #[test]
5180    fn restore_logical_id_reestablishes_last_pre_retire_content_and_attached_edges() {
5181        let (db, service) = setup();
5182        {
5183            let conn = sqlite::open_connection(db.path()).expect("conn");
5184            conn.execute(
5185                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
5186                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
5187                [],
5188            )
5189            .expect("insert node");
5190            conn.execute(
5191                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
5192                 VALUES ('node-row-topic', 'topic-1', 'Topic', '{}', 100, 'seed')",
5193                [],
5194            )
5195            .expect("insert target node");
5196            conn.execute(
5197                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5198                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5199                [],
5200            )
5201            .expect("insert chunk");
5202            conn.execute(
5203                "INSERT INTO edges \
5204                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
5205                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 'seed')",
5206                [],
5207            )
5208            .expect("insert edge");
5209            conn.execute(
5210                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5211                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
5212                [],
5213            )
5214            .expect("insert node retire event");
5215            conn.execute(
5216                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5217                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
5218                [],
5219            )
5220            .expect("insert edge retire event");
5221            conn.execute(
5222                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
5223                [],
5224            )
5225            .expect("retire node");
5226            conn.execute(
5227                "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
5228                [],
5229            )
5230            .expect("retire edge");
5231            conn.execute("DELETE FROM fts_nodes", [])
5232                .expect("clear fts");
5233        }
5234
5235        let report = service.restore_logical_id("doc-1").expect("restore");
5236        assert_eq!(report.logical_id, "doc-1");
5237        assert!(!report.was_noop);
5238        assert_eq!(report.restored_node_rows, 1);
5239        assert_eq!(report.restored_edge_rows, 1);
5240        assert_eq!(report.restored_chunk_rows, 1);
5241        assert_eq!(report.restored_fts_rows, 1);
5242
5243        let conn = sqlite::open_connection(db.path()).expect("conn");
5244        let active_node_count: i64 = conn
5245            .query_row(
5246                "SELECT count(*) FROM nodes WHERE logical_id = 'doc-1' AND superseded_at IS NULL",
5247                [],
5248                |row| row.get(0),
5249            )
5250            .expect("active node count");
5251        assert_eq!(active_node_count, 1);
5252        let active_edge_count: i64 = conn
5253            .query_row(
5254                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
5255                [],
5256                |row| row.get(0),
5257            )
5258            .expect("active edge count");
5259        assert_eq!(active_edge_count, 1);
5260        let fts_count: i64 = conn
5261            .query_row(
5262                "SELECT count(*) FROM fts_nodes WHERE chunk_id = 'chunk-1'",
5263                [],
5264                |row| row.get(0),
5265            )
5266            .expect("fts count");
5267        assert_eq!(fts_count, 1);
5268    }
5269
5270    #[test]
5271    fn restore_logical_id_restores_edges_retired_after_the_node_retire_event() {
5272        let (db, service) = setup();
5273        {
5274            let conn = sqlite::open_connection(db.path()).expect("conn");
5275            conn.execute(
5276                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
5277                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
5278                [],
5279            )
5280            .expect("insert node");
5281            conn.execute(
5282                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
5283                 VALUES ('node-row-topic', 'topic-1', 'Topic', '{}', 100, 'seed')",
5284                [],
5285            )
5286            .expect("insert target node");
5287            conn.execute(
5288                "INSERT INTO edges \
5289                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
5290                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 'seed')",
5291                [],
5292            )
5293            .expect("insert edge");
5294            conn.execute(
5295                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5296                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
5297                [],
5298            )
5299            .expect("insert node retire event");
5300            conn.execute(
5301                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5302                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 201, '')",
5303                [],
5304            )
5305            .expect("insert edge retire event");
5306            conn.execute(
5307                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
5308                [],
5309            )
5310            .expect("retire node");
5311            conn.execute(
5312                "UPDATE edges SET superseded_at = 201 WHERE logical_id = 'edge-1'",
5313                [],
5314            )
5315            .expect("retire edge");
5316        }
5317
5318        let report = service.restore_logical_id("doc-1").expect("restore");
5319        assert_eq!(report.restored_edge_rows, 1);
5320
5321        let conn = sqlite::open_connection(db.path()).expect("conn");
5322        let active_edge_count: i64 = conn
5323            .query_row(
5324                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
5325                [],
5326                |row| row.get(0),
5327            )
5328            .expect("active edge count");
5329        assert_eq!(active_edge_count, 1);
5330    }
5331
5332    #[test]
5333    fn restore_logical_id_prefers_latest_retired_revision_when_timestamps_tie() {
5334        let (db, service) = setup();
5335        {
5336            let conn = sqlite::open_connection(db.path()).expect("conn");
5337            conn.execute(
5338                "INSERT INTO nodes \
5339                 (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5340                 VALUES ('node-row-older', 'doc-1', 'Document', '{\"title\":\"older\"}', 100, 200, 'forget-1')",
5341                [],
5342            )
5343            .expect("insert older retired node");
5344            conn.execute(
5345                "INSERT INTO nodes \
5346                 (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5347                 VALUES ('node-row-newer', 'doc-1', 'Document', '{\"title\":\"newer\"}', 100, 200, 'forget-1')",
5348                [],
5349            )
5350            .expect("insert newer retired node");
5351            conn.execute(
5352                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5353                 VALUES ('evt-retire-older', 'node_retire', 'doc-1', 'forget-1', 200, '')",
5354                [],
5355            )
5356            .expect("insert older retire event");
5357            conn.execute(
5358                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5359                 VALUES ('evt-retire-newer', 'node_retire', 'doc-1', 'forget-1', 200, '')",
5360                [],
5361            )
5362            .expect("insert newer retire event");
5363        }
5364
5365        let report = service.restore_logical_id("doc-1").expect("restore");
5366
5367        assert!(!report.was_noop);
5368        let conn = sqlite::open_connection(db.path()).expect("conn");
5369        let active_row: (String, String) = conn
5370            .query_row(
5371                "SELECT row_id, properties FROM nodes \
5372                 WHERE logical_id = 'doc-1' AND superseded_at IS NULL",
5373                [],
5374                |row| Ok((row.get(0)?, row.get(1)?)),
5375            )
5376            .expect("restored active row");
5377        assert_eq!(active_row.0, "node-row-newer");
5378        assert_eq!(active_row.1, "{\"title\":\"newer\"}");
5379    }
5380
5381    #[test]
5382    fn purge_logical_id_removes_retired_content_and_records_tombstone() {
5383        let (db, service) = setup();
5384        {
5385            let conn = sqlite::open_connection(db.path()).expect("conn");
5386            conn.execute(
5387                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5388                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
5389                [],
5390            )
5391            .expect("insert retired node");
5392            conn.execute(
5393                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5394                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5395                [],
5396            )
5397            .expect("insert chunk");
5398            conn.execute(
5399                "INSERT INTO edges \
5400                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, superseded_at, source_ref) \
5401                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 200, 'seed')",
5402                [],
5403            )
5404            .expect("insert retired edge");
5405            conn.execute(
5406                "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
5407                 VALUES ('chunk-1', 'doc-1', 'Document', 'budget narrative')",
5408                [],
5409            )
5410            .expect("insert fts");
5411        }
5412
5413        let report = service.purge_logical_id("doc-1").expect("purge");
5414        assert_eq!(report.logical_id, "doc-1");
5415        assert!(!report.was_noop);
5416        assert_eq!(report.deleted_node_rows, 1);
5417        assert_eq!(report.deleted_edge_rows, 1);
5418        assert_eq!(report.deleted_chunk_rows, 1);
5419        assert_eq!(report.deleted_fts_rows, 1);
5420
5421        let conn = sqlite::open_connection(db.path()).expect("conn");
5422        let remaining_nodes: i64 = conn
5423            .query_row(
5424                "SELECT count(*) FROM nodes WHERE logical_id = 'doc-1'",
5425                [],
5426                |row| row.get(0),
5427            )
5428            .expect("remaining nodes");
5429        assert_eq!(remaining_nodes, 0);
5430        let remaining_edges: i64 = conn
5431            .query_row(
5432                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1'",
5433                [],
5434                |row| row.get(0),
5435            )
5436            .expect("remaining edges");
5437        assert_eq!(remaining_edges, 0);
5438        let remaining_chunks: i64 = conn
5439            .query_row(
5440                "SELECT count(*) FROM chunks WHERE id = 'chunk-1'",
5441                [],
5442                |row| row.get(0),
5443            )
5444            .expect("remaining chunks");
5445        assert_eq!(remaining_chunks, 0);
5446        let purge_events: i64 = conn
5447            .query_row(
5448                "SELECT count(*) FROM provenance_events WHERE event_type = 'purge_logical_id' AND subject = 'doc-1'",
5449                [],
5450                |row| row.get(0),
5451            )
5452            .expect("purge events");
5453        assert_eq!(purge_events, 1);
5454    }
5455
5456    #[test]
5457    fn check_semantics_accepts_preserved_retired_chunks() {
5458        let (db, service) = setup();
5459        {
5460            let conn = sqlite::open_connection(db.path()).expect("conn");
5461            conn.execute(
5462                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5463                 VALUES ('node-row-1', 'doc-1', 'Document', '{}', 100, 200, 'seed')",
5464                [],
5465            )
5466            .expect("insert retired node");
5467            conn.execute(
5468                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5469                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5470                [],
5471            )
5472            .expect("insert chunk");
5473        }
5474
5475        let report = service.check_semantics().expect("semantics");
5476        assert_eq!(report.orphaned_chunks, 0);
5477    }
5478
5479    #[test]
5480    fn check_semantics_detects_missing_retired_node_history_for_preserved_chunks() {
5481        let (db, service) = setup();
5482        {
5483            let conn = sqlite::open_connection(db.path()).expect("conn");
5484            conn.execute(
5485                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5486                 VALUES ('chunk-1', 'ghost-doc', 'budget narrative', 100)",
5487                [],
5488            )
5489            .expect("insert orphaned chunk");
5490        }
5491
5492        let report = service.check_semantics().expect("semantics");
5493        assert_eq!(report.orphaned_chunks, 1);
5494    }
5495
5496    #[cfg(feature = "sqlite-vec")]
5497    #[test]
5498    fn check_semantics_detects_missing_retired_node_history_for_preserved_vec_rows() {
5499        let (db, service) = setup();
5500        {
5501            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5502            service
5503                .schema_manager
5504                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
5505                .expect("ensure vec profile");
5506            conn.execute(
5507                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5508                 VALUES ('chunk-1', 'ghost-doc', 'budget narrative', 100)",
5509                [],
5510            )
5511            .expect("insert orphaned chunk");
5512            conn.execute(
5513                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
5514                [],
5515            )
5516            .expect("insert vec row");
5517        }
5518
5519        let report = service.check_semantics().expect("semantics");
5520        assert_eq!(report.orphaned_chunks, 1);
5521        assert_eq!(report.vec_rows_for_superseded_nodes, 1);
5522    }
5523
5524    #[cfg(feature = "sqlite-vec")]
5525    #[test]
5526    fn restore_logical_id_reestablishes_vector_search_without_reingest() {
5527        let (db, service) = setup();
5528        {
5529            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5530            service
5531                .schema_manager
5532                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
5533                .expect("ensure vec profile");
5534            conn.execute(
5535                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5536                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
5537                [],
5538            )
5539            .expect("insert retired node");
5540            conn.execute(
5541                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5542                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5543                [],
5544            )
5545            .expect("insert chunk");
5546            conn.execute(
5547                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
5548                [],
5549            )
5550            .expect("insert vec row");
5551            conn.execute(
5552                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5553                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
5554                [],
5555            )
5556            .expect("insert retire event");
5557        }
5558
5559        let report = service.restore_logical_id("doc-1").expect("restore");
5560        assert_eq!(report.restored_vec_rows, 1);
5561
5562        let coordinator = ExecutionCoordinator::open(
5563            db.path(),
5564            Arc::new(SchemaManager::new()),
5565            Some(4),
5566            1,
5567            Arc::new(TelemetryCounters::default()),
5568            None,
5569        )
5570        .expect("coordinator");
5571        let compiled = QueryBuilder::nodes("Document")
5572            .vector_search("[0.0, 0.0, 0.0, 0.0]", 5)
5573            .compile()
5574            .expect("compile");
5575        let rows = coordinator
5576            .execute_compiled_read(&compiled)
5577            .expect("vector read");
5578        assert!(
5579            rows.nodes.iter().any(|row| row.logical_id == "doc-1"),
5580            "restore should make the preserved vec row visible again without re-ingest"
5581        );
5582    }
5583
5584    #[cfg(feature = "sqlite-vec")]
5585    #[test]
5586    fn purge_logical_id_deletes_vec_rows_for_retired_content() {
5587        let (db, service) = setup();
5588        {
5589            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5590            service
5591                .schema_manager
5592                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
5593                .expect("ensure vec profile");
5594            conn.execute(
5595                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5596                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
5597                [],
5598            )
5599            .expect("insert retired node");
5600            conn.execute(
5601                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5602                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5603                [],
5604            )
5605            .expect("insert chunk");
5606            conn.execute(
5607                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
5608                [],
5609            )
5610            .expect("insert vec row");
5611        }
5612
5613        let report = service.purge_logical_id("doc-1").expect("purge");
5614        assert_eq!(report.deleted_vec_rows, 1);
5615
5616        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5617        let vec_count: i64 = conn
5618            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
5619                row.get(0)
5620            })
5621            .expect("vec count");
5622        assert_eq!(vec_count, 0);
5623    }
5624
5625    #[cfg(feature = "sqlite-vec")]
5626    #[test]
5627    fn restore_logical_id_restores_visibility_of_regenerated_vectors() {
5628        let (db, service) = setup();
5629        let temp_dir = tempfile::tempdir().expect("temp dir");
5630        let script_path = temp_dir.path().join("vector-generator-restore.sh");
5631        fs::write(
5632            &script_path,
5633            r#"#!/usr/bin/env bash
5634set -euo pipefail
5635python3 -c 'import json, sys
5636payload = json.load(sys.stdin)
5637json.dump({"embeddings": [{"chunk_id": payload["chunks"][0]["chunk_id"], "embedding": [0.0, 0.0, 0.0, 0.0]}]}, sys.stdout)'
5638"#,
5639        )
5640        .expect("write script");
5641        set_file_mode(&script_path, 0o755);
5642
5643        {
5644            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5645            service
5646                .schema_manager
5647                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
5648                .expect("ensure vec profile");
5649            conn.execute(
5650                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
5651                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
5652                [],
5653            )
5654            .expect("insert node");
5655            conn.execute(
5656                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5657                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5658                [],
5659            )
5660            .expect("insert chunk");
5661        }
5662
5663        service
5664            .regenerate_vector_embeddings(&VectorRegenerationConfig {
5665                profile: "default".to_owned(),
5666                table_name: "vec_nodes_active".to_owned(),
5667                model_identity: "model".to_owned(),
5668                model_version: "1.0.0".to_owned(),
5669                dimension: 4,
5670                normalization_policy: "l2".to_owned(),
5671                chunking_policy: "per_chunk".to_owned(),
5672                preprocessing_policy: "trim".to_owned(),
5673                generator_command: vec![script_path.to_string_lossy().to_string()],
5674            })
5675            .expect("regenerate");
5676
5677        {
5678            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5679            conn.execute(
5680                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5681                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
5682                [],
5683            )
5684            .expect("insert retire event");
5685            conn.execute(
5686                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
5687                [],
5688            )
5689            .expect("retire node");
5690        }
5691
5692        let report = service.restore_logical_id("doc-1").expect("restore");
5693        assert_eq!(report.restored_vec_rows, 1);
5694
5695        let coordinator = ExecutionCoordinator::open(
5696            db.path(),
5697            Arc::new(SchemaManager::new()),
5698            Some(4),
5699            1,
5700            Arc::new(TelemetryCounters::default()),
5701            None,
5702        )
5703        .expect("coordinator");
5704        let compiled = QueryBuilder::nodes("Document")
5705            .vector_search("[0.0, 0.0, 0.0, 0.0]", 5)
5706            .compile()
5707            .expect("compile");
5708        let rows = coordinator
5709            .execute_compiled_read(&compiled)
5710            .expect("vector read");
5711        assert!(
5712            rows.nodes.iter().any(|row| row.logical_id == "doc-1"),
5713            "restored logical_id should become visible through regenerated vectors"
5714        );
5715    }
5716
5717    #[test]
5718    fn check_semantics_clean_db_returns_zeros() {
5719        let (_db, service) = setup();
5720        let report = service.check_semantics().expect("semantics check");
5721        assert_eq!(report.orphaned_chunks, 0);
5722        assert_eq!(report.null_source_ref_nodes, 0);
5723        assert_eq!(report.broken_step_fk, 0);
5724        assert_eq!(report.broken_action_fk, 0);
5725        assert_eq!(report.stale_fts_rows, 0);
5726        assert_eq!(report.fts_rows_for_superseded_nodes, 0);
5727        assert_eq!(report.dangling_edges, 0);
5728        assert_eq!(report.orphaned_supersession_chains, 0);
5729        assert_eq!(report.stale_vec_rows, 0);
5730        assert_eq!(report.vec_rows_for_superseded_nodes, 0);
5731        assert_eq!(report.missing_operational_current_rows, 0);
5732        assert_eq!(report.stale_operational_current_rows, 0);
5733        assert_eq!(report.disabled_collection_mutations, 0);
5734        assert_eq!(report.mismatched_kind_property_fts_rows, 0);
5735        assert_eq!(report.duplicate_property_fts_rows, 0);
5736        assert_eq!(report.drifted_property_fts_rows, 0);
5737        assert!(report.warnings.is_empty());
5738    }
5739
5740    #[test]
5741    fn register_operational_collection_persists_and_emits_provenance() {
5742        let (db, service) = setup();
5743        let record = service
5744            .register_operational_collection(&OperationalRegisterRequest {
5745                name: "connector_health".to_owned(),
5746                kind: OperationalCollectionKind::LatestState,
5747                schema_json: "{}".to_owned(),
5748                retention_json: "{}".to_owned(),
5749                filter_fields_json: "[]".to_owned(),
5750                validation_json: String::new(),
5751                secondary_indexes_json: "[]".to_owned(),
5752                format_version: 1,
5753            })
5754            .expect("register collection");
5755
5756        assert_eq!(record.name, "connector_health");
5757        assert_eq!(record.kind, OperationalCollectionKind::LatestState);
5758        assert_eq!(record.schema_json, "{}");
5759        assert_eq!(record.retention_json, "{}");
5760        assert_eq!(record.filter_fields_json, "[]");
5761        assert!(record.created_at > 0);
5762        assert_eq!(record.disabled_at, None);
5763
5764        let described = service
5765            .describe_operational_collection("connector_health")
5766            .expect("describe collection")
5767            .expect("collection exists");
5768        assert_eq!(described, record);
5769
5770        let conn = sqlite::open_connection(db.path()).expect("conn");
5771        let provenance_count: i64 = conn
5772            .query_row(
5773                "SELECT count(*) FROM provenance_events \
5774                 WHERE event_type = 'operational_collection_registered' AND subject = 'connector_health'",
5775                [],
5776                |row| row.get(0),
5777            )
5778            .expect("provenance count");
5779        assert_eq!(provenance_count, 1);
5780    }
5781
5782    #[test]
5783    fn register_and_update_operational_collection_validation_round_trip() {
5784        let (db, service) = setup();
5785        let record = service
5786            .register_operational_collection(&OperationalRegisterRequest {
5787                name: "connector_health".to_owned(),
5788                kind: OperationalCollectionKind::LatestState,
5789                schema_json: "{}".to_owned(),
5790                retention_json: "{}".to_owned(),
5791                filter_fields_json: "[]".to_owned(),
5792                validation_json: String::new(),
5793                secondary_indexes_json: "[]".to_owned(),
5794                format_version: 1,
5795            })
5796            .expect("register collection");
5797        assert_eq!(record.validation_json, "");
5798
5799        let validation_json = r#"{"format_version":1,"mode":"enforce","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#;
5800        let updated = service
5801            .update_operational_collection_validation("connector_health", validation_json)
5802            .expect("update validation");
5803        assert_eq!(updated.validation_json, validation_json);
5804
5805        let described = service
5806            .describe_operational_collection("connector_health")
5807            .expect("describe collection")
5808            .expect("collection exists");
5809        assert_eq!(described.validation_json, validation_json);
5810
5811        let conn = sqlite::open_connection(db.path()).expect("conn");
5812        let provenance_count: i64 = conn
5813            .query_row(
5814                "SELECT count(*) FROM provenance_events \
5815                 WHERE event_type = 'operational_collection_validation_updated' \
5816                   AND subject = 'connector_health'",
5817                [],
5818                |row| row.get(0),
5819            )
5820            .expect("provenance count");
5821        assert_eq!(provenance_count, 1);
5822    }
5823
5824    #[test]
5825    fn register_update_and_rebuild_operational_secondary_indexes_round_trip() {
5826        let (db, service) = setup();
5827        let record = service
5828            .register_operational_collection(&OperationalRegisterRequest {
5829                name: "audit_log".to_owned(),
5830                kind: OperationalCollectionKind::AppendOnlyLog,
5831                schema_json: "{}".to_owned(),
5832                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
5833                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
5834                validation_json: String::new(),
5835                secondary_indexes_json: "[]".to_owned(),
5836                format_version: 1,
5837            })
5838            .expect("register collection");
5839        assert_eq!(record.secondary_indexes_json, "[]");
5840
5841        {
5842            let writer = crate::WriterActor::start(
5843                db.path(),
5844                Arc::new(SchemaManager::new()),
5845                crate::ProvenanceMode::Warn,
5846                Arc::new(crate::TelemetryCounters::default()),
5847            )
5848            .expect("writer");
5849            writer
5850                .submit(crate::WriteRequest {
5851                    label: "secondary-index-seed".to_owned(),
5852                    nodes: vec![],
5853                    node_retires: vec![],
5854                    edges: vec![],
5855                    edge_retires: vec![],
5856                    chunks: vec![],
5857                    runs: vec![],
5858                    steps: vec![],
5859                    actions: vec![],
5860                    optional_backfills: vec![],
5861                    vec_inserts: vec![],
5862                    operational_writes: vec![
5863                        crate::OperationalWrite::Append {
5864                            collection: "audit_log".to_owned(),
5865                            record_key: "evt-1".to_owned(),
5866                            payload_json: r#"{"actor":"alice","ts":100}"#.to_owned(),
5867                            source_ref: Some("src-1".to_owned()),
5868                        },
5869                        crate::OperationalWrite::Append {
5870                            collection: "audit_log".to_owned(),
5871                            record_key: "evt-2".to_owned(),
5872                            payload_json: r#"{"actor":"bob","ts":200}"#.to_owned(),
5873                            source_ref: Some("src-2".to_owned()),
5874                        },
5875                    ],
5876                })
5877                .expect("seed writes");
5878        }
5879
5880        let secondary_indexes_json = r#"[{"name":"actor_ts","kind":"append_only_field_time","field":"actor","value_type":"string","time_field":"ts"}]"#;
5881        let updated = service
5882            .update_operational_collection_secondary_indexes("audit_log", secondary_indexes_json)
5883            .expect("update secondary indexes");
5884        assert_eq!(updated.secondary_indexes_json, secondary_indexes_json);
5885
5886        let conn = sqlite::open_connection(db.path()).expect("conn");
5887        let entry_count: i64 = conn
5888            .query_row(
5889                "SELECT count(*) FROM operational_secondary_index_entries \
5890                 WHERE collection_name = 'audit_log' AND index_name = 'actor_ts'",
5891                [],
5892                |row| row.get(0),
5893            )
5894            .expect("secondary index count");
5895        assert_eq!(entry_count, 2);
5896        conn.execute(
5897            "DELETE FROM operational_secondary_index_entries WHERE collection_name = 'audit_log'",
5898            [],
5899        )
5900        .expect("clear index entries");
5901        drop(conn);
5902
5903        let rebuild = service
5904            .rebuild_operational_secondary_indexes("audit_log")
5905            .expect("rebuild secondary indexes");
5906        assert_eq!(rebuild.collection_name, "audit_log");
5907        assert_eq!(rebuild.mutation_entries_rebuilt, 2);
5908        assert_eq!(rebuild.current_entries_rebuilt, 0);
5909    }
5910
5911    #[test]
5912    fn register_operational_collection_rejects_invalid_validation_contract() {
5913        let (_db, service) = setup();
5914
5915        let error = service
5916            .register_operational_collection(&OperationalRegisterRequest {
5917                name: "connector_health".to_owned(),
5918                kind: OperationalCollectionKind::LatestState,
5919                schema_json: "{}".to_owned(),
5920                retention_json: "{}".to_owned(),
5921                filter_fields_json: "[]".to_owned(),
5922                validation_json: r#"{"format_version":1,"mode":"enforce","fields":[{"name":"status","type":"string","minimum":0}]}"#
5923                    .to_owned(),
5924                secondary_indexes_json: "[]".to_owned(),
5925                format_version: 1,
5926            })
5927            .expect_err("invalid validation contract should reject");
5928
5929        assert!(matches!(error, EngineError::InvalidWrite(_)));
5930        assert!(error.to_string().contains("minimum/maximum"));
5931    }
5932
5933    #[test]
5934    fn validate_operational_collection_history_reports_invalid_rows_without_mutation() {
5935        let (db, service) = setup();
5936        service
5937            .register_operational_collection(&OperationalRegisterRequest {
5938                name: "audit_log".to_owned(),
5939                kind: OperationalCollectionKind::AppendOnlyLog,
5940                schema_json: "{}".to_owned(),
5941                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
5942                filter_fields_json: "[]".to_owned(),
5943                validation_json: r#"{"format_version":1,"mode":"disabled","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#
5944                    .to_owned(),
5945                secondary_indexes_json: "[]".to_owned(),
5946                format_version: 1,
5947            })
5948            .expect("register collection");
5949        {
5950            let writer = crate::WriterActor::start(
5951                db.path(),
5952                Arc::new(SchemaManager::new()),
5953                crate::ProvenanceMode::Warn,
5954                Arc::new(crate::TelemetryCounters::default()),
5955            )
5956            .expect("writer");
5957            writer
5958                .submit(crate::WriteRequest {
5959                    label: "history-validation".to_owned(),
5960                    nodes: vec![],
5961                    node_retires: vec![],
5962                    edges: vec![],
5963                    edge_retires: vec![],
5964                    chunks: vec![],
5965                    runs: vec![],
5966                    steps: vec![],
5967                    actions: vec![],
5968                    optional_backfills: vec![],
5969                    vec_inserts: vec![],
5970                    operational_writes: vec![
5971                        crate::OperationalWrite::Append {
5972                            collection: "audit_log".to_owned(),
5973                            record_key: "evt-1".to_owned(),
5974                            payload_json: r#"{"status":"ok"}"#.to_owned(),
5975                            source_ref: Some("src-1".to_owned()),
5976                        },
5977                        crate::OperationalWrite::Append {
5978                            collection: "audit_log".to_owned(),
5979                            record_key: "evt-2".to_owned(),
5980                            payload_json: r#"{"status":"bogus"}"#.to_owned(),
5981                            source_ref: Some("src-2".to_owned()),
5982                        },
5983                    ],
5984                })
5985                .expect("write");
5986        }
5987
5988        let report = service
5989            .validate_operational_collection_history("audit_log")
5990            .expect("validate history");
5991        assert_eq!(report.collection_name, "audit_log");
5992        assert_eq!(report.checked_rows, 2);
5993        assert_eq!(report.invalid_row_count, 1);
5994        assert_eq!(report.issues.len(), 1);
5995        assert_eq!(report.issues[0].record_key, "evt-2");
5996        assert!(report.issues[0].message.contains("must be one of"));
5997
5998        let trace = service
5999            .trace_operational_collection("audit_log", None)
6000            .expect("trace");
6001        assert_eq!(trace.mutation_count, 2);
6002
6003        let conn = sqlite::open_connection(db.path()).expect("conn");
6004        let provenance_count: i64 = conn
6005            .query_row(
6006                "SELECT count(*) FROM provenance_events \
6007                 WHERE event_type = 'operational_collection_history_validated' \
6008                   AND subject = 'audit_log'",
6009                [],
6010                |row| row.get(0),
6011            )
6012            .expect("provenance count");
6013        assert_eq!(provenance_count, 0);
6014    }
6015
6016    #[test]
6017    fn trace_operational_collection_returns_mutations_and_current_rows() {
6018        let (db, service) = setup();
6019        service
6020            .register_operational_collection(&OperationalRegisterRequest {
6021                name: "connector_health".to_owned(),
6022                kind: OperationalCollectionKind::LatestState,
6023                schema_json: "{}".to_owned(),
6024                retention_json: "{}".to_owned(),
6025                filter_fields_json: "[]".to_owned(),
6026                validation_json: String::new(),
6027                secondary_indexes_json: "[]".to_owned(),
6028                format_version: 1,
6029            })
6030            .expect("register collection");
6031        {
6032            let writer = crate::WriterActor::start(
6033                db.path(),
6034                Arc::new(SchemaManager::new()),
6035                crate::ProvenanceMode::Warn,
6036                Arc::new(crate::TelemetryCounters::default()),
6037            )
6038            .expect("writer");
6039            writer
6040                .submit(crate::WriteRequest {
6041                    label: "operational".to_owned(),
6042                    nodes: vec![],
6043                    node_retires: vec![],
6044                    edges: vec![],
6045                    edge_retires: vec![],
6046                    chunks: vec![],
6047                    runs: vec![],
6048                    steps: vec![],
6049                    actions: vec![],
6050                    optional_backfills: vec![],
6051                    vec_inserts: vec![],
6052                    operational_writes: vec![crate::OperationalWrite::Put {
6053                        collection: "connector_health".to_owned(),
6054                        record_key: "gmail".to_owned(),
6055                        payload_json: r#"{"status":"ok"}"#.to_owned(),
6056                        source_ref: Some("src-1".to_owned()),
6057                    }],
6058                })
6059                .expect("write");
6060        }
6061
6062        let report = service
6063            .trace_operational_collection("connector_health", Some("gmail"))
6064            .expect("trace");
6065        assert_eq!(report.collection_name, "connector_health");
6066        assert_eq!(report.record_key.as_deref(), Some("gmail"));
6067        assert_eq!(report.mutation_count, 1);
6068        assert_eq!(report.current_count, 1);
6069        assert_eq!(report.mutations[0].op_kind, "put");
6070        assert_eq!(report.current_rows[0].payload_json, r#"{"status":"ok"}"#);
6071    }
6072
6073    #[test]
6074    fn trace_operational_collection_rejects_unknown_collection() {
6075        let (_db, service) = setup();
6076
6077        let error = service
6078            .trace_operational_collection("missing_collection", None)
6079            .expect_err("unknown collection should fail");
6080
6081        assert!(matches!(error, EngineError::InvalidWrite(_)));
6082        assert!(error.to_string().contains("is not registered"));
6083    }
6084
6085    #[test]
6086    fn rebuild_operational_current_repairs_missing_latest_state_rows() {
6087        let (db, service) = setup();
6088        service
6089            .register_operational_collection(&OperationalRegisterRequest {
6090                name: "connector_health".to_owned(),
6091                kind: OperationalCollectionKind::LatestState,
6092                schema_json: "{}".to_owned(),
6093                retention_json: "{}".to_owned(),
6094                filter_fields_json: "[]".to_owned(),
6095                validation_json: String::new(),
6096                secondary_indexes_json: "[]".to_owned(),
6097                format_version: 1,
6098            })
6099            .expect("register collection");
6100        {
6101            let writer = crate::WriterActor::start(
6102                db.path(),
6103                Arc::new(SchemaManager::new()),
6104                crate::ProvenanceMode::Warn,
6105                Arc::new(crate::TelemetryCounters::default()),
6106            )
6107            .expect("writer");
6108            writer
6109                .submit(crate::WriteRequest {
6110                    label: "operational".to_owned(),
6111                    nodes: vec![],
6112                    node_retires: vec![],
6113                    edges: vec![],
6114                    edge_retires: vec![],
6115                    chunks: vec![],
6116                    runs: vec![],
6117                    steps: vec![],
6118                    actions: vec![],
6119                    optional_backfills: vec![],
6120                    vec_inserts: vec![],
6121                    operational_writes: vec![crate::OperationalWrite::Put {
6122                        collection: "connector_health".to_owned(),
6123                        record_key: "gmail".to_owned(),
6124                        payload_json: r#"{"status":"ok"}"#.to_owned(),
6125                        source_ref: Some("src-1".to_owned()),
6126                    }],
6127                })
6128                .expect("write");
6129        }
6130        {
6131            let conn = sqlite::open_connection(db.path()).expect("conn");
6132            conn.execute(
6133                "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
6134                [],
6135            )
6136            .expect("delete current row");
6137        }
6138
6139        let before = service.check_semantics().expect("semantics before rebuild");
6140        assert_eq!(before.missing_operational_current_rows, 1);
6141
6142        let repair = service
6143            .rebuild_operational_current(Some("connector_health"))
6144            .expect("rebuild current");
6145        assert_eq!(repair.collections_rebuilt, 1);
6146        assert_eq!(repair.current_rows_rebuilt, 1);
6147
6148        let after = service.check_semantics().expect("semantics after rebuild");
6149        assert_eq!(after.missing_operational_current_rows, 0);
6150
6151        let conn = sqlite::open_connection(db.path()).expect("conn");
6152        let payload: String = conn
6153            .query_row(
6154                "SELECT payload_json FROM operational_current \
6155                 WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
6156                [],
6157                |row| row.get(0),
6158            )
6159            .expect("restored payload");
6160        assert_eq!(payload, r#"{"status":"ok"}"#);
6161    }
6162
6163    #[test]
6164    fn rebuild_operational_current_restores_latest_state_secondary_index_entries() {
6165        let (db, service) = setup();
6166        service
6167            .register_operational_collection(&OperationalRegisterRequest {
6168                name: "connector_health".to_owned(),
6169                kind: OperationalCollectionKind::LatestState,
6170                schema_json: "{}".to_owned(),
6171                retention_json: "{}".to_owned(),
6172                filter_fields_json: "[]".to_owned(),
6173                validation_json: String::new(),
6174                secondary_indexes_json: r#"[{"name":"status_current","kind":"latest_state_field","field":"status","value_type":"string"}]"#.to_owned(),
6175                format_version: 1,
6176            })
6177            .expect("register collection");
6178        {
6179            let writer = crate::WriterActor::start(
6180                db.path(),
6181                Arc::new(SchemaManager::new()),
6182                crate::ProvenanceMode::Warn,
6183                Arc::new(crate::TelemetryCounters::default()),
6184            )
6185            .expect("writer");
6186            writer
6187                .submit(crate::WriteRequest {
6188                    label: "operational".to_owned(),
6189                    nodes: vec![],
6190                    node_retires: vec![],
6191                    edges: vec![],
6192                    edge_retires: vec![],
6193                    chunks: vec![],
6194                    runs: vec![],
6195                    steps: vec![],
6196                    actions: vec![],
6197                    optional_backfills: vec![],
6198                    vec_inserts: vec![],
6199                    operational_writes: vec![crate::OperationalWrite::Put {
6200                        collection: "connector_health".to_owned(),
6201                        record_key: "gmail".to_owned(),
6202                        payload_json: r#"{"status":"ok"}"#.to_owned(),
6203                        source_ref: Some("src-1".to_owned()),
6204                    }],
6205                })
6206                .expect("write");
6207        }
6208        {
6209            let conn = sqlite::open_connection(db.path()).expect("conn");
6210            let entry_count: i64 = conn
6211                .query_row(
6212                    "SELECT count(*) FROM operational_secondary_index_entries \
6213                     WHERE collection_name = 'connector_health' AND subject_kind = 'current'",
6214                    [],
6215                    |row| row.get(0),
6216                )
6217                .expect("secondary index count before repair");
6218            assert_eq!(entry_count, 1);
6219            conn.execute(
6220                "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
6221                [],
6222            )
6223            .expect("delete current row");
6224        }
6225
6226        service
6227            .rebuild_operational_current(Some("connector_health"))
6228            .expect("rebuild current");
6229
6230        let conn = sqlite::open_connection(db.path()).expect("conn");
6231        let entry_count: i64 = conn
6232            .query_row(
6233                "SELECT count(*) FROM operational_secondary_index_entries \
6234                 WHERE collection_name = 'connector_health' AND subject_kind = 'current'",
6235                [],
6236                |row| row.get(0),
6237            )
6238            .expect("secondary index count after repair");
6239        assert_eq!(entry_count, 1);
6240    }
6241
6242    #[test]
6243    fn operational_current_semantics_and_rebuild_follow_mutation_order() {
6244        let (db, service) = setup();
6245        {
6246            let conn = sqlite::open_connection(db.path()).expect("conn");
6247            conn.execute(
6248                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6249                 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
6250                [],
6251            )
6252            .expect("seed collection");
6253            conn.execute(
6254                "INSERT INTO operational_mutations \
6255                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6256                 VALUES ('m3', 'connector_health', 'gmail', 'put', '{\"status\":\"old\"}', 'src-1', 100, 1)",
6257                [],
6258            )
6259            .expect("seed first put");
6260            conn.execute(
6261                "INSERT INTO operational_mutations \
6262                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6263                 VALUES ('m2', 'connector_health', 'gmail', 'delete', '', 'src-2', 100, 2)",
6264                [],
6265            )
6266            .expect("seed delete");
6267            conn.execute(
6268                "INSERT INTO operational_mutations \
6269                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6270                 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"new\"}', 'src-3', 100, 3)",
6271                [],
6272            )
6273            .expect("seed final put");
6274            conn.execute(
6275                "INSERT INTO operational_current \
6276                 (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
6277                 VALUES ('connector_health', 'gmail', '{\"status\":\"new\"}', 100, 'm1')",
6278                [],
6279            )
6280            .expect("seed current");
6281        }
6282
6283        let before = service.check_semantics().expect("semantics before rebuild");
6284        assert_eq!(before.missing_operational_current_rows, 0);
6285        assert_eq!(before.stale_operational_current_rows, 0);
6286
6287        {
6288            let conn = sqlite::open_connection(db.path()).expect("conn");
6289            conn.execute(
6290                "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
6291                [],
6292            )
6293            .expect("delete current row");
6294        }
6295
6296        let missing = service.check_semantics().expect("semantics after delete");
6297        assert_eq!(missing.missing_operational_current_rows, 1);
6298        assert_eq!(missing.stale_operational_current_rows, 0);
6299
6300        service
6301            .rebuild_operational_current(Some("connector_health"))
6302            .expect("rebuild current");
6303
6304        let after = service.check_semantics().expect("semantics after rebuild");
6305        assert_eq!(after.missing_operational_current_rows, 0);
6306        assert_eq!(after.stale_operational_current_rows, 0);
6307
6308        let conn = sqlite::open_connection(db.path()).expect("conn");
6309        let payload: String = conn
6310            .query_row(
6311                "SELECT payload_json FROM operational_current \
6312                 WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
6313                [],
6314                |row| row.get(0),
6315            )
6316            .expect("restored payload");
6317        assert_eq!(payload, r#"{"status":"new"}"#);
6318    }
6319
6320    #[test]
6321    fn disable_operational_collection_sets_disabled_at_and_emits_provenance() {
6322        let (db, service) = setup();
6323        service
6324            .register_operational_collection(&OperationalRegisterRequest {
6325                name: "audit_log".to_owned(),
6326                kind: OperationalCollectionKind::AppendOnlyLog,
6327                schema_json: "{}".to_owned(),
6328                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6329                filter_fields_json: "[]".to_owned(),
6330                validation_json: String::new(),
6331                secondary_indexes_json: "[]".to_owned(),
6332                format_version: 1,
6333            })
6334            .expect("register collection");
6335
6336        let record = service
6337            .disable_operational_collection("audit_log")
6338            .expect("disable collection");
6339        assert_eq!(record.name, "audit_log");
6340        assert!(record.disabled_at.is_some());
6341
6342        let disabled_at = record.disabled_at.expect("disabled_at");
6343        let described = service
6344            .describe_operational_collection("audit_log")
6345            .expect("describe collection")
6346            .expect("collection exists");
6347        assert_eq!(described.disabled_at, Some(disabled_at));
6348
6349        let writer = crate::WriterActor::start(
6350            db.path(),
6351            Arc::new(SchemaManager::new()),
6352            crate::ProvenanceMode::Warn,
6353            Arc::new(crate::TelemetryCounters::default()),
6354        )
6355        .expect("writer");
6356        let error = writer
6357            .submit(crate::WriteRequest {
6358                label: "disabled-operational".to_owned(),
6359                nodes: vec![],
6360                node_retires: vec![],
6361                edges: vec![],
6362                edge_retires: vec![],
6363                chunks: vec![],
6364                runs: vec![],
6365                steps: vec![],
6366                actions: vec![],
6367                optional_backfills: vec![],
6368                vec_inserts: vec![],
6369                operational_writes: vec![crate::OperationalWrite::Append {
6370                    collection: "audit_log".to_owned(),
6371                    record_key: "evt-1".to_owned(),
6372                    payload_json: r#"{"type":"sync"}"#.to_owned(),
6373                    source_ref: Some("src-1".to_owned()),
6374                }],
6375            })
6376            .expect_err("disabled collection should reject writes");
6377        assert!(matches!(error, EngineError::InvalidWrite(_)));
6378        assert!(error.to_string().contains("is disabled"));
6379
6380        let conn = sqlite::open_connection(db.path()).expect("conn");
6381        let provenance_count: i64 = conn
6382            .query_row(
6383                "SELECT count(*) FROM provenance_events \
6384                 WHERE event_type = 'operational_collection_disabled' AND subject = 'audit_log'",
6385                [],
6386                |row| row.get(0),
6387            )
6388            .expect("provenance count");
6389        assert_eq!(provenance_count, 1);
6390    }
6391
6392    #[test]
6393    fn purge_operational_collection_deletes_append_only_rows_before_cutoff() {
6394        let (db, service) = setup();
6395        {
6396            let conn = sqlite::open_connection(db.path()).expect("conn");
6397            conn.execute(
6398                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6399                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_all\"}', 1, 100)",
6400                [],
6401            )
6402            .expect("seed collection");
6403            conn.execute(
6404                "INSERT INTO operational_mutations \
6405                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6406                 VALUES ('evt-1', 'audit_log', 'evt-1', 'append', '{\"seq\":1}', 'src-1', 100, 1)",
6407                [],
6408            )
6409            .expect("seed event 1");
6410            conn.execute(
6411                "INSERT INTO operational_mutations \
6412                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6413                 VALUES ('evt-2', 'audit_log', 'evt-2', 'append', '{\"seq\":2}', 'src-2', 200, 2)",
6414                [],
6415            )
6416            .expect("seed event 2");
6417            conn.execute(
6418                "INSERT INTO operational_mutations \
6419                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6420                 VALUES ('evt-3', 'audit_log', 'evt-3', 'append', '{\"seq\":3}', 'src-3', 300, 3)",
6421                [],
6422            )
6423            .expect("seed event 3");
6424        }
6425
6426        let report = service
6427            .purge_operational_collection("audit_log", 250)
6428            .expect("purge collection");
6429        assert_eq!(report.collection_name, "audit_log");
6430        assert_eq!(report.deleted_mutations, 2);
6431        assert_eq!(report.before_timestamp, 250);
6432
6433        let conn = sqlite::open_connection(db.path()).expect("conn");
6434        let remaining: Vec<String> = {
6435            let mut stmt = conn
6436                .prepare(
6437                    "SELECT id FROM operational_mutations \
6438                     WHERE collection_name = 'audit_log' ORDER BY mutation_order",
6439                )
6440                .expect("stmt");
6441            stmt.query_map([], |row| row.get(0))
6442                .expect("rows")
6443                .collect::<Result<_, _>>()
6444                .expect("collect")
6445        };
6446        assert_eq!(remaining, vec!["evt-3".to_owned()]);
6447        let provenance_count: i64 = conn
6448            .query_row(
6449                "SELECT count(*) FROM provenance_events \
6450                 WHERE event_type = 'operational_collection_purged' AND subject = 'audit_log'",
6451                [],
6452                |row| row.get(0),
6453            )
6454            .expect("provenance count");
6455        assert_eq!(provenance_count, 1);
6456    }
6457
6458    #[test]
6459    fn compact_operational_collection_dry_run_reports_without_mutation() {
6460        let (db, service) = setup();
6461        {
6462            let conn = sqlite::open_connection(db.path()).expect("conn");
6463            conn.execute(
6464                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6465                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
6466                [],
6467            )
6468            .expect("seed collection");
6469            for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
6470                conn.execute(
6471                    "INSERT INTO operational_mutations \
6472                     (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6473                     VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
6474                    rusqlite::params![
6475                        format!("evt-{index}"),
6476                        format!("{{\"seq\":{index}}}"),
6477                        created_at,
6478                        index,
6479                    ],
6480                )
6481                .expect("seed event");
6482            }
6483        }
6484
6485        let report = service
6486            .compact_operational_collection("audit_log", true)
6487            .expect("compact collection");
6488        assert_eq!(report.collection_name, "audit_log");
6489        assert_eq!(report.deleted_mutations, 1);
6490        assert!(report.dry_run);
6491        assert_eq!(report.before_timestamp, None);
6492
6493        let conn = sqlite::open_connection(db.path()).expect("conn");
6494        let remaining_count: i64 = conn
6495            .query_row(
6496                "SELECT count(*) FROM operational_mutations WHERE collection_name = 'audit_log'",
6497                [],
6498                |row| row.get(0),
6499            )
6500            .expect("remaining count");
6501        assert_eq!(remaining_count, 3);
6502        let provenance_count: i64 = conn
6503            .query_row(
6504                "SELECT count(*) FROM provenance_events \
6505                 WHERE event_type = 'operational_collection_compacted' AND subject = 'audit_log'",
6506                [],
6507                |row| row.get(0),
6508            )
6509            .expect("provenance count");
6510        assert_eq!(provenance_count, 0);
6511    }
6512
6513    #[test]
6514    fn compact_operational_collection_keep_last_deletes_oldest_rows() {
6515        let (db, service) = setup();
6516        {
6517            let conn = sqlite::open_connection(db.path()).expect("conn");
6518            conn.execute(
6519                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6520                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
6521                [],
6522            )
6523            .expect("seed collection");
6524            for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
6525                conn.execute(
6526                    "INSERT INTO operational_mutations \
6527                     (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6528                     VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
6529                    rusqlite::params![
6530                        format!("evt-{index}"),
6531                        format!("{{\"seq\":{index}}}"),
6532                        created_at,
6533                        index,
6534                    ],
6535                )
6536                .expect("seed event");
6537            }
6538        }
6539
6540        let report = service
6541            .compact_operational_collection("audit_log", false)
6542            .expect("compact collection");
6543        assert_eq!(report.deleted_mutations, 1);
6544        assert!(!report.dry_run);
6545
6546        let conn = sqlite::open_connection(db.path()).expect("conn");
6547        let remaining: Vec<String> = {
6548            let mut stmt = conn
6549                .prepare(
6550                    "SELECT id FROM operational_mutations \
6551                     WHERE collection_name = 'audit_log' ORDER BY mutation_order",
6552                )
6553                .expect("stmt");
6554            stmt.query_map([], |row| row.get(0))
6555                .expect("rows")
6556                .collect::<Result<_, _>>()
6557                .expect("collect")
6558        };
6559        assert_eq!(remaining, vec!["evt-2".to_owned(), "evt-3".to_owned()]);
6560        let provenance_count: i64 = conn
6561            .query_row(
6562                "SELECT count(*) FROM provenance_events \
6563                 WHERE event_type = 'operational_collection_compacted' AND subject = 'audit_log'",
6564                [],
6565                |row| row.get(0),
6566            )
6567            .expect("provenance count");
6568        assert_eq!(provenance_count, 1);
6569    }
6570
6571    #[test]
6572    fn plan_and_run_operational_retention_keep_last() {
6573        let (db, service) = setup();
6574        {
6575            let conn = sqlite::open_connection(db.path()).expect("conn");
6576            conn.execute(
6577                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6578                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
6579                [],
6580            )
6581            .expect("seed collection");
6582            for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
6583                conn.execute(
6584                    "INSERT INTO operational_mutations \
6585                     (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6586                     VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
6587                    rusqlite::params![
6588                        format!("evt-{index}"),
6589                        format!("{{\"seq\":{index}}}"),
6590                        created_at,
6591                        index,
6592                    ],
6593                )
6594                .expect("seed event");
6595            }
6596        }
6597
6598        let plan = service
6599            .plan_operational_retention(1_000, None, Some(10))
6600            .expect("plan retention");
6601        assert_eq!(plan.collections_examined, 1);
6602        assert_eq!(plan.items[0].collection_name, "audit_log");
6603        assert_eq!(
6604            plan.items[0].action_kind,
6605            crate::operational::OperationalRetentionActionKind::KeepLast
6606        );
6607        assert_eq!(plan.items[0].candidate_deletions, 1);
6608        assert_eq!(plan.items[0].max_rows, Some(2));
6609        assert_eq!(plan.items[0].last_run_at, None);
6610
6611        let dry_run = service
6612            .run_operational_retention(1_000, None, Some(10), true)
6613            .expect("dry-run retention");
6614        assert!(dry_run.dry_run);
6615        assert_eq!(dry_run.collections_acted_on, 1);
6616        assert_eq!(dry_run.items[0].deleted_mutations, 1);
6617        assert_eq!(dry_run.items[0].rows_remaining, 2);
6618
6619        let conn = sqlite::open_connection(db.path()).expect("conn");
6620        let remaining_count: i64 = conn
6621            .query_row(
6622                "SELECT count(*) FROM operational_mutations WHERE collection_name = 'audit_log'",
6623                [],
6624                |row| row.get(0),
6625            )
6626            .expect("remaining count after dry run");
6627        assert_eq!(remaining_count, 3);
6628        let retention_run_count: i64 = conn
6629            .query_row(
6630                "SELECT count(*) FROM operational_retention_runs WHERE collection_name = 'audit_log'",
6631                [],
6632                |row| row.get(0),
6633            )
6634            .expect("retention run count");
6635        assert_eq!(retention_run_count, 0);
6636        drop(conn);
6637
6638        let executed = service
6639            .run_operational_retention(1_000, None, Some(10), false)
6640            .expect("execute retention");
6641        assert_eq!(executed.collections_acted_on, 1);
6642        assert_eq!(executed.items[0].deleted_mutations, 1);
6643        assert_eq!(executed.items[0].rows_remaining, 2);
6644
6645        let conn = sqlite::open_connection(db.path()).expect("conn");
6646        let remaining: Vec<String> = {
6647            let mut stmt = conn
6648                .prepare(
6649                    "SELECT id FROM operational_mutations \
6650                     WHERE collection_name = 'audit_log' ORDER BY mutation_order",
6651                )
6652                .expect("stmt");
6653            stmt.query_map([], |row| row.get(0))
6654                .expect("rows")
6655                .collect::<Result<_, _>>()
6656                .expect("collect")
6657        };
6658        assert_eq!(remaining, vec!["evt-2".to_owned(), "evt-3".to_owned()]);
6659        let last_run_at: i64 = conn
6660            .query_row(
6661                "SELECT executed_at FROM operational_retention_runs \
6662                 WHERE collection_name = 'audit_log' ORDER BY executed_at DESC LIMIT 1",
6663                [],
6664                |row| row.get(0),
6665            )
6666            .expect("last run at");
6667        assert_eq!(last_run_at, 1_000);
6668    }
6669
6670    #[test]
6671    fn dry_run_operational_retention_does_not_mark_noop_collection_as_acted_on() {
6672        let (db, service) = setup();
6673        let conn = sqlite::open_connection(db.path()).expect("conn");
6674        conn.execute(
6675            "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6676             VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
6677            [],
6678        )
6679        .expect("seed collection");
6680        for (index, created_at) in [(1_i64, 100_i64), (2, 200)] {
6681            conn.execute(
6682                "INSERT INTO operational_mutations \
6683                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6684                 VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
6685                rusqlite::params![
6686                    format!("evt-{index}"),
6687                    format!("{{\"seq\":{index}}}"),
6688                    created_at,
6689                    index,
6690                ],
6691            )
6692            .expect("seed event");
6693        }
6694        drop(conn);
6695
6696        let dry_run = service
6697            .run_operational_retention(1_000, None, Some(10), true)
6698            .expect("dry-run retention");
6699        assert!(dry_run.dry_run);
6700        assert_eq!(dry_run.collections_acted_on, 0);
6701        assert_eq!(dry_run.items[0].deleted_mutations, 0);
6702        assert_eq!(dry_run.items[0].rows_remaining, 2);
6703    }
6704
6705    #[test]
6706    fn compact_operational_collection_rejects_latest_state() {
6707        let (_db, service) = setup();
6708        service
6709            .register_operational_collection(&OperationalRegisterRequest {
6710                name: "connector_health".to_owned(),
6711                kind: OperationalCollectionKind::LatestState,
6712                schema_json: "{}".to_owned(),
6713                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6714                filter_fields_json: "[]".to_owned(),
6715                validation_json: String::new(),
6716                secondary_indexes_json: "[]".to_owned(),
6717                format_version: 1,
6718            })
6719            .expect("register collection");
6720
6721        let error = service
6722            .compact_operational_collection("connector_health", false)
6723            .expect_err("latest_state compaction should be rejected");
6724        assert!(matches!(error, EngineError::InvalidWrite(_)));
6725        assert!(error.to_string().contains("append_only_log"));
6726    }
6727
6728    #[test]
6729    fn register_operational_collection_persists_filter_fields_json() {
6730        let (_db, service) = setup();
6731
6732        let record = service
6733            .register_operational_collection(&OperationalRegisterRequest {
6734                name: "audit_log".to_owned(),
6735                kind: OperationalCollectionKind::AppendOnlyLog,
6736                schema_json: "{}".to_owned(),
6737                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6738                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
6739                validation_json: String::new(),
6740                secondary_indexes_json: "[]".to_owned(),
6741                format_version: 1,
6742            })
6743            .expect("register collection");
6744
6745        assert_eq!(
6746            record.filter_fields_json,
6747            r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#
6748        );
6749    }
6750
6751    #[test]
6752    fn read_operational_collection_filters_append_only_rows_by_declared_fields() {
6753        let (db, service) = setup();
6754        service
6755            .register_operational_collection(&OperationalRegisterRequest {
6756                name: "audit_log".to_owned(),
6757                kind: OperationalCollectionKind::AppendOnlyLog,
6758                schema_json: "{}".to_owned(),
6759                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6760                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"seq","type":"integer","modes":["exact","range"]},{"name":"ts","type":"timestamp","modes":["exact","range"]}]"#.to_owned(),
6761                validation_json: String::new(),
6762                secondary_indexes_json: "[]".to_owned(),
6763                format_version: 1,
6764            })
6765            .expect("register collection");
6766        {
6767            let writer = crate::WriterActor::start(
6768                db.path(),
6769                Arc::new(SchemaManager::new()),
6770                crate::ProvenanceMode::Warn,
6771                Arc::new(crate::TelemetryCounters::default()),
6772            )
6773            .expect("writer");
6774            writer
6775                .submit(crate::WriteRequest {
6776                    label: "operational".to_owned(),
6777                    nodes: vec![],
6778                    node_retires: vec![],
6779                    edges: vec![],
6780                    edge_retires: vec![],
6781                    chunks: vec![],
6782                    runs: vec![],
6783                    steps: vec![],
6784                    actions: vec![],
6785                    optional_backfills: vec![],
6786                    vec_inserts: vec![],
6787                    operational_writes: vec![
6788                        crate::OperationalWrite::Append {
6789                            collection: "audit_log".to_owned(),
6790                            record_key: "evt-1".to_owned(),
6791                            payload_json: r#"{"actor":"alice","seq":1,"ts":100}"#.to_owned(),
6792                            source_ref: Some("src-1".to_owned()),
6793                        },
6794                        crate::OperationalWrite::Append {
6795                            collection: "audit_log".to_owned(),
6796                            record_key: "evt-2".to_owned(),
6797                            payload_json: r#"{"actor":"alice-admin","seq":2,"ts":200}"#.to_owned(),
6798                            source_ref: Some("src-2".to_owned()),
6799                        },
6800                        crate::OperationalWrite::Append {
6801                            collection: "audit_log".to_owned(),
6802                            record_key: "evt-3".to_owned(),
6803                            payload_json: r#"{"actor":"bob","seq":3,"ts":300}"#.to_owned(),
6804                            source_ref: Some("src-3".to_owned()),
6805                        },
6806                    ],
6807                })
6808                .expect("write");
6809        }
6810
6811        let report = service
6812            .read_operational_collection(&crate::operational::OperationalReadRequest {
6813                collection_name: "audit_log".to_owned(),
6814                filters: vec![
6815                    crate::operational::OperationalFilterClause::Prefix {
6816                        field: "actor".to_owned(),
6817                        value: "alice".to_owned(),
6818                    },
6819                    crate::operational::OperationalFilterClause::Range {
6820                        field: "ts".to_owned(),
6821                        lower: Some(150),
6822                        upper: Some(250),
6823                    },
6824                ],
6825                limit: Some(10),
6826            })
6827            .expect("filtered read");
6828
6829        assert_eq!(report.collection_name, "audit_log");
6830        assert_eq!(report.row_count, 1);
6831        assert!(!report.was_limited);
6832        assert_eq!(report.rows.len(), 1);
6833        assert_eq!(report.rows[0].record_key, "evt-2");
6834        assert_eq!(
6835            report.rows[0].payload_json,
6836            r#"{"actor":"alice-admin","seq":2,"ts":200}"#
6837        );
6838    }
6839
6840    #[test]
6841    fn read_operational_collection_uses_secondary_index_when_filter_values_are_missing() {
6842        let (db, service) = setup();
6843        service
6844            .register_operational_collection(&OperationalRegisterRequest {
6845                name: "audit_log".to_owned(),
6846                kind: OperationalCollectionKind::AppendOnlyLog,
6847                schema_json: "{}".to_owned(),
6848                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6849                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
6850                validation_json: String::new(),
6851                secondary_indexes_json: r#"[{"name":"actor_ts","kind":"append_only_field_time","field":"actor","value_type":"string","time_field":"ts"}]"#.to_owned(),
6852                format_version: 1,
6853            })
6854            .expect("register collection");
6855        {
6856            let writer = crate::WriterActor::start(
6857                db.path(),
6858                Arc::new(SchemaManager::new()),
6859                crate::ProvenanceMode::Warn,
6860                Arc::new(crate::TelemetryCounters::default()),
6861            )
6862            .expect("writer");
6863            writer
6864                .submit(crate::WriteRequest {
6865                    label: "operational".to_owned(),
6866                    nodes: vec![],
6867                    node_retires: vec![],
6868                    edges: vec![],
6869                    edge_retires: vec![],
6870                    chunks: vec![],
6871                    runs: vec![],
6872                    steps: vec![],
6873                    actions: vec![],
6874                    optional_backfills: vec![],
6875                    vec_inserts: vec![],
6876                    operational_writes: vec![
6877                        crate::OperationalWrite::Append {
6878                            collection: "audit_log".to_owned(),
6879                            record_key: "evt-1".to_owned(),
6880                            payload_json: r#"{"actor":"alice","ts":100}"#.to_owned(),
6881                            source_ref: Some("src-1".to_owned()),
6882                        },
6883                        crate::OperationalWrite::Append {
6884                            collection: "audit_log".to_owned(),
6885                            record_key: "evt-2".to_owned(),
6886                            payload_json: r#"{"actor":"alice-admin","ts":200}"#.to_owned(),
6887                            source_ref: Some("src-2".to_owned()),
6888                        },
6889                    ],
6890                })
6891                .expect("write");
6892        }
6893        let conn = sqlite::open_connection(db.path()).expect("conn");
6894        conn.execute(
6895            "DELETE FROM operational_filter_values WHERE collection_name = 'audit_log'",
6896            [],
6897        )
6898        .expect("clear filter values");
6899        drop(conn);
6900
6901        let report = service
6902            .read_operational_collection(&crate::operational::OperationalReadRequest {
6903                collection_name: "audit_log".to_owned(),
6904                filters: vec![
6905                    crate::operational::OperationalFilterClause::Prefix {
6906                        field: "actor".to_owned(),
6907                        value: "alice".to_owned(),
6908                    },
6909                    crate::operational::OperationalFilterClause::Range {
6910                        field: "ts".to_owned(),
6911                        lower: Some(150),
6912                        upper: Some(250),
6913                    },
6914                ],
6915                limit: Some(10),
6916            })
6917            .expect("secondary-index read");
6918
6919        assert_eq!(report.row_count, 1);
6920        assert_eq!(report.rows[0].record_key, "evt-2");
6921    }
6922
6923    #[test]
6924    fn read_operational_collection_rejects_undeclared_fields_and_latest_state_collections() {
6925        let (_db, service) = setup();
6926        service
6927            .register_operational_collection(&OperationalRegisterRequest {
6928                name: "connector_health".to_owned(),
6929                kind: OperationalCollectionKind::LatestState,
6930                schema_json: "{}".to_owned(),
6931                retention_json: "{}".to_owned(),
6932                filter_fields_json: r#"[{"name":"status","type":"string","modes":["exact"]}]"#
6933                    .to_owned(),
6934                validation_json: String::new(),
6935                secondary_indexes_json: "[]".to_owned(),
6936                format_version: 1,
6937            })
6938            .expect("register collection");
6939
6940        let latest_state_error = service
6941            .read_operational_collection(&crate::operational::OperationalReadRequest {
6942                collection_name: "connector_health".to_owned(),
6943                filters: vec![crate::operational::OperationalFilterClause::Exact {
6944                    field: "status".to_owned(),
6945                    value: crate::operational::OperationalFilterValue::String("ok".to_owned()),
6946                }],
6947                limit: Some(10),
6948            })
6949            .expect_err("latest_state filtered reads should be rejected");
6950        assert!(latest_state_error.to_string().contains("append_only_log"));
6951
6952        service
6953            .register_operational_collection(&OperationalRegisterRequest {
6954                name: "audit_log".to_owned(),
6955                kind: OperationalCollectionKind::AppendOnlyLog,
6956                schema_json: "{}".to_owned(),
6957                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6958                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact"]}]"#
6959                    .to_owned(),
6960                validation_json: String::new(),
6961                secondary_indexes_json: "[]".to_owned(),
6962                format_version: 1,
6963            })
6964            .expect("register append-only collection");
6965
6966        let undeclared_error = service
6967            .read_operational_collection(&crate::operational::OperationalReadRequest {
6968                collection_name: "audit_log".to_owned(),
6969                filters: vec![crate::operational::OperationalFilterClause::Exact {
6970                    field: "missing".to_owned(),
6971                    value: crate::operational::OperationalFilterValue::String("x".to_owned()),
6972                }],
6973                limit: Some(10),
6974            })
6975            .expect_err("undeclared field should be rejected");
6976        assert!(undeclared_error.to_string().contains("undeclared"));
6977    }
6978
6979    #[test]
6980    fn read_operational_collection_applies_limit_and_reports_truncation() {
6981        let (db, service) = setup();
6982        service
6983            .register_operational_collection(&OperationalRegisterRequest {
6984                name: "audit_log".to_owned(),
6985                kind: OperationalCollectionKind::AppendOnlyLog,
6986                schema_json: "{}".to_owned(),
6987                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6988                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["prefix"]}]"#
6989                    .to_owned(),
6990                validation_json: String::new(),
6991                secondary_indexes_json: "[]".to_owned(),
6992                format_version: 1,
6993            })
6994            .expect("register collection");
6995        {
6996            let writer = crate::WriterActor::start(
6997                db.path(),
6998                Arc::new(SchemaManager::new()),
6999                crate::ProvenanceMode::Warn,
7000                Arc::new(crate::TelemetryCounters::default()),
7001            )
7002            .expect("writer");
7003            writer
7004                .submit(crate::WriteRequest {
7005                    label: "operational".to_owned(),
7006                    nodes: vec![],
7007                    node_retires: vec![],
7008                    edges: vec![],
7009                    edge_retires: vec![],
7010                    chunks: vec![],
7011                    runs: vec![],
7012                    steps: vec![],
7013                    actions: vec![],
7014                    optional_backfills: vec![],
7015                    vec_inserts: vec![],
7016                    operational_writes: vec![
7017                        crate::OperationalWrite::Append {
7018                            collection: "audit_log".to_owned(),
7019                            record_key: "evt-1".to_owned(),
7020                            payload_json: r#"{"actor":"alice-1"}"#.to_owned(),
7021                            source_ref: Some("src-1".to_owned()),
7022                        },
7023                        crate::OperationalWrite::Append {
7024                            collection: "audit_log".to_owned(),
7025                            record_key: "evt-2".to_owned(),
7026                            payload_json: r#"{"actor":"alice-2"}"#.to_owned(),
7027                            source_ref: Some("src-2".to_owned()),
7028                        },
7029                    ],
7030                })
7031                .expect("write");
7032        }
7033
7034        let report = service
7035            .read_operational_collection(&crate::operational::OperationalReadRequest {
7036                collection_name: "audit_log".to_owned(),
7037                filters: vec![crate::operational::OperationalFilterClause::Prefix {
7038                    field: "actor".to_owned(),
7039                    value: "alice".to_owned(),
7040                }],
7041                limit: Some(1),
7042            })
7043            .expect("limited read");
7044
7045        assert_eq!(report.row_count, 1);
7046        assert_eq!(report.applied_limit, 1);
7047        assert!(report.was_limited);
7048        assert_eq!(report.rows[0].record_key, "evt-2");
7049    }
7050
7051    #[test]
7052    fn preexisting_operational_collection_can_gain_filter_contract_after_upgrade() {
7053        let db = NamedTempFile::new().expect("temp db");
7054        let conn = sqlite::open_connection(db.path()).expect("conn");
7055        conn.execute_batch(
7056            r#"
7057            CREATE TABLE operational_collections (
7058                name TEXT PRIMARY KEY,
7059                kind TEXT NOT NULL,
7060                schema_json TEXT NOT NULL,
7061                retention_json TEXT NOT NULL,
7062                format_version INTEGER NOT NULL DEFAULT 1,
7063                created_at INTEGER NOT NULL DEFAULT 100,
7064                disabled_at INTEGER
7065            );
7066            CREATE TABLE operational_mutations (
7067                id TEXT PRIMARY KEY,
7068                collection_name TEXT NOT NULL,
7069                record_key TEXT NOT NULL,
7070                op_kind TEXT NOT NULL,
7071                payload_json TEXT NOT NULL,
7072                source_ref TEXT,
7073                created_at INTEGER NOT NULL DEFAULT 100,
7074                mutation_order INTEGER NOT NULL DEFAULT 1
7075            );
7076            INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at)
7077            VALUES ('audit_log', 'append_only_log', '{}', '{"mode":"keep_all"}', 1, 100);
7078            INSERT INTO operational_mutations
7079                (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order)
7080            VALUES
7081                ('evt-1', 'audit_log', 'evt-1', 'append', '{"actor":"alice","ts":0}', 'src-1', 100, 1);
7082            "#,
7083        )
7084        .expect("seed pre-v10 schema");
7085        drop(conn);
7086
7087        let service = AdminService::new(db.path(), Arc::new(SchemaManager::new()));
7088        let pre_update = service
7089            .read_operational_collection(&crate::operational::OperationalReadRequest {
7090                collection_name: "audit_log".to_owned(),
7091                filters: vec![crate::operational::OperationalFilterClause::Exact {
7092                    field: "actor".to_owned(),
7093                    value: crate::operational::OperationalFilterValue::String("alice".to_owned()),
7094                }],
7095                limit: Some(10),
7096            })
7097            .expect_err("read should reject undeclared fields before migration update");
7098        assert!(pre_update.to_string().contains("undeclared"));
7099
7100        let updated = service
7101            .update_operational_collection_filters(
7102                "audit_log",
7103                r#"[{"name":"actor","type":"string","modes":["exact"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#,
7104            )
7105            .expect("update filter contract");
7106        assert!(updated.filter_fields_json.contains("\"actor\""));
7107
7108        let report = service
7109            .read_operational_collection(&crate::operational::OperationalReadRequest {
7110                collection_name: "audit_log".to_owned(),
7111                filters: vec![crate::operational::OperationalFilterClause::Range {
7112                    field: "ts".to_owned(),
7113                    lower: Some(0),
7114                    upper: Some(0),
7115                }],
7116                limit: Some(10),
7117            })
7118            .expect("read after explicit filter update");
7119        assert_eq!(report.row_count, 1);
7120        assert_eq!(report.rows[0].record_key, "evt-1");
7121    }
7122
7123    #[cfg(feature = "sqlite-vec")]
7124    #[test]
7125    fn check_semantics_detects_stale_vec_rows() {
7126        use crate::sqlite::open_connection_with_vec;
7127
7128        let db = NamedTempFile::new().expect("temp file");
7129        let schema = Arc::new(SchemaManager::new());
7130        {
7131            let conn = open_connection_with_vec(db.path()).expect("vec conn");
7132            schema.bootstrap(&conn).expect("bootstrap");
7133            schema
7134                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 3)
7135                .expect("vec profile");
7136            // Insert a vec row whose chunk does not exist.
7137            let bytes: Vec<u8> = [0.1f32, 0.2f32, 0.3f32]
7138                .iter()
7139                .flat_map(|f| f.to_le_bytes())
7140                .collect();
7141            conn.execute(
7142                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('ghost-chunk', ?1)",
7143                rusqlite::params![bytes],
7144            )
7145            .expect("insert stale vec row");
7146        }
7147        let service = AdminService::new(db.path(), Arc::clone(&schema));
7148        let report = service.check_semantics().expect("semantics check");
7149        assert_eq!(report.stale_vec_rows, 1);
7150        assert!(
7151            report.warnings.iter().any(|w| w.contains("stale vec")),
7152            "warning must mention stale vec"
7153        );
7154    }
7155
7156    #[cfg(feature = "sqlite-vec")]
7157    #[test]
7158    fn restore_vector_profiles_recreates_vec_table_from_metadata() {
7159        let db = NamedTempFile::new().expect("temp file");
7160        let schema = Arc::new(SchemaManager::new());
7161        {
7162            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7163            schema.bootstrap(&conn).expect("bootstrap");
7164            conn.execute(
7165                "INSERT INTO vector_profiles (profile, table_name, dimension, enabled) \
7166                 VALUES ('default', 'vec_nodes_active', 3, 1)",
7167                [],
7168            )
7169            .expect("insert vector profile");
7170        }
7171
7172        let service = AdminService::new(db.path(), Arc::clone(&schema));
7173        let report = service
7174            .restore_vector_profiles()
7175            .expect("restore vector profiles");
7176        assert_eq!(
7177            report.targets,
7178            vec![crate::projection::ProjectionTarget::Vec]
7179        );
7180        assert_eq!(report.rebuilt_rows, 1);
7181
7182        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7183        let count: i64 = conn
7184            .query_row(
7185                "SELECT count(*) FROM sqlite_schema WHERE name = 'vec_nodes_active'",
7186                [],
7187                |row| row.get(0),
7188            )
7189            .expect("vec schema count");
7190        assert_eq!(count, 1, "vec table should exist after restore");
7191    }
7192
7193    #[cfg(feature = "sqlite-vec")]
7194    #[test]
7195    fn load_vector_regeneration_config_supports_json_and_toml() {
7196        let dir = tempfile::tempdir().expect("temp dir");
7197        let json_path = dir.path().join("regen.json");
7198        let toml_path = dir.path().join("regen.toml");
7199
7200        let config = VectorRegenerationConfig {
7201            profile: "default".to_owned(),
7202            table_name: "vec_nodes_active".to_owned(),
7203            model_identity: "model-a".to_owned(),
7204            model_version: "1.0".to_owned(),
7205            dimension: 4,
7206            normalization_policy: "l2".to_owned(),
7207            chunking_policy: "per_chunk".to_owned(),
7208            preprocessing_policy: "trim".to_owned(),
7209            generator_command: vec!["/bin/echo".to_owned()],
7210        };
7211
7212        fs::write(&json_path, serde_json::to_string(&config).expect("json")).expect("write json");
7213        fs::write(&toml_path, toml::to_string(&config).expect("toml")).expect("write toml");
7214
7215        let parsed_json = load_vector_regeneration_config(&json_path).expect("json parse");
7216        let parsed_toml = load_vector_regeneration_config(&toml_path).expect("toml parse");
7217
7218        assert_eq!(parsed_json, config);
7219        assert_eq!(parsed_toml, config);
7220    }
7221
7222    #[cfg(all(not(feature = "sqlite-vec"), unix))]
7223    #[test]
7224    fn regenerate_vector_embeddings_unsupported_vec_capability_writes_request_and_failed_audit() {
7225        let db = NamedTempFile::new().expect("temp file");
7226        let schema = Arc::new(SchemaManager::new());
7227        let temp_dir = tempfile::tempdir().expect("temp dir");
7228        let script_path = temp_dir.path().join("vector-generator-no-vec.sh");
7229
7230        fs::write(
7231            &script_path,
7232            r#"#!/usr/bin/env bash
7233set -euo pipefail
7234python3 -c 'import json, sys
7235payload = json.load(sys.stdin)
7236embeddings = [{"chunk_id": chunk["chunk_id"], "embedding": [1.0, 0.0, 0.0, 0.0]} for chunk in payload["chunks"]]
7237json.dump({"embeddings": embeddings}, sys.stdout)'
7238"#,
7239        )
7240        .expect("write generator script");
7241        set_file_mode(&script_path, 0o755);
7242
7243        {
7244            let conn = sqlite::open_connection(db.path()).expect("connection");
7245            schema.bootstrap(&conn).expect("bootstrap");
7246            conn.execute(
7247                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7248                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7249                [],
7250            )
7251            .expect("insert node");
7252            conn.execute(
7253                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7254                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7255                [],
7256            )
7257            .expect("insert chunk");
7258        }
7259
7260        let service = AdminService::new(db.path(), Arc::clone(&schema));
7261        let error = service
7262            .regenerate_vector_embeddings(&VectorRegenerationConfig {
7263                profile: "default".to_owned(),
7264                table_name: "vec_nodes_active".to_owned(),
7265                model_identity: "test-model".to_owned(),
7266                model_version: "1.0.0".to_owned(),
7267                dimension: 4,
7268                normalization_policy: "l2".to_owned(),
7269                chunking_policy: "per_chunk".to_owned(),
7270                preprocessing_policy: "trim".to_owned(),
7271                generator_command: vec![script_path.to_string_lossy().to_string()],
7272            })
7273            .expect_err("sqlite-vec capability should be required");
7274
7275        assert!(error.to_string().contains("unsupported vec capability"));
7276
7277        let conn = sqlite::open_connection(db.path()).expect("connection");
7278        let request_count: i64 = conn
7279            .query_row(
7280                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_requested' AND subject = 'default'",
7281                [],
7282                |row| row.get(0),
7283            )
7284            .expect("request count");
7285        assert_eq!(request_count, 1);
7286        let failed_count: i64 = conn
7287            .query_row(
7288                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
7289                [],
7290                |row| row.get(0),
7291            )
7292            .expect("failed count");
7293        assert_eq!(failed_count, 1);
7294        let metadata_json: String = conn
7295            .query_row(
7296                "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
7297                [],
7298                |row| row.get(0),
7299            )
7300            .expect("failed metadata");
7301        assert!(metadata_json.contains("\"failure_class\":\"unsupported vec capability\""));
7302    }
7303
7304    #[cfg(feature = "sqlite-vec")]
7305    #[test]
7306    fn regenerate_vector_embeddings_rebuilds_embeddings_from_generator() {
7307        let db = NamedTempFile::new().expect("temp file");
7308        let schema = Arc::new(SchemaManager::new());
7309        let temp_dir = tempfile::tempdir().expect("temp dir");
7310        let script_path = temp_dir.path().join("vector-generator.sh");
7311
7312        fs::write(
7313            &script_path,
7314            r#"#!/usr/bin/env bash
7315set -euo pipefail
7316python3 -c 'import json, sys
7317payload = json.load(sys.stdin)
7318embeddings = []
7319for chunk in payload["chunks"]:
7320    text = chunk["text_content"].lower()
7321    if "budget" in text:
7322        embedding = [1.0, 0.0, 0.0, 0.0]
7323    else:
7324        embedding = [0.0, 1.0, 0.0, 0.0]
7325    embeddings.append({"chunk_id": chunk["chunk_id"], "embedding": embedding})
7326json.dump({"embeddings": embeddings}, sys.stdout)'
7327"#,
7328        )
7329        .expect("write generator script");
7330        set_file_mode(&script_path, 0o755);
7331
7332        {
7333            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7334            schema.bootstrap(&conn).expect("bootstrap");
7335            conn.execute(
7336                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7337                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7338                [],
7339            )
7340            .expect("insert node");
7341            conn.execute(
7342                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7343                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7344                [],
7345            )
7346            .expect("insert chunk 1");
7347            conn.execute(
7348                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7349                 VALUES ('chunk-2', 'doc-1', 'travel plan', 101)",
7350                [],
7351            )
7352            .expect("insert chunk 2");
7353        }
7354
7355        let service = AdminService::new(db.path(), Arc::clone(&schema));
7356        let report = service
7357            .regenerate_vector_embeddings(&VectorRegenerationConfig {
7358                profile: "default".to_owned(),
7359                table_name: "vec_nodes_active".to_owned(),
7360                model_identity: "test-model".to_owned(),
7361                model_version: "1.0.0".to_owned(),
7362                dimension: 4,
7363                normalization_policy: "l2".to_owned(),
7364                chunking_policy: "per_chunk".to_owned(),
7365                preprocessing_policy: "trim".to_owned(),
7366                generator_command: vec![script_path.to_string_lossy().to_string()],
7367            })
7368            .expect("regenerate vectors");
7369
7370        assert_eq!(report.profile, "default");
7371        assert_eq!(report.table_name, "vec_nodes_active");
7372        assert_eq!(report.dimension, 4);
7373        assert_eq!(report.total_chunks, 2);
7374        assert_eq!(report.regenerated_rows, 2);
7375        assert!(report.contract_persisted);
7376
7377        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7378        let vec_count: i64 = conn
7379            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
7380                row.get(0)
7381            })
7382            .expect("vec count");
7383        assert_eq!(vec_count, 2);
7384
7385        let contract_count: i64 = conn
7386            .query_row(
7387                "SELECT count(*) FROM vector_embedding_contracts WHERE profile = 'default'",
7388                [],
7389                |row| row.get(0),
7390            )
7391            .expect("contract count");
7392        assert_eq!(contract_count, 1);
7393        let applied_at: i64 = conn
7394            .query_row(
7395                "SELECT applied_at FROM vector_embedding_contracts WHERE profile = 'default'",
7396                [],
7397                |row| row.get(0),
7398            )
7399            .expect("applied_at");
7400        assert!(applied_at > 0);
7401        let snapshot_hash: String = conn
7402            .query_row(
7403                "SELECT snapshot_hash FROM vector_embedding_contracts WHERE profile = 'default'",
7404                [],
7405                |row| row.get(0),
7406            )
7407            .expect("snapshot_hash");
7408        assert!(!snapshot_hash.is_empty());
7409        let contract_format_version: i64 = conn
7410            .query_row(
7411                "SELECT contract_format_version FROM vector_embedding_contracts WHERE profile = 'default'",
7412                [],
7413                |row| row.get(0),
7414            )
7415            .expect("contract_format_version");
7416        assert_eq!(contract_format_version, 1);
7417        let request_count: i64 = conn
7418            .query_row(
7419                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_requested' AND subject = 'default'",
7420                [],
7421                |row| row.get(0),
7422            )
7423            .expect("request audit count");
7424        assert_eq!(request_count, 1);
7425        let apply_count: i64 = conn
7426            .query_row(
7427                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_apply' AND subject = 'default'",
7428                [],
7429                |row| row.get(0),
7430            )
7431            .expect("apply audit count");
7432        assert_eq!(apply_count, 1);
7433        let apply_metadata: String = conn
7434            .query_row(
7435                "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_apply' AND subject = 'default'",
7436                [],
7437                |row| row.get(0),
7438            )
7439            .expect("apply metadata");
7440        assert!(apply_metadata.contains("\"profile\":\"default\""));
7441        assert!(apply_metadata.contains("\"snapshot_hash\":"));
7442    }
7443
7444    #[cfg(feature = "sqlite-vec")]
7445    #[test]
7446    fn regenerate_vector_embeddings_failure_leaves_contract_and_vec_rows_unchanged() {
7447        let db = NamedTempFile::new().expect("temp file");
7448        let schema = Arc::new(SchemaManager::new());
7449        let temp_dir = tempfile::tempdir().expect("temp dir");
7450        let script_path = temp_dir.path().join("vector-generator-fail.sh");
7451
7452        fs::write(
7453            &script_path,
7454            "#!/usr/bin/env bash\nset -euo pipefail\necho 'generator boom' >&2\nexit 17\n",
7455        )
7456        .expect("write failing script");
7457        set_file_mode(&script_path, 0o755);
7458
7459        {
7460            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7461            schema.bootstrap(&conn).expect("bootstrap");
7462            conn.execute(
7463                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7464                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7465                [],
7466            )
7467            .expect("insert node");
7468            conn.execute(
7469                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7470                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7471                [],
7472            )
7473            .expect("insert chunk");
7474            schema
7475                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
7476                .expect("ensure vec profile");
7477            conn.execute(
7478                r"
7479                INSERT INTO vector_embedding_contracts (
7480                    profile,
7481                    table_name,
7482                    model_identity,
7483                    model_version,
7484                    dimension,
7485                    normalization_policy,
7486                    chunking_policy,
7487                    preprocessing_policy,
7488                    generator_command_json,
7489                    applied_at,
7490                    snapshot_hash
7491                ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)
7492                ",
7493                rusqlite::params![
7494                    "default",
7495                    "vec_nodes_active",
7496                    "old-model",
7497                    "0.9.0",
7498                    4,
7499                    "l2",
7500                    "per_chunk",
7501                    "trim",
7502                    "[\"/bin/echo\"]",
7503                    111,
7504                    "old-snapshot"
7505                ],
7506            )
7507            .expect("seed contract");
7508            conn.execute(
7509                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
7510                [],
7511            )
7512            .expect("seed vec row");
7513        }
7514
7515        let service = AdminService::new(db.path(), Arc::clone(&schema));
7516        let error = service
7517            .regenerate_vector_embeddings_with_policy(
7518                &VectorRegenerationConfig {
7519                    profile: "default".to_owned(),
7520                    table_name: "vec_nodes_active".to_owned(),
7521                    model_identity: "new-model".to_owned(),
7522                    model_version: "1.0.0".to_owned(),
7523                    dimension: 4,
7524                    normalization_policy: "l2".to_owned(),
7525                    chunking_policy: "per_chunk".to_owned(),
7526                    preprocessing_policy: "trim".to_owned(),
7527                    generator_command: vec![script_path.to_string_lossy().to_string()],
7528                },
7529                &VectorGeneratorPolicy::default(),
7530            )
7531            .expect_err("generator should fail");
7532
7533        assert!(error.to_string().contains("generator nonzero exit"));
7534
7535        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7536        let model_identity: String = conn
7537            .query_row(
7538                "SELECT model_identity FROM vector_embedding_contracts WHERE profile = 'default'",
7539                [],
7540                |row| row.get(0),
7541            )
7542            .expect("model identity");
7543        assert_eq!(model_identity, "old-model");
7544        let snapshot_hash: String = conn
7545            .query_row(
7546                "SELECT snapshot_hash FROM vector_embedding_contracts WHERE profile = 'default'",
7547                [],
7548                |row| row.get(0),
7549            )
7550            .expect("snapshot hash");
7551        assert_eq!(snapshot_hash, "old-snapshot");
7552        let vec_count: i64 = conn
7553            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
7554                row.get(0)
7555            })
7556            .expect("vec count");
7557        assert_eq!(vec_count, 1);
7558        let failure_count: i64 = conn
7559            .query_row(
7560                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
7561                [],
7562                |row| row.get(0),
7563            )
7564            .expect("failure count");
7565        assert_eq!(failure_count, 1);
7566        let failure_metadata: String = conn
7567            .query_row(
7568                "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
7569                [],
7570                |row| row.get(0),
7571            )
7572            .expect("failure metadata");
7573        assert!(failure_metadata.contains("\"failure_class\":\"generator nonzero exit\""));
7574    }
7575
7576    #[cfg(feature = "sqlite-vec")]
7577    #[test]
7578    fn regenerate_vector_embeddings_snapshot_drift_is_retryable_and_non_mutating() {
7579        let db = NamedTempFile::new().expect("temp file");
7580        let schema = Arc::new(SchemaManager::new());
7581        let temp_dir = tempfile::tempdir().expect("temp dir");
7582        let script_path = temp_dir.path().join("vector-generator-drift.sh");
7583        let db_path = db.path().to_string_lossy().to_string();
7584
7585        fs::write(
7586            &script_path,
7587            format!(
7588                r#"#!/usr/bin/env bash
7589set -euo pipefail
7590python3 -c 'import json, sqlite3, sys
7591payload = json.load(sys.stdin)
7592conn = sqlite3.connect({db_path:?})
7593conn.execute("INSERT INTO chunks (id, node_logical_id, text_content, created_at) VALUES (?, ?, ?, ?)", ("chunk-2", "doc-1", "late arriving text", 101))
7594conn.commit()
7595conn.close()
7596embeddings = [{{"chunk_id": chunk["chunk_id"], "embedding": [1.0, 0.0, 0.0, 0.0]}} for chunk in payload["chunks"]]
7597json.dump({{"embeddings": embeddings}}, sys.stdout)'
7598"#,
7599            ),
7600        )
7601        .expect("write drift script");
7602        set_file_mode(&script_path, 0o755);
7603
7604        {
7605            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7606            schema.bootstrap(&conn).expect("bootstrap");
7607            conn.execute(
7608                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7609                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7610                [],
7611            )
7612            .expect("insert node");
7613            conn.execute(
7614                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7615                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7616                [],
7617            )
7618            .expect("insert chunk");
7619            schema
7620                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
7621                .expect("ensure vec profile");
7622        }
7623
7624        let service = AdminService::new(db.path(), Arc::clone(&schema));
7625        let error = service
7626            .regenerate_vector_embeddings_with_policy(
7627                &VectorRegenerationConfig {
7628                    profile: "default".to_owned(),
7629                    table_name: "vec_nodes_active".to_owned(),
7630                    model_identity: "test-model".to_owned(),
7631                    model_version: "1.0.0".to_owned(),
7632                    dimension: 4,
7633                    normalization_policy: "l2".to_owned(),
7634                    chunking_policy: "per_chunk".to_owned(),
7635                    preprocessing_policy: "trim".to_owned(),
7636                    generator_command: vec![script_path.to_string_lossy().to_string()],
7637                },
7638                &VectorGeneratorPolicy::default(),
7639            )
7640            .expect_err("snapshot drift should fail");
7641
7642        assert!(
7643            error
7644                .to_string()
7645                .contains("vector regeneration snapshot drift:")
7646        );
7647        assert!(error.to_string().contains("[retryable]"));
7648
7649        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7650        let contract_count: i64 = conn
7651            .query_row(
7652                "SELECT count(*) FROM vector_embedding_contracts",
7653                [],
7654                |row| row.get(0),
7655            )
7656            .expect("contract count");
7657        assert_eq!(contract_count, 0);
7658        let vec_count: i64 = conn
7659            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
7660                row.get(0)
7661            })
7662            .expect("vec count");
7663        assert_eq!(vec_count, 0);
7664        let failure_count: i64 = conn
7665            .query_row(
7666                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
7667                [],
7668                |row| row.get(0),
7669            )
7670            .expect("failure count");
7671        assert_eq!(failure_count, 1);
7672    }
7673
7674    #[cfg(feature = "sqlite-vec")]
7675    #[test]
7676    fn regenerate_vector_embeddings_times_out_and_kills_generator() {
7677        let (_db, service) = setup();
7678        let temp_dir = tempfile::tempdir().expect("temp dir");
7679        let script_path = temp_dir.path().join("vector-generator-timeout.sh");
7680
7681        fs::write(
7682            &script_path,
7683            "#!/usr/bin/env bash\nset -euo pipefail\nsleep 1\nprintf '{\"embeddings\":[]}'\n",
7684        )
7685        .expect("write timeout script");
7686        set_file_mode(&script_path, 0o755);
7687
7688        let error = service
7689            .regenerate_vector_embeddings_with_policy(
7690                &VectorRegenerationConfig {
7691                    profile: "default".to_owned(),
7692                    table_name: "vec_nodes_active".to_owned(),
7693                    model_identity: "model".to_owned(),
7694                    model_version: "1.0.0".to_owned(),
7695                    dimension: 4,
7696                    normalization_policy: "l2".to_owned(),
7697                    chunking_policy: "per_chunk".to_owned(),
7698                    preprocessing_policy: "trim".to_owned(),
7699                    generator_command: vec![script_path.to_string_lossy().to_string()],
7700                },
7701                &VectorGeneratorPolicy {
7702                    timeout_ms: 50,
7703                    max_stdout_bytes: 1024,
7704                    max_stderr_bytes: 1024,
7705                    max_input_bytes: 1024,
7706                    max_chunks: 10,
7707                    require_absolute_executable: true,
7708                    reject_world_writable_executable: true,
7709                    allowed_executable_roots: vec![],
7710                    preserve_env_vars: vec![],
7711                },
7712            )
7713            .expect_err("generator should time out");
7714        assert!(error.to_string().contains("generator timeout"));
7715    }
7716
7717    #[cfg(feature = "sqlite-vec")]
7718    #[test]
7719    fn regenerate_vector_embeddings_rejects_oversized_stdout() {
7720        let (_db, service) = setup();
7721        let temp_dir = tempfile::tempdir().expect("temp dir");
7722        let script_path = temp_dir.path().join("vector-generator-stdout.sh");
7723
7724        fs::write(
7725            &script_path,
7726            "#!/usr/bin/env bash\nset -euo pipefail\npython3 -c 'import sys; sys.stdout.write(\"x\" * 5000)'\n",
7727        )
7728        .expect("write stdout script");
7729        set_file_mode(&script_path, 0o755);
7730
7731        let error = service
7732            .regenerate_vector_embeddings_with_policy(
7733                &VectorRegenerationConfig {
7734                    profile: "default".to_owned(),
7735                    table_name: "vec_nodes_active".to_owned(),
7736                    model_identity: "model".to_owned(),
7737                    model_version: "1.0.0".to_owned(),
7738                    dimension: 4,
7739                    normalization_policy: "l2".to_owned(),
7740                    chunking_policy: "per_chunk".to_owned(),
7741                    preprocessing_policy: "trim".to_owned(),
7742                    generator_command: vec![script_path.to_string_lossy().to_string()],
7743                },
7744                &VectorGeneratorPolicy {
7745                    timeout_ms: 1000,
7746                    max_stdout_bytes: 128,
7747                    max_stderr_bytes: 1024,
7748                    max_input_bytes: 1024,
7749                    max_chunks: 10,
7750                    require_absolute_executable: true,
7751                    reject_world_writable_executable: true,
7752                    allowed_executable_roots: vec![],
7753                    preserve_env_vars: vec![],
7754                },
7755            )
7756            .expect_err("generator stdout should overflow");
7757        assert!(error.to_string().contains("stdout overflow"));
7758    }
7759
7760    #[cfg(feature = "sqlite-vec")]
7761    #[test]
7762    fn regenerate_vector_embeddings_rejects_oversized_stderr() {
7763        let (_db, service) = setup();
7764        let temp_dir = tempfile::tempdir().expect("temp dir");
7765        let script_path = temp_dir.path().join("vector-generator-stderr.sh");
7766
7767        fs::write(
7768            &script_path,
7769            "#!/usr/bin/env bash\nset -euo pipefail\npython3 -c 'import sys; sys.stderr.write(\"e\" * 5000); sys.exit(7)'\n",
7770        )
7771        .expect("write stderr script");
7772        set_file_mode(&script_path, 0o755);
7773
7774        let error = service
7775            .regenerate_vector_embeddings_with_policy(
7776                &VectorRegenerationConfig {
7777                    profile: "default".to_owned(),
7778                    table_name: "vec_nodes_active".to_owned(),
7779                    model_identity: "model".to_owned(),
7780                    model_version: "1.0.0".to_owned(),
7781                    dimension: 4,
7782                    normalization_policy: "l2".to_owned(),
7783                    chunking_policy: "per_chunk".to_owned(),
7784                    preprocessing_policy: "trim".to_owned(),
7785                    generator_command: vec![script_path.to_string_lossy().to_string()],
7786                },
7787                &VectorGeneratorPolicy {
7788                    timeout_ms: 1000,
7789                    max_stdout_bytes: 1024,
7790                    max_stderr_bytes: 128,
7791                    max_input_bytes: 1024,
7792                    max_chunks: 10,
7793                    require_absolute_executable: true,
7794                    reject_world_writable_executable: true,
7795                    allowed_executable_roots: vec![],
7796                    preserve_env_vars: vec![],
7797                },
7798            )
7799            .expect_err("generator stderr should overflow");
7800        assert!(error.to_string().contains("stderr overflow"));
7801    }
7802
7803    #[cfg(feature = "sqlite-vec")]
7804    #[test]
7805    fn regenerate_vector_embeddings_rejects_oversized_input_before_spawn() {
7806        let db = NamedTempFile::new().expect("temp file");
7807        let schema = Arc::new(SchemaManager::new());
7808        {
7809            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7810            schema.bootstrap(&conn).expect("bootstrap");
7811            conn.execute(
7812                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7813                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7814                [],
7815            )
7816            .expect("insert node");
7817            conn.execute(
7818                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7819                 VALUES ('chunk-1', 'doc-1', 'this chunk is intentionally long to exceed the configured input limit', 100)",
7820                [],
7821            )
7822            .expect("insert chunk");
7823        }
7824
7825        let service = AdminService::new(db.path(), Arc::clone(&schema));
7826        let error = service
7827            .regenerate_vector_embeddings_with_policy(
7828                &VectorRegenerationConfig {
7829                    profile: "default".to_owned(),
7830                    table_name: "vec_nodes_active".to_owned(),
7831                    model_identity: "model".to_owned(),
7832                    model_version: "1.0.0".to_owned(),
7833                    dimension: 4,
7834                    normalization_policy: "l2".to_owned(),
7835                    chunking_policy: "per_chunk".to_owned(),
7836                    preprocessing_policy: "trim".to_owned(),
7837                    generator_command: vec!["/bin/echo".to_owned()],
7838                },
7839                &VectorGeneratorPolicy {
7840                    timeout_ms: 1000,
7841                    max_stdout_bytes: 1024,
7842                    max_stderr_bytes: 1024,
7843                    max_input_bytes: 32,
7844                    max_chunks: 10,
7845                    require_absolute_executable: true,
7846                    reject_world_writable_executable: true,
7847                    allowed_executable_roots: vec![],
7848                    preserve_env_vars: vec![],
7849                },
7850            )
7851            .expect_err("input size should be rejected before spawn");
7852        assert!(error.to_string().contains("payload too large"));
7853    }
7854
7855    #[cfg(feature = "sqlite-vec")]
7856    #[test]
7857    fn regenerate_vector_embeddings_rejects_excessive_chunk_count_before_spawn() {
7858        let db = NamedTempFile::new().expect("temp file");
7859        let schema = Arc::new(SchemaManager::new());
7860        {
7861            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7862            schema.bootstrap(&conn).expect("bootstrap");
7863            conn.execute(
7864                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7865                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7866                [],
7867            )
7868            .expect("insert node");
7869            conn.execute(
7870                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) VALUES ('chunk-1', 'doc-1', 'a', 100)",
7871                [],
7872            )
7873            .expect("insert chunk 1");
7874            conn.execute(
7875                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) VALUES ('chunk-2', 'doc-1', 'b', 101)",
7876                [],
7877            )
7878            .expect("insert chunk 2");
7879        }
7880
7881        let service = AdminService::new(db.path(), Arc::clone(&schema));
7882        let error = service
7883            .regenerate_vector_embeddings_with_policy(
7884                &VectorRegenerationConfig {
7885                    profile: "default".to_owned(),
7886                    table_name: "vec_nodes_active".to_owned(),
7887                    model_identity: "model".to_owned(),
7888                    model_version: "1.0.0".to_owned(),
7889                    dimension: 4,
7890                    normalization_policy: "l2".to_owned(),
7891                    chunking_policy: "per_chunk".to_owned(),
7892                    preprocessing_policy: "trim".to_owned(),
7893                    generator_command: vec!["/bin/echo".to_owned()],
7894                },
7895                &VectorGeneratorPolicy {
7896                    timeout_ms: 1000,
7897                    max_stdout_bytes: 1024,
7898                    max_stderr_bytes: 1024,
7899                    max_input_bytes: 2048,
7900                    max_chunks: 1,
7901                    require_absolute_executable: true,
7902                    reject_world_writable_executable: true,
7903                    allowed_executable_roots: vec![],
7904                    preserve_env_vars: vec![],
7905                },
7906            )
7907            .expect_err("chunk count should be rejected before spawn");
7908        assert!(error.to_string().contains("payload too large"));
7909    }
7910
7911    #[cfg(feature = "sqlite-vec")]
7912    #[test]
7913    fn regenerate_vector_embeddings_malformed_json_leaves_contract_and_vec_rows_unchanged() {
7914        let db = NamedTempFile::new().expect("temp file");
7915        let schema = Arc::new(SchemaManager::new());
7916        let temp_dir = tempfile::tempdir().expect("temp dir");
7917        let script_path = temp_dir.path().join("vector-generator-bad-json.sh");
7918
7919        fs::write(
7920            &script_path,
7921            "#!/usr/bin/env bash\nset -euo pipefail\nprintf 'not-json'\n",
7922        )
7923        .expect("write bad json script");
7924        set_file_mode(&script_path, 0o755);
7925
7926        {
7927            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7928            schema.bootstrap(&conn).expect("bootstrap");
7929            conn.execute(
7930                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7931                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7932                [],
7933            )
7934            .expect("insert node");
7935            conn.execute(
7936                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7937                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7938                [],
7939            )
7940            .expect("insert chunk");
7941            schema
7942                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
7943                .expect("ensure vec profile");
7944            conn.execute(
7945                r"
7946                INSERT INTO vector_embedding_contracts (
7947                    profile,
7948                    table_name,
7949                    model_identity,
7950                    model_version,
7951                    dimension,
7952                    normalization_policy,
7953                    chunking_policy,
7954                    preprocessing_policy,
7955                    generator_command_json,
7956                    applied_at,
7957                    snapshot_hash
7958                ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)
7959                ",
7960                rusqlite::params![
7961                    "default",
7962                    "vec_nodes_active",
7963                    "old-model",
7964                    "0.9.0",
7965                    4,
7966                    "l2",
7967                    "per_chunk",
7968                    "trim",
7969                    "[\"/bin/echo\"]",
7970                    111,
7971                    "old-snapshot"
7972                ],
7973            )
7974            .expect("seed contract");
7975            conn.execute(
7976                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
7977                [],
7978            )
7979            .expect("seed vec row");
7980        }
7981
7982        let service = AdminService::new(db.path(), Arc::clone(&schema));
7983        let error = service
7984            .regenerate_vector_embeddings_with_policy(
7985                &VectorRegenerationConfig {
7986                    profile: "default".to_owned(),
7987                    table_name: "vec_nodes_active".to_owned(),
7988                    model_identity: "new-model".to_owned(),
7989                    model_version: "1.0.0".to_owned(),
7990                    dimension: 4,
7991                    normalization_policy: "l2".to_owned(),
7992                    chunking_policy: "per_chunk".to_owned(),
7993                    preprocessing_policy: "trim".to_owned(),
7994                    generator_command: vec![script_path.to_string_lossy().to_string()],
7995                },
7996                &VectorGeneratorPolicy::default(),
7997            )
7998            .expect_err("bad json should fail");
7999
8000        assert!(error.to_string().contains("decode generator output"));
8001
8002        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
8003        let model_identity: String = conn
8004            .query_row(
8005                "SELECT model_identity FROM vector_embedding_contracts WHERE profile = 'default'",
8006                [],
8007                |row| row.get(0),
8008            )
8009            .expect("model identity");
8010        assert_eq!(model_identity, "old-model");
8011        let vec_count: i64 = conn
8012            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
8013                row.get(0)
8014            })
8015            .expect("vec count");
8016        assert_eq!(vec_count, 1);
8017        let failure_count: i64 = conn
8018            .query_row(
8019                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
8020                [],
8021                |row| row.get(0),
8022            )
8023            .expect("failure count");
8024        assert_eq!(failure_count, 1);
8025    }
8026
8027    #[cfg(feature = "sqlite-vec")]
8028    #[test]
8029    fn regenerate_vector_embeddings_rejects_whitespace_only_profile_before_mutation() {
8030        let db = NamedTempFile::new().expect("temp file");
8031        let schema = Arc::new(SchemaManager::new());
8032        {
8033            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
8034            schema.bootstrap(&conn).expect("bootstrap");
8035            conn.execute(
8036                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8037                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
8038                [],
8039            )
8040            .expect("insert node");
8041            conn.execute(
8042                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
8043                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
8044                [],
8045            )
8046            .expect("insert chunk");
8047        }
8048
8049        let service = AdminService::new(db.path(), Arc::clone(&schema));
8050        let error = service
8051            .regenerate_vector_embeddings(&VectorRegenerationConfig {
8052                profile: "   ".to_owned(),
8053                table_name: "vec_nodes_active".to_owned(),
8054                model_identity: "test-model".to_owned(),
8055                model_version: "1.0.0".to_owned(),
8056                dimension: 4,
8057                normalization_policy: "l2".to_owned(),
8058                chunking_policy: "per_chunk".to_owned(),
8059                preprocessing_policy: "trim".to_owned(),
8060                generator_command: vec!["/bin/echo".to_owned()],
8061            })
8062            .expect_err("whitespace profile should be rejected");
8063
8064        assert!(error.to_string().contains("invalid contract"));
8065        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
8066        let contract_count: i64 = conn
8067            .query_row(
8068                "SELECT count(*) FROM vector_embedding_contracts",
8069                [],
8070                |row| row.get(0),
8071            )
8072            .expect("contract count");
8073        assert_eq!(contract_count, 0);
8074        let provenance_count: i64 = conn
8075            .query_row("SELECT count(*) FROM provenance_events", [], |row| {
8076                row.get(0)
8077            })
8078            .expect("provenance count");
8079        assert_eq!(provenance_count, 0);
8080    }
8081
8082    #[cfg(feature = "sqlite-vec")]
8083    #[test]
8084    fn regenerate_vector_embeddings_rejects_world_writable_executable_when_policy_requires_it() {
8085        let (_db, service) = setup();
8086        let temp_dir = tempfile::tempdir().expect("temp dir");
8087        let script_path = temp_dir.path().join("vector-generator-world-writable.sh");
8088
8089        fs::write(
8090            &script_path,
8091            "#!/usr/bin/env bash\nset -euo pipefail\nprintf '{\"embeddings\":[]}'\n",
8092        )
8093        .expect("write script");
8094        set_file_mode(&script_path, 0o777);
8095
8096        let error = service
8097            .regenerate_vector_embeddings_with_policy(
8098                &VectorRegenerationConfig {
8099                    profile: "default".to_owned(),
8100                    table_name: "vec_nodes_active".to_owned(),
8101                    model_identity: "model".to_owned(),
8102                    model_version: "1.0.0".to_owned(),
8103                    dimension: 4,
8104                    normalization_policy: "l2".to_owned(),
8105                    chunking_policy: "per_chunk".to_owned(),
8106                    preprocessing_policy: "trim".to_owned(),
8107                    generator_command: vec![script_path.to_string_lossy().to_string()],
8108                },
8109                &VectorGeneratorPolicy::default(),
8110            )
8111            .expect_err("world-writable executable should be rejected");
8112
8113        assert!(error.to_string().contains("world-writable executable"));
8114    }
8115
8116    #[cfg(feature = "sqlite-vec")]
8117    #[test]
8118    fn regenerate_vector_embeddings_rejects_executable_outside_allowlisted_roots() {
8119        let (_db, service) = setup();
8120        let temp_dir = tempfile::tempdir().expect("temp dir");
8121        let allowed_dir = tempfile::tempdir().expect("allowed dir");
8122        let script_path = temp_dir.path().join("vector-generator-outside-root.sh");
8123
8124        fs::write(
8125            &script_path,
8126            "#!/usr/bin/env bash\nset -euo pipefail\nprintf '{\"embeddings\":[]}'\n",
8127        )
8128        .expect("write script");
8129        set_file_mode(&script_path, 0o755);
8130
8131        let error = service
8132            .regenerate_vector_embeddings_with_policy(
8133                &VectorRegenerationConfig {
8134                    profile: "default".to_owned(),
8135                    table_name: "vec_nodes_active".to_owned(),
8136                    model_identity: "model".to_owned(),
8137                    model_version: "1.0.0".to_owned(),
8138                    dimension: 4,
8139                    normalization_policy: "l2".to_owned(),
8140                    chunking_policy: "per_chunk".to_owned(),
8141                    preprocessing_policy: "trim".to_owned(),
8142                    generator_command: vec![script_path.to_string_lossy().to_string()],
8143                },
8144                &VectorGeneratorPolicy {
8145                    timeout_ms: 1000,
8146                    max_stdout_bytes: 1024,
8147                    max_stderr_bytes: 1024,
8148                    max_input_bytes: 1024,
8149                    max_chunks: 10,
8150                    require_absolute_executable: true,
8151                    reject_world_writable_executable: true,
8152                    allowed_executable_roots: vec![
8153                        allowed_dir.path().to_string_lossy().to_string(),
8154                    ],
8155                    preserve_env_vars: vec![],
8156                },
8157            )
8158            .expect_err("disallowed root should be rejected");
8159
8160        assert!(
8161            error
8162                .to_string()
8163                .contains("outside allowed executable roots")
8164        );
8165    }
8166
8167    #[cfg(feature = "sqlite-vec")]
8168    #[test]
8169    fn regenerate_vector_embeddings_rejects_future_contract_format_version() {
8170        let db = NamedTempFile::new().expect("temp file");
8171        let schema = Arc::new(SchemaManager::new());
8172        {
8173            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
8174            schema.bootstrap(&conn).expect("bootstrap");
8175            conn.execute(
8176                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8177                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
8178                [],
8179            )
8180            .expect("insert node");
8181            conn.execute(
8182                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
8183                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
8184                [],
8185            )
8186            .expect("insert chunk");
8187            conn.execute(
8188                r"
8189                INSERT INTO vector_embedding_contracts (
8190                    profile,
8191                    table_name,
8192                    model_identity,
8193                    model_version,
8194                    dimension,
8195                    normalization_policy,
8196                    chunking_policy,
8197                    preprocessing_policy,
8198                    generator_command_json,
8199                    applied_at,
8200                    snapshot_hash,
8201                    contract_format_version,
8202                    updated_at
8203                ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)
8204                ",
8205                rusqlite::params![
8206                    "default",
8207                    "vec_nodes_active",
8208                    "old-model",
8209                    "0.9.0",
8210                    4,
8211                    "l2",
8212                    "per_chunk",
8213                    "trim",
8214                    "[\"/bin/echo\"]",
8215                    111,
8216                    "old-snapshot",
8217                    99,
8218                    111,
8219                ],
8220            )
8221            .expect("seed future contract");
8222        }
8223
8224        let service = AdminService::new(db.path(), Arc::clone(&schema));
8225        let error = service
8226            .regenerate_vector_embeddings(&VectorRegenerationConfig {
8227                profile: "default".to_owned(),
8228                table_name: "vec_nodes_active".to_owned(),
8229                model_identity: "test-model".to_owned(),
8230                model_version: "1.0.0".to_owned(),
8231                dimension: 4,
8232                normalization_policy: "l2".to_owned(),
8233                chunking_policy: "per_chunk".to_owned(),
8234                preprocessing_policy: "trim".to_owned(),
8235                generator_command: vec!["/bin/echo".to_owned()],
8236            })
8237            .expect_err("future contract version should be rejected");
8238
8239        assert!(error.to_string().contains("unsupported"));
8240        assert!(error.to_string().contains("format version"));
8241    }
8242
8243    #[cfg(feature = "sqlite-vec")]
8244    #[test]
8245    fn regenerate_vector_embeddings_clears_environment_except_preserved_vars() {
8246        let db = NamedTempFile::new().expect("temp file");
8247        let schema = Arc::new(SchemaManager::new());
8248        let temp_dir = tempfile::tempdir().expect("temp dir");
8249        let script_path = temp_dir.path().join("vector-generator-env.sh");
8250        {
8251            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
8252            schema.bootstrap(&conn).expect("bootstrap");
8253            conn.execute(
8254                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8255                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
8256                [],
8257            )
8258            .expect("insert node");
8259            conn.execute(
8260                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
8261                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
8262                [],
8263            )
8264            .expect("insert chunk");
8265        }
8266
8267        fs::write(
8268            &script_path,
8269            r#"#!/usr/bin/env bash
8270set -euo pipefail
8271if [[ "${VECTOR_TEST_SECRET:-}" != "expected" ]]; then
8272  echo "missing secret" >&2
8273  exit 9
8274fi
8275python3 -c 'import json, sys
8276payload = json.load(sys.stdin)
8277json.dump({"embeddings": [{"chunk_id": payload["chunks"][0]["chunk_id"], "embedding": [1.0, 0.0, 0.0, 0.0]}]}, sys.stdout)'
8278"#,
8279        )
8280        .expect("write script");
8281        set_file_mode(&script_path, 0o755);
8282
8283        let service = AdminService::new(db.path(), Arc::clone(&schema));
8284        unsafe {
8285            std::env::set_var("VECTOR_TEST_SECRET", "expected");
8286        }
8287        let missing_env = service
8288            .regenerate_vector_embeddings_with_policy(
8289                &VectorRegenerationConfig {
8290                    profile: "default".to_owned(),
8291                    table_name: "vec_nodes_active".to_owned(),
8292                    model_identity: "model".to_owned(),
8293                    model_version: "1.0.0".to_owned(),
8294                    dimension: 4,
8295                    normalization_policy: "l2".to_owned(),
8296                    chunking_policy: "per_chunk".to_owned(),
8297                    preprocessing_policy: "trim".to_owned(),
8298                    generator_command: vec![script_path.to_string_lossy().to_string()],
8299                },
8300                &VectorGeneratorPolicy::default(),
8301            )
8302            .expect_err("non-preserved env var should be dropped");
8303        assert!(missing_env.to_string().contains("nonzero exit"));
8304
8305        let report = service
8306            .regenerate_vector_embeddings_with_policy(
8307                &VectorRegenerationConfig {
8308                    profile: "default".to_owned(),
8309                    table_name: "vec_nodes_active".to_owned(),
8310                    model_identity: "model".to_owned(),
8311                    model_version: "1.0.0".to_owned(),
8312                    dimension: 4,
8313                    normalization_policy: "l2".to_owned(),
8314                    chunking_policy: "per_chunk".to_owned(),
8315                    preprocessing_policy: "trim".to_owned(),
8316                    generator_command: vec![script_path.to_string_lossy().to_string()],
8317                },
8318                &VectorGeneratorPolicy {
8319                    timeout_ms: 1000,
8320                    max_stdout_bytes: 1024,
8321                    max_stderr_bytes: 1024,
8322                    max_input_bytes: 4096,
8323                    max_chunks: 10,
8324                    require_absolute_executable: true,
8325                    reject_world_writable_executable: true,
8326                    allowed_executable_roots: vec![],
8327                    preserve_env_vars: vec!["VECTOR_TEST_SECRET".to_owned()],
8328                },
8329            )
8330            .expect("preserved env var should allow success");
8331        assert_eq!(report.regenerated_rows, 1);
8332        unsafe {
8333            std::env::remove_var("VECTOR_TEST_SECRET");
8334        }
8335    }
8336
8337    #[test]
8338    fn check_semantics_detects_orphaned_chunk() {
8339        let (db, service) = setup();
8340        {
8341            // Open without FK enforcement to insert chunk with no active node.
8342            let conn = sqlite::open_connection(db.path()).expect("conn");
8343            conn.execute(
8344                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
8345                 VALUES ('c1', 'ghost-node', 'text', 100)",
8346                [],
8347            )
8348            .expect("insert orphaned chunk");
8349        }
8350        let report = service.check_semantics().expect("semantics check");
8351        assert_eq!(report.orphaned_chunks, 1);
8352    }
8353
8354    #[test]
8355    fn check_semantics_detects_null_source_ref() {
8356        let (db, service) = setup();
8357        {
8358            let conn = sqlite::open_connection(db.path()).expect("conn");
8359            conn.execute(
8360                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at) \
8361                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100)",
8362                [],
8363            )
8364            .expect("insert node with null source_ref");
8365        }
8366        let report = service.check_semantics().expect("semantics check");
8367        assert_eq!(report.null_source_ref_nodes, 1);
8368    }
8369
8370    #[test]
8371    fn check_semantics_detects_broken_step_fk() {
8372        let (db, service) = setup();
8373        {
8374            // Explicitly disable FK enforcement for this connection so we can insert
8375            // an orphaned step (ghost run_id) to simulate a partial-write failure.
8376            let conn = sqlite::open_connection(db.path()).expect("conn");
8377            conn.execute_batch("PRAGMA foreign_keys = OFF;")
8378                .expect("disable FK");
8379            conn.execute(
8380                "INSERT INTO steps (id, run_id, kind, status, properties, created_at) \
8381                 VALUES ('s1', 'ghost-run', 'llm', 'completed', '{}', 100)",
8382                [],
8383            )
8384            .expect("insert step with ghost run_id");
8385        }
8386        let report = service.check_semantics().expect("semantics check");
8387        assert_eq!(report.broken_step_fk, 1);
8388    }
8389
8390    #[test]
8391    fn check_semantics_detects_broken_action_fk() {
8392        let (db, service) = setup();
8393        {
8394            let conn = sqlite::open_connection(db.path()).expect("conn");
8395            conn.execute_batch("PRAGMA foreign_keys = OFF;")
8396                .expect("disable FK");
8397            conn.execute(
8398                "INSERT INTO actions (id, step_id, kind, status, properties, created_at) \
8399                 VALUES ('a1', 'ghost-step', 'emit', 'completed', '{}', 100)",
8400                [],
8401            )
8402            .expect("insert action with ghost step_id");
8403        }
8404        let report = service.check_semantics().expect("semantics check");
8405        assert_eq!(report.broken_action_fk, 1);
8406    }
8407
8408    #[test]
8409    fn check_semantics_detects_stale_fts_rows() {
8410        let (db, service) = setup();
8411        {
8412            let conn = sqlite::open_connection(db.path()).expect("conn");
8413            // FTS virtual tables have no FK constraints; insert a row referencing
8414            // a chunk_id that does not exist in the chunks table.
8415            conn.execute(
8416                "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
8417                 VALUES ('ghost-chunk', 'any-node', 'Meeting', 'stale content')",
8418                [],
8419            )
8420            .expect("insert stale FTS row");
8421        }
8422        let report = service.check_semantics().expect("semantics check");
8423        assert_eq!(report.stale_fts_rows, 1);
8424    }
8425
8426    #[test]
8427    fn check_semantics_detects_fts_rows_for_superseded_nodes() {
8428        let (db, service) = setup();
8429        {
8430            let conn = sqlite::open_connection(db.path()).expect("conn");
8431            // Insert a node that has been fully superseded (superseded_at IS NOT NULL).
8432            conn.execute(
8433                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
8434                 VALUES ('r1', 'lg-sup', 'Meeting', '{}', 100, 200, 'src-1')",
8435                [],
8436            )
8437            .expect("insert superseded node");
8438            // Insert an FTS row for the superseded node's logical_id.
8439            conn.execute(
8440                "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
8441                 VALUES ('ck-x', 'lg-sup', 'Meeting', 'superseded content')",
8442                [],
8443            )
8444            .expect("insert FTS row for superseded node");
8445        }
8446        let report = service.check_semantics().expect("semantics check");
8447        assert_eq!(report.fts_rows_for_superseded_nodes, 1);
8448    }
8449
8450    #[test]
8451    fn check_semantics_detects_dangling_edges() {
8452        let (db, service) = setup();
8453        {
8454            let conn = sqlite::open_connection(db.path()).expect("conn");
8455            conn.execute_batch("PRAGMA foreign_keys = OFF;")
8456                .expect("disable FK");
8457            // One active node as source; target does not exist — edge is dangling.
8458            conn.execute(
8459                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8460                 VALUES ('r1', 'lg-src', 'Meeting', '{}', 100, 'src-1')",
8461                [],
8462            )
8463            .expect("insert source node");
8464            conn.execute(
8465                "INSERT INTO edges \
8466                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
8467                 VALUES ('e1', 'edge-1', 'lg-src', 'ghost-target', 'LINKS', '{}', 100, 'src-1')",
8468                [],
8469            )
8470            .expect("insert dangling edge");
8471        }
8472        let report = service.check_semantics().expect("semantics check");
8473        assert_eq!(report.dangling_edges, 1);
8474    }
8475
8476    #[test]
8477    fn check_semantics_detects_orphaned_supersession_chains() {
8478        let (db, service) = setup();
8479        {
8480            let conn = sqlite::open_connection(db.path()).expect("conn");
8481            // Every version of this logical_id is superseded — no active row remains.
8482            conn.execute(
8483                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
8484                 VALUES ('r1', 'lg-orphaned', 'Meeting', '{}', 100, 200, 'src-1')",
8485                [],
8486            )
8487            .expect("insert fully superseded node");
8488        }
8489        let report = service.check_semantics().expect("semantics check");
8490        assert_eq!(report.orphaned_supersession_chains, 1);
8491    }
8492
8493    #[test]
8494    fn check_semantics_detects_mismatched_kind_property_fts_rows() {
8495        let (db, service) = setup();
8496        {
8497            let conn = sqlite::open_connection(db.path()).expect("conn");
8498            // Insert an active node with kind "Goal".
8499            conn.execute(
8500                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8501                 VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'src-1')",
8502                [],
8503            )
8504            .expect("insert node");
8505            // Insert a property FTS row with a DIFFERENT kind than the node.
8506            conn.execute(
8507                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
8508                 VALUES ('goal-1', 'WrongKind', 'Ship v2')",
8509                [],
8510            )
8511            .expect("insert mismatched property FTS row");
8512        }
8513        let report = service.check_semantics().expect("semantics check");
8514        assert_eq!(report.mismatched_kind_property_fts_rows, 1);
8515    }
8516
8517    #[test]
8518    fn check_semantics_detects_duplicate_property_fts_rows() {
8519        let (db, service) = setup();
8520        {
8521            let conn = sqlite::open_connection(db.path()).expect("conn");
8522            conn.execute(
8523                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8524                 VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'src-1')",
8525                [],
8526            )
8527            .expect("insert node");
8528            // Insert two property FTS rows for the same logical ID.
8529            conn.execute(
8530                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
8531                 VALUES ('goal-1', 'Goal', 'Ship v2')",
8532                [],
8533            )
8534            .expect("insert first property FTS row");
8535            conn.execute(
8536                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
8537                 VALUES ('goal-1', 'Goal', 'Ship v2 duplicate')",
8538                [],
8539            )
8540            .expect("insert duplicate property FTS row");
8541        }
8542        let report = service.check_semantics().expect("semantics check");
8543        assert_eq!(report.duplicate_property_fts_rows, 1);
8544    }
8545
8546    #[test]
8547    fn check_semantics_detects_drifted_property_fts_text() {
8548        let (db, service) = setup();
8549        {
8550            let conn = sqlite::open_connection(db.path()).expect("conn");
8551            conn.execute(
8552                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
8553                 VALUES ('Goal', '[\"$.name\"]', ' ')",
8554                [],
8555            )
8556            .expect("register schema");
8557            conn.execute(
8558                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8559                 VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Current name\"}', 100, 'src-1')",
8560                [],
8561            )
8562            .expect("insert node");
8563            // Insert a property FTS row with outdated text content.
8564            conn.execute(
8565                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
8566                 VALUES ('goal-1', 'Goal', 'Old stale name')",
8567                [],
8568            )
8569            .expect("insert stale property FTS row");
8570        }
8571        let report = service.check_semantics().expect("semantics check");
8572        assert_eq!(report.drifted_property_fts_rows, 1);
8573    }
8574
8575    #[test]
8576    fn check_semantics_detects_property_fts_row_that_should_not_exist() {
8577        let (db, service) = setup();
8578        {
8579            let conn = sqlite::open_connection(db.path()).expect("conn");
8580            conn.execute(
8581                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
8582                 VALUES ('Goal', '[\"$.searchable\"]', ' ')",
8583                [],
8584            )
8585            .expect("register schema");
8586            // Node does NOT have $.searchable — extraction yields no value.
8587            conn.execute(
8588                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8589                 VALUES ('r1', 'goal-1', 'Goal', '{\"other\":\"field\"}', 100, 'src-1')",
8590                [],
8591            )
8592            .expect("insert node");
8593            // But a property FTS row exists anyway.
8594            conn.execute(
8595                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
8596                 VALUES ('goal-1', 'Goal', 'phantom text')",
8597                [],
8598            )
8599            .expect("insert phantom property FTS row");
8600        }
8601        let report = service.check_semantics().expect("semantics check");
8602        assert_eq!(
8603            report.drifted_property_fts_rows, 1,
8604            "row that should not exist must be counted as drifted"
8605        );
8606    }
8607
8608    #[test]
8609    fn safe_export_writes_manifest_with_sha256() {
8610        let (_db, service) = setup();
8611        let export_dir = tempfile::TempDir::new().expect("temp dir");
8612        let export_path = export_dir.path().join("backup.db");
8613
8614        let manifest = service
8615            .safe_export(
8616                &export_path,
8617                SafeExportOptions {
8618                    force_checkpoint: false,
8619                },
8620            )
8621            .expect("export");
8622
8623        assert!(export_path.exists(), "exported db should exist");
8624        let manifest_path = export_dir.path().join("backup.db.export-manifest.json");
8625        assert!(
8626            manifest_path.exists(),
8627            "manifest file should exist at {}",
8628            manifest_path.display()
8629        );
8630        assert_eq!(manifest.sha256.len(), 64, "sha256 should be 64 hex chars");
8631        assert!(
8632            manifest.exported_at > 0,
8633            "exported_at should be a unix timestamp"
8634        );
8635        assert_eq!(
8636            manifest.schema_version,
8637            SchemaManager::new().current_version().0,
8638            "schema_version should match the live schema version"
8639        );
8640        assert_eq!(manifest.protocol_version, 1, "protocol_version should be 1");
8641        assert!(manifest.page_count > 0, "page_count should be positive");
8642    }
8643
8644    #[test]
8645    fn safe_export_preserves_operational_validation_contracts() {
8646        let (_db, service) = setup();
8647        let validation_json = r#"{"format_version":1,"mode":"enforce","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#;
8648        service
8649            .register_operational_collection(&OperationalRegisterRequest {
8650                name: "connector_health".to_owned(),
8651                kind: OperationalCollectionKind::LatestState,
8652                schema_json: "{}".to_owned(),
8653                retention_json: "{}".to_owned(),
8654                filter_fields_json: "[]".to_owned(),
8655                validation_json: validation_json.to_owned(),
8656                secondary_indexes_json: "[]".to_owned(),
8657                format_version: 1,
8658            })
8659            .expect("register collection");
8660
8661        let export_dir = tempfile::TempDir::new().expect("temp dir");
8662        let export_path = export_dir.path().join("backup.db");
8663        service
8664            .safe_export(
8665                &export_path,
8666                SafeExportOptions {
8667                    force_checkpoint: false,
8668                },
8669            )
8670            .expect("export");
8671
8672        let exported = sqlite::open_connection(&export_path).expect("exported conn");
8673        let exported_validation_json: String = exported
8674            .query_row(
8675                "SELECT validation_json FROM operational_collections WHERE name = 'connector_health'",
8676                [],
8677                |row| row.get(0),
8678            )
8679            .expect("validation_json");
8680        assert_eq!(exported_validation_json, validation_json);
8681    }
8682
8683    #[test]
8684    fn safe_export_force_checkpoint_false_skips_wal_pragma() {
8685        let (_db, service) = setup();
8686        let export_dir = tempfile::TempDir::new().expect("temp dir");
8687        let export_path = export_dir.path().join("no-wal.db");
8688
8689        // force_checkpoint: false must not error even on a non-WAL database
8690        let manifest = service
8691            .safe_export(
8692                &export_path,
8693                SafeExportOptions {
8694                    force_checkpoint: false,
8695                },
8696            )
8697            .expect("export with no checkpoint");
8698
8699        assert!(
8700            manifest.page_count > 0,
8701            "page_count must be populated regardless of checkpoint mode"
8702        );
8703        assert_eq!(
8704            manifest.schema_version,
8705            SchemaManager::new().current_version().0
8706        );
8707        assert_eq!(manifest.protocol_version, 1);
8708    }
8709
8710    #[test]
8711    fn safe_export_force_checkpoint_false_still_captures_wal_backed_changes() {
8712        let (db, service) = setup();
8713        let conn = sqlite::open_connection(db.path()).expect("conn");
8714        let journal_mode: String = conn
8715            .query_row("PRAGMA journal_mode=WAL", [], |row| row.get(0))
8716            .expect("enable wal");
8717        assert_eq!(journal_mode.to_lowercase(), "wal");
8718        let auto_checkpoint_pages: i64 = conn
8719            .query_row("PRAGMA wal_autocheckpoint=0", [], |row| row.get(0))
8720            .expect("disable auto checkpoint");
8721        assert_eq!(auto_checkpoint_pages, 0);
8722        conn.execute(
8723            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8724             VALUES ('r-wal', 'lg-wal', 'Meeting', '{}', 100, 'src-wal')",
8725            [],
8726        )
8727        .expect("insert wal-backed node");
8728
8729        let export_dir = tempfile::TempDir::new().expect("temp dir");
8730        let export_path = export_dir.path().join("wal-backed.db");
8731        service
8732            .safe_export(
8733                &export_path,
8734                SafeExportOptions {
8735                    force_checkpoint: false,
8736                },
8737            )
8738            .expect("export wal-backed db");
8739
8740        let exported = sqlite::open_connection(&export_path).expect("open exported db");
8741        let exported_count: i64 = exported
8742            .query_row(
8743                "SELECT count(*) FROM nodes WHERE logical_id = 'lg-wal'",
8744                [],
8745                |row| row.get(0),
8746            )
8747            .expect("count exported nodes");
8748        assert_eq!(
8749            exported_count, 1,
8750            "safe_export must include committed rows that are still resident in the WAL"
8751        );
8752    }
8753
8754    #[test]
8755    fn excise_source_removes_searchable_content_after_excision() {
8756        let (db, service) = setup();
8757        {
8758            let conn = sqlite::open_connection(db.path()).expect("conn");
8759            conn.execute(
8760                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
8761                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
8762                [],
8763            )
8764            .expect("insert v1");
8765            conn.execute(
8766                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8767                 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
8768                [],
8769            )
8770            .expect("insert v2");
8771            conn.execute(
8772                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
8773                 VALUES ('ck1', 'lg1', 'hello world', 100)",
8774                [],
8775            )
8776            .expect("insert chunk");
8777        }
8778        service.excise_source("source-2").expect("excise");
8779        {
8780            let conn = sqlite::open_connection(db.path()).expect("conn");
8781            let fts_count: i64 = conn
8782                .query_row(
8783                    "SELECT count(*) FROM fts_nodes WHERE chunk_id = 'ck1'",
8784                    [],
8785                    |row| row.get(0),
8786                )
8787                .expect("fts count");
8788            assert_eq!(
8789                fts_count, 0,
8790                "excised content should not remain searchable after excise"
8791            );
8792        }
8793    }
8794
8795    #[cfg(feature = "sqlite-vec")]
8796    #[test]
8797    fn excise_source_cleans_chunks_and_vec_rows_for_excised_version() {
8798        let (db, service) = setup();
8799        {
8800            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
8801            service
8802                .schema_manager
8803                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
8804                .expect("ensure vec profile");
8805            conn.execute(
8806                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
8807                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
8808                [],
8809            )
8810            .expect("insert v1");
8811            conn.execute(
8812                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8813                 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
8814                [],
8815            )
8816            .expect("insert v2");
8817            conn.execute(
8818                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
8819                 VALUES ('ck1', 'lg1', 'new content', 200)",
8820                [],
8821            )
8822            .expect("insert chunk");
8823            conn.execute(
8824                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('ck1', zeroblob(16))",
8825                [],
8826            )
8827            .expect("insert vec row");
8828        }
8829
8830        service.excise_source("source-2").expect("excise");
8831
8832        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
8833        let active_row: String = conn
8834            .query_row(
8835                "SELECT row_id FROM nodes WHERE logical_id = 'lg1' AND superseded_at IS NULL",
8836                [],
8837                |row| row.get(0),
8838            )
8839            .expect("restored active row");
8840        assert_eq!(active_row, "r1");
8841        let chunk_count: i64 = conn
8842            .query_row(
8843                "SELECT count(*) FROM chunks WHERE node_logical_id = 'lg1'",
8844                [],
8845                |row| row.get(0),
8846            )
8847            .expect("chunk count");
8848        assert_eq!(
8849            chunk_count, 0,
8850            "excised source content must not survive as chunks"
8851        );
8852        let vec_count: i64 = conn
8853            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
8854                row.get(0)
8855            })
8856            .expect("vec count");
8857        assert_eq!(vec_count, 0, "excised source vec rows must be removed");
8858        let fts_count: i64 = conn
8859            .query_row(
8860                "SELECT count(*) FROM fts_nodes WHERE node_logical_id = 'lg1'",
8861                [],
8862                |row| row.get(0),
8863            )
8864            .expect("fts count");
8865        assert_eq!(
8866            fts_count, 0,
8867            "excised source content must not remain searchable"
8868        );
8869    }
8870
8871    #[test]
8872    fn export_page_count_matches_exported_file() {
8873        let (_db, service) = setup();
8874        let export_dir = tempfile::TempDir::new().expect("temp dir");
8875        let export_path = export_dir.path().join("page-count.db");
8876
8877        let manifest = service
8878            .safe_export(
8879                &export_path,
8880                SafeExportOptions {
8881                    force_checkpoint: false,
8882                },
8883            )
8884            .expect("export");
8885
8886        let exported = sqlite::open_connection(&export_path).expect("open exported db");
8887        let actual_page_count: u64 = exported
8888            .query_row("PRAGMA page_count", [], |row| row.get(0))
8889            .expect("page_count from exported file");
8890
8891        assert_eq!(
8892            manifest.page_count, actual_page_count,
8893            "manifest page_count must match the exported file's PRAGMA page_count"
8894        );
8895    }
8896
8897    #[test]
8898    fn no_temp_file_after_successful_export() {
8899        let (_db, service) = setup();
8900        let export_dir = tempfile::TempDir::new().expect("temp dir");
8901        let export_path = export_dir.path().join("no-tmp.db");
8902
8903        service
8904            .safe_export(
8905                &export_path,
8906                SafeExportOptions {
8907                    force_checkpoint: false,
8908                },
8909            )
8910            .expect("export");
8911
8912        let tmp_files: Vec<_> = fs::read_dir(export_dir.path())
8913            .expect("read export dir")
8914            .filter_map(Result::ok)
8915            .filter(|e| e.path().extension().is_some_and(|ext| ext == "tmp"))
8916            .collect();
8917
8918        assert!(
8919            tmp_files.is_empty(),
8920            "no .tmp files should remain after a successful export, found: {tmp_files:?}"
8921        );
8922    }
8923
8924    #[test]
8925    fn export_manifest_is_valid_json() {
8926        let (_db, service) = setup();
8927        let export_dir = tempfile::TempDir::new().expect("temp dir");
8928        let export_path = export_dir.path().join("valid-json.db");
8929
8930        service
8931            .safe_export(
8932                &export_path,
8933                SafeExportOptions {
8934                    force_checkpoint: false,
8935                },
8936            )
8937            .expect("export");
8938
8939        let manifest_path = export_dir.path().join("valid-json.db.export-manifest.json");
8940        let manifest_contents = fs::read_to_string(&manifest_path).expect("read manifest");
8941        let parsed: serde_json::Value =
8942            serde_json::from_str(&manifest_contents).expect("manifest must be valid JSON");
8943
8944        assert!(
8945            parsed.get("exported_at").is_some(),
8946            "manifest must contain exported_at"
8947        );
8948        assert!(
8949            parsed.get("sha256").is_some(),
8950            "manifest must contain sha256"
8951        );
8952        assert!(
8953            parsed.get("schema_version").is_some(),
8954            "manifest must contain schema_version"
8955        );
8956        assert!(
8957            parsed.get("protocol_version").is_some(),
8958            "manifest must contain protocol_version"
8959        );
8960        assert!(
8961            parsed.get("page_count").is_some(),
8962            "manifest must contain page_count"
8963        );
8964    }
8965
8966    #[test]
8967    fn provenance_purge_dry_run_reports_counts() {
8968        let (db, service) = setup();
8969        {
8970            let conn = sqlite::open_connection(db.path()).expect("conn");
8971            conn.execute(
8972                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8973                 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
8974                [],
8975            )
8976            .expect("insert p1");
8977            conn.execute(
8978                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8979                 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 200)",
8980                [],
8981            )
8982            .expect("insert p2");
8983            conn.execute(
8984                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8985                 VALUES ('p3', 'excise', 'lg3', 'src-1', 300)",
8986                [],
8987            )
8988            .expect("insert p3");
8989        }
8990
8991        let options = super::ProvenancePurgeOptions {
8992            dry_run: true,
8993            preserve_event_types: Vec::new(),
8994        };
8995        let report = service
8996            .purge_provenance_events(250, &options)
8997            .expect("dry run purge");
8998
8999        assert_eq!(report.events_deleted, 2);
9000        assert_eq!(report.events_preserved, 1);
9001        assert!(report.oldest_remaining.is_some());
9002
9003        let conn = sqlite::open_connection(db.path()).expect("conn");
9004        let total: i64 = conn
9005            .query_row("SELECT count(*) FROM provenance_events", [], |row| {
9006                row.get(0)
9007            })
9008            .expect("count");
9009        assert_eq!(total, 3, "dry_run must not delete any events");
9010    }
9011
9012    #[test]
9013    fn provenance_purge_deletes_old_events() {
9014        let (db, service) = setup();
9015        {
9016            let conn = sqlite::open_connection(db.path()).expect("conn");
9017            conn.execute(
9018                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
9019                 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
9020                [],
9021            )
9022            .expect("insert p1");
9023            conn.execute(
9024                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
9025                 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 200)",
9026                [],
9027            )
9028            .expect("insert p2");
9029        }
9030
9031        let options = super::ProvenancePurgeOptions {
9032            dry_run: false,
9033            preserve_event_types: Vec::new(),
9034        };
9035        let report = service
9036            .purge_provenance_events(150, &options)
9037            .expect("purge");
9038
9039        assert_eq!(report.events_deleted, 1);
9040        assert_eq!(report.events_preserved, 1);
9041        assert_eq!(report.oldest_remaining, Some(200));
9042
9043        let conn = sqlite::open_connection(db.path()).expect("conn");
9044        let remaining: i64 = conn
9045            .query_row("SELECT count(*) FROM provenance_events", [], |row| {
9046                row.get(0)
9047            })
9048            .expect("count");
9049        assert_eq!(remaining, 1);
9050    }
9051
9052    #[test]
9053    fn provenance_purge_preserves_specified_types() {
9054        let (db, service) = setup();
9055        {
9056            let conn = sqlite::open_connection(db.path()).expect("conn");
9057            conn.execute(
9058                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
9059                 VALUES ('p1', 'excise', 'lg1', 'src-1', 100)",
9060                [],
9061            )
9062            .expect("insert p1");
9063            conn.execute(
9064                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
9065                 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 100)",
9066                [],
9067            )
9068            .expect("insert p2");
9069            conn.execute(
9070                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
9071                 VALUES ('p3', 'node_insert', 'lg3', 'src-1', 100)",
9072                [],
9073            )
9074            .expect("insert p3");
9075        }
9076
9077        let options = super::ProvenancePurgeOptions {
9078            dry_run: false,
9079            preserve_event_types: Vec::new(),
9080        };
9081        let report = service
9082            .purge_provenance_events(500, &options)
9083            .expect("purge");
9084
9085        assert_eq!(report.events_deleted, 2);
9086        assert_eq!(report.events_preserved, 1);
9087
9088        let conn = sqlite::open_connection(db.path()).expect("conn");
9089        let remaining_type: String = conn
9090            .query_row("SELECT event_type FROM provenance_events", [], |row| {
9091                row.get(0)
9092            })
9093            .expect("remaining event type");
9094        assert_eq!(remaining_type, "excise");
9095    }
9096
9097    #[test]
9098    fn provenance_purge_noop_with_zero_timestamp() {
9099        let (db, service) = setup();
9100        {
9101            let conn = sqlite::open_connection(db.path()).expect("conn");
9102            conn.execute(
9103                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
9104                 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
9105                [],
9106            )
9107            .expect("insert p1");
9108        }
9109
9110        let options = super::ProvenancePurgeOptions {
9111            dry_run: false,
9112            preserve_event_types: Vec::new(),
9113        };
9114        let report = service.purge_provenance_events(0, &options).expect("purge");
9115
9116        assert_eq!(report.events_deleted, 0);
9117        assert_eq!(report.events_preserved, 1);
9118        assert_eq!(report.oldest_remaining, Some(100));
9119    }
9120
9121    #[test]
9122    fn restore_skips_edge_when_counterpart_purged() {
9123        let (db, service) = setup();
9124        {
9125            let conn = sqlite::open_connection(db.path()).expect("conn");
9126            // Create node A (doc-1) and node B (doc-2)
9127            conn.execute(
9128                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9129                 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
9130                [],
9131            )
9132            .expect("insert node A");
9133            conn.execute(
9134                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9135                 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
9136                [],
9137            )
9138            .expect("insert node B");
9139            // Create edge between A and B
9140            conn.execute(
9141                "INSERT INTO edges \
9142                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
9143                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
9144                [],
9145            )
9146            .expect("insert edge");
9147            // Retire both A and B, and the edge
9148            conn.execute(
9149                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9150                 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
9151                [],
9152            )
9153            .expect("insert retire event A");
9154            conn.execute(
9155                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9156                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
9157                [],
9158            )
9159            .expect("insert edge retire event");
9160            conn.execute(
9161                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
9162                [],
9163            )
9164            .expect("retire node A");
9165            conn.execute(
9166                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-2'",
9167                [],
9168            )
9169            .expect("retire node B");
9170            conn.execute(
9171                "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
9172                [],
9173            )
9174            .expect("retire edge");
9175            // Simulate purge of B: delete node rows but leave the edge intact
9176            // to reproduce the dangling-edge scenario the validation guards against.
9177            conn.execute("DELETE FROM nodes WHERE logical_id = 'doc-2'", [])
9178                .expect("purge node B rows");
9179        }
9180
9181        // Restore A — the edge should be skipped because B has no active node
9182        let report = service.restore_logical_id("doc-1").expect("restore A");
9183        assert!(!report.was_noop);
9184        assert_eq!(report.restored_node_rows, 1);
9185        assert_eq!(report.restored_edge_rows, 0, "edge should not be restored");
9186        assert_eq!(report.skipped_edges.len(), 1);
9187        assert_eq!(report.skipped_edges[0].edge_logical_id, "edge-1");
9188        assert_eq!(report.skipped_edges[0].missing_endpoint, "doc-2");
9189
9190        // Verify the edge is still retired in the database
9191        let conn = sqlite::open_connection(db.path()).expect("conn");
9192        let active_edge_count: i64 = conn
9193            .query_row(
9194                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
9195                [],
9196                |row| row.get(0),
9197            )
9198            .expect("active edge count");
9199        assert_eq!(active_edge_count, 0, "edge must remain retired");
9200    }
9201
9202    #[test]
9203    fn restore_restores_edges_to_active_nodes() {
9204        let (db, service) = setup();
9205        {
9206            let conn = sqlite::open_connection(db.path()).expect("conn");
9207            // Create node A and node B (B stays active)
9208            conn.execute(
9209                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9210                 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
9211                [],
9212            )
9213            .expect("insert node A");
9214            conn.execute(
9215                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9216                 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
9217                [],
9218            )
9219            .expect("insert node B");
9220            // Create edge between A and B
9221            conn.execute(
9222                "INSERT INTO edges \
9223                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
9224                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
9225                [],
9226            )
9227            .expect("insert edge");
9228            // Retire only A
9229            conn.execute(
9230                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9231                 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
9232                [],
9233            )
9234            .expect("insert retire event A");
9235            conn.execute(
9236                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9237                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
9238                [],
9239            )
9240            .expect("insert edge retire event");
9241            conn.execute(
9242                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
9243                [],
9244            )
9245            .expect("retire node A");
9246            conn.execute(
9247                "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
9248                [],
9249            )
9250            .expect("retire edge");
9251        }
9252
9253        // Restore A — B is active, so the edge should be restored normally
9254        let report = service.restore_logical_id("doc-1").expect("restore A");
9255        assert!(!report.was_noop);
9256        assert_eq!(report.restored_node_rows, 1);
9257        assert!(report.restored_edge_rows > 0, "edge should be restored");
9258        assert!(
9259            report.skipped_edges.is_empty(),
9260            "no edges should be skipped"
9261        );
9262
9263        let conn = sqlite::open_connection(db.path()).expect("conn");
9264        let active_edge_count: i64 = conn
9265            .query_row(
9266                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
9267                [],
9268                |row| row.get(0),
9269            )
9270            .expect("active edge count");
9271        assert_eq!(active_edge_count, 1, "edge must be active");
9272    }
9273
9274    #[test]
9275    fn restore_restores_edges_when_both_restored() {
9276        let (db, service) = setup();
9277        {
9278            let conn = sqlite::open_connection(db.path()).expect("conn");
9279            // Create node A and node B
9280            conn.execute(
9281                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9282                 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
9283                [],
9284            )
9285            .expect("insert node A");
9286            conn.execute(
9287                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9288                 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
9289                [],
9290            )
9291            .expect("insert node B");
9292            // Create edge between A and B
9293            conn.execute(
9294                "INSERT INTO edges \
9295                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
9296                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
9297                [],
9298            )
9299            .expect("insert edge");
9300            // Retire both A and B
9301            conn.execute(
9302                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9303                 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
9304                [],
9305            )
9306            .expect("insert retire event A");
9307            conn.execute(
9308                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9309                 VALUES ('evt-retire-b', 'node_retire', 'doc-2', 'forget-1', 200, '')",
9310                [],
9311            )
9312            .expect("insert retire event B");
9313            conn.execute(
9314                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9315                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
9316                [],
9317            )
9318            .expect("insert edge retire event");
9319            conn.execute(
9320                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
9321                [],
9322            )
9323            .expect("retire node A");
9324            conn.execute(
9325                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-2'",
9326                [],
9327            )
9328            .expect("retire node B");
9329            conn.execute(
9330                "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
9331                [],
9332            )
9333            .expect("retire edge");
9334        }
9335
9336        // Restore B first — edge is skipped because A is still retired
9337        let report_b = service.restore_logical_id("doc-2").expect("restore B");
9338        assert!(!report_b.was_noop);
9339
9340        // Restore A — B is now active, so the edge should be restored
9341        let report_a = service.restore_logical_id("doc-1").expect("restore A");
9342        assert!(!report_a.was_noop);
9343        assert_eq!(report_a.restored_node_rows, 1);
9344        assert!(
9345            report_a.restored_edge_rows > 0,
9346            "edge should be restored when both endpoints active"
9347        );
9348        assert!(
9349            report_a.skipped_edges.is_empty(),
9350            "no edges should be skipped"
9351        );
9352
9353        let conn = sqlite::open_connection(db.path()).expect("conn");
9354        let active_edge_count: i64 = conn
9355            .query_row(
9356                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
9357                [],
9358                |row| row.get(0),
9359            )
9360            .expect("active edge count");
9361        assert_eq!(
9362            active_edge_count, 1,
9363            "edge must be active after both endpoints restored"
9364        );
9365    }
9366
9367    // ── FTS property schema end-to-end tests ──────────────────────────
9368
9369    #[test]
9370    fn fts_property_schema_crud_round_trip() {
9371        let (_db, service) = setup();
9372
9373        // Register
9374        let record = service
9375            .register_fts_property_schema(
9376                "Meeting",
9377                &["$.title".to_owned(), "$.summary".to_owned()],
9378                None,
9379            )
9380            .expect("register");
9381        assert_eq!(record.kind, "Meeting");
9382        assert_eq!(record.property_paths, vec!["$.title", "$.summary"]);
9383        assert_eq!(record.separator, " ");
9384        assert_eq!(record.format_version, 1);
9385
9386        // Describe
9387        let described = service
9388            .describe_fts_property_schema("Meeting")
9389            .expect("describe")
9390            .expect("should exist");
9391        assert_eq!(described, record);
9392
9393        // Describe missing kind
9394        let missing = service
9395            .describe_fts_property_schema("NoSuchKind")
9396            .expect("describe missing");
9397        assert!(missing.is_none());
9398
9399        // List
9400        let list = service.list_fts_property_schemas().expect("list");
9401        assert_eq!(list.len(), 1);
9402        assert_eq!(list[0].kind, "Meeting");
9403
9404        // Update (idempotent upsert)
9405        let updated = service
9406            .register_fts_property_schema(
9407                "Meeting",
9408                &["$.title".to_owned(), "$.notes".to_owned()],
9409                Some("\n"),
9410            )
9411            .expect("update");
9412        assert_eq!(updated.property_paths, vec!["$.title", "$.notes"]);
9413        assert_eq!(updated.separator, "\n");
9414
9415        // Remove
9416        service
9417            .remove_fts_property_schema("Meeting")
9418            .expect("remove");
9419        let after_remove = service
9420            .describe_fts_property_schema("Meeting")
9421            .expect("describe after remove");
9422        assert!(after_remove.is_none());
9423
9424        // Remove non-existent is an error
9425        let err = service.remove_fts_property_schema("Meeting");
9426        assert!(err.is_err());
9427    }
9428
9429    #[test]
9430    fn describe_fts_property_schema_round_trips_recursive_entries() {
9431        let (_db, service) = setup();
9432
9433        let entries = vec![
9434            FtsPropertyPathSpec::scalar("$.title"),
9435            FtsPropertyPathSpec::recursive("$.payload"),
9436        ];
9437        let exclude = vec!["$.payload.private".to_owned()];
9438        let registered = service
9439            .register_fts_property_schema_with_entries(
9440                "KnowledgeItem",
9441                &entries,
9442                Some(" "),
9443                &exclude,
9444            )
9445            .expect("register recursive");
9446
9447        // The register entry point now echoes back the fully-populated
9448        // record via the same load helper used by describe/list.
9449        assert_eq!(registered.entries, entries);
9450        assert_eq!(registered.exclude_paths, exclude);
9451        assert_eq!(registered.property_paths, vec!["$.title", "$.payload"]);
9452
9453        let described = service
9454            .describe_fts_property_schema("KnowledgeItem")
9455            .expect("describe")
9456            .expect("should exist");
9457        assert_eq!(described.kind, "KnowledgeItem");
9458        assert_eq!(described.entries, entries);
9459        assert_eq!(described.exclude_paths, exclude);
9460        assert_eq!(described.property_paths, vec!["$.title", "$.payload"]);
9461        assert_eq!(described.separator, " ");
9462        assert_eq!(described.format_version, 1);
9463    }
9464
9465    #[test]
9466    fn list_fts_property_schemas_round_trips_recursive_entries() {
9467        let (_db, service) = setup();
9468
9469        let entries = vec![
9470            FtsPropertyPathSpec::scalar("$.title"),
9471            FtsPropertyPathSpec::recursive("$.payload"),
9472        ];
9473        let exclude = vec!["$.payload.secret".to_owned()];
9474        service
9475            .register_fts_property_schema_with_entries(
9476                "KnowledgeItem",
9477                &entries,
9478                Some(" "),
9479                &exclude,
9480            )
9481            .expect("register recursive");
9482
9483        let listed = service.list_fts_property_schemas().expect("list");
9484        assert_eq!(listed.len(), 1);
9485        let record = &listed[0];
9486        assert_eq!(record.kind, "KnowledgeItem");
9487        assert_eq!(record.entries, entries);
9488        assert_eq!(record.exclude_paths, exclude);
9489        assert_eq!(record.property_paths, vec!["$.title", "$.payload"]);
9490    }
9491
9492    #[test]
9493    fn describe_fts_property_schema_round_trips_scalar_only_entries() {
9494        let (_db, service) = setup();
9495
9496        service
9497            .register_fts_property_schema(
9498                "Meeting",
9499                &["$.title".to_owned(), "$.summary".to_owned()],
9500                None,
9501            )
9502            .expect("register scalar");
9503
9504        let described = service
9505            .describe_fts_property_schema("Meeting")
9506            .expect("describe")
9507            .expect("should exist");
9508        assert_eq!(described.property_paths, vec!["$.title", "$.summary"]);
9509        assert_eq!(described.entries.len(), 2);
9510        for entry in &described.entries {
9511            assert_eq!(
9512                entry.mode,
9513                FtsPropertyPathMode::Scalar,
9514                "scalar-only schema should deserialize every entry as Scalar"
9515            );
9516        }
9517        assert!(described.exclude_paths.is_empty());
9518    }
9519
9520    #[test]
9521    fn restore_reestablishes_property_fts_visibility() {
9522        let (db, service) = setup();
9523        {
9524            let conn = sqlite::open_connection(db.path()).expect("conn");
9525            // Register a property schema for Document kind.
9526            conn.execute(
9527                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
9528                 VALUES ('Document', '[\"$.title\", \"$.body\"]', ' ')",
9529                [],
9530            )
9531            .expect("register schema");
9532            // Insert an active node with extractable properties.
9533            conn.execute(
9534                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9535                 VALUES ('row-1', 'doc-1', 'Document', '{\"title\":\"Budget\",\"body\":\"Q3 forecast\"}', 100, 'seed')",
9536                [],
9537            )
9538            .expect("insert node");
9539            // Insert a chunk so restore has something to work with for FTS.
9540            conn.execute(
9541                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
9542                 VALUES ('chunk-1', 'doc-1', 'budget text', 100)",
9543                [],
9544            )
9545            .expect("insert chunk");
9546            // Insert property FTS row (as write path would).
9547            conn.execute(
9548                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
9549                 VALUES ('doc-1', 'Document', 'Budget Q3 forecast')",
9550                [],
9551            )
9552            .expect("insert property fts");
9553            // Simulate retire: supersede node, clear FTS.
9554            conn.execute(
9555                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9556                 VALUES ('evt-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
9557                [],
9558            )
9559            .expect("retire event");
9560            conn.execute(
9561                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
9562                [],
9563            )
9564            .expect("supersede");
9565            conn.execute("DELETE FROM fts_nodes", [])
9566                .expect("clear chunk fts");
9567            conn.execute("DELETE FROM fts_node_properties", [])
9568                .expect("clear property fts");
9569        }
9570
9571        let report = service.restore_logical_id("doc-1").expect("restore");
9572        assert_eq!(report.restored_property_fts_rows, 1);
9573
9574        // Verify the property FTS row was recreated.
9575        let conn = sqlite::open_connection(db.path()).expect("conn");
9576        let prop_fts_count: i64 = conn
9577            .query_row(
9578                "SELECT count(*) FROM fts_node_properties WHERE node_logical_id = 'doc-1'",
9579                [],
9580                |row| row.get(0),
9581            )
9582            .expect("prop fts count");
9583        assert_eq!(prop_fts_count, 1, "property FTS must be restored");
9584
9585        let text: String = conn
9586            .query_row(
9587                "SELECT text_content FROM fts_node_properties WHERE node_logical_id = 'doc-1'",
9588                [],
9589                |row| row.get(0),
9590            )
9591            .expect("prop fts text");
9592        assert_eq!(text, "Budget Q3 forecast");
9593    }
9594
9595    #[test]
9596    fn safe_export_preserves_fts_property_schemas() {
9597        let (_db, service) = setup();
9598        service
9599            .register_fts_property_schema(
9600                "Goal",
9601                &["$.name".to_owned(), "$.rationale".to_owned()],
9602                None,
9603            )
9604            .expect("register schema");
9605
9606        let export_dir = tempfile::TempDir::new().expect("temp dir");
9607        let export_path = export_dir.path().join("backup.db");
9608        service
9609            .safe_export(
9610                &export_path,
9611                SafeExportOptions {
9612                    force_checkpoint: false,
9613                },
9614            )
9615            .expect("export");
9616
9617        // Open the exported DB and verify the schema survived.
9618        let exported_conn = rusqlite::Connection::open(&export_path).expect("open exported db");
9619        let kind: String = exported_conn
9620            .query_row(
9621                "SELECT kind FROM fts_property_schemas WHERE kind = 'Goal'",
9622                [],
9623                |row| row.get(0),
9624            )
9625            .expect("schema must exist in export");
9626        assert_eq!(kind, "Goal");
9627        let paths_json: String = exported_conn
9628            .query_row(
9629                "SELECT property_paths_json FROM fts_property_schemas WHERE kind = 'Goal'",
9630                [],
9631                |row| row.get(0),
9632            )
9633            .expect("paths must exist");
9634        let paths: Vec<String> = serde_json::from_str(&paths_json).expect("valid json");
9635        assert_eq!(paths, vec!["$.name", "$.rationale"]);
9636    }
9637
9638    #[test]
9639    #[allow(clippy::too_many_lines)]
9640    fn export_recovery_rebuilds_property_fts_from_canonical_state() {
9641        let (db, service) = setup();
9642        // Register a schema and insert two nodes with extractable properties.
9643        service
9644            .register_fts_property_schema("Goal", &["$.name".to_owned()], None)
9645            .expect("register");
9646        {
9647            let conn = sqlite::open_connection(db.path()).expect("conn");
9648            conn.execute(
9649                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9650                 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
9651                [],
9652            )
9653            .expect("insert node 1");
9654            conn.execute(
9655                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
9656                 VALUES ('goal-1', 'Goal', 'Ship v2')",
9657                [],
9658            )
9659            .expect("insert property FTS row 1");
9660            conn.execute(
9661                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9662                 VALUES ('row-2', 'goal-2', 'Goal', '{\"name\":\"Launch redesign\"}', 100, 'seed')",
9663                [],
9664            )
9665            .expect("insert node 2");
9666            conn.execute(
9667                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
9668                 VALUES ('goal-2', 'Goal', 'Launch redesign')",
9669                [],
9670            )
9671            .expect("insert property FTS row 2");
9672        }
9673
9674        // Export.
9675        let export_dir = tempfile::TempDir::new().expect("temp dir");
9676        let export_path = export_dir.path().join("backup.db");
9677        service
9678            .safe_export(
9679                &export_path,
9680                SafeExportOptions {
9681                    force_checkpoint: false,
9682                },
9683            )
9684            .expect("export");
9685
9686        // Corrupt the derived rows: replace correct text with wrong text for
9687        // goal-1, and delete the row for goal-2 entirely. This exercises both
9688        // corrupted-but-present rows and missing rows in the same recovery.
9689        {
9690            let conn = rusqlite::Connection::open(&export_path).expect("open export");
9691            conn.execute(
9692                "DELETE FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
9693                [],
9694            )
9695            .expect("delete old row");
9696            conn.execute(
9697                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
9698                 VALUES ('goal-1', 'Goal', 'completely wrong stale text')",
9699                [],
9700            )
9701            .expect("insert corrupted row");
9702            conn.execute(
9703                "DELETE FROM fts_node_properties WHERE node_logical_id = 'goal-2'",
9704                [],
9705            )
9706            .expect("delete goal-2 row");
9707        }
9708
9709        // Open the exported DB and rebuild projections from canonical state.
9710        let schema = Arc::new(SchemaManager::new());
9711        let exported_service = AdminService::new(&export_path, Arc::clone(&schema));
9712        exported_service
9713            .rebuild_projections(ProjectionTarget::Fts)
9714            .expect("rebuild");
9715
9716        // Verify text_search(...) returns the correct result for goal-1's
9717        // canonical property ("Ship") — not the corrupted text.
9718        let coordinator = ExecutionCoordinator::open(
9719            &export_path,
9720            Arc::clone(&schema),
9721            None,
9722            1,
9723            Arc::new(TelemetryCounters::default()),
9724            None,
9725        )
9726        .expect("coordinator");
9727
9728        let compiled = QueryBuilder::nodes("Goal")
9729            .text_search("Ship", 10)
9730            .limit(10)
9731            .compile()
9732            .expect("compile");
9733        let rows = coordinator
9734            .execute_compiled_read(&compiled)
9735            .expect("execute read");
9736        assert_eq!(rows.nodes.len(), 1);
9737        assert_eq!(rows.nodes[0].logical_id, "goal-1");
9738
9739        // Verify text_search(...) recovers the previously missing goal-2 row.
9740        let compiled2 = QueryBuilder::nodes("Goal")
9741            .text_search("redesign", 10)
9742            .limit(10)
9743            .compile()
9744            .expect("compile");
9745        let rows2 = coordinator
9746            .execute_compiled_read(&compiled2)
9747            .expect("execute read");
9748        assert_eq!(rows2.nodes.len(), 1);
9749        assert_eq!(rows2.nodes[0].logical_id, "goal-2");
9750
9751        // The corrupted text must not be searchable after recovery.
9752        let compiled3 = QueryBuilder::nodes("Goal")
9753            .text_search("stale", 10)
9754            .limit(10)
9755            .compile()
9756            .expect("compile");
9757        let rows3 = coordinator
9758            .execute_compiled_read(&compiled3)
9759            .expect("execute read");
9760        assert_eq!(
9761            rows3.nodes.len(),
9762            0,
9763            "corrupted text must not appear in search after rebuild"
9764        );
9765
9766        // Verify integrity and semantics are clean after recovery.
9767        let integrity = exported_service.check_integrity().expect("integrity");
9768        assert_eq!(integrity.missing_property_fts_rows, 0);
9769        let semantics = exported_service.check_semantics().expect("semantics");
9770        assert_eq!(semantics.drifted_property_fts_rows, 0);
9771        assert_eq!(semantics.orphaned_property_fts_rows, 0);
9772        assert_eq!(semantics.duplicate_property_fts_rows, 0);
9773    }
9774
9775    #[test]
9776    fn check_integrity_no_false_positives_for_empty_extraction() {
9777        let (db, service) = setup();
9778        {
9779            let conn = sqlite::open_connection(db.path()).expect("conn");
9780            // Register a schema that looks for $.searchable
9781            conn.execute(
9782                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
9783                 VALUES ('Ticket', '[\"$.searchable\"]', ' ')",
9784                [],
9785            )
9786            .expect("register schema");
9787            // Insert a node whose properties do NOT contain $.searchable —
9788            // correctly has no property FTS row.
9789            conn.execute(
9790                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9791                 VALUES ('row-1', 'ticket-1', 'Ticket', '{\"status\":\"open\"}', 100, 'seed')",
9792                [],
9793            )
9794            .expect("insert node");
9795        }
9796
9797        let report = service.check_integrity().expect("integrity");
9798        assert_eq!(
9799            report.missing_property_fts_rows, 0,
9800            "node with no extractable values must not be counted as missing"
9801        );
9802    }
9803
9804    #[test]
9805    fn check_integrity_detects_genuinely_missing_property_fts_rows() {
9806        let (db, service) = setup();
9807        {
9808            let conn = sqlite::open_connection(db.path()).expect("conn");
9809            conn.execute(
9810                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
9811                 VALUES ('Ticket', '[\"$.title\"]', ' ')",
9812                [],
9813            )
9814            .expect("register schema");
9815            // Insert a node WITH an extractable $.title but no property FTS row.
9816            conn.execute(
9817                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9818                 VALUES ('row-1', 'ticket-1', 'Ticket', '{\"title\":\"fix login bug\"}', 100, 'seed')",
9819                [],
9820            )
9821            .expect("insert node");
9822        }
9823
9824        let report = service.check_integrity().expect("integrity");
9825        assert_eq!(
9826            report.missing_property_fts_rows, 1,
9827            "node with extractable values but no property FTS row must be detected"
9828        );
9829    }
9830
9831    #[test]
9832    fn rebuild_projections_fts_restores_missing_property_fts_rows() {
9833        let (db, service) = setup();
9834        {
9835            let conn = sqlite::open_connection(db.path()).expect("conn");
9836            conn.execute(
9837                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
9838                 VALUES ('Goal', '[\"$.name\"]', ' ')",
9839                [],
9840            )
9841            .expect("register schema");
9842            conn.execute(
9843                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9844                 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
9845                [],
9846            )
9847            .expect("insert node");
9848            // Deliberately do NOT insert a property FTS row.
9849        }
9850
9851        let report = service
9852            .rebuild_projections(ProjectionTarget::Fts)
9853            .expect("rebuild");
9854        assert!(
9855            report.rebuilt_rows >= 1,
9856            "rebuild must insert at least one property FTS row"
9857        );
9858
9859        let conn = sqlite::open_connection(db.path()).expect("conn");
9860        let text: String = conn
9861            .query_row(
9862                "SELECT text_content FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
9863                [],
9864                |row| row.get(0),
9865            )
9866            .expect("property FTS row must exist after rebuild");
9867        assert_eq!(text, "Ship v2");
9868    }
9869
9870    #[test]
9871    fn rebuild_missing_projections_fills_gap_for_deleted_property_fts_row() {
9872        let (db, service) = setup();
9873        {
9874            let conn = sqlite::open_connection(db.path()).expect("conn");
9875            conn.execute(
9876                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
9877                 VALUES ('Goal', '[\"$.name\"]', ' ')",
9878                [],
9879            )
9880            .expect("register schema");
9881            conn.execute(
9882                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9883                 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
9884                [],
9885            )
9886            .expect("insert node");
9887            // Insert and then delete the property FTS row to simulate corruption.
9888            conn.execute(
9889                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
9890                 VALUES ('goal-1', 'Goal', 'Ship v2')",
9891                [],
9892            )
9893            .expect("insert property fts");
9894            conn.execute(
9895                "DELETE FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
9896                [],
9897            )
9898            .expect("delete property fts");
9899        }
9900
9901        let report = service
9902            .rebuild_missing_projections()
9903            .expect("rebuild missing");
9904        assert!(
9905            report.rebuilt_rows >= 1,
9906            "missing rebuild must insert the gap-fill row"
9907        );
9908
9909        let conn = sqlite::open_connection(db.path()).expect("conn");
9910        let count: i64 = conn
9911            .query_row(
9912                "SELECT count(*) FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
9913                [],
9914                |row| row.get(0),
9915            )
9916            .expect("count");
9917        assert_eq!(
9918            count, 1,
9919            "gap-fill must restore exactly one property FTS row"
9920        );
9921    }
9922
9923    #[test]
9924    fn remove_schema_then_rebuild_cleans_stale_property_fts_rows() {
9925        let (db, service) = setup();
9926        service
9927            .register_fts_property_schema("Goal", &["$.name".to_owned()], None)
9928            .expect("register");
9929        {
9930            let conn = sqlite::open_connection(db.path()).expect("conn");
9931            conn.execute(
9932                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9933                 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
9934                [],
9935            )
9936            .expect("insert node");
9937            // Manually insert a property FTS row (simulating the write path).
9938            conn.execute(
9939                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
9940                 VALUES ('goal-1', 'Goal', 'Ship v2')",
9941                [],
9942            )
9943            .expect("insert property fts");
9944        }
9945
9946        // Remove the schema — stale rows now exist.
9947        service.remove_fts_property_schema("Goal").expect("remove");
9948
9949        // Verify stale rows are detected.
9950        let semantics = service.check_semantics().expect("semantics");
9951        assert_eq!(
9952            semantics.orphaned_property_fts_rows, 1,
9953            "stale property FTS rows must be detected after schema removal"
9954        );
9955
9956        // Full rebuild should clean them.
9957        service
9958            .rebuild_projections(ProjectionTarget::Fts)
9959            .expect("rebuild");
9960
9961        let conn = sqlite::open_connection(db.path()).expect("conn");
9962        let count: i64 = conn
9963            .query_row(
9964                "SELECT count(*) FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
9965                [],
9966                |row| row.get(0),
9967            )
9968            .expect("count");
9969        assert_eq!(
9970            count, 0,
9971            "rebuild after schema removal must delete stale property FTS rows"
9972        );
9973    }
9974
9975    mod validate_fts_property_paths_tests {
9976        use super::super::validate_fts_property_paths;
9977
9978        #[test]
9979        fn valid_simple_path() {
9980            assert!(validate_fts_property_paths(&["$.name".to_owned()]).is_ok());
9981        }
9982
9983        #[test]
9984        fn valid_nested_path() {
9985            assert!(validate_fts_property_paths(&["$.address.city".to_owned()]).is_ok());
9986        }
9987
9988        #[test]
9989        fn valid_underscore_segment() {
9990            assert!(validate_fts_property_paths(&["$.a_b".to_owned()]).is_ok());
9991        }
9992
9993        #[test]
9994        fn rejects_bare_prefix() {
9995            let result = validate_fts_property_paths(&["$.".to_owned()]);
9996            assert!(result.is_err(), "path '$.' must be rejected");
9997        }
9998
9999        #[test]
10000        fn rejects_double_dot() {
10001            let result = validate_fts_property_paths(&["$..x".to_owned()]);
10002            assert!(result.is_err(), "path '$..x' must be rejected");
10003        }
10004
10005        #[test]
10006        fn rejects_trailing_dot() {
10007            let result = validate_fts_property_paths(&["$.foo.".to_owned()]);
10008            assert!(result.is_err(), "path '$.foo.' must be rejected");
10009        }
10010
10011        #[test]
10012        fn rejects_space_in_segment() {
10013            let result = validate_fts_property_paths(&["$.foo bar".to_owned()]);
10014            assert!(result.is_err(), "path '$.foo bar' must be rejected");
10015        }
10016
10017        #[test]
10018        fn rejects_bracket_syntax() {
10019            let result = validate_fts_property_paths(&["$.foo[0]".to_owned()]);
10020            assert!(result.is_err(), "path '$.foo[0]' must be rejected");
10021        }
10022
10023        #[test]
10024        fn rejects_duplicates() {
10025            let result = validate_fts_property_paths(&["$.name".to_owned(), "$.name".to_owned()]);
10026            assert!(result.is_err(), "duplicate paths must be rejected");
10027        }
10028
10029        #[test]
10030        fn rejects_empty_list() {
10031            let result = validate_fts_property_paths(&[]);
10032            assert!(result.is_err(), "empty path list must be rejected");
10033        }
10034    }
10035}