// fathomdb_engine/admin.rs

1use std::fmt::Write as _;
2use std::fs;
3use std::io::{self, Read, Write};
4use std::path::{Path, PathBuf};
5use std::process::{Command, Stdio};
6use std::sync::Arc;
7use std::sync::mpsc;
8use std::thread;
9use std::time::{Duration, Instant, SystemTime};
10
11use fathomdb_schema::{SchemaError, SchemaManager};
12use rusqlite::{DatabaseName, OptionalExtension, TransactionBehavior};
13use serde::{Deserialize, Serialize};
14use sha2::{Digest, Sha256};
15
16use crate::{
17    EngineError, ProjectionRepairReport, ProjectionService, executable_trust,
18    ids::new_id,
19    operational::{
20        OperationalCollectionKind, OperationalCollectionRecord, OperationalCompactionReport,
21        OperationalCurrentRow, OperationalFilterClause, OperationalFilterField,
22        OperationalFilterFieldType, OperationalFilterMode, OperationalFilterValue,
23        OperationalHistoryValidationIssue, OperationalHistoryValidationReport,
24        OperationalMutationRow, OperationalPurgeReport, OperationalReadReport,
25        OperationalReadRequest, OperationalRegisterRequest, OperationalRepairReport,
26        OperationalRetentionActionKind, OperationalRetentionPlanItem,
27        OperationalRetentionPlanReport, OperationalRetentionRunItem, OperationalRetentionRunReport,
28        OperationalSecondaryIndexDefinition, OperationalSecondaryIndexRebuildReport,
29        OperationalTraceReport, extract_secondary_index_entries_for_current,
30        extract_secondary_index_entries_for_mutation, parse_operational_secondary_indexes_json,
31        parse_operational_validation_contract, validate_operational_payload_against_contract,
32    },
33    projection::ProjectionTarget,
34    sqlite,
35};
36
37/// Results of a physical and structural integrity check on the database.
38#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
39pub struct IntegrityReport {
40    pub physical_ok: bool,
41    pub foreign_keys_ok: bool,
42    pub missing_fts_rows: usize,
43    pub missing_property_fts_rows: usize,
44    pub duplicate_active_logical_ids: usize,
45    pub operational_missing_collections: usize,
46    pub operational_missing_last_mutations: usize,
47    pub warnings: Vec<String>,
48}
49
50/// A registered FTS property projection schema for a node kind.
51#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
52pub struct FtsPropertySchemaRecord {
53    /// The node kind this schema applies to.
54    pub kind: String,
55    /// JSON property paths to extract (e.g. `["$.name", "$.title"]`).
56    pub property_paths: Vec<String>,
57    /// Separator used when concatenating extracted values.
58    pub separator: String,
59    /// Schema format version.
60    pub format_version: i64,
61}
62
/// Options controlling how a safe database export is performed.
///
/// The [`Default`] value enables `force_checkpoint`.
#[derive(Clone, Copy, Debug)]
pub struct SafeExportOptions {
    /// When true, runs `PRAGMA wal_checkpoint(FULL)` before copying and fails if
    /// any WAL frames could not be applied (busy != 0). Set to false only in
    /// tests that seed a database without WAL mode.
    pub force_checkpoint: bool,
}

impl Default for SafeExportOptions {
    /// Returns options with `force_checkpoint` set to `true`.
    fn default() -> Self {
        Self {
            force_checkpoint: true,
        }
    }
}
79
// Must match PROTOCOL_VERSION in fathomdb-admin-bridge.rs
const EXPORT_PROTOCOL_VERSION: u32 = 1;
82
83/// Manifest describing a completed safe export.
84#[derive(Clone, Debug, Serialize)]
85pub struct SafeExportManifest {
86    /// Unix timestamp (seconds since epoch) when the export was created.
87    pub exported_at: u64,
88    /// SHA-256 hex digest of the exported database file.
89    pub sha256: String,
90    /// Schema version recorded in `fathom_schema_migrations` at export time.
91    pub schema_version: u32,
92    /// Bridge protocol version compiled into this binary.
93    pub protocol_version: u32,
94    /// Number of `SQLite` pages in the exported database file.
95    pub page_count: u64,
96}
97
98/// Report from tracing all rows associated with a given `source_ref`.
99#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
100pub struct TraceReport {
101    pub source_ref: String,
102    pub node_rows: usize,
103    pub edge_rows: usize,
104    pub action_rows: usize,
105    pub operational_mutation_rows: usize,
106    pub node_logical_ids: Vec<String>,
107    pub action_ids: Vec<String>,
108    pub operational_mutation_ids: Vec<String>,
109}
110
111/// An edge that was skipped during a restore because an endpoint is missing.
112#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
113pub struct SkippedEdge {
114    pub edge_logical_id: String,
115    pub missing_endpoint: String,
116}
117
118/// Report from restoring a retired logical ID back to active state.
119#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
120pub struct LogicalRestoreReport {
121    pub logical_id: String,
122    pub was_noop: bool,
123    pub restored_node_rows: usize,
124    pub restored_edge_rows: usize,
125    pub restored_chunk_rows: usize,
126    pub restored_fts_rows: usize,
127    pub restored_property_fts_rows: usize,
128    pub restored_vec_rows: usize,
129    pub skipped_edges: Vec<SkippedEdge>,
130    pub notes: Vec<String>,
131}
132
133/// Report from permanently purging all rows for a logical ID.
134#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
135pub struct LogicalPurgeReport {
136    pub logical_id: String,
137    pub was_noop: bool,
138    pub deleted_node_rows: usize,
139    pub deleted_edge_rows: usize,
140    pub deleted_chunk_rows: usize,
141    pub deleted_fts_rows: usize,
142    pub deleted_vec_rows: usize,
143    pub notes: Vec<String>,
144}
145
146/// Options controlling provenance event purging behavior.
147#[derive(Clone, Debug, Serialize, Deserialize)]
148pub struct ProvenancePurgeOptions {
149    pub dry_run: bool,
150    #[serde(default)]
151    pub preserve_event_types: Vec<String>,
152}
153
154/// Report from a provenance event purge operation.
155#[derive(Clone, Debug, Serialize)]
156pub struct ProvenancePurgeReport {
157    pub events_deleted: u64,
158    pub events_preserved: u64,
159    pub oldest_remaining: Option<i64>,
160}
161
162/// Service providing administrative operations (integrity checks, exports, restores, purges).
163#[derive(Debug)]
164pub struct AdminService {
165    database_path: PathBuf,
166    schema_manager: Arc<SchemaManager>,
167    projections: ProjectionService,
168}
169
170/// Results of a semantic consistency check on the graph data.
171#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
172pub struct SemanticReport {
173    /// Chunks whose `node_logical_id` has no active node.
174    pub orphaned_chunks: usize,
175    /// Active nodes with a NULL `source_ref` (loss of provenance).
176    pub null_source_ref_nodes: usize,
177    /// Steps referencing a `run_id` that does not exist in the runs table.
178    pub broken_step_fk: usize,
179    /// Actions referencing a `step_id` that does not exist in the steps table.
180    pub broken_action_fk: usize,
181    /// FTS rows whose `chunk_id` does not exist in the chunks table.
182    pub stale_fts_rows: usize,
183    /// FTS rows whose node has been superseded (`superseded_at` IS NOT NULL on all active rows).
184    pub fts_rows_for_superseded_nodes: usize,
185    /// Property FTS rows whose node has been superseded or does not exist.
186    pub stale_property_fts_rows: usize,
187    /// Property FTS rows whose kind has no registered FTS property schema.
188    pub orphaned_property_fts_rows: usize,
189    /// Property FTS rows whose `kind` does not match the active node's actual kind.
190    pub mismatched_kind_property_fts_rows: usize,
191    /// Active logical IDs with more than one `fts_node_properties` row.
192    pub duplicate_property_fts_rows: usize,
193    /// Property FTS rows whose `text_content` no longer matches the canonical extraction.
194    pub drifted_property_fts_rows: usize,
195    /// Active edges where at least one endpoint has no active node.
196    pub dangling_edges: usize,
197    /// `logical_ids` where every version has been superseded (no active row).
198    pub orphaned_supersession_chains: usize,
199    /// Vec rows whose backing chunk no longer exists in the chunks table.
200    pub stale_vec_rows: usize,
201    /// Compatibility counter for vec rows whose chunk points at missing node history.
202    pub vec_rows_for_superseded_nodes: usize,
203    /// Latest-state keys whose latest mutation is a `put` but no current row exists.
204    pub missing_operational_current_rows: usize,
205    /// Current rows that do not match the latest mutation state.
206    pub stale_operational_current_rows: usize,
207    /// Mutations written after the owning collection was disabled.
208    pub disabled_collection_mutations: usize,
209    /// Access metadata rows whose `logical_id` no longer has any node history.
210    pub orphaned_last_access_metadata_rows: usize,
211    pub warnings: Vec<String>,
212}
213
214/// Configuration for regenerating vector embeddings via an external generator command.
215#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
216#[serde(rename_all = "snake_case")]
217pub struct VectorRegenerationConfig {
218    pub profile: String,
219    pub table_name: String,
220    pub model_identity: String,
221    pub model_version: String,
222    pub dimension: usize,
223    pub normalization_policy: String,
224    pub chunking_policy: String,
225    pub preprocessing_policy: String,
226    pub generator_command: Vec<String>,
227}
228
229/// Report from a vector embedding regeneration run.
230#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
231pub struct VectorRegenerationReport {
232    pub profile: String,
233    pub table_name: String,
234    pub dimension: usize,
235    pub total_chunks: usize,
236    pub regenerated_rows: usize,
237    pub contract_persisted: bool,
238    pub notes: Vec<String>,
239}
240
241/// Security and resource policy for the external vector generator subprocess.
242#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
243#[serde(rename_all = "snake_case")]
244pub struct VectorGeneratorPolicy {
245    pub timeout_ms: u64,
246    pub max_stdout_bytes: usize,
247    pub max_stderr_bytes: usize,
248    pub max_input_bytes: usize,
249    pub max_chunks: usize,
250    #[serde(default = "default_require_absolute_executable")]
251    pub require_absolute_executable: bool,
252    #[serde(default = "default_reject_world_writable_executable")]
253    pub reject_world_writable_executable: bool,
254    #[serde(default)]
255    pub allowed_executable_roots: Vec<String>,
256    #[serde(default)]
257    pub preserve_env_vars: Vec<String>,
258}
259
260impl Default for VectorGeneratorPolicy {
261    fn default() -> Self {
262        Self {
263            timeout_ms: 300_000,
264            max_stdout_bytes: 64 * 1024 * 1024,
265            max_stderr_bytes: 1024 * 1024,
266            max_input_bytes: 64 * 1024 * 1024,
267            max_chunks: 1_000_000,
268            require_absolute_executable: true,
269            reject_world_writable_executable: true,
270            allowed_executable_roots: vec![],
271            preserve_env_vars: vec![],
272        }
273    }
274}
275
/// Serde default for `VectorGeneratorPolicy::require_absolute_executable`: always `true`.
const fn default_require_absolute_executable() -> bool {
    true
}
279
/// Serde default for `VectorGeneratorPolicy::reject_world_writable_executable`: always `true`.
const fn default_reject_world_writable_executable() -> bool {
    true
}
283
// Version and size limits referenced by code further down in this file.
// NOTE(review): the use sites are outside this view; units are taken from the
// names (`_LEN` / `_BYTES` / `_LIMIT`) — confirm there before changing a value.
const CURRENT_VECTOR_CONTRACT_FORMAT_VERSION: i64 = 1;
const MAX_PROFILE_LEN: usize = 128;
const MAX_MODEL_IDENTITY_LEN: usize = 256;
const MAX_MODEL_VERSION_LEN: usize = 128;
const MAX_POLICY_LEN: usize = 128;
const MAX_GENERATOR_COMMAND_ARG_LEN: usize = 4096;
const MAX_GENERATOR_COMMAND_TOTAL_LEN: usize = 16 * 1024;
const MAX_CONTRACT_JSON_BYTES: usize = 32 * 1024;
const MAX_AUDIT_METADATA_BYTES: usize = 2048;
const DEFAULT_OPERATIONAL_READ_LIMIT: usize = 100;
const MAX_OPERATIONAL_READ_LIMIT: usize = 1000;
295
296/// Thread-safe handle to the shared [`AdminService`].
297#[derive(Clone, Debug)]
298pub struct AdminHandle {
299    inner: Arc<AdminService>,
300}
301
302impl AdminHandle {
303    /// Wrap an [`AdminService`] in a shared handle.
304    #[must_use]
305    pub fn new(service: AdminService) -> Self {
306        Self {
307            inner: Arc::new(service),
308        }
309    }
310
311    /// Clone the inner `Arc` to the [`AdminService`].
312    #[must_use]
313    pub fn service(&self) -> Arc<AdminService> {
314        Arc::clone(&self.inner)
315    }
316}
317
318impl AdminService {
319    /// Create a new admin service for the database at the given path.
320    #[must_use]
321    pub fn new(path: impl AsRef<Path>, schema_manager: Arc<SchemaManager>) -> Self {
322        let database_path = path.as_ref().to_path_buf();
323        let projections = ProjectionService::new(&database_path, Arc::clone(&schema_manager));
324        Self {
325            database_path,
326            schema_manager,
327            projections,
328        }
329    }
330
331    fn connect(&self) -> Result<rusqlite::Connection, EngineError> {
332        #[cfg(feature = "sqlite-vec")]
333        let conn = sqlite::open_connection_with_vec(&self.database_path)?;
334        #[cfg(not(feature = "sqlite-vec"))]
335        let conn = sqlite::open_connection(&self.database_path)?;
336        self.schema_manager.bootstrap(&conn)?;
337        Ok(conn)
338    }
339
340    /// # Errors
341    /// Returns [`EngineError`] if the database connection fails or any SQL query fails.
342    pub fn check_integrity(&self) -> Result<IntegrityReport, EngineError> {
343        let conn = self.connect()?;
344
345        let physical_result: String =
346            conn.query_row("PRAGMA integrity_check", [], |row| row.get(0))?;
347        let foreign_key_count: i64 =
348            conn.query_row("SELECT count(*) FROM pragma_foreign_key_check", [], |row| {
349                row.get(0)
350            })?;
351        let missing_fts_rows: i64 = conn.query_row(
352            r"
353            SELECT count(*)
354            FROM chunks c
355            JOIN nodes n
356              ON n.logical_id = c.node_logical_id
357             AND n.superseded_at IS NULL
358            WHERE NOT EXISTS (
359                SELECT 1
360                FROM fts_nodes f
361                WHERE f.chunk_id = c.id
362            )
363            ",
364            [],
365            |row| row.get(0),
366        )?;
367        let duplicate_active: i64 = conn.query_row(
368            r"
369            SELECT count(*)
370            FROM (
371                SELECT logical_id
372                FROM nodes
373                WHERE superseded_at IS NULL
374                GROUP BY logical_id
375                HAVING count(*) > 1
376            )
377            ",
378            [],
379            |row| row.get(0),
380        )?;
381        let operational_missing_collections: i64 = conn.query_row(
382            r"
383            SELECT (
384                SELECT count(*)
385                FROM operational_mutations m
386                LEFT JOIN operational_collections c ON c.name = m.collection_name
387                WHERE c.name IS NULL
388            ) + (
389                SELECT count(*)
390                FROM operational_current oc
391                LEFT JOIN operational_collections c ON c.name = oc.collection_name
392                WHERE c.name IS NULL
393            )
394            ",
395            [],
396            |row| row.get(0),
397        )?;
398        let operational_missing_last_mutations: i64 = conn.query_row(
399            r"
400            SELECT count(*)
401            FROM operational_current oc
402            LEFT JOIN operational_mutations m ON m.id = oc.last_mutation_id
403            WHERE m.id IS NULL
404            ",
405            [],
406            |row| row.get(0),
407        )?;
408
409        // Count missing property FTS rows using the same extraction logic as
410        // write/rebuild. A pure-SQL check would overcount: nodes whose declared
411        // paths legitimately normalize to no values correctly have no row.
412        let missing_property_fts_rows = count_missing_property_fts_rows(&conn)?;
413
414        let mut warnings = Vec::new();
415        if missing_fts_rows > 0 {
416            warnings.push("missing FTS projections detected".to_owned());
417        }
418        if missing_property_fts_rows > 0 {
419            warnings.push("missing property FTS projections detected".to_owned());
420        }
421        if duplicate_active > 0 {
422            warnings.push("duplicate active logical_ids detected".to_owned());
423        }
424        if operational_missing_collections > 0 {
425            warnings.push("operational rows reference missing collections".to_owned());
426        }
427        if operational_missing_last_mutations > 0 {
428            warnings.push("operational current rows reference missing last mutations".to_owned());
429        }
430
431        // FIX(review): was `as usize` — unsound on 32-bit targets, wraps negatives silently.
432        // Options: (A) try_from().unwrap_or(0) — masks corruption, (B) try_from().expect() —
433        // panics on corruption, (C) propagate error. Chose (B) here: a negative count(*)
434        // signals data corruption, and the integrity report would be meaningless anyway.
435        Ok(IntegrityReport {
436            physical_ok: physical_result == "ok",
437            foreign_keys_ok: foreign_key_count == 0,
438            missing_fts_rows: i64_to_usize(missing_fts_rows),
439            missing_property_fts_rows: i64_to_usize(missing_property_fts_rows),
440            duplicate_active_logical_ids: i64_to_usize(duplicate_active),
441            operational_missing_collections: i64_to_usize(operational_missing_collections),
442            operational_missing_last_mutations: i64_to_usize(operational_missing_last_mutations),
443            warnings,
444        })
445    }
446
447    /// # Errors
448    /// Returns [`EngineError`] if the database connection fails or any SQL query fails.
449    #[allow(clippy::too_many_lines)]
450    pub fn check_semantics(&self) -> Result<SemanticReport, EngineError> {
451        let conn = self.connect()?;
452
453        let orphaned_chunks: i64 = conn.query_row(
454            r"
455            SELECT count(*)
456            FROM chunks c
457            WHERE NOT EXISTS (
458                SELECT 1 FROM nodes n
459                WHERE n.logical_id = c.node_logical_id
460            )
461            ",
462            [],
463            |row| row.get(0),
464        )?;
465
466        let null_source_ref_nodes: i64 = conn.query_row(
467            "SELECT count(*) FROM nodes WHERE source_ref IS NULL AND superseded_at IS NULL",
468            [],
469            |row| row.get(0),
470        )?;
471
472        let broken_step_fk: i64 = conn.query_row(
473            r"
474            SELECT count(*) FROM steps s
475            WHERE NOT EXISTS (SELECT 1 FROM runs r WHERE r.id = s.run_id)
476            ",
477            [],
478            |row| row.get(0),
479        )?;
480
481        let broken_action_fk: i64 = conn.query_row(
482            r"
483            SELECT count(*) FROM actions a
484            WHERE NOT EXISTS (SELECT 1 FROM steps s WHERE s.id = a.step_id)
485            ",
486            [],
487            |row| row.get(0),
488        )?;
489
490        let stale_fts_rows: i64 = conn.query_row(
491            r"
492            SELECT count(*) FROM fts_nodes f
493            WHERE NOT EXISTS (SELECT 1 FROM chunks c WHERE c.id = f.chunk_id)
494            ",
495            [],
496            |row| row.get(0),
497        )?;
498
499        let fts_rows_for_superseded_nodes: i64 = conn.query_row(
500            r"
501            SELECT count(*) FROM fts_nodes f
502            WHERE NOT EXISTS (
503                SELECT 1 FROM nodes n
504                WHERE n.logical_id = f.node_logical_id AND n.superseded_at IS NULL
505            )
506            ",
507            [],
508            |row| row.get(0),
509        )?;
510
511        let stale_property_fts_rows: i64 = conn.query_row(
512            r"
513            SELECT count(*) FROM fts_node_properties fp
514            WHERE NOT EXISTS (
515                SELECT 1 FROM nodes n
516                WHERE n.logical_id = fp.node_logical_id AND n.superseded_at IS NULL
517            )
518            ",
519            [],
520            |row| row.get(0),
521        )?;
522
523        let orphaned_property_fts_rows: i64 = conn.query_row(
524            r"
525            SELECT count(*) FROM fts_node_properties fp
526            WHERE NOT EXISTS (
527                SELECT 1 FROM fts_property_schemas s WHERE s.kind = fp.kind
528            )
529            ",
530            [],
531            |row| row.get(0),
532        )?;
533
534        let mismatched_kind_property_fts_rows: i64 = conn.query_row(
535            r"
536            SELECT count(*) FROM fts_node_properties fp
537            JOIN nodes n ON n.logical_id = fp.node_logical_id AND n.superseded_at IS NULL
538            WHERE n.kind != fp.kind
539            ",
540            [],
541            |row| row.get(0),
542        )?;
543
544        let duplicate_property_fts_rows: i64 = conn.query_row(
545            r"
546            SELECT count(*) FROM (
547                SELECT node_logical_id FROM fts_node_properties
548                GROUP BY node_logical_id
549                HAVING count(*) > 1
550            )
551            ",
552            [],
553            |row| row.get(0),
554        )?;
555
556        let drifted_property_fts_rows = count_drifted_property_fts_rows(&conn)?;
557
558        let dangling_edges: i64 = conn.query_row(
559            r"
560            SELECT count(*) FROM edges e
561            WHERE e.superseded_at IS NULL AND (
562                NOT EXISTS (SELECT 1 FROM nodes n WHERE n.logical_id = e.source_logical_id AND n.superseded_at IS NULL)
563                OR
564                NOT EXISTS (SELECT 1 FROM nodes n WHERE n.logical_id = e.target_logical_id AND n.superseded_at IS NULL)
565            )
566            ",
567            [],
568            |row| row.get(0),
569        )?;
570
571        let orphaned_supersession_chains: i64 = conn.query_row(
572            r"
573            SELECT count(*) FROM (
574                SELECT logical_id FROM nodes
575                GROUP BY logical_id
576                HAVING count(*) > 0 AND sum(CASE WHEN superseded_at IS NULL THEN 1 ELSE 0 END) = 0
577            )
578            ",
579            [],
580            |row| row.get(0),
581        )?;
582
583        // Vec stale row detection — degrades to 0 when the vec profile is absent.
584        #[cfg(feature = "sqlite-vec")]
585        let stale_vec_rows: i64 = match conn.query_row(
586            r"
587            SELECT count(*) FROM vec_nodes_active v
588            WHERE NOT EXISTS (SELECT 1 FROM chunks c WHERE c.id = v.chunk_id)
589            ",
590            [],
591            |row| row.get(0),
592        ) {
593            Ok(n) => n,
594            Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
595                if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
596            {
597                0
598            }
599            Err(e) => return Err(EngineError::Sqlite(e)),
600        };
601        #[cfg(not(feature = "sqlite-vec"))]
602        let stale_vec_rows: i64 = 0;
603
604        #[cfg(feature = "sqlite-vec")]
605        let vec_rows_for_superseded_nodes: i64 = match conn.query_row(
606            r"
607            SELECT count(*) FROM vec_nodes_active v
608            JOIN chunks c ON c.id = v.chunk_id
609            WHERE NOT EXISTS (
610                SELECT 1 FROM nodes n
611                WHERE n.logical_id = c.node_logical_id
612            )
613            ",
614            [],
615            |row| row.get(0),
616        ) {
617            Ok(n) => n,
618            Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
619                if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
620            {
621                0
622            }
623            Err(e) => return Err(EngineError::Sqlite(e)),
624        };
625        #[cfg(not(feature = "sqlite-vec"))]
626        let vec_rows_for_superseded_nodes: i64 = 0;
627        let missing_operational_current_rows: i64 = conn.query_row(
628            r"
629            SELECT count(*)
630            FROM operational_mutations m
631            JOIN operational_collections c
632              ON c.name = m.collection_name
633             AND c.kind = 'latest_state'
634            WHERE m.op_kind = 'put'
635              AND NOT EXISTS (
636                    SELECT 1
637                    FROM operational_mutations newer
638                    WHERE newer.collection_name = m.collection_name
639                      AND newer.record_key = m.record_key
640                      AND newer.mutation_order > m.mutation_order
641                )
642              AND NOT EXISTS (
643                    SELECT 1
644                    FROM operational_current oc
645                    WHERE oc.collection_name = m.collection_name
646                      AND oc.record_key = m.record_key
647                )
648            ",
649            [],
650            |row| row.get(0),
651        )?;
652        let stale_operational_current_rows: i64 = conn.query_row(
653            r"
654            SELECT count(*)
655            FROM operational_current oc
656            JOIN operational_collections c
657              ON c.name = oc.collection_name
658             AND c.kind = 'latest_state'
659            LEFT JOIN operational_mutations m ON m.id = oc.last_mutation_id
660            WHERE m.id IS NULL
661               OR m.collection_name != oc.collection_name
662               OR m.record_key != oc.record_key
663               OR m.op_kind != 'put'
664               OR m.payload_json != oc.payload_json
665               OR EXISTS (
666                    SELECT 1
667                    FROM operational_mutations newer
668                    WHERE newer.collection_name = oc.collection_name
669                      AND newer.record_key = oc.record_key
670                      AND newer.mutation_order > m.mutation_order
671                )
672            ",
673            [],
674            |row| row.get(0),
675        )?;
676        let disabled_collection_mutations: i64 = conn.query_row(
677            r"
678            SELECT count(*)
679            FROM operational_mutations m
680            JOIN operational_collections c ON c.name = m.collection_name
681            WHERE c.disabled_at IS NOT NULL AND m.created_at > c.disabled_at
682            ",
683            [],
684            |row| row.get(0),
685        )?;
686        let orphaned_last_access_metadata_rows: i64 = conn.query_row(
687            r"
688            SELECT count(*)
689            FROM node_access_metadata am
690            WHERE NOT EXISTS (
691                SELECT 1 FROM nodes n WHERE n.logical_id = am.logical_id
692            )
693            ",
694            [],
695            |row| row.get(0),
696        )?;
697
698        let mut warnings = Vec::new();
699        if orphaned_chunks > 0 {
700            warnings.push(format!(
701                "{orphaned_chunks} orphaned chunk(s) with no surviving node history"
702            ));
703        }
704        if null_source_ref_nodes > 0 {
705            warnings.push(format!(
706                "{null_source_ref_nodes} active node(s) with null source_ref"
707            ));
708        }
709        if broken_step_fk > 0 {
710            warnings.push(format!(
711                "{broken_step_fk} step(s) referencing non-existent run"
712            ));
713        }
714        if broken_action_fk > 0 {
715            warnings.push(format!(
716                "{broken_action_fk} action(s) referencing non-existent step"
717            ));
718        }
719        if stale_fts_rows > 0 {
720            warnings.push(format!(
721                "{stale_fts_rows} stale FTS row(s) referencing missing chunk"
722            ));
723        }
724        if fts_rows_for_superseded_nodes > 0 {
725            warnings.push(format!(
726                "{fts_rows_for_superseded_nodes} FTS row(s) for superseded node(s)"
727            ));
728        }
729        if stale_property_fts_rows > 0 {
730            warnings.push(format!(
731                "{stale_property_fts_rows} stale property FTS row(s) for superseded/missing node(s)"
732            ));
733        }
734        if orphaned_property_fts_rows > 0 {
735            warnings.push(format!(
736                "{orphaned_property_fts_rows} orphaned property FTS row(s) for unregistered kind(s)"
737            ));
738        }
739        if mismatched_kind_property_fts_rows > 0 {
740            warnings.push(format!(
741                "{mismatched_kind_property_fts_rows} property FTS row(s) whose kind does not match the active node"
742            ));
743        }
744        if duplicate_property_fts_rows > 0 {
745            warnings.push(format!(
746                "{duplicate_property_fts_rows} active logical ID(s) with duplicate property FTS rows"
747            ));
748        }
749        if drifted_property_fts_rows > 0 {
750            warnings.push(format!(
751                "{drifted_property_fts_rows} property FTS row(s) with stale text_content"
752            ));
753        }
754        if dangling_edges > 0 {
755            warnings.push(format!(
756                "{dangling_edges} active edge(s) with missing endpoint node"
757            ));
758        }
759        if orphaned_supersession_chains > 0 {
760            warnings.push(format!(
761                "{orphaned_supersession_chains} logical_id(s) with all versions superseded"
762            ));
763        }
764        if stale_vec_rows > 0 {
765            warnings.push(format!(
766                "{stale_vec_rows} stale vec row(s) referencing missing chunk"
767            ));
768        }
769        if vec_rows_for_superseded_nodes > 0 {
770            warnings.push(format!(
771                "{vec_rows_for_superseded_nodes} vec row(s) whose node history is missing"
772            ));
773        }
774        if missing_operational_current_rows > 0 {
775            warnings.push(format!(
776                "{missing_operational_current_rows} latest-state key(s) missing operational_current rows"
777            ));
778        }
779        if stale_operational_current_rows > 0 {
780            warnings.push(format!(
781                "{stale_operational_current_rows} stale operational_current row(s)"
782            ));
783        }
784        if disabled_collection_mutations > 0 {
785            warnings.push(format!(
786                "{disabled_collection_mutations} mutation(s) were written after collection disable"
787            ));
788        }
789        if orphaned_last_access_metadata_rows > 0 {
790            warnings.push(format!(
791                "{orphaned_last_access_metadata_rows} last_access metadata row(s) reference missing node history"
792            ));
793        }
794
795        Ok(SemanticReport {
796            orphaned_chunks: i64_to_usize(orphaned_chunks),
797            null_source_ref_nodes: i64_to_usize(null_source_ref_nodes),
798            broken_step_fk: i64_to_usize(broken_step_fk),
799            broken_action_fk: i64_to_usize(broken_action_fk),
800            stale_fts_rows: i64_to_usize(stale_fts_rows),
801            fts_rows_for_superseded_nodes: i64_to_usize(fts_rows_for_superseded_nodes),
802            stale_property_fts_rows: i64_to_usize(stale_property_fts_rows),
803            orphaned_property_fts_rows: i64_to_usize(orphaned_property_fts_rows),
804            mismatched_kind_property_fts_rows: i64_to_usize(mismatched_kind_property_fts_rows),
805            duplicate_property_fts_rows: i64_to_usize(duplicate_property_fts_rows),
806            drifted_property_fts_rows: i64_to_usize(drifted_property_fts_rows),
807            dangling_edges: i64_to_usize(dangling_edges),
808            orphaned_supersession_chains: i64_to_usize(orphaned_supersession_chains),
809            stale_vec_rows: i64_to_usize(stale_vec_rows),
810            vec_rows_for_superseded_nodes: i64_to_usize(vec_rows_for_superseded_nodes),
811            missing_operational_current_rows: i64_to_usize(missing_operational_current_rows),
812            stale_operational_current_rows: i64_to_usize(stale_operational_current_rows),
813            disabled_collection_mutations: i64_to_usize(disabled_collection_mutations),
814            orphaned_last_access_metadata_rows: i64_to_usize(orphaned_last_access_metadata_rows),
815            warnings,
816        })
817    }
818
819    /// # Errors
820    /// Returns [`EngineError`] if the collection metadata is invalid or the insert fails.
821    pub fn register_operational_collection(
822        &self,
823        request: &OperationalRegisterRequest,
824    ) -> Result<OperationalCollectionRecord, EngineError> {
825        if request.name.trim().is_empty() {
826            return Err(EngineError::InvalidWrite(
827                "operational collection name must not be empty".to_owned(),
828            ));
829        }
830        if request.schema_json.is_empty() {
831            return Err(EngineError::InvalidWrite(
832                "operational collection schema_json must not be empty".to_owned(),
833            ));
834        }
835        if request.retention_json.is_empty() {
836            return Err(EngineError::InvalidWrite(
837                "operational collection retention_json must not be empty".to_owned(),
838            ));
839        }
840        if request.filter_fields_json.is_empty() {
841            return Err(EngineError::InvalidWrite(
842                "operational collection filter_fields_json must not be empty".to_owned(),
843            ));
844        }
845        parse_operational_validation_contract(&request.validation_json)
846            .map_err(EngineError::InvalidWrite)?;
847        parse_operational_secondary_indexes_json(&request.secondary_indexes_json, request.kind)
848            .map_err(EngineError::InvalidWrite)?;
849        if request.format_version <= 0 {
850            return Err(EngineError::InvalidWrite(
851                "operational collection format_version must be positive".to_owned(),
852            ));
853        }
854        parse_operational_filter_fields(&request.filter_fields_json)
855            .map_err(EngineError::InvalidWrite)?;
856
857        let mut conn = self.connect()?;
858        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
859        tx.execute(
860            "INSERT INTO operational_collections \
861             (name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at) \
862             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, unixepoch())",
863            rusqlite::params![
864                request.name.as_str(),
865                request.kind.as_str(),
866                request.schema_json.as_str(),
867                request.retention_json.as_str(),
868                request.filter_fields_json.as_str(),
869                request.validation_json.as_str(),
870                request.secondary_indexes_json.as_str(),
871                request.format_version,
872            ],
873        )?;
874        persist_simple_provenance_event(
875            &tx,
876            "operational_collection_registered",
877            request.name.as_str(),
878            Some(serde_json::json!({
879                "kind": request.kind.as_str(),
880                "format_version": request.format_version,
881            })),
882        )?;
883        tx.commit()?;
884
885        self.describe_operational_collection(&request.name)?
886            .ok_or_else(|| {
887                EngineError::Bridge("registered collection missing after commit".to_owned())
888            })
889    }
890
891    /// # Errors
892    /// Returns [`EngineError`] if the database query fails.
893    pub fn describe_operational_collection(
894        &self,
895        name: &str,
896    ) -> Result<Option<OperationalCollectionRecord>, EngineError> {
897        let conn = self.connect()?;
898        load_operational_collection_record(&conn, name)
899    }
900
901    /// # Errors
902    /// Returns [`EngineError`] if the collection is missing, the filter contract is invalid,
903    /// or existing mutation backfill fails.
904    pub fn update_operational_collection_filters(
905        &self,
906        name: &str,
907        filter_fields_json: &str,
908    ) -> Result<OperationalCollectionRecord, EngineError> {
909        if filter_fields_json.is_empty() {
910            return Err(EngineError::InvalidWrite(
911                "operational collection filter_fields_json must not be empty".to_owned(),
912            ));
913        }
914        let declared_fields = parse_operational_filter_fields(filter_fields_json)
915            .map_err(EngineError::InvalidWrite)?;
916
917        let mut conn = self.connect()?;
918        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
919        load_operational_collection_record(&tx, name)?.ok_or_else(|| {
920            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
921        })?;
922        tx.execute(
923            "UPDATE operational_collections SET filter_fields_json = ?2 WHERE name = ?1",
924            rusqlite::params![name, filter_fields_json],
925        )?;
926        tx.execute(
927            "DELETE FROM operational_filter_values WHERE collection_name = ?1",
928            [name],
929        )?;
930
931        let mut mutation_stmt = tx.prepare(
932            "SELECT id, payload_json FROM operational_mutations \
933             WHERE collection_name = ?1 ORDER BY mutation_order",
934        )?;
935        let mutations = mutation_stmt
936            .query_map([name], |row| {
937                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
938            })?
939            .collect::<Result<Vec<_>, _>>()?;
940        drop(mutation_stmt);
941
942        let mut insert_filter_value = tx.prepare_cached(
943            "INSERT INTO operational_filter_values \
944             (mutation_id, collection_name, field_name, string_value, integer_value) \
945             VALUES (?1, ?2, ?3, ?4, ?5)",
946        )?;
947        let mut inserted_values = 0usize;
948        for (mutation_id, payload_json) in &mutations {
949            for filter_value in
950                extract_operational_filter_values(&declared_fields, payload_json.as_str())
951            {
952                insert_filter_value.execute(rusqlite::params![
953                    mutation_id,
954                    name,
955                    filter_value.field_name,
956                    filter_value.string_value,
957                    filter_value.integer_value,
958                ])?;
959                inserted_values += 1;
960            }
961        }
962        drop(insert_filter_value);
963
964        persist_simple_provenance_event(
965            &tx,
966            "operational_collection_filter_fields_updated",
967            name,
968            Some(serde_json::json!({
969                "field_count": declared_fields.len(),
970                "mutations_backfilled": mutations.len(),
971                "inserted_filter_values": inserted_values,
972            })),
973        )?;
974        let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
975            EngineError::Bridge("operational collection missing after filter update".to_owned())
976        })?;
977        tx.commit()?;
978        Ok(updated)
979    }
980
981    /// # Errors
982    /// Returns [`EngineError`] if the collection is missing or the validation contract is invalid.
983    pub fn update_operational_collection_validation(
984        &self,
985        name: &str,
986        validation_json: &str,
987    ) -> Result<OperationalCollectionRecord, EngineError> {
988        parse_operational_validation_contract(validation_json)
989            .map_err(EngineError::InvalidWrite)?;
990
991        let mut conn = self.connect()?;
992        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
993        load_operational_collection_record(&tx, name)?.ok_or_else(|| {
994            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
995        })?;
996        tx.execute(
997            "UPDATE operational_collections SET validation_json = ?2 WHERE name = ?1",
998            rusqlite::params![name, validation_json],
999        )?;
1000        persist_simple_provenance_event(
1001            &tx,
1002            "operational_collection_validation_updated",
1003            name,
1004            Some(serde_json::json!({
1005                "has_validation": !validation_json.is_empty(),
1006            })),
1007        )?;
1008        let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1009            EngineError::Bridge("operational collection missing after validation update".to_owned())
1010        })?;
1011        tx.commit()?;
1012        Ok(updated)
1013    }
1014
1015    /// # Errors
1016    /// Returns [`EngineError`] if the collection is missing, the contract is invalid,
1017    /// or derived index rebuild fails.
1018    pub fn update_operational_collection_secondary_indexes(
1019        &self,
1020        name: &str,
1021        secondary_indexes_json: &str,
1022    ) -> Result<OperationalCollectionRecord, EngineError> {
1023        let mut conn = self.connect()?;
1024        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1025        let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1026            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1027        })?;
1028        let indexes = parse_operational_secondary_indexes_json(secondary_indexes_json, record.kind)
1029            .map_err(EngineError::InvalidWrite)?;
1030        tx.execute(
1031            "UPDATE operational_collections SET secondary_indexes_json = ?2 WHERE name = ?1",
1032            rusqlite::params![name, secondary_indexes_json],
1033        )?;
1034        let (mutation_entries_rebuilt, current_entries_rebuilt) =
1035            rebuild_operational_secondary_index_entries(&tx, &record.name, record.kind, &indexes)?;
1036        persist_simple_provenance_event(
1037            &tx,
1038            "operational_collection_secondary_indexes_updated",
1039            name,
1040            Some(serde_json::json!({
1041                "index_count": indexes.len(),
1042                "mutation_entries_rebuilt": mutation_entries_rebuilt,
1043                "current_entries_rebuilt": current_entries_rebuilt,
1044            })),
1045        )?;
1046        let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1047            EngineError::Bridge(
1048                "operational collection missing after secondary index update".to_owned(),
1049            )
1050        })?;
1051        tx.commit()?;
1052        Ok(updated)
1053    }
1054
1055    /// # Errors
1056    /// Returns [`EngineError`] if the collection is missing or rebuild fails.
1057    pub fn rebuild_operational_secondary_indexes(
1058        &self,
1059        name: &str,
1060    ) -> Result<OperationalSecondaryIndexRebuildReport, EngineError> {
1061        let mut conn = self.connect()?;
1062        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1063        let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1064            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1065        })?;
1066        let indexes =
1067            parse_operational_secondary_indexes_json(&record.secondary_indexes_json, record.kind)
1068                .map_err(EngineError::InvalidWrite)?;
1069        let (mutation_entries_rebuilt, current_entries_rebuilt) =
1070            rebuild_operational_secondary_index_entries(&tx, &record.name, record.kind, &indexes)?;
1071        persist_simple_provenance_event(
1072            &tx,
1073            "operational_secondary_indexes_rebuilt",
1074            name,
1075            Some(serde_json::json!({
1076                "index_count": indexes.len(),
1077                "mutation_entries_rebuilt": mutation_entries_rebuilt,
1078                "current_entries_rebuilt": current_entries_rebuilt,
1079            })),
1080        )?;
1081        tx.commit()?;
1082        Ok(OperationalSecondaryIndexRebuildReport {
1083            collection_name: name.to_owned(),
1084            mutation_entries_rebuilt,
1085            current_entries_rebuilt,
1086        })
1087    }
1088
1089    /// # Errors
1090    /// Returns [`EngineError`] if the collection is missing or its validation contract is invalid.
1091    pub fn validate_operational_collection_history(
1092        &self,
1093        name: &str,
1094    ) -> Result<OperationalHistoryValidationReport, EngineError> {
1095        let conn = self.connect()?;
1096        let record = load_operational_collection_record(&conn, name)?.ok_or_else(|| {
1097            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1098        })?;
1099        let Some(contract) = parse_operational_validation_contract(&record.validation_json)
1100            .map_err(EngineError::InvalidWrite)?
1101        else {
1102            return Err(EngineError::InvalidWrite(format!(
1103                "operational collection '{name}' has no validation_json configured"
1104            )));
1105        };
1106
1107        let mut stmt = conn.prepare(
1108            "SELECT id, record_key, op_kind, payload_json FROM operational_mutations \
1109             WHERE collection_name = ?1 ORDER BY mutation_order",
1110        )?;
1111        let rows = stmt
1112            .query_map([name], |row| {
1113                Ok((
1114                    row.get::<_, String>(0)?,
1115                    row.get::<_, String>(1)?,
1116                    row.get::<_, String>(2)?,
1117                    row.get::<_, String>(3)?,
1118                ))
1119            })?
1120            .collect::<Result<Vec<_>, _>>()?;
1121        drop(stmt);
1122
1123        let mut checked_rows = 0usize;
1124        let mut issues = Vec::new();
1125        for (mutation_id, record_key, op_kind, payload_json) in rows {
1126            if op_kind == "delete" {
1127                continue;
1128            }
1129            checked_rows += 1;
1130            if let Err(message) =
1131                validate_operational_payload_against_contract(&contract, payload_json.as_str())
1132            {
1133                issues.push(OperationalHistoryValidationIssue {
1134                    mutation_id,
1135                    record_key,
1136                    op_kind,
1137                    message,
1138                });
1139            }
1140        }
1141
1142        Ok(OperationalHistoryValidationReport {
1143            collection_name: name.to_owned(),
1144            checked_rows,
1145            invalid_row_count: issues.len(),
1146            issues,
1147        })
1148    }
1149
1150    /// # Errors
1151    /// Returns [`EngineError`] if the database query fails.
1152    pub fn disable_operational_collection(
1153        &self,
1154        name: &str,
1155    ) -> Result<OperationalCollectionRecord, EngineError> {
1156        let mut conn = self.connect()?;
1157        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1158        let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1159            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1160        })?;
1161        let changed = if record.disabled_at.is_none() {
1162            tx.execute(
1163                "UPDATE operational_collections SET disabled_at = unixepoch() WHERE name = ?1",
1164                [name],
1165            )?;
1166            true
1167        } else {
1168            false
1169        };
1170        let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1171            EngineError::Bridge("operational collection missing after disable".to_owned())
1172        })?;
1173        persist_simple_provenance_event(
1174            &tx,
1175            "operational_collection_disabled",
1176            name,
1177            Some(serde_json::json!({
1178                "disabled_at": record.disabled_at,
1179                "changed": changed,
1180            })),
1181        )?;
1182        tx.commit()?;
1183        Ok(record)
1184    }
1185
1186    /// # Errors
1187    /// Returns [`EngineError`] if the database query fails.
1188    pub fn compact_operational_collection(
1189        &self,
1190        name: &str,
1191        dry_run: bool,
1192    ) -> Result<OperationalCompactionReport, EngineError> {
1193        let mut conn = self.connect()?;
1194        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1195        let collection = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1196            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1197        })?;
1198        validate_append_only_operational_collection(&collection, "compact")?;
1199        let (mutation_ids, before_timestamp) =
1200            operational_compaction_candidates(&tx, &collection.retention_json, name)?;
1201        if dry_run {
1202            drop(tx);
1203            return Ok(OperationalCompactionReport {
1204                collection_name: name.to_owned(),
1205                deleted_mutations: mutation_ids.len(),
1206                dry_run: true,
1207                before_timestamp,
1208            });
1209        }
1210        let mut delete_stmt =
1211            tx.prepare_cached("DELETE FROM operational_mutations WHERE id = ?1")?;
1212        for mutation_id in &mutation_ids {
1213            delete_stmt.execute([mutation_id.as_str()])?;
1214        }
1215        drop(delete_stmt);
1216        persist_simple_provenance_event(
1217            &tx,
1218            "operational_collection_compacted",
1219            name,
1220            Some(serde_json::json!({
1221                "deleted_mutations": mutation_ids.len(),
1222                "before_timestamp": before_timestamp,
1223            })),
1224        )?;
1225        tx.commit()?;
1226        Ok(OperationalCompactionReport {
1227            collection_name: name.to_owned(),
1228            deleted_mutations: mutation_ids.len(),
1229            dry_run: false,
1230            before_timestamp,
1231        })
1232    }
1233
1234    /// # Errors
1235    /// Returns [`EngineError`] if the database query fails.
1236    pub fn purge_operational_collection(
1237        &self,
1238        name: &str,
1239        before_timestamp: i64,
1240    ) -> Result<OperationalPurgeReport, EngineError> {
1241        let mut conn = self.connect()?;
1242        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1243        let collection = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1244            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1245        })?;
1246        validate_append_only_operational_collection(&collection, "purge")?;
1247        let deleted_mutations = tx.execute(
1248            "DELETE FROM operational_mutations WHERE collection_name = ?1 AND created_at < ?2",
1249            rusqlite::params![name, before_timestamp],
1250        )?;
1251        persist_simple_provenance_event(
1252            &tx,
1253            "operational_collection_purged",
1254            name,
1255            Some(serde_json::json!({
1256                "deleted_mutations": deleted_mutations,
1257                "before_timestamp": before_timestamp,
1258            })),
1259        )?;
1260        tx.commit()?;
1261        Ok(OperationalPurgeReport {
1262            collection_name: name.to_owned(),
1263            deleted_mutations,
1264            before_timestamp,
1265        })
1266    }
1267
1268    /// # Errors
1269    /// Returns [`EngineError`] if collection selection or policy parsing fails.
1270    pub fn plan_operational_retention(
1271        &self,
1272        now_timestamp: i64,
1273        collection_names: Option<&[String]>,
1274        max_collections: Option<usize>,
1275    ) -> Result<OperationalRetentionPlanReport, EngineError> {
1276        let conn = self.connect()?;
1277        let records = load_operational_retention_records(&conn, collection_names, max_collections)?;
1278        let mut items = Vec::with_capacity(records.len());
1279        for record in records {
1280            items.push(plan_operational_retention_item(
1281                &conn,
1282                &record,
1283                now_timestamp,
1284            )?);
1285        }
1286        Ok(OperationalRetentionPlanReport {
1287            planned_at: now_timestamp,
1288            collections_examined: items.len(),
1289            items,
1290        })
1291    }
1292
1293    /// # Errors
1294    /// Returns [`EngineError`] if collection selection, policy parsing, or execution fails.
1295    pub fn run_operational_retention(
1296        &self,
1297        now_timestamp: i64,
1298        collection_names: Option<&[String]>,
1299        max_collections: Option<usize>,
1300        dry_run: bool,
1301    ) -> Result<OperationalRetentionRunReport, EngineError> {
1302        let mut conn = self.connect()?;
1303        let records = load_operational_retention_records(&conn, collection_names, max_collections)?;
1304        let mut items = Vec::with_capacity(records.len());
1305        let mut collections_acted_on = 0usize;
1306
1307        for record in records {
1308            let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1309            let item = run_operational_retention_item(&tx, &record, now_timestamp, dry_run)?;
1310            if item.deleted_mutations > 0 {
1311                collections_acted_on += 1;
1312            }
1313            if dry_run || item.action_kind == OperationalRetentionActionKind::Noop {
1314                drop(tx);
1315            } else {
1316                tx.commit()?;
1317            }
1318            items.push(item);
1319        }
1320
1321        Ok(OperationalRetentionRunReport {
1322            executed_at: now_timestamp,
1323            collections_examined: items.len(),
1324            collections_acted_on,
1325            dry_run,
1326            items,
1327        })
1328    }
1329
1330    /// # Errors
1331    /// Returns [`EngineError`] if the database query fails.
1332    pub fn trace_operational_collection(
1333        &self,
1334        collection_name: &str,
1335        record_key: Option<&str>,
1336    ) -> Result<OperationalTraceReport, EngineError> {
1337        let conn = self.connect()?;
1338        ensure_operational_collection_registered(&conn, collection_name)?;
1339        let mutations = if let Some(record_key) = record_key {
1340            let mut stmt = conn.prepare(
1341                "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
1342                 FROM operational_mutations \
1343                 WHERE collection_name = ?1 AND record_key = ?2 \
1344                 ORDER BY mutation_order",
1345            )?;
1346            stmt.query_map([collection_name, record_key], map_operational_mutation_row)?
1347                .collect::<Result<Vec<_>, _>>()?
1348        } else {
1349            let mut stmt = conn.prepare(
1350                "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
1351                 FROM operational_mutations \
1352                 WHERE collection_name = ?1 \
1353                 ORDER BY mutation_order",
1354            )?;
1355            stmt.query_map([collection_name], map_operational_mutation_row)?
1356                .collect::<Result<Vec<_>, _>>()?
1357        };
1358        let current_rows = if let Some(record_key) = record_key {
1359            let mut stmt = conn.prepare(
1360                "SELECT collection_name, record_key, payload_json, updated_at, last_mutation_id \
1361                 FROM operational_current \
1362                 WHERE collection_name = ?1 AND record_key = ?2 \
1363                 ORDER BY updated_at, record_key",
1364            )?;
1365            stmt.query_map([collection_name, record_key], map_operational_current_row)?
1366                .collect::<Result<Vec<_>, _>>()?
1367        } else {
1368            let mut stmt = conn.prepare(
1369                "SELECT collection_name, record_key, payload_json, updated_at, last_mutation_id \
1370                 FROM operational_current \
1371                 WHERE collection_name = ?1 \
1372                 ORDER BY updated_at, record_key",
1373            )?;
1374            stmt.query_map([collection_name], map_operational_current_row)?
1375                .collect::<Result<Vec<_>, _>>()?
1376        };
1377
1378        Ok(OperationalTraceReport {
1379            collection_name: collection_name.to_owned(),
1380            record_key: record_key.map(str::to_owned),
1381            mutation_count: mutations.len(),
1382            current_count: current_rows.len(),
1383            mutations,
1384            current_rows,
1385        })
1386    }
1387
1388    /// # Errors
1389    /// Returns [`EngineError`] if the collection contract is invalid or the filtered read fails.
1390    pub fn read_operational_collection(
1391        &self,
1392        request: &OperationalReadRequest,
1393    ) -> Result<OperationalReadReport, EngineError> {
1394        if request.collection_name.trim().is_empty() {
1395            return Err(EngineError::InvalidWrite(
1396                "operational read collection_name must not be empty".to_owned(),
1397            ));
1398        }
1399        if request.filters.is_empty() {
1400            return Err(EngineError::InvalidWrite(
1401                "operational read requires at least one filter clause".to_owned(),
1402            ));
1403        }
1404
1405        let conn = self.connect()?;
1406        let record = load_operational_collection_record(&conn, &request.collection_name)?
1407            .ok_or_else(|| {
1408                EngineError::InvalidWrite(format!(
1409                    "operational collection '{}' is not registered",
1410                    request.collection_name
1411                ))
1412            })?;
1413        validate_append_only_operational_collection(&record, "read")?;
1414        let declared_fields = parse_operational_filter_fields(&record.filter_fields_json)
1415            .map_err(EngineError::InvalidWrite)?;
1416        let secondary_indexes =
1417            parse_operational_secondary_indexes_json(&record.secondary_indexes_json, record.kind)
1418                .map_err(EngineError::InvalidWrite)?;
1419        let applied_limit = operational_read_limit(request.limit)?;
1420        let filters = compile_operational_read_filters(&request.filters, &declared_fields)?;
1421        if let Some(report) = execute_operational_secondary_index_read(
1422            &conn,
1423            &request.collection_name,
1424            &filters,
1425            &secondary_indexes,
1426            applied_limit,
1427        )? {
1428            return Ok(report);
1429        }
1430        execute_operational_filtered_read(&conn, &request.collection_name, &filters, applied_limit)
1431    }
1432
1433    /// # Errors
1434    /// Returns [`EngineError`] if the database query fails or collection validation fails.
1435    pub fn rebuild_operational_current(
1436        &self,
1437        collection_name: Option<&str>,
1438    ) -> Result<OperationalRepairReport, EngineError> {
1439        let mut conn = self.connect()?;
1440        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1441        let collections = if let Some(name) = collection_name {
1442            let maybe_kind: Option<String> = tx
1443                .query_row(
1444                    "SELECT kind FROM operational_collections WHERE name = ?1",
1445                    [name],
1446                    |row| row.get(0),
1447                )
1448                .optional()?;
1449            let Some(kind) = maybe_kind else {
1450                return Err(EngineError::InvalidWrite(format!(
1451                    "operational collection '{name}' is not registered"
1452                )));
1453            };
1454            if kind != OperationalCollectionKind::LatestState.as_str() {
1455                return Err(EngineError::InvalidWrite(format!(
1456                    "operational collection '{name}' is not latest_state"
1457                )));
1458            }
1459            vec![name.to_owned()]
1460        } else {
1461            let mut stmt = tx.prepare(
1462                "SELECT name FROM operational_collections WHERE kind = 'latest_state' ORDER BY name",
1463            )?;
1464            stmt.query_map([], |row| row.get::<_, String>(0))?
1465                .collect::<Result<Vec<_>, _>>()?
1466        };
1467
1468        let rebuilt_rows = rebuild_operational_current_rows(&tx, &collections)?;
1469        for collection in &collections {
1470            let record = load_operational_collection_record(&tx, collection)?.ok_or_else(|| {
1471                EngineError::Bridge(format!(
1472                    "operational collection '{collection}' missing during current rebuild"
1473                ))
1474            })?;
1475            let indexes = parse_operational_secondary_indexes_json(
1476                &record.secondary_indexes_json,
1477                record.kind,
1478            )
1479            .map_err(EngineError::InvalidWrite)?;
1480            if !indexes.is_empty() {
1481                rebuild_operational_secondary_index_entries(
1482                    &tx,
1483                    &record.name,
1484                    record.kind,
1485                    &indexes,
1486                )?;
1487            }
1488        }
1489
1490        persist_simple_provenance_event(
1491            &tx,
1492            "operational_current_rebuilt",
1493            collection_name.unwrap_or("*"),
1494            Some(serde_json::json!({
1495                "collections_rebuilt": collections.len(),
1496                "current_rows_rebuilt": rebuilt_rows,
1497            })),
1498        )?;
1499        tx.commit()?;
1500
1501        Ok(OperationalRepairReport {
1502            collections_rebuilt: collections.len(),
1503            current_rows_rebuilt: rebuilt_rows,
1504        })
1505    }
1506
1507    /// # Errors
1508    /// Returns [`EngineError`] if the database connection fails or the projection rebuild fails.
1509    pub fn rebuild_projections(
1510        &self,
1511        target: ProjectionTarget,
1512    ) -> Result<ProjectionRepairReport, EngineError> {
1513        self.projections.rebuild_projections(target)
1514    }
1515
1516    /// # Errors
1517    /// Returns [`EngineError`] if the database connection fails or the projection rebuild fails.
1518    pub fn rebuild_missing_projections(&self) -> Result<ProjectionRepairReport, EngineError> {
1519        self.projections.rebuild_missing_projections()
1520    }
1521
1522    /// Register (or update) an FTS property projection schema for the given node kind.
1523    ///
1524    /// After registration, any node of this kind will have the declared JSON property
1525    /// paths extracted, concatenated, and indexed in the `fts_node_properties` FTS5 table.
1526    ///
1527    /// # Errors
1528    /// Returns [`EngineError`] if `property_paths` is empty, contains duplicates,
1529    /// or if the database write fails.
1530    pub fn register_fts_property_schema(
1531        &self,
1532        kind: &str,
1533        property_paths: &[String],
1534        separator: Option<&str>,
1535    ) -> Result<FtsPropertySchemaRecord, EngineError> {
1536        validate_fts_property_paths(property_paths)?;
1537        let separator = separator.unwrap_or(" ");
1538        let paths_json = serde_json::to_string(property_paths).map_err(|e| {
1539            EngineError::InvalidWrite(format!("failed to serialize property paths: {e}"))
1540        })?;
1541
1542        let mut conn = self.connect()?;
1543        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1544        tx.execute(
1545            "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
1546             VALUES (?1, ?2, ?3) \
1547             ON CONFLICT(kind) DO UPDATE SET property_paths_json = ?2, separator = ?3",
1548            rusqlite::params![kind, paths_json, separator],
1549        )?;
1550        persist_simple_provenance_event(
1551            &tx,
1552            "fts_property_schema_registered",
1553            kind,
1554            Some(serde_json::json!({
1555                "property_paths": property_paths,
1556                "separator": separator,
1557            })),
1558        )?;
1559        tx.commit()?;
1560
1561        self.describe_fts_property_schema(kind)?.ok_or_else(|| {
1562            EngineError::Bridge("registered FTS property schema missing after commit".to_owned())
1563        })
1564    }
1565
1566    /// Return the FTS property schema for a single node kind, if registered.
1567    ///
1568    /// # Errors
1569    /// Returns [`EngineError`] if the database query fails.
1570    pub fn describe_fts_property_schema(
1571        &self,
1572        kind: &str,
1573    ) -> Result<Option<FtsPropertySchemaRecord>, EngineError> {
1574        let conn = self.connect()?;
1575        load_fts_property_schema_record(&conn, kind)
1576    }
1577
1578    /// Return all registered FTS property schemas.
1579    ///
1580    /// # Errors
1581    /// Returns [`EngineError`] if the database query fails.
1582    pub fn list_fts_property_schemas(&self) -> Result<Vec<FtsPropertySchemaRecord>, EngineError> {
1583        let conn = self.connect()?;
1584        let mut stmt = conn.prepare(
1585            "SELECT kind, property_paths_json, separator, format_version \
1586             FROM fts_property_schemas ORDER BY kind",
1587        )?;
1588        let records = stmt
1589            .query_map([], |row| {
1590                let paths_json: String = row.get(1)?;
1591                let paths: Vec<String> = serde_json::from_str(&paths_json).unwrap_or_default();
1592                Ok(FtsPropertySchemaRecord {
1593                    kind: row.get(0)?,
1594                    property_paths: paths,
1595                    separator: row.get(2)?,
1596                    format_version: row.get(3)?,
1597                })
1598            })?
1599            .collect::<Result<Vec<_>, _>>()?;
1600        Ok(records)
1601    }
1602
1603    /// Remove the FTS property schema for a node kind.
1604    ///
1605    /// This does **not** delete existing `fts_node_properties` rows for this kind;
1606    /// call `rebuild_projections(Fts)` to clean up stale rows.
1607    ///
1608    /// # Errors
1609    /// Returns [`EngineError`] if the kind is not registered or the delete fails.
1610    pub fn remove_fts_property_schema(&self, kind: &str) -> Result<(), EngineError> {
1611        let mut conn = self.connect()?;
1612        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1613        let deleted = tx.execute("DELETE FROM fts_property_schemas WHERE kind = ?1", [kind])?;
1614        if deleted == 0 {
1615            return Err(EngineError::InvalidWrite(format!(
1616                "FTS property schema for kind '{kind}' is not registered"
1617            )));
1618        }
1619        persist_simple_provenance_event(&tx, "fts_property_schema_removed", kind, None)?;
1620        tx.commit()?;
1621        Ok(())
1622    }
1623
1624    /// Recreate enabled vector profiles from persisted `vector_profiles` metadata.
1625    ///
1626    /// # Errors
1627    /// Returns [`EngineError`] if the database connection fails, reading metadata fails,
1628    /// or sqlite-vec support is unavailable while enabled profiles are present.
1629    pub fn restore_vector_profiles(&self) -> Result<ProjectionRepairReport, EngineError> {
1630        let conn = self.connect()?;
1631        let profiles: Vec<(String, String, i64)> = {
1632            let mut stmt = conn.prepare(
1633                "SELECT profile, table_name, dimension \
1634                 FROM vector_profiles WHERE enabled = 1 ORDER BY profile",
1635            )?;
1636            stmt.query_map([], |row| {
1637                Ok((
1638                    row.get::<_, String>(0)?,
1639                    row.get::<_, String>(1)?,
1640                    row.get::<_, i64>(2)?,
1641                ))
1642            })?
1643            .collect::<Result<Vec<_>, _>>()?
1644        };
1645
1646        for (profile, table_name, dimension) in &profiles {
1647            let dimension = usize::try_from(*dimension).map_err(|_| {
1648                EngineError::Bridge(format!("invalid vector profile dimension: {dimension}"))
1649            })?;
1650            self.schema_manager
1651                .ensure_vector_profile(&conn, profile, table_name, dimension)?;
1652        }
1653
1654        Ok(ProjectionRepairReport {
1655            targets: vec![ProjectionTarget::Vec],
1656            rebuilt_rows: profiles.len(),
1657            notes: vec![],
1658        })
1659    }
1660
1661    /// Rebuild vector embeddings using an application-supplied regeneration
1662    /// contract and generator command.
1663    ///
1664    /// The config is persisted in `vector_embedding_contracts` so the metadata
1665    /// required for recovery survives future repair runs.
1666    ///
1667    /// # Errors
1668    /// Returns [`EngineError`] if the database connection fails, the config is
1669    /// invalid, the generator command fails, or the regenerated embeddings are
1670    /// malformed.
1671    #[allow(clippy::too_many_lines)]
1672    pub fn regenerate_vector_embeddings(
1673        &self,
1674        config: &VectorRegenerationConfig,
1675    ) -> Result<VectorRegenerationReport, EngineError> {
1676        self.regenerate_vector_embeddings_with_policy(config, &VectorGeneratorPolicy::default())
1677    }
1678
    /// Rebuild vector embeddings with an explicit generator `policy`.
    ///
    /// Flow, as implemented below:
    /// 1. Validate `config` against `policy`, collect the chunks to embed, and
    ///    hash the generator input payload (the "snapshot hash").
    /// 2. Record a `vector_regeneration_requested` audit event.
    /// 3. Run the generator and validate its output.
    /// 4. In an immediate transaction: ensure the vector profile exists,
    ///    re-collect the chunks and compare hashes to detect drift, persist the
    ///    contract, replace the contents of `vec_nodes_active`, record a
    ///    `vector_regeneration_apply` event, and commit.
    ///
    /// Generator, validation, missing-capability, snapshot-drift, and
    /// missing-embedding failures are additionally recorded (best effort) as
    /// `vector_regeneration_failed` events before the error is returned.
    ///
    /// # Errors
    /// Returns [`EngineError`] if the database connection fails, the config is
    /// invalid, the generator command fails, or the regenerated embeddings are
    /// malformed.
    #[allow(clippy::too_many_lines)]
    pub fn regenerate_vector_embeddings_with_policy(
        &self,
        config: &VectorRegenerationConfig,
        policy: &VectorGeneratorPolicy,
    ) -> Result<VectorRegenerationReport, EngineError> {
        let conn = self.connect()?;
        // From here on `config` is the validated form returned by the validator.
        let config = validate_vector_regeneration_config(&conn, config, policy)
            .map_err(|failure| failure.to_engine_error())?;
        let chunks = collect_regeneration_chunks(&conn)?;
        let payload = build_regeneration_input(&config, chunks.clone());
        // Hash of the generator input; compared again at apply time to detect drift.
        let snapshot_hash = compute_snapshot_hash(&payload)?;
        let audit_metadata = VectorRegenerationAuditMetadata {
            profile: config.profile.clone(),
            model_identity: config.model_identity.clone(),
            model_version: config.model_version.clone(),
            chunk_count: chunks.len(),
            snapshot_hash: snapshot_hash.clone(),
            failure_class: None,
        };
        // Audit the request before the generator is invoked.
        persist_vector_regeneration_event(
            &conn,
            "vector_regeneration_requested",
            &config.profile,
            &audit_metadata,
        )?;
        let notes = generator_policy_notes(policy);
        let generated = match run_vector_generator_bounded(&config, &payload, policy) {
            Ok(generated) => generated,
            Err(failure) => {
                self.persist_vector_regeneration_failure_best_effort(
                    &config.profile,
                    &audit_metadata,
                    &failure,
                );
                return Err(failure.to_engine_error());
            }
        };
        let mut embedding_map = match validate_generated_embeddings(&config, &chunks, generated) {
            Ok(embedding_map) => embedding_map,
            Err(failure) => {
                self.persist_vector_regeneration_failure_best_effort(
                    &config.profile,
                    &audit_metadata,
                    &failure,
                );
                return Err(failure.to_engine_error());
            }
        };

        // Rebind as mutable so a transaction can be opened on the same connection.
        let mut conn = conn;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
        match self.schema_manager.ensure_vector_profile(
            &tx,
            &config.profile,
            &config.table_name,
            config.dimension,
        ) {
            Ok(()) => {}
            Err(SchemaError::MissingCapability(message)) => {
                let failure = VectorRegenerationFailure::new(
                    VectorRegenerationFailureClass::UnsupportedVecCapability,
                    message,
                );
                // End the transaction uncommitted before the failure helper
                // writes its event through a separate connection.
                drop(tx);
                self.persist_vector_regeneration_failure_best_effort(
                    &config.profile,
                    &audit_metadata,
                    &failure,
                );
                return Err(failure.to_engine_error());
            }
            Err(error) => return Err(EngineError::Schema(error)),
        }
        // Re-read the chunks inside the write transaction and compare hashes:
        // if they changed while the generator ran, the embeddings are stale.
        let apply_chunks = collect_regeneration_chunks(&tx)?;
        let apply_payload = build_regeneration_input(&config, apply_chunks.clone());
        let apply_hash = compute_snapshot_hash(&apply_payload)?;
        if apply_hash != snapshot_hash {
            let failure = VectorRegenerationFailure::new(
                VectorRegenerationFailureClass::SnapshotDrift,
                "chunk snapshot changed during generation; retry".to_owned(),
            );
            drop(tx);
            self.persist_vector_regeneration_failure_best_effort(
                &config.profile,
                &audit_metadata,
                &failure,
            );
            return Err(failure.to_engine_error());
        }
        persist_vector_contract(&tx, &config, &snapshot_hash)?;
        // Full replace: clear the active vector rows, then insert one row per chunk.
        tx.execute("DELETE FROM vec_nodes_active", [])?;
        let mut stmt = tx
            .prepare_cached("INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES (?1, ?2)")?;
        let mut regenerated_rows = 0usize;
        for chunk in &apply_chunks {
            let Some(embedding) = embedding_map.remove(&chunk.chunk_id) else {
                // The statement borrows the transaction, so it must be dropped first.
                drop(stmt);
                drop(tx);
                let failure = VectorRegenerationFailure::new(
                    VectorRegenerationFailureClass::MalformedGeneratorJson,
                    format!(
                        "generator did not return embedding for chunk '{}'",
                        chunk.chunk_id
                    ),
                );
                self.persist_vector_regeneration_failure_best_effort(
                    &config.profile,
                    &audit_metadata,
                    &failure,
                );
                return Err(failure.to_engine_error());
            };
            stmt.execute(rusqlite::params![chunk.chunk_id.as_str(), embedding])?;
            regenerated_rows += 1;
        }
        // Release the statement's borrow so the transaction can be committed.
        drop(stmt);
        persist_vector_regeneration_event(
            &tx,
            "vector_regeneration_apply",
            &config.profile,
            &audit_metadata,
        )?;
        tx.commit()?;

        Ok(VectorRegenerationReport {
            profile: config.profile.clone(),
            table_name: config.table_name.clone(),
            dimension: config.dimension,
            // Count from the pre-generation snapshot; equal to the apply-time
            // snapshot whenever the hash comparison above passed.
            total_chunks: chunks.len(),
            regenerated_rows,
            contract_persisted: true,
            notes,
        })
    }
1818
1819    fn persist_vector_regeneration_failure_best_effort(
1820        &self,
1821        profile: &str,
1822        metadata: &VectorRegenerationAuditMetadata,
1823        failure: &VectorRegenerationFailure,
1824    ) {
1825        let Ok(conn) = self.connect() else {
1826            return;
1827        };
1828        let failure_metadata = VectorRegenerationAuditMetadata {
1829            profile: metadata.profile.clone(),
1830            model_identity: metadata.model_identity.clone(),
1831            model_version: metadata.model_version.clone(),
1832            chunk_count: metadata.chunk_count,
1833            snapshot_hash: metadata.snapshot_hash.clone(),
1834            failure_class: Some(failure.failure_class_label().to_owned()),
1835        };
1836        let _ = persist_vector_regeneration_event(
1837            &conn,
1838            "vector_regeneration_failed",
1839            profile,
1840            &failure_metadata,
1841        );
1842    }
1843
1844    /// # Errors
1845    /// Returns [`EngineError`] if the database connection fails or any SQL query fails.
1846    pub fn trace_source(&self, source_ref: &str) -> Result<TraceReport, EngineError> {
1847        let conn = self.connect()?;
1848
1849        let node_logical_ids = collect_strings(
1850            &conn,
1851            "SELECT logical_id FROM nodes WHERE source_ref = ?1 ORDER BY created_at",
1852            source_ref,
1853        )?;
1854        let action_ids = collect_strings(
1855            &conn,
1856            "SELECT id FROM actions WHERE source_ref = ?1 ORDER BY created_at",
1857            source_ref,
1858        )?;
1859        let operational_mutation_ids = collect_strings(
1860            &conn,
1861            "SELECT id FROM operational_mutations WHERE source_ref = ?1 ORDER BY mutation_order",
1862            source_ref,
1863        )?;
1864
1865        Ok(TraceReport {
1866            source_ref: source_ref.to_owned(),
1867            node_rows: count_source_ref(&conn, "nodes", source_ref)?,
1868            edge_rows: count_source_ref(&conn, "edges", source_ref)?,
1869            action_rows: count_source_ref(&conn, "actions", source_ref)?,
1870            operational_mutation_rows: count_source_ref(
1871                &conn,
1872                "operational_mutations",
1873                source_ref,
1874            )?,
1875            node_logical_ids,
1876            action_ids,
1877            operational_mutation_ids,
1878        })
1879    }
1880
    /// Reactivate a retired `logical_id` and rebuild its derived search rows.
    ///
    /// If an active node row already exists for `logical_id`, nothing is changed
    /// and a report with `was_noop: true` is returned. Otherwise the most
    /// recently superseded node row is made active again, edges are restored
    /// via `restore_validated_edges` (scoped by the latest `node_retire`
    /// provenance event, when one exists), the `fts_nodes` and
    /// `fts_node_properties` rows for the node are rebuilt, and a
    /// `restore_logical_id` provenance event is recorded. All changes are made
    /// in a single immediate transaction.
    ///
    /// # Errors
    /// Returns [`EngineError`] if the database connection fails, the transaction cannot be
    /// started, or lifecycle restoration prerequisites are missing.
    #[allow(clippy::too_many_lines)]
    pub fn restore_logical_id(
        &self,
        logical_id: &str,
    ) -> Result<LogicalRestoreReport, EngineError> {
        let mut conn = self.connect()?;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;

        // An active row means there is nothing to restore.
        let active_count: i64 = tx.query_row(
            "SELECT count(*) FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
            [logical_id],
            |row| row.get(0),
        )?;
        if active_count > 0 {
            // Returning here ends the transaction without a commit.
            return Ok(LogicalRestoreReport {
                logical_id: logical_id.to_owned(),
                was_noop: true,
                restored_node_rows: 0,
                restored_edge_rows: 0,
                restored_chunk_rows: 0,
                restored_fts_rows: 0,
                restored_property_fts_rows: 0,
                restored_vec_rows: 0,
                skipped_edges: Vec::new(),
                notes: vec!["logical_id already active".to_owned()],
            });
        }

        // Pick the most recently superseded version; created_at and rowid break ties.
        let restored_node: Option<(String, String)> = tx
            .query_row(
                "SELECT row_id, kind FROM nodes \
                 WHERE logical_id = ?1 AND superseded_at IS NOT NULL \
                 ORDER BY superseded_at DESC, created_at DESC, rowid DESC LIMIT 1",
                [logical_id],
                |row| Ok((row.get(0)?, row.get(1)?)),
            )
            .optional()?;
        // No superseded row either: the logical id has no row to restore.
        let (restored_node_row_id, restored_kind) = restored_node.ok_or_else(|| {
            EngineError::InvalidWrite(format!("logical_id '{logical_id}' is not retired"))
        })?;

        // Reactivate exactly that one row.
        tx.execute(
            "UPDATE nodes SET superseded_at = NULL WHERE row_id = ?1",
            [restored_node_row_id.as_str()],
        )?;

        // The latest `node_retire` event for this logical id scopes edge restoration.
        let retire_scope: Option<(i64, Option<String>, i64)> = tx
            .query_row(
                "SELECT rowid, source_ref, created_at FROM provenance_events \
                 WHERE event_type = 'node_retire' AND subject = ?1 \
                 ORDER BY created_at DESC, rowid DESC LIMIT 1",
                [logical_id],
                |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
            )
            .optional()?;
        let (restored_edge_rows, skipped_edges) = if let Some((
            retire_event_rowid,
            retire_source_ref,
            retire_created_at,
        )) = retire_scope
        {
            restore_validated_edges(
                &tx,
                logical_id,
                retire_source_ref.as_deref(),
                retire_created_at,
                retire_event_rowid,
            )?
        } else {
            // Without a retire event no edges are restored.
            (0, Vec::new())
        };

        // Chunks are only counted here; this function does not modify them.
        let restored_chunk_rows: usize = tx
            .query_row(
                "SELECT count(*) FROM chunks WHERE node_logical_id = ?1",
                [logical_id],
                |row| row.get::<_, i64>(0),
            )
            .map(i64_to_usize)?;
        // Rebuild chunk FTS rows from scratch, using the restored node's kind.
        tx.execute(
            "DELETE FROM fts_nodes WHERE node_logical_id = ?1",
            [logical_id],
        )?;
        let restored_fts_rows = tx.execute(
            "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
             SELECT id, node_logical_id, ?2, text_content \
             FROM chunks WHERE node_logical_id = ?1",
            rusqlite::params![logical_id, restored_kind],
        )?;
        let restored_vec_rows = count_vec_rows_for_logical_id(&tx, logical_id)?;

        // Rebuild property FTS for the restored node.
        tx.execute(
            "DELETE FROM fts_node_properties WHERE node_logical_id = ?1",
            [logical_id],
        )?;
        let restored_property_fts_rows =
            rebuild_single_node_property_fts(&tx, logical_id, &restored_kind)?;

        // Audit the restore in the same transaction as the changes it describes.
        persist_simple_provenance_event(
            &tx,
            "restore_logical_id",
            logical_id,
            Some(serde_json::json!({
                "restored_node_rows": 1,
                "restored_edge_rows": restored_edge_rows,
                "restored_chunk_rows": restored_chunk_rows,
                "restored_fts_rows": restored_fts_rows,
                "restored_property_fts_rows": restored_property_fts_rows,
                "restored_vec_rows": restored_vec_rows,
            })),
        )?;
        tx.commit()?;

        Ok(LogicalRestoreReport {
            logical_id: logical_id.to_owned(),
            was_noop: false,
            restored_node_rows: 1,
            restored_edge_rows,
            restored_chunk_rows,
            restored_fts_rows,
            restored_property_fts_rows,
            restored_vec_rows,
            skipped_edges,
            notes: Vec::new(),
        })
    }
2011
2012    /// # Errors
2013    /// Returns [`EngineError`] if the database connection fails, the transaction cannot be
2014    /// started, or the purge mutation fails.
2015    pub fn purge_logical_id(&self, logical_id: &str) -> Result<LogicalPurgeReport, EngineError> {
2016        let mut conn = self.connect()?;
2017        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2018
2019        let active_count: i64 = tx.query_row(
2020            "SELECT count(*) FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
2021            [logical_id],
2022            |row| row.get(0),
2023        )?;
2024        if active_count > 0 {
2025            return Ok(LogicalPurgeReport {
2026                logical_id: logical_id.to_owned(),
2027                was_noop: true,
2028                deleted_node_rows: 0,
2029                deleted_edge_rows: 0,
2030                deleted_chunk_rows: 0,
2031                deleted_fts_rows: 0,
2032                deleted_vec_rows: 0,
2033                notes: vec!["logical_id is active; purge skipped".to_owned()],
2034            });
2035        }
2036
2037        let node_rows: i64 = tx.query_row(
2038            "SELECT count(*) FROM nodes WHERE logical_id = ?1",
2039            [logical_id],
2040            |row| row.get(0),
2041        )?;
2042        if node_rows == 0 {
2043            return Err(EngineError::InvalidWrite(format!(
2044                "logical_id '{logical_id}' does not exist"
2045            )));
2046        }
2047
2048        let deleted_vec_rows = delete_vec_rows_for_logical_id(&tx, logical_id)?;
2049        let deleted_fts_rows = tx.execute(
2050            "DELETE FROM fts_nodes WHERE node_logical_id = ?1",
2051            [logical_id],
2052        )?;
2053        let deleted_edge_rows = tx.execute(
2054            "DELETE FROM edges WHERE source_logical_id = ?1 OR target_logical_id = ?1",
2055            [logical_id],
2056        )?;
2057        let deleted_chunk_rows = tx.execute(
2058            "DELETE FROM chunks WHERE node_logical_id = ?1",
2059            [logical_id],
2060        )?;
2061        let deleted_node_rows =
2062            tx.execute("DELETE FROM nodes WHERE logical_id = ?1", [logical_id])?;
2063        tx.execute(
2064            "DELETE FROM node_access_metadata WHERE logical_id = ?1",
2065            [logical_id],
2066        )?;
2067
2068        persist_simple_provenance_event(
2069            &tx,
2070            "purge_logical_id",
2071            logical_id,
2072            Some(serde_json::json!({
2073                "deleted_node_rows": deleted_node_rows,
2074                "deleted_edge_rows": deleted_edge_rows,
2075                "deleted_chunk_rows": deleted_chunk_rows,
2076                "deleted_fts_rows": deleted_fts_rows,
2077                "deleted_vec_rows": deleted_vec_rows,
2078            })),
2079        )?;
2080        tx.commit()?;
2081
2082        Ok(LogicalPurgeReport {
2083            logical_id: logical_id.to_owned(),
2084            was_noop: false,
2085            deleted_node_rows,
2086            deleted_edge_rows,
2087            deleted_chunk_rows,
2088            deleted_fts_rows,
2089            deleted_vec_rows,
2090            notes: Vec::new(),
2091        })
2092    }
2093
2094    /// Purge provenance events older than `before_timestamp`.
2095    ///
2096    /// By default, `excise` and `purge_logical_id` event types are preserved so that
2097    /// data-deletion audit trails survive. Pass an explicit
2098    /// `preserve_event_types` list to override this default.
2099    ///
2100    /// # Errors
2101    /// Returns [`EngineError`] if the database connection fails, the transaction
2102    /// cannot be started, or any SQL statement fails.
2103    pub fn purge_provenance_events(
2104        &self,
2105        before_timestamp: i64,
2106        options: &ProvenancePurgeOptions,
2107    ) -> Result<ProvenancePurgeReport, EngineError> {
2108        let mut conn = self.connect()?;
2109        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2110
2111        let preserved_types: Vec<&str> = if options.preserve_event_types.is_empty() {
2112            vec!["excise", "purge_logical_id"]
2113        } else {
2114            options
2115                .preserve_event_types
2116                .iter()
2117                .map(String::as_str)
2118                .collect()
2119        };
2120
2121        // Build the NOT IN clause dynamically based on preserved types.
2122        let placeholders: String = (0..preserved_types.len())
2123            .map(|i| format!("?{}", i + 2))
2124            .collect::<Vec<_>>()
2125            .join(", ");
2126        let count_query = format!(
2127            "SELECT count(*) FROM provenance_events \
2128             WHERE created_at < ?1 AND event_type NOT IN ({placeholders})"
2129        );
2130        let delete_query = format!(
2131            "DELETE FROM provenance_events WHERE rowid IN (\
2132             SELECT rowid FROM provenance_events \
2133             WHERE created_at < ?1 AND event_type NOT IN ({placeholders}) \
2134             LIMIT 10000)"
2135        );
2136
2137        let bind_params = |stmt: &mut rusqlite::Statement<'_>| -> Result<(), rusqlite::Error> {
2138            stmt.raw_bind_parameter(1, before_timestamp)?;
2139            for (i, event_type) in preserved_types.iter().enumerate() {
2140                stmt.raw_bind_parameter(i + 2, *event_type)?;
2141            }
2142            Ok(())
2143        };
2144
2145        let events_deleted = if options.dry_run {
2146            let mut stmt = tx.prepare(&count_query)?;
2147            bind_params(&mut stmt)?;
2148            stmt.raw_query()
2149                .next()?
2150                .map_or(0, |row| row.get::<_, u64>(0).unwrap_or(0))
2151        } else {
2152            let mut total_deleted: u64 = 0;
2153            loop {
2154                let mut stmt = tx.prepare(&delete_query)?;
2155                bind_params(&mut stmt)?;
2156                let deleted = stmt.raw_execute()?;
2157                if deleted == 0 {
2158                    break;
2159                }
2160                total_deleted += deleted as u64;
2161            }
2162            total_deleted
2163        };
2164
2165        let total_after: u64 =
2166            tx.query_row("SELECT count(*) FROM provenance_events", [], |row| {
2167                row.get(0)
2168            })?;
2169
2170        let oldest_remaining: Option<i64> = tx
2171            .query_row("SELECT MIN(created_at) FROM provenance_events", [], |row| {
2172                row.get(0)
2173            })
2174            .optional()?
2175            .flatten();
2176
2177        if !options.dry_run {
2178            tx.commit()?;
2179        }
2180
2181        // In dry_run mode nothing was deleted, so total_after includes the
2182        // would-be-deleted rows; subtract to get the preserved count.
2183        let events_preserved = if options.dry_run {
2184            total_after - events_deleted
2185        } else {
2186            total_after
2187        };
2188
2189        Ok(ProvenancePurgeReport {
2190            events_deleted,
2191            events_preserved,
2192            oldest_remaining,
2193        })
2194    }
2195
2196    /// # Errors
2197    /// Returns [`EngineError`] if the database connection fails, the transaction cannot be
2198    /// started, or any SQL statement fails.
2199    #[allow(clippy::too_many_lines)]
2200    pub fn excise_source(&self, source_ref: &str) -> Result<TraceReport, EngineError> {
2201        let mut conn = self.connect()?;
2202
2203        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2204        let affected_operational_collections = collect_strings_tx(
2205            &tx,
2206            "SELECT DISTINCT m.collection_name \
2207             FROM operational_mutations m \
2208             JOIN operational_collections c ON c.name = m.collection_name \
2209             WHERE m.source_ref = ?1 AND c.kind = 'latest_state' \
2210             ORDER BY m.collection_name",
2211            source_ref,
2212        )?;
2213
2214        // Collect (row_id, logical_id) for active rows that will be excised.
2215        let pairs: Vec<(String, String)> = {
2216            let mut stmt = tx.prepare(
2217                "SELECT row_id, logical_id FROM nodes \
2218                 WHERE source_ref = ?1 AND superseded_at IS NULL",
2219            )?;
2220            stmt.query_map([source_ref], |row| {
2221                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
2222            })?
2223            .collect::<Result<Vec<_>, _>>()?
2224        };
2225        let affected_logical_ids: Vec<String> = pairs
2226            .iter()
2227            .map(|(_, logical_id)| logical_id.clone())
2228            .collect();
2229
2230        // Supersede bad rows in all tables.
2231        tx.execute(
2232            "UPDATE nodes SET superseded_at = unixepoch() \
2233             WHERE source_ref = ?1 AND superseded_at IS NULL",
2234            [source_ref],
2235        )?;
2236        tx.execute(
2237            "UPDATE edges SET superseded_at = unixepoch() \
2238             WHERE source_ref = ?1 AND superseded_at IS NULL",
2239            [source_ref],
2240        )?;
2241        tx.execute(
2242            "UPDATE actions SET superseded_at = unixepoch() \
2243             WHERE source_ref = ?1 AND superseded_at IS NULL",
2244            [source_ref],
2245        )?;
2246        clear_operational_current_rows(&tx, &affected_operational_collections)?;
2247        tx.execute(
2248            "DELETE FROM operational_mutations WHERE source_ref = ?1",
2249            [source_ref],
2250        )?;
2251        for logical_id in &affected_logical_ids {
2252            delete_vec_rows_for_logical_id(&tx, logical_id)?;
2253            tx.execute(
2254                "DELETE FROM chunks WHERE node_logical_id = ?1",
2255                [logical_id.as_str()],
2256            )?;
2257        }
2258
2259        // Restore the most recent prior version for each affected logical_id.
2260        for (excised_row_id, logical_id) in &pairs {
2261            let prior: Option<String> = tx
2262                .query_row(
2263                    "SELECT row_id FROM nodes \
2264                     WHERE logical_id = ?1 AND row_id != ?2 \
2265                     ORDER BY created_at DESC LIMIT 1",
2266                    [logical_id.as_str(), excised_row_id.as_str()],
2267                    |row| row.get(0),
2268                )
2269                .optional()?;
2270            if let Some(prior_id) = prior {
2271                tx.execute(
2272                    "UPDATE nodes SET superseded_at = NULL WHERE row_id = ?1",
2273                    [prior_id.as_str()],
2274                )?;
2275            }
2276        }
2277
2278        for logical_id in &affected_logical_ids {
2279            let has_active_node = tx
2280                .query_row(
2281                    "SELECT 1 FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL LIMIT 1",
2282                    [logical_id.as_str()],
2283                    |row| row.get::<_, i64>(0),
2284                )
2285                .optional()?
2286                .is_some();
2287            if !has_active_node {
2288                tx.execute(
2289                    "DELETE FROM node_access_metadata WHERE logical_id = ?1",
2290                    [logical_id.as_str()],
2291                )?;
2292            }
2293        }
2294
2295        rebuild_operational_current_rows(&tx, &affected_operational_collections)?;
2296
2297        // Rebuild FTS atomically within the same transaction so readers never
2298        // observe a post-excise node state with a stale FTS index.
2299        tx.execute("DELETE FROM fts_nodes", [])?;
2300        tx.execute(
2301            r"
2302            INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content)
2303            SELECT c.id, n.logical_id, n.kind, c.text_content
2304            FROM chunks c
2305            JOIN nodes n
2306              ON n.logical_id = c.node_logical_id
2307             AND n.superseded_at IS NULL
2308            ",
2309            [],
2310        )?;
2311
2312        // Rebuild property FTS in the same transaction.
2313        rebuild_property_fts_in_tx(&tx)?;
2314
2315        // Record the audit event inside the same transaction so the excision and its
2316        // audit record are committed atomically — no window where the excision is
2317        // durable but unaudited.
2318        tx.execute(
2319            "INSERT INTO provenance_events (id, event_type, subject, source_ref) \
2320             VALUES (?1, 'excise_source', ?2, ?2)",
2321            rusqlite::params![new_id(), source_ref],
2322        )?;
2323
2324        tx.commit()?;
2325
2326        self.trace_source(source_ref)
2327    }
2328
    /// Export a copy of the database to `destination_path` and write a JSON manifest
    /// named `<destination file name>.export-manifest.json` in the same directory.
    ///
    /// Steps, in order: optional `PRAGMA wal_checkpoint(FULL)`, read the schema
    /// version, copy through the `SQLite` backup API, read `page_count` from the
    /// exported file, compute the SHA-256 of the exported file, then write the
    /// manifest via a temporary file followed by a rename.
    ///
    /// Returns the manifest that was written.
    ///
    /// # Errors
    /// Returns [`EngineError`] if the WAL checkpoint fails, the `SQLite` backup fails,
    /// the SHA-256 digest cannot be computed, or the manifest file cannot be written.
    pub fn safe_export(
        &self,
        destination_path: impl AsRef<Path>,
        options: SafeExportOptions,
    ) -> Result<SafeExportManifest, EngineError> {
        let destination_path = destination_path.as_ref();

        // 1. Optionally checkpoint WAL before exporting. This keeps the on-disk file tidy for
        // callers that want a fully checkpointed export, but export correctness does not depend
        // on it because the backup API copies from the live SQLite connection state.
        let conn = self.connect()?;

        if options.force_checkpoint {
            trace_info!("safe_export: wal checkpoint started");
            // The pragma yields one row of three integers, bound here as
            // (busy, log, checkpointed).
            // NOTE(review): SQLite documents the first column as a 0/1 "blocked" flag
            // rather than a reader count, so the "{busy} active reader(s)" wording in
            // the error below may overstate what is known — confirm.
            let (busy, log, checkpointed): (i64, i64, i64) =
                conn.query_row("PRAGMA wal_checkpoint(FULL)", [], |row| {
                    Ok((row.get(0)?, row.get(1)?, row.get(2)?))
                })?;
            if busy != 0 {
                trace_warn!(
                    busy,
                    log_frames = log,
                    checkpointed_frames = checkpointed,
                    "safe_export: wal checkpoint blocked by active readers"
                );
                // A blocked checkpoint aborts the export before anything is written.
                return Err(EngineError::Bridge(format!(
                    "WAL checkpoint blocked: {busy} active reader(s) prevented a full checkpoint; \
                     log frames={log}, checkpointed={checkpointed}; \
                     retry export when no readers are active"
                )));
            }
            trace_info!(
                log_frames = log,
                checkpointed_frames = checkpointed,
                "safe_export: wal checkpoint completed"
            );
        }

        // Any failure of this query is reported as schema version 0 instead of
        // failing the export.
        let schema_version: u32 = conn
            .query_row(
                "SELECT COALESCE(MAX(version), 0) FROM fathom_schema_migrations",
                [],
                |row| row.get(0),
            )
            .unwrap_or(0);

        // 2. Export the database through SQLite's online backup API so committed data in the WAL
        // is included even when `force_checkpoint` is false.
        if let Some(parent) = destination_path.parent() {
            fs::create_dir_all(parent)?;
        }
        conn.backup(DatabaseName::Main, destination_path, None)?;

        // The source connection is no longer needed; everything below reads the export.
        drop(conn);

        // 2b. Query page_count from the EXPORTED file so the manifest reflects what was
        // actually backed up, not the source (which may have changed between the PRAGMA
        // and the backup call).
        let page_count: u64 = {
            let export_conn = rusqlite::Connection::open_with_flags(
                destination_path,
                rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY
                    | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX,
            )?;
            export_conn.query_row("PRAGMA page_count", [], |row| row.get(0))?
        };

        // 3. Compute SHA-256 of the exported file.
        // The file is streamed through the hasher with `io::copy` rather than being
        // read fully into memory first.
        let sha256 = {
            let mut file = fs::File::open(destination_path)?;
            let mut hasher = Sha256::new();
            io::copy(&mut file, &mut hasher)?;
            format!("{:x}", hasher.finalize())
        };

        // 4. Record when the export was created (seconds since the Unix epoch).
        let exported_at = SystemTime::now()
            .duration_since(SystemTime::UNIX_EPOCH)
            .map_err(|e| EngineError::Bridge(format!("system clock error: {e}")))?
            .as_secs();

        let manifest = SafeExportManifest {
            exported_at,
            sha256,
            schema_version,
            protocol_version: EXPORT_PROTOCOL_VERSION,
            page_count,
        };

        // 5. Write manifest alongside the exported file, using Path API for the name.
        let manifest_path = {
            let mut p = destination_path.to_path_buf();
            // Despite its name, `stem` is the full manifest file name: the complete
            // destination file name with ".export-manifest.json" appended.
            let stem = p
                .file_name()
                .map(|n| format!("{}.export-manifest.json", n.to_string_lossy()))
                .ok_or_else(|| {
                    EngineError::Bridge("destination path has no filename".to_owned())
                })?;
            p.set_file_name(stem);
            p
        };
        let manifest_json =
            serde_json::to_string(&manifest).map_err(|e| EngineError::Bridge(e.to_string()))?;

        // Atomic manifest write: write to a temp file then rename so readers never
        // observe a partially-written manifest.
        // `with_extension` replaces the trailing "json", giving "...export-manifest.json.tmp".
        let manifest_tmp = manifest_path.with_extension("json.tmp");
        if let Err(e) = fs::write(&manifest_tmp, &manifest_json)
            .and_then(|()| fs::rename(&manifest_tmp, &manifest_path))
        {
            // Best-effort cleanup of the temp file; the original error is what is returned.
            let _ = fs::remove_file(&manifest_tmp);
            return Err(e.into());
        }

        Ok(manifest)
    }
2449}
2450
/// One vector embedding contract record.
///
/// The field names match the columns that `persist_vector_contract` writes to the
/// `vector_embedding_contracts` table, except for `updated_at`, which has no field here.
// NOTE(review): `dead_code` is allowed without a stated reason — presumably not every
// field is read yet; confirm whether the allow is still needed.
#[allow(dead_code)]
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
struct VectorEmbeddingContractRecord {
    profile: String,
    table_name: String,
    model_identity: String,
    model_version: String,
    dimension: usize,
    normalization_policy: String,
    chunking_policy: String,
    preprocessing_policy: String,
    /// The generator command serialized as JSON text.
    generator_command_json: String,
    applied_at: i64,
    snapshot_hash: String,
    contract_format_version: i64,
}
2467
/// One chunk included in a vector regeneration payload.
///
/// `collect_regeneration_chunks` fills these from `chunks` joined to the active
/// (non-superseded) row in `nodes`: `kind` and `source_ref` come from the node, the
/// remaining fields from the chunk.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
struct VectorRegenerationInputChunk {
    chunk_id: String,
    node_logical_id: String,
    kind: String,
    text_content: String,
    byte_start: Option<i64>,
    byte_end: Option<i64>,
    source_ref: Option<String>,
    created_at: i64,
}
2479
/// The regeneration payload: contract settings copied from the config plus the chunks
/// to embed (see `build_regeneration_input`).
///
/// `compute_snapshot_hash` hashes the JSON serialization of this struct, so adding,
/// removing, renaming, or reordering fields changes the resulting snapshot hash.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
struct VectorRegenerationInput {
    profile: String,
    table_name: String,
    model_identity: String,
    model_version: String,
    dimension: usize,
    normalization_policy: String,
    chunking_policy: String,
    preprocessing_policy: String,
    chunks: Vec<VectorRegenerationInputChunk>,
}
2492
/// A single embedding returned by the generator, identified by the chunk it belongs to.
///
/// `validate_generated_embeddings` requires `embedding` to have exactly the configured
/// dimension and to contain only finite values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
struct GeneratedEmbedding {
    chunk_id: String,
    embedding: Vec<f32>,
}
2498
/// The generator's full output: an object with a single `embeddings` list.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
struct GeneratedEmbeddings {
    embeddings: Vec<GeneratedEmbedding>,
}
2503
/// Categories of vector regeneration failure.
///
/// Each category has a fixed human-readable label (see [`Self::label`]) and a
/// retryability flag (see [`Self::retryable`]).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum VectorRegenerationFailureClass {
    InvalidContract,
    PayloadTooLarge,
    GeneratorTimeout,
    GeneratorStdoutOverflow,
    GeneratorStderrOverflow,
    GeneratorNonzeroExit,
    MalformedGeneratorJson,
    SnapshotDrift,
    UnsupportedVecCapability,
}

impl VectorRegenerationFailureClass {
    /// Lower-case, space-separated name of this failure class.
    fn label(self) -> &'static str {
        let text = match self {
            Self::InvalidContract => "invalid contract",
            Self::PayloadTooLarge => "payload too large",
            Self::GeneratorTimeout => "generator timeout",
            Self::GeneratorStdoutOverflow => "generator stdout overflow",
            Self::GeneratorStderrOverflow => "generator stderr overflow",
            Self::GeneratorNonzeroExit => "generator nonzero exit",
            Self::MalformedGeneratorJson => "malformed generator json",
            Self::SnapshotDrift => "snapshot drift",
            Self::UnsupportedVecCapability => "unsupported vec capability",
        };
        text
    }

    /// `true` only for [`Self::SnapshotDrift`]; every other class is non-retryable.
    fn retryable(self) -> bool {
        self == Self::SnapshotDrift
    }
}
2536
2537#[derive(Clone, Debug, PartialEq, Eq)]
2538pub(crate) struct VectorRegenerationFailure {
2539    class: VectorRegenerationFailureClass,
2540    detail: String,
2541}
2542
2543impl VectorRegenerationFailure {
2544    pub(crate) fn new(class: VectorRegenerationFailureClass, detail: impl Into<String>) -> Self {
2545        Self {
2546            class,
2547            detail: detail.into(),
2548        }
2549    }
2550
2551    fn to_engine_error(&self) -> EngineError {
2552        let retry_suffix = if self.class.retryable() {
2553            " [retryable]"
2554        } else {
2555            ""
2556        };
2557        EngineError::Bridge(format!(
2558            "vector regeneration {}: {}{}",
2559            self.class.label(),
2560            self.detail,
2561            retry_suffix
2562        ))
2563    }
2564
2565    fn failure_class_label(&self) -> &'static str {
2566        self.class.label()
2567    }
2568}
2569
/// Metadata attached to a vector regeneration provenance event.
///
/// `persist_vector_regeneration_event` serializes this and stores it in the
/// `metadata_json` column of `provenance_events`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
struct VectorRegenerationAuditMetadata {
    profile: String,
    model_identity: String,
    model_version: String,
    chunk_count: usize,
    snapshot_hash: String,
    /// Left out of the serialized output entirely when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    failure_class: Option<String>,
}
2580
/// Retention policy, deserialized from an internally tagged object.
///
/// The `mode` field selects the variant using snake_case names: `keep_all`,
/// `purge_before_seconds` (with `max_age_seconds`), or `keep_last` (with `max_rows`).
// NOTE(review): how each mode is enforced is not visible from this definition —
// see the retention planning/execution code for the exact semantics.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize)]
#[serde(tag = "mode", rename_all = "snake_case")]
enum OperationalRetentionPolicy {
    KeepAll,
    PurgeBeforeSeconds { max_age_seconds: i64 },
    KeepLast { max_rows: usize },
}
2588
2589/// # Errors
2590/// Returns [`EngineError`] if the file cannot be read or the config is invalid.
2591pub fn load_vector_regeneration_config(
2592    path: impl AsRef<Path>,
2593) -> Result<VectorRegenerationConfig, EngineError> {
2594    let path = path.as_ref();
2595    let raw = fs::read_to_string(path)?;
2596    match path.extension().and_then(|ext| ext.to_str()) {
2597        Some("toml") => {
2598            toml::from_str(&raw).map_err(|error| EngineError::Bridge(error.to_string()))
2599        }
2600        Some("json") | None => {
2601            serde_json::from_str(&raw).map_err(|error| EngineError::Bridge(error.to_string()))
2602        }
2603        Some(other) => Err(EngineError::Bridge(format!(
2604            "unsupported vector regeneration config extension: {other}"
2605        ))),
2606    }
2607}
2608
2609fn validate_vector_regeneration_config(
2610    conn: &rusqlite::Connection,
2611    config: &VectorRegenerationConfig,
2612    policy: &VectorGeneratorPolicy,
2613) -> Result<VectorRegenerationConfig, VectorRegenerationFailure> {
2614    let profile = validate_bounded_text("profile", &config.profile, MAX_PROFILE_LEN)?;
2615    let table_name = validate_bounded_text("table_name", &config.table_name, MAX_PROFILE_LEN)?;
2616    if table_name != "vec_nodes_active" {
2617        return Err(VectorRegenerationFailure::new(
2618            VectorRegenerationFailureClass::InvalidContract,
2619            format!("table_name must be vec_nodes_active, got '{table_name}'"),
2620        ));
2621    }
2622    let model_identity = validate_bounded_text(
2623        "model_identity",
2624        &config.model_identity,
2625        MAX_MODEL_IDENTITY_LEN,
2626    )?;
2627    let model_version = validate_bounded_text(
2628        "model_version",
2629        &config.model_version,
2630        MAX_MODEL_VERSION_LEN,
2631    )?;
2632    if config.dimension == 0 {
2633        return Err(VectorRegenerationFailure::new(
2634            VectorRegenerationFailureClass::InvalidContract,
2635            "dimension must be greater than zero".to_owned(),
2636        ));
2637    }
2638    let normalization_policy = validate_bounded_text(
2639        "normalization_policy",
2640        &config.normalization_policy,
2641        MAX_POLICY_LEN,
2642    )?;
2643    let chunking_policy =
2644        validate_bounded_text("chunking_policy", &config.chunking_policy, MAX_POLICY_LEN)?;
2645    let preprocessing_policy = validate_bounded_text(
2646        "preprocessing_policy",
2647        &config.preprocessing_policy,
2648        MAX_POLICY_LEN,
2649    )?;
2650    let generator_command = validate_generator_command(&config.generator_command, policy)?;
2651
2652    if let Some(existing_dimension) = current_vector_profile_dimension(conn, &profile)?
2653        && existing_dimension != config.dimension
2654    {
2655        return Err(VectorRegenerationFailure::new(
2656            VectorRegenerationFailureClass::InvalidContract,
2657            format!(
2658                "dimension {} does not match existing vector profile dimension {}",
2659                config.dimension, existing_dimension
2660            ),
2661        ));
2662    }
2663
2664    validate_existing_contract_version(conn, &profile)?;
2665
2666    let normalized = VectorRegenerationConfig {
2667        profile,
2668        table_name,
2669        model_identity,
2670        model_version,
2671        dimension: config.dimension,
2672        normalization_policy,
2673        chunking_policy,
2674        preprocessing_policy,
2675        generator_command,
2676    };
2677    let serialized = serde_json::to_vec(&normalized).map_err(|error| {
2678        VectorRegenerationFailure::new(
2679            VectorRegenerationFailureClass::InvalidContract,
2680            error.to_string(),
2681        )
2682    })?;
2683    if serialized.len() > MAX_CONTRACT_JSON_BYTES {
2684        return Err(VectorRegenerationFailure::new(
2685            VectorRegenerationFailureClass::InvalidContract,
2686            format!("serialized contract exceeds {MAX_CONTRACT_JSON_BYTES} bytes"),
2687        ));
2688    }
2689
2690    Ok(normalized)
2691}
2692
2693#[allow(clippy::cast_possible_wrap)]
2694fn persist_vector_contract(
2695    conn: &rusqlite::Connection,
2696    config: &VectorRegenerationConfig,
2697    snapshot_hash: &str,
2698) -> Result<(), EngineError> {
2699    let generator_command_json = serde_json::to_string(&config.generator_command)
2700        .map_err(|error| EngineError::Bridge(error.to_string()))?;
2701    conn.execute(
2702        r"
2703        INSERT OR REPLACE INTO vector_embedding_contracts (
2704            profile,
2705            table_name,
2706            model_identity,
2707            model_version,
2708            dimension,
2709            normalization_policy,
2710            chunking_policy,
2711            preprocessing_policy,
2712            generator_command_json,
2713            applied_at,
2714            snapshot_hash,
2715            contract_format_version,
2716            updated_at
2717        ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, unixepoch(), ?10, ?11, unixepoch())
2718        ",
2719        rusqlite::params![
2720            config.profile.as_str(),
2721            config.table_name.as_str(),
2722            config.model_identity.as_str(),
2723            config.model_version.as_str(),
2724            config.dimension as i64,
2725            config.normalization_policy.as_str(),
2726            config.chunking_policy.as_str(),
2727            config.preprocessing_policy.as_str(),
2728            generator_command_json,
2729            snapshot_hash,
2730            CURRENT_VECTOR_CONTRACT_FORMAT_VERSION,
2731        ],
2732    )?;
2733    Ok(())
2734}
2735
2736fn persist_vector_regeneration_event(
2737    conn: &rusqlite::Connection,
2738    event_type: &str,
2739    subject: &str,
2740    metadata: &VectorRegenerationAuditMetadata,
2741) -> Result<(), EngineError> {
2742    let metadata_json = serialize_audit_metadata(metadata)?;
2743    conn.execute(
2744        "INSERT INTO provenance_events (id, event_type, subject, metadata_json) VALUES (?1, ?2, ?3, ?4)",
2745        rusqlite::params![new_id(), event_type, subject, metadata_json],
2746    )?;
2747    Ok(())
2748}
2749
2750fn persist_simple_provenance_event(
2751    conn: &rusqlite::Connection,
2752    event_type: &str,
2753    subject: &str,
2754    metadata: Option<serde_json::Value>,
2755) -> Result<(), EngineError> {
2756    let metadata_json = metadata.map(|value| value.to_string()).unwrap_or_default();
2757    conn.execute(
2758        "INSERT INTO provenance_events (id, event_type, subject, metadata_json) VALUES (?1, ?2, ?3, ?4)",
2759        rusqlite::params![new_id(), event_type, subject, metadata_json],
2760    )?;
2761    Ok(())
2762}
2763
2764/// Count active nodes that should have a property FTS row (extraction yields a value)
2765/// but don't. Uses the same extraction logic as write/rebuild to avoid false positives
2766/// for nodes whose declared paths legitimately normalize to no values.
2767fn count_missing_property_fts_rows(conn: &rusqlite::Connection) -> Result<i64, EngineError> {
2768    let schemas = crate::writer::load_fts_property_schemas(conn)?;
2769    if schemas.is_empty() {
2770        return Ok(0);
2771    }
2772
2773    let mut missing = 0i64;
2774    for (kind, paths, separator) in &schemas {
2775        let mut stmt = conn.prepare(
2776            "SELECT n.logical_id, n.properties FROM nodes n \
2777             WHERE n.kind = ?1 AND n.superseded_at IS NULL \
2778               AND NOT EXISTS (SELECT 1 FROM fts_node_properties fp WHERE fp.node_logical_id = n.logical_id)",
2779        )?;
2780        let rows = stmt.query_map([kind.as_str()], |row| {
2781            Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
2782        })?;
2783        for row in rows {
2784            let (_logical_id, properties_str) = row?;
2785            let props: serde_json::Value =
2786                serde_json::from_str(&properties_str).unwrap_or_default();
2787            if crate::writer::compute_property_fts_text(&props, paths, separator).is_some() {
2788                missing += 1;
2789            }
2790        }
2791    }
2792    Ok(missing)
2793}
2794
2795/// Count property FTS rows whose `text_content` has drifted from the current canonical
2796/// value computed by `compute_property_fts_text(...)`. This catches:
2797/// - rows whose text no longer matches the current node properties and schema
2798/// - rows that should have been removed (extraction now yields no value)
2799fn count_drifted_property_fts_rows(conn: &rusqlite::Connection) -> Result<i64, EngineError> {
2800    let schemas = crate::writer::load_fts_property_schemas(conn)?;
2801    if schemas.is_empty() {
2802        return Ok(0);
2803    }
2804
2805    let mut drifted = 0i64;
2806    for (kind, paths, separator) in &schemas {
2807        let mut stmt = conn.prepare(
2808            "SELECT fp.node_logical_id, fp.text_content, n.properties \
2809             FROM fts_node_properties fp \
2810             JOIN nodes n ON n.logical_id = fp.node_logical_id AND n.superseded_at IS NULL \
2811             WHERE fp.kind = ?1 AND n.kind = ?1",
2812        )?;
2813        let rows = stmt.query_map([kind.as_str()], |row| {
2814            Ok((
2815                row.get::<_, String>(0)?,
2816                row.get::<_, String>(1)?,
2817                row.get::<_, String>(2)?,
2818            ))
2819        })?;
2820        for row in rows {
2821            let (_logical_id, stored_text, properties_str) = row?;
2822            let props: serde_json::Value =
2823                serde_json::from_str(&properties_str).unwrap_or_default();
2824            let expected = crate::writer::compute_property_fts_text(&props, paths, separator);
2825            match expected {
2826                Some(text) if text == stored_text => {}
2827                _ => drifted += 1,
2828            }
2829        }
2830    }
2831    Ok(drifted)
2832}
2833
2834/// Rebuild property FTS rows from canonical state within an existing transaction.
2835fn rebuild_property_fts_in_tx(conn: &rusqlite::Connection) -> Result<usize, EngineError> {
2836    conn.execute("DELETE FROM fts_node_properties", [])?;
2837    let inserted = crate::projection::insert_property_fts_rows(
2838        conn,
2839        "SELECT logical_id, properties FROM nodes WHERE kind = ?1 AND superseded_at IS NULL",
2840    )?;
2841    Ok(inserted)
2842}
2843
2844/// Rebuild property FTS for a single node. Returns 1 if a row was inserted, 0 otherwise.
2845/// The caller must delete any existing `fts_node_properties` row for this node first.
2846fn rebuild_single_node_property_fts(
2847    conn: &rusqlite::Connection,
2848    logical_id: &str,
2849    kind: &str,
2850) -> Result<usize, EngineError> {
2851    let schema: Option<(Vec<String>, String)> = conn
2852        .query_row(
2853            "SELECT property_paths_json, separator FROM fts_property_schemas WHERE kind = ?1",
2854            [kind],
2855            |row| {
2856                let paths_json: String = row.get(0)?;
2857                let separator: String = row.get(1)?;
2858                let paths: Vec<String> = serde_json::from_str(&paths_json).unwrap_or_default();
2859                Ok((paths, separator))
2860            },
2861        )
2862        .optional()?;
2863    let Some((paths, separator)) = schema else {
2864        return Ok(0);
2865    };
2866    let properties_str: Option<String> = conn
2867        .query_row(
2868            "SELECT properties FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
2869            [logical_id],
2870            |row| row.get(0),
2871        )
2872        .optional()?;
2873    let Some(properties_str) = properties_str else {
2874        return Ok(0);
2875    };
2876    let props: serde_json::Value = serde_json::from_str(&properties_str).unwrap_or_default();
2877    let Some(text) = crate::writer::compute_property_fts_text(&props, &paths, &separator) else {
2878        return Ok(0);
2879    };
2880    conn.execute(
2881        "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) VALUES (?1, ?2, ?3)",
2882        rusqlite::params![logical_id, kind, text],
2883    )?;
2884    Ok(1)
2885}
2886
2887fn validate_fts_property_paths(paths: &[String]) -> Result<(), EngineError> {
2888    if paths.is_empty() {
2889        return Err(EngineError::InvalidWrite(
2890            "FTS property paths must not be empty".to_owned(),
2891        ));
2892    }
2893    let mut seen = std::collections::HashSet::new();
2894    for path in paths {
2895        if !path.starts_with("$.") {
2896            return Err(EngineError::InvalidWrite(format!(
2897                "FTS property path must start with '$.' but got: {path}"
2898            )));
2899        }
2900        let after_prefix = &path[2..]; // safe: already validated "$." prefix
2901        let segments: Vec<&str> = after_prefix.split('.').collect();
2902        if segments.is_empty() || segments.iter().any(|s| s.is_empty()) {
2903            return Err(EngineError::InvalidWrite(format!(
2904                "FTS property path has empty segment(s): {path}"
2905            )));
2906        }
2907        for seg in &segments {
2908            if !seg.chars().all(|c| c.is_alphanumeric() || c == '_') {
2909                return Err(EngineError::InvalidWrite(format!(
2910                    "FTS property path segment contains invalid characters: {path}"
2911                )));
2912            }
2913        }
2914        if !seen.insert(path) {
2915            return Err(EngineError::InvalidWrite(format!(
2916                "duplicate FTS property path: {path}"
2917            )));
2918        }
2919    }
2920    Ok(())
2921}
2922
2923fn load_fts_property_schema_record(
2924    conn: &rusqlite::Connection,
2925    kind: &str,
2926) -> Result<Option<FtsPropertySchemaRecord>, EngineError> {
2927    let row = conn
2928        .query_row(
2929            "SELECT kind, property_paths_json, separator, format_version \
2930             FROM fts_property_schemas WHERE kind = ?1",
2931            [kind],
2932            |row| {
2933                let paths_json: String = row.get(1)?;
2934                let paths: Vec<String> = serde_json::from_str(&paths_json).unwrap_or_default();
2935                Ok(FtsPropertySchemaRecord {
2936                    kind: row.get(0)?,
2937                    property_paths: paths,
2938                    separator: row.get(2)?,
2939                    format_version: row.get(3)?,
2940                })
2941            },
2942        )
2943        .optional()?;
2944    Ok(row)
2945}
2946
2947fn build_regeneration_input(
2948    config: &VectorRegenerationConfig,
2949    chunks: Vec<VectorRegenerationInputChunk>,
2950) -> VectorRegenerationInput {
2951    VectorRegenerationInput {
2952        profile: config.profile.clone(),
2953        table_name: config.table_name.clone(),
2954        model_identity: config.model_identity.clone(),
2955        model_version: config.model_version.clone(),
2956        dimension: config.dimension,
2957        normalization_policy: config.normalization_policy.clone(),
2958        chunking_policy: config.chunking_policy.clone(),
2959        preprocessing_policy: config.preprocessing_policy.clone(),
2960        chunks,
2961    }
2962}
2963
2964fn compute_snapshot_hash(payload: &VectorRegenerationInput) -> Result<String, EngineError> {
2965    let bytes =
2966        serde_json::to_vec(payload).map_err(|error| EngineError::Bridge(error.to_string()))?;
2967    let mut hasher = Sha256::new();
2968    hasher.update(bytes);
2969    Ok(format!("{:x}", hasher.finalize()))
2970}
2971
2972fn collect_regeneration_chunks(
2973    conn: &rusqlite::Connection,
2974) -> Result<Vec<VectorRegenerationInputChunk>, EngineError> {
2975    let mut stmt = conn.prepare(
2976        r"
2977        SELECT c.id, c.node_logical_id, n.kind, c.text_content, c.byte_start, c.byte_end, n.source_ref, c.created_at
2978        FROM chunks c
2979        JOIN nodes n
2980          ON n.logical_id = c.node_logical_id
2981         AND n.superseded_at IS NULL
2982        ORDER BY c.created_at, c.id
2983        ",
2984    )?;
2985    let chunks = stmt
2986        .query_map([], |row| {
2987            Ok(VectorRegenerationInputChunk {
2988                chunk_id: row.get(0)?,
2989                node_logical_id: row.get(1)?,
2990                kind: row.get(2)?,
2991                text_content: row.get(3)?,
2992                byte_start: row.get(4)?,
2993                byte_end: row.get(5)?,
2994                source_ref: row.get(6)?,
2995                created_at: row.get(7)?,
2996            })
2997        })?
2998        .collect::<Result<Vec<_>, _>>()?;
2999    Ok(chunks)
3000}
3001
3002fn validate_generated_embeddings(
3003    config: &VectorRegenerationConfig,
3004    chunks: &[VectorRegenerationInputChunk],
3005    generated: GeneratedEmbeddings,
3006) -> Result<std::collections::HashMap<String, Vec<u8>>, VectorRegenerationFailure> {
3007    if generated.embeddings.len() != chunks.len() {
3008        return Err(VectorRegenerationFailure::new(
3009            VectorRegenerationFailureClass::MalformedGeneratorJson,
3010            format!(
3011                "generator returned {} embedding(s) for {} chunk(s)",
3012                generated.embeddings.len(),
3013                chunks.len()
3014            ),
3015        ));
3016    }
3017
3018    let mut embedding_map = std::collections::HashMap::new();
3019    for embedding in generated.embeddings {
3020        if embedding.embedding.len() != config.dimension {
3021            return Err(VectorRegenerationFailure::new(
3022                VectorRegenerationFailureClass::MalformedGeneratorJson,
3023                format!(
3024                    "embedding for chunk '{}' has dimension {}, expected {}",
3025                    embedding.chunk_id,
3026                    embedding.embedding.len(),
3027                    config.dimension
3028                ),
3029            ));
3030        }
3031        if embedding.embedding.iter().any(|value| !value.is_finite()) {
3032            return Err(VectorRegenerationFailure::new(
3033                VectorRegenerationFailureClass::MalformedGeneratorJson,
3034                format!(
3035                    "embedding for chunk '{}' contains non-finite values",
3036                    embedding.chunk_id
3037                ),
3038            ));
3039        }
3040        let bytes: Vec<u8> = embedding
3041            .embedding
3042            .iter()
3043            .flat_map(|value| value.to_le_bytes())
3044            .collect();
3045        if embedding_map
3046            .insert(embedding.chunk_id.clone(), bytes)
3047            .is_some()
3048        {
3049            return Err(VectorRegenerationFailure::new(
3050                VectorRegenerationFailureClass::MalformedGeneratorJson,
3051                format!(
3052                    "duplicate embedding returned for chunk '{}'",
3053                    embedding.chunk_id
3054                ),
3055            ));
3056        }
3057    }
3058
3059    Ok(embedding_map)
3060}
3061
3062fn generator_policy_notes(policy: &VectorGeneratorPolicy) -> Vec<String> {
3063    let mut notes = vec!["vector embeddings regenerated from application contract".to_owned()];
3064    if !policy.allowed_executable_roots.is_empty() {
3065        notes.push("generator executable roots enforced by operator policy".to_owned());
3066    }
3067    if !policy.preserve_env_vars.is_empty() {
3068        notes.push("generator environment reduced to preserved variables".to_owned());
3069    }
3070    notes
3071}
3072
/// Selects one of two output streams, stdout or stderr.
// NOTE(review): presumably identifies which generator child-process stream a reader
// is attached to — the usage is outside this section; confirm.
enum GeneratorStream {
    Stdout,
    Stderr,
}
3077
/// Outcome of reading a stream.
// NOTE(review): the producer is outside this section; the variant meanings below are
// inferred from their names and payloads — confirm against the reader code.
enum StreamReadResult {
    /// Carries the bytes that were read.
    Complete(Vec<u8>),
    /// Presumably: the stream produced more data than the reader allows.
    Overflow,
    /// Carries an I/O error.
    Io(io::Error),
}
3083
3084fn validate_bounded_text(
3085    field: &str,
3086    value: &str,
3087    max_len: usize,
3088) -> Result<String, VectorRegenerationFailure> {
3089    let trimmed = value.trim();
3090    if trimmed.is_empty() {
3091        return Err(VectorRegenerationFailure::new(
3092            VectorRegenerationFailureClass::InvalidContract,
3093            format!("{field} must not be empty"),
3094        ));
3095    }
3096    if trimmed.len() > max_len {
3097        return Err(VectorRegenerationFailure::new(
3098            VectorRegenerationFailureClass::InvalidContract,
3099            format!("{field} exceeds max length {max_len}"),
3100        ));
3101    }
3102    Ok(trimmed.to_owned())
3103}
3104
3105fn validate_generator_command(
3106    command: &[String],
3107    policy: &VectorGeneratorPolicy,
3108) -> Result<Vec<String>, VectorRegenerationFailure> {
3109    if command.is_empty() {
3110        return Err(VectorRegenerationFailure::new(
3111            VectorRegenerationFailureClass::InvalidContract,
3112            "generator_command must contain at least one element".to_owned(),
3113        ));
3114    }
3115    let mut total_len = 0usize;
3116    for argument in command {
3117        if argument.is_empty() {
3118            return Err(VectorRegenerationFailure::new(
3119                VectorRegenerationFailureClass::InvalidContract,
3120                "generator_command entries must not be empty".to_owned(),
3121            ));
3122        }
3123        if argument.len() > MAX_GENERATOR_COMMAND_ARG_LEN {
3124            return Err(VectorRegenerationFailure::new(
3125                VectorRegenerationFailureClass::InvalidContract,
3126                format!(
3127                    "generator_command argument exceeds max length {MAX_GENERATOR_COMMAND_ARG_LEN}"
3128                ),
3129            ));
3130        }
3131        total_len += argument.len();
3132    }
3133    if total_len > MAX_GENERATOR_COMMAND_TOTAL_LEN {
3134        return Err(VectorRegenerationFailure::new(
3135            VectorRegenerationFailureClass::InvalidContract,
3136            format!(
3137                "generator_command exceeds max serialized length {MAX_GENERATOR_COMMAND_TOTAL_LEN}"
3138            ),
3139        ));
3140    }
3141    executable_trust::validate_generator_executable(&command[0], policy)?;
3142    Ok(command.to_vec())
3143}
3144
3145fn current_vector_profile_dimension(
3146    conn: &rusqlite::Connection,
3147    profile: &str,
3148) -> Result<Option<usize>, VectorRegenerationFailure> {
3149    let dimension: Option<i64> = conn
3150        .query_row(
3151            "SELECT dimension FROM vector_profiles WHERE profile = ?1 AND enabled = 1",
3152            [profile],
3153            |row| row.get(0),
3154        )
3155        .optional()
3156        .map_err(|error| {
3157            VectorRegenerationFailure::new(
3158                VectorRegenerationFailureClass::InvalidContract,
3159                error.to_string(),
3160            )
3161        })?;
3162    dimension
3163        .map(|value| {
3164            usize::try_from(value).map_err(|_| {
3165                VectorRegenerationFailure::new(
3166                    VectorRegenerationFailureClass::InvalidContract,
3167                    format!("stored vector profile dimension is invalid: {value}"),
3168                )
3169            })
3170        })
3171        .transpose()
3172}
3173
3174fn validate_existing_contract_version(
3175    conn: &rusqlite::Connection,
3176    profile: &str,
3177) -> Result<(), VectorRegenerationFailure> {
3178    let version: Option<i64> = conn
3179        .query_row(
3180            "SELECT contract_format_version FROM vector_embedding_contracts WHERE profile = ?1",
3181            [profile],
3182            |row| row.get(0),
3183        )
3184        .optional()
3185        .map_err(|error| {
3186            VectorRegenerationFailure::new(
3187                VectorRegenerationFailureClass::InvalidContract,
3188                error.to_string(),
3189            )
3190        })?;
3191    if let Some(version) = version
3192        && version > CURRENT_VECTOR_CONTRACT_FORMAT_VERSION
3193    {
3194        return Err(VectorRegenerationFailure::new(
3195            VectorRegenerationFailureClass::InvalidContract,
3196            format!(
3197                "persisted contract format version {version} is unsupported; supported version is {CURRENT_VECTOR_CONTRACT_FORMAT_VERSION}"
3198            ),
3199        ));
3200    }
3201    Ok(())
3202}
3203
3204fn serialize_audit_metadata(
3205    metadata: &VectorRegenerationAuditMetadata,
3206) -> Result<String, EngineError> {
3207    let json =
3208        serde_json::to_string(metadata).map_err(|error| EngineError::Bridge(error.to_string()))?;
3209    if json.len() > MAX_AUDIT_METADATA_BYTES {
3210        return Err(VectorRegenerationFailure::new(
3211            VectorRegenerationFailureClass::InvalidContract,
3212            format!("audit metadata exceeds {MAX_AUDIT_METADATA_BYTES} bytes"),
3213        )
3214        .to_engine_error());
3215    }
3216    Ok(json)
3217}
3218
3219#[allow(clippy::too_many_lines)]
3220fn run_vector_generator_bounded(
3221    config: &VectorRegenerationConfig,
3222    payload: &VectorRegenerationInput,
3223    policy: &VectorGeneratorPolicy,
3224) -> Result<GeneratedEmbeddings, VectorRegenerationFailure> {
3225    if payload.chunks.len() > policy.max_chunks {
3226        return Err(VectorRegenerationFailure::new(
3227            VectorRegenerationFailureClass::PayloadTooLarge,
3228            format!(
3229                "chunk count {} exceeds max_chunks {}",
3230                payload.chunks.len(),
3231                policy.max_chunks
3232            ),
3233        ));
3234    }
3235
3236    let input = serde_json::to_vec(payload).map_err(|error| {
3237        VectorRegenerationFailure::new(
3238            VectorRegenerationFailureClass::MalformedGeneratorJson,
3239            error.to_string(),
3240        )
3241    })?;
3242    if input.len() > policy.max_input_bytes {
3243        return Err(VectorRegenerationFailure::new(
3244            VectorRegenerationFailureClass::PayloadTooLarge,
3245            format!(
3246                "serialized input {} bytes exceeds max_input_bytes {}",
3247                input.len(),
3248                policy.max_input_bytes
3249            ),
3250        ));
3251    }
3252
3253    let mut command = Command::new(config.generator_command.first().ok_or_else(|| {
3254        VectorRegenerationFailure::new(
3255            VectorRegenerationFailureClass::InvalidContract,
3256            "missing generator executable",
3257        )
3258    })?);
3259    command.args(config.generator_command.iter().skip(1));
3260    command.stdin(Stdio::piped());
3261    command.stdout(Stdio::piped());
3262    command.stderr(Stdio::piped());
3263    command.env_clear();
3264    for env_var in &policy.preserve_env_vars {
3265        if let Some(value) = std::env::var_os(env_var) {
3266            command.env(env_var, value);
3267        }
3268    }
3269
3270    let mut child = command.spawn().map_err(|error| {
3271        VectorRegenerationFailure::new(
3272            VectorRegenerationFailureClass::GeneratorNonzeroExit,
3273            format!("failed to spawn generator: {error}"),
3274        )
3275    })?;
3276    if let Some(mut stdin) = child.stdin.take() {
3277        stdin.write_all(&input).map_err(|error| {
3278            VectorRegenerationFailure::new(
3279                VectorRegenerationFailureClass::GeneratorNonzeroExit,
3280                format!("failed to write generator stdin: {error}"),
3281            )
3282        })?;
3283    } else {
3284        return Err(VectorRegenerationFailure::new(
3285            VectorRegenerationFailureClass::GeneratorNonzeroExit,
3286            "failed to open generator stdin",
3287        ));
3288    }
3289
3290    let stdout = child.stdout.take().ok_or_else(|| {
3291        VectorRegenerationFailure::new(
3292            VectorRegenerationFailureClass::GeneratorNonzeroExit,
3293            "failed to open generator stdout",
3294        )
3295    })?;
3296    let stderr = child.stderr.take().ok_or_else(|| {
3297        VectorRegenerationFailure::new(
3298            VectorRegenerationFailureClass::GeneratorNonzeroExit,
3299            "failed to open generator stderr",
3300        )
3301    })?;
3302
3303    let (tx, rx) = mpsc::channel();
3304    let stdout_handle = spawn_capped_reader(
3305        stdout,
3306        policy.max_stdout_bytes,
3307        GeneratorStream::Stdout,
3308        tx.clone(),
3309    );
3310    let stderr_handle =
3311        spawn_capped_reader(stderr, policy.max_stderr_bytes, GeneratorStream::Stderr, tx);
3312
3313    let start = Instant::now();
3314    let timeout = Duration::from_millis(policy.timeout_ms);
3315    let mut stdout_bytes: Option<Vec<u8>> = None;
3316    let mut stderr_bytes: Option<Vec<u8>> = None;
3317    let mut status = None;
3318    let mut stream_error: Option<VectorRegenerationFailure> = None;
3319
3320    while status.is_none() && stream_error.is_none() {
3321        while let Ok((stream, result)) = rx.try_recv() {
3322            match (stream, result) {
3323                (GeneratorStream::Stdout, StreamReadResult::Complete(bytes)) => {
3324                    stdout_bytes = Some(bytes);
3325                }
3326                (GeneratorStream::Stderr, StreamReadResult::Complete(bytes)) => {
3327                    stderr_bytes = Some(bytes);
3328                }
3329                (GeneratorStream::Stdout, StreamReadResult::Overflow) => {
3330                    stream_error = Some(VectorRegenerationFailure::new(
3331                        VectorRegenerationFailureClass::GeneratorStdoutOverflow,
3332                        format!(
3333                            "stdout exceeded max_stdout_bytes {}",
3334                            policy.max_stdout_bytes
3335                        ),
3336                    ));
3337                }
3338                (GeneratorStream::Stderr, StreamReadResult::Overflow) => {
3339                    stream_error = Some(VectorRegenerationFailure::new(
3340                        VectorRegenerationFailureClass::GeneratorStderrOverflow,
3341                        format!(
3342                            "stderr exceeded max_stderr_bytes {}",
3343                            policy.max_stderr_bytes
3344                        ),
3345                    ));
3346                }
3347                (_, StreamReadResult::Io(error)) => {
3348                    stream_error = Some(VectorRegenerationFailure::new(
3349                        VectorRegenerationFailureClass::GeneratorNonzeroExit,
3350                        format!("failed to read generator stream: {error}"),
3351                    ));
3352                }
3353            }
3354        }
3355
3356        if stream_error.is_some() {
3357            let _ = child.kill();
3358            break;
3359        }
3360        if start.elapsed() > timeout {
3361            let _ = child.kill();
3362            stream_error = Some(VectorRegenerationFailure::new(
3363                VectorRegenerationFailureClass::GeneratorTimeout,
3364                format!("generator exceeded timeout after {}ms", policy.timeout_ms),
3365            ));
3366            break;
3367        }
3368        status = child.try_wait().map_err(|error| {
3369            VectorRegenerationFailure::new(
3370                VectorRegenerationFailureClass::GeneratorNonzeroExit,
3371                format!("failed to poll generator status: {error}"),
3372            )
3373        })?;
3374        if status.is_none() {
3375            thread::sleep(Duration::from_millis(10));
3376        }
3377    }
3378
3379    let _ = child.wait();
3380    let _ = stdout_handle.join();
3381    let _ = stderr_handle.join();
3382
3383    while let Ok((stream, result)) = rx.try_recv() {
3384        match (stream, result) {
3385            (GeneratorStream::Stdout, StreamReadResult::Complete(bytes)) => {
3386                stdout_bytes = Some(bytes);
3387            }
3388            (GeneratorStream::Stderr, StreamReadResult::Complete(bytes)) => {
3389                stderr_bytes = Some(bytes);
3390            }
3391            (GeneratorStream::Stdout, StreamReadResult::Overflow) => {
3392                stream_error = Some(VectorRegenerationFailure::new(
3393                    VectorRegenerationFailureClass::GeneratorStdoutOverflow,
3394                    format!(
3395                        "stdout exceeded max_stdout_bytes {}",
3396                        policy.max_stdout_bytes
3397                    ),
3398                ));
3399            }
3400            (GeneratorStream::Stderr, StreamReadResult::Overflow) => {
3401                stream_error = Some(VectorRegenerationFailure::new(
3402                    VectorRegenerationFailureClass::GeneratorStderrOverflow,
3403                    format!(
3404                        "stderr exceeded max_stderr_bytes {}",
3405                        policy.max_stderr_bytes
3406                    ),
3407                ));
3408            }
3409            (_, StreamReadResult::Io(error)) => {
3410                stream_error = Some(VectorRegenerationFailure::new(
3411                    VectorRegenerationFailureClass::GeneratorNonzeroExit,
3412                    format!("failed to read generator stream: {error}"),
3413                ));
3414            }
3415        }
3416    }
3417
3418    if let Some(error) = stream_error {
3419        return Err(error);
3420    }
3421
3422    let status = status.ok_or_else(|| {
3423        VectorRegenerationFailure::new(
3424            VectorRegenerationFailureClass::GeneratorNonzeroExit,
3425            "vector generator exited without a status",
3426        )
3427    })?;
3428    if !status.success() {
3429        let stderr =
3430            truncate_error_text(&stderr_bytes.unwrap_or_default(), policy.max_stderr_bytes);
3431        return Err(VectorRegenerationFailure::new(
3432            VectorRegenerationFailureClass::GeneratorNonzeroExit,
3433            stderr,
3434        ));
3435    }
3436
3437    let stdout = stdout_bytes.unwrap_or_default();
3438    serde_json::from_slice(&stdout).map_err(|error| {
3439        VectorRegenerationFailure::new(
3440            VectorRegenerationFailureClass::MalformedGeneratorJson,
3441            format!("decode generator output: {error}"),
3442        )
3443    })
3444}
3445
3446fn spawn_capped_reader<R: Read + Send + 'static>(
3447    mut reader: R,
3448    max_bytes: usize,
3449    stream: GeneratorStream,
3450    tx: mpsc::Sender<(GeneratorStream, StreamReadResult)>,
3451) -> thread::JoinHandle<()> {
3452    thread::spawn(move || {
3453        let mut buffer = Vec::new();
3454        let mut chunk = [0u8; 8192];
3455        loop {
3456            match reader.read(&mut chunk) {
3457                Ok(0) => {
3458                    let _ = tx.send((stream, StreamReadResult::Complete(buffer)));
3459                    break;
3460                }
3461                Ok(read_bytes) => {
3462                    if buffer.len() + read_bytes > max_bytes {
3463                        let _ = tx.send((stream, StreamReadResult::Overflow));
3464                        break;
3465                    }
3466                    buffer.extend_from_slice(&chunk[..read_bytes]);
3467                }
3468                Err(error) => {
3469                    let _ = tx.send((stream, StreamReadResult::Io(error)));
3470                    break;
3471                }
3472            }
3473        }
3474    })
3475}
3476
/// Renders captured error bytes as text, keeping at most `max_bytes` of input.
///
/// Input within the limit is converted lossily to UTF-8 and returned as-is.
/// Longer input is cut to its first `max_bytes` bytes before conversion and
/// suffixed with `" [truncated]"`. Previously the marker was appended but the
/// full text was still returned. A cut that lands inside a multi-byte sequence
/// shows up as a replacement character from the lossy conversion.
fn truncate_error_text(bytes: &[u8], max_bytes: usize) -> String {
    if bytes.len() <= max_bytes {
        return String::from_utf8_lossy(bytes).into_owned();
    }
    // In bounds: the guard above established `bytes.len() > max_bytes`.
    let mut text = String::from_utf8_lossy(&bytes[..max_bytes]).into_owned();
    text.push_str(" [truncated]");
    text
}
3484
3485fn count_source_ref(
3486    conn: &rusqlite::Connection,
3487    table: &str,
3488    source_ref: &str,
3489) -> Result<usize, EngineError> {
3490    let sql = match table {
3491        "nodes" => "SELECT count(*) FROM nodes WHERE source_ref = ?1",
3492        "edges" => "SELECT count(*) FROM edges WHERE source_ref = ?1",
3493        "actions" => "SELECT count(*) FROM actions WHERE source_ref = ?1",
3494        "operational_mutations" => {
3495            "SELECT count(*) FROM operational_mutations WHERE source_ref = ?1"
3496        }
3497        other => return Err(EngineError::Bridge(format!("unknown table: {other}"))),
3498    };
3499    let count: i64 = conn.query_row(sql, [source_ref], |row| row.get(0))?;
3500    // FIX(review): was `count as usize` — unsound cast.
3501    // Chose option (C) here: propagate error since this is a user-facing helper.
3502    usize::try_from(count)
3503        .map_err(|_| EngineError::Bridge(format!("count overflow for table {table}: {count}")))
3504}
3505
3506fn rebuild_operational_current_rows(
3507    tx: &rusqlite::Transaction<'_>,
3508    collections: &[String],
3509) -> Result<usize, EngineError> {
3510    let mut rebuilt_rows = 0usize;
3511    clear_operational_current_rows(tx, collections)?;
3512    let mut ins_current = tx.prepare_cached(
3513        "INSERT INTO operational_current \
3514         (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
3515         VALUES (?1, ?2, ?3, ?4, ?5)",
3516    )?;
3517
3518    for collection in collections {
3519        let mut stmt = tx.prepare(
3520            "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
3521             FROM operational_mutations \
3522             WHERE collection_name = ?1 \
3523             ORDER BY record_key, mutation_order",
3524        )?;
3525        let mut latest_by_key: std::collections::HashMap<String, Option<(String, i64, String)>> =
3526            std::collections::HashMap::new();
3527        let rows = stmt.query_map([collection], map_operational_mutation_row)?;
3528        for row in rows {
3529            let mutation = row?;
3530            match mutation.op_kind.as_str() {
3531                "put" => {
3532                    latest_by_key.insert(
3533                        mutation.record_key,
3534                        Some((mutation.payload_json, mutation.created_at, mutation.id)),
3535                    );
3536                }
3537                "delete" => {
3538                    latest_by_key.insert(mutation.record_key, None);
3539                }
3540                _ => {}
3541            }
3542        }
3543
3544        for (record_key, state) in latest_by_key {
3545            if let Some((payload_json, updated_at, last_mutation_id)) = state {
3546                ins_current.execute(rusqlite::params![
3547                    collection,
3548                    record_key,
3549                    payload_json,
3550                    updated_at,
3551                    last_mutation_id,
3552                ])?;
3553                rebuilt_rows += 1;
3554            }
3555        }
3556    }
3557
3558    drop(ins_current);
3559    Ok(rebuilt_rows)
3560}
3561
3562fn clear_operational_current_rows(
3563    tx: &rusqlite::Transaction<'_>,
3564    collections: &[String],
3565) -> Result<(), EngineError> {
3566    let mut delete_current =
3567        tx.prepare_cached("DELETE FROM operational_current WHERE collection_name = ?1")?;
3568    let mut delete_secondary_current = tx.prepare_cached(
3569        "DELETE FROM operational_secondary_index_entries \
3570         WHERE collection_name = ?1 AND subject_kind = 'current'",
3571    )?;
3572    for collection in collections {
3573        delete_secondary_current.execute([collection])?;
3574        delete_current.execute([collection])?;
3575    }
3576    drop(delete_secondary_current);
3577    drop(delete_current);
3578    Ok(())
3579}
3580
3581fn clear_operational_secondary_index_entries(
3582    tx: &rusqlite::Transaction<'_>,
3583    collection_name: &str,
3584) -> Result<(), EngineError> {
3585    tx.execute(
3586        "DELETE FROM operational_secondary_index_entries WHERE collection_name = ?1",
3587        [collection_name],
3588    )?;
3589    Ok(())
3590}
3591
3592fn insert_operational_secondary_index_entry(
3593    tx: &rusqlite::Transaction<'_>,
3594    collection_name: &str,
3595    subject_kind: &str,
3596    mutation_id: &str,
3597    record_key: &str,
3598    entry: &crate::operational::OperationalSecondaryIndexEntry,
3599) -> Result<(), EngineError> {
3600    tx.execute(
3601        "INSERT INTO operational_secondary_index_entries \
3602         (collection_name, index_name, subject_kind, mutation_id, record_key, sort_timestamp, \
3603          slot1_text, slot1_integer, slot2_text, slot2_integer, slot3_text, slot3_integer) \
3604         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
3605        rusqlite::params![
3606            collection_name,
3607            entry.index_name,
3608            subject_kind,
3609            mutation_id,
3610            record_key,
3611            entry.sort_timestamp,
3612            entry.slot1_text,
3613            entry.slot1_integer,
3614            entry.slot2_text,
3615            entry.slot2_integer,
3616            entry.slot3_text,
3617            entry.slot3_integer,
3618        ],
3619    )?;
3620    Ok(())
3621}
3622
3623fn rebuild_operational_secondary_index_entries(
3624    tx: &rusqlite::Transaction<'_>,
3625    collection_name: &str,
3626    collection_kind: OperationalCollectionKind,
3627    indexes: &[OperationalSecondaryIndexDefinition],
3628) -> Result<(usize, usize), EngineError> {
3629    clear_operational_secondary_index_entries(tx, collection_name)?;
3630
3631    let mut mutation_entries_rebuilt = 0usize;
3632    if collection_kind == OperationalCollectionKind::AppendOnlyLog {
3633        let mut stmt = tx.prepare(
3634            "SELECT id, record_key, payload_json FROM operational_mutations \
3635             WHERE collection_name = ?1 ORDER BY mutation_order",
3636        )?;
3637        let rows = stmt
3638            .query_map([collection_name], |row| {
3639                Ok((
3640                    row.get::<_, String>(0)?,
3641                    row.get::<_, String>(1)?,
3642                    row.get::<_, String>(2)?,
3643                ))
3644            })?
3645            .collect::<Result<Vec<_>, _>>()?;
3646        drop(stmt);
3647        for (mutation_id, record_key, payload_json) in rows {
3648            for entry in extract_secondary_index_entries_for_mutation(indexes, &payload_json) {
3649                insert_operational_secondary_index_entry(
3650                    tx,
3651                    collection_name,
3652                    "mutation",
3653                    &mutation_id,
3654                    &record_key,
3655                    &entry,
3656                )?;
3657                mutation_entries_rebuilt += 1;
3658            }
3659        }
3660    }
3661
3662    let mut current_entries_rebuilt = 0usize;
3663    if collection_kind == OperationalCollectionKind::LatestState {
3664        let mut stmt = tx.prepare(
3665            "SELECT record_key, payload_json, updated_at, last_mutation_id FROM operational_current \
3666             WHERE collection_name = ?1 ORDER BY updated_at DESC, record_key",
3667        )?;
3668        let rows = stmt
3669            .query_map([collection_name], |row| {
3670                Ok((
3671                    row.get::<_, String>(0)?,
3672                    row.get::<_, String>(1)?,
3673                    row.get::<_, i64>(2)?,
3674                    row.get::<_, String>(3)?,
3675                ))
3676            })?
3677            .collect::<Result<Vec<_>, _>>()?;
3678        drop(stmt);
3679        for (record_key, payload_json, updated_at, last_mutation_id) in rows {
3680            for entry in
3681                extract_secondary_index_entries_for_current(indexes, &payload_json, updated_at)
3682            {
3683                insert_operational_secondary_index_entry(
3684                    tx,
3685                    collection_name,
3686                    "current",
3687                    &last_mutation_id,
3688                    &record_key,
3689                    &entry,
3690                )?;
3691                current_entries_rebuilt += 1;
3692            }
3693        }
3694    }
3695
3696    Ok((mutation_entries_rebuilt, current_entries_rebuilt))
3697}
3698
3699fn collect_strings_tx(
3700    tx: &rusqlite::Transaction<'_>,
3701    sql: &str,
3702    value: &str,
3703) -> Result<Vec<String>, EngineError> {
3704    let mut stmt = tx.prepare(sql)?;
3705    let rows = stmt.query_map([value], |row| row.get::<_, String>(0))?;
3706    rows.collect::<Result<Vec<_>, _>>()
3707        .map_err(EngineError::from)
3708}
3709
/// Converts an `i64` row count into `usize`.
///
/// # Panics
///
/// Panics when the value does not fit in `usize` (for example when it is
/// negative); for a `count(*)` result that would indicate data corruption.
#[allow(clippy::expect_used)]
fn i64_to_usize(val: i64) -> usize {
    let converted = usize::try_from(val);
    converted.expect("count(*) must be non-negative")
}
3716
3717/// Runs a parameterized query and collects the first column as strings.
3718///
3719/// NOTE(review): sql parameter must be a hardcoded query string, never user input.
3720/// Options: (A) doc comment, (B) whitelist refactor like `count_source_ref`, (C) leave as-is.
3721/// Chose (A): function is private, only called with hardcoded SQL from `trace_source`.
3722/// Whitelist refactor not practical — queries have different SELECT/ORDER BY per table.
3723fn collect_strings(
3724    conn: &rusqlite::Connection,
3725    sql: &str,
3726    param: &str,
3727) -> Result<Vec<String>, EngineError> {
3728    let mut stmt = conn.prepare(sql)?;
3729    let values = stmt
3730        .query_map([param], |row| row.get::<_, String>(0))?
3731        .collect::<Result<Vec<_>, _>>()?;
3732    Ok(values)
3733}
3734
3735fn collect_edge_logical_ids_for_restore(
3736    tx: &rusqlite::Transaction<'_>,
3737    logical_id: &str,
3738    retire_source_ref: Option<&str>,
3739    retire_created_at: i64,
3740    retire_event_rowid: i64,
3741) -> Result<Vec<String>, EngineError> {
3742    let mut stmt = tx.prepare(
3743        "SELECT DISTINCT e.logical_id \
3744         FROM edges e \
3745         JOIN provenance_events p \
3746           ON p.subject = e.logical_id \
3747          AND p.event_type = 'edge_retire' \
3748          AND ( \
3749                p.created_at > ?3 \
3750                OR (p.created_at = ?3 AND p.rowid >= ?4) \
3751          ) \
3752          AND ((?2 IS NULL AND p.source_ref IS NULL) OR p.source_ref = ?2) \
3753         WHERE e.superseded_at IS NOT NULL \
3754           AND (e.source_logical_id = ?1 OR e.target_logical_id = ?1) \
3755           AND NOT EXISTS ( \
3756                SELECT 1 FROM edges active \
3757                WHERE active.logical_id = e.logical_id \
3758                  AND active.superseded_at IS NULL \
3759           ) \
3760         ORDER BY e.logical_id",
3761    )?;
3762    let edge_ids = stmt
3763        .query_map(
3764            rusqlite::params![
3765                logical_id,
3766                retire_source_ref,
3767                retire_created_at,
3768                retire_event_rowid
3769            ],
3770            |row| row.get::<_, String>(0),
3771        )?
3772        .collect::<Result<Vec<_>, _>>()?;
3773    Ok(edge_ids)
3774}
3775
3776/// Restores edges for a node being restored, skipping any whose counterpart
3777/// endpoint is not active (e.g. still retired or purged).
3778fn restore_validated_edges(
3779    tx: &rusqlite::Transaction<'_>,
3780    logical_id: &str,
3781    retire_source_ref: Option<&str>,
3782    retire_created_at: i64,
3783    retire_event_rowid: i64,
3784) -> Result<(usize, Vec<SkippedEdge>), EngineError> {
3785    let edge_logical_ids = collect_edge_logical_ids_for_restore(
3786        tx,
3787        logical_id,
3788        retire_source_ref,
3789        retire_created_at,
3790        retire_event_rowid,
3791    )?;
3792    let mut restored = 0usize;
3793    let mut skipped = Vec::new();
3794    for edge_logical_id in &edge_logical_ids {
3795        let edge_detail: Option<(String, String, String)> = tx
3796            .query_row(
3797                "SELECT row_id, source_logical_id, target_logical_id FROM edges \
3798                 WHERE logical_id = ?1 AND superseded_at IS NOT NULL \
3799                 ORDER BY superseded_at DESC, created_at DESC, rowid DESC LIMIT 1",
3800                [edge_logical_id.as_str()],
3801                |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
3802            )
3803            .optional()?;
3804        let Some((edge_row_id, source_lid, target_lid)) = edge_detail else {
3805            continue;
3806        };
3807        let other_endpoint = if source_lid == logical_id {
3808            &target_lid
3809        } else {
3810            &source_lid
3811        };
3812        let endpoint_active: bool = tx
3813            .query_row(
3814                "SELECT 1 FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL LIMIT 1",
3815                [other_endpoint.as_str()],
3816                |_| Ok(true),
3817            )
3818            .optional()?
3819            .unwrap_or(false);
3820        if !endpoint_active {
3821            skipped.push(SkippedEdge {
3822                edge_logical_id: edge_logical_id.clone(),
3823                missing_endpoint: other_endpoint.clone(),
3824            });
3825            continue;
3826        }
3827        restored += tx.execute(
3828            "UPDATE edges SET superseded_at = NULL WHERE row_id = ?1",
3829            [edge_row_id.as_str()],
3830        )?;
3831    }
3832    Ok((restored, skipped))
3833}
3834
/// Counts the `vec_nodes_active` rows whose chunk belongs to `logical_id`.
///
/// A SQLite failure whose message mentions `vec_nodes_active` or
/// `no such module: vec0` is treated as "no vector rows" and yields `Ok(0)`.
/// Any other error is returned as `EngineError::Sqlite`.
#[cfg(feature = "sqlite-vec")]
fn count_vec_rows_for_logical_id(
    tx: &rusqlite::Transaction<'_>,
    logical_id: &str,
) -> Result<usize, EngineError> {
    let counted = tx.query_row(
        "SELECT count(*) FROM vec_nodes_active v \
         JOIN chunks c ON c.id = v.chunk_id \
         WHERE c.node_logical_id = ?1",
        [logical_id],
        |row| row.get::<_, i64>(0),
    );

    match counted {
        Ok(count) => Ok(i64_to_usize(count)),
        Err(error) => {
            let vec_unavailable = matches!(
                &error,
                rusqlite::Error::SqliteFailure(_, Some(msg))
                    if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0")
            );
            if vec_unavailable {
                Ok(0)
            } else {
                Err(EngineError::Sqlite(error))
            }
        }
    }
}
3856
/// Fallback compiled when the `sqlite-vec` feature is disabled: always reports
/// zero vector rows and never touches the database.
///
/// The `Result` return type mirrors the feature-enabled variant so callers are
/// identical in both builds (hence the `unnecessary_wraps` allowance).
#[cfg(not(feature = "sqlite-vec"))]
#[allow(clippy::unnecessary_wraps)]
fn count_vec_rows_for_logical_id(
    _tx: &rusqlite::Transaction<'_>,
    _logical_id: &str,
) -> Result<usize, EngineError> {
    Ok(0)
}
3865
/// Deletes the `vec_nodes_active` rows whose chunk belongs to `logical_id` and
/// returns how many rows were removed.
///
/// A SQLite failure whose message mentions `vec_nodes_active` or
/// `no such module: vec0` is treated as "nothing to delete" and yields `Ok(0)`.
/// Any other error is returned as `EngineError::Sqlite`.
#[cfg(feature = "sqlite-vec")]
fn delete_vec_rows_for_logical_id(
    tx: &rusqlite::Transaction<'_>,
    logical_id: &str,
) -> Result<usize, EngineError> {
    let deleted = tx.execute(
        "DELETE FROM vec_nodes_active \
         WHERE chunk_id IN (SELECT id FROM chunks WHERE node_logical_id = ?1)",
        [logical_id],
    );

    deleted.or_else(|error| {
        let vec_unavailable = matches!(
            &error,
            rusqlite::Error::SqliteFailure(_, Some(msg))
                if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0")
        );
        if vec_unavailable {
            Ok(0)
        } else {
            Err(EngineError::Sqlite(error))
        }
    })
}
3885
/// Fallback compiled when the `sqlite-vec` feature is disabled: deletes nothing
/// and always reports zero removed rows.
///
/// The `Result` return type mirrors the feature-enabled variant so callers are
/// identical in both builds (hence the `unnecessary_wraps` allowance).
#[cfg(not(feature = "sqlite-vec"))]
#[allow(clippy::unnecessary_wraps)]
fn delete_vec_rows_for_logical_id(
    _tx: &rusqlite::Transaction<'_>,
    _logical_id: &str,
) -> Result<usize, EngineError> {
    Ok(0)
}
3894
3895fn ensure_operational_collection_registered(
3896    conn: &rusqlite::Connection,
3897    collection_name: &str,
3898) -> Result<(), EngineError> {
3899    if load_operational_collection_record(conn, collection_name)?.is_none() {
3900        return Err(EngineError::InvalidWrite(format!(
3901            "operational collection '{collection_name}' is not registered"
3902        )));
3903    }
3904    Ok(())
3905}
3906
3907fn load_operational_collection_record(
3908    conn: &rusqlite::Connection,
3909    name: &str,
3910) -> Result<Option<OperationalCollectionRecord>, EngineError> {
3911    conn.query_row(
3912        "SELECT name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at, disabled_at \
3913         FROM operational_collections WHERE name = ?1",
3914        [name],
3915        map_operational_collection_row,
3916    )
3917    .optional()
3918    .map_err(EngineError::Sqlite)
3919}
3920
3921fn validate_append_only_operational_collection(
3922    record: &OperationalCollectionRecord,
3923    operation: &str,
3924) -> Result<(), EngineError> {
3925    if record.kind != OperationalCollectionKind::AppendOnlyLog {
3926        return Err(EngineError::InvalidWrite(format!(
3927            "operational collection '{}' must be append_only_log to {operation}",
3928            record.name
3929        )));
3930    }
3931    Ok(())
3932}
3933
3934#[derive(Clone, Debug, PartialEq, Eq)]
3935struct CompiledOperationalReadFilter {
3936    field: String,
3937    condition: OperationalReadCondition,
3938}
3939
3940#[derive(Clone, Debug)]
3941struct MatchedAppendOnlySecondaryIndexRead<'a> {
3942    index_name: &'a str,
3943    value_filter: &'a CompiledOperationalReadFilter,
3944    time_range: Option<&'a CompiledOperationalReadFilter>,
3945}
3946
/// Comparison carried by a [`CompiledOperationalReadFilter`].
#[derive(Debug, Clone, PartialEq, Eq)]
enum OperationalReadCondition {
    /// The field must equal this string.
    ExactString(String),
    /// The field must equal this integer.
    ExactInteger(i64),
    /// The field must start with this string.
    Prefix(String),
    /// The field must fall within the given bounds; the generated SQL treats
    /// both bounds as inclusive (`>=` / `<=`).
    Range {
        /// Lower bound, or `None` for no lower bound.
        lower: Option<i64>,
        /// Upper bound, or `None` for no upper bound.
        upper: Option<i64>,
    },
}
3957
3958fn operational_read_limit(limit: Option<usize>) -> Result<usize, EngineError> {
3959    let applied_limit = limit.unwrap_or(DEFAULT_OPERATIONAL_READ_LIMIT);
3960    if applied_limit == 0 {
3961        return Err(EngineError::InvalidWrite(
3962            "operational read limit must be greater than zero".to_owned(),
3963        ));
3964    }
3965    Ok(applied_limit.min(MAX_OPERATIONAL_READ_LIMIT))
3966}
3967
3968fn parse_operational_filter_fields(
3969    filter_fields_json: &str,
3970) -> Result<Vec<OperationalFilterField>, String> {
3971    let fields: Vec<OperationalFilterField> = serde_json::from_str(filter_fields_json)
3972        .map_err(|error| format!("invalid filter_fields_json: {error}"))?;
3973    let mut seen = std::collections::HashSet::new();
3974    for field in &fields {
3975        if field.name.trim().is_empty() {
3976            return Err("filter_fields_json field names must not be empty".to_owned());
3977        }
3978        if !seen.insert(field.name.as_str()) {
3979            return Err(format!(
3980                "filter_fields_json contains duplicate field '{}'",
3981                field.name
3982            ));
3983        }
3984        if field.modes.is_empty() {
3985            return Err(format!(
3986                "filter_fields_json field '{}' must declare at least one mode",
3987                field.name
3988            ));
3989        }
3990        if field.modes.contains(&OperationalFilterMode::Prefix)
3991            && field.field_type != OperationalFilterFieldType::String
3992        {
3993            return Err(format!(
3994                "filter field '{}' only supports prefix for string types",
3995                field.name
3996            ));
3997        }
3998    }
3999    Ok(fields)
4000}
4001
4002fn compile_operational_read_filters(
4003    filters: &[OperationalFilterClause],
4004    declared_fields: &[OperationalFilterField],
4005) -> Result<Vec<CompiledOperationalReadFilter>, EngineError> {
4006    let field_map = declared_fields
4007        .iter()
4008        .map(|field| (field.name.as_str(), field))
4009        .collect::<std::collections::HashMap<_, _>>();
4010    filters
4011        .iter()
4012        .map(|filter| match filter {
4013            OperationalFilterClause::Exact { field, value } => {
4014                let declared = field_map.get(field.as_str()).ok_or_else(|| {
4015                    EngineError::InvalidWrite(format!(
4016                        "operational read filter uses undeclared field '{field}'"
4017                    ))
4018                })?;
4019                if !declared.modes.contains(&OperationalFilterMode::Exact) {
4020                    return Err(EngineError::InvalidWrite(format!(
4021                        "operational read field '{field}' does not allow exact filters"
4022                    )));
4023                }
4024                let condition = match (declared.field_type, value) {
4025                    (OperationalFilterFieldType::String, OperationalFilterValue::String(value)) => {
4026                        OperationalReadCondition::ExactString(value.clone())
4027                    }
4028                    (
4029                        OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp,
4030                        OperationalFilterValue::Integer(value),
4031                    ) => OperationalReadCondition::ExactInteger(*value),
4032                    _ => {
4033                        return Err(EngineError::InvalidWrite(format!(
4034                            "operational read field '{field}' received a value with the wrong type"
4035                        )));
4036                    }
4037                };
4038                Ok(CompiledOperationalReadFilter {
4039                    field: field.clone(),
4040                    condition,
4041                })
4042            }
4043            OperationalFilterClause::Prefix { field, value } => {
4044                let declared = field_map.get(field.as_str()).ok_or_else(|| {
4045                    EngineError::InvalidWrite(format!(
4046                        "operational read filter uses undeclared field '{field}'"
4047                    ))
4048                })?;
4049                if !declared.modes.contains(&OperationalFilterMode::Prefix) {
4050                    return Err(EngineError::InvalidWrite(format!(
4051                        "operational read field '{field}' does not allow prefix filters"
4052                    )));
4053                }
4054                if declared.field_type != OperationalFilterFieldType::String {
4055                    return Err(EngineError::InvalidWrite(format!(
4056                        "operational read field '{field}' only supports prefix filters for strings"
4057                    )));
4058                }
4059                Ok(CompiledOperationalReadFilter {
4060                    field: field.clone(),
4061                    condition: OperationalReadCondition::Prefix(value.clone()),
4062                })
4063            }
4064            OperationalFilterClause::Range {
4065                field,
4066                lower,
4067                upper,
4068            } => {
4069                let declared = field_map.get(field.as_str()).ok_or_else(|| {
4070                    EngineError::InvalidWrite(format!(
4071                        "operational read filter uses undeclared field '{field}'"
4072                    ))
4073                })?;
4074                if !declared.modes.contains(&OperationalFilterMode::Range) {
4075                    return Err(EngineError::InvalidWrite(format!(
4076                        "operational read field '{field}' does not allow range filters"
4077                    )));
4078                }
4079                if !matches!(
4080                    declared.field_type,
4081                    OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp
4082                ) {
4083                    return Err(EngineError::InvalidWrite(format!(
4084                        "operational read field '{field}' only supports range filters for integer/timestamp fields"
4085                    )));
4086                }
4087                if lower.is_none() && upper.is_none() {
4088                    return Err(EngineError::InvalidWrite(format!(
4089                        "operational read range filter for '{field}' must specify a lower or upper bound"
4090                    )));
4091                }
4092                Ok(CompiledOperationalReadFilter {
4093                    field: field.clone(),
4094                    condition: OperationalReadCondition::Range {
4095                        lower: *lower,
4096                        upper: *upper,
4097                    },
4098                })
4099            }
4100        })
4101        .collect()
4102}
4103
4104fn match_append_only_secondary_index_read<'a>(
4105    filters: &'a [CompiledOperationalReadFilter],
4106    indexes: &'a [OperationalSecondaryIndexDefinition],
4107) -> Option<MatchedAppendOnlySecondaryIndexRead<'a>> {
4108    indexes.iter().find_map(|index| {
4109        let OperationalSecondaryIndexDefinition::AppendOnlyFieldTime {
4110            name,
4111            field,
4112            value_type,
4113            time_field,
4114        } = index
4115        else {
4116            return None;
4117        };
4118        if !(1..=2).contains(&filters.len()) {
4119            return None;
4120        }
4121
4122        let mut value_filter = None;
4123        let mut time_range = None;
4124        for filter in filters {
4125            if filter.field == *field {
4126                let supported = matches!(
4127                    (&filter.condition, value_type),
4128                    (
4129                        OperationalReadCondition::ExactString(_)
4130                            | OperationalReadCondition::Prefix(_),
4131                        crate::operational::OperationalSecondaryIndexValueType::String
4132                    ) | (
4133                        OperationalReadCondition::ExactInteger(_),
4134                        crate::operational::OperationalSecondaryIndexValueType::Integer
4135                            | crate::operational::OperationalSecondaryIndexValueType::Timestamp
4136                    )
4137                );
4138                if !supported || value_filter.is_some() {
4139                    return None;
4140                }
4141                value_filter = Some(filter);
4142                continue;
4143            }
4144            if filter.field == *time_field {
4145                if !matches!(filter.condition, OperationalReadCondition::Range { .. })
4146                    || time_range.is_some()
4147                {
4148                    return None;
4149                }
4150                time_range = Some(filter);
4151                continue;
4152            }
4153            return None;
4154        }
4155
4156        value_filter.map(|value_filter| MatchedAppendOnlySecondaryIndexRead {
4157            index_name: name.as_str(),
4158            value_filter,
4159            time_range,
4160        })
4161    })
4162}
4163
4164fn execute_operational_secondary_index_read(
4165    conn: &rusqlite::Connection,
4166    collection_name: &str,
4167    filters: &[CompiledOperationalReadFilter],
4168    indexes: &[OperationalSecondaryIndexDefinition],
4169    applied_limit: usize,
4170) -> Result<Option<OperationalReadReport>, EngineError> {
4171    use rusqlite::types::Value;
4172
4173    let Some(matched) = match_append_only_secondary_index_read(filters, indexes) else {
4174        return Ok(None);
4175    };
4176
4177    let mut sql = String::from(
4178        "SELECT m.id, m.collection_name, m.record_key, m.op_kind, m.payload_json, m.source_ref, m.created_at \
4179         FROM operational_secondary_index_entries s \
4180         JOIN operational_mutations m ON m.id = s.mutation_id \
4181         WHERE s.collection_name = ?1 AND s.index_name = ?2 AND s.subject_kind = 'mutation' ",
4182    );
4183    let mut params = vec![
4184        Value::from(collection_name.to_owned()),
4185        Value::from(matched.index_name.to_owned()),
4186    ];
4187
4188    match &matched.value_filter.condition {
4189        OperationalReadCondition::ExactString(value) => {
4190            let _ = write!(sql, "AND s.slot1_text = ?{} ", params.len() + 1);
4191            params.push(Value::from(value.clone()));
4192        }
4193        OperationalReadCondition::Prefix(value) => {
4194            let _ = write!(sql, "AND s.slot1_text GLOB ?{} ", params.len() + 1);
4195            params.push(Value::from(glob_prefix_pattern(value)));
4196        }
4197        OperationalReadCondition::ExactInteger(value) => {
4198            let _ = write!(sql, "AND s.slot1_integer = ?{} ", params.len() + 1);
4199            params.push(Value::from(*value));
4200        }
4201        OperationalReadCondition::Range { .. } => return Ok(None),
4202    }
4203
4204    if let Some(time_range) = matched.time_range
4205        && let OperationalReadCondition::Range { lower, upper } = &time_range.condition
4206    {
4207        if let Some(lower) = lower {
4208            let _ = write!(sql, "AND s.sort_timestamp >= ?{} ", params.len() + 1);
4209            params.push(Value::from(*lower));
4210        }
4211        if let Some(upper) = upper {
4212            let _ = write!(sql, "AND s.sort_timestamp <= ?{} ", params.len() + 1);
4213            params.push(Value::from(*upper));
4214        }
4215    }
4216
4217    let _ = write!(
4218        sql,
4219        "ORDER BY s.sort_timestamp DESC, m.mutation_order DESC LIMIT ?{}",
4220        params.len() + 1
4221    );
4222    params.push(Value::from(i64::try_from(applied_limit + 1).map_err(
4223        |_| EngineError::Bridge("operational read limit overflow".to_owned()),
4224    )?));
4225
4226    let mut stmt = conn.prepare(&sql)?;
4227    let mut rows = stmt
4228        .query_map(
4229            rusqlite::params_from_iter(params),
4230            map_operational_mutation_row,
4231        )?
4232        .collect::<Result<Vec<_>, _>>()?;
4233    let was_limited = rows.len() > applied_limit;
4234    if was_limited {
4235        rows.truncate(applied_limit);
4236    }
4237
4238    Ok(Some(OperationalReadReport {
4239        collection_name: collection_name.to_owned(),
4240        row_count: rows.len(),
4241        applied_limit,
4242        was_limited,
4243        rows,
4244    }))
4245}
4246
4247fn execute_operational_filtered_read(
4248    conn: &rusqlite::Connection,
4249    collection_name: &str,
4250    filters: &[CompiledOperationalReadFilter],
4251    applied_limit: usize,
4252) -> Result<OperationalReadReport, EngineError> {
4253    use rusqlite::types::Value;
4254
4255    let mut sql = String::from(
4256        "SELECT m.id, m.collection_name, m.record_key, m.op_kind, m.payload_json, m.source_ref, m.created_at \
4257         FROM operational_mutations m ",
4258    );
4259    let mut params = vec![Value::from(collection_name.to_owned())];
4260    for (index, filter) in filters.iter().enumerate() {
4261        let _ = write!(
4262            sql,
4263            "JOIN operational_filter_values f{index} \
4264             ON f{index}.mutation_id = m.id \
4265            AND f{index}.collection_name = m.collection_name "
4266        );
4267        match &filter.condition {
4268            OperationalReadCondition::ExactString(value) => {
4269                let _ = write!(
4270                    sql,
4271                    "AND f{index}.field_name = ?{} AND f{index}.string_value = ?{} ",
4272                    params.len() + 1,
4273                    params.len() + 2
4274                );
4275                params.push(Value::from(filter.field.clone()));
4276                params.push(Value::from(value.clone()));
4277            }
4278            OperationalReadCondition::ExactInteger(value) => {
4279                let _ = write!(
4280                    sql,
4281                    "AND f{index}.field_name = ?{} AND f{index}.integer_value = ?{} ",
4282                    params.len() + 1,
4283                    params.len() + 2
4284                );
4285                params.push(Value::from(filter.field.clone()));
4286                params.push(Value::from(*value));
4287            }
4288            OperationalReadCondition::Prefix(value) => {
4289                let _ = write!(
4290                    sql,
4291                    "AND f{index}.field_name = ?{} AND f{index}.string_value GLOB ?{} ",
4292                    params.len() + 1,
4293                    params.len() + 2
4294                );
4295                params.push(Value::from(filter.field.clone()));
4296                params.push(Value::from(glob_prefix_pattern(value)));
4297            }
4298            OperationalReadCondition::Range { lower, upper } => {
4299                let _ = write!(sql, "AND f{index}.field_name = ?{} ", params.len() + 1);
4300                params.push(Value::from(filter.field.clone()));
4301                if let Some(lower) = lower {
4302                    let _ = write!(sql, "AND f{index}.integer_value >= ?{} ", params.len() + 1);
4303                    params.push(Value::from(*lower));
4304                }
4305                if let Some(upper) = upper {
4306                    let _ = write!(sql, "AND f{index}.integer_value <= ?{} ", params.len() + 1);
4307                    params.push(Value::from(*upper));
4308                }
4309            }
4310        }
4311    }
4312    let _ = write!(
4313        sql,
4314        "WHERE m.collection_name = ?1 ORDER BY m.mutation_order DESC LIMIT ?{}",
4315        params.len() + 1
4316    );
4317    params.push(Value::from(i64::try_from(applied_limit + 1).map_err(
4318        |_| EngineError::Bridge("operational read limit overflow".to_owned()),
4319    )?));
4320
4321    let mut stmt = conn.prepare(&sql)?;
4322    let mut rows = stmt
4323        .query_map(
4324            rusqlite::params_from_iter(params),
4325            map_operational_mutation_row,
4326        )?
4327        .collect::<Result<Vec<_>, _>>()?;
4328    let was_limited = rows.len() > applied_limit;
4329    if was_limited {
4330        rows.truncate(applied_limit);
4331    }
4332    Ok(OperationalReadReport {
4333        collection_name: collection_name.to_owned(),
4334        row_count: rows.len(),
4335        applied_limit,
4336        was_limited,
4337        rows,
4338    })
4339}
4340
/// Builds a SQLite `GLOB` pattern that matches any string starting with
/// `value`.
///
/// The wildcard characters `*`, `?` and `[` inside `value` are each wrapped in
/// a one-character bracket class so they match literally; a trailing `*` is
/// then appended.
fn glob_prefix_pattern(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len() + 1);
    for symbol in value.chars() {
        if matches!(symbol, '*' | '?' | '[') {
            escaped.push('[');
            escaped.push(symbol);
            escaped.push(']');
        } else {
            escaped.push(symbol);
        }
    }
    escaped.push('*');
    escaped
}
4354
/// A single filter-field value pulled out of a mutation's JSON payload.
///
/// As produced by `extract_operational_filter_values`, exactly one of
/// `string_value` / `integer_value` is populated, depending on the declared
/// field type.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ExtractedOperationalFilterValue {
    /// Name of the declared filter field.
    field_name: String,
    /// Value for string-typed fields.
    string_value: Option<String>,
    /// Value for integer- and timestamp-typed fields.
    integer_value: Option<i64>,
}
4361
4362fn extract_operational_filter_values(
4363    filter_fields: &[OperationalFilterField],
4364    payload_json: &str,
4365) -> Vec<ExtractedOperationalFilterValue> {
4366    let Ok(parsed) = serde_json::from_str::<serde_json::Value>(payload_json) else {
4367        return Vec::new();
4368    };
4369    let Some(object) = parsed.as_object() else {
4370        return Vec::new();
4371    };
4372
4373    filter_fields
4374        .iter()
4375        .filter_map(|field| {
4376            let value = object.get(&field.name)?;
4377            match field.field_type {
4378                OperationalFilterFieldType::String => {
4379                    value
4380                        .as_str()
4381                        .map(|string_value| ExtractedOperationalFilterValue {
4382                            field_name: field.name.clone(),
4383                            string_value: Some(string_value.to_owned()),
4384                            integer_value: None,
4385                        })
4386                }
4387                OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp => {
4388                    value
4389                        .as_i64()
4390                        .map(|integer_value| ExtractedOperationalFilterValue {
4391                            field_name: field.name.clone(),
4392                            string_value: None,
4393                            integer_value: Some(integer_value),
4394                        })
4395                }
4396            }
4397        })
4398        .collect()
4399}
4400
4401fn operational_compaction_candidates(
4402    conn: &rusqlite::Connection,
4403    retention_json: &str,
4404    collection_name: &str,
4405) -> Result<(Vec<String>, Option<i64>), EngineError> {
4406    operational_compaction_candidates_at(
4407        conn,
4408        retention_json,
4409        collection_name,
4410        current_unix_timestamp()?,
4411    )
4412}
4413
4414fn operational_compaction_candidates_at(
4415    conn: &rusqlite::Connection,
4416    retention_json: &str,
4417    collection_name: &str,
4418    now_timestamp: i64,
4419) -> Result<(Vec<String>, Option<i64>), EngineError> {
4420    let policy = parse_operational_retention_policy(retention_json)?;
4421    match policy {
4422        OperationalRetentionPolicy::KeepAll => Ok((Vec::new(), None)),
4423        OperationalRetentionPolicy::PurgeBeforeSeconds { max_age_seconds } => {
4424            let before_timestamp = now_timestamp - max_age_seconds;
4425            let mut stmt = conn.prepare(
4426                "SELECT id FROM operational_mutations \
4427                 WHERE collection_name = ?1 AND created_at < ?2 \
4428                 ORDER BY mutation_order",
4429            )?;
4430            let mutation_ids = stmt
4431                .query_map(
4432                    rusqlite::params![collection_name, before_timestamp],
4433                    |row| row.get::<_, String>(0),
4434                )?
4435                .collect::<Result<Vec<_>, _>>()?;
4436            Ok((mutation_ids, Some(before_timestamp)))
4437        }
4438        OperationalRetentionPolicy::KeepLast { max_rows } => {
4439            let mut stmt = conn.prepare(
4440                "SELECT id FROM operational_mutations \
4441                 WHERE collection_name = ?1 \
4442                 ORDER BY mutation_order DESC",
4443            )?;
4444            let ordered_ids = stmt
4445                .query_map([collection_name], |row| row.get::<_, String>(0))?
4446                .collect::<Result<Vec<_>, _>>()?;
4447            Ok((ordered_ids.into_iter().skip(max_rows).collect(), None))
4448        }
4449    }
4450}
4451
4452fn parse_operational_retention_policy(
4453    retention_json: &str,
4454) -> Result<OperationalRetentionPolicy, EngineError> {
4455    let policy: OperationalRetentionPolicy = serde_json::from_str(retention_json)
4456        .map_err(|error| EngineError::InvalidWrite(format!("invalid retention_json: {error}")))?;
4457    match policy {
4458        OperationalRetentionPolicy::KeepAll => Ok(policy),
4459        OperationalRetentionPolicy::PurgeBeforeSeconds { max_age_seconds } => {
4460            if max_age_seconds <= 0 {
4461                return Err(EngineError::InvalidWrite(
4462                    "retention_json max_age_seconds must be greater than zero".to_owned(),
4463                ));
4464            }
4465            Ok(policy)
4466        }
4467        OperationalRetentionPolicy::KeepLast { max_rows } => {
4468            if max_rows == 0 {
4469                return Err(EngineError::InvalidWrite(
4470                    "retention_json max_rows must be greater than zero".to_owned(),
4471                ));
4472            }
4473            Ok(policy)
4474        }
4475    }
4476}
4477
4478fn load_operational_retention_records(
4479    conn: &rusqlite::Connection,
4480    collection_names: Option<&[String]>,
4481    max_collections: Option<usize>,
4482) -> Result<Vec<OperationalCollectionRecord>, EngineError> {
4483    let limit = max_collections.unwrap_or(usize::MAX);
4484    if limit == 0 {
4485        return Err(EngineError::InvalidWrite(
4486            "max_collections must be greater than zero".to_owned(),
4487        ));
4488    }
4489
4490    let mut records = Vec::new();
4491    if let Some(collection_names) = collection_names {
4492        for name in collection_names.iter().take(limit) {
4493            let record = load_operational_collection_record(conn, name)?.ok_or_else(|| {
4494                EngineError::InvalidWrite(format!(
4495                    "operational collection '{name}' is not registered"
4496                ))
4497            })?;
4498            records.push(record);
4499        }
4500        return Ok(records);
4501    }
4502
4503    let mut stmt = conn.prepare(
4504        "SELECT name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at, disabled_at \
4505         FROM operational_collections ORDER BY name",
4506    )?;
4507    let rows = stmt
4508        .query_map([], map_operational_collection_row)?
4509        .take(limit)
4510        .collect::<Result<Vec<_>, _>>()?;
4511    Ok(rows)
4512}
4513
4514fn last_operational_retention_run_at(
4515    conn: &rusqlite::Connection,
4516    collection_name: &str,
4517) -> Result<Option<i64>, EngineError> {
4518    conn.query_row(
4519        "SELECT MAX(executed_at) FROM operational_retention_runs WHERE collection_name = ?1",
4520        [collection_name],
4521        |row| row.get(0),
4522    )
4523    .optional()
4524    .map_err(EngineError::Sqlite)
4525    .map(Option::flatten)
4526}
4527
4528fn count_operational_mutations_for_collection(
4529    conn: &rusqlite::Connection,
4530    collection_name: &str,
4531) -> Result<usize, EngineError> {
4532    let count: i64 = conn.query_row(
4533        "SELECT count(*) FROM operational_mutations WHERE collection_name = ?1",
4534        [collection_name],
4535        |row| row.get(0),
4536    )?;
4537    usize::try_from(count).map_err(|_| {
4538        EngineError::Bridge(format!("count overflow for collection {collection_name}"))
4539    })
4540}
4541
4542fn retention_action_kind_and_limit(
4543    policy: &OperationalRetentionPolicy,
4544) -> (OperationalRetentionActionKind, Option<usize>) {
4545    match policy {
4546        OperationalRetentionPolicy::KeepAll => (OperationalRetentionActionKind::Noop, None),
4547        OperationalRetentionPolicy::PurgeBeforeSeconds { .. } => {
4548            (OperationalRetentionActionKind::PurgeBeforeSeconds, None)
4549        }
4550        OperationalRetentionPolicy::KeepLast { max_rows } => {
4551            (OperationalRetentionActionKind::KeepLast, Some(*max_rows))
4552        }
4553    }
4554}
4555
4556fn plan_operational_retention_item(
4557    conn: &rusqlite::Connection,
4558    record: &OperationalCollectionRecord,
4559    now_timestamp: i64,
4560) -> Result<OperationalRetentionPlanItem, EngineError> {
4561    let last_run_at = last_operational_retention_run_at(conn, &record.name)?;
4562    if record.kind != OperationalCollectionKind::AppendOnlyLog {
4563        return Ok(OperationalRetentionPlanItem {
4564            collection_name: record.name.clone(),
4565            action_kind: OperationalRetentionActionKind::Noop,
4566            candidate_deletions: 0,
4567            before_timestamp: None,
4568            max_rows: None,
4569            last_run_at,
4570        });
4571    }
4572    let policy = parse_operational_retention_policy(&record.retention_json)?;
4573    let (action_kind, max_rows) = retention_action_kind_and_limit(&policy);
4574    let (candidate_ids, before_timestamp) = operational_compaction_candidates_at(
4575        conn,
4576        &record.retention_json,
4577        &record.name,
4578        now_timestamp,
4579    )?;
4580    Ok(OperationalRetentionPlanItem {
4581        collection_name: record.name.clone(),
4582        action_kind,
4583        candidate_deletions: candidate_ids.len(),
4584        before_timestamp,
4585        max_rows,
4586        last_run_at,
4587    })
4588}
4589
4590fn run_operational_retention_item(
4591    tx: &rusqlite::Transaction<'_>,
4592    record: &OperationalCollectionRecord,
4593    now_timestamp: i64,
4594    dry_run: bool,
4595) -> Result<OperationalRetentionRunItem, EngineError> {
4596    let plan = plan_operational_retention_item(tx, record, now_timestamp)?;
4597    let mut deleted_mutations = 0usize;
4598    if record.kind == OperationalCollectionKind::AppendOnlyLog
4599        && plan.action_kind != OperationalRetentionActionKind::Noop
4600        && plan.candidate_deletions > 0
4601        && !dry_run
4602    {
4603        let (candidate_ids, _) = operational_compaction_candidates_at(
4604            tx,
4605            &record.retention_json,
4606            &record.name,
4607            now_timestamp,
4608        )?;
4609        let mut delete_stmt =
4610            tx.prepare_cached("DELETE FROM operational_mutations WHERE id = ?1")?;
4611        for mutation_id in &candidate_ids {
4612            delete_stmt.execute([mutation_id.as_str()])?;
4613            deleted_mutations += 1;
4614        }
4615        drop(delete_stmt);
4616
4617        persist_simple_provenance_event(
4618            tx,
4619            "operational_retention_run",
4620            &record.name,
4621            Some(serde_json::json!({
4622                "action_kind": plan.action_kind,
4623                "deleted_mutations": deleted_mutations,
4624                "before_timestamp": plan.before_timestamp,
4625                "max_rows": plan.max_rows,
4626                "executed_at": now_timestamp,
4627            })),
4628        )?;
4629    }
4630
4631    let live_rows_remaining = count_operational_mutations_for_collection(tx, &record.name)?;
4632    let effective_deleted_mutations = if dry_run {
4633        plan.candidate_deletions
4634    } else {
4635        deleted_mutations
4636    };
4637    let rows_remaining = if dry_run {
4638        live_rows_remaining.saturating_sub(effective_deleted_mutations)
4639    } else {
4640        live_rows_remaining
4641    };
4642    if !dry_run && plan.action_kind != OperationalRetentionActionKind::Noop {
4643        tx.execute(
4644            "INSERT INTO operational_retention_runs \
4645             (id, collection_name, executed_at, action_kind, dry_run, deleted_mutations, rows_remaining, metadata_json) \
4646             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
4647            rusqlite::params![
4648                new_id(),
4649                record.name,
4650                now_timestamp,
4651                serde_json::to_string(&plan.action_kind)
4652                    .unwrap_or_else(|_| "\"noop\"".to_owned())
4653                    .trim_matches('"')
4654                    .to_owned(),
4655                i32::from(dry_run),
4656                deleted_mutations,
4657                rows_remaining,
4658                serde_json::json!({
4659                    "before_timestamp": plan.before_timestamp,
4660                    "max_rows": plan.max_rows,
4661                })
4662                .to_string(),
4663            ],
4664        )?;
4665    }
4666
4667    Ok(OperationalRetentionRunItem {
4668        collection_name: plan.collection_name,
4669        action_kind: plan.action_kind,
4670        deleted_mutations: effective_deleted_mutations,
4671        before_timestamp: plan.before_timestamp,
4672        max_rows: plan.max_rows,
4673        rows_remaining,
4674    })
4675}
4676
4677fn current_unix_timestamp() -> Result<i64, EngineError> {
4678    let now = SystemTime::now()
4679        .duration_since(SystemTime::UNIX_EPOCH)
4680        .map_err(|error| EngineError::Bridge(format!("system clock error: {error}")))?;
4681    i64::try_from(now.as_secs())
4682        .map_err(|_| EngineError::Bridge("unix timestamp overflow".to_owned()))
4683}
4684
4685fn map_operational_collection_row(
4686    row: &rusqlite::Row<'_>,
4687) -> Result<OperationalCollectionRecord, rusqlite::Error> {
4688    let kind_text: String = row.get(1)?;
4689    let kind = OperationalCollectionKind::try_from(kind_text.as_str()).map_err(|message| {
4690        rusqlite::Error::FromSqlConversionFailure(
4691            1,
4692            rusqlite::types::Type::Text,
4693            Box::new(io::Error::new(io::ErrorKind::InvalidData, message)),
4694        )
4695    })?;
4696    Ok(OperationalCollectionRecord {
4697        name: row.get(0)?,
4698        kind,
4699        schema_json: row.get(2)?,
4700        retention_json: row.get(3)?,
4701        filter_fields_json: row.get(4)?,
4702        validation_json: row.get(5)?,
4703        secondary_indexes_json: row.get(6)?,
4704        format_version: row.get(7)?,
4705        created_at: row.get(8)?,
4706        disabled_at: row.get(9)?,
4707    })
4708}
4709
4710fn map_operational_mutation_row(
4711    row: &rusqlite::Row<'_>,
4712) -> Result<OperationalMutationRow, rusqlite::Error> {
4713    Ok(OperationalMutationRow {
4714        id: row.get(0)?,
4715        collection_name: row.get(1)?,
4716        record_key: row.get(2)?,
4717        op_kind: row.get(3)?,
4718        payload_json: row.get(4)?,
4719        source_ref: row.get(5)?,
4720        created_at: row.get(6)?,
4721    })
4722}
4723
4724fn map_operational_current_row(
4725    row: &rusqlite::Row<'_>,
4726) -> Result<OperationalCurrentRow, rusqlite::Error> {
4727    Ok(OperationalCurrentRow {
4728        collection_name: row.get(0)?,
4729        record_key: row.get(1)?,
4730        payload_json: row.get(2)?,
4731        updated_at: row.get(3)?,
4732        last_mutation_id: row.get(4)?,
4733    })
4734}
4735
4736#[cfg(test)]
4737#[allow(clippy::expect_used)]
4738mod tests {
4739    use std::fs;
4740    use std::sync::Arc;
4741
4742    use fathomdb_schema::SchemaManager;
4743    use tempfile::NamedTempFile;
4744
4745    use super::{AdminService, SafeExportOptions, VectorRegenerationConfig};
4746    use crate::projection::ProjectionTarget;
4747    use crate::sqlite;
4748    use crate::{
4749        EngineError, ExecutionCoordinator, OperationalCollectionKind, OperationalRegisterRequest,
4750        TelemetryCounters,
4751    };
4752
4753    use fathomdb_query::QueryBuilder;
4754
4755    #[cfg(feature = "sqlite-vec")]
4756    use super::{VectorGeneratorPolicy, load_vector_regeneration_config};
4757
4758    #[allow(dead_code)]
4759    #[cfg(unix)]
4760    fn set_file_mode(path: &std::path::Path, mode: u32) {
4761        use std::os::unix::fs::PermissionsExt;
4762
4763        let mut permissions = fs::metadata(path).expect("script metadata").permissions();
4764        permissions.set_mode(mode);
4765        fs::set_permissions(path, permissions).expect("chmod");
4766    }
4767
    /// Non-Unix counterpart of `set_file_mode`: accepts the same arguments and
    /// does nothing, so callers compile unchanged on every platform.
    #[allow(dead_code)]
    #[cfg(not(unix))]
    fn set_file_mode(_path: &std::path::Path, _mode: u32) {}
4771
4772    fn setup() -> (NamedTempFile, AdminService) {
4773        let db = NamedTempFile::new().expect("temp file");
4774        let schema = Arc::new(SchemaManager::new());
4775        {
4776            let conn = sqlite::open_connection(db.path()).expect("connection");
4777            schema.bootstrap(&conn).expect("bootstrap");
4778        }
4779        let service = AdminService::new(db.path(), Arc::clone(&schema));
4780        (db, service)
4781    }
4782
4783    #[test]
4784    fn check_integrity_includes_active_uniqueness_count() {
4785        let (_db, service) = setup();
4786        let report = service.check_integrity().expect("integrity check");
4787        assert_eq!(report.duplicate_active_logical_ids, 0);
4788        assert_eq!(report.operational_missing_collections, 0);
4789        assert_eq!(report.operational_missing_last_mutations, 0);
4790    }
4791
4792    #[test]
4793    fn trace_source_returns_node_logical_ids() {
4794        let (db, service) = setup();
4795        {
4796            let conn = sqlite::open_connection(db.path()).expect("conn");
4797            conn.execute(
4798                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
4799                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 'source-1')",
4800                [],
4801            )
4802            .expect("insert node");
4803        }
4804        let report = service.trace_source("source-1").expect("trace");
4805        assert_eq!(report.node_rows, 1);
4806        assert_eq!(report.node_logical_ids, vec!["lg1"]);
4807    }
4808
4809    #[test]
4810    fn trace_source_includes_operational_mutations() {
4811        let (db, service) = setup();
4812        {
4813            let conn = sqlite::open_connection(db.path()).expect("conn");
4814            conn.execute(
4815                "INSERT INTO operational_collections \
4816                 (name, kind, schema_json, retention_json, format_version, created_at) \
4817                 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
4818                [],
4819            )
4820            .expect("insert collection");
4821            conn.execute(
4822                "INSERT INTO operational_mutations \
4823                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
4824                 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"ok\"}', 'source-1', 100, 1)",
4825                [],
4826            )
4827            .expect("insert mutation");
4828        }
4829
4830        let report = service.trace_source("source-1").expect("trace");
4831        assert_eq!(report.operational_mutation_rows, 1);
4832        assert_eq!(report.operational_mutation_ids, vec!["m1"]);
4833    }
4834
4835    #[test]
4836    fn excise_source_restores_prior_active_node() {
4837        let (db, service) = setup();
4838        {
4839            let conn = sqlite::open_connection(db.path()).expect("conn");
4840            conn.execute(
4841                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
4842                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
4843                [],
4844            )
4845            .expect("insert v1 superseded");
4846            conn.execute(
4847                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
4848                 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
4849                [],
4850            )
4851            .expect("insert v2 active");
4852        }
4853        service.excise_source("source-2").expect("excise");
4854        {
4855            let conn = sqlite::open_connection(db.path()).expect("conn");
4856            let active_row_id: String = conn
4857                .query_row(
4858                    "SELECT row_id FROM nodes WHERE logical_id = 'lg1' AND superseded_at IS NULL",
4859                    [],
4860                    |row| row.get(0),
4861                )
4862                .expect("active row exists after excise");
4863            assert_eq!(active_row_id, "r1");
4864        }
4865    }
4866
4867    #[test]
4868    fn excise_source_deletes_operational_mutations_and_repairs_latest_state_current() {
4869        let (db, service) = setup();
4870        {
4871            let conn = sqlite::open_connection(db.path()).expect("conn");
4872            conn.execute(
4873                "INSERT INTO operational_collections \
4874                 (name, kind, schema_json, retention_json, format_version, created_at) \
4875                 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
4876                [],
4877            )
4878            .expect("insert collection");
4879            conn.execute(
4880                "INSERT INTO operational_mutations \
4881                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
4882                 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"old\"}', 'source-1', 100, 1)",
4883                [],
4884            )
4885            .expect("insert prior mutation");
4886            conn.execute(
4887                "INSERT INTO operational_mutations \
4888                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
4889                 VALUES ('m2', 'connector_health', 'gmail', 'put', '{\"status\":\"new\"}', 'source-2', 200, 2)",
4890                [],
4891            )
4892            .expect("insert excised mutation");
4893            conn.execute(
4894                "INSERT INTO operational_current \
4895                 (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
4896                 VALUES ('connector_health', 'gmail', '{\"status\":\"new\"}', 200, 'm2')",
4897                [],
4898            )
4899            .expect("insert current row");
4900        }
4901
4902        let traced = service
4903            .trace_source("source-2")
4904            .expect("trace before excise");
4905        assert_eq!(traced.operational_mutation_rows, 1);
4906        assert_eq!(traced.operational_mutation_ids, vec!["m2"]);
4907
4908        let excised = service.excise_source("source-2").expect("excise");
4909        assert_eq!(excised.operational_mutation_rows, 0);
4910        assert!(excised.operational_mutation_ids.is_empty());
4911
4912        {
4913            let conn = sqlite::open_connection(db.path()).expect("conn");
4914            let remaining: i64 = conn
4915                .query_row(
4916                    "SELECT count(*) FROM operational_mutations WHERE source_ref = 'source-2'",
4917                    [],
4918                    |row| row.get(0),
4919                )
4920                .expect("remaining count");
4921            assert_eq!(remaining, 0);
4922
4923            let current: (String, String) = conn
4924                .query_row(
4925                    "SELECT payload_json, last_mutation_id FROM operational_current \
4926                     WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
4927                    [],
4928                    |row| Ok((row.get(0)?, row.get(1)?)),
4929                )
4930                .expect("rebuilt current row");
4931            assert_eq!(current.0, "{\"status\":\"old\"}");
4932            assert_eq!(current.1, "m1");
4933        }
4934    }
4935
4936    #[test]
4937    fn restore_logical_id_reestablishes_last_pre_retire_content_and_attached_edges() {
4938        let (db, service) = setup();
4939        {
4940            let conn = sqlite::open_connection(db.path()).expect("conn");
4941            conn.execute(
4942                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
4943                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
4944                [],
4945            )
4946            .expect("insert node");
4947            conn.execute(
4948                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
4949                 VALUES ('node-row-topic', 'topic-1', 'Topic', '{}', 100, 'seed')",
4950                [],
4951            )
4952            .expect("insert target node");
4953            conn.execute(
4954                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
4955                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
4956                [],
4957            )
4958            .expect("insert chunk");
4959            conn.execute(
4960                "INSERT INTO edges \
4961                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
4962                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 'seed')",
4963                [],
4964            )
4965            .expect("insert edge");
4966            conn.execute(
4967                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
4968                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
4969                [],
4970            )
4971            .expect("insert node retire event");
4972            conn.execute(
4973                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
4974                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
4975                [],
4976            )
4977            .expect("insert edge retire event");
4978            conn.execute(
4979                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
4980                [],
4981            )
4982            .expect("retire node");
4983            conn.execute(
4984                "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
4985                [],
4986            )
4987            .expect("retire edge");
4988            conn.execute("DELETE FROM fts_nodes", [])
4989                .expect("clear fts");
4990        }
4991
4992        let report = service.restore_logical_id("doc-1").expect("restore");
4993        assert_eq!(report.logical_id, "doc-1");
4994        assert!(!report.was_noop);
4995        assert_eq!(report.restored_node_rows, 1);
4996        assert_eq!(report.restored_edge_rows, 1);
4997        assert_eq!(report.restored_chunk_rows, 1);
4998        assert_eq!(report.restored_fts_rows, 1);
4999
5000        let conn = sqlite::open_connection(db.path()).expect("conn");
5001        let active_node_count: i64 = conn
5002            .query_row(
5003                "SELECT count(*) FROM nodes WHERE logical_id = 'doc-1' AND superseded_at IS NULL",
5004                [],
5005                |row| row.get(0),
5006            )
5007            .expect("active node count");
5008        assert_eq!(active_node_count, 1);
5009        let active_edge_count: i64 = conn
5010            .query_row(
5011                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
5012                [],
5013                |row| row.get(0),
5014            )
5015            .expect("active edge count");
5016        assert_eq!(active_edge_count, 1);
5017        let fts_count: i64 = conn
5018            .query_row(
5019                "SELECT count(*) FROM fts_nodes WHERE chunk_id = 'chunk-1'",
5020                [],
5021                |row| row.get(0),
5022            )
5023            .expect("fts count");
5024        assert_eq!(fts_count, 1);
5025    }
5026
5027    #[test]
5028    fn restore_logical_id_restores_edges_retired_after_the_node_retire_event() {
5029        let (db, service) = setup();
5030        {
5031            let conn = sqlite::open_connection(db.path()).expect("conn");
5032            conn.execute(
5033                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
5034                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
5035                [],
5036            )
5037            .expect("insert node");
5038            conn.execute(
5039                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
5040                 VALUES ('node-row-topic', 'topic-1', 'Topic', '{}', 100, 'seed')",
5041                [],
5042            )
5043            .expect("insert target node");
5044            conn.execute(
5045                "INSERT INTO edges \
5046                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
5047                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 'seed')",
5048                [],
5049            )
5050            .expect("insert edge");
5051            conn.execute(
5052                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5053                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
5054                [],
5055            )
5056            .expect("insert node retire event");
5057            conn.execute(
5058                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5059                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 201, '')",
5060                [],
5061            )
5062            .expect("insert edge retire event");
5063            conn.execute(
5064                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
5065                [],
5066            )
5067            .expect("retire node");
5068            conn.execute(
5069                "UPDATE edges SET superseded_at = 201 WHERE logical_id = 'edge-1'",
5070                [],
5071            )
5072            .expect("retire edge");
5073        }
5074
5075        let report = service.restore_logical_id("doc-1").expect("restore");
5076        assert_eq!(report.restored_edge_rows, 1);
5077
5078        let conn = sqlite::open_connection(db.path()).expect("conn");
5079        let active_edge_count: i64 = conn
5080            .query_row(
5081                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
5082                [],
5083                |row| row.get(0),
5084            )
5085            .expect("active edge count");
5086        assert_eq!(active_edge_count, 1);
5087    }
5088
5089    #[test]
5090    fn restore_logical_id_prefers_latest_retired_revision_when_timestamps_tie() {
5091        let (db, service) = setup();
5092        {
5093            let conn = sqlite::open_connection(db.path()).expect("conn");
5094            conn.execute(
5095                "INSERT INTO nodes \
5096                 (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5097                 VALUES ('node-row-older', 'doc-1', 'Document', '{\"title\":\"older\"}', 100, 200, 'forget-1')",
5098                [],
5099            )
5100            .expect("insert older retired node");
5101            conn.execute(
5102                "INSERT INTO nodes \
5103                 (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5104                 VALUES ('node-row-newer', 'doc-1', 'Document', '{\"title\":\"newer\"}', 100, 200, 'forget-1')",
5105                [],
5106            )
5107            .expect("insert newer retired node");
5108            conn.execute(
5109                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5110                 VALUES ('evt-retire-older', 'node_retire', 'doc-1', 'forget-1', 200, '')",
5111                [],
5112            )
5113            .expect("insert older retire event");
5114            conn.execute(
5115                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5116                 VALUES ('evt-retire-newer', 'node_retire', 'doc-1', 'forget-1', 200, '')",
5117                [],
5118            )
5119            .expect("insert newer retire event");
5120        }
5121
5122        let report = service.restore_logical_id("doc-1").expect("restore");
5123
5124        assert!(!report.was_noop);
5125        let conn = sqlite::open_connection(db.path()).expect("conn");
5126        let active_row: (String, String) = conn
5127            .query_row(
5128                "SELECT row_id, properties FROM nodes \
5129                 WHERE logical_id = 'doc-1' AND superseded_at IS NULL",
5130                [],
5131                |row| Ok((row.get(0)?, row.get(1)?)),
5132            )
5133            .expect("restored active row");
5134        assert_eq!(active_row.0, "node-row-newer");
5135        assert_eq!(active_row.1, "{\"title\":\"newer\"}");
5136    }
5137
5138    #[test]
5139    fn purge_logical_id_removes_retired_content_and_records_tombstone() {
5140        let (db, service) = setup();
5141        {
5142            let conn = sqlite::open_connection(db.path()).expect("conn");
5143            conn.execute(
5144                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5145                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
5146                [],
5147            )
5148            .expect("insert retired node");
5149            conn.execute(
5150                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5151                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5152                [],
5153            )
5154            .expect("insert chunk");
5155            conn.execute(
5156                "INSERT INTO edges \
5157                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, superseded_at, source_ref) \
5158                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 200, 'seed')",
5159                [],
5160            )
5161            .expect("insert retired edge");
5162            conn.execute(
5163                "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
5164                 VALUES ('chunk-1', 'doc-1', 'Document', 'budget narrative')",
5165                [],
5166            )
5167            .expect("insert fts");
5168        }
5169
5170        let report = service.purge_logical_id("doc-1").expect("purge");
5171        assert_eq!(report.logical_id, "doc-1");
5172        assert!(!report.was_noop);
5173        assert_eq!(report.deleted_node_rows, 1);
5174        assert_eq!(report.deleted_edge_rows, 1);
5175        assert_eq!(report.deleted_chunk_rows, 1);
5176        assert_eq!(report.deleted_fts_rows, 1);
5177
5178        let conn = sqlite::open_connection(db.path()).expect("conn");
5179        let remaining_nodes: i64 = conn
5180            .query_row(
5181                "SELECT count(*) FROM nodes WHERE logical_id = 'doc-1'",
5182                [],
5183                |row| row.get(0),
5184            )
5185            .expect("remaining nodes");
5186        assert_eq!(remaining_nodes, 0);
5187        let remaining_edges: i64 = conn
5188            .query_row(
5189                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1'",
5190                [],
5191                |row| row.get(0),
5192            )
5193            .expect("remaining edges");
5194        assert_eq!(remaining_edges, 0);
5195        let remaining_chunks: i64 = conn
5196            .query_row(
5197                "SELECT count(*) FROM chunks WHERE id = 'chunk-1'",
5198                [],
5199                |row| row.get(0),
5200            )
5201            .expect("remaining chunks");
5202        assert_eq!(remaining_chunks, 0);
5203        let purge_events: i64 = conn
5204            .query_row(
5205                "SELECT count(*) FROM provenance_events WHERE event_type = 'purge_logical_id' AND subject = 'doc-1'",
5206                [],
5207                |row| row.get(0),
5208            )
5209            .expect("purge events");
5210        assert_eq!(purge_events, 1);
5211    }
5212
5213    #[test]
5214    fn check_semantics_accepts_preserved_retired_chunks() {
5215        let (db, service) = setup();
5216        {
5217            let conn = sqlite::open_connection(db.path()).expect("conn");
5218            conn.execute(
5219                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5220                 VALUES ('node-row-1', 'doc-1', 'Document', '{}', 100, 200, 'seed')",
5221                [],
5222            )
5223            .expect("insert retired node");
5224            conn.execute(
5225                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5226                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5227                [],
5228            )
5229            .expect("insert chunk");
5230        }
5231
5232        let report = service.check_semantics().expect("semantics");
5233        assert_eq!(report.orphaned_chunks, 0);
5234    }
5235
5236    #[test]
5237    fn check_semantics_detects_missing_retired_node_history_for_preserved_chunks() {
5238        let (db, service) = setup();
5239        {
5240            let conn = sqlite::open_connection(db.path()).expect("conn");
5241            conn.execute(
5242                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5243                 VALUES ('chunk-1', 'ghost-doc', 'budget narrative', 100)",
5244                [],
5245            )
5246            .expect("insert orphaned chunk");
5247        }
5248
5249        let report = service.check_semantics().expect("semantics");
5250        assert_eq!(report.orphaned_chunks, 1);
5251    }
5252
5253    #[cfg(feature = "sqlite-vec")]
5254    #[test]
5255    fn check_semantics_detects_missing_retired_node_history_for_preserved_vec_rows() {
5256        let (db, service) = setup();
5257        {
5258            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5259            service
5260                .schema_manager
5261                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
5262                .expect("ensure vec profile");
5263            conn.execute(
5264                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5265                 VALUES ('chunk-1', 'ghost-doc', 'budget narrative', 100)",
5266                [],
5267            )
5268            .expect("insert orphaned chunk");
5269            conn.execute(
5270                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
5271                [],
5272            )
5273            .expect("insert vec row");
5274        }
5275
5276        let report = service.check_semantics().expect("semantics");
5277        assert_eq!(report.orphaned_chunks, 1);
5278        assert_eq!(report.vec_rows_for_superseded_nodes, 1);
5279    }
5280
5281    #[cfg(feature = "sqlite-vec")]
5282    #[test]
5283    fn restore_logical_id_reestablishes_vector_search_without_reingest() {
5284        let (db, service) = setup();
5285        {
5286            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5287            service
5288                .schema_manager
5289                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
5290                .expect("ensure vec profile");
5291            conn.execute(
5292                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5293                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
5294                [],
5295            )
5296            .expect("insert retired node");
5297            conn.execute(
5298                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5299                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5300                [],
5301            )
5302            .expect("insert chunk");
5303            conn.execute(
5304                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
5305                [],
5306            )
5307            .expect("insert vec row");
5308            conn.execute(
5309                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5310                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
5311                [],
5312            )
5313            .expect("insert retire event");
5314        }
5315
5316        let report = service.restore_logical_id("doc-1").expect("restore");
5317        assert_eq!(report.restored_vec_rows, 1);
5318
5319        let coordinator = ExecutionCoordinator::open(
5320            db.path(),
5321            Arc::new(SchemaManager::new()),
5322            Some(4),
5323            1,
5324            Arc::new(TelemetryCounters::default()),
5325        )
5326        .expect("coordinator");
5327        let compiled = QueryBuilder::nodes("Document")
5328            .vector_search("[0.0, 0.0, 0.0, 0.0]", 5)
5329            .compile()
5330            .expect("compile");
5331        let rows = coordinator
5332            .execute_compiled_read(&compiled)
5333            .expect("vector read");
5334        assert!(
5335            rows.nodes.iter().any(|row| row.logical_id == "doc-1"),
5336            "restore should make the preserved vec row visible again without re-ingest"
5337        );
5338    }
5339
5340    #[cfg(feature = "sqlite-vec")]
5341    #[test]
5342    fn purge_logical_id_deletes_vec_rows_for_retired_content() {
5343        let (db, service) = setup();
5344        {
5345            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5346            service
5347                .schema_manager
5348                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
5349                .expect("ensure vec profile");
5350            conn.execute(
5351                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5352                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
5353                [],
5354            )
5355            .expect("insert retired node");
5356            conn.execute(
5357                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5358                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5359                [],
5360            )
5361            .expect("insert chunk");
5362            conn.execute(
5363                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
5364                [],
5365            )
5366            .expect("insert vec row");
5367        }
5368
5369        let report = service.purge_logical_id("doc-1").expect("purge");
5370        assert_eq!(report.deleted_vec_rows, 1);
5371
5372        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5373        let vec_count: i64 = conn
5374            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
5375                row.get(0)
5376            })
5377            .expect("vec count");
5378        assert_eq!(vec_count, 0);
5379    }
5380
5381    #[cfg(feature = "sqlite-vec")]
5382    #[test]
5383    fn restore_logical_id_restores_visibility_of_regenerated_vectors() {
5384        let (db, service) = setup();
5385        let temp_dir = tempfile::tempdir().expect("temp dir");
5386        let script_path = temp_dir.path().join("vector-generator-restore.sh");
5387        fs::write(
5388            &script_path,
5389            r#"#!/usr/bin/env bash
5390set -euo pipefail
5391python3 -c 'import json, sys
5392payload = json.load(sys.stdin)
5393json.dump({"embeddings": [{"chunk_id": payload["chunks"][0]["chunk_id"], "embedding": [0.0, 0.0, 0.0, 0.0]}]}, sys.stdout)'
5394"#,
5395        )
5396        .expect("write script");
5397        set_file_mode(&script_path, 0o755);
5398
5399        {
5400            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5401            service
5402                .schema_manager
5403                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
5404                .expect("ensure vec profile");
5405            conn.execute(
5406                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
5407                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
5408                [],
5409            )
5410            .expect("insert node");
5411            conn.execute(
5412                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5413                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5414                [],
5415            )
5416            .expect("insert chunk");
5417        }
5418
5419        service
5420            .regenerate_vector_embeddings(&VectorRegenerationConfig {
5421                profile: "default".to_owned(),
5422                table_name: "vec_nodes_active".to_owned(),
5423                model_identity: "model".to_owned(),
5424                model_version: "1.0.0".to_owned(),
5425                dimension: 4,
5426                normalization_policy: "l2".to_owned(),
5427                chunking_policy: "per_chunk".to_owned(),
5428                preprocessing_policy: "trim".to_owned(),
5429                generator_command: vec![script_path.to_string_lossy().to_string()],
5430            })
5431            .expect("regenerate");
5432
5433        {
5434            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5435            conn.execute(
5436                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5437                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
5438                [],
5439            )
5440            .expect("insert retire event");
5441            conn.execute(
5442                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
5443                [],
5444            )
5445            .expect("retire node");
5446        }
5447
5448        let report = service.restore_logical_id("doc-1").expect("restore");
5449        assert_eq!(report.restored_vec_rows, 1);
5450
5451        let coordinator = ExecutionCoordinator::open(
5452            db.path(),
5453            Arc::new(SchemaManager::new()),
5454            Some(4),
5455            1,
5456            Arc::new(TelemetryCounters::default()),
5457        )
5458        .expect("coordinator");
5459        let compiled = QueryBuilder::nodes("Document")
5460            .vector_search("[0.0, 0.0, 0.0, 0.0]", 5)
5461            .compile()
5462            .expect("compile");
5463        let rows = coordinator
5464            .execute_compiled_read(&compiled)
5465            .expect("vector read");
5466        assert!(
5467            rows.nodes.iter().any(|row| row.logical_id == "doc-1"),
5468            "restored logical_id should become visible through regenerated vectors"
5469        );
5470    }
5471
5472    #[test]
5473    fn check_semantics_clean_db_returns_zeros() {
5474        let (_db, service) = setup();
5475        let report = service.check_semantics().expect("semantics check");
5476        assert_eq!(report.orphaned_chunks, 0);
5477        assert_eq!(report.null_source_ref_nodes, 0);
5478        assert_eq!(report.broken_step_fk, 0);
5479        assert_eq!(report.broken_action_fk, 0);
5480        assert_eq!(report.stale_fts_rows, 0);
5481        assert_eq!(report.fts_rows_for_superseded_nodes, 0);
5482        assert_eq!(report.dangling_edges, 0);
5483        assert_eq!(report.orphaned_supersession_chains, 0);
5484        assert_eq!(report.stale_vec_rows, 0);
5485        assert_eq!(report.vec_rows_for_superseded_nodes, 0);
5486        assert_eq!(report.missing_operational_current_rows, 0);
5487        assert_eq!(report.stale_operational_current_rows, 0);
5488        assert_eq!(report.disabled_collection_mutations, 0);
5489        assert_eq!(report.mismatched_kind_property_fts_rows, 0);
5490        assert_eq!(report.duplicate_property_fts_rows, 0);
5491        assert_eq!(report.drifted_property_fts_rows, 0);
5492        assert!(report.warnings.is_empty());
5493    }
5494
5495    #[test]
5496    fn register_operational_collection_persists_and_emits_provenance() {
5497        let (db, service) = setup();
5498        let record = service
5499            .register_operational_collection(&OperationalRegisterRequest {
5500                name: "connector_health".to_owned(),
5501                kind: OperationalCollectionKind::LatestState,
5502                schema_json: "{}".to_owned(),
5503                retention_json: "{}".to_owned(),
5504                filter_fields_json: "[]".to_owned(),
5505                validation_json: String::new(),
5506                secondary_indexes_json: "[]".to_owned(),
5507                format_version: 1,
5508            })
5509            .expect("register collection");
5510
5511        assert_eq!(record.name, "connector_health");
5512        assert_eq!(record.kind, OperationalCollectionKind::LatestState);
5513        assert_eq!(record.schema_json, "{}");
5514        assert_eq!(record.retention_json, "{}");
5515        assert_eq!(record.filter_fields_json, "[]");
5516        assert!(record.created_at > 0);
5517        assert_eq!(record.disabled_at, None);
5518
5519        let described = service
5520            .describe_operational_collection("connector_health")
5521            .expect("describe collection")
5522            .expect("collection exists");
5523        assert_eq!(described, record);
5524
5525        let conn = sqlite::open_connection(db.path()).expect("conn");
5526        let provenance_count: i64 = conn
5527            .query_row(
5528                "SELECT count(*) FROM provenance_events \
5529                 WHERE event_type = 'operational_collection_registered' AND subject = 'connector_health'",
5530                [],
5531                |row| row.get(0),
5532            )
5533            .expect("provenance count");
5534        assert_eq!(provenance_count, 1);
5535    }
5536
5537    #[test]
5538    fn register_and_update_operational_collection_validation_round_trip() {
5539        let (db, service) = setup();
5540        let record = service
5541            .register_operational_collection(&OperationalRegisterRequest {
5542                name: "connector_health".to_owned(),
5543                kind: OperationalCollectionKind::LatestState,
5544                schema_json: "{}".to_owned(),
5545                retention_json: "{}".to_owned(),
5546                filter_fields_json: "[]".to_owned(),
5547                validation_json: String::new(),
5548                secondary_indexes_json: "[]".to_owned(),
5549                format_version: 1,
5550            })
5551            .expect("register collection");
5552        assert_eq!(record.validation_json, "");
5553
5554        let validation_json = r#"{"format_version":1,"mode":"enforce","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#;
5555        let updated = service
5556            .update_operational_collection_validation("connector_health", validation_json)
5557            .expect("update validation");
5558        assert_eq!(updated.validation_json, validation_json);
5559
5560        let described = service
5561            .describe_operational_collection("connector_health")
5562            .expect("describe collection")
5563            .expect("collection exists");
5564        assert_eq!(described.validation_json, validation_json);
5565
5566        let conn = sqlite::open_connection(db.path()).expect("conn");
5567        let provenance_count: i64 = conn
5568            .query_row(
5569                "SELECT count(*) FROM provenance_events \
5570                 WHERE event_type = 'operational_collection_validation_updated' \
5571                   AND subject = 'connector_health'",
5572                [],
5573                |row| row.get(0),
5574            )
5575            .expect("provenance count");
5576        assert_eq!(provenance_count, 1);
5577    }
5578
5579    #[test]
5580    fn register_update_and_rebuild_operational_secondary_indexes_round_trip() {
5581        let (db, service) = setup();
5582        let record = service
5583            .register_operational_collection(&OperationalRegisterRequest {
5584                name: "audit_log".to_owned(),
5585                kind: OperationalCollectionKind::AppendOnlyLog,
5586                schema_json: "{}".to_owned(),
5587                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
5588                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
5589                validation_json: String::new(),
5590                secondary_indexes_json: "[]".to_owned(),
5591                format_version: 1,
5592            })
5593            .expect("register collection");
5594        assert_eq!(record.secondary_indexes_json, "[]");
5595
5596        {
5597            let writer = crate::WriterActor::start(
5598                db.path(),
5599                Arc::new(SchemaManager::new()),
5600                crate::ProvenanceMode::Warn,
5601                Arc::new(crate::TelemetryCounters::default()),
5602            )
5603            .expect("writer");
5604            writer
5605                .submit(crate::WriteRequest {
5606                    label: "secondary-index-seed".to_owned(),
5607                    nodes: vec![],
5608                    node_retires: vec![],
5609                    edges: vec![],
5610                    edge_retires: vec![],
5611                    chunks: vec![],
5612                    runs: vec![],
5613                    steps: vec![],
5614                    actions: vec![],
5615                    optional_backfills: vec![],
5616                    vec_inserts: vec![],
5617                    operational_writes: vec![
5618                        crate::OperationalWrite::Append {
5619                            collection: "audit_log".to_owned(),
5620                            record_key: "evt-1".to_owned(),
5621                            payload_json: r#"{"actor":"alice","ts":100}"#.to_owned(),
5622                            source_ref: Some("src-1".to_owned()),
5623                        },
5624                        crate::OperationalWrite::Append {
5625                            collection: "audit_log".to_owned(),
5626                            record_key: "evt-2".to_owned(),
5627                            payload_json: r#"{"actor":"bob","ts":200}"#.to_owned(),
5628                            source_ref: Some("src-2".to_owned()),
5629                        },
5630                    ],
5631                })
5632                .expect("seed writes");
5633        }
5634
5635        let secondary_indexes_json = r#"[{"name":"actor_ts","kind":"append_only_field_time","field":"actor","value_type":"string","time_field":"ts"}]"#;
5636        let updated = service
5637            .update_operational_collection_secondary_indexes("audit_log", secondary_indexes_json)
5638            .expect("update secondary indexes");
5639        assert_eq!(updated.secondary_indexes_json, secondary_indexes_json);
5640
5641        let conn = sqlite::open_connection(db.path()).expect("conn");
5642        let entry_count: i64 = conn
5643            .query_row(
5644                "SELECT count(*) FROM operational_secondary_index_entries \
5645                 WHERE collection_name = 'audit_log' AND index_name = 'actor_ts'",
5646                [],
5647                |row| row.get(0),
5648            )
5649            .expect("secondary index count");
5650        assert_eq!(entry_count, 2);
5651        conn.execute(
5652            "DELETE FROM operational_secondary_index_entries WHERE collection_name = 'audit_log'",
5653            [],
5654        )
5655        .expect("clear index entries");
5656        drop(conn);
5657
5658        let rebuild = service
5659            .rebuild_operational_secondary_indexes("audit_log")
5660            .expect("rebuild secondary indexes");
5661        assert_eq!(rebuild.collection_name, "audit_log");
5662        assert_eq!(rebuild.mutation_entries_rebuilt, 2);
5663        assert_eq!(rebuild.current_entries_rebuilt, 0);
5664    }
5665
5666    #[test]
5667    fn register_operational_collection_rejects_invalid_validation_contract() {
5668        let (_db, service) = setup();
5669
5670        let error = service
5671            .register_operational_collection(&OperationalRegisterRequest {
5672                name: "connector_health".to_owned(),
5673                kind: OperationalCollectionKind::LatestState,
5674                schema_json: "{}".to_owned(),
5675                retention_json: "{}".to_owned(),
5676                filter_fields_json: "[]".to_owned(),
5677                validation_json: r#"{"format_version":1,"mode":"enforce","fields":[{"name":"status","type":"string","minimum":0}]}"#
5678                    .to_owned(),
5679                secondary_indexes_json: "[]".to_owned(),
5680                format_version: 1,
5681            })
5682            .expect_err("invalid validation contract should reject");
5683
5684        assert!(matches!(error, EngineError::InvalidWrite(_)));
5685        assert!(error.to_string().contains("minimum/maximum"));
5686    }
5687
5688    #[test]
5689    fn validate_operational_collection_history_reports_invalid_rows_without_mutation() {
5690        let (db, service) = setup();
5691        service
5692            .register_operational_collection(&OperationalRegisterRequest {
5693                name: "audit_log".to_owned(),
5694                kind: OperationalCollectionKind::AppendOnlyLog,
5695                schema_json: "{}".to_owned(),
5696                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
5697                filter_fields_json: "[]".to_owned(),
5698                validation_json: r#"{"format_version":1,"mode":"disabled","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#
5699                    .to_owned(),
5700                secondary_indexes_json: "[]".to_owned(),
5701                format_version: 1,
5702            })
5703            .expect("register collection");
5704        {
5705            let writer = crate::WriterActor::start(
5706                db.path(),
5707                Arc::new(SchemaManager::new()),
5708                crate::ProvenanceMode::Warn,
5709                Arc::new(crate::TelemetryCounters::default()),
5710            )
5711            .expect("writer");
5712            writer
5713                .submit(crate::WriteRequest {
5714                    label: "history-validation".to_owned(),
5715                    nodes: vec![],
5716                    node_retires: vec![],
5717                    edges: vec![],
5718                    edge_retires: vec![],
5719                    chunks: vec![],
5720                    runs: vec![],
5721                    steps: vec![],
5722                    actions: vec![],
5723                    optional_backfills: vec![],
5724                    vec_inserts: vec![],
5725                    operational_writes: vec![
5726                        crate::OperationalWrite::Append {
5727                            collection: "audit_log".to_owned(),
5728                            record_key: "evt-1".to_owned(),
5729                            payload_json: r#"{"status":"ok"}"#.to_owned(),
5730                            source_ref: Some("src-1".to_owned()),
5731                        },
5732                        crate::OperationalWrite::Append {
5733                            collection: "audit_log".to_owned(),
5734                            record_key: "evt-2".to_owned(),
5735                            payload_json: r#"{"status":"bogus"}"#.to_owned(),
5736                            source_ref: Some("src-2".to_owned()),
5737                        },
5738                    ],
5739                })
5740                .expect("write");
5741        }
5742
5743        let report = service
5744            .validate_operational_collection_history("audit_log")
5745            .expect("validate history");
5746        assert_eq!(report.collection_name, "audit_log");
5747        assert_eq!(report.checked_rows, 2);
5748        assert_eq!(report.invalid_row_count, 1);
5749        assert_eq!(report.issues.len(), 1);
5750        assert_eq!(report.issues[0].record_key, "evt-2");
5751        assert!(report.issues[0].message.contains("must be one of"));
5752
5753        let trace = service
5754            .trace_operational_collection("audit_log", None)
5755            .expect("trace");
5756        assert_eq!(trace.mutation_count, 2);
5757
5758        let conn = sqlite::open_connection(db.path()).expect("conn");
5759        let provenance_count: i64 = conn
5760            .query_row(
5761                "SELECT count(*) FROM provenance_events \
5762                 WHERE event_type = 'operational_collection_history_validated' \
5763                   AND subject = 'audit_log'",
5764                [],
5765                |row| row.get(0),
5766            )
5767            .expect("provenance count");
5768        assert_eq!(provenance_count, 0);
5769    }
5770
5771    #[test]
5772    fn trace_operational_collection_returns_mutations_and_current_rows() {
5773        let (db, service) = setup();
5774        service
5775            .register_operational_collection(&OperationalRegisterRequest {
5776                name: "connector_health".to_owned(),
5777                kind: OperationalCollectionKind::LatestState,
5778                schema_json: "{}".to_owned(),
5779                retention_json: "{}".to_owned(),
5780                filter_fields_json: "[]".to_owned(),
5781                validation_json: String::new(),
5782                secondary_indexes_json: "[]".to_owned(),
5783                format_version: 1,
5784            })
5785            .expect("register collection");
5786        {
5787            let writer = crate::WriterActor::start(
5788                db.path(),
5789                Arc::new(SchemaManager::new()),
5790                crate::ProvenanceMode::Warn,
5791                Arc::new(crate::TelemetryCounters::default()),
5792            )
5793            .expect("writer");
5794            writer
5795                .submit(crate::WriteRequest {
5796                    label: "operational".to_owned(),
5797                    nodes: vec![],
5798                    node_retires: vec![],
5799                    edges: vec![],
5800                    edge_retires: vec![],
5801                    chunks: vec![],
5802                    runs: vec![],
5803                    steps: vec![],
5804                    actions: vec![],
5805                    optional_backfills: vec![],
5806                    vec_inserts: vec![],
5807                    operational_writes: vec![crate::OperationalWrite::Put {
5808                        collection: "connector_health".to_owned(),
5809                        record_key: "gmail".to_owned(),
5810                        payload_json: r#"{"status":"ok"}"#.to_owned(),
5811                        source_ref: Some("src-1".to_owned()),
5812                    }],
5813                })
5814                .expect("write");
5815        }
5816
5817        let report = service
5818            .trace_operational_collection("connector_health", Some("gmail"))
5819            .expect("trace");
5820        assert_eq!(report.collection_name, "connector_health");
5821        assert_eq!(report.record_key.as_deref(), Some("gmail"));
5822        assert_eq!(report.mutation_count, 1);
5823        assert_eq!(report.current_count, 1);
5824        assert_eq!(report.mutations[0].op_kind, "put");
5825        assert_eq!(report.current_rows[0].payload_json, r#"{"status":"ok"}"#);
5826    }
5827
5828    #[test]
5829    fn trace_operational_collection_rejects_unknown_collection() {
5830        let (_db, service) = setup();
5831
5832        let error = service
5833            .trace_operational_collection("missing_collection", None)
5834            .expect_err("unknown collection should fail");
5835
5836        assert!(matches!(error, EngineError::InvalidWrite(_)));
5837        assert!(error.to_string().contains("is not registered"));
5838    }
5839
5840    #[test]
5841    fn rebuild_operational_current_repairs_missing_latest_state_rows() {
5842        let (db, service) = setup();
5843        service
5844            .register_operational_collection(&OperationalRegisterRequest {
5845                name: "connector_health".to_owned(),
5846                kind: OperationalCollectionKind::LatestState,
5847                schema_json: "{}".to_owned(),
5848                retention_json: "{}".to_owned(),
5849                filter_fields_json: "[]".to_owned(),
5850                validation_json: String::new(),
5851                secondary_indexes_json: "[]".to_owned(),
5852                format_version: 1,
5853            })
5854            .expect("register collection");
5855        {
5856            let writer = crate::WriterActor::start(
5857                db.path(),
5858                Arc::new(SchemaManager::new()),
5859                crate::ProvenanceMode::Warn,
5860                Arc::new(crate::TelemetryCounters::default()),
5861            )
5862            .expect("writer");
5863            writer
5864                .submit(crate::WriteRequest {
5865                    label: "operational".to_owned(),
5866                    nodes: vec![],
5867                    node_retires: vec![],
5868                    edges: vec![],
5869                    edge_retires: vec![],
5870                    chunks: vec![],
5871                    runs: vec![],
5872                    steps: vec![],
5873                    actions: vec![],
5874                    optional_backfills: vec![],
5875                    vec_inserts: vec![],
5876                    operational_writes: vec![crate::OperationalWrite::Put {
5877                        collection: "connector_health".to_owned(),
5878                        record_key: "gmail".to_owned(),
5879                        payload_json: r#"{"status":"ok"}"#.to_owned(),
5880                        source_ref: Some("src-1".to_owned()),
5881                    }],
5882                })
5883                .expect("write");
5884        }
5885        {
5886            let conn = sqlite::open_connection(db.path()).expect("conn");
5887            conn.execute(
5888                "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
5889                [],
5890            )
5891            .expect("delete current row");
5892        }
5893
5894        let before = service.check_semantics().expect("semantics before rebuild");
5895        assert_eq!(before.missing_operational_current_rows, 1);
5896
5897        let repair = service
5898            .rebuild_operational_current(Some("connector_health"))
5899            .expect("rebuild current");
5900        assert_eq!(repair.collections_rebuilt, 1);
5901        assert_eq!(repair.current_rows_rebuilt, 1);
5902
5903        let after = service.check_semantics().expect("semantics after rebuild");
5904        assert_eq!(after.missing_operational_current_rows, 0);
5905
5906        let conn = sqlite::open_connection(db.path()).expect("conn");
5907        let payload: String = conn
5908            .query_row(
5909                "SELECT payload_json FROM operational_current \
5910                 WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
5911                [],
5912                |row| row.get(0),
5913            )
5914            .expect("restored payload");
5915        assert_eq!(payload, r#"{"status":"ok"}"#);
5916    }
5917
5918    #[test]
5919    fn rebuild_operational_current_restores_latest_state_secondary_index_entries() {
5920        let (db, service) = setup();
5921        service
5922            .register_operational_collection(&OperationalRegisterRequest {
5923                name: "connector_health".to_owned(),
5924                kind: OperationalCollectionKind::LatestState,
5925                schema_json: "{}".to_owned(),
5926                retention_json: "{}".to_owned(),
5927                filter_fields_json: "[]".to_owned(),
5928                validation_json: String::new(),
5929                secondary_indexes_json: r#"[{"name":"status_current","kind":"latest_state_field","field":"status","value_type":"string"}]"#.to_owned(),
5930                format_version: 1,
5931            })
5932            .expect("register collection");
5933        {
5934            let writer = crate::WriterActor::start(
5935                db.path(),
5936                Arc::new(SchemaManager::new()),
5937                crate::ProvenanceMode::Warn,
5938                Arc::new(crate::TelemetryCounters::default()),
5939            )
5940            .expect("writer");
5941            writer
5942                .submit(crate::WriteRequest {
5943                    label: "operational".to_owned(),
5944                    nodes: vec![],
5945                    node_retires: vec![],
5946                    edges: vec![],
5947                    edge_retires: vec![],
5948                    chunks: vec![],
5949                    runs: vec![],
5950                    steps: vec![],
5951                    actions: vec![],
5952                    optional_backfills: vec![],
5953                    vec_inserts: vec![],
5954                    operational_writes: vec![crate::OperationalWrite::Put {
5955                        collection: "connector_health".to_owned(),
5956                        record_key: "gmail".to_owned(),
5957                        payload_json: r#"{"status":"ok"}"#.to_owned(),
5958                        source_ref: Some("src-1".to_owned()),
5959                    }],
5960                })
5961                .expect("write");
5962        }
5963        {
5964            let conn = sqlite::open_connection(db.path()).expect("conn");
5965            let entry_count: i64 = conn
5966                .query_row(
5967                    "SELECT count(*) FROM operational_secondary_index_entries \
5968                     WHERE collection_name = 'connector_health' AND subject_kind = 'current'",
5969                    [],
5970                    |row| row.get(0),
5971                )
5972                .expect("secondary index count before repair");
5973            assert_eq!(entry_count, 1);
5974            conn.execute(
5975                "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
5976                [],
5977            )
5978            .expect("delete current row");
5979        }
5980
5981        service
5982            .rebuild_operational_current(Some("connector_health"))
5983            .expect("rebuild current");
5984
5985        let conn = sqlite::open_connection(db.path()).expect("conn");
5986        let entry_count: i64 = conn
5987            .query_row(
5988                "SELECT count(*) FROM operational_secondary_index_entries \
5989                 WHERE collection_name = 'connector_health' AND subject_kind = 'current'",
5990                [],
5991                |row| row.get(0),
5992            )
5993            .expect("secondary index count after repair");
5994        assert_eq!(entry_count, 1);
5995    }
5996
5997    #[test]
5998    fn operational_current_semantics_and_rebuild_follow_mutation_order() {
5999        let (db, service) = setup();
6000        {
6001            let conn = sqlite::open_connection(db.path()).expect("conn");
6002            conn.execute(
6003                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6004                 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
6005                [],
6006            )
6007            .expect("seed collection");
6008            conn.execute(
6009                "INSERT INTO operational_mutations \
6010                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6011                 VALUES ('m3', 'connector_health', 'gmail', 'put', '{\"status\":\"old\"}', 'src-1', 100, 1)",
6012                [],
6013            )
6014            .expect("seed first put");
6015            conn.execute(
6016                "INSERT INTO operational_mutations \
6017                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6018                 VALUES ('m2', 'connector_health', 'gmail', 'delete', '', 'src-2', 100, 2)",
6019                [],
6020            )
6021            .expect("seed delete");
6022            conn.execute(
6023                "INSERT INTO operational_mutations \
6024                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6025                 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"new\"}', 'src-3', 100, 3)",
6026                [],
6027            )
6028            .expect("seed final put");
6029            conn.execute(
6030                "INSERT INTO operational_current \
6031                 (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
6032                 VALUES ('connector_health', 'gmail', '{\"status\":\"new\"}', 100, 'm1')",
6033                [],
6034            )
6035            .expect("seed current");
6036        }
6037
6038        let before = service.check_semantics().expect("semantics before rebuild");
6039        assert_eq!(before.missing_operational_current_rows, 0);
6040        assert_eq!(before.stale_operational_current_rows, 0);
6041
6042        {
6043            let conn = sqlite::open_connection(db.path()).expect("conn");
6044            conn.execute(
6045                "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
6046                [],
6047            )
6048            .expect("delete current row");
6049        }
6050
6051        let missing = service.check_semantics().expect("semantics after delete");
6052        assert_eq!(missing.missing_operational_current_rows, 1);
6053        assert_eq!(missing.stale_operational_current_rows, 0);
6054
6055        service
6056            .rebuild_operational_current(Some("connector_health"))
6057            .expect("rebuild current");
6058
6059        let after = service.check_semantics().expect("semantics after rebuild");
6060        assert_eq!(after.missing_operational_current_rows, 0);
6061        assert_eq!(after.stale_operational_current_rows, 0);
6062
6063        let conn = sqlite::open_connection(db.path()).expect("conn");
6064        let payload: String = conn
6065            .query_row(
6066                "SELECT payload_json FROM operational_current \
6067                 WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
6068                [],
6069                |row| row.get(0),
6070            )
6071            .expect("restored payload");
6072        assert_eq!(payload, r#"{"status":"new"}"#);
6073    }
6074
6075    #[test]
6076    fn disable_operational_collection_sets_disabled_at_and_emits_provenance() {
6077        let (db, service) = setup();
6078        service
6079            .register_operational_collection(&OperationalRegisterRequest {
6080                name: "audit_log".to_owned(),
6081                kind: OperationalCollectionKind::AppendOnlyLog,
6082                schema_json: "{}".to_owned(),
6083                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6084                filter_fields_json: "[]".to_owned(),
6085                validation_json: String::new(),
6086                secondary_indexes_json: "[]".to_owned(),
6087                format_version: 1,
6088            })
6089            .expect("register collection");
6090
6091        let record = service
6092            .disable_operational_collection("audit_log")
6093            .expect("disable collection");
6094        assert_eq!(record.name, "audit_log");
6095        assert!(record.disabled_at.is_some());
6096
6097        let disabled_at = record.disabled_at.expect("disabled_at");
6098        let described = service
6099            .describe_operational_collection("audit_log")
6100            .expect("describe collection")
6101            .expect("collection exists");
6102        assert_eq!(described.disabled_at, Some(disabled_at));
6103
6104        let writer = crate::WriterActor::start(
6105            db.path(),
6106            Arc::new(SchemaManager::new()),
6107            crate::ProvenanceMode::Warn,
6108            Arc::new(crate::TelemetryCounters::default()),
6109        )
6110        .expect("writer");
6111        let error = writer
6112            .submit(crate::WriteRequest {
6113                label: "disabled-operational".to_owned(),
6114                nodes: vec![],
6115                node_retires: vec![],
6116                edges: vec![],
6117                edge_retires: vec![],
6118                chunks: vec![],
6119                runs: vec![],
6120                steps: vec![],
6121                actions: vec![],
6122                optional_backfills: vec![],
6123                vec_inserts: vec![],
6124                operational_writes: vec![crate::OperationalWrite::Append {
6125                    collection: "audit_log".to_owned(),
6126                    record_key: "evt-1".to_owned(),
6127                    payload_json: r#"{"type":"sync"}"#.to_owned(),
6128                    source_ref: Some("src-1".to_owned()),
6129                }],
6130            })
6131            .expect_err("disabled collection should reject writes");
6132        assert!(matches!(error, EngineError::InvalidWrite(_)));
6133        assert!(error.to_string().contains("is disabled"));
6134
6135        let conn = sqlite::open_connection(db.path()).expect("conn");
6136        let provenance_count: i64 = conn
6137            .query_row(
6138                "SELECT count(*) FROM provenance_events \
6139                 WHERE event_type = 'operational_collection_disabled' AND subject = 'audit_log'",
6140                [],
6141                |row| row.get(0),
6142            )
6143            .expect("provenance count");
6144        assert_eq!(provenance_count, 1);
6145    }
6146
6147    #[test]
6148    fn purge_operational_collection_deletes_append_only_rows_before_cutoff() {
6149        let (db, service) = setup();
6150        {
6151            let conn = sqlite::open_connection(db.path()).expect("conn");
6152            conn.execute(
6153                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6154                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_all\"}', 1, 100)",
6155                [],
6156            )
6157            .expect("seed collection");
6158            conn.execute(
6159                "INSERT INTO operational_mutations \
6160                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6161                 VALUES ('evt-1', 'audit_log', 'evt-1', 'append', '{\"seq\":1}', 'src-1', 100, 1)",
6162                [],
6163            )
6164            .expect("seed event 1");
6165            conn.execute(
6166                "INSERT INTO operational_mutations \
6167                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6168                 VALUES ('evt-2', 'audit_log', 'evt-2', 'append', '{\"seq\":2}', 'src-2', 200, 2)",
6169                [],
6170            )
6171            .expect("seed event 2");
6172            conn.execute(
6173                "INSERT INTO operational_mutations \
6174                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6175                 VALUES ('evt-3', 'audit_log', 'evt-3', 'append', '{\"seq\":3}', 'src-3', 300, 3)",
6176                [],
6177            )
6178            .expect("seed event 3");
6179        }
6180
6181        let report = service
6182            .purge_operational_collection("audit_log", 250)
6183            .expect("purge collection");
6184        assert_eq!(report.collection_name, "audit_log");
6185        assert_eq!(report.deleted_mutations, 2);
6186        assert_eq!(report.before_timestamp, 250);
6187
6188        let conn = sqlite::open_connection(db.path()).expect("conn");
6189        let remaining: Vec<String> = {
6190            let mut stmt = conn
6191                .prepare(
6192                    "SELECT id FROM operational_mutations \
6193                     WHERE collection_name = 'audit_log' ORDER BY mutation_order",
6194                )
6195                .expect("stmt");
6196            stmt.query_map([], |row| row.get(0))
6197                .expect("rows")
6198                .collect::<Result<_, _>>()
6199                .expect("collect")
6200        };
6201        assert_eq!(remaining, vec!["evt-3".to_owned()]);
6202        let provenance_count: i64 = conn
6203            .query_row(
6204                "SELECT count(*) FROM provenance_events \
6205                 WHERE event_type = 'operational_collection_purged' AND subject = 'audit_log'",
6206                [],
6207                |row| row.get(0),
6208            )
6209            .expect("provenance count");
6210        assert_eq!(provenance_count, 1);
6211    }
6212
6213    #[test]
6214    fn compact_operational_collection_dry_run_reports_without_mutation() {
6215        let (db, service) = setup();
6216        {
6217            let conn = sqlite::open_connection(db.path()).expect("conn");
6218            conn.execute(
6219                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6220                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
6221                [],
6222            )
6223            .expect("seed collection");
6224            for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
6225                conn.execute(
6226                    "INSERT INTO operational_mutations \
6227                     (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6228                     VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
6229                    rusqlite::params![
6230                        format!("evt-{index}"),
6231                        format!("{{\"seq\":{index}}}"),
6232                        created_at,
6233                        index,
6234                    ],
6235                )
6236                .expect("seed event");
6237            }
6238        }
6239
6240        let report = service
6241            .compact_operational_collection("audit_log", true)
6242            .expect("compact collection");
6243        assert_eq!(report.collection_name, "audit_log");
6244        assert_eq!(report.deleted_mutations, 1);
6245        assert!(report.dry_run);
6246        assert_eq!(report.before_timestamp, None);
6247
6248        let conn = sqlite::open_connection(db.path()).expect("conn");
6249        let remaining_count: i64 = conn
6250            .query_row(
6251                "SELECT count(*) FROM operational_mutations WHERE collection_name = 'audit_log'",
6252                [],
6253                |row| row.get(0),
6254            )
6255            .expect("remaining count");
6256        assert_eq!(remaining_count, 3);
6257        let provenance_count: i64 = conn
6258            .query_row(
6259                "SELECT count(*) FROM provenance_events \
6260                 WHERE event_type = 'operational_collection_compacted' AND subject = 'audit_log'",
6261                [],
6262                |row| row.get(0),
6263            )
6264            .expect("provenance count");
6265        assert_eq!(provenance_count, 0);
6266    }
6267
6268    #[test]
6269    fn compact_operational_collection_keep_last_deletes_oldest_rows() {
6270        let (db, service) = setup();
6271        {
6272            let conn = sqlite::open_connection(db.path()).expect("conn");
6273            conn.execute(
6274                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6275                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
6276                [],
6277            )
6278            .expect("seed collection");
6279            for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
6280                conn.execute(
6281                    "INSERT INTO operational_mutations \
6282                     (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6283                     VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
6284                    rusqlite::params![
6285                        format!("evt-{index}"),
6286                        format!("{{\"seq\":{index}}}"),
6287                        created_at,
6288                        index,
6289                    ],
6290                )
6291                .expect("seed event");
6292            }
6293        }
6294
6295        let report = service
6296            .compact_operational_collection("audit_log", false)
6297            .expect("compact collection");
6298        assert_eq!(report.deleted_mutations, 1);
6299        assert!(!report.dry_run);
6300
6301        let conn = sqlite::open_connection(db.path()).expect("conn");
6302        let remaining: Vec<String> = {
6303            let mut stmt = conn
6304                .prepare(
6305                    "SELECT id FROM operational_mutations \
6306                     WHERE collection_name = 'audit_log' ORDER BY mutation_order",
6307                )
6308                .expect("stmt");
6309            stmt.query_map([], |row| row.get(0))
6310                .expect("rows")
6311                .collect::<Result<_, _>>()
6312                .expect("collect")
6313        };
6314        assert_eq!(remaining, vec!["evt-2".to_owned(), "evt-3".to_owned()]);
6315        let provenance_count: i64 = conn
6316            .query_row(
6317                "SELECT count(*) FROM provenance_events \
6318                 WHERE event_type = 'operational_collection_compacted' AND subject = 'audit_log'",
6319                [],
6320                |row| row.get(0),
6321            )
6322            .expect("provenance count");
6323        assert_eq!(provenance_count, 1);
6324    }
6325
6326    #[test]
6327    fn plan_and_run_operational_retention_keep_last() {
6328        let (db, service) = setup();
6329        {
6330            let conn = sqlite::open_connection(db.path()).expect("conn");
6331            conn.execute(
6332                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6333                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
6334                [],
6335            )
6336            .expect("seed collection");
6337            for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
6338                conn.execute(
6339                    "INSERT INTO operational_mutations \
6340                     (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6341                     VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
6342                    rusqlite::params![
6343                        format!("evt-{index}"),
6344                        format!("{{\"seq\":{index}}}"),
6345                        created_at,
6346                        index,
6347                    ],
6348                )
6349                .expect("seed event");
6350            }
6351        }
6352
6353        let plan = service
6354            .plan_operational_retention(1_000, None, Some(10))
6355            .expect("plan retention");
6356        assert_eq!(plan.collections_examined, 1);
6357        assert_eq!(plan.items[0].collection_name, "audit_log");
6358        assert_eq!(
6359            plan.items[0].action_kind,
6360            crate::operational::OperationalRetentionActionKind::KeepLast
6361        );
6362        assert_eq!(plan.items[0].candidate_deletions, 1);
6363        assert_eq!(plan.items[0].max_rows, Some(2));
6364        assert_eq!(plan.items[0].last_run_at, None);
6365
6366        let dry_run = service
6367            .run_operational_retention(1_000, None, Some(10), true)
6368            .expect("dry-run retention");
6369        assert!(dry_run.dry_run);
6370        assert_eq!(dry_run.collections_acted_on, 1);
6371        assert_eq!(dry_run.items[0].deleted_mutations, 1);
6372        assert_eq!(dry_run.items[0].rows_remaining, 2);
6373
6374        let conn = sqlite::open_connection(db.path()).expect("conn");
6375        let remaining_count: i64 = conn
6376            .query_row(
6377                "SELECT count(*) FROM operational_mutations WHERE collection_name = 'audit_log'",
6378                [],
6379                |row| row.get(0),
6380            )
6381            .expect("remaining count after dry run");
6382        assert_eq!(remaining_count, 3);
6383        let retention_run_count: i64 = conn
6384            .query_row(
6385                "SELECT count(*) FROM operational_retention_runs WHERE collection_name = 'audit_log'",
6386                [],
6387                |row| row.get(0),
6388            )
6389            .expect("retention run count");
6390        assert_eq!(retention_run_count, 0);
6391        drop(conn);
6392
6393        let executed = service
6394            .run_operational_retention(1_000, None, Some(10), false)
6395            .expect("execute retention");
6396        assert_eq!(executed.collections_acted_on, 1);
6397        assert_eq!(executed.items[0].deleted_mutations, 1);
6398        assert_eq!(executed.items[0].rows_remaining, 2);
6399
6400        let conn = sqlite::open_connection(db.path()).expect("conn");
6401        let remaining: Vec<String> = {
6402            let mut stmt = conn
6403                .prepare(
6404                    "SELECT id FROM operational_mutations \
6405                     WHERE collection_name = 'audit_log' ORDER BY mutation_order",
6406                )
6407                .expect("stmt");
6408            stmt.query_map([], |row| row.get(0))
6409                .expect("rows")
6410                .collect::<Result<_, _>>()
6411                .expect("collect")
6412        };
6413        assert_eq!(remaining, vec!["evt-2".to_owned(), "evt-3".to_owned()]);
6414        let last_run_at: i64 = conn
6415            .query_row(
6416                "SELECT executed_at FROM operational_retention_runs \
6417                 WHERE collection_name = 'audit_log' ORDER BY executed_at DESC LIMIT 1",
6418                [],
6419                |row| row.get(0),
6420            )
6421            .expect("last run at");
6422        assert_eq!(last_run_at, 1_000);
6423    }
6424
6425    #[test]
6426    fn dry_run_operational_retention_does_not_mark_noop_collection_as_acted_on() {
6427        let (db, service) = setup();
6428        let conn = sqlite::open_connection(db.path()).expect("conn");
6429        conn.execute(
6430            "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6431             VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
6432            [],
6433        )
6434        .expect("seed collection");
6435        for (index, created_at) in [(1_i64, 100_i64), (2, 200)] {
6436            conn.execute(
6437                "INSERT INTO operational_mutations \
6438                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6439                 VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
6440                rusqlite::params![
6441                    format!("evt-{index}"),
6442                    format!("{{\"seq\":{index}}}"),
6443                    created_at,
6444                    index,
6445                ],
6446            )
6447            .expect("seed event");
6448        }
6449        drop(conn);
6450
6451        let dry_run = service
6452            .run_operational_retention(1_000, None, Some(10), true)
6453            .expect("dry-run retention");
6454        assert!(dry_run.dry_run);
6455        assert_eq!(dry_run.collections_acted_on, 0);
6456        assert_eq!(dry_run.items[0].deleted_mutations, 0);
6457        assert_eq!(dry_run.items[0].rows_remaining, 2);
6458    }
6459
6460    #[test]
6461    fn compact_operational_collection_rejects_latest_state() {
6462        let (_db, service) = setup();
6463        service
6464            .register_operational_collection(&OperationalRegisterRequest {
6465                name: "connector_health".to_owned(),
6466                kind: OperationalCollectionKind::LatestState,
6467                schema_json: "{}".to_owned(),
6468                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6469                filter_fields_json: "[]".to_owned(),
6470                validation_json: String::new(),
6471                secondary_indexes_json: "[]".to_owned(),
6472                format_version: 1,
6473            })
6474            .expect("register collection");
6475
6476        let error = service
6477            .compact_operational_collection("connector_health", false)
6478            .expect_err("latest_state compaction should be rejected");
6479        assert!(matches!(error, EngineError::InvalidWrite(_)));
6480        assert!(error.to_string().contains("append_only_log"));
6481    }
6482
6483    #[test]
6484    fn register_operational_collection_persists_filter_fields_json() {
6485        let (_db, service) = setup();
6486
6487        let record = service
6488            .register_operational_collection(&OperationalRegisterRequest {
6489                name: "audit_log".to_owned(),
6490                kind: OperationalCollectionKind::AppendOnlyLog,
6491                schema_json: "{}".to_owned(),
6492                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6493                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
6494                validation_json: String::new(),
6495                secondary_indexes_json: "[]".to_owned(),
6496                format_version: 1,
6497            })
6498            .expect("register collection");
6499
6500        assert_eq!(
6501            record.filter_fields_json,
6502            r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#
6503        );
6504    }
6505
6506    #[test]
6507    fn read_operational_collection_filters_append_only_rows_by_declared_fields() {
6508        let (db, service) = setup();
6509        service
6510            .register_operational_collection(&OperationalRegisterRequest {
6511                name: "audit_log".to_owned(),
6512                kind: OperationalCollectionKind::AppendOnlyLog,
6513                schema_json: "{}".to_owned(),
6514                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6515                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"seq","type":"integer","modes":["exact","range"]},{"name":"ts","type":"timestamp","modes":["exact","range"]}]"#.to_owned(),
6516                validation_json: String::new(),
6517                secondary_indexes_json: "[]".to_owned(),
6518                format_version: 1,
6519            })
6520            .expect("register collection");
6521        {
6522            let writer = crate::WriterActor::start(
6523                db.path(),
6524                Arc::new(SchemaManager::new()),
6525                crate::ProvenanceMode::Warn,
6526                Arc::new(crate::TelemetryCounters::default()),
6527            )
6528            .expect("writer");
6529            writer
6530                .submit(crate::WriteRequest {
6531                    label: "operational".to_owned(),
6532                    nodes: vec![],
6533                    node_retires: vec![],
6534                    edges: vec![],
6535                    edge_retires: vec![],
6536                    chunks: vec![],
6537                    runs: vec![],
6538                    steps: vec![],
6539                    actions: vec![],
6540                    optional_backfills: vec![],
6541                    vec_inserts: vec![],
6542                    operational_writes: vec![
6543                        crate::OperationalWrite::Append {
6544                            collection: "audit_log".to_owned(),
6545                            record_key: "evt-1".to_owned(),
6546                            payload_json: r#"{"actor":"alice","seq":1,"ts":100}"#.to_owned(),
6547                            source_ref: Some("src-1".to_owned()),
6548                        },
6549                        crate::OperationalWrite::Append {
6550                            collection: "audit_log".to_owned(),
6551                            record_key: "evt-2".to_owned(),
6552                            payload_json: r#"{"actor":"alice-admin","seq":2,"ts":200}"#.to_owned(),
6553                            source_ref: Some("src-2".to_owned()),
6554                        },
6555                        crate::OperationalWrite::Append {
6556                            collection: "audit_log".to_owned(),
6557                            record_key: "evt-3".to_owned(),
6558                            payload_json: r#"{"actor":"bob","seq":3,"ts":300}"#.to_owned(),
6559                            source_ref: Some("src-3".to_owned()),
6560                        },
6561                    ],
6562                })
6563                .expect("write");
6564        }
6565
6566        let report = service
6567            .read_operational_collection(&crate::operational::OperationalReadRequest {
6568                collection_name: "audit_log".to_owned(),
6569                filters: vec![
6570                    crate::operational::OperationalFilterClause::Prefix {
6571                        field: "actor".to_owned(),
6572                        value: "alice".to_owned(),
6573                    },
6574                    crate::operational::OperationalFilterClause::Range {
6575                        field: "ts".to_owned(),
6576                        lower: Some(150),
6577                        upper: Some(250),
6578                    },
6579                ],
6580                limit: Some(10),
6581            })
6582            .expect("filtered read");
6583
6584        assert_eq!(report.collection_name, "audit_log");
6585        assert_eq!(report.row_count, 1);
6586        assert!(!report.was_limited);
6587        assert_eq!(report.rows.len(), 1);
6588        assert_eq!(report.rows[0].record_key, "evt-2");
6589        assert_eq!(
6590            report.rows[0].payload_json,
6591            r#"{"actor":"alice-admin","seq":2,"ts":200}"#
6592        );
6593    }
6594
6595    #[test]
6596    fn read_operational_collection_uses_secondary_index_when_filter_values_are_missing() {
6597        let (db, service) = setup();
6598        service
6599            .register_operational_collection(&OperationalRegisterRequest {
6600                name: "audit_log".to_owned(),
6601                kind: OperationalCollectionKind::AppendOnlyLog,
6602                schema_json: "{}".to_owned(),
6603                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6604                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
6605                validation_json: String::new(),
6606                secondary_indexes_json: r#"[{"name":"actor_ts","kind":"append_only_field_time","field":"actor","value_type":"string","time_field":"ts"}]"#.to_owned(),
6607                format_version: 1,
6608            })
6609            .expect("register collection");
6610        {
6611            let writer = crate::WriterActor::start(
6612                db.path(),
6613                Arc::new(SchemaManager::new()),
6614                crate::ProvenanceMode::Warn,
6615                Arc::new(crate::TelemetryCounters::default()),
6616            )
6617            .expect("writer");
6618            writer
6619                .submit(crate::WriteRequest {
6620                    label: "operational".to_owned(),
6621                    nodes: vec![],
6622                    node_retires: vec![],
6623                    edges: vec![],
6624                    edge_retires: vec![],
6625                    chunks: vec![],
6626                    runs: vec![],
6627                    steps: vec![],
6628                    actions: vec![],
6629                    optional_backfills: vec![],
6630                    vec_inserts: vec![],
6631                    operational_writes: vec![
6632                        crate::OperationalWrite::Append {
6633                            collection: "audit_log".to_owned(),
6634                            record_key: "evt-1".to_owned(),
6635                            payload_json: r#"{"actor":"alice","ts":100}"#.to_owned(),
6636                            source_ref: Some("src-1".to_owned()),
6637                        },
6638                        crate::OperationalWrite::Append {
6639                            collection: "audit_log".to_owned(),
6640                            record_key: "evt-2".to_owned(),
6641                            payload_json: r#"{"actor":"alice-admin","ts":200}"#.to_owned(),
6642                            source_ref: Some("src-2".to_owned()),
6643                        },
6644                    ],
6645                })
6646                .expect("write");
6647        }
6648        let conn = sqlite::open_connection(db.path()).expect("conn");
6649        conn.execute(
6650            "DELETE FROM operational_filter_values WHERE collection_name = 'audit_log'",
6651            [],
6652        )
6653        .expect("clear filter values");
6654        drop(conn);
6655
6656        let report = service
6657            .read_operational_collection(&crate::operational::OperationalReadRequest {
6658                collection_name: "audit_log".to_owned(),
6659                filters: vec![
6660                    crate::operational::OperationalFilterClause::Prefix {
6661                        field: "actor".to_owned(),
6662                        value: "alice".to_owned(),
6663                    },
6664                    crate::operational::OperationalFilterClause::Range {
6665                        field: "ts".to_owned(),
6666                        lower: Some(150),
6667                        upper: Some(250),
6668                    },
6669                ],
6670                limit: Some(10),
6671            })
6672            .expect("secondary-index read");
6673
6674        assert_eq!(report.row_count, 1);
6675        assert_eq!(report.rows[0].record_key, "evt-2");
6676    }
6677
6678    #[test]
6679    fn read_operational_collection_rejects_undeclared_fields_and_latest_state_collections() {
6680        let (_db, service) = setup();
6681        service
6682            .register_operational_collection(&OperationalRegisterRequest {
6683                name: "connector_health".to_owned(),
6684                kind: OperationalCollectionKind::LatestState,
6685                schema_json: "{}".to_owned(),
6686                retention_json: "{}".to_owned(),
6687                filter_fields_json: r#"[{"name":"status","type":"string","modes":["exact"]}]"#
6688                    .to_owned(),
6689                validation_json: String::new(),
6690                secondary_indexes_json: "[]".to_owned(),
6691                format_version: 1,
6692            })
6693            .expect("register collection");
6694
6695        let latest_state_error = service
6696            .read_operational_collection(&crate::operational::OperationalReadRequest {
6697                collection_name: "connector_health".to_owned(),
6698                filters: vec![crate::operational::OperationalFilterClause::Exact {
6699                    field: "status".to_owned(),
6700                    value: crate::operational::OperationalFilterValue::String("ok".to_owned()),
6701                }],
6702                limit: Some(10),
6703            })
6704            .expect_err("latest_state filtered reads should be rejected");
6705        assert!(latest_state_error.to_string().contains("append_only_log"));
6706
6707        service
6708            .register_operational_collection(&OperationalRegisterRequest {
6709                name: "audit_log".to_owned(),
6710                kind: OperationalCollectionKind::AppendOnlyLog,
6711                schema_json: "{}".to_owned(),
6712                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6713                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact"]}]"#
6714                    .to_owned(),
6715                validation_json: String::new(),
6716                secondary_indexes_json: "[]".to_owned(),
6717                format_version: 1,
6718            })
6719            .expect("register append-only collection");
6720
6721        let undeclared_error = service
6722            .read_operational_collection(&crate::operational::OperationalReadRequest {
6723                collection_name: "audit_log".to_owned(),
6724                filters: vec![crate::operational::OperationalFilterClause::Exact {
6725                    field: "missing".to_owned(),
6726                    value: crate::operational::OperationalFilterValue::String("x".to_owned()),
6727                }],
6728                limit: Some(10),
6729            })
6730            .expect_err("undeclared field should be rejected");
6731        assert!(undeclared_error.to_string().contains("undeclared"));
6732    }
6733
6734    #[test]
6735    fn read_operational_collection_applies_limit_and_reports_truncation() {
6736        let (db, service) = setup();
6737        service
6738            .register_operational_collection(&OperationalRegisterRequest {
6739                name: "audit_log".to_owned(),
6740                kind: OperationalCollectionKind::AppendOnlyLog,
6741                schema_json: "{}".to_owned(),
6742                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6743                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["prefix"]}]"#
6744                    .to_owned(),
6745                validation_json: String::new(),
6746                secondary_indexes_json: "[]".to_owned(),
6747                format_version: 1,
6748            })
6749            .expect("register collection");
6750        {
6751            let writer = crate::WriterActor::start(
6752                db.path(),
6753                Arc::new(SchemaManager::new()),
6754                crate::ProvenanceMode::Warn,
6755                Arc::new(crate::TelemetryCounters::default()),
6756            )
6757            .expect("writer");
6758            writer
6759                .submit(crate::WriteRequest {
6760                    label: "operational".to_owned(),
6761                    nodes: vec![],
6762                    node_retires: vec![],
6763                    edges: vec![],
6764                    edge_retires: vec![],
6765                    chunks: vec![],
6766                    runs: vec![],
6767                    steps: vec![],
6768                    actions: vec![],
6769                    optional_backfills: vec![],
6770                    vec_inserts: vec![],
6771                    operational_writes: vec![
6772                        crate::OperationalWrite::Append {
6773                            collection: "audit_log".to_owned(),
6774                            record_key: "evt-1".to_owned(),
6775                            payload_json: r#"{"actor":"alice-1"}"#.to_owned(),
6776                            source_ref: Some("src-1".to_owned()),
6777                        },
6778                        crate::OperationalWrite::Append {
6779                            collection: "audit_log".to_owned(),
6780                            record_key: "evt-2".to_owned(),
6781                            payload_json: r#"{"actor":"alice-2"}"#.to_owned(),
6782                            source_ref: Some("src-2".to_owned()),
6783                        },
6784                    ],
6785                })
6786                .expect("write");
6787        }
6788
6789        let report = service
6790            .read_operational_collection(&crate::operational::OperationalReadRequest {
6791                collection_name: "audit_log".to_owned(),
6792                filters: vec![crate::operational::OperationalFilterClause::Prefix {
6793                    field: "actor".to_owned(),
6794                    value: "alice".to_owned(),
6795                }],
6796                limit: Some(1),
6797            })
6798            .expect("limited read");
6799
6800        assert_eq!(report.row_count, 1);
6801        assert_eq!(report.applied_limit, 1);
6802        assert!(report.was_limited);
6803        assert_eq!(report.rows[0].record_key, "evt-2");
6804    }
6805
6806    #[test]
6807    fn preexisting_operational_collection_can_gain_filter_contract_after_upgrade() {
6808        let db = NamedTempFile::new().expect("temp db");
6809        let conn = sqlite::open_connection(db.path()).expect("conn");
6810        conn.execute_batch(
6811            r#"
6812            CREATE TABLE operational_collections (
6813                name TEXT PRIMARY KEY,
6814                kind TEXT NOT NULL,
6815                schema_json TEXT NOT NULL,
6816                retention_json TEXT NOT NULL,
6817                format_version INTEGER NOT NULL DEFAULT 1,
6818                created_at INTEGER NOT NULL DEFAULT 100,
6819                disabled_at INTEGER
6820            );
6821            CREATE TABLE operational_mutations (
6822                id TEXT PRIMARY KEY,
6823                collection_name TEXT NOT NULL,
6824                record_key TEXT NOT NULL,
6825                op_kind TEXT NOT NULL,
6826                payload_json TEXT NOT NULL,
6827                source_ref TEXT,
6828                created_at INTEGER NOT NULL DEFAULT 100,
6829                mutation_order INTEGER NOT NULL DEFAULT 1
6830            );
6831            INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at)
6832            VALUES ('audit_log', 'append_only_log', '{}', '{"mode":"keep_all"}', 1, 100);
6833            INSERT INTO operational_mutations
6834                (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order)
6835            VALUES
6836                ('evt-1', 'audit_log', 'evt-1', 'append', '{"actor":"alice","ts":0}', 'src-1', 100, 1);
6837            "#,
6838        )
6839        .expect("seed pre-v10 schema");
6840        drop(conn);
6841
6842        let service = AdminService::new(db.path(), Arc::new(SchemaManager::new()));
6843        let pre_update = service
6844            .read_operational_collection(&crate::operational::OperationalReadRequest {
6845                collection_name: "audit_log".to_owned(),
6846                filters: vec![crate::operational::OperationalFilterClause::Exact {
6847                    field: "actor".to_owned(),
6848                    value: crate::operational::OperationalFilterValue::String("alice".to_owned()),
6849                }],
6850                limit: Some(10),
6851            })
6852            .expect_err("read should reject undeclared fields before migration update");
6853        assert!(pre_update.to_string().contains("undeclared"));
6854
6855        let updated = service
6856            .update_operational_collection_filters(
6857                "audit_log",
6858                r#"[{"name":"actor","type":"string","modes":["exact"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#,
6859            )
6860            .expect("update filter contract");
6861        assert!(updated.filter_fields_json.contains("\"actor\""));
6862
6863        let report = service
6864            .read_operational_collection(&crate::operational::OperationalReadRequest {
6865                collection_name: "audit_log".to_owned(),
6866                filters: vec![crate::operational::OperationalFilterClause::Range {
6867                    field: "ts".to_owned(),
6868                    lower: Some(0),
6869                    upper: Some(0),
6870                }],
6871                limit: Some(10),
6872            })
6873            .expect("read after explicit filter update");
6874        assert_eq!(report.row_count, 1);
6875        assert_eq!(report.rows[0].record_key, "evt-1");
6876    }
6877
6878    #[cfg(feature = "sqlite-vec")]
6879    #[test]
6880    fn check_semantics_detects_stale_vec_rows() {
6881        use crate::sqlite::open_connection_with_vec;
6882
6883        let db = NamedTempFile::new().expect("temp file");
6884        let schema = Arc::new(SchemaManager::new());
6885        {
6886            let conn = open_connection_with_vec(db.path()).expect("vec conn");
6887            schema.bootstrap(&conn).expect("bootstrap");
6888            schema
6889                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 3)
6890                .expect("vec profile");
6891            // Insert a vec row whose chunk does not exist.
6892            let bytes: Vec<u8> = [0.1f32, 0.2f32, 0.3f32]
6893                .iter()
6894                .flat_map(|f| f.to_le_bytes())
6895                .collect();
6896            conn.execute(
6897                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('ghost-chunk', ?1)",
6898                rusqlite::params![bytes],
6899            )
6900            .expect("insert stale vec row");
6901        }
6902        let service = AdminService::new(db.path(), Arc::clone(&schema));
6903        let report = service.check_semantics().expect("semantics check");
6904        assert_eq!(report.stale_vec_rows, 1);
6905        assert!(
6906            report.warnings.iter().any(|w| w.contains("stale vec")),
6907            "warning must mention stale vec"
6908        );
6909    }
6910
6911    #[cfg(feature = "sqlite-vec")]
6912    #[test]
6913    fn restore_vector_profiles_recreates_vec_table_from_metadata() {
6914        let db = NamedTempFile::new().expect("temp file");
6915        let schema = Arc::new(SchemaManager::new());
6916        {
6917            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
6918            schema.bootstrap(&conn).expect("bootstrap");
6919            conn.execute(
6920                "INSERT INTO vector_profiles (profile, table_name, dimension, enabled) \
6921                 VALUES ('default', 'vec_nodes_active', 3, 1)",
6922                [],
6923            )
6924            .expect("insert vector profile");
6925        }
6926
6927        let service = AdminService::new(db.path(), Arc::clone(&schema));
6928        let report = service
6929            .restore_vector_profiles()
6930            .expect("restore vector profiles");
6931        assert_eq!(
6932            report.targets,
6933            vec![crate::projection::ProjectionTarget::Vec]
6934        );
6935        assert_eq!(report.rebuilt_rows, 1);
6936
6937        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
6938        let count: i64 = conn
6939            .query_row(
6940                "SELECT count(*) FROM sqlite_schema WHERE name = 'vec_nodes_active'",
6941                [],
6942                |row| row.get(0),
6943            )
6944            .expect("vec schema count");
6945        assert_eq!(count, 1, "vec table should exist after restore");
6946    }
6947
6948    #[cfg(feature = "sqlite-vec")]
6949    #[test]
6950    fn load_vector_regeneration_config_supports_json_and_toml() {
6951        let dir = tempfile::tempdir().expect("temp dir");
6952        let json_path = dir.path().join("regen.json");
6953        let toml_path = dir.path().join("regen.toml");
6954
6955        let config = VectorRegenerationConfig {
6956            profile: "default".to_owned(),
6957            table_name: "vec_nodes_active".to_owned(),
6958            model_identity: "model-a".to_owned(),
6959            model_version: "1.0".to_owned(),
6960            dimension: 4,
6961            normalization_policy: "l2".to_owned(),
6962            chunking_policy: "per_chunk".to_owned(),
6963            preprocessing_policy: "trim".to_owned(),
6964            generator_command: vec!["/bin/echo".to_owned()],
6965        };
6966
6967        fs::write(&json_path, serde_json::to_string(&config).expect("json")).expect("write json");
6968        fs::write(&toml_path, toml::to_string(&config).expect("toml")).expect("write toml");
6969
6970        let parsed_json = load_vector_regeneration_config(&json_path).expect("json parse");
6971        let parsed_toml = load_vector_regeneration_config(&toml_path).expect("toml parse");
6972
6973        assert_eq!(parsed_json, config);
6974        assert_eq!(parsed_toml, config);
6975    }
6976
6977    #[cfg(all(not(feature = "sqlite-vec"), unix))]
6978    #[test]
6979    fn regenerate_vector_embeddings_unsupported_vec_capability_writes_request_and_failed_audit() {
6980        let db = NamedTempFile::new().expect("temp file");
6981        let schema = Arc::new(SchemaManager::new());
6982        let temp_dir = tempfile::tempdir().expect("temp dir");
6983        let script_path = temp_dir.path().join("vector-generator-no-vec.sh");
6984
6985        fs::write(
6986            &script_path,
6987            r#"#!/usr/bin/env bash
6988set -euo pipefail
6989python3 -c 'import json, sys
6990payload = json.load(sys.stdin)
6991embeddings = [{"chunk_id": chunk["chunk_id"], "embedding": [1.0, 0.0, 0.0, 0.0]} for chunk in payload["chunks"]]
6992json.dump({"embeddings": embeddings}, sys.stdout)'
6993"#,
6994        )
6995        .expect("write generator script");
6996        set_file_mode(&script_path, 0o755);
6997
6998        {
6999            let conn = sqlite::open_connection(db.path()).expect("connection");
7000            schema.bootstrap(&conn).expect("bootstrap");
7001            conn.execute(
7002                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7003                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7004                [],
7005            )
7006            .expect("insert node");
7007            conn.execute(
7008                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7009                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7010                [],
7011            )
7012            .expect("insert chunk");
7013        }
7014
7015        let service = AdminService::new(db.path(), Arc::clone(&schema));
7016        let error = service
7017            .regenerate_vector_embeddings(&VectorRegenerationConfig {
7018                profile: "default".to_owned(),
7019                table_name: "vec_nodes_active".to_owned(),
7020                model_identity: "test-model".to_owned(),
7021                model_version: "1.0.0".to_owned(),
7022                dimension: 4,
7023                normalization_policy: "l2".to_owned(),
7024                chunking_policy: "per_chunk".to_owned(),
7025                preprocessing_policy: "trim".to_owned(),
7026                generator_command: vec![script_path.to_string_lossy().to_string()],
7027            })
7028            .expect_err("sqlite-vec capability should be required");
7029
7030        assert!(error.to_string().contains("unsupported vec capability"));
7031
7032        let conn = sqlite::open_connection(db.path()).expect("connection");
7033        let request_count: i64 = conn
7034            .query_row(
7035                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_requested' AND subject = 'default'",
7036                [],
7037                |row| row.get(0),
7038            )
7039            .expect("request count");
7040        assert_eq!(request_count, 1);
7041        let failed_count: i64 = conn
7042            .query_row(
7043                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
7044                [],
7045                |row| row.get(0),
7046            )
7047            .expect("failed count");
7048        assert_eq!(failed_count, 1);
7049        let metadata_json: String = conn
7050            .query_row(
7051                "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
7052                [],
7053                |row| row.get(0),
7054            )
7055            .expect("failed metadata");
7056        assert!(metadata_json.contains("\"failure_class\":\"unsupported vec capability\""));
7057    }
7058
7059    #[cfg(feature = "sqlite-vec")]
7060    #[test]
7061    fn regenerate_vector_embeddings_rebuilds_embeddings_from_generator() {
7062        let db = NamedTempFile::new().expect("temp file");
7063        let schema = Arc::new(SchemaManager::new());
7064        let temp_dir = tempfile::tempdir().expect("temp dir");
7065        let script_path = temp_dir.path().join("vector-generator.sh");
7066
7067        fs::write(
7068            &script_path,
7069            r#"#!/usr/bin/env bash
7070set -euo pipefail
7071python3 -c 'import json, sys
7072payload = json.load(sys.stdin)
7073embeddings = []
7074for chunk in payload["chunks"]:
7075    text = chunk["text_content"].lower()
7076    if "budget" in text:
7077        embedding = [1.0, 0.0, 0.0, 0.0]
7078    else:
7079        embedding = [0.0, 1.0, 0.0, 0.0]
7080    embeddings.append({"chunk_id": chunk["chunk_id"], "embedding": embedding})
7081json.dump({"embeddings": embeddings}, sys.stdout)'
7082"#,
7083        )
7084        .expect("write generator script");
7085        set_file_mode(&script_path, 0o755);
7086
7087        {
7088            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7089            schema.bootstrap(&conn).expect("bootstrap");
7090            conn.execute(
7091                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7092                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7093                [],
7094            )
7095            .expect("insert node");
7096            conn.execute(
7097                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7098                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7099                [],
7100            )
7101            .expect("insert chunk 1");
7102            conn.execute(
7103                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7104                 VALUES ('chunk-2', 'doc-1', 'travel plan', 101)",
7105                [],
7106            )
7107            .expect("insert chunk 2");
7108        }
7109
7110        let service = AdminService::new(db.path(), Arc::clone(&schema));
7111        let report = service
7112            .regenerate_vector_embeddings(&VectorRegenerationConfig {
7113                profile: "default".to_owned(),
7114                table_name: "vec_nodes_active".to_owned(),
7115                model_identity: "test-model".to_owned(),
7116                model_version: "1.0.0".to_owned(),
7117                dimension: 4,
7118                normalization_policy: "l2".to_owned(),
7119                chunking_policy: "per_chunk".to_owned(),
7120                preprocessing_policy: "trim".to_owned(),
7121                generator_command: vec![script_path.to_string_lossy().to_string()],
7122            })
7123            .expect("regenerate vectors");
7124
7125        assert_eq!(report.profile, "default");
7126        assert_eq!(report.table_name, "vec_nodes_active");
7127        assert_eq!(report.dimension, 4);
7128        assert_eq!(report.total_chunks, 2);
7129        assert_eq!(report.regenerated_rows, 2);
7130        assert!(report.contract_persisted);
7131
7132        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7133        let vec_count: i64 = conn
7134            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
7135                row.get(0)
7136            })
7137            .expect("vec count");
7138        assert_eq!(vec_count, 2);
7139
7140        let contract_count: i64 = conn
7141            .query_row(
7142                "SELECT count(*) FROM vector_embedding_contracts WHERE profile = 'default'",
7143                [],
7144                |row| row.get(0),
7145            )
7146            .expect("contract count");
7147        assert_eq!(contract_count, 1);
7148        let applied_at: i64 = conn
7149            .query_row(
7150                "SELECT applied_at FROM vector_embedding_contracts WHERE profile = 'default'",
7151                [],
7152                |row| row.get(0),
7153            )
7154            .expect("applied_at");
7155        assert!(applied_at > 0);
7156        let snapshot_hash: String = conn
7157            .query_row(
7158                "SELECT snapshot_hash FROM vector_embedding_contracts WHERE profile = 'default'",
7159                [],
7160                |row| row.get(0),
7161            )
7162            .expect("snapshot_hash");
7163        assert!(!snapshot_hash.is_empty());
7164        let contract_format_version: i64 = conn
7165            .query_row(
7166                "SELECT contract_format_version FROM vector_embedding_contracts WHERE profile = 'default'",
7167                [],
7168                |row| row.get(0),
7169            )
7170            .expect("contract_format_version");
7171        assert_eq!(contract_format_version, 1);
7172        let request_count: i64 = conn
7173            .query_row(
7174                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_requested' AND subject = 'default'",
7175                [],
7176                |row| row.get(0),
7177            )
7178            .expect("request audit count");
7179        assert_eq!(request_count, 1);
7180        let apply_count: i64 = conn
7181            .query_row(
7182                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_apply' AND subject = 'default'",
7183                [],
7184                |row| row.get(0),
7185            )
7186            .expect("apply audit count");
7187        assert_eq!(apply_count, 1);
7188        let apply_metadata: String = conn
7189            .query_row(
7190                "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_apply' AND subject = 'default'",
7191                [],
7192                |row| row.get(0),
7193            )
7194            .expect("apply metadata");
7195        assert!(apply_metadata.contains("\"profile\":\"default\""));
7196        assert!(apply_metadata.contains("\"snapshot_hash\":"));
7197    }
7198
7199    #[cfg(feature = "sqlite-vec")]
7200    #[test]
7201    fn regenerate_vector_embeddings_failure_leaves_contract_and_vec_rows_unchanged() {
7202        let db = NamedTempFile::new().expect("temp file");
7203        let schema = Arc::new(SchemaManager::new());
7204        let temp_dir = tempfile::tempdir().expect("temp dir");
7205        let script_path = temp_dir.path().join("vector-generator-fail.sh");
7206
7207        fs::write(
7208            &script_path,
7209            "#!/usr/bin/env bash\nset -euo pipefail\necho 'generator boom' >&2\nexit 17\n",
7210        )
7211        .expect("write failing script");
7212        set_file_mode(&script_path, 0o755);
7213
7214        {
7215            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7216            schema.bootstrap(&conn).expect("bootstrap");
7217            conn.execute(
7218                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7219                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7220                [],
7221            )
7222            .expect("insert node");
7223            conn.execute(
7224                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7225                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7226                [],
7227            )
7228            .expect("insert chunk");
7229            schema
7230                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
7231                .expect("ensure vec profile");
7232            conn.execute(
7233                r"
7234                INSERT INTO vector_embedding_contracts (
7235                    profile,
7236                    table_name,
7237                    model_identity,
7238                    model_version,
7239                    dimension,
7240                    normalization_policy,
7241                    chunking_policy,
7242                    preprocessing_policy,
7243                    generator_command_json,
7244                    applied_at,
7245                    snapshot_hash
7246                ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)
7247                ",
7248                rusqlite::params![
7249                    "default",
7250                    "vec_nodes_active",
7251                    "old-model",
7252                    "0.9.0",
7253                    4,
7254                    "l2",
7255                    "per_chunk",
7256                    "trim",
7257                    "[\"/bin/echo\"]",
7258                    111,
7259                    "old-snapshot"
7260                ],
7261            )
7262            .expect("seed contract");
7263            conn.execute(
7264                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
7265                [],
7266            )
7267            .expect("seed vec row");
7268        }
7269
7270        let service = AdminService::new(db.path(), Arc::clone(&schema));
7271        let error = service
7272            .regenerate_vector_embeddings_with_policy(
7273                &VectorRegenerationConfig {
7274                    profile: "default".to_owned(),
7275                    table_name: "vec_nodes_active".to_owned(),
7276                    model_identity: "new-model".to_owned(),
7277                    model_version: "1.0.0".to_owned(),
7278                    dimension: 4,
7279                    normalization_policy: "l2".to_owned(),
7280                    chunking_policy: "per_chunk".to_owned(),
7281                    preprocessing_policy: "trim".to_owned(),
7282                    generator_command: vec![script_path.to_string_lossy().to_string()],
7283                },
7284                &VectorGeneratorPolicy::default(),
7285            )
7286            .expect_err("generator should fail");
7287
7288        assert!(error.to_string().contains("generator nonzero exit"));
7289
7290        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7291        let model_identity: String = conn
7292            .query_row(
7293                "SELECT model_identity FROM vector_embedding_contracts WHERE profile = 'default'",
7294                [],
7295                |row| row.get(0),
7296            )
7297            .expect("model identity");
7298        assert_eq!(model_identity, "old-model");
7299        let snapshot_hash: String = conn
7300            .query_row(
7301                "SELECT snapshot_hash FROM vector_embedding_contracts WHERE profile = 'default'",
7302                [],
7303                |row| row.get(0),
7304            )
7305            .expect("snapshot hash");
7306        assert_eq!(snapshot_hash, "old-snapshot");
7307        let vec_count: i64 = conn
7308            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
7309                row.get(0)
7310            })
7311            .expect("vec count");
7312        assert_eq!(vec_count, 1);
7313        let failure_count: i64 = conn
7314            .query_row(
7315                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
7316                [],
7317                |row| row.get(0),
7318            )
7319            .expect("failure count");
7320        assert_eq!(failure_count, 1);
7321        let failure_metadata: String = conn
7322            .query_row(
7323                "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
7324                [],
7325                |row| row.get(0),
7326            )
7327            .expect("failure metadata");
7328        assert!(failure_metadata.contains("\"failure_class\":\"generator nonzero exit\""));
7329    }
7330
7331    #[cfg(feature = "sqlite-vec")]
7332    #[test]
7333    fn regenerate_vector_embeddings_snapshot_drift_is_retryable_and_non_mutating() {
7334        let db = NamedTempFile::new().expect("temp file");
7335        let schema = Arc::new(SchemaManager::new());
7336        let temp_dir = tempfile::tempdir().expect("temp dir");
7337        let script_path = temp_dir.path().join("vector-generator-drift.sh");
7338        let db_path = db.path().to_string_lossy().to_string();
7339
7340        fs::write(
7341            &script_path,
7342            format!(
7343                r#"#!/usr/bin/env bash
7344set -euo pipefail
7345python3 -c 'import json, sqlite3, sys
7346payload = json.load(sys.stdin)
7347conn = sqlite3.connect({db_path:?})
7348conn.execute("INSERT INTO chunks (id, node_logical_id, text_content, created_at) VALUES (?, ?, ?, ?)", ("chunk-2", "doc-1", "late arriving text", 101))
7349conn.commit()
7350conn.close()
7351embeddings = [{{"chunk_id": chunk["chunk_id"], "embedding": [1.0, 0.0, 0.0, 0.0]}} for chunk in payload["chunks"]]
7352json.dump({{"embeddings": embeddings}}, sys.stdout)'
7353"#,
7354            ),
7355        )
7356        .expect("write drift script");
7357        set_file_mode(&script_path, 0o755);
7358
7359        {
7360            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7361            schema.bootstrap(&conn).expect("bootstrap");
7362            conn.execute(
7363                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7364                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7365                [],
7366            )
7367            .expect("insert node");
7368            conn.execute(
7369                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7370                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7371                [],
7372            )
7373            .expect("insert chunk");
7374            schema
7375                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
7376                .expect("ensure vec profile");
7377        }
7378
7379        let service = AdminService::new(db.path(), Arc::clone(&schema));
7380        let error = service
7381            .regenerate_vector_embeddings_with_policy(
7382                &VectorRegenerationConfig {
7383                    profile: "default".to_owned(),
7384                    table_name: "vec_nodes_active".to_owned(),
7385                    model_identity: "test-model".to_owned(),
7386                    model_version: "1.0.0".to_owned(),
7387                    dimension: 4,
7388                    normalization_policy: "l2".to_owned(),
7389                    chunking_policy: "per_chunk".to_owned(),
7390                    preprocessing_policy: "trim".to_owned(),
7391                    generator_command: vec![script_path.to_string_lossy().to_string()],
7392                },
7393                &VectorGeneratorPolicy::default(),
7394            )
7395            .expect_err("snapshot drift should fail");
7396
7397        assert!(
7398            error
7399                .to_string()
7400                .contains("vector regeneration snapshot drift:")
7401        );
7402        assert!(error.to_string().contains("[retryable]"));
7403
7404        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7405        let contract_count: i64 = conn
7406            .query_row(
7407                "SELECT count(*) FROM vector_embedding_contracts",
7408                [],
7409                |row| row.get(0),
7410            )
7411            .expect("contract count");
7412        assert_eq!(contract_count, 0);
7413        let vec_count: i64 = conn
7414            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
7415                row.get(0)
7416            })
7417            .expect("vec count");
7418        assert_eq!(vec_count, 0);
7419        let failure_count: i64 = conn
7420            .query_row(
7421                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
7422                [],
7423                |row| row.get(0),
7424            )
7425            .expect("failure count");
7426        assert_eq!(failure_count, 1);
7427    }
7428
7429    #[cfg(feature = "sqlite-vec")]
7430    #[test]
7431    fn regenerate_vector_embeddings_times_out_and_kills_generator() {
7432        let (_db, service) = setup();
7433        let temp_dir = tempfile::tempdir().expect("temp dir");
7434        let script_path = temp_dir.path().join("vector-generator-timeout.sh");
7435
7436        fs::write(
7437            &script_path,
7438            "#!/usr/bin/env bash\nset -euo pipefail\nsleep 1\nprintf '{\"embeddings\":[]}'\n",
7439        )
7440        .expect("write timeout script");
7441        set_file_mode(&script_path, 0o755);
7442
7443        let error = service
7444            .regenerate_vector_embeddings_with_policy(
7445                &VectorRegenerationConfig {
7446                    profile: "default".to_owned(),
7447                    table_name: "vec_nodes_active".to_owned(),
7448                    model_identity: "model".to_owned(),
7449                    model_version: "1.0.0".to_owned(),
7450                    dimension: 4,
7451                    normalization_policy: "l2".to_owned(),
7452                    chunking_policy: "per_chunk".to_owned(),
7453                    preprocessing_policy: "trim".to_owned(),
7454                    generator_command: vec![script_path.to_string_lossy().to_string()],
7455                },
7456                &VectorGeneratorPolicy {
7457                    timeout_ms: 50,
7458                    max_stdout_bytes: 1024,
7459                    max_stderr_bytes: 1024,
7460                    max_input_bytes: 1024,
7461                    max_chunks: 10,
7462                    require_absolute_executable: true,
7463                    reject_world_writable_executable: true,
7464                    allowed_executable_roots: vec![],
7465                    preserve_env_vars: vec![],
7466                },
7467            )
7468            .expect_err("generator should time out");
7469        assert!(error.to_string().contains("generator timeout"));
7470    }
7471
7472    #[cfg(feature = "sqlite-vec")]
7473    #[test]
7474    fn regenerate_vector_embeddings_rejects_oversized_stdout() {
7475        let (_db, service) = setup();
7476        let temp_dir = tempfile::tempdir().expect("temp dir");
7477        let script_path = temp_dir.path().join("vector-generator-stdout.sh");
7478
7479        fs::write(
7480            &script_path,
7481            "#!/usr/bin/env bash\nset -euo pipefail\npython3 -c 'import sys; sys.stdout.write(\"x\" * 5000)'\n",
7482        )
7483        .expect("write stdout script");
7484        set_file_mode(&script_path, 0o755);
7485
7486        let error = service
7487            .regenerate_vector_embeddings_with_policy(
7488                &VectorRegenerationConfig {
7489                    profile: "default".to_owned(),
7490                    table_name: "vec_nodes_active".to_owned(),
7491                    model_identity: "model".to_owned(),
7492                    model_version: "1.0.0".to_owned(),
7493                    dimension: 4,
7494                    normalization_policy: "l2".to_owned(),
7495                    chunking_policy: "per_chunk".to_owned(),
7496                    preprocessing_policy: "trim".to_owned(),
7497                    generator_command: vec![script_path.to_string_lossy().to_string()],
7498                },
7499                &VectorGeneratorPolicy {
7500                    timeout_ms: 1000,
7501                    max_stdout_bytes: 128,
7502                    max_stderr_bytes: 1024,
7503                    max_input_bytes: 1024,
7504                    max_chunks: 10,
7505                    require_absolute_executable: true,
7506                    reject_world_writable_executable: true,
7507                    allowed_executable_roots: vec![],
7508                    preserve_env_vars: vec![],
7509                },
7510            )
7511            .expect_err("generator stdout should overflow");
7512        assert!(error.to_string().contains("stdout overflow"));
7513    }
7514
7515    #[cfg(feature = "sqlite-vec")]
7516    #[test]
7517    fn regenerate_vector_embeddings_rejects_oversized_stderr() {
7518        let (_db, service) = setup();
7519        let temp_dir = tempfile::tempdir().expect("temp dir");
7520        let script_path = temp_dir.path().join("vector-generator-stderr.sh");
7521
7522        fs::write(
7523            &script_path,
7524            "#!/usr/bin/env bash\nset -euo pipefail\npython3 -c 'import sys; sys.stderr.write(\"e\" * 5000); sys.exit(7)'\n",
7525        )
7526        .expect("write stderr script");
7527        set_file_mode(&script_path, 0o755);
7528
7529        let error = service
7530            .regenerate_vector_embeddings_with_policy(
7531                &VectorRegenerationConfig {
7532                    profile: "default".to_owned(),
7533                    table_name: "vec_nodes_active".to_owned(),
7534                    model_identity: "model".to_owned(),
7535                    model_version: "1.0.0".to_owned(),
7536                    dimension: 4,
7537                    normalization_policy: "l2".to_owned(),
7538                    chunking_policy: "per_chunk".to_owned(),
7539                    preprocessing_policy: "trim".to_owned(),
7540                    generator_command: vec![script_path.to_string_lossy().to_string()],
7541                },
7542                &VectorGeneratorPolicy {
7543                    timeout_ms: 1000,
7544                    max_stdout_bytes: 1024,
7545                    max_stderr_bytes: 128,
7546                    max_input_bytes: 1024,
7547                    max_chunks: 10,
7548                    require_absolute_executable: true,
7549                    reject_world_writable_executable: true,
7550                    allowed_executable_roots: vec![],
7551                    preserve_env_vars: vec![],
7552                },
7553            )
7554            .expect_err("generator stderr should overflow");
7555        assert!(error.to_string().contains("stderr overflow"));
7556    }
7557
7558    #[cfg(feature = "sqlite-vec")]
7559    #[test]
7560    fn regenerate_vector_embeddings_rejects_oversized_input_before_spawn() {
7561        let db = NamedTempFile::new().expect("temp file");
7562        let schema = Arc::new(SchemaManager::new());
7563        {
7564            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7565            schema.bootstrap(&conn).expect("bootstrap");
7566            conn.execute(
7567                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7568                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7569                [],
7570            )
7571            .expect("insert node");
7572            conn.execute(
7573                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7574                 VALUES ('chunk-1', 'doc-1', 'this chunk is intentionally long to exceed the configured input limit', 100)",
7575                [],
7576            )
7577            .expect("insert chunk");
7578        }
7579
7580        let service = AdminService::new(db.path(), Arc::clone(&schema));
7581        let error = service
7582            .regenerate_vector_embeddings_with_policy(
7583                &VectorRegenerationConfig {
7584                    profile: "default".to_owned(),
7585                    table_name: "vec_nodes_active".to_owned(),
7586                    model_identity: "model".to_owned(),
7587                    model_version: "1.0.0".to_owned(),
7588                    dimension: 4,
7589                    normalization_policy: "l2".to_owned(),
7590                    chunking_policy: "per_chunk".to_owned(),
7591                    preprocessing_policy: "trim".to_owned(),
7592                    generator_command: vec!["/bin/echo".to_owned()],
7593                },
7594                &VectorGeneratorPolicy {
7595                    timeout_ms: 1000,
7596                    max_stdout_bytes: 1024,
7597                    max_stderr_bytes: 1024,
7598                    max_input_bytes: 32,
7599                    max_chunks: 10,
7600                    require_absolute_executable: true,
7601                    reject_world_writable_executable: true,
7602                    allowed_executable_roots: vec![],
7603                    preserve_env_vars: vec![],
7604                },
7605            )
7606            .expect_err("input size should be rejected before spawn");
7607        assert!(error.to_string().contains("payload too large"));
7608    }
7609
7610    #[cfg(feature = "sqlite-vec")]
7611    #[test]
7612    fn regenerate_vector_embeddings_rejects_excessive_chunk_count_before_spawn() {
7613        let db = NamedTempFile::new().expect("temp file");
7614        let schema = Arc::new(SchemaManager::new());
7615        {
7616            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7617            schema.bootstrap(&conn).expect("bootstrap");
7618            conn.execute(
7619                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7620                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7621                [],
7622            )
7623            .expect("insert node");
7624            conn.execute(
7625                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) VALUES ('chunk-1', 'doc-1', 'a', 100)",
7626                [],
7627            )
7628            .expect("insert chunk 1");
7629            conn.execute(
7630                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) VALUES ('chunk-2', 'doc-1', 'b', 101)",
7631                [],
7632            )
7633            .expect("insert chunk 2");
7634        }
7635
7636        let service = AdminService::new(db.path(), Arc::clone(&schema));
7637        let error = service
7638            .regenerate_vector_embeddings_with_policy(
7639                &VectorRegenerationConfig {
7640                    profile: "default".to_owned(),
7641                    table_name: "vec_nodes_active".to_owned(),
7642                    model_identity: "model".to_owned(),
7643                    model_version: "1.0.0".to_owned(),
7644                    dimension: 4,
7645                    normalization_policy: "l2".to_owned(),
7646                    chunking_policy: "per_chunk".to_owned(),
7647                    preprocessing_policy: "trim".to_owned(),
7648                    generator_command: vec!["/bin/echo".to_owned()],
7649                },
7650                &VectorGeneratorPolicy {
7651                    timeout_ms: 1000,
7652                    max_stdout_bytes: 1024,
7653                    max_stderr_bytes: 1024,
7654                    max_input_bytes: 2048,
7655                    max_chunks: 1,
7656                    require_absolute_executable: true,
7657                    reject_world_writable_executable: true,
7658                    allowed_executable_roots: vec![],
7659                    preserve_env_vars: vec![],
7660                },
7661            )
7662            .expect_err("chunk count should be rejected before spawn");
7663        assert!(error.to_string().contains("payload too large"));
7664    }
7665
7666    #[cfg(feature = "sqlite-vec")]
7667    #[test]
7668    fn regenerate_vector_embeddings_malformed_json_leaves_contract_and_vec_rows_unchanged() {
7669        let db = NamedTempFile::new().expect("temp file");
7670        let schema = Arc::new(SchemaManager::new());
7671        let temp_dir = tempfile::tempdir().expect("temp dir");
7672        let script_path = temp_dir.path().join("vector-generator-bad-json.sh");
7673
7674        fs::write(
7675            &script_path,
7676            "#!/usr/bin/env bash\nset -euo pipefail\nprintf 'not-json'\n",
7677        )
7678        .expect("write bad json script");
7679        set_file_mode(&script_path, 0o755);
7680
7681        {
7682            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7683            schema.bootstrap(&conn).expect("bootstrap");
7684            conn.execute(
7685                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7686                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7687                [],
7688            )
7689            .expect("insert node");
7690            conn.execute(
7691                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7692                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7693                [],
7694            )
7695            .expect("insert chunk");
7696            schema
7697                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
7698                .expect("ensure vec profile");
7699            conn.execute(
7700                r"
7701                INSERT INTO vector_embedding_contracts (
7702                    profile,
7703                    table_name,
7704                    model_identity,
7705                    model_version,
7706                    dimension,
7707                    normalization_policy,
7708                    chunking_policy,
7709                    preprocessing_policy,
7710                    generator_command_json,
7711                    applied_at,
7712                    snapshot_hash
7713                ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)
7714                ",
7715                rusqlite::params![
7716                    "default",
7717                    "vec_nodes_active",
7718                    "old-model",
7719                    "0.9.0",
7720                    4,
7721                    "l2",
7722                    "per_chunk",
7723                    "trim",
7724                    "[\"/bin/echo\"]",
7725                    111,
7726                    "old-snapshot"
7727                ],
7728            )
7729            .expect("seed contract");
7730            conn.execute(
7731                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
7732                [],
7733            )
7734            .expect("seed vec row");
7735        }
7736
7737        let service = AdminService::new(db.path(), Arc::clone(&schema));
7738        let error = service
7739            .regenerate_vector_embeddings_with_policy(
7740                &VectorRegenerationConfig {
7741                    profile: "default".to_owned(),
7742                    table_name: "vec_nodes_active".to_owned(),
7743                    model_identity: "new-model".to_owned(),
7744                    model_version: "1.0.0".to_owned(),
7745                    dimension: 4,
7746                    normalization_policy: "l2".to_owned(),
7747                    chunking_policy: "per_chunk".to_owned(),
7748                    preprocessing_policy: "trim".to_owned(),
7749                    generator_command: vec![script_path.to_string_lossy().to_string()],
7750                },
7751                &VectorGeneratorPolicy::default(),
7752            )
7753            .expect_err("bad json should fail");
7754
7755        assert!(error.to_string().contains("decode generator output"));
7756
7757        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7758        let model_identity: String = conn
7759            .query_row(
7760                "SELECT model_identity FROM vector_embedding_contracts WHERE profile = 'default'",
7761                [],
7762                |row| row.get(0),
7763            )
7764            .expect("model identity");
7765        assert_eq!(model_identity, "old-model");
7766        let vec_count: i64 = conn
7767            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
7768                row.get(0)
7769            })
7770            .expect("vec count");
7771        assert_eq!(vec_count, 1);
7772        let failure_count: i64 = conn
7773            .query_row(
7774                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
7775                [],
7776                |row| row.get(0),
7777            )
7778            .expect("failure count");
7779        assert_eq!(failure_count, 1);
7780    }
7781
7782    #[cfg(feature = "sqlite-vec")]
7783    #[test]
7784    fn regenerate_vector_embeddings_rejects_whitespace_only_profile_before_mutation() {
7785        let db = NamedTempFile::new().expect("temp file");
7786        let schema = Arc::new(SchemaManager::new());
7787        {
7788            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7789            schema.bootstrap(&conn).expect("bootstrap");
7790            conn.execute(
7791                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7792                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7793                [],
7794            )
7795            .expect("insert node");
7796            conn.execute(
7797                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7798                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7799                [],
7800            )
7801            .expect("insert chunk");
7802        }
7803
7804        let service = AdminService::new(db.path(), Arc::clone(&schema));
7805        let error = service
7806            .regenerate_vector_embeddings(&VectorRegenerationConfig {
7807                profile: "   ".to_owned(),
7808                table_name: "vec_nodes_active".to_owned(),
7809                model_identity: "test-model".to_owned(),
7810                model_version: "1.0.0".to_owned(),
7811                dimension: 4,
7812                normalization_policy: "l2".to_owned(),
7813                chunking_policy: "per_chunk".to_owned(),
7814                preprocessing_policy: "trim".to_owned(),
7815                generator_command: vec!["/bin/echo".to_owned()],
7816            })
7817            .expect_err("whitespace profile should be rejected");
7818
7819        assert!(error.to_string().contains("invalid contract"));
7820        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7821        let contract_count: i64 = conn
7822            .query_row(
7823                "SELECT count(*) FROM vector_embedding_contracts",
7824                [],
7825                |row| row.get(0),
7826            )
7827            .expect("contract count");
7828        assert_eq!(contract_count, 0);
7829        let provenance_count: i64 = conn
7830            .query_row("SELECT count(*) FROM provenance_events", [], |row| {
7831                row.get(0)
7832            })
7833            .expect("provenance count");
7834        assert_eq!(provenance_count, 0);
7835    }
7836
7837    #[cfg(feature = "sqlite-vec")]
7838    #[test]
7839    fn regenerate_vector_embeddings_rejects_world_writable_executable_when_policy_requires_it() {
7840        let (_db, service) = setup();
7841        let temp_dir = tempfile::tempdir().expect("temp dir");
7842        let script_path = temp_dir.path().join("vector-generator-world-writable.sh");
7843
7844        fs::write(
7845            &script_path,
7846            "#!/usr/bin/env bash\nset -euo pipefail\nprintf '{\"embeddings\":[]}'\n",
7847        )
7848        .expect("write script");
7849        set_file_mode(&script_path, 0o777);
7850
7851        let error = service
7852            .regenerate_vector_embeddings_with_policy(
7853                &VectorRegenerationConfig {
7854                    profile: "default".to_owned(),
7855                    table_name: "vec_nodes_active".to_owned(),
7856                    model_identity: "model".to_owned(),
7857                    model_version: "1.0.0".to_owned(),
7858                    dimension: 4,
7859                    normalization_policy: "l2".to_owned(),
7860                    chunking_policy: "per_chunk".to_owned(),
7861                    preprocessing_policy: "trim".to_owned(),
7862                    generator_command: vec![script_path.to_string_lossy().to_string()],
7863                },
7864                &VectorGeneratorPolicy::default(),
7865            )
7866            .expect_err("world-writable executable should be rejected");
7867
7868        assert!(error.to_string().contains("world-writable executable"));
7869    }
7870
7871    #[cfg(feature = "sqlite-vec")]
7872    #[test]
7873    fn regenerate_vector_embeddings_rejects_executable_outside_allowlisted_roots() {
7874        let (_db, service) = setup();
7875        let temp_dir = tempfile::tempdir().expect("temp dir");
7876        let allowed_dir = tempfile::tempdir().expect("allowed dir");
7877        let script_path = temp_dir.path().join("vector-generator-outside-root.sh");
7878
7879        fs::write(
7880            &script_path,
7881            "#!/usr/bin/env bash\nset -euo pipefail\nprintf '{\"embeddings\":[]}'\n",
7882        )
7883        .expect("write script");
7884        set_file_mode(&script_path, 0o755);
7885
7886        let error = service
7887            .regenerate_vector_embeddings_with_policy(
7888                &VectorRegenerationConfig {
7889                    profile: "default".to_owned(),
7890                    table_name: "vec_nodes_active".to_owned(),
7891                    model_identity: "model".to_owned(),
7892                    model_version: "1.0.0".to_owned(),
7893                    dimension: 4,
7894                    normalization_policy: "l2".to_owned(),
7895                    chunking_policy: "per_chunk".to_owned(),
7896                    preprocessing_policy: "trim".to_owned(),
7897                    generator_command: vec![script_path.to_string_lossy().to_string()],
7898                },
7899                &VectorGeneratorPolicy {
7900                    timeout_ms: 1000,
7901                    max_stdout_bytes: 1024,
7902                    max_stderr_bytes: 1024,
7903                    max_input_bytes: 1024,
7904                    max_chunks: 10,
7905                    require_absolute_executable: true,
7906                    reject_world_writable_executable: true,
7907                    allowed_executable_roots: vec![
7908                        allowed_dir.path().to_string_lossy().to_string(),
7909                    ],
7910                    preserve_env_vars: vec![],
7911                },
7912            )
7913            .expect_err("disallowed root should be rejected");
7914
7915        assert!(
7916            error
7917                .to_string()
7918                .contains("outside allowed executable roots")
7919        );
7920    }
7921
7922    #[cfg(feature = "sqlite-vec")]
7923    #[test]
7924    fn regenerate_vector_embeddings_rejects_future_contract_format_version() {
7925        let db = NamedTempFile::new().expect("temp file");
7926        let schema = Arc::new(SchemaManager::new());
7927        {
7928            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7929            schema.bootstrap(&conn).expect("bootstrap");
7930            conn.execute(
7931                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7932                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7933                [],
7934            )
7935            .expect("insert node");
7936            conn.execute(
7937                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7938                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7939                [],
7940            )
7941            .expect("insert chunk");
7942            conn.execute(
7943                r"
7944                INSERT INTO vector_embedding_contracts (
7945                    profile,
7946                    table_name,
7947                    model_identity,
7948                    model_version,
7949                    dimension,
7950                    normalization_policy,
7951                    chunking_policy,
7952                    preprocessing_policy,
7953                    generator_command_json,
7954                    applied_at,
7955                    snapshot_hash,
7956                    contract_format_version,
7957                    updated_at
7958                ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)
7959                ",
7960                rusqlite::params![
7961                    "default",
7962                    "vec_nodes_active",
7963                    "old-model",
7964                    "0.9.0",
7965                    4,
7966                    "l2",
7967                    "per_chunk",
7968                    "trim",
7969                    "[\"/bin/echo\"]",
7970                    111,
7971                    "old-snapshot",
7972                    99,
7973                    111,
7974                ],
7975            )
7976            .expect("seed future contract");
7977        }
7978
7979        let service = AdminService::new(db.path(), Arc::clone(&schema));
7980        let error = service
7981            .regenerate_vector_embeddings(&VectorRegenerationConfig {
7982                profile: "default".to_owned(),
7983                table_name: "vec_nodes_active".to_owned(),
7984                model_identity: "test-model".to_owned(),
7985                model_version: "1.0.0".to_owned(),
7986                dimension: 4,
7987                normalization_policy: "l2".to_owned(),
7988                chunking_policy: "per_chunk".to_owned(),
7989                preprocessing_policy: "trim".to_owned(),
7990                generator_command: vec!["/bin/echo".to_owned()],
7991            })
7992            .expect_err("future contract version should be rejected");
7993
7994        assert!(error.to_string().contains("unsupported"));
7995        assert!(error.to_string().contains("format version"));
7996    }
7997
7998    #[cfg(feature = "sqlite-vec")]
7999    #[test]
8000    fn regenerate_vector_embeddings_clears_environment_except_preserved_vars() {
8001        let db = NamedTempFile::new().expect("temp file");
8002        let schema = Arc::new(SchemaManager::new());
8003        let temp_dir = tempfile::tempdir().expect("temp dir");
8004        let script_path = temp_dir.path().join("vector-generator-env.sh");
8005        {
8006            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
8007            schema.bootstrap(&conn).expect("bootstrap");
8008            conn.execute(
8009                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8010                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
8011                [],
8012            )
8013            .expect("insert node");
8014            conn.execute(
8015                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
8016                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
8017                [],
8018            )
8019            .expect("insert chunk");
8020        }
8021
8022        fs::write(
8023            &script_path,
8024            r#"#!/usr/bin/env bash
8025set -euo pipefail
8026if [[ "${VECTOR_TEST_SECRET:-}" != "expected" ]]; then
8027  echo "missing secret" >&2
8028  exit 9
8029fi
8030python3 -c 'import json, sys
8031payload = json.load(sys.stdin)
8032json.dump({"embeddings": [{"chunk_id": payload["chunks"][0]["chunk_id"], "embedding": [1.0, 0.0, 0.0, 0.0]}]}, sys.stdout)'
8033"#,
8034        )
8035        .expect("write script");
8036        set_file_mode(&script_path, 0o755);
8037
8038        let service = AdminService::new(db.path(), Arc::clone(&schema));
8039        unsafe {
8040            std::env::set_var("VECTOR_TEST_SECRET", "expected");
8041        }
8042        let missing_env = service
8043            .regenerate_vector_embeddings_with_policy(
8044                &VectorRegenerationConfig {
8045                    profile: "default".to_owned(),
8046                    table_name: "vec_nodes_active".to_owned(),
8047                    model_identity: "model".to_owned(),
8048                    model_version: "1.0.0".to_owned(),
8049                    dimension: 4,
8050                    normalization_policy: "l2".to_owned(),
8051                    chunking_policy: "per_chunk".to_owned(),
8052                    preprocessing_policy: "trim".to_owned(),
8053                    generator_command: vec![script_path.to_string_lossy().to_string()],
8054                },
8055                &VectorGeneratorPolicy::default(),
8056            )
8057            .expect_err("non-preserved env var should be dropped");
8058        assert!(missing_env.to_string().contains("nonzero exit"));
8059
8060        let report = service
8061            .regenerate_vector_embeddings_with_policy(
8062                &VectorRegenerationConfig {
8063                    profile: "default".to_owned(),
8064                    table_name: "vec_nodes_active".to_owned(),
8065                    model_identity: "model".to_owned(),
8066                    model_version: "1.0.0".to_owned(),
8067                    dimension: 4,
8068                    normalization_policy: "l2".to_owned(),
8069                    chunking_policy: "per_chunk".to_owned(),
8070                    preprocessing_policy: "trim".to_owned(),
8071                    generator_command: vec![script_path.to_string_lossy().to_string()],
8072                },
8073                &VectorGeneratorPolicy {
8074                    timeout_ms: 1000,
8075                    max_stdout_bytes: 1024,
8076                    max_stderr_bytes: 1024,
8077                    max_input_bytes: 4096,
8078                    max_chunks: 10,
8079                    require_absolute_executable: true,
8080                    reject_world_writable_executable: true,
8081                    allowed_executable_roots: vec![],
8082                    preserve_env_vars: vec!["VECTOR_TEST_SECRET".to_owned()],
8083                },
8084            )
8085            .expect("preserved env var should allow success");
8086        assert_eq!(report.regenerated_rows, 1);
8087        unsafe {
8088            std::env::remove_var("VECTOR_TEST_SECRET");
8089        }
8090    }
8091
8092    #[test]
8093    fn check_semantics_detects_orphaned_chunk() {
8094        let (db, service) = setup();
8095        {
8096            // Open without FK enforcement to insert chunk with no active node.
8097            let conn = sqlite::open_connection(db.path()).expect("conn");
8098            conn.execute(
8099                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
8100                 VALUES ('c1', 'ghost-node', 'text', 100)",
8101                [],
8102            )
8103            .expect("insert orphaned chunk");
8104        }
8105        let report = service.check_semantics().expect("semantics check");
8106        assert_eq!(report.orphaned_chunks, 1);
8107    }
8108
8109    #[test]
8110    fn check_semantics_detects_null_source_ref() {
8111        let (db, service) = setup();
8112        {
8113            let conn = sqlite::open_connection(db.path()).expect("conn");
8114            conn.execute(
8115                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at) \
8116                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100)",
8117                [],
8118            )
8119            .expect("insert node with null source_ref");
8120        }
8121        let report = service.check_semantics().expect("semantics check");
8122        assert_eq!(report.null_source_ref_nodes, 1);
8123    }
8124
8125    #[test]
8126    fn check_semantics_detects_broken_step_fk() {
8127        let (db, service) = setup();
8128        {
8129            // Explicitly disable FK enforcement for this connection so we can insert
8130            // an orphaned step (ghost run_id) to simulate a partial-write failure.
8131            let conn = sqlite::open_connection(db.path()).expect("conn");
8132            conn.execute_batch("PRAGMA foreign_keys = OFF;")
8133                .expect("disable FK");
8134            conn.execute(
8135                "INSERT INTO steps (id, run_id, kind, status, properties, created_at) \
8136                 VALUES ('s1', 'ghost-run', 'llm', 'completed', '{}', 100)",
8137                [],
8138            )
8139            .expect("insert step with ghost run_id");
8140        }
8141        let report = service.check_semantics().expect("semantics check");
8142        assert_eq!(report.broken_step_fk, 1);
8143    }
8144
8145    #[test]
8146    fn check_semantics_detects_broken_action_fk() {
8147        let (db, service) = setup();
8148        {
8149            let conn = sqlite::open_connection(db.path()).expect("conn");
8150            conn.execute_batch("PRAGMA foreign_keys = OFF;")
8151                .expect("disable FK");
8152            conn.execute(
8153                "INSERT INTO actions (id, step_id, kind, status, properties, created_at) \
8154                 VALUES ('a1', 'ghost-step', 'emit', 'completed', '{}', 100)",
8155                [],
8156            )
8157            .expect("insert action with ghost step_id");
8158        }
8159        let report = service.check_semantics().expect("semantics check");
8160        assert_eq!(report.broken_action_fk, 1);
8161    }
8162
8163    #[test]
8164    fn check_semantics_detects_stale_fts_rows() {
8165        let (db, service) = setup();
8166        {
8167            let conn = sqlite::open_connection(db.path()).expect("conn");
8168            // FTS virtual tables have no FK constraints; insert a row referencing
8169            // a chunk_id that does not exist in the chunks table.
8170            conn.execute(
8171                "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
8172                 VALUES ('ghost-chunk', 'any-node', 'Meeting', 'stale content')",
8173                [],
8174            )
8175            .expect("insert stale FTS row");
8176        }
8177        let report = service.check_semantics().expect("semantics check");
8178        assert_eq!(report.stale_fts_rows, 1);
8179    }
8180
8181    #[test]
8182    fn check_semantics_detects_fts_rows_for_superseded_nodes() {
8183        let (db, service) = setup();
8184        {
8185            let conn = sqlite::open_connection(db.path()).expect("conn");
8186            // Insert a node that has been fully superseded (superseded_at IS NOT NULL).
8187            conn.execute(
8188                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
8189                 VALUES ('r1', 'lg-sup', 'Meeting', '{}', 100, 200, 'src-1')",
8190                [],
8191            )
8192            .expect("insert superseded node");
8193            // Insert an FTS row for the superseded node's logical_id.
8194            conn.execute(
8195                "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
8196                 VALUES ('ck-x', 'lg-sup', 'Meeting', 'superseded content')",
8197                [],
8198            )
8199            .expect("insert FTS row for superseded node");
8200        }
8201        let report = service.check_semantics().expect("semantics check");
8202        assert_eq!(report.fts_rows_for_superseded_nodes, 1);
8203    }
8204
8205    #[test]
8206    fn check_semantics_detects_dangling_edges() {
8207        let (db, service) = setup();
8208        {
8209            let conn = sqlite::open_connection(db.path()).expect("conn");
8210            conn.execute_batch("PRAGMA foreign_keys = OFF;")
8211                .expect("disable FK");
8212            // One active node as source; target does not exist — edge is dangling.
8213            conn.execute(
8214                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8215                 VALUES ('r1', 'lg-src', 'Meeting', '{}', 100, 'src-1')",
8216                [],
8217            )
8218            .expect("insert source node");
8219            conn.execute(
8220                "INSERT INTO edges \
8221                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
8222                 VALUES ('e1', 'edge-1', 'lg-src', 'ghost-target', 'LINKS', '{}', 100, 'src-1')",
8223                [],
8224            )
8225            .expect("insert dangling edge");
8226        }
8227        let report = service.check_semantics().expect("semantics check");
8228        assert_eq!(report.dangling_edges, 1);
8229    }
8230
8231    #[test]
8232    fn check_semantics_detects_orphaned_supersession_chains() {
8233        let (db, service) = setup();
8234        {
8235            let conn = sqlite::open_connection(db.path()).expect("conn");
8236            // Every version of this logical_id is superseded — no active row remains.
8237            conn.execute(
8238                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
8239                 VALUES ('r1', 'lg-orphaned', 'Meeting', '{}', 100, 200, 'src-1')",
8240                [],
8241            )
8242            .expect("insert fully superseded node");
8243        }
8244        let report = service.check_semantics().expect("semantics check");
8245        assert_eq!(report.orphaned_supersession_chains, 1);
8246    }
8247
8248    #[test]
8249    fn check_semantics_detects_mismatched_kind_property_fts_rows() {
8250        let (db, service) = setup();
8251        {
8252            let conn = sqlite::open_connection(db.path()).expect("conn");
8253            // Insert an active node with kind "Goal".
8254            conn.execute(
8255                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8256                 VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'src-1')",
8257                [],
8258            )
8259            .expect("insert node");
8260            // Insert a property FTS row with a DIFFERENT kind than the node.
8261            conn.execute(
8262                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
8263                 VALUES ('goal-1', 'WrongKind', 'Ship v2')",
8264                [],
8265            )
8266            .expect("insert mismatched property FTS row");
8267        }
8268        let report = service.check_semantics().expect("semantics check");
8269        assert_eq!(report.mismatched_kind_property_fts_rows, 1);
8270    }
8271
8272    #[test]
8273    fn check_semantics_detects_duplicate_property_fts_rows() {
8274        let (db, service) = setup();
8275        {
8276            let conn = sqlite::open_connection(db.path()).expect("conn");
8277            conn.execute(
8278                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8279                 VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'src-1')",
8280                [],
8281            )
8282            .expect("insert node");
8283            // Insert two property FTS rows for the same logical ID.
8284            conn.execute(
8285                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
8286                 VALUES ('goal-1', 'Goal', 'Ship v2')",
8287                [],
8288            )
8289            .expect("insert first property FTS row");
8290            conn.execute(
8291                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
8292                 VALUES ('goal-1', 'Goal', 'Ship v2 duplicate')",
8293                [],
8294            )
8295            .expect("insert duplicate property FTS row");
8296        }
8297        let report = service.check_semantics().expect("semantics check");
8298        assert_eq!(report.duplicate_property_fts_rows, 1);
8299    }
8300
8301    #[test]
8302    fn check_semantics_detects_drifted_property_fts_text() {
8303        let (db, service) = setup();
8304        {
8305            let conn = sqlite::open_connection(db.path()).expect("conn");
8306            conn.execute(
8307                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
8308                 VALUES ('Goal', '[\"$.name\"]', ' ')",
8309                [],
8310            )
8311            .expect("register schema");
8312            conn.execute(
8313                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8314                 VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Current name\"}', 100, 'src-1')",
8315                [],
8316            )
8317            .expect("insert node");
8318            // Insert a property FTS row with outdated text content.
8319            conn.execute(
8320                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
8321                 VALUES ('goal-1', 'Goal', 'Old stale name')",
8322                [],
8323            )
8324            .expect("insert stale property FTS row");
8325        }
8326        let report = service.check_semantics().expect("semantics check");
8327        assert_eq!(report.drifted_property_fts_rows, 1);
8328    }
8329
8330    #[test]
8331    fn check_semantics_detects_property_fts_row_that_should_not_exist() {
8332        let (db, service) = setup();
8333        {
8334            let conn = sqlite::open_connection(db.path()).expect("conn");
8335            conn.execute(
8336                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
8337                 VALUES ('Goal', '[\"$.searchable\"]', ' ')",
8338                [],
8339            )
8340            .expect("register schema");
8341            // Node does NOT have $.searchable — extraction yields no value.
8342            conn.execute(
8343                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8344                 VALUES ('r1', 'goal-1', 'Goal', '{\"other\":\"field\"}', 100, 'src-1')",
8345                [],
8346            )
8347            .expect("insert node");
8348            // But a property FTS row exists anyway.
8349            conn.execute(
8350                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
8351                 VALUES ('goal-1', 'Goal', 'phantom text')",
8352                [],
8353            )
8354            .expect("insert phantom property FTS row");
8355        }
8356        let report = service.check_semantics().expect("semantics check");
8357        assert_eq!(
8358            report.drifted_property_fts_rows, 1,
8359            "row that should not exist must be counted as drifted"
8360        );
8361    }
8362
8363    #[test]
8364    fn safe_export_writes_manifest_with_sha256() {
8365        let (_db, service) = setup();
8366        let export_dir = tempfile::TempDir::new().expect("temp dir");
8367        let export_path = export_dir.path().join("backup.db");
8368
8369        let manifest = service
8370            .safe_export(
8371                &export_path,
8372                SafeExportOptions {
8373                    force_checkpoint: false,
8374                },
8375            )
8376            .expect("export");
8377
8378        assert!(export_path.exists(), "exported db should exist");
8379        let manifest_path = export_dir.path().join("backup.db.export-manifest.json");
8380        assert!(
8381            manifest_path.exists(),
8382            "manifest file should exist at {}",
8383            manifest_path.display()
8384        );
8385        assert_eq!(manifest.sha256.len(), 64, "sha256 should be 64 hex chars");
8386        assert!(
8387            manifest.exported_at > 0,
8388            "exported_at should be a unix timestamp"
8389        );
8390        assert_eq!(
8391            manifest.schema_version,
8392            SchemaManager::new().current_version().0,
8393            "schema_version should match the live schema version"
8394        );
8395        assert_eq!(manifest.protocol_version, 1, "protocol_version should be 1");
8396        assert!(manifest.page_count > 0, "page_count should be positive");
8397    }
8398
8399    #[test]
8400    fn safe_export_preserves_operational_validation_contracts() {
8401        let (_db, service) = setup();
8402        let validation_json = r#"{"format_version":1,"mode":"enforce","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#;
8403        service
8404            .register_operational_collection(&OperationalRegisterRequest {
8405                name: "connector_health".to_owned(),
8406                kind: OperationalCollectionKind::LatestState,
8407                schema_json: "{}".to_owned(),
8408                retention_json: "{}".to_owned(),
8409                filter_fields_json: "[]".to_owned(),
8410                validation_json: validation_json.to_owned(),
8411                secondary_indexes_json: "[]".to_owned(),
8412                format_version: 1,
8413            })
8414            .expect("register collection");
8415
8416        let export_dir = tempfile::TempDir::new().expect("temp dir");
8417        let export_path = export_dir.path().join("backup.db");
8418        service
8419            .safe_export(
8420                &export_path,
8421                SafeExportOptions {
8422                    force_checkpoint: false,
8423                },
8424            )
8425            .expect("export");
8426
8427        let exported = sqlite::open_connection(&export_path).expect("exported conn");
8428        let exported_validation_json: String = exported
8429            .query_row(
8430                "SELECT validation_json FROM operational_collections WHERE name = 'connector_health'",
8431                [],
8432                |row| row.get(0),
8433            )
8434            .expect("validation_json");
8435        assert_eq!(exported_validation_json, validation_json);
8436    }
8437
8438    #[test]
8439    fn safe_export_force_checkpoint_false_skips_wal_pragma() {
8440        let (_db, service) = setup();
8441        let export_dir = tempfile::TempDir::new().expect("temp dir");
8442        let export_path = export_dir.path().join("no-wal.db");
8443
8444        // force_checkpoint: false must not error even on a non-WAL database
8445        let manifest = service
8446            .safe_export(
8447                &export_path,
8448                SafeExportOptions {
8449                    force_checkpoint: false,
8450                },
8451            )
8452            .expect("export with no checkpoint");
8453
8454        assert!(
8455            manifest.page_count > 0,
8456            "page_count must be populated regardless of checkpoint mode"
8457        );
8458        assert_eq!(
8459            manifest.schema_version,
8460            SchemaManager::new().current_version().0
8461        );
8462        assert_eq!(manifest.protocol_version, 1);
8463    }
8464
8465    #[test]
8466    fn safe_export_force_checkpoint_false_still_captures_wal_backed_changes() {
8467        let (db, service) = setup();
8468        let conn = sqlite::open_connection(db.path()).expect("conn");
8469        let journal_mode: String = conn
8470            .query_row("PRAGMA journal_mode=WAL", [], |row| row.get(0))
8471            .expect("enable wal");
8472        assert_eq!(journal_mode.to_lowercase(), "wal");
8473        let auto_checkpoint_pages: i64 = conn
8474            .query_row("PRAGMA wal_autocheckpoint=0", [], |row| row.get(0))
8475            .expect("disable auto checkpoint");
8476        assert_eq!(auto_checkpoint_pages, 0);
8477        conn.execute(
8478            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8479             VALUES ('r-wal', 'lg-wal', 'Meeting', '{}', 100, 'src-wal')",
8480            [],
8481        )
8482        .expect("insert wal-backed node");
8483
8484        let export_dir = tempfile::TempDir::new().expect("temp dir");
8485        let export_path = export_dir.path().join("wal-backed.db");
8486        service
8487            .safe_export(
8488                &export_path,
8489                SafeExportOptions {
8490                    force_checkpoint: false,
8491                },
8492            )
8493            .expect("export wal-backed db");
8494
8495        let exported = sqlite::open_connection(&export_path).expect("open exported db");
8496        let exported_count: i64 = exported
8497            .query_row(
8498                "SELECT count(*) FROM nodes WHERE logical_id = 'lg-wal'",
8499                [],
8500                |row| row.get(0),
8501            )
8502            .expect("count exported nodes");
8503        assert_eq!(
8504            exported_count, 1,
8505            "safe_export must include committed rows that are still resident in the WAL"
8506        );
8507    }
8508
8509    #[test]
8510    fn excise_source_removes_searchable_content_after_excision() {
8511        let (db, service) = setup();
8512        {
8513            let conn = sqlite::open_connection(db.path()).expect("conn");
8514            conn.execute(
8515                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
8516                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
8517                [],
8518            )
8519            .expect("insert v1");
8520            conn.execute(
8521                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8522                 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
8523                [],
8524            )
8525            .expect("insert v2");
8526            conn.execute(
8527                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
8528                 VALUES ('ck1', 'lg1', 'hello world', 100)",
8529                [],
8530            )
8531            .expect("insert chunk");
8532        }
8533        service.excise_source("source-2").expect("excise");
8534        {
8535            let conn = sqlite::open_connection(db.path()).expect("conn");
8536            let fts_count: i64 = conn
8537                .query_row(
8538                    "SELECT count(*) FROM fts_nodes WHERE chunk_id = 'ck1'",
8539                    [],
8540                    |row| row.get(0),
8541                )
8542                .expect("fts count");
8543            assert_eq!(
8544                fts_count, 0,
8545                "excised content should not remain searchable after excise"
8546            );
8547        }
8548    }
8549
8550    #[cfg(feature = "sqlite-vec")]
8551    #[test]
8552    fn excise_source_cleans_chunks_and_vec_rows_for_excised_version() {
8553        let (db, service) = setup();
8554        {
8555            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
8556            service
8557                .schema_manager
8558                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
8559                .expect("ensure vec profile");
8560            conn.execute(
8561                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
8562                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
8563                [],
8564            )
8565            .expect("insert v1");
8566            conn.execute(
8567                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8568                 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
8569                [],
8570            )
8571            .expect("insert v2");
8572            conn.execute(
8573                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
8574                 VALUES ('ck1', 'lg1', 'new content', 200)",
8575                [],
8576            )
8577            .expect("insert chunk");
8578            conn.execute(
8579                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('ck1', zeroblob(16))",
8580                [],
8581            )
8582            .expect("insert vec row");
8583        }
8584
8585        service.excise_source("source-2").expect("excise");
8586
8587        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
8588        let active_row: String = conn
8589            .query_row(
8590                "SELECT row_id FROM nodes WHERE logical_id = 'lg1' AND superseded_at IS NULL",
8591                [],
8592                |row| row.get(0),
8593            )
8594            .expect("restored active row");
8595        assert_eq!(active_row, "r1");
8596        let chunk_count: i64 = conn
8597            .query_row(
8598                "SELECT count(*) FROM chunks WHERE node_logical_id = 'lg1'",
8599                [],
8600                |row| row.get(0),
8601            )
8602            .expect("chunk count");
8603        assert_eq!(
8604            chunk_count, 0,
8605            "excised source content must not survive as chunks"
8606        );
8607        let vec_count: i64 = conn
8608            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
8609                row.get(0)
8610            })
8611            .expect("vec count");
8612        assert_eq!(vec_count, 0, "excised source vec rows must be removed");
8613        let fts_count: i64 = conn
8614            .query_row(
8615                "SELECT count(*) FROM fts_nodes WHERE node_logical_id = 'lg1'",
8616                [],
8617                |row| row.get(0),
8618            )
8619            .expect("fts count");
8620        assert_eq!(
8621            fts_count, 0,
8622            "excised source content must not remain searchable"
8623        );
8624    }
8625
8626    #[test]
8627    fn export_page_count_matches_exported_file() {
8628        let (_db, service) = setup();
8629        let export_dir = tempfile::TempDir::new().expect("temp dir");
8630        let export_path = export_dir.path().join("page-count.db");
8631
8632        let manifest = service
8633            .safe_export(
8634                &export_path,
8635                SafeExportOptions {
8636                    force_checkpoint: false,
8637                },
8638            )
8639            .expect("export");
8640
8641        let exported = sqlite::open_connection(&export_path).expect("open exported db");
8642        let actual_page_count: u64 = exported
8643            .query_row("PRAGMA page_count", [], |row| row.get(0))
8644            .expect("page_count from exported file");
8645
8646        assert_eq!(
8647            manifest.page_count, actual_page_count,
8648            "manifest page_count must match the exported file's PRAGMA page_count"
8649        );
8650    }
8651
8652    #[test]
8653    fn no_temp_file_after_successful_export() {
8654        let (_db, service) = setup();
8655        let export_dir = tempfile::TempDir::new().expect("temp dir");
8656        let export_path = export_dir.path().join("no-tmp.db");
8657
8658        service
8659            .safe_export(
8660                &export_path,
8661                SafeExportOptions {
8662                    force_checkpoint: false,
8663                },
8664            )
8665            .expect("export");
8666
8667        let tmp_files: Vec<_> = fs::read_dir(export_dir.path())
8668            .expect("read export dir")
8669            .filter_map(Result::ok)
8670            .filter(|e| e.path().extension().is_some_and(|ext| ext == "tmp"))
8671            .collect();
8672
8673        assert!(
8674            tmp_files.is_empty(),
8675            "no .tmp files should remain after a successful export, found: {tmp_files:?}"
8676        );
8677    }
8678
8679    #[test]
8680    fn export_manifest_is_valid_json() {
8681        let (_db, service) = setup();
8682        let export_dir = tempfile::TempDir::new().expect("temp dir");
8683        let export_path = export_dir.path().join("valid-json.db");
8684
8685        service
8686            .safe_export(
8687                &export_path,
8688                SafeExportOptions {
8689                    force_checkpoint: false,
8690                },
8691            )
8692            .expect("export");
8693
8694        let manifest_path = export_dir.path().join("valid-json.db.export-manifest.json");
8695        let manifest_contents = fs::read_to_string(&manifest_path).expect("read manifest");
8696        let parsed: serde_json::Value =
8697            serde_json::from_str(&manifest_contents).expect("manifest must be valid JSON");
8698
8699        assert!(
8700            parsed.get("exported_at").is_some(),
8701            "manifest must contain exported_at"
8702        );
8703        assert!(
8704            parsed.get("sha256").is_some(),
8705            "manifest must contain sha256"
8706        );
8707        assert!(
8708            parsed.get("schema_version").is_some(),
8709            "manifest must contain schema_version"
8710        );
8711        assert!(
8712            parsed.get("protocol_version").is_some(),
8713            "manifest must contain protocol_version"
8714        );
8715        assert!(
8716            parsed.get("page_count").is_some(),
8717            "manifest must contain page_count"
8718        );
8719    }
8720
8721    #[test]
8722    fn provenance_purge_dry_run_reports_counts() {
8723        let (db, service) = setup();
8724        {
8725            let conn = sqlite::open_connection(db.path()).expect("conn");
8726            conn.execute(
8727                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8728                 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
8729                [],
8730            )
8731            .expect("insert p1");
8732            conn.execute(
8733                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8734                 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 200)",
8735                [],
8736            )
8737            .expect("insert p2");
8738            conn.execute(
8739                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8740                 VALUES ('p3', 'excise', 'lg3', 'src-1', 300)",
8741                [],
8742            )
8743            .expect("insert p3");
8744        }
8745
8746        let options = super::ProvenancePurgeOptions {
8747            dry_run: true,
8748            preserve_event_types: Vec::new(),
8749        };
8750        let report = service
8751            .purge_provenance_events(250, &options)
8752            .expect("dry run purge");
8753
8754        assert_eq!(report.events_deleted, 2);
8755        assert_eq!(report.events_preserved, 1);
8756        assert!(report.oldest_remaining.is_some());
8757
8758        let conn = sqlite::open_connection(db.path()).expect("conn");
8759        let total: i64 = conn
8760            .query_row("SELECT count(*) FROM provenance_events", [], |row| {
8761                row.get(0)
8762            })
8763            .expect("count");
8764        assert_eq!(total, 3, "dry_run must not delete any events");
8765    }
8766
8767    #[test]
8768    fn provenance_purge_deletes_old_events() {
8769        let (db, service) = setup();
8770        {
8771            let conn = sqlite::open_connection(db.path()).expect("conn");
8772            conn.execute(
8773                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8774                 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
8775                [],
8776            )
8777            .expect("insert p1");
8778            conn.execute(
8779                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8780                 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 200)",
8781                [],
8782            )
8783            .expect("insert p2");
8784        }
8785
8786        let options = super::ProvenancePurgeOptions {
8787            dry_run: false,
8788            preserve_event_types: Vec::new(),
8789        };
8790        let report = service
8791            .purge_provenance_events(150, &options)
8792            .expect("purge");
8793
8794        assert_eq!(report.events_deleted, 1);
8795        assert_eq!(report.events_preserved, 1);
8796        assert_eq!(report.oldest_remaining, Some(200));
8797
8798        let conn = sqlite::open_connection(db.path()).expect("conn");
8799        let remaining: i64 = conn
8800            .query_row("SELECT count(*) FROM provenance_events", [], |row| {
8801                row.get(0)
8802            })
8803            .expect("count");
8804        assert_eq!(remaining, 1);
8805    }
8806
8807    #[test]
8808    fn provenance_purge_preserves_specified_types() {
8809        let (db, service) = setup();
8810        {
8811            let conn = sqlite::open_connection(db.path()).expect("conn");
8812            conn.execute(
8813                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8814                 VALUES ('p1', 'excise', 'lg1', 'src-1', 100)",
8815                [],
8816            )
8817            .expect("insert p1");
8818            conn.execute(
8819                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8820                 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 100)",
8821                [],
8822            )
8823            .expect("insert p2");
8824            conn.execute(
8825                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8826                 VALUES ('p3', 'node_insert', 'lg3', 'src-1', 100)",
8827                [],
8828            )
8829            .expect("insert p3");
8830        }
8831
8832        let options = super::ProvenancePurgeOptions {
8833            dry_run: false,
8834            preserve_event_types: Vec::new(),
8835        };
8836        let report = service
8837            .purge_provenance_events(500, &options)
8838            .expect("purge");
8839
8840        assert_eq!(report.events_deleted, 2);
8841        assert_eq!(report.events_preserved, 1);
8842
8843        let conn = sqlite::open_connection(db.path()).expect("conn");
8844        let remaining_type: String = conn
8845            .query_row("SELECT event_type FROM provenance_events", [], |row| {
8846                row.get(0)
8847            })
8848            .expect("remaining event type");
8849        assert_eq!(remaining_type, "excise");
8850    }
8851
8852    #[test]
8853    fn provenance_purge_noop_with_zero_timestamp() {
8854        let (db, service) = setup();
8855        {
8856            let conn = sqlite::open_connection(db.path()).expect("conn");
8857            conn.execute(
8858                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8859                 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
8860                [],
8861            )
8862            .expect("insert p1");
8863        }
8864
8865        let options = super::ProvenancePurgeOptions {
8866            dry_run: false,
8867            preserve_event_types: Vec::new(),
8868        };
8869        let report = service.purge_provenance_events(0, &options).expect("purge");
8870
8871        assert_eq!(report.events_deleted, 0);
8872        assert_eq!(report.events_preserved, 1);
8873        assert_eq!(report.oldest_remaining, Some(100));
8874    }
8875
8876    #[test]
8877    fn restore_skips_edge_when_counterpart_purged() {
8878        let (db, service) = setup();
8879        {
8880            let conn = sqlite::open_connection(db.path()).expect("conn");
8881            // Create node A (doc-1) and node B (doc-2)
8882            conn.execute(
8883                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8884                 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
8885                [],
8886            )
8887            .expect("insert node A");
8888            conn.execute(
8889                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8890                 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
8891                [],
8892            )
8893            .expect("insert node B");
8894            // Create edge between A and B
8895            conn.execute(
8896                "INSERT INTO edges \
8897                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
8898                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
8899                [],
8900            )
8901            .expect("insert edge");
8902            // Retire both A and B, and the edge
8903            conn.execute(
8904                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8905                 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
8906                [],
8907            )
8908            .expect("insert retire event A");
8909            conn.execute(
8910                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8911                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
8912                [],
8913            )
8914            .expect("insert edge retire event");
8915            conn.execute(
8916                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
8917                [],
8918            )
8919            .expect("retire node A");
8920            conn.execute(
8921                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-2'",
8922                [],
8923            )
8924            .expect("retire node B");
8925            conn.execute(
8926                "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
8927                [],
8928            )
8929            .expect("retire edge");
8930            // Simulate purge of B: delete node rows but leave the edge intact
8931            // to reproduce the dangling-edge scenario the validation guards against.
8932            conn.execute("DELETE FROM nodes WHERE logical_id = 'doc-2'", [])
8933                .expect("purge node B rows");
8934        }
8935
8936        // Restore A — the edge should be skipped because B has no active node
8937        let report = service.restore_logical_id("doc-1").expect("restore A");
8938        assert!(!report.was_noop);
8939        assert_eq!(report.restored_node_rows, 1);
8940        assert_eq!(report.restored_edge_rows, 0, "edge should not be restored");
8941        assert_eq!(report.skipped_edges.len(), 1);
8942        assert_eq!(report.skipped_edges[0].edge_logical_id, "edge-1");
8943        assert_eq!(report.skipped_edges[0].missing_endpoint, "doc-2");
8944
8945        // Verify the edge is still retired in the database
8946        let conn = sqlite::open_connection(db.path()).expect("conn");
8947        let active_edge_count: i64 = conn
8948            .query_row(
8949                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
8950                [],
8951                |row| row.get(0),
8952            )
8953            .expect("active edge count");
8954        assert_eq!(active_edge_count, 0, "edge must remain retired");
8955    }
8956
8957    #[test]
8958    fn restore_restores_edges_to_active_nodes() {
8959        let (db, service) = setup();
8960        {
8961            let conn = sqlite::open_connection(db.path()).expect("conn");
8962            // Create node A and node B (B stays active)
8963            conn.execute(
8964                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8965                 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
8966                [],
8967            )
8968            .expect("insert node A");
8969            conn.execute(
8970                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8971                 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
8972                [],
8973            )
8974            .expect("insert node B");
8975            // Create edge between A and B
8976            conn.execute(
8977                "INSERT INTO edges \
8978                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
8979                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
8980                [],
8981            )
8982            .expect("insert edge");
8983            // Retire only A
8984            conn.execute(
8985                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8986                 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
8987                [],
8988            )
8989            .expect("insert retire event A");
8990            conn.execute(
8991                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8992                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
8993                [],
8994            )
8995            .expect("insert edge retire event");
8996            conn.execute(
8997                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
8998                [],
8999            )
9000            .expect("retire node A");
9001            conn.execute(
9002                "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
9003                [],
9004            )
9005            .expect("retire edge");
9006        }
9007
9008        // Restore A — B is active, so the edge should be restored normally
9009        let report = service.restore_logical_id("doc-1").expect("restore A");
9010        assert!(!report.was_noop);
9011        assert_eq!(report.restored_node_rows, 1);
9012        assert!(report.restored_edge_rows > 0, "edge should be restored");
9013        assert!(
9014            report.skipped_edges.is_empty(),
9015            "no edges should be skipped"
9016        );
9017
9018        let conn = sqlite::open_connection(db.path()).expect("conn");
9019        let active_edge_count: i64 = conn
9020            .query_row(
9021                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
9022                [],
9023                |row| row.get(0),
9024            )
9025            .expect("active edge count");
9026        assert_eq!(active_edge_count, 1, "edge must be active");
9027    }
9028
9029    #[test]
9030    fn restore_restores_edges_when_both_restored() {
9031        let (db, service) = setup();
9032        {
9033            let conn = sqlite::open_connection(db.path()).expect("conn");
9034            // Create node A and node B
9035            conn.execute(
9036                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9037                 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
9038                [],
9039            )
9040            .expect("insert node A");
9041            conn.execute(
9042                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9043                 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
9044                [],
9045            )
9046            .expect("insert node B");
9047            // Create edge between A and B
9048            conn.execute(
9049                "INSERT INTO edges \
9050                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
9051                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
9052                [],
9053            )
9054            .expect("insert edge");
9055            // Retire both A and B
9056            conn.execute(
9057                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9058                 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
9059                [],
9060            )
9061            .expect("insert retire event A");
9062            conn.execute(
9063                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9064                 VALUES ('evt-retire-b', 'node_retire', 'doc-2', 'forget-1', 200, '')",
9065                [],
9066            )
9067            .expect("insert retire event B");
9068            conn.execute(
9069                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9070                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
9071                [],
9072            )
9073            .expect("insert edge retire event");
9074            conn.execute(
9075                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
9076                [],
9077            )
9078            .expect("retire node A");
9079            conn.execute(
9080                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-2'",
9081                [],
9082            )
9083            .expect("retire node B");
9084            conn.execute(
9085                "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
9086                [],
9087            )
9088            .expect("retire edge");
9089        }
9090
9091        // Restore B first — edge is skipped because A is still retired
9092        let report_b = service.restore_logical_id("doc-2").expect("restore B");
9093        assert!(!report_b.was_noop);
9094
9095        // Restore A — B is now active, so the edge should be restored
9096        let report_a = service.restore_logical_id("doc-1").expect("restore A");
9097        assert!(!report_a.was_noop);
9098        assert_eq!(report_a.restored_node_rows, 1);
9099        assert!(
9100            report_a.restored_edge_rows > 0,
9101            "edge should be restored when both endpoints active"
9102        );
9103        assert!(
9104            report_a.skipped_edges.is_empty(),
9105            "no edges should be skipped"
9106        );
9107
9108        let conn = sqlite::open_connection(db.path()).expect("conn");
9109        let active_edge_count: i64 = conn
9110            .query_row(
9111                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
9112                [],
9113                |row| row.get(0),
9114            )
9115            .expect("active edge count");
9116        assert_eq!(
9117            active_edge_count, 1,
9118            "edge must be active after both endpoints restored"
9119        );
9120    }
9121
9122    // ── FTS property schema end-to-end tests ──────────────────────────
9123
9124    #[test]
9125    fn fts_property_schema_crud_round_trip() {
9126        let (_db, service) = setup();
9127
9128        // Register
9129        let record = service
9130            .register_fts_property_schema(
9131                "Meeting",
9132                &["$.title".to_owned(), "$.summary".to_owned()],
9133                None,
9134            )
9135            .expect("register");
9136        assert_eq!(record.kind, "Meeting");
9137        assert_eq!(record.property_paths, vec!["$.title", "$.summary"]);
9138        assert_eq!(record.separator, " ");
9139        assert_eq!(record.format_version, 1);
9140
9141        // Describe
9142        let described = service
9143            .describe_fts_property_schema("Meeting")
9144            .expect("describe")
9145            .expect("should exist");
9146        assert_eq!(described, record);
9147
9148        // Describe missing kind
9149        let missing = service
9150            .describe_fts_property_schema("NoSuchKind")
9151            .expect("describe missing");
9152        assert!(missing.is_none());
9153
9154        // List
9155        let list = service.list_fts_property_schemas().expect("list");
9156        assert_eq!(list.len(), 1);
9157        assert_eq!(list[0].kind, "Meeting");
9158
9159        // Update (idempotent upsert)
9160        let updated = service
9161            .register_fts_property_schema(
9162                "Meeting",
9163                &["$.title".to_owned(), "$.notes".to_owned()],
9164                Some("\n"),
9165            )
9166            .expect("update");
9167        assert_eq!(updated.property_paths, vec!["$.title", "$.notes"]);
9168        assert_eq!(updated.separator, "\n");
9169
9170        // Remove
9171        service
9172            .remove_fts_property_schema("Meeting")
9173            .expect("remove");
9174        let after_remove = service
9175            .describe_fts_property_schema("Meeting")
9176            .expect("describe after remove");
9177        assert!(after_remove.is_none());
9178
9179        // Remove non-existent is an error
9180        let err = service.remove_fts_property_schema("Meeting");
9181        assert!(err.is_err());
9182    }
9183
9184    #[test]
9185    fn restore_reestablishes_property_fts_visibility() {
9186        let (db, service) = setup();
9187        {
9188            let conn = sqlite::open_connection(db.path()).expect("conn");
9189            // Register a property schema for Document kind.
9190            conn.execute(
9191                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
9192                 VALUES ('Document', '[\"$.title\", \"$.body\"]', ' ')",
9193                [],
9194            )
9195            .expect("register schema");
9196            // Insert an active node with extractable properties.
9197            conn.execute(
9198                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9199                 VALUES ('row-1', 'doc-1', 'Document', '{\"title\":\"Budget\",\"body\":\"Q3 forecast\"}', 100, 'seed')",
9200                [],
9201            )
9202            .expect("insert node");
9203            // Insert a chunk so restore has something to work with for FTS.
9204            conn.execute(
9205                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
9206                 VALUES ('chunk-1', 'doc-1', 'budget text', 100)",
9207                [],
9208            )
9209            .expect("insert chunk");
9210            // Insert property FTS row (as write path would).
9211            conn.execute(
9212                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
9213                 VALUES ('doc-1', 'Document', 'Budget Q3 forecast')",
9214                [],
9215            )
9216            .expect("insert property fts");
9217            // Simulate retire: supersede node, clear FTS.
9218            conn.execute(
9219                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9220                 VALUES ('evt-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
9221                [],
9222            )
9223            .expect("retire event");
9224            conn.execute(
9225                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
9226                [],
9227            )
9228            .expect("supersede");
9229            conn.execute("DELETE FROM fts_nodes", [])
9230                .expect("clear chunk fts");
9231            conn.execute("DELETE FROM fts_node_properties", [])
9232                .expect("clear property fts");
9233        }
9234
9235        let report = service.restore_logical_id("doc-1").expect("restore");
9236        assert_eq!(report.restored_property_fts_rows, 1);
9237
9238        // Verify the property FTS row was recreated.
9239        let conn = sqlite::open_connection(db.path()).expect("conn");
9240        let prop_fts_count: i64 = conn
9241            .query_row(
9242                "SELECT count(*) FROM fts_node_properties WHERE node_logical_id = 'doc-1'",
9243                [],
9244                |row| row.get(0),
9245            )
9246            .expect("prop fts count");
9247        assert_eq!(prop_fts_count, 1, "property FTS must be restored");
9248
9249        let text: String = conn
9250            .query_row(
9251                "SELECT text_content FROM fts_node_properties WHERE node_logical_id = 'doc-1'",
9252                [],
9253                |row| row.get(0),
9254            )
9255            .expect("prop fts text");
9256        assert_eq!(text, "Budget Q3 forecast");
9257    }
9258
9259    #[test]
9260    fn safe_export_preserves_fts_property_schemas() {
9261        let (_db, service) = setup();
9262        service
9263            .register_fts_property_schema(
9264                "Goal",
9265                &["$.name".to_owned(), "$.rationale".to_owned()],
9266                None,
9267            )
9268            .expect("register schema");
9269
9270        let export_dir = tempfile::TempDir::new().expect("temp dir");
9271        let export_path = export_dir.path().join("backup.db");
9272        service
9273            .safe_export(
9274                &export_path,
9275                SafeExportOptions {
9276                    force_checkpoint: false,
9277                },
9278            )
9279            .expect("export");
9280
9281        // Open the exported DB and verify the schema survived.
9282        let exported_conn = rusqlite::Connection::open(&export_path).expect("open exported db");
9283        let kind: String = exported_conn
9284            .query_row(
9285                "SELECT kind FROM fts_property_schemas WHERE kind = 'Goal'",
9286                [],
9287                |row| row.get(0),
9288            )
9289            .expect("schema must exist in export");
9290        assert_eq!(kind, "Goal");
9291        let paths_json: String = exported_conn
9292            .query_row(
9293                "SELECT property_paths_json FROM fts_property_schemas WHERE kind = 'Goal'",
9294                [],
9295                |row| row.get(0),
9296            )
9297            .expect("paths must exist");
9298        let paths: Vec<String> = serde_json::from_str(&paths_json).expect("valid json");
9299        assert_eq!(paths, vec!["$.name", "$.rationale"]);
9300    }
9301
9302    #[test]
9303    #[allow(clippy::too_many_lines)]
9304    fn export_recovery_rebuilds_property_fts_from_canonical_state() {
9305        let (db, service) = setup();
9306        // Register a schema and insert two nodes with extractable properties.
9307        service
9308            .register_fts_property_schema("Goal", &["$.name".to_owned()], None)
9309            .expect("register");
9310        {
9311            let conn = sqlite::open_connection(db.path()).expect("conn");
9312            conn.execute(
9313                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9314                 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
9315                [],
9316            )
9317            .expect("insert node 1");
9318            conn.execute(
9319                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
9320                 VALUES ('goal-1', 'Goal', 'Ship v2')",
9321                [],
9322            )
9323            .expect("insert property FTS row 1");
9324            conn.execute(
9325                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9326                 VALUES ('row-2', 'goal-2', 'Goal', '{\"name\":\"Launch redesign\"}', 100, 'seed')",
9327                [],
9328            )
9329            .expect("insert node 2");
9330            conn.execute(
9331                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
9332                 VALUES ('goal-2', 'Goal', 'Launch redesign')",
9333                [],
9334            )
9335            .expect("insert property FTS row 2");
9336        }
9337
9338        // Export.
9339        let export_dir = tempfile::TempDir::new().expect("temp dir");
9340        let export_path = export_dir.path().join("backup.db");
9341        service
9342            .safe_export(
9343                &export_path,
9344                SafeExportOptions {
9345                    force_checkpoint: false,
9346                },
9347            )
9348            .expect("export");
9349
9350        // Corrupt the derived rows: replace correct text with wrong text for
9351        // goal-1, and delete the row for goal-2 entirely. This exercises both
9352        // corrupted-but-present rows and missing rows in the same recovery.
9353        {
9354            let conn = rusqlite::Connection::open(&export_path).expect("open export");
9355            conn.execute(
9356                "DELETE FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
9357                [],
9358            )
9359            .expect("delete old row");
9360            conn.execute(
9361                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
9362                 VALUES ('goal-1', 'Goal', 'completely wrong stale text')",
9363                [],
9364            )
9365            .expect("insert corrupted row");
9366            conn.execute(
9367                "DELETE FROM fts_node_properties WHERE node_logical_id = 'goal-2'",
9368                [],
9369            )
9370            .expect("delete goal-2 row");
9371        }
9372
9373        // Open the exported DB and rebuild projections from canonical state.
9374        let schema = Arc::new(SchemaManager::new());
9375        let exported_service = AdminService::new(&export_path, Arc::clone(&schema));
9376        exported_service
9377            .rebuild_projections(ProjectionTarget::Fts)
9378            .expect("rebuild");
9379
9380        // Verify text_search(...) returns the correct result for goal-1's
9381        // canonical property ("Ship") — not the corrupted text.
9382        let coordinator = ExecutionCoordinator::open(
9383            &export_path,
9384            Arc::clone(&schema),
9385            None,
9386            1,
9387            Arc::new(TelemetryCounters::default()),
9388        )
9389        .expect("coordinator");
9390
9391        let compiled = QueryBuilder::nodes("Goal")
9392            .text_search("Ship", 10)
9393            .limit(10)
9394            .compile()
9395            .expect("compile");
9396        let rows = coordinator
9397            .execute_compiled_read(&compiled)
9398            .expect("execute read");
9399        assert_eq!(rows.nodes.len(), 1);
9400        assert_eq!(rows.nodes[0].logical_id, "goal-1");
9401
9402        // Verify text_search(...) recovers the previously missing goal-2 row.
9403        let compiled2 = QueryBuilder::nodes("Goal")
9404            .text_search("redesign", 10)
9405            .limit(10)
9406            .compile()
9407            .expect("compile");
9408        let rows2 = coordinator
9409            .execute_compiled_read(&compiled2)
9410            .expect("execute read");
9411        assert_eq!(rows2.nodes.len(), 1);
9412        assert_eq!(rows2.nodes[0].logical_id, "goal-2");
9413
9414        // The corrupted text must not be searchable after recovery.
9415        let compiled3 = QueryBuilder::nodes("Goal")
9416            .text_search("stale", 10)
9417            .limit(10)
9418            .compile()
9419            .expect("compile");
9420        let rows3 = coordinator
9421            .execute_compiled_read(&compiled3)
9422            .expect("execute read");
9423        assert_eq!(
9424            rows3.nodes.len(),
9425            0,
9426            "corrupted text must not appear in search after rebuild"
9427        );
9428
9429        // Verify integrity and semantics are clean after recovery.
9430        let integrity = exported_service.check_integrity().expect("integrity");
9431        assert_eq!(integrity.missing_property_fts_rows, 0);
9432        let semantics = exported_service.check_semantics().expect("semantics");
9433        assert_eq!(semantics.drifted_property_fts_rows, 0);
9434        assert_eq!(semantics.orphaned_property_fts_rows, 0);
9435        assert_eq!(semantics.duplicate_property_fts_rows, 0);
9436    }
9437
9438    #[test]
9439    fn check_integrity_no_false_positives_for_empty_extraction() {
9440        let (db, service) = setup();
9441        {
9442            let conn = sqlite::open_connection(db.path()).expect("conn");
9443            // Register a schema that looks for $.searchable
9444            conn.execute(
9445                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
9446                 VALUES ('Ticket', '[\"$.searchable\"]', ' ')",
9447                [],
9448            )
9449            .expect("register schema");
9450            // Insert a node whose properties do NOT contain $.searchable —
9451            // correctly has no property FTS row.
9452            conn.execute(
9453                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9454                 VALUES ('row-1', 'ticket-1', 'Ticket', '{\"status\":\"open\"}', 100, 'seed')",
9455                [],
9456            )
9457            .expect("insert node");
9458        }
9459
9460        let report = service.check_integrity().expect("integrity");
9461        assert_eq!(
9462            report.missing_property_fts_rows, 0,
9463            "node with no extractable values must not be counted as missing"
9464        );
9465    }
9466
9467    #[test]
9468    fn check_integrity_detects_genuinely_missing_property_fts_rows() {
9469        let (db, service) = setup();
9470        {
9471            let conn = sqlite::open_connection(db.path()).expect("conn");
9472            conn.execute(
9473                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
9474                 VALUES ('Ticket', '[\"$.title\"]', ' ')",
9475                [],
9476            )
9477            .expect("register schema");
9478            // Insert a node WITH an extractable $.title but no property FTS row.
9479            conn.execute(
9480                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9481                 VALUES ('row-1', 'ticket-1', 'Ticket', '{\"title\":\"fix login bug\"}', 100, 'seed')",
9482                [],
9483            )
9484            .expect("insert node");
9485        }
9486
9487        let report = service.check_integrity().expect("integrity");
9488        assert_eq!(
9489            report.missing_property_fts_rows, 1,
9490            "node with extractable values but no property FTS row must be detected"
9491        );
9492    }
9493
9494    #[test]
9495    fn rebuild_projections_fts_restores_missing_property_fts_rows() {
9496        let (db, service) = setup();
9497        {
9498            let conn = sqlite::open_connection(db.path()).expect("conn");
9499            conn.execute(
9500                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
9501                 VALUES ('Goal', '[\"$.name\"]', ' ')",
9502                [],
9503            )
9504            .expect("register schema");
9505            conn.execute(
9506                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9507                 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
9508                [],
9509            )
9510            .expect("insert node");
9511            // Deliberately do NOT insert a property FTS row.
9512        }
9513
9514        let report = service
9515            .rebuild_projections(ProjectionTarget::Fts)
9516            .expect("rebuild");
9517        assert!(
9518            report.rebuilt_rows >= 1,
9519            "rebuild must insert at least one property FTS row"
9520        );
9521
9522        let conn = sqlite::open_connection(db.path()).expect("conn");
9523        let text: String = conn
9524            .query_row(
9525                "SELECT text_content FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
9526                [],
9527                |row| row.get(0),
9528            )
9529            .expect("property FTS row must exist after rebuild");
9530        assert_eq!(text, "Ship v2");
9531    }
9532
9533    #[test]
9534    fn rebuild_missing_projections_fills_gap_for_deleted_property_fts_row() {
9535        let (db, service) = setup();
9536        {
9537            let conn = sqlite::open_connection(db.path()).expect("conn");
9538            conn.execute(
9539                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
9540                 VALUES ('Goal', '[\"$.name\"]', ' ')",
9541                [],
9542            )
9543            .expect("register schema");
9544            conn.execute(
9545                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9546                 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
9547                [],
9548            )
9549            .expect("insert node");
9550            // Insert and then delete the property FTS row to simulate corruption.
9551            conn.execute(
9552                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
9553                 VALUES ('goal-1', 'Goal', 'Ship v2')",
9554                [],
9555            )
9556            .expect("insert property fts");
9557            conn.execute(
9558                "DELETE FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
9559                [],
9560            )
9561            .expect("delete property fts");
9562        }
9563
9564        let report = service
9565            .rebuild_missing_projections()
9566            .expect("rebuild missing");
9567        assert!(
9568            report.rebuilt_rows >= 1,
9569            "missing rebuild must insert the gap-fill row"
9570        );
9571
9572        let conn = sqlite::open_connection(db.path()).expect("conn");
9573        let count: i64 = conn
9574            .query_row(
9575                "SELECT count(*) FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
9576                [],
9577                |row| row.get(0),
9578            )
9579            .expect("count");
9580        assert_eq!(
9581            count, 1,
9582            "gap-fill must restore exactly one property FTS row"
9583        );
9584    }
9585
9586    #[test]
9587    fn remove_schema_then_rebuild_cleans_stale_property_fts_rows() {
9588        let (db, service) = setup();
9589        service
9590            .register_fts_property_schema("Goal", &["$.name".to_owned()], None)
9591            .expect("register");
9592        {
9593            let conn = sqlite::open_connection(db.path()).expect("conn");
9594            conn.execute(
9595                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9596                 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
9597                [],
9598            )
9599            .expect("insert node");
9600            // Manually insert a property FTS row (simulating the write path).
9601            conn.execute(
9602                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
9603                 VALUES ('goal-1', 'Goal', 'Ship v2')",
9604                [],
9605            )
9606            .expect("insert property fts");
9607        }
9608
9609        // Remove the schema — stale rows now exist.
9610        service.remove_fts_property_schema("Goal").expect("remove");
9611
9612        // Verify stale rows are detected.
9613        let semantics = service.check_semantics().expect("semantics");
9614        assert_eq!(
9615            semantics.orphaned_property_fts_rows, 1,
9616            "stale property FTS rows must be detected after schema removal"
9617        );
9618
9619        // Full rebuild should clean them.
9620        service
9621            .rebuild_projections(ProjectionTarget::Fts)
9622            .expect("rebuild");
9623
9624        let conn = sqlite::open_connection(db.path()).expect("conn");
9625        let count: i64 = conn
9626            .query_row(
9627                "SELECT count(*) FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
9628                [],
9629                |row| row.get(0),
9630            )
9631            .expect("count");
9632        assert_eq!(
9633            count, 0,
9634            "rebuild after schema removal must delete stale property FTS rows"
9635        );
9636    }
9637
9638    mod validate_fts_property_paths_tests {
9639        use super::super::validate_fts_property_paths;
9640
9641        #[test]
9642        fn valid_simple_path() {
9643            assert!(validate_fts_property_paths(&["$.name".to_owned()]).is_ok());
9644        }
9645
9646        #[test]
9647        fn valid_nested_path() {
9648            assert!(validate_fts_property_paths(&["$.address.city".to_owned()]).is_ok());
9649        }
9650
9651        #[test]
9652        fn valid_underscore_segment() {
9653            assert!(validate_fts_property_paths(&["$.a_b".to_owned()]).is_ok());
9654        }
9655
9656        #[test]
9657        fn rejects_bare_prefix() {
9658            let result = validate_fts_property_paths(&["$.".to_owned()]);
9659            assert!(result.is_err(), "path '$.' must be rejected");
9660        }
9661
9662        #[test]
9663        fn rejects_double_dot() {
9664            let result = validate_fts_property_paths(&["$..x".to_owned()]);
9665            assert!(result.is_err(), "path '$..x' must be rejected");
9666        }
9667
9668        #[test]
9669        fn rejects_trailing_dot() {
9670            let result = validate_fts_property_paths(&["$.foo.".to_owned()]);
9671            assert!(result.is_err(), "path '$.foo.' must be rejected");
9672        }
9673
9674        #[test]
9675        fn rejects_space_in_segment() {
9676            let result = validate_fts_property_paths(&["$.foo bar".to_owned()]);
9677            assert!(result.is_err(), "path '$.foo bar' must be rejected");
9678        }
9679
9680        #[test]
9681        fn rejects_bracket_syntax() {
9682            let result = validate_fts_property_paths(&["$.foo[0]".to_owned()]);
9683            assert!(result.is_err(), "path '$.foo[0]' must be rejected");
9684        }
9685
9686        #[test]
9687        fn rejects_duplicates() {
9688            let result = validate_fts_property_paths(&["$.name".to_owned(), "$.name".to_owned()]);
9689            assert!(result.is_err(), "duplicate paths must be rejected");
9690        }
9691
9692        #[test]
9693        fn rejects_empty_list() {
9694            let result = validate_fts_property_paths(&[]);
9695            assert!(result.is_err(), "empty path list must be rejected");
9696        }
9697    }
9698}