1use std::fmt::Write as _;
2use std::fs;
3use std::io;
4use std::path::{Path, PathBuf};
5use std::sync::Arc;
6use std::sync::mpsc::SyncSender;
7use std::time::SystemTime;
8
9use fathomdb_schema::{SchemaError, SchemaManager};
10use rusqlite::{DatabaseName, OptionalExtension, TransactionBehavior};
11use serde::{Deserialize, Serialize};
12use sha2::{Digest, Sha256};
13
14use crate::rebuild_actor::{RebuildMode, RebuildRequest, RebuildStateRow};
15
16use crate::{
17 EngineError, ProjectionRepairReport, ProjectionService,
18 embedder::{QueryEmbedder, QueryEmbedderIdentity},
19 ids::new_id,
20 operational::{
21 OperationalCollectionKind, OperationalCollectionRecord, OperationalCompactionReport,
22 OperationalCurrentRow, OperationalFilterClause, OperationalFilterField,
23 OperationalFilterFieldType, OperationalFilterMode, OperationalFilterValue,
24 OperationalHistoryValidationIssue, OperationalHistoryValidationReport,
25 OperationalMutationRow, OperationalPurgeReport, OperationalReadReport,
26 OperationalReadRequest, OperationalRegisterRequest, OperationalRepairReport,
27 OperationalRetentionActionKind, OperationalRetentionPlanItem,
28 OperationalRetentionPlanReport, OperationalRetentionRunItem, OperationalRetentionRunReport,
29 OperationalSecondaryIndexDefinition, OperationalSecondaryIndexRebuildReport,
30 OperationalTraceReport, extract_secondary_index_entries_for_current,
31 extract_secondary_index_entries_for_mutation, parse_operational_secondary_indexes_json,
32 parse_operational_validation_contract, validate_operational_payload_against_contract,
33 },
34 projection::ProjectionTarget,
35 sqlite,
36};
37
/// Result of [`AdminService::check_integrity`]: physical and foreign-key
/// health plus counts of missing or inconsistent projection rows.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct IntegrityReport {
    /// `true` when `PRAGMA integrity_check` returned exactly `ok`.
    pub physical_ok: bool,
    /// `true` when `pragma_foreign_key_check` reported no rows.
    pub foreign_keys_ok: bool,
    /// Chunks belonging to an active node that have no `fts_nodes` row.
    pub missing_fts_rows: usize,
    /// Count returned by `count_missing_property_fts_rows`.
    pub missing_property_fts_rows: usize,
    /// Logical ids with more than one `nodes` row where `superseded_at IS NULL`.
    pub duplicate_active_logical_ids: usize,
    /// Rows of `operational_mutations` plus rows of `operational_current`
    /// whose `collection_name` has no `operational_collections` entry.
    pub operational_missing_collections: usize,
    /// `operational_current` rows whose `last_mutation_id` matches no mutation.
    pub operational_missing_last_mutations: usize,
    /// One message per non-zero counter above.
    pub warnings: Vec<String>,
}
50
/// Record describing a property-FTS schema for one kind.
///
/// NOTE(review): nothing in the visible part of this file reads or writes
/// these fields; the per-field notes are inferred from names and types —
/// confirm against the code that builds this record.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct FtsPropertySchemaRecord {
    /// Kind the schema is registered for (presumably a node kind).
    pub kind: String,
    /// Property paths as plain strings (presumably a flattened view of `entries`).
    pub property_paths: Vec<String>,
    /// Path specifications, each carrying a mode and an optional weight.
    pub entries: Vec<FtsPropertyPathSpec>,
    /// Paths to exclude (presumably from recursive extraction — TODO confirm).
    pub exclude_paths: Vec<String>,
    /// Separator string (presumably placed between extracted values — TODO confirm).
    pub separator: String,
    /// Format version of the stored schema record.
    pub format_version: i64,
}
74
/// Mode attached to an [`FtsPropertyPathSpec`]; serialized in `snake_case`.
///
/// NOTE(review): the extraction behavior of each mode is implemented outside
/// the visible part of this file.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum FtsPropertyPathMode {
    /// Default mode; the one set by `FtsPropertyPathSpec::scalar`.
    #[default]
    Scalar,
    /// The mode set by `FtsPropertyPathSpec::recursive`.
    Recursive,
}
87
/// A single property path together with its mode and an optional weight.
///
/// The struct is `#[non_exhaustive]`; the `scalar` and `recursive`
/// constructors in this file build values of it.
#[non_exhaustive]
#[derive(Clone, Debug, PartialEq, Serialize)]
pub struct FtsPropertyPathSpec {
    /// The property path string.
    pub path: String,
    /// Mode for this path.
    pub mode: FtsPropertyPathMode,
    /// Optional weight; `None` unless set through `with_weight`.
    pub weight: Option<f32>,
}
100
// `Eq` cannot be derived because `weight` is an `Option<f32>`; it is asserted
// by hand so that types holding this spec (e.g. `FtsPropertySchemaRecord`,
// which derives `Eq`) still compile.
// NOTE(review): a NaN weight would not compare equal to itself, which breaks
// the reflexivity `Eq` promises — confirm NaN weights are never stored.
impl Eq for FtsPropertyPathSpec {}
104
105impl FtsPropertyPathSpec {
106 #[must_use]
107 pub fn scalar(path: impl Into<String>) -> Self {
108 Self {
109 path: path.into(),
110 mode: FtsPropertyPathMode::Scalar,
111 weight: None,
112 }
113 }
114
115 #[must_use]
116 pub fn recursive(path: impl Into<String>) -> Self {
117 Self {
118 path: path.into(),
119 mode: FtsPropertyPathMode::Recursive,
120 weight: None,
121 }
122 }
123
124 #[must_use]
130 pub fn with_weight(mut self, weight: f32) -> Self {
131 self.weight = Some(weight);
132 self
133 }
134}
135
/// Options for a safe export.
///
/// NOTE(review): the export routine that consumes these options is outside
/// the visible part of this file.
#[derive(Clone, Copy, Debug)]
pub struct SafeExportOptions {
    /// Presumably requests a checkpoint before the export is taken — confirm
    /// against the export implementation.
    pub force_checkpoint: bool,
}

impl Default for SafeExportOptions {
    /// The default enables `force_checkpoint`.
    fn default() -> Self {
        let force_checkpoint = true;
        SafeExportOptions { force_checkpoint }
    }
}
152
/// Version number of the export protocol.
/// NOTE(review): presumably the value written to
/// `SafeExportManifest::protocol_version`; the writer is not visible here.
const EXPORT_PROTOCOL_VERSION: u32 = 1;
155
/// Manifest describing an exported database file.
///
/// NOTE(review): the code that fills this manifest is outside the visible
/// part of this file; field notes are inferred from names and types.
#[derive(Clone, Debug, Serialize)]
pub struct SafeExportManifest {
    /// Export timestamp (presumably seconds since the Unix epoch — TODO confirm).
    pub exported_at: u64,
    /// SHA-256 digest of the export (presumably hex-encoded — TODO confirm).
    pub sha256: String,
    /// Schema version of the exported database.
    pub schema_version: u32,
    /// Export protocol version.
    pub protocol_version: u32,
    /// Page count of the exported database (presumably SQLite pages — TODO confirm).
    pub page_count: u64,
}
170
/// Rows attributed to a single `source_ref`.
///
/// NOTE(review): the trace query that fills this report is outside the
/// visible part of this file; field notes follow the field names.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct TraceReport {
    /// The source reference that was traced.
    pub source_ref: String,
    /// Number of node rows found.
    pub node_rows: usize,
    /// Number of edge rows found.
    pub edge_rows: usize,
    /// Number of action rows found.
    pub action_rows: usize,
    /// Number of operational mutation rows found.
    pub operational_mutation_rows: usize,
    /// Logical ids of the nodes found.
    pub node_logical_ids: Vec<String>,
    /// Ids of the actions found.
    pub action_ids: Vec<String>,
    /// Ids of the operational mutations found.
    pub operational_mutation_ids: Vec<String>,
}
183
/// An edge that was skipped, listed in `LogicalRestoreReport::skipped_edges`.
///
/// NOTE(review): presumably skipped because one endpoint node was
/// unavailable during restore — confirm against the restore implementation.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct SkippedEdge {
    /// Logical id of the skipped edge.
    pub edge_logical_id: String,
    /// Identifier of the endpoint that was missing.
    pub missing_endpoint: String,
}
190
/// Outcome of restoring one logical id.
///
/// NOTE(review): the restore routine is outside the visible part of this
/// file; field notes follow the field names.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct LogicalRestoreReport {
    /// Logical id the restore was requested for.
    pub logical_id: String,
    /// `true` when the restore changed nothing (presumably — TODO confirm).
    pub was_noop: bool,
    /// Node rows restored.
    pub restored_node_rows: usize,
    /// Edge rows restored.
    pub restored_edge_rows: usize,
    /// Chunk rows restored.
    pub restored_chunk_rows: usize,
    /// FTS rows restored.
    pub restored_fts_rows: usize,
    /// Property FTS rows restored.
    pub restored_property_fts_rows: usize,
    /// Vector rows restored.
    pub restored_vec_rows: usize,
    /// Edges that were skipped, with the endpoint that was missing.
    pub skipped_edges: Vec<SkippedEdge>,
    /// Free-form notes.
    pub notes: Vec<String>,
}
205
/// Outcome of purging one logical id.
///
/// NOTE(review): the purge routine is outside the visible part of this file;
/// field notes follow the field names.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct LogicalPurgeReport {
    /// Logical id the purge was requested for.
    pub logical_id: String,
    /// `true` when the purge changed nothing (presumably — TODO confirm).
    pub was_noop: bool,
    /// Node rows deleted.
    pub deleted_node_rows: usize,
    /// Edge rows deleted.
    pub deleted_edge_rows: usize,
    /// Chunk rows deleted.
    pub deleted_chunk_rows: usize,
    /// FTS rows deleted.
    pub deleted_fts_rows: usize,
    /// Vector rows deleted.
    pub deleted_vec_rows: usize,
    /// Free-form notes.
    pub notes: Vec<String>,
}
218
/// Options for purging provenance events.
///
/// NOTE(review): the purge routine is outside the visible part of this file.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ProvenancePurgeOptions {
    /// Presumably reports what would be deleted without deleting — TODO confirm.
    pub dry_run: bool,
    /// Event types to keep; deserializes to an empty list when the field is
    /// absent (`#[serde(default)]`).
    #[serde(default)]
    pub preserve_event_types: Vec<String>,
}
226
/// Outcome of a provenance purge.
///
/// NOTE(review): the purge routine is outside the visible part of this file.
#[derive(Clone, Debug, Serialize)]
pub struct ProvenancePurgeReport {
    /// Number of events deleted.
    pub events_deleted: u64,
    /// Number of events preserved.
    pub events_preserved: u64,
    /// Presumably the timestamp of the oldest remaining event, `None` when
    /// no events remain — TODO confirm.
    pub oldest_remaining: Option<i64>,
}
234
/// Administrative service over a single database file.
///
/// The methods visible in this file open a fresh connection per call through
/// `connect`, which also bootstraps the schema.
#[derive(Debug)]
pub struct AdminService {
    /// Path of the database file that `connect` opens.
    database_path: PathBuf,
    /// Schema manager used to bootstrap every connection opened by `connect`.
    schema_manager: Arc<SchemaManager>,
    /// Projection service created for the same path and schema manager.
    projections: ProjectionService,
    /// Channel for rebuild requests; `None` when built with `new`, `Some`
    /// when built with `new_with_rebuild`.
    rebuild_sender: Option<SyncSender<RebuildRequest>>,
}
246
/// Result of [`AdminService::check_semantics`]: one counter per consistency
/// check plus a warning line for every non-zero counter.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct SemanticReport {
    /// Chunks whose `node_logical_id` matches no `nodes` row at all.
    pub orphaned_chunks: usize,
    /// Active nodes (`superseded_at IS NULL`) with a NULL `source_ref`.
    pub null_source_ref_nodes: usize,
    /// Steps whose `run_id` matches no `runs` row.
    pub broken_step_fk: usize,
    /// Actions whose `step_id` matches no `steps` row.
    pub broken_action_fk: usize,
    /// `fts_nodes` rows whose `chunk_id` matches no `chunks` row.
    pub stale_fts_rows: usize,
    /// `fts_nodes` rows whose `node_logical_id` has no active node.
    pub fts_rows_for_superseded_nodes: usize,
    /// First count from `count_per_kind_property_fts_issues`; reported as
    /// property FTS rows for superseded/missing nodes.
    pub stale_property_fts_rows: usize,
    /// Second count from `count_per_kind_property_fts_issues`; reported as
    /// property FTS rows for unregistered kinds.
    pub orphaned_property_fts_rows: usize,
    /// Third count from `count_per_kind_property_fts_issues`; reported as
    /// property FTS rows whose kind does not match the active node.
    pub mismatched_kind_property_fts_rows: usize,
    /// Fourth count from `count_per_kind_property_fts_issues`; reported as
    /// active logical ids with duplicate property FTS rows.
    pub duplicate_property_fts_rows: usize,
    /// Count from `count_drifted_property_fts_rows`; reported as property
    /// FTS rows with stale `text_content`.
    pub drifted_property_fts_rows: usize,
    /// Active edges whose source or target logical id has no active node.
    pub dangling_edges: usize,
    /// Logical ids for which every `nodes` row is superseded.
    pub orphaned_supersession_chains: usize,
    /// `vec_nodes_active` rows whose `chunk_id` matches no chunk. Always 0
    /// without the `sqlite-vec` feature, or when the vec table/module is
    /// unavailable.
    pub stale_vec_rows: usize,
    /// `vec_nodes_active` rows whose chunk's `node_logical_id` matches no
    /// `nodes` row at all (superseded rows still count as present). Always 0
    /// without the `sqlite-vec` feature, or when the vec table/module is
    /// unavailable.
    pub vec_rows_for_superseded_nodes: usize,
    /// Latest `put` mutations in `latest_state` collections whose key has no
    /// `operational_current` row.
    pub missing_operational_current_rows: usize,
    /// `operational_current` rows in `latest_state` collections that do not
    /// agree with their `last_mutation_id` mutation, or that have a newer
    /// mutation for the same key.
    pub stale_operational_current_rows: usize,
    /// Mutations created after their collection's `disabled_at`.
    pub disabled_collection_mutations: usize,
    /// `node_access_metadata` rows whose `logical_id` matches no `nodes` row.
    pub orphaned_last_access_metadata_rows: usize,
    /// One message per non-zero counter above.
    pub warnings: Vec<String>,
}
290
/// Configuration for regenerating vector rows.
///
/// (De)serialized with `snake_case` field names; unknown fields are rejected
/// on deserialization (`deny_unknown_fields`).
///
/// NOTE(review): the regeneration routine is outside the visible part of
/// this file; field notes follow the field names.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct VectorRegenerationConfig {
    /// Profile name.
    pub profile: String,
    /// Name of the vector table.
    pub table_name: String,
    /// Chunking policy identifier.
    pub chunking_policy: String,
    /// Preprocessing policy identifier.
    pub preprocessing_policy: String,
}
308
/// Outcome of a vector regeneration run.
///
/// NOTE(review): the regeneration routine is outside the visible part of
/// this file; field notes follow the field names.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct VectorRegenerationReport {
    /// Profile name.
    pub profile: String,
    /// Name of the vector table.
    pub table_name: String,
    /// Vector dimension.
    pub dimension: usize,
    /// Total number of chunks considered.
    pub total_chunks: usize,
    /// Number of rows regenerated.
    pub regenerated_rows: usize,
    /// Whether the contract was persisted.
    pub contract_persisted: bool,
    /// Free-form notes.
    pub notes: Vec<String>,
}
320
/// An FTS profile row read from `projection_profiles` (facet `'fts'`).
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct FtsProfile {
    /// Value of the `kind` column.
    pub kind: String,
    /// The `$.tokenizer` entry of the row's `config_json`.
    pub tokenizer: String,
    /// Value of the `active_at` column; set from `unixepoch()` on every upsert.
    pub active_at: Option<i64>,
    /// Value of the `created_at` column; set from `unixepoch()` on insert.
    pub created_at: i64,
}
335
/// The vector profile row read from `projection_profiles`
/// (kind `'*'`, facet `'vec'`).
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct VecProfile {
    /// The `$.model_identity` entry of the row's `config_json`.
    pub model_identity: String,
    /// The `$.model_version` entry of the row's `config_json`, if any.
    pub model_version: Option<String>,
    /// The `$.dimensions` entry cast to an integer; readers in this file
    /// store 0 when the value does not fit in a `u32`.
    pub dimensions: u32,
    /// Value of the `active_at` column.
    pub active_at: Option<i64>,
    /// Value of the `created_at` column.
    pub created_at: i64,
}
352
/// Estimate produced by [`AdminService::preview_projection_impact`].
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct ProjectionImpact {
    /// Rows that would be rebuilt: active nodes of the kind for `fts`, all
    /// chunks for `vec`.
    pub rows_to_rebuild: u64,
    /// Rough duration: `rows / 5000` for `fts`, `rows / 100` for `vec`.
    pub estimated_seconds: u64,
    /// Rough size: `rows * 200` for `fts`, `rows * 1536` for `vec`.
    pub temp_db_size_bytes: u64,
    /// Tokenizer of the currently stored FTS profile, when one exists; `None`
    /// for the `vec` facet.
    pub current_tokenizer: Option<String>,
    /// Always `None` in `preview_projection_impact` as visible in this file.
    pub target_tokenizer: Option<String>,
}
369
/// Named tokenizer presets as `(preset name, tokenizer string)` pairs.
pub const TOKENIZER_PRESETS: &[(&str, &str)] = &[
    (
        "recall-optimized-english",
        "porter unicode61 remove_diacritics 2",
    ),
    ("precision-optimized", "unicode61 remove_diacritics 2"),
    ("global-cjk", "icu"),
    ("substring-trigram", "trigram"),
    ("source-code", "unicode61 tokenchars '._-$@'"),
];

/// Maps a preset name from [`TOKENIZER_PRESETS`] to its tokenizer string.
///
/// The comparison is an exact string match; any input that is not a preset
/// name is returned unchanged.
pub fn resolve_tokenizer_preset(input: &str) -> &str {
    TOKENIZER_PRESETS
        .iter()
        .find(|(preset_name, _)| *preset_name == input)
        .map_or(input, |(_, tokenizer)| *tokenizer)
}
394
// Limits and format versions.
// NOTE(review): none of these constants is referenced in the visible part of
// this file; the notes below are inferred from the names — confirm at the
// use sites.

/// Format version of the vector contract.
const CURRENT_VECTOR_CONTRACT_FORMAT_VERSION: i64 = 1;
/// Maximum length of a profile name.
const MAX_PROFILE_LEN: usize = 128;
/// Maximum length of a policy string.
const MAX_POLICY_LEN: usize = 128;
/// Maximum size of a contract JSON document, in bytes (32 KiB).
const MAX_CONTRACT_JSON_BYTES: usize = 32 * 1024;
/// Maximum size of audit metadata, in bytes.
const MAX_AUDIT_METADATA_BYTES: usize = 2048;
/// Default row limit for operational reads.
const DEFAULT_OPERATIONAL_READ_LIMIT: usize = 100;
/// Upper bound on the row limit for operational reads.
const MAX_OPERATIONAL_READ_LIMIT: usize = 1000;
402
/// Cloneable handle to a shared [`AdminService`].
///
/// Cloning the handle clones the inner `Arc`, so all clones refer to the
/// same service instance.
#[derive(Clone, Debug)]
pub struct AdminHandle {
    /// The shared service.
    inner: Arc<AdminService>,
}
408
409impl AdminHandle {
410 #[must_use]
412 pub fn new(service: AdminService) -> Self {
413 Self {
414 inner: Arc::new(service),
415 }
416 }
417
418 #[must_use]
420 pub fn service(&self) -> Arc<AdminService> {
421 Arc::clone(&self.inner)
422 }
423}
424
425impl AdminService {
426 #[must_use]
428 pub fn new(path: impl AsRef<Path>, schema_manager: Arc<SchemaManager>) -> Self {
429 let database_path = path.as_ref().to_path_buf();
430 let projections = ProjectionService::new(&database_path, Arc::clone(&schema_manager));
431 Self {
432 database_path,
433 schema_manager,
434 projections,
435 rebuild_sender: None,
436 }
437 }
438
439 #[must_use]
441 pub fn new_with_rebuild(
442 path: impl AsRef<Path>,
443 schema_manager: Arc<SchemaManager>,
444 rebuild_sender: SyncSender<RebuildRequest>,
445 ) -> Self {
446 let database_path = path.as_ref().to_path_buf();
447 let projections = ProjectionService::new(&database_path, Arc::clone(&schema_manager));
448 Self {
449 database_path,
450 schema_manager,
451 projections,
452 rebuild_sender: Some(rebuild_sender),
453 }
454 }
455
456 fn connect(&self) -> Result<rusqlite::Connection, EngineError> {
457 #[cfg(feature = "sqlite-vec")]
458 let conn = sqlite::open_connection_with_vec(&self.database_path)?;
459 #[cfg(not(feature = "sqlite-vec"))]
460 let conn = sqlite::open_connection(&self.database_path)?;
461 self.schema_manager.bootstrap(&conn)?;
462 Ok(conn)
463 }
464
465 pub fn set_fts_profile(
475 &self,
476 kind: &str,
477 tokenizer_str: &str,
478 ) -> Result<FtsProfile, EngineError> {
479 let resolved = resolve_tokenizer_preset(tokenizer_str);
480 if !resolved
482 .chars()
483 .all(|c| c.is_alphanumeric() || "'._-$@ ".contains(c))
484 {
485 return Err(EngineError::Bridge(format!(
486 "invalid tokenizer string: {resolved:?}"
487 )));
488 }
489 let conn = self.connect()?;
490 conn.execute(
491 r"INSERT INTO projection_profiles (kind, facet, config_json, active_at, created_at)
492 VALUES (?1, 'fts', json_object('tokenizer', ?2), unixepoch(), unixepoch())
493 ON CONFLICT(kind, facet) DO UPDATE SET
494 config_json = json_object('tokenizer', ?2),
495 active_at = unixepoch()",
496 rusqlite::params![kind, resolved],
497 )?;
498 let row = conn.query_row(
499 "SELECT kind, json_extract(config_json, '$.tokenizer'), active_at, created_at \
500 FROM projection_profiles WHERE kind = ?1 AND facet = 'fts'",
501 rusqlite::params![kind],
502 |row| {
503 Ok(FtsProfile {
504 kind: row.get(0)?,
505 tokenizer: row.get(1)?,
506 active_at: row.get(2)?,
507 created_at: row.get(3)?,
508 })
509 },
510 )?;
511 Ok(row)
512 }
513
514 pub fn get_fts_profile(&self, kind: &str) -> Result<Option<FtsProfile>, EngineError> {
521 let conn = self.connect()?;
522 let result = conn
523 .query_row(
524 "SELECT kind, json_extract(config_json, '$.tokenizer'), active_at, created_at \
525 FROM projection_profiles WHERE kind = ?1 AND facet = 'fts'",
526 rusqlite::params![kind],
527 |row| {
528 Ok(FtsProfile {
529 kind: row.get(0)?,
530 tokenizer: row.get(1)?,
531 active_at: row.get(2)?,
532 created_at: row.get(3)?,
533 })
534 },
535 )
536 .optional()?;
537 Ok(result)
538 }
539
540 pub fn get_vec_profile(&self) -> Result<Option<VecProfile>, EngineError> {
547 let conn = self.connect()?;
548 let result = conn
549 .query_row(
550 "SELECT \
551 json_extract(config_json, '$.model_identity'), \
552 json_extract(config_json, '$.model_version'), \
553 CAST(json_extract(config_json, '$.dimensions') AS INTEGER), \
554 active_at, \
555 created_at \
556 FROM projection_profiles WHERE kind = '*' AND facet = 'vec'",
557 [],
558 |row| {
559 Ok(VecProfile {
560 model_identity: row.get(0)?,
561 model_version: row.get(1)?,
562 dimensions: {
563 let d: i64 = row.get(2)?;
564 u32::try_from(d).unwrap_or(0)
565 },
566 active_at: row.get(3)?,
567 created_at: row.get(4)?,
568 })
569 },
570 )
571 .optional()?;
572 Ok(result)
573 }
574
575 #[allow(dead_code)]
580 fn set_vec_profile_inner(
581 conn: &rusqlite::Connection,
582 identity_json: &str,
583 ) -> Result<VecProfile, rusqlite::Error> {
584 conn.execute(
585 r"INSERT INTO projection_profiles (kind, facet, config_json, active_at, created_at)
586 VALUES ('*', 'vec', ?1, unixepoch(), unixepoch())
587 ON CONFLICT(kind, facet) DO UPDATE SET
588 config_json = ?1,
589 active_at = unixepoch()",
590 rusqlite::params![identity_json],
591 )?;
592 conn.query_row(
593 "SELECT \
594 json_extract(config_json, '$.model_identity'), \
595 json_extract(config_json, '$.model_version'), \
596 CAST(json_extract(config_json, '$.dimensions') AS INTEGER), \
597 active_at, \
598 created_at \
599 FROM projection_profiles WHERE kind = '*' AND facet = 'vec'",
600 [],
601 |row| {
602 Ok(VecProfile {
603 model_identity: row.get(0)?,
604 model_version: row.get(1)?,
605 dimensions: {
606 let d: i64 = row.get(2)?;
607 u32::try_from(d).unwrap_or(0)
608 },
609 active_at: row.get(3)?,
610 created_at: row.get(4)?,
611 })
612 },
613 )
614 }
615
616 pub fn set_vec_profile(&self, config_json: &str) -> Result<VecProfile, EngineError> {
625 let conn = self.connect()?;
626 Self::set_vec_profile_inner(&conn, config_json).map_err(EngineError::Sqlite)
627 }
628
629 pub fn preview_projection_impact(
637 &self,
638 kind: &str,
639 facet: &str,
640 ) -> Result<ProjectionImpact, EngineError> {
641 let conn = self.connect()?;
642 match facet {
643 "fts" => {
644 let rows: u64 = conn
645 .query_row(
646 "SELECT count(*) FROM nodes WHERE kind = ?1 AND superseded_at IS NULL",
647 rusqlite::params![kind],
648 |row| row.get::<_, i64>(0),
649 )
650 .map(i64::cast_unsigned)?;
651 let current_tokenizer = self.get_fts_profile(kind)?.map(|p| p.tokenizer);
652 Ok(ProjectionImpact {
653 rows_to_rebuild: rows,
654 estimated_seconds: rows / 5000,
655 temp_db_size_bytes: rows * 200,
656 current_tokenizer,
657 target_tokenizer: None,
658 })
659 }
660 "vec" => {
661 let rows: u64 = conn
662 .query_row("SELECT count(*) FROM chunks", [], |row| {
663 row.get::<_, i64>(0)
664 })
665 .map(i64::cast_unsigned)?;
666 Ok(ProjectionImpact {
667 rows_to_rebuild: rows,
668 estimated_seconds: rows / 100,
669 temp_db_size_bytes: rows * 1536,
670 current_tokenizer: None,
671 target_tokenizer: None,
672 })
673 }
674 other => Err(EngineError::Bridge(format!(
675 "unknown projection facet: {other:?}"
676 ))),
677 }
678 }
679
680 pub fn check_integrity(&self) -> Result<IntegrityReport, EngineError> {
683 let conn = self.connect()?;
684
685 let physical_result: String =
686 conn.query_row("PRAGMA integrity_check", [], |row| row.get(0))?;
687 let foreign_key_count: i64 =
688 conn.query_row("SELECT count(*) FROM pragma_foreign_key_check", [], |row| {
689 row.get(0)
690 })?;
691 let missing_fts_rows: i64 = conn.query_row(
692 r"
693 SELECT count(*)
694 FROM chunks c
695 JOIN nodes n
696 ON n.logical_id = c.node_logical_id
697 AND n.superseded_at IS NULL
698 WHERE NOT EXISTS (
699 SELECT 1
700 FROM fts_nodes f
701 WHERE f.chunk_id = c.id
702 )
703 ",
704 [],
705 |row| row.get(0),
706 )?;
707 let duplicate_active: i64 = conn.query_row(
708 r"
709 SELECT count(*)
710 FROM (
711 SELECT logical_id
712 FROM nodes
713 WHERE superseded_at IS NULL
714 GROUP BY logical_id
715 HAVING count(*) > 1
716 )
717 ",
718 [],
719 |row| row.get(0),
720 )?;
721 let operational_missing_collections: i64 = conn.query_row(
722 r"
723 SELECT (
724 SELECT count(*)
725 FROM operational_mutations m
726 LEFT JOIN operational_collections c ON c.name = m.collection_name
727 WHERE c.name IS NULL
728 ) + (
729 SELECT count(*)
730 FROM operational_current oc
731 LEFT JOIN operational_collections c ON c.name = oc.collection_name
732 WHERE c.name IS NULL
733 )
734 ",
735 [],
736 |row| row.get(0),
737 )?;
738 let operational_missing_last_mutations: i64 = conn.query_row(
739 r"
740 SELECT count(*)
741 FROM operational_current oc
742 LEFT JOIN operational_mutations m ON m.id = oc.last_mutation_id
743 WHERE m.id IS NULL
744 ",
745 [],
746 |row| row.get(0),
747 )?;
748
749 let missing_property_fts_rows = count_missing_property_fts_rows(&conn)?;
753
754 let mut warnings = Vec::new();
755 if missing_fts_rows > 0 {
756 warnings.push("missing FTS projections detected".to_owned());
757 }
758 if missing_property_fts_rows > 0 {
759 warnings.push("missing property FTS projections detected".to_owned());
760 }
761 if duplicate_active > 0 {
762 warnings.push("duplicate active logical_ids detected".to_owned());
763 }
764 if operational_missing_collections > 0 {
765 warnings.push("operational rows reference missing collections".to_owned());
766 }
767 if operational_missing_last_mutations > 0 {
768 warnings.push("operational current rows reference missing last mutations".to_owned());
769 }
770
771 Ok(IntegrityReport {
776 physical_ok: physical_result == "ok",
777 foreign_keys_ok: foreign_key_count == 0,
778 missing_fts_rows: i64_to_usize(missing_fts_rows),
779 missing_property_fts_rows: i64_to_usize(missing_property_fts_rows),
780 duplicate_active_logical_ids: i64_to_usize(duplicate_active),
781 operational_missing_collections: i64_to_usize(operational_missing_collections),
782 operational_missing_last_mutations: i64_to_usize(operational_missing_last_mutations),
783 warnings,
784 })
785 }
786
787 #[allow(clippy::too_many_lines)]
790 pub fn check_semantics(&self) -> Result<SemanticReport, EngineError> {
791 let conn = self.connect()?;
792
793 let orphaned_chunks: i64 = conn.query_row(
794 r"
795 SELECT count(*)
796 FROM chunks c
797 WHERE NOT EXISTS (
798 SELECT 1 FROM nodes n
799 WHERE n.logical_id = c.node_logical_id
800 )
801 ",
802 [],
803 |row| row.get(0),
804 )?;
805
806 let null_source_ref_nodes: i64 = conn.query_row(
807 "SELECT count(*) FROM nodes WHERE source_ref IS NULL AND superseded_at IS NULL",
808 [],
809 |row| row.get(0),
810 )?;
811
812 let broken_step_fk: i64 = conn.query_row(
813 r"
814 SELECT count(*) FROM steps s
815 WHERE NOT EXISTS (SELECT 1 FROM runs r WHERE r.id = s.run_id)
816 ",
817 [],
818 |row| row.get(0),
819 )?;
820
821 let broken_action_fk: i64 = conn.query_row(
822 r"
823 SELECT count(*) FROM actions a
824 WHERE NOT EXISTS (SELECT 1 FROM steps s WHERE s.id = a.step_id)
825 ",
826 [],
827 |row| row.get(0),
828 )?;
829
830 let stale_fts_rows: i64 = conn.query_row(
831 r"
832 SELECT count(*) FROM fts_nodes f
833 WHERE NOT EXISTS (SELECT 1 FROM chunks c WHERE c.id = f.chunk_id)
834 ",
835 [],
836 |row| row.get(0),
837 )?;
838
839 let fts_rows_for_superseded_nodes: i64 = conn.query_row(
840 r"
841 SELECT count(*) FROM fts_nodes f
842 WHERE NOT EXISTS (
843 SELECT 1 FROM nodes n
844 WHERE n.logical_id = f.node_logical_id AND n.superseded_at IS NULL
845 )
846 ",
847 [],
848 |row| row.get(0),
849 )?;
850
851 let (
852 stale_property_fts_rows,
853 orphaned_property_fts_rows,
854 mismatched_kind_property_fts_rows,
855 duplicate_property_fts_rows,
856 ) = count_per_kind_property_fts_issues(&conn)?;
857
858 let drifted_property_fts_rows = count_drifted_property_fts_rows(&conn)?;
859
860 let dangling_edges: i64 = conn.query_row(
861 r"
862 SELECT count(*) FROM edges e
863 WHERE e.superseded_at IS NULL AND (
864 NOT EXISTS (SELECT 1 FROM nodes n WHERE n.logical_id = e.source_logical_id AND n.superseded_at IS NULL)
865 OR
866 NOT EXISTS (SELECT 1 FROM nodes n WHERE n.logical_id = e.target_logical_id AND n.superseded_at IS NULL)
867 )
868 ",
869 [],
870 |row| row.get(0),
871 )?;
872
873 let orphaned_supersession_chains: i64 = conn.query_row(
874 r"
875 SELECT count(*) FROM (
876 SELECT logical_id FROM nodes
877 GROUP BY logical_id
878 HAVING count(*) > 0 AND sum(CASE WHEN superseded_at IS NULL THEN 1 ELSE 0 END) = 0
879 )
880 ",
881 [],
882 |row| row.get(0),
883 )?;
884
885 #[cfg(feature = "sqlite-vec")]
887 let stale_vec_rows: i64 = match conn.query_row(
888 r"
889 SELECT count(*) FROM vec_nodes_active v
890 WHERE NOT EXISTS (SELECT 1 FROM chunks c WHERE c.id = v.chunk_id)
891 ",
892 [],
893 |row| row.get(0),
894 ) {
895 Ok(n) => n,
896 Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
897 if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
898 {
899 0
900 }
901 Err(e) => return Err(EngineError::Sqlite(e)),
902 };
903 #[cfg(not(feature = "sqlite-vec"))]
904 let stale_vec_rows: i64 = 0;
905
906 #[cfg(feature = "sqlite-vec")]
907 let vec_rows_for_superseded_nodes: i64 = match conn.query_row(
908 r"
909 SELECT count(*) FROM vec_nodes_active v
910 JOIN chunks c ON c.id = v.chunk_id
911 WHERE NOT EXISTS (
912 SELECT 1 FROM nodes n
913 WHERE n.logical_id = c.node_logical_id
914 )
915 ",
916 [],
917 |row| row.get(0),
918 ) {
919 Ok(n) => n,
920 Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
921 if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
922 {
923 0
924 }
925 Err(e) => return Err(EngineError::Sqlite(e)),
926 };
927 #[cfg(not(feature = "sqlite-vec"))]
928 let vec_rows_for_superseded_nodes: i64 = 0;
929 let missing_operational_current_rows: i64 = conn.query_row(
930 r"
931 SELECT count(*)
932 FROM operational_mutations m
933 JOIN operational_collections c
934 ON c.name = m.collection_name
935 AND c.kind = 'latest_state'
936 WHERE m.op_kind = 'put'
937 AND NOT EXISTS (
938 SELECT 1
939 FROM operational_mutations newer
940 WHERE newer.collection_name = m.collection_name
941 AND newer.record_key = m.record_key
942 AND newer.mutation_order > m.mutation_order
943 )
944 AND NOT EXISTS (
945 SELECT 1
946 FROM operational_current oc
947 WHERE oc.collection_name = m.collection_name
948 AND oc.record_key = m.record_key
949 )
950 ",
951 [],
952 |row| row.get(0),
953 )?;
954 let stale_operational_current_rows: i64 = conn.query_row(
955 r"
956 SELECT count(*)
957 FROM operational_current oc
958 JOIN operational_collections c
959 ON c.name = oc.collection_name
960 AND c.kind = 'latest_state'
961 LEFT JOIN operational_mutations m ON m.id = oc.last_mutation_id
962 WHERE m.id IS NULL
963 OR m.collection_name != oc.collection_name
964 OR m.record_key != oc.record_key
965 OR m.op_kind != 'put'
966 OR m.payload_json != oc.payload_json
967 OR EXISTS (
968 SELECT 1
969 FROM operational_mutations newer
970 WHERE newer.collection_name = oc.collection_name
971 AND newer.record_key = oc.record_key
972 AND newer.mutation_order > m.mutation_order
973 )
974 ",
975 [],
976 |row| row.get(0),
977 )?;
978 let disabled_collection_mutations: i64 = conn.query_row(
979 r"
980 SELECT count(*)
981 FROM operational_mutations m
982 JOIN operational_collections c ON c.name = m.collection_name
983 WHERE c.disabled_at IS NOT NULL AND m.created_at > c.disabled_at
984 ",
985 [],
986 |row| row.get(0),
987 )?;
988 let orphaned_last_access_metadata_rows: i64 = conn.query_row(
989 r"
990 SELECT count(*)
991 FROM node_access_metadata am
992 WHERE NOT EXISTS (
993 SELECT 1 FROM nodes n WHERE n.logical_id = am.logical_id
994 )
995 ",
996 [],
997 |row| row.get(0),
998 )?;
999
1000 let mut warnings = Vec::new();
1001 if orphaned_chunks > 0 {
1002 warnings.push(format!(
1003 "{orphaned_chunks} orphaned chunk(s) with no surviving node history"
1004 ));
1005 }
1006 if null_source_ref_nodes > 0 {
1007 warnings.push(format!(
1008 "{null_source_ref_nodes} active node(s) with null source_ref"
1009 ));
1010 }
1011 if broken_step_fk > 0 {
1012 warnings.push(format!(
1013 "{broken_step_fk} step(s) referencing non-existent run"
1014 ));
1015 }
1016 if broken_action_fk > 0 {
1017 warnings.push(format!(
1018 "{broken_action_fk} action(s) referencing non-existent step"
1019 ));
1020 }
1021 if stale_fts_rows > 0 {
1022 warnings.push(format!(
1023 "{stale_fts_rows} stale FTS row(s) referencing missing chunk"
1024 ));
1025 }
1026 if fts_rows_for_superseded_nodes > 0 {
1027 warnings.push(format!(
1028 "{fts_rows_for_superseded_nodes} FTS row(s) for superseded node(s)"
1029 ));
1030 }
1031 if stale_property_fts_rows > 0 {
1032 warnings.push(format!(
1033 "{stale_property_fts_rows} stale property FTS row(s) for superseded/missing node(s)"
1034 ));
1035 }
1036 if orphaned_property_fts_rows > 0 {
1037 warnings.push(format!(
1038 "{orphaned_property_fts_rows} orphaned property FTS row(s) for unregistered kind(s)"
1039 ));
1040 }
1041 if mismatched_kind_property_fts_rows > 0 {
1042 warnings.push(format!(
1043 "{mismatched_kind_property_fts_rows} property FTS row(s) whose kind does not match the active node"
1044 ));
1045 }
1046 if duplicate_property_fts_rows > 0 {
1047 warnings.push(format!(
1048 "{duplicate_property_fts_rows} active logical ID(s) with duplicate property FTS rows"
1049 ));
1050 }
1051 if drifted_property_fts_rows > 0 {
1052 warnings.push(format!(
1053 "{drifted_property_fts_rows} property FTS row(s) with stale text_content"
1054 ));
1055 }
1056 if dangling_edges > 0 {
1057 warnings.push(format!(
1058 "{dangling_edges} active edge(s) with missing endpoint node"
1059 ));
1060 }
1061 if orphaned_supersession_chains > 0 {
1062 warnings.push(format!(
1063 "{orphaned_supersession_chains} logical_id(s) with all versions superseded"
1064 ));
1065 }
1066 if stale_vec_rows > 0 {
1067 warnings.push(format!(
1068 "{stale_vec_rows} stale vec row(s) referencing missing chunk"
1069 ));
1070 }
1071 if vec_rows_for_superseded_nodes > 0 {
1072 warnings.push(format!(
1073 "{vec_rows_for_superseded_nodes} vec row(s) whose node history is missing"
1074 ));
1075 }
1076 if missing_operational_current_rows > 0 {
1077 warnings.push(format!(
1078 "{missing_operational_current_rows} latest-state key(s) missing operational_current rows"
1079 ));
1080 }
1081 if stale_operational_current_rows > 0 {
1082 warnings.push(format!(
1083 "{stale_operational_current_rows} stale operational_current row(s)"
1084 ));
1085 }
1086 if disabled_collection_mutations > 0 {
1087 warnings.push(format!(
1088 "{disabled_collection_mutations} mutation(s) were written after collection disable"
1089 ));
1090 }
1091 if orphaned_last_access_metadata_rows > 0 {
1092 warnings.push(format!(
1093 "{orphaned_last_access_metadata_rows} last_access metadata row(s) reference missing node history"
1094 ));
1095 }
1096
1097 Ok(SemanticReport {
1098 orphaned_chunks: i64_to_usize(orphaned_chunks),
1099 null_source_ref_nodes: i64_to_usize(null_source_ref_nodes),
1100 broken_step_fk: i64_to_usize(broken_step_fk),
1101 broken_action_fk: i64_to_usize(broken_action_fk),
1102 stale_fts_rows: i64_to_usize(stale_fts_rows),
1103 fts_rows_for_superseded_nodes: i64_to_usize(fts_rows_for_superseded_nodes),
1104 stale_property_fts_rows: i64_to_usize(stale_property_fts_rows),
1105 orphaned_property_fts_rows: i64_to_usize(orphaned_property_fts_rows),
1106 mismatched_kind_property_fts_rows: i64_to_usize(mismatched_kind_property_fts_rows),
1107 duplicate_property_fts_rows: i64_to_usize(duplicate_property_fts_rows),
1108 drifted_property_fts_rows: i64_to_usize(drifted_property_fts_rows),
1109 dangling_edges: i64_to_usize(dangling_edges),
1110 orphaned_supersession_chains: i64_to_usize(orphaned_supersession_chains),
1111 stale_vec_rows: i64_to_usize(stale_vec_rows),
1112 vec_rows_for_superseded_nodes: i64_to_usize(vec_rows_for_superseded_nodes),
1113 missing_operational_current_rows: i64_to_usize(missing_operational_current_rows),
1114 stale_operational_current_rows: i64_to_usize(stale_operational_current_rows),
1115 disabled_collection_mutations: i64_to_usize(disabled_collection_mutations),
1116 orphaned_last_access_metadata_rows: i64_to_usize(orphaned_last_access_metadata_rows),
1117 warnings,
1118 })
1119 }
1120
1121 pub fn register_operational_collection(
1124 &self,
1125 request: &OperationalRegisterRequest,
1126 ) -> Result<OperationalCollectionRecord, EngineError> {
1127 if request.name.trim().is_empty() {
1128 return Err(EngineError::InvalidWrite(
1129 "operational collection name must not be empty".to_owned(),
1130 ));
1131 }
1132 if request.schema_json.is_empty() {
1133 return Err(EngineError::InvalidWrite(
1134 "operational collection schema_json must not be empty".to_owned(),
1135 ));
1136 }
1137 if request.retention_json.is_empty() {
1138 return Err(EngineError::InvalidWrite(
1139 "operational collection retention_json must not be empty".to_owned(),
1140 ));
1141 }
1142 if request.filter_fields_json.is_empty() {
1143 return Err(EngineError::InvalidWrite(
1144 "operational collection filter_fields_json must not be empty".to_owned(),
1145 ));
1146 }
1147 parse_operational_validation_contract(&request.validation_json)
1148 .map_err(EngineError::InvalidWrite)?;
1149 parse_operational_secondary_indexes_json(&request.secondary_indexes_json, request.kind)
1150 .map_err(EngineError::InvalidWrite)?;
1151 if request.format_version <= 0 {
1152 return Err(EngineError::InvalidWrite(
1153 "operational collection format_version must be positive".to_owned(),
1154 ));
1155 }
1156 parse_operational_filter_fields(&request.filter_fields_json)
1157 .map_err(EngineError::InvalidWrite)?;
1158
1159 let mut conn = self.connect()?;
1160 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1161 tx.execute(
1162 "INSERT INTO operational_collections \
1163 (name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at) \
1164 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, unixepoch())",
1165 rusqlite::params![
1166 request.name.as_str(),
1167 request.kind.as_str(),
1168 request.schema_json.as_str(),
1169 request.retention_json.as_str(),
1170 request.filter_fields_json.as_str(),
1171 request.validation_json.as_str(),
1172 request.secondary_indexes_json.as_str(),
1173 request.format_version,
1174 ],
1175 )?;
1176 persist_simple_provenance_event(
1177 &tx,
1178 "operational_collection_registered",
1179 request.name.as_str(),
1180 Some(serde_json::json!({
1181 "kind": request.kind.as_str(),
1182 "format_version": request.format_version,
1183 })),
1184 )?;
1185 tx.commit()?;
1186
1187 self.describe_operational_collection(&request.name)?
1188 .ok_or_else(|| {
1189 EngineError::Bridge("registered collection missing after commit".to_owned())
1190 })
1191 }
1192
1193 pub fn describe_operational_collection(
1196 &self,
1197 name: &str,
1198 ) -> Result<Option<OperationalCollectionRecord>, EngineError> {
1199 let conn = self.connect()?;
1200 load_operational_collection_record(&conn, name)
1201 }
1202
1203 pub fn update_operational_collection_filters(
1207 &self,
1208 name: &str,
1209 filter_fields_json: &str,
1210 ) -> Result<OperationalCollectionRecord, EngineError> {
1211 if filter_fields_json.is_empty() {
1212 return Err(EngineError::InvalidWrite(
1213 "operational collection filter_fields_json must not be empty".to_owned(),
1214 ));
1215 }
1216 let declared_fields = parse_operational_filter_fields(filter_fields_json)
1217 .map_err(EngineError::InvalidWrite)?;
1218
1219 let mut conn = self.connect()?;
1220 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1221 load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1222 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1223 })?;
1224 tx.execute(
1225 "UPDATE operational_collections SET filter_fields_json = ?2 WHERE name = ?1",
1226 rusqlite::params![name, filter_fields_json],
1227 )?;
1228 tx.execute(
1229 "DELETE FROM operational_filter_values WHERE collection_name = ?1",
1230 [name],
1231 )?;
1232
1233 let mut mutation_stmt = tx.prepare(
1234 "SELECT id, payload_json FROM operational_mutations \
1235 WHERE collection_name = ?1 ORDER BY mutation_order",
1236 )?;
1237 let mutations = mutation_stmt
1238 .query_map([name], |row| {
1239 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1240 })?
1241 .collect::<Result<Vec<_>, _>>()?;
1242 drop(mutation_stmt);
1243
1244 let mut insert_filter_value = tx.prepare_cached(
1245 "INSERT INTO operational_filter_values \
1246 (mutation_id, collection_name, field_name, string_value, integer_value) \
1247 VALUES (?1, ?2, ?3, ?4, ?5)",
1248 )?;
1249 let mut inserted_values = 0usize;
1250 for (mutation_id, payload_json) in &mutations {
1251 for filter_value in
1252 extract_operational_filter_values(&declared_fields, payload_json.as_str())
1253 {
1254 insert_filter_value.execute(rusqlite::params![
1255 mutation_id,
1256 name,
1257 filter_value.field_name,
1258 filter_value.string_value,
1259 filter_value.integer_value,
1260 ])?;
1261 inserted_values += 1;
1262 }
1263 }
1264 drop(insert_filter_value);
1265
1266 persist_simple_provenance_event(
1267 &tx,
1268 "operational_collection_filter_fields_updated",
1269 name,
1270 Some(serde_json::json!({
1271 "field_count": declared_fields.len(),
1272 "mutations_backfilled": mutations.len(),
1273 "inserted_filter_values": inserted_values,
1274 })),
1275 )?;
1276 let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1277 EngineError::Bridge("operational collection missing after filter update".to_owned())
1278 })?;
1279 tx.commit()?;
1280 Ok(updated)
1281 }
1282
1283 pub fn update_operational_collection_validation(
1286 &self,
1287 name: &str,
1288 validation_json: &str,
1289 ) -> Result<OperationalCollectionRecord, EngineError> {
1290 parse_operational_validation_contract(validation_json)
1291 .map_err(EngineError::InvalidWrite)?;
1292
1293 let mut conn = self.connect()?;
1294 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1295 load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1296 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1297 })?;
1298 tx.execute(
1299 "UPDATE operational_collections SET validation_json = ?2 WHERE name = ?1",
1300 rusqlite::params![name, validation_json],
1301 )?;
1302 persist_simple_provenance_event(
1303 &tx,
1304 "operational_collection_validation_updated",
1305 name,
1306 Some(serde_json::json!({
1307 "has_validation": !validation_json.is_empty(),
1308 })),
1309 )?;
1310 let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1311 EngineError::Bridge("operational collection missing after validation update".to_owned())
1312 })?;
1313 tx.commit()?;
1314 Ok(updated)
1315 }
1316
1317 pub fn update_operational_collection_secondary_indexes(
1321 &self,
1322 name: &str,
1323 secondary_indexes_json: &str,
1324 ) -> Result<OperationalCollectionRecord, EngineError> {
1325 let mut conn = self.connect()?;
1326 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1327 let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1328 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1329 })?;
1330 let indexes = parse_operational_secondary_indexes_json(secondary_indexes_json, record.kind)
1331 .map_err(EngineError::InvalidWrite)?;
1332 tx.execute(
1333 "UPDATE operational_collections SET secondary_indexes_json = ?2 WHERE name = ?1",
1334 rusqlite::params![name, secondary_indexes_json],
1335 )?;
1336 let (mutation_entries_rebuilt, current_entries_rebuilt) =
1337 rebuild_operational_secondary_index_entries(&tx, &record.name, record.kind, &indexes)?;
1338 persist_simple_provenance_event(
1339 &tx,
1340 "operational_collection_secondary_indexes_updated",
1341 name,
1342 Some(serde_json::json!({
1343 "index_count": indexes.len(),
1344 "mutation_entries_rebuilt": mutation_entries_rebuilt,
1345 "current_entries_rebuilt": current_entries_rebuilt,
1346 })),
1347 )?;
1348 let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1349 EngineError::Bridge(
1350 "operational collection missing after secondary index update".to_owned(),
1351 )
1352 })?;
1353 tx.commit()?;
1354 Ok(updated)
1355 }
1356
1357 pub fn rebuild_operational_secondary_indexes(
1360 &self,
1361 name: &str,
1362 ) -> Result<OperationalSecondaryIndexRebuildReport, EngineError> {
1363 let mut conn = self.connect()?;
1364 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1365 let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1366 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1367 })?;
1368 let indexes =
1369 parse_operational_secondary_indexes_json(&record.secondary_indexes_json, record.kind)
1370 .map_err(EngineError::InvalidWrite)?;
1371 let (mutation_entries_rebuilt, current_entries_rebuilt) =
1372 rebuild_operational_secondary_index_entries(&tx, &record.name, record.kind, &indexes)?;
1373 persist_simple_provenance_event(
1374 &tx,
1375 "operational_secondary_indexes_rebuilt",
1376 name,
1377 Some(serde_json::json!({
1378 "index_count": indexes.len(),
1379 "mutation_entries_rebuilt": mutation_entries_rebuilt,
1380 "current_entries_rebuilt": current_entries_rebuilt,
1381 })),
1382 )?;
1383 tx.commit()?;
1384 Ok(OperationalSecondaryIndexRebuildReport {
1385 collection_name: name.to_owned(),
1386 mutation_entries_rebuilt,
1387 current_entries_rebuilt,
1388 })
1389 }
1390
1391 pub fn validate_operational_collection_history(
1394 &self,
1395 name: &str,
1396 ) -> Result<OperationalHistoryValidationReport, EngineError> {
1397 let conn = self.connect()?;
1398 let record = load_operational_collection_record(&conn, name)?.ok_or_else(|| {
1399 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1400 })?;
1401 let Some(contract) = parse_operational_validation_contract(&record.validation_json)
1402 .map_err(EngineError::InvalidWrite)?
1403 else {
1404 return Err(EngineError::InvalidWrite(format!(
1405 "operational collection '{name}' has no validation_json configured"
1406 )));
1407 };
1408
1409 let mut stmt = conn.prepare(
1410 "SELECT id, record_key, op_kind, payload_json FROM operational_mutations \
1411 WHERE collection_name = ?1 ORDER BY mutation_order",
1412 )?;
1413 let rows = stmt
1414 .query_map([name], |row| {
1415 Ok((
1416 row.get::<_, String>(0)?,
1417 row.get::<_, String>(1)?,
1418 row.get::<_, String>(2)?,
1419 row.get::<_, String>(3)?,
1420 ))
1421 })?
1422 .collect::<Result<Vec<_>, _>>()?;
1423 drop(stmt);
1424
1425 let mut checked_rows = 0usize;
1426 let mut issues = Vec::new();
1427 for (mutation_id, record_key, op_kind, payload_json) in rows {
1428 if op_kind == "delete" {
1429 continue;
1430 }
1431 checked_rows += 1;
1432 if let Err(message) =
1433 validate_operational_payload_against_contract(&contract, payload_json.as_str())
1434 {
1435 issues.push(OperationalHistoryValidationIssue {
1436 mutation_id,
1437 record_key,
1438 op_kind,
1439 message,
1440 });
1441 }
1442 }
1443
1444 Ok(OperationalHistoryValidationReport {
1445 collection_name: name.to_owned(),
1446 checked_rows,
1447 invalid_row_count: issues.len(),
1448 issues,
1449 })
1450 }
1451
1452 pub fn disable_operational_collection(
1455 &self,
1456 name: &str,
1457 ) -> Result<OperationalCollectionRecord, EngineError> {
1458 let mut conn = self.connect()?;
1459 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1460 let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1461 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1462 })?;
1463 let changed = if record.disabled_at.is_none() {
1464 tx.execute(
1465 "UPDATE operational_collections SET disabled_at = unixepoch() WHERE name = ?1",
1466 [name],
1467 )?;
1468 true
1469 } else {
1470 false
1471 };
1472 let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1473 EngineError::Bridge("operational collection missing after disable".to_owned())
1474 })?;
1475 persist_simple_provenance_event(
1476 &tx,
1477 "operational_collection_disabled",
1478 name,
1479 Some(serde_json::json!({
1480 "disabled_at": record.disabled_at,
1481 "changed": changed,
1482 })),
1483 )?;
1484 tx.commit()?;
1485 Ok(record)
1486 }
1487
1488 pub fn compact_operational_collection(
1491 &self,
1492 name: &str,
1493 dry_run: bool,
1494 ) -> Result<OperationalCompactionReport, EngineError> {
1495 let mut conn = self.connect()?;
1496 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1497 let collection = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1498 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1499 })?;
1500 validate_append_only_operational_collection(&collection, "compact")?;
1501 let (mutation_ids, before_timestamp) =
1502 operational_compaction_candidates(&tx, &collection.retention_json, name)?;
1503 if dry_run {
1504 drop(tx);
1505 return Ok(OperationalCompactionReport {
1506 collection_name: name.to_owned(),
1507 deleted_mutations: mutation_ids.len(),
1508 dry_run: true,
1509 before_timestamp,
1510 });
1511 }
1512 let mut delete_stmt =
1513 tx.prepare_cached("DELETE FROM operational_mutations WHERE id = ?1")?;
1514 for mutation_id in &mutation_ids {
1515 delete_stmt.execute([mutation_id.as_str()])?;
1516 }
1517 drop(delete_stmt);
1518 persist_simple_provenance_event(
1519 &tx,
1520 "operational_collection_compacted",
1521 name,
1522 Some(serde_json::json!({
1523 "deleted_mutations": mutation_ids.len(),
1524 "before_timestamp": before_timestamp,
1525 })),
1526 )?;
1527 tx.commit()?;
1528 Ok(OperationalCompactionReport {
1529 collection_name: name.to_owned(),
1530 deleted_mutations: mutation_ids.len(),
1531 dry_run: false,
1532 before_timestamp,
1533 })
1534 }
1535
1536 pub fn purge_operational_collection(
1539 &self,
1540 name: &str,
1541 before_timestamp: i64,
1542 ) -> Result<OperationalPurgeReport, EngineError> {
1543 let mut conn = self.connect()?;
1544 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1545 let collection = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1546 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1547 })?;
1548 validate_append_only_operational_collection(&collection, "purge")?;
1549 let deleted_mutations = tx.execute(
1550 "DELETE FROM operational_mutations WHERE collection_name = ?1 AND created_at < ?2",
1551 rusqlite::params![name, before_timestamp],
1552 )?;
1553 persist_simple_provenance_event(
1554 &tx,
1555 "operational_collection_purged",
1556 name,
1557 Some(serde_json::json!({
1558 "deleted_mutations": deleted_mutations,
1559 "before_timestamp": before_timestamp,
1560 })),
1561 )?;
1562 tx.commit()?;
1563 Ok(OperationalPurgeReport {
1564 collection_name: name.to_owned(),
1565 deleted_mutations,
1566 before_timestamp,
1567 })
1568 }
1569
1570 pub fn plan_operational_retention(
1573 &self,
1574 now_timestamp: i64,
1575 collection_names: Option<&[String]>,
1576 max_collections: Option<usize>,
1577 ) -> Result<OperationalRetentionPlanReport, EngineError> {
1578 let conn = self.connect()?;
1579 let records = load_operational_retention_records(&conn, collection_names, max_collections)?;
1580 let mut items = Vec::with_capacity(records.len());
1581 for record in records {
1582 items.push(plan_operational_retention_item(
1583 &conn,
1584 &record,
1585 now_timestamp,
1586 )?);
1587 }
1588 Ok(OperationalRetentionPlanReport {
1589 planned_at: now_timestamp,
1590 collections_examined: items.len(),
1591 items,
1592 })
1593 }
1594
1595 pub fn run_operational_retention(
1598 &self,
1599 now_timestamp: i64,
1600 collection_names: Option<&[String]>,
1601 max_collections: Option<usize>,
1602 dry_run: bool,
1603 ) -> Result<OperationalRetentionRunReport, EngineError> {
1604 let mut conn = self.connect()?;
1605 let records = load_operational_retention_records(&conn, collection_names, max_collections)?;
1606 let mut items = Vec::with_capacity(records.len());
1607 let mut collections_acted_on = 0usize;
1608
1609 for record in records {
1610 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1611 let item = run_operational_retention_item(&tx, &record, now_timestamp, dry_run)?;
1612 if item.deleted_mutations > 0 {
1613 collections_acted_on += 1;
1614 }
1615 if dry_run || item.action_kind == OperationalRetentionActionKind::Noop {
1616 drop(tx);
1617 } else {
1618 tx.commit()?;
1619 }
1620 items.push(item);
1621 }
1622
1623 Ok(OperationalRetentionRunReport {
1624 executed_at: now_timestamp,
1625 collections_examined: items.len(),
1626 collections_acted_on,
1627 dry_run,
1628 items,
1629 })
1630 }
1631
1632 pub fn trace_operational_collection(
1635 &self,
1636 collection_name: &str,
1637 record_key: Option<&str>,
1638 ) -> Result<OperationalTraceReport, EngineError> {
1639 let conn = self.connect()?;
1640 ensure_operational_collection_registered(&conn, collection_name)?;
1641 let mutations = if let Some(record_key) = record_key {
1642 let mut stmt = conn.prepare(
1643 "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
1644 FROM operational_mutations \
1645 WHERE collection_name = ?1 AND record_key = ?2 \
1646 ORDER BY mutation_order",
1647 )?;
1648 stmt.query_map([collection_name, record_key], map_operational_mutation_row)?
1649 .collect::<Result<Vec<_>, _>>()?
1650 } else {
1651 let mut stmt = conn.prepare(
1652 "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
1653 FROM operational_mutations \
1654 WHERE collection_name = ?1 \
1655 ORDER BY mutation_order",
1656 )?;
1657 stmt.query_map([collection_name], map_operational_mutation_row)?
1658 .collect::<Result<Vec<_>, _>>()?
1659 };
1660 let current_rows = if let Some(record_key) = record_key {
1661 let mut stmt = conn.prepare(
1662 "SELECT collection_name, record_key, payload_json, updated_at, last_mutation_id \
1663 FROM operational_current \
1664 WHERE collection_name = ?1 AND record_key = ?2 \
1665 ORDER BY updated_at, record_key",
1666 )?;
1667 stmt.query_map([collection_name, record_key], map_operational_current_row)?
1668 .collect::<Result<Vec<_>, _>>()?
1669 } else {
1670 let mut stmt = conn.prepare(
1671 "SELECT collection_name, record_key, payload_json, updated_at, last_mutation_id \
1672 FROM operational_current \
1673 WHERE collection_name = ?1 \
1674 ORDER BY updated_at, record_key",
1675 )?;
1676 stmt.query_map([collection_name], map_operational_current_row)?
1677 .collect::<Result<Vec<_>, _>>()?
1678 };
1679
1680 Ok(OperationalTraceReport {
1681 collection_name: collection_name.to_owned(),
1682 record_key: record_key.map(str::to_owned),
1683 mutation_count: mutations.len(),
1684 current_count: current_rows.len(),
1685 mutations,
1686 current_rows,
1687 })
1688 }
1689
1690 pub fn read_operational_collection(
1693 &self,
1694 request: &OperationalReadRequest,
1695 ) -> Result<OperationalReadReport, EngineError> {
1696 if request.collection_name.trim().is_empty() {
1697 return Err(EngineError::InvalidWrite(
1698 "operational read collection_name must not be empty".to_owned(),
1699 ));
1700 }
1701 if request.filters.is_empty() {
1702 return Err(EngineError::InvalidWrite(
1703 "operational read requires at least one filter clause".to_owned(),
1704 ));
1705 }
1706
1707 let conn = self.connect()?;
1708 let record = load_operational_collection_record(&conn, &request.collection_name)?
1709 .ok_or_else(|| {
1710 EngineError::InvalidWrite(format!(
1711 "operational collection '{}' is not registered",
1712 request.collection_name
1713 ))
1714 })?;
1715 validate_append_only_operational_collection(&record, "read")?;
1716 let declared_fields = parse_operational_filter_fields(&record.filter_fields_json)
1717 .map_err(EngineError::InvalidWrite)?;
1718 let secondary_indexes =
1719 parse_operational_secondary_indexes_json(&record.secondary_indexes_json, record.kind)
1720 .map_err(EngineError::InvalidWrite)?;
1721 let applied_limit = operational_read_limit(request.limit)?;
1722 let filters = compile_operational_read_filters(&request.filters, &declared_fields)?;
1723 if let Some(report) = execute_operational_secondary_index_read(
1724 &conn,
1725 &request.collection_name,
1726 &filters,
1727 &secondary_indexes,
1728 applied_limit,
1729 )? {
1730 return Ok(report);
1731 }
1732 execute_operational_filtered_read(&conn, &request.collection_name, &filters, applied_limit)
1733 }
1734
1735 pub fn rebuild_operational_current(
1738 &self,
1739 collection_name: Option<&str>,
1740 ) -> Result<OperationalRepairReport, EngineError> {
1741 let mut conn = self.connect()?;
1742 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1743 let collections = if let Some(name) = collection_name {
1744 let maybe_kind: Option<String> = tx
1745 .query_row(
1746 "SELECT kind FROM operational_collections WHERE name = ?1",
1747 [name],
1748 |row| row.get(0),
1749 )
1750 .optional()?;
1751 let Some(kind) = maybe_kind else {
1752 return Err(EngineError::InvalidWrite(format!(
1753 "operational collection '{name}' is not registered"
1754 )));
1755 };
1756 if kind != OperationalCollectionKind::LatestState.as_str() {
1757 return Err(EngineError::InvalidWrite(format!(
1758 "operational collection '{name}' is not latest_state"
1759 )));
1760 }
1761 vec![name.to_owned()]
1762 } else {
1763 let mut stmt = tx.prepare(
1764 "SELECT name FROM operational_collections WHERE kind = 'latest_state' ORDER BY name",
1765 )?;
1766 stmt.query_map([], |row| row.get::<_, String>(0))?
1767 .collect::<Result<Vec<_>, _>>()?
1768 };
1769
1770 let rebuilt_rows = rebuild_operational_current_rows(&tx, &collections)?;
1771 for collection in &collections {
1772 let record = load_operational_collection_record(&tx, collection)?.ok_or_else(|| {
1773 EngineError::Bridge(format!(
1774 "operational collection '{collection}' missing during current rebuild"
1775 ))
1776 })?;
1777 let indexes = parse_operational_secondary_indexes_json(
1778 &record.secondary_indexes_json,
1779 record.kind,
1780 )
1781 .map_err(EngineError::InvalidWrite)?;
1782 if !indexes.is_empty() {
1783 rebuild_operational_secondary_index_entries(
1784 &tx,
1785 &record.name,
1786 record.kind,
1787 &indexes,
1788 )?;
1789 }
1790 }
1791
1792 persist_simple_provenance_event(
1793 &tx,
1794 "operational_current_rebuilt",
1795 collection_name.unwrap_or("*"),
1796 Some(serde_json::json!({
1797 "collections_rebuilt": collections.len(),
1798 "current_rows_rebuilt": rebuilt_rows,
1799 })),
1800 )?;
1801 tx.commit()?;
1802
1803 Ok(OperationalRepairReport {
1804 collections_rebuilt: collections.len(),
1805 current_rows_rebuilt: rebuilt_rows,
1806 })
1807 }
1808
1809 pub fn rebuild_projections(
1812 &self,
1813 target: ProjectionTarget,
1814 ) -> Result<ProjectionRepairReport, EngineError> {
1815 self.projections.rebuild_projections(target)
1816 }
1817
1818 pub fn rebuild_missing_projections(&self) -> Result<ProjectionRepairReport, EngineError> {
1821 self.projections.rebuild_missing_projections()
1822 }
1823
1824 pub fn register_fts_property_schema(
1833 &self,
1834 kind: &str,
1835 property_paths: &[String],
1836 separator: Option<&str>,
1837 ) -> Result<FtsPropertySchemaRecord, EngineError> {
1838 let specs: Vec<FtsPropertyPathSpec> = property_paths
1839 .iter()
1840 .map(|p| FtsPropertyPathSpec::scalar(p.clone()))
1841 .collect();
1842 self.register_fts_property_schema_with_entries(
1843 kind,
1844 &specs,
1845 separator,
1846 &[],
1847 RebuildMode::Eager,
1848 )
1849 }
1850
1851 pub fn register_fts_property_schema_with_entries(
1867 &self,
1868 kind: &str,
1869 entries: &[FtsPropertyPathSpec],
1870 separator: Option<&str>,
1871 exclude_paths: &[String],
1872 mode: RebuildMode,
1873 ) -> Result<FtsPropertySchemaRecord, EngineError> {
1874 let paths: Vec<String> = entries.iter().map(|e| e.path.clone()).collect();
1875 validate_fts_property_paths(&paths)?;
1876 for p in exclude_paths {
1877 if !p.starts_with("$.") {
1878 return Err(EngineError::InvalidWrite(format!(
1879 "exclude_paths entries must start with '$.' but got: {p}"
1880 )));
1881 }
1882 }
1883 for e in entries {
1884 if let Some(w) = e.weight
1885 && !(w > 0.0 && w <= 1000.0)
1886 {
1887 return Err(EngineError::Bridge(format!(
1888 "weight out of range: {w} (must satisfy 0.0 < weight <= 1000.0)"
1889 )));
1890 }
1891 }
1892 let separator = separator.unwrap_or(" ");
1893 let paths_json = serialize_property_paths_json(entries, exclude_paths)?;
1894
1895 match mode {
1896 RebuildMode::Eager => self.register_fts_property_schema_eager(
1897 kind,
1898 entries,
1899 separator,
1900 exclude_paths,
1901 &paths,
1902 &paths_json,
1903 ),
1904 RebuildMode::Async => self.register_fts_property_schema_async(
1905 kind,
1906 entries,
1907 separator,
1908 &paths,
1909 &paths_json,
1910 ),
1911 }
1912 }
1913
/// Registers (or replaces) the FTS property schema for `kind` and rebuilds
/// its per-kind FTS table inside the same `IMMEDIATE` transaction.
///
/// Steps, in order: read any previous schema row, upsert the new row,
/// recreate the per-kind FTS table, clear the kind's rows in
/// `fts_node_property_positions`, repopulate property FTS rows (unweighted
/// schemas only), write a provenance event, commit, and return the record
/// re-read through `describe_fts_property_schema`.
///
/// # Errors
///
/// Tokenizer resolution failures are wrapped in [`EngineError::Bridge`], as
/// is a record that cannot be found after the commit. Other database and
/// helper errors are propagated.
fn register_fts_property_schema_eager(
    &self,
    kind: &str,
    entries: &[FtsPropertyPathSpec],
    separator: &str,
    exclude_paths: &[String],
    paths: &[String],
    paths_json: &str,
) -> Result<FtsPropertySchemaRecord, EngineError> {
    let mut conn = self.connect()?;
    let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;

    // Read the schema row currently stored for this kind, if any, before it
    // is overwritten by the upsert below.
    let previous_row: Option<(String, String)> = tx
        .query_row(
            "SELECT property_paths_json, separator FROM fts_property_schemas WHERE kind = ?1",
            [kind],
            |row| {
                let json: String = row.get(0)?;
                let sep: String = row.get(1)?;
                Ok((json, sep))
            },
        )
        .optional()?;
    let had_previous_schema = previous_row.is_some();
    // Recursive-mode paths of the previous schema (empty when there was none).
    let previous_recursive_paths: Vec<String> = previous_row
        .map(|(json, sep)| crate::writer::parse_property_schema_json(&json, &sep))
        .map_or(Vec::new(), |schema| {
            schema
                .paths
                .into_iter()
                .filter(|p| p.mode == crate::writer::PropertyPathMode::Recursive)
                .map(|p| p.path)
                .collect()
        });
    // Recursive-mode paths requested by the new schema.
    let new_recursive_paths: Vec<&str> = entries
        .iter()
        .filter(|e| e.mode == FtsPropertyPathMode::Recursive)
        .map(|e| e.path.as_str())
        .collect();
    // True when the new schema has a recursive path the previous one lacked.
    let introduces_new_recursive = new_recursive_paths
        .iter()
        .any(|p| !previous_recursive_paths.iter().any(|prev| prev == p));

    // Upsert the schema row for this kind.
    tx.execute(
        "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
         VALUES (?1, ?2, ?3) \
         ON CONFLICT(kind) DO UPDATE SET property_paths_json = ?2, separator = ?3",
        rusqlite::params![kind, paths_json, separator],
    )?;

    // NOTE(review): both values are computed above but deliberately discarded
    // here; the rebuild below is unconditional. Presumably they once gated
    // the rebuild — confirm before removing the computation.
    let _ = (introduces_new_recursive, had_previous_schema);
    let needs_rebuild = true;
    if needs_rebuild {
        let any_weight = entries.iter().any(|e| e.weight.is_some());
        let tok = fathomdb_schema::resolve_fts_tokenizer(&tx, kind)
            .map_err(|e| EngineError::Bridge(e.to_string()))?;
        if any_weight {
            // Weighted schema: the table is created from the full entry list.
            create_or_replace_fts_kind_table(&tx, kind, entries, &tok)?;
            tx.execute(
                "DELETE FROM fts_node_property_positions WHERE kind = ?1",
                [kind],
            )?;
            // NOTE(review): unlike the unweighted branch, no rows are
            // inserted here — presumably the weighted table is populated
            // elsewhere; confirm.
        } else {
            // Unweighted schema: the table is created with an empty entry
            // list and repopulated immediately.
            create_or_replace_fts_kind_table(&tx, kind, &[], &tok)?;
            tx.execute(
                "DELETE FROM fts_node_property_positions WHERE kind = ?1",
                [kind],
            )?;
            crate::projection::insert_property_fts_rows_for_kind(&tx, kind)?;
        }
    }

    persist_simple_provenance_event(
        &tx,
        "fts_property_schema_registered",
        kind,
        Some(serde_json::json!({
            "property_paths": paths,
            "separator": separator,
            "exclude_paths": exclude_paths,
            "eager_rebuild": needs_rebuild,
        })),
    )?;
    tx.commit()?;

    // Re-read through the public accessor so the caller sees the committed state.
    self.describe_fts_property_schema(kind)?.ok_or_else(|| {
        EngineError::Bridge("registered FTS property schema missing after commit".to_owned())
    })
}
2026
2027 fn register_fts_property_schema_async(
2029 &self,
2030 kind: &str,
2031 entries: &[FtsPropertyPathSpec],
2032 separator: &str,
2033 paths: &[String],
2034 paths_json: &str,
2035 ) -> Result<FtsPropertySchemaRecord, EngineError> {
2036 let mut conn = self.connect()?;
2037 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2038
2039 let had_previous_schema: bool = tx
2041 .query_row(
2042 "SELECT count(*) FROM fts_property_schemas WHERE kind = ?1",
2043 rusqlite::params![kind],
2044 |r| r.get::<_, i64>(0),
2045 )
2046 .unwrap_or(0)
2047 > 0;
2048
2049 tx.execute(
2051 "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
2052 VALUES (?1, ?2, ?3) \
2053 ON CONFLICT(kind) DO UPDATE SET property_paths_json = ?2, separator = ?3",
2054 rusqlite::params![kind, paths_json, separator],
2055 )?;
2056
2057 let any_weight = entries.iter().any(|e| e.weight.is_some());
2061 let tok = fathomdb_schema::resolve_fts_tokenizer(&tx, kind)
2062 .map_err(|e| EngineError::Bridge(e.to_string()))?;
2063 if any_weight {
2064 create_or_replace_fts_kind_table(&tx, kind, entries, &tok)?;
2065 } else {
2066 create_or_replace_fts_kind_table(&tx, kind, &[], &tok)?;
2069 }
2070
2071 let schema_id: i64 = tx.query_row(
2073 "SELECT rowid FROM fts_property_schemas WHERE kind = ?1",
2074 rusqlite::params![kind],
2075 |r| r.get(0),
2076 )?;
2077
2078 let now_ms = crate::rebuild_actor::now_unix_ms_pub();
2079 let is_first = i64::from(!had_previous_schema);
2080
2081 tx.execute(
2083 "INSERT INTO fts_property_rebuild_state \
2084 (kind, schema_id, state, rows_done, started_at, is_first_registration) \
2085 VALUES (?1, ?2, 'PENDING', 0, ?3, ?4) \
2086 ON CONFLICT(kind) DO UPDATE SET \
2087 schema_id = excluded.schema_id, \
2088 state = 'PENDING', \
2089 rows_total = NULL, \
2090 rows_done = 0, \
2091 started_at = excluded.started_at, \
2092 last_progress_at = NULL, \
2093 error_message = NULL, \
2094 is_first_registration = excluded.is_first_registration",
2095 rusqlite::params![kind, schema_id, now_ms, is_first],
2096 )?;
2097
2098 persist_simple_provenance_event(
2099 &tx,
2100 "fts_property_schema_registered",
2101 kind,
2102 Some(serde_json::json!({
2103 "property_paths": paths,
2104 "separator": separator,
2105 "mode": "async",
2106 })),
2107 )?;
2108 tx.commit()?;
2109
2110 if let Some(sender) = &self.rebuild_sender
2116 && sender
2117 .try_send(RebuildRequest {
2118 kind: kind.to_owned(),
2119 schema_id,
2120 })
2121 .is_err()
2122 {
2123 trace_warn!(
2124 kind = %kind,
2125 "rebuild channel full; rebuild request dropped — state remains PENDING"
2126 );
2127 }
2128
2129 self.describe_fts_property_schema(kind)?.ok_or_else(|| {
2130 EngineError::Bridge("registered FTS property schema missing after commit".to_owned())
2131 })
2132 }
2133
2134 pub fn get_property_fts_rebuild_state(
2139 &self,
2140 kind: &str,
2141 ) -> Result<Option<RebuildStateRow>, EngineError> {
2142 let conn = self.connect()?;
2143 let row = conn
2144 .query_row(
2145 "SELECT kind, schema_id, state, rows_total, rows_done, \
2146 started_at, is_first_registration, error_message \
2147 FROM fts_property_rebuild_state WHERE kind = ?1",
2148 rusqlite::params![kind],
2149 |r| {
2150 Ok(RebuildStateRow {
2151 kind: r.get(0)?,
2152 schema_id: r.get(1)?,
2153 state: r.get(2)?,
2154 rows_total: r.get(3)?,
2155 rows_done: r.get(4)?,
2156 started_at: r.get(5)?,
2157 is_first_registration: r.get::<_, i64>(6)? != 0,
2158 error_message: r.get(7)?,
2159 })
2160 },
2161 )
2162 .optional()?;
2163 Ok(row)
2164 }
2165
2166 pub fn count_staging_rows(&self, kind: &str) -> Result<i64, EngineError> {
2172 let conn = self.connect()?;
2173 let count: i64 = conn.query_row(
2174 "SELECT count(*) FROM fts_property_rebuild_staging WHERE kind = ?1",
2175 rusqlite::params![kind],
2176 |r| r.get(0),
2177 )?;
2178 Ok(count)
2179 }
2180
2181 pub fn staging_row_exists(
2187 &self,
2188 kind: &str,
2189 node_logical_id: &str,
2190 ) -> Result<bool, EngineError> {
2191 let conn = self.connect()?;
2192 let count: i64 = conn.query_row(
2193 "SELECT count(*) FROM fts_property_rebuild_staging WHERE kind = ?1 AND node_logical_id = ?2",
2194 rusqlite::params![kind, node_logical_id],
2195 |r| r.get(0),
2196 )?;
2197 Ok(count > 0)
2198 }
2199
2200 pub fn describe_fts_property_schema(
2205 &self,
2206 kind: &str,
2207 ) -> Result<Option<FtsPropertySchemaRecord>, EngineError> {
2208 let conn = self.connect()?;
2209 load_fts_property_schema_record(&conn, kind)
2210 }
2211
2212 pub fn list_fts_property_schemas(&self) -> Result<Vec<FtsPropertySchemaRecord>, EngineError> {
2217 let conn = self.connect()?;
2218 let mut stmt = conn.prepare(
2219 "SELECT kind, property_paths_json, separator, format_version \
2220 FROM fts_property_schemas ORDER BY kind",
2221 )?;
2222 let records = stmt
2223 .query_map([], |row| {
2224 let kind: String = row.get(0)?;
2225 let paths_json: String = row.get(1)?;
2226 let separator: String = row.get(2)?;
2227 let format_version: i64 = row.get(3)?;
2228 Ok(build_fts_property_schema_record(
2229 kind,
2230 &paths_json,
2231 separator,
2232 format_version,
2233 ))
2234 })?
2235 .collect::<Result<Vec<_>, _>>()?;
2236 Ok(records)
2237 }
2238
2239 pub fn remove_fts_property_schema(&self, kind: &str) -> Result<(), EngineError> {
2247 let mut conn = self.connect()?;
2248 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2249 let deleted = tx.execute("DELETE FROM fts_property_schemas WHERE kind = ?1", [kind])?;
2250 if deleted == 0 {
2251 return Err(EngineError::InvalidWrite(format!(
2252 "FTS property schema for kind '{kind}' is not registered"
2253 )));
2254 }
2255 let table = fathomdb_schema::fts_kind_table_name(kind);
2257 let table_exists: bool = tx
2258 .query_row(
2259 "SELECT count(*) FROM sqlite_master WHERE type='table' AND name = ?1 \
2260 AND sql LIKE 'CREATE VIRTUAL TABLE%'",
2261 rusqlite::params![table],
2262 |r| r.get::<_, i64>(0),
2263 )
2264 .unwrap_or(0)
2265 > 0;
2266 if table_exists {
2267 tx.execute_batch(&format!("DELETE FROM {table}"))?;
2268 }
2269 persist_simple_provenance_event(&tx, "fts_property_schema_removed", kind, None)?;
2270 tx.commit()?;
2271 Ok(())
2272 }
2273
2274 pub fn restore_vector_profiles(&self) -> Result<ProjectionRepairReport, EngineError> {
2280 let conn = self.connect()?;
2281 let profiles: Vec<(String, String, i64)> = {
2282 let mut stmt = conn.prepare(
2283 "SELECT profile, table_name, dimension \
2284 FROM vector_profiles WHERE enabled = 1 ORDER BY profile",
2285 )?;
2286 stmt.query_map([], |row| {
2287 Ok((
2288 row.get::<_, String>(0)?,
2289 row.get::<_, String>(1)?,
2290 row.get::<_, i64>(2)?,
2291 ))
2292 })?
2293 .collect::<Result<Vec<_>, _>>()?
2294 };
2295
2296 for (profile, table_name, dimension) in &profiles {
2297 let dimension = usize::try_from(*dimension).map_err(|_| {
2298 EngineError::Bridge(format!("invalid vector profile dimension: {dimension}"))
2299 })?;
2300 self.schema_manager
2301 .ensure_vector_profile(&conn, profile, table_name, dimension)?;
2302 }
2303
2304 Ok(ProjectionRepairReport {
2305 targets: vec![ProjectionTarget::Vec],
2306 rebuilt_rows: profiles.len(),
2307 notes: vec![],
2308 })
2309 }
2310
2311 #[allow(clippy::too_many_lines)]
2327 pub fn regenerate_vector_embeddings(
2328 &self,
2329 embedder: &dyn QueryEmbedder,
2330 config: &VectorRegenerationConfig,
2331 ) -> Result<VectorRegenerationReport, EngineError> {
2332 let conn = self.connect()?;
2333 let identity = embedder.identity();
2334 let config = validate_vector_regeneration_config(&conn, config, &identity)
2335 .map_err(|failure| failure.to_engine_error())?;
2336 let chunks = collect_regeneration_chunks(&conn)?;
2337 let payload = build_regeneration_input(&config, &identity, chunks.clone());
2338 let snapshot_hash = compute_snapshot_hash(&payload)?;
2339 let audit_metadata = VectorRegenerationAuditMetadata {
2340 profile: config.profile.clone(),
2341 model_identity: identity.model_identity.clone(),
2342 model_version: identity.model_version.clone(),
2343 chunk_count: chunks.len(),
2344 snapshot_hash: snapshot_hash.clone(),
2345 failure_class: None,
2346 };
2347 persist_vector_regeneration_event(
2348 &conn,
2349 "vector_regeneration_requested",
2350 &config.profile,
2351 &audit_metadata,
2352 )?;
2353 let notes = vec!["vector embeddings regenerated via configured embedder".to_owned()];
2354
2355 let mut embedding_map: std::collections::HashMap<String, Vec<u8>> =
2356 std::collections::HashMap::with_capacity(chunks.len());
2357 for chunk in &chunks {
2358 let vector = match embedder.embed_query(&chunk.text_content) {
2359 Ok(vector) => vector,
2360 Err(error) => {
2361 let failure = VectorRegenerationFailure::new(
2362 VectorRegenerationFailureClass::EmbedderFailure,
2363 format!("embedder failed for chunk '{}': {error}", chunk.chunk_id),
2364 );
2365 self.persist_vector_regeneration_failure_best_effort(
2366 &config.profile,
2367 &audit_metadata,
2368 &failure,
2369 );
2370 return Err(failure.to_engine_error());
2371 }
2372 };
2373 if vector.len() != identity.dimension {
2374 let failure = VectorRegenerationFailure::new(
2375 VectorRegenerationFailureClass::InvalidEmbedderOutput,
2376 format!(
2377 "embedder produced {} values for chunk '{}', expected {}",
2378 vector.len(),
2379 chunk.chunk_id,
2380 identity.dimension
2381 ),
2382 );
2383 self.persist_vector_regeneration_failure_best_effort(
2384 &config.profile,
2385 &audit_metadata,
2386 &failure,
2387 );
2388 return Err(failure.to_engine_error());
2389 }
2390 if vector.iter().any(|value| !value.is_finite()) {
2391 let failure = VectorRegenerationFailure::new(
2392 VectorRegenerationFailureClass::InvalidEmbedderOutput,
2393 format!(
2394 "embedder returned non-finite values for chunk '{}'",
2395 chunk.chunk_id
2396 ),
2397 );
2398 self.persist_vector_regeneration_failure_best_effort(
2399 &config.profile,
2400 &audit_metadata,
2401 &failure,
2402 );
2403 return Err(failure.to_engine_error());
2404 }
2405 let bytes: Vec<u8> = vector
2406 .iter()
2407 .flat_map(|value| value.to_le_bytes())
2408 .collect();
2409 embedding_map.insert(chunk.chunk_id.clone(), bytes);
2410 }
2411
2412 let mut conn = conn;
2413 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2414 match self.schema_manager.ensure_vector_profile(
2415 &tx,
2416 &config.profile,
2417 &config.table_name,
2418 identity.dimension,
2419 ) {
2420 Ok(()) => {}
2421 Err(SchemaError::MissingCapability(message)) => {
2422 let failure = VectorRegenerationFailure::new(
2423 VectorRegenerationFailureClass::UnsupportedVecCapability,
2424 message,
2425 );
2426 drop(tx);
2427 self.persist_vector_regeneration_failure_best_effort(
2428 &config.profile,
2429 &audit_metadata,
2430 &failure,
2431 );
2432 return Err(failure.to_engine_error());
2433 }
2434 Err(error) => return Err(EngineError::Schema(error)),
2435 }
2436 let apply_chunks = collect_regeneration_chunks(&tx)?;
2437 let apply_payload = build_regeneration_input(&config, &identity, apply_chunks.clone());
2438 let apply_hash = compute_snapshot_hash(&apply_payload)?;
2439 if apply_hash != snapshot_hash {
2440 let failure = VectorRegenerationFailure::new(
2441 VectorRegenerationFailureClass::SnapshotDrift,
2442 "chunk snapshot changed during generation; retry".to_owned(),
2443 );
2444 drop(tx);
2445 self.persist_vector_regeneration_failure_best_effort(
2446 &config.profile,
2447 &audit_metadata,
2448 &failure,
2449 );
2450 return Err(failure.to_engine_error());
2451 }
2452 persist_vector_contract(&tx, &config, &identity, &snapshot_hash)?;
2453 tx.execute("DELETE FROM vec_nodes_active", [])?;
2454 let mut stmt = tx
2455 .prepare_cached("INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES (?1, ?2)")?;
2456 let mut regenerated_rows = 0usize;
2457 for chunk in &apply_chunks {
2458 let Some(embedding) = embedding_map.remove(&chunk.chunk_id) else {
2459 drop(stmt);
2460 drop(tx);
2461 let failure = VectorRegenerationFailure::new(
2462 VectorRegenerationFailureClass::InvalidEmbedderOutput,
2463 format!(
2464 "embedder did not produce a vector for chunk '{}'",
2465 chunk.chunk_id
2466 ),
2467 );
2468 self.persist_vector_regeneration_failure_best_effort(
2469 &config.profile,
2470 &audit_metadata,
2471 &failure,
2472 );
2473 return Err(failure.to_engine_error());
2474 };
2475 stmt.execute(rusqlite::params![chunk.chunk_id.as_str(), embedding])?;
2476 regenerated_rows += 1;
2477 }
2478 drop(stmt);
2479 persist_vector_regeneration_event(
2480 &tx,
2481 "vector_regeneration_apply",
2482 &config.profile,
2483 &audit_metadata,
2484 )?;
2485 tx.commit()?;
2486
2487 Ok(VectorRegenerationReport {
2488 profile: config.profile.clone(),
2489 table_name: config.table_name.clone(),
2490 dimension: identity.dimension,
2491 total_chunks: chunks.len(),
2492 regenerated_rows,
2493 contract_persisted: true,
2494 notes,
2495 })
2496 }
2497
2498 fn persist_vector_regeneration_failure_best_effort(
2499 &self,
2500 profile: &str,
2501 metadata: &VectorRegenerationAuditMetadata,
2502 failure: &VectorRegenerationFailure,
2503 ) {
2504 let Ok(conn) = self.connect() else {
2505 return;
2506 };
2507 let failure_metadata = VectorRegenerationAuditMetadata {
2508 profile: metadata.profile.clone(),
2509 model_identity: metadata.model_identity.clone(),
2510 model_version: metadata.model_version.clone(),
2511 chunk_count: metadata.chunk_count,
2512 snapshot_hash: metadata.snapshot_hash.clone(),
2513 failure_class: Some(failure.failure_class_label().to_owned()),
2514 };
2515 let _ = persist_vector_regeneration_event(
2516 &conn,
2517 "vector_regeneration_failed",
2518 profile,
2519 &failure_metadata,
2520 );
2521 }
2522
2523 pub fn trace_source(&self, source_ref: &str) -> Result<TraceReport, EngineError> {
2526 let conn = self.connect()?;
2527
2528 let node_logical_ids = collect_strings(
2529 &conn,
2530 "SELECT logical_id FROM nodes WHERE source_ref = ?1 ORDER BY created_at",
2531 source_ref,
2532 )?;
2533 let action_ids = collect_strings(
2534 &conn,
2535 "SELECT id FROM actions WHERE source_ref = ?1 ORDER BY created_at",
2536 source_ref,
2537 )?;
2538 let operational_mutation_ids = collect_strings(
2539 &conn,
2540 "SELECT id FROM operational_mutations WHERE source_ref = ?1 ORDER BY mutation_order",
2541 source_ref,
2542 )?;
2543
2544 Ok(TraceReport {
2545 source_ref: source_ref.to_owned(),
2546 node_rows: count_source_ref(&conn, "nodes", source_ref)?,
2547 edge_rows: count_source_ref(&conn, "edges", source_ref)?,
2548 action_rows: count_source_ref(&conn, "actions", source_ref)?,
2549 operational_mutation_rows: count_source_ref(
2550 &conn,
2551 "operational_mutations",
2552 source_ref,
2553 )?,
2554 node_logical_ids,
2555 action_ids,
2556 operational_mutation_ids,
2557 })
2558 }
2559
2560 #[allow(clippy::too_many_lines)]
2564 pub fn restore_logical_id(
2565 &self,
2566 logical_id: &str,
2567 ) -> Result<LogicalRestoreReport, EngineError> {
2568 let mut conn = self.connect()?;
2569 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2570
2571 let active_count: i64 = tx.query_row(
2572 "SELECT count(*) FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
2573 [logical_id],
2574 |row| row.get(0),
2575 )?;
2576 if active_count > 0 {
2577 return Ok(LogicalRestoreReport {
2578 logical_id: logical_id.to_owned(),
2579 was_noop: true,
2580 restored_node_rows: 0,
2581 restored_edge_rows: 0,
2582 restored_chunk_rows: 0,
2583 restored_fts_rows: 0,
2584 restored_property_fts_rows: 0,
2585 restored_vec_rows: 0,
2586 skipped_edges: Vec::new(),
2587 notes: vec!["logical_id already active".to_owned()],
2588 });
2589 }
2590
2591 let restored_node: Option<(String, String)> = tx
2592 .query_row(
2593 "SELECT row_id, kind FROM nodes \
2594 WHERE logical_id = ?1 AND superseded_at IS NOT NULL \
2595 ORDER BY superseded_at DESC, created_at DESC, rowid DESC LIMIT 1",
2596 [logical_id],
2597 |row| Ok((row.get(0)?, row.get(1)?)),
2598 )
2599 .optional()?;
2600 let (restored_node_row_id, restored_kind) = restored_node.ok_or_else(|| {
2601 EngineError::InvalidWrite(format!("logical_id '{logical_id}' is not retired"))
2602 })?;
2603
2604 tx.execute(
2605 "UPDATE nodes SET superseded_at = NULL WHERE row_id = ?1",
2606 [restored_node_row_id.as_str()],
2607 )?;
2608
2609 let retire_scope: Option<(i64, Option<String>, i64)> = tx
2610 .query_row(
2611 "SELECT rowid, source_ref, created_at FROM provenance_events \
2612 WHERE event_type = 'node_retire' AND subject = ?1 \
2613 ORDER BY created_at DESC, rowid DESC LIMIT 1",
2614 [logical_id],
2615 |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
2616 )
2617 .optional()?;
2618 let (restored_edge_rows, skipped_edges) = if let Some((
2619 retire_event_rowid,
2620 retire_source_ref,
2621 retire_created_at,
2622 )) = retire_scope
2623 {
2624 restore_validated_edges(
2625 &tx,
2626 logical_id,
2627 retire_source_ref.as_deref(),
2628 retire_created_at,
2629 retire_event_rowid,
2630 )?
2631 } else {
2632 (0, Vec::new())
2633 };
2634
2635 let restored_chunk_rows: usize = tx
2636 .query_row(
2637 "SELECT count(*) FROM chunks WHERE node_logical_id = ?1",
2638 [logical_id],
2639 |row| row.get::<_, i64>(0),
2640 )
2641 .map(i64_to_usize)?;
2642 tx.execute(
2643 "DELETE FROM fts_nodes WHERE node_logical_id = ?1",
2644 [logical_id],
2645 )?;
2646 let restored_fts_rows = tx.execute(
2647 "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
2648 SELECT id, node_logical_id, ?2, text_content \
2649 FROM chunks WHERE node_logical_id = ?1",
2650 rusqlite::params![logical_id, restored_kind],
2651 )?;
2652 let restored_vec_rows = count_vec_rows_for_logical_id(&tx, logical_id)?;
2653
2654 let table = fathomdb_schema::fts_kind_table_name(&restored_kind);
2657 let fts_table_exists: bool = tx
2658 .query_row(
2659 "SELECT count(*) FROM sqlite_master WHERE type='table' AND name = ?1 \
2660 AND sql LIKE 'CREATE VIRTUAL TABLE%'",
2661 rusqlite::params![table],
2662 |r| r.get::<_, i64>(0),
2663 )
2664 .unwrap_or(0)
2665 > 0;
2666 if fts_table_exists {
2667 tx.execute(
2668 &format!("DELETE FROM {table} WHERE node_logical_id = ?1"),
2669 [logical_id],
2670 )?;
2671 }
2672 let restored_property_fts_rows =
2673 rebuild_single_node_property_fts(&tx, logical_id, &restored_kind)?;
2674
2675 persist_simple_provenance_event(
2676 &tx,
2677 "restore_logical_id",
2678 logical_id,
2679 Some(serde_json::json!({
2680 "restored_node_rows": 1,
2681 "restored_edge_rows": restored_edge_rows,
2682 "restored_chunk_rows": restored_chunk_rows,
2683 "restored_fts_rows": restored_fts_rows,
2684 "restored_property_fts_rows": restored_property_fts_rows,
2685 "restored_vec_rows": restored_vec_rows,
2686 })),
2687 )?;
2688 tx.commit()?;
2689
2690 Ok(LogicalRestoreReport {
2691 logical_id: logical_id.to_owned(),
2692 was_noop: false,
2693 restored_node_rows: 1,
2694 restored_edge_rows,
2695 restored_chunk_rows,
2696 restored_fts_rows,
2697 restored_property_fts_rows,
2698 restored_vec_rows,
2699 skipped_edges,
2700 notes: Vec::new(),
2701 })
2702 }
2703
2704 pub fn purge_logical_id(&self, logical_id: &str) -> Result<LogicalPurgeReport, EngineError> {
2708 let mut conn = self.connect()?;
2709 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2710
2711 let active_count: i64 = tx.query_row(
2712 "SELECT count(*) FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
2713 [logical_id],
2714 |row| row.get(0),
2715 )?;
2716 if active_count > 0 {
2717 return Ok(LogicalPurgeReport {
2718 logical_id: logical_id.to_owned(),
2719 was_noop: true,
2720 deleted_node_rows: 0,
2721 deleted_edge_rows: 0,
2722 deleted_chunk_rows: 0,
2723 deleted_fts_rows: 0,
2724 deleted_vec_rows: 0,
2725 notes: vec!["logical_id is active; purge skipped".to_owned()],
2726 });
2727 }
2728
2729 let node_rows: i64 = tx.query_row(
2730 "SELECT count(*) FROM nodes WHERE logical_id = ?1",
2731 [logical_id],
2732 |row| row.get(0),
2733 )?;
2734 if node_rows == 0 {
2735 return Err(EngineError::InvalidWrite(format!(
2736 "logical_id '{logical_id}' does not exist"
2737 )));
2738 }
2739
2740 let deleted_vec_rows = delete_vec_rows_for_logical_id(&tx, logical_id)?;
2741 let deleted_fts_rows = tx.execute(
2742 "DELETE FROM fts_nodes WHERE node_logical_id = ?1",
2743 [logical_id],
2744 )?;
2745 let deleted_edge_rows = tx.execute(
2746 "DELETE FROM edges WHERE source_logical_id = ?1 OR target_logical_id = ?1",
2747 [logical_id],
2748 )?;
2749 let deleted_chunk_rows = tx.execute(
2750 "DELETE FROM chunks WHERE node_logical_id = ?1",
2751 [logical_id],
2752 )?;
2753 let deleted_node_rows =
2754 tx.execute("DELETE FROM nodes WHERE logical_id = ?1", [logical_id])?;
2755 tx.execute(
2756 "DELETE FROM node_access_metadata WHERE logical_id = ?1",
2757 [logical_id],
2758 )?;
2759
2760 persist_simple_provenance_event(
2761 &tx,
2762 "purge_logical_id",
2763 logical_id,
2764 Some(serde_json::json!({
2765 "deleted_node_rows": deleted_node_rows,
2766 "deleted_edge_rows": deleted_edge_rows,
2767 "deleted_chunk_rows": deleted_chunk_rows,
2768 "deleted_fts_rows": deleted_fts_rows,
2769 "deleted_vec_rows": deleted_vec_rows,
2770 })),
2771 )?;
2772 tx.commit()?;
2773
2774 Ok(LogicalPurgeReport {
2775 logical_id: logical_id.to_owned(),
2776 was_noop: false,
2777 deleted_node_rows,
2778 deleted_edge_rows,
2779 deleted_chunk_rows,
2780 deleted_fts_rows,
2781 deleted_vec_rows,
2782 notes: Vec::new(),
2783 })
2784 }
2785
2786 pub fn purge_provenance_events(
2796 &self,
2797 before_timestamp: i64,
2798 options: &ProvenancePurgeOptions,
2799 ) -> Result<ProvenancePurgeReport, EngineError> {
2800 let mut conn = self.connect()?;
2801 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2802
2803 let preserved_types: Vec<&str> = if options.preserve_event_types.is_empty() {
2804 vec!["excise", "purge_logical_id"]
2805 } else {
2806 options
2807 .preserve_event_types
2808 .iter()
2809 .map(String::as_str)
2810 .collect()
2811 };
2812
2813 let placeholders: String = (0..preserved_types.len())
2815 .map(|i| format!("?{}", i + 2))
2816 .collect::<Vec<_>>()
2817 .join(", ");
2818 let count_query = format!(
2819 "SELECT count(*) FROM provenance_events \
2820 WHERE created_at < ?1 AND event_type NOT IN ({placeholders})"
2821 );
2822 let delete_query = format!(
2823 "DELETE FROM provenance_events WHERE rowid IN (\
2824 SELECT rowid FROM provenance_events \
2825 WHERE created_at < ?1 AND event_type NOT IN ({placeholders}) \
2826 LIMIT 10000)"
2827 );
2828
2829 let bind_params = |stmt: &mut rusqlite::Statement<'_>| -> Result<(), rusqlite::Error> {
2830 stmt.raw_bind_parameter(1, before_timestamp)?;
2831 for (i, event_type) in preserved_types.iter().enumerate() {
2832 stmt.raw_bind_parameter(i + 2, *event_type)?;
2833 }
2834 Ok(())
2835 };
2836
2837 let events_deleted = if options.dry_run {
2838 let mut stmt = tx.prepare(&count_query)?;
2839 bind_params(&mut stmt)?;
2840 stmt.raw_query()
2841 .next()?
2842 .map_or(0, |row| row.get::<_, u64>(0).unwrap_or(0))
2843 } else {
2844 let mut total_deleted: u64 = 0;
2845 loop {
2846 let mut stmt = tx.prepare(&delete_query)?;
2847 bind_params(&mut stmt)?;
2848 let deleted = stmt.raw_execute()?;
2849 if deleted == 0 {
2850 break;
2851 }
2852 total_deleted += deleted as u64;
2853 }
2854 total_deleted
2855 };
2856
2857 let total_after: u64 =
2858 tx.query_row("SELECT count(*) FROM provenance_events", [], |row| {
2859 row.get(0)
2860 })?;
2861
2862 let oldest_remaining: Option<i64> = tx
2863 .query_row("SELECT MIN(created_at) FROM provenance_events", [], |row| {
2864 row.get(0)
2865 })
2866 .optional()?
2867 .flatten();
2868
2869 if !options.dry_run {
2870 tx.commit()?;
2871 }
2872
2873 let events_preserved = if options.dry_run {
2876 total_after - events_deleted
2877 } else {
2878 total_after
2879 };
2880
2881 Ok(ProvenancePurgeReport {
2882 events_deleted,
2883 events_preserved,
2884 oldest_remaining,
2885 })
2886 }
2887
2888 #[allow(clippy::too_many_lines)]
2892 pub fn excise_source(&self, source_ref: &str) -> Result<TraceReport, EngineError> {
2893 let mut conn = self.connect()?;
2894
2895 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2896 let affected_operational_collections = collect_strings_tx(
2897 &tx,
2898 "SELECT DISTINCT m.collection_name \
2899 FROM operational_mutations m \
2900 JOIN operational_collections c ON c.name = m.collection_name \
2901 WHERE m.source_ref = ?1 AND c.kind = 'latest_state' \
2902 ORDER BY m.collection_name",
2903 source_ref,
2904 )?;
2905
2906 let pairs: Vec<(String, String)> = {
2908 let mut stmt = tx.prepare(
2909 "SELECT row_id, logical_id FROM nodes \
2910 WHERE source_ref = ?1 AND superseded_at IS NULL",
2911 )?;
2912 stmt.query_map([source_ref], |row| {
2913 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
2914 })?
2915 .collect::<Result<Vec<_>, _>>()?
2916 };
2917 let affected_logical_ids: Vec<String> = pairs
2918 .iter()
2919 .map(|(_, logical_id)| logical_id.clone())
2920 .collect();
2921
2922 tx.execute(
2924 "UPDATE nodes SET superseded_at = unixepoch() \
2925 WHERE source_ref = ?1 AND superseded_at IS NULL",
2926 [source_ref],
2927 )?;
2928 tx.execute(
2929 "UPDATE edges SET superseded_at = unixepoch() \
2930 WHERE source_ref = ?1 AND superseded_at IS NULL",
2931 [source_ref],
2932 )?;
2933 tx.execute(
2934 "UPDATE actions SET superseded_at = unixepoch() \
2935 WHERE source_ref = ?1 AND superseded_at IS NULL",
2936 [source_ref],
2937 )?;
2938 clear_operational_current_rows(&tx, &affected_operational_collections)?;
2939 tx.execute(
2940 "DELETE FROM operational_mutations WHERE source_ref = ?1",
2941 [source_ref],
2942 )?;
2943 for logical_id in &affected_logical_ids {
2944 delete_vec_rows_for_logical_id(&tx, logical_id)?;
2945 tx.execute(
2946 "DELETE FROM chunks WHERE node_logical_id = ?1",
2947 [logical_id.as_str()],
2948 )?;
2949 }
2950
2951 for (excised_row_id, logical_id) in &pairs {
2953 let prior: Option<String> = tx
2954 .query_row(
2955 "SELECT row_id FROM nodes \
2956 WHERE logical_id = ?1 AND row_id != ?2 \
2957 ORDER BY created_at DESC LIMIT 1",
2958 [logical_id.as_str(), excised_row_id.as_str()],
2959 |row| row.get(0),
2960 )
2961 .optional()?;
2962 if let Some(prior_id) = prior {
2963 tx.execute(
2964 "UPDATE nodes SET superseded_at = NULL WHERE row_id = ?1",
2965 [prior_id.as_str()],
2966 )?;
2967 }
2968 }
2969
2970 for logical_id in &affected_logical_ids {
2971 let has_active_node = tx
2972 .query_row(
2973 "SELECT 1 FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL LIMIT 1",
2974 [logical_id.as_str()],
2975 |row| row.get::<_, i64>(0),
2976 )
2977 .optional()?
2978 .is_some();
2979 if !has_active_node {
2980 tx.execute(
2981 "DELETE FROM node_access_metadata WHERE logical_id = ?1",
2982 [logical_id.as_str()],
2983 )?;
2984 }
2985 }
2986
2987 rebuild_operational_current_rows(&tx, &affected_operational_collections)?;
2988
2989 tx.execute("DELETE FROM fts_nodes", [])?;
2992 tx.execute(
2993 r"
2994 INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content)
2995 SELECT c.id, n.logical_id, n.kind, c.text_content
2996 FROM chunks c
2997 JOIN nodes n
2998 ON n.logical_id = c.node_logical_id
2999 AND n.superseded_at IS NULL
3000 ",
3001 [],
3002 )?;
3003
3004 rebuild_property_fts_in_tx(&tx)?;
3006
3007 tx.execute(
3011 "INSERT INTO provenance_events (id, event_type, subject, source_ref) \
3012 VALUES (?1, 'excise_source', ?2, ?2)",
3013 rusqlite::params![new_id(), source_ref],
3014 )?;
3015
3016 tx.commit()?;
3017
3018 self.trace_source(source_ref)
3019 }
3020
3021 pub fn safe_export(
3025 &self,
3026 destination_path: impl AsRef<Path>,
3027 options: SafeExportOptions,
3028 ) -> Result<SafeExportManifest, EngineError> {
3029 let destination_path = destination_path.as_ref();
3030
3031 let conn = self.connect()?;
3035
3036 if options.force_checkpoint {
3037 trace_info!("safe_export: wal checkpoint started");
3038 let (busy, log, checkpointed): (i64, i64, i64) =
3039 conn.query_row("PRAGMA wal_checkpoint(FULL)", [], |row| {
3040 Ok((row.get(0)?, row.get(1)?, row.get(2)?))
3041 })?;
3042 if busy != 0 {
3043 trace_warn!(
3044 busy,
3045 log_frames = log,
3046 checkpointed_frames = checkpointed,
3047 "safe_export: wal checkpoint blocked by active readers"
3048 );
3049 return Err(EngineError::Bridge(format!(
3050 "WAL checkpoint blocked: {busy} active reader(s) prevented a full checkpoint; \
3051 log frames={log}, checkpointed={checkpointed}; \
3052 retry export when no readers are active"
3053 )));
3054 }
3055 trace_info!(
3056 log_frames = log,
3057 checkpointed_frames = checkpointed,
3058 "safe_export: wal checkpoint completed"
3059 );
3060 }
3061
3062 let schema_version: u32 = conn
3063 .query_row(
3064 "SELECT COALESCE(MAX(version), 0) FROM fathom_schema_migrations",
3065 [],
3066 |row| row.get(0),
3067 )
3068 .unwrap_or(0);
3069
3070 if let Some(parent) = destination_path.parent() {
3073 fs::create_dir_all(parent)?;
3074 }
3075 conn.backup(DatabaseName::Main, destination_path, None)?;
3076
3077 drop(conn);
3078
3079 let page_count: u64 = {
3083 let export_conn = rusqlite::Connection::open_with_flags(
3084 destination_path,
3085 rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY
3086 | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX,
3087 )?;
3088 export_conn.query_row("PRAGMA page_count", [], |row| row.get(0))?
3089 };
3090
3091 let sha256 = {
3094 let mut file = fs::File::open(destination_path)?;
3095 let mut hasher = Sha256::new();
3096 io::copy(&mut file, &mut hasher)?;
3097 format!("{:x}", hasher.finalize())
3098 };
3099
3100 let exported_at = SystemTime::now()
3102 .duration_since(SystemTime::UNIX_EPOCH)
3103 .map_err(|e| EngineError::Bridge(format!("system clock error: {e}")))?
3104 .as_secs();
3105
3106 let manifest = SafeExportManifest {
3107 exported_at,
3108 sha256,
3109 schema_version,
3110 protocol_version: EXPORT_PROTOCOL_VERSION,
3111 page_count,
3112 };
3113
3114 let manifest_path = {
3116 let mut p = destination_path.to_path_buf();
3117 let stem = p
3118 .file_name()
3119 .map(|n| format!("{}.export-manifest.json", n.to_string_lossy()))
3120 .ok_or_else(|| {
3121 EngineError::Bridge("destination path has no filename".to_owned())
3122 })?;
3123 p.set_file_name(stem);
3124 p
3125 };
3126 let manifest_json =
3127 serde_json::to_string(&manifest).map_err(|e| EngineError::Bridge(e.to_string()))?;
3128
3129 let manifest_tmp = manifest_path.with_extension("json.tmp");
3132 if let Err(e) = fs::write(&manifest_tmp, &manifest_json)
3133 .and_then(|()| fs::rename(&manifest_tmp, &manifest_path))
3134 {
3135 let _ = fs::remove_file(&manifest_tmp);
3136 return Err(e.into());
3137 }
3138
3139 Ok(manifest)
3140 }
3141}
3142
/// In-memory shape of a vector embedding contract.
///
/// The field names match the columns written by `persist_vector_contract`
/// into `vector_embedding_contracts` (that insert additionally sets
/// `updated_at`, which has no field here).
// NOTE(review): marked dead_code, so presumably not constructed in every
// build configuration; confirm before removing.
#[allow(dead_code)]
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
struct VectorEmbeddingContractRecord {
    /// Vector profile name the contract belongs to.
    profile: String,
    /// Name of the vector table the contract applies to.
    table_name: String,
    /// Embedder model identity.
    model_identity: String,
    /// Embedder model version.
    model_version: String,
    /// Embedding dimension.
    dimension: usize,
    /// Normalization policy reported for the embedder.
    normalization_policy: String,
    /// Chunking policy from the regeneration config.
    chunking_policy: String,
    /// Preprocessing policy from the regeneration config.
    preprocessing_policy: String,
    /// JSON text describing the generator command.
    generator_command_json: String,
    /// When the contract was applied (presumably Unix seconds; the insert uses `unixepoch()`).
    applied_at: i64,
    /// Hash of the chunk snapshot the embeddings were generated from.
    snapshot_hash: String,
    /// Version number of the contract format.
    contract_format_version: i64,
}
3159
/// One chunk as it appears in a [`VectorRegenerationInput`].
///
/// Serializable; the field order is part of the serialized form, so do not
/// reorder fields.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
struct VectorRegenerationInputChunk {
    /// Identifier of the chunk.
    chunk_id: String,
    /// Logical id of the node the chunk belongs to.
    node_logical_id: String,
    /// Node kind.
    kind: String,
    /// Chunk text.
    text_content: String,
    /// Optional start offset of the chunk (presumably a byte offset into the source text).
    byte_start: Option<i64>,
    /// Optional end offset of the chunk (presumably a byte offset into the source text).
    byte_end: Option<i64>,
    /// Optional source reference of the chunk.
    source_ref: Option<String>,
    /// Creation timestamp of the chunk.
    created_at: i64,
}
3171
/// Serializable description of a vector regeneration run: the contract
/// parameters plus the full list of chunks.
///
/// The field order is part of the serialized form, so do not reorder fields.
// NOTE(review): presumably the value produced by `build_regeneration_input`
// and hashed by `compute_snapshot_hash`; confirm against those helpers.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
struct VectorRegenerationInput {
    /// Vector profile name.
    profile: String,
    /// Target vector table name.
    table_name: String,
    /// Embedder model identity.
    model_identity: String,
    /// Embedder model version.
    model_version: String,
    /// Embedding dimension.
    dimension: usize,
    /// Normalization policy.
    normalization_policy: String,
    /// Chunking policy.
    chunking_policy: String,
    /// Preprocessing policy.
    preprocessing_policy: String,
    /// Chunks covered by the run.
    chunks: Vec<VectorRegenerationInputChunk>,
}
3184
/// Classification of a vector regeneration failure.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum VectorRegenerationFailureClass {
    /// The regeneration config or existing contract is invalid.
    InvalidContract,
    /// The embedder returned an error.
    EmbedderFailure,
    /// The embedder returned unusable output.
    InvalidEmbedderOutput,
    /// The chunk snapshot changed between embedding and apply.
    SnapshotDrift,
    /// The database lacks the required vector capability.
    UnsupportedVecCapability,
}

impl VectorRegenerationFailureClass {
    /// Human-readable label used in error messages and audit metadata.
    fn label(self) -> &'static str {
        use VectorRegenerationFailureClass as Class;

        match self {
            Class::UnsupportedVecCapability => "unsupported vec capability",
            Class::SnapshotDrift => "snapshot drift",
            Class::InvalidEmbedderOutput => "invalid embedder output",
            Class::EmbedderFailure => "embedder failure",
            Class::InvalidContract => "invalid contract",
        }
    }

    /// Whether retrying the same request may succeed; only snapshot drift qualifies.
    fn retryable(self) -> bool {
        self == Self::SnapshotDrift
    }
}
3209
/// A classified vector regeneration failure: a failure class plus a
/// free-form detail message.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct VectorRegenerationFailure {
    /// Category of the failure; determines the label and retryability.
    class: VectorRegenerationFailureClass,
    /// Human-readable description of what went wrong.
    detail: String,
}
3215
3216impl VectorRegenerationFailure {
3217 pub(crate) fn new(class: VectorRegenerationFailureClass, detail: impl Into<String>) -> Self {
3218 Self {
3219 class,
3220 detail: detail.into(),
3221 }
3222 }
3223
3224 fn to_engine_error(&self) -> EngineError {
3225 let retry_suffix = if self.class.retryable() {
3226 " [retryable]"
3227 } else {
3228 ""
3229 };
3230 EngineError::Bridge(format!(
3231 "vector regeneration {}: {}{}",
3232 self.class.label(),
3233 self.detail,
3234 retry_suffix
3235 ))
3236 }
3237
3238 fn failure_class_label(&self) -> &'static str {
3239 self.class.label()
3240 }
3241}
3242
/// Metadata attached to vector regeneration provenance events.
///
/// Serialized when an event is persisted; `failure_class` is omitted from the
/// output when it is `None`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
struct VectorRegenerationAuditMetadata {
    /// Vector profile being regenerated.
    profile: String,
    /// Embedder model identity.
    model_identity: String,
    /// Embedder model version.
    model_version: String,
    /// Number of chunks in the snapshot.
    chunk_count: usize,
    /// Hash of the chunk snapshot.
    snapshot_hash: String,
    /// Failure class label; set only on failure events.
    #[serde(skip_serializing_if = "Option::is_none")]
    failure_class: Option<String>,
}
3253
/// Retention policy for an operational collection.
///
/// Deserialized from an internally tagged representation: the `mode` field
/// selects the variant using its snake_case name (`keep_all`,
/// `purge_before_seconds`, `keep_last`), and the variant's fields sit
/// alongside it.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize)]
#[serde(tag = "mode", rename_all = "snake_case")]
enum OperationalRetentionPolicy {
    /// No retention limit.
    KeepAll,
    /// Age-based policy parameterised by a maximum age in seconds.
    PurgeBeforeSeconds { max_age_seconds: i64 },
    /// Count-based policy parameterised by a maximum number of rows.
    KeepLast { max_rows: usize },
}
3261
3262pub fn load_vector_regeneration_config(
3265 path: impl AsRef<Path>,
3266) -> Result<VectorRegenerationConfig, EngineError> {
3267 let path = path.as_ref();
3268 let raw = fs::read_to_string(path)?;
3269 match path.extension().and_then(|ext| ext.to_str()) {
3270 Some("toml") => {
3271 toml::from_str(&raw).map_err(|error| EngineError::Bridge(error.to_string()))
3272 }
3273 Some("json") | None => {
3274 serde_json::from_str(&raw).map_err(|error| EngineError::Bridge(error.to_string()))
3275 }
3276 Some(other) => Err(EngineError::Bridge(format!(
3277 "unsupported vector regeneration config extension: {other}"
3278 ))),
3279 }
3280}
3281
3282fn validate_vector_regeneration_config(
3283 conn: &rusqlite::Connection,
3284 config: &VectorRegenerationConfig,
3285 identity: &QueryEmbedderIdentity,
3286) -> Result<VectorRegenerationConfig, VectorRegenerationFailure> {
3287 let profile = validate_bounded_text("profile", &config.profile, MAX_PROFILE_LEN)?;
3288 let table_name = validate_bounded_text("table_name", &config.table_name, MAX_PROFILE_LEN)?;
3289 if table_name != "vec_nodes_active" {
3290 return Err(VectorRegenerationFailure::new(
3291 VectorRegenerationFailureClass::InvalidContract,
3292 format!("table_name must be vec_nodes_active, got '{table_name}'"),
3293 ));
3294 }
3295 if identity.dimension == 0 {
3296 return Err(VectorRegenerationFailure::new(
3297 VectorRegenerationFailureClass::InvalidContract,
3298 "embedder reports dimension 0".to_owned(),
3299 ));
3300 }
3301 let chunking_policy =
3302 validate_bounded_text("chunking_policy", &config.chunking_policy, MAX_POLICY_LEN)?;
3303 let preprocessing_policy = validate_bounded_text(
3304 "preprocessing_policy",
3305 &config.preprocessing_policy,
3306 MAX_POLICY_LEN,
3307 )?;
3308
3309 if let Some(existing_dimension) = current_vector_profile_dimension(conn, &profile)?
3310 && existing_dimension != identity.dimension
3311 {
3312 return Err(VectorRegenerationFailure::new(
3313 VectorRegenerationFailureClass::InvalidContract,
3314 format!(
3315 "embedder dimension {} does not match existing vector profile dimension {}",
3316 identity.dimension, existing_dimension
3317 ),
3318 ));
3319 }
3320
3321 validate_existing_contract_version(conn, &profile)?;
3322
3323 let normalized = VectorRegenerationConfig {
3324 profile,
3325 table_name,
3326 chunking_policy,
3327 preprocessing_policy,
3328 };
3329 let serialized = serde_json::to_vec(&normalized).map_err(|error| {
3330 VectorRegenerationFailure::new(
3331 VectorRegenerationFailureClass::InvalidContract,
3332 error.to_string(),
3333 )
3334 })?;
3335 if serialized.len() > MAX_CONTRACT_JSON_BYTES {
3336 return Err(VectorRegenerationFailure::new(
3337 VectorRegenerationFailureClass::InvalidContract,
3338 format!("serialized contract exceeds {MAX_CONTRACT_JSON_BYTES} bytes"),
3339 ));
3340 }
3341
3342 Ok(normalized)
3343}
3344
3345#[allow(clippy::cast_possible_wrap)]
3346fn persist_vector_contract(
3347 conn: &rusqlite::Connection,
3348 config: &VectorRegenerationConfig,
3349 identity: &QueryEmbedderIdentity,
3350 snapshot_hash: &str,
3351) -> Result<(), EngineError> {
3352 conn.execute(
3353 r"
3354 INSERT OR REPLACE INTO vector_embedding_contracts (
3355 profile,
3356 table_name,
3357 model_identity,
3358 model_version,
3359 dimension,
3360 normalization_policy,
3361 chunking_policy,
3362 preprocessing_policy,
3363 generator_command_json,
3364 applied_at,
3365 snapshot_hash,
3366 contract_format_version,
3367 updated_at
3368 ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, unixepoch(), ?10, ?11, unixepoch())
3369 ",
3370 rusqlite::params![
3371 config.profile.as_str(),
3372 config.table_name.as_str(),
3373 identity.model_identity.as_str(),
3374 identity.model_version.as_str(),
3375 identity.dimension as i64,
3376 identity.normalization_policy.as_str(),
3377 config.chunking_policy.as_str(),
3378 config.preprocessing_policy.as_str(),
3379 "[]",
3380 snapshot_hash,
3381 CURRENT_VECTOR_CONTRACT_FORMAT_VERSION,
3382 ],
3383 )?;
3384 Ok(())
3385}
3386
3387fn persist_vector_regeneration_event(
3388 conn: &rusqlite::Connection,
3389 event_type: &str,
3390 subject: &str,
3391 metadata: &VectorRegenerationAuditMetadata,
3392) -> Result<(), EngineError> {
3393 let metadata_json = serialize_audit_metadata(metadata)?;
3394 conn.execute(
3395 "INSERT INTO provenance_events (id, event_type, subject, metadata_json) VALUES (?1, ?2, ?3, ?4)",
3396 rusqlite::params![new_id(), event_type, subject, metadata_json],
3397 )?;
3398 Ok(())
3399}
3400
3401fn persist_simple_provenance_event(
3402 conn: &rusqlite::Connection,
3403 event_type: &str,
3404 subject: &str,
3405 metadata: Option<serde_json::Value>,
3406) -> Result<(), EngineError> {
3407 let metadata_json = metadata.map(|value| value.to_string()).unwrap_or_default();
3408 conn.execute(
3409 "INSERT INTO provenance_events (id, event_type, subject, metadata_json) VALUES (?1, ?2, ?3, ?4)",
3410 rusqlite::params![new_id(), event_type, subject, metadata_json],
3411 )?;
3412 Ok(())
3413}
3414
3415fn count_per_kind_property_fts_issues(
3423 conn: &rusqlite::Connection,
3424) -> Result<(i64, i64, i64, i64), EngineError> {
3425 let per_kind_tables: Vec<String> = {
3429 let mut stmt = conn.prepare(
3430 "SELECT name FROM sqlite_master \
3431 WHERE type='table' AND name LIKE 'fts_props_%' \
3432 AND sql LIKE 'CREATE VIRTUAL TABLE%'",
3433 )?;
3434 stmt.query_map([], |r| r.get::<_, String>(0))?
3435 .collect::<Result<Vec<_>, _>>()?
3436 };
3437
3438 let registered_kinds: std::collections::HashSet<String> = {
3439 let mut stmt = conn.prepare("SELECT kind FROM fts_property_schemas")?;
3440 stmt.query_map([], |r| r.get::<_, String>(0))?
3441 .collect::<Result<std::collections::HashSet<_>, _>>()?
3442 };
3443
3444 let mut stale = 0i64;
3445 let mut orphaned = 0i64;
3446 let mut duplicate = 0i64;
3447
3448 for table in &per_kind_tables {
3449 let kind_stale: i64 = conn.query_row(
3451 &format!(
3452 "SELECT count(*) FROM {table} fp \
3453 WHERE NOT EXISTS (\
3454 SELECT 1 FROM nodes n \
3455 WHERE n.logical_id = fp.node_logical_id AND n.superseded_at IS NULL\
3456 )"
3457 ),
3458 [],
3459 |r| r.get(0),
3460 )?;
3461 stale += kind_stale;
3462
3463 let kind_dup: i64 = conn.query_row(
3465 &format!(
3466 "SELECT count(*) FROM (\
3467 SELECT node_logical_id FROM {table} \
3468 GROUP BY node_logical_id HAVING count(*) > 1\
3469 )"
3470 ),
3471 [],
3472 |r| r.get(0),
3473 )?;
3474 duplicate += kind_dup;
3475
3476 let table_has_schema = registered_kinds
3479 .iter()
3480 .any(|k| fathomdb_schema::fts_kind_table_name(k) == *table);
3481 if !table_has_schema {
3482 let table_rows: i64 =
3483 conn.query_row(&format!("SELECT count(*) FROM {table}"), [], |r| r.get(0))?;
3484 orphaned += table_rows;
3485 }
3486 }
3487
3488 Ok((stale, orphaned, 0, duplicate))
3490}
3491
3492fn count_missing_property_fts_rows(conn: &rusqlite::Connection) -> Result<i64, EngineError> {
3496 let schemas = crate::writer::load_fts_property_schemas(conn)?;
3497 if schemas.is_empty() {
3498 return Ok(0);
3499 }
3500
3501 let mut missing = 0i64;
3502 for (kind, schema) in &schemas {
3503 let table = fathomdb_schema::fts_kind_table_name(kind);
3504 let table_exists: bool = conn
3506 .query_row(
3507 "SELECT count(*) FROM sqlite_master WHERE type='table' AND name = ?1",
3508 [table.as_str()],
3509 |r| r.get::<_, i64>(0),
3510 )
3511 .unwrap_or(0)
3512 > 0;
3513
3514 if table_exists {
3515 let mut stmt = conn.prepare(&format!(
3516 "SELECT n.logical_id, n.properties FROM nodes n \
3517 WHERE n.kind = ?1 AND n.superseded_at IS NULL \
3518 AND NOT EXISTS (SELECT 1 FROM {table} fp WHERE fp.node_logical_id = n.logical_id)"
3519 ))?;
3520 let rows = stmt.query_map([kind.as_str()], |row| {
3521 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
3522 })?;
3523 for row in rows {
3524 let (_logical_id, properties_str) = row?;
3525 let props: serde_json::Value =
3526 serde_json::from_str(&properties_str).unwrap_or_default();
3527 if crate::writer::extract_property_fts(&props, schema)
3528 .0
3529 .is_some()
3530 {
3531 missing += 1;
3532 }
3533 }
3534 } else {
3535 let mut stmt = conn.prepare(
3537 "SELECT n.logical_id, n.properties FROM nodes n \
3538 WHERE n.kind = ?1 AND n.superseded_at IS NULL",
3539 )?;
3540 let rows = stmt.query_map([kind.as_str()], |row| {
3541 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
3542 })?;
3543 for row in rows {
3544 let (_logical_id, properties_str) = row?;
3545 let props: serde_json::Value =
3546 serde_json::from_str(&properties_str).unwrap_or_default();
3547 if crate::writer::extract_property_fts(&props, schema)
3548 .0
3549 .is_some()
3550 {
3551 missing += 1;
3552 }
3553 }
3554 }
3555 }
3556 Ok(missing)
3557}
3558
3559fn count_drifted_property_fts_rows(conn: &rusqlite::Connection) -> Result<i64, EngineError> {
3564 let schemas = crate::writer::load_fts_property_schemas(conn)?;
3565 if schemas.is_empty() {
3566 return Ok(0);
3567 }
3568
3569 let mut drifted = 0i64;
3570 for (kind, schema) in &schemas {
3571 let table = fathomdb_schema::fts_kind_table_name(kind);
3572 let table_exists: bool = conn
3574 .query_row(
3575 "SELECT count(*) FROM sqlite_master WHERE type='table' AND name = ?1",
3576 [table.as_str()],
3577 |r| r.get::<_, i64>(0),
3578 )
3579 .unwrap_or(0)
3580 > 0;
3581 if !table_exists {
3582 continue;
3583 }
3584 let mut stmt = conn.prepare(&format!(
3585 "SELECT fp.node_logical_id, fp.text_content, n.properties \
3586 FROM {table} fp \
3587 JOIN nodes n ON n.logical_id = fp.node_logical_id AND n.superseded_at IS NULL \
3588 WHERE n.kind = ?1"
3589 ))?;
3590 let rows = stmt.query_map([kind.as_str()], |row| {
3591 Ok((
3592 row.get::<_, String>(0)?,
3593 row.get::<_, String>(1)?,
3594 row.get::<_, String>(2)?,
3595 ))
3596 })?;
3597 for row in rows {
3598 let (_logical_id, stored_text, properties_str) = row?;
3599 let props: serde_json::Value =
3600 serde_json::from_str(&properties_str).unwrap_or_default();
3601 let (expected, _positions, _stats) =
3602 crate::writer::extract_property_fts(&props, schema);
3603 match expected {
3604 Some(text) if text == stored_text => {}
3605 _ => drifted += 1,
3606 }
3607 }
3608 }
3609 Ok(drifted)
3610}
3611
3612fn rebuild_property_fts_in_tx(conn: &rusqlite::Connection) -> Result<usize, EngineError> {
3614 let all_per_kind_tables: Vec<String> = {
3617 let mut stmt = conn.prepare(
3618 "SELECT name FROM sqlite_master WHERE type='table' AND name LIKE 'fts_props_%' \
3619 AND sql LIKE 'CREATE VIRTUAL TABLE%'",
3620 )?;
3621 stmt.query_map([], |r| r.get::<_, String>(0))?
3622 .collect::<Result<Vec<_>, _>>()?
3623 };
3624 for table in &all_per_kind_tables {
3625 conn.execute_batch(&format!("DELETE FROM {table}"))?;
3626 }
3627 conn.execute("DELETE FROM fts_node_property_positions", [])?;
3628 let inserted = crate::projection::insert_property_fts_rows(
3629 conn,
3630 "SELECT logical_id, properties FROM nodes WHERE kind = ?1 AND superseded_at IS NULL",
3631 )?;
3632 Ok(inserted)
3633}
3634
3635fn rebuild_single_node_property_fts(
3638 conn: &rusqlite::Connection,
3639 logical_id: &str,
3640 kind: &str,
3641) -> Result<usize, EngineError> {
3642 let schema: Option<(String, String)> = conn
3643 .query_row(
3644 "SELECT property_paths_json, separator FROM fts_property_schemas WHERE kind = ?1",
3645 [kind],
3646 |row| {
3647 let paths_json: String = row.get(0)?;
3648 let separator: String = row.get(1)?;
3649 Ok((paths_json, separator))
3650 },
3651 )
3652 .optional()?;
3653 let Some((paths_json, separator)) = schema else {
3654 return Ok(0);
3655 };
3656 let parsed = crate::writer::parse_property_schema_json(&paths_json, &separator);
3657 let properties_str: Option<String> = conn
3658 .query_row(
3659 "SELECT properties FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
3660 [logical_id],
3661 |row| row.get(0),
3662 )
3663 .optional()?;
3664 let Some(properties_str) = properties_str else {
3665 return Ok(0);
3666 };
3667 let props: serde_json::Value = serde_json::from_str(&properties_str).unwrap_or_default();
3668 let (text, positions, _stats) = crate::writer::extract_property_fts(&props, &parsed);
3669 let Some(text) = text else {
3670 return Ok(0);
3671 };
3672 conn.execute(
3673 "DELETE FROM fts_node_property_positions WHERE node_logical_id = ?1",
3674 rusqlite::params![logical_id],
3675 )?;
3676 let table = fathomdb_schema::fts_kind_table_name(kind);
3677 let tok = fathomdb_schema::DEFAULT_FTS_TOKENIZER;
3678 conn.execute_batch(&format!(
3679 "CREATE VIRTUAL TABLE IF NOT EXISTS {table} \
3680 USING fts5(node_logical_id UNINDEXED, text_content, tokenize = '{tok}')"
3681 ))?;
3682 conn.execute(
3683 &format!("INSERT INTO {table} (node_logical_id, text_content) VALUES (?1, ?2)"),
3684 rusqlite::params![logical_id, text],
3685 )?;
3686 for pos in &positions {
3687 conn.execute(
3688 "INSERT INTO fts_node_property_positions \
3689 (node_logical_id, kind, start_offset, end_offset, leaf_path) \
3690 VALUES (?1, ?2, ?3, ?4, ?5)",
3691 rusqlite::params![
3692 logical_id,
3693 kind,
3694 i64::try_from(pos.start_offset).unwrap_or(i64::MAX),
3695 i64::try_from(pos.end_offset).unwrap_or(i64::MAX),
3696 pos.leaf_path,
3697 ],
3698 )?;
3699 }
3700 Ok(1)
3701}
3702
3703fn serialize_property_paths_json(
3704 entries: &[FtsPropertyPathSpec],
3705 exclude_paths: &[String],
3706) -> Result<String, EngineError> {
3707 let all_scalar = entries
3711 .iter()
3712 .all(|e| e.mode == FtsPropertyPathMode::Scalar);
3713 let any_weight = entries.iter().any(|e| e.weight.is_some());
3714 if all_scalar && exclude_paths.is_empty() && !any_weight {
3715 let paths: Vec<&str> = entries.iter().map(|e| e.path.as_str()).collect();
3716 return serde_json::to_string(&paths).map_err(|e| {
3717 EngineError::InvalidWrite(format!("failed to serialize property paths: {e}"))
3718 });
3719 }
3720
3721 let mut obj = serde_json::Map::new();
3722 let paths_json: Vec<serde_json::Value> = entries
3723 .iter()
3724 .map(|e| {
3725 let mode_str = match e.mode {
3726 FtsPropertyPathMode::Scalar => "scalar",
3727 FtsPropertyPathMode::Recursive => "recursive",
3728 };
3729 let mut entry = serde_json::json!({ "path": e.path, "mode": mode_str });
3730 if let Some(w) = e.weight {
3731 entry["weight"] = serde_json::json!(w);
3732 }
3733 entry
3734 })
3735 .collect();
3736 obj.insert("paths".to_owned(), serde_json::Value::Array(paths_json));
3737 if !exclude_paths.is_empty() {
3738 obj.insert("exclude_paths".to_owned(), serde_json::json!(exclude_paths));
3739 }
3740 serde_json::to_string(&serde_json::Value::Object(obj))
3741 .map_err(|e| EngineError::InvalidWrite(format!("failed to serialize property paths: {e}")))
3742}
3743
3744fn create_or_replace_fts_kind_table(
3750 conn: &rusqlite::Connection,
3751 kind: &str,
3752 specs: &[FtsPropertyPathSpec],
3753 tokenizer: &str,
3754) -> Result<(), EngineError> {
3755 let table = fathomdb_schema::fts_kind_table_name(kind);
3756
3757 if !tokenizer
3762 .chars()
3763 .all(|c| c.is_alphanumeric() || "'._-$@ ".contains(c))
3764 {
3765 return Err(EngineError::Bridge(format!(
3766 "invalid tokenizer string: {tokenizer:?}"
3767 )));
3768 }
3769
3770 let cols: Vec<String> = if specs.is_empty() {
3771 vec![
3772 "node_logical_id UNINDEXED".to_owned(),
3773 "text_content".to_owned(),
3774 ]
3775 } else {
3776 std::iter::once("node_logical_id UNINDEXED".to_owned())
3777 .chain(specs.iter().map(|s| {
3778 let is_recursive = matches!(s.mode, FtsPropertyPathMode::Recursive);
3779 fathomdb_schema::fts_column_name(&s.path, is_recursive)
3780 }))
3781 .collect()
3782 };
3783
3784 let tokenizer_sql = tokenizer.replace('\'', "''");
3787 conn.execute_batch(&format!(
3788 "DROP TABLE IF EXISTS {table}; \
3789 CREATE VIRTUAL TABLE {table} USING fts5({cols}, tokenize='{tokenizer_sql}');",
3790 cols = cols.join(", "),
3791 ))?;
3792
3793 Ok(())
3794}
3795
3796fn validate_fts_property_paths(paths: &[String]) -> Result<(), EngineError> {
3797 if paths.is_empty() {
3798 return Err(EngineError::InvalidWrite(
3799 "FTS property paths must not be empty".to_owned(),
3800 ));
3801 }
3802 let mut seen = std::collections::HashSet::new();
3803 for path in paths {
3804 if !path.starts_with("$.") {
3805 return Err(EngineError::InvalidWrite(format!(
3806 "FTS property path must start with '$.' but got: {path}"
3807 )));
3808 }
3809 let after_prefix = &path[2..]; let segments: Vec<&str> = after_prefix.split('.').collect();
3811 if segments.is_empty() || segments.iter().any(|s| s.is_empty()) {
3812 return Err(EngineError::InvalidWrite(format!(
3813 "FTS property path has empty segment(s): {path}"
3814 )));
3815 }
3816 for seg in &segments {
3817 if !seg.chars().all(|c| c.is_alphanumeric() || c == '_') {
3818 return Err(EngineError::InvalidWrite(format!(
3819 "FTS property path segment contains invalid characters: {path}"
3820 )));
3821 }
3822 }
3823 if !seen.insert(path) {
3824 return Err(EngineError::InvalidWrite(format!(
3825 "duplicate FTS property path: {path}"
3826 )));
3827 }
3828 }
3829 Ok(())
3830}
3831
3832fn load_fts_property_schema_record(
3833 conn: &rusqlite::Connection,
3834 kind: &str,
3835) -> Result<Option<FtsPropertySchemaRecord>, EngineError> {
3836 let row = conn
3837 .query_row(
3838 "SELECT kind, property_paths_json, separator, format_version \
3839 FROM fts_property_schemas WHERE kind = ?1",
3840 [kind],
3841 |row| {
3842 let kind: String = row.get(0)?;
3843 let paths_json: String = row.get(1)?;
3844 let separator: String = row.get(2)?;
3845 let format_version: i64 = row.get(3)?;
3846 Ok(build_fts_property_schema_record(
3847 kind,
3848 &paths_json,
3849 separator,
3850 format_version,
3851 ))
3852 },
3853 )
3854 .optional()?;
3855 Ok(row)
3856}
3857
3858fn build_fts_property_schema_record(
3864 kind: String,
3865 paths_json: &str,
3866 separator: String,
3867 format_version: i64,
3868) -> FtsPropertySchemaRecord {
3869 let schema = crate::writer::parse_property_schema_json(paths_json, &separator);
3870 let entries: Vec<FtsPropertyPathSpec> = schema
3871 .paths
3872 .into_iter()
3873 .map(|entry| FtsPropertyPathSpec {
3874 path: entry.path,
3875 mode: match entry.mode {
3876 crate::writer::PropertyPathMode::Scalar => FtsPropertyPathMode::Scalar,
3877 crate::writer::PropertyPathMode::Recursive => FtsPropertyPathMode::Recursive,
3878 },
3879 weight: entry.weight,
3880 })
3881 .collect();
3882 let property_paths: Vec<String> = entries.iter().map(|e| e.path.clone()).collect();
3883 FtsPropertySchemaRecord {
3884 kind,
3885 property_paths,
3886 entries,
3887 exclude_paths: schema.exclude_paths,
3888 separator,
3889 format_version,
3890 }
3891}
3892
3893fn build_regeneration_input(
3894 config: &VectorRegenerationConfig,
3895 identity: &QueryEmbedderIdentity,
3896 chunks: Vec<VectorRegenerationInputChunk>,
3897) -> VectorRegenerationInput {
3898 VectorRegenerationInput {
3899 profile: config.profile.clone(),
3900 table_name: config.table_name.clone(),
3901 model_identity: identity.model_identity.clone(),
3902 model_version: identity.model_version.clone(),
3903 dimension: identity.dimension,
3904 normalization_policy: identity.normalization_policy.clone(),
3905 chunking_policy: config.chunking_policy.clone(),
3906 preprocessing_policy: config.preprocessing_policy.clone(),
3907 chunks,
3908 }
3909}
3910
3911fn compute_snapshot_hash(payload: &VectorRegenerationInput) -> Result<String, EngineError> {
3912 let bytes =
3913 serde_json::to_vec(payload).map_err(|error| EngineError::Bridge(error.to_string()))?;
3914 let mut hasher = Sha256::new();
3915 hasher.update(bytes);
3916 Ok(format!("{:x}", hasher.finalize()))
3917}
3918
/// Loads every chunk that belongs to an active node, as input for vector
/// regeneration.
///
/// Chunks are joined to nodes with `superseded_at IS NULL` and returned
/// ordered by `created_at`, then chunk `id`.
///
/// # Errors
/// Returns an error if the query or any column read fails.
fn collect_regeneration_chunks(
    conn: &rusqlite::Connection,
) -> Result<Vec<VectorRegenerationInputChunk>, EngineError> {
    let mut stmt = conn.prepare(
        r"
        SELECT c.id, c.node_logical_id, n.kind, c.text_content, c.byte_start, c.byte_end, n.source_ref, c.created_at
        FROM chunks c
        JOIN nodes n
          ON n.logical_id = c.node_logical_id
         AND n.superseded_at IS NULL
        ORDER BY c.created_at, c.id
        ",
    )?;
    // Column indices below follow the SELECT list order above.
    let chunks = stmt
        .query_map([], |row| {
            Ok(VectorRegenerationInputChunk {
                chunk_id: row.get(0)?,
                node_logical_id: row.get(1)?,
                kind: row.get(2)?,
                text_content: row.get(3)?,
                byte_start: row.get(4)?,
                byte_end: row.get(5)?,
                source_ref: row.get(6)?,
                created_at: row.get(7)?,
            })
        })?
        .collect::<Result<Vec<_>, _>>()?;
    Ok(chunks)
}
3948
3949fn validate_bounded_text(
3950 field: &str,
3951 value: &str,
3952 max_len: usize,
3953) -> Result<String, VectorRegenerationFailure> {
3954 let trimmed = value.trim();
3955 if trimmed.is_empty() {
3956 return Err(VectorRegenerationFailure::new(
3957 VectorRegenerationFailureClass::InvalidContract,
3958 format!("{field} must not be empty"),
3959 ));
3960 }
3961 if trimmed.len() > max_len {
3962 return Err(VectorRegenerationFailure::new(
3963 VectorRegenerationFailureClass::InvalidContract,
3964 format!("{field} exceeds max length {max_len}"),
3965 ));
3966 }
3967 Ok(trimmed.to_owned())
3968}
3969
3970fn current_vector_profile_dimension(
3971 conn: &rusqlite::Connection,
3972 profile: &str,
3973) -> Result<Option<usize>, VectorRegenerationFailure> {
3974 let dimension: Option<i64> = conn
3975 .query_row(
3976 "SELECT dimension FROM vector_profiles WHERE profile = ?1 AND enabled = 1",
3977 [profile],
3978 |row| row.get(0),
3979 )
3980 .optional()
3981 .map_err(|error| {
3982 VectorRegenerationFailure::new(
3983 VectorRegenerationFailureClass::InvalidContract,
3984 error.to_string(),
3985 )
3986 })?;
3987 dimension
3988 .map(|value| {
3989 usize::try_from(value).map_err(|_| {
3990 VectorRegenerationFailure::new(
3991 VectorRegenerationFailureClass::InvalidContract,
3992 format!("stored vector profile dimension is invalid: {value}"),
3993 )
3994 })
3995 })
3996 .transpose()
3997}
3998
3999fn validate_existing_contract_version(
4000 conn: &rusqlite::Connection,
4001 profile: &str,
4002) -> Result<(), VectorRegenerationFailure> {
4003 let version: Option<i64> = conn
4004 .query_row(
4005 "SELECT contract_format_version FROM vector_embedding_contracts WHERE profile = ?1",
4006 [profile],
4007 |row| row.get(0),
4008 )
4009 .optional()
4010 .map_err(|error| {
4011 VectorRegenerationFailure::new(
4012 VectorRegenerationFailureClass::InvalidContract,
4013 error.to_string(),
4014 )
4015 })?;
4016 if let Some(version) = version
4017 && version > CURRENT_VECTOR_CONTRACT_FORMAT_VERSION
4018 {
4019 return Err(VectorRegenerationFailure::new(
4020 VectorRegenerationFailureClass::InvalidContract,
4021 format!(
4022 "persisted contract format version {version} is unsupported; supported version is {CURRENT_VECTOR_CONTRACT_FORMAT_VERSION}"
4023 ),
4024 ));
4025 }
4026 Ok(())
4027}
4028
4029fn serialize_audit_metadata(
4030 metadata: &VectorRegenerationAuditMetadata,
4031) -> Result<String, EngineError> {
4032 let json =
4033 serde_json::to_string(metadata).map_err(|error| EngineError::Bridge(error.to_string()))?;
4034 if json.len() > MAX_AUDIT_METADATA_BYTES {
4035 return Err(VectorRegenerationFailure::new(
4036 VectorRegenerationFailureClass::InvalidContract,
4037 format!("audit metadata exceeds {MAX_AUDIT_METADATA_BYTES} bytes"),
4038 )
4039 .to_engine_error());
4040 }
4041 Ok(json)
4042}
4043
4044fn count_source_ref(
4045 conn: &rusqlite::Connection,
4046 table: &str,
4047 source_ref: &str,
4048) -> Result<usize, EngineError> {
4049 let sql = match table {
4050 "nodes" => "SELECT count(*) FROM nodes WHERE source_ref = ?1",
4051 "edges" => "SELECT count(*) FROM edges WHERE source_ref = ?1",
4052 "actions" => "SELECT count(*) FROM actions WHERE source_ref = ?1",
4053 "operational_mutations" => {
4054 "SELECT count(*) FROM operational_mutations WHERE source_ref = ?1"
4055 }
4056 other => return Err(EngineError::Bridge(format!("unknown table: {other}"))),
4057 };
4058 let count: i64 = conn.query_row(sql, [source_ref], |row| row.get(0))?;
4059 usize::try_from(count)
4062 .map_err(|_| EngineError::Bridge(format!("count overflow for table {table}: {count}")))
4063}
4064
4065fn rebuild_operational_current_rows(
4066 tx: &rusqlite::Transaction<'_>,
4067 collections: &[String],
4068) -> Result<usize, EngineError> {
4069 let mut rebuilt_rows = 0usize;
4070 clear_operational_current_rows(tx, collections)?;
4071 let mut ins_current = tx.prepare_cached(
4072 "INSERT INTO operational_current \
4073 (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
4074 VALUES (?1, ?2, ?3, ?4, ?5)",
4075 )?;
4076
4077 for collection in collections {
4078 let mut stmt = tx.prepare(
4079 "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
4080 FROM operational_mutations \
4081 WHERE collection_name = ?1 \
4082 ORDER BY record_key, mutation_order",
4083 )?;
4084 let mut latest_by_key: std::collections::HashMap<String, Option<(String, i64, String)>> =
4085 std::collections::HashMap::new();
4086 let rows = stmt.query_map([collection], map_operational_mutation_row)?;
4087 for row in rows {
4088 let mutation = row?;
4089 match mutation.op_kind.as_str() {
4090 "put" => {
4091 latest_by_key.insert(
4092 mutation.record_key,
4093 Some((mutation.payload_json, mutation.created_at, mutation.id)),
4094 );
4095 }
4096 "delete" => {
4097 latest_by_key.insert(mutation.record_key, None);
4098 }
4099 _ => {}
4100 }
4101 }
4102
4103 for (record_key, state) in latest_by_key {
4104 if let Some((payload_json, updated_at, last_mutation_id)) = state {
4105 ins_current.execute(rusqlite::params![
4106 collection,
4107 record_key,
4108 payload_json,
4109 updated_at,
4110 last_mutation_id,
4111 ])?;
4112 rebuilt_rows += 1;
4113 }
4114 }
4115 }
4116
4117 drop(ins_current);
4118 Ok(rebuilt_rows)
4119}
4120
4121fn clear_operational_current_rows(
4122 tx: &rusqlite::Transaction<'_>,
4123 collections: &[String],
4124) -> Result<(), EngineError> {
4125 let mut delete_current =
4126 tx.prepare_cached("DELETE FROM operational_current WHERE collection_name = ?1")?;
4127 let mut delete_secondary_current = tx.prepare_cached(
4128 "DELETE FROM operational_secondary_index_entries \
4129 WHERE collection_name = ?1 AND subject_kind = 'current'",
4130 )?;
4131 for collection in collections {
4132 delete_secondary_current.execute([collection])?;
4133 delete_current.execute([collection])?;
4134 }
4135 drop(delete_secondary_current);
4136 drop(delete_current);
4137 Ok(())
4138}
4139
4140fn clear_operational_secondary_index_entries(
4141 tx: &rusqlite::Transaction<'_>,
4142 collection_name: &str,
4143) -> Result<(), EngineError> {
4144 tx.execute(
4145 "DELETE FROM operational_secondary_index_entries WHERE collection_name = ?1",
4146 [collection_name],
4147 )?;
4148 Ok(())
4149}
4150
4151fn insert_operational_secondary_index_entry(
4152 tx: &rusqlite::Transaction<'_>,
4153 collection_name: &str,
4154 subject_kind: &str,
4155 mutation_id: &str,
4156 record_key: &str,
4157 entry: &crate::operational::OperationalSecondaryIndexEntry,
4158) -> Result<(), EngineError> {
4159 tx.execute(
4160 "INSERT INTO operational_secondary_index_entries \
4161 (collection_name, index_name, subject_kind, mutation_id, record_key, sort_timestamp, \
4162 slot1_text, slot1_integer, slot2_text, slot2_integer, slot3_text, slot3_integer) \
4163 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
4164 rusqlite::params![
4165 collection_name,
4166 entry.index_name,
4167 subject_kind,
4168 mutation_id,
4169 record_key,
4170 entry.sort_timestamp,
4171 entry.slot1_text,
4172 entry.slot1_integer,
4173 entry.slot2_text,
4174 entry.slot2_integer,
4175 entry.slot3_text,
4176 entry.slot3_integer,
4177 ],
4178 )?;
4179 Ok(())
4180}
4181
4182fn rebuild_operational_secondary_index_entries(
4183 tx: &rusqlite::Transaction<'_>,
4184 collection_name: &str,
4185 collection_kind: OperationalCollectionKind,
4186 indexes: &[OperationalSecondaryIndexDefinition],
4187) -> Result<(usize, usize), EngineError> {
4188 clear_operational_secondary_index_entries(tx, collection_name)?;
4189
4190 let mut mutation_entries_rebuilt = 0usize;
4191 if collection_kind == OperationalCollectionKind::AppendOnlyLog {
4192 let mut stmt = tx.prepare(
4193 "SELECT id, record_key, payload_json FROM operational_mutations \
4194 WHERE collection_name = ?1 ORDER BY mutation_order",
4195 )?;
4196 let rows = stmt
4197 .query_map([collection_name], |row| {
4198 Ok((
4199 row.get::<_, String>(0)?,
4200 row.get::<_, String>(1)?,
4201 row.get::<_, String>(2)?,
4202 ))
4203 })?
4204 .collect::<Result<Vec<_>, _>>()?;
4205 drop(stmt);
4206 for (mutation_id, record_key, payload_json) in rows {
4207 for entry in extract_secondary_index_entries_for_mutation(indexes, &payload_json) {
4208 insert_operational_secondary_index_entry(
4209 tx,
4210 collection_name,
4211 "mutation",
4212 &mutation_id,
4213 &record_key,
4214 &entry,
4215 )?;
4216 mutation_entries_rebuilt += 1;
4217 }
4218 }
4219 }
4220
4221 let mut current_entries_rebuilt = 0usize;
4222 if collection_kind == OperationalCollectionKind::LatestState {
4223 let mut stmt = tx.prepare(
4224 "SELECT record_key, payload_json, updated_at, last_mutation_id FROM operational_current \
4225 WHERE collection_name = ?1 ORDER BY updated_at DESC, record_key",
4226 )?;
4227 let rows = stmt
4228 .query_map([collection_name], |row| {
4229 Ok((
4230 row.get::<_, String>(0)?,
4231 row.get::<_, String>(1)?,
4232 row.get::<_, i64>(2)?,
4233 row.get::<_, String>(3)?,
4234 ))
4235 })?
4236 .collect::<Result<Vec<_>, _>>()?;
4237 drop(stmt);
4238 for (record_key, payload_json, updated_at, last_mutation_id) in rows {
4239 for entry in
4240 extract_secondary_index_entries_for_current(indexes, &payload_json, updated_at)
4241 {
4242 insert_operational_secondary_index_entry(
4243 tx,
4244 collection_name,
4245 "current",
4246 &last_mutation_id,
4247 &record_key,
4248 &entry,
4249 )?;
4250 current_entries_rebuilt += 1;
4251 }
4252 }
4253 }
4254
4255 Ok((mutation_entries_rebuilt, current_entries_rebuilt))
4256}
4257
4258fn collect_strings_tx(
4259 tx: &rusqlite::Transaction<'_>,
4260 sql: &str,
4261 value: &str,
4262) -> Result<Vec<String>, EngineError> {
4263 let mut stmt = tx.prepare(sql)?;
4264 let rows = stmt.query_map([value], |row| row.get::<_, String>(0))?;
4265 rows.collect::<Result<Vec<_>, _>>()
4266 .map_err(EngineError::from)
4267}
4268
/// Converts a SQL `count(*)` result to `usize`.
///
/// # Panics
/// Panics if `val` cannot be represented as a `usize` (for example, if it is
/// negative).
#[allow(clippy::expect_used)]
fn i64_to_usize(val: i64) -> usize {
    let converted = usize::try_from(val);
    converted.expect("count(*) must be non-negative")
}
4275
4276fn collect_strings(
4283 conn: &rusqlite::Connection,
4284 sql: &str,
4285 param: &str,
4286) -> Result<Vec<String>, EngineError> {
4287 let mut stmt = conn.prepare(sql)?;
4288 let values = stmt
4289 .query_map([param], |row| row.get::<_, String>(0))?
4290 .collect::<Result<Vec<_>, _>>()?;
4291 Ok(values)
4292}
4293
4294fn collect_edge_logical_ids_for_restore(
4295 tx: &rusqlite::Transaction<'_>,
4296 logical_id: &str,
4297 retire_source_ref: Option<&str>,
4298 retire_created_at: i64,
4299 retire_event_rowid: i64,
4300) -> Result<Vec<String>, EngineError> {
4301 let mut stmt = tx.prepare(
4302 "SELECT DISTINCT e.logical_id \
4303 FROM edges e \
4304 JOIN provenance_events p \
4305 ON p.subject = e.logical_id \
4306 AND p.event_type = 'edge_retire' \
4307 AND ( \
4308 p.created_at > ?3 \
4309 OR (p.created_at = ?3 AND p.rowid >= ?4) \
4310 ) \
4311 AND ((?2 IS NULL AND p.source_ref IS NULL) OR p.source_ref = ?2) \
4312 WHERE e.superseded_at IS NOT NULL \
4313 AND (e.source_logical_id = ?1 OR e.target_logical_id = ?1) \
4314 AND NOT EXISTS ( \
4315 SELECT 1 FROM edges active \
4316 WHERE active.logical_id = e.logical_id \
4317 AND active.superseded_at IS NULL \
4318 ) \
4319 ORDER BY e.logical_id",
4320 )?;
4321 let edge_ids = stmt
4322 .query_map(
4323 rusqlite::params![
4324 logical_id,
4325 retire_source_ref,
4326 retire_created_at,
4327 retire_event_rowid
4328 ],
4329 |row| row.get::<_, String>(0),
4330 )?
4331 .collect::<Result<Vec<_>, _>>()?;
4332 Ok(edge_ids)
4333}
4334
4335fn restore_validated_edges(
4338 tx: &rusqlite::Transaction<'_>,
4339 logical_id: &str,
4340 retire_source_ref: Option<&str>,
4341 retire_created_at: i64,
4342 retire_event_rowid: i64,
4343) -> Result<(usize, Vec<SkippedEdge>), EngineError> {
4344 let edge_logical_ids = collect_edge_logical_ids_for_restore(
4345 tx,
4346 logical_id,
4347 retire_source_ref,
4348 retire_created_at,
4349 retire_event_rowid,
4350 )?;
4351 let mut restored = 0usize;
4352 let mut skipped = Vec::new();
4353 for edge_logical_id in &edge_logical_ids {
4354 let edge_detail: Option<(String, String, String)> = tx
4355 .query_row(
4356 "SELECT row_id, source_logical_id, target_logical_id FROM edges \
4357 WHERE logical_id = ?1 AND superseded_at IS NOT NULL \
4358 ORDER BY superseded_at DESC, created_at DESC, rowid DESC LIMIT 1",
4359 [edge_logical_id.as_str()],
4360 |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
4361 )
4362 .optional()?;
4363 let Some((edge_row_id, source_lid, target_lid)) = edge_detail else {
4364 continue;
4365 };
4366 let other_endpoint = if source_lid == logical_id {
4367 &target_lid
4368 } else {
4369 &source_lid
4370 };
4371 let endpoint_active: bool = tx
4372 .query_row(
4373 "SELECT 1 FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL LIMIT 1",
4374 [other_endpoint.as_str()],
4375 |_| Ok(true),
4376 )
4377 .optional()?
4378 .unwrap_or(false);
4379 if !endpoint_active {
4380 skipped.push(SkippedEdge {
4381 edge_logical_id: edge_logical_id.clone(),
4382 missing_endpoint: other_endpoint.clone(),
4383 });
4384 continue;
4385 }
4386 restored += tx.execute(
4387 "UPDATE edges SET superseded_at = NULL WHERE row_id = ?1",
4388 [edge_row_id.as_str()],
4389 )?;
4390 }
4391 Ok((restored, skipped))
4392}
4393
/// Counts the `vec_nodes_active` rows attached to chunks of `logical_id`.
///
/// A SQLite failure whose message mentions `vec_nodes_active` or
/// `no such module: vec0` is treated as "no vector rows" and yields `Ok(0)`.
///
/// # Errors
/// Returns `EngineError::Sqlite` for any other query failure.
#[cfg(feature = "sqlite-vec")]
fn count_vec_rows_for_logical_id(
    tx: &rusqlite::Transaction<'_>,
    logical_id: &str,
) -> Result<usize, EngineError> {
    let outcome = tx.query_row(
        "SELECT count(*) FROM vec_nodes_active v \
         JOIN chunks c ON c.id = v.chunk_id \
         WHERE c.node_logical_id = ?1",
        [logical_id],
        |row| row.get::<_, i64>(0),
    );
    match outcome {
        Ok(count) => Ok(i64_to_usize(count)),
        Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
            if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
        {
            Ok(0)
        }
        Err(error) => Err(EngineError::Sqlite(error)),
    }
}
4415
/// Variant compiled when the `sqlite-vec` feature is disabled: always
/// reports zero vector rows.
///
/// The `Result` return type is kept so the signature matches the
/// feature-enabled variant.
#[cfg(not(feature = "sqlite-vec"))]
#[allow(clippy::unnecessary_wraps)]
fn count_vec_rows_for_logical_id(
    _tx: &rusqlite::Transaction<'_>,
    _logical_id: &str,
) -> Result<usize, EngineError> {
    Ok(0)
}
4424
/// Deletes the `vec_nodes_active` rows attached to chunks of `logical_id`
/// and returns how many rows were removed.
///
/// A SQLite failure whose message mentions `vec_nodes_active` or
/// `no such module: vec0` is treated as "nothing to delete" and yields
/// `Ok(0)`.
///
/// # Errors
/// Returns `EngineError::Sqlite` for any other failure.
#[cfg(feature = "sqlite-vec")]
fn delete_vec_rows_for_logical_id(
    tx: &rusqlite::Transaction<'_>,
    logical_id: &str,
) -> Result<usize, EngineError> {
    let outcome = tx.execute(
        "DELETE FROM vec_nodes_active \
         WHERE chunk_id IN (SELECT id FROM chunks WHERE node_logical_id = ?1)",
        [logical_id],
    );
    match outcome {
        Ok(deleted) => Ok(deleted),
        Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
            if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
        {
            Ok(0)
        }
        Err(error) => Err(EngineError::Sqlite(error)),
    }
}
4444
/// Variant compiled when the `sqlite-vec` feature is disabled: deletes
/// nothing and always reports zero rows removed.
///
/// The `Result` return type is kept so the signature matches the
/// feature-enabled variant.
#[cfg(not(feature = "sqlite-vec"))]
#[allow(clippy::unnecessary_wraps)]
fn delete_vec_rows_for_logical_id(
    _tx: &rusqlite::Transaction<'_>,
    _logical_id: &str,
) -> Result<usize, EngineError> {
    Ok(0)
}
4453
4454fn ensure_operational_collection_registered(
4455 conn: &rusqlite::Connection,
4456 collection_name: &str,
4457) -> Result<(), EngineError> {
4458 if load_operational_collection_record(conn, collection_name)?.is_none() {
4459 return Err(EngineError::InvalidWrite(format!(
4460 "operational collection '{collection_name}' is not registered"
4461 )));
4462 }
4463 Ok(())
4464}
4465
4466fn load_operational_collection_record(
4467 conn: &rusqlite::Connection,
4468 name: &str,
4469) -> Result<Option<OperationalCollectionRecord>, EngineError> {
4470 conn.query_row(
4471 "SELECT name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at, disabled_at \
4472 FROM operational_collections WHERE name = ?1",
4473 [name],
4474 map_operational_collection_row,
4475 )
4476 .optional()
4477 .map_err(EngineError::Sqlite)
4478}
4479
4480fn validate_append_only_operational_collection(
4481 record: &OperationalCollectionRecord,
4482 operation: &str,
4483) -> Result<(), EngineError> {
4484 if record.kind != OperationalCollectionKind::AppendOnlyLog {
4485 return Err(EngineError::InvalidWrite(format!(
4486 "operational collection '{}' must be append_only_log to {operation}",
4487 record.name
4488 )));
4489 }
4490 Ok(())
4491}
4492
/// A read filter clause after it has been checked against a collection's
/// declared filter fields and reduced to a concrete condition.
#[derive(Clone, Debug, PartialEq, Eq)]
struct CompiledOperationalReadFilter {
    /// Name of the declared filter field the condition applies to.
    field: String,
    /// The typed condition to evaluate for that field.
    condition: OperationalReadCondition,
}
4498
/// Borrowed description of a read that can be served from a secondary index.
///
/// NOTE(review): judging by the name this is the result of matching compiled
/// filters against an append-only collection's secondary index; confirm
/// against the code that constructs it.
#[derive(Clone, Debug)]
struct MatchedAppendOnlySecondaryIndexRead<'a> {
    /// Name of the secondary index that was matched.
    index_name: &'a str,
    /// The compiled filter that selects on the indexed value.
    value_filter: &'a CompiledOperationalReadFilter,
    /// Optional additional compiled filter; presumably a range over the
    /// index's timestamp (TODO confirm).
    time_range: Option<&'a CompiledOperationalReadFilter>,
}
4505
/// The typed condition carried by a compiled operational read filter.
#[derive(Clone, Debug, PartialEq, Eq)]
enum OperationalReadCondition {
    /// Exact match against a string-typed field.
    ExactString(String),
    /// Exact match against an integer- or timestamp-typed field.
    ExactInteger(i64),
    /// Prefix condition carrying the prefix text.
    Prefix(String),
    /// Integer range condition; either bound may be absent.
    /// NOTE(review): whether the bounds are inclusive is not visible here.
    Range {
        lower: Option<i64>,
        upper: Option<i64>,
    },
}
4516
4517fn operational_read_limit(limit: Option<usize>) -> Result<usize, EngineError> {
4518 let applied_limit = limit.unwrap_or(DEFAULT_OPERATIONAL_READ_LIMIT);
4519 if applied_limit == 0 {
4520 return Err(EngineError::InvalidWrite(
4521 "operational read limit must be greater than zero".to_owned(),
4522 ));
4523 }
4524 Ok(applied_limit.min(MAX_OPERATIONAL_READ_LIMIT))
4525}
4526
4527fn parse_operational_filter_fields(
4528 filter_fields_json: &str,
4529) -> Result<Vec<OperationalFilterField>, String> {
4530 let fields: Vec<OperationalFilterField> = serde_json::from_str(filter_fields_json)
4531 .map_err(|error| format!("invalid filter_fields_json: {error}"))?;
4532 let mut seen = std::collections::HashSet::new();
4533 for field in &fields {
4534 if field.name.trim().is_empty() {
4535 return Err("filter_fields_json field names must not be empty".to_owned());
4536 }
4537 if !seen.insert(field.name.as_str()) {
4538 return Err(format!(
4539 "filter_fields_json contains duplicate field '{}'",
4540 field.name
4541 ));
4542 }
4543 if field.modes.is_empty() {
4544 return Err(format!(
4545 "filter_fields_json field '{}' must declare at least one mode",
4546 field.name
4547 ));
4548 }
4549 if field.modes.contains(&OperationalFilterMode::Prefix)
4550 && field.field_type != OperationalFilterFieldType::String
4551 {
4552 return Err(format!(
4553 "filter field '{}' only supports prefix for string types",
4554 field.name
4555 ));
4556 }
4557 }
4558 Ok(fields)
4559}
4560
4561fn compile_operational_read_filters(
4562 filters: &[OperationalFilterClause],
4563 declared_fields: &[OperationalFilterField],
4564) -> Result<Vec<CompiledOperationalReadFilter>, EngineError> {
4565 let field_map = declared_fields
4566 .iter()
4567 .map(|field| (field.name.as_str(), field))
4568 .collect::<std::collections::HashMap<_, _>>();
4569 filters
4570 .iter()
4571 .map(|filter| match filter {
4572 OperationalFilterClause::Exact { field, value } => {
4573 let declared = field_map.get(field.as_str()).ok_or_else(|| {
4574 EngineError::InvalidWrite(format!(
4575 "operational read filter uses undeclared field '{field}'"
4576 ))
4577 })?;
4578 if !declared.modes.contains(&OperationalFilterMode::Exact) {
4579 return Err(EngineError::InvalidWrite(format!(
4580 "operational read field '{field}' does not allow exact filters"
4581 )));
4582 }
4583 let condition = match (declared.field_type, value) {
4584 (OperationalFilterFieldType::String, OperationalFilterValue::String(value)) => {
4585 OperationalReadCondition::ExactString(value.clone())
4586 }
4587 (
4588 OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp,
4589 OperationalFilterValue::Integer(value),
4590 ) => OperationalReadCondition::ExactInteger(*value),
4591 _ => {
4592 return Err(EngineError::InvalidWrite(format!(
4593 "operational read field '{field}' received a value with the wrong type"
4594 )));
4595 }
4596 };
4597 Ok(CompiledOperationalReadFilter {
4598 field: field.clone(),
4599 condition,
4600 })
4601 }
4602 OperationalFilterClause::Prefix { field, value } => {
4603 let declared = field_map.get(field.as_str()).ok_or_else(|| {
4604 EngineError::InvalidWrite(format!(
4605 "operational read filter uses undeclared field '{field}'"
4606 ))
4607 })?;
4608 if !declared.modes.contains(&OperationalFilterMode::Prefix) {
4609 return Err(EngineError::InvalidWrite(format!(
4610 "operational read field '{field}' does not allow prefix filters"
4611 )));
4612 }
4613 if declared.field_type != OperationalFilterFieldType::String {
4614 return Err(EngineError::InvalidWrite(format!(
4615 "operational read field '{field}' only supports prefix filters for strings"
4616 )));
4617 }
4618 Ok(CompiledOperationalReadFilter {
4619 field: field.clone(),
4620 condition: OperationalReadCondition::Prefix(value.clone()),
4621 })
4622 }
4623 OperationalFilterClause::Range {
4624 field,
4625 lower,
4626 upper,
4627 } => {
4628 let declared = field_map.get(field.as_str()).ok_or_else(|| {
4629 EngineError::InvalidWrite(format!(
4630 "operational read filter uses undeclared field '{field}'"
4631 ))
4632 })?;
4633 if !declared.modes.contains(&OperationalFilterMode::Range) {
4634 return Err(EngineError::InvalidWrite(format!(
4635 "operational read field '{field}' does not allow range filters"
4636 )));
4637 }
4638 if !matches!(
4639 declared.field_type,
4640 OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp
4641 ) {
4642 return Err(EngineError::InvalidWrite(format!(
4643 "operational read field '{field}' only supports range filters for integer/timestamp fields"
4644 )));
4645 }
4646 if lower.is_none() && upper.is_none() {
4647 return Err(EngineError::InvalidWrite(format!(
4648 "operational read range filter for '{field}' must specify a lower or upper bound"
4649 )));
4650 }
4651 Ok(CompiledOperationalReadFilter {
4652 field: field.clone(),
4653 condition: OperationalReadCondition::Range {
4654 lower: *lower,
4655 upper: *upper,
4656 },
4657 })
4658 }
4659 })
4660 .collect()
4661}
4662
4663fn match_append_only_secondary_index_read<'a>(
4664 filters: &'a [CompiledOperationalReadFilter],
4665 indexes: &'a [OperationalSecondaryIndexDefinition],
4666) -> Option<MatchedAppendOnlySecondaryIndexRead<'a>> {
4667 indexes.iter().find_map(|index| {
4668 let OperationalSecondaryIndexDefinition::AppendOnlyFieldTime {
4669 name,
4670 field,
4671 value_type,
4672 time_field,
4673 } = index
4674 else {
4675 return None;
4676 };
4677 if !(1..=2).contains(&filters.len()) {
4678 return None;
4679 }
4680
4681 let mut value_filter = None;
4682 let mut time_range = None;
4683 for filter in filters {
4684 if filter.field == *field {
4685 let supported = matches!(
4686 (&filter.condition, value_type),
4687 (
4688 OperationalReadCondition::ExactString(_)
4689 | OperationalReadCondition::Prefix(_),
4690 crate::operational::OperationalSecondaryIndexValueType::String
4691 ) | (
4692 OperationalReadCondition::ExactInteger(_),
4693 crate::operational::OperationalSecondaryIndexValueType::Integer
4694 | crate::operational::OperationalSecondaryIndexValueType::Timestamp
4695 )
4696 );
4697 if !supported || value_filter.is_some() {
4698 return None;
4699 }
4700 value_filter = Some(filter);
4701 continue;
4702 }
4703 if filter.field == *time_field {
4704 if !matches!(filter.condition, OperationalReadCondition::Range { .. })
4705 || time_range.is_some()
4706 {
4707 return None;
4708 }
4709 time_range = Some(filter);
4710 continue;
4711 }
4712 return None;
4713 }
4714
4715 value_filter.map(|value_filter| MatchedAppendOnlySecondaryIndexRead {
4716 index_name: name.as_str(),
4717 value_filter,
4718 time_range,
4719 })
4720 })
4721}
4722
4723fn execute_operational_secondary_index_read(
4724 conn: &rusqlite::Connection,
4725 collection_name: &str,
4726 filters: &[CompiledOperationalReadFilter],
4727 indexes: &[OperationalSecondaryIndexDefinition],
4728 applied_limit: usize,
4729) -> Result<Option<OperationalReadReport>, EngineError> {
4730 use rusqlite::types::Value;
4731
4732 let Some(matched) = match_append_only_secondary_index_read(filters, indexes) else {
4733 return Ok(None);
4734 };
4735
4736 let mut sql = String::from(
4737 "SELECT m.id, m.collection_name, m.record_key, m.op_kind, m.payload_json, m.source_ref, m.created_at \
4738 FROM operational_secondary_index_entries s \
4739 JOIN operational_mutations m ON m.id = s.mutation_id \
4740 WHERE s.collection_name = ?1 AND s.index_name = ?2 AND s.subject_kind = 'mutation' ",
4741 );
4742 let mut params = vec![
4743 Value::from(collection_name.to_owned()),
4744 Value::from(matched.index_name.to_owned()),
4745 ];
4746
4747 match &matched.value_filter.condition {
4748 OperationalReadCondition::ExactString(value) => {
4749 let _ = write!(sql, "AND s.slot1_text = ?{} ", params.len() + 1);
4750 params.push(Value::from(value.clone()));
4751 }
4752 OperationalReadCondition::Prefix(value) => {
4753 let _ = write!(sql, "AND s.slot1_text GLOB ?{} ", params.len() + 1);
4754 params.push(Value::from(glob_prefix_pattern(value)));
4755 }
4756 OperationalReadCondition::ExactInteger(value) => {
4757 let _ = write!(sql, "AND s.slot1_integer = ?{} ", params.len() + 1);
4758 params.push(Value::from(*value));
4759 }
4760 OperationalReadCondition::Range { .. } => return Ok(None),
4761 }
4762
4763 if let Some(time_range) = matched.time_range
4764 && let OperationalReadCondition::Range { lower, upper } = &time_range.condition
4765 {
4766 if let Some(lower) = lower {
4767 let _ = write!(sql, "AND s.sort_timestamp >= ?{} ", params.len() + 1);
4768 params.push(Value::from(*lower));
4769 }
4770 if let Some(upper) = upper {
4771 let _ = write!(sql, "AND s.sort_timestamp <= ?{} ", params.len() + 1);
4772 params.push(Value::from(*upper));
4773 }
4774 }
4775
4776 let _ = write!(
4777 sql,
4778 "ORDER BY s.sort_timestamp DESC, m.mutation_order DESC LIMIT ?{}",
4779 params.len() + 1
4780 );
4781 params.push(Value::from(i64::try_from(applied_limit + 1).map_err(
4782 |_| EngineError::Bridge("operational read limit overflow".to_owned()),
4783 )?));
4784
4785 let mut stmt = conn.prepare(&sql)?;
4786 let mut rows = stmt
4787 .query_map(
4788 rusqlite::params_from_iter(params),
4789 map_operational_mutation_row,
4790 )?
4791 .collect::<Result<Vec<_>, _>>()?;
4792 let was_limited = rows.len() > applied_limit;
4793 if was_limited {
4794 rows.truncate(applied_limit);
4795 }
4796
4797 Ok(Some(OperationalReadReport {
4798 collection_name: collection_name.to_owned(),
4799 row_count: rows.len(),
4800 applied_limit,
4801 was_limited,
4802 rows,
4803 }))
4804}
4805
4806fn execute_operational_filtered_read(
4807 conn: &rusqlite::Connection,
4808 collection_name: &str,
4809 filters: &[CompiledOperationalReadFilter],
4810 applied_limit: usize,
4811) -> Result<OperationalReadReport, EngineError> {
4812 use rusqlite::types::Value;
4813
4814 let mut sql = String::from(
4815 "SELECT m.id, m.collection_name, m.record_key, m.op_kind, m.payload_json, m.source_ref, m.created_at \
4816 FROM operational_mutations m ",
4817 );
4818 let mut params = vec![Value::from(collection_name.to_owned())];
4819 for (index, filter) in filters.iter().enumerate() {
4820 let _ = write!(
4821 sql,
4822 "JOIN operational_filter_values f{index} \
4823 ON f{index}.mutation_id = m.id \
4824 AND f{index}.collection_name = m.collection_name "
4825 );
4826 match &filter.condition {
4827 OperationalReadCondition::ExactString(value) => {
4828 let _ = write!(
4829 sql,
4830 "AND f{index}.field_name = ?{} AND f{index}.string_value = ?{} ",
4831 params.len() + 1,
4832 params.len() + 2
4833 );
4834 params.push(Value::from(filter.field.clone()));
4835 params.push(Value::from(value.clone()));
4836 }
4837 OperationalReadCondition::ExactInteger(value) => {
4838 let _ = write!(
4839 sql,
4840 "AND f{index}.field_name = ?{} AND f{index}.integer_value = ?{} ",
4841 params.len() + 1,
4842 params.len() + 2
4843 );
4844 params.push(Value::from(filter.field.clone()));
4845 params.push(Value::from(*value));
4846 }
4847 OperationalReadCondition::Prefix(value) => {
4848 let _ = write!(
4849 sql,
4850 "AND f{index}.field_name = ?{} AND f{index}.string_value GLOB ?{} ",
4851 params.len() + 1,
4852 params.len() + 2
4853 );
4854 params.push(Value::from(filter.field.clone()));
4855 params.push(Value::from(glob_prefix_pattern(value)));
4856 }
4857 OperationalReadCondition::Range { lower, upper } => {
4858 let _ = write!(sql, "AND f{index}.field_name = ?{} ", params.len() + 1);
4859 params.push(Value::from(filter.field.clone()));
4860 if let Some(lower) = lower {
4861 let _ = write!(sql, "AND f{index}.integer_value >= ?{} ", params.len() + 1);
4862 params.push(Value::from(*lower));
4863 }
4864 if let Some(upper) = upper {
4865 let _ = write!(sql, "AND f{index}.integer_value <= ?{} ", params.len() + 1);
4866 params.push(Value::from(*upper));
4867 }
4868 }
4869 }
4870 }
4871 let _ = write!(
4872 sql,
4873 "WHERE m.collection_name = ?1 ORDER BY m.mutation_order DESC LIMIT ?{}",
4874 params.len() + 1
4875 );
4876 params.push(Value::from(i64::try_from(applied_limit + 1).map_err(
4877 |_| EngineError::Bridge("operational read limit overflow".to_owned()),
4878 )?));
4879
4880 let mut stmt = conn.prepare(&sql)?;
4881 let mut rows = stmt
4882 .query_map(
4883 rusqlite::params_from_iter(params),
4884 map_operational_mutation_row,
4885 )?
4886 .collect::<Result<Vec<_>, _>>()?;
4887 let was_limited = rows.len() > applied_limit;
4888 if was_limited {
4889 rows.truncate(applied_limit);
4890 }
4891 Ok(OperationalReadReport {
4892 collection_name: collection_name.to_owned(),
4893 row_count: rows.len(),
4894 applied_limit,
4895 was_limited,
4896 rows,
4897 })
4898}
4899
/// Builds a `GLOB` pattern that matches every string starting with `value`.
///
/// The metacharacters `*`, `?` and `[` are wrapped in a one-character bracket
/// class so they match literally, then a trailing `*` is appended.
fn glob_prefix_pattern(value: &str) -> String {
    let mut escaped = value.chars().fold(
        String::with_capacity(value.len() + 1),
        |mut acc, ch| {
            if matches!(ch, '*' | '?' | '[') {
                acc.push('[');
                acc.push(ch);
                acc.push(']');
            } else {
                acc.push(ch);
            }
            acc
        },
    );
    escaped.push('*');
    escaped
}
4913
/// A filter-field value pulled out of a mutation payload.
///
/// `extract_operational_filter_values` fills exactly one of `string_value` and
/// `integer_value`, depending on the declared field type.
#[derive(Debug, Clone, Eq, PartialEq)]
struct ExtractedOperationalFilterValue {
    /// Name of the declared filter field the value belongs to.
    field_name: String,
    /// Set for string-typed fields.
    string_value: Option<String>,
    /// Set for integer- and timestamp-typed fields.
    integer_value: Option<i64>,
}
4920
4921fn extract_operational_filter_values(
4922 filter_fields: &[OperationalFilterField],
4923 payload_json: &str,
4924) -> Vec<ExtractedOperationalFilterValue> {
4925 let Ok(parsed) = serde_json::from_str::<serde_json::Value>(payload_json) else {
4926 return Vec::new();
4927 };
4928 let Some(object) = parsed.as_object() else {
4929 return Vec::new();
4930 };
4931
4932 filter_fields
4933 .iter()
4934 .filter_map(|field| {
4935 let value = object.get(&field.name)?;
4936 match field.field_type {
4937 OperationalFilterFieldType::String => {
4938 value
4939 .as_str()
4940 .map(|string_value| ExtractedOperationalFilterValue {
4941 field_name: field.name.clone(),
4942 string_value: Some(string_value.to_owned()),
4943 integer_value: None,
4944 })
4945 }
4946 OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp => {
4947 value
4948 .as_i64()
4949 .map(|integer_value| ExtractedOperationalFilterValue {
4950 field_name: field.name.clone(),
4951 string_value: None,
4952 integer_value: Some(integer_value),
4953 })
4954 }
4955 }
4956 })
4957 .collect()
4958}
4959
4960fn operational_compaction_candidates(
4961 conn: &rusqlite::Connection,
4962 retention_json: &str,
4963 collection_name: &str,
4964) -> Result<(Vec<String>, Option<i64>), EngineError> {
4965 operational_compaction_candidates_at(
4966 conn,
4967 retention_json,
4968 collection_name,
4969 current_unix_timestamp()?,
4970 )
4971}
4972
4973fn operational_compaction_candidates_at(
4974 conn: &rusqlite::Connection,
4975 retention_json: &str,
4976 collection_name: &str,
4977 now_timestamp: i64,
4978) -> Result<(Vec<String>, Option<i64>), EngineError> {
4979 let policy = parse_operational_retention_policy(retention_json)?;
4980 match policy {
4981 OperationalRetentionPolicy::KeepAll => Ok((Vec::new(), None)),
4982 OperationalRetentionPolicy::PurgeBeforeSeconds { max_age_seconds } => {
4983 let before_timestamp = now_timestamp - max_age_seconds;
4984 let mut stmt = conn.prepare(
4985 "SELECT id FROM operational_mutations \
4986 WHERE collection_name = ?1 AND created_at < ?2 \
4987 ORDER BY mutation_order",
4988 )?;
4989 let mutation_ids = stmt
4990 .query_map(
4991 rusqlite::params![collection_name, before_timestamp],
4992 |row| row.get::<_, String>(0),
4993 )?
4994 .collect::<Result<Vec<_>, _>>()?;
4995 Ok((mutation_ids, Some(before_timestamp)))
4996 }
4997 OperationalRetentionPolicy::KeepLast { max_rows } => {
4998 let mut stmt = conn.prepare(
4999 "SELECT id FROM operational_mutations \
5000 WHERE collection_name = ?1 \
5001 ORDER BY mutation_order DESC",
5002 )?;
5003 let ordered_ids = stmt
5004 .query_map([collection_name], |row| row.get::<_, String>(0))?
5005 .collect::<Result<Vec<_>, _>>()?;
5006 Ok((ordered_ids.into_iter().skip(max_rows).collect(), None))
5007 }
5008 }
5009}
5010
5011fn parse_operational_retention_policy(
5012 retention_json: &str,
5013) -> Result<OperationalRetentionPolicy, EngineError> {
5014 let policy: OperationalRetentionPolicy = serde_json::from_str(retention_json)
5015 .map_err(|error| EngineError::InvalidWrite(format!("invalid retention_json: {error}")))?;
5016 match policy {
5017 OperationalRetentionPolicy::KeepAll => Ok(policy),
5018 OperationalRetentionPolicy::PurgeBeforeSeconds { max_age_seconds } => {
5019 if max_age_seconds <= 0 {
5020 return Err(EngineError::InvalidWrite(
5021 "retention_json max_age_seconds must be greater than zero".to_owned(),
5022 ));
5023 }
5024 Ok(policy)
5025 }
5026 OperationalRetentionPolicy::KeepLast { max_rows } => {
5027 if max_rows == 0 {
5028 return Err(EngineError::InvalidWrite(
5029 "retention_json max_rows must be greater than zero".to_owned(),
5030 ));
5031 }
5032 Ok(policy)
5033 }
5034 }
5035}
5036
5037fn load_operational_retention_records(
5038 conn: &rusqlite::Connection,
5039 collection_names: Option<&[String]>,
5040 max_collections: Option<usize>,
5041) -> Result<Vec<OperationalCollectionRecord>, EngineError> {
5042 let limit = max_collections.unwrap_or(usize::MAX);
5043 if limit == 0 {
5044 return Err(EngineError::InvalidWrite(
5045 "max_collections must be greater than zero".to_owned(),
5046 ));
5047 }
5048
5049 let mut records = Vec::new();
5050 if let Some(collection_names) = collection_names {
5051 for name in collection_names.iter().take(limit) {
5052 let record = load_operational_collection_record(conn, name)?.ok_or_else(|| {
5053 EngineError::InvalidWrite(format!(
5054 "operational collection '{name}' is not registered"
5055 ))
5056 })?;
5057 records.push(record);
5058 }
5059 return Ok(records);
5060 }
5061
5062 let mut stmt = conn.prepare(
5063 "SELECT name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at, disabled_at \
5064 FROM operational_collections ORDER BY name",
5065 )?;
5066 let rows = stmt
5067 .query_map([], map_operational_collection_row)?
5068 .take(limit)
5069 .collect::<Result<Vec<_>, _>>()?;
5070 Ok(rows)
5071}
5072
5073fn last_operational_retention_run_at(
5074 conn: &rusqlite::Connection,
5075 collection_name: &str,
5076) -> Result<Option<i64>, EngineError> {
5077 conn.query_row(
5078 "SELECT MAX(executed_at) FROM operational_retention_runs WHERE collection_name = ?1",
5079 [collection_name],
5080 |row| row.get(0),
5081 )
5082 .optional()
5083 .map_err(EngineError::Sqlite)
5084 .map(Option::flatten)
5085}
5086
5087fn count_operational_mutations_for_collection(
5088 conn: &rusqlite::Connection,
5089 collection_name: &str,
5090) -> Result<usize, EngineError> {
5091 let count: i64 = conn.query_row(
5092 "SELECT count(*) FROM operational_mutations WHERE collection_name = ?1",
5093 [collection_name],
5094 |row| row.get(0),
5095 )?;
5096 usize::try_from(count).map_err(|_| {
5097 EngineError::Bridge(format!("count overflow for collection {collection_name}"))
5098 })
5099}
5100
5101fn retention_action_kind_and_limit(
5102 policy: &OperationalRetentionPolicy,
5103) -> (OperationalRetentionActionKind, Option<usize>) {
5104 match policy {
5105 OperationalRetentionPolicy::KeepAll => (OperationalRetentionActionKind::Noop, None),
5106 OperationalRetentionPolicy::PurgeBeforeSeconds { .. } => {
5107 (OperationalRetentionActionKind::PurgeBeforeSeconds, None)
5108 }
5109 OperationalRetentionPolicy::KeepLast { max_rows } => {
5110 (OperationalRetentionActionKind::KeepLast, Some(*max_rows))
5111 }
5112 }
5113}
5114
5115fn plan_operational_retention_item(
5116 conn: &rusqlite::Connection,
5117 record: &OperationalCollectionRecord,
5118 now_timestamp: i64,
5119) -> Result<OperationalRetentionPlanItem, EngineError> {
5120 let last_run_at = last_operational_retention_run_at(conn, &record.name)?;
5121 if record.kind != OperationalCollectionKind::AppendOnlyLog {
5122 return Ok(OperationalRetentionPlanItem {
5123 collection_name: record.name.clone(),
5124 action_kind: OperationalRetentionActionKind::Noop,
5125 candidate_deletions: 0,
5126 before_timestamp: None,
5127 max_rows: None,
5128 last_run_at,
5129 });
5130 }
5131 let policy = parse_operational_retention_policy(&record.retention_json)?;
5132 let (action_kind, max_rows) = retention_action_kind_and_limit(&policy);
5133 let (candidate_ids, before_timestamp) = operational_compaction_candidates_at(
5134 conn,
5135 &record.retention_json,
5136 &record.name,
5137 now_timestamp,
5138 )?;
5139 Ok(OperationalRetentionPlanItem {
5140 collection_name: record.name.clone(),
5141 action_kind,
5142 candidate_deletions: candidate_ids.len(),
5143 before_timestamp,
5144 max_rows,
5145 last_run_at,
5146 })
5147}
5148
5149fn run_operational_retention_item(
5150 tx: &rusqlite::Transaction<'_>,
5151 record: &OperationalCollectionRecord,
5152 now_timestamp: i64,
5153 dry_run: bool,
5154) -> Result<OperationalRetentionRunItem, EngineError> {
5155 let plan = plan_operational_retention_item(tx, record, now_timestamp)?;
5156 let mut deleted_mutations = 0usize;
5157 if record.kind == OperationalCollectionKind::AppendOnlyLog
5158 && plan.action_kind != OperationalRetentionActionKind::Noop
5159 && plan.candidate_deletions > 0
5160 && !dry_run
5161 {
5162 let (candidate_ids, _) = operational_compaction_candidates_at(
5163 tx,
5164 &record.retention_json,
5165 &record.name,
5166 now_timestamp,
5167 )?;
5168 let mut delete_stmt =
5169 tx.prepare_cached("DELETE FROM operational_mutations WHERE id = ?1")?;
5170 for mutation_id in &candidate_ids {
5171 delete_stmt.execute([mutation_id.as_str()])?;
5172 deleted_mutations += 1;
5173 }
5174 drop(delete_stmt);
5175
5176 persist_simple_provenance_event(
5177 tx,
5178 "operational_retention_run",
5179 &record.name,
5180 Some(serde_json::json!({
5181 "action_kind": plan.action_kind,
5182 "deleted_mutations": deleted_mutations,
5183 "before_timestamp": plan.before_timestamp,
5184 "max_rows": plan.max_rows,
5185 "executed_at": now_timestamp,
5186 })),
5187 )?;
5188 }
5189
5190 let live_rows_remaining = count_operational_mutations_for_collection(tx, &record.name)?;
5191 let effective_deleted_mutations = if dry_run {
5192 plan.candidate_deletions
5193 } else {
5194 deleted_mutations
5195 };
5196 let rows_remaining = if dry_run {
5197 live_rows_remaining.saturating_sub(effective_deleted_mutations)
5198 } else {
5199 live_rows_remaining
5200 };
5201 if !dry_run && plan.action_kind != OperationalRetentionActionKind::Noop {
5202 tx.execute(
5203 "INSERT INTO operational_retention_runs \
5204 (id, collection_name, executed_at, action_kind, dry_run, deleted_mutations, rows_remaining, metadata_json) \
5205 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
5206 rusqlite::params![
5207 new_id(),
5208 record.name,
5209 now_timestamp,
5210 serde_json::to_string(&plan.action_kind)
5211 .unwrap_or_else(|_| "\"noop\"".to_owned())
5212 .trim_matches('"')
5213 .to_owned(),
5214 i32::from(dry_run),
5215 deleted_mutations,
5216 rows_remaining,
5217 serde_json::json!({
5218 "before_timestamp": plan.before_timestamp,
5219 "max_rows": plan.max_rows,
5220 })
5221 .to_string(),
5222 ],
5223 )?;
5224 }
5225
5226 Ok(OperationalRetentionRunItem {
5227 collection_name: plan.collection_name,
5228 action_kind: plan.action_kind,
5229 deleted_mutations: effective_deleted_mutations,
5230 before_timestamp: plan.before_timestamp,
5231 max_rows: plan.max_rows,
5232 rows_remaining,
5233 })
5234}
5235
5236fn current_unix_timestamp() -> Result<i64, EngineError> {
5237 let now = SystemTime::now()
5238 .duration_since(SystemTime::UNIX_EPOCH)
5239 .map_err(|error| EngineError::Bridge(format!("system clock error: {error}")))?;
5240 i64::try_from(now.as_secs())
5241 .map_err(|_| EngineError::Bridge("unix timestamp overflow".to_owned()))
5242}
5243
5244fn map_operational_collection_row(
5245 row: &rusqlite::Row<'_>,
5246) -> Result<OperationalCollectionRecord, rusqlite::Error> {
5247 let kind_text: String = row.get(1)?;
5248 let kind = OperationalCollectionKind::try_from(kind_text.as_str()).map_err(|message| {
5249 rusqlite::Error::FromSqlConversionFailure(
5250 1,
5251 rusqlite::types::Type::Text,
5252 Box::new(io::Error::new(io::ErrorKind::InvalidData, message)),
5253 )
5254 })?;
5255 Ok(OperationalCollectionRecord {
5256 name: row.get(0)?,
5257 kind,
5258 schema_json: row.get(2)?,
5259 retention_json: row.get(3)?,
5260 filter_fields_json: row.get(4)?,
5261 validation_json: row.get(5)?,
5262 secondary_indexes_json: row.get(6)?,
5263 format_version: row.get(7)?,
5264 created_at: row.get(8)?,
5265 disabled_at: row.get(9)?,
5266 })
5267}
5268
5269fn map_operational_mutation_row(
5270 row: &rusqlite::Row<'_>,
5271) -> Result<OperationalMutationRow, rusqlite::Error> {
5272 Ok(OperationalMutationRow {
5273 id: row.get(0)?,
5274 collection_name: row.get(1)?,
5275 record_key: row.get(2)?,
5276 op_kind: row.get(3)?,
5277 payload_json: row.get(4)?,
5278 source_ref: row.get(5)?,
5279 created_at: row.get(6)?,
5280 })
5281}
5282
5283fn map_operational_current_row(
5284 row: &rusqlite::Row<'_>,
5285) -> Result<OperationalCurrentRow, rusqlite::Error> {
5286 Ok(OperationalCurrentRow {
5287 collection_name: row.get(0)?,
5288 record_key: row.get(1)?,
5289 payload_json: row.get(2)?,
5290 updated_at: row.get(3)?,
5291 last_mutation_id: row.get(4)?,
5292 })
5293}
5294
5295#[cfg(test)]
5296#[allow(clippy::expect_used)]
5297mod tests {
5298 use std::fs;
5299 use std::sync::Arc;
5300
5301 use fathomdb_schema::SchemaManager;
5302 use tempfile::NamedTempFile;
5303
5304 use super::{
5305 AdminService, FtsPropertyPathMode, FtsPropertyPathSpec, SafeExportOptions,
5306 VectorRegenerationConfig,
5307 };
5308 use crate::embedder::{EmbedderError, QueryEmbedder, QueryEmbedderIdentity};
5309 use crate::projection::ProjectionTarget;
5310 use crate::sqlite;
5311 use crate::{EngineError, OperationalCollectionKind, OperationalRegisterRequest};
5312
5313 #[cfg(feature = "sqlite-vec")]
5314 use crate::{ExecutionCoordinator, TelemetryCounters};
5315
5316 #[cfg(feature = "sqlite-vec")]
5317 use fathomdb_query::QueryBuilder;
5318
5319 #[cfg(feature = "sqlite-vec")]
5320 use super::load_vector_regeneration_config;
5321
5322 #[derive(Debug)]
5326 #[allow(dead_code)]
5327 struct TestEmbedder {
5328 identity: QueryEmbedderIdentity,
5329 vector: Vec<f32>,
5330 }
5331
5332 #[allow(dead_code)]
5333 impl TestEmbedder {
5334 fn new(model: &str, dimension: usize) -> Self {
5335 Self {
5336 identity: QueryEmbedderIdentity {
5337 model_identity: model.to_owned(),
5338 model_version: "1.0.0".to_owned(),
5339 dimension,
5340 normalization_policy: "l2".to_owned(),
5341 },
5342 vector: vec![1.0; dimension],
5343 }
5344 }
5345 }
5346
5347 impl QueryEmbedder for TestEmbedder {
5348 fn embed_query(&self, _text: &str) -> Result<Vec<f32>, EmbedderError> {
5349 Ok(self.vector.clone())
5350 }
5351 fn identity(&self) -> QueryEmbedderIdentity {
5352 self.identity.clone()
5353 }
5354 }
5355
5356 #[derive(Debug)]
5359 #[allow(dead_code)]
5360 struct FailingEmbedder {
5361 identity: QueryEmbedderIdentity,
5362 }
5363
5364 impl QueryEmbedder for FailingEmbedder {
5365 fn embed_query(&self, _text: &str) -> Result<Vec<f32>, EmbedderError> {
5366 Err(EmbedderError::Failed("test failure".to_owned()))
5367 }
5368 fn identity(&self) -> QueryEmbedderIdentity {
5369 self.identity.clone()
5370 }
5371 }
5372
/// Sets the Unix permission bits of `path` to `mode`.
///
/// Panics when the file's metadata cannot be read or the permissions cannot
/// be applied.
#[allow(dead_code)]
#[cfg(unix)]
fn set_file_mode(path: &std::path::Path, mode: u32) {
    use std::os::unix::fs::PermissionsExt;

    let metadata = fs::metadata(path).expect("script metadata");
    let mut updated = metadata.permissions();
    updated.set_mode(mode);
    fs::set_permissions(path, updated).expect("chmod");
}

/// No-op stand-in on platforms without Unix permission bits.
#[allow(dead_code)]
#[cfg(not(unix))]
fn set_file_mode(_path: &std::path::Path, _mode: u32) {}
5386
5387 fn setup() -> (NamedTempFile, AdminService) {
5388 let db = NamedTempFile::new().expect("temp file");
5389 let schema = Arc::new(SchemaManager::new());
5390 {
5391 let conn = sqlite::open_connection(db.path()).expect("connection");
5392 schema.bootstrap(&conn).expect("bootstrap");
5393 }
5394 let service = AdminService::new(db.path(), Arc::clone(&schema));
5395 (db, service)
5396 }
5397
/// A freshly bootstrapped database reports no duplicate active logical ids and
/// no operational inconsistencies.
#[test]
fn check_integrity_includes_active_uniqueness_count() {
    // `_db` keeps the temporary database file alive for the whole test.
    let (_db, admin) = setup();
    let integrity = admin.check_integrity().expect("integrity check");
    assert_eq!(integrity.duplicate_active_logical_ids, 0);
    assert_eq!(integrity.operational_missing_collections, 0);
    assert_eq!(integrity.operational_missing_last_mutations, 0);
}
5406
/// Tracing a source reports the logical ids of the nodes it wrote.
#[test]
fn trace_source_returns_node_logical_ids() {
    let (db, admin) = setup();

    // Seed one node attributed to `source-1`.
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    seed_conn
        .execute(
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 'source-1')",
            [],
        )
        .expect("insert node");
    drop(seed_conn);

    let traced = admin.trace_source("source-1").expect("trace");
    assert_eq!(traced.node_rows, 1);
    assert_eq!(traced.node_logical_ids, vec!["lg1"]);
}
5423
/// Tracing a source also reports the operational mutations it wrote.
#[test]
fn trace_source_includes_operational_mutations() {
    let (db, admin) = setup();

    // Seed a latest-state collection with one mutation from `source-1`.
    let seed_statements = [
        (
            "INSERT INTO operational_collections \
             (name, kind, schema_json, retention_json, format_version, created_at) \
             VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
            "insert collection",
        ),
        (
            "INSERT INTO operational_mutations \
             (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
             VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"ok\"}', 'source-1', 100, 1)",
            "insert mutation",
        ),
    ];
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    for (sql, what) in seed_statements {
        seed_conn.execute(sql, []).expect(what);
    }
    drop(seed_conn);

    let traced = admin.trace_source("source-1").expect("trace");
    assert_eq!(traced.operational_mutation_rows, 1);
    assert_eq!(traced.operational_mutation_ids, vec!["m1"]);
}
5449
/// Excising the source of the active node version makes the previously
/// superseded version active again.
#[test]
fn excise_source_restores_prior_active_node() {
    let (db, admin) = setup();

    // Version 1 (superseded at 200) from `source-1`, version 2 (active) from `source-2`.
    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
             VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
            "insert v1 superseded",
        ),
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
            "insert v2 active",
        ),
    ];
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    for (sql, what) in seed_statements {
        seed_conn.execute(sql, []).expect(what);
    }
    drop(seed_conn);

    admin.excise_source("source-2").expect("excise");

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let restored_row_id: String = verify_conn
        .query_row(
            "SELECT row_id FROM nodes WHERE logical_id = 'lg1' AND superseded_at IS NULL",
            [],
            |row| row.get(0),
        )
        .expect("active row exists after excise");
    assert_eq!(restored_row_id, "r1");
}
5481
/// Excising a source removes its operational mutations and rebuilds the
/// latest-state current row from the surviving mutation.
#[test]
fn excise_source_deletes_operational_mutations_and_repairs_latest_state_current() {
    let (db, admin) = setup();

    // One surviving mutation (`m1`), one to be excised (`m2`), and a current
    // row that points at the mutation about to be removed.
    let seed_statements = [
        (
            "INSERT INTO operational_collections \
             (name, kind, schema_json, retention_json, format_version, created_at) \
             VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
            "insert collection",
        ),
        (
            "INSERT INTO operational_mutations \
             (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
             VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"old\"}', 'source-1', 100, 1)",
            "insert prior mutation",
        ),
        (
            "INSERT INTO operational_mutations \
             (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
             VALUES ('m2', 'connector_health', 'gmail', 'put', '{\"status\":\"new\"}', 'source-2', 200, 2)",
            "insert excised mutation",
        ),
        (
            "INSERT INTO operational_current \
             (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
             VALUES ('connector_health', 'gmail', '{\"status\":\"new\"}', 200, 'm2')",
            "insert current row",
        ),
    ];
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    for (sql, what) in seed_statements {
        seed_conn.execute(sql, []).expect(what);
    }
    drop(seed_conn);

    let before = admin
        .trace_source("source-2")
        .expect("trace before excise");
    assert_eq!(before.operational_mutation_rows, 1);
    assert_eq!(before.operational_mutation_ids, vec!["m2"]);

    let after = admin.excise_source("source-2").expect("excise");
    assert_eq!(after.operational_mutation_rows, 0);
    assert!(after.operational_mutation_ids.is_empty());

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let leftover: i64 = verify_conn
        .query_row(
            "SELECT count(*) FROM operational_mutations WHERE source_ref = 'source-2'",
            [],
            |row| row.get(0),
        )
        .expect("remaining count");
    assert_eq!(leftover, 0);

    let (payload_json, last_mutation_id): (String, String) = verify_conn
        .query_row(
            "SELECT payload_json, last_mutation_id FROM operational_current \
             WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
            [],
            |row| Ok((row.get(0)?, row.get(1)?)),
        )
        .expect("rebuilt current row");
    assert_eq!(payload_json, "{\"status\":\"old\"}");
    assert_eq!(last_mutation_id, "m1");
}
5550
// Restoring `doc-1` after its node and attached edge were retired at the same timestamp
// must report one restored node, edge, chunk and FTS row, and leave exactly one active
// node row, one active edge row and one `fts_nodes` row for `chunk-1`.
#[test]
fn restore_logical_id_reestablishes_last_pre_retire_content_and_attached_edges() {
    let (db, service) = setup();

    // Build the pre-retire graph, record the retire events, mark the rows superseded and
    // empty the FTS table so the restore has something to rebuild.
    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
            "insert node",
        ),
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('node-row-topic', 'topic-1', 'Topic', '{}', 100, 'seed')",
            "insert target node",
        ),
        (
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
            "insert chunk",
        ),
        (
            "INSERT INTO edges \
             (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
             VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 'seed')",
            "insert edge",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
            "insert node retire event",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
            "insert edge retire event",
        ),
        (
            "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
            "retire node",
        ),
        (
            "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
            "retire edge",
        ),
        ("DELETE FROM fts_nodes", "clear fts"),
    ];
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    for &(sql, context) in &seed_statements {
        seed_conn.execute(sql, []).expect(context);
    }
    drop(seed_conn);

    let restored = service.restore_logical_id("doc-1").expect("restore");
    assert_eq!(restored.logical_id, "doc-1");
    assert!(!restored.was_noop);
    assert_eq!(restored.restored_node_rows, 1);
    assert_eq!(restored.restored_edge_rows, 1);
    assert_eq!(restored.restored_chunk_rows, 1);
    assert_eq!(restored.restored_fts_rows, 1);

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    // Runs a single-value count query, panicking with `context` on failure.
    let count_rows = |sql: &str, context: &str| -> i64 {
        verify_conn
            .query_row(sql, [], |row| row.get(0))
            .expect(context)
    };

    let active_nodes = count_rows(
        "SELECT count(*) FROM nodes WHERE logical_id = 'doc-1' AND superseded_at IS NULL",
        "active node count",
    );
    assert_eq!(active_nodes, 1);

    let active_edges = count_rows(
        "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
        "active edge count",
    );
    assert_eq!(active_edges, 1);

    let fts_rows = count_rows(
        "SELECT count(*) FROM fts_nodes WHERE chunk_id = 'chunk-1'",
        "fts count",
    );
    assert_eq!(fts_rows, 1);
}
5641
// The edge here is retired at 201, one tick after the node's retire event at 200;
// restoring `doc-1` must still bring the edge back as an active row.
#[test]
fn restore_logical_id_restores_edges_retired_after_the_node_retire_event() {
    let (db, service) = setup();

    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
            "insert node",
        ),
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('node-row-topic', 'topic-1', 'Topic', '{}', 100, 'seed')",
            "insert target node",
        ),
        (
            "INSERT INTO edges \
             (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
             VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 'seed')",
            "insert edge",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
            "insert node retire event",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 201, '')",
            "insert edge retire event",
        ),
        (
            "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
            "retire node",
        ),
        (
            "UPDATE edges SET superseded_at = 201 WHERE logical_id = 'edge-1'",
            "retire edge",
        ),
    ];
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    for &(sql, context) in &seed_statements {
        seed_conn.execute(sql, []).expect(context);
    }
    drop(seed_conn);

    let restored = service.restore_logical_id("doc-1").expect("restore");
    assert_eq!(restored.restored_edge_rows, 1);

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let active_edges = verify_conn
        .query_row(
            "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
            [],
            |row| row.get::<_, i64>(0),
        )
        .expect("active edge count");
    assert_eq!(active_edges, 1);
}
5703
// Two retired revisions of `doc-1` share identical `created_at`/`superseded_at` values
// and differ only in insertion order; the restore is expected to reactivate the row
// inserted second (`node-row-newer`).
#[test]
fn restore_logical_id_prefers_latest_retired_revision_when_timestamps_tie() {
    let (db, service) = setup();

    let seed_statements = [
        (
            "INSERT INTO nodes \
             (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
             VALUES ('node-row-older', 'doc-1', 'Document', '{\"title\":\"older\"}', 100, 200, 'forget-1')",
            "insert older retired node",
        ),
        (
            "INSERT INTO nodes \
             (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
             VALUES ('node-row-newer', 'doc-1', 'Document', '{\"title\":\"newer\"}', 100, 200, 'forget-1')",
            "insert newer retired node",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-retire-older', 'node_retire', 'doc-1', 'forget-1', 200, '')",
            "insert older retire event",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-retire-newer', 'node_retire', 'doc-1', 'forget-1', 200, '')",
            "insert newer retire event",
        ),
    ];
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    for &(sql, context) in &seed_statements {
        seed_conn.execute(sql, []).expect(context);
    }
    drop(seed_conn);

    let restored = service.restore_logical_id("doc-1").expect("restore");
    assert!(!restored.was_noop);

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let (active_row_id, active_properties): (String, String) = verify_conn
        .query_row(
            "SELECT row_id, properties FROM nodes \
             WHERE logical_id = 'doc-1' AND superseded_at IS NULL",
            [],
            |row| Ok((row.get(0)?, row.get(1)?)),
        )
        .expect("restored active row");
    assert_eq!(active_row_id, "node-row-newer");
    assert_eq!(active_properties, r#"{"title":"newer"}"#);
}
5752
// Purging a retired `doc-1` must delete its node, edge, chunk and FTS rows (one of each
// in the report) and leave exactly one `purge_logical_id` provenance event for it.
#[test]
fn purge_logical_id_removes_retired_content_and_records_tombstone() {
    let (db, service) = setup();

    // Everything is inserted already superseded, together with a matching FTS row.
    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
             VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
            "insert retired node",
        ),
        (
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
            "insert chunk",
        ),
        (
            "INSERT INTO edges \
             (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, superseded_at, source_ref) \
             VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 200, 'seed')",
            "insert retired edge",
        ),
        (
            "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
             VALUES ('chunk-1', 'doc-1', 'Document', 'budget narrative')",
            "insert fts",
        ),
    ];
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    for &(sql, context) in &seed_statements {
        seed_conn.execute(sql, []).expect(context);
    }
    drop(seed_conn);

    let purged = service.purge_logical_id("doc-1").expect("purge");
    assert_eq!(purged.logical_id, "doc-1");
    assert!(!purged.was_noop);
    assert_eq!(purged.deleted_node_rows, 1);
    assert_eq!(purged.deleted_edge_rows, 1);
    assert_eq!(purged.deleted_chunk_rows, 1);
    assert_eq!(purged.deleted_fts_rows, 1);

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    // Runs a single-value count query, panicking with `context` on failure.
    let count_rows = |sql: &str, context: &str| -> i64 {
        verify_conn
            .query_row(sql, [], |row| row.get(0))
            .expect(context)
    };

    let nodes_left = count_rows(
        "SELECT count(*) FROM nodes WHERE logical_id = 'doc-1'",
        "remaining nodes",
    );
    assert_eq!(nodes_left, 0);

    let edges_left = count_rows(
        "SELECT count(*) FROM edges WHERE logical_id = 'edge-1'",
        "remaining edges",
    );
    assert_eq!(edges_left, 0);

    let chunks_left = count_rows(
        "SELECT count(*) FROM chunks WHERE id = 'chunk-1'",
        "remaining chunks",
    );
    assert_eq!(chunks_left, 0);

    let tombstones = count_rows(
        "SELECT count(*) FROM provenance_events WHERE event_type = 'purge_logical_id' AND subject = 'doc-1'",
        "purge events",
    );
    assert_eq!(tombstones, 1);
}
5827
// A chunk whose node exists only as a retired (superseded) row must not be counted as
// an orphaned chunk by `check_semantics`.
#[test]
fn check_semantics_accepts_preserved_retired_chunks() {
    let (db, service) = setup();

    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
             VALUES ('node-row-1', 'doc-1', 'Document', '{}', 100, 200, 'seed')",
            "insert retired node",
        ),
        (
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
            "insert chunk",
        ),
    ];
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    for &(sql, context) in &seed_statements {
        seed_conn.execute(sql, []).expect(context);
    }
    drop(seed_conn);

    let semantics = service.check_semantics().expect("semantics");
    assert_eq!(semantics.orphaned_chunks, 0);
}
5850
// A chunk that references a logical id with no node rows at all (`ghost-doc`) must be
// reported as one orphaned chunk.
#[test]
fn check_semantics_detects_missing_retired_node_history_for_preserved_chunks() {
    let (db, service) = setup();

    let orphan_chunk_sql = "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                            VALUES ('chunk-1', 'ghost-doc', 'budget narrative', 100)";
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    seed_conn
        .execute(orphan_chunk_sql, [])
        .expect("insert orphaned chunk");
    drop(seed_conn);

    let semantics = service.check_semantics().expect("semantics");
    assert_eq!(semantics.orphaned_chunks, 1);
}
5867
// With the `sqlite-vec` feature: a chunk and its vector row that reference a logical id
// with no node rows must be reported both as an orphaned chunk and under
// `vec_rows_for_superseded_nodes`.
#[cfg(feature = "sqlite-vec")]
#[test]
fn check_semantics_detects_missing_retired_node_history_for_preserved_vec_rows() {
    let (db, service) = setup();

    let vec_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    service
        .schema_manager
        .ensure_vector_profile(&vec_conn, "default", "vec_nodes_active", 4)
        .expect("ensure vec profile");

    let seed_statements = [
        (
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'ghost-doc', 'budget narrative', 100)",
            "insert orphaned chunk",
        ),
        (
            "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
            "insert vec row",
        ),
    ];
    for &(sql, context) in &seed_statements {
        vec_conn.execute(sql, []).expect(context);
    }
    drop(vec_conn);

    let semantics = service.check_semantics().expect("semantics");
    assert_eq!(semantics.orphaned_chunks, 1);
    assert_eq!(semantics.vec_rows_for_superseded_nodes, 1);
}
5895
// With the `sqlite-vec` feature: a retired node whose chunk and vector row were kept
// must, after `restore_logical_id`, be returned by a vector search again without any
// new ingest step.
#[cfg(feature = "sqlite-vec")]
#[test]
fn restore_logical_id_reestablishes_vector_search_without_reingest() {
    let (db, service) = setup();

    let vec_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    service
        .schema_manager
        .ensure_vector_profile(&vec_conn, "default", "vec_nodes_active", 4)
        .expect("ensure vec profile");

    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
             VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
            "insert retired node",
        ),
        (
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
            "insert chunk",
        ),
        (
            "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
            "insert vec row",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
            "insert retire event",
        ),
    ];
    for &(sql, context) in &seed_statements {
        vec_conn.execute(sql, []).expect(context);
    }
    drop(vec_conn);

    let restored = service.restore_logical_id("doc-1").expect("restore");
    assert_eq!(restored.restored_vec_rows, 1);

    // Read back through a freshly opened coordinator rather than the admin service.
    let schema = Arc::new(SchemaManager::new());
    let telemetry = Arc::new(TelemetryCounters::default());
    let coordinator = ExecutionCoordinator::open(db.path(), schema, Some(4), 1, telemetry, None)
        .expect("coordinator");

    let query = QueryBuilder::nodes("Document")
        .vector_search("[0.0, 0.0, 0.0, 0.0]", 5)
        .compile()
        .expect("compile");
    let results = coordinator
        .execute_compiled_read(&query)
        .expect("vector read");

    let doc_is_visible = results
        .nodes
        .iter()
        .any(|node| node.logical_id == "doc-1");
    assert!(
        doc_is_visible,
        "restore should make the preserved vec row visible again without re-ingest"
    );
}
5955
// With the `sqlite-vec` feature: purging a retired `doc-1` must report one deleted
// vector row and leave `vec_nodes_active` empty.
#[cfg(feature = "sqlite-vec")]
#[test]
fn purge_logical_id_deletes_vec_rows_for_retired_content() {
    let (db, service) = setup();

    let seed_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    service
        .schema_manager
        .ensure_vector_profile(&seed_conn, "default", "vec_nodes_active", 4)
        .expect("ensure vec profile");

    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
             VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
            "insert retired node",
        ),
        (
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
            "insert chunk",
        ),
        (
            "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
            "insert vec row",
        ),
    ];
    for &(sql, context) in &seed_statements {
        seed_conn.execute(sql, []).expect(context);
    }
    drop(seed_conn);

    let purged = service.purge_logical_id("doc-1").expect("purge");
    assert_eq!(purged.deleted_vec_rows, 1);

    let verify_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    let remaining_vec_rows = verify_conn
        .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
            row.get::<_, i64>(0)
        })
        .expect("vec count");
    assert_eq!(remaining_vec_rows, 0);
}
5996
// With the `sqlite-vec` feature: vectors produced by `regenerate_vector_embeddings`
// for an active node must become searchable again once the node has been retired and
// then restored.
#[cfg(feature = "sqlite-vec")]
#[test]
fn restore_logical_id_restores_visibility_of_regenerated_vectors() {
    let (db, service) = setup();

    // Phase 1: active node plus chunk, with the vector profile in place.
    let seed_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    service
        .schema_manager
        .ensure_vector_profile(&seed_conn, "default", "vec_nodes_active", 4)
        .expect("ensure vec profile");
    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
            "insert node",
        ),
        (
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
            "insert chunk",
        ),
    ];
    for &(sql, context) in &seed_statements {
        seed_conn.execute(sql, []).expect(context);
    }
    drop(seed_conn);

    // Phase 2: regenerate embeddings through the service with the test embedder.
    let embedder = TestEmbedder::new("test-model", 4);
    let regeneration = VectorRegenerationConfig {
        profile: String::from("default"),
        table_name: String::from("vec_nodes_active"),
        chunking_policy: String::from("per_chunk"),
        preprocessing_policy: String::from("trim"),
    };
    service
        .regenerate_vector_embeddings(&embedder, &regeneration)
        .expect("regenerate");

    // Phase 3: retire the node by hand, recording the matching provenance event.
    let retire_statements = [
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
            "insert retire event",
        ),
        (
            "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
            "retire node",
        ),
    ];
    let retire_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    for &(sql, context) in &retire_statements {
        retire_conn.execute(sql, []).expect(context);
    }
    drop(retire_conn);

    let restored = service.restore_logical_id("doc-1").expect("restore");
    assert_eq!(restored.restored_vec_rows, 1);

    let schema = Arc::new(SchemaManager::new());
    let telemetry = Arc::new(TelemetryCounters::default());
    let coordinator = ExecutionCoordinator::open(db.path(), schema, Some(4), 1, telemetry, None)
        .expect("coordinator");

    let query = QueryBuilder::nodes("Document")
        .vector_search("[0.0, 0.0, 0.0, 0.0]", 5)
        .compile()
        .expect("compile");
    let results = coordinator
        .execute_compiled_read(&query)
        .expect("vector read");

    let doc_is_visible = results
        .nodes
        .iter()
        .any(|node| node.logical_id == "doc-1");
    assert!(
        doc_is_visible,
        "restored logical_id should become visible through regenerated vectors"
    );
}
6074
// On a database straight out of `setup()`, every counter in the semantics report must
// be zero and the warnings list must be empty.
#[test]
fn check_semantics_clean_db_returns_zeros() {
    let (_db, service) = setup();
    let semantics = service.check_semantics().expect("semantics check");

    // Graph and runtime-table counters.
    assert_eq!(semantics.orphaned_chunks, 0);
    assert_eq!(semantics.null_source_ref_nodes, 0);
    assert_eq!(semantics.broken_step_fk, 0);
    assert_eq!(semantics.broken_action_fk, 0);

    // Full-text, edge and supersession counters.
    assert_eq!(semantics.stale_fts_rows, 0);
    assert_eq!(semantics.fts_rows_for_superseded_nodes, 0);
    assert_eq!(semantics.dangling_edges, 0);
    assert_eq!(semantics.orphaned_supersession_chains, 0);

    // Vector counters.
    assert_eq!(semantics.stale_vec_rows, 0);
    assert_eq!(semantics.vec_rows_for_superseded_nodes, 0);

    // Operational-store counters.
    assert_eq!(semantics.missing_operational_current_rows, 0);
    assert_eq!(semantics.stale_operational_current_rows, 0);
    assert_eq!(semantics.disabled_collection_mutations, 0);

    // Property full-text counters.
    assert_eq!(semantics.mismatched_kind_property_fts_rows, 0);
    assert_eq!(semantics.duplicate_property_fts_rows, 0);
    assert_eq!(semantics.drifted_property_fts_rows, 0);

    assert!(semantics.warnings.is_empty());
}
6097
// Registering a collection must return a record echoing the request, make the same
// record available through `describe_operational_collection`, and write exactly one
// `operational_collection_registered` provenance event for it.
#[test]
fn register_operational_collection_persists_and_emits_provenance() {
    let (db, service) = setup();

    let request = OperationalRegisterRequest {
        name: String::from("connector_health"),
        kind: OperationalCollectionKind::LatestState,
        schema_json: String::from("{}"),
        retention_json: String::from("{}"),
        filter_fields_json: String::from("[]"),
        validation_json: String::new(),
        secondary_indexes_json: String::from("[]"),
        format_version: 1,
    };
    let registered = service
        .register_operational_collection(&request)
        .expect("register collection");

    assert_eq!(registered.name, "connector_health");
    assert_eq!(registered.kind, OperationalCollectionKind::LatestState);
    assert_eq!(registered.schema_json, "{}");
    assert_eq!(registered.retention_json, "{}");
    assert_eq!(registered.filter_fields_json, "[]");
    assert!(registered.created_at > 0);
    assert_eq!(registered.disabled_at, None);

    let lookup = service
        .describe_operational_collection("connector_health")
        .expect("describe collection");
    let described = lookup.expect("collection exists");
    assert_eq!(described, registered);

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let registration_events = verify_conn
        .query_row(
            "SELECT count(*) FROM provenance_events \
             WHERE event_type = 'operational_collection_registered' AND subject = 'connector_health'",
            [],
            |row| row.get::<_, i64>(0),
        )
        .expect("provenance count");
    assert_eq!(registration_events, 1);
}
6139
// A collection registered with an empty validation contract must accept a later
// contract update, return and describe the new contract verbatim, and record one
// `operational_collection_validation_updated` provenance event.
#[test]
fn register_and_update_operational_collection_validation_round_trip() {
    let (db, service) = setup();

    let request = OperationalRegisterRequest {
        name: String::from("connector_health"),
        kind: OperationalCollectionKind::LatestState,
        schema_json: String::from("{}"),
        retention_json: String::from("{}"),
        filter_fields_json: String::from("[]"),
        validation_json: String::new(),
        secondary_indexes_json: String::from("[]"),
        format_version: 1,
    };
    let registered = service
        .register_operational_collection(&request)
        .expect("register collection");
    assert_eq!(registered.validation_json, "");

    let contract = r#"{"format_version":1,"mode":"enforce","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#;
    let after_update = service
        .update_operational_collection_validation("connector_health", contract)
        .expect("update validation");
    assert_eq!(after_update.validation_json, contract);

    let lookup = service
        .describe_operational_collection("connector_health")
        .expect("describe collection");
    let described = lookup.expect("collection exists");
    assert_eq!(described.validation_json, contract);

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let update_events = verify_conn
        .query_row(
            "SELECT count(*) FROM provenance_events \
             WHERE event_type = 'operational_collection_validation_updated' \
             AND subject = 'connector_health'",
            [],
            |row| row.get::<_, i64>(0),
        )
        .expect("provenance count");
    assert_eq!(update_events, 1);
}
6181
// For an append-only collection with two seeded mutations: updating the secondary
// index definitions must yield two `actor_ts` entries, and after those entries are
// deleted by hand a rebuild must report two mutation entries and zero current entries.
#[test]
fn register_update_and_rebuild_operational_secondary_indexes_round_trip() {
    let (db, service) = setup();

    let request = OperationalRegisterRequest {
        name: String::from("audit_log"),
        kind: OperationalCollectionKind::AppendOnlyLog,
        schema_json: String::from("{}"),
        retention_json: String::from(r#"{"mode":"keep_all"}"#),
        filter_fields_json: String::from(
            r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#,
        ),
        validation_json: String::new(),
        secondary_indexes_json: String::from("[]"),
        format_version: 1,
    };
    let registered = service
        .register_operational_collection(&request)
        .expect("register collection");
    assert_eq!(registered.secondary_indexes_json, "[]");

    // Seed two append mutations through a short-lived writer.
    let writer = crate::WriterActor::start(
        db.path(),
        Arc::new(SchemaManager::new()),
        crate::ProvenanceMode::Warn,
        Arc::new(crate::TelemetryCounters::default()),
    )
    .expect("writer");
    let seed_request = crate::WriteRequest {
        label: String::from("secondary-index-seed"),
        nodes: Vec::new(),
        node_retires: Vec::new(),
        edges: Vec::new(),
        edge_retires: Vec::new(),
        chunks: Vec::new(),
        runs: Vec::new(),
        steps: Vec::new(),
        actions: Vec::new(),
        optional_backfills: Vec::new(),
        vec_inserts: Vec::new(),
        operational_writes: vec![
            crate::OperationalWrite::Append {
                collection: String::from("audit_log"),
                record_key: String::from("evt-1"),
                payload_json: String::from(r#"{"actor":"alice","ts":100}"#),
                source_ref: Some(String::from("src-1")),
            },
            crate::OperationalWrite::Append {
                collection: String::from("audit_log"),
                record_key: String::from("evt-2"),
                payload_json: String::from(r#"{"actor":"bob","ts":200}"#),
                source_ref: Some(String::from("src-2")),
            },
        ],
    };
    writer.submit(seed_request).expect("seed writes");
    drop(writer);

    let index_definitions = r#"[{"name":"actor_ts","kind":"append_only_field_time","field":"actor","value_type":"string","time_field":"ts"}]"#;
    let after_update = service
        .update_operational_collection_secondary_indexes("audit_log", index_definitions)
        .expect("update secondary indexes");
    assert_eq!(after_update.secondary_indexes_json, index_definitions);

    // Check the entries written by the update, then wipe them; the connection is
    // closed at the end of this scope, before the rebuild runs.
    {
        let conn = sqlite::open_connection(db.path()).expect("conn");
        let indexed_entries = conn
            .query_row(
                "SELECT count(*) FROM operational_secondary_index_entries \
                 WHERE collection_name = 'audit_log' AND index_name = 'actor_ts'",
                [],
                |row| row.get::<_, i64>(0),
            )
            .expect("secondary index count");
        assert_eq!(indexed_entries, 2);
        conn.execute(
            "DELETE FROM operational_secondary_index_entries WHERE collection_name = 'audit_log'",
            [],
        )
        .expect("clear index entries");
    }

    let rebuilt = service
        .rebuild_operational_secondary_indexes("audit_log")
        .expect("rebuild secondary indexes");
    assert_eq!(rebuilt.collection_name, "audit_log");
    assert_eq!(rebuilt.mutation_entries_rebuilt, 2);
    assert_eq!(rebuilt.current_entries_rebuilt, 0);
}
6268
// A validation contract that puts `minimum` on a string field must be rejected at
// registration with `EngineError::InvalidWrite` and a message containing
// "minimum/maximum".
#[test]
fn register_operational_collection_rejects_invalid_validation_contract() {
    let (_db, service) = setup();

    let bad_contract = r#"{"format_version":1,"mode":"enforce","fields":[{"name":"status","type":"string","minimum":0}]}"#;
    let request = OperationalRegisterRequest {
        name: String::from("connector_health"),
        kind: OperationalCollectionKind::LatestState,
        schema_json: String::from("{}"),
        retention_json: String::from("{}"),
        filter_fields_json: String::from("[]"),
        validation_json: String::from(bad_contract),
        secondary_indexes_json: String::from("[]"),
        format_version: 1,
    };

    let rejection = service
        .register_operational_collection(&request)
        .expect_err("invalid validation contract should reject");

    assert!(matches!(rejection, EngineError::InvalidWrite(_)));
    let rendered = rejection.to_string();
    assert!(rendered.contains("minimum/maximum"));
}
6290
// With the validation contract in `disabled` mode both appended payloads are written;
// history validation must then flag only the `bogus` row, leave both mutations in
// place, and emit no `operational_collection_history_validated` provenance event.
#[test]
fn validate_operational_collection_history_reports_invalid_rows_without_mutation() {
    let (db, service) = setup();

    let contract = r#"{"format_version":1,"mode":"disabled","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#;
    let request = OperationalRegisterRequest {
        name: String::from("audit_log"),
        kind: OperationalCollectionKind::AppendOnlyLog,
        schema_json: String::from("{}"),
        retention_json: String::from(r#"{"mode":"keep_all"}"#),
        filter_fields_json: String::from("[]"),
        validation_json: String::from(contract),
        secondary_indexes_json: String::from("[]"),
        format_version: 1,
    };
    service
        .register_operational_collection(&request)
        .expect("register collection");

    // Append one payload that satisfies the contract and one that does not.
    let writer = crate::WriterActor::start(
        db.path(),
        Arc::new(SchemaManager::new()),
        crate::ProvenanceMode::Warn,
        Arc::new(crate::TelemetryCounters::default()),
    )
    .expect("writer");
    let history_request = crate::WriteRequest {
        label: String::from("history-validation"),
        nodes: Vec::new(),
        node_retires: Vec::new(),
        edges: Vec::new(),
        edge_retires: Vec::new(),
        chunks: Vec::new(),
        runs: Vec::new(),
        steps: Vec::new(),
        actions: Vec::new(),
        optional_backfills: Vec::new(),
        vec_inserts: Vec::new(),
        operational_writes: vec![
            crate::OperationalWrite::Append {
                collection: String::from("audit_log"),
                record_key: String::from("evt-1"),
                payload_json: String::from(r#"{"status":"ok"}"#),
                source_ref: Some(String::from("src-1")),
            },
            crate::OperationalWrite::Append {
                collection: String::from("audit_log"),
                record_key: String::from("evt-2"),
                payload_json: String::from(r#"{"status":"bogus"}"#),
                source_ref: Some(String::from("src-2")),
            },
        ],
    };
    writer.submit(history_request).expect("write");
    drop(writer);

    let validation = service
        .validate_operational_collection_history("audit_log")
        .expect("validate history");
    assert_eq!(validation.collection_name, "audit_log");
    assert_eq!(validation.checked_rows, 2);
    assert_eq!(validation.invalid_row_count, 1);
    assert_eq!(validation.issues.len(), 1);
    let only_issue = &validation.issues[0];
    assert_eq!(only_issue.record_key, "evt-2");
    assert!(only_issue.message.contains("must be one of"));

    // Validation is read-only: both mutations are still traced afterwards.
    let traced = service
        .trace_operational_collection("audit_log", None)
        .expect("trace");
    assert_eq!(traced.mutation_count, 2);

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let validation_events = verify_conn
        .query_row(
            "SELECT count(*) FROM provenance_events \
             WHERE event_type = 'operational_collection_history_validated' \
             AND subject = 'audit_log'",
            [],
            |row| row.get::<_, i64>(0),
        )
        .expect("provenance count");
    assert_eq!(validation_events, 0);
}
6373
// After a single `Put` into a latest-state collection, tracing that collection for the
// written key must return one mutation of kind `put` and one current row carrying the
// written payload.
#[test]
fn trace_operational_collection_returns_mutations_and_current_rows() {
    let (db, service) = setup();

    let request = OperationalRegisterRequest {
        name: String::from("connector_health"),
        kind: OperationalCollectionKind::LatestState,
        schema_json: String::from("{}"),
        retention_json: String::from("{}"),
        filter_fields_json: String::from("[]"),
        validation_json: String::new(),
        secondary_indexes_json: String::from("[]"),
        format_version: 1,
    };
    service
        .register_operational_collection(&request)
        .expect("register collection");

    let writer = crate::WriterActor::start(
        db.path(),
        Arc::new(SchemaManager::new()),
        crate::ProvenanceMode::Warn,
        Arc::new(crate::TelemetryCounters::default()),
    )
    .expect("writer");
    let put_request = crate::WriteRequest {
        label: String::from("operational"),
        nodes: Vec::new(),
        node_retires: Vec::new(),
        edges: Vec::new(),
        edge_retires: Vec::new(),
        chunks: Vec::new(),
        runs: Vec::new(),
        steps: Vec::new(),
        actions: Vec::new(),
        optional_backfills: Vec::new(),
        vec_inserts: Vec::new(),
        operational_writes: vec![crate::OperationalWrite::Put {
            collection: String::from("connector_health"),
            record_key: String::from("gmail"),
            payload_json: String::from(r#"{"status":"ok"}"#),
            source_ref: Some(String::from("src-1")),
        }],
    };
    writer.submit(put_request).expect("write");
    drop(writer);

    let traced = service
        .trace_operational_collection("connector_health", Some("gmail"))
        .expect("trace");
    assert_eq!(traced.collection_name, "connector_health");
    assert_eq!(traced.record_key.as_deref(), Some("gmail"));
    assert_eq!(traced.mutation_count, 1);
    assert_eq!(traced.current_count, 1);

    let first_mutation = &traced.mutations[0];
    assert_eq!(first_mutation.op_kind, "put");
    let first_current = &traced.current_rows[0];
    assert_eq!(first_current.payload_json, r#"{"status":"ok"}"#);
}
6430
// Tracing a collection name that was never registered must fail with
// `EngineError::InvalidWrite` and a message containing "is not registered".
#[test]
fn trace_operational_collection_rejects_unknown_collection() {
    let (_db, service) = setup();

    let outcome = service.trace_operational_collection("missing_collection", None);
    let failure = outcome.expect_err("unknown collection should fail");

    assert!(matches!(failure, EngineError::InvalidWrite(_)));
    let rendered = failure.to_string();
    assert!(rendered.contains("is not registered"));
}
6442
/// After a `latest_state` record's `operational_current` row is deleted
/// directly in SQLite, `check_semantics` reports one missing row and
/// `rebuild_operational_current` re-creates it with the original payload.
#[test]
fn rebuild_operational_current_repairs_missing_latest_state_rows() {
    let (db, service) = setup();

    let registration = OperationalRegisterRequest {
        name: String::from("connector_health"),
        kind: OperationalCollectionKind::LatestState,
        schema_json: String::from("{}"),
        retention_json: String::from("{}"),
        filter_fields_json: String::from("[]"),
        validation_json: String::new(),
        secondary_indexes_json: String::from("[]"),
        format_version: 1,
    };
    service
        .register_operational_collection(&registration)
        .expect("register collection");

    // Record a single `put` through a writer that only lives for this scope.
    {
        let gmail_put = crate::OperationalWrite::Put {
            collection: String::from("connector_health"),
            record_key: String::from("gmail"),
            payload_json: String::from(r#"{"status":"ok"}"#),
            source_ref: Some(String::from("src-1")),
        };
        let writer = crate::WriterActor::start(
            db.path(),
            Arc::new(SchemaManager::new()),
            crate::ProvenanceMode::Warn,
            Arc::new(crate::TelemetryCounters::default()),
        )
        .expect("writer");
        let request = crate::WriteRequest {
            label: String::from("operational"),
            nodes: Vec::new(),
            node_retires: Vec::new(),
            edges: Vec::new(),
            edge_retires: Vec::new(),
            chunks: Vec::new(),
            runs: Vec::new(),
            steps: Vec::new(),
            actions: Vec::new(),
            optional_backfills: Vec::new(),
            vec_inserts: Vec::new(),
            operational_writes: vec![gmail_put],
        };
        writer.submit(request).expect("write");
    }

    // Simulate the damage: remove the current row behind the engine's back.
    {
        let conn = sqlite::open_connection(db.path()).expect("conn");
        let delete_current = "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'";
        conn.execute(delete_current, []).expect("delete current row");
    }

    let semantics_before = service.check_semantics().expect("semantics before rebuild");
    assert_eq!(semantics_before.missing_operational_current_rows, 1);

    let outcome = service
        .rebuild_operational_current(Some("connector_health"))
        .expect("rebuild current");
    assert_eq!(outcome.collections_rebuilt, 1);
    assert_eq!(outcome.current_rows_rebuilt, 1);

    let semantics_after = service.check_semantics().expect("semantics after rebuild");
    assert_eq!(semantics_after.missing_operational_current_rows, 0);

    // The restored row must carry the payload of the original `put`.
    let conn = sqlite::open_connection(db.path()).expect("conn");
    let restored = conn
        .query_row(
            "SELECT payload_json FROM operational_current \
             WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
            [],
            |row| row.get::<_, String>(0),
        )
        .expect("restored payload");
    assert_eq!(restored, r#"{"status":"ok"}"#);
}
6520
/// With a `latest_state_field` secondary index declared on `status`, exactly
/// one `current` index entry exists after the initial write and exactly one
/// exists again after the current row is deleted and
/// `rebuild_operational_current` is run.
#[test]
fn rebuild_operational_current_restores_latest_state_secondary_index_entries() {
    let (db, service) = setup();

    // Counts the `current`-subject index entries of the collection under test.
    let count_current_entries = "SELECT count(*) FROM operational_secondary_index_entries \
                                 WHERE collection_name = 'connector_health' AND subject_kind = 'current'";

    let registration = OperationalRegisterRequest {
        name: String::from("connector_health"),
        kind: OperationalCollectionKind::LatestState,
        schema_json: String::from("{}"),
        retention_json: String::from("{}"),
        filter_fields_json: String::from("[]"),
        validation_json: String::new(),
        secondary_indexes_json: String::from(
            r#"[{"name":"status_current","kind":"latest_state_field","field":"status","value_type":"string"}]"#,
        ),
        format_version: 1,
    };
    service
        .register_operational_collection(&registration)
        .expect("register collection");

    // Record a single `put` through a writer that only lives for this scope.
    {
        let gmail_put = crate::OperationalWrite::Put {
            collection: String::from("connector_health"),
            record_key: String::from("gmail"),
            payload_json: String::from(r#"{"status":"ok"}"#),
            source_ref: Some(String::from("src-1")),
        };
        let writer = crate::WriterActor::start(
            db.path(),
            Arc::new(SchemaManager::new()),
            crate::ProvenanceMode::Warn,
            Arc::new(crate::TelemetryCounters::default()),
        )
        .expect("writer");
        let request = crate::WriteRequest {
            label: String::from("operational"),
            nodes: Vec::new(),
            node_retires: Vec::new(),
            edges: Vec::new(),
            edge_retires: Vec::new(),
            chunks: Vec::new(),
            runs: Vec::new(),
            steps: Vec::new(),
            actions: Vec::new(),
            optional_backfills: Vec::new(),
            vec_inserts: Vec::new(),
            operational_writes: vec![gmail_put],
        };
        writer.submit(request).expect("write");
    }

    {
        let conn = sqlite::open_connection(db.path()).expect("conn");
        let entries_before = conn
            .query_row(count_current_entries, [], |row| row.get::<_, i64>(0))
            .expect("secondary index count before repair");
        assert_eq!(entries_before, 1);

        // NOTE(review): only the `operational_current` row is deleted here; the
        // index entry counted above is not removed by this test. Whether the
        // final count proves "restoration" therefore depends on what the
        // rebuild does with existing entries — confirm against the implementation.
        let delete_current = "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'";
        conn.execute(delete_current, []).expect("delete current row");
    }

    service
        .rebuild_operational_current(Some("connector_health"))
        .expect("rebuild current");

    let conn = sqlite::open_connection(db.path()).expect("conn");
    let entries_after = conn
        .query_row(count_current_entries, [], |row| row.get::<_, i64>(0))
        .expect("secondary index count after repair");
    assert_eq!(entries_after, 1);
}
6599
/// Seeds three mutations for one key that share `created_at = 100` and whose
/// ids (`m3`, `m2`, `m1`) sort opposite to their `mutation_order` (1, 2, 3).
/// The semantics check and the rebuild must both treat the final `put`
/// (`mutation_order` 3, payload `{"status":"new"}`) as the latest state.
#[test]
fn operational_current_semantics_and_rebuild_follow_mutation_order() {
    let (db, service) = setup();
    {
        let conn = sqlite::open_connection(db.path()).expect("conn");
        // Each entry is (statement, panic message if the statement fails).
        let seeds = [
            (
                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
                 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
                "seed collection",
            ),
            (
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES ('m3', 'connector_health', 'gmail', 'put', '{\"status\":\"old\"}', 'src-1', 100, 1)",
                "seed first put",
            ),
            (
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES ('m2', 'connector_health', 'gmail', 'delete', '', 'src-2', 100, 2)",
                "seed delete",
            ),
            (
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"new\"}', 'src-3', 100, 3)",
                "seed final put",
            ),
            (
                "INSERT INTO operational_current \
                 (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
                 VALUES ('connector_health', 'gmail', '{\"status\":\"new\"}', 100, 'm1')",
                "seed current",
            ),
        ];
        for (statement, failure_message) in seeds {
            conn.execute(statement, []).expect(failure_message);
        }
    }

    // The seeded current row already matches the latest mutation.
    let consistent = service.check_semantics().expect("semantics before rebuild");
    assert_eq!(consistent.missing_operational_current_rows, 0);
    assert_eq!(consistent.stale_operational_current_rows, 0);

    {
        let conn = sqlite::open_connection(db.path()).expect("conn");
        let delete_current = "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'";
        conn.execute(delete_current, []).expect("delete current row");
    }

    let damaged = service.check_semantics().expect("semantics after delete");
    assert_eq!(damaged.missing_operational_current_rows, 1);
    assert_eq!(damaged.stale_operational_current_rows, 0);

    service
        .rebuild_operational_current(Some("connector_health"))
        .expect("rebuild current");

    let repaired = service.check_semantics().expect("semantics after rebuild");
    assert_eq!(repaired.missing_operational_current_rows, 0);
    assert_eq!(repaired.stale_operational_current_rows, 0);

    let conn = sqlite::open_connection(db.path()).expect("conn");
    let restored = conn
        .query_row(
            "SELECT payload_json FROM operational_current \
             WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
            [],
            |row| row.get::<_, String>(0),
        )
        .expect("restored payload");
    assert_eq!(restored, r#"{"status":"new"}"#);
}
6677
/// Disabling a collection stamps `disabled_at` (visible through
/// `describe_operational_collection`), makes later writes fail with
/// `InvalidWrite` / "is disabled", and records exactly one
/// `operational_collection_disabled` provenance event.
#[test]
fn disable_operational_collection_sets_disabled_at_and_emits_provenance() {
    let (db, service) = setup();

    let registration = OperationalRegisterRequest {
        name: String::from("audit_log"),
        kind: OperationalCollectionKind::AppendOnlyLog,
        schema_json: String::from("{}"),
        retention_json: String::from(r#"{"mode":"keep_all"}"#),
        filter_fields_json: String::from("[]"),
        validation_json: String::new(),
        secondary_indexes_json: String::from("[]"),
        format_version: 1,
    };
    service
        .register_operational_collection(&registration)
        .expect("register collection");

    let record = service
        .disable_operational_collection("audit_log")
        .expect("disable collection");
    assert_eq!(record.name, "audit_log");
    assert!(record.disabled_at.is_some());

    // The timestamp returned by the call must be the one that was persisted.
    let disabled_at = record.disabled_at.expect("disabled_at");
    let described = service
        .describe_operational_collection("audit_log")
        .expect("describe collection")
        .expect("collection exists");
    assert_eq!(described.disabled_at, Some(disabled_at));

    let writer = crate::WriterActor::start(
        db.path(),
        Arc::new(SchemaManager::new()),
        crate::ProvenanceMode::Warn,
        Arc::new(crate::TelemetryCounters::default()),
    )
    .expect("writer");
    let append_to_disabled = crate::WriteRequest {
        label: String::from("disabled-operational"),
        nodes: Vec::new(),
        node_retires: Vec::new(),
        edges: Vec::new(),
        edge_retires: Vec::new(),
        chunks: Vec::new(),
        runs: Vec::new(),
        steps: Vec::new(),
        actions: Vec::new(),
        optional_backfills: Vec::new(),
        vec_inserts: Vec::new(),
        operational_writes: vec![crate::OperationalWrite::Append {
            collection: String::from("audit_log"),
            record_key: String::from("evt-1"),
            payload_json: String::from(r#"{"type":"sync"}"#),
            source_ref: Some(String::from("src-1")),
        }],
    };
    let rejection = writer
        .submit(append_to_disabled)
        .expect_err("disabled collection should reject writes");
    assert!(matches!(rejection, EngineError::InvalidWrite(_)));
    assert!(rejection.to_string().contains("is disabled"));

    let conn = sqlite::open_connection(db.path()).expect("conn");
    let disabled_events = conn
        .query_row(
            "SELECT count(*) FROM provenance_events \
             WHERE event_type = 'operational_collection_disabled' AND subject = 'audit_log'",
            [],
            |row| row.get::<_, i64>(0),
        )
        .expect("provenance count");
    assert_eq!(disabled_events, 1);
}
6749
/// Purging an append-only log with cutoff 250 removes the two events created
/// at 100 and 200, keeps the one created at 300, and records exactly one
/// `operational_collection_purged` provenance event.
#[test]
fn purge_operational_collection_deletes_append_only_rows_before_cutoff() {
    let (db, service) = setup();
    {
        let conn = sqlite::open_connection(db.path()).expect("conn");
        // Each entry is (statement, panic message if the statement fails).
        let seeds = [
            (
                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_all\"}', 1, 100)",
                "seed collection",
            ),
            (
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES ('evt-1', 'audit_log', 'evt-1', 'append', '{\"seq\":1}', 'src-1', 100, 1)",
                "seed event 1",
            ),
            (
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES ('evt-2', 'audit_log', 'evt-2', 'append', '{\"seq\":2}', 'src-2', 200, 2)",
                "seed event 2",
            ),
            (
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES ('evt-3', 'audit_log', 'evt-3', 'append', '{\"seq\":3}', 'src-3', 300, 3)",
                "seed event 3",
            ),
        ];
        for (statement, failure_message) in seeds {
            conn.execute(statement, []).expect(failure_message);
        }
    }

    let outcome = service
        .purge_operational_collection("audit_log", 250)
        .expect("purge collection");
    assert_eq!(outcome.collection_name, "audit_log");
    assert_eq!(outcome.deleted_mutations, 2);
    assert_eq!(outcome.before_timestamp, 250);

    let conn = sqlite::open_connection(db.path()).expect("conn");
    let surviving_ids = {
        let mut stmt = conn
            .prepare(
                "SELECT id FROM operational_mutations \
                 WHERE collection_name = 'audit_log' ORDER BY mutation_order",
            )
            .expect("stmt");
        let ids: Vec<String> = stmt
            .query_map([], |row| row.get(0))
            .expect("rows")
            .collect::<Result<_, _>>()
            .expect("collect");
        ids
    };
    assert_eq!(surviving_ids, vec![String::from("evt-3")]);

    let purge_events = conn
        .query_row(
            "SELECT count(*) FROM provenance_events \
             WHERE event_type = 'operational_collection_purged' AND subject = 'audit_log'",
            [],
            |row| row.get::<_, i64>(0),
        )
        .expect("provenance count");
    assert_eq!(purge_events, 1);
}
6815
/// A dry-run compaction of a `keep_last` (`max_rows` 2) log holding three
/// events reports one deletable mutation, yet leaves all three rows in place
/// and records no `operational_collection_compacted` provenance event.
#[test]
fn compact_operational_collection_dry_run_reports_without_mutation() {
    let (db, service) = setup();
    {
        let conn = sqlite::open_connection(db.path()).expect("conn");
        conn.execute(
            "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
             VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
            [],
        )
        .expect("seed collection");
        // Events 1..=3, created at 100, 200 and 300 respectively.
        for index in 1_i64..=3 {
            let created_at = index * 100;
            let event_id = format!("evt-{index}");
            let payload = format!("{{\"seq\":{index}}}");
            conn.execute(
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
                rusqlite::params![event_id, payload, created_at, index],
            )
            .expect("seed event");
        }
    }

    let outcome = service
        .compact_operational_collection("audit_log", true)
        .expect("compact collection");
    assert_eq!(outcome.collection_name, "audit_log");
    assert_eq!(outcome.deleted_mutations, 1);
    assert!(outcome.dry_run);
    assert_eq!(outcome.before_timestamp, None);

    // Nothing may actually have been deleted or logged.
    let conn = sqlite::open_connection(db.path()).expect("conn");
    let mutation_rows = conn
        .query_row(
            "SELECT count(*) FROM operational_mutations WHERE collection_name = 'audit_log'",
            [],
            |row| row.get::<_, i64>(0),
        )
        .expect("remaining count");
    assert_eq!(mutation_rows, 3);
    let compaction_events = conn
        .query_row(
            "SELECT count(*) FROM provenance_events \
             WHERE event_type = 'operational_collection_compacted' AND subject = 'audit_log'",
            [],
            |row| row.get::<_, i64>(0),
        )
        .expect("provenance count");
    assert_eq!(compaction_events, 0);
}
6870
/// A real (non-dry-run) compaction of a `keep_last` (`max_rows` 2) log
/// holding three events deletes the oldest one, keeps `evt-2` and `evt-3`,
/// and records exactly one `operational_collection_compacted` provenance event.
#[test]
fn compact_operational_collection_keep_last_deletes_oldest_rows() {
    let (db, service) = setup();
    {
        let conn = sqlite::open_connection(db.path()).expect("conn");
        conn.execute(
            "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
             VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
            [],
        )
        .expect("seed collection");
        // Events 1..=3, created at 100, 200 and 300 respectively.
        for index in 1_i64..=3 {
            let created_at = index * 100;
            let event_id = format!("evt-{index}");
            let payload = format!("{{\"seq\":{index}}}");
            conn.execute(
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
                rusqlite::params![event_id, payload, created_at, index],
            )
            .expect("seed event");
        }
    }

    let outcome = service
        .compact_operational_collection("audit_log", false)
        .expect("compact collection");
    assert_eq!(outcome.deleted_mutations, 1);
    assert!(!outcome.dry_run);

    let conn = sqlite::open_connection(db.path()).expect("conn");
    let surviving_ids = {
        let mut stmt = conn
            .prepare(
                "SELECT id FROM operational_mutations \
                 WHERE collection_name = 'audit_log' ORDER BY mutation_order",
            )
            .expect("stmt");
        let ids: Vec<String> = stmt
            .query_map([], |row| row.get(0))
            .expect("rows")
            .collect::<Result<_, _>>()
            .expect("collect");
        ids
    };
    assert_eq!(
        surviving_ids,
        vec![String::from("evt-2"), String::from("evt-3")]
    );

    let compaction_events = conn
        .query_row(
            "SELECT count(*) FROM provenance_events \
             WHERE event_type = 'operational_collection_compacted' AND subject = 'audit_log'",
            [],
            |row| row.get::<_, i64>(0),
        )
        .expect("provenance count");
    assert_eq!(compaction_events, 1);
}
6928
/// Walks a `keep_last` (`max_rows` 2) log with three events through the three
/// retention entry points: the plan reports one candidate deletion, the dry
/// run reports the same without touching any table, and the real run deletes
/// the oldest event and records a retention run executed at 1_000.
#[test]
fn plan_and_run_operational_retention_keep_last() {
    let (db, service) = setup();
    {
        let conn = sqlite::open_connection(db.path()).expect("conn");
        conn.execute(
            "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
             VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
            [],
        )
        .expect("seed collection");
        // Events 1..=3, created at 100, 200 and 300 respectively.
        for index in 1_i64..=3 {
            let created_at = index * 100;
            let event_id = format!("evt-{index}");
            let payload = format!("{{\"seq\":{index}}}");
            conn.execute(
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
                rusqlite::params![event_id, payload, created_at, index],
            )
            .expect("seed event");
        }
    }

    // Phase 1: planning only describes what would happen.
    let plan = service
        .plan_operational_retention(1_000, None, Some(10))
        .expect("plan retention");
    assert_eq!(plan.collections_examined, 1);
    let planned = &plan.items[0];
    assert_eq!(planned.collection_name, "audit_log");
    assert_eq!(
        planned.action_kind,
        crate::operational::OperationalRetentionActionKind::KeepLast
    );
    assert_eq!(planned.candidate_deletions, 1);
    assert_eq!(planned.max_rows, Some(2));
    assert_eq!(planned.last_run_at, None);

    // Phase 2: the dry run reports the deletion but performs none.
    let rehearsal = service
        .run_operational_retention(1_000, None, Some(10), true)
        .expect("dry-run retention");
    assert!(rehearsal.dry_run);
    assert_eq!(rehearsal.collections_acted_on, 1);
    let rehearsed = &rehearsal.items[0];
    assert_eq!(rehearsed.deleted_mutations, 1);
    assert_eq!(rehearsed.rows_remaining, 2);

    {
        let conn = sqlite::open_connection(db.path()).expect("conn");
        let mutation_rows = conn
            .query_row(
                "SELECT count(*) FROM operational_mutations WHERE collection_name = 'audit_log'",
                [],
                |row| row.get::<_, i64>(0),
            )
            .expect("remaining count after dry run");
        assert_eq!(mutation_rows, 3);
        let recorded_runs = conn
            .query_row(
                "SELECT count(*) FROM operational_retention_runs WHERE collection_name = 'audit_log'",
                [],
                |row| row.get::<_, i64>(0),
            )
            .expect("retention run count");
        assert_eq!(recorded_runs, 0);
        // The connection is closed here, before the real run starts.
    }

    // Phase 3: the real run deletes the oldest event and logs itself.
    let executed = service
        .run_operational_retention(1_000, None, Some(10), false)
        .expect("execute retention");
    assert_eq!(executed.collections_acted_on, 1);
    let applied = &executed.items[0];
    assert_eq!(applied.deleted_mutations, 1);
    assert_eq!(applied.rows_remaining, 2);

    let conn = sqlite::open_connection(db.path()).expect("conn");
    let surviving_ids = {
        let mut stmt = conn
            .prepare(
                "SELECT id FROM operational_mutations \
                 WHERE collection_name = 'audit_log' ORDER BY mutation_order",
            )
            .expect("stmt");
        let ids: Vec<String> = stmt
            .query_map([], |row| row.get(0))
            .expect("rows")
            .collect::<Result<_, _>>()
            .expect("collect");
        ids
    };
    assert_eq!(
        surviving_ids,
        vec![String::from("evt-2"), String::from("evt-3")]
    );

    let latest_run_at = conn
        .query_row(
            "SELECT executed_at FROM operational_retention_runs \
             WHERE collection_name = 'audit_log' ORDER BY executed_at DESC LIMIT 1",
            [],
            |row| row.get::<_, i64>(0),
        )
        .expect("last run at");
    assert_eq!(latest_run_at, 1_000);
}
7027
/// When a `keep_last` (`max_rows` 2) log already holds only two events, a
/// dry-run retention pass has nothing to delete and must therefore report
/// zero collections acted on.
#[test]
fn dry_run_operational_retention_does_not_mark_noop_collection_as_acted_on() {
    let (db, service) = setup();
    {
        let conn = sqlite::open_connection(db.path()).expect("conn");
        conn.execute(
            "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
             VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
            [],
        )
        .expect("seed collection");
        // Events 1..=2, created at 100 and 200 respectively.
        for index in 1_i64..=2 {
            let created_at = index * 100;
            let event_id = format!("evt-{index}");
            let payload = format!("{{\"seq\":{index}}}");
            conn.execute(
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
                rusqlite::params![event_id, payload, created_at, index],
            )
            .expect("seed event");
        }
        // The seeding connection is closed here, before retention runs.
    }

    let rehearsal = service
        .run_operational_retention(1_000, None, Some(10), true)
        .expect("dry-run retention");
    assert!(rehearsal.dry_run);
    assert_eq!(rehearsal.collections_acted_on, 0);
    let rehearsed = &rehearsal.items[0];
    assert_eq!(rehearsed.deleted_mutations, 0);
    assert_eq!(rehearsed.rows_remaining, 2);
}
7062
/// Compaction is refused for a `latest_state` collection: the call fails with
/// `EngineError::InvalidWrite` and a message mentioning "append_only_log".
#[test]
fn compact_operational_collection_rejects_latest_state() {
    let (_db, service) = setup();

    let registration = OperationalRegisterRequest {
        name: String::from("connector_health"),
        kind: OperationalCollectionKind::LatestState,
        schema_json: String::from("{}"),
        retention_json: String::from(r#"{"mode":"keep_all"}"#),
        filter_fields_json: String::from("[]"),
        validation_json: String::new(),
        secondary_indexes_json: String::from("[]"),
        format_version: 1,
    };
    service
        .register_operational_collection(&registration)
        .expect("register collection");

    let outcome = service.compact_operational_collection("connector_health", false);
    let rejection = outcome.expect_err("latest_state compaction should be rejected");
    assert!(matches!(rejection, EngineError::InvalidWrite(_)));
    assert!(rejection.to_string().contains("append_only_log"));
}
7085
/// The record returned by registration carries the `filter_fields_json`
/// exactly as it was supplied in the request.
#[test]
fn register_operational_collection_persists_filter_fields_json() {
    let (_db, service) = setup();

    // Declared once so the request and the expectation cannot drift apart.
    let declared_fields = r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#;

    let registration = OperationalRegisterRequest {
        name: String::from("audit_log"),
        kind: OperationalCollectionKind::AppendOnlyLog,
        schema_json: String::from("{}"),
        retention_json: String::from(r#"{"mode":"keep_all"}"#),
        filter_fields_json: String::from(declared_fields),
        validation_json: String::new(),
        secondary_indexes_json: String::from("[]"),
        format_version: 1,
    };
    let record = service
        .register_operational_collection(&registration)
        .expect("register collection");

    assert_eq!(record.filter_fields_json, declared_fields);
}
7108
/// Combining a prefix filter on `actor` ("alice") with a range filter on
/// `ts` (150..=250 as passed to the request) over three appended events must
/// return only `evt-2`, unlimited and with its payload unchanged.
#[test]
fn read_operational_collection_filters_append_only_rows_by_declared_fields() {
    let (db, service) = setup();

    let registration = OperationalRegisterRequest {
        name: String::from("audit_log"),
        kind: OperationalCollectionKind::AppendOnlyLog,
        schema_json: String::from("{}"),
        retention_json: String::from(r#"{"mode":"keep_all"}"#),
        filter_fields_json: String::from(
            r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"seq","type":"integer","modes":["exact","range"]},{"name":"ts","type":"timestamp","modes":["exact","range"]}]"#,
        ),
        validation_json: String::new(),
        secondary_indexes_json: String::from("[]"),
        format_version: 1,
    };
    service
        .register_operational_collection(&registration)
        .expect("register collection");

    // Append the three events through a writer that only lives for this scope.
    {
        let appended_events = vec![
            crate::OperationalWrite::Append {
                collection: String::from("audit_log"),
                record_key: String::from("evt-1"),
                payload_json: String::from(r#"{"actor":"alice","seq":1,"ts":100}"#),
                source_ref: Some(String::from("src-1")),
            },
            crate::OperationalWrite::Append {
                collection: String::from("audit_log"),
                record_key: String::from("evt-2"),
                payload_json: String::from(r#"{"actor":"alice-admin","seq":2,"ts":200}"#),
                source_ref: Some(String::from("src-2")),
            },
            crate::OperationalWrite::Append {
                collection: String::from("audit_log"),
                record_key: String::from("evt-3"),
                payload_json: String::from(r#"{"actor":"bob","seq":3,"ts":300}"#),
                source_ref: Some(String::from("src-3")),
            },
        ];
        let writer = crate::WriterActor::start(
            db.path(),
            Arc::new(SchemaManager::new()),
            crate::ProvenanceMode::Warn,
            Arc::new(crate::TelemetryCounters::default()),
        )
        .expect("writer");
        let request = crate::WriteRequest {
            label: String::from("operational"),
            nodes: Vec::new(),
            node_retires: Vec::new(),
            edges: Vec::new(),
            edge_retires: Vec::new(),
            chunks: Vec::new(),
            runs: Vec::new(),
            steps: Vec::new(),
            actions: Vec::new(),
            optional_backfills: Vec::new(),
            vec_inserts: Vec::new(),
            operational_writes: appended_events,
        };
        writer.submit(request).expect("write");
    }

    let query = crate::operational::OperationalReadRequest {
        collection_name: String::from("audit_log"),
        filters: vec![
            crate::operational::OperationalFilterClause::Prefix {
                field: String::from("actor"),
                value: String::from("alice"),
            },
            crate::operational::OperationalFilterClause::Range {
                field: String::from("ts"),
                lower: Some(150),
                upper: Some(250),
            },
        ],
        limit: Some(10),
    };
    let result = service
        .read_operational_collection(&query)
        .expect("filtered read");

    assert_eq!(result.collection_name, "audit_log");
    assert_eq!(result.row_count, 1);
    assert!(!result.was_limited);
    assert_eq!(result.rows.len(), 1);
    let only_row = &result.rows[0];
    assert_eq!(only_row.record_key, "evt-2");
    assert_eq!(
        only_row.payload_json,
        r#"{"actor":"alice-admin","seq":2,"ts":200}"#
    );
}
7197
/// With an `append_only_field_time` secondary index declared on
/// (`actor`, `ts`), a filtered read must still find `evt-2` after every row
/// of `operational_filter_values` for the collection has been deleted.
#[test]
fn read_operational_collection_uses_secondary_index_when_filter_values_are_missing() {
    let (db, service) = setup();

    let registration = OperationalRegisterRequest {
        name: String::from("audit_log"),
        kind: OperationalCollectionKind::AppendOnlyLog,
        schema_json: String::from("{}"),
        retention_json: String::from(r#"{"mode":"keep_all"}"#),
        filter_fields_json: String::from(
            r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#,
        ),
        validation_json: String::new(),
        secondary_indexes_json: String::from(
            r#"[{"name":"actor_ts","kind":"append_only_field_time","field":"actor","value_type":"string","time_field":"ts"}]"#,
        ),
        format_version: 1,
    };
    service
        .register_operational_collection(&registration)
        .expect("register collection");

    // Append two events through a writer that only lives for this scope.
    {
        let appended_events = vec![
            crate::OperationalWrite::Append {
                collection: String::from("audit_log"),
                record_key: String::from("evt-1"),
                payload_json: String::from(r#"{"actor":"alice","ts":100}"#),
                source_ref: Some(String::from("src-1")),
            },
            crate::OperationalWrite::Append {
                collection: String::from("audit_log"),
                record_key: String::from("evt-2"),
                payload_json: String::from(r#"{"actor":"alice-admin","ts":200}"#),
                source_ref: Some(String::from("src-2")),
            },
        ];
        let writer = crate::WriterActor::start(
            db.path(),
            Arc::new(SchemaManager::new()),
            crate::ProvenanceMode::Warn,
            Arc::new(crate::TelemetryCounters::default()),
        )
        .expect("writer");
        let request = crate::WriteRequest {
            label: String::from("operational"),
            nodes: Vec::new(),
            node_retires: Vec::new(),
            edges: Vec::new(),
            edge_retires: Vec::new(),
            chunks: Vec::new(),
            runs: Vec::new(),
            steps: Vec::new(),
            actions: Vec::new(),
            optional_backfills: Vec::new(),
            vec_inserts: Vec::new(),
            operational_writes: appended_events,
        };
        writer.submit(request).expect("write");
    }

    // Remove the extracted filter values so they cannot serve the read below.
    {
        let conn = sqlite::open_connection(db.path()).expect("conn");
        conn.execute(
            "DELETE FROM operational_filter_values WHERE collection_name = 'audit_log'",
            [],
        )
        .expect("clear filter values");
    }

    let query = crate::operational::OperationalReadRequest {
        collection_name: String::from("audit_log"),
        filters: vec![
            crate::operational::OperationalFilterClause::Prefix {
                field: String::from("actor"),
                value: String::from("alice"),
            },
            crate::operational::OperationalFilterClause::Range {
                field: String::from("ts"),
                lower: Some(150),
                upper: Some(250),
            },
        ],
        limit: Some(10),
    };
    let result = service
        .read_operational_collection(&query)
        .expect("secondary-index read");

    assert_eq!(result.row_count, 1);
    assert_eq!(result.rows[0].record_key, "evt-2");
}
7280
/// Covers two rejection paths of filtered reads: a `latest_state` collection
/// (error mentions "append_only_log") and an append-only collection queried
/// on a field it never declared (error mentions "undeclared").
#[test]
fn read_operational_collection_rejects_undeclared_fields_and_latest_state_collections() {
    let (_db, service) = setup();

    // Case 1: filtered reads against a `latest_state` collection.
    let latest_state_registration = OperationalRegisterRequest {
        name: String::from("connector_health"),
        kind: OperationalCollectionKind::LatestState,
        schema_json: String::from("{}"),
        retention_json: String::from("{}"),
        filter_fields_json: String::from(
            r#"[{"name":"status","type":"string","modes":["exact"]}]"#,
        ),
        validation_json: String::new(),
        secondary_indexes_json: String::from("[]"),
        format_version: 1,
    };
    service
        .register_operational_collection(&latest_state_registration)
        .expect("register collection");

    let latest_state_query = crate::operational::OperationalReadRequest {
        collection_name: String::from("connector_health"),
        filters: vec![crate::operational::OperationalFilterClause::Exact {
            field: String::from("status"),
            value: crate::operational::OperationalFilterValue::String(String::from("ok")),
        }],
        limit: Some(10),
    };
    let latest_state_error = service
        .read_operational_collection(&latest_state_query)
        .expect_err("latest_state filtered reads should be rejected");
    assert!(latest_state_error.to_string().contains("append_only_log"));

    // Case 2: an append-only collection filtered on a field it did not declare.
    let append_only_registration = OperationalRegisterRequest {
        name: String::from("audit_log"),
        kind: OperationalCollectionKind::AppendOnlyLog,
        schema_json: String::from("{}"),
        retention_json: String::from(r#"{"mode":"keep_all"}"#),
        filter_fields_json: String::from(
            r#"[{"name":"actor","type":"string","modes":["exact"]}]"#,
        ),
        validation_json: String::new(),
        secondary_indexes_json: String::from("[]"),
        format_version: 1,
    };
    service
        .register_operational_collection(&append_only_registration)
        .expect("register append-only collection");

    let undeclared_query = crate::operational::OperationalReadRequest {
        collection_name: String::from("audit_log"),
        filters: vec![crate::operational::OperationalFilterClause::Exact {
            field: String::from("missing"),
            value: crate::operational::OperationalFilterValue::String(String::from("x")),
        }],
        limit: Some(10),
    };
    let undeclared_error = service
        .read_operational_collection(&undeclared_query)
        .expect_err("undeclared field should be rejected");
    assert!(undeclared_error.to_string().contains("undeclared"));
}
7336
/// With two events matching the prefix filter and `limit` set to 1, the read
/// returns a single row (`evt-2`), echoes the applied limit, and flags the
/// result as limited.
#[test]
fn read_operational_collection_applies_limit_and_reports_truncation() {
    let (db, service) = setup();

    let registration = OperationalRegisterRequest {
        name: String::from("audit_log"),
        kind: OperationalCollectionKind::AppendOnlyLog,
        schema_json: String::from("{}"),
        retention_json: String::from(r#"{"mode":"keep_all"}"#),
        filter_fields_json: String::from(
            r#"[{"name":"actor","type":"string","modes":["prefix"]}]"#,
        ),
        validation_json: String::new(),
        secondary_indexes_json: String::from("[]"),
        format_version: 1,
    };
    service
        .register_operational_collection(&registration)
        .expect("register collection");

    // Append two matching events through a writer that only lives for this scope.
    {
        let appended_events = vec![
            crate::OperationalWrite::Append {
                collection: String::from("audit_log"),
                record_key: String::from("evt-1"),
                payload_json: String::from(r#"{"actor":"alice-1"}"#),
                source_ref: Some(String::from("src-1")),
            },
            crate::OperationalWrite::Append {
                collection: String::from("audit_log"),
                record_key: String::from("evt-2"),
                payload_json: String::from(r#"{"actor":"alice-2"}"#),
                source_ref: Some(String::from("src-2")),
            },
        ];
        let writer = crate::WriterActor::start(
            db.path(),
            Arc::new(SchemaManager::new()),
            crate::ProvenanceMode::Warn,
            Arc::new(crate::TelemetryCounters::default()),
        )
        .expect("writer");
        let request = crate::WriteRequest {
            label: String::from("operational"),
            nodes: Vec::new(),
            node_retires: Vec::new(),
            edges: Vec::new(),
            edge_retires: Vec::new(),
            chunks: Vec::new(),
            runs: Vec::new(),
            steps: Vec::new(),
            actions: Vec::new(),
            optional_backfills: Vec::new(),
            vec_inserts: Vec::new(),
            operational_writes: appended_events,
        };
        writer.submit(request).expect("write");
    }

    let query = crate::operational::OperationalReadRequest {
        collection_name: String::from("audit_log"),
        filters: vec![crate::operational::OperationalFilterClause::Prefix {
            field: String::from("actor"),
            value: String::from("alice"),
        }],
        limit: Some(1),
    };
    let result = service
        .read_operational_collection(&query)
        .expect("limited read");

    assert_eq!(result.row_count, 1);
    assert_eq!(result.applied_limit, 1);
    assert!(result.was_limited);
    assert_eq!(result.rows[0].record_key, "evt-2");
}
7408
/// Seeds a database by hand with `operational_collections` and
/// `operational_mutations` tables plus one `audit_log` append, then checks that
/// filtered reads are rejected as "undeclared" until
/// `update_operational_collection_filters` installs a filter contract, after
/// which a range read on `ts` returns the seeded row.
#[test]
fn preexisting_operational_collection_can_gain_filter_contract_after_upgrade() {
    use crate::operational::{
        OperationalFilterClause, OperationalFilterValue, OperationalReadRequest,
    };

    let db = NamedTempFile::new().expect("temp db");
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    let seed_sql = r#"
        CREATE TABLE operational_collections (
            name TEXT PRIMARY KEY,
            kind TEXT NOT NULL,
            schema_json TEXT NOT NULL,
            retention_json TEXT NOT NULL,
            format_version INTEGER NOT NULL DEFAULT 1,
            created_at INTEGER NOT NULL DEFAULT 100,
            disabled_at INTEGER
        );
        CREATE TABLE operational_mutations (
            id TEXT PRIMARY KEY,
            collection_name TEXT NOT NULL,
            record_key TEXT NOT NULL,
            op_kind TEXT NOT NULL,
            payload_json TEXT NOT NULL,
            source_ref TEXT,
            created_at INTEGER NOT NULL DEFAULT 100,
            mutation_order INTEGER NOT NULL DEFAULT 1
        );
        INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at)
        VALUES ('audit_log', 'append_only_log', '{}', '{"mode":"keep_all"}', 1, 100);
        INSERT INTO operational_mutations
            (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order)
        VALUES
            ('evt-1', 'audit_log', 'evt-1', 'append', '{"actor":"alice","ts":0}', 'src-1', 100, 1);
        "#;
    seed_conn
        .execute_batch(seed_sql)
        .expect("seed pre-v10 schema");
    drop(seed_conn);

    let service = AdminService::new(db.path(), Arc::new(SchemaManager::new()));

    // Before any filter contract is declared, an exact filter on `actor` fails.
    let exact_actor_read = OperationalReadRequest {
        collection_name: "audit_log".to_owned(),
        filters: vec![OperationalFilterClause::Exact {
            field: "actor".to_owned(),
            value: OperationalFilterValue::String("alice".to_owned()),
        }],
        limit: Some(10),
    };
    let pre_update = service
        .read_operational_collection(&exact_actor_read)
        .expect_err("read should reject undeclared fields before migration update");
    assert!(pre_update.to_string().contains("undeclared"));

    let filter_contract = r#"[{"name":"actor","type":"string","modes":["exact"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#;
    let updated = service
        .update_operational_collection_filters("audit_log", filter_contract)
        .expect("update filter contract");
    assert!(updated.filter_fields_json.contains("\"actor\""));

    // With the contract in place, a closed range [0, 0] on `ts` is accepted.
    let ts_range_read = OperationalReadRequest {
        collection_name: "audit_log".to_owned(),
        filters: vec![OperationalFilterClause::Range {
            field: "ts".to_owned(),
            lower: Some(0),
            upper: Some(0),
        }],
        limit: Some(10),
    };
    let report = service
        .read_operational_collection(&ts_range_read)
        .expect("read after explicit filter update");
    assert_eq!(report.row_count, 1);
    assert_eq!(report.rows[0].record_key, "evt-1");
}
7480
/// Inserts a `vec_nodes_active` row for `ghost-chunk` without a matching chunk
/// row and checks that `check_semantics` counts it in `stale_vec_rows` and
/// emits a warning containing "stale vec".
#[cfg(feature = "sqlite-vec")]
#[test]
fn check_semantics_detects_stale_vec_rows() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());

    let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    schema.bootstrap(&conn).expect("bootstrap");
    schema
        .ensure_vector_profile(&conn, "default", "vec_nodes_active", 3)
        .expect("vec profile");

    // Encode the three-component embedding as consecutive little-endian f32s.
    let mut bytes: Vec<u8> = Vec::new();
    for component in &[0.1f32, 0.2f32, 0.3f32] {
        bytes.extend_from_slice(&component.to_le_bytes());
    }
    let insert_sql =
        "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('ghost-chunk', ?1)";
    conn.execute(insert_sql, rusqlite::params![bytes])
        .expect("insert stale vec row");
    // Close the seeding connection before the service runs its check.
    drop(conn);

    let service = AdminService::new(db.path(), Arc::clone(&schema));
    let report = service.check_semantics().expect("semantics check");
    assert_eq!(report.stale_vec_rows, 1);

    let mentions_stale_vec = report
        .warnings
        .iter()
        .any(|warning| warning.contains("stale vec"));
    assert!(mentions_stale_vec, "warning must mention stale vec");
}
7513
/// Inserts only a `vector_profiles` metadata row, then checks that
/// `restore_vector_profiles` reports the `Vec` projection target with one
/// rebuilt row and that `vec_nodes_active` appears in `sqlite_schema`.
#[cfg(feature = "sqlite-vec")]
#[test]
fn restore_vector_profiles_recreates_vec_table_from_metadata() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());

    let seed_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    schema.bootstrap(&seed_conn).expect("bootstrap");
    let profile_sql = "INSERT INTO vector_profiles (profile, table_name, dimension, enabled) \
         VALUES ('default', 'vec_nodes_active', 3, 1)";
    seed_conn
        .execute(profile_sql, [])
        .expect("insert vector profile");
    drop(seed_conn);

    let service = AdminService::new(db.path(), Arc::clone(&schema));
    let report = service
        .restore_vector_profiles()
        .expect("restore vector profiles");
    let expected_targets = vec![crate::projection::ProjectionTarget::Vec];
    assert_eq!(report.targets, expected_targets);
    assert_eq!(report.rebuilt_rows, 1);

    // Re-open the database and look the table up in the schema catalog.
    let verify_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    let lookup_sql = "SELECT count(*) FROM sqlite_schema WHERE name = 'vec_nodes_active'";
    let count: i64 = verify_conn
        .query_row(lookup_sql, [], |row| row.get(0))
        .expect("vec schema count");
    assert_eq!(count, 1, "vec table should exist after restore");
}
7550
/// Serializes one `VectorRegenerationConfig` to both a `.json` and a `.toml`
/// file and checks that `load_vector_regeneration_config` reads each back to a
/// value equal to the one that was written.
#[cfg(feature = "sqlite-vec")]
#[test]
fn load_vector_regeneration_config_supports_json_and_toml() {
    let dir = tempfile::tempdir().expect("temp dir");
    let base = dir.path();
    let json_path = base.join("regen.json");
    let toml_path = base.join("regen.toml");

    let config = VectorRegenerationConfig {
        profile: "default".to_owned(),
        table_name: "vec_nodes_active".to_owned(),
        chunking_policy: "per_chunk".to_owned(),
        preprocessing_policy: "trim".to_owned(),
    };

    let as_json = serde_json::to_string(&config).expect("json");
    fs::write(&json_path, as_json).expect("write json");
    let as_toml = toml::to_string(&config).expect("toml");
    fs::write(&toml_path, as_toml).expect("write toml");

    let parsed_json = load_vector_regeneration_config(&json_path).expect("json parse");
    let parsed_toml = load_vector_regeneration_config(&toml_path).expect("toml parse");

    assert_eq!(parsed_json, config);
    assert_eq!(parsed_toml, config);
}
7574
/// Checks that a JSON document carrying the legacy identity fields
/// (`model_identity`, `model_version`, `dimension`, `normalization_policy`,
/// `generator_command`) fails to deserialize into `VectorRegenerationConfig`.
#[test]
fn regenerate_vector_embeddings_config_rejects_old_identity_fields() {
    let legacy_json = r#"{
        "profile": "default",
        "table_name": "vec_nodes_active",
        "model_identity": "old-model",
        "model_version": "1.0",
        "dimension": 4,
        "normalization_policy": "l2",
        "chunking_policy": "per_chunk",
        "preprocessing_policy": "trim",
        "generator_command": ["/bin/echo"]
    }"#;

    let parsed = serde_json::from_str::<VectorRegenerationConfig>(legacy_json);
    assert!(
        parsed.is_err(),
        "legacy identity fields must be rejected at deserialization"
    );
}
7598
/// Built only without the `sqlite-vec` feature (on unix): checks that
/// `regenerate_vector_embeddings` fails with "unsupported vec capability" and
/// that one `vector_regeneration_requested` and one `vector_regeneration_failed`
/// provenance event are recorded for the `default` profile.
#[cfg(all(not(feature = "sqlite-vec"), unix))]
#[test]
fn regenerate_vector_embeddings_unsupported_vec_capability_writes_request_and_failed_audit() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());

    let seed_conn = sqlite::open_connection(db.path()).expect("connection");
    schema.bootstrap(&seed_conn).expect("bootstrap");
    seed_conn
        .execute(
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
            [],
        )
        .expect("insert node");
    seed_conn
        .execute(
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
            [],
        )
        .expect("insert chunk");
    drop(seed_conn);

    let service = AdminService::new(db.path(), Arc::clone(&schema));
    let embedder = TestEmbedder::new("test-model", 4);
    let config = VectorRegenerationConfig {
        profile: "default".to_owned(),
        table_name: "vec_nodes_active".to_owned(),
        chunking_policy: "per_chunk".to_owned(),
        preprocessing_policy: "trim".to_owned(),
    };
    let error = service
        .regenerate_vector_embeddings(&embedder, &config)
        .expect_err("sqlite-vec capability should be required");

    assert!(error.to_string().contains("unsupported vec capability"));

    let conn = sqlite::open_connection(db.path()).expect("connection");
    // Runs a single-value count query and unwraps it with the given label.
    let count_rows = |sql: &str, what: &str| -> i64 {
        conn.query_row(sql, [], |row| row.get(0)).expect(what)
    };

    let request_count = count_rows(
        "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_requested' AND subject = 'default'",
        "request count",
    );
    assert_eq!(request_count, 1);

    let failed_count = count_rows(
        "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
        "failed count",
    );
    assert_eq!(failed_count, 1);

    let metadata_json: String = conn
        .query_row(
            "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
            [],
            |row| row.get(0),
        )
        .expect("failed metadata");
    assert!(metadata_json.contains("\"failure_class\":\"unsupported vec capability\""));
}
7664
/// Seeds one node with two chunks, regenerates embeddings with a
/// four-dimensional `TestEmbedder`, and checks the returned report, the
/// `vec_nodes_active` row count, the persisted embedding contract, and the
/// request/apply provenance events.
#[cfg(feature = "sqlite-vec")]
#[test]
#[allow(clippy::too_many_lines)]
fn regenerate_vector_embeddings_rebuilds_embeddings_via_embedder() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());

    let seed_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    schema.bootstrap(&seed_conn).expect("bootstrap");
    seed_conn
        .execute(
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
            [],
        )
        .expect("insert node");
    seed_conn
        .execute(
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
            [],
        )
        .expect("insert chunk 1");
    seed_conn
        .execute(
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-2', 'doc-1', 'travel plan', 101)",
            [],
        )
        .expect("insert chunk 2");
    drop(seed_conn);

    let service = AdminService::new(db.path(), Arc::clone(&schema));
    let embedder = TestEmbedder::new("test-model", 4);
    let config = VectorRegenerationConfig {
        profile: "default".to_owned(),
        table_name: "vec_nodes_active".to_owned(),
        chunking_policy: "per_chunk".to_owned(),
        preprocessing_policy: "trim".to_owned(),
    };
    let report = service
        .regenerate_vector_embeddings(&embedder, &config)
        .expect("regenerate vectors");

    assert_eq!(report.profile, "default");
    assert_eq!(report.table_name, "vec_nodes_active");
    assert_eq!(report.dimension, 4);
    assert_eq!(report.total_chunks, 2);
    assert_eq!(report.regenerated_rows, 2);
    assert!(report.contract_persisted);

    let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    // Runs a single-value integer query and unwraps it with the given label.
    let fetch_i64 = |sql: &str, what: &str| -> i64 {
        conn.query_row(sql, [], |row| row.get(0)).expect(what)
    };

    let vec_count = fetch_i64("SELECT count(*) FROM vec_nodes_active", "vec count");
    assert_eq!(vec_count, 2);

    // The persisted contract is compared against the identity values this
    // test expects from the embedder.
    let contract: (String, String, i64, String) = conn
        .query_row(
            "SELECT model_identity, model_version, dimension, normalization_policy \
             FROM vector_embedding_contracts WHERE profile = 'default'",
            [],
            |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)),
        )
        .expect("contract row");
    let (model_identity, model_version, dimension, normalization_policy) = contract;
    assert_eq!(model_identity, "test-model");
    assert_eq!(model_version, "1.0.0");
    assert_eq!(dimension, 4);
    assert_eq!(normalization_policy, "l2");

    let contract_format_version = fetch_i64(
        "SELECT contract_format_version FROM vector_embedding_contracts WHERE profile = 'default'",
        "contract_format_version",
    );
    assert_eq!(contract_format_version, 1);

    let request_count = fetch_i64(
        "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_requested' AND subject = 'default'",
        "request audit count",
    );
    assert_eq!(request_count, 1);

    let apply_count = fetch_i64(
        "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_apply' AND subject = 'default'",
        "apply audit count",
    );
    assert_eq!(apply_count, 1);

    let apply_metadata: String = conn
        .query_row(
            "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_apply' AND subject = 'default'",
            [],
            |row| row.get(0),
        )
        .expect("apply metadata");
    for expected_fragment in [
        "\"profile\":\"default\"",
        "\"snapshot_hash\":",
        "\"model_identity\":\"test-model\"",
    ] {
        assert!(apply_metadata.contains(expected_fragment));
    }
}
7780
/// Seeds an existing contract (`old-model`, `old-snapshot`) and one vec row,
/// runs regeneration with a `FailingEmbedder`, and checks that the call fails
/// with "embedder failure" while the contract row and vec row count are
/// unchanged and one `vector_regeneration_failed` event is recorded.
#[cfg(feature = "sqlite-vec")]
#[test]
#[allow(clippy::too_many_lines)]
fn regenerate_vector_embeddings_embedder_failure_leaves_contract_and_vec_rows_unchanged() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());

    let seed_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    schema.bootstrap(&seed_conn).expect("bootstrap");
    seed_conn
        .execute(
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
            [],
        )
        .expect("insert node");
    seed_conn
        .execute(
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
            [],
        )
        .expect("insert chunk");
    schema
        .ensure_vector_profile(&seed_conn, "default", "vec_nodes_active", 4)
        .expect("ensure vec profile");
    let seed_contract_sql = r"
        INSERT INTO vector_embedding_contracts (
            profile,
            table_name,
            model_identity,
            model_version,
            dimension,
            normalization_policy,
            chunking_policy,
            preprocessing_policy,
            generator_command_json,
            applied_at,
            snapshot_hash
        ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)
        ";
    seed_conn
        .execute(
            seed_contract_sql,
            rusqlite::params![
                "default",
                "vec_nodes_active",
                "old-model",
                "0.9.0",
                4,
                "l2",
                "per_chunk",
                "trim",
                "[]",
                111,
                "old-snapshot"
            ],
        )
        .expect("seed contract");
    seed_conn
        .execute(
            "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
            [],
        )
        .expect("seed vec row");
    drop(seed_conn);

    let service = AdminService::new(db.path(), Arc::clone(&schema));
    let new_identity = QueryEmbedderIdentity {
        model_identity: "new-model".to_owned(),
        model_version: "1.0.0".to_owned(),
        dimension: 4,
        normalization_policy: "l2".to_owned(),
    };
    let failing = FailingEmbedder {
        identity: new_identity,
    };
    let config = VectorRegenerationConfig {
        profile: "default".to_owned(),
        table_name: "vec_nodes_active".to_owned(),
        chunking_policy: "per_chunk".to_owned(),
        preprocessing_policy: "trim".to_owned(),
    };
    let error = service
        .regenerate_vector_embeddings(&failing, &config)
        .expect_err("embedder should fail");

    assert!(error.to_string().contains("embedder failure"));

    let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    // Helpers for single-column reads, unwrapped with the given label.
    let fetch_text = |sql: &str, what: &str| -> String {
        conn.query_row(sql, [], |row| row.get(0)).expect(what)
    };
    let fetch_i64 = |sql: &str, what: &str| -> i64 {
        conn.query_row(sql, [], |row| row.get(0)).expect(what)
    };

    // The previously seeded contract values must still be in place.
    let model_identity = fetch_text(
        "SELECT model_identity FROM vector_embedding_contracts WHERE profile = 'default'",
        "model identity",
    );
    assert_eq!(model_identity, "old-model");

    let snapshot_hash = fetch_text(
        "SELECT snapshot_hash FROM vector_embedding_contracts WHERE profile = 'default'",
        "snapshot hash",
    );
    assert_eq!(snapshot_hash, "old-snapshot");

    let vec_count = fetch_i64("SELECT count(*) FROM vec_nodes_active", "vec count");
    assert_eq!(vec_count, 1);

    let failure_count = fetch_i64(
        "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
        "failure count",
    );
    assert_eq!(failure_count, 1);

    let failure_metadata = fetch_text(
        "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
        "failure metadata",
    );
    assert!(failure_metadata.contains("\"failure_class\":\"embedder failure\""));
}
7907
/// Passes a profile consisting of a single space and checks that regeneration
/// fails with "invalid contract" while leaving both
/// `vector_embedding_contracts` and `provenance_events` empty.
#[cfg(feature = "sqlite-vec")]
#[test]
fn regenerate_vector_embeddings_rejects_whitespace_only_profile_before_mutation() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());

    let seed_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    schema.bootstrap(&seed_conn).expect("bootstrap");
    seed_conn
        .execute(
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
            [],
        )
        .expect("insert node");
    seed_conn
        .execute(
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
            [],
        )
        .expect("insert chunk");
    drop(seed_conn);

    let service = AdminService::new(db.path(), Arc::clone(&schema));
    let embedder = TestEmbedder::new("test-model", 4);
    let whitespace_profile_config = VectorRegenerationConfig {
        profile: " ".to_owned(),
        table_name: "vec_nodes_active".to_owned(),
        chunking_policy: "per_chunk".to_owned(),
        preprocessing_policy: "trim".to_owned(),
    };
    let error = service
        .regenerate_vector_embeddings(&embedder, &whitespace_profile_config)
        .expect_err("whitespace profile should be rejected");

    assert!(error.to_string().contains("invalid contract"));

    let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    // Runs a single-value count query and unwraps it with the given label.
    let count_rows = |sql: &str, what: &str| -> i64 {
        conn.query_row(sql, [], |row| row.get(0)).expect(what)
    };

    let contract_count = count_rows(
        "SELECT count(*) FROM vector_embedding_contracts",
        "contract count",
    );
    assert_eq!(contract_count, 0);

    let provenance_count = count_rows("SELECT count(*) FROM provenance_events", "provenance count");
    assert_eq!(provenance_count, 0);
}
7971
/// Seeds a contract row whose `contract_format_version` is 99 and checks that
/// regeneration fails with an error mentioning both "unsupported" and
/// "format version".
#[cfg(feature = "sqlite-vec")]
#[test]
fn regenerate_vector_embeddings_rejects_future_contract_format_version() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());

    let seed_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    schema.bootstrap(&seed_conn).expect("bootstrap");
    seed_conn
        .execute(
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
            [],
        )
        .expect("insert node");
    seed_conn
        .execute(
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
            [],
        )
        .expect("insert chunk");
    let seed_contract_sql = r"
        INSERT INTO vector_embedding_contracts (
            profile,
            table_name,
            model_identity,
            model_version,
            dimension,
            normalization_policy,
            chunking_policy,
            preprocessing_policy,
            generator_command_json,
            applied_at,
            snapshot_hash,
            contract_format_version,
            updated_at
        ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)
        ";
    seed_conn
        .execute(
            seed_contract_sql,
            rusqlite::params![
                "default",
                "vec_nodes_active",
                "old-model",
                "0.9.0",
                4,
                "l2",
                "per_chunk",
                "trim",
                "[]",
                111,
                "old-snapshot",
                99,
                111,
            ],
        )
        .expect("seed future contract");
    drop(seed_conn);

    let service = AdminService::new(db.path(), Arc::clone(&schema));
    let embedder = TestEmbedder::new("test-model", 4);
    let config = VectorRegenerationConfig {
        profile: "default".to_owned(),
        table_name: "vec_nodes_active".to_owned(),
        chunking_policy: "per_chunk".to_owned(),
        preprocessing_policy: "trim".to_owned(),
    };
    let error = service
        .regenerate_vector_embeddings(&embedder, &config)
        .expect_err("future contract version should be rejected");

    assert!(error.to_string().contains("unsupported"));
    assert!(error.to_string().contains("format version"));
}
8046
/// Inserts a chunk that references `ghost-node`, for which no node row is
/// inserted, and checks that `check_semantics` reports one orphaned chunk.
#[test]
fn check_semantics_detects_orphaned_chunk() {
    let (db, service) = setup();

    let conn = sqlite::open_connection(db.path()).expect("conn");
    let orphan_chunk_sql = "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
         VALUES ('c1', 'ghost-node', 'text', 100)";
    conn.execute(orphan_chunk_sql, [])
        .expect("insert orphaned chunk");
    drop(conn);

    let report = service.check_semantics().expect("semantics check");
    assert_eq!(report.orphaned_chunks, 1);
}
8063
/// Inserts a node without supplying `source_ref` and checks that
/// `check_semantics` reports one node with a null source reference.
#[test]
fn check_semantics_detects_null_source_ref() {
    let (db, service) = setup();

    let conn = sqlite::open_connection(db.path()).expect("conn");
    let node_sql = "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at) \
         VALUES ('r1', 'lg1', 'Meeting', '{}', 100)";
    conn.execute(node_sql, [])
        .expect("insert node with null source_ref");
    drop(conn);

    let report = service.check_semantics().expect("semantics check");
    assert_eq!(report.null_source_ref_nodes, 1);
}
8079
/// Inserts a step whose `run_id` is `ghost-run` (with foreign-key enforcement
/// switched off on the seeding connection) and checks that `check_semantics`
/// reports one broken step foreign key.
#[test]
fn check_semantics_detects_broken_step_fk() {
    let (db, service) = setup();

    let conn = sqlite::open_connection(db.path()).expect("conn");
    // Foreign keys are disabled so the dangling `run_id` can be inserted.
    conn.execute_batch("PRAGMA foreign_keys = OFF;")
        .expect("disable FK");
    let step_sql = "INSERT INTO steps (id, run_id, kind, status, properties, created_at) \
         VALUES ('s1', 'ghost-run', 'llm', 'completed', '{}', 100)";
    conn.execute(step_sql, [])
        .expect("insert step with ghost run_id");
    drop(conn);

    let report = service.check_semantics().expect("semantics check");
    assert_eq!(report.broken_step_fk, 1);
}
8099
/// Inserts an action whose `step_id` is `ghost-step` (with foreign-key
/// enforcement switched off on the seeding connection) and checks that
/// `check_semantics` reports one broken action foreign key.
#[test]
fn check_semantics_detects_broken_action_fk() {
    let (db, service) = setup();

    let conn = sqlite::open_connection(db.path()).expect("conn");
    // Foreign keys are disabled so the dangling `step_id` can be inserted.
    conn.execute_batch("PRAGMA foreign_keys = OFF;")
        .expect("disable FK");
    let action_sql = "INSERT INTO actions (id, step_id, kind, status, properties, created_at) \
         VALUES ('a1', 'ghost-step', 'emit', 'completed', '{}', 100)";
    conn.execute(action_sql, [])
        .expect("insert action with ghost step_id");
    drop(conn);

    let report = service.check_semantics().expect("semantics check");
    assert_eq!(report.broken_action_fk, 1);
}
8117
/// Inserts an `fts_nodes` row for `ghost-chunk` without inserting a matching
/// chunk and checks that `check_semantics` reports one stale FTS row.
#[test]
fn check_semantics_detects_stale_fts_rows() {
    let (db, service) = setup();

    let conn = sqlite::open_connection(db.path()).expect("conn");
    let stale_fts_sql = "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
         VALUES ('ghost-chunk', 'any-node', 'Meeting', 'stale content')";
    conn.execute(stale_fts_sql, [])
        .expect("insert stale FTS row");
    drop(conn);

    let report = service.check_semantics().expect("semantics check");
    assert_eq!(report.stale_fts_rows, 1);
}
8135
/// Inserts a node with `superseded_at` set plus an `fts_nodes` row pointing at
/// its logical id, and checks that `check_semantics` reports one FTS row for a
/// superseded node.
#[test]
fn check_semantics_detects_fts_rows_for_superseded_nodes() {
    let (db, service) = setup();

    let conn = sqlite::open_connection(db.path()).expect("conn");
    let superseded_node_sql = "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
         VALUES ('r1', 'lg-sup', 'Meeting', '{}', 100, 200, 'src-1')";
    conn.execute(superseded_node_sql, [])
        .expect("insert superseded node");
    let fts_row_sql = "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
         VALUES ('ck-x', 'lg-sup', 'Meeting', 'superseded content')";
    conn.execute(fts_row_sql, [])
        .expect("insert FTS row for superseded node");
    drop(conn);

    let report = service.check_semantics().expect("semantics check");
    assert_eq!(report.fts_rows_for_superseded_nodes, 1);
}
8159
/// Inserts a source node and an edge whose target is `ghost-target`, for which
/// no node is inserted, and checks that `check_semantics` reports one dangling
/// edge.
#[test]
fn check_semantics_detects_dangling_edges() {
    let (db, service) = setup();

    let conn = sqlite::open_connection(db.path()).expect("conn");
    conn.execute_batch("PRAGMA foreign_keys = OFF;")
        .expect("disable FK");
    let source_node_sql = "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
         VALUES ('r1', 'lg-src', 'Meeting', '{}', 100, 'src-1')";
    conn.execute(source_node_sql, [])
        .expect("insert source node");
    // Only the source endpoint exists; the target logical id is never created.
    let dangling_edge_sql = "INSERT INTO edges \
         (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
         VALUES ('e1', 'edge-1', 'lg-src', 'ghost-target', 'LINKS', '{}', 100, 'src-1')";
    conn.execute(dangling_edge_sql, [])
        .expect("insert dangling edge");
    drop(conn);

    let report = service.check_semantics().expect("semantics check");
    assert_eq!(report.dangling_edges, 1);
}
8185
/// Inserts a single node row for `lg-orphaned` that already has
/// `superseded_at` set, with no other row for that logical id, and checks that
/// `check_semantics` reports one orphaned supersession chain.
#[test]
fn check_semantics_detects_orphaned_supersession_chains() {
    let (db, service) = setup();

    let conn = sqlite::open_connection(db.path()).expect("conn");
    let superseded_only_sql = "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
         VALUES ('r1', 'lg-orphaned', 'Meeting', '{}', 100, 200, 'src-1')";
    conn.execute(superseded_only_sql, [])
        .expect("insert fully superseded node");
    drop(conn);

    let report = service.check_semantics().expect("semantics check");
    assert_eq!(report.orphaned_supersession_chains, 1);
}
8202
/// Registers a property-FTS schema for `Goal`, inserts a `Goal` node, and
/// inserts a row for that node into the per-kind FTS table named by
/// `fts_kind_table_name("Goal")`. The seeded row belongs to a node of the same
/// kind as its table, and the test expects a mismatch count of zero.
#[test]
fn check_semantics_detects_mismatched_kind_property_fts_rows() {
    let (db, service) = setup();

    let conn = sqlite::open_connection(db.path()).expect("conn");
    let schema_sql = "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
         VALUES ('Goal', '[\"$.name\"]', ' ')";
    conn.execute(schema_sql, []).expect("register schema");
    let node_sql = "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
         VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'src-1')";
    conn.execute(node_sql, []).expect("insert node");

    let table = fathomdb_schema::fts_kind_table_name("Goal");
    let create_table_sql = format!(
        "CREATE VIRTUAL TABLE IF NOT EXISTS {table} \
         USING fts5(node_logical_id UNINDEXED, text_content, tokenize = 'porter unicode61 remove_diacritics 2')"
    );
    conn.execute_batch(&create_table_sql)
        .expect("create per-kind table");
    let insert_row_sql = format!(
        "INSERT INTO {table} (node_logical_id, text_content) VALUES ('goal-1', 'Ship v2')"
    );
    conn.execute(&insert_row_sql, [])
        .expect("insert per-kind FTS row");
    drop(conn);

    let report = service.check_semantics().expect("semantics check");
    assert_eq!(report.mismatched_kind_property_fts_rows, 0);
}
8242
/// Inserts two rows for the same `goal-1` logical id into the per-kind FTS
/// table for `Goal` and checks that `check_semantics` reports one duplicate
/// property FTS row.
#[test]
fn check_semantics_detects_duplicate_property_fts_rows() {
    let (db, service) = setup();

    let conn = sqlite::open_connection(db.path()).expect("conn");
    let node_sql = "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
         VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'src-1')";
    conn.execute(node_sql, []).expect("insert node");

    let table = fathomdb_schema::fts_kind_table_name("Goal");
    let create_table_sql = format!(
        "CREATE VIRTUAL TABLE IF NOT EXISTS {table} \
         USING fts5(node_logical_id UNINDEXED, text_content, tokenize = 'porter unicode61 remove_diacritics 2')"
    );
    conn.execute_batch(&create_table_sql)
        .expect("create per-kind table");

    let first_row_sql = format!(
        "INSERT INTO {table} (node_logical_id, text_content) VALUES ('goal-1', 'Ship v2')"
    );
    conn.execute(&first_row_sql, [])
        .expect("insert first property FTS row");
    let second_row_sql = format!(
        "INSERT INTO {table} (node_logical_id, text_content) VALUES ('goal-1', 'Ship v2 duplicate')"
    );
    conn.execute(&second_row_sql, [])
        .expect("insert duplicate property FTS row");
    drop(conn);

    let report = service.check_semantics().expect("semantics check");
    assert_eq!(report.duplicate_property_fts_rows, 1);
}
8275
/// Registers a `$.name` property-FTS schema for `Goal`, inserts a node named
/// "Current name", and seeds its per-kind FTS row with the different text
/// "Old stale name"; `check_semantics` must report one drifted row.
#[test]
fn check_semantics_detects_drifted_property_fts_text() {
    let (db, service) = setup();

    let conn = sqlite::open_connection(db.path()).expect("conn");
    let schema_sql = "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
         VALUES ('Goal', '[\"$.name\"]', ' ')";
    conn.execute(schema_sql, []).expect("register schema");
    let node_sql = "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
         VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Current name\"}', 100, 'src-1')";
    conn.execute(node_sql, []).expect("insert node");

    let table = fathomdb_schema::fts_kind_table_name("Goal");
    let create_table_sql = format!(
        "CREATE VIRTUAL TABLE IF NOT EXISTS {table} \
         USING fts5(node_logical_id UNINDEXED, text_content, tokenize = 'porter unicode61 remove_diacritics 2')"
    );
    conn.execute_batch(&create_table_sql)
        .expect("create per-kind table");
    // The indexed text deliberately differs from the node's current `name`.
    let stale_row_sql = format!(
        "INSERT INTO {table} (node_logical_id, text_content) VALUES ('goal-1', 'Old stale name')"
    );
    conn.execute(&stale_row_sql, [])
        .expect("insert stale property FTS row");
    drop(conn);

    let report = service.check_semantics().expect("semantics check");
    assert_eq!(report.drifted_property_fts_rows, 1);
}
8309
/// Registers a `$.searchable` property-FTS schema for `Goal`, inserts a node
/// whose properties contain only `other`, and seeds a per-kind FTS row for it
/// anyway; `check_semantics` must count that row as drifted.
#[test]
fn check_semantics_detects_property_fts_row_that_should_not_exist() {
    let (db, service) = setup();

    let conn = sqlite::open_connection(db.path()).expect("conn");
    let schema_sql = "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
         VALUES ('Goal', '[\"$.searchable\"]', ' ')";
    conn.execute(schema_sql, []).expect("register schema");
    // The node has no `searchable` property, the only path the schema indexes.
    let node_sql = "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
         VALUES ('r1', 'goal-1', 'Goal', '{\"other\":\"field\"}', 100, 'src-1')";
    conn.execute(node_sql, []).expect("insert node");

    let table = fathomdb_schema::fts_kind_table_name("Goal");
    let create_table_sql = format!(
        "CREATE VIRTUAL TABLE IF NOT EXISTS {table} \
         USING fts5(node_logical_id UNINDEXED, text_content, tokenize = 'porter unicode61 remove_diacritics 2')"
    );
    conn.execute_batch(&create_table_sql)
        .expect("create per-kind table");
    let phantom_row_sql = format!(
        "INSERT INTO {table} (node_logical_id, text_content) VALUES ('goal-1', 'phantom text')"
    );
    conn.execute(&phantom_row_sql, [])
        .expect("insert phantom property FTS row");
    drop(conn);

    let report = service.check_semantics().expect("semantics check");
    assert_eq!(
        report.drifted_property_fts_rows, 1,
        "row that should not exist must be counted as drifted"
    );
}
8347
8348 #[test]
8349 fn safe_export_writes_manifest_with_sha256() {
8350 let (_db, service) = setup();
8351 let export_dir = tempfile::TempDir::new().expect("temp dir");
8352 let export_path = export_dir.path().join("backup.db");
8353
8354 let manifest = service
8355 .safe_export(
8356 &export_path,
8357 SafeExportOptions {
8358 force_checkpoint: false,
8359 },
8360 )
8361 .expect("export");
8362
8363 assert!(export_path.exists(), "exported db should exist");
8364 let manifest_path = export_dir.path().join("backup.db.export-manifest.json");
8365 assert!(
8366 manifest_path.exists(),
8367 "manifest file should exist at {}",
8368 manifest_path.display()
8369 );
8370 assert_eq!(manifest.sha256.len(), 64, "sha256 should be 64 hex chars");
8371 assert!(
8372 manifest.exported_at > 0,
8373 "exported_at should be a unix timestamp"
8374 );
8375 assert_eq!(
8376 manifest.schema_version,
8377 SchemaManager::new().current_version().0,
8378 "schema_version should match the live schema version"
8379 );
8380 assert_eq!(manifest.protocol_version, 1, "protocol_version should be 1");
8381 assert!(manifest.page_count > 0, "page_count should be positive");
8382 }
8383
8384 #[test]
8385 fn safe_export_preserves_operational_validation_contracts() {
8386 let (_db, service) = setup();
8387 let validation_json = r#"{"format_version":1,"mode":"enforce","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#;
8388 service
8389 .register_operational_collection(&OperationalRegisterRequest {
8390 name: "connector_health".to_owned(),
8391 kind: OperationalCollectionKind::LatestState,
8392 schema_json: "{}".to_owned(),
8393 retention_json: "{}".to_owned(),
8394 filter_fields_json: "[]".to_owned(),
8395 validation_json: validation_json.to_owned(),
8396 secondary_indexes_json: "[]".to_owned(),
8397 format_version: 1,
8398 })
8399 .expect("register collection");
8400
8401 let export_dir = tempfile::TempDir::new().expect("temp dir");
8402 let export_path = export_dir.path().join("backup.db");
8403 service
8404 .safe_export(
8405 &export_path,
8406 SafeExportOptions {
8407 force_checkpoint: false,
8408 },
8409 )
8410 .expect("export");
8411
8412 let exported = sqlite::open_connection(&export_path).expect("exported conn");
8413 let exported_validation_json: String = exported
8414 .query_row(
8415 "SELECT validation_json FROM operational_collections WHERE name = 'connector_health'",
8416 [],
8417 |row| row.get(0),
8418 )
8419 .expect("validation_json");
8420 assert_eq!(exported_validation_json, validation_json);
8421 }
8422
8423 #[test]
8424 fn safe_export_force_checkpoint_false_skips_wal_pragma() {
8425 let (_db, service) = setup();
8426 let export_dir = tempfile::TempDir::new().expect("temp dir");
8427 let export_path = export_dir.path().join("no-wal.db");
8428
8429 let manifest = service
8431 .safe_export(
8432 &export_path,
8433 SafeExportOptions {
8434 force_checkpoint: false,
8435 },
8436 )
8437 .expect("export with no checkpoint");
8438
8439 assert!(
8440 manifest.page_count > 0,
8441 "page_count must be populated regardless of checkpoint mode"
8442 );
8443 assert_eq!(
8444 manifest.schema_version,
8445 SchemaManager::new().current_version().0
8446 );
8447 assert_eq!(manifest.protocol_version, 1);
8448 }
8449
8450 #[test]
8451 fn safe_export_force_checkpoint_false_still_captures_wal_backed_changes() {
8452 let (db, service) = setup();
8453 let conn = sqlite::open_connection(db.path()).expect("conn");
8454 let journal_mode: String = conn
8455 .query_row("PRAGMA journal_mode=WAL", [], |row| row.get(0))
8456 .expect("enable wal");
8457 assert_eq!(journal_mode.to_lowercase(), "wal");
8458 let auto_checkpoint_pages: i64 = conn
8459 .query_row("PRAGMA wal_autocheckpoint=0", [], |row| row.get(0))
8460 .expect("disable auto checkpoint");
8461 assert_eq!(auto_checkpoint_pages, 0);
8462 conn.execute(
8463 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8464 VALUES ('r-wal', 'lg-wal', 'Meeting', '{}', 100, 'src-wal')",
8465 [],
8466 )
8467 .expect("insert wal-backed node");
8468
8469 let export_dir = tempfile::TempDir::new().expect("temp dir");
8470 let export_path = export_dir.path().join("wal-backed.db");
8471 service
8472 .safe_export(
8473 &export_path,
8474 SafeExportOptions {
8475 force_checkpoint: false,
8476 },
8477 )
8478 .expect("export wal-backed db");
8479
8480 let exported = sqlite::open_connection(&export_path).expect("open exported db");
8481 let exported_count: i64 = exported
8482 .query_row(
8483 "SELECT count(*) FROM nodes WHERE logical_id = 'lg-wal'",
8484 [],
8485 |row| row.get(0),
8486 )
8487 .expect("count exported nodes");
8488 assert_eq!(
8489 exported_count, 1,
8490 "safe_export must include committed rows that are still resident in the WAL"
8491 );
8492 }
8493
8494 #[test]
8495 fn excise_source_removes_searchable_content_after_excision() {
8496 let (db, service) = setup();
8497 {
8498 let conn = sqlite::open_connection(db.path()).expect("conn");
8499 conn.execute(
8500 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
8501 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
8502 [],
8503 )
8504 .expect("insert v1");
8505 conn.execute(
8506 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8507 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
8508 [],
8509 )
8510 .expect("insert v2");
8511 conn.execute(
8512 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
8513 VALUES ('ck1', 'lg1', 'hello world', 100)",
8514 [],
8515 )
8516 .expect("insert chunk");
8517 }
8518 service.excise_source("source-2").expect("excise");
8519 {
8520 let conn = sqlite::open_connection(db.path()).expect("conn");
8521 let fts_count: i64 = conn
8522 .query_row(
8523 "SELECT count(*) FROM fts_nodes WHERE chunk_id = 'ck1'",
8524 [],
8525 |row| row.get(0),
8526 )
8527 .expect("fts count");
8528 assert_eq!(
8529 fts_count, 0,
8530 "excised content should not remain searchable after excise"
8531 );
8532 }
8533 }
8534
8535 #[cfg(feature = "sqlite-vec")]
8536 #[test]
8537 fn excise_source_cleans_chunks_and_vec_rows_for_excised_version() {
8538 let (db, service) = setup();
8539 {
8540 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
8541 service
8542 .schema_manager
8543 .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
8544 .expect("ensure vec profile");
8545 conn.execute(
8546 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
8547 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
8548 [],
8549 )
8550 .expect("insert v1");
8551 conn.execute(
8552 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8553 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
8554 [],
8555 )
8556 .expect("insert v2");
8557 conn.execute(
8558 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
8559 VALUES ('ck1', 'lg1', 'new content', 200)",
8560 [],
8561 )
8562 .expect("insert chunk");
8563 conn.execute(
8564 "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('ck1', zeroblob(16))",
8565 [],
8566 )
8567 .expect("insert vec row");
8568 }
8569
8570 service.excise_source("source-2").expect("excise");
8571
8572 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
8573 let active_row: String = conn
8574 .query_row(
8575 "SELECT row_id FROM nodes WHERE logical_id = 'lg1' AND superseded_at IS NULL",
8576 [],
8577 |row| row.get(0),
8578 )
8579 .expect("restored active row");
8580 assert_eq!(active_row, "r1");
8581 let chunk_count: i64 = conn
8582 .query_row(
8583 "SELECT count(*) FROM chunks WHERE node_logical_id = 'lg1'",
8584 [],
8585 |row| row.get(0),
8586 )
8587 .expect("chunk count");
8588 assert_eq!(
8589 chunk_count, 0,
8590 "excised source content must not survive as chunks"
8591 );
8592 let vec_count: i64 = conn
8593 .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
8594 row.get(0)
8595 })
8596 .expect("vec count");
8597 assert_eq!(vec_count, 0, "excised source vec rows must be removed");
8598 let fts_count: i64 = conn
8599 .query_row(
8600 "SELECT count(*) FROM fts_nodes WHERE node_logical_id = 'lg1'",
8601 [],
8602 |row| row.get(0),
8603 )
8604 .expect("fts count");
8605 assert_eq!(
8606 fts_count, 0,
8607 "excised source content must not remain searchable"
8608 );
8609 }
8610
8611 #[test]
8612 fn export_page_count_matches_exported_file() {
8613 let (_db, service) = setup();
8614 let export_dir = tempfile::TempDir::new().expect("temp dir");
8615 let export_path = export_dir.path().join("page-count.db");
8616
8617 let manifest = service
8618 .safe_export(
8619 &export_path,
8620 SafeExportOptions {
8621 force_checkpoint: false,
8622 },
8623 )
8624 .expect("export");
8625
8626 let exported = sqlite::open_connection(&export_path).expect("open exported db");
8627 let actual_page_count: u64 = exported
8628 .query_row("PRAGMA page_count", [], |row| row.get(0))
8629 .expect("page_count from exported file");
8630
8631 assert_eq!(
8632 manifest.page_count, actual_page_count,
8633 "manifest page_count must match the exported file's PRAGMA page_count"
8634 );
8635 }
8636
8637 #[test]
8638 fn no_temp_file_after_successful_export() {
8639 let (_db, service) = setup();
8640 let export_dir = tempfile::TempDir::new().expect("temp dir");
8641 let export_path = export_dir.path().join("no-tmp.db");
8642
8643 service
8644 .safe_export(
8645 &export_path,
8646 SafeExportOptions {
8647 force_checkpoint: false,
8648 },
8649 )
8650 .expect("export");
8651
8652 let tmp_files: Vec<_> = fs::read_dir(export_dir.path())
8653 .expect("read export dir")
8654 .filter_map(Result::ok)
8655 .filter(|e| e.path().extension().is_some_and(|ext| ext == "tmp"))
8656 .collect();
8657
8658 assert!(
8659 tmp_files.is_empty(),
8660 "no .tmp files should remain after a successful export, found: {tmp_files:?}"
8661 );
8662 }
8663
8664 #[test]
8665 fn export_manifest_is_valid_json() {
8666 let (_db, service) = setup();
8667 let export_dir = tempfile::TempDir::new().expect("temp dir");
8668 let export_path = export_dir.path().join("valid-json.db");
8669
8670 service
8671 .safe_export(
8672 &export_path,
8673 SafeExportOptions {
8674 force_checkpoint: false,
8675 },
8676 )
8677 .expect("export");
8678
8679 let manifest_path = export_dir.path().join("valid-json.db.export-manifest.json");
8680 let manifest_contents = fs::read_to_string(&manifest_path).expect("read manifest");
8681 let parsed: serde_json::Value =
8682 serde_json::from_str(&manifest_contents).expect("manifest must be valid JSON");
8683
8684 assert!(
8685 parsed.get("exported_at").is_some(),
8686 "manifest must contain exported_at"
8687 );
8688 assert!(
8689 parsed.get("sha256").is_some(),
8690 "manifest must contain sha256"
8691 );
8692 assert!(
8693 parsed.get("schema_version").is_some(),
8694 "manifest must contain schema_version"
8695 );
8696 assert!(
8697 parsed.get("protocol_version").is_some(),
8698 "manifest must contain protocol_version"
8699 );
8700 assert!(
8701 parsed.get("page_count").is_some(),
8702 "manifest must contain page_count"
8703 );
8704 }
8705
8706 #[test]
8707 fn provenance_purge_dry_run_reports_counts() {
8708 let (db, service) = setup();
8709 {
8710 let conn = sqlite::open_connection(db.path()).expect("conn");
8711 conn.execute(
8712 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8713 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
8714 [],
8715 )
8716 .expect("insert p1");
8717 conn.execute(
8718 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8719 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 200)",
8720 [],
8721 )
8722 .expect("insert p2");
8723 conn.execute(
8724 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8725 VALUES ('p3', 'excise', 'lg3', 'src-1', 300)",
8726 [],
8727 )
8728 .expect("insert p3");
8729 }
8730
8731 let options = super::ProvenancePurgeOptions {
8732 dry_run: true,
8733 preserve_event_types: Vec::new(),
8734 };
8735 let report = service
8736 .purge_provenance_events(250, &options)
8737 .expect("dry run purge");
8738
8739 assert_eq!(report.events_deleted, 2);
8740 assert_eq!(report.events_preserved, 1);
8741 assert!(report.oldest_remaining.is_some());
8742
8743 let conn = sqlite::open_connection(db.path()).expect("conn");
8744 let total: i64 = conn
8745 .query_row("SELECT count(*) FROM provenance_events", [], |row| {
8746 row.get(0)
8747 })
8748 .expect("count");
8749 assert_eq!(total, 3, "dry_run must not delete any events");
8750 }
8751
8752 #[test]
8753 fn provenance_purge_deletes_old_events() {
8754 let (db, service) = setup();
8755 {
8756 let conn = sqlite::open_connection(db.path()).expect("conn");
8757 conn.execute(
8758 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8759 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
8760 [],
8761 )
8762 .expect("insert p1");
8763 conn.execute(
8764 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8765 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 200)",
8766 [],
8767 )
8768 .expect("insert p2");
8769 }
8770
8771 let options = super::ProvenancePurgeOptions {
8772 dry_run: false,
8773 preserve_event_types: Vec::new(),
8774 };
8775 let report = service
8776 .purge_provenance_events(150, &options)
8777 .expect("purge");
8778
8779 assert_eq!(report.events_deleted, 1);
8780 assert_eq!(report.events_preserved, 1);
8781 assert_eq!(report.oldest_remaining, Some(200));
8782
8783 let conn = sqlite::open_connection(db.path()).expect("conn");
8784 let remaining: i64 = conn
8785 .query_row("SELECT count(*) FROM provenance_events", [], |row| {
8786 row.get(0)
8787 })
8788 .expect("count");
8789 assert_eq!(remaining, 1);
8790 }
8791
8792 #[test]
8793 fn provenance_purge_preserves_specified_types() {
8794 let (db, service) = setup();
8795 {
8796 let conn = sqlite::open_connection(db.path()).expect("conn");
8797 conn.execute(
8798 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8799 VALUES ('p1', 'excise', 'lg1', 'src-1', 100)",
8800 [],
8801 )
8802 .expect("insert p1");
8803 conn.execute(
8804 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8805 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 100)",
8806 [],
8807 )
8808 .expect("insert p2");
8809 conn.execute(
8810 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8811 VALUES ('p3', 'node_insert', 'lg3', 'src-1', 100)",
8812 [],
8813 )
8814 .expect("insert p3");
8815 }
8816
8817 let options = super::ProvenancePurgeOptions {
8818 dry_run: false,
8819 preserve_event_types: Vec::new(),
8820 };
8821 let report = service
8822 .purge_provenance_events(500, &options)
8823 .expect("purge");
8824
8825 assert_eq!(report.events_deleted, 2);
8826 assert_eq!(report.events_preserved, 1);
8827
8828 let conn = sqlite::open_connection(db.path()).expect("conn");
8829 let remaining_type: String = conn
8830 .query_row("SELECT event_type FROM provenance_events", [], |row| {
8831 row.get(0)
8832 })
8833 .expect("remaining event type");
8834 assert_eq!(remaining_type, "excise");
8835 }
8836
8837 #[test]
8838 fn provenance_purge_noop_with_zero_timestamp() {
8839 let (db, service) = setup();
8840 {
8841 let conn = sqlite::open_connection(db.path()).expect("conn");
8842 conn.execute(
8843 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8844 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
8845 [],
8846 )
8847 .expect("insert p1");
8848 }
8849
8850 let options = super::ProvenancePurgeOptions {
8851 dry_run: false,
8852 preserve_event_types: Vec::new(),
8853 };
8854 let report = service.purge_provenance_events(0, &options).expect("purge");
8855
8856 assert_eq!(report.events_deleted, 0);
8857 assert_eq!(report.events_preserved, 1);
8858 assert_eq!(report.oldest_remaining, Some(100));
8859 }
8860
8861 #[test]
8862 fn restore_skips_edge_when_counterpart_purged() {
8863 let (db, service) = setup();
8864 {
8865 let conn = sqlite::open_connection(db.path()).expect("conn");
8866 conn.execute(
8868 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8869 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
8870 [],
8871 )
8872 .expect("insert node A");
8873 conn.execute(
8874 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8875 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
8876 [],
8877 )
8878 .expect("insert node B");
8879 conn.execute(
8881 "INSERT INTO edges \
8882 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
8883 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
8884 [],
8885 )
8886 .expect("insert edge");
8887 conn.execute(
8889 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8890 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
8891 [],
8892 )
8893 .expect("insert retire event A");
8894 conn.execute(
8895 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8896 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
8897 [],
8898 )
8899 .expect("insert edge retire event");
8900 conn.execute(
8901 "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
8902 [],
8903 )
8904 .expect("retire node A");
8905 conn.execute(
8906 "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-2'",
8907 [],
8908 )
8909 .expect("retire node B");
8910 conn.execute(
8911 "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
8912 [],
8913 )
8914 .expect("retire edge");
8915 conn.execute("DELETE FROM nodes WHERE logical_id = 'doc-2'", [])
8918 .expect("purge node B rows");
8919 }
8920
8921 let report = service.restore_logical_id("doc-1").expect("restore A");
8923 assert!(!report.was_noop);
8924 assert_eq!(report.restored_node_rows, 1);
8925 assert_eq!(report.restored_edge_rows, 0, "edge should not be restored");
8926 assert_eq!(report.skipped_edges.len(), 1);
8927 assert_eq!(report.skipped_edges[0].edge_logical_id, "edge-1");
8928 assert_eq!(report.skipped_edges[0].missing_endpoint, "doc-2");
8929
8930 let conn = sqlite::open_connection(db.path()).expect("conn");
8932 let active_edge_count: i64 = conn
8933 .query_row(
8934 "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
8935 [],
8936 |row| row.get(0),
8937 )
8938 .expect("active edge count");
8939 assert_eq!(active_edge_count, 0, "edge must remain retired");
8940 }
8941
8942 #[test]
8943 fn restore_restores_edges_to_active_nodes() {
8944 let (db, service) = setup();
8945 {
8946 let conn = sqlite::open_connection(db.path()).expect("conn");
8947 conn.execute(
8949 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8950 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
8951 [],
8952 )
8953 .expect("insert node A");
8954 conn.execute(
8955 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8956 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
8957 [],
8958 )
8959 .expect("insert node B");
8960 conn.execute(
8962 "INSERT INTO edges \
8963 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
8964 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
8965 [],
8966 )
8967 .expect("insert edge");
8968 conn.execute(
8970 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8971 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
8972 [],
8973 )
8974 .expect("insert retire event A");
8975 conn.execute(
8976 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8977 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
8978 [],
8979 )
8980 .expect("insert edge retire event");
8981 conn.execute(
8982 "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
8983 [],
8984 )
8985 .expect("retire node A");
8986 conn.execute(
8987 "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
8988 [],
8989 )
8990 .expect("retire edge");
8991 }
8992
8993 let report = service.restore_logical_id("doc-1").expect("restore A");
8995 assert!(!report.was_noop);
8996 assert_eq!(report.restored_node_rows, 1);
8997 assert!(report.restored_edge_rows > 0, "edge should be restored");
8998 assert!(
8999 report.skipped_edges.is_empty(),
9000 "no edges should be skipped"
9001 );
9002
9003 let conn = sqlite::open_connection(db.path()).expect("conn");
9004 let active_edge_count: i64 = conn
9005 .query_row(
9006 "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
9007 [],
9008 |row| row.get(0),
9009 )
9010 .expect("active edge count");
9011 assert_eq!(active_edge_count, 1, "edge must be active");
9012 }
9013
9014 #[test]
9015 fn restore_restores_edges_when_both_restored() {
9016 let (db, service) = setup();
9017 {
9018 let conn = sqlite::open_connection(db.path()).expect("conn");
9019 conn.execute(
9021 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9022 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
9023 [],
9024 )
9025 .expect("insert node A");
9026 conn.execute(
9027 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9028 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
9029 [],
9030 )
9031 .expect("insert node B");
9032 conn.execute(
9034 "INSERT INTO edges \
9035 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
9036 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
9037 [],
9038 )
9039 .expect("insert edge");
9040 conn.execute(
9042 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9043 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
9044 [],
9045 )
9046 .expect("insert retire event A");
9047 conn.execute(
9048 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9049 VALUES ('evt-retire-b', 'node_retire', 'doc-2', 'forget-1', 200, '')",
9050 [],
9051 )
9052 .expect("insert retire event B");
9053 conn.execute(
9054 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9055 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
9056 [],
9057 )
9058 .expect("insert edge retire event");
9059 conn.execute(
9060 "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
9061 [],
9062 )
9063 .expect("retire node A");
9064 conn.execute(
9065 "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-2'",
9066 [],
9067 )
9068 .expect("retire node B");
9069 conn.execute(
9070 "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
9071 [],
9072 )
9073 .expect("retire edge");
9074 }
9075
9076 let report_b = service.restore_logical_id("doc-2").expect("restore B");
9078 assert!(!report_b.was_noop);
9079
9080 let report_a = service.restore_logical_id("doc-1").expect("restore A");
9082 assert!(!report_a.was_noop);
9083 assert_eq!(report_a.restored_node_rows, 1);
9084 assert!(
9085 report_a.restored_edge_rows > 0,
9086 "edge should be restored when both endpoints active"
9087 );
9088 assert!(
9089 report_a.skipped_edges.is_empty(),
9090 "no edges should be skipped"
9091 );
9092
9093 let conn = sqlite::open_connection(db.path()).expect("conn");
9094 let active_edge_count: i64 = conn
9095 .query_row(
9096 "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
9097 [],
9098 |row| row.get(0),
9099 )
9100 .expect("active edge count");
9101 assert_eq!(
9102 active_edge_count, 1,
9103 "edge must be active after both endpoints restored"
9104 );
9105 }
9106
9107 #[test]
9110 fn fts_property_schema_crud_round_trip() {
9111 let (_db, service) = setup();
9112
9113 let record = service
9115 .register_fts_property_schema(
9116 "Meeting",
9117 &["$.title".to_owned(), "$.summary".to_owned()],
9118 None,
9119 )
9120 .expect("register");
9121 assert_eq!(record.kind, "Meeting");
9122 assert_eq!(record.property_paths, vec!["$.title", "$.summary"]);
9123 assert_eq!(record.separator, " ");
9124 assert_eq!(record.format_version, 1);
9125
9126 let described = service
9128 .describe_fts_property_schema("Meeting")
9129 .expect("describe")
9130 .expect("should exist");
9131 assert_eq!(described, record);
9132
9133 let missing = service
9135 .describe_fts_property_schema("NoSuchKind")
9136 .expect("describe missing");
9137 assert!(missing.is_none());
9138
9139 let list = service.list_fts_property_schemas().expect("list");
9141 assert_eq!(list.len(), 1);
9142 assert_eq!(list[0].kind, "Meeting");
9143
9144 let updated = service
9146 .register_fts_property_schema(
9147 "Meeting",
9148 &["$.title".to_owned(), "$.notes".to_owned()],
9149 Some("\n"),
9150 )
9151 .expect("update");
9152 assert_eq!(updated.property_paths, vec!["$.title", "$.notes"]);
9153 assert_eq!(updated.separator, "\n");
9154
9155 service
9157 .remove_fts_property_schema("Meeting")
9158 .expect("remove");
9159 let after_remove = service
9160 .describe_fts_property_schema("Meeting")
9161 .expect("describe after remove");
9162 assert!(after_remove.is_none());
9163
9164 let err = service.remove_fts_property_schema("Meeting");
9166 assert!(err.is_err());
9167 }
9168
9169 #[test]
9170 fn describe_fts_property_schema_round_trips_recursive_entries() {
9171 let (_db, service) = setup();
9172
9173 let entries = vec![
9174 FtsPropertyPathSpec::scalar("$.title"),
9175 FtsPropertyPathSpec::recursive("$.payload"),
9176 ];
9177 let exclude = vec!["$.payload.private".to_owned()];
9178 let registered = service
9179 .register_fts_property_schema_with_entries(
9180 "KnowledgeItem",
9181 &entries,
9182 Some(" "),
9183 &exclude,
9184 crate::rebuild_actor::RebuildMode::Eager,
9185 )
9186 .expect("register recursive");
9187
9188 assert_eq!(registered.entries, entries);
9191 assert_eq!(registered.exclude_paths, exclude);
9192 assert_eq!(registered.property_paths, vec!["$.title", "$.payload"]);
9193
9194 let described = service
9195 .describe_fts_property_schema("KnowledgeItem")
9196 .expect("describe")
9197 .expect("should exist");
9198 assert_eq!(described.kind, "KnowledgeItem");
9199 assert_eq!(described.entries, entries);
9200 assert_eq!(described.exclude_paths, exclude);
9201 assert_eq!(described.property_paths, vec!["$.title", "$.payload"]);
9202 assert_eq!(described.separator, " ");
9203 assert_eq!(described.format_version, 1);
9204 }
9205
9206 #[test]
9207 fn list_fts_property_schemas_round_trips_recursive_entries() {
9208 let (_db, service) = setup();
9209
9210 let entries = vec![
9211 FtsPropertyPathSpec::scalar("$.title"),
9212 FtsPropertyPathSpec::recursive("$.payload"),
9213 ];
9214 let exclude = vec!["$.payload.secret".to_owned()];
9215 service
9216 .register_fts_property_schema_with_entries(
9217 "KnowledgeItem",
9218 &entries,
9219 Some(" "),
9220 &exclude,
9221 crate::rebuild_actor::RebuildMode::Eager,
9222 )
9223 .expect("register recursive");
9224
9225 let listed = service.list_fts_property_schemas().expect("list");
9226 assert_eq!(listed.len(), 1);
9227 let record = &listed[0];
9228 assert_eq!(record.kind, "KnowledgeItem");
9229 assert_eq!(record.entries, entries);
9230 assert_eq!(record.exclude_paths, exclude);
9231 assert_eq!(record.property_paths, vec!["$.title", "$.payload"]);
9232 }
9233
9234 #[test]
9235 fn describe_fts_property_schema_round_trips_scalar_only_entries() {
9236 let (_db, service) = setup();
9237
9238 service
9239 .register_fts_property_schema(
9240 "Meeting",
9241 &["$.title".to_owned(), "$.summary".to_owned()],
9242 None,
9243 )
9244 .expect("register scalar");
9245
9246 let described = service
9247 .describe_fts_property_schema("Meeting")
9248 .expect("describe")
9249 .expect("should exist");
9250 assert_eq!(described.property_paths, vec!["$.title", "$.summary"]);
9251 assert_eq!(described.entries.len(), 2);
9252 for entry in &described.entries {
9253 assert_eq!(
9254 entry.mode,
9255 FtsPropertyPathMode::Scalar,
9256 "scalar-only schema should deserialize every entry as Scalar"
9257 );
9258 }
9259 assert!(described.exclude_paths.is_empty());
9260 }
9261
9262 #[test]
9263 fn restore_reestablishes_property_fts_visibility() {
9264 let (db, service) = setup();
9265 let doc_table = fathomdb_schema::fts_kind_table_name("Document");
9266 {
9267 let conn = sqlite::open_connection(db.path()).expect("conn");
9268 conn.execute(
9270 "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
9271 VALUES ('Document', '[\"$.title\", \"$.body\"]', ' ')",
9272 [],
9273 )
9274 .expect("register schema");
9275 conn.execute_batch(&format!(
9277 "CREATE VIRTUAL TABLE IF NOT EXISTS {doc_table} USING fts5(\
9278 node_logical_id UNINDEXED, text_content, \
9279 tokenize = 'porter unicode61 remove_diacritics 2'\
9280 )"
9281 ))
9282 .expect("create per-kind table");
9283 conn.execute(
9285 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9286 VALUES ('row-1', 'doc-1', 'Document', '{\"title\":\"Budget\",\"body\":\"Q3 forecast\"}', 100, 'seed')",
9287 [],
9288 )
9289 .expect("insert node");
9290 conn.execute(
9292 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
9293 VALUES ('chunk-1', 'doc-1', 'budget text', 100)",
9294 [],
9295 )
9296 .expect("insert chunk");
9297 conn.execute(
9299 &format!(
9300 "INSERT INTO {doc_table} (node_logical_id, text_content) \
9301 VALUES ('doc-1', 'Budget Q3 forecast')"
9302 ),
9303 [],
9304 )
9305 .expect("insert property fts");
9306 conn.execute(
9308 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9309 VALUES ('evt-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
9310 [],
9311 )
9312 .expect("retire event");
9313 conn.execute(
9314 "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
9315 [],
9316 )
9317 .expect("supersede");
9318 conn.execute("DELETE FROM fts_nodes", [])
9319 .expect("clear chunk fts");
9320 conn.execute(&format!("DELETE FROM {doc_table}"), [])
9321 .expect("clear property fts");
9322 }
9323
9324 let report = service.restore_logical_id("doc-1").expect("restore");
9325 assert_eq!(report.restored_property_fts_rows, 1);
9326
9327 let conn = sqlite::open_connection(db.path()).expect("conn");
9329 let prop_fts_count: i64 = conn
9330 .query_row(
9331 &format!("SELECT count(*) FROM {doc_table} WHERE node_logical_id = 'doc-1'"),
9332 [],
9333 |row| row.get(0),
9334 )
9335 .expect("prop fts count");
9336 assert_eq!(prop_fts_count, 1, "property FTS must be restored");
9337
9338 let text: String = conn
9339 .query_row(
9340 &format!("SELECT text_content FROM {doc_table} WHERE node_logical_id = 'doc-1'"),
9341 [],
9342 |row| row.get(0),
9343 )
9344 .expect("prop fts text");
9345 assert_eq!(text, "Budget Q3 forecast");
9346 }
9347
/// A safe export must carry the registered FTS property schema row (kind and
/// JSON-encoded paths) into the exported database file.
#[test]
fn safe_export_preserves_fts_property_schemas() {
    let (_db, service) = setup();
    let registered_paths = ["$.name".to_owned(), "$.rationale".to_owned()];
    service
        .register_fts_property_schema("Goal", &registered_paths, None)
        .expect("register schema");

    let scratch = tempfile::TempDir::new().expect("temp dir");
    let export_path = scratch.path().join("backup.db");
    let options = SafeExportOptions {
        force_checkpoint: false,
    };
    service.safe_export(&export_path, options).expect("export");

    let exported_conn = rusqlite::Connection::open(&export_path).expect("open exported db");

    // The schema row itself must be present in the export.
    let stored_kind = exported_conn
        .query_row(
            "SELECT kind FROM fts_property_schemas WHERE kind = 'Goal'",
            [],
            |row| row.get::<_, String>(0),
        )
        .expect("schema must exist in export");
    assert_eq!(stored_kind, "Goal");

    // The stored path list must decode to exactly what was registered.
    let stored_paths_json = exported_conn
        .query_row(
            "SELECT property_paths_json FROM fts_property_schemas WHERE kind = 'Goal'",
            [],
            |row| row.get::<_, String>(0),
        )
        .expect("paths must exist");
    let decoded: Vec<String> = serde_json::from_str(&stored_paths_json).expect("valid json");
    assert_eq!(decoded, vec!["$.name", "$.rationale"]);
}
9390
/// Recovery scenario: export a database, tamper with the per-kind property FTS
/// table inside the export, then check that a full FTS rebuild run against the
/// export brings the rows back in line with the `nodes` table.
#[test]
#[allow(clippy::too_many_lines)]
fn export_recovery_rebuilds_property_fts_from_canonical_state() {
    let (db, service) = setup();
    let goal_table = fathomdb_schema::fts_kind_table_name("Goal");
    let name_only = ["$.name".to_owned()];
    service
        .register_fts_property_schema("Goal", &name_only, None)
        .expect("register");

    // Seed two Goal nodes, each with a matching property FTS row.
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    seed_conn
        .execute(
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
            [],
        )
        .expect("insert node 1");
    let insert_fts_goal_1 = format!(
        "INSERT INTO {goal_table} (node_logical_id, text_content) \
         VALUES ('goal-1', 'Ship v2')"
    );
    seed_conn
        .execute(&insert_fts_goal_1, [])
        .expect("insert property FTS row 1");
    seed_conn
        .execute(
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('row-2', 'goal-2', 'Goal', '{\"name\":\"Launch redesign\"}', 100, 'seed')",
            [],
        )
        .expect("insert node 2");
    let insert_fts_goal_2 = format!(
        "INSERT INTO {goal_table} (node_logical_id, text_content) \
         VALUES ('goal-2', 'Launch redesign')"
    );
    seed_conn
        .execute(&insert_fts_goal_2, [])
        .expect("insert property FTS row 2");
    drop(seed_conn);

    let scratch = tempfile::TempDir::new().expect("temp dir");
    let export_path = scratch.path().join("backup.db");
    let options = SafeExportOptions {
        force_checkpoint: false,
    };
    service.safe_export(&export_path, options).expect("export");

    // Tamper with the export: goal-1 gets stale text, goal-2 loses its row.
    let tamper_conn = rusqlite::Connection::open(&export_path).expect("open export");
    SchemaManager::new()
        .bootstrap(&tamper_conn)
        .expect("bootstrap export");
    let delete_goal_1 = format!("DELETE FROM {goal_table} WHERE node_logical_id = 'goal-1'");
    tamper_conn
        .execute(&delete_goal_1, [])
        .expect("delete old row");
    let insert_stale_goal_1 = format!(
        "INSERT INTO {goal_table} (node_logical_id, text_content) \
         VALUES ('goal-1', 'completely wrong stale text')"
    );
    tamper_conn
        .execute(&insert_stale_goal_1, [])
        .expect("insert corrupted row");
    let delete_goal_2 = format!("DELETE FROM {goal_table} WHERE node_logical_id = 'goal-2'");
    tamper_conn
        .execute(&delete_goal_2, [])
        .expect("delete goal-2 row");
    drop(tamper_conn);

    // Rebuild through a fresh service that points at the exported file.
    let schema = Arc::new(SchemaManager::new());
    let exported_service = AdminService::new(&export_path, Arc::clone(&schema));
    exported_service
        .rebuild_projections(ProjectionTarget::Fts)
        .expect("rebuild");

    let verify_conn = rusqlite::Connection::open(&export_path).expect("open export for verify");

    let goal_1_text_sql =
        format!("SELECT text_content FROM {goal_table} WHERE node_logical_id = 'goal-1'");
    let goal1_text = verify_conn
        .query_row(&goal_1_text_sql, [], |r| r.get::<_, String>(0))
        .expect("goal-1 text after rebuild");
    assert_eq!(
        goal1_text, "Ship v2",
        "goal-1 text must be corrected by rebuild"
    );

    let goal_2_count_sql =
        format!("SELECT count(*) FROM {goal_table} WHERE node_logical_id = 'goal-2'");
    let goal2_count = verify_conn
        .query_row(&goal_2_count_sql, [], |r| r.get::<_, i64>(0))
        .expect("goal-2 count");
    assert_eq!(goal2_count, 1, "goal-2 row must be restored by rebuild");

    let stale_count_sql = format!(
        "SELECT count(*) FROM {goal_table} WHERE text_content = 'completely wrong stale text'"
    );
    let stale_count = verify_conn
        .query_row(&stale_count_sql, [], |r| r.get::<_, i64>(0))
        .expect("stale count");
    assert_eq!(stale_count, 0, "corrupted text must be gone after rebuild");

    // The health reports on the export must come back clean as well.
    let integrity = exported_service.check_integrity().expect("integrity");
    assert_eq!(integrity.missing_property_fts_rows, 0);
    let semantics = exported_service.check_semantics().expect("semantics");
    assert_eq!(semantics.drifted_property_fts_rows, 0);
    assert_eq!(semantics.orphaned_property_fts_rows, 0);
    assert_eq!(semantics.duplicate_property_fts_rows, 0);
}
9520
/// A node whose properties contain none of the schema's registered paths must
/// not be reported as a missing property FTS row.
#[test]
fn check_integrity_no_false_positives_for_empty_extraction() {
    let (db, service) = setup();

    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    // The schema indexes `$.searchable`; the node below only has `status`.
    let schema_sql = "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
                      VALUES ('Ticket', '[\"$.searchable\"]', ' ')";
    seed_conn.execute(schema_sql, []).expect("register schema");
    let node_sql =
        "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
         VALUES ('row-1', 'ticket-1', 'Ticket', '{\"status\":\"open\"}', 100, 'seed')";
    seed_conn.execute(node_sql, []).expect("insert node");
    drop(seed_conn);

    let report = service.check_integrity().expect("integrity");
    assert_eq!(
        report.missing_property_fts_rows, 0,
        "node with no extractable values must not be counted as missing"
    );
}
9549
/// A node that carries a registered path but has no property FTS row must be
/// counted by the integrity check.
#[test]
fn check_integrity_detects_genuinely_missing_property_fts_rows() {
    let (db, service) = setup();

    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    // The schema indexes `$.title`, and the node below does have a title.
    let schema_sql = "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
                      VALUES ('Ticket', '[\"$.title\"]', ' ')";
    seed_conn.execute(schema_sql, []).expect("register schema");
    let node_sql =
        "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
         VALUES ('row-1', 'ticket-1', 'Ticket', '{\"title\":\"fix login bug\"}', 100, 'seed')";
    seed_conn.execute(node_sql, []).expect("insert node");
    drop(seed_conn);

    let report = service.check_integrity().expect("integrity");
    assert_eq!(
        report.missing_property_fts_rows, 1,
        "node with extractable values but no property FTS row must be detected"
    );
}
9576
/// A full FTS rebuild must create the property FTS row for a node whose schema
/// and node rows were inserted directly, with no FTS row written.
#[test]
fn rebuild_projections_fts_restores_missing_property_fts_rows() {
    let (db, service) = setup();
    let goal_table = fathomdb_schema::fts_kind_table_name("Goal");

    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    let schema_sql = "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
                      VALUES ('Goal', '[\"$.name\"]', ' ')";
    seed_conn.execute(schema_sql, []).expect("register schema");
    let node_sql =
        "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
         VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')";
    seed_conn.execute(node_sql, []).expect("insert node");
    drop(seed_conn);

    let report = service
        .rebuild_projections(ProjectionTarget::Fts)
        .expect("rebuild");
    assert!(
        report.rebuilt_rows >= 1,
        "rebuild must insert at least one property FTS row"
    );

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let lookup_sql =
        format!("SELECT text_content FROM {goal_table} WHERE node_logical_id = 'goal-1'");
    let indexed_text = verify_conn
        .query_row(&lookup_sql, [], |row| row.get::<_, String>(0))
        .expect("property FTS row must exist after rebuild");
    assert_eq!(indexed_text, "Ship v2");
}
9616
/// `rebuild_missing_projections` must re-create a property FTS row that was
/// deleted from the per-kind table while the schema and node rows remain.
#[test]
fn rebuild_missing_projections_fills_gap_for_deleted_property_fts_row() {
    let (db, service) = setup();
    let goal_table = fathomdb_schema::fts_kind_table_name("Goal");

    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    let schema_sql = "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
                      VALUES ('Goal', '[\"$.name\"]', ' ')";
    seed_conn.execute(schema_sql, []).expect("register schema");
    let node_sql =
        "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
         VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')";
    seed_conn.execute(node_sql, []).expect("insert node");

    // The schema row was inserted directly, so the per-kind table is created by hand.
    let create_table_sql = format!(
        "CREATE VIRTUAL TABLE IF NOT EXISTS {goal_table} USING fts5(\
         node_logical_id UNINDEXED, text_content, \
         tokenize = 'porter unicode61 remove_diacritics 2'\
         )"
    );
    seed_conn
        .execute_batch(&create_table_sql)
        .expect("create per-kind table");

    // Write the FTS row and remove it again to leave a gap.
    let insert_sql = format!(
        "INSERT INTO {goal_table} (node_logical_id, text_content) \
         VALUES ('goal-1', 'Ship v2')"
    );
    seed_conn
        .execute(&insert_sql, [])
        .expect("insert property fts");
    let delete_sql = format!("DELETE FROM {goal_table} WHERE node_logical_id = 'goal-1'");
    seed_conn
        .execute(&delete_sql, [])
        .expect("delete property fts");
    drop(seed_conn);

    let report = service
        .rebuild_missing_projections()
        .expect("rebuild missing");
    assert!(
        report.rebuilt_rows >= 1,
        "missing rebuild must insert the gap-fill row"
    );

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let count_sql = format!("SELECT count(*) FROM {goal_table} WHERE node_logical_id = 'goal-1'");
    let restored = verify_conn
        .query_row(&count_sql, [], |row| row.get::<_, i64>(0))
        .expect("count");
    assert_eq!(
        restored, 1,
        "gap-fill must restore exactly one property FTS row"
    );
}
9679
/// With no schema registered for `Goal`, a row in its per-kind FTS table must be
/// reported as orphaned and removed by a full FTS rebuild.
#[test]
fn remove_schema_then_rebuild_cleans_stale_property_fts_rows() {
    let (db, service) = setup();
    let goal_table = fathomdb_schema::fts_kind_table_name("Goal");

    // Seed a node and a per-kind FTS row, without any fts_property_schemas entry.
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    let node_sql =
        "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
         VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')";
    seed_conn.execute(node_sql, []).expect("insert node");
    let create_table_sql = format!(
        "CREATE VIRTUAL TABLE IF NOT EXISTS {goal_table} \
         USING fts5(node_logical_id UNINDEXED, text_content, tokenize = 'porter unicode61 remove_diacritics 2')"
    );
    seed_conn
        .execute_batch(&create_table_sql)
        .expect("create per-kind table");
    let insert_sql = format!(
        "INSERT INTO {goal_table} (node_logical_id, text_content) \
         VALUES ('goal-1', 'Ship v2')"
    );
    seed_conn
        .execute(&insert_sql, [])
        .expect("insert property fts");
    drop(seed_conn);

    let semantics = service.check_semantics().expect("semantics");
    assert_eq!(
        semantics.orphaned_property_fts_rows, 1,
        "orphaned property FTS rows must be detected with no registered schema"
    );

    service
        .rebuild_projections(ProjectionTarget::Fts)
        .expect("rebuild");

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let count_sql = format!("SELECT count(*) FROM {goal_table} WHERE node_logical_id = 'goal-1'");
    let remaining = verify_conn
        .query_row(&count_sql, [], |row| row.get::<_, i64>(0))
        .expect("count");
    assert_eq!(
        remaining, 0,
        "rebuild must delete rows from per-kind tables with no registered schema"
    );
}
9739
9740 mod validate_fts_property_paths_tests {
9741 use super::super::validate_fts_property_paths;
9742
9743 #[test]
9744 fn valid_simple_path() {
9745 assert!(validate_fts_property_paths(&["$.name".to_owned()]).is_ok());
9746 }
9747
9748 #[test]
9749 fn valid_nested_path() {
9750 assert!(validate_fts_property_paths(&["$.address.city".to_owned()]).is_ok());
9751 }
9752
9753 #[test]
9754 fn valid_underscore_segment() {
9755 assert!(validate_fts_property_paths(&["$.a_b".to_owned()]).is_ok());
9756 }
9757
9758 #[test]
9759 fn rejects_bare_prefix() {
9760 let result = validate_fts_property_paths(&["$.".to_owned()]);
9761 assert!(result.is_err(), "path '$.' must be rejected");
9762 }
9763
9764 #[test]
9765 fn rejects_double_dot() {
9766 let result = validate_fts_property_paths(&["$..x".to_owned()]);
9767 assert!(result.is_err(), "path '$..x' must be rejected");
9768 }
9769
9770 #[test]
9771 fn rejects_trailing_dot() {
9772 let result = validate_fts_property_paths(&["$.foo.".to_owned()]);
9773 assert!(result.is_err(), "path '$.foo.' must be rejected");
9774 }
9775
9776 #[test]
9777 fn rejects_space_in_segment() {
9778 let result = validate_fts_property_paths(&["$.foo bar".to_owned()]);
9779 assert!(result.is_err(), "path '$.foo bar' must be rejected");
9780 }
9781
9782 #[test]
9783 fn rejects_bracket_syntax() {
9784 let result = validate_fts_property_paths(&["$.foo[0]".to_owned()]);
9785 assert!(result.is_err(), "path '$.foo[0]' must be rejected");
9786 }
9787
9788 #[test]
9789 fn rejects_duplicates() {
9790 let result = validate_fts_property_paths(&["$.name".to_owned(), "$.name".to_owned()]);
9791 assert!(result.is_err(), "duplicate paths must be rejected");
9792 }
9793
9794 #[test]
9795 fn rejects_empty_list() {
9796 let result = validate_fts_property_paths(&[]);
9797 assert!(result.is_err(), "empty path list must be rejected");
9798 }
9799 }
9800
/// Registering a schema for a kind that already has a node must leave that
/// node's row in the kind's per-kind FTS table.
#[test]
fn register_fts_schema_writes_to_per_kind_table() {
    let (db, service) = setup();

    // The node exists before the schema is registered.
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    let node_sql =
        "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
         VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')";
    seed_conn.execute(node_sql, []).expect("insert node");
    drop(seed_conn);

    let name_only = ["$.name".to_owned()];
    service
        .register_fts_property_schema("Goal", &name_only, None)
        .expect("register schema");

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let table = fathomdb_schema::fts_kind_table_name("Goal");
    let count_sql = format!("SELECT count(*) FROM {table} WHERE node_logical_id = 'goal-1'");
    let per_kind_count = verify_conn
        .query_row(&count_sql, [], |row| row.get::<_, i64>(0))
        .expect("per-kind count");
    assert_eq!(
        per_kind_count, 1,
        "per-kind table must have the row after registration"
    );
}
9839
/// Removing a registered schema must leave no row for the node in the kind's
/// per-kind FTS table.
#[test]
fn remove_fts_schema_deletes_from_per_kind_table() {
    let (db, service) = setup();

    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    let node_sql =
        "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
         VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')";
    seed_conn.execute(node_sql, []).expect("insert node");
    drop(seed_conn);

    // Register, then immediately remove, the schema for the seeded kind.
    let name_only = ["$.name".to_owned()];
    service
        .register_fts_property_schema("Goal", &name_only, None)
        .expect("register schema");
    service
        .remove_fts_property_schema("Goal")
        .expect("remove schema");

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let table = fathomdb_schema::fts_kind_table_name("Goal");
    let count_sql = format!("SELECT count(*) FROM {table} WHERE node_logical_id = 'goal-1'");
    let per_kind_count = verify_conn
        .query_row(&count_sql, [], |row| row.get::<_, i64>(0))
        .expect("per-kind count");
    assert_eq!(
        per_kind_count, 0,
        "per-kind table must be empty after schema removal"
    );
}
9875
/// `with_weight` sets the weight and leaves the path and mode of a scalar spec as built.
#[test]
fn fts_path_spec_with_weight_builder() {
    let unweighted = FtsPropertyPathSpec::scalar("$.title");
    let spec = unweighted.with_weight(5.0);

    assert_eq!(spec.weight, Some(5.0));
    assert_eq!(spec.path, "$.title");
    assert_eq!(spec.mode, FtsPropertyPathMode::Scalar);
}
9885
/// When any spec carries a weight, serialization yields an object with a `paths`
/// array; the weighted entry includes `weight`, the unweighted one omits it.
#[test]
fn fts_path_spec_serialize_with_weight() {
    use super::serialize_property_paths_json;

    let weighted_title = FtsPropertyPathSpec::scalar("$.title").with_weight(2.0);
    let plain_body = FtsPropertyPathSpec::scalar("$.body");
    let entries = vec![weighted_title, plain_body];

    let json = serialize_property_paths_json(&entries, &[]).expect("serialize");
    let document: serde_json::Value = serde_json::from_str(&json).expect("parse");
    let paths = document
        .get("paths")
        .expect("paths key")
        .as_array()
        .expect("array");
    assert_eq!(paths.len(), 2);

    let first = &paths[0];
    assert_eq!(
        first.get("path").and_then(serde_json::Value::as_str),
        Some("$.title")
    );
    assert_eq!(
        first.get("weight").and_then(serde_json::Value::as_f64),
        Some(2.0)
    );

    let second = &paths[1];
    assert!(
        second.get("weight").is_none(),
        "unweighted spec must omit weight field"
    );
}
9917
/// Scalar specs without weights serialize to a bare JSON array of path strings.
#[test]
fn fts_path_spec_serialize_no_weights() {
    use super::serialize_property_paths_json;

    let title = FtsPropertyPathSpec::scalar("$.title");
    let payload = FtsPropertyPathSpec::scalar("$.payload");
    let entries = vec![title, payload];

    let json = serialize_property_paths_json(&entries, &[]).expect("serialize");
    let document: serde_json::Value = serde_json::from_str(&json).expect("parse");
    assert!(
        document.is_array(),
        "all-scalar no-weight schema must serialize as bare string array"
    );

    let items = document.as_array().expect("array");
    assert_eq!(items.len(), 2);
    assert_eq!(items[0].as_str(), Some("$.title"));
    assert_eq!(items[1].as_str(), Some("$.payload"));
}
9937
/// Registration must reject a weight of 0.0 (with an error mentioning "weight")
/// and a weight of 1001.0.
#[test]
fn fts_weight_validation_out_of_range() {
    let (_db, service) = setup();

    // Zero weight: rejected, and the error text names the offending field.
    let entries_zero = vec![FtsPropertyPathSpec::scalar("$.title").with_weight(0.0)];
    let zero_outcome = service.register_fts_property_schema_with_entries(
        "Article",
        &entries_zero,
        None,
        &[],
        crate::rebuild_actor::RebuildMode::Eager,
    );
    assert!(zero_outcome.is_err(), "weight 0.0 must be rejected");
    let err_msg = zero_outcome
        .expect_err("weight 0.0 must be rejected")
        .to_string();
    assert!(
        err_msg.contains("weight"),
        "error must mention weight: {err_msg}"
    );

    // Oversized weight: only rejection is checked.
    let entries_big = vec![FtsPropertyPathSpec::scalar("$.title").with_weight(1001.0)];
    let big_outcome = service.register_fts_property_schema_with_entries(
        "Article",
        &entries_big,
        None,
        &[],
        crate::rebuild_actor::RebuildMode::Eager,
    );
    assert!(big_outcome.is_err(), "weight 1001.0 must be rejected");
}
9968
/// Registration must accept a spec weighted at 10.0.
#[test]
fn fts_weight_validation_valid() {
    let (_db, service) = setup();
    let weighted_title = FtsPropertyPathSpec::scalar("$.title").with_weight(10.0);
    let entries = vec![weighted_title];

    let outcome = service.register_fts_property_schema_with_entries(
        "Article",
        &entries,
        None,
        &[],
        crate::rebuild_actor::RebuildMode::Eager,
    );
    assert!(
        outcome.is_ok(),
        "weight 10.0 must be accepted: {:?}",
        outcome.err()
    );
}
9986
/// `create_or_replace_fts_kind_table` must produce an empty table that has one
/// column per path spec, named by `fts_column_name`.
#[test]
fn create_or_replace_creates_multi_column_table() {
    use super::create_or_replace_fts_kind_table;

    let (db, _service) = setup();
    let conn = sqlite::open_connection(db.path()).expect("conn");

    let scalar_title = FtsPropertyPathSpec::scalar("$.title");
    let recursive_payload = FtsPropertyPathSpec::recursive("$.payload");
    let specs = vec![scalar_title, recursive_payload];
    create_or_replace_fts_kind_table(
        &conn,
        "Article",
        &specs,
        fathomdb_schema::DEFAULT_FTS_TOKENIZER,
    )
    .expect("create table");

    let table = fathomdb_schema::fts_kind_table_name("Article");
    let count_sql = format!("SELECT count(*) FROM {table}");
    let row_count = conn
        .query_row(&count_sql, [], |r| r.get::<_, i64>(0))
        .expect("count");
    assert_eq!(row_count, 0, "new table must be empty");

    // Inserting by the derived column names proves both columns exist.
    let title_col = fathomdb_schema::fts_column_name("$.title", false);
    let payload_col = fathomdb_schema::fts_column_name("$.payload", true);
    let insert_sql = format!(
        "INSERT INTO {table} (node_logical_id, {title_col}, {payload_col}) VALUES ('id1', 'hello', 'world')"
    );
    conn.execute(&insert_sql, [])
        .expect("insert with per-spec columns must succeed");
}
10025
/// Calling `create_or_replace_fts_kind_table` a second time with an extra spec
/// must leave a table that accepts the new column.
#[test]
fn create_or_replace_drops_and_recreates() {
    use super::create_or_replace_fts_kind_table;

    let (db, _service) = setup();
    let conn = sqlite::open_connection(db.path()).expect("conn");
    let tokenizer = fathomdb_schema::DEFAULT_FTS_TOKENIZER;

    // First layout: title only.
    let specs_v1 = vec![FtsPropertyPathSpec::scalar("$.title")];
    create_or_replace_fts_kind_table(&conn, "Post", &specs_v1, tokenizer).expect("create v1");

    // Second layout: title plus summary.
    let specs_v2 = vec![
        FtsPropertyPathSpec::scalar("$.title"),
        FtsPropertyPathSpec::scalar("$.summary"),
    ];
    create_or_replace_fts_kind_table(&conn, "Post", &specs_v2, tokenizer).expect("create v2");

    let table = fathomdb_schema::fts_kind_table_name("Post");
    let summary_col = fathomdb_schema::fts_column_name("$.summary", false);
    let insert_sql =
        format!("INSERT INTO {table} (node_logical_id, {summary_col}) VALUES ('id1', 'text')");
    conn.execute(&insert_sql, [])
        .expect("second layout must allow summary column");
}
10064
/// A tokenizer string containing SQL-injection-style text must be rejected with
/// an error that mentions "tokenizer".
#[test]
fn create_or_replace_invalid_tokenizer() {
    use super::create_or_replace_fts_kind_table;

    let (db, _service) = setup();
    let conn = sqlite::open_connection(db.path()).expect("conn");
    let specs = vec![FtsPropertyPathSpec::scalar("$.title")];
    let hostile_tokenizer = "'; DROP TABLE --";

    let outcome = create_or_replace_fts_kind_table(&conn, "Post", &specs, hostile_tokenizer);
    assert!(outcome.is_err(), "invalid tokenizer must be rejected");

    let err_msg = outcome
        .expect_err("invalid tokenizer must be rejected")
        .to_string();
    assert!(
        err_msg.contains("tokenizer"),
        "error must mention tokenizer: {err_msg}"
    );
}
10081
/// Registering entries that include a weight must create a per-kind table with
/// one column per path, named by `fts_column_name`.
#[test]
fn register_with_weights_creates_per_column_table() {
    let (db, service) = setup();

    let weighted_title = FtsPropertyPathSpec::scalar("$.title").with_weight(2.0);
    let plain_body = FtsPropertyPathSpec::scalar("$.body");
    let entries = vec![weighted_title, plain_body];
    service
        .register_fts_property_schema_with_entries(
            "Article",
            &entries,
            None,
            &[],
            crate::rebuild_actor::RebuildMode::Eager,
        )
        .expect("register");

    let conn = sqlite::open_connection(db.path()).expect("conn");
    let table = fathomdb_schema::fts_kind_table_name("Article");
    let title_col = fathomdb_schema::fts_column_name("$.title", false);
    let body_col = fathomdb_schema::fts_column_name("$.body", false);

    // The insert only succeeds if both per-path columns exist.
    let insert_sql = format!(
        "INSERT INTO {table} (node_logical_id, {title_col}, {body_col}) VALUES ('art-1', 'hello', 'world')"
    );
    conn.execute(&insert_sql, [])
        .expect("per-spec columns must exist after registration with weights");
}
10113
/// Re-registering a kind without weights, after a weighted registration, must
/// leave the per-kind table with a `text_content` column.
///
/// The check is an insert that names `text_content`; on failure the underlying
/// SQLite error is included in the assertion message.
#[test]
fn weighted_to_unweighted_downgrade_recreates_table() {
    let (db, service) = setup();

    // First registration: one weighted path.
    let weighted_entries = vec![
        FtsPropertyPathSpec::scalar("$.title").with_weight(2.0),
        FtsPropertyPathSpec::scalar("$.body"),
    ];
    service
        .register_fts_property_schema_with_entries(
            "Article",
            &weighted_entries,
            None,
            &[],
            crate::rebuild_actor::RebuildMode::Eager,
        )
        .expect("register weighted");

    // Second registration: the same paths with no weights at all.
    let unweighted_entries = vec![
        FtsPropertyPathSpec::scalar("$.title"),
        FtsPropertyPathSpec::scalar("$.body"),
    ];
    service
        .register_fts_property_schema_with_entries(
            "Article",
            &unweighted_entries,
            None,
            &[],
            crate::rebuild_actor::RebuildMode::Eager,
        )
        .expect("re-register unweighted");

    let conn = sqlite::open_connection(db.path()).expect("conn");
    let table = fathomdb_schema::fts_kind_table_name("Article");
    let result = conn.execute(
        &format!("INSERT INTO {table} (node_logical_id, text_content) VALUES ('art-1', 'hello world')"),
        [],
    );
    // Report the SQLite error on failure, as the sibling tests do.
    assert!(
        result.is_ok(),
        "text_content column must exist after weighted-to-unweighted downgrade: {:?}",
        result.err()
    );
}
10161
/// `set_fts_profile` returns the stored profile and `get_fts_profile` reads the
/// same kind and tokenizer back.
#[test]
fn set_get_fts_profile_roundtrip() {
    let (_db, service) = setup();

    let written = service
        .set_fts_profile("book", "unicode61")
        .expect("set_fts_profile");
    assert_eq!(written.kind, "book");
    assert_eq!(written.tokenizer, "unicode61");

    let lookup = service.get_fts_profile("book").expect("get_fts_profile");
    let read_back = lookup.expect("should be Some");
    assert_eq!(read_back.kind, "book");
    assert_eq!(read_back.tokenizer, "unicode61");
}
10180
/// Setting a profile twice for the same kind keeps the tokenizer from the second call.
#[test]
fn fts_profile_upsert() {
    let (_db, service) = setup();

    service
        .set_fts_profile("article", "unicode61")
        .expect("first set");
    service
        .set_fts_profile("article", "porter unicode61 remove_diacritics 2")
        .expect("second set");

    let lookup = service.get_fts_profile("article").expect("get");
    let current = lookup.expect("Some");
    assert_eq!(current.tokenizer, "porter unicode61 remove_diacritics 2");
}
10196
/// `set_fts_profile` must reject a tokenizer string containing SQL-injection-style
/// text, with an error mentioning "tokenizer" or "invalid".
#[test]
fn invalid_tokenizer_rejected() {
    let (_db, service) = setup();
    let hostile_tokenizer = "'; DROP TABLE nodes --";

    let outcome = service.set_fts_profile("book", hostile_tokenizer);
    assert!(outcome.is_err(), "invalid tokenizer must be rejected");

    let msg = outcome.expect_err("must be Err").to_string();
    let names_the_problem = msg.contains("tokenizer") || msg.contains("invalid");
    assert!(
        names_the_problem,
        "error must mention tokenizer or invalid: {msg}"
    );
}
10208
/// The `recall-optimized-english` preset resolves to the porter + unicode61 tokenizer string.
#[test]
fn preset_recall_optimized_english() {
    let resolved = super::resolve_tokenizer_preset("recall-optimized-english");
    assert_eq!(resolved, "porter unicode61 remove_diacritics 2");
}
10216
/// The `precision-optimized` preset resolves to unicode61 with diacritics removal.
#[test]
fn preset_precision_optimized() {
    let resolved = super::resolve_tokenizer_preset("precision-optimized");
    assert_eq!(resolved, "unicode61 remove_diacritics 2");
}
10224
/// The `global-cjk` preset resolves to the `icu` tokenizer string.
#[test]
fn preset_global_cjk() {
    let resolved = super::resolve_tokenizer_preset("global-cjk");
    assert_eq!(resolved, "icu");
}
10229
/// The `substring-trigram` preset resolves to the `trigram` tokenizer string.
#[test]
fn preset_substring_trigram() {
    let resolved = super::resolve_tokenizer_preset("substring-trigram");
    assert_eq!(resolved, "trigram");
}
10237
/// The `source-code` preset resolves to unicode61 with extra token characters.
#[test]
fn preset_source_code() {
    let resolved = super::resolve_tokenizer_preset("source-code");
    assert_eq!(resolved, "unicode61 tokenchars '._-$@'");
}
10245
/// With five `book` nodes plus one whose `superseded_at` is set, the FTS preview
/// must report five rows to rebuild.
#[test]
fn preview_fts_row_count() {
    let (db, service) = setup();

    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    // Five nodes inserted without a superseded_at value.
    for idx in 0..5u32 {
        let row_id = format!("r{idx}");
        let logical_id = format!("lg{idx}");
        seed_conn
            .execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES (?1, ?2, 'book', '{}', 100, 'src')",
                rusqlite::params![row_id, logical_id],
            )
            .expect("insert node");
    }
    // One extra node that is already superseded.
    seed_conn
        .execute(
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref, superseded_at) \
             VALUES ('r99', 'lg99', 'book', '{}', 100, 'src', 200)",
            [],
        )
        .expect("insert superseded");
    drop(seed_conn);

    let impact = service
        .preview_projection_impact("book", "fts")
        .expect("preview");
    assert_eq!(impact.rows_to_rebuild, 5);
}
10272
/// The FTS preview reports the tokenizer from the stored profile as current and
/// leaves the target tokenizer unset.
#[test]
fn preview_populates_current_tokenizer() {
    let (_db, service) = setup();
    service
        .set_fts_profile("doc", "trigram")
        .expect("set profile");

    let impact = service
        .preview_projection_impact("doc", "fts")
        .expect("preview");

    let expected_current = Some("trigram".to_owned());
    assert_eq!(impact.current_tokenizer, expected_current);
    assert_eq!(impact.target_tokenizer, None);
}
10285
/// The source-code tokenizer string, which contains quotes and punctuation, must
/// be accepted by `create_or_replace_fts_kind_table`.
#[test]
fn create_or_replace_source_code_tokenizer_is_accepted() {
    use super::create_or_replace_fts_kind_table;

    let (db, _service) = setup();
    let conn = sqlite::open_connection(db.path()).expect("conn");
    let symbol_spec = FtsPropertyPathSpec::scalar("$.symbol");
    let specs = vec![symbol_spec];
    let source_code_tokenizer = "unicode61 tokenchars '._-$@'";

    let outcome = create_or_replace_fts_kind_table(&conn, "Symbol", &specs, source_code_tokenizer);
    assert!(
        outcome.is_ok(),
        "source-code tokenizer string must be accepted by create_or_replace_fts_kind_table: {:?}",
        outcome.err()
    );
}
10306
/// With the `source-code` preset set as the profile for a kind, registering an
/// FTS property schema for that kind must still succeed.
#[test]
fn source_code_profile_round_trip_through_register_fts_schema() {
    let db = tempfile::NamedTempFile::new().expect("temp file");
    let schema = Arc::new(fathomdb_schema::SchemaManager::new());

    // Open a coordinator once and drop it straight away; per the expect message
    // it is opened only to bootstrap the database file.
    let telemetry = Arc::new(crate::TelemetryCounters::default());
    let bootstrap_coord = crate::ExecutionCoordinator::open(
        db.path(),
        Arc::clone(&schema),
        None,
        1,
        telemetry,
        None,
    )
    .expect("coordinator opens for bootstrap");
    drop(bootstrap_coord);

    let service = AdminService::new(db.path(), Arc::clone(&schema));

    service
        .set_fts_profile("Symbol", "source-code")
        .expect("set_fts_profile with source-code preset must succeed");

    let name_only = ["$.name".to_owned()];
    let outcome = service.register_fts_property_schema("Symbol", &name_only, None);
    assert!(
        outcome.is_ok(),
        "register_fts_property_schema must succeed when source-code profile is active: {:?}",
        outcome.err()
    );
}
10345}