1use std::fmt::Write as _;
2use std::fs;
3use std::io;
4use std::path::{Path, PathBuf};
5use std::sync::Arc;
6use std::sync::mpsc::SyncSender;
7use std::time::SystemTime;
8
9use fathomdb_schema::{SchemaError, SchemaManager};
10use rusqlite::{DatabaseName, OptionalExtension, TransactionBehavior};
11use serde::{Deserialize, Serialize};
12use sha2::{Digest, Sha256};
13
14use crate::rebuild_actor::{RebuildMode, RebuildRequest, RebuildStateRow};
15
16use crate::{
17 EngineError, ProjectionRepairReport, ProjectionService,
18 embedder::{BatchEmbedder, QueryEmbedder, QueryEmbedderIdentity},
19 ids::new_id,
20 operational::{
21 OperationalCollectionKind, OperationalCollectionRecord, OperationalCompactionReport,
22 OperationalCurrentRow, OperationalFilterClause, OperationalFilterField,
23 OperationalFilterFieldType, OperationalFilterMode, OperationalFilterValue,
24 OperationalHistoryValidationIssue, OperationalHistoryValidationReport,
25 OperationalMutationRow, OperationalPurgeReport, OperationalReadReport,
26 OperationalReadRequest, OperationalRegisterRequest, OperationalRepairReport,
27 OperationalRetentionActionKind, OperationalRetentionPlanItem,
28 OperationalRetentionPlanReport, OperationalRetentionRunItem, OperationalRetentionRunReport,
29 OperationalSecondaryIndexDefinition, OperationalSecondaryIndexRebuildReport,
30 OperationalTraceReport, extract_secondary_index_entries_for_current,
31 extract_secondary_index_entries_for_mutation, parse_operational_secondary_indexes_json,
32 parse_operational_validation_contract, validate_operational_payload_against_contract,
33 },
34 projection::ProjectionTarget,
35 sqlite,
36};
37
/// Result of [`AdminService::check_integrity`].
///
/// The two booleans come from SQLite pragmas; every `usize` field is a row
/// count produced by one of the integrity queries.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct IntegrityReport {
    /// `true` when the first row of `PRAGMA integrity_check` is `ok`.
    pub physical_ok: bool,
    /// `true` when `pragma_foreign_key_check` returned no rows.
    pub foreign_keys_ok: bool,
    /// Chunks belonging to an active node that have no `fts_nodes` row.
    pub missing_fts_rows: usize,
    /// Value returned by `count_missing_property_fts_rows`.
    pub missing_property_fts_rows: usize,
    /// Logical IDs that have more than one non-superseded node row.
    pub duplicate_active_logical_ids: usize,
    /// `operational_mutations` plus `operational_current` rows whose
    /// collection name matches no `operational_collections` row.
    pub operational_missing_collections: usize,
    /// `operational_current` rows whose `last_mutation_id` matches no mutation.
    pub operational_missing_last_mutations: usize,
    /// One human-readable message per non-zero row count above.
    pub warnings: Vec<String>,
}
50
/// A property-FTS schema record for one node kind.
///
/// NOTE(review): the code that reads and writes this record is outside the
/// visible part of the file; the field notes below are inferred from names
/// and types only and should be confirmed against that code.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct FtsPropertySchemaRecord {
    /// Node kind this schema belongs to.
    pub kind: String,
    /// Property paths as plain strings (presumably mirrors `entries` without
    /// mode/weight — TODO confirm).
    pub property_paths: Vec<String>,
    /// Per-path specifications, see [`FtsPropertyPathSpec`].
    pub entries: Vec<FtsPropertyPathSpec>,
    /// Paths excluded from extraction (presumably relevant for recursive
    /// entries — TODO confirm).
    pub exclude_paths: Vec<String>,
    /// Separator string (presumably placed between extracted values — TODO confirm).
    pub separator: String,
    /// Format version of the stored schema.
    pub format_version: i64,
}
74
/// Extraction mode attached to an [`FtsPropertyPathSpec`].
///
/// Serialized in `snake_case` (`"scalar"` / `"recursive"`).
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum FtsPropertyPathMode {
    /// The default mode; used by [`FtsPropertyPathSpec::scalar`].
    #[default]
    Scalar,
    /// Used by [`FtsPropertyPathSpec::recursive`].
    Recursive,
}
87
/// One property path of an FTS property schema, with its extraction mode and
/// an optional weight.
///
/// The struct is `#[non_exhaustive]`, so code outside this crate cannot build
/// it with a struct literal; use [`FtsPropertyPathSpec::scalar`] or
/// [`FtsPropertyPathSpec::recursive`] instead.
#[non_exhaustive]
#[derive(Clone, Debug, PartialEq, Serialize)]
pub struct FtsPropertyPathSpec {
    /// The property path.
    pub path: String,
    /// How the path is extracted.
    pub mode: FtsPropertyPathMode,
    /// Optional weight; `None` unless set through `with_weight`.
    pub weight: Option<f32>,
}

// `Eq` cannot be derived because `f32` is only `PartialEq`, so it is asserted
// by hand here.
// NOTE(review): a NaN weight compares unequal to itself, which breaks the
// reflexivity `Eq` promises — confirm that NaN weights are rejected upstream.
impl Eq for FtsPropertyPathSpec {}
104
105impl FtsPropertyPathSpec {
106 #[must_use]
107 pub fn scalar(path: impl Into<String>) -> Self {
108 Self {
109 path: path.into(),
110 mode: FtsPropertyPathMode::Scalar,
111 weight: None,
112 }
113 }
114
115 #[must_use]
116 pub fn recursive(path: impl Into<String>) -> Self {
117 Self {
118 path: path.into(),
119 mode: FtsPropertyPathMode::Recursive,
120 weight: None,
121 }
122 }
123
124 #[must_use]
130 pub fn with_weight(mut self, weight: f32) -> Self {
131 self.weight = Some(weight);
132 self
133 }
134}
135
/// Options for a safe export.
#[derive(Clone, Copy, Debug)]
pub struct SafeExportOptions {
    /// Checkpoint flag; `true` in [`SafeExportOptions::default`].
    pub force_checkpoint: bool,
}

impl Default for SafeExportOptions {
    /// Defaults to forcing a checkpoint.
    fn default() -> Self {
        let force_checkpoint = true;
        Self { force_checkpoint }
    }
}
152
// Version number of the export protocol.
// NOTE(review): presumably the value written to
// `SafeExportManifest::protocol_version`; the export code is not in view — confirm.
const EXPORT_PROTOCOL_VERSION: u32 = 1;

/// Manifest describing a safe export.
///
/// NOTE(review): the code that fills this manifest is outside the visible
/// part of the file; units and encodings below are assumptions to confirm.
#[derive(Clone, Debug, Serialize)]
pub struct SafeExportManifest {
    /// Export timestamp (presumably seconds since the Unix epoch — TODO confirm).
    pub exported_at: u64,
    /// SHA-256 digest of the export (presumably hex-encoded — TODO confirm).
    pub sha256: String,
    /// Schema version of the exported database.
    pub schema_version: u32,
    /// Export protocol version.
    pub protocol_version: u32,
    /// Page count of the exported database.
    pub page_count: u64,
}
170
/// Report of the rows associated with one `source_ref`.
///
/// NOTE(review): the producing code is outside the visible part of the file;
/// the field notes follow the field names and should be confirmed there.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct TraceReport {
    /// The `source_ref` the report was produced for.
    pub source_ref: String,
    /// Number of node rows found.
    pub node_rows: usize,
    /// Number of edge rows found.
    pub edge_rows: usize,
    /// Number of action rows found.
    pub action_rows: usize,
    /// Number of operational mutation rows found.
    pub operational_mutation_rows: usize,
    /// Logical IDs of the nodes found.
    pub node_logical_ids: Vec<String>,
    /// IDs of the actions found.
    pub action_ids: Vec<String>,
    /// IDs of the operational mutations found.
    pub operational_mutation_ids: Vec<String>,
}
183
/// An edge that was skipped, listed in [`LogicalRestoreReport::skipped_edges`].
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct SkippedEdge {
    /// Logical ID of the skipped edge.
    pub edge_logical_id: String,
    /// The endpoint that was missing (presumably a node logical ID — TODO confirm).
    pub missing_endpoint: String,
}
190
/// Outcome of restoring one logical ID.
///
/// NOTE(review): the restore implementation is outside the visible part of
/// the file; the field notes follow the field names and should be confirmed.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct LogicalRestoreReport {
    /// The logical ID the restore was requested for.
    pub logical_id: String,
    /// Whether the restore changed nothing.
    pub was_noop: bool,
    /// Node rows restored.
    pub restored_node_rows: usize,
    /// Edge rows restored.
    pub restored_edge_rows: usize,
    /// Chunk rows restored.
    pub restored_chunk_rows: usize,
    /// FTS rows restored.
    pub restored_fts_rows: usize,
    /// Property FTS rows restored.
    pub restored_property_fts_rows: usize,
    /// Vector rows restored.
    pub restored_vec_rows: usize,
    /// Edges that were not restored, each with its missing endpoint.
    pub skipped_edges: Vec<SkippedEdge>,
    /// Free-form notes.
    pub notes: Vec<String>,
}
205
/// Outcome of purging one logical ID.
///
/// NOTE(review): the purge implementation is outside the visible part of the
/// file; the field notes follow the field names and should be confirmed.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct LogicalPurgeReport {
    /// The logical ID the purge was requested for.
    pub logical_id: String,
    /// Whether the purge changed nothing.
    pub was_noop: bool,
    /// Node rows deleted.
    pub deleted_node_rows: usize,
    /// Edge rows deleted.
    pub deleted_edge_rows: usize,
    /// Chunk rows deleted.
    pub deleted_chunk_rows: usize,
    /// FTS rows deleted.
    pub deleted_fts_rows: usize,
    /// Vector rows deleted.
    pub deleted_vec_rows: usize,
    /// Free-form notes.
    pub notes: Vec<String>,
}
218
/// Options for purging provenance events.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ProvenancePurgeOptions {
    /// Dry-run flag (presumably reports without deleting — TODO confirm
    /// against the purge implementation, which is not in view).
    pub dry_run: bool,
    /// Event types to preserve; an absent field deserializes to an empty list.
    #[serde(default)]
    pub preserve_event_types: Vec<String>,
}
226
/// Outcome of a provenance purge.
#[derive(Clone, Debug, Serialize)]
pub struct ProvenancePurgeReport {
    /// Number of events deleted.
    pub events_deleted: u64,
    /// Number of events preserved.
    pub events_preserved: u64,
    /// Timestamp of the oldest remaining event, if any (presumably Unix
    /// seconds — TODO confirm).
    pub oldest_remaining: Option<i64>,
}
234
/// Administrative operations over one database file.
///
/// Every operation opens its own connection through `connect`, which also
/// runs the schema manager's `bootstrap` on that connection.
#[derive(Debug)]
pub struct AdminService {
    /// Path of the database file that `connect` opens.
    database_path: PathBuf,
    /// Schema manager used to bootstrap each new connection.
    schema_manager: Arc<SchemaManager>,
    /// Projection service built from the same path and schema manager.
    projections: ProjectionService,
    /// Sender for rebuild requests; `None` when built with `new`, `Some` when
    /// built with `new_with_rebuild`.
    rebuild_sender: Option<SyncSender<RebuildRequest>>,
}
246
/// Result of [`AdminService::check_semantics`].
///
/// Every `usize` field is a count produced by one consistency query; a
/// non-zero count also adds one message to `warnings`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct SemanticReport {
    /// Chunks whose `node_logical_id` matches no node row of any version.
    pub orphaned_chunks: usize,
    /// Active nodes whose `source_ref` is NULL.
    pub null_source_ref_nodes: usize,
    /// Steps whose `run_id` matches no run.
    pub broken_step_fk: usize,
    /// Actions whose `step_id` matches no step.
    pub broken_action_fk: usize,
    /// `fts_nodes` rows whose `chunk_id` matches no chunk.
    pub stale_fts_rows: usize,
    /// `fts_nodes` rows whose `node_logical_id` has no active node.
    pub fts_rows_for_superseded_nodes: usize,
    /// Property FTS rows for superseded or missing nodes.
    pub stale_property_fts_rows: usize,
    /// Property FTS rows for unregistered kinds.
    pub orphaned_property_fts_rows: usize,
    /// Property FTS rows whose kind does not match the active node.
    pub mismatched_kind_property_fts_rows: usize,
    /// Active logical IDs that have duplicate property FTS rows.
    pub duplicate_property_fts_rows: usize,
    /// Property FTS rows whose `text_content` is stale.
    pub drifted_property_fts_rows: usize,
    /// Active edges whose source or target has no active node.
    pub dangling_edges: usize,
    /// Logical IDs for which every node version is superseded.
    pub orphaned_supersession_chains: usize,
    /// Vector rows whose `chunk_id` matches no chunk; always 0 when the
    /// `sqlite-vec` feature is disabled.
    pub stale_vec_rows: usize,
    /// Vector rows whose chunk's `node_logical_id` matches no node row of any
    /// version; always 0 when the `sqlite-vec` feature is disabled.
    pub vec_rows_for_superseded_nodes: usize,
    /// Latest-state keys whose newest mutation is a `put` but which have no
    /// `operational_current` row.
    pub missing_operational_current_rows: usize,
    /// Latest-state `operational_current` rows that disagree with their last
    /// mutation or have been overtaken by a newer mutation.
    pub stale_operational_current_rows: usize,
    /// Mutations created after their collection's `disabled_at`.
    pub disabled_collection_mutations: usize,
    /// `node_access_metadata` rows whose `logical_id` matches no node row.
    pub orphaned_last_access_metadata_rows: usize,
    /// One human-readable message per non-zero count above.
    pub warnings: Vec<String>,
}
290
/// Configuration for a vector regeneration run.
///
/// Deserialization uses `snake_case` field names and rejects unknown fields.
///
/// NOTE(review): the regeneration code is outside the visible part of the
/// file; the field notes follow the field names and should be confirmed.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct VectorRegenerationConfig {
    /// Node kind to regenerate vectors for.
    pub kind: String,
    /// Profile name.
    pub profile: String,
    /// Chunking policy identifier.
    pub chunking_policy: String,
    /// Preprocessing policy identifier.
    pub preprocessing_policy: String,
}
311
/// Outcome of a vector regeneration run.
///
/// NOTE(review): the regeneration code is outside the visible part of the
/// file; the field notes follow the field names and should be confirmed.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct VectorRegenerationReport {
    /// Profile name the run used.
    pub profile: String,
    /// Name of the vector table written to.
    pub table_name: String,
    /// Embedding dimension.
    pub dimension: usize,
    /// Total number of chunks considered.
    pub total_chunks: usize,
    /// Number of vector rows regenerated.
    pub regenerated_rows: usize,
    /// Whether the vector contract was persisted.
    pub contract_persisted: bool,
    /// Free-form notes.
    pub notes: Vec<String>,
}
323
/// The `fts` facet row of `projection_profiles` for one kind, as returned by
/// [`AdminService::set_fts_profile`] and [`AdminService::get_fts_profile`].
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct FtsProfile {
    /// The `kind` column.
    pub kind: String,
    /// The `$.tokenizer` value extracted from `config_json`.
    pub tokenizer: String,
    /// The `active_at` column; written with SQLite `unixepoch()` by `set_fts_profile`.
    pub active_at: Option<i64>,
    /// The `created_at` column; written with SQLite `unixepoch()` on first insert.
    pub created_at: i64,
}
338
/// The `vec` facet row of `projection_profiles`, as returned by
/// [`AdminService::get_vec_profile`] and [`AdminService::set_vec_profile`].
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct VecProfile {
    /// The `$.model_identity` value of `config_json`; `get_vec_profile`
    /// substitutes an empty string when it is absent.
    pub model_identity: String,
    /// The `$.model_version` value of `config_json`, if present.
    pub model_version: Option<String>,
    /// The `$.dimensions` value of `config_json` cast to an integer; readers
    /// substitute 0 when it does not fit in a `u32`.
    pub dimensions: u32,
    /// The `active_at` column.
    pub active_at: Option<i64>,
    /// The `created_at` column.
    pub created_at: i64,
}
355
/// Estimate produced by [`AdminService::preview_projection_impact`].
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct ProjectionImpact {
    /// Rows counted: active nodes of the kind for `fts`, all chunks for `vec`.
    pub rows_to_rebuild: u64,
    /// `rows_to_rebuild / 5000` for `fts`, `rows_to_rebuild / 100` for `vec`.
    pub estimated_seconds: u64,
    /// `rows_to_rebuild * 200` for `fts`, `rows_to_rebuild * 1536` for `vec`.
    pub temp_db_size_bytes: u64,
    /// Tokenizer of the stored FTS profile, if any; `None` for `vec`.
    pub current_tokenizer: Option<String>,
    /// Target tokenizer; `preview_projection_impact` always leaves this `None`.
    pub target_tokenizer: Option<String>,
}
372
/// Named tokenizer presets as `(preset name, tokenizer string)` pairs.
pub const TOKENIZER_PRESETS: &[(&str, &str)] = &[
    (
        "recall-optimized-english",
        "porter unicode61 remove_diacritics 2",
    ),
    ("precision-optimized", "unicode61 remove_diacritics 2"),
    ("global-cjk", "icu"),
    ("substring-trigram", "trigram"),
    ("source-code", "unicode61 tokenchars '._-$@'"),
];

/// Maps a preset name to its tokenizer string.
///
/// Returns the tokenizer string of the first entry in [`TOKENIZER_PRESETS`]
/// whose name equals `input` exactly; any other input is returned unchanged.
pub fn resolve_tokenizer_preset(input: &str) -> &str {
    TOKENIZER_PRESETS
        .iter()
        .find(|(name, _)| *name == input)
        .map_or(input, |(_, tokenizer)| *tokenizer)
}
397
// Limits and defaults for admin operations.
// NOTE(review): the code that uses these constants is outside the visible
// part of the file; the per-constant notes are inferred from the names only.

// Format version of the vector contract.
const CURRENT_VECTOR_CONTRACT_FORMAT_VERSION: i64 = 1;
// Maximum length of a profile name.
const MAX_PROFILE_LEN: usize = 128;
// Maximum length of a policy identifier.
const MAX_POLICY_LEN: usize = 128;
// Maximum size of a contract JSON document (32 KiB).
const MAX_CONTRACT_JSON_BYTES: usize = 32 * 1024;
// Maximum size of audit metadata, in bytes.
const MAX_AUDIT_METADATA_BYTES: usize = 2048;
// Row limit for operational reads when none is given.
const DEFAULT_OPERATIONAL_READ_LIMIT: usize = 100;
// Largest row limit accepted for operational reads.
const MAX_OPERATIONAL_READ_LIMIT: usize = 1000;
405
406#[derive(Clone, Debug)]
408pub struct AdminHandle {
409 inner: Arc<AdminService>,
410}
411
412impl AdminHandle {
413 #[must_use]
415 pub fn new(service: AdminService) -> Self {
416 Self {
417 inner: Arc::new(service),
418 }
419 }
420
421 #[must_use]
423 pub fn service(&self) -> Arc<AdminService> {
424 Arc::clone(&self.inner)
425 }
426}
427
428impl AdminService {
429 #[must_use]
431 pub fn new(path: impl AsRef<Path>, schema_manager: Arc<SchemaManager>) -> Self {
432 let database_path = path.as_ref().to_path_buf();
433 let projections = ProjectionService::new(&database_path, Arc::clone(&schema_manager));
434 Self {
435 database_path,
436 schema_manager,
437 projections,
438 rebuild_sender: None,
439 }
440 }
441
442 #[must_use]
444 pub fn new_with_rebuild(
445 path: impl AsRef<Path>,
446 schema_manager: Arc<SchemaManager>,
447 rebuild_sender: SyncSender<RebuildRequest>,
448 ) -> Self {
449 let database_path = path.as_ref().to_path_buf();
450 let projections = ProjectionService::new(&database_path, Arc::clone(&schema_manager));
451 Self {
452 database_path,
453 schema_manager,
454 projections,
455 rebuild_sender: Some(rebuild_sender),
456 }
457 }
458
459 fn connect(&self) -> Result<rusqlite::Connection, EngineError> {
460 #[cfg(feature = "sqlite-vec")]
461 let conn = sqlite::open_connection_with_vec(&self.database_path)?;
462 #[cfg(not(feature = "sqlite-vec"))]
463 let conn = sqlite::open_connection(&self.database_path)?;
464 self.schema_manager.bootstrap(&conn)?;
465 Ok(conn)
466 }
467
468 pub fn set_fts_profile(
478 &self,
479 kind: &str,
480 tokenizer_str: &str,
481 ) -> Result<FtsProfile, EngineError> {
482 let resolved = resolve_tokenizer_preset(tokenizer_str);
483 if !resolved
485 .chars()
486 .all(|c| c.is_alphanumeric() || "'._-$@ ".contains(c))
487 {
488 return Err(EngineError::Bridge(format!(
489 "invalid tokenizer string: {resolved:?}"
490 )));
491 }
492 let conn = self.connect()?;
493 conn.execute(
494 r"INSERT INTO projection_profiles (kind, facet, config_json, active_at, created_at)
495 VALUES (?1, 'fts', json_object('tokenizer', ?2), unixepoch(), unixepoch())
496 ON CONFLICT(kind, facet) DO UPDATE SET
497 config_json = json_object('tokenizer', ?2),
498 active_at = unixepoch()",
499 rusqlite::params![kind, resolved],
500 )?;
501 let row = conn.query_row(
502 "SELECT kind, json_extract(config_json, '$.tokenizer'), active_at, created_at \
503 FROM projection_profiles WHERE kind = ?1 AND facet = 'fts'",
504 rusqlite::params![kind],
505 |row| {
506 Ok(FtsProfile {
507 kind: row.get(0)?,
508 tokenizer: row.get(1)?,
509 active_at: row.get(2)?,
510 created_at: row.get(3)?,
511 })
512 },
513 )?;
514 Ok(row)
515 }
516
517 pub fn get_fts_profile(&self, kind: &str) -> Result<Option<FtsProfile>, EngineError> {
524 let conn = self.connect()?;
525 let result = conn
526 .query_row(
527 "SELECT kind, json_extract(config_json, '$.tokenizer'), active_at, created_at \
528 FROM projection_profiles WHERE kind = ?1 AND facet = 'fts'",
529 rusqlite::params![kind],
530 |row| {
531 Ok(FtsProfile {
532 kind: row.get(0)?,
533 tokenizer: row.get(1)?,
534 active_at: row.get(2)?,
535 created_at: row.get(3)?,
536 })
537 },
538 )
539 .optional()?;
540 Ok(result)
541 }
542
543 pub fn get_vec_profile(&self, kind: &str) -> Result<Option<VecProfile>, EngineError> {
551 let conn = self.connect()?;
552 let result = conn
553 .query_row(
554 "SELECT \
555 json_extract(config_json, '$.model_identity'), \
556 json_extract(config_json, '$.model_version'), \
557 CAST(json_extract(config_json, '$.dimensions') AS INTEGER), \
558 active_at, \
559 created_at \
560 FROM projection_profiles WHERE kind = ?1 AND facet = 'vec'",
561 rusqlite::params![kind],
562 |row| {
563 Ok(VecProfile {
564 model_identity: row.get::<_, Option<String>>(0)?.unwrap_or_default(),
565 model_version: row.get(1)?,
566 dimensions: {
567 let d: i64 = row.get::<_, Option<i64>>(2)?.unwrap_or(0);
568 u32::try_from(d).unwrap_or(0)
569 },
570 active_at: row.get(3)?,
571 created_at: row.get(4)?,
572 })
573 },
574 )
575 .optional()?;
576 Ok(result)
577 }
578
579 #[allow(dead_code)]
584 fn set_vec_profile_inner(
585 conn: &rusqlite::Connection,
586 identity_json: &str,
587 ) -> Result<VecProfile, rusqlite::Error> {
588 conn.execute(
589 r"INSERT INTO projection_profiles (kind, facet, config_json, active_at, created_at)
590 VALUES ('*', 'vec', ?1, unixepoch(), unixepoch())
591 ON CONFLICT(kind, facet) DO UPDATE SET
592 config_json = ?1,
593 active_at = unixepoch()",
594 rusqlite::params![identity_json],
595 )?;
596 conn.query_row(
597 "SELECT \
598 json_extract(config_json, '$.model_identity'), \
599 json_extract(config_json, '$.model_version'), \
600 CAST(json_extract(config_json, '$.dimensions') AS INTEGER), \
601 active_at, \
602 created_at \
603 FROM projection_profiles WHERE kind = '*' AND facet = 'vec'",
604 [],
605 |row| {
606 Ok(VecProfile {
607 model_identity: row.get(0)?,
608 model_version: row.get(1)?,
609 dimensions: {
610 let d: i64 = row.get(2)?;
611 u32::try_from(d).unwrap_or(0)
612 },
613 active_at: row.get(3)?,
614 created_at: row.get(4)?,
615 })
616 },
617 )
618 }
619
620 pub fn set_vec_profile(&self, config_json: &str) -> Result<VecProfile, EngineError> {
629 let conn = self.connect()?;
630 Self::set_vec_profile_inner(&conn, config_json).map_err(EngineError::Sqlite)
631 }
632
633 pub fn preview_projection_impact(
641 &self,
642 kind: &str,
643 facet: &str,
644 ) -> Result<ProjectionImpact, EngineError> {
645 let conn = self.connect()?;
646 match facet {
647 "fts" => {
648 let rows: u64 = conn
649 .query_row(
650 "SELECT count(*) FROM nodes WHERE kind = ?1 AND superseded_at IS NULL",
651 rusqlite::params![kind],
652 |row| row.get::<_, i64>(0),
653 )
654 .map(i64::cast_unsigned)?;
655 let current_tokenizer = self.get_fts_profile(kind)?.map(|p| p.tokenizer);
656 Ok(ProjectionImpact {
657 rows_to_rebuild: rows,
658 estimated_seconds: rows / 5000,
659 temp_db_size_bytes: rows * 200,
660 current_tokenizer,
661 target_tokenizer: None,
662 })
663 }
664 "vec" => {
665 let rows: u64 = conn
666 .query_row("SELECT count(*) FROM chunks", [], |row| {
667 row.get::<_, i64>(0)
668 })
669 .map(i64::cast_unsigned)?;
670 Ok(ProjectionImpact {
671 rows_to_rebuild: rows,
672 estimated_seconds: rows / 100,
673 temp_db_size_bytes: rows * 1536,
674 current_tokenizer: None,
675 target_tokenizer: None,
676 })
677 }
678 other => Err(EngineError::Bridge(format!(
679 "unknown projection facet: {other:?}"
680 ))),
681 }
682 }
683
684 pub fn check_integrity(&self) -> Result<IntegrityReport, EngineError> {
687 let conn = self.connect()?;
688
689 let physical_result: String =
690 conn.query_row("PRAGMA integrity_check", [], |row| row.get(0))?;
691 let foreign_key_count: i64 =
692 conn.query_row("SELECT count(*) FROM pragma_foreign_key_check", [], |row| {
693 row.get(0)
694 })?;
695 let missing_fts_rows: i64 = conn.query_row(
696 r"
697 SELECT count(*)
698 FROM chunks c
699 JOIN nodes n
700 ON n.logical_id = c.node_logical_id
701 AND n.superseded_at IS NULL
702 WHERE NOT EXISTS (
703 SELECT 1
704 FROM fts_nodes f
705 WHERE f.chunk_id = c.id
706 )
707 ",
708 [],
709 |row| row.get(0),
710 )?;
711 let duplicate_active: i64 = conn.query_row(
712 r"
713 SELECT count(*)
714 FROM (
715 SELECT logical_id
716 FROM nodes
717 WHERE superseded_at IS NULL
718 GROUP BY logical_id
719 HAVING count(*) > 1
720 )
721 ",
722 [],
723 |row| row.get(0),
724 )?;
725 let operational_missing_collections: i64 = conn.query_row(
726 r"
727 SELECT (
728 SELECT count(*)
729 FROM operational_mutations m
730 LEFT JOIN operational_collections c ON c.name = m.collection_name
731 WHERE c.name IS NULL
732 ) + (
733 SELECT count(*)
734 FROM operational_current oc
735 LEFT JOIN operational_collections c ON c.name = oc.collection_name
736 WHERE c.name IS NULL
737 )
738 ",
739 [],
740 |row| row.get(0),
741 )?;
742 let operational_missing_last_mutations: i64 = conn.query_row(
743 r"
744 SELECT count(*)
745 FROM operational_current oc
746 LEFT JOIN operational_mutations m ON m.id = oc.last_mutation_id
747 WHERE m.id IS NULL
748 ",
749 [],
750 |row| row.get(0),
751 )?;
752
753 let missing_property_fts_rows = count_missing_property_fts_rows(&conn)?;
757
758 let mut warnings = Vec::new();
759 if missing_fts_rows > 0 {
760 warnings.push("missing FTS projections detected".to_owned());
761 }
762 if missing_property_fts_rows > 0 {
763 warnings.push("missing property FTS projections detected".to_owned());
764 }
765 if duplicate_active > 0 {
766 warnings.push("duplicate active logical_ids detected".to_owned());
767 }
768 if operational_missing_collections > 0 {
769 warnings.push("operational rows reference missing collections".to_owned());
770 }
771 if operational_missing_last_mutations > 0 {
772 warnings.push("operational current rows reference missing last mutations".to_owned());
773 }
774
775 Ok(IntegrityReport {
780 physical_ok: physical_result == "ok",
781 foreign_keys_ok: foreign_key_count == 0,
782 missing_fts_rows: i64_to_usize(missing_fts_rows),
783 missing_property_fts_rows: i64_to_usize(missing_property_fts_rows),
784 duplicate_active_logical_ids: i64_to_usize(duplicate_active),
785 operational_missing_collections: i64_to_usize(operational_missing_collections),
786 operational_missing_last_mutations: i64_to_usize(operational_missing_last_mutations),
787 warnings,
788 })
789 }
790
    /// Runs the semantic consistency checks and collects them into a
    /// [`SemanticReport`].
    ///
    /// Each check is one counting query (or helper call). A non-zero count is
    /// copied into the report and also produces one warning message. The
    /// checks only read the database; nothing is repaired here.
    ///
    /// # Errors
    ///
    /// Propagates connection and SQLite errors. With the `sqlite-vec`
    /// feature, a missing `projection_profiles` table, a missing per-kind
    /// vector table or a missing `vec0` module is treated as "no vector rows"
    /// instead of an error.
    #[allow(clippy::too_many_lines)]
    pub fn check_semantics(&self) -> Result<SemanticReport, EngineError> {
        let conn = self.connect()?;

        // Chunks whose node has no row at all, active or superseded.
        let orphaned_chunks: i64 = conn.query_row(
            r"
            SELECT count(*)
            FROM chunks c
            WHERE NOT EXISTS (
                SELECT 1 FROM nodes n
                WHERE n.logical_id = c.node_logical_id
            )
            ",
            [],
            |row| row.get(0),
        )?;

        // Active nodes without a source_ref.
        let null_source_ref_nodes: i64 = conn.query_row(
            "SELECT count(*) FROM nodes WHERE source_ref IS NULL AND superseded_at IS NULL",
            [],
            |row| row.get(0),
        )?;

        // Steps pointing at a run that does not exist.
        let broken_step_fk: i64 = conn.query_row(
            r"
            SELECT count(*) FROM steps s
            WHERE NOT EXISTS (SELECT 1 FROM runs r WHERE r.id = s.run_id)
            ",
            [],
            |row| row.get(0),
        )?;

        // Actions pointing at a step that does not exist.
        let broken_action_fk: i64 = conn.query_row(
            r"
            SELECT count(*) FROM actions a
            WHERE NOT EXISTS (SELECT 1 FROM steps s WHERE s.id = a.step_id)
            ",
            [],
            |row| row.get(0),
        )?;

        // FTS rows whose chunk is gone.
        let stale_fts_rows: i64 = conn.query_row(
            r"
            SELECT count(*) FROM fts_nodes f
            WHERE NOT EXISTS (SELECT 1 FROM chunks c WHERE c.id = f.chunk_id)
            ",
            [],
            |row| row.get(0),
        )?;

        // FTS rows whose logical ID has no active node version.
        let fts_rows_for_superseded_nodes: i64 = conn.query_row(
            r"
            SELECT count(*) FROM fts_nodes f
            WHERE NOT EXISTS (
                SELECT 1 FROM nodes n
                WHERE n.logical_id = f.node_logical_id AND n.superseded_at IS NULL
            )
            ",
            [],
            |row| row.get(0),
        )?;

        // Property FTS checks are delegated to helpers defined elsewhere in
        // this file.
        let (
            stale_property_fts_rows,
            orphaned_property_fts_rows,
            mismatched_kind_property_fts_rows,
            duplicate_property_fts_rows,
        ) = count_per_kind_property_fts_issues(&conn)?;

        let drifted_property_fts_rows = count_drifted_property_fts_rows(&conn)?;

        // Active edges where either endpoint has no active node.
        let dangling_edges: i64 = conn.query_row(
            r"
            SELECT count(*) FROM edges e
            WHERE e.superseded_at IS NULL AND (
                NOT EXISTS (SELECT 1 FROM nodes n WHERE n.logical_id = e.source_logical_id AND n.superseded_at IS NULL)
                OR
                NOT EXISTS (SELECT 1 FROM nodes n WHERE n.logical_id = e.target_logical_id AND n.superseded_at IS NULL)
            )
            ",
            [],
            |row| row.get(0),
        )?;

        // Logical IDs for which every version is superseded.
        let orphaned_supersession_chains: i64 = conn.query_row(
            r"
            SELECT count(*) FROM (
                SELECT logical_id FROM nodes
                GROUP BY logical_id
                HAVING count(*) > 0 AND sum(CASE WHEN superseded_at IS NULL THEN 1 ELSE 0 END) = 0
            )
            ",
            [],
            |row| row.get(0),
        )?;

        // Vector checks: one table per kind that has a `vec` profile. Counts
        // are summed across all of those tables.
        #[cfg(feature = "sqlite-vec")]
        let (stale_vec_rows, vec_rows_for_superseded_nodes): (i64, i64) = {
            let kinds: Vec<String> =
                match conn.prepare("SELECT kind FROM projection_profiles WHERE facet = 'vec'") {
                    Ok(mut stmt) => stmt
                        .query_map([], |row| row.get(0))
                        .map_err(EngineError::Sqlite)?
                        .collect::<Result<Vec<_>, _>>()
                        .map_err(EngineError::Sqlite)?,
                    // No profiles table means there is nothing to check.
                    Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
                        if msg.contains("no such table: projection_profiles") =>
                    {
                        vec![]
                    }
                    Err(e) => return Err(EngineError::Sqlite(e)),
                };
            let mut stale = 0i64;
            let mut superseded = 0i64;
            for kind in &kinds {
                // The table name is interpolated into the SQL text.
                // NOTE(review): presumably `vec_kind_table_name` yields a safe
                // identifier — confirm in `fathomdb_schema`.
                let table = fathomdb_schema::vec_kind_table_name(kind);
                let stale_sql = format!(
                    "SELECT count(*) FROM {table} v \
                     WHERE NOT EXISTS (SELECT 1 FROM chunks c WHERE c.id = v.chunk_id)"
                );
                // Despite the variable name, this counts vector rows whose
                // chunk's node has no row of any version (not only superseded
                // ones); the warning text below says the same.
                let superseded_sql = format!(
                    "SELECT count(*) FROM {table} v \
                     JOIN chunks c ON c.id = v.chunk_id \
                     WHERE NOT EXISTS (SELECT 1 FROM nodes n WHERE n.logical_id = c.node_logical_id)"
                );
                stale += match conn.query_row(&stale_sql, [], |row| row.get(0)) {
                    Ok(n) => n,
                    // A profile without a table, or without the vec0 module
                    // loaded, contributes zero rows.
                    Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
                        if msg.contains("no such table:")
                            || msg.contains("no such module: vec0") =>
                    {
                        0
                    }
                    Err(e) => return Err(EngineError::Sqlite(e)),
                };
                superseded += match conn.query_row(&superseded_sql, [], |row| row.get(0)) {
                    Ok(n) => n,
                    Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
                        if msg.contains("no such table:")
                            || msg.contains("no such module: vec0") =>
                    {
                        0
                    }
                    Err(e) => return Err(EngineError::Sqlite(e)),
                };
            }
            (stale, superseded)
        };
        // Without the feature there are no vector tables to inspect.
        #[cfg(not(feature = "sqlite-vec"))]
        let stale_vec_rows: i64 = 0;
        #[cfg(not(feature = "sqlite-vec"))]
        let vec_rows_for_superseded_nodes: i64 = 0;
        // Latest-state keys whose newest mutation is a `put` but which have
        // no row in `operational_current`.
        let missing_operational_current_rows: i64 = conn.query_row(
            r"
            SELECT count(*)
            FROM operational_mutations m
            JOIN operational_collections c
              ON c.name = m.collection_name
             AND c.kind = 'latest_state'
            WHERE m.op_kind = 'put'
              AND NOT EXISTS (
                  SELECT 1
                  FROM operational_mutations newer
                  WHERE newer.collection_name = m.collection_name
                    AND newer.record_key = m.record_key
                    AND newer.mutation_order > m.mutation_order
              )
              AND NOT EXISTS (
                  SELECT 1
                  FROM operational_current oc
                  WHERE oc.collection_name = m.collection_name
                    AND oc.record_key = m.record_key
              )
            ",
            [],
            |row| row.get(0),
        )?;
        // Latest-state current rows whose last mutation is missing, belongs
        // to another key, is not a `put`, carries a different payload, or has
        // been overtaken by a newer mutation.
        let stale_operational_current_rows: i64 = conn.query_row(
            r"
            SELECT count(*)
            FROM operational_current oc
            JOIN operational_collections c
              ON c.name = oc.collection_name
             AND c.kind = 'latest_state'
            LEFT JOIN operational_mutations m ON m.id = oc.last_mutation_id
            WHERE m.id IS NULL
               OR m.collection_name != oc.collection_name
               OR m.record_key != oc.record_key
               OR m.op_kind != 'put'
               OR m.payload_json != oc.payload_json
               OR EXISTS (
                    SELECT 1
                    FROM operational_mutations newer
                    WHERE newer.collection_name = oc.collection_name
                      AND newer.record_key = oc.record_key
                      AND newer.mutation_order > m.mutation_order
               )
            ",
            [],
            |row| row.get(0),
        )?;
        // Mutations written after their collection was disabled.
        let disabled_collection_mutations: i64 = conn.query_row(
            r"
            SELECT count(*)
            FROM operational_mutations m
            JOIN operational_collections c ON c.name = m.collection_name
            WHERE c.disabled_at IS NOT NULL AND m.created_at > c.disabled_at
            ",
            [],
            |row| row.get(0),
        )?;
        // Access metadata for logical IDs that have no node row at all.
        let orphaned_last_access_metadata_rows: i64 = conn.query_row(
            r"
            SELECT count(*)
            FROM node_access_metadata am
            WHERE NOT EXISTS (
                SELECT 1 FROM nodes n WHERE n.logical_id = am.logical_id
            )
            ",
            [],
            |row| row.get(0),
        )?;

        // One warning per non-zero count, in the same order as the report fields.
        let mut warnings = Vec::new();
        if orphaned_chunks > 0 {
            warnings.push(format!(
                "{orphaned_chunks} orphaned chunk(s) with no surviving node history"
            ));
        }
        if null_source_ref_nodes > 0 {
            warnings.push(format!(
                "{null_source_ref_nodes} active node(s) with null source_ref"
            ));
        }
        if broken_step_fk > 0 {
            warnings.push(format!(
                "{broken_step_fk} step(s) referencing non-existent run"
            ));
        }
        if broken_action_fk > 0 {
            warnings.push(format!(
                "{broken_action_fk} action(s) referencing non-existent step"
            ));
        }
        if stale_fts_rows > 0 {
            warnings.push(format!(
                "{stale_fts_rows} stale FTS row(s) referencing missing chunk"
            ));
        }
        if fts_rows_for_superseded_nodes > 0 {
            warnings.push(format!(
                "{fts_rows_for_superseded_nodes} FTS row(s) for superseded node(s)"
            ));
        }
        if stale_property_fts_rows > 0 {
            warnings.push(format!(
                "{stale_property_fts_rows} stale property FTS row(s) for superseded/missing node(s)"
            ));
        }
        if orphaned_property_fts_rows > 0 {
            warnings.push(format!(
                "{orphaned_property_fts_rows} orphaned property FTS row(s) for unregistered kind(s)"
            ));
        }
        if mismatched_kind_property_fts_rows > 0 {
            warnings.push(format!(
                "{mismatched_kind_property_fts_rows} property FTS row(s) whose kind does not match the active node"
            ));
        }
        if duplicate_property_fts_rows > 0 {
            warnings.push(format!(
                "{duplicate_property_fts_rows} active logical ID(s) with duplicate property FTS rows"
            ));
        }
        if drifted_property_fts_rows > 0 {
            warnings.push(format!(
                "{drifted_property_fts_rows} property FTS row(s) with stale text_content"
            ));
        }
        if dangling_edges > 0 {
            warnings.push(format!(
                "{dangling_edges} active edge(s) with missing endpoint node"
            ));
        }
        if orphaned_supersession_chains > 0 {
            warnings.push(format!(
                "{orphaned_supersession_chains} logical_id(s) with all versions superseded"
            ));
        }
        if stale_vec_rows > 0 {
            warnings.push(format!(
                "{stale_vec_rows} stale vec row(s) referencing missing chunk"
            ));
        }
        if vec_rows_for_superseded_nodes > 0 {
            warnings.push(format!(
                "{vec_rows_for_superseded_nodes} vec row(s) whose node history is missing"
            ));
        }
        if missing_operational_current_rows > 0 {
            warnings.push(format!(
                "{missing_operational_current_rows} latest-state key(s) missing operational_current rows"
            ));
        }
        if stale_operational_current_rows > 0 {
            warnings.push(format!(
                "{stale_operational_current_rows} stale operational_current row(s)"
            ));
        }
        if disabled_collection_mutations > 0 {
            warnings.push(format!(
                "{disabled_collection_mutations} mutation(s) were written after collection disable"
            ));
        }
        if orphaned_last_access_metadata_rows > 0 {
            warnings.push(format!(
                "{orphaned_last_access_metadata_rows} last_access metadata row(s) reference missing node history"
            ));
        }

        Ok(SemanticReport {
            orphaned_chunks: i64_to_usize(orphaned_chunks),
            null_source_ref_nodes: i64_to_usize(null_source_ref_nodes),
            broken_step_fk: i64_to_usize(broken_step_fk),
            broken_action_fk: i64_to_usize(broken_action_fk),
            stale_fts_rows: i64_to_usize(stale_fts_rows),
            fts_rows_for_superseded_nodes: i64_to_usize(fts_rows_for_superseded_nodes),
            stale_property_fts_rows: i64_to_usize(stale_property_fts_rows),
            orphaned_property_fts_rows: i64_to_usize(orphaned_property_fts_rows),
            mismatched_kind_property_fts_rows: i64_to_usize(mismatched_kind_property_fts_rows),
            duplicate_property_fts_rows: i64_to_usize(duplicate_property_fts_rows),
            drifted_property_fts_rows: i64_to_usize(drifted_property_fts_rows),
            dangling_edges: i64_to_usize(dangling_edges),
            orphaned_supersession_chains: i64_to_usize(orphaned_supersession_chains),
            stale_vec_rows: i64_to_usize(stale_vec_rows),
            vec_rows_for_superseded_nodes: i64_to_usize(vec_rows_for_superseded_nodes),
            missing_operational_current_rows: i64_to_usize(missing_operational_current_rows),
            stale_operational_current_rows: i64_to_usize(stale_operational_current_rows),
            disabled_collection_mutations: i64_to_usize(disabled_collection_mutations),
            orphaned_last_access_metadata_rows: i64_to_usize(orphaned_last_access_metadata_rows),
            warnings,
        })
    }
1137
1138 pub fn register_operational_collection(
1141 &self,
1142 request: &OperationalRegisterRequest,
1143 ) -> Result<OperationalCollectionRecord, EngineError> {
1144 if request.name.trim().is_empty() {
1145 return Err(EngineError::InvalidWrite(
1146 "operational collection name must not be empty".to_owned(),
1147 ));
1148 }
1149 if request.schema_json.is_empty() {
1150 return Err(EngineError::InvalidWrite(
1151 "operational collection schema_json must not be empty".to_owned(),
1152 ));
1153 }
1154 if request.retention_json.is_empty() {
1155 return Err(EngineError::InvalidWrite(
1156 "operational collection retention_json must not be empty".to_owned(),
1157 ));
1158 }
1159 if request.filter_fields_json.is_empty() {
1160 return Err(EngineError::InvalidWrite(
1161 "operational collection filter_fields_json must not be empty".to_owned(),
1162 ));
1163 }
1164 parse_operational_validation_contract(&request.validation_json)
1165 .map_err(EngineError::InvalidWrite)?;
1166 parse_operational_secondary_indexes_json(&request.secondary_indexes_json, request.kind)
1167 .map_err(EngineError::InvalidWrite)?;
1168 if request.format_version <= 0 {
1169 return Err(EngineError::InvalidWrite(
1170 "operational collection format_version must be positive".to_owned(),
1171 ));
1172 }
1173 parse_operational_filter_fields(&request.filter_fields_json)
1174 .map_err(EngineError::InvalidWrite)?;
1175
1176 let mut conn = self.connect()?;
1177 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1178 tx.execute(
1179 "INSERT INTO operational_collections \
1180 (name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at) \
1181 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, unixepoch())",
1182 rusqlite::params![
1183 request.name.as_str(),
1184 request.kind.as_str(),
1185 request.schema_json.as_str(),
1186 request.retention_json.as_str(),
1187 request.filter_fields_json.as_str(),
1188 request.validation_json.as_str(),
1189 request.secondary_indexes_json.as_str(),
1190 request.format_version,
1191 ],
1192 )?;
1193 persist_simple_provenance_event(
1194 &tx,
1195 "operational_collection_registered",
1196 request.name.as_str(),
1197 Some(serde_json::json!({
1198 "kind": request.kind.as_str(),
1199 "format_version": request.format_version,
1200 })),
1201 )?;
1202 tx.commit()?;
1203
1204 self.describe_operational_collection(&request.name)?
1205 .ok_or_else(|| {
1206 EngineError::Bridge("registered collection missing after commit".to_owned())
1207 })
1208 }
1209
1210 pub fn describe_operational_collection(
1213 &self,
1214 name: &str,
1215 ) -> Result<Option<OperationalCollectionRecord>, EngineError> {
1216 let conn = self.connect()?;
1217 load_operational_collection_record(&conn, name)
1218 }
1219
1220 pub fn update_operational_collection_filters(
1224 &self,
1225 name: &str,
1226 filter_fields_json: &str,
1227 ) -> Result<OperationalCollectionRecord, EngineError> {
1228 if filter_fields_json.is_empty() {
1229 return Err(EngineError::InvalidWrite(
1230 "operational collection filter_fields_json must not be empty".to_owned(),
1231 ));
1232 }
1233 let declared_fields = parse_operational_filter_fields(filter_fields_json)
1234 .map_err(EngineError::InvalidWrite)?;
1235
1236 let mut conn = self.connect()?;
1237 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1238 load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1239 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1240 })?;
1241 tx.execute(
1242 "UPDATE operational_collections SET filter_fields_json = ?2 WHERE name = ?1",
1243 rusqlite::params![name, filter_fields_json],
1244 )?;
1245 tx.execute(
1246 "DELETE FROM operational_filter_values WHERE collection_name = ?1",
1247 [name],
1248 )?;
1249
1250 let mut mutation_stmt = tx.prepare(
1251 "SELECT id, payload_json FROM operational_mutations \
1252 WHERE collection_name = ?1 ORDER BY mutation_order",
1253 )?;
1254 let mutations = mutation_stmt
1255 .query_map([name], |row| {
1256 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1257 })?
1258 .collect::<Result<Vec<_>, _>>()?;
1259 drop(mutation_stmt);
1260
1261 let mut insert_filter_value = tx.prepare_cached(
1262 "INSERT INTO operational_filter_values \
1263 (mutation_id, collection_name, field_name, string_value, integer_value) \
1264 VALUES (?1, ?2, ?3, ?4, ?5)",
1265 )?;
1266 let mut inserted_values = 0usize;
1267 for (mutation_id, payload_json) in &mutations {
1268 for filter_value in
1269 extract_operational_filter_values(&declared_fields, payload_json.as_str())
1270 {
1271 insert_filter_value.execute(rusqlite::params![
1272 mutation_id,
1273 name,
1274 filter_value.field_name,
1275 filter_value.string_value,
1276 filter_value.integer_value,
1277 ])?;
1278 inserted_values += 1;
1279 }
1280 }
1281 drop(insert_filter_value);
1282
1283 persist_simple_provenance_event(
1284 &tx,
1285 "operational_collection_filter_fields_updated",
1286 name,
1287 Some(serde_json::json!({
1288 "field_count": declared_fields.len(),
1289 "mutations_backfilled": mutations.len(),
1290 "inserted_filter_values": inserted_values,
1291 })),
1292 )?;
1293 let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1294 EngineError::Bridge("operational collection missing after filter update".to_owned())
1295 })?;
1296 tx.commit()?;
1297 Ok(updated)
1298 }
1299
1300 pub fn update_operational_collection_validation(
1303 &self,
1304 name: &str,
1305 validation_json: &str,
1306 ) -> Result<OperationalCollectionRecord, EngineError> {
1307 parse_operational_validation_contract(validation_json)
1308 .map_err(EngineError::InvalidWrite)?;
1309
1310 let mut conn = self.connect()?;
1311 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1312 load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1313 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1314 })?;
1315 tx.execute(
1316 "UPDATE operational_collections SET validation_json = ?2 WHERE name = ?1",
1317 rusqlite::params![name, validation_json],
1318 )?;
1319 persist_simple_provenance_event(
1320 &tx,
1321 "operational_collection_validation_updated",
1322 name,
1323 Some(serde_json::json!({
1324 "has_validation": !validation_json.is_empty(),
1325 })),
1326 )?;
1327 let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1328 EngineError::Bridge("operational collection missing after validation update".to_owned())
1329 })?;
1330 tx.commit()?;
1331 Ok(updated)
1332 }
1333
1334 pub fn update_operational_collection_secondary_indexes(
1338 &self,
1339 name: &str,
1340 secondary_indexes_json: &str,
1341 ) -> Result<OperationalCollectionRecord, EngineError> {
1342 let mut conn = self.connect()?;
1343 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1344 let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1345 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1346 })?;
1347 let indexes = parse_operational_secondary_indexes_json(secondary_indexes_json, record.kind)
1348 .map_err(EngineError::InvalidWrite)?;
1349 tx.execute(
1350 "UPDATE operational_collections SET secondary_indexes_json = ?2 WHERE name = ?1",
1351 rusqlite::params![name, secondary_indexes_json],
1352 )?;
1353 let (mutation_entries_rebuilt, current_entries_rebuilt) =
1354 rebuild_operational_secondary_index_entries(&tx, &record.name, record.kind, &indexes)?;
1355 persist_simple_provenance_event(
1356 &tx,
1357 "operational_collection_secondary_indexes_updated",
1358 name,
1359 Some(serde_json::json!({
1360 "index_count": indexes.len(),
1361 "mutation_entries_rebuilt": mutation_entries_rebuilt,
1362 "current_entries_rebuilt": current_entries_rebuilt,
1363 })),
1364 )?;
1365 let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1366 EngineError::Bridge(
1367 "operational collection missing after secondary index update".to_owned(),
1368 )
1369 })?;
1370 tx.commit()?;
1371 Ok(updated)
1372 }
1373
1374 pub fn rebuild_operational_secondary_indexes(
1377 &self,
1378 name: &str,
1379 ) -> Result<OperationalSecondaryIndexRebuildReport, EngineError> {
1380 let mut conn = self.connect()?;
1381 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1382 let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1383 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1384 })?;
1385 let indexes =
1386 parse_operational_secondary_indexes_json(&record.secondary_indexes_json, record.kind)
1387 .map_err(EngineError::InvalidWrite)?;
1388 let (mutation_entries_rebuilt, current_entries_rebuilt) =
1389 rebuild_operational_secondary_index_entries(&tx, &record.name, record.kind, &indexes)?;
1390 persist_simple_provenance_event(
1391 &tx,
1392 "operational_secondary_indexes_rebuilt",
1393 name,
1394 Some(serde_json::json!({
1395 "index_count": indexes.len(),
1396 "mutation_entries_rebuilt": mutation_entries_rebuilt,
1397 "current_entries_rebuilt": current_entries_rebuilt,
1398 })),
1399 )?;
1400 tx.commit()?;
1401 Ok(OperationalSecondaryIndexRebuildReport {
1402 collection_name: name.to_owned(),
1403 mutation_entries_rebuilt,
1404 current_entries_rebuilt,
1405 })
1406 }
1407
1408 pub fn validate_operational_collection_history(
1411 &self,
1412 name: &str,
1413 ) -> Result<OperationalHistoryValidationReport, EngineError> {
1414 let conn = self.connect()?;
1415 let record = load_operational_collection_record(&conn, name)?.ok_or_else(|| {
1416 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1417 })?;
1418 let Some(contract) = parse_operational_validation_contract(&record.validation_json)
1419 .map_err(EngineError::InvalidWrite)?
1420 else {
1421 return Err(EngineError::InvalidWrite(format!(
1422 "operational collection '{name}' has no validation_json configured"
1423 )));
1424 };
1425
1426 let mut stmt = conn.prepare(
1427 "SELECT id, record_key, op_kind, payload_json FROM operational_mutations \
1428 WHERE collection_name = ?1 ORDER BY mutation_order",
1429 )?;
1430 let rows = stmt
1431 .query_map([name], |row| {
1432 Ok((
1433 row.get::<_, String>(0)?,
1434 row.get::<_, String>(1)?,
1435 row.get::<_, String>(2)?,
1436 row.get::<_, String>(3)?,
1437 ))
1438 })?
1439 .collect::<Result<Vec<_>, _>>()?;
1440 drop(stmt);
1441
1442 let mut checked_rows = 0usize;
1443 let mut issues = Vec::new();
1444 for (mutation_id, record_key, op_kind, payload_json) in rows {
1445 if op_kind == "delete" {
1446 continue;
1447 }
1448 checked_rows += 1;
1449 if let Err(message) =
1450 validate_operational_payload_against_contract(&contract, payload_json.as_str())
1451 {
1452 issues.push(OperationalHistoryValidationIssue {
1453 mutation_id,
1454 record_key,
1455 op_kind,
1456 message,
1457 });
1458 }
1459 }
1460
1461 Ok(OperationalHistoryValidationReport {
1462 collection_name: name.to_owned(),
1463 checked_rows,
1464 invalid_row_count: issues.len(),
1465 issues,
1466 })
1467 }
1468
1469 pub fn disable_operational_collection(
1472 &self,
1473 name: &str,
1474 ) -> Result<OperationalCollectionRecord, EngineError> {
1475 let mut conn = self.connect()?;
1476 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1477 let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1478 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1479 })?;
1480 let changed = if record.disabled_at.is_none() {
1481 tx.execute(
1482 "UPDATE operational_collections SET disabled_at = unixepoch() WHERE name = ?1",
1483 [name],
1484 )?;
1485 true
1486 } else {
1487 false
1488 };
1489 let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1490 EngineError::Bridge("operational collection missing after disable".to_owned())
1491 })?;
1492 persist_simple_provenance_event(
1493 &tx,
1494 "operational_collection_disabled",
1495 name,
1496 Some(serde_json::json!({
1497 "disabled_at": record.disabled_at,
1498 "changed": changed,
1499 })),
1500 )?;
1501 tx.commit()?;
1502 Ok(record)
1503 }
1504
1505 pub fn compact_operational_collection(
1508 &self,
1509 name: &str,
1510 dry_run: bool,
1511 ) -> Result<OperationalCompactionReport, EngineError> {
1512 let mut conn = self.connect()?;
1513 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1514 let collection = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1515 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1516 })?;
1517 validate_append_only_operational_collection(&collection, "compact")?;
1518 let (mutation_ids, before_timestamp) =
1519 operational_compaction_candidates(&tx, &collection.retention_json, name)?;
1520 if dry_run {
1521 drop(tx);
1522 return Ok(OperationalCompactionReport {
1523 collection_name: name.to_owned(),
1524 deleted_mutations: mutation_ids.len(),
1525 dry_run: true,
1526 before_timestamp,
1527 });
1528 }
1529 let mut delete_stmt =
1530 tx.prepare_cached("DELETE FROM operational_mutations WHERE id = ?1")?;
1531 for mutation_id in &mutation_ids {
1532 delete_stmt.execute([mutation_id.as_str()])?;
1533 }
1534 drop(delete_stmt);
1535 persist_simple_provenance_event(
1536 &tx,
1537 "operational_collection_compacted",
1538 name,
1539 Some(serde_json::json!({
1540 "deleted_mutations": mutation_ids.len(),
1541 "before_timestamp": before_timestamp,
1542 })),
1543 )?;
1544 tx.commit()?;
1545 Ok(OperationalCompactionReport {
1546 collection_name: name.to_owned(),
1547 deleted_mutations: mutation_ids.len(),
1548 dry_run: false,
1549 before_timestamp,
1550 })
1551 }
1552
1553 pub fn purge_operational_collection(
1556 &self,
1557 name: &str,
1558 before_timestamp: i64,
1559 ) -> Result<OperationalPurgeReport, EngineError> {
1560 let mut conn = self.connect()?;
1561 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1562 let collection = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1563 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1564 })?;
1565 validate_append_only_operational_collection(&collection, "purge")?;
1566 let deleted_mutations = tx.execute(
1567 "DELETE FROM operational_mutations WHERE collection_name = ?1 AND created_at < ?2",
1568 rusqlite::params![name, before_timestamp],
1569 )?;
1570 persist_simple_provenance_event(
1571 &tx,
1572 "operational_collection_purged",
1573 name,
1574 Some(serde_json::json!({
1575 "deleted_mutations": deleted_mutations,
1576 "before_timestamp": before_timestamp,
1577 })),
1578 )?;
1579 tx.commit()?;
1580 Ok(OperationalPurgeReport {
1581 collection_name: name.to_owned(),
1582 deleted_mutations,
1583 before_timestamp,
1584 })
1585 }
1586
1587 pub fn plan_operational_retention(
1590 &self,
1591 now_timestamp: i64,
1592 collection_names: Option<&[String]>,
1593 max_collections: Option<usize>,
1594 ) -> Result<OperationalRetentionPlanReport, EngineError> {
1595 let conn = self.connect()?;
1596 let records = load_operational_retention_records(&conn, collection_names, max_collections)?;
1597 let mut items = Vec::with_capacity(records.len());
1598 for record in records {
1599 items.push(plan_operational_retention_item(
1600 &conn,
1601 &record,
1602 now_timestamp,
1603 )?);
1604 }
1605 Ok(OperationalRetentionPlanReport {
1606 planned_at: now_timestamp,
1607 collections_examined: items.len(),
1608 items,
1609 })
1610 }
1611
1612 pub fn run_operational_retention(
1615 &self,
1616 now_timestamp: i64,
1617 collection_names: Option<&[String]>,
1618 max_collections: Option<usize>,
1619 dry_run: bool,
1620 ) -> Result<OperationalRetentionRunReport, EngineError> {
1621 let mut conn = self.connect()?;
1622 let records = load_operational_retention_records(&conn, collection_names, max_collections)?;
1623 let mut items = Vec::with_capacity(records.len());
1624 let mut collections_acted_on = 0usize;
1625
1626 for record in records {
1627 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1628 let item = run_operational_retention_item(&tx, &record, now_timestamp, dry_run)?;
1629 if item.deleted_mutations > 0 {
1630 collections_acted_on += 1;
1631 }
1632 if dry_run || item.action_kind == OperationalRetentionActionKind::Noop {
1633 drop(tx);
1634 } else {
1635 tx.commit()?;
1636 }
1637 items.push(item);
1638 }
1639
1640 Ok(OperationalRetentionRunReport {
1641 executed_at: now_timestamp,
1642 collections_examined: items.len(),
1643 collections_acted_on,
1644 dry_run,
1645 items,
1646 })
1647 }
1648
1649 pub fn trace_operational_collection(
1652 &self,
1653 collection_name: &str,
1654 record_key: Option<&str>,
1655 ) -> Result<OperationalTraceReport, EngineError> {
1656 let conn = self.connect()?;
1657 ensure_operational_collection_registered(&conn, collection_name)?;
1658 let mutations = if let Some(record_key) = record_key {
1659 let mut stmt = conn.prepare(
1660 "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
1661 FROM operational_mutations \
1662 WHERE collection_name = ?1 AND record_key = ?2 \
1663 ORDER BY mutation_order",
1664 )?;
1665 stmt.query_map([collection_name, record_key], map_operational_mutation_row)?
1666 .collect::<Result<Vec<_>, _>>()?
1667 } else {
1668 let mut stmt = conn.prepare(
1669 "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
1670 FROM operational_mutations \
1671 WHERE collection_name = ?1 \
1672 ORDER BY mutation_order",
1673 )?;
1674 stmt.query_map([collection_name], map_operational_mutation_row)?
1675 .collect::<Result<Vec<_>, _>>()?
1676 };
1677 let current_rows = if let Some(record_key) = record_key {
1678 let mut stmt = conn.prepare(
1679 "SELECT collection_name, record_key, payload_json, updated_at, last_mutation_id \
1680 FROM operational_current \
1681 WHERE collection_name = ?1 AND record_key = ?2 \
1682 ORDER BY updated_at, record_key",
1683 )?;
1684 stmt.query_map([collection_name, record_key], map_operational_current_row)?
1685 .collect::<Result<Vec<_>, _>>()?
1686 } else {
1687 let mut stmt = conn.prepare(
1688 "SELECT collection_name, record_key, payload_json, updated_at, last_mutation_id \
1689 FROM operational_current \
1690 WHERE collection_name = ?1 \
1691 ORDER BY updated_at, record_key",
1692 )?;
1693 stmt.query_map([collection_name], map_operational_current_row)?
1694 .collect::<Result<Vec<_>, _>>()?
1695 };
1696
1697 Ok(OperationalTraceReport {
1698 collection_name: collection_name.to_owned(),
1699 record_key: record_key.map(str::to_owned),
1700 mutation_count: mutations.len(),
1701 current_count: current_rows.len(),
1702 mutations,
1703 current_rows,
1704 })
1705 }
1706
1707 pub fn read_operational_collection(
1710 &self,
1711 request: &OperationalReadRequest,
1712 ) -> Result<OperationalReadReport, EngineError> {
1713 if request.collection_name.trim().is_empty() {
1714 return Err(EngineError::InvalidWrite(
1715 "operational read collection_name must not be empty".to_owned(),
1716 ));
1717 }
1718 if request.filters.is_empty() {
1719 return Err(EngineError::InvalidWrite(
1720 "operational read requires at least one filter clause".to_owned(),
1721 ));
1722 }
1723
1724 let conn = self.connect()?;
1725 let record = load_operational_collection_record(&conn, &request.collection_name)?
1726 .ok_or_else(|| {
1727 EngineError::InvalidWrite(format!(
1728 "operational collection '{}' is not registered",
1729 request.collection_name
1730 ))
1731 })?;
1732 validate_append_only_operational_collection(&record, "read")?;
1733 let declared_fields = parse_operational_filter_fields(&record.filter_fields_json)
1734 .map_err(EngineError::InvalidWrite)?;
1735 let secondary_indexes =
1736 parse_operational_secondary_indexes_json(&record.secondary_indexes_json, record.kind)
1737 .map_err(EngineError::InvalidWrite)?;
1738 let applied_limit = operational_read_limit(request.limit)?;
1739 let filters = compile_operational_read_filters(&request.filters, &declared_fields)?;
1740 if let Some(report) = execute_operational_secondary_index_read(
1741 &conn,
1742 &request.collection_name,
1743 &filters,
1744 &secondary_indexes,
1745 applied_limit,
1746 )? {
1747 return Ok(report);
1748 }
1749 execute_operational_filtered_read(&conn, &request.collection_name, &filters, applied_limit)
1750 }
1751
1752 pub fn rebuild_operational_current(
1755 &self,
1756 collection_name: Option<&str>,
1757 ) -> Result<OperationalRepairReport, EngineError> {
1758 let mut conn = self.connect()?;
1759 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1760 let collections = if let Some(name) = collection_name {
1761 let maybe_kind: Option<String> = tx
1762 .query_row(
1763 "SELECT kind FROM operational_collections WHERE name = ?1",
1764 [name],
1765 |row| row.get(0),
1766 )
1767 .optional()?;
1768 let Some(kind) = maybe_kind else {
1769 return Err(EngineError::InvalidWrite(format!(
1770 "operational collection '{name}' is not registered"
1771 )));
1772 };
1773 if kind != OperationalCollectionKind::LatestState.as_str() {
1774 return Err(EngineError::InvalidWrite(format!(
1775 "operational collection '{name}' is not latest_state"
1776 )));
1777 }
1778 vec![name.to_owned()]
1779 } else {
1780 let mut stmt = tx.prepare(
1781 "SELECT name FROM operational_collections WHERE kind = 'latest_state' ORDER BY name",
1782 )?;
1783 stmt.query_map([], |row| row.get::<_, String>(0))?
1784 .collect::<Result<Vec<_>, _>>()?
1785 };
1786
1787 let rebuilt_rows = rebuild_operational_current_rows(&tx, &collections)?;
1788 for collection in &collections {
1789 let record = load_operational_collection_record(&tx, collection)?.ok_or_else(|| {
1790 EngineError::Bridge(format!(
1791 "operational collection '{collection}' missing during current rebuild"
1792 ))
1793 })?;
1794 let indexes = parse_operational_secondary_indexes_json(
1795 &record.secondary_indexes_json,
1796 record.kind,
1797 )
1798 .map_err(EngineError::InvalidWrite)?;
1799 if !indexes.is_empty() {
1800 rebuild_operational_secondary_index_entries(
1801 &tx,
1802 &record.name,
1803 record.kind,
1804 &indexes,
1805 )?;
1806 }
1807 }
1808
1809 persist_simple_provenance_event(
1810 &tx,
1811 "operational_current_rebuilt",
1812 collection_name.unwrap_or("*"),
1813 Some(serde_json::json!({
1814 "collections_rebuilt": collections.len(),
1815 "current_rows_rebuilt": rebuilt_rows,
1816 })),
1817 )?;
1818 tx.commit()?;
1819
1820 Ok(OperationalRepairReport {
1821 collections_rebuilt: collections.len(),
1822 current_rows_rebuilt: rebuilt_rows,
1823 })
1824 }
1825
1826 pub fn rebuild_projections(
1829 &self,
1830 target: ProjectionTarget,
1831 ) -> Result<ProjectionRepairReport, EngineError> {
1832 self.projections.rebuild_projections(target)
1833 }
1834
1835 pub fn rebuild_missing_projections(&self) -> Result<ProjectionRepairReport, EngineError> {
1838 self.projections.rebuild_missing_projections()
1839 }
1840
1841 pub fn register_fts_property_schema(
1850 &self,
1851 kind: &str,
1852 property_paths: &[String],
1853 separator: Option<&str>,
1854 ) -> Result<FtsPropertySchemaRecord, EngineError> {
1855 let specs: Vec<FtsPropertyPathSpec> = property_paths
1856 .iter()
1857 .map(|p| FtsPropertyPathSpec::scalar(p.clone()))
1858 .collect();
1859 self.register_fts_property_schema_with_entries(
1860 kind,
1861 &specs,
1862 separator,
1863 &[],
1864 RebuildMode::Eager,
1865 )
1866 }
1867
1868 pub fn register_fts_property_schema_with_entries(
1884 &self,
1885 kind: &str,
1886 entries: &[FtsPropertyPathSpec],
1887 separator: Option<&str>,
1888 exclude_paths: &[String],
1889 mode: RebuildMode,
1890 ) -> Result<FtsPropertySchemaRecord, EngineError> {
1891 let paths: Vec<String> = entries.iter().map(|e| e.path.clone()).collect();
1892 validate_fts_property_paths(&paths)?;
1893 for p in exclude_paths {
1894 if !p.starts_with("$.") {
1895 return Err(EngineError::InvalidWrite(format!(
1896 "exclude_paths entries must start with '$.' but got: {p}"
1897 )));
1898 }
1899 }
1900 for e in entries {
1901 if let Some(w) = e.weight
1902 && !(w > 0.0 && w <= 1000.0)
1903 {
1904 return Err(EngineError::Bridge(format!(
1905 "weight out of range: {w} (must satisfy 0.0 < weight <= 1000.0)"
1906 )));
1907 }
1908 }
1909 let separator = separator.unwrap_or(" ");
1910 let paths_json = serialize_property_paths_json(entries, exclude_paths)?;
1911
1912 match mode {
1913 RebuildMode::Eager => self.register_fts_property_schema_eager(
1914 kind,
1915 entries,
1916 separator,
1917 exclude_paths,
1918 &paths,
1919 &paths_json,
1920 ),
1921 RebuildMode::Async => self.register_fts_property_schema_async(
1922 kind,
1923 entries,
1924 separator,
1925 &paths,
1926 &paths_json,
1927 ),
1928 }
1929 }
1930
1931 fn register_fts_property_schema_eager(
1933 &self,
1934 kind: &str,
1935 entries: &[FtsPropertyPathSpec],
1936 separator: &str,
1937 exclude_paths: &[String],
1938 paths: &[String],
1939 paths_json: &str,
1940 ) -> Result<FtsPropertySchemaRecord, EngineError> {
1941 let mut conn = self.connect()?;
1942 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1943
1944 let previous_row: Option<(String, String)> = tx
1950 .query_row(
1951 "SELECT property_paths_json, separator FROM fts_property_schemas WHERE kind = ?1",
1952 [kind],
1953 |row| {
1954 let json: String = row.get(0)?;
1955 let sep: String = row.get(1)?;
1956 Ok((json, sep))
1957 },
1958 )
1959 .optional()?;
1960 let had_previous_schema = previous_row.is_some();
1961 let previous_recursive_paths: Vec<String> = previous_row
1962 .map(|(json, sep)| crate::writer::parse_property_schema_json(&json, &sep))
1963 .map_or(Vec::new(), |schema| {
1964 schema
1965 .paths
1966 .into_iter()
1967 .filter(|p| p.mode == crate::writer::PropertyPathMode::Recursive)
1968 .map(|p| p.path)
1969 .collect()
1970 });
1971 let new_recursive_paths: Vec<&str> = entries
1972 .iter()
1973 .filter(|e| e.mode == FtsPropertyPathMode::Recursive)
1974 .map(|e| e.path.as_str())
1975 .collect();
1976 let introduces_new_recursive = new_recursive_paths
1977 .iter()
1978 .any(|p| !previous_recursive_paths.iter().any(|prev| prev == p));
1979
1980 tx.execute(
1981 "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
1982 VALUES (?1, ?2, ?3) \
1983 ON CONFLICT(kind) DO UPDATE SET property_paths_json = ?2, separator = ?3",
1984 rusqlite::params![kind, paths_json, separator],
1985 )?;
1986
1987 let _ = (introduces_new_recursive, had_previous_schema);
1993 let needs_rebuild = true;
1994 if needs_rebuild {
1995 let any_weight = entries.iter().any(|e| e.weight.is_some());
1996 let tok = fathomdb_schema::resolve_fts_tokenizer(&tx, kind)
1997 .map_err(|e| EngineError::Bridge(e.to_string()))?;
1998 if any_weight {
1999 create_or_replace_fts_kind_table(&tx, kind, entries, &tok)?;
2003 tx.execute(
2004 "DELETE FROM fts_node_property_positions WHERE kind = ?1",
2005 [kind],
2006 )?;
2007 } else {
2010 create_or_replace_fts_kind_table(&tx, kind, &[], &tok)?;
2014 tx.execute(
2015 "DELETE FROM fts_node_property_positions WHERE kind = ?1",
2016 [kind],
2017 )?;
2018 crate::projection::insert_property_fts_rows_for_kind(&tx, kind)?;
2023 }
2024 }
2025
2026 persist_simple_provenance_event(
2027 &tx,
2028 "fts_property_schema_registered",
2029 kind,
2030 Some(serde_json::json!({
2031 "property_paths": paths,
2032 "separator": separator,
2033 "exclude_paths": exclude_paths,
2034 "eager_rebuild": needs_rebuild,
2035 })),
2036 )?;
2037 tx.commit()?;
2038
2039 self.describe_fts_property_schema(kind)?.ok_or_else(|| {
2040 EngineError::Bridge("registered FTS property schema missing after commit".to_owned())
2041 })
2042 }
2043
2044 fn register_fts_property_schema_async(
2046 &self,
2047 kind: &str,
2048 entries: &[FtsPropertyPathSpec],
2049 separator: &str,
2050 paths: &[String],
2051 paths_json: &str,
2052 ) -> Result<FtsPropertySchemaRecord, EngineError> {
2053 let mut conn = self.connect()?;
2054 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2055
2056 let had_previous_schema: bool = tx
2058 .query_row(
2059 "SELECT count(*) FROM fts_property_schemas WHERE kind = ?1",
2060 rusqlite::params![kind],
2061 |r| r.get::<_, i64>(0),
2062 )
2063 .unwrap_or(0)
2064 > 0;
2065
2066 tx.execute(
2068 "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
2069 VALUES (?1, ?2, ?3) \
2070 ON CONFLICT(kind) DO UPDATE SET property_paths_json = ?2, separator = ?3",
2071 rusqlite::params![kind, paths_json, separator],
2072 )?;
2073
2074 let any_weight = entries.iter().any(|e| e.weight.is_some());
2078 let tok = fathomdb_schema::resolve_fts_tokenizer(&tx, kind)
2079 .map_err(|e| EngineError::Bridge(e.to_string()))?;
2080 if any_weight {
2081 create_or_replace_fts_kind_table(&tx, kind, entries, &tok)?;
2082 } else {
2083 create_or_replace_fts_kind_table(&tx, kind, &[], &tok)?;
2086 }
2087
2088 let schema_id: i64 = tx.query_row(
2090 "SELECT rowid FROM fts_property_schemas WHERE kind = ?1",
2091 rusqlite::params![kind],
2092 |r| r.get(0),
2093 )?;
2094
2095 let now_ms = crate::rebuild_actor::now_unix_ms_pub();
2096 let is_first = i64::from(!had_previous_schema);
2097
2098 tx.execute(
2100 "INSERT INTO fts_property_rebuild_state \
2101 (kind, schema_id, state, rows_done, started_at, is_first_registration) \
2102 VALUES (?1, ?2, 'PENDING', 0, ?3, ?4) \
2103 ON CONFLICT(kind) DO UPDATE SET \
2104 schema_id = excluded.schema_id, \
2105 state = 'PENDING', \
2106 rows_total = NULL, \
2107 rows_done = 0, \
2108 started_at = excluded.started_at, \
2109 last_progress_at = NULL, \
2110 error_message = NULL, \
2111 is_first_registration = excluded.is_first_registration",
2112 rusqlite::params![kind, schema_id, now_ms, is_first],
2113 )?;
2114
2115 persist_simple_provenance_event(
2116 &tx,
2117 "fts_property_schema_registered",
2118 kind,
2119 Some(serde_json::json!({
2120 "property_paths": paths,
2121 "separator": separator,
2122 "mode": "async",
2123 })),
2124 )?;
2125 tx.commit()?;
2126
2127 if let Some(sender) = &self.rebuild_sender
2133 && sender
2134 .try_send(RebuildRequest {
2135 kind: kind.to_owned(),
2136 schema_id,
2137 })
2138 .is_err()
2139 {
2140 trace_warn!(
2141 kind = %kind,
2142 "rebuild channel full; rebuild request dropped — state remains PENDING"
2143 );
2144 }
2145
2146 self.describe_fts_property_schema(kind)?.ok_or_else(|| {
2147 EngineError::Bridge("registered FTS property schema missing after commit".to_owned())
2148 })
2149 }
2150
2151 pub fn get_property_fts_rebuild_state(
2156 &self,
2157 kind: &str,
2158 ) -> Result<Option<RebuildStateRow>, EngineError> {
2159 let conn = self.connect()?;
2160 let row = conn
2161 .query_row(
2162 "SELECT kind, schema_id, state, rows_total, rows_done, \
2163 started_at, is_first_registration, error_message \
2164 FROM fts_property_rebuild_state WHERE kind = ?1",
2165 rusqlite::params![kind],
2166 |r| {
2167 Ok(RebuildStateRow {
2168 kind: r.get(0)?,
2169 schema_id: r.get(1)?,
2170 state: r.get(2)?,
2171 rows_total: r.get(3)?,
2172 rows_done: r.get(4)?,
2173 started_at: r.get(5)?,
2174 is_first_registration: r.get::<_, i64>(6)? != 0,
2175 error_message: r.get(7)?,
2176 })
2177 },
2178 )
2179 .optional()?;
2180 Ok(row)
2181 }
2182
2183 pub fn count_staging_rows(&self, kind: &str) -> Result<i64, EngineError> {
2189 let conn = self.connect()?;
2190 let count: i64 = conn.query_row(
2191 "SELECT count(*) FROM fts_property_rebuild_staging WHERE kind = ?1",
2192 rusqlite::params![kind],
2193 |r| r.get(0),
2194 )?;
2195 Ok(count)
2196 }
2197
2198 pub fn staging_row_exists(
2204 &self,
2205 kind: &str,
2206 node_logical_id: &str,
2207 ) -> Result<bool, EngineError> {
2208 let conn = self.connect()?;
2209 let count: i64 = conn.query_row(
2210 "SELECT count(*) FROM fts_property_rebuild_staging WHERE kind = ?1 AND node_logical_id = ?2",
2211 rusqlite::params![kind, node_logical_id],
2212 |r| r.get(0),
2213 )?;
2214 Ok(count > 0)
2215 }
2216
2217 pub fn describe_fts_property_schema(
2222 &self,
2223 kind: &str,
2224 ) -> Result<Option<FtsPropertySchemaRecord>, EngineError> {
2225 let conn = self.connect()?;
2226 load_fts_property_schema_record(&conn, kind)
2227 }
2228
2229 pub fn list_fts_property_schemas(&self) -> Result<Vec<FtsPropertySchemaRecord>, EngineError> {
2234 let conn = self.connect()?;
2235 let mut stmt = conn.prepare(
2236 "SELECT kind, property_paths_json, separator, format_version \
2237 FROM fts_property_schemas ORDER BY kind",
2238 )?;
2239 let records = stmt
2240 .query_map([], |row| {
2241 let kind: String = row.get(0)?;
2242 let paths_json: String = row.get(1)?;
2243 let separator: String = row.get(2)?;
2244 let format_version: i64 = row.get(3)?;
2245 Ok(build_fts_property_schema_record(
2246 kind,
2247 &paths_json,
2248 separator,
2249 format_version,
2250 ))
2251 })?
2252 .collect::<Result<Vec<_>, _>>()?;
2253 Ok(records)
2254 }
2255
2256 pub fn remove_fts_property_schema(&self, kind: &str) -> Result<(), EngineError> {
2264 let mut conn = self.connect()?;
2265 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2266 let deleted = tx.execute("DELETE FROM fts_property_schemas WHERE kind = ?1", [kind])?;
2267 if deleted == 0 {
2268 return Err(EngineError::InvalidWrite(format!(
2269 "FTS property schema for kind '{kind}' is not registered"
2270 )));
2271 }
2272 let table = fathomdb_schema::fts_kind_table_name(kind);
2274 let table_exists: bool = tx
2275 .query_row(
2276 "SELECT count(*) FROM sqlite_master WHERE type='table' AND name = ?1 \
2277 AND sql LIKE 'CREATE VIRTUAL TABLE%'",
2278 rusqlite::params![table],
2279 |r| r.get::<_, i64>(0),
2280 )
2281 .unwrap_or(0)
2282 > 0;
2283 if table_exists {
2284 tx.execute_batch(&format!("DELETE FROM {table}"))?;
2285 }
2286 persist_simple_provenance_event(&tx, "fts_property_schema_removed", kind, None)?;
2287 tx.commit()?;
2288 Ok(())
2289 }
2290
2291 pub fn restore_vector_profiles(&self) -> Result<ProjectionRepairReport, EngineError> {
2297 let conn = self.connect()?;
2298 let profiles: Vec<(String, String, i64)> = {
2299 let mut stmt = conn.prepare(
2300 "SELECT profile, table_name, dimension \
2301 FROM vector_profiles WHERE enabled = 1 ORDER BY profile",
2302 )?;
2303 stmt.query_map([], |row| {
2304 Ok((
2305 row.get::<_, String>(0)?,
2306 row.get::<_, String>(1)?,
2307 row.get::<_, i64>(2)?,
2308 ))
2309 })?
2310 .collect::<Result<Vec<_>, _>>()?
2311 };
2312
2313 for (profile, table_name, dimension) in &profiles {
2314 let dimension = usize::try_from(*dimension).map_err(|_| {
2315 EngineError::Bridge(format!("invalid vector profile dimension: {dimension}"))
2316 })?;
2317 self.schema_manager
2318 .ensure_vector_profile(&conn, profile, table_name, dimension)?;
2319 }
2320
2321 Ok(ProjectionRepairReport {
2322 targets: vec![ProjectionTarget::Vec],
2323 rebuilt_rows: profiles.len(),
2324 notes: vec![],
2325 })
2326 }
2327
2328 #[allow(clippy::too_many_lines)]
2344 pub fn regenerate_vector_embeddings(
2345 &self,
2346 embedder: &dyn QueryEmbedder,
2347 config: &VectorRegenerationConfig,
2348 ) -> Result<VectorRegenerationReport, EngineError> {
2349 let conn = self.connect()?;
2350 let identity = embedder.identity();
2351 let config = validate_vector_regeneration_config(&conn, config, &identity)
2352 .map_err(|failure| failure.to_engine_error())?;
2353 let chunks = collect_regeneration_chunks(&conn)?;
2354 let payload = build_regeneration_input(&config, &identity, chunks.clone());
2355 let snapshot_hash = compute_snapshot_hash(&payload)?;
2356 let audit_metadata = VectorRegenerationAuditMetadata {
2357 profile: config.profile.clone(),
2358 model_identity: identity.model_identity.clone(),
2359 model_version: identity.model_version.clone(),
2360 chunk_count: chunks.len(),
2361 snapshot_hash: snapshot_hash.clone(),
2362 failure_class: None,
2363 };
2364 persist_vector_regeneration_event(
2365 &conn,
2366 "vector_regeneration_requested",
2367 &config.profile,
2368 &audit_metadata,
2369 )?;
2370 let notes = vec!["vector embeddings regenerated via configured embedder".to_owned()];
2371
2372 let mut embedding_map: std::collections::HashMap<String, Vec<u8>> =
2373 std::collections::HashMap::with_capacity(chunks.len());
2374 for chunk in &chunks {
2375 let vector = match embedder.embed_query(&chunk.text_content) {
2376 Ok(vector) => vector,
2377 Err(error) => {
2378 let failure = VectorRegenerationFailure::new(
2379 VectorRegenerationFailureClass::EmbedderFailure,
2380 format!("embedder failed for chunk '{}': {error}", chunk.chunk_id),
2381 );
2382 self.persist_vector_regeneration_failure_best_effort(
2383 &config.profile,
2384 &audit_metadata,
2385 &failure,
2386 );
2387 return Err(failure.to_engine_error());
2388 }
2389 };
2390 if vector.len() != identity.dimension {
2391 let failure = VectorRegenerationFailure::new(
2392 VectorRegenerationFailureClass::InvalidEmbedderOutput,
2393 format!(
2394 "embedder produced {} values for chunk '{}', expected {}",
2395 vector.len(),
2396 chunk.chunk_id,
2397 identity.dimension
2398 ),
2399 );
2400 self.persist_vector_regeneration_failure_best_effort(
2401 &config.profile,
2402 &audit_metadata,
2403 &failure,
2404 );
2405 return Err(failure.to_engine_error());
2406 }
2407 if vector.iter().any(|value| !value.is_finite()) {
2408 let failure = VectorRegenerationFailure::new(
2409 VectorRegenerationFailureClass::InvalidEmbedderOutput,
2410 format!(
2411 "embedder returned non-finite values for chunk '{}'",
2412 chunk.chunk_id
2413 ),
2414 );
2415 self.persist_vector_regeneration_failure_best_effort(
2416 &config.profile,
2417 &audit_metadata,
2418 &failure,
2419 );
2420 return Err(failure.to_engine_error());
2421 }
2422 let bytes: Vec<u8> = vector
2423 .iter()
2424 .flat_map(|value| value.to_le_bytes())
2425 .collect();
2426 embedding_map.insert(chunk.chunk_id.clone(), bytes);
2427 }
2428
2429 let table_name = fathomdb_schema::vec_kind_table_name(&config.kind);
2430 let mut conn = conn;
2431 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2432 match self
2433 .schema_manager
2434 .ensure_vec_kind_profile(&tx, &config.kind, identity.dimension)
2435 {
2436 Ok(()) => {}
2437 Err(SchemaError::MissingCapability(message)) => {
2438 let failure = VectorRegenerationFailure::new(
2439 VectorRegenerationFailureClass::UnsupportedVecCapability,
2440 message,
2441 );
2442 drop(tx);
2443 self.persist_vector_regeneration_failure_best_effort(
2444 &config.profile,
2445 &audit_metadata,
2446 &failure,
2447 );
2448 return Err(failure.to_engine_error());
2449 }
2450 Err(error) => return Err(EngineError::Schema(error)),
2451 }
2452 let apply_chunks = collect_regeneration_chunks(&tx)?;
2453 let apply_payload = build_regeneration_input(&config, &identity, apply_chunks.clone());
2454 let apply_hash = compute_snapshot_hash(&apply_payload)?;
2455 if apply_hash != snapshot_hash {
2456 let failure = VectorRegenerationFailure::new(
2457 VectorRegenerationFailureClass::SnapshotDrift,
2458 "chunk snapshot changed during generation; retry".to_owned(),
2459 );
2460 drop(tx);
2461 self.persist_vector_regeneration_failure_best_effort(
2462 &config.profile,
2463 &audit_metadata,
2464 &failure,
2465 );
2466 return Err(failure.to_engine_error());
2467 }
2468 persist_vector_contract(&tx, &config, &table_name, &identity, &snapshot_hash)?;
2469 tx.execute(&format!("DELETE FROM {table_name}"), [])?;
2470 let mut stmt = tx.prepare_cached(&format!(
2471 "INSERT INTO {table_name} (chunk_id, embedding) VALUES (?1, ?2)"
2472 ))?;
2473 let mut regenerated_rows = 0usize;
2474 for chunk in &apply_chunks {
2475 let Some(embedding) = embedding_map.remove(&chunk.chunk_id) else {
2476 drop(stmt);
2477 drop(tx);
2478 let failure = VectorRegenerationFailure::new(
2479 VectorRegenerationFailureClass::InvalidEmbedderOutput,
2480 format!(
2481 "embedder did not produce a vector for chunk '{}'",
2482 chunk.chunk_id
2483 ),
2484 );
2485 self.persist_vector_regeneration_failure_best_effort(
2486 &config.profile,
2487 &audit_metadata,
2488 &failure,
2489 );
2490 return Err(failure.to_engine_error());
2491 };
2492 stmt.execute(rusqlite::params![chunk.chunk_id.as_str(), embedding])?;
2493 regenerated_rows += 1;
2494 }
2495 drop(stmt);
2496 persist_vector_regeneration_event(
2497 &tx,
2498 "vector_regeneration_apply",
2499 &config.profile,
2500 &audit_metadata,
2501 )?;
2502 tx.commit()?;
2503
2504 Ok(VectorRegenerationReport {
2505 profile: config.profile.clone(),
2506 table_name,
2507 dimension: identity.dimension,
2508 total_chunks: chunks.len(),
2509 regenerated_rows,
2510 contract_persisted: true,
2511 notes,
2512 })
2513 }
2514
2515 #[allow(clippy::too_many_lines)]
2529 pub fn regenerate_vector_embeddings_in_process(
2530 &self,
2531 embedder: &dyn BatchEmbedder,
2532 config: &VectorRegenerationConfig,
2533 ) -> Result<VectorRegenerationReport, EngineError> {
2534 let conn = self.connect()?;
2535 let identity = embedder.identity();
2536 let config = validate_vector_regeneration_config(&conn, config, &identity)
2537 .map_err(|failure| failure.to_engine_error())?;
2538 let chunks = collect_regeneration_chunks(&conn)?;
2539 let payload = build_regeneration_input(&config, &identity, chunks.clone());
2540 let snapshot_hash = compute_snapshot_hash(&payload)?;
2541 let audit_metadata = VectorRegenerationAuditMetadata {
2542 profile: config.profile.clone(),
2543 model_identity: identity.model_identity.clone(),
2544 model_version: identity.model_version.clone(),
2545 chunk_count: chunks.len(),
2546 snapshot_hash: snapshot_hash.clone(),
2547 failure_class: None,
2548 };
2549 persist_vector_regeneration_event(
2550 &conn,
2551 "vector_regeneration_requested",
2552 &config.profile,
2553 &audit_metadata,
2554 )?;
2555 let notes = vec!["vector embeddings regenerated via in-process batch embedder".to_owned()];
2556
2557 let chunk_texts: Vec<String> = chunks.iter().map(|c| c.text_content.clone()).collect();
2559 let batch_vectors = match embedder.batch_embed(&chunk_texts) {
2560 Ok(vecs) => vecs,
2561 Err(error) => {
2562 let failure = VectorRegenerationFailure::new(
2563 VectorRegenerationFailureClass::EmbedderFailure,
2564 format!("batch embedder failed: {error}"),
2565 );
2566 self.persist_vector_regeneration_failure_best_effort(
2567 &config.profile,
2568 &audit_metadata,
2569 &failure,
2570 );
2571 return Err(failure.to_engine_error());
2572 }
2573 };
2574 if batch_vectors.len() != chunks.len() {
2575 let failure = VectorRegenerationFailure::new(
2576 VectorRegenerationFailureClass::InvalidEmbedderOutput,
2577 format!(
2578 "batch embedder returned {} vectors for {} chunks",
2579 batch_vectors.len(),
2580 chunks.len()
2581 ),
2582 );
2583 self.persist_vector_regeneration_failure_best_effort(
2584 &config.profile,
2585 &audit_metadata,
2586 &failure,
2587 );
2588 return Err(failure.to_engine_error());
2589 }
2590
2591 let mut embedding_map: std::collections::HashMap<String, Vec<u8>> =
2592 std::collections::HashMap::with_capacity(chunks.len());
2593 for (chunk, vector) in chunks.iter().zip(batch_vectors) {
2594 if vector.len() != identity.dimension {
2595 let failure = VectorRegenerationFailure::new(
2596 VectorRegenerationFailureClass::InvalidEmbedderOutput,
2597 format!(
2598 "embedder produced {} values for chunk '{}', expected {}",
2599 vector.len(),
2600 chunk.chunk_id,
2601 identity.dimension
2602 ),
2603 );
2604 self.persist_vector_regeneration_failure_best_effort(
2605 &config.profile,
2606 &audit_metadata,
2607 &failure,
2608 );
2609 return Err(failure.to_engine_error());
2610 }
2611 if vector.iter().any(|value| !value.is_finite()) {
2612 let failure = VectorRegenerationFailure::new(
2613 VectorRegenerationFailureClass::InvalidEmbedderOutput,
2614 format!(
2615 "embedder returned non-finite values for chunk '{}'",
2616 chunk.chunk_id
2617 ),
2618 );
2619 self.persist_vector_regeneration_failure_best_effort(
2620 &config.profile,
2621 &audit_metadata,
2622 &failure,
2623 );
2624 return Err(failure.to_engine_error());
2625 }
2626 let bytes: Vec<u8> = vector
2627 .iter()
2628 .flat_map(|value| value.to_le_bytes())
2629 .collect();
2630 embedding_map.insert(chunk.chunk_id.clone(), bytes);
2631 }
2632
2633 let mut conn = conn;
2634 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2635 let table_name = fathomdb_schema::vec_kind_table_name(&config.kind);
2636 match self
2637 .schema_manager
2638 .ensure_vec_kind_profile(&tx, &config.kind, identity.dimension)
2639 {
2640 Ok(()) => {}
2641 Err(SchemaError::MissingCapability(message)) => {
2642 let failure = VectorRegenerationFailure::new(
2643 VectorRegenerationFailureClass::UnsupportedVecCapability,
2644 message,
2645 );
2646 drop(tx);
2647 self.persist_vector_regeneration_failure_best_effort(
2648 &config.profile,
2649 &audit_metadata,
2650 &failure,
2651 );
2652 return Err(failure.to_engine_error());
2653 }
2654 Err(error) => return Err(EngineError::Schema(error)),
2655 }
2656 let apply_chunks = collect_regeneration_chunks(&tx)?;
2657 let apply_payload = build_regeneration_input(&config, &identity, apply_chunks.clone());
2658 let apply_hash = compute_snapshot_hash(&apply_payload)?;
2659 if apply_hash != snapshot_hash {
2660 let failure = VectorRegenerationFailure::new(
2661 VectorRegenerationFailureClass::SnapshotDrift,
2662 "chunk snapshot changed during generation; retry".to_owned(),
2663 );
2664 drop(tx);
2665 self.persist_vector_regeneration_failure_best_effort(
2666 &config.profile,
2667 &audit_metadata,
2668 &failure,
2669 );
2670 return Err(failure.to_engine_error());
2671 }
2672 persist_vector_contract(&tx, &config, &table_name, &identity, &snapshot_hash)?;
2673 tx.execute(&format!("DELETE FROM {table_name}"), [])?;
2674 let mut stmt = tx.prepare_cached(&format!(
2675 "INSERT INTO {table_name} (chunk_id, embedding) VALUES (?1, ?2)"
2676 ))?;
2677 let mut regenerated_rows = 0usize;
2678 for chunk in &apply_chunks {
2679 let Some(embedding) = embedding_map.remove(&chunk.chunk_id) else {
2680 drop(stmt);
2681 drop(tx);
2682 let failure = VectorRegenerationFailure::new(
2683 VectorRegenerationFailureClass::InvalidEmbedderOutput,
2684 format!(
2685 "embedder did not produce a vector for chunk '{}'",
2686 chunk.chunk_id
2687 ),
2688 );
2689 self.persist_vector_regeneration_failure_best_effort(
2690 &config.profile,
2691 &audit_metadata,
2692 &failure,
2693 );
2694 return Err(failure.to_engine_error());
2695 };
2696 stmt.execute(rusqlite::params![chunk.chunk_id.as_str(), embedding])?;
2697 regenerated_rows += 1;
2698 }
2699 drop(stmt);
2700 persist_vector_regeneration_event(
2701 &tx,
2702 "vector_regeneration_apply",
2703 &config.profile,
2704 &audit_metadata,
2705 )?;
2706 tx.commit()?;
2707
2708 Ok(VectorRegenerationReport {
2709 profile: config.profile.clone(),
2710 table_name,
2711 dimension: identity.dimension,
2712 total_chunks: chunks.len(),
2713 regenerated_rows,
2714 contract_persisted: true,
2715 notes,
2716 })
2717 }
2718
2719 fn persist_vector_regeneration_failure_best_effort(
2720 &self,
2721 profile: &str,
2722 metadata: &VectorRegenerationAuditMetadata,
2723 failure: &VectorRegenerationFailure,
2724 ) {
2725 let Ok(conn) = self.connect() else {
2726 return;
2727 };
2728 let failure_metadata = VectorRegenerationAuditMetadata {
2729 profile: metadata.profile.clone(),
2730 model_identity: metadata.model_identity.clone(),
2731 model_version: metadata.model_version.clone(),
2732 chunk_count: metadata.chunk_count,
2733 snapshot_hash: metadata.snapshot_hash.clone(),
2734 failure_class: Some(failure.failure_class_label().to_owned()),
2735 };
2736 let _ = persist_vector_regeneration_event(
2737 &conn,
2738 "vector_regeneration_failed",
2739 profile,
2740 &failure_metadata,
2741 );
2742 }
2743
2744 pub fn trace_source(&self, source_ref: &str) -> Result<TraceReport, EngineError> {
2747 let conn = self.connect()?;
2748
2749 let node_logical_ids = collect_strings(
2750 &conn,
2751 "SELECT logical_id FROM nodes WHERE source_ref = ?1 ORDER BY created_at",
2752 source_ref,
2753 )?;
2754 let action_ids = collect_strings(
2755 &conn,
2756 "SELECT id FROM actions WHERE source_ref = ?1 ORDER BY created_at",
2757 source_ref,
2758 )?;
2759 let operational_mutation_ids = collect_strings(
2760 &conn,
2761 "SELECT id FROM operational_mutations WHERE source_ref = ?1 ORDER BY mutation_order",
2762 source_ref,
2763 )?;
2764
2765 Ok(TraceReport {
2766 source_ref: source_ref.to_owned(),
2767 node_rows: count_source_ref(&conn, "nodes", source_ref)?,
2768 edge_rows: count_source_ref(&conn, "edges", source_ref)?,
2769 action_rows: count_source_ref(&conn, "actions", source_ref)?,
2770 operational_mutation_rows: count_source_ref(
2771 &conn,
2772 "operational_mutations",
2773 source_ref,
2774 )?,
2775 node_logical_ids,
2776 action_ids,
2777 operational_mutation_ids,
2778 })
2779 }
2780
2781 #[allow(clippy::too_many_lines)]
2785 pub fn restore_logical_id(
2786 &self,
2787 logical_id: &str,
2788 ) -> Result<LogicalRestoreReport, EngineError> {
2789 let mut conn = self.connect()?;
2790 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2791
2792 let active_count: i64 = tx.query_row(
2793 "SELECT count(*) FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
2794 [logical_id],
2795 |row| row.get(0),
2796 )?;
2797 if active_count > 0 {
2798 return Ok(LogicalRestoreReport {
2799 logical_id: logical_id.to_owned(),
2800 was_noop: true,
2801 restored_node_rows: 0,
2802 restored_edge_rows: 0,
2803 restored_chunk_rows: 0,
2804 restored_fts_rows: 0,
2805 restored_property_fts_rows: 0,
2806 restored_vec_rows: 0,
2807 skipped_edges: Vec::new(),
2808 notes: vec!["logical_id already active".to_owned()],
2809 });
2810 }
2811
2812 let restored_node: Option<(String, String)> = tx
2813 .query_row(
2814 "SELECT row_id, kind FROM nodes \
2815 WHERE logical_id = ?1 AND superseded_at IS NOT NULL \
2816 ORDER BY superseded_at DESC, created_at DESC, rowid DESC LIMIT 1",
2817 [logical_id],
2818 |row| Ok((row.get(0)?, row.get(1)?)),
2819 )
2820 .optional()?;
2821 let (restored_node_row_id, restored_kind) = restored_node.ok_or_else(|| {
2822 EngineError::InvalidWrite(format!("logical_id '{logical_id}' is not retired"))
2823 })?;
2824
2825 tx.execute(
2826 "UPDATE nodes SET superseded_at = NULL WHERE row_id = ?1",
2827 [restored_node_row_id.as_str()],
2828 )?;
2829
2830 let retire_scope: Option<(i64, Option<String>, i64)> = tx
2831 .query_row(
2832 "SELECT rowid, source_ref, created_at FROM provenance_events \
2833 WHERE event_type = 'node_retire' AND subject = ?1 \
2834 ORDER BY created_at DESC, rowid DESC LIMIT 1",
2835 [logical_id],
2836 |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
2837 )
2838 .optional()?;
2839 let (restored_edge_rows, skipped_edges) = if let Some((
2840 retire_event_rowid,
2841 retire_source_ref,
2842 retire_created_at,
2843 )) = retire_scope
2844 {
2845 restore_validated_edges(
2846 &tx,
2847 logical_id,
2848 retire_source_ref.as_deref(),
2849 retire_created_at,
2850 retire_event_rowid,
2851 )?
2852 } else {
2853 (0, Vec::new())
2854 };
2855
2856 let restored_chunk_rows: usize = tx
2857 .query_row(
2858 "SELECT count(*) FROM chunks WHERE node_logical_id = ?1",
2859 [logical_id],
2860 |row| row.get::<_, i64>(0),
2861 )
2862 .map(i64_to_usize)?;
2863 tx.execute(
2864 "DELETE FROM fts_nodes WHERE node_logical_id = ?1",
2865 [logical_id],
2866 )?;
2867 let restored_fts_rows = tx.execute(
2868 "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
2869 SELECT id, node_logical_id, ?2, text_content \
2870 FROM chunks WHERE node_logical_id = ?1",
2871 rusqlite::params![logical_id, restored_kind],
2872 )?;
2873 let restored_vec_rows = count_vec_rows_for_logical_id(&tx, logical_id)?;
2874
2875 let table = fathomdb_schema::fts_kind_table_name(&restored_kind);
2878 let fts_table_exists: bool = tx
2879 .query_row(
2880 "SELECT count(*) FROM sqlite_master WHERE type='table' AND name = ?1 \
2881 AND sql LIKE 'CREATE VIRTUAL TABLE%'",
2882 rusqlite::params![table],
2883 |r| r.get::<_, i64>(0),
2884 )
2885 .unwrap_or(0)
2886 > 0;
2887 if fts_table_exists {
2888 tx.execute(
2889 &format!("DELETE FROM {table} WHERE node_logical_id = ?1"),
2890 [logical_id],
2891 )?;
2892 }
2893 let restored_property_fts_rows =
2894 rebuild_single_node_property_fts(&tx, logical_id, &restored_kind)?;
2895
2896 persist_simple_provenance_event(
2897 &tx,
2898 "restore_logical_id",
2899 logical_id,
2900 Some(serde_json::json!({
2901 "restored_node_rows": 1,
2902 "restored_edge_rows": restored_edge_rows,
2903 "restored_chunk_rows": restored_chunk_rows,
2904 "restored_fts_rows": restored_fts_rows,
2905 "restored_property_fts_rows": restored_property_fts_rows,
2906 "restored_vec_rows": restored_vec_rows,
2907 })),
2908 )?;
2909 tx.commit()?;
2910
2911 Ok(LogicalRestoreReport {
2912 logical_id: logical_id.to_owned(),
2913 was_noop: false,
2914 restored_node_rows: 1,
2915 restored_edge_rows,
2916 restored_chunk_rows,
2917 restored_fts_rows,
2918 restored_property_fts_rows,
2919 restored_vec_rows,
2920 skipped_edges,
2921 notes: Vec::new(),
2922 })
2923 }
2924
2925 pub fn purge_logical_id(&self, logical_id: &str) -> Result<LogicalPurgeReport, EngineError> {
2929 let mut conn = self.connect()?;
2930 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2931
2932 let active_count: i64 = tx.query_row(
2933 "SELECT count(*) FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
2934 [logical_id],
2935 |row| row.get(0),
2936 )?;
2937 if active_count > 0 {
2938 return Ok(LogicalPurgeReport {
2939 logical_id: logical_id.to_owned(),
2940 was_noop: true,
2941 deleted_node_rows: 0,
2942 deleted_edge_rows: 0,
2943 deleted_chunk_rows: 0,
2944 deleted_fts_rows: 0,
2945 deleted_vec_rows: 0,
2946 notes: vec!["logical_id is active; purge skipped".to_owned()],
2947 });
2948 }
2949
2950 let node_rows: i64 = tx.query_row(
2951 "SELECT count(*) FROM nodes WHERE logical_id = ?1",
2952 [logical_id],
2953 |row| row.get(0),
2954 )?;
2955 if node_rows == 0 {
2956 return Err(EngineError::InvalidWrite(format!(
2957 "logical_id '{logical_id}' does not exist"
2958 )));
2959 }
2960
2961 let deleted_vec_rows = delete_vec_rows_for_logical_id(&tx, logical_id)?;
2962 let deleted_fts_rows = tx.execute(
2963 "DELETE FROM fts_nodes WHERE node_logical_id = ?1",
2964 [logical_id],
2965 )?;
2966 let deleted_edge_rows = tx.execute(
2967 "DELETE FROM edges WHERE source_logical_id = ?1 OR target_logical_id = ?1",
2968 [logical_id],
2969 )?;
2970 let deleted_chunk_rows = tx.execute(
2971 "DELETE FROM chunks WHERE node_logical_id = ?1",
2972 [logical_id],
2973 )?;
2974 let deleted_node_rows =
2975 tx.execute("DELETE FROM nodes WHERE logical_id = ?1", [logical_id])?;
2976 tx.execute(
2977 "DELETE FROM node_access_metadata WHERE logical_id = ?1",
2978 [logical_id],
2979 )?;
2980
2981 persist_simple_provenance_event(
2982 &tx,
2983 "purge_logical_id",
2984 logical_id,
2985 Some(serde_json::json!({
2986 "deleted_node_rows": deleted_node_rows,
2987 "deleted_edge_rows": deleted_edge_rows,
2988 "deleted_chunk_rows": deleted_chunk_rows,
2989 "deleted_fts_rows": deleted_fts_rows,
2990 "deleted_vec_rows": deleted_vec_rows,
2991 })),
2992 )?;
2993 tx.commit()?;
2994
2995 Ok(LogicalPurgeReport {
2996 logical_id: logical_id.to_owned(),
2997 was_noop: false,
2998 deleted_node_rows,
2999 deleted_edge_rows,
3000 deleted_chunk_rows,
3001 deleted_fts_rows,
3002 deleted_vec_rows,
3003 notes: Vec::new(),
3004 })
3005 }
3006
3007 pub fn purge_provenance_events(
3017 &self,
3018 before_timestamp: i64,
3019 options: &ProvenancePurgeOptions,
3020 ) -> Result<ProvenancePurgeReport, EngineError> {
3021 let mut conn = self.connect()?;
3022 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
3023
3024 let preserved_types: Vec<&str> = if options.preserve_event_types.is_empty() {
3025 vec!["excise", "purge_logical_id"]
3026 } else {
3027 options
3028 .preserve_event_types
3029 .iter()
3030 .map(String::as_str)
3031 .collect()
3032 };
3033
3034 let placeholders: String = (0..preserved_types.len())
3036 .map(|i| format!("?{}", i + 2))
3037 .collect::<Vec<_>>()
3038 .join(", ");
3039 let count_query = format!(
3040 "SELECT count(*) FROM provenance_events \
3041 WHERE created_at < ?1 AND event_type NOT IN ({placeholders})"
3042 );
3043 let delete_query = format!(
3044 "DELETE FROM provenance_events WHERE rowid IN (\
3045 SELECT rowid FROM provenance_events \
3046 WHERE created_at < ?1 AND event_type NOT IN ({placeholders}) \
3047 LIMIT 10000)"
3048 );
3049
3050 let bind_params = |stmt: &mut rusqlite::Statement<'_>| -> Result<(), rusqlite::Error> {
3051 stmt.raw_bind_parameter(1, before_timestamp)?;
3052 for (i, event_type) in preserved_types.iter().enumerate() {
3053 stmt.raw_bind_parameter(i + 2, *event_type)?;
3054 }
3055 Ok(())
3056 };
3057
3058 let events_deleted = if options.dry_run {
3059 let mut stmt = tx.prepare(&count_query)?;
3060 bind_params(&mut stmt)?;
3061 stmt.raw_query()
3062 .next()?
3063 .map_or(0, |row| row.get::<_, u64>(0).unwrap_or(0))
3064 } else {
3065 let mut total_deleted: u64 = 0;
3066 loop {
3067 let mut stmt = tx.prepare(&delete_query)?;
3068 bind_params(&mut stmt)?;
3069 let deleted = stmt.raw_execute()?;
3070 if deleted == 0 {
3071 break;
3072 }
3073 total_deleted += deleted as u64;
3074 }
3075 total_deleted
3076 };
3077
3078 let total_after: u64 =
3079 tx.query_row("SELECT count(*) FROM provenance_events", [], |row| {
3080 row.get(0)
3081 })?;
3082
3083 let oldest_remaining: Option<i64> = tx
3084 .query_row("SELECT MIN(created_at) FROM provenance_events", [], |row| {
3085 row.get(0)
3086 })
3087 .optional()?
3088 .flatten();
3089
3090 if !options.dry_run {
3091 tx.commit()?;
3092 }
3093
3094 let events_preserved = if options.dry_run {
3097 total_after - events_deleted
3098 } else {
3099 total_after
3100 };
3101
3102 Ok(ProvenancePurgeReport {
3103 events_deleted,
3104 events_preserved,
3105 oldest_remaining,
3106 })
3107 }
3108
3109 #[allow(clippy::too_many_lines)]
3113 pub fn excise_source(&self, source_ref: &str) -> Result<TraceReport, EngineError> {
3114 let mut conn = self.connect()?;
3115
3116 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
3117 let affected_operational_collections = collect_strings_tx(
3118 &tx,
3119 "SELECT DISTINCT m.collection_name \
3120 FROM operational_mutations m \
3121 JOIN operational_collections c ON c.name = m.collection_name \
3122 WHERE m.source_ref = ?1 AND c.kind = 'latest_state' \
3123 ORDER BY m.collection_name",
3124 source_ref,
3125 )?;
3126
3127 let pairs: Vec<(String, String)> = {
3129 let mut stmt = tx.prepare(
3130 "SELECT row_id, logical_id FROM nodes \
3131 WHERE source_ref = ?1 AND superseded_at IS NULL",
3132 )?;
3133 stmt.query_map([source_ref], |row| {
3134 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
3135 })?
3136 .collect::<Result<Vec<_>, _>>()?
3137 };
3138 let affected_logical_ids: Vec<String> = pairs
3139 .iter()
3140 .map(|(_, logical_id)| logical_id.clone())
3141 .collect();
3142
3143 tx.execute(
3145 "UPDATE nodes SET superseded_at = unixepoch() \
3146 WHERE source_ref = ?1 AND superseded_at IS NULL",
3147 [source_ref],
3148 )?;
3149 tx.execute(
3150 "UPDATE edges SET superseded_at = unixepoch() \
3151 WHERE source_ref = ?1 AND superseded_at IS NULL",
3152 [source_ref],
3153 )?;
3154 tx.execute(
3155 "UPDATE actions SET superseded_at = unixepoch() \
3156 WHERE source_ref = ?1 AND superseded_at IS NULL",
3157 [source_ref],
3158 )?;
3159 clear_operational_current_rows(&tx, &affected_operational_collections)?;
3160 tx.execute(
3161 "DELETE FROM operational_mutations WHERE source_ref = ?1",
3162 [source_ref],
3163 )?;
3164 for logical_id in &affected_logical_ids {
3165 delete_vec_rows_for_logical_id(&tx, logical_id)?;
3166 tx.execute(
3167 "DELETE FROM chunks WHERE node_logical_id = ?1",
3168 [logical_id.as_str()],
3169 )?;
3170 }
3171
3172 for (excised_row_id, logical_id) in &pairs {
3174 let prior: Option<String> = tx
3175 .query_row(
3176 "SELECT row_id FROM nodes \
3177 WHERE logical_id = ?1 AND row_id != ?2 \
3178 ORDER BY created_at DESC LIMIT 1",
3179 [logical_id.as_str(), excised_row_id.as_str()],
3180 |row| row.get(0),
3181 )
3182 .optional()?;
3183 if let Some(prior_id) = prior {
3184 tx.execute(
3185 "UPDATE nodes SET superseded_at = NULL WHERE row_id = ?1",
3186 [prior_id.as_str()],
3187 )?;
3188 }
3189 }
3190
3191 for logical_id in &affected_logical_ids {
3192 let has_active_node = tx
3193 .query_row(
3194 "SELECT 1 FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL LIMIT 1",
3195 [logical_id.as_str()],
3196 |row| row.get::<_, i64>(0),
3197 )
3198 .optional()?
3199 .is_some();
3200 if !has_active_node {
3201 tx.execute(
3202 "DELETE FROM node_access_metadata WHERE logical_id = ?1",
3203 [logical_id.as_str()],
3204 )?;
3205 }
3206 }
3207
3208 rebuild_operational_current_rows(&tx, &affected_operational_collections)?;
3209
3210 tx.execute("DELETE FROM fts_nodes", [])?;
3213 tx.execute(
3214 r"
3215 INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content)
3216 SELECT c.id, n.logical_id, n.kind, c.text_content
3217 FROM chunks c
3218 JOIN nodes n
3219 ON n.logical_id = c.node_logical_id
3220 AND n.superseded_at IS NULL
3221 ",
3222 [],
3223 )?;
3224
3225 rebuild_property_fts_in_tx(&tx)?;
3227
3228 tx.execute(
3232 "INSERT INTO provenance_events (id, event_type, subject, source_ref) \
3233 VALUES (?1, 'excise_source', ?2, ?2)",
3234 rusqlite::params![new_id(), source_ref],
3235 )?;
3236
3237 tx.commit()?;
3238
3239 self.trace_source(source_ref)
3240 }
3241
3242 pub fn safe_export(
3246 &self,
3247 destination_path: impl AsRef<Path>,
3248 options: SafeExportOptions,
3249 ) -> Result<SafeExportManifest, EngineError> {
3250 let destination_path = destination_path.as_ref();
3251
3252 let conn = self.connect()?;
3256
3257 if options.force_checkpoint {
3258 trace_info!("safe_export: wal checkpoint started");
3259 let (busy, log, checkpointed): (i64, i64, i64) =
3260 conn.query_row("PRAGMA wal_checkpoint(FULL)", [], |row| {
3261 Ok((row.get(0)?, row.get(1)?, row.get(2)?))
3262 })?;
3263 if busy != 0 {
3264 trace_warn!(
3265 busy,
3266 log_frames = log,
3267 checkpointed_frames = checkpointed,
3268 "safe_export: wal checkpoint blocked by active readers"
3269 );
3270 return Err(EngineError::Bridge(format!(
3271 "WAL checkpoint blocked: {busy} active reader(s) prevented a full checkpoint; \
3272 log frames={log}, checkpointed={checkpointed}; \
3273 retry export when no readers are active"
3274 )));
3275 }
3276 trace_info!(
3277 log_frames = log,
3278 checkpointed_frames = checkpointed,
3279 "safe_export: wal checkpoint completed"
3280 );
3281 }
3282
3283 let schema_version: u32 = conn
3284 .query_row(
3285 "SELECT COALESCE(MAX(version), 0) FROM fathom_schema_migrations",
3286 [],
3287 |row| row.get(0),
3288 )
3289 .unwrap_or(0);
3290
3291 if let Some(parent) = destination_path.parent() {
3294 fs::create_dir_all(parent)?;
3295 }
3296 conn.backup(DatabaseName::Main, destination_path, None)?;
3297
3298 drop(conn);
3299
3300 let page_count: u64 = {
3304 let export_conn = rusqlite::Connection::open_with_flags(
3305 destination_path,
3306 rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY
3307 | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX,
3308 )?;
3309 export_conn.query_row("PRAGMA page_count", [], |row| row.get(0))?
3310 };
3311
3312 let sha256 = {
3315 let mut file = fs::File::open(destination_path)?;
3316 let mut hasher = Sha256::new();
3317 io::copy(&mut file, &mut hasher)?;
3318 format!("{:x}", hasher.finalize())
3319 };
3320
3321 let exported_at = SystemTime::now()
3323 .duration_since(SystemTime::UNIX_EPOCH)
3324 .map_err(|e| EngineError::Bridge(format!("system clock error: {e}")))?
3325 .as_secs();
3326
3327 let manifest = SafeExportManifest {
3328 exported_at,
3329 sha256,
3330 schema_version,
3331 protocol_version: EXPORT_PROTOCOL_VERSION,
3332 page_count,
3333 };
3334
3335 let manifest_path = {
3337 let mut p = destination_path.to_path_buf();
3338 let stem = p
3339 .file_name()
3340 .map(|n| format!("{}.export-manifest.json", n.to_string_lossy()))
3341 .ok_or_else(|| {
3342 EngineError::Bridge("destination path has no filename".to_owned())
3343 })?;
3344 p.set_file_name(stem);
3345 p
3346 };
3347 let manifest_json =
3348 serde_json::to_string(&manifest).map_err(|e| EngineError::Bridge(e.to_string()))?;
3349
3350 let manifest_tmp = manifest_path.with_extension("json.tmp");
3353 if let Err(e) = fs::write(&manifest_tmp, &manifest_json)
3354 .and_then(|()| fs::rename(&manifest_tmp, &manifest_path))
3355 {
3356 let _ = fs::remove_file(&manifest_tmp);
3357 return Err(e.into());
3358 }
3359
3360 Ok(manifest)
3361 }
3362}
3363
/// Serializable record of a vector embedding contract: the profile and table
/// it applies to, the embedder identity and dimension, the declared
/// normalization/chunking/preprocessing policies, and bookkeeping about when
/// and from which snapshot it was applied.
///
/// NOTE(review): presumably mirrors what `persist_vector_contract` stores;
/// nothing in this part of the file constructs it, hence `dead_code` is
/// allowed. Confirm against the persistence code.
#[allow(dead_code)]
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
struct VectorEmbeddingContractRecord {
    profile: String,
    table_name: String,
    model_identity: String,
    model_version: String,
    dimension: usize,
    normalization_policy: String,
    chunking_policy: String,
    preprocessing_policy: String,
    // NOTE(review): name suggests a JSON-encoded generator command; confirm.
    generator_command_json: String,
    applied_at: i64,
    snapshot_hash: String,
    contract_format_version: i64,
}
3380
/// One chunk as it appears in the serialized regeneration input.
///
/// The regeneration routines read `text_content` (the text handed to the
/// embedder) and `chunk_id` (the key under which the resulting embedding is
/// stored). Field order is part of the serialized form, so it must not be
/// rearranged casually.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
struct VectorRegenerationInputChunk {
    chunk_id: String,
    node_logical_id: String,
    kind: String,
    text_content: String,
    // NOTE(review): presumably the chunk's byte range in its source text; confirm.
    byte_start: Option<i64>,
    byte_end: Option<i64>,
    source_ref: Option<String>,
    created_at: i64,
}
3392
/// Serializable description of a regeneration run: the target profile and
/// table, the embedder identity and dimension, the declared policies, and the
/// chunks to embed.
///
/// NOTE(review): presumably the value produced by `build_regeneration_input`
/// and fingerprinted by `compute_snapshot_hash`; if so, any change to field
/// names or order changes the snapshot hash. Confirm against those helpers.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
struct VectorRegenerationInput {
    profile: String,
    table_name: String,
    model_identity: String,
    model_version: String,
    dimension: usize,
    normalization_policy: String,
    chunking_policy: String,
    preprocessing_policy: String,
    chunks: Vec<VectorRegenerationInputChunk>,
}
3405
/// Category of a vector regeneration failure.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum VectorRegenerationFailureClass {
    InvalidContract,
    EmbedderFailure,
    InvalidEmbedderOutput,
    SnapshotDrift,
    UnsupportedVecCapability,
}

impl VectorRegenerationFailureClass {
    /// Human-readable name of the class, used in error messages and as the
    /// `failure_class` value of audit events.
    fn label(self) -> &'static str {
        match self {
            Self::UnsupportedVecCapability => "unsupported vec capability",
            Self::SnapshotDrift => "snapshot drift",
            Self::InvalidEmbedderOutput => "invalid embedder output",
            Self::EmbedderFailure => "embedder failure",
            Self::InvalidContract => "invalid contract",
        }
    }

    /// Whether repeating the same request can succeed. Only snapshot drift
    /// qualifies.
    fn retryable(self) -> bool {
        self == Self::SnapshotDrift
    }
}
3430
3431#[derive(Clone, Debug, PartialEq, Eq)]
3432pub(crate) struct VectorRegenerationFailure {
3433 class: VectorRegenerationFailureClass,
3434 detail: String,
3435}
3436
3437impl VectorRegenerationFailure {
3438 pub(crate) fn new(class: VectorRegenerationFailureClass, detail: impl Into<String>) -> Self {
3439 Self {
3440 class,
3441 detail: detail.into(),
3442 }
3443 }
3444
3445 fn to_engine_error(&self) -> EngineError {
3446 let retry_suffix = if self.class.retryable() {
3447 " [retryable]"
3448 } else {
3449 ""
3450 };
3451 EngineError::Bridge(format!(
3452 "vector regeneration {}: {}{}",
3453 self.class.label(),
3454 self.detail,
3455 retry_suffix
3456 ))
3457 }
3458
3459 fn failure_class_label(&self) -> &'static str {
3460 self.class.label()
3461 }
3462}
3463
/// Audit payload recorded alongside a vector regeneration provenance event.
///
/// Serialized to JSON by `serialize_audit_metadata` and stored in the
/// `metadata_json` column of `provenance_events` by
/// `persist_vector_regeneration_event`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
struct VectorRegenerationAuditMetadata {
    /// Profile name the event refers to.
    profile: String,
    /// Embedder model identity.
    model_identity: String,
    /// Embedder model version.
    model_version: String,
    /// Number of chunks involved.
    chunk_count: usize,
    /// Snapshot hash string (presumably the value produced by
    /// `compute_snapshot_hash` — confirm at the call site).
    snapshot_hash: String,
    /// Failure class label; the key is omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    failure_class: Option<String>,
}
3474
/// Retention policy for an operational collection, as deserialized from JSON.
///
/// The JSON form is internally tagged on `"mode"` with snake_case variant
/// names, e.g. `{"mode": "keep_all"}`,
/// `{"mode": "purge_before_seconds", "max_age_seconds": 3600}` or
/// `{"mode": "keep_last", "max_rows": 100}`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize)]
#[serde(tag = "mode", rename_all = "snake_case")]
enum OperationalRetentionPolicy {
    /// No retention limit is configured.
    KeepAll,
    /// Age-based policy; `max_age_seconds` is the configured age bound.
    // NOTE(review): exact cutoff semantics live in the retention executor,
    // which is outside this view — confirm there.
    PurgeBeforeSeconds { max_age_seconds: i64 },
    /// Count-based policy; `max_rows` is the configured row bound.
    KeepLast { max_rows: usize },
}
3482
3483pub fn load_vector_regeneration_config(
3486 path: impl AsRef<Path>,
3487) -> Result<VectorRegenerationConfig, EngineError> {
3488 let path = path.as_ref();
3489 let raw = fs::read_to_string(path)?;
3490 match path.extension().and_then(|ext| ext.to_str()) {
3491 Some("toml") => {
3492 toml::from_str(&raw).map_err(|error| EngineError::Bridge(error.to_string()))
3493 }
3494 Some("json") | None => {
3495 serde_json::from_str(&raw).map_err(|error| EngineError::Bridge(error.to_string()))
3496 }
3497 Some(other) => Err(EngineError::Bridge(format!(
3498 "unsupported vector regeneration config extension: {other}"
3499 ))),
3500 }
3501}
3502
3503fn validate_vector_regeneration_config(
3504 conn: &rusqlite::Connection,
3505 config: &VectorRegenerationConfig,
3506 identity: &QueryEmbedderIdentity,
3507) -> Result<VectorRegenerationConfig, VectorRegenerationFailure> {
3508 let kind = validate_bounded_text("kind", &config.kind, MAX_PROFILE_LEN)?;
3509 let profile = validate_bounded_text("profile", &config.profile, MAX_PROFILE_LEN)?;
3510 if identity.dimension == 0 {
3511 return Err(VectorRegenerationFailure::new(
3512 VectorRegenerationFailureClass::InvalidContract,
3513 "embedder reports dimension 0".to_owned(),
3514 ));
3515 }
3516 let chunking_policy =
3517 validate_bounded_text("chunking_policy", &config.chunking_policy, MAX_POLICY_LEN)?;
3518 let preprocessing_policy = validate_bounded_text(
3519 "preprocessing_policy",
3520 &config.preprocessing_policy,
3521 MAX_POLICY_LEN,
3522 )?;
3523
3524 if let Some(existing_dimension) = current_vector_profile_dimension(conn, &profile)?
3525 && existing_dimension != identity.dimension
3526 {
3527 return Err(VectorRegenerationFailure::new(
3528 VectorRegenerationFailureClass::InvalidContract,
3529 format!(
3530 "embedder dimension {} does not match existing vector profile dimension {}",
3531 identity.dimension, existing_dimension
3532 ),
3533 ));
3534 }
3535
3536 validate_existing_contract_version(conn, &profile)?;
3537
3538 let normalized = VectorRegenerationConfig {
3539 kind,
3540 profile,
3541 chunking_policy,
3542 preprocessing_policy,
3543 };
3544 let serialized = serde_json::to_vec(&normalized).map_err(|error| {
3545 VectorRegenerationFailure::new(
3546 VectorRegenerationFailureClass::InvalidContract,
3547 error.to_string(),
3548 )
3549 })?;
3550 if serialized.len() > MAX_CONTRACT_JSON_BYTES {
3551 return Err(VectorRegenerationFailure::new(
3552 VectorRegenerationFailureClass::InvalidContract,
3553 format!("serialized contract exceeds {MAX_CONTRACT_JSON_BYTES} bytes"),
3554 ));
3555 }
3556
3557 Ok(normalized)
3558}
3559
3560#[allow(clippy::cast_possible_wrap)]
3561fn persist_vector_contract(
3562 conn: &rusqlite::Connection,
3563 config: &VectorRegenerationConfig,
3564 table_name: &str,
3565 identity: &QueryEmbedderIdentity,
3566 snapshot_hash: &str,
3567) -> Result<(), EngineError> {
3568 conn.execute(
3569 r"
3570 INSERT OR REPLACE INTO vector_embedding_contracts (
3571 profile,
3572 table_name,
3573 model_identity,
3574 model_version,
3575 dimension,
3576 normalization_policy,
3577 chunking_policy,
3578 preprocessing_policy,
3579 generator_command_json,
3580 applied_at,
3581 snapshot_hash,
3582 contract_format_version,
3583 updated_at
3584 ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, unixepoch(), ?10, ?11, unixepoch())
3585 ",
3586 rusqlite::params![
3587 config.profile.as_str(),
3588 table_name,
3589 identity.model_identity.as_str(),
3590 identity.model_version.as_str(),
3591 identity.dimension as i64,
3592 identity.normalization_policy.as_str(),
3593 config.chunking_policy.as_str(),
3594 config.preprocessing_policy.as_str(),
3595 "[]",
3596 snapshot_hash,
3597 CURRENT_VECTOR_CONTRACT_FORMAT_VERSION,
3598 ],
3599 )?;
3600 Ok(())
3601}
3602
3603fn persist_vector_regeneration_event(
3604 conn: &rusqlite::Connection,
3605 event_type: &str,
3606 subject: &str,
3607 metadata: &VectorRegenerationAuditMetadata,
3608) -> Result<(), EngineError> {
3609 let metadata_json = serialize_audit_metadata(metadata)?;
3610 conn.execute(
3611 "INSERT INTO provenance_events (id, event_type, subject, metadata_json) VALUES (?1, ?2, ?3, ?4)",
3612 rusqlite::params![new_id(), event_type, subject, metadata_json],
3613 )?;
3614 Ok(())
3615}
3616
3617fn persist_simple_provenance_event(
3618 conn: &rusqlite::Connection,
3619 event_type: &str,
3620 subject: &str,
3621 metadata: Option<serde_json::Value>,
3622) -> Result<(), EngineError> {
3623 let metadata_json = metadata.map(|value| value.to_string()).unwrap_or_default();
3624 conn.execute(
3625 "INSERT INTO provenance_events (id, event_type, subject, metadata_json) VALUES (?1, ?2, ?3, ?4)",
3626 rusqlite::params![new_id(), event_type, subject, metadata_json],
3627 )?;
3628 Ok(())
3629}
3630
3631fn count_per_kind_property_fts_issues(
3639 conn: &rusqlite::Connection,
3640) -> Result<(i64, i64, i64, i64), EngineError> {
3641 let per_kind_tables: Vec<String> = {
3645 let mut stmt = conn.prepare(
3646 "SELECT name FROM sqlite_master \
3647 WHERE type='table' AND name LIKE 'fts_props_%' \
3648 AND sql LIKE 'CREATE VIRTUAL TABLE%'",
3649 )?;
3650 stmt.query_map([], |r| r.get::<_, String>(0))?
3651 .collect::<Result<Vec<_>, _>>()?
3652 };
3653
3654 let registered_kinds: std::collections::HashSet<String> = {
3655 let mut stmt = conn.prepare("SELECT kind FROM fts_property_schemas")?;
3656 stmt.query_map([], |r| r.get::<_, String>(0))?
3657 .collect::<Result<std::collections::HashSet<_>, _>>()?
3658 };
3659
3660 let mut stale = 0i64;
3661 let mut orphaned = 0i64;
3662 let mut duplicate = 0i64;
3663
3664 for table in &per_kind_tables {
3665 let kind_stale: i64 = conn.query_row(
3667 &format!(
3668 "SELECT count(*) FROM {table} fp \
3669 WHERE NOT EXISTS (\
3670 SELECT 1 FROM nodes n \
3671 WHERE n.logical_id = fp.node_logical_id AND n.superseded_at IS NULL\
3672 )"
3673 ),
3674 [],
3675 |r| r.get(0),
3676 )?;
3677 stale += kind_stale;
3678
3679 let kind_dup: i64 = conn.query_row(
3681 &format!(
3682 "SELECT count(*) FROM (\
3683 SELECT node_logical_id FROM {table} \
3684 GROUP BY node_logical_id HAVING count(*) > 1\
3685 )"
3686 ),
3687 [],
3688 |r| r.get(0),
3689 )?;
3690 duplicate += kind_dup;
3691
3692 let table_has_schema = registered_kinds
3695 .iter()
3696 .any(|k| fathomdb_schema::fts_kind_table_name(k) == *table);
3697 if !table_has_schema {
3698 let table_rows: i64 =
3699 conn.query_row(&format!("SELECT count(*) FROM {table}"), [], |r| r.get(0))?;
3700 orphaned += table_rows;
3701 }
3702 }
3703
3704 Ok((stale, orphaned, 0, duplicate))
3706}
3707
3708fn count_missing_property_fts_rows(conn: &rusqlite::Connection) -> Result<i64, EngineError> {
3712 let schemas = crate::writer::load_fts_property_schemas(conn)?;
3713 if schemas.is_empty() {
3714 return Ok(0);
3715 }
3716
3717 let mut missing = 0i64;
3718 for (kind, schema) in &schemas {
3719 let table = fathomdb_schema::fts_kind_table_name(kind);
3720 let table_exists: bool = conn
3722 .query_row(
3723 "SELECT count(*) FROM sqlite_master WHERE type='table' AND name = ?1",
3724 [table.as_str()],
3725 |r| r.get::<_, i64>(0),
3726 )
3727 .unwrap_or(0)
3728 > 0;
3729
3730 if table_exists {
3731 let mut stmt = conn.prepare(&format!(
3732 "SELECT n.logical_id, n.properties FROM nodes n \
3733 WHERE n.kind = ?1 AND n.superseded_at IS NULL \
3734 AND NOT EXISTS (SELECT 1 FROM {table} fp WHERE fp.node_logical_id = n.logical_id)"
3735 ))?;
3736 let rows = stmt.query_map([kind.as_str()], |row| {
3737 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
3738 })?;
3739 for row in rows {
3740 let (_logical_id, properties_str) = row?;
3741 let props: serde_json::Value =
3742 serde_json::from_str(&properties_str).unwrap_or_default();
3743 if crate::writer::extract_property_fts(&props, schema)
3744 .0
3745 .is_some()
3746 {
3747 missing += 1;
3748 }
3749 }
3750 } else {
3751 let mut stmt = conn.prepare(
3753 "SELECT n.logical_id, n.properties FROM nodes n \
3754 WHERE n.kind = ?1 AND n.superseded_at IS NULL",
3755 )?;
3756 let rows = stmt.query_map([kind.as_str()], |row| {
3757 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
3758 })?;
3759 for row in rows {
3760 let (_logical_id, properties_str) = row?;
3761 let props: serde_json::Value =
3762 serde_json::from_str(&properties_str).unwrap_or_default();
3763 if crate::writer::extract_property_fts(&props, schema)
3764 .0
3765 .is_some()
3766 {
3767 missing += 1;
3768 }
3769 }
3770 }
3771 }
3772 Ok(missing)
3773}
3774
3775fn count_drifted_property_fts_rows(conn: &rusqlite::Connection) -> Result<i64, EngineError> {
3780 let schemas = crate::writer::load_fts_property_schemas(conn)?;
3781 if schemas.is_empty() {
3782 return Ok(0);
3783 }
3784
3785 let mut drifted = 0i64;
3786 for (kind, schema) in &schemas {
3787 let table = fathomdb_schema::fts_kind_table_name(kind);
3788 let table_exists: bool = conn
3790 .query_row(
3791 "SELECT count(*) FROM sqlite_master WHERE type='table' AND name = ?1",
3792 [table.as_str()],
3793 |r| r.get::<_, i64>(0),
3794 )
3795 .unwrap_or(0)
3796 > 0;
3797 if !table_exists {
3798 continue;
3799 }
3800 let mut stmt = conn.prepare(&format!(
3801 "SELECT fp.node_logical_id, fp.text_content, n.properties \
3802 FROM {table} fp \
3803 JOIN nodes n ON n.logical_id = fp.node_logical_id AND n.superseded_at IS NULL \
3804 WHERE n.kind = ?1"
3805 ))?;
3806 let rows = stmt.query_map([kind.as_str()], |row| {
3807 Ok((
3808 row.get::<_, String>(0)?,
3809 row.get::<_, String>(1)?,
3810 row.get::<_, String>(2)?,
3811 ))
3812 })?;
3813 for row in rows {
3814 let (_logical_id, stored_text, properties_str) = row?;
3815 let props: serde_json::Value =
3816 serde_json::from_str(&properties_str).unwrap_or_default();
3817 let (expected, _positions, _stats) =
3818 crate::writer::extract_property_fts(&props, schema);
3819 match expected {
3820 Some(text) if text == stored_text => {}
3821 _ => drifted += 1,
3822 }
3823 }
3824 }
3825 Ok(drifted)
3826}
3827
3828fn rebuild_property_fts_in_tx(conn: &rusqlite::Connection) -> Result<usize, EngineError> {
3830 let all_per_kind_tables: Vec<String> = {
3833 let mut stmt = conn.prepare(
3834 "SELECT name FROM sqlite_master WHERE type='table' AND name LIKE 'fts_props_%' \
3835 AND sql LIKE 'CREATE VIRTUAL TABLE%'",
3836 )?;
3837 stmt.query_map([], |r| r.get::<_, String>(0))?
3838 .collect::<Result<Vec<_>, _>>()?
3839 };
3840 for table in &all_per_kind_tables {
3841 conn.execute_batch(&format!("DELETE FROM {table}"))?;
3842 }
3843 conn.execute("DELETE FROM fts_node_property_positions", [])?;
3844 let inserted = crate::projection::insert_property_fts_rows(
3845 conn,
3846 "SELECT logical_id, properties FROM nodes WHERE kind = ?1 AND superseded_at IS NULL",
3847 )?;
3848 Ok(inserted)
3849}
3850
3851fn rebuild_single_node_property_fts(
3854 conn: &rusqlite::Connection,
3855 logical_id: &str,
3856 kind: &str,
3857) -> Result<usize, EngineError> {
3858 let schema: Option<(String, String)> = conn
3859 .query_row(
3860 "SELECT property_paths_json, separator FROM fts_property_schemas WHERE kind = ?1",
3861 [kind],
3862 |row| {
3863 let paths_json: String = row.get(0)?;
3864 let separator: String = row.get(1)?;
3865 Ok((paths_json, separator))
3866 },
3867 )
3868 .optional()?;
3869 let Some((paths_json, separator)) = schema else {
3870 return Ok(0);
3871 };
3872 let parsed = crate::writer::parse_property_schema_json(&paths_json, &separator);
3873 let properties_str: Option<String> = conn
3874 .query_row(
3875 "SELECT properties FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
3876 [logical_id],
3877 |row| row.get(0),
3878 )
3879 .optional()?;
3880 let Some(properties_str) = properties_str else {
3881 return Ok(0);
3882 };
3883 let props: serde_json::Value = serde_json::from_str(&properties_str).unwrap_or_default();
3884 let (text, positions, _stats) = crate::writer::extract_property_fts(&props, &parsed);
3885 let Some(text) = text else {
3886 return Ok(0);
3887 };
3888 conn.execute(
3889 "DELETE FROM fts_node_property_positions WHERE node_logical_id = ?1",
3890 rusqlite::params![logical_id],
3891 )?;
3892 let table = fathomdb_schema::fts_kind_table_name(kind);
3893 let tok = fathomdb_schema::DEFAULT_FTS_TOKENIZER;
3894 conn.execute_batch(&format!(
3895 "CREATE VIRTUAL TABLE IF NOT EXISTS {table} \
3896 USING fts5(node_logical_id UNINDEXED, text_content, tokenize = '{tok}')"
3897 ))?;
3898 conn.execute(
3899 &format!("INSERT INTO {table} (node_logical_id, text_content) VALUES (?1, ?2)"),
3900 rusqlite::params![logical_id, text],
3901 )?;
3902 for pos in &positions {
3903 conn.execute(
3904 "INSERT INTO fts_node_property_positions \
3905 (node_logical_id, kind, start_offset, end_offset, leaf_path) \
3906 VALUES (?1, ?2, ?3, ?4, ?5)",
3907 rusqlite::params![
3908 logical_id,
3909 kind,
3910 i64::try_from(pos.start_offset).unwrap_or(i64::MAX),
3911 i64::try_from(pos.end_offset).unwrap_or(i64::MAX),
3912 pos.leaf_path,
3913 ],
3914 )?;
3915 }
3916 Ok(1)
3917}
3918
3919fn serialize_property_paths_json(
3920 entries: &[FtsPropertyPathSpec],
3921 exclude_paths: &[String],
3922) -> Result<String, EngineError> {
3923 let all_scalar = entries
3927 .iter()
3928 .all(|e| e.mode == FtsPropertyPathMode::Scalar);
3929 let any_weight = entries.iter().any(|e| e.weight.is_some());
3930 if all_scalar && exclude_paths.is_empty() && !any_weight {
3931 let paths: Vec<&str> = entries.iter().map(|e| e.path.as_str()).collect();
3932 return serde_json::to_string(&paths).map_err(|e| {
3933 EngineError::InvalidWrite(format!("failed to serialize property paths: {e}"))
3934 });
3935 }
3936
3937 let mut obj = serde_json::Map::new();
3938 let paths_json: Vec<serde_json::Value> = entries
3939 .iter()
3940 .map(|e| {
3941 let mode_str = match e.mode {
3942 FtsPropertyPathMode::Scalar => "scalar",
3943 FtsPropertyPathMode::Recursive => "recursive",
3944 };
3945 let mut entry = serde_json::json!({ "path": e.path, "mode": mode_str });
3946 if let Some(w) = e.weight {
3947 entry["weight"] = serde_json::json!(w);
3948 }
3949 entry
3950 })
3951 .collect();
3952 obj.insert("paths".to_owned(), serde_json::Value::Array(paths_json));
3953 if !exclude_paths.is_empty() {
3954 obj.insert("exclude_paths".to_owned(), serde_json::json!(exclude_paths));
3955 }
3956 serde_json::to_string(&serde_json::Value::Object(obj))
3957 .map_err(|e| EngineError::InvalidWrite(format!("failed to serialize property paths: {e}")))
3958}
3959
3960fn create_or_replace_fts_kind_table(
3966 conn: &rusqlite::Connection,
3967 kind: &str,
3968 specs: &[FtsPropertyPathSpec],
3969 tokenizer: &str,
3970) -> Result<(), EngineError> {
3971 let table = fathomdb_schema::fts_kind_table_name(kind);
3972
3973 if !tokenizer
3978 .chars()
3979 .all(|c| c.is_alphanumeric() || "'._-$@ ".contains(c))
3980 {
3981 return Err(EngineError::Bridge(format!(
3982 "invalid tokenizer string: {tokenizer:?}"
3983 )));
3984 }
3985
3986 let cols: Vec<String> = if specs.is_empty() {
3987 vec![
3988 "node_logical_id UNINDEXED".to_owned(),
3989 "text_content".to_owned(),
3990 ]
3991 } else {
3992 std::iter::once("node_logical_id UNINDEXED".to_owned())
3993 .chain(specs.iter().map(|s| {
3994 let is_recursive = matches!(s.mode, FtsPropertyPathMode::Recursive);
3995 fathomdb_schema::fts_column_name(&s.path, is_recursive)
3996 }))
3997 .collect()
3998 };
3999
4000 let tokenizer_sql = tokenizer.replace('\'', "''");
4003 conn.execute_batch(&format!(
4004 "DROP TABLE IF EXISTS {table}; \
4005 CREATE VIRTUAL TABLE {table} USING fts5({cols}, tokenize='{tokenizer_sql}');",
4006 cols = cols.join(", "),
4007 ))?;
4008
4009 Ok(())
4010}
4011
4012fn validate_fts_property_paths(paths: &[String]) -> Result<(), EngineError> {
4013 if paths.is_empty() {
4014 return Err(EngineError::InvalidWrite(
4015 "FTS property paths must not be empty".to_owned(),
4016 ));
4017 }
4018 let mut seen = std::collections::HashSet::new();
4019 for path in paths {
4020 if !path.starts_with("$.") {
4021 return Err(EngineError::InvalidWrite(format!(
4022 "FTS property path must start with '$.' but got: {path}"
4023 )));
4024 }
4025 let after_prefix = &path[2..]; let segments: Vec<&str> = after_prefix.split('.').collect();
4027 if segments.is_empty() || segments.iter().any(|s| s.is_empty()) {
4028 return Err(EngineError::InvalidWrite(format!(
4029 "FTS property path has empty segment(s): {path}"
4030 )));
4031 }
4032 for seg in &segments {
4033 if !seg.chars().all(|c| c.is_alphanumeric() || c == '_') {
4034 return Err(EngineError::InvalidWrite(format!(
4035 "FTS property path segment contains invalid characters: {path}"
4036 )));
4037 }
4038 }
4039 if !seen.insert(path) {
4040 return Err(EngineError::InvalidWrite(format!(
4041 "duplicate FTS property path: {path}"
4042 )));
4043 }
4044 }
4045 Ok(())
4046}
4047
4048fn load_fts_property_schema_record(
4049 conn: &rusqlite::Connection,
4050 kind: &str,
4051) -> Result<Option<FtsPropertySchemaRecord>, EngineError> {
4052 let row = conn
4053 .query_row(
4054 "SELECT kind, property_paths_json, separator, format_version \
4055 FROM fts_property_schemas WHERE kind = ?1",
4056 [kind],
4057 |row| {
4058 let kind: String = row.get(0)?;
4059 let paths_json: String = row.get(1)?;
4060 let separator: String = row.get(2)?;
4061 let format_version: i64 = row.get(3)?;
4062 Ok(build_fts_property_schema_record(
4063 kind,
4064 &paths_json,
4065 separator,
4066 format_version,
4067 ))
4068 },
4069 )
4070 .optional()?;
4071 Ok(row)
4072}
4073
4074fn build_fts_property_schema_record(
4080 kind: String,
4081 paths_json: &str,
4082 separator: String,
4083 format_version: i64,
4084) -> FtsPropertySchemaRecord {
4085 let schema = crate::writer::parse_property_schema_json(paths_json, &separator);
4086 let entries: Vec<FtsPropertyPathSpec> = schema
4087 .paths
4088 .into_iter()
4089 .map(|entry| FtsPropertyPathSpec {
4090 path: entry.path,
4091 mode: match entry.mode {
4092 crate::writer::PropertyPathMode::Scalar => FtsPropertyPathMode::Scalar,
4093 crate::writer::PropertyPathMode::Recursive => FtsPropertyPathMode::Recursive,
4094 },
4095 weight: entry.weight,
4096 })
4097 .collect();
4098 let property_paths: Vec<String> = entries.iter().map(|e| e.path.clone()).collect();
4099 FtsPropertySchemaRecord {
4100 kind,
4101 property_paths,
4102 entries,
4103 exclude_paths: schema.exclude_paths,
4104 separator,
4105 format_version,
4106 }
4107}
4108
4109fn build_regeneration_input(
4110 config: &VectorRegenerationConfig,
4111 identity: &QueryEmbedderIdentity,
4112 chunks: Vec<VectorRegenerationInputChunk>,
4113) -> VectorRegenerationInput {
4114 VectorRegenerationInput {
4115 profile: config.profile.clone(),
4116 table_name: fathomdb_schema::vec_kind_table_name(&config.kind),
4117 model_identity: identity.model_identity.clone(),
4118 model_version: identity.model_version.clone(),
4119 dimension: identity.dimension,
4120 normalization_policy: identity.normalization_policy.clone(),
4121 chunking_policy: config.chunking_policy.clone(),
4122 preprocessing_policy: config.preprocessing_policy.clone(),
4123 chunks,
4124 }
4125}
4126
4127fn compute_snapshot_hash(payload: &VectorRegenerationInput) -> Result<String, EngineError> {
4128 let bytes =
4129 serde_json::to_vec(payload).map_err(|error| EngineError::Bridge(error.to_string()))?;
4130 let mut hasher = Sha256::new();
4131 hasher.update(bytes);
4132 Ok(format!("{:x}", hasher.finalize()))
4133}
4134
4135fn collect_regeneration_chunks(
4136 conn: &rusqlite::Connection,
4137) -> Result<Vec<VectorRegenerationInputChunk>, EngineError> {
4138 let mut stmt = conn.prepare(
4139 r"
4140 SELECT c.id, c.node_logical_id, n.kind, c.text_content, c.byte_start, c.byte_end, n.source_ref, c.created_at
4141 FROM chunks c
4142 JOIN nodes n
4143 ON n.logical_id = c.node_logical_id
4144 AND n.superseded_at IS NULL
4145 ORDER BY c.created_at, c.id
4146 ",
4147 )?;
4148 let chunks = stmt
4149 .query_map([], |row| {
4150 Ok(VectorRegenerationInputChunk {
4151 chunk_id: row.get(0)?,
4152 node_logical_id: row.get(1)?,
4153 kind: row.get(2)?,
4154 text_content: row.get(3)?,
4155 byte_start: row.get(4)?,
4156 byte_end: row.get(5)?,
4157 source_ref: row.get(6)?,
4158 created_at: row.get(7)?,
4159 })
4160 })?
4161 .collect::<Result<Vec<_>, _>>()?;
4162 Ok(chunks)
4163}
4164
4165fn validate_bounded_text(
4166 field: &str,
4167 value: &str,
4168 max_len: usize,
4169) -> Result<String, VectorRegenerationFailure> {
4170 let trimmed = value.trim();
4171 if trimmed.is_empty() {
4172 return Err(VectorRegenerationFailure::new(
4173 VectorRegenerationFailureClass::InvalidContract,
4174 format!("{field} must not be empty"),
4175 ));
4176 }
4177 if trimmed.len() > max_len {
4178 return Err(VectorRegenerationFailure::new(
4179 VectorRegenerationFailureClass::InvalidContract,
4180 format!("{field} exceeds max length {max_len}"),
4181 ));
4182 }
4183 Ok(trimmed.to_owned())
4184}
4185
4186fn current_vector_profile_dimension(
4187 conn: &rusqlite::Connection,
4188 profile: &str,
4189) -> Result<Option<usize>, VectorRegenerationFailure> {
4190 let dimension: Option<i64> = conn
4191 .query_row(
4192 "SELECT dimension FROM vector_profiles WHERE profile = ?1 AND enabled = 1",
4193 [profile],
4194 |row| row.get(0),
4195 )
4196 .optional()
4197 .map_err(|error| {
4198 VectorRegenerationFailure::new(
4199 VectorRegenerationFailureClass::InvalidContract,
4200 error.to_string(),
4201 )
4202 })?;
4203 dimension
4204 .map(|value| {
4205 usize::try_from(value).map_err(|_| {
4206 VectorRegenerationFailure::new(
4207 VectorRegenerationFailureClass::InvalidContract,
4208 format!("stored vector profile dimension is invalid: {value}"),
4209 )
4210 })
4211 })
4212 .transpose()
4213}
4214
4215fn validate_existing_contract_version(
4216 conn: &rusqlite::Connection,
4217 profile: &str,
4218) -> Result<(), VectorRegenerationFailure> {
4219 let version: Option<i64> = conn
4220 .query_row(
4221 "SELECT contract_format_version FROM vector_embedding_contracts WHERE profile = ?1",
4222 [profile],
4223 |row| row.get(0),
4224 )
4225 .optional()
4226 .map_err(|error| {
4227 VectorRegenerationFailure::new(
4228 VectorRegenerationFailureClass::InvalidContract,
4229 error.to_string(),
4230 )
4231 })?;
4232 if let Some(version) = version
4233 && version > CURRENT_VECTOR_CONTRACT_FORMAT_VERSION
4234 {
4235 return Err(VectorRegenerationFailure::new(
4236 VectorRegenerationFailureClass::InvalidContract,
4237 format!(
4238 "persisted contract format version {version} is unsupported; supported version is {CURRENT_VECTOR_CONTRACT_FORMAT_VERSION}"
4239 ),
4240 ));
4241 }
4242 Ok(())
4243}
4244
4245fn serialize_audit_metadata(
4246 metadata: &VectorRegenerationAuditMetadata,
4247) -> Result<String, EngineError> {
4248 let json =
4249 serde_json::to_string(metadata).map_err(|error| EngineError::Bridge(error.to_string()))?;
4250 if json.len() > MAX_AUDIT_METADATA_BYTES {
4251 return Err(VectorRegenerationFailure::new(
4252 VectorRegenerationFailureClass::InvalidContract,
4253 format!("audit metadata exceeds {MAX_AUDIT_METADATA_BYTES} bytes"),
4254 )
4255 .to_engine_error());
4256 }
4257 Ok(json)
4258}
4259
4260fn count_source_ref(
4261 conn: &rusqlite::Connection,
4262 table: &str,
4263 source_ref: &str,
4264) -> Result<usize, EngineError> {
4265 let sql = match table {
4266 "nodes" => "SELECT count(*) FROM nodes WHERE source_ref = ?1",
4267 "edges" => "SELECT count(*) FROM edges WHERE source_ref = ?1",
4268 "actions" => "SELECT count(*) FROM actions WHERE source_ref = ?1",
4269 "operational_mutations" => {
4270 "SELECT count(*) FROM operational_mutations WHERE source_ref = ?1"
4271 }
4272 other => return Err(EngineError::Bridge(format!("unknown table: {other}"))),
4273 };
4274 let count: i64 = conn.query_row(sql, [source_ref], |row| row.get(0))?;
4275 usize::try_from(count)
4278 .map_err(|_| EngineError::Bridge(format!("count overflow for table {table}: {count}")))
4279}
4280
4281fn rebuild_operational_current_rows(
4282 tx: &rusqlite::Transaction<'_>,
4283 collections: &[String],
4284) -> Result<usize, EngineError> {
4285 let mut rebuilt_rows = 0usize;
4286 clear_operational_current_rows(tx, collections)?;
4287 let mut ins_current = tx.prepare_cached(
4288 "INSERT INTO operational_current \
4289 (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
4290 VALUES (?1, ?2, ?3, ?4, ?5)",
4291 )?;
4292
4293 for collection in collections {
4294 let mut stmt = tx.prepare(
4295 "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
4296 FROM operational_mutations \
4297 WHERE collection_name = ?1 \
4298 ORDER BY record_key, mutation_order",
4299 )?;
4300 let mut latest_by_key: std::collections::HashMap<String, Option<(String, i64, String)>> =
4301 std::collections::HashMap::new();
4302 let rows = stmt.query_map([collection], map_operational_mutation_row)?;
4303 for row in rows {
4304 let mutation = row?;
4305 match mutation.op_kind.as_str() {
4306 "put" => {
4307 latest_by_key.insert(
4308 mutation.record_key,
4309 Some((mutation.payload_json, mutation.created_at, mutation.id)),
4310 );
4311 }
4312 "delete" => {
4313 latest_by_key.insert(mutation.record_key, None);
4314 }
4315 _ => {}
4316 }
4317 }
4318
4319 for (record_key, state) in latest_by_key {
4320 if let Some((payload_json, updated_at, last_mutation_id)) = state {
4321 ins_current.execute(rusqlite::params![
4322 collection,
4323 record_key,
4324 payload_json,
4325 updated_at,
4326 last_mutation_id,
4327 ])?;
4328 rebuilt_rows += 1;
4329 }
4330 }
4331 }
4332
4333 drop(ins_current);
4334 Ok(rebuilt_rows)
4335}
4336
4337fn clear_operational_current_rows(
4338 tx: &rusqlite::Transaction<'_>,
4339 collections: &[String],
4340) -> Result<(), EngineError> {
4341 let mut delete_current =
4342 tx.prepare_cached("DELETE FROM operational_current WHERE collection_name = ?1")?;
4343 let mut delete_secondary_current = tx.prepare_cached(
4344 "DELETE FROM operational_secondary_index_entries \
4345 WHERE collection_name = ?1 AND subject_kind = 'current'",
4346 )?;
4347 for collection in collections {
4348 delete_secondary_current.execute([collection])?;
4349 delete_current.execute([collection])?;
4350 }
4351 drop(delete_secondary_current);
4352 drop(delete_current);
4353 Ok(())
4354}
4355
4356fn clear_operational_secondary_index_entries(
4357 tx: &rusqlite::Transaction<'_>,
4358 collection_name: &str,
4359) -> Result<(), EngineError> {
4360 tx.execute(
4361 "DELETE FROM operational_secondary_index_entries WHERE collection_name = ?1",
4362 [collection_name],
4363 )?;
4364 Ok(())
4365}
4366
4367fn insert_operational_secondary_index_entry(
4368 tx: &rusqlite::Transaction<'_>,
4369 collection_name: &str,
4370 subject_kind: &str,
4371 mutation_id: &str,
4372 record_key: &str,
4373 entry: &crate::operational::OperationalSecondaryIndexEntry,
4374) -> Result<(), EngineError> {
4375 tx.execute(
4376 "INSERT INTO operational_secondary_index_entries \
4377 (collection_name, index_name, subject_kind, mutation_id, record_key, sort_timestamp, \
4378 slot1_text, slot1_integer, slot2_text, slot2_integer, slot3_text, slot3_integer) \
4379 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
4380 rusqlite::params![
4381 collection_name,
4382 entry.index_name,
4383 subject_kind,
4384 mutation_id,
4385 record_key,
4386 entry.sort_timestamp,
4387 entry.slot1_text,
4388 entry.slot1_integer,
4389 entry.slot2_text,
4390 entry.slot2_integer,
4391 entry.slot3_text,
4392 entry.slot3_integer,
4393 ],
4394 )?;
4395 Ok(())
4396}
4397
4398fn rebuild_operational_secondary_index_entries(
4399 tx: &rusqlite::Transaction<'_>,
4400 collection_name: &str,
4401 collection_kind: OperationalCollectionKind,
4402 indexes: &[OperationalSecondaryIndexDefinition],
4403) -> Result<(usize, usize), EngineError> {
4404 clear_operational_secondary_index_entries(tx, collection_name)?;
4405
4406 let mut mutation_entries_rebuilt = 0usize;
4407 if collection_kind == OperationalCollectionKind::AppendOnlyLog {
4408 let mut stmt = tx.prepare(
4409 "SELECT id, record_key, payload_json FROM operational_mutations \
4410 WHERE collection_name = ?1 ORDER BY mutation_order",
4411 )?;
4412 let rows = stmt
4413 .query_map([collection_name], |row| {
4414 Ok((
4415 row.get::<_, String>(0)?,
4416 row.get::<_, String>(1)?,
4417 row.get::<_, String>(2)?,
4418 ))
4419 })?
4420 .collect::<Result<Vec<_>, _>>()?;
4421 drop(stmt);
4422 for (mutation_id, record_key, payload_json) in rows {
4423 for entry in extract_secondary_index_entries_for_mutation(indexes, &payload_json) {
4424 insert_operational_secondary_index_entry(
4425 tx,
4426 collection_name,
4427 "mutation",
4428 &mutation_id,
4429 &record_key,
4430 &entry,
4431 )?;
4432 mutation_entries_rebuilt += 1;
4433 }
4434 }
4435 }
4436
4437 let mut current_entries_rebuilt = 0usize;
4438 if collection_kind == OperationalCollectionKind::LatestState {
4439 let mut stmt = tx.prepare(
4440 "SELECT record_key, payload_json, updated_at, last_mutation_id FROM operational_current \
4441 WHERE collection_name = ?1 ORDER BY updated_at DESC, record_key",
4442 )?;
4443 let rows = stmt
4444 .query_map([collection_name], |row| {
4445 Ok((
4446 row.get::<_, String>(0)?,
4447 row.get::<_, String>(1)?,
4448 row.get::<_, i64>(2)?,
4449 row.get::<_, String>(3)?,
4450 ))
4451 })?
4452 .collect::<Result<Vec<_>, _>>()?;
4453 drop(stmt);
4454 for (record_key, payload_json, updated_at, last_mutation_id) in rows {
4455 for entry in
4456 extract_secondary_index_entries_for_current(indexes, &payload_json, updated_at)
4457 {
4458 insert_operational_secondary_index_entry(
4459 tx,
4460 collection_name,
4461 "current",
4462 &last_mutation_id,
4463 &record_key,
4464 &entry,
4465 )?;
4466 current_entries_rebuilt += 1;
4467 }
4468 }
4469 }
4470
4471 Ok((mutation_entries_rebuilt, current_entries_rebuilt))
4472}
4473
4474fn collect_strings_tx(
4475 tx: &rusqlite::Transaction<'_>,
4476 sql: &str,
4477 value: &str,
4478) -> Result<Vec<String>, EngineError> {
4479 let mut stmt = tx.prepare(sql)?;
4480 let rows = stmt.query_map([value], |row| row.get::<_, String>(0))?;
4481 rows.collect::<Result<Vec<_>, _>>()
4482 .map_err(EngineError::from)
4483}
4484
/// Converts a SQLite `count(*)` result to `usize`.
///
/// # Panics
/// Panics if `val` is negative (or otherwise not representable as `usize`);
/// a count is expected never to be negative, so that indicates a bug.
#[allow(clippy::expect_used)]
fn i64_to_usize(val: i64) -> usize {
    let converted: Result<usize, _> = usize::try_from(val);
    converted.expect("count(*) must be non-negative")
}
4491
4492fn collect_strings(
4499 conn: &rusqlite::Connection,
4500 sql: &str,
4501 param: &str,
4502) -> Result<Vec<String>, EngineError> {
4503 let mut stmt = conn.prepare(sql)?;
4504 let values = stmt
4505 .query_map([param], |row| row.get::<_, String>(0))?
4506 .collect::<Result<Vec<_>, _>>()?;
4507 Ok(values)
4508}
4509
4510fn collect_edge_logical_ids_for_restore(
4511 tx: &rusqlite::Transaction<'_>,
4512 logical_id: &str,
4513 retire_source_ref: Option<&str>,
4514 retire_created_at: i64,
4515 retire_event_rowid: i64,
4516) -> Result<Vec<String>, EngineError> {
4517 let mut stmt = tx.prepare(
4518 "SELECT DISTINCT e.logical_id \
4519 FROM edges e \
4520 JOIN provenance_events p \
4521 ON p.subject = e.logical_id \
4522 AND p.event_type = 'edge_retire' \
4523 AND ( \
4524 p.created_at > ?3 \
4525 OR (p.created_at = ?3 AND p.rowid >= ?4) \
4526 ) \
4527 AND ((?2 IS NULL AND p.source_ref IS NULL) OR p.source_ref = ?2) \
4528 WHERE e.superseded_at IS NOT NULL \
4529 AND (e.source_logical_id = ?1 OR e.target_logical_id = ?1) \
4530 AND NOT EXISTS ( \
4531 SELECT 1 FROM edges active \
4532 WHERE active.logical_id = e.logical_id \
4533 AND active.superseded_at IS NULL \
4534 ) \
4535 ORDER BY e.logical_id",
4536 )?;
4537 let edge_ids = stmt
4538 .query_map(
4539 rusqlite::params![
4540 logical_id,
4541 retire_source_ref,
4542 retire_created_at,
4543 retire_event_rowid
4544 ],
4545 |row| row.get::<_, String>(0),
4546 )?
4547 .collect::<Result<Vec<_>, _>>()?;
4548 Ok(edge_ids)
4549}
4550
4551fn restore_validated_edges(
4554 tx: &rusqlite::Transaction<'_>,
4555 logical_id: &str,
4556 retire_source_ref: Option<&str>,
4557 retire_created_at: i64,
4558 retire_event_rowid: i64,
4559) -> Result<(usize, Vec<SkippedEdge>), EngineError> {
4560 let edge_logical_ids = collect_edge_logical_ids_for_restore(
4561 tx,
4562 logical_id,
4563 retire_source_ref,
4564 retire_created_at,
4565 retire_event_rowid,
4566 )?;
4567 let mut restored = 0usize;
4568 let mut skipped = Vec::new();
4569 for edge_logical_id in &edge_logical_ids {
4570 let edge_detail: Option<(String, String, String)> = tx
4571 .query_row(
4572 "SELECT row_id, source_logical_id, target_logical_id FROM edges \
4573 WHERE logical_id = ?1 AND superseded_at IS NOT NULL \
4574 ORDER BY superseded_at DESC, created_at DESC, rowid DESC LIMIT 1",
4575 [edge_logical_id.as_str()],
4576 |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
4577 )
4578 .optional()?;
4579 let Some((edge_row_id, source_lid, target_lid)) = edge_detail else {
4580 continue;
4581 };
4582 let other_endpoint = if source_lid == logical_id {
4583 &target_lid
4584 } else {
4585 &source_lid
4586 };
4587 let endpoint_active: bool = tx
4588 .query_row(
4589 "SELECT 1 FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL LIMIT 1",
4590 [other_endpoint.as_str()],
4591 |_| Ok(true),
4592 )
4593 .optional()?
4594 .unwrap_or(false);
4595 if !endpoint_active {
4596 skipped.push(SkippedEdge {
4597 edge_logical_id: edge_logical_id.clone(),
4598 missing_endpoint: other_endpoint.clone(),
4599 });
4600 continue;
4601 }
4602 restored += tx.execute(
4603 "UPDATE edges SET superseded_at = NULL WHERE row_id = ?1",
4604 [edge_row_id.as_str()],
4605 )?;
4606 }
4607 Ok((restored, skipped))
4608}
4609
/// Counts the vector rows stored for the chunks of node `logical_id`.
///
/// The node's `kind` selects the per-kind vector table (via
/// `fathomdb_schema::vec_kind_table_name`). Returns `0` when the node is
/// unknown, or when SQLite reports a failure whose message mentions the
/// table name or `no such module: vec0`.
///
/// # Errors
/// Returns an [`EngineError`] for any other SQLite failure.
#[cfg(feature = "sqlite-vec")]
fn count_vec_rows_for_logical_id(
    tx: &rusqlite::Transaction<'_>,
    logical_id: &str,
) -> Result<usize, EngineError> {
    let node_kind = tx
        .query_row(
            "SELECT kind FROM nodes WHERE logical_id = ?1 LIMIT 1",
            [logical_id],
            |row| row.get::<_, String>(0),
        )
        .optional()?;
    let kind = match node_kind {
        Some(kind) => kind,
        None => return Ok(0),
    };

    let table_name = fathomdb_schema::vec_kind_table_name(&kind);
    let count_sql = format!(
        "SELECT count(*) FROM {table_name} v \
         JOIN chunks c ON c.id = v.chunk_id \
         WHERE c.node_logical_id = ?1"
    );
    let outcome = tx.query_row(&count_sql, [logical_id], |row| row.get::<_, i64>(0));
    match outcome {
        Ok(count) => Ok(i64_to_usize(count)),
        // Treated as "no vector rows" rather than as an error.
        Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
            if msg.contains(&table_name) || msg.contains("no such module: vec0") =>
        {
            Ok(0)
        }
        Err(error) => Err(EngineError::Sqlite(error)),
    }
}
4645
/// Variant compiled when the `sqlite-vec` feature is disabled: always
/// reports zero vector rows and never touches the database.
///
/// The `Result` return type mirrors the feature-enabled variant so callers
/// compile unchanged either way (hence the `unnecessary_wraps` allowance).
#[cfg(not(feature = "sqlite-vec"))]
#[allow(clippy::unnecessary_wraps)]
fn count_vec_rows_for_logical_id(
    _tx: &rusqlite::Transaction<'_>,
    _logical_id: &str,
) -> Result<usize, EngineError> {
    Ok(0)
}
4654
/// Deletes the vector rows stored for the chunks of node `logical_id` and
/// returns how many rows the DELETE affected.
///
/// The node's `kind` selects the per-kind vector table (via
/// `fathomdb_schema::vec_kind_table_name`). Returns `0` when the node is
/// unknown, or when SQLite reports a failure whose message mentions the
/// table name or `no such module: vec0`.
///
/// # Errors
/// Returns an [`EngineError`] for any other SQLite failure.
#[cfg(feature = "sqlite-vec")]
fn delete_vec_rows_for_logical_id(
    tx: &rusqlite::Transaction<'_>,
    logical_id: &str,
) -> Result<usize, EngineError> {
    let node_kind = tx
        .query_row(
            "SELECT kind FROM nodes WHERE logical_id = ?1 LIMIT 1",
            [logical_id],
            |row| row.get::<_, String>(0),
        )
        .optional()?;
    let kind = match node_kind {
        Some(kind) => kind,
        None => return Ok(0),
    };

    let table_name = fathomdb_schema::vec_kind_table_name(&kind);
    let delete_sql = format!(
        "DELETE FROM {table_name} WHERE chunk_id IN (SELECT id FROM chunks WHERE node_logical_id = ?1)"
    );
    let outcome = tx.execute(&delete_sql, [logical_id]);
    match outcome {
        Ok(deleted) => Ok(deleted),
        // Treated as "nothing to delete" rather than as an error.
        Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
            if msg.contains(&table_name) || msg.contains("no such module: vec0") =>
        {
            Ok(0)
        }
        Err(error) => Err(EngineError::Sqlite(error)),
    }
}
4687
/// Variant compiled when the `sqlite-vec` feature is disabled: deletes
/// nothing, never touches the database, and always reports zero rows.
///
/// The `Result` return type mirrors the feature-enabled variant so callers
/// compile unchanged either way (hence the `unnecessary_wraps` allowance).
#[cfg(not(feature = "sqlite-vec"))]
#[allow(clippy::unnecessary_wraps)]
fn delete_vec_rows_for_logical_id(
    _tx: &rusqlite::Transaction<'_>,
    _logical_id: &str,
) -> Result<usize, EngineError> {
    Ok(0)
}
4696
4697fn ensure_operational_collection_registered(
4698 conn: &rusqlite::Connection,
4699 collection_name: &str,
4700) -> Result<(), EngineError> {
4701 if load_operational_collection_record(conn, collection_name)?.is_none() {
4702 return Err(EngineError::InvalidWrite(format!(
4703 "operational collection '{collection_name}' is not registered"
4704 )));
4705 }
4706 Ok(())
4707}
4708
4709fn load_operational_collection_record(
4710 conn: &rusqlite::Connection,
4711 name: &str,
4712) -> Result<Option<OperationalCollectionRecord>, EngineError> {
4713 conn.query_row(
4714 "SELECT name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at, disabled_at \
4715 FROM operational_collections WHERE name = ?1",
4716 [name],
4717 map_operational_collection_row,
4718 )
4719 .optional()
4720 .map_err(EngineError::Sqlite)
4721}
4722
4723fn validate_append_only_operational_collection(
4724 record: &OperationalCollectionRecord,
4725 operation: &str,
4726) -> Result<(), EngineError> {
4727 if record.kind != OperationalCollectionKind::AppendOnlyLog {
4728 return Err(EngineError::InvalidWrite(format!(
4729 "operational collection '{}' must be append_only_log to {operation}",
4730 record.name
4731 )));
4732 }
4733 Ok(())
4734}
4735
/// A read filter clause after validation against a collection's declared
/// filter fields, as produced by `compile_operational_read_filters`.
#[derive(Clone, Debug, PartialEq, Eq)]
struct CompiledOperationalReadFilter {
    /// Name of the filter field the condition applies to.
    field: String,
    /// The typed comparison to apply to that field.
    condition: OperationalReadCondition,
}
4741
/// The result of matching a set of compiled read filters to a secondary
/// index, as produced by `match_append_only_secondary_index_read`.
#[derive(Clone, Debug)]
struct MatchedAppendOnlySecondaryIndexRead<'a> {
    /// Name of the secondary index that can serve the read.
    index_name: &'a str,
    /// The filter on the index's value field.
    value_filter: &'a CompiledOperationalReadFilter,
    /// Optional range filter on the index's time field.
    time_range: Option<&'a CompiledOperationalReadFilter>,
}
4748
/// The comparison carried by a compiled operational read filter.
#[derive(Clone, Debug, PartialEq, Eq)]
enum OperationalReadCondition {
    /// The field's string value must equal the given text.
    ExactString(String),
    /// The field's integer value must equal the given number.
    ExactInteger(i64),
    /// The field's string value must start with the given text.
    Prefix(String),
    /// The field's integer value must lie within the given bounds; the SQL
    /// built from this variant uses `>=` for `lower` and `<=` for `upper`,
    /// and omits whichever bound is `None`.
    Range {
        lower: Option<i64>,
        upper: Option<i64>,
    },
}
4759
4760fn operational_read_limit(limit: Option<usize>) -> Result<usize, EngineError> {
4761 let applied_limit = limit.unwrap_or(DEFAULT_OPERATIONAL_READ_LIMIT);
4762 if applied_limit == 0 {
4763 return Err(EngineError::InvalidWrite(
4764 "operational read limit must be greater than zero".to_owned(),
4765 ));
4766 }
4767 Ok(applied_limit.min(MAX_OPERATIONAL_READ_LIMIT))
4768}
4769
4770fn parse_operational_filter_fields(
4771 filter_fields_json: &str,
4772) -> Result<Vec<OperationalFilterField>, String> {
4773 let fields: Vec<OperationalFilterField> = serde_json::from_str(filter_fields_json)
4774 .map_err(|error| format!("invalid filter_fields_json: {error}"))?;
4775 let mut seen = std::collections::HashSet::new();
4776 for field in &fields {
4777 if field.name.trim().is_empty() {
4778 return Err("filter_fields_json field names must not be empty".to_owned());
4779 }
4780 if !seen.insert(field.name.as_str()) {
4781 return Err(format!(
4782 "filter_fields_json contains duplicate field '{}'",
4783 field.name
4784 ));
4785 }
4786 if field.modes.is_empty() {
4787 return Err(format!(
4788 "filter_fields_json field '{}' must declare at least one mode",
4789 field.name
4790 ));
4791 }
4792 if field.modes.contains(&OperationalFilterMode::Prefix)
4793 && field.field_type != OperationalFilterFieldType::String
4794 {
4795 return Err(format!(
4796 "filter field '{}' only supports prefix for string types",
4797 field.name
4798 ));
4799 }
4800 }
4801 Ok(fields)
4802}
4803
4804fn compile_operational_read_filters(
4805 filters: &[OperationalFilterClause],
4806 declared_fields: &[OperationalFilterField],
4807) -> Result<Vec<CompiledOperationalReadFilter>, EngineError> {
4808 let field_map = declared_fields
4809 .iter()
4810 .map(|field| (field.name.as_str(), field))
4811 .collect::<std::collections::HashMap<_, _>>();
4812 filters
4813 .iter()
4814 .map(|filter| match filter {
4815 OperationalFilterClause::Exact { field, value } => {
4816 let declared = field_map.get(field.as_str()).ok_or_else(|| {
4817 EngineError::InvalidWrite(format!(
4818 "operational read filter uses undeclared field '{field}'"
4819 ))
4820 })?;
4821 if !declared.modes.contains(&OperationalFilterMode::Exact) {
4822 return Err(EngineError::InvalidWrite(format!(
4823 "operational read field '{field}' does not allow exact filters"
4824 )));
4825 }
4826 let condition = match (declared.field_type, value) {
4827 (OperationalFilterFieldType::String, OperationalFilterValue::String(value)) => {
4828 OperationalReadCondition::ExactString(value.clone())
4829 }
4830 (
4831 OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp,
4832 OperationalFilterValue::Integer(value),
4833 ) => OperationalReadCondition::ExactInteger(*value),
4834 _ => {
4835 return Err(EngineError::InvalidWrite(format!(
4836 "operational read field '{field}' received a value with the wrong type"
4837 )));
4838 }
4839 };
4840 Ok(CompiledOperationalReadFilter {
4841 field: field.clone(),
4842 condition,
4843 })
4844 }
4845 OperationalFilterClause::Prefix { field, value } => {
4846 let declared = field_map.get(field.as_str()).ok_or_else(|| {
4847 EngineError::InvalidWrite(format!(
4848 "operational read filter uses undeclared field '{field}'"
4849 ))
4850 })?;
4851 if !declared.modes.contains(&OperationalFilterMode::Prefix) {
4852 return Err(EngineError::InvalidWrite(format!(
4853 "operational read field '{field}' does not allow prefix filters"
4854 )));
4855 }
4856 if declared.field_type != OperationalFilterFieldType::String {
4857 return Err(EngineError::InvalidWrite(format!(
4858 "operational read field '{field}' only supports prefix filters for strings"
4859 )));
4860 }
4861 Ok(CompiledOperationalReadFilter {
4862 field: field.clone(),
4863 condition: OperationalReadCondition::Prefix(value.clone()),
4864 })
4865 }
4866 OperationalFilterClause::Range {
4867 field,
4868 lower,
4869 upper,
4870 } => {
4871 let declared = field_map.get(field.as_str()).ok_or_else(|| {
4872 EngineError::InvalidWrite(format!(
4873 "operational read filter uses undeclared field '{field}'"
4874 ))
4875 })?;
4876 if !declared.modes.contains(&OperationalFilterMode::Range) {
4877 return Err(EngineError::InvalidWrite(format!(
4878 "operational read field '{field}' does not allow range filters"
4879 )));
4880 }
4881 if !matches!(
4882 declared.field_type,
4883 OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp
4884 ) {
4885 return Err(EngineError::InvalidWrite(format!(
4886 "operational read field '{field}' only supports range filters for integer/timestamp fields"
4887 )));
4888 }
4889 if lower.is_none() && upper.is_none() {
4890 return Err(EngineError::InvalidWrite(format!(
4891 "operational read range filter for '{field}' must specify a lower or upper bound"
4892 )));
4893 }
4894 Ok(CompiledOperationalReadFilter {
4895 field: field.clone(),
4896 condition: OperationalReadCondition::Range {
4897 lower: *lower,
4898 upper: *upper,
4899 },
4900 })
4901 }
4902 })
4903 .collect()
4904}
4905
4906fn match_append_only_secondary_index_read<'a>(
4907 filters: &'a [CompiledOperationalReadFilter],
4908 indexes: &'a [OperationalSecondaryIndexDefinition],
4909) -> Option<MatchedAppendOnlySecondaryIndexRead<'a>> {
4910 indexes.iter().find_map(|index| {
4911 let OperationalSecondaryIndexDefinition::AppendOnlyFieldTime {
4912 name,
4913 field,
4914 value_type,
4915 time_field,
4916 } = index
4917 else {
4918 return None;
4919 };
4920 if !(1..=2).contains(&filters.len()) {
4921 return None;
4922 }
4923
4924 let mut value_filter = None;
4925 let mut time_range = None;
4926 for filter in filters {
4927 if filter.field == *field {
4928 let supported = matches!(
4929 (&filter.condition, value_type),
4930 (
4931 OperationalReadCondition::ExactString(_)
4932 | OperationalReadCondition::Prefix(_),
4933 crate::operational::OperationalSecondaryIndexValueType::String
4934 ) | (
4935 OperationalReadCondition::ExactInteger(_),
4936 crate::operational::OperationalSecondaryIndexValueType::Integer
4937 | crate::operational::OperationalSecondaryIndexValueType::Timestamp
4938 )
4939 );
4940 if !supported || value_filter.is_some() {
4941 return None;
4942 }
4943 value_filter = Some(filter);
4944 continue;
4945 }
4946 if filter.field == *time_field {
4947 if !matches!(filter.condition, OperationalReadCondition::Range { .. })
4948 || time_range.is_some()
4949 {
4950 return None;
4951 }
4952 time_range = Some(filter);
4953 continue;
4954 }
4955 return None;
4956 }
4957
4958 value_filter.map(|value_filter| MatchedAppendOnlySecondaryIndexRead {
4959 index_name: name.as_str(),
4960 value_filter,
4961 time_range,
4962 })
4963 })
4964}
4965
4966fn execute_operational_secondary_index_read(
4967 conn: &rusqlite::Connection,
4968 collection_name: &str,
4969 filters: &[CompiledOperationalReadFilter],
4970 indexes: &[OperationalSecondaryIndexDefinition],
4971 applied_limit: usize,
4972) -> Result<Option<OperationalReadReport>, EngineError> {
4973 use rusqlite::types::Value;
4974
4975 let Some(matched) = match_append_only_secondary_index_read(filters, indexes) else {
4976 return Ok(None);
4977 };
4978
4979 let mut sql = String::from(
4980 "SELECT m.id, m.collection_name, m.record_key, m.op_kind, m.payload_json, m.source_ref, m.created_at \
4981 FROM operational_secondary_index_entries s \
4982 JOIN operational_mutations m ON m.id = s.mutation_id \
4983 WHERE s.collection_name = ?1 AND s.index_name = ?2 AND s.subject_kind = 'mutation' ",
4984 );
4985 let mut params = vec![
4986 Value::from(collection_name.to_owned()),
4987 Value::from(matched.index_name.to_owned()),
4988 ];
4989
4990 match &matched.value_filter.condition {
4991 OperationalReadCondition::ExactString(value) => {
4992 let _ = write!(sql, "AND s.slot1_text = ?{} ", params.len() + 1);
4993 params.push(Value::from(value.clone()));
4994 }
4995 OperationalReadCondition::Prefix(value) => {
4996 let _ = write!(sql, "AND s.slot1_text GLOB ?{} ", params.len() + 1);
4997 params.push(Value::from(glob_prefix_pattern(value)));
4998 }
4999 OperationalReadCondition::ExactInteger(value) => {
5000 let _ = write!(sql, "AND s.slot1_integer = ?{} ", params.len() + 1);
5001 params.push(Value::from(*value));
5002 }
5003 OperationalReadCondition::Range { .. } => return Ok(None),
5004 }
5005
5006 if let Some(time_range) = matched.time_range
5007 && let OperationalReadCondition::Range { lower, upper } = &time_range.condition
5008 {
5009 if let Some(lower) = lower {
5010 let _ = write!(sql, "AND s.sort_timestamp >= ?{} ", params.len() + 1);
5011 params.push(Value::from(*lower));
5012 }
5013 if let Some(upper) = upper {
5014 let _ = write!(sql, "AND s.sort_timestamp <= ?{} ", params.len() + 1);
5015 params.push(Value::from(*upper));
5016 }
5017 }
5018
5019 let _ = write!(
5020 sql,
5021 "ORDER BY s.sort_timestamp DESC, m.mutation_order DESC LIMIT ?{}",
5022 params.len() + 1
5023 );
5024 params.push(Value::from(i64::try_from(applied_limit + 1).map_err(
5025 |_| EngineError::Bridge("operational read limit overflow".to_owned()),
5026 )?));
5027
5028 let mut stmt = conn.prepare(&sql)?;
5029 let mut rows = stmt
5030 .query_map(
5031 rusqlite::params_from_iter(params),
5032 map_operational_mutation_row,
5033 )?
5034 .collect::<Result<Vec<_>, _>>()?;
5035 let was_limited = rows.len() > applied_limit;
5036 if was_limited {
5037 rows.truncate(applied_limit);
5038 }
5039
5040 Ok(Some(OperationalReadReport {
5041 collection_name: collection_name.to_owned(),
5042 row_count: rows.len(),
5043 applied_limit,
5044 was_limited,
5045 rows,
5046 }))
5047}
5048
5049fn execute_operational_filtered_read(
5050 conn: &rusqlite::Connection,
5051 collection_name: &str,
5052 filters: &[CompiledOperationalReadFilter],
5053 applied_limit: usize,
5054) -> Result<OperationalReadReport, EngineError> {
5055 use rusqlite::types::Value;
5056
5057 let mut sql = String::from(
5058 "SELECT m.id, m.collection_name, m.record_key, m.op_kind, m.payload_json, m.source_ref, m.created_at \
5059 FROM operational_mutations m ",
5060 );
5061 let mut params = vec![Value::from(collection_name.to_owned())];
5062 for (index, filter) in filters.iter().enumerate() {
5063 let _ = write!(
5064 sql,
5065 "JOIN operational_filter_values f{index} \
5066 ON f{index}.mutation_id = m.id \
5067 AND f{index}.collection_name = m.collection_name "
5068 );
5069 match &filter.condition {
5070 OperationalReadCondition::ExactString(value) => {
5071 let _ = write!(
5072 sql,
5073 "AND f{index}.field_name = ?{} AND f{index}.string_value = ?{} ",
5074 params.len() + 1,
5075 params.len() + 2
5076 );
5077 params.push(Value::from(filter.field.clone()));
5078 params.push(Value::from(value.clone()));
5079 }
5080 OperationalReadCondition::ExactInteger(value) => {
5081 let _ = write!(
5082 sql,
5083 "AND f{index}.field_name = ?{} AND f{index}.integer_value = ?{} ",
5084 params.len() + 1,
5085 params.len() + 2
5086 );
5087 params.push(Value::from(filter.field.clone()));
5088 params.push(Value::from(*value));
5089 }
5090 OperationalReadCondition::Prefix(value) => {
5091 let _ = write!(
5092 sql,
5093 "AND f{index}.field_name = ?{} AND f{index}.string_value GLOB ?{} ",
5094 params.len() + 1,
5095 params.len() + 2
5096 );
5097 params.push(Value::from(filter.field.clone()));
5098 params.push(Value::from(glob_prefix_pattern(value)));
5099 }
5100 OperationalReadCondition::Range { lower, upper } => {
5101 let _ = write!(sql, "AND f{index}.field_name = ?{} ", params.len() + 1);
5102 params.push(Value::from(filter.field.clone()));
5103 if let Some(lower) = lower {
5104 let _ = write!(sql, "AND f{index}.integer_value >= ?{} ", params.len() + 1);
5105 params.push(Value::from(*lower));
5106 }
5107 if let Some(upper) = upper {
5108 let _ = write!(sql, "AND f{index}.integer_value <= ?{} ", params.len() + 1);
5109 params.push(Value::from(*upper));
5110 }
5111 }
5112 }
5113 }
5114 let _ = write!(
5115 sql,
5116 "WHERE m.collection_name = ?1 ORDER BY m.mutation_order DESC LIMIT ?{}",
5117 params.len() + 1
5118 );
5119 params.push(Value::from(i64::try_from(applied_limit + 1).map_err(
5120 |_| EngineError::Bridge("operational read limit overflow".to_owned()),
5121 )?));
5122
5123 let mut stmt = conn.prepare(&sql)?;
5124 let mut rows = stmt
5125 .query_map(
5126 rusqlite::params_from_iter(params),
5127 map_operational_mutation_row,
5128 )?
5129 .collect::<Result<Vec<_>, _>>()?;
5130 let was_limited = rows.len() > applied_limit;
5131 if was_limited {
5132 rows.truncate(applied_limit);
5133 }
5134 Ok(OperationalReadReport {
5135 collection_name: collection_name.to_owned(),
5136 row_count: rows.len(),
5137 applied_limit,
5138 was_limited,
5139 rows,
5140 })
5141}
5142
/// Builds a GLOB pattern that matches any string starting with `value`.
///
/// The characters `*`, `?` and `[` in `value` are each wrapped in a
/// single-character bracket class (`[*]`, `[?]`, `[[]`) so they match
/// literally; a trailing `*` wildcard is then appended.
fn glob_prefix_pattern(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len() + 1);
    for symbol in value.chars() {
        if matches!(symbol, '*' | '?' | '[') {
            escaped.push('[');
            escaped.push(symbol);
            escaped.push(']');
        } else {
            escaped.push(symbol);
        }
    }
    escaped.push('*');
    escaped
}
5156
/// A single filter-field value pulled out of a mutation payload by
/// `extract_operational_filter_values`.
///
/// That function populates exactly one of `string_value` / `integer_value`,
/// according to the declared type of the field.
#[derive(Clone, Debug, PartialEq, Eq)]
struct ExtractedOperationalFilterValue {
    /// Name of the declared filter field.
    field_name: String,
    /// Set for string-typed fields.
    string_value: Option<String>,
    /// Set for integer- and timestamp-typed fields.
    integer_value: Option<i64>,
}
5163
5164fn extract_operational_filter_values(
5165 filter_fields: &[OperationalFilterField],
5166 payload_json: &str,
5167) -> Vec<ExtractedOperationalFilterValue> {
5168 let Ok(parsed) = serde_json::from_str::<serde_json::Value>(payload_json) else {
5169 return Vec::new();
5170 };
5171 let Some(object) = parsed.as_object() else {
5172 return Vec::new();
5173 };
5174
5175 filter_fields
5176 .iter()
5177 .filter_map(|field| {
5178 let value = object.get(&field.name)?;
5179 match field.field_type {
5180 OperationalFilterFieldType::String => {
5181 value
5182 .as_str()
5183 .map(|string_value| ExtractedOperationalFilterValue {
5184 field_name: field.name.clone(),
5185 string_value: Some(string_value.to_owned()),
5186 integer_value: None,
5187 })
5188 }
5189 OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp => {
5190 value
5191 .as_i64()
5192 .map(|integer_value| ExtractedOperationalFilterValue {
5193 field_name: field.name.clone(),
5194 string_value: None,
5195 integer_value: Some(integer_value),
5196 })
5197 }
5198 }
5199 })
5200 .collect()
5201}
5202
5203fn operational_compaction_candidates(
5204 conn: &rusqlite::Connection,
5205 retention_json: &str,
5206 collection_name: &str,
5207) -> Result<(Vec<String>, Option<i64>), EngineError> {
5208 operational_compaction_candidates_at(
5209 conn,
5210 retention_json,
5211 collection_name,
5212 current_unix_timestamp()?,
5213 )
5214}
5215
5216fn operational_compaction_candidates_at(
5217 conn: &rusqlite::Connection,
5218 retention_json: &str,
5219 collection_name: &str,
5220 now_timestamp: i64,
5221) -> Result<(Vec<String>, Option<i64>), EngineError> {
5222 let policy = parse_operational_retention_policy(retention_json)?;
5223 match policy {
5224 OperationalRetentionPolicy::KeepAll => Ok((Vec::new(), None)),
5225 OperationalRetentionPolicy::PurgeBeforeSeconds { max_age_seconds } => {
5226 let before_timestamp = now_timestamp - max_age_seconds;
5227 let mut stmt = conn.prepare(
5228 "SELECT id FROM operational_mutations \
5229 WHERE collection_name = ?1 AND created_at < ?2 \
5230 ORDER BY mutation_order",
5231 )?;
5232 let mutation_ids = stmt
5233 .query_map(
5234 rusqlite::params![collection_name, before_timestamp],
5235 |row| row.get::<_, String>(0),
5236 )?
5237 .collect::<Result<Vec<_>, _>>()?;
5238 Ok((mutation_ids, Some(before_timestamp)))
5239 }
5240 OperationalRetentionPolicy::KeepLast { max_rows } => {
5241 let mut stmt = conn.prepare(
5242 "SELECT id FROM operational_mutations \
5243 WHERE collection_name = ?1 \
5244 ORDER BY mutation_order DESC",
5245 )?;
5246 let ordered_ids = stmt
5247 .query_map([collection_name], |row| row.get::<_, String>(0))?
5248 .collect::<Result<Vec<_>, _>>()?;
5249 Ok((ordered_ids.into_iter().skip(max_rows).collect(), None))
5250 }
5251 }
5252}
5253
5254fn parse_operational_retention_policy(
5255 retention_json: &str,
5256) -> Result<OperationalRetentionPolicy, EngineError> {
5257 let policy: OperationalRetentionPolicy = serde_json::from_str(retention_json)
5258 .map_err(|error| EngineError::InvalidWrite(format!("invalid retention_json: {error}")))?;
5259 match policy {
5260 OperationalRetentionPolicy::KeepAll => Ok(policy),
5261 OperationalRetentionPolicy::PurgeBeforeSeconds { max_age_seconds } => {
5262 if max_age_seconds <= 0 {
5263 return Err(EngineError::InvalidWrite(
5264 "retention_json max_age_seconds must be greater than zero".to_owned(),
5265 ));
5266 }
5267 Ok(policy)
5268 }
5269 OperationalRetentionPolicy::KeepLast { max_rows } => {
5270 if max_rows == 0 {
5271 return Err(EngineError::InvalidWrite(
5272 "retention_json max_rows must be greater than zero".to_owned(),
5273 ));
5274 }
5275 Ok(policy)
5276 }
5277 }
5278}
5279
5280fn load_operational_retention_records(
5281 conn: &rusqlite::Connection,
5282 collection_names: Option<&[String]>,
5283 max_collections: Option<usize>,
5284) -> Result<Vec<OperationalCollectionRecord>, EngineError> {
5285 let limit = max_collections.unwrap_or(usize::MAX);
5286 if limit == 0 {
5287 return Err(EngineError::InvalidWrite(
5288 "max_collections must be greater than zero".to_owned(),
5289 ));
5290 }
5291
5292 let mut records = Vec::new();
5293 if let Some(collection_names) = collection_names {
5294 for name in collection_names.iter().take(limit) {
5295 let record = load_operational_collection_record(conn, name)?.ok_or_else(|| {
5296 EngineError::InvalidWrite(format!(
5297 "operational collection '{name}' is not registered"
5298 ))
5299 })?;
5300 records.push(record);
5301 }
5302 return Ok(records);
5303 }
5304
5305 let mut stmt = conn.prepare(
5306 "SELECT name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at, disabled_at \
5307 FROM operational_collections ORDER BY name",
5308 )?;
5309 let rows = stmt
5310 .query_map([], map_operational_collection_row)?
5311 .take(limit)
5312 .collect::<Result<Vec<_>, _>>()?;
5313 Ok(rows)
5314}
5315
5316fn last_operational_retention_run_at(
5317 conn: &rusqlite::Connection,
5318 collection_name: &str,
5319) -> Result<Option<i64>, EngineError> {
5320 conn.query_row(
5321 "SELECT MAX(executed_at) FROM operational_retention_runs WHERE collection_name = ?1",
5322 [collection_name],
5323 |row| row.get(0),
5324 )
5325 .optional()
5326 .map_err(EngineError::Sqlite)
5327 .map(Option::flatten)
5328}
5329
5330fn count_operational_mutations_for_collection(
5331 conn: &rusqlite::Connection,
5332 collection_name: &str,
5333) -> Result<usize, EngineError> {
5334 let count: i64 = conn.query_row(
5335 "SELECT count(*) FROM operational_mutations WHERE collection_name = ?1",
5336 [collection_name],
5337 |row| row.get(0),
5338 )?;
5339 usize::try_from(count).map_err(|_| {
5340 EngineError::Bridge(format!("count overflow for collection {collection_name}"))
5341 })
5342}
5343
5344fn retention_action_kind_and_limit(
5345 policy: &OperationalRetentionPolicy,
5346) -> (OperationalRetentionActionKind, Option<usize>) {
5347 match policy {
5348 OperationalRetentionPolicy::KeepAll => (OperationalRetentionActionKind::Noop, None),
5349 OperationalRetentionPolicy::PurgeBeforeSeconds { .. } => {
5350 (OperationalRetentionActionKind::PurgeBeforeSeconds, None)
5351 }
5352 OperationalRetentionPolicy::KeepLast { max_rows } => {
5353 (OperationalRetentionActionKind::KeepLast, Some(*max_rows))
5354 }
5355 }
5356}
5357
5358fn plan_operational_retention_item(
5359 conn: &rusqlite::Connection,
5360 record: &OperationalCollectionRecord,
5361 now_timestamp: i64,
5362) -> Result<OperationalRetentionPlanItem, EngineError> {
5363 let last_run_at = last_operational_retention_run_at(conn, &record.name)?;
5364 if record.kind != OperationalCollectionKind::AppendOnlyLog {
5365 return Ok(OperationalRetentionPlanItem {
5366 collection_name: record.name.clone(),
5367 action_kind: OperationalRetentionActionKind::Noop,
5368 candidate_deletions: 0,
5369 before_timestamp: None,
5370 max_rows: None,
5371 last_run_at,
5372 });
5373 }
5374 let policy = parse_operational_retention_policy(&record.retention_json)?;
5375 let (action_kind, max_rows) = retention_action_kind_and_limit(&policy);
5376 let (candidate_ids, before_timestamp) = operational_compaction_candidates_at(
5377 conn,
5378 &record.retention_json,
5379 &record.name,
5380 now_timestamp,
5381 )?;
5382 Ok(OperationalRetentionPlanItem {
5383 collection_name: record.name.clone(),
5384 action_kind,
5385 candidate_deletions: candidate_ids.len(),
5386 before_timestamp,
5387 max_rows,
5388 last_run_at,
5389 })
5390}
5391
5392fn run_operational_retention_item(
5393 tx: &rusqlite::Transaction<'_>,
5394 record: &OperationalCollectionRecord,
5395 now_timestamp: i64,
5396 dry_run: bool,
5397) -> Result<OperationalRetentionRunItem, EngineError> {
5398 let plan = plan_operational_retention_item(tx, record, now_timestamp)?;
5399 let mut deleted_mutations = 0usize;
5400 if record.kind == OperationalCollectionKind::AppendOnlyLog
5401 && plan.action_kind != OperationalRetentionActionKind::Noop
5402 && plan.candidate_deletions > 0
5403 && !dry_run
5404 {
5405 let (candidate_ids, _) = operational_compaction_candidates_at(
5406 tx,
5407 &record.retention_json,
5408 &record.name,
5409 now_timestamp,
5410 )?;
5411 let mut delete_stmt =
5412 tx.prepare_cached("DELETE FROM operational_mutations WHERE id = ?1")?;
5413 for mutation_id in &candidate_ids {
5414 delete_stmt.execute([mutation_id.as_str()])?;
5415 deleted_mutations += 1;
5416 }
5417 drop(delete_stmt);
5418
5419 persist_simple_provenance_event(
5420 tx,
5421 "operational_retention_run",
5422 &record.name,
5423 Some(serde_json::json!({
5424 "action_kind": plan.action_kind,
5425 "deleted_mutations": deleted_mutations,
5426 "before_timestamp": plan.before_timestamp,
5427 "max_rows": plan.max_rows,
5428 "executed_at": now_timestamp,
5429 })),
5430 )?;
5431 }
5432
5433 let live_rows_remaining = count_operational_mutations_for_collection(tx, &record.name)?;
5434 let effective_deleted_mutations = if dry_run {
5435 plan.candidate_deletions
5436 } else {
5437 deleted_mutations
5438 };
5439 let rows_remaining = if dry_run {
5440 live_rows_remaining.saturating_sub(effective_deleted_mutations)
5441 } else {
5442 live_rows_remaining
5443 };
5444 if !dry_run && plan.action_kind != OperationalRetentionActionKind::Noop {
5445 tx.execute(
5446 "INSERT INTO operational_retention_runs \
5447 (id, collection_name, executed_at, action_kind, dry_run, deleted_mutations, rows_remaining, metadata_json) \
5448 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
5449 rusqlite::params![
5450 new_id(),
5451 record.name,
5452 now_timestamp,
5453 serde_json::to_string(&plan.action_kind)
5454 .unwrap_or_else(|_| "\"noop\"".to_owned())
5455 .trim_matches('"')
5456 .to_owned(),
5457 i32::from(dry_run),
5458 deleted_mutations,
5459 rows_remaining,
5460 serde_json::json!({
5461 "before_timestamp": plan.before_timestamp,
5462 "max_rows": plan.max_rows,
5463 })
5464 .to_string(),
5465 ],
5466 )?;
5467 }
5468
5469 Ok(OperationalRetentionRunItem {
5470 collection_name: plan.collection_name,
5471 action_kind: plan.action_kind,
5472 deleted_mutations: effective_deleted_mutations,
5473 before_timestamp: plan.before_timestamp,
5474 max_rows: plan.max_rows,
5475 rows_remaining,
5476 })
5477}
5478
5479fn current_unix_timestamp() -> Result<i64, EngineError> {
5480 let now = SystemTime::now()
5481 .duration_since(SystemTime::UNIX_EPOCH)
5482 .map_err(|error| EngineError::Bridge(format!("system clock error: {error}")))?;
5483 i64::try_from(now.as_secs())
5484 .map_err(|_| EngineError::Bridge("unix timestamp overflow".to_owned()))
5485}
5486
5487fn map_operational_collection_row(
5488 row: &rusqlite::Row<'_>,
5489) -> Result<OperationalCollectionRecord, rusqlite::Error> {
5490 let kind_text: String = row.get(1)?;
5491 let kind = OperationalCollectionKind::try_from(kind_text.as_str()).map_err(|message| {
5492 rusqlite::Error::FromSqlConversionFailure(
5493 1,
5494 rusqlite::types::Type::Text,
5495 Box::new(io::Error::new(io::ErrorKind::InvalidData, message)),
5496 )
5497 })?;
5498 Ok(OperationalCollectionRecord {
5499 name: row.get(0)?,
5500 kind,
5501 schema_json: row.get(2)?,
5502 retention_json: row.get(3)?,
5503 filter_fields_json: row.get(4)?,
5504 validation_json: row.get(5)?,
5505 secondary_indexes_json: row.get(6)?,
5506 format_version: row.get(7)?,
5507 created_at: row.get(8)?,
5508 disabled_at: row.get(9)?,
5509 })
5510}
5511
5512fn map_operational_mutation_row(
5513 row: &rusqlite::Row<'_>,
5514) -> Result<OperationalMutationRow, rusqlite::Error> {
5515 Ok(OperationalMutationRow {
5516 id: row.get(0)?,
5517 collection_name: row.get(1)?,
5518 record_key: row.get(2)?,
5519 op_kind: row.get(3)?,
5520 payload_json: row.get(4)?,
5521 source_ref: row.get(5)?,
5522 created_at: row.get(6)?,
5523 })
5524}
5525
5526fn map_operational_current_row(
5527 row: &rusqlite::Row<'_>,
5528) -> Result<OperationalCurrentRow, rusqlite::Error> {
5529 Ok(OperationalCurrentRow {
5530 collection_name: row.get(0)?,
5531 record_key: row.get(1)?,
5532 payload_json: row.get(2)?,
5533 updated_at: row.get(3)?,
5534 last_mutation_id: row.get(4)?,
5535 })
5536}
5537
5538#[cfg(test)]
5539#[allow(clippy::expect_used)]
5540mod tests {
5541 use std::fs;
5542 use std::sync::Arc;
5543
5544 use fathomdb_schema::SchemaManager;
5545 use tempfile::NamedTempFile;
5546
5547 use super::{
5548 AdminService, FtsPropertyPathMode, FtsPropertyPathSpec, SafeExportOptions,
5549 VectorRegenerationConfig,
5550 };
5551 use crate::embedder::{BatchEmbedder, EmbedderError, QueryEmbedder, QueryEmbedderIdentity};
5552 use crate::projection::ProjectionTarget;
5553 use crate::sqlite;
5554 use crate::{EngineError, OperationalCollectionKind, OperationalRegisterRequest};
5555
5556 #[cfg(feature = "sqlite-vec")]
5557 use crate::{ExecutionCoordinator, TelemetryCounters};
5558
5559 #[cfg(feature = "sqlite-vec")]
5560 use fathomdb_query::QueryBuilder;
5561
5562 #[cfg(feature = "sqlite-vec")]
5563 use super::load_vector_regeneration_config;
5564
5565 #[derive(Debug)]
5569 #[allow(dead_code)]
5570 struct TestEmbedder {
5571 identity: QueryEmbedderIdentity,
5572 vector: Vec<f32>,
5573 }
5574
5575 #[allow(dead_code)]
5576 impl TestEmbedder {
5577 fn new(model: &str, dimension: usize) -> Self {
5578 Self {
5579 identity: QueryEmbedderIdentity {
5580 model_identity: model.to_owned(),
5581 model_version: "1.0.0".to_owned(),
5582 dimension,
5583 normalization_policy: "l2".to_owned(),
5584 },
5585 vector: vec![1.0; dimension],
5586 }
5587 }
5588 }
5589
5590 impl QueryEmbedder for TestEmbedder {
5591 fn embed_query(&self, _text: &str) -> Result<Vec<f32>, EmbedderError> {
5592 Ok(self.vector.clone())
5593 }
5594 fn identity(&self) -> QueryEmbedderIdentity {
5595 self.identity.clone()
5596 }
5597 fn max_tokens(&self) -> usize {
5598 512
5599 }
5600 }
5601
5602 impl BatchEmbedder for TestEmbedder {
5603 fn batch_embed(&self, texts: &[String]) -> Result<Vec<Vec<f32>>, EmbedderError> {
5604 Ok(texts.iter().map(|_| self.vector.clone()).collect())
5605 }
5606 fn identity(&self) -> QueryEmbedderIdentity {
5607 self.identity.clone()
5608 }
5609 fn max_tokens(&self) -> usize {
5610 512
5611 }
5612 }
5613
5614 #[derive(Debug)]
5617 #[allow(dead_code)]
5618 struct FailingEmbedder {
5619 identity: QueryEmbedderIdentity,
5620 }
5621
5622 impl QueryEmbedder for FailingEmbedder {
5623 fn embed_query(&self, _text: &str) -> Result<Vec<f32>, EmbedderError> {
5624 Err(EmbedderError::Failed("test failure".to_owned()))
5625 }
5626 fn identity(&self) -> QueryEmbedderIdentity {
5627 self.identity.clone()
5628 }
5629 fn max_tokens(&self) -> usize {
5630 512
5631 }
5632 }
5633
/// Sets the Unix permission bits of `path` to `mode`.
///
/// # Panics
///
/// Panics if the file metadata cannot be read or the permissions cannot be applied.
#[allow(dead_code)]
#[cfg(unix)]
fn set_file_mode(path: &std::path::Path, mode: u32) {
    use std::os::unix::fs::PermissionsExt;

    let metadata = fs::metadata(path).expect("script metadata");
    let mut updated = metadata.permissions();
    PermissionsExt::set_mode(&mut updated, mode);
    fs::set_permissions(path, updated).expect("chmod");
}

/// Non-Unix counterpart of `set_file_mode`: accepts the same arguments and does nothing.
#[allow(dead_code)]
#[cfg(not(unix))]
fn set_file_mode(_path: &std::path::Path, _mode: u32) {}
5647
5648 fn setup() -> (NamedTempFile, AdminService) {
5649 let db = NamedTempFile::new().expect("temp file");
5650 let schema = Arc::new(SchemaManager::new());
5651 {
5652 let conn = sqlite::open_connection(db.path()).expect("connection");
5653 schema.bootstrap(&conn).expect("bootstrap");
5654 }
5655 let service = AdminService::new(db.path(), Arc::clone(&schema));
5656 (db, service)
5657 }
5658
/// A freshly bootstrapped database reports zero duplicate active logical ids and
/// zero operational integrity gaps.
#[test]
fn check_integrity_includes_active_uniqueness_count() {
    let (_db, service) = setup();

    let integrity = service.check_integrity().expect("integrity check");

    let observed = (
        integrity.duplicate_active_logical_ids,
        integrity.operational_missing_collections,
        integrity.operational_missing_last_mutations,
    );
    assert_eq!(observed, (0, 0, 0));
}
5667
/// Tracing a source ref reports the node row seeded under it and that node's logical id.
#[test]
fn trace_source_returns_node_logical_ids() {
    let (db, service) = setup();

    let insert_node_sql =
        "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
         VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 'source-1')";
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    seed_conn
        .execute(insert_node_sql, [])
        .expect("insert node");
    // Close the seeding connection before the service runs.
    drop(seed_conn);

    let traced = service.trace_source("source-1").expect("trace");
    assert_eq!(traced.node_rows, 1);
    assert_eq!(traced.node_logical_ids, vec!["lg1"]);
}
5684
/// Tracing a source ref reports the operational mutation recorded under it.
#[test]
fn trace_source_includes_operational_mutations() {
    let (db, service) = setup();

    // Each entry pairs a statement with the message used if it fails.
    let seed_statements = [
        (
            "INSERT INTO operational_collections \
             (name, kind, schema_json, retention_json, format_version, created_at) \
             VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
            "insert collection",
        ),
        (
            "INSERT INTO operational_mutations \
             (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
             VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"ok\"}', 'source-1', 100, 1)",
            "insert mutation",
        ),
    ];
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    for (sql, failure_label) in seed_statements {
        seed_conn.execute(sql, []).expect(failure_label);
    }
    drop(seed_conn);

    let traced = service.trace_source("source-1").expect("trace");
    assert_eq!(traced.operational_mutation_rows, 1);
    assert_eq!(traced.operational_mutation_ids, vec!["m1"]);
}
5710
/// Excising the source of the active revision leaves the earlier revision (`r1`) as
/// the only row with a NULL `superseded_at`.
#[test]
fn excise_source_restores_prior_active_node() {
    let (db, service) = setup();

    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
             VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
            "insert v1 superseded",
        ),
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
            "insert v2 active",
        ),
    ];
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    for (sql, failure_label) in seed_statements {
        seed_conn.execute(sql, []).expect(failure_label);
    }
    drop(seed_conn);

    service.excise_source("source-2").expect("excise");

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let surviving_row_id: String = verify_conn
        .query_row(
            "SELECT row_id FROM nodes WHERE logical_id = 'lg1' AND superseded_at IS NULL",
            [],
            |row| row.get(0),
        )
        .expect("active row exists after excise");
    assert_eq!(surviving_row_id, "r1");
}
5742
/// Excising `source-2` removes its operational mutation and leaves the current row
/// pointing at the earlier mutation (`m1`) with that mutation's payload.
#[test]
fn excise_source_deletes_operational_mutations_and_repairs_latest_state_current() {
    let (db, service) = setup();

    // Seed: one collection, an older and a newer mutation, and a current row that
    // references the newer mutation.
    let seed_statements = [
        (
            "INSERT INTO operational_collections \
             (name, kind, schema_json, retention_json, format_version, created_at) \
             VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
            "insert collection",
        ),
        (
            "INSERT INTO operational_mutations \
             (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
             VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"old\"}', 'source-1', 100, 1)",
            "insert prior mutation",
        ),
        (
            "INSERT INTO operational_mutations \
             (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
             VALUES ('m2', 'connector_health', 'gmail', 'put', '{\"status\":\"new\"}', 'source-2', 200, 2)",
            "insert excised mutation",
        ),
        (
            "INSERT INTO operational_current \
             (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
             VALUES ('connector_health', 'gmail', '{\"status\":\"new\"}', 200, 'm2')",
            "insert current row",
        ),
    ];
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    for (sql, failure_label) in seed_statements {
        seed_conn.execute(sql, []).expect(failure_label);
    }
    drop(seed_conn);

    let before = service
        .trace_source("source-2")
        .expect("trace before excise");
    assert_eq!(before.operational_mutation_rows, 1);
    assert_eq!(before.operational_mutation_ids, vec!["m2"]);

    let after = service.excise_source("source-2").expect("excise");
    assert_eq!(after.operational_mutation_rows, 0);
    assert!(after.operational_mutation_ids.is_empty());

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let leftover_mutations: i64 = verify_conn
        .query_row(
            "SELECT count(*) FROM operational_mutations WHERE source_ref = 'source-2'",
            [],
            |row| row.get(0),
        )
        .expect("remaining count");
    assert_eq!(leftover_mutations, 0);

    let (payload_json, last_mutation_id): (String, String) = verify_conn
        .query_row(
            "SELECT payload_json, last_mutation_id FROM operational_current \
             WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
            [],
            |row| {
                let payload = row.get(0)?;
                let mutation_id = row.get(1)?;
                Ok((payload, mutation_id))
            },
        )
        .expect("rebuilt current row");
    assert_eq!(payload_json, "{\"status\":\"old\"}");
    assert_eq!(last_mutation_id, "m1");
}
5811
/// Restoring a retired logical id reactivates its node and attached edge, and the
/// report counts one restored node, edge, chunk, and FTS row.
#[test]
fn restore_logical_id_reestablishes_last_pre_retire_content_and_attached_edges() {
    let (db, service) = setup();

    // Seed a document with a chunk and an edge, record retire events, mark both
    // rows superseded, and empty the FTS table.
    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
            "insert node",
        ),
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('node-row-topic', 'topic-1', 'Topic', '{}', 100, 'seed')",
            "insert target node",
        ),
        (
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
            "insert chunk",
        ),
        (
            "INSERT INTO edges \
             (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
             VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 'seed')",
            "insert edge",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
            "insert node retire event",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
            "insert edge retire event",
        ),
        (
            "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
            "retire node",
        ),
        (
            "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
            "retire edge",
        ),
        ("DELETE FROM fts_nodes", "clear fts"),
    ];
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    for (sql, failure_label) in seed_statements {
        seed_conn.execute(sql, []).expect(failure_label);
    }
    drop(seed_conn);

    let restored = service.restore_logical_id("doc-1").expect("restore");
    assert_eq!(restored.logical_id, "doc-1");
    assert!(!restored.was_noop);
    assert_eq!(restored.restored_node_rows, 1);
    assert_eq!(restored.restored_edge_rows, 1);
    assert_eq!(restored.restored_chunk_rows, 1);
    assert_eq!(restored.restored_fts_rows, 1);

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let count_rows = |sql: &str, failure_label: &str| -> i64 {
        verify_conn
            .query_row(sql, [], |row| row.get(0))
            .expect(failure_label)
    };
    let expectations = [
        (
            "SELECT count(*) FROM nodes WHERE logical_id = 'doc-1' AND superseded_at IS NULL",
            "active node count",
        ),
        (
            "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
            "active edge count",
        ),
        (
            "SELECT count(*) FROM fts_nodes WHERE chunk_id = 'chunk-1'",
            "fts count",
        ),
    ];
    for (sql, failure_label) in expectations {
        assert_eq!(count_rows(sql, failure_label), 1);
    }
}
5902
/// An edge retired at timestamp 201, one tick after its node was retired at 200, is
/// still reactivated when the node's logical id is restored.
#[test]
fn restore_logical_id_restores_edges_retired_after_the_node_retire_event() {
    let (db, service) = setup();

    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
            "insert node",
        ),
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('node-row-topic', 'topic-1', 'Topic', '{}', 100, 'seed')",
            "insert target node",
        ),
        (
            "INSERT INTO edges \
             (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
             VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 'seed')",
            "insert edge",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
            "insert node retire event",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 201, '')",
            "insert edge retire event",
        ),
        (
            "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
            "retire node",
        ),
        (
            "UPDATE edges SET superseded_at = 201 WHERE logical_id = 'edge-1'",
            "retire edge",
        ),
    ];
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    for (sql, failure_label) in seed_statements {
        seed_conn.execute(sql, []).expect(failure_label);
    }
    drop(seed_conn);

    let restored = service.restore_logical_id("doc-1").expect("restore");
    assert_eq!(restored.restored_edge_rows, 1);

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let live_edges: i64 = verify_conn
        .query_row(
            "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
            [],
            |row| row.get(0),
        )
        .expect("active edge count");
    assert_eq!(live_edges, 1);
}
5964
/// When two retired revisions share identical timestamps, restore reactivates the
/// one inserted last (`node-row-newer`).
#[test]
fn restore_logical_id_prefers_latest_retired_revision_when_timestamps_tie() {
    let (db, service) = setup();

    // Both revisions carry created_at = 100 and superseded_at = 200.
    let seed_statements = [
        (
            "INSERT INTO nodes \
             (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
             VALUES ('node-row-older', 'doc-1', 'Document', '{\"title\":\"older\"}', 100, 200, 'forget-1')",
            "insert older retired node",
        ),
        (
            "INSERT INTO nodes \
             (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
             VALUES ('node-row-newer', 'doc-1', 'Document', '{\"title\":\"newer\"}', 100, 200, 'forget-1')",
            "insert newer retired node",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-retire-older', 'node_retire', 'doc-1', 'forget-1', 200, '')",
            "insert older retire event",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-retire-newer', 'node_retire', 'doc-1', 'forget-1', 200, '')",
            "insert newer retire event",
        ),
    ];
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    for (sql, failure_label) in seed_statements {
        seed_conn.execute(sql, []).expect(failure_label);
    }
    drop(seed_conn);

    let restored = service.restore_logical_id("doc-1").expect("restore");
    assert!(!restored.was_noop);

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let (row_id, properties): (String, String) = verify_conn
        .query_row(
            "SELECT row_id, properties FROM nodes \
             WHERE logical_id = 'doc-1' AND superseded_at IS NULL",
            [],
            |row| {
                let id = row.get(0)?;
                let props = row.get(1)?;
                Ok((id, props))
            },
        )
        .expect("restored active row");
    assert_eq!(row_id, "node-row-newer");
    assert_eq!(properties, "{\"title\":\"newer\"}");
}
6013
/// Purging a retired logical id deletes its node, edge, chunk, and FTS rows and
/// writes exactly one `purge_logical_id` provenance event.
#[test]
fn purge_logical_id_removes_retired_content_and_records_tombstone() {
    let (db, service) = setup();

    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
             VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
            "insert retired node",
        ),
        (
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
            "insert chunk",
        ),
        (
            "INSERT INTO edges \
             (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, superseded_at, source_ref) \
             VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 200, 'seed')",
            "insert retired edge",
        ),
        (
            "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
             VALUES ('chunk-1', 'doc-1', 'Document', 'budget narrative')",
            "insert fts",
        ),
    ];
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    for (sql, failure_label) in seed_statements {
        seed_conn.execute(sql, []).expect(failure_label);
    }
    drop(seed_conn);

    let purged = service.purge_logical_id("doc-1").expect("purge");
    assert_eq!(purged.logical_id, "doc-1");
    assert!(!purged.was_noop);
    assert_eq!(purged.deleted_node_rows, 1);
    assert_eq!(purged.deleted_edge_rows, 1);
    assert_eq!(purged.deleted_chunk_rows, 1);
    assert_eq!(purged.deleted_fts_rows, 1);

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let count_rows = |sql: &str, failure_label: &str| -> i64 {
        verify_conn
            .query_row(sql, [], |row| row.get(0))
            .expect(failure_label)
    };
    // (query, failure message, expected row count)
    let expectations = [
        (
            "SELECT count(*) FROM nodes WHERE logical_id = 'doc-1'",
            "remaining nodes",
            0,
        ),
        (
            "SELECT count(*) FROM edges WHERE logical_id = 'edge-1'",
            "remaining edges",
            0,
        ),
        (
            "SELECT count(*) FROM chunks WHERE id = 'chunk-1'",
            "remaining chunks",
            0,
        ),
        (
            "SELECT count(*) FROM provenance_events WHERE event_type = 'purge_logical_id' AND subject = 'doc-1'",
            "purge events",
            1,
        ),
    ];
    for (sql, failure_label, expected) in expectations {
        assert_eq!(count_rows(sql, failure_label), expected);
    }
}
6088
/// A chunk whose node exists only as a superseded row is not counted as orphaned.
#[test]
fn check_semantics_accepts_preserved_retired_chunks() {
    let (db, service) = setup();

    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
             VALUES ('node-row-1', 'doc-1', 'Document', '{}', 100, 200, 'seed')",
            "insert retired node",
        ),
        (
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
            "insert chunk",
        ),
    ];
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    for (sql, failure_label) in seed_statements {
        seed_conn.execute(sql, []).expect(failure_label);
    }
    drop(seed_conn);

    let semantics = service.check_semantics().expect("semantics");
    assert_eq!(semantics.orphaned_chunks, 0);
}
6111
/// A chunk that references a logical id with no node rows at all is counted as orphaned.
#[test]
fn check_semantics_detects_missing_retired_node_history_for_preserved_chunks() {
    let (db, service) = setup();

    let insert_chunk_sql = "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                            VALUES ('chunk-1', 'ghost-doc', 'budget narrative', 100)";
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    seed_conn
        .execute(insert_chunk_sql, [])
        .expect("insert orphaned chunk");
    drop(seed_conn);

    let semantics = service.check_semantics().expect("semantics");
    assert_eq!(semantics.orphaned_chunks, 1);
}
6128
/// A vec row whose chunk references a logical id with no node rows is reported both
/// as an orphaned chunk and as a vec row for a superseded node.
#[cfg(feature = "sqlite-vec")]
#[test]
fn check_semantics_detects_missing_retired_node_history_for_preserved_vec_rows() {
    let (db, service) = setup();

    let seed_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    service
        .schema_manager
        .ensure_vec_kind_profile(&seed_conn, "Doc", 4)
        .expect("ensure vec kind profile");
    let seed_statements = [
        (
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'ghost-doc', 'budget narrative', 100)",
            "insert orphaned chunk",
        ),
        (
            "INSERT INTO vec_doc (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
            "insert vec row",
        ),
    ];
    for (sql, failure_label) in seed_statements {
        seed_conn.execute(sql, []).expect(failure_label);
    }
    drop(seed_conn);

    let semantics = service.check_semantics().expect("semantics");
    assert_eq!(semantics.orphaned_chunks, 1);
    assert_eq!(semantics.vec_rows_for_superseded_nodes, 1);
}
6156
/// After restoring a retired document, a vector search finds it through the vec row
/// that was already stored, with no re-ingest step in between.
#[cfg(feature = "sqlite-vec")]
#[test]
fn restore_logical_id_reestablishes_vector_search_without_reingest() {
    let (db, service) = setup();

    let seed_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    service
        .schema_manager
        .ensure_vec_kind_profile(&seed_conn, "Document", 4)
        .expect("ensure vec kind profile");
    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
             VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
            "insert retired node",
        ),
        (
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
            "insert chunk",
        ),
        (
            "INSERT INTO vec_document (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
            "insert vec row",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
            "insert retire event",
        ),
    ];
    for (sql, failure_label) in seed_statements {
        seed_conn.execute(sql, []).expect(failure_label);
    }
    drop(seed_conn);

    let restored = service.restore_logical_id("doc-1").expect("restore");
    assert_eq!(restored.restored_vec_rows, 1);

    let coordinator = ExecutionCoordinator::open(
        db.path(),
        Arc::new(SchemaManager::new()),
        Some(4),
        1,
        Arc::new(TelemetryCounters::default()),
        None,
    )
    .expect("coordinator");
    let vector_query = QueryBuilder::nodes("Document")
        .vector_search("[0.0, 0.0, 0.0, 0.0]", 5)
        .compile()
        .expect("compile");
    let results = coordinator
        .execute_compiled_read(&vector_query)
        .expect("vector read");

    let doc_is_visible = results
        .nodes
        .iter()
        .any(|node| node.logical_id == "doc-1");
    assert!(
        doc_is_visible,
        "restore should make the preserved vec row visible again without re-ingest"
    );
}
6216
/// Purging a retired document reports one deleted vec row and leaves `vec_document` empty.
#[cfg(feature = "sqlite-vec")]
#[test]
fn purge_logical_id_deletes_vec_rows_for_retired_content() {
    let (db, service) = setup();

    let seed_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    service
        .schema_manager
        .ensure_vec_kind_profile(&seed_conn, "Document", 4)
        .expect("ensure vec kind profile");
    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
             VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
            "insert retired node",
        ),
        (
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
            "insert chunk",
        ),
        (
            "INSERT INTO vec_document (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
            "insert vec row",
        ),
    ];
    for (sql, failure_label) in seed_statements {
        seed_conn.execute(sql, []).expect(failure_label);
    }
    drop(seed_conn);

    let purged = service.purge_logical_id("doc-1").expect("purge");
    assert_eq!(purged.deleted_vec_rows, 1);

    let verify_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    let remaining_vec_rows: i64 = verify_conn
        .query_row("SELECT count(*) FROM vec_document", [], |row| row.get(0))
        .expect("vec count");
    assert_eq!(remaining_vec_rows, 0);
}
6255
/// Vectors produced by regeneration while the document was active make it
/// searchable again after it is retired and then restored.
#[cfg(feature = "sqlite-vec")]
#[test]
fn restore_logical_id_restores_visibility_of_regenerated_vectors() {
    let (db, service) = setup();

    // Phase 1: seed an active document with one chunk.
    let seed_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
            "insert node",
        ),
        (
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
            "insert chunk",
        ),
    ];
    for (sql, failure_label) in seed_statements {
        seed_conn.execute(sql, []).expect(failure_label);
    }
    drop(seed_conn);

    // Phase 2: regenerate embeddings with the fixed-vector test embedder.
    let embedder = TestEmbedder::new("test-model", 4);
    let regeneration = VectorRegenerationConfig {
        kind: "Document".to_owned(),
        profile: "default".to_owned(),
        chunking_policy: "per_chunk".to_owned(),
        preprocessing_policy: "trim".to_owned(),
    };
    service
        .regenerate_vector_embeddings(&embedder, &regeneration)
        .expect("regenerate");

    // Phase 3: retire the document by hand.
    let retire_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    let retire_statements = [
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
            "insert retire event",
        ),
        (
            "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
            "retire node",
        ),
    ];
    for (sql, failure_label) in retire_statements {
        retire_conn.execute(sql, []).expect(failure_label);
    }
    drop(retire_conn);

    // Phase 4: restore and confirm the document is reachable by vector search.
    let restored = service.restore_logical_id("doc-1").expect("restore");
    assert_eq!(restored.restored_vec_rows, 1);

    let coordinator = ExecutionCoordinator::open(
        db.path(),
        Arc::new(SchemaManager::new()),
        Some(4),
        1,
        Arc::new(TelemetryCounters::default()),
        None,
    )
    .expect("coordinator");
    let vector_query = QueryBuilder::nodes("Document")
        .vector_search("[0.0, 0.0, 0.0, 0.0]", 5)
        .compile()
        .expect("compile");
    let results = coordinator
        .execute_compiled_read(&vector_query)
        .expect("vector read");

    let doc_is_visible = results
        .nodes
        .iter()
        .any(|node| node.logical_id == "doc-1");
    assert!(
        doc_is_visible,
        "restored logical_id should become visible through regenerated vectors"
    );
}
6329
/// On a freshly bootstrapped database every semantic counter is zero and no
/// warnings are produced.
#[test]
fn check_semantics_clean_db_returns_zeros() {
    // Expands to one `assert_eq!(report.field, 0)` per listed field, in order.
    macro_rules! assert_all_zero {
        ($report:ident: $($field:ident),+ $(,)?) => {
            $( assert_eq!($report.$field, 0); )+
        };
    }

    let (_db, service) = setup();
    let semantics = service.check_semantics().expect("semantics check");

    assert_all_zero!(semantics:
        orphaned_chunks,
        null_source_ref_nodes,
        broken_step_fk,
        broken_action_fk,
        stale_fts_rows,
        fts_rows_for_superseded_nodes,
        dangling_edges,
        orphaned_supersession_chains,
        stale_vec_rows,
        vec_rows_for_superseded_nodes,
        missing_operational_current_rows,
        stale_operational_current_rows,
        disabled_collection_mutations,
        mismatched_kind_property_fts_rows,
        duplicate_property_fts_rows,
        drifted_property_fts_rows,
    );
    assert!(semantics.warnings.is_empty());
}
6352
/// Registering a collection returns the stored record, makes it describable, and
/// writes exactly one `operational_collection_registered` provenance event.
#[test]
fn register_operational_collection_persists_and_emits_provenance() {
    let (db, service) = setup();

    let request = OperationalRegisterRequest {
        name: "connector_health".to_owned(),
        kind: OperationalCollectionKind::LatestState,
        schema_json: "{}".to_owned(),
        retention_json: "{}".to_owned(),
        filter_fields_json: "[]".to_owned(),
        validation_json: String::new(),
        secondary_indexes_json: "[]".to_owned(),
        format_version: 1,
    };
    let registered = service
        .register_operational_collection(&request)
        .expect("register collection");

    assert_eq!(registered.name, "connector_health");
    assert_eq!(registered.kind, OperationalCollectionKind::LatestState);
    assert_eq!(registered.schema_json, "{}");
    assert_eq!(registered.retention_json, "{}");
    assert_eq!(registered.filter_fields_json, "[]");
    assert!(registered.created_at > 0);
    assert_eq!(registered.disabled_at, None);

    // Describing the collection must return the same record.
    let described = service
        .describe_operational_collection("connector_health")
        .expect("describe collection")
        .expect("collection exists");
    assert_eq!(described, registered);

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let registered_events: i64 = verify_conn
        .query_row(
            "SELECT count(*) FROM provenance_events \
             WHERE event_type = 'operational_collection_registered' AND subject = 'connector_health'",
            [],
            |row| row.get(0),
        )
        .expect("provenance count");
    assert_eq!(registered_events, 1);
}
6394
/// A collection registered with an empty validation contract can have one set
/// afterwards; the new contract is returned, is visible through describe, and
/// produces exactly one `operational_collection_validation_updated` event.
#[test]
fn register_and_update_operational_collection_validation_round_trip() {
    let (db, service) = setup();

    let request = OperationalRegisterRequest {
        name: "connector_health".to_owned(),
        kind: OperationalCollectionKind::LatestState,
        schema_json: "{}".to_owned(),
        retention_json: "{}".to_owned(),
        filter_fields_json: "[]".to_owned(),
        validation_json: String::new(),
        secondary_indexes_json: "[]".to_owned(),
        format_version: 1,
    };
    let registered = service
        .register_operational_collection(&request)
        .expect("register collection");
    assert_eq!(registered.validation_json, "");

    let validation_json = r#"{"format_version":1,"mode":"enforce","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#;
    let after_update = service
        .update_operational_collection_validation("connector_health", validation_json)
        .expect("update validation");
    assert_eq!(after_update.validation_json, validation_json);

    let described = service
        .describe_operational_collection("connector_health")
        .expect("describe collection")
        .expect("collection exists");
    assert_eq!(described.validation_json, validation_json);

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let update_events: i64 = verify_conn
        .query_row(
            "SELECT count(*) FROM provenance_events \
             WHERE event_type = 'operational_collection_validation_updated' \
             AND subject = 'connector_health'",
            [],
            |row| row.get(0),
        )
        .expect("provenance count");
    assert_eq!(update_events, 1);
}
6436
/// Defining a secondary index on an append-only collection that already holds two
/// records yields two index entries, and an explicit rebuild recreates both after
/// they are deleted.
#[test]
fn register_update_and_rebuild_operational_secondary_indexes_round_trip() {
    let (db, service) = setup();

    let request = OperationalRegisterRequest {
        name: "audit_log".to_owned(),
        kind: OperationalCollectionKind::AppendOnlyLog,
        schema_json: "{}".to_owned(),
        retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
        filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
        validation_json: String::new(),
        secondary_indexes_json: "[]".to_owned(),
        format_version: 1,
    };
    let registered = service
        .register_operational_collection(&request)
        .expect("register collection");
    assert_eq!(registered.secondary_indexes_json, "[]");

    // Seed two log entries through the writer; the writer lives only inside this scope.
    let append_entry = |record_key: &str, payload_json: &str, source_ref: &str| {
        crate::OperationalWrite::Append {
            collection: "audit_log".to_owned(),
            record_key: record_key.to_owned(),
            payload_json: payload_json.to_owned(),
            source_ref: Some(source_ref.to_owned()),
        }
    };
    {
        let writer = crate::WriterActor::start(
            db.path(),
            Arc::new(SchemaManager::new()),
            crate::ProvenanceMode::Warn,
            Arc::new(crate::TelemetryCounters::default()),
        )
        .expect("writer");
        let seed_request = crate::WriteRequest {
            label: "secondary-index-seed".to_owned(),
            nodes: Vec::new(),
            node_retires: Vec::new(),
            edges: Vec::new(),
            edge_retires: Vec::new(),
            chunks: Vec::new(),
            runs: Vec::new(),
            steps: Vec::new(),
            actions: Vec::new(),
            optional_backfills: Vec::new(),
            vec_inserts: Vec::new(),
            operational_writes: vec![
                append_entry("evt-1", r#"{"actor":"alice","ts":100}"#, "src-1"),
                append_entry("evt-2", r#"{"actor":"bob","ts":200}"#, "src-2"),
            ],
        };
        writer.submit(seed_request).expect("seed writes");
    }

    let secondary_indexes_json = r#"[{"name":"actor_ts","kind":"append_only_field_time","field":"actor","value_type":"string","time_field":"ts"}]"#;
    let after_update = service
        .update_operational_collection_secondary_indexes("audit_log", secondary_indexes_json)
        .expect("update secondary indexes");
    assert_eq!(after_update.secondary_indexes_json, secondary_indexes_json);

    // The index update should have indexed both seeded entries; then wipe them.
    let index_conn = sqlite::open_connection(db.path()).expect("conn");
    let indexed_entries: i64 = index_conn
        .query_row(
            "SELECT count(*) FROM operational_secondary_index_entries \
             WHERE collection_name = 'audit_log' AND index_name = 'actor_ts'",
            [],
            |row| row.get(0),
        )
        .expect("secondary index count");
    assert_eq!(indexed_entries, 2);
    index_conn
        .execute(
            "DELETE FROM operational_secondary_index_entries WHERE collection_name = 'audit_log'",
            [],
        )
        .expect("clear index entries");
    drop(index_conn);

    let rebuilt = service
        .rebuild_operational_secondary_indexes("audit_log")
        .expect("rebuild secondary indexes");
    assert_eq!(rebuilt.collection_name, "audit_log");
    assert_eq!(rebuilt.mutation_entries_rebuilt, 2);
    assert_eq!(rebuilt.current_entries_rebuilt, 0);
}
6523
6524 #[test]
6525 fn register_operational_collection_rejects_invalid_validation_contract() {
6526 let (_db, service) = setup();
6527
6528 let error = service
6529 .register_operational_collection(&OperationalRegisterRequest {
6530 name: "connector_health".to_owned(),
6531 kind: OperationalCollectionKind::LatestState,
6532 schema_json: "{}".to_owned(),
6533 retention_json: "{}".to_owned(),
6534 filter_fields_json: "[]".to_owned(),
6535 validation_json: r#"{"format_version":1,"mode":"enforce","fields":[{"name":"status","type":"string","minimum":0}]}"#
6536 .to_owned(),
6537 secondary_indexes_json: "[]".to_owned(),
6538 format_version: 1,
6539 })
6540 .expect_err("invalid validation contract should reject");
6541
6542 assert!(matches!(error, EngineError::InvalidWrite(_)));
6543 assert!(error.to_string().contains("minimum/maximum"));
6544 }
6545
6546 #[test]
6547 fn validate_operational_collection_history_reports_invalid_rows_without_mutation() {
6548 let (db, service) = setup();
6549 service
6550 .register_operational_collection(&OperationalRegisterRequest {
6551 name: "audit_log".to_owned(),
6552 kind: OperationalCollectionKind::AppendOnlyLog,
6553 schema_json: "{}".to_owned(),
6554 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6555 filter_fields_json: "[]".to_owned(),
6556 validation_json: r#"{"format_version":1,"mode":"disabled","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#
6557 .to_owned(),
6558 secondary_indexes_json: "[]".to_owned(),
6559 format_version: 1,
6560 })
6561 .expect("register collection");
6562 {
6563 let writer = crate::WriterActor::start(
6564 db.path(),
6565 Arc::new(SchemaManager::new()),
6566 crate::ProvenanceMode::Warn,
6567 Arc::new(crate::TelemetryCounters::default()),
6568 )
6569 .expect("writer");
6570 writer
6571 .submit(crate::WriteRequest {
6572 label: "history-validation".to_owned(),
6573 nodes: vec![],
6574 node_retires: vec![],
6575 edges: vec![],
6576 edge_retires: vec![],
6577 chunks: vec![],
6578 runs: vec![],
6579 steps: vec![],
6580 actions: vec![],
6581 optional_backfills: vec![],
6582 vec_inserts: vec![],
6583 operational_writes: vec![
6584 crate::OperationalWrite::Append {
6585 collection: "audit_log".to_owned(),
6586 record_key: "evt-1".to_owned(),
6587 payload_json: r#"{"status":"ok"}"#.to_owned(),
6588 source_ref: Some("src-1".to_owned()),
6589 },
6590 crate::OperationalWrite::Append {
6591 collection: "audit_log".to_owned(),
6592 record_key: "evt-2".to_owned(),
6593 payload_json: r#"{"status":"bogus"}"#.to_owned(),
6594 source_ref: Some("src-2".to_owned()),
6595 },
6596 ],
6597 })
6598 .expect("write");
6599 }
6600
6601 let report = service
6602 .validate_operational_collection_history("audit_log")
6603 .expect("validate history");
6604 assert_eq!(report.collection_name, "audit_log");
6605 assert_eq!(report.checked_rows, 2);
6606 assert_eq!(report.invalid_row_count, 1);
6607 assert_eq!(report.issues.len(), 1);
6608 assert_eq!(report.issues[0].record_key, "evt-2");
6609 assert!(report.issues[0].message.contains("must be one of"));
6610
6611 let trace = service
6612 .trace_operational_collection("audit_log", None)
6613 .expect("trace");
6614 assert_eq!(trace.mutation_count, 2);
6615
6616 let conn = sqlite::open_connection(db.path()).expect("conn");
6617 let provenance_count: i64 = conn
6618 .query_row(
6619 "SELECT count(*) FROM provenance_events \
6620 WHERE event_type = 'operational_collection_history_validated' \
6621 AND subject = 'audit_log'",
6622 [],
6623 |row| row.get(0),
6624 )
6625 .expect("provenance count");
6626 assert_eq!(provenance_count, 0);
6627 }
6628
6629 #[test]
6630 fn trace_operational_collection_returns_mutations_and_current_rows() {
6631 let (db, service) = setup();
6632 service
6633 .register_operational_collection(&OperationalRegisterRequest {
6634 name: "connector_health".to_owned(),
6635 kind: OperationalCollectionKind::LatestState,
6636 schema_json: "{}".to_owned(),
6637 retention_json: "{}".to_owned(),
6638 filter_fields_json: "[]".to_owned(),
6639 validation_json: String::new(),
6640 secondary_indexes_json: "[]".to_owned(),
6641 format_version: 1,
6642 })
6643 .expect("register collection");
6644 {
6645 let writer = crate::WriterActor::start(
6646 db.path(),
6647 Arc::new(SchemaManager::new()),
6648 crate::ProvenanceMode::Warn,
6649 Arc::new(crate::TelemetryCounters::default()),
6650 )
6651 .expect("writer");
6652 writer
6653 .submit(crate::WriteRequest {
6654 label: "operational".to_owned(),
6655 nodes: vec![],
6656 node_retires: vec![],
6657 edges: vec![],
6658 edge_retires: vec![],
6659 chunks: vec![],
6660 runs: vec![],
6661 steps: vec![],
6662 actions: vec![],
6663 optional_backfills: vec![],
6664 vec_inserts: vec![],
6665 operational_writes: vec![crate::OperationalWrite::Put {
6666 collection: "connector_health".to_owned(),
6667 record_key: "gmail".to_owned(),
6668 payload_json: r#"{"status":"ok"}"#.to_owned(),
6669 source_ref: Some("src-1".to_owned()),
6670 }],
6671 })
6672 .expect("write");
6673 }
6674
6675 let report = service
6676 .trace_operational_collection("connector_health", Some("gmail"))
6677 .expect("trace");
6678 assert_eq!(report.collection_name, "connector_health");
6679 assert_eq!(report.record_key.as_deref(), Some("gmail"));
6680 assert_eq!(report.mutation_count, 1);
6681 assert_eq!(report.current_count, 1);
6682 assert_eq!(report.mutations[0].op_kind, "put");
6683 assert_eq!(report.current_rows[0].payload_json, r#"{"status":"ok"}"#);
6684 }
6685
6686 #[test]
6687 fn trace_operational_collection_rejects_unknown_collection() {
6688 let (_db, service) = setup();
6689
6690 let error = service
6691 .trace_operational_collection("missing_collection", None)
6692 .expect_err("unknown collection should fail");
6693
6694 assert!(matches!(error, EngineError::InvalidWrite(_)));
6695 assert!(error.to_string().contains("is not registered"));
6696 }
6697
6698 #[test]
6699 fn rebuild_operational_current_repairs_missing_latest_state_rows() {
6700 let (db, service) = setup();
6701 service
6702 .register_operational_collection(&OperationalRegisterRequest {
6703 name: "connector_health".to_owned(),
6704 kind: OperationalCollectionKind::LatestState,
6705 schema_json: "{}".to_owned(),
6706 retention_json: "{}".to_owned(),
6707 filter_fields_json: "[]".to_owned(),
6708 validation_json: String::new(),
6709 secondary_indexes_json: "[]".to_owned(),
6710 format_version: 1,
6711 })
6712 .expect("register collection");
6713 {
6714 let writer = crate::WriterActor::start(
6715 db.path(),
6716 Arc::new(SchemaManager::new()),
6717 crate::ProvenanceMode::Warn,
6718 Arc::new(crate::TelemetryCounters::default()),
6719 )
6720 .expect("writer");
6721 writer
6722 .submit(crate::WriteRequest {
6723 label: "operational".to_owned(),
6724 nodes: vec![],
6725 node_retires: vec![],
6726 edges: vec![],
6727 edge_retires: vec![],
6728 chunks: vec![],
6729 runs: vec![],
6730 steps: vec![],
6731 actions: vec![],
6732 optional_backfills: vec![],
6733 vec_inserts: vec![],
6734 operational_writes: vec![crate::OperationalWrite::Put {
6735 collection: "connector_health".to_owned(),
6736 record_key: "gmail".to_owned(),
6737 payload_json: r#"{"status":"ok"}"#.to_owned(),
6738 source_ref: Some("src-1".to_owned()),
6739 }],
6740 })
6741 .expect("write");
6742 }
6743 {
6744 let conn = sqlite::open_connection(db.path()).expect("conn");
6745 conn.execute(
6746 "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
6747 [],
6748 )
6749 .expect("delete current row");
6750 }
6751
6752 let before = service.check_semantics().expect("semantics before rebuild");
6753 assert_eq!(before.missing_operational_current_rows, 1);
6754
6755 let repair = service
6756 .rebuild_operational_current(Some("connector_health"))
6757 .expect("rebuild current");
6758 assert_eq!(repair.collections_rebuilt, 1);
6759 assert_eq!(repair.current_rows_rebuilt, 1);
6760
6761 let after = service.check_semantics().expect("semantics after rebuild");
6762 assert_eq!(after.missing_operational_current_rows, 0);
6763
6764 let conn = sqlite::open_connection(db.path()).expect("conn");
6765 let payload: String = conn
6766 .query_row(
6767 "SELECT payload_json FROM operational_current \
6768 WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
6769 [],
6770 |row| row.get(0),
6771 )
6772 .expect("restored payload");
6773 assert_eq!(payload, r#"{"status":"ok"}"#);
6774 }
6775
6776 #[test]
6777 fn rebuild_operational_current_restores_latest_state_secondary_index_entries() {
6778 let (db, service) = setup();
6779 service
6780 .register_operational_collection(&OperationalRegisterRequest {
6781 name: "connector_health".to_owned(),
6782 kind: OperationalCollectionKind::LatestState,
6783 schema_json: "{}".to_owned(),
6784 retention_json: "{}".to_owned(),
6785 filter_fields_json: "[]".to_owned(),
6786 validation_json: String::new(),
6787 secondary_indexes_json: r#"[{"name":"status_current","kind":"latest_state_field","field":"status","value_type":"string"}]"#.to_owned(),
6788 format_version: 1,
6789 })
6790 .expect("register collection");
6791 {
6792 let writer = crate::WriterActor::start(
6793 db.path(),
6794 Arc::new(SchemaManager::new()),
6795 crate::ProvenanceMode::Warn,
6796 Arc::new(crate::TelemetryCounters::default()),
6797 )
6798 .expect("writer");
6799 writer
6800 .submit(crate::WriteRequest {
6801 label: "operational".to_owned(),
6802 nodes: vec![],
6803 node_retires: vec![],
6804 edges: vec![],
6805 edge_retires: vec![],
6806 chunks: vec![],
6807 runs: vec![],
6808 steps: vec![],
6809 actions: vec![],
6810 optional_backfills: vec![],
6811 vec_inserts: vec![],
6812 operational_writes: vec![crate::OperationalWrite::Put {
6813 collection: "connector_health".to_owned(),
6814 record_key: "gmail".to_owned(),
6815 payload_json: r#"{"status":"ok"}"#.to_owned(),
6816 source_ref: Some("src-1".to_owned()),
6817 }],
6818 })
6819 .expect("write");
6820 }
6821 {
6822 let conn = sqlite::open_connection(db.path()).expect("conn");
6823 let entry_count: i64 = conn
6824 .query_row(
6825 "SELECT count(*) FROM operational_secondary_index_entries \
6826 WHERE collection_name = 'connector_health' AND subject_kind = 'current'",
6827 [],
6828 |row| row.get(0),
6829 )
6830 .expect("secondary index count before repair");
6831 assert_eq!(entry_count, 1);
6832 conn.execute(
6833 "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
6834 [],
6835 )
6836 .expect("delete current row");
6837 }
6838
6839 service
6840 .rebuild_operational_current(Some("connector_health"))
6841 .expect("rebuild current");
6842
6843 let conn = sqlite::open_connection(db.path()).expect("conn");
6844 let entry_count: i64 = conn
6845 .query_row(
6846 "SELECT count(*) FROM operational_secondary_index_entries \
6847 WHERE collection_name = 'connector_health' AND subject_kind = 'current'",
6848 [],
6849 |row| row.get(0),
6850 )
6851 .expect("secondary index count after repair");
6852 assert_eq!(entry_count, 1);
6853 }
6854
6855 #[test]
6856 fn operational_current_semantics_and_rebuild_follow_mutation_order() {
6857 let (db, service) = setup();
6858 {
6859 let conn = sqlite::open_connection(db.path()).expect("conn");
6860 conn.execute(
6861 "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6862 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
6863 [],
6864 )
6865 .expect("seed collection");
6866 conn.execute(
6867 "INSERT INTO operational_mutations \
6868 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6869 VALUES ('m3', 'connector_health', 'gmail', 'put', '{\"status\":\"old\"}', 'src-1', 100, 1)",
6870 [],
6871 )
6872 .expect("seed first put");
6873 conn.execute(
6874 "INSERT INTO operational_mutations \
6875 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6876 VALUES ('m2', 'connector_health', 'gmail', 'delete', '', 'src-2', 100, 2)",
6877 [],
6878 )
6879 .expect("seed delete");
6880 conn.execute(
6881 "INSERT INTO operational_mutations \
6882 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6883 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"new\"}', 'src-3', 100, 3)",
6884 [],
6885 )
6886 .expect("seed final put");
6887 conn.execute(
6888 "INSERT INTO operational_current \
6889 (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
6890 VALUES ('connector_health', 'gmail', '{\"status\":\"new\"}', 100, 'm1')",
6891 [],
6892 )
6893 .expect("seed current");
6894 }
6895
6896 let before = service.check_semantics().expect("semantics before rebuild");
6897 assert_eq!(before.missing_operational_current_rows, 0);
6898 assert_eq!(before.stale_operational_current_rows, 0);
6899
6900 {
6901 let conn = sqlite::open_connection(db.path()).expect("conn");
6902 conn.execute(
6903 "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
6904 [],
6905 )
6906 .expect("delete current row");
6907 }
6908
6909 let missing = service.check_semantics().expect("semantics after delete");
6910 assert_eq!(missing.missing_operational_current_rows, 1);
6911 assert_eq!(missing.stale_operational_current_rows, 0);
6912
6913 service
6914 .rebuild_operational_current(Some("connector_health"))
6915 .expect("rebuild current");
6916
6917 let after = service.check_semantics().expect("semantics after rebuild");
6918 assert_eq!(after.missing_operational_current_rows, 0);
6919 assert_eq!(after.stale_operational_current_rows, 0);
6920
6921 let conn = sqlite::open_connection(db.path()).expect("conn");
6922 let payload: String = conn
6923 .query_row(
6924 "SELECT payload_json FROM operational_current \
6925 WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
6926 [],
6927 |row| row.get(0),
6928 )
6929 .expect("restored payload");
6930 assert_eq!(payload, r#"{"status":"new"}"#);
6931 }
6932
6933 #[test]
6934 fn disable_operational_collection_sets_disabled_at_and_emits_provenance() {
6935 let (db, service) = setup();
6936 service
6937 .register_operational_collection(&OperationalRegisterRequest {
6938 name: "audit_log".to_owned(),
6939 kind: OperationalCollectionKind::AppendOnlyLog,
6940 schema_json: "{}".to_owned(),
6941 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6942 filter_fields_json: "[]".to_owned(),
6943 validation_json: String::new(),
6944 secondary_indexes_json: "[]".to_owned(),
6945 format_version: 1,
6946 })
6947 .expect("register collection");
6948
6949 let record = service
6950 .disable_operational_collection("audit_log")
6951 .expect("disable collection");
6952 assert_eq!(record.name, "audit_log");
6953 assert!(record.disabled_at.is_some());
6954
6955 let disabled_at = record.disabled_at.expect("disabled_at");
6956 let described = service
6957 .describe_operational_collection("audit_log")
6958 .expect("describe collection")
6959 .expect("collection exists");
6960 assert_eq!(described.disabled_at, Some(disabled_at));
6961
6962 let writer = crate::WriterActor::start(
6963 db.path(),
6964 Arc::new(SchemaManager::new()),
6965 crate::ProvenanceMode::Warn,
6966 Arc::new(crate::TelemetryCounters::default()),
6967 )
6968 .expect("writer");
6969 let error = writer
6970 .submit(crate::WriteRequest {
6971 label: "disabled-operational".to_owned(),
6972 nodes: vec![],
6973 node_retires: vec![],
6974 edges: vec![],
6975 edge_retires: vec![],
6976 chunks: vec![],
6977 runs: vec![],
6978 steps: vec![],
6979 actions: vec![],
6980 optional_backfills: vec![],
6981 vec_inserts: vec![],
6982 operational_writes: vec![crate::OperationalWrite::Append {
6983 collection: "audit_log".to_owned(),
6984 record_key: "evt-1".to_owned(),
6985 payload_json: r#"{"type":"sync"}"#.to_owned(),
6986 source_ref: Some("src-1".to_owned()),
6987 }],
6988 })
6989 .expect_err("disabled collection should reject writes");
6990 assert!(matches!(error, EngineError::InvalidWrite(_)));
6991 assert!(error.to_string().contains("is disabled"));
6992
6993 let conn = sqlite::open_connection(db.path()).expect("conn");
6994 let provenance_count: i64 = conn
6995 .query_row(
6996 "SELECT count(*) FROM provenance_events \
6997 WHERE event_type = 'operational_collection_disabled' AND subject = 'audit_log'",
6998 [],
6999 |row| row.get(0),
7000 )
7001 .expect("provenance count");
7002 assert_eq!(provenance_count, 1);
7003 }
7004
7005 #[test]
7006 fn purge_operational_collection_deletes_append_only_rows_before_cutoff() {
7007 let (db, service) = setup();
7008 {
7009 let conn = sqlite::open_connection(db.path()).expect("conn");
7010 conn.execute(
7011 "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
7012 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_all\"}', 1, 100)",
7013 [],
7014 )
7015 .expect("seed collection");
7016 conn.execute(
7017 "INSERT INTO operational_mutations \
7018 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
7019 VALUES ('evt-1', 'audit_log', 'evt-1', 'append', '{\"seq\":1}', 'src-1', 100, 1)",
7020 [],
7021 )
7022 .expect("seed event 1");
7023 conn.execute(
7024 "INSERT INTO operational_mutations \
7025 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
7026 VALUES ('evt-2', 'audit_log', 'evt-2', 'append', '{\"seq\":2}', 'src-2', 200, 2)",
7027 [],
7028 )
7029 .expect("seed event 2");
7030 conn.execute(
7031 "INSERT INTO operational_mutations \
7032 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
7033 VALUES ('evt-3', 'audit_log', 'evt-3', 'append', '{\"seq\":3}', 'src-3', 300, 3)",
7034 [],
7035 )
7036 .expect("seed event 3");
7037 }
7038
7039 let report = service
7040 .purge_operational_collection("audit_log", 250)
7041 .expect("purge collection");
7042 assert_eq!(report.collection_name, "audit_log");
7043 assert_eq!(report.deleted_mutations, 2);
7044 assert_eq!(report.before_timestamp, 250);
7045
7046 let conn = sqlite::open_connection(db.path()).expect("conn");
7047 let remaining: Vec<String> = {
7048 let mut stmt = conn
7049 .prepare(
7050 "SELECT id FROM operational_mutations \
7051 WHERE collection_name = 'audit_log' ORDER BY mutation_order",
7052 )
7053 .expect("stmt");
7054 stmt.query_map([], |row| row.get(0))
7055 .expect("rows")
7056 .collect::<Result<_, _>>()
7057 .expect("collect")
7058 };
7059 assert_eq!(remaining, vec!["evt-3".to_owned()]);
7060 let provenance_count: i64 = conn
7061 .query_row(
7062 "SELECT count(*) FROM provenance_events \
7063 WHERE event_type = 'operational_collection_purged' AND subject = 'audit_log'",
7064 [],
7065 |row| row.get(0),
7066 )
7067 .expect("provenance count");
7068 assert_eq!(provenance_count, 1);
7069 }
7070
7071 #[test]
7072 fn compact_operational_collection_dry_run_reports_without_mutation() {
7073 let (db, service) = setup();
7074 {
7075 let conn = sqlite::open_connection(db.path()).expect("conn");
7076 conn.execute(
7077 "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
7078 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
7079 [],
7080 )
7081 .expect("seed collection");
7082 for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
7083 conn.execute(
7084 "INSERT INTO operational_mutations \
7085 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
7086 VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
7087 rusqlite::params![
7088 format!("evt-{index}"),
7089 format!("{{\"seq\":{index}}}"),
7090 created_at,
7091 index,
7092 ],
7093 )
7094 .expect("seed event");
7095 }
7096 }
7097
7098 let report = service
7099 .compact_operational_collection("audit_log", true)
7100 .expect("compact collection");
7101 assert_eq!(report.collection_name, "audit_log");
7102 assert_eq!(report.deleted_mutations, 1);
7103 assert!(report.dry_run);
7104 assert_eq!(report.before_timestamp, None);
7105
7106 let conn = sqlite::open_connection(db.path()).expect("conn");
7107 let remaining_count: i64 = conn
7108 .query_row(
7109 "SELECT count(*) FROM operational_mutations WHERE collection_name = 'audit_log'",
7110 [],
7111 |row| row.get(0),
7112 )
7113 .expect("remaining count");
7114 assert_eq!(remaining_count, 3);
7115 let provenance_count: i64 = conn
7116 .query_row(
7117 "SELECT count(*) FROM provenance_events \
7118 WHERE event_type = 'operational_collection_compacted' AND subject = 'audit_log'",
7119 [],
7120 |row| row.get(0),
7121 )
7122 .expect("provenance count");
7123 assert_eq!(provenance_count, 0);
7124 }
7125
7126 #[test]
7127 fn compact_operational_collection_keep_last_deletes_oldest_rows() {
7128 let (db, service) = setup();
7129 {
7130 let conn = sqlite::open_connection(db.path()).expect("conn");
7131 conn.execute(
7132 "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
7133 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
7134 [],
7135 )
7136 .expect("seed collection");
7137 for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
7138 conn.execute(
7139 "INSERT INTO operational_mutations \
7140 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
7141 VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
7142 rusqlite::params![
7143 format!("evt-{index}"),
7144 format!("{{\"seq\":{index}}}"),
7145 created_at,
7146 index,
7147 ],
7148 )
7149 .expect("seed event");
7150 }
7151 }
7152
7153 let report = service
7154 .compact_operational_collection("audit_log", false)
7155 .expect("compact collection");
7156 assert_eq!(report.deleted_mutations, 1);
7157 assert!(!report.dry_run);
7158
7159 let conn = sqlite::open_connection(db.path()).expect("conn");
7160 let remaining: Vec<String> = {
7161 let mut stmt = conn
7162 .prepare(
7163 "SELECT id FROM operational_mutations \
7164 WHERE collection_name = 'audit_log' ORDER BY mutation_order",
7165 )
7166 .expect("stmt");
7167 stmt.query_map([], |row| row.get(0))
7168 .expect("rows")
7169 .collect::<Result<_, _>>()
7170 .expect("collect")
7171 };
7172 assert_eq!(remaining, vec!["evt-2".to_owned(), "evt-3".to_owned()]);
7173 let provenance_count: i64 = conn
7174 .query_row(
7175 "SELECT count(*) FROM provenance_events \
7176 WHERE event_type = 'operational_collection_compacted' AND subject = 'audit_log'",
7177 [],
7178 |row| row.get(0),
7179 )
7180 .expect("provenance count");
7181 assert_eq!(provenance_count, 1);
7182 }
7183
7184 #[test]
7185 fn plan_and_run_operational_retention_keep_last() {
7186 let (db, service) = setup();
7187 {
7188 let conn = sqlite::open_connection(db.path()).expect("conn");
7189 conn.execute(
7190 "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
7191 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
7192 [],
7193 )
7194 .expect("seed collection");
7195 for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
7196 conn.execute(
7197 "INSERT INTO operational_mutations \
7198 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
7199 VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
7200 rusqlite::params![
7201 format!("evt-{index}"),
7202 format!("{{\"seq\":{index}}}"),
7203 created_at,
7204 index,
7205 ],
7206 )
7207 .expect("seed event");
7208 }
7209 }
7210
7211 let plan = service
7212 .plan_operational_retention(1_000, None, Some(10))
7213 .expect("plan retention");
7214 assert_eq!(plan.collections_examined, 1);
7215 assert_eq!(plan.items[0].collection_name, "audit_log");
7216 assert_eq!(
7217 plan.items[0].action_kind,
7218 crate::operational::OperationalRetentionActionKind::KeepLast
7219 );
7220 assert_eq!(plan.items[0].candidate_deletions, 1);
7221 assert_eq!(plan.items[0].max_rows, Some(2));
7222 assert_eq!(plan.items[0].last_run_at, None);
7223
7224 let dry_run = service
7225 .run_operational_retention(1_000, None, Some(10), true)
7226 .expect("dry-run retention");
7227 assert!(dry_run.dry_run);
7228 assert_eq!(dry_run.collections_acted_on, 1);
7229 assert_eq!(dry_run.items[0].deleted_mutations, 1);
7230 assert_eq!(dry_run.items[0].rows_remaining, 2);
7231
7232 let conn = sqlite::open_connection(db.path()).expect("conn");
7233 let remaining_count: i64 = conn
7234 .query_row(
7235 "SELECT count(*) FROM operational_mutations WHERE collection_name = 'audit_log'",
7236 [],
7237 |row| row.get(0),
7238 )
7239 .expect("remaining count after dry run");
7240 assert_eq!(remaining_count, 3);
7241 let retention_run_count: i64 = conn
7242 .query_row(
7243 "SELECT count(*) FROM operational_retention_runs WHERE collection_name = 'audit_log'",
7244 [],
7245 |row| row.get(0),
7246 )
7247 .expect("retention run count");
7248 assert_eq!(retention_run_count, 0);
7249 drop(conn);
7250
7251 let executed = service
7252 .run_operational_retention(1_000, None, Some(10), false)
7253 .expect("execute retention");
7254 assert_eq!(executed.collections_acted_on, 1);
7255 assert_eq!(executed.items[0].deleted_mutations, 1);
7256 assert_eq!(executed.items[0].rows_remaining, 2);
7257
7258 let conn = sqlite::open_connection(db.path()).expect("conn");
7259 let remaining: Vec<String> = {
7260 let mut stmt = conn
7261 .prepare(
7262 "SELECT id FROM operational_mutations \
7263 WHERE collection_name = 'audit_log' ORDER BY mutation_order",
7264 )
7265 .expect("stmt");
7266 stmt.query_map([], |row| row.get(0))
7267 .expect("rows")
7268 .collect::<Result<_, _>>()
7269 .expect("collect")
7270 };
7271 assert_eq!(remaining, vec!["evt-2".to_owned(), "evt-3".to_owned()]);
7272 let last_run_at: i64 = conn
7273 .query_row(
7274 "SELECT executed_at FROM operational_retention_runs \
7275 WHERE collection_name = 'audit_log' ORDER BY executed_at DESC LIMIT 1",
7276 [],
7277 |row| row.get(0),
7278 )
7279 .expect("last run at");
7280 assert_eq!(last_run_at, 1_000);
7281 }
7282
7283 #[test]
7284 fn dry_run_operational_retention_does_not_mark_noop_collection_as_acted_on() {
7285 let (db, service) = setup();
7286 let conn = sqlite::open_connection(db.path()).expect("conn");
7287 conn.execute(
7288 "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
7289 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
7290 [],
7291 )
7292 .expect("seed collection");
7293 for (index, created_at) in [(1_i64, 100_i64), (2, 200)] {
7294 conn.execute(
7295 "INSERT INTO operational_mutations \
7296 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
7297 VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
7298 rusqlite::params![
7299 format!("evt-{index}"),
7300 format!("{{\"seq\":{index}}}"),
7301 created_at,
7302 index,
7303 ],
7304 )
7305 .expect("seed event");
7306 }
7307 drop(conn);
7308
7309 let dry_run = service
7310 .run_operational_retention(1_000, None, Some(10), true)
7311 .expect("dry-run retention");
7312 assert!(dry_run.dry_run);
7313 assert_eq!(dry_run.collections_acted_on, 0);
7314 assert_eq!(dry_run.items[0].deleted_mutations, 0);
7315 assert_eq!(dry_run.items[0].rows_remaining, 2);
7316 }
7317
7318 #[test]
7319 fn compact_operational_collection_rejects_latest_state() {
7320 let (_db, service) = setup();
7321 service
7322 .register_operational_collection(&OperationalRegisterRequest {
7323 name: "connector_health".to_owned(),
7324 kind: OperationalCollectionKind::LatestState,
7325 schema_json: "{}".to_owned(),
7326 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
7327 filter_fields_json: "[]".to_owned(),
7328 validation_json: String::new(),
7329 secondary_indexes_json: "[]".to_owned(),
7330 format_version: 1,
7331 })
7332 .expect("register collection");
7333
7334 let error = service
7335 .compact_operational_collection("connector_health", false)
7336 .expect_err("latest_state compaction should be rejected");
7337 assert!(matches!(error, EngineError::InvalidWrite(_)));
7338 assert!(error.to_string().contains("append_only_log"));
7339 }
7340
7341 #[test]
7342 fn register_operational_collection_persists_filter_fields_json() {
7343 let (_db, service) = setup();
7344
7345 let record = service
7346 .register_operational_collection(&OperationalRegisterRequest {
7347 name: "audit_log".to_owned(),
7348 kind: OperationalCollectionKind::AppendOnlyLog,
7349 schema_json: "{}".to_owned(),
7350 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
7351 filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
7352 validation_json: String::new(),
7353 secondary_indexes_json: "[]".to_owned(),
7354 format_version: 1,
7355 })
7356 .expect("register collection");
7357
7358 assert_eq!(
7359 record.filter_fields_json,
7360 r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#
7361 );
7362 }
7363
/// Appends three audit rows, then reads with a prefix clause on `actor` and a
/// range clause on `ts`; only `evt-2` satisfies both and must be the sole row.
#[test]
fn read_operational_collection_filters_append_only_rows_by_declared_fields() {
    let (db, service) = setup();

    let registration = OperationalRegisterRequest {
        name: "audit_log".to_owned(),
        kind: OperationalCollectionKind::AppendOnlyLog,
        schema_json: "{}".to_owned(),
        retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
        filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"seq","type":"integer","modes":["exact","range"]},{"name":"ts","type":"timestamp","modes":["exact","range"]}]"#.to_owned(),
        validation_json: String::new(),
        secondary_indexes_json: "[]".to_owned(),
        format_version: 1,
    };
    service
        .register_operational_collection(&registration)
        .expect("register collection");

    // The writer is confined to this scope so it is dropped before the read.
    {
        let writer = crate::WriterActor::start(
            db.path(),
            Arc::new(SchemaManager::new()),
            crate::ProvenanceMode::Warn,
            Arc::new(crate::TelemetryCounters::default()),
        )
        .expect("writer");

        // Builds one append against `audit_log` from its varying parts.
        let append = |record_key: &str, payload_json: &str, source_ref: &str| {
            crate::OperationalWrite::Append {
                collection: "audit_log".to_owned(),
                record_key: record_key.to_owned(),
                payload_json: payload_json.to_owned(),
                source_ref: Some(source_ref.to_owned()),
            }
        };

        let batch = crate::WriteRequest {
            label: "operational".to_owned(),
            nodes: vec![],
            node_retires: vec![],
            edges: vec![],
            edge_retires: vec![],
            chunks: vec![],
            runs: vec![],
            steps: vec![],
            actions: vec![],
            optional_backfills: vec![],
            vec_inserts: vec![],
            operational_writes: vec![
                append("evt-1", r#"{"actor":"alice","seq":1,"ts":100}"#, "src-1"),
                append(
                    "evt-2",
                    r#"{"actor":"alice-admin","seq":2,"ts":200}"#,
                    "src-2",
                ),
                append("evt-3", r#"{"actor":"bob","seq":3,"ts":300}"#, "src-3"),
            ],
        };
        writer.submit(batch).expect("write");
    }

    let query = crate::operational::OperationalReadRequest {
        collection_name: "audit_log".to_owned(),
        filters: vec![
            crate::operational::OperationalFilterClause::Prefix {
                field: "actor".to_owned(),
                value: "alice".to_owned(),
            },
            crate::operational::OperationalFilterClause::Range {
                field: "ts".to_owned(),
                lower: Some(150),
                upper: Some(250),
            },
        ],
        limit: Some(10),
    };
    let report = service
        .read_operational_collection(&query)
        .expect("filtered read");

    assert_eq!(report.collection_name, "audit_log");
    assert_eq!(report.row_count, 1);
    assert!(!report.was_limited);
    assert_eq!(report.rows.len(), 1);

    let only_row = &report.rows[0];
    assert_eq!(only_row.record_key, "evt-2");
    assert_eq!(
        only_row.payload_json,
        r#"{"actor":"alice-admin","seq":2,"ts":200}"#
    );
}
7452
/// Registers a collection with an `actor_ts` secondary index, writes two rows,
/// deletes the collection's `operational_filter_values` rows, and then checks
/// that the same prefix + range read still returns `evt-2`.
#[test]
fn read_operational_collection_uses_secondary_index_when_filter_values_are_missing() {
    let (db, service) = setup();

    let registration = OperationalRegisterRequest {
        name: "audit_log".to_owned(),
        kind: OperationalCollectionKind::AppendOnlyLog,
        schema_json: "{}".to_owned(),
        retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
        filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
        validation_json: String::new(),
        secondary_indexes_json: r#"[{"name":"actor_ts","kind":"append_only_field_time","field":"actor","value_type":"string","time_field":"ts"}]"#.to_owned(),
        format_version: 1,
    };
    service
        .register_operational_collection(&registration)
        .expect("register collection");

    // The writer is confined to this scope so it is dropped before the table
    // is modified directly below.
    {
        let writer = crate::WriterActor::start(
            db.path(),
            Arc::new(SchemaManager::new()),
            crate::ProvenanceMode::Warn,
            Arc::new(crate::TelemetryCounters::default()),
        )
        .expect("writer");

        let append = |record_key: &str, payload_json: &str, source_ref: &str| {
            crate::OperationalWrite::Append {
                collection: "audit_log".to_owned(),
                record_key: record_key.to_owned(),
                payload_json: payload_json.to_owned(),
                source_ref: Some(source_ref.to_owned()),
            }
        };

        let batch = crate::WriteRequest {
            label: "operational".to_owned(),
            nodes: vec![],
            node_retires: vec![],
            edges: vec![],
            edge_retires: vec![],
            chunks: vec![],
            runs: vec![],
            steps: vec![],
            actions: vec![],
            optional_backfills: vec![],
            vec_inserts: vec![],
            operational_writes: vec![
                append("evt-1", r#"{"actor":"alice","ts":100}"#, "src-1"),
                append("evt-2", r#"{"actor":"alice-admin","ts":200}"#, "src-2"),
            ],
        };
        writer.submit(batch).expect("write");
    }

    // Remove the extracted filter values; the connection closes at scope end.
    {
        let conn = sqlite::open_connection(db.path()).expect("conn");
        conn.execute(
            "DELETE FROM operational_filter_values WHERE collection_name = 'audit_log'",
            [],
        )
        .expect("clear filter values");
    }

    let query = crate::operational::OperationalReadRequest {
        collection_name: "audit_log".to_owned(),
        filters: vec![
            crate::operational::OperationalFilterClause::Prefix {
                field: "actor".to_owned(),
                value: "alice".to_owned(),
            },
            crate::operational::OperationalFilterClause::Range {
                field: "ts".to_owned(),
                lower: Some(150),
                upper: Some(250),
            },
        ],
        limit: Some(10),
    };
    let report = service
        .read_operational_collection(&query)
        .expect("secondary-index read");

    assert_eq!(report.row_count, 1);
    assert_eq!(report.rows[0].record_key, "evt-2");
}
7535
/// Covers two rejection paths of filtered reads: a `LatestState` collection
/// (error text mentions `append_only_log`) and a filter on a field that the
/// append-only collection never declared (error text mentions `undeclared`).
#[test]
fn read_operational_collection_rejects_undeclared_fields_and_latest_state_collections() {
    let (_db, service) = setup();

    // --- Case 1: filtered read against a latest-state collection ---
    let latest_state_registration = OperationalRegisterRequest {
        name: "connector_health".to_owned(),
        kind: OperationalCollectionKind::LatestState,
        schema_json: "{}".to_owned(),
        retention_json: "{}".to_owned(),
        filter_fields_json: r#"[{"name":"status","type":"string","modes":["exact"]}]"#.to_owned(),
        validation_json: String::new(),
        secondary_indexes_json: "[]".to_owned(),
        format_version: 1,
    };
    service
        .register_operational_collection(&latest_state_registration)
        .expect("register collection");

    let status_query = crate::operational::OperationalReadRequest {
        collection_name: "connector_health".to_owned(),
        filters: vec![crate::operational::OperationalFilterClause::Exact {
            field: "status".to_owned(),
            value: crate::operational::OperationalFilterValue::String("ok".to_owned()),
        }],
        limit: Some(10),
    };
    let latest_state_error = service
        .read_operational_collection(&status_query)
        .expect_err("latest_state filtered reads should be rejected");
    assert!(latest_state_error.to_string().contains("append_only_log"));

    // --- Case 2: filter on a field missing from the declared contract ---
    let append_only_registration = OperationalRegisterRequest {
        name: "audit_log".to_owned(),
        kind: OperationalCollectionKind::AppendOnlyLog,
        schema_json: "{}".to_owned(),
        retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
        filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact"]}]"#.to_owned(),
        validation_json: String::new(),
        secondary_indexes_json: "[]".to_owned(),
        format_version: 1,
    };
    service
        .register_operational_collection(&append_only_registration)
        .expect("register append-only collection");

    let undeclared_query = crate::operational::OperationalReadRequest {
        collection_name: "audit_log".to_owned(),
        filters: vec![crate::operational::OperationalFilterClause::Exact {
            field: "missing".to_owned(),
            value: crate::operational::OperationalFilterValue::String("x".to_owned()),
        }],
        limit: Some(10),
    };
    let undeclared_error = service
        .read_operational_collection(&undeclared_query)
        .expect_err("undeclared field should be rejected");
    assert!(undeclared_error.to_string().contains("undeclared"));
}
7591
/// Writes two rows that both match the prefix filter, reads with `limit: 1`,
/// and checks the report: one row, `applied_limit == 1`, `was_limited` set,
/// and `evt-2` as the row returned.
#[test]
fn read_operational_collection_applies_limit_and_reports_truncation() {
    let (db, service) = setup();

    let registration = OperationalRegisterRequest {
        name: "audit_log".to_owned(),
        kind: OperationalCollectionKind::AppendOnlyLog,
        schema_json: "{}".to_owned(),
        retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
        filter_fields_json: r#"[{"name":"actor","type":"string","modes":["prefix"]}]"#.to_owned(),
        validation_json: String::new(),
        secondary_indexes_json: "[]".to_owned(),
        format_version: 1,
    };
    service
        .register_operational_collection(&registration)
        .expect("register collection");

    // The writer is confined to this scope so it is dropped before the read.
    {
        let writer = crate::WriterActor::start(
            db.path(),
            Arc::new(SchemaManager::new()),
            crate::ProvenanceMode::Warn,
            Arc::new(crate::TelemetryCounters::default()),
        )
        .expect("writer");

        let append = |record_key: &str, payload_json: &str, source_ref: &str| {
            crate::OperationalWrite::Append {
                collection: "audit_log".to_owned(),
                record_key: record_key.to_owned(),
                payload_json: payload_json.to_owned(),
                source_ref: Some(source_ref.to_owned()),
            }
        };

        let batch = crate::WriteRequest {
            label: "operational".to_owned(),
            nodes: vec![],
            node_retires: vec![],
            edges: vec![],
            edge_retires: vec![],
            chunks: vec![],
            runs: vec![],
            steps: vec![],
            actions: vec![],
            optional_backfills: vec![],
            vec_inserts: vec![],
            operational_writes: vec![
                append("evt-1", r#"{"actor":"alice-1"}"#, "src-1"),
                append("evt-2", r#"{"actor":"alice-2"}"#, "src-2"),
            ],
        };
        writer.submit(batch).expect("write");
    }

    let query = crate::operational::OperationalReadRequest {
        collection_name: "audit_log".to_owned(),
        filters: vec![crate::operational::OperationalFilterClause::Prefix {
            field: "actor".to_owned(),
            value: "alice".to_owned(),
        }],
        limit: Some(1),
    };
    let report = service
        .read_operational_collection(&query)
        .expect("limited read");

    assert_eq!(report.row_count, 1);
    assert_eq!(report.applied_limit, 1);
    assert!(report.was_limited);
    assert_eq!(report.rows[0].record_key, "evt-2");
}
7663
/// Seeds a database by hand with `operational_collections` /
/// `operational_mutations` tables that have no filter-contract column, then
/// checks that a filtered read is rejected as `undeclared` until
/// `update_operational_collection_filters` installs a contract, after which
/// a range read on `ts` finds the pre-existing row.
#[test]
fn preexisting_operational_collection_can_gain_filter_contract_after_upgrade() {
    let legacy_schema_and_rows = r#"
        CREATE TABLE operational_collections (
            name TEXT PRIMARY KEY,
            kind TEXT NOT NULL,
            schema_json TEXT NOT NULL,
            retention_json TEXT NOT NULL,
            format_version INTEGER NOT NULL DEFAULT 1,
            created_at INTEGER NOT NULL DEFAULT 100,
            disabled_at INTEGER
        );
        CREATE TABLE operational_mutations (
            id TEXT PRIMARY KEY,
            collection_name TEXT NOT NULL,
            record_key TEXT NOT NULL,
            op_kind TEXT NOT NULL,
            payload_json TEXT NOT NULL,
            source_ref TEXT,
            created_at INTEGER NOT NULL DEFAULT 100,
            mutation_order INTEGER NOT NULL DEFAULT 1
        );
        INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at)
        VALUES ('audit_log', 'append_only_log', '{}', '{"mode":"keep_all"}', 1, 100);
        INSERT INTO operational_mutations
            (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order)
        VALUES
            ('evt-1', 'audit_log', 'evt-1', 'append', '{"actor":"alice","ts":0}', 'src-1', 100, 1);
        "#;

    let db = NamedTempFile::new().expect("temp db");
    // The seeding connection is closed at the end of this scope, before the
    // service is constructed.
    {
        let conn = sqlite::open_connection(db.path()).expect("conn");
        conn.execute_batch(legacy_schema_and_rows)
            .expect("seed pre-v10 schema");
    }

    let service = AdminService::new(db.path(), Arc::new(SchemaManager::new()));

    // Before any contract exists, filtering on `actor` must be refused.
    let actor_query = crate::operational::OperationalReadRequest {
        collection_name: "audit_log".to_owned(),
        filters: vec![crate::operational::OperationalFilterClause::Exact {
            field: "actor".to_owned(),
            value: crate::operational::OperationalFilterValue::String("alice".to_owned()),
        }],
        limit: Some(10),
    };
    let pre_update = service
        .read_operational_collection(&actor_query)
        .expect_err("read should reject undeclared fields before migration update");
    assert!(pre_update.to_string().contains("undeclared"));

    let new_contract = r#"[{"name":"actor","type":"string","modes":["exact"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#;
    let updated = service
        .update_operational_collection_filters("audit_log", new_contract)
        .expect("update filter contract");
    assert!(updated.filter_fields_json.contains("\"actor\""));

    // The seeded row has ts = 0, so the inclusive-looking [0, 0] range is
    // expected to match exactly that row.
    let ts_query = crate::operational::OperationalReadRequest {
        collection_name: "audit_log".to_owned(),
        filters: vec![crate::operational::OperationalFilterClause::Range {
            field: "ts".to_owned(),
            lower: Some(0),
            upper: Some(0),
        }],
        limit: Some(10),
    };
    let report = service
        .read_operational_collection(&ts_query)
        .expect("read after explicit filter update");
    assert_eq!(report.row_count, 1);
    assert_eq!(report.rows[0].record_key, "evt-1");
}
7735
/// Inserts a `vec_doc` row keyed by `ghost-chunk` (a chunk id this test never
/// creates) and expects `check_semantics` to report one stale vec row along
/// with a warning containing "stale vec".
#[cfg(feature = "sqlite-vec")]
#[test]
fn check_semantics_detects_stale_vec_rows() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());

    {
        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
        schema.bootstrap(&conn).expect("bootstrap");
        schema
            .ensure_vec_kind_profile(&conn, "Doc", 3)
            .expect("vec kind profile");

        // Three f32 components serialized back-to-back in little-endian order.
        let mut bytes: Vec<u8> = Vec::new();
        for component in &[0.1f32, 0.2f32, 0.3f32] {
            bytes.extend_from_slice(&component.to_le_bytes());
        }

        conn.execute(
            "INSERT INTO vec_doc (chunk_id, embedding) VALUES ('ghost-chunk', ?1)",
            rusqlite::params![bytes],
        )
        .expect("insert stale vec row");
    }

    let service = AdminService::new(db.path(), Arc::clone(&schema));
    let report = service.check_semantics().expect("semantics check");

    assert_eq!(report.stale_vec_rows, 1);
    let mentions_stale_vec = report.warnings.iter().any(|w| w.contains("stale vec"));
    assert!(mentions_stale_vec, "warning must mention stale vec");
}
7768
/// Seeds only a `vector_profiles` metadata row, runs
/// `restore_vector_profiles`, and checks both the report and that
/// `vec_nodes_active` then appears in `sqlite_schema`.
#[cfg(feature = "sqlite-vec")]
#[test]
fn restore_vector_profiles_recreates_vec_table_from_metadata() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());

    let seed_profile = "INSERT INTO vector_profiles (profile, table_name, dimension, enabled) \
                        VALUES ('default', 'vec_nodes_active', 3, 1)";
    {
        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
        schema.bootstrap(&conn).expect("bootstrap");
        conn.execute(seed_profile, [])
            .expect("insert vector profile");
    }

    let service = AdminService::new(db.path(), Arc::clone(&schema));
    let report = service
        .restore_vector_profiles()
        .expect("restore vector profiles");

    let expected_targets = vec![crate::projection::ProjectionTarget::Vec];
    assert_eq!(report.targets, expected_targets);
    assert_eq!(report.rebuilt_rows, 1);

    let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    let matching_schema_entries: i64 = conn
        .query_row(
            "SELECT count(*) FROM sqlite_schema WHERE name = 'vec_nodes_active'",
            [],
            |row| row.get(0),
        )
        .expect("vec schema count");
    assert_eq!(
        matching_schema_entries, 1,
        "vec table should exist after restore"
    );
}
7805
/// Serializes one `VectorRegenerationConfig` to both a `.json` and a `.toml`
/// file and checks that `load_vector_regeneration_config` parses each back
/// into an equal value.
#[cfg(feature = "sqlite-vec")]
#[test]
fn load_vector_regeneration_config_supports_json_and_toml() {
    let config = VectorRegenerationConfig {
        kind: "Document".to_owned(),
        profile: "default".to_owned(),
        chunking_policy: "per_chunk".to_owned(),
        preprocessing_policy: "trim".to_owned(),
    };

    let dir = tempfile::tempdir().expect("temp dir");
    let json_path = dir.path().join("regen.json");
    let toml_path = dir.path().join("regen.toml");

    let as_json = serde_json::to_string(&config).expect("json");
    fs::write(&json_path, as_json).expect("write json");
    let as_toml = toml::to_string(&config).expect("toml");
    fs::write(&toml_path, as_toml).expect("write toml");

    let parsed_json = load_vector_regeneration_config(&json_path).expect("json parse");
    let parsed_toml = load_vector_regeneration_config(&toml_path).expect("toml parse");

    assert_eq!(parsed_json, config);
    assert_eq!(parsed_toml, config);
}
7829
/// A config document that still carries the older identity-style keys
/// (`table_name`, `model_identity`, `model_version`, `dimension`,
/// `normalization_policy`, `generator_command`) must fail to deserialize into
/// `VectorRegenerationConfig`.
#[test]
fn regenerate_vector_embeddings_config_rejects_old_identity_fields() {
    let legacy_json = r#"{
        "kind": "Document",
        "profile": "default",
        "table_name": "vec_nodes_active",
        "model_identity": "old-model",
        "model_version": "1.0",
        "dimension": 4,
        "normalization_policy": "l2",
        "chunking_policy": "per_chunk",
        "preprocessing_policy": "trim",
        "generator_command": ["/bin/echo"]
    }"#;

    let outcome = serde_json::from_str::<VectorRegenerationConfig>(legacy_json);

    assert!(
        outcome.is_err(),
        "legacy identity fields must be rejected at deserialization"
    );
}
7856
/// Built only when the `sqlite-vec` feature is off (and on unix): regeneration
/// must fail with "unsupported vec capability" while still recording one
/// `vector_regeneration_requested` and one `vector_regeneration_failed`
/// provenance event, the latter tagged with the matching `failure_class`.
#[cfg(all(not(feature = "sqlite-vec"), unix))]
#[test]
fn regenerate_vector_embeddings_unsupported_vec_capability_writes_request_and_failed_audit() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());

    let seed_node = "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                     VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')";
    let seed_chunk = "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                      VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)";
    {
        let conn = sqlite::open_connection(db.path()).expect("connection");
        schema.bootstrap(&conn).expect("bootstrap");
        conn.execute(seed_node, []).expect("insert node");
        conn.execute(seed_chunk, []).expect("insert chunk");
    }

    let service = AdminService::new(db.path(), Arc::clone(&schema));
    let embedder = TestEmbedder::new("test-model", 4);
    let regeneration = VectorRegenerationConfig {
        kind: "Document".to_owned(),
        profile: "default".to_owned(),
        chunking_policy: "per_chunk".to_owned(),
        preprocessing_policy: "trim".to_owned(),
    };
    let error = service
        .regenerate_vector_embeddings(&embedder, &regeneration)
        .expect_err("sqlite-vec capability should be required");

    assert!(error.to_string().contains("unsupported vec capability"));

    let conn = sqlite::open_connection(db.path()).expect("connection");
    // Runs a single-column integer query, panicking with `label` on failure.
    let fetch_count = |sql: &str, label: &str| -> i64 {
        conn.query_row(sql, [], |row| row.get(0)).expect(label)
    };

    let request_count = fetch_count(
        "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_requested' AND subject = 'default'",
        "request count",
    );
    assert_eq!(request_count, 1);

    let failed_count = fetch_count(
        "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
        "failed count",
    );
    assert_eq!(failed_count, 1);

    let metadata_json: String = conn
        .query_row(
            "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
            [],
            |row| row.get(0),
        )
        .expect("failed metadata");
    assert!(metadata_json.contains("\"failure_class\":\"unsupported vec capability\""));
}
7922
/// Happy path for regeneration: one `Document` node with two chunks is
/// re-embedded through `TestEmbedder`. Verifies the returned report, the
/// `vec_document` row count, the persisted embedding contract, and the
/// request/apply provenance events.
#[cfg(feature = "sqlite-vec")]
#[test]
#[allow(clippy::too_many_lines)]
fn regenerate_vector_embeddings_rebuilds_embeddings_via_embedder() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());

    let seed_node = "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                     VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')";
    let seed_first_chunk = "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                            VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)";
    let seed_second_chunk = "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                             VALUES ('chunk-2', 'doc-1', 'travel plan', 101)";
    {
        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
        schema.bootstrap(&conn).expect("bootstrap");
        conn.execute(seed_node, []).expect("insert node");
        conn.execute(seed_first_chunk, []).expect("insert chunk 1");
        conn.execute(seed_second_chunk, []).expect("insert chunk 2");
    }

    let service = AdminService::new(db.path(), Arc::clone(&schema));
    let embedder = TestEmbedder::new("test-model", 4);
    let regeneration = VectorRegenerationConfig {
        kind: "Document".to_owned(),
        profile: "default".to_owned(),
        chunking_policy: "per_chunk".to_owned(),
        preprocessing_policy: "trim".to_owned(),
    };
    let report = service
        .regenerate_vector_embeddings(&embedder, &regeneration)
        .expect("regenerate vectors");

    // --- Report returned to the caller ---
    assert_eq!(report.profile, "default");
    assert_eq!(report.table_name, "vec_document");
    assert_eq!(report.dimension, 4);
    assert_eq!(report.total_chunks, 2);
    assert_eq!(report.regenerated_rows, 2);
    assert!(report.contract_persisted);

    let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    // Runs a single-column integer query, panicking with `label` on failure.
    let fetch_i64 = |sql: &str, label: &str| -> i64 {
        conn.query_row(sql, [], |row| row.get(0)).expect(label)
    };

    // --- Vector table contents ---
    let vec_count = fetch_i64("SELECT count(*) FROM vec_document", "vec count");
    assert_eq!(vec_count, 2);

    // --- Persisted embedding contract ---
    let contract: (String, String, i64, String) = conn
        .query_row(
            "SELECT model_identity, model_version, dimension, normalization_policy \
             FROM vector_embedding_contracts WHERE profile = 'default'",
            [],
            |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)),
        )
        .expect("contract row");
    let (model_identity, model_version, dimension, normalization_policy) = contract;
    assert_eq!(model_identity, "test-model");
    assert_eq!(model_version, "1.0.0");
    assert_eq!(dimension, 4);
    assert_eq!(normalization_policy, "l2");

    let contract_format_version = fetch_i64(
        "SELECT contract_format_version FROM vector_embedding_contracts WHERE profile = 'default'",
        "contract_format_version",
    );
    assert_eq!(contract_format_version, 1);

    // --- Provenance audit trail ---
    let request_count = fetch_i64(
        "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_requested' AND subject = 'default'",
        "request audit count",
    );
    assert_eq!(request_count, 1);

    let apply_count = fetch_i64(
        "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_apply' AND subject = 'default'",
        "apply audit count",
    );
    assert_eq!(apply_count, 1);

    let apply_metadata: String = conn
        .query_row(
            "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_apply' AND subject = 'default'",
            [],
            |row| row.get(0),
        )
        .expect("apply metadata");
    assert!(apply_metadata.contains("\"profile\":\"default\""));
    assert!(apply_metadata.contains("\"snapshot_hash\":"));
    assert!(apply_metadata.contains("\"model_identity\":\"test-model\""));
}
8036
/// Seeds an existing contract (`old-model`, `old-snapshot`) and one vec row,
/// then regenerates with a `FailingEmbedder`. The call must fail with
/// "embedder failure", the seeded contract and vec row must be untouched, and
/// a single `vector_regeneration_failed` event must record that failure class.
#[cfg(feature = "sqlite-vec")]
#[test]
#[allow(clippy::too_many_lines)]
fn regenerate_vector_embeddings_embedder_failure_leaves_contract_and_vec_rows_unchanged() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());

    let seed_node = "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                     VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')";
    let seed_chunk = "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                      VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)";
    let seed_contract = r"
        INSERT INTO vector_embedding_contracts (
            profile,
            table_name,
            model_identity,
            model_version,
            dimension,
            normalization_policy,
            chunking_policy,
            preprocessing_policy,
            generator_command_json,
            applied_at,
            snapshot_hash
        ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)
        ";
    {
        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
        schema.bootstrap(&conn).expect("bootstrap");
        conn.execute(seed_node, []).expect("insert node");
        conn.execute(seed_chunk, []).expect("insert chunk");
        schema
            .ensure_vec_kind_profile(&conn, "Document", 4)
            .expect("ensure vec kind profile");
        conn.execute(
            seed_contract,
            rusqlite::params![
                "default",
                "vec_document",
                "old-model",
                "0.9.0",
                4,
                "l2",
                "per_chunk",
                "trim",
                "[]",
                111,
                "old-snapshot"
            ],
        )
        .expect("seed contract");
        conn.execute(
            "INSERT INTO vec_document (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
            [],
        )
        .expect("seed vec row");
    }

    let service = AdminService::new(db.path(), Arc::clone(&schema));
    let failing_identity = QueryEmbedderIdentity {
        model_identity: "new-model".to_owned(),
        model_version: "1.0.0".to_owned(),
        dimension: 4,
        normalization_policy: "l2".to_owned(),
    };
    let failing = FailingEmbedder {
        identity: failing_identity,
    };
    let regeneration = VectorRegenerationConfig {
        kind: "Document".to_owned(),
        profile: "default".to_owned(),
        chunking_policy: "per_chunk".to_owned(),
        preprocessing_policy: "trim".to_owned(),
    };
    let error = service
        .regenerate_vector_embeddings(&failing, &regeneration)
        .expect_err("embedder should fail");

    assert!(error.to_string().contains("embedder failure"));

    let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    // Single-column query helpers; each panics with `label` on failure.
    let fetch_text = |sql: &str, label: &str| -> String {
        conn.query_row(sql, [], |row| row.get(0)).expect(label)
    };
    let fetch_i64 = |sql: &str, label: &str| -> i64 {
        conn.query_row(sql, [], |row| row.get(0)).expect(label)
    };

    // --- Seeded contract is still the old one ---
    let model_identity = fetch_text(
        "SELECT model_identity FROM vector_embedding_contracts WHERE profile = 'default'",
        "model identity",
    );
    assert_eq!(model_identity, "old-model");

    let snapshot_hash = fetch_text(
        "SELECT snapshot_hash FROM vector_embedding_contracts WHERE profile = 'default'",
        "snapshot hash",
    );
    assert_eq!(snapshot_hash, "old-snapshot");

    // --- Seeded vec row is still the only one ---
    let vec_count = fetch_i64("SELECT count(*) FROM vec_document", "vec count");
    assert_eq!(vec_count, 1);

    // --- Failure was audited ---
    let failure_count = fetch_i64(
        "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
        "failure count",
    );
    assert_eq!(failure_count, 1);

    let failure_metadata = fetch_text(
        "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
        "failure metadata",
    );
    assert!(failure_metadata.contains("\"failure_class\":\"embedder failure\""));
}
8161
/// A profile consisting solely of spaces must be rejected as an
/// "invalid contract", leaving both `vector_embedding_contracts` and
/// `provenance_events` empty.
#[cfg(feature = "sqlite-vec")]
#[test]
fn regenerate_vector_embeddings_rejects_whitespace_only_profile_before_mutation() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());

    let seed_node = "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                     VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')";
    let seed_chunk = "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                      VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)";
    {
        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
        schema.bootstrap(&conn).expect("bootstrap");
        conn.execute(seed_node, []).expect("insert node");
        conn.execute(seed_chunk, []).expect("insert chunk");
    }

    let service = AdminService::new(db.path(), Arc::clone(&schema));
    let embedder = TestEmbedder::new("test-model", 4);
    let blank_profile_config = VectorRegenerationConfig {
        kind: "Document".to_owned(),
        profile: "   ".to_owned(),
        chunking_policy: "per_chunk".to_owned(),
        preprocessing_policy: "trim".to_owned(),
    };
    let error = service
        .regenerate_vector_embeddings(&embedder, &blank_profile_config)
        .expect_err("whitespace profile should be rejected");

    assert!(error.to_string().contains("invalid contract"));

    let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    // Runs a single-column integer query, panicking with `label` on failure.
    let fetch_count = |sql: &str, label: &str| -> i64 {
        conn.query_row(sql, [], |row| row.get(0)).expect(label)
    };

    let contract_count = fetch_count(
        "SELECT count(*) FROM vector_embedding_contracts",
        "contract count",
    );
    assert_eq!(contract_count, 0);

    let provenance_count =
        fetch_count("SELECT count(*) FROM provenance_events", "provenance count");
    assert_eq!(provenance_count, 0);
}
8225
/// Seeds a contract row whose `contract_format_version` is 99 and expects
/// regeneration to fail with an error mentioning both "unsupported" and
/// "format version".
#[cfg(feature = "sqlite-vec")]
#[test]
fn regenerate_vector_embeddings_rejects_future_contract_format_version() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());

    let seed_node = "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                     VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')";
    let seed_chunk = "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                      VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)";
    let seed_future_contract = r"
        INSERT INTO vector_embedding_contracts (
            profile,
            table_name,
            model_identity,
            model_version,
            dimension,
            normalization_policy,
            chunking_policy,
            preprocessing_policy,
            generator_command_json,
            applied_at,
            snapshot_hash,
            contract_format_version,
            updated_at
        ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)
        ";
    {
        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
        schema.bootstrap(&conn).expect("bootstrap");
        conn.execute(seed_node, []).expect("insert node");
        conn.execute(seed_chunk, []).expect("insert chunk");
        conn.execute(
            seed_future_contract,
            rusqlite::params![
                "default",
                "vec_nodes_active",
                "old-model",
                "0.9.0",
                4,
                "l2",
                "per_chunk",
                "trim",
                "[]",
                111,
                "old-snapshot",
                99,
                111,
            ],
        )
        .expect("seed future contract");
    }

    let service = AdminService::new(db.path(), Arc::clone(&schema));
    let embedder = TestEmbedder::new("test-model", 4);
    let regeneration = VectorRegenerationConfig {
        kind: "Document".to_owned(),
        profile: "default".to_owned(),
        chunking_policy: "per_chunk".to_owned(),
        preprocessing_policy: "trim".to_owned(),
    };
    let error = service
        .regenerate_vector_embeddings(&embedder, &regeneration)
        .expect_err("future contract version should be rejected");

    assert!(error.to_string().contains("unsupported"));
    assert!(error.to_string().contains("format version"));
}
8300
/// Inserts a chunk pointing at `ghost-node`, a logical id this test never
/// inserts as a node, and expects `check_semantics` to count one orphaned chunk.
#[test]
fn check_semantics_detects_orphaned_chunk() {
    let (db, service) = setup();

    let insert_orphan = "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                         VALUES ('c1', 'ghost-node', 'text', 100)";
    {
        let conn = sqlite::open_connection(db.path()).expect("conn");
        conn.execute(insert_orphan, [])
            .expect("insert orphaned chunk");
    }

    let report = service.check_semantics().expect("semantics check");
    assert_eq!(report.orphaned_chunks, 1);
}
8317
/// Inserts a node without supplying `source_ref` and expects
/// `check_semantics` to count one node with a null source reference.
#[test]
fn check_semantics_detects_null_source_ref() {
    let (db, service) = setup();

    // `source_ref` is deliberately absent from the column list.
    let insert_node = "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at) \
                       VALUES ('r1', 'lg1', 'Meeting', '{}', 100)";
    {
        let conn = sqlite::open_connection(db.path()).expect("conn");
        conn.execute(insert_node, [])
            .expect("insert node with null source_ref");
    }

    let report = service.check_semantics().expect("semantics check");
    assert_eq!(report.null_source_ref_nodes, 1);
}
8333
/// With foreign-key enforcement switched off on the seeding connection,
/// inserts a step whose `run_id` is `ghost-run` and expects
/// `check_semantics` to report one broken step foreign key.
#[test]
fn check_semantics_detects_broken_step_fk() {
    let (db, service) = setup();

    let insert_step = "INSERT INTO steps (id, run_id, kind, status, properties, created_at) \
                       VALUES ('s1', 'ghost-run', 'llm', 'completed', '{}', 100)";
    {
        let conn = sqlite::open_connection(db.path()).expect("conn");
        // Enforcement is disabled first so the dangling reference can be written.
        conn.execute_batch("PRAGMA foreign_keys = OFF;")
            .expect("disable FK");
        conn.execute(insert_step, [])
            .expect("insert step with ghost run_id");
    }

    let report = service.check_semantics().expect("semantics check");
    assert_eq!(report.broken_step_fk, 1);
}
8353
/// With foreign-key enforcement switched off on the seeding connection,
/// inserts an action whose `step_id` is `ghost-step` and expects
/// `check_semantics` to report one broken action foreign key.
#[test]
fn check_semantics_detects_broken_action_fk() {
    let (db, service) = setup();

    let insert_action = "INSERT INTO actions (id, step_id, kind, status, properties, created_at) \
                         VALUES ('a1', 'ghost-step', 'emit', 'completed', '{}', 100)";
    {
        let conn = sqlite::open_connection(db.path()).expect("conn");
        // Enforcement is disabled first so the dangling reference can be written.
        conn.execute_batch("PRAGMA foreign_keys = OFF;")
            .expect("disable FK");
        conn.execute(insert_action, [])
            .expect("insert action with ghost step_id");
    }

    let report = service.check_semantics().expect("semantics check");
    assert_eq!(report.broken_action_fk, 1);
}
8371
8372 #[test]
8373 fn check_semantics_detects_stale_fts_rows() {
8374 let (db, service) = setup();
8375 {
8376 let conn = sqlite::open_connection(db.path()).expect("conn");
8377 conn.execute(
8380 "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
8381 VALUES ('ghost-chunk', 'any-node', 'Meeting', 'stale content')",
8382 [],
8383 )
8384 .expect("insert stale FTS row");
8385 }
8386 let report = service.check_semantics().expect("semantics check");
8387 assert_eq!(report.stale_fts_rows, 1);
8388 }
8389
8390 #[test]
8391 fn check_semantics_detects_fts_rows_for_superseded_nodes() {
8392 let (db, service) = setup();
8393 {
8394 let conn = sqlite::open_connection(db.path()).expect("conn");
8395 conn.execute(
8397 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
8398 VALUES ('r1', 'lg-sup', 'Meeting', '{}', 100, 200, 'src-1')",
8399 [],
8400 )
8401 .expect("insert superseded node");
8402 conn.execute(
8404 "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
8405 VALUES ('ck-x', 'lg-sup', 'Meeting', 'superseded content')",
8406 [],
8407 )
8408 .expect("insert FTS row for superseded node");
8409 }
8410 let report = service.check_semantics().expect("semantics check");
8411 assert_eq!(report.fts_rows_for_superseded_nodes, 1);
8412 }
8413
8414 #[test]
8415 fn check_semantics_detects_dangling_edges() {
8416 let (db, service) = setup();
8417 {
8418 let conn = sqlite::open_connection(db.path()).expect("conn");
8419 conn.execute_batch("PRAGMA foreign_keys = OFF;")
8420 .expect("disable FK");
8421 conn.execute(
8423 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8424 VALUES ('r1', 'lg-src', 'Meeting', '{}', 100, 'src-1')",
8425 [],
8426 )
8427 .expect("insert source node");
8428 conn.execute(
8429 "INSERT INTO edges \
8430 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
8431 VALUES ('e1', 'edge-1', 'lg-src', 'ghost-target', 'LINKS', '{}', 100, 'src-1')",
8432 [],
8433 )
8434 .expect("insert dangling edge");
8435 }
8436 let report = service.check_semantics().expect("semantics check");
8437 assert_eq!(report.dangling_edges, 1);
8438 }
8439
8440 #[test]
8441 fn check_semantics_detects_orphaned_supersession_chains() {
8442 let (db, service) = setup();
8443 {
8444 let conn = sqlite::open_connection(db.path()).expect("conn");
8445 conn.execute(
8447 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
8448 VALUES ('r1', 'lg-orphaned', 'Meeting', '{}', 100, 200, 'src-1')",
8449 [],
8450 )
8451 .expect("insert fully superseded node");
8452 }
8453 let report = service.check_semantics().expect("semantics check");
8454 assert_eq!(report.orphaned_supersession_chains, 1);
8455 }
8456
8457 #[test]
8458 fn check_semantics_detects_mismatched_kind_property_fts_rows() {
8459 let (db, service) = setup();
8465 {
8466 let conn = sqlite::open_connection(db.path()).expect("conn");
8467 conn.execute(
8468 "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
8469 VALUES ('Goal', '[\"$.name\"]', ' ')",
8470 [],
8471 )
8472 .expect("register schema");
8473 conn.execute(
8474 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8475 VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'src-1')",
8476 [],
8477 )
8478 .expect("insert node");
8479 let table = fathomdb_schema::fts_kind_table_name("Goal");
8481 conn.execute_batch(&format!(
8482 "CREATE VIRTUAL TABLE IF NOT EXISTS {table} \
8483 USING fts5(node_logical_id UNINDEXED, text_content, tokenize = 'porter unicode61 remove_diacritics 2')"
8484 ))
8485 .expect("create per-kind table");
8486 conn.execute(
8487 &format!("INSERT INTO {table} (node_logical_id, text_content) VALUES ('goal-1', 'Ship v2')"),
8488 [],
8489 )
8490 .expect("insert per-kind FTS row");
8491 }
8492 let report = service.check_semantics().expect("semantics check");
8493 assert_eq!(report.mismatched_kind_property_fts_rows, 0);
8495 }
8496
8497 #[test]
8498 fn check_semantics_detects_duplicate_property_fts_rows() {
8499 let (db, service) = setup();
8500 {
8501 let conn = sqlite::open_connection(db.path()).expect("conn");
8502 conn.execute(
8503 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8504 VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'src-1')",
8505 [],
8506 )
8507 .expect("insert node");
8508 let table = fathomdb_schema::fts_kind_table_name("Goal");
8510 conn.execute_batch(&format!(
8511 "CREATE VIRTUAL TABLE IF NOT EXISTS {table} \
8512 USING fts5(node_logical_id UNINDEXED, text_content, tokenize = 'porter unicode61 remove_diacritics 2')"
8513 ))
8514 .expect("create per-kind table");
8515 conn.execute(
8516 &format!("INSERT INTO {table} (node_logical_id, text_content) VALUES ('goal-1', 'Ship v2')"),
8517 [],
8518 )
8519 .expect("insert first property FTS row");
8520 conn.execute(
8521 &format!("INSERT INTO {table} (node_logical_id, text_content) VALUES ('goal-1', 'Ship v2 duplicate')"),
8522 [],
8523 )
8524 .expect("insert duplicate property FTS row");
8525 }
8526 let report = service.check_semantics().expect("semantics check");
8527 assert_eq!(report.duplicate_property_fts_rows, 1);
8528 }
8529
8530 #[test]
8531 fn check_semantics_detects_drifted_property_fts_text() {
8532 let (db, service) = setup();
8533 {
8534 let conn = sqlite::open_connection(db.path()).expect("conn");
8535 conn.execute(
8536 "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
8537 VALUES ('Goal', '[\"$.name\"]', ' ')",
8538 [],
8539 )
8540 .expect("register schema");
8541 conn.execute(
8542 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8543 VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Current name\"}', 100, 'src-1')",
8544 [],
8545 )
8546 .expect("insert node");
8547 let table = fathomdb_schema::fts_kind_table_name("Goal");
8549 conn.execute_batch(&format!(
8550 "CREATE VIRTUAL TABLE IF NOT EXISTS {table} \
8551 USING fts5(node_logical_id UNINDEXED, text_content, tokenize = 'porter unicode61 remove_diacritics 2')"
8552 ))
8553 .expect("create per-kind table");
8554 conn.execute(
8555 &format!("INSERT INTO {table} (node_logical_id, text_content) VALUES ('goal-1', 'Old stale name')"),
8556 [],
8557 )
8558 .expect("insert stale property FTS row");
8559 }
8560 let report = service.check_semantics().expect("semantics check");
8561 assert_eq!(report.drifted_property_fts_rows, 1);
8562 }
8563
8564 #[test]
8565 fn check_semantics_detects_property_fts_row_that_should_not_exist() {
8566 let (db, service) = setup();
8567 {
8568 let conn = sqlite::open_connection(db.path()).expect("conn");
8569 conn.execute(
8570 "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
8571 VALUES ('Goal', '[\"$.searchable\"]', ' ')",
8572 [],
8573 )
8574 .expect("register schema");
8575 conn.execute(
8577 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8578 VALUES ('r1', 'goal-1', 'Goal', '{\"other\":\"field\"}', 100, 'src-1')",
8579 [],
8580 )
8581 .expect("insert node");
8582 let table = fathomdb_schema::fts_kind_table_name("Goal");
8584 conn.execute_batch(&format!(
8585 "CREATE VIRTUAL TABLE IF NOT EXISTS {table} \
8586 USING fts5(node_logical_id UNINDEXED, text_content, tokenize = 'porter unicode61 remove_diacritics 2')"
8587 ))
8588 .expect("create per-kind table");
8589 conn.execute(
8590 &format!("INSERT INTO {table} (node_logical_id, text_content) VALUES ('goal-1', 'phantom text')"),
8591 [],
8592 )
8593 .expect("insert phantom property FTS row");
8594 }
8595 let report = service.check_semantics().expect("semantics check");
8596 assert_eq!(
8597 report.drifted_property_fts_rows, 1,
8598 "row that should not exist must be counted as drifted"
8599 );
8600 }
8601
8602 #[test]
8603 fn safe_export_writes_manifest_with_sha256() {
8604 let (_db, service) = setup();
8605 let export_dir = tempfile::TempDir::new().expect("temp dir");
8606 let export_path = export_dir.path().join("backup.db");
8607
8608 let manifest = service
8609 .safe_export(
8610 &export_path,
8611 SafeExportOptions {
8612 force_checkpoint: false,
8613 },
8614 )
8615 .expect("export");
8616
8617 assert!(export_path.exists(), "exported db should exist");
8618 let manifest_path = export_dir.path().join("backup.db.export-manifest.json");
8619 assert!(
8620 manifest_path.exists(),
8621 "manifest file should exist at {}",
8622 manifest_path.display()
8623 );
8624 assert_eq!(manifest.sha256.len(), 64, "sha256 should be 64 hex chars");
8625 assert!(
8626 manifest.exported_at > 0,
8627 "exported_at should be a unix timestamp"
8628 );
8629 assert_eq!(
8630 manifest.schema_version,
8631 SchemaManager::new().current_version().0,
8632 "schema_version should match the live schema version"
8633 );
8634 assert_eq!(manifest.protocol_version, 1, "protocol_version should be 1");
8635 assert!(manifest.page_count > 0, "page_count should be positive");
8636 }
8637
8638 #[test]
8639 fn safe_export_preserves_operational_validation_contracts() {
8640 let (_db, service) = setup();
8641 let validation_json = r#"{"format_version":1,"mode":"enforce","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#;
8642 service
8643 .register_operational_collection(&OperationalRegisterRequest {
8644 name: "connector_health".to_owned(),
8645 kind: OperationalCollectionKind::LatestState,
8646 schema_json: "{}".to_owned(),
8647 retention_json: "{}".to_owned(),
8648 filter_fields_json: "[]".to_owned(),
8649 validation_json: validation_json.to_owned(),
8650 secondary_indexes_json: "[]".to_owned(),
8651 format_version: 1,
8652 })
8653 .expect("register collection");
8654
8655 let export_dir = tempfile::TempDir::new().expect("temp dir");
8656 let export_path = export_dir.path().join("backup.db");
8657 service
8658 .safe_export(
8659 &export_path,
8660 SafeExportOptions {
8661 force_checkpoint: false,
8662 },
8663 )
8664 .expect("export");
8665
8666 let exported = sqlite::open_connection(&export_path).expect("exported conn");
8667 let exported_validation_json: String = exported
8668 .query_row(
8669 "SELECT validation_json FROM operational_collections WHERE name = 'connector_health'",
8670 [],
8671 |row| row.get(0),
8672 )
8673 .expect("validation_json");
8674 assert_eq!(exported_validation_json, validation_json);
8675 }
8676
8677 #[test]
8678 fn safe_export_force_checkpoint_false_skips_wal_pragma() {
8679 let (_db, service) = setup();
8680 let export_dir = tempfile::TempDir::new().expect("temp dir");
8681 let export_path = export_dir.path().join("no-wal.db");
8682
8683 let manifest = service
8685 .safe_export(
8686 &export_path,
8687 SafeExportOptions {
8688 force_checkpoint: false,
8689 },
8690 )
8691 .expect("export with no checkpoint");
8692
8693 assert!(
8694 manifest.page_count > 0,
8695 "page_count must be populated regardless of checkpoint mode"
8696 );
8697 assert_eq!(
8698 manifest.schema_version,
8699 SchemaManager::new().current_version().0
8700 );
8701 assert_eq!(manifest.protocol_version, 1);
8702 }
8703
8704 #[test]
8705 fn safe_export_force_checkpoint_false_still_captures_wal_backed_changes() {
8706 let (db, service) = setup();
8707 let conn = sqlite::open_connection(db.path()).expect("conn");
8708 let journal_mode: String = conn
8709 .query_row("PRAGMA journal_mode=WAL", [], |row| row.get(0))
8710 .expect("enable wal");
8711 assert_eq!(journal_mode.to_lowercase(), "wal");
8712 let auto_checkpoint_pages: i64 = conn
8713 .query_row("PRAGMA wal_autocheckpoint=0", [], |row| row.get(0))
8714 .expect("disable auto checkpoint");
8715 assert_eq!(auto_checkpoint_pages, 0);
8716 conn.execute(
8717 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8718 VALUES ('r-wal', 'lg-wal', 'Meeting', '{}', 100, 'src-wal')",
8719 [],
8720 )
8721 .expect("insert wal-backed node");
8722
8723 let export_dir = tempfile::TempDir::new().expect("temp dir");
8724 let export_path = export_dir.path().join("wal-backed.db");
8725 service
8726 .safe_export(
8727 &export_path,
8728 SafeExportOptions {
8729 force_checkpoint: false,
8730 },
8731 )
8732 .expect("export wal-backed db");
8733
8734 let exported = sqlite::open_connection(&export_path).expect("open exported db");
8735 let exported_count: i64 = exported
8736 .query_row(
8737 "SELECT count(*) FROM nodes WHERE logical_id = 'lg-wal'",
8738 [],
8739 |row| row.get(0),
8740 )
8741 .expect("count exported nodes");
8742 assert_eq!(
8743 exported_count, 1,
8744 "safe_export must include committed rows that are still resident in the WAL"
8745 );
8746 }
8747
8748 #[test]
8749 fn excise_source_removes_searchable_content_after_excision() {
8750 let (db, service) = setup();
8751 {
8752 let conn = sqlite::open_connection(db.path()).expect("conn");
8753 conn.execute(
8754 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
8755 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
8756 [],
8757 )
8758 .expect("insert v1");
8759 conn.execute(
8760 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8761 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
8762 [],
8763 )
8764 .expect("insert v2");
8765 conn.execute(
8766 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
8767 VALUES ('ck1', 'lg1', 'hello world', 100)",
8768 [],
8769 )
8770 .expect("insert chunk");
8771 }
8772 service.excise_source("source-2").expect("excise");
8773 {
8774 let conn = sqlite::open_connection(db.path()).expect("conn");
8775 let fts_count: i64 = conn
8776 .query_row(
8777 "SELECT count(*) FROM fts_nodes WHERE chunk_id = 'ck1'",
8778 [],
8779 |row| row.get(0),
8780 )
8781 .expect("fts count");
8782 assert_eq!(
8783 fts_count, 0,
8784 "excised content should not remain searchable after excise"
8785 );
8786 }
8787 }
8788
8789 #[cfg(feature = "sqlite-vec")]
8790 #[test]
8791 fn excise_source_cleans_chunks_and_vec_rows_for_excised_version() {
8792 let (db, service) = setup();
8793 {
8794 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
8795 service
8796 .schema_manager
8797 .ensure_vec_kind_profile(&conn, "Meeting", 4)
8798 .expect("ensure vec kind profile");
8799 conn.execute(
8800 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
8801 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
8802 [],
8803 )
8804 .expect("insert v1");
8805 conn.execute(
8806 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8807 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
8808 [],
8809 )
8810 .expect("insert v2");
8811 conn.execute(
8812 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
8813 VALUES ('ck1', 'lg1', 'new content', 200)",
8814 [],
8815 )
8816 .expect("insert chunk");
8817 conn.execute(
8818 "INSERT INTO vec_meeting (chunk_id, embedding) VALUES ('ck1', zeroblob(16))",
8819 [],
8820 )
8821 .expect("insert vec row");
8822 }
8823
8824 service.excise_source("source-2").expect("excise");
8825
8826 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
8827 let active_row: String = conn
8828 .query_row(
8829 "SELECT row_id FROM nodes WHERE logical_id = 'lg1' AND superseded_at IS NULL",
8830 [],
8831 |row| row.get(0),
8832 )
8833 .expect("restored active row");
8834 assert_eq!(active_row, "r1");
8835 let chunk_count: i64 = conn
8836 .query_row(
8837 "SELECT count(*) FROM chunks WHERE node_logical_id = 'lg1'",
8838 [],
8839 |row| row.get(0),
8840 )
8841 .expect("chunk count");
8842 assert_eq!(
8843 chunk_count, 0,
8844 "excised source content must not survive as chunks"
8845 );
8846 let vec_count: i64 = conn
8847 .query_row("SELECT count(*) FROM vec_meeting", [], |row| row.get(0))
8848 .expect("vec count");
8849 assert_eq!(vec_count, 0, "excised source vec rows must be removed");
8850 let fts_count: i64 = conn
8851 .query_row(
8852 "SELECT count(*) FROM fts_nodes WHERE node_logical_id = 'lg1'",
8853 [],
8854 |row| row.get(0),
8855 )
8856 .expect("fts count");
8857 assert_eq!(
8858 fts_count, 0,
8859 "excised source content must not remain searchable"
8860 );
8861 }
8862
8863 #[test]
8864 fn export_page_count_matches_exported_file() {
8865 let (_db, service) = setup();
8866 let export_dir = tempfile::TempDir::new().expect("temp dir");
8867 let export_path = export_dir.path().join("page-count.db");
8868
8869 let manifest = service
8870 .safe_export(
8871 &export_path,
8872 SafeExportOptions {
8873 force_checkpoint: false,
8874 },
8875 )
8876 .expect("export");
8877
8878 let exported = sqlite::open_connection(&export_path).expect("open exported db");
8879 let actual_page_count: u64 = exported
8880 .query_row("PRAGMA page_count", [], |row| row.get(0))
8881 .expect("page_count from exported file");
8882
8883 assert_eq!(
8884 manifest.page_count, actual_page_count,
8885 "manifest page_count must match the exported file's PRAGMA page_count"
8886 );
8887 }
8888
8889 #[test]
8890 fn no_temp_file_after_successful_export() {
8891 let (_db, service) = setup();
8892 let export_dir = tempfile::TempDir::new().expect("temp dir");
8893 let export_path = export_dir.path().join("no-tmp.db");
8894
8895 service
8896 .safe_export(
8897 &export_path,
8898 SafeExportOptions {
8899 force_checkpoint: false,
8900 },
8901 )
8902 .expect("export");
8903
8904 let tmp_files: Vec<_> = fs::read_dir(export_dir.path())
8905 .expect("read export dir")
8906 .filter_map(Result::ok)
8907 .filter(|e| e.path().extension().is_some_and(|ext| ext == "tmp"))
8908 .collect();
8909
8910 assert!(
8911 tmp_files.is_empty(),
8912 "no .tmp files should remain after a successful export, found: {tmp_files:?}"
8913 );
8914 }
8915
8916 #[test]
8917 fn export_manifest_is_valid_json() {
8918 let (_db, service) = setup();
8919 let export_dir = tempfile::TempDir::new().expect("temp dir");
8920 let export_path = export_dir.path().join("valid-json.db");
8921
8922 service
8923 .safe_export(
8924 &export_path,
8925 SafeExportOptions {
8926 force_checkpoint: false,
8927 },
8928 )
8929 .expect("export");
8930
8931 let manifest_path = export_dir.path().join("valid-json.db.export-manifest.json");
8932 let manifest_contents = fs::read_to_string(&manifest_path).expect("read manifest");
8933 let parsed: serde_json::Value =
8934 serde_json::from_str(&manifest_contents).expect("manifest must be valid JSON");
8935
8936 assert!(
8937 parsed.get("exported_at").is_some(),
8938 "manifest must contain exported_at"
8939 );
8940 assert!(
8941 parsed.get("sha256").is_some(),
8942 "manifest must contain sha256"
8943 );
8944 assert!(
8945 parsed.get("schema_version").is_some(),
8946 "manifest must contain schema_version"
8947 );
8948 assert!(
8949 parsed.get("protocol_version").is_some(),
8950 "manifest must contain protocol_version"
8951 );
8952 assert!(
8953 parsed.get("page_count").is_some(),
8954 "manifest must contain page_count"
8955 );
8956 }
8957
8958 #[test]
8959 fn provenance_purge_dry_run_reports_counts() {
8960 let (db, service) = setup();
8961 {
8962 let conn = sqlite::open_connection(db.path()).expect("conn");
8963 conn.execute(
8964 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8965 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
8966 [],
8967 )
8968 .expect("insert p1");
8969 conn.execute(
8970 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8971 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 200)",
8972 [],
8973 )
8974 .expect("insert p2");
8975 conn.execute(
8976 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8977 VALUES ('p3', 'excise', 'lg3', 'src-1', 300)",
8978 [],
8979 )
8980 .expect("insert p3");
8981 }
8982
8983 let options = super::ProvenancePurgeOptions {
8984 dry_run: true,
8985 preserve_event_types: Vec::new(),
8986 };
8987 let report = service
8988 .purge_provenance_events(250, &options)
8989 .expect("dry run purge");
8990
8991 assert_eq!(report.events_deleted, 2);
8992 assert_eq!(report.events_preserved, 1);
8993 assert!(report.oldest_remaining.is_some());
8994
8995 let conn = sqlite::open_connection(db.path()).expect("conn");
8996 let total: i64 = conn
8997 .query_row("SELECT count(*) FROM provenance_events", [], |row| {
8998 row.get(0)
8999 })
9000 .expect("count");
9001 assert_eq!(total, 3, "dry_run must not delete any events");
9002 }
9003
9004 #[test]
9005 fn provenance_purge_deletes_old_events() {
9006 let (db, service) = setup();
9007 {
9008 let conn = sqlite::open_connection(db.path()).expect("conn");
9009 conn.execute(
9010 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
9011 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
9012 [],
9013 )
9014 .expect("insert p1");
9015 conn.execute(
9016 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
9017 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 200)",
9018 [],
9019 )
9020 .expect("insert p2");
9021 }
9022
9023 let options = super::ProvenancePurgeOptions {
9024 dry_run: false,
9025 preserve_event_types: Vec::new(),
9026 };
9027 let report = service
9028 .purge_provenance_events(150, &options)
9029 .expect("purge");
9030
9031 assert_eq!(report.events_deleted, 1);
9032 assert_eq!(report.events_preserved, 1);
9033 assert_eq!(report.oldest_remaining, Some(200));
9034
9035 let conn = sqlite::open_connection(db.path()).expect("conn");
9036 let remaining: i64 = conn
9037 .query_row("SELECT count(*) FROM provenance_events", [], |row| {
9038 row.get(0)
9039 })
9040 .expect("count");
9041 assert_eq!(remaining, 1);
9042 }
9043
9044 #[test]
9045 fn provenance_purge_preserves_specified_types() {
9046 let (db, service) = setup();
9047 {
9048 let conn = sqlite::open_connection(db.path()).expect("conn");
9049 conn.execute(
9050 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
9051 VALUES ('p1', 'excise', 'lg1', 'src-1', 100)",
9052 [],
9053 )
9054 .expect("insert p1");
9055 conn.execute(
9056 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
9057 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 100)",
9058 [],
9059 )
9060 .expect("insert p2");
9061 conn.execute(
9062 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
9063 VALUES ('p3', 'node_insert', 'lg3', 'src-1', 100)",
9064 [],
9065 )
9066 .expect("insert p3");
9067 }
9068
9069 let options = super::ProvenancePurgeOptions {
9070 dry_run: false,
9071 preserve_event_types: Vec::new(),
9072 };
9073 let report = service
9074 .purge_provenance_events(500, &options)
9075 .expect("purge");
9076
9077 assert_eq!(report.events_deleted, 2);
9078 assert_eq!(report.events_preserved, 1);
9079
9080 let conn = sqlite::open_connection(db.path()).expect("conn");
9081 let remaining_type: String = conn
9082 .query_row("SELECT event_type FROM provenance_events", [], |row| {
9083 row.get(0)
9084 })
9085 .expect("remaining event type");
9086 assert_eq!(remaining_type, "excise");
9087 }
9088
9089 #[test]
9090 fn provenance_purge_noop_with_zero_timestamp() {
9091 let (db, service) = setup();
9092 {
9093 let conn = sqlite::open_connection(db.path()).expect("conn");
9094 conn.execute(
9095 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
9096 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
9097 [],
9098 )
9099 .expect("insert p1");
9100 }
9101
9102 let options = super::ProvenancePurgeOptions {
9103 dry_run: false,
9104 preserve_event_types: Vec::new(),
9105 };
9106 let report = service.purge_provenance_events(0, &options).expect("purge");
9107
9108 assert_eq!(report.events_deleted, 0);
9109 assert_eq!(report.events_preserved, 1);
9110 assert_eq!(report.oldest_remaining, Some(100));
9111 }
9112
9113 #[test]
9114 fn restore_skips_edge_when_counterpart_purged() {
9115 let (db, service) = setup();
9116 {
9117 let conn = sqlite::open_connection(db.path()).expect("conn");
9118 conn.execute(
9120 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9121 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
9122 [],
9123 )
9124 .expect("insert node A");
9125 conn.execute(
9126 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9127 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
9128 [],
9129 )
9130 .expect("insert node B");
9131 conn.execute(
9133 "INSERT INTO edges \
9134 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
9135 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
9136 [],
9137 )
9138 .expect("insert edge");
9139 conn.execute(
9141 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9142 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
9143 [],
9144 )
9145 .expect("insert retire event A");
9146 conn.execute(
9147 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9148 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
9149 [],
9150 )
9151 .expect("insert edge retire event");
9152 conn.execute(
9153 "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
9154 [],
9155 )
9156 .expect("retire node A");
9157 conn.execute(
9158 "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-2'",
9159 [],
9160 )
9161 .expect("retire node B");
9162 conn.execute(
9163 "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
9164 [],
9165 )
9166 .expect("retire edge");
9167 conn.execute("DELETE FROM nodes WHERE logical_id = 'doc-2'", [])
9170 .expect("purge node B rows");
9171 }
9172
9173 let report = service.restore_logical_id("doc-1").expect("restore A");
9175 assert!(!report.was_noop);
9176 assert_eq!(report.restored_node_rows, 1);
9177 assert_eq!(report.restored_edge_rows, 0, "edge should not be restored");
9178 assert_eq!(report.skipped_edges.len(), 1);
9179 assert_eq!(report.skipped_edges[0].edge_logical_id, "edge-1");
9180 assert_eq!(report.skipped_edges[0].missing_endpoint, "doc-2");
9181
9182 let conn = sqlite::open_connection(db.path()).expect("conn");
9184 let active_edge_count: i64 = conn
9185 .query_row(
9186 "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
9187 [],
9188 |row| row.get(0),
9189 )
9190 .expect("active edge count");
9191 assert_eq!(active_edge_count, 0, "edge must remain retired");
9192 }
9193
9194 #[test]
9195 fn restore_restores_edges_to_active_nodes() {
9196 let (db, service) = setup();
9197 {
9198 let conn = sqlite::open_connection(db.path()).expect("conn");
9199 conn.execute(
9201 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9202 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
9203 [],
9204 )
9205 .expect("insert node A");
9206 conn.execute(
9207 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9208 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
9209 [],
9210 )
9211 .expect("insert node B");
9212 conn.execute(
9214 "INSERT INTO edges \
9215 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
9216 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
9217 [],
9218 )
9219 .expect("insert edge");
9220 conn.execute(
9222 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9223 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
9224 [],
9225 )
9226 .expect("insert retire event A");
9227 conn.execute(
9228 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9229 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
9230 [],
9231 )
9232 .expect("insert edge retire event");
9233 conn.execute(
9234 "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
9235 [],
9236 )
9237 .expect("retire node A");
9238 conn.execute(
9239 "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
9240 [],
9241 )
9242 .expect("retire edge");
9243 }
9244
9245 let report = service.restore_logical_id("doc-1").expect("restore A");
9247 assert!(!report.was_noop);
9248 assert_eq!(report.restored_node_rows, 1);
9249 assert!(report.restored_edge_rows > 0, "edge should be restored");
9250 assert!(
9251 report.skipped_edges.is_empty(),
9252 "no edges should be skipped"
9253 );
9254
9255 let conn = sqlite::open_connection(db.path()).expect("conn");
9256 let active_edge_count: i64 = conn
9257 .query_row(
9258 "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
9259 [],
9260 |row| row.get(0),
9261 )
9262 .expect("active edge count");
9263 assert_eq!(active_edge_count, 1, "edge must be active");
9264 }
9265
9266 #[test]
9267 fn restore_restores_edges_when_both_restored() {
9268 let (db, service) = setup();
9269 {
9270 let conn = sqlite::open_connection(db.path()).expect("conn");
9271 conn.execute(
9273 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9274 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
9275 [],
9276 )
9277 .expect("insert node A");
9278 conn.execute(
9279 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9280 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
9281 [],
9282 )
9283 .expect("insert node B");
9284 conn.execute(
9286 "INSERT INTO edges \
9287 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
9288 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
9289 [],
9290 )
9291 .expect("insert edge");
9292 conn.execute(
9294 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9295 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
9296 [],
9297 )
9298 .expect("insert retire event A");
9299 conn.execute(
9300 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9301 VALUES ('evt-retire-b', 'node_retire', 'doc-2', 'forget-1', 200, '')",
9302 [],
9303 )
9304 .expect("insert retire event B");
9305 conn.execute(
9306 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9307 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
9308 [],
9309 )
9310 .expect("insert edge retire event");
9311 conn.execute(
9312 "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
9313 [],
9314 )
9315 .expect("retire node A");
9316 conn.execute(
9317 "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-2'",
9318 [],
9319 )
9320 .expect("retire node B");
9321 conn.execute(
9322 "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
9323 [],
9324 )
9325 .expect("retire edge");
9326 }
9327
9328 let report_b = service.restore_logical_id("doc-2").expect("restore B");
9330 assert!(!report_b.was_noop);
9331
9332 let report_a = service.restore_logical_id("doc-1").expect("restore A");
9334 assert!(!report_a.was_noop);
9335 assert_eq!(report_a.restored_node_rows, 1);
9336 assert!(
9337 report_a.restored_edge_rows > 0,
9338 "edge should be restored when both endpoints active"
9339 );
9340 assert!(
9341 report_a.skipped_edges.is_empty(),
9342 "no edges should be skipped"
9343 );
9344
9345 let conn = sqlite::open_connection(db.path()).expect("conn");
9346 let active_edge_count: i64 = conn
9347 .query_row(
9348 "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
9349 [],
9350 |row| row.get(0),
9351 )
9352 .expect("active edge count");
9353 assert_eq!(
9354 active_edge_count, 1,
9355 "edge must be active after both endpoints restored"
9356 );
9357 }
9358
/// Walks a scalar FTS property schema for the `Meeting` kind through its
/// whole lifecycle: register, describe, list, re-register with different
/// paths and separator, remove, and finally a second remove that must fail.
#[test]
fn fts_property_schema_crud_round_trip() {
    let (_db, service) = setup();

    // Create without an explicit separator; the assertions below pin the
    // values reported back for that case.
    let initial_paths = ["$.title".to_owned(), "$.summary".to_owned()];
    let created = service
        .register_fts_property_schema("Meeting", &initial_paths, None)
        .expect("register");
    assert_eq!(created.kind, "Meeting");
    assert_eq!(created.property_paths, vec!["$.title", "$.summary"]);
    assert_eq!(created.separator, " ");
    assert_eq!(created.format_version, 1);

    // Read back the single record and compare it with what register returned.
    let fetched = service
        .describe_fts_property_schema("Meeting")
        .expect("describe")
        .expect("should exist");
    assert_eq!(fetched, created);

    // Describing an unregistered kind succeeds but yields nothing.
    let absent = service
        .describe_fts_property_schema("NoSuchKind")
        .expect("describe missing");
    assert!(absent.is_none());

    let all_schemas = service.list_fts_property_schemas().expect("list");
    assert_eq!(all_schemas.len(), 1);
    assert_eq!(all_schemas[0].kind, "Meeting");

    // Registering the same kind again replaces paths and separator.
    let replacement_paths = ["$.title".to_owned(), "$.notes".to_owned()];
    let replaced = service
        .register_fts_property_schema("Meeting", &replacement_paths, Some("\n"))
        .expect("update");
    assert_eq!(replaced.property_paths, vec!["$.title", "$.notes"]);
    assert_eq!(replaced.separator, "\n");

    // Delete, then confirm the record is no longer describable.
    service
        .remove_fts_property_schema("Meeting")
        .expect("remove");
    let after_removal = service
        .describe_fts_property_schema("Meeting")
        .expect("describe after remove");
    assert!(after_removal.is_none());

    // Removing a schema that no longer exists is reported as an error.
    let second_removal = service.remove_fts_property_schema("Meeting");
    assert!(second_removal.is_err());
}
9420
/// Registers a schema mixing a scalar and a recursive entry plus one exclude
/// path, then checks that both the registration result and a later describe
/// call report the same entries, exclude paths and property paths.
#[test]
fn describe_fts_property_schema_round_trips_recursive_entries() {
    let (_db, service) = setup();

    let specs = vec![
        FtsPropertyPathSpec::scalar("$.title"),
        FtsPropertyPathSpec::recursive("$.payload"),
    ];
    let excluded = vec!["$.payload.private".to_owned()];

    let registration = service.register_fts_property_schema_with_entries(
        "KnowledgeItem",
        &specs,
        Some(" "),
        &excluded,
        crate::rebuild_actor::RebuildMode::Eager,
    );
    let registered = registration.expect("register recursive");

    // The value returned by register already reflects the full entry shape.
    assert_eq!(registered.entries, specs);
    assert_eq!(registered.exclude_paths, excluded);
    assert_eq!(registered.property_paths, vec!["$.title", "$.payload"]);

    // A fresh describe call must report the same data.
    let lookup = service
        .describe_fts_property_schema("KnowledgeItem")
        .expect("describe");
    let described = lookup.expect("should exist");
    assert_eq!(described.kind, "KnowledgeItem");
    assert_eq!(described.entries, specs);
    assert_eq!(described.exclude_paths, excluded);
    assert_eq!(described.property_paths, vec!["$.title", "$.payload"]);
    assert_eq!(described.separator, " ");
    assert_eq!(described.format_version, 1);
}
9457
/// Same round trip as the describe variant, but reads the schema back
/// through `list_fts_property_schemas` and inspects its only element.
#[test]
fn list_fts_property_schemas_round_trips_recursive_entries() {
    let (_db, service) = setup();

    let specs = vec![
        FtsPropertyPathSpec::scalar("$.title"),
        FtsPropertyPathSpec::recursive("$.payload"),
    ];
    let excluded = vec!["$.payload.secret".to_owned()];

    let registration = service.register_fts_property_schema_with_entries(
        "KnowledgeItem",
        &specs,
        Some(" "),
        &excluded,
        crate::rebuild_actor::RebuildMode::Eager,
    );
    registration.expect("register recursive");

    let all_schemas = service.list_fts_property_schemas().expect("list");
    assert_eq!(all_schemas.len(), 1);

    // Exactly one schema was registered, so index 0 is the one under test.
    let only = &all_schemas[0];
    assert_eq!(only.kind, "KnowledgeItem");
    assert_eq!(only.entries, specs);
    assert_eq!(only.exclude_paths, excluded);
    assert_eq!(only.property_paths, vec!["$.title", "$.payload"]);
}
9485
/// A schema registered through the plain path-list API must describe back
/// with one `Scalar` entry per path and no exclude paths.
#[test]
fn describe_fts_property_schema_round_trips_scalar_only_entries() {
    let (_db, service) = setup();

    let scalar_paths = ["$.title".to_owned(), "$.summary".to_owned()];
    service
        .register_fts_property_schema("Meeting", &scalar_paths, None)
        .expect("register scalar");

    let lookup = service
        .describe_fts_property_schema("Meeting")
        .expect("describe");
    let described = lookup.expect("should exist");

    assert_eq!(described.property_paths, vec!["$.title", "$.summary"]);
    assert_eq!(described.entries.len(), 2);
    // Every entry, not just the first, has to come back in scalar mode.
    described.entries.iter().for_each(|spec| {
        assert_eq!(
            spec.mode,
            FtsPropertyPathMode::Scalar,
            "scalar-only schema should deserialize every entry as Scalar"
        );
    });
    assert!(described.exclude_paths.is_empty());
}
9513
/// Seeds a retired `Document` node whose FTS rows have been wiped, restores
/// it by logical id, and checks that exactly one property FTS row with the
/// expected text is present again in the per-kind table.
#[test]
fn restore_reestablishes_property_fts_visibility() {
    let (db, service) = setup();
    let fts_table = fathomdb_schema::fts_kind_table_name("Document");

    // --- Seed phase: write everything directly through a raw connection. ---
    let seed = sqlite::open_connection(db.path()).expect("conn");

    seed.execute(
        "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
         VALUES ('Document', '[\"$.title\", \"$.body\"]', ' ')",
        [],
    )
    .expect("register schema");

    let create_table_sql = format!(
        "CREATE VIRTUAL TABLE IF NOT EXISTS {fts_table} USING fts5(\
         node_logical_id UNINDEXED, text_content, \
         tokenize = 'porter unicode61 remove_diacritics 2'\
         )"
    );
    seed.execute_batch(&create_table_sql)
        .expect("create per-kind table");

    seed.execute(
        "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
         VALUES ('row-1', 'doc-1', 'Document', '{\"title\":\"Budget\",\"body\":\"Q3 forecast\"}', 100, 'seed')",
        [],
    )
    .expect("insert node");

    seed.execute(
        "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
         VALUES ('chunk-1', 'doc-1', 'budget text', 100)",
        [],
    )
    .expect("insert chunk");

    let insert_fts_sql = format!(
        "INSERT INTO {fts_table} (node_logical_id, text_content) \
         VALUES ('doc-1', 'Budget Q3 forecast')"
    );
    seed.execute(&insert_fts_sql, [])
        .expect("insert property fts");

    // Record the retirement and mark the node superseded.
    seed.execute(
        "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
         VALUES ('evt-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
        [],
    )
    .expect("retire event");
    seed.execute(
        "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
        [],
    )
    .expect("supersede");

    // Empty both FTS tables so any row found later was written by restore.
    seed.execute("DELETE FROM fts_nodes", [])
        .expect("clear chunk fts");
    let clear_fts_sql = format!("DELETE FROM {fts_table}");
    seed.execute(&clear_fts_sql, [])
        .expect("clear property fts");
    drop(seed);

    // --- Act. ---
    let report = service.restore_logical_id("doc-1").expect("restore");
    assert_eq!(report.restored_property_fts_rows, 1);

    // --- Verify against the database itself. ---
    let verify = sqlite::open_connection(db.path()).expect("conn");

    let count_sql = format!("SELECT count(*) FROM {fts_table} WHERE node_logical_id = 'doc-1'");
    let restored_rows: i64 = verify
        .query_row(&count_sql, [], |r| r.get(0))
        .expect("prop fts count");
    assert_eq!(restored_rows, 1, "property FTS must be restored");

    let text_sql = format!("SELECT text_content FROM {fts_table} WHERE node_logical_id = 'doc-1'");
    let restored_text: String = verify
        .query_row(&text_sql, [], |r| r.get(0))
        .expect("prop fts text");
    assert_eq!(restored_text, "Budget Q3 forecast");
}
9599
/// After `safe_export`, the exported database file must still contain the
/// registered `Goal` FTS property schema with its original path list.
#[test]
fn safe_export_preserves_fts_property_schemas() {
    let (_db, service) = setup();

    let goal_paths = ["$.name".to_owned(), "$.rationale".to_owned()];
    service
        .register_fts_property_schema("Goal", &goal_paths, None)
        .expect("register schema");

    // Export into a throwaway directory.
    let scratch = tempfile::TempDir::new().expect("temp dir");
    let backup_path = scratch.path().join("backup.db");
    let options = SafeExportOptions {
        force_checkpoint: false,
    };
    service.safe_export(&backup_path, options).expect("export");

    // Inspect the exported file with a plain rusqlite connection.
    let exported = rusqlite::Connection::open(&backup_path).expect("open exported db");

    let stored_kind: String = exported
        .query_row(
            "SELECT kind FROM fts_property_schemas WHERE kind = 'Goal'",
            [],
            |r| r.get(0),
        )
        .expect("schema must exist in export");
    assert_eq!(stored_kind, "Goal");

    let stored_paths_json: String = exported
        .query_row(
            "SELECT property_paths_json FROM fts_property_schemas WHERE kind = 'Goal'",
            [],
            |r| r.get(0),
        )
        .expect("paths must exist");
    let stored_paths: Vec<String> = serde_json::from_str(&stored_paths_json).expect("valid json");
    assert_eq!(stored_paths, vec!["$.name", "$.rationale"]);
}
9642
/// End-to-end recovery scenario: seed two `Goal` nodes with FTS rows, export
/// the database, damage the exported FTS table (one row replaced with wrong
/// text, one row deleted), then rebuild FTS projections on the export and
/// verify the table matches the node properties again.
#[test]
// Long by nature: seed, export, corrupt, rebuild and verify in one scenario.
#[allow(clippy::too_many_lines)]
fn export_recovery_rebuilds_property_fts_from_canonical_state() {
    let (db, service) = setup();
    let fts_table = fathomdb_schema::fts_kind_table_name("Goal");

    let goal_paths = ["$.name".to_owned()];
    service
        .register_fts_property_schema("Goal", &goal_paths, None)
        .expect("register");

    // --- Seed two nodes together with their matching FTS rows. ---
    let seed = sqlite::open_connection(db.path()).expect("conn");
    seed.execute(
        "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
         VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
        [],
    )
    .expect("insert node 1");
    let fts_row_1_sql = format!(
        "INSERT INTO {fts_table} (node_logical_id, text_content) \
         VALUES ('goal-1', 'Ship v2')"
    );
    seed.execute(&fts_row_1_sql, [])
        .expect("insert property FTS row 1");
    seed.execute(
        "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
         VALUES ('row-2', 'goal-2', 'Goal', '{\"name\":\"Launch redesign\"}', 100, 'seed')",
        [],
    )
    .expect("insert node 2");
    let fts_row_2_sql = format!(
        "INSERT INTO {fts_table} (node_logical_id, text_content) \
         VALUES ('goal-2', 'Launch redesign')"
    );
    seed.execute(&fts_row_2_sql, [])
        .expect("insert property FTS row 2");
    drop(seed);

    // --- Export. ---
    let scratch = tempfile::TempDir::new().expect("temp dir");
    let backup_path = scratch.path().join("backup.db");
    let options = SafeExportOptions {
        force_checkpoint: false,
    };
    service.safe_export(&backup_path, options).expect("export");

    // --- Damage the exported FTS table. ---
    let damaged = rusqlite::Connection::open(&backup_path).expect("open export");
    SchemaManager::new()
        .bootstrap(&damaged)
        .expect("bootstrap export");
    // goal-1: swap the correct row for one carrying wrong text.
    let drop_goal_1_sql = format!("DELETE FROM {fts_table} WHERE node_logical_id = 'goal-1'");
    damaged
        .execute(&drop_goal_1_sql, [])
        .expect("delete old row");
    let stale_row_sql = format!(
        "INSERT INTO {fts_table} (node_logical_id, text_content) \
         VALUES ('goal-1', 'completely wrong stale text')"
    );
    damaged
        .execute(&stale_row_sql, [])
        .expect("insert corrupted row");
    // goal-2: remove the row entirely.
    let drop_goal_2_sql = format!("DELETE FROM {fts_table} WHERE node_logical_id = 'goal-2'");
    damaged
        .execute(&drop_goal_2_sql, [])
        .expect("delete goal-2 row");
    drop(damaged);

    // --- Rebuild on the export through a fresh admin service. ---
    let schema = Arc::new(SchemaManager::new());
    let exported_service = AdminService::new(&backup_path, Arc::clone(&schema));
    exported_service
        .rebuild_projections(ProjectionTarget::Fts)
        .expect("rebuild");

    // --- Verify table contents row by row. ---
    let verify = rusqlite::Connection::open(&backup_path).expect("open export for verify");

    let goal_1_text_sql =
        format!("SELECT text_content FROM {fts_table} WHERE node_logical_id = 'goal-1'");
    let goal_1_text: String = verify
        .query_row(&goal_1_text_sql, [], |row| row.get(0))
        .expect("goal-1 text after rebuild");
    assert_eq!(
        goal_1_text, "Ship v2",
        "goal-1 text must be corrected by rebuild"
    );

    let goal_2_count_sql =
        format!("SELECT count(*) FROM {fts_table} WHERE node_logical_id = 'goal-2'");
    let goal_2_rows: i64 = verify
        .query_row(&goal_2_count_sql, [], |row| row.get(0))
        .expect("goal-2 count");
    assert_eq!(goal_2_rows, 1, "goal-2 row must be restored by rebuild");

    let stale_count_sql = format!(
        "SELECT count(*) FROM {fts_table} WHERE text_content = 'completely wrong stale text'"
    );
    let stale_rows: i64 = verify
        .query_row(&stale_count_sql, [], |row| row.get(0))
        .expect("stale count");
    assert_eq!(stale_rows, 0, "corrupted text must be gone after rebuild");

    // --- The service-level checks must agree with the raw queries. ---
    let integrity = exported_service.check_integrity().expect("integrity");
    assert_eq!(integrity.missing_property_fts_rows, 0);
    let semantics = exported_service.check_semantics().expect("semantics");
    assert_eq!(semantics.drifted_property_fts_rows, 0);
    assert_eq!(semantics.orphaned_property_fts_rows, 0);
    assert_eq!(semantics.duplicate_property_fts_rows, 0);
}
9772
/// A `Ticket` node whose properties contain none of the schema's paths
/// (`$.searchable` is absent) must not be reported as a missing FTS row.
#[test]
fn check_integrity_no_false_positives_for_empty_extraction() {
    let (db, service) = setup();

    let seed = sqlite::open_connection(db.path()).expect("conn");
    seed.execute(
        "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
         VALUES ('Ticket', '[\"$.searchable\"]', ' ')",
        [],
    )
    .expect("register schema");
    // The node only carries `status`, so the schema path matches nothing.
    seed.execute(
        "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
         VALUES ('row-1', 'ticket-1', 'Ticket', '{\"status\":\"open\"}', 100, 'seed')",
        [],
    )
    .expect("insert node");
    drop(seed);

    let integrity = service.check_integrity().expect("integrity");
    assert_eq!(
        integrity.missing_property_fts_rows, 0,
        "node with no extractable values must not be counted as missing"
    );
}
9801
/// A `Ticket` node that does carry the schema's `$.title` path but has no
/// property FTS row must be counted as exactly one missing row.
#[test]
fn check_integrity_detects_genuinely_missing_property_fts_rows() {
    let (db, service) = setup();

    let seed = sqlite::open_connection(db.path()).expect("conn");
    seed.execute(
        "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
         VALUES ('Ticket', '[\"$.title\"]', ' ')",
        [],
    )
    .expect("register schema");
    // No FTS row is written for this node, which is the gap to detect.
    seed.execute(
        "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
         VALUES ('row-1', 'ticket-1', 'Ticket', '{\"title\":\"fix login bug\"}', 100, 'seed')",
        [],
    )
    .expect("insert node");
    drop(seed);

    let integrity = service.check_integrity().expect("integrity");
    assert_eq!(
        integrity.missing_property_fts_rows, 1,
        "node with extractable values but no property FTS row must be detected"
    );
}
9828
/// With a schema row and a node inserted directly (and no FTS row written),
/// a full FTS rebuild must report at least one rebuilt row and leave the
/// node's extracted text in the per-kind table.
#[test]
fn rebuild_projections_fts_restores_missing_property_fts_rows() {
    let (db, service) = setup();
    let fts_table = fathomdb_schema::fts_kind_table_name("Goal");

    let seed = sqlite::open_connection(db.path()).expect("conn");
    seed.execute(
        "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
         VALUES ('Goal', '[\"$.name\"]', ' ')",
        [],
    )
    .expect("register schema");
    seed.execute(
        "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
         VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
        [],
    )
    .expect("insert node");
    drop(seed);

    let rebuild = service
        .rebuild_projections(ProjectionTarget::Fts)
        .expect("rebuild");
    assert!(
        rebuild.rebuilt_rows >= 1,
        "rebuild must insert at least one property FTS row"
    );

    // Reading the row back doubles as the existence check.
    let verify = sqlite::open_connection(db.path()).expect("conn");
    let text_sql = format!("SELECT text_content FROM {fts_table} WHERE node_logical_id = 'goal-1'");
    let rebuilt_text: String = verify
        .query_row(&text_sql, [], |r| r.get(0))
        .expect("property FTS row must exist after rebuild");
    assert_eq!(rebuilt_text, "Ship v2");
}
9868
/// Creates the per-kind FTS table, inserts and then deletes the row for
/// `goal-1`, and checks that `rebuild_missing_projections` puts exactly one
/// row back.
#[test]
fn rebuild_missing_projections_fills_gap_for_deleted_property_fts_row() {
    let (db, service) = setup();
    let fts_table = fathomdb_schema::fts_kind_table_name("Goal");

    let seed = sqlite::open_connection(db.path()).expect("conn");
    seed.execute(
        "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
         VALUES ('Goal', '[\"$.name\"]', ' ')",
        [],
    )
    .expect("register schema");
    seed.execute(
        "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
         VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
        [],
    )
    .expect("insert node");

    let create_table_sql = format!(
        "CREATE VIRTUAL TABLE IF NOT EXISTS {fts_table} USING fts5(\
         node_logical_id UNINDEXED, text_content, \
         tokenize = 'porter unicode61 remove_diacritics 2'\
         )"
    );
    seed.execute_batch(&create_table_sql)
        .expect("create per-kind table");

    // Write the row and remove it again, leaving the table present but
    // without an entry for goal-1.
    let insert_sql = format!(
        "INSERT INTO {fts_table} (node_logical_id, text_content) \
         VALUES ('goal-1', 'Ship v2')"
    );
    seed.execute(&insert_sql, []).expect("insert property fts");
    let delete_sql = format!("DELETE FROM {fts_table} WHERE node_logical_id = 'goal-1'");
    seed.execute(&delete_sql, []).expect("delete property fts");
    drop(seed);

    let rebuild = service
        .rebuild_missing_projections()
        .expect("rebuild missing");
    assert!(
        rebuild.rebuilt_rows >= 1,
        "missing rebuild must insert the gap-fill row"
    );

    let verify = sqlite::open_connection(db.path()).expect("conn");
    let count_sql = format!("SELECT count(*) FROM {fts_table} WHERE node_logical_id = 'goal-1'");
    let rows_for_goal: i64 = verify
        .query_row(&count_sql, [], |r| r.get(0))
        .expect("count");
    assert_eq!(
        rows_for_goal, 1,
        "gap-fill must restore exactly one property FTS row"
    );
}
9931
/// Places an FTS row in the `Goal` per-kind table while no `Goal` schema is
/// registered. `check_semantics` must flag it as orphaned, and a full FTS
/// rebuild must delete it.
#[test]
fn remove_schema_then_rebuild_cleans_stale_property_fts_rows() {
    let (db, service) = setup();
    let fts_table = fathomdb_schema::fts_kind_table_name("Goal");

    // Seed a node and an FTS row; no fts_property_schemas row is written.
    let seed = sqlite::open_connection(db.path()).expect("conn");
    seed.execute(
        "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
         VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
        [],
    )
    .expect("insert node");
    let create_table_sql = format!(
        "CREATE VIRTUAL TABLE IF NOT EXISTS {fts_table} \
         USING fts5(node_logical_id UNINDEXED, text_content, tokenize = 'porter unicode61 remove_diacritics 2')"
    );
    seed.execute_batch(&create_table_sql)
        .expect("create per-kind table");
    let insert_sql = format!(
        "INSERT INTO {fts_table} (node_logical_id, text_content) \
         VALUES ('goal-1', 'Ship v2')"
    );
    seed.execute(&insert_sql, []).expect("insert property fts");
    drop(seed);

    // Detection.
    let semantics = service.check_semantics().expect("semantics");
    assert_eq!(
        semantics.orphaned_property_fts_rows, 1,
        "orphaned property FTS rows must be detected with no registered schema"
    );

    // Cleanup via rebuild.
    service
        .rebuild_projections(ProjectionTarget::Fts)
        .expect("rebuild");

    let verify = sqlite::open_connection(db.path()).expect("conn");
    let count_sql = format!("SELECT count(*) FROM {fts_table} WHERE node_logical_id = 'goal-1'");
    let remaining: i64 = verify
        .query_row(&count_sql, [], |r| r.get(0))
        .expect("count");
    assert_eq!(
        remaining, 0,
        "rebuild must delete rows from per-kind tables with no registered schema"
    );
}
9991
9992 mod validate_fts_property_paths_tests {
9993 use super::super::validate_fts_property_paths;
9994
9995 #[test]
9996 fn valid_simple_path() {
9997 assert!(validate_fts_property_paths(&["$.name".to_owned()]).is_ok());
9998 }
9999
10000 #[test]
10001 fn valid_nested_path() {
10002 assert!(validate_fts_property_paths(&["$.address.city".to_owned()]).is_ok());
10003 }
10004
10005 #[test]
10006 fn valid_underscore_segment() {
10007 assert!(validate_fts_property_paths(&["$.a_b".to_owned()]).is_ok());
10008 }
10009
10010 #[test]
10011 fn rejects_bare_prefix() {
10012 let result = validate_fts_property_paths(&["$.".to_owned()]);
10013 assert!(result.is_err(), "path '$.' must be rejected");
10014 }
10015
10016 #[test]
10017 fn rejects_double_dot() {
10018 let result = validate_fts_property_paths(&["$..x".to_owned()]);
10019 assert!(result.is_err(), "path '$..x' must be rejected");
10020 }
10021
10022 #[test]
10023 fn rejects_trailing_dot() {
10024 let result = validate_fts_property_paths(&["$.foo.".to_owned()]);
10025 assert!(result.is_err(), "path '$.foo.' must be rejected");
10026 }
10027
10028 #[test]
10029 fn rejects_space_in_segment() {
10030 let result = validate_fts_property_paths(&["$.foo bar".to_owned()]);
10031 assert!(result.is_err(), "path '$.foo bar' must be rejected");
10032 }
10033
10034 #[test]
10035 fn rejects_bracket_syntax() {
10036 let result = validate_fts_property_paths(&["$.foo[0]".to_owned()]);
10037 assert!(result.is_err(), "path '$.foo[0]' must be rejected");
10038 }
10039
10040 #[test]
10041 fn rejects_duplicates() {
10042 let result = validate_fts_property_paths(&["$.name".to_owned(), "$.name".to_owned()]);
10043 assert!(result.is_err(), "duplicate paths must be rejected");
10044 }
10045
10046 #[test]
10047 fn rejects_empty_list() {
10048 let result = validate_fts_property_paths(&[]);
10049 assert!(result.is_err(), "empty path list must be rejected");
10050 }
10051 }
10052
/// Registering a schema for a kind that already has a node must leave a row
/// for that node in the kind's FTS table.
#[test]
fn register_fts_schema_writes_to_per_kind_table() {
    let (db, service) = setup();

    // The node exists before any schema is registered.
    let seed = sqlite::open_connection(db.path()).expect("conn");
    seed.execute(
        "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
         VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
        [],
    )
    .expect("insert node");
    drop(seed);

    let goal_paths = ["$.name".to_owned()];
    service
        .register_fts_property_schema("Goal", &goal_paths, None)
        .expect("register schema");

    let verify = sqlite::open_connection(db.path()).expect("conn");
    let fts_table = fathomdb_schema::fts_kind_table_name("Goal");
    let count_sql = format!("SELECT count(*) FROM {fts_table} WHERE node_logical_id = 'goal-1'");
    let rows_for_goal: i64 = verify
        .query_row(&count_sql, [], |r| r.get(0))
        .expect("per-kind count");
    assert_eq!(
        rows_for_goal, 1,
        "per-kind table must have the row after registration"
    );
}
10091
/// Registering and then removing a schema must leave no row for the node in
/// the kind's FTS table.
#[test]
fn remove_fts_schema_deletes_from_per_kind_table() {
    let (db, service) = setup();

    let seed = sqlite::open_connection(db.path()).expect("conn");
    seed.execute(
        "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
         VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
        [],
    )
    .expect("insert node");
    drop(seed);

    // Register first so there is something for the removal to clean up.
    let goal_paths = ["$.name".to_owned()];
    service
        .register_fts_property_schema("Goal", &goal_paths, None)
        .expect("register schema");
    service
        .remove_fts_property_schema("Goal")
        .expect("remove schema");

    let verify = sqlite::open_connection(db.path()).expect("conn");
    let fts_table = fathomdb_schema::fts_kind_table_name("Goal");
    let count_sql = format!("SELECT count(*) FROM {fts_table} WHERE node_logical_id = 'goal-1'");
    let rows_for_goal: i64 = verify
        .query_row(&count_sql, [], |r| r.get(0))
        .expect("per-kind count");
    assert_eq!(
        rows_for_goal, 0,
        "per-kind table must be empty after schema removal"
    );
}
10127
/// `with_weight` must set the weight while leaving the path and the scalar
/// mode chosen by `scalar` untouched.
#[test]
fn fts_path_spec_with_weight_builder() {
    let base = FtsPropertyPathSpec::scalar("$.title");
    let weighted = base.with_weight(5.0);

    assert_eq!(weighted.weight, Some(5.0));
    assert_eq!(weighted.path, "$.title");
    assert_eq!(weighted.mode, FtsPropertyPathMode::Scalar);
}
10137
/// When at least one spec carries a weight, the serialized JSON must be an
/// object with a `paths` array whose weighted element has a `weight` field
/// and whose unweighted element omits it.
#[test]
fn fts_path_spec_serialize_with_weight() {
    use super::serialize_property_paths_json;

    let specs = vec![
        FtsPropertyPathSpec::scalar("$.title").with_weight(2.0),
        FtsPropertyPathSpec::scalar("$.body"),
    ];
    let serialized = serialize_property_paths_json(&specs, &[]).expect("serialize");
    let document: serde_json::Value = serde_json::from_str(&serialized).expect("parse");

    let paths_value = document.get("paths").expect("paths key");
    let path_items = paths_value.as_array().expect("array");
    assert_eq!(path_items.len(), 2);

    // First element: the weighted `$.title` spec.
    let title_path = path_items[0]
        .get("path")
        .and_then(serde_json::Value::as_str);
    assert_eq!(title_path, Some("$.title"));
    let title_weight = path_items[0]
        .get("weight")
        .and_then(serde_json::Value::as_f64);
    assert_eq!(title_weight, Some(2.0));

    // Second element: the unweighted `$.body` spec.
    assert!(
        path_items[1].get("weight").is_none(),
        "unweighted spec must omit weight field"
    );
}
10169
/// Scalar specs without weights must serialize to a plain JSON array of
/// path strings, in the order given.
#[test]
fn fts_path_spec_serialize_no_weights() {
    use super::serialize_property_paths_json;

    let specs = vec![
        FtsPropertyPathSpec::scalar("$.title"),
        FtsPropertyPathSpec::scalar("$.payload"),
    ];
    let serialized = serialize_property_paths_json(&specs, &[]).expect("serialize");
    let document: serde_json::Value = serde_json::from_str(&serialized).expect("parse");

    assert!(
        document.is_array(),
        "all-scalar no-weight schema must serialize as bare string array"
    );
    let items = document.as_array().expect("array");
    assert_eq!(items.len(), 2);
    assert_eq!(items[0].as_str(), Some("$.title"));
    assert_eq!(items[1].as_str(), Some("$.payload"));
}
10189
/// Registration must fail for a weight of `0.0` (with an error mentioning
/// "weight") and for a weight of `1001.0`.
#[test]
fn fts_weight_validation_out_of_range() {
    let (_db, service) = setup();

    // Lower bound violation.
    let zero_weight = vec![FtsPropertyPathSpec::scalar("$.title").with_weight(0.0)];
    let zero_outcome = service.register_fts_property_schema_with_entries(
        "Article",
        &zero_weight,
        None,
        &[],
        crate::rebuild_actor::RebuildMode::Eager,
    );
    assert!(zero_outcome.is_err(), "weight 0.0 must be rejected");
    let zero_message = zero_outcome
        .expect_err("weight 0.0 must be rejected")
        .to_string();
    assert!(
        zero_message.contains("weight"),
        "error must mention weight: {zero_message}"
    );

    // Upper bound violation.
    let huge_weight = vec![FtsPropertyPathSpec::scalar("$.title").with_weight(1001.0)];
    let huge_outcome = service.register_fts_property_schema_with_entries(
        "Article",
        &huge_weight,
        None,
        &[],
        crate::rebuild_actor::RebuildMode::Eager,
    );
    assert!(huge_outcome.is_err(), "weight 1001.0 must be rejected");
}
10220
/// A weight of `10.0` must be accepted by registration.
#[test]
fn fts_weight_validation_valid() {
    let (_db, service) = setup();

    let weighted = vec![FtsPropertyPathSpec::scalar("$.title").with_weight(10.0)];
    let outcome = service.register_fts_property_schema_with_entries(
        "Article",
        &weighted,
        None,
        &[],
        crate::rebuild_actor::RebuildMode::Eager,
    );

    // On failure, print the error so the rejection reason is visible.
    assert!(
        outcome.is_ok(),
        "weight 10.0 must be accepted: {:?}",
        outcome.err()
    );
}
10238
/// `create_or_replace_fts_kind_table` must produce an empty table that has
/// one column per spec, named by `fts_column_name`, so an insert naming
/// those columns succeeds.
#[test]
fn create_or_replace_creates_multi_column_table() {
    use super::create_or_replace_fts_kind_table;

    let (db, _service) = setup();
    let conn = sqlite::open_connection(db.path()).expect("conn");

    let layout = vec![
        FtsPropertyPathSpec::scalar("$.title"),
        FtsPropertyPathSpec::recursive("$.payload"),
    ];
    let tokenizer = fathomdb_schema::DEFAULT_FTS_TOKENIZER;
    create_or_replace_fts_kind_table(&conn, "Article", &layout, tokenizer).expect("create table");

    let fts_table = fathomdb_schema::fts_kind_table_name("Article");
    let count_sql = format!("SELECT count(*) FROM {fts_table}");
    let row_total: i64 = conn
        .query_row(&count_sql, [], |row| row.get(0))
        .expect("count");
    assert_eq!(row_total, 0, "new table must be empty");

    // Column names are derived from the path and the recursive flag.
    let title_column = fathomdb_schema::fts_column_name("$.title", false);
    let payload_column = fathomdb_schema::fts_column_name("$.payload", true);
    let insert_sql = format!(
        "INSERT INTO {fts_table} (node_logical_id, {title_column}, {payload_column}) VALUES ('id1', 'hello', 'world')"
    );
    conn.execute(&insert_sql, [])
        .expect("insert with per-spec columns must succeed");
}
10277
/// Calling `create_or_replace_fts_kind_table` twice for the same kind with
/// a wider spec list must leave a table that has the newly added column.
#[test]
fn create_or_replace_drops_and_recreates() {
    use super::create_or_replace_fts_kind_table;

    let (db, _service) = setup();
    let conn = sqlite::open_connection(db.path()).expect("conn");
    let tokenizer = fathomdb_schema::DEFAULT_FTS_TOKENIZER;

    // First layout: title only.
    let first_layout = vec![FtsPropertyPathSpec::scalar("$.title")];
    create_or_replace_fts_kind_table(&conn, "Post", &first_layout, tokenizer).expect("create v1");

    // Second layout: title plus summary.
    let second_layout = vec![
        FtsPropertyPathSpec::scalar("$.title"),
        FtsPropertyPathSpec::scalar("$.summary"),
    ];
    create_or_replace_fts_kind_table(&conn, "Post", &second_layout, tokenizer).expect("create v2");

    // The insert names the summary column, which only the second layout has.
    let fts_table = fathomdb_schema::fts_kind_table_name("Post");
    let summary_column = fathomdb_schema::fts_column_name("$.summary", false);
    let insert_sql = format!(
        "INSERT INTO {fts_table} (node_logical_id, {summary_column}) VALUES ('id1', 'text')"
    );
    conn.execute(&insert_sql, [])
        .expect("second layout must allow summary column");
}
10316
/// A tokenizer string containing SQL-injection-style text must be rejected
/// with an error that mentions "tokenizer".
#[test]
fn create_or_replace_invalid_tokenizer() {
    use super::create_or_replace_fts_kind_table;

    let (db, _service) = setup();
    let conn = sqlite::open_connection(db.path()).expect("conn");
    let layout = vec![FtsPropertyPathSpec::scalar("$.title")];

    let outcome = create_or_replace_fts_kind_table(&conn, "Post", &layout, "'; DROP TABLE --");
    assert!(outcome.is_err(), "invalid tokenizer must be rejected");

    let message = outcome
        .expect_err("invalid tokenizer must be rejected")
        .to_string();
    assert!(
        message.contains("tokenizer"),
        "error must mention tokenizer: {message}"
    );
}
10333
/// Registering a schema where one entry carries a weight must create a
/// per-kind table with one column per spec, so an insert naming both
/// columns succeeds.
#[test]
fn register_with_weights_creates_per_column_table() {
    let (db, service) = setup();

    let specs = vec![
        FtsPropertyPathSpec::scalar("$.title").with_weight(2.0),
        FtsPropertyPathSpec::scalar("$.body"),
    ];
    let registration = service.register_fts_property_schema_with_entries(
        "Article",
        &specs,
        None,
        &[],
        crate::rebuild_actor::RebuildMode::Eager,
    );
    registration.expect("register");

    let conn = sqlite::open_connection(db.path()).expect("conn");
    let fts_table = fathomdb_schema::fts_kind_table_name("Article");
    let title_column = fathomdb_schema::fts_column_name("$.title", false);
    let body_column = fathomdb_schema::fts_column_name("$.body", false);

    // The insert can only succeed if both per-spec columns exist.
    let insert_sql = format!(
        "INSERT INTO {fts_table} (node_logical_id, {title_column}, {body_column}) VALUES ('art-1', 'hello', 'world')"
    );
    conn.execute(&insert_sql, [])
        .expect("per-spec columns must exist after registration with weights");
}
10365
/// Registers a weighted schema for `Article`, re-registers the same paths
/// without weights, and checks that the per-kind table then accepts an
/// insert into a `text_content` column.
///
/// The final assertion prints the underlying SQLite error on failure so the
/// reason (for example a missing column) is visible in the test output.
#[test]
fn weighted_to_unweighted_downgrade_recreates_table() {
    let (db, service) = setup();

    // Step 1: weighted registration.
    let weighted_entries = vec![
        FtsPropertyPathSpec::scalar("$.title").with_weight(2.0),
        FtsPropertyPathSpec::scalar("$.body"),
    ];
    service
        .register_fts_property_schema_with_entries(
            "Article",
            &weighted_entries,
            None,
            &[],
            crate::rebuild_actor::RebuildMode::Eager,
        )
        .expect("register weighted");

    // Step 2: same paths, no weights.
    let unweighted_entries = vec![
        FtsPropertyPathSpec::scalar("$.title"),
        FtsPropertyPathSpec::scalar("$.body"),
    ];
    service
        .register_fts_property_schema_with_entries(
            "Article",
            &unweighted_entries,
            None,
            &[],
            crate::rebuild_actor::RebuildMode::Eager,
        )
        .expect("re-register unweighted");

    // Step 3: the table must now expose the single `text_content` column.
    let conn = sqlite::open_connection(db.path()).expect("conn");
    let table = fathomdb_schema::fts_kind_table_name("Article");
    let result = conn.execute(
        &format!("INSERT INTO {table} (node_logical_id, text_content) VALUES ('art-1', 'hello world')"),
        [],
    );
    assert!(
        result.is_ok(),
        "text_content column must exist after weighted-to-unweighted downgrade: {:?}",
        result.as_ref().err()
    );
}
10413
/// The profile returned by `set_fts_profile` and the one read back through
/// `get_fts_profile` must both carry the kind and tokenizer that were set.
#[test]
fn set_get_fts_profile_roundtrip() {
    let (_db, service) = setup();

    let stored = service
        .set_fts_profile("book", "unicode61")
        .expect("set_fts_profile");
    assert_eq!(stored.kind, "book");
    assert_eq!(stored.tokenizer, "unicode61");

    let lookup = service.get_fts_profile("book").expect("get_fts_profile");
    let loaded = lookup.expect("should be Some");
    assert_eq!(loaded.kind, "book");
    assert_eq!(loaded.tokenizer, "unicode61");
}
10432
/// Setting a profile twice for the same kind keeps the tokenizer from the
/// second call.
#[test]
fn fts_profile_upsert() {
    let (_db, service) = setup();

    // Each pair is (tokenizer, panic message used if that call fails).
    let writes = [
        ("unicode61", "first set"),
        ("porter unicode61 remove_diacritics 2", "second set"),
    ];
    for (tokenizer, failure_label) in writes {
        service
            .set_fts_profile("article", tokenizer)
            .expect(failure_label);
    }

    let lookup = service.get_fts_profile("article").expect("get");
    let current = lookup.expect("Some");
    assert_eq!(current.tokenizer, "porter unicode61 remove_diacritics 2");
}
10448
/// A tokenizer string that looks like SQL injection is refused, and the error
/// text mentions either "tokenizer" or "invalid".
#[test]
fn invalid_tokenizer_rejected() {
    let (_db, service) = setup();

    let outcome = service.set_fts_profile("book", "'; DROP TABLE nodes --");
    assert!(outcome.is_err(), "invalid tokenizer must be rejected");

    let msg = outcome.expect_err("must be Err").to_string();
    let names_the_cause = msg.contains("tokenizer") || msg.contains("invalid");
    assert!(
        names_the_cause,
        "error must mention tokenizer or invalid: {msg}"
    );
}
10460
/// The `recall-optimized-english` preset resolves to the porter + unicode61
/// tokenizer string with `remove_diacritics 2`.
#[test]
fn preset_recall_optimized_english() {
    let resolved = super::resolve_tokenizer_preset("recall-optimized-english");
    assert_eq!(resolved, "porter unicode61 remove_diacritics 2");
}
10468
/// The `precision-optimized` preset resolves to unicode61 with
/// `remove_diacritics 2`.
#[test]
fn preset_precision_optimized() {
    let resolved = super::resolve_tokenizer_preset("precision-optimized");
    assert_eq!(resolved, "unicode61 remove_diacritics 2");
}
10476
/// The `global-cjk` preset resolves to the `icu` tokenizer.
#[test]
fn preset_global_cjk() {
    let resolved = super::resolve_tokenizer_preset("global-cjk");
    assert_eq!(resolved, "icu");
}
10481
/// The `substring-trigram` preset resolves to the `trigram` tokenizer.
#[test]
fn preset_substring_trigram() {
    let resolved = super::resolve_tokenizer_preset("substring-trigram");
    assert_eq!(resolved, "trigram");
}
10489
/// The `source-code` preset resolves to unicode61 with the extra token
/// characters `._-$@`.
#[test]
fn preset_source_code() {
    let resolved = super::resolve_tokenizer_preset("source-code");
    assert_eq!(resolved, "unicode61 tokenchars '._-$@'");
}
10497
/// `preview_projection_impact("book", "fts")` reports five rows to rebuild
/// after five `book` nodes are inserted without `superseded_at` and a sixth
/// is inserted with `superseded_at` set.
#[test]
fn preview_fts_row_count() {
    let (db, service) = setup();
    {
        let conn = sqlite::open_connection(db.path()).expect("conn");

        // Five rows with no `superseded_at` value.
        for i in 0..5u32 {
            let row_id = format!("r{i}");
            let logical_id = format!("lg{i}");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES (?1, ?2, 'book', '{}', 100, 'src')",
                rusqlite::params![row_id, logical_id],
            )
            .expect("insert node");
        }

        // One extra row that carries `superseded_at`; the expected count
        // asserted below stays at five.
        conn.execute(
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref, superseded_at) \
             VALUES ('r99', 'lg99', 'book', '{}', 100, 'src', 200)",
            [],
        )
        .expect("insert superseded");
    }

    let preview = service
        .preview_projection_impact("book", "fts")
        .expect("preview");
    assert_eq!(preview.rows_to_rebuild, 5);
}
10524
/// After a profile is set for a kind, the projection-impact preview reports
/// that tokenizer as `current_tokenizer` and leaves `target_tokenizer` empty.
#[test]
fn preview_populates_current_tokenizer() {
    let (_db, service) = setup();

    service
        .set_fts_profile("doc", "trigram")
        .expect("set profile");

    let preview = service
        .preview_projection_impact("doc", "fts")
        .expect("preview");

    let expected_current = Some(String::from("trigram"));
    assert_eq!(preview.current_tokenizer, expected_current);
    assert_eq!(preview.target_tokenizer, None);
}
10537
/// `create_or_replace_fts_kind_table` accepts the source-code tokenizer
/// string (unicode61 with `tokenchars '._-$@'`) for a single scalar spec.
#[test]
fn create_or_replace_source_code_tokenizer_is_accepted() {
    use super::create_or_replace_fts_kind_table;

    let tokenizer = "unicode61 tokenchars '._-$@'";

    let (db, _service) = setup();
    let conn = sqlite::open_connection(db.path()).expect("conn");
    let symbol_specs = vec![FtsPropertyPathSpec::scalar("$.symbol")];

    let outcome = create_or_replace_fts_kind_table(&conn, "Symbol", &symbol_specs, tokenizer);
    assert!(
        outcome.is_ok(),
        "source-code tokenizer string must be accepted by create_or_replace_fts_kind_table: {:?}",
        outcome.err()
    );
}
10558
/// With the `source-code` preset active for a kind,
/// `register_fts_property_schema` on that kind must still succeed.
///
/// An `ExecutionCoordinator` is opened against a fresh temp file and dropped
/// straight away (per its expect message, for bootstrap) before the
/// `AdminService` is constructed on the same path.
#[test]
fn source_code_profile_round_trip_through_register_fts_schema() {
    let db = tempfile::NamedTempFile::new().expect("temp file");
    let schema = Arc::new(fathomdb_schema::SchemaManager::new());

    // Open the coordinator and release it before the admin service is used.
    let coordinator = crate::ExecutionCoordinator::open(
        db.path(),
        Arc::clone(&schema),
        None,
        1,
        Arc::new(crate::TelemetryCounters::default()),
        None,
    )
    .expect("coordinator opens for bootstrap");
    drop(coordinator);

    let service = AdminService::new(db.path(), Arc::clone(&schema));

    service
        .set_fts_profile("Symbol", "source-code")
        .expect("set_fts_profile with source-code preset must succeed");

    let property_paths = ["$.name".to_owned()];
    let outcome = service.register_fts_property_schema("Symbol", &property_paths, None);
    assert!(
        outcome.is_ok(),
        "register_fts_property_schema must succeed when source-code profile is active: {:?}",
        outcome.err()
    );
}
10597
/// A 600-word chunk written directly into `chunks` is regenerated as exactly
/// one embedded row when the embedder advertises 8192 tokens of capacity.
#[cfg(feature = "sqlite-vec")]
#[test]
fn embedder_max_tokens_8192_handles_chunk_exceeding_512_words() {
    // "word0 word1 ... word599"
    let words: Vec<String> = (0..600u32).map(|i| format!("word{i}")).collect();
    let long_text = words.join(" ");

    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());

    {
        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
        schema.bootstrap(&conn).expect("bootstrap");
        conn.execute(
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'src-1')",
            [],
        )
        .expect("insert node");
        conn.execute(
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES (?1, 'doc-1', ?2, 100)",
            rusqlite::params!["chunk-long", long_text],
        )
        .expect("insert long chunk");
    }

    let embedder = LargeContextTestEmbedder::new("long-context-model", 4, 8192);
    let service = AdminService::new(db.path(), Arc::clone(&schema));
    let config = VectorRegenerationConfig {
        kind: "Document".to_owned(),
        profile: "default".to_owned(),
        chunking_policy: "per_chunk".to_owned(),
        preprocessing_policy: "trim".to_owned(),
    };
    let report = service
        .regenerate_vector_embeddings(&embedder, &config)
        .expect("regenerate with long-context embedder");

    assert_eq!(
        report.total_chunks, 1,
        "600-word text pre-written as one chunk must result in exactly one embedded row"
    );
    assert_eq!(report.regenerated_rows, 1);

    let advertised_capacity = embedder.max_tokens();
    assert_eq!(
        advertised_capacity, 8192,
        "embedder must advertise 8192 token capacity"
    );
}
10659
/// Test embedder that returns a fixed vector and reports a caller-chosen
/// token capacity.
#[cfg(feature = "sqlite-vec")]
#[derive(Debug)]
struct LargeContextTestEmbedder {
    /// Identity handed back from `identity()`.
    identity: QueryEmbedderIdentity,
    /// Constant embedding returned for every query.
    vector: Vec<f32>,
    /// Value reported by `max_tokens()`.
    max_tokens: usize,
}

#[cfg(feature = "sqlite-vec")]
impl LargeContextTestEmbedder {
    /// Builds an embedder for `model` whose vector is `dimension` copies of
    /// `1.0` and whose advertised capacity is `max_tokens`.
    fn new(model: &str, dimension: usize, max_tokens: usize) -> Self {
        let identity = QueryEmbedderIdentity {
            model_identity: model.to_owned(),
            model_version: "1.0.0".to_owned(),
            dimension,
            normalization_policy: "l2".to_owned(),
        };
        let vector = vec![1.0; dimension];

        Self {
            identity,
            vector,
            max_tokens,
        }
    }
}
10684
#[cfg(feature = "sqlite-vec")]
impl QueryEmbedder for LargeContextTestEmbedder {
    /// Ignores the input text and returns a copy of the stored vector.
    fn embed_query(&self, _text: &str) -> Result<Vec<f32>, EmbedderError> {
        let embedding = self.vector.clone();
        Ok(embedding)
    }

    /// Returns a copy of the identity supplied at construction.
    fn identity(&self) -> QueryEmbedderIdentity {
        let identity = &self.identity;
        identity.clone()
    }

    /// Returns the token capacity supplied at construction.
    fn max_tokens(&self) -> usize {
        self.max_tokens
    }
}
10697
/// In-process regeneration over three `Doc` nodes with one chunk each must
/// report three chunks and three regenerated rows, persist the contract,
/// write three rows into `vec_doc`, and record the embedder's model identity
/// in `vector_embedding_contracts`.
#[cfg(feature = "sqlite-vec")]
#[test]
#[allow(clippy::too_many_lines)]
fn regenerate_vector_embeddings_in_process_writes_contract_and_vec_rows() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());

    {
        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
        schema.bootstrap(&conn).expect("bootstrap");

        // (row_id, logical_id, created_at, source_ref)
        let node_seeds = [
            ("r1", "node-1", 100, "src1"),
            ("r2", "node-2", 101, "src2"),
            ("r3", "node-3", 102, "src3"),
        ];
        for (row_id, logical_id, created_at, src) in node_seeds {
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES (?1, ?2, 'Doc', '{}', ?3, ?4)",
                rusqlite::params![row_id, logical_id, created_at, src],
            )
            .expect("insert node");
        }

        // (chunk id, owning node logical_id, text_content, created_at)
        let chunk_seeds = [
            ("c1", "node-1", "first document text", 100),
            ("c2", "node-2", "second document text", 101),
            ("c3", "node-3", "third document text", 102),
        ];
        for (chunk_id, node_id, text, created_at) in chunk_seeds {
            conn.execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES (?1, ?2, ?3, ?4)",
                rusqlite::params![chunk_id, node_id, text, created_at],
            )
            .expect("insert chunk");
        }
    }

    let service = AdminService::new(db.path(), Arc::clone(&schema));
    let embedder = TestEmbedder::new("batch-test-model", 4);
    let config = VectorRegenerationConfig {
        kind: "Doc".to_owned(),
        profile: "default".to_owned(),
        chunking_policy: "per_chunk".to_owned(),
        preprocessing_policy: "trim".to_owned(),
    };
    let report = service
        .regenerate_vector_embeddings_in_process(&embedder, &config)
        .expect("in-process regen must succeed");

    assert_eq!(report.total_chunks, 3);
    assert_eq!(report.regenerated_rows, 3);
    assert!(report.contract_persisted);

    // Re-open the database to inspect what was written.
    let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");

    let vec_rows: i64 = conn
        .query_row("SELECT count(*) FROM vec_doc", [], |row| row.get(0))
        .expect("vec_doc count");
    assert_eq!(vec_rows, 3, "one vec row per chunk");

    let persisted_model: String = conn
        .query_row(
            "SELECT model_identity FROM vector_embedding_contracts WHERE profile = 'default'",
            [],
            |row| row.get(0),
        )
        .expect("contract row");
    assert_eq!(persisted_model, "batch-test-model");
}
10768
/// Regenerating vectors for kind `Document` must report and populate
/// `vec_document`, and must not leave a `vec_nodes_active` table behind.
#[cfg(feature = "sqlite-vec")]
#[test]
#[allow(clippy::too_many_lines)]
fn regenerate_vector_embeddings_targets_per_kind_table() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());

    {
        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
        schema.bootstrap(&conn).expect("bootstrap");
        conn.execute(
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
            [],
        )
        .expect("insert node");
        conn.execute(
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
            [],
        )
        .expect("insert chunk");
    }

    let service = AdminService::new(db.path(), Arc::clone(&schema));
    let embedder = TestEmbedder::new("test-model", 4);
    let config = VectorRegenerationConfig {
        kind: "Document".to_owned(),
        profile: "default".to_owned(),
        chunking_policy: "per_chunk".to_owned(),
        preprocessing_policy: "trim".to_owned(),
    };
    let report = service
        .regenerate_vector_embeddings(&embedder, &config)
        .expect("regenerate vectors");

    assert_eq!(report.table_name, "vec_document");
    assert_eq!(report.regenerated_rows, 1);

    let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");

    let per_kind_rows: i64 = conn
        .query_row("SELECT count(*) FROM vec_document", [], |row| row.get(0))
        .expect("vec_document count");
    assert_eq!(per_kind_rows, 1, "rows must be in vec_document");

    // Count `sqlite_master` entries for a table named `vec_nodes_active`.
    let shared_table_entries: i64 = conn
        .query_row(
            "SELECT count(*) FROM sqlite_master WHERE type='table' AND name='vec_nodes_active'",
            [],
            |r| r.get(0),
        )
        .expect("sqlite_master check");
    assert_eq!(
        shared_table_entries, 0,
        "vec_nodes_active must NOT be created for per-kind regen"
    );
}
10830
/// `get_vec_profile` yields `Ok(None)` for a kind that has no profile.
#[test]
fn get_vec_profile_returns_none_when_no_profile_exists() {
    // `_db` is bound only to keep the value returned by `setup()` alive
    // until the end of the test.
    let (_db, service) = setup();

    let lookup = service.get_vec_profile("MyKind").expect("should not error");
    assert!(
        lookup.is_none(),
        "must return None when no profile registered"
    );
}
10843
/// After `ensure_vec_kind_profile` registers `MyKind` with 128 dimensions,
/// `get_vec_profile("MyKind")` returns a profile reporting 128 dimensions.
#[cfg(feature = "sqlite-vec")]
#[test]
fn get_vec_profile_returns_profile_for_registered_kind() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());
    {
        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
        schema.bootstrap(&conn).expect("bootstrap");
        schema
            .ensure_vec_kind_profile(&conn, "MyKind", 128)
            .expect("ensure_vec_kind_profile");
    }

    let service = AdminService::new(db.path(), Arc::clone(&schema));
    // A single `expect` replaces the separate `is_some()` assertion and bare
    // `unwrap()`; a missing profile fails with the same message.
    let profile = service
        .get_vec_profile("MyKind")
        .expect("should not error")
        .expect("must return profile after registration");
    assert_eq!(profile.dimensions, 128);
}
10862
/// A `projection_profiles` row with kind `'*'` and facet `'vec'` must not be
/// returned when a specific kind is looked up.
#[test]
fn get_vec_profile_does_not_return_global_sentinel_row() {
    let (db, service) = setup();

    // Write the `('*', 'vec')` row directly, then release the connection.
    {
        let conn = sqlite::open_connection(db.path()).expect("conn");
        conn.execute(
            "INSERT INTO projection_profiles (kind, facet, config_json, active_at, created_at) \
             VALUES ('*', 'vec', '{\"model_identity\":\"old-model\",\"dimensions\":384}', 0, 0)",
            [],
        )
        .expect("insert global sentinel");
    }

    let lookup = service
        .get_vec_profile("SomeKind")
        .expect("should not error");
    assert!(
        lookup.is_none(),
        "per-kind query must not return global ('*', 'vec') row"
    );
}
10883}