1use std::fmt::Write as _;
2use std::fs;
3use std::io;
4use std::path::{Path, PathBuf};
5use std::sync::Arc;
6use std::sync::mpsc::SyncSender;
7use std::time::SystemTime;
8
9use fathomdb_schema::{SchemaError, SchemaManager};
10use rusqlite::{DatabaseName, OptionalExtension, TransactionBehavior};
11use serde::{Deserialize, Serialize};
12use sha2::{Digest, Sha256};
13
14use crate::rebuild_actor::{RebuildMode, RebuildRequest, RebuildStateRow};
15
16use crate::{
17 EngineError, ProjectionRepairReport, ProjectionService,
18 embedder::{QueryEmbedder, QueryEmbedderIdentity},
19 ids::new_id,
20 operational::{
21 OperationalCollectionKind, OperationalCollectionRecord, OperationalCompactionReport,
22 OperationalCurrentRow, OperationalFilterClause, OperationalFilterField,
23 OperationalFilterFieldType, OperationalFilterMode, OperationalFilterValue,
24 OperationalHistoryValidationIssue, OperationalHistoryValidationReport,
25 OperationalMutationRow, OperationalPurgeReport, OperationalReadReport,
26 OperationalReadRequest, OperationalRegisterRequest, OperationalRepairReport,
27 OperationalRetentionActionKind, OperationalRetentionPlanItem,
28 OperationalRetentionPlanReport, OperationalRetentionRunItem, OperationalRetentionRunReport,
29 OperationalSecondaryIndexDefinition, OperationalSecondaryIndexRebuildReport,
30 OperationalTraceReport, extract_secondary_index_entries_for_current,
31 extract_secondary_index_entries_for_mutation, parse_operational_secondary_indexes_json,
32 parse_operational_validation_contract, validate_operational_payload_against_contract,
33 },
34 projection::ProjectionTarget,
35 sqlite,
36};
37
38#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
40pub struct IntegrityReport {
41 pub physical_ok: bool,
42 pub foreign_keys_ok: bool,
43 pub missing_fts_rows: usize,
44 pub missing_property_fts_rows: usize,
45 pub duplicate_active_logical_ids: usize,
46 pub operational_missing_collections: usize,
47 pub operational_missing_last_mutations: usize,
48 pub warnings: Vec<String>,
49}
50
51#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
53pub struct FtsPropertySchemaRecord {
54 pub kind: String,
56 pub property_paths: Vec<String>,
61 pub entries: Vec<FtsPropertyPathSpec>,
66 pub exclude_paths: Vec<String>,
69 pub separator: String,
71 pub format_version: i64,
73}
74
75#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize)]
77#[serde(rename_all = "snake_case")]
78pub enum FtsPropertyPathMode {
79 #[default]
82 Scalar,
83 Recursive,
86}
87
88#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
90pub struct FtsPropertyPathSpec {
91 pub path: String,
93 pub mode: FtsPropertyPathMode,
95}
96
97impl FtsPropertyPathSpec {
98 #[must_use]
99 pub fn scalar(path: impl Into<String>) -> Self {
100 Self {
101 path: path.into(),
102 mode: FtsPropertyPathMode::Scalar,
103 }
104 }
105
106 #[must_use]
107 pub fn recursive(path: impl Into<String>) -> Self {
108 Self {
109 path: path.into(),
110 mode: FtsPropertyPathMode::Recursive,
111 }
112 }
113}
114
/// Options accepted by the safe-export operation.
#[derive(Clone, Copy, Debug)]
pub struct SafeExportOptions {
    /// Whether a checkpoint is forced; presumably a WAL checkpoint taken
    /// before the export — confirm with the export routine.
    pub force_checkpoint: bool,
}

impl Default for SafeExportOptions {
    /// Defaults to forcing the checkpoint.
    fn default() -> Self {
        let force_checkpoint = true;
        Self { force_checkpoint }
    }
}
131
/// Version number of the export protocol; presumably what
/// `SafeExportManifest::protocol_version` carries — confirm with the
/// export routine.
const EXPORT_PROTOCOL_VERSION: u32 = 1;
134
135#[derive(Clone, Debug, Serialize)]
137pub struct SafeExportManifest {
138 pub exported_at: u64,
140 pub sha256: String,
142 pub schema_version: u32,
144 pub protocol_version: u32,
146 pub page_count: u64,
148}
149
150#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
152pub struct TraceReport {
153 pub source_ref: String,
154 pub node_rows: usize,
155 pub edge_rows: usize,
156 pub action_rows: usize,
157 pub operational_mutation_rows: usize,
158 pub node_logical_ids: Vec<String>,
159 pub action_ids: Vec<String>,
160 pub operational_mutation_ids: Vec<String>,
161}
162
163#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
165pub struct SkippedEdge {
166 pub edge_logical_id: String,
167 pub missing_endpoint: String,
168}
169
170#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
172pub struct LogicalRestoreReport {
173 pub logical_id: String,
174 pub was_noop: bool,
175 pub restored_node_rows: usize,
176 pub restored_edge_rows: usize,
177 pub restored_chunk_rows: usize,
178 pub restored_fts_rows: usize,
179 pub restored_property_fts_rows: usize,
180 pub restored_vec_rows: usize,
181 pub skipped_edges: Vec<SkippedEdge>,
182 pub notes: Vec<String>,
183}
184
185#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
187pub struct LogicalPurgeReport {
188 pub logical_id: String,
189 pub was_noop: bool,
190 pub deleted_node_rows: usize,
191 pub deleted_edge_rows: usize,
192 pub deleted_chunk_rows: usize,
193 pub deleted_fts_rows: usize,
194 pub deleted_vec_rows: usize,
195 pub notes: Vec<String>,
196}
197
198#[derive(Clone, Debug, Serialize, Deserialize)]
200pub struct ProvenancePurgeOptions {
201 pub dry_run: bool,
202 #[serde(default)]
203 pub preserve_event_types: Vec<String>,
204}
205
206#[derive(Clone, Debug, Serialize)]
208pub struct ProvenancePurgeReport {
209 pub events_deleted: u64,
210 pub events_preserved: u64,
211 pub oldest_remaining: Option<i64>,
212}
213
214#[derive(Debug)]
216pub struct AdminService {
217 database_path: PathBuf,
218 schema_manager: Arc<SchemaManager>,
219 projections: ProjectionService,
220 rebuild_sender: Option<SyncSender<RebuildRequest>>,
224}
225
226#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
228pub struct SemanticReport {
229 pub orphaned_chunks: usize,
231 pub null_source_ref_nodes: usize,
233 pub broken_step_fk: usize,
235 pub broken_action_fk: usize,
237 pub stale_fts_rows: usize,
239 pub fts_rows_for_superseded_nodes: usize,
241 pub stale_property_fts_rows: usize,
243 pub orphaned_property_fts_rows: usize,
245 pub mismatched_kind_property_fts_rows: usize,
247 pub duplicate_property_fts_rows: usize,
249 pub drifted_property_fts_rows: usize,
251 pub dangling_edges: usize,
253 pub orphaned_supersession_chains: usize,
255 pub stale_vec_rows: usize,
257 pub vec_rows_for_superseded_nodes: usize,
259 pub missing_operational_current_rows: usize,
261 pub stale_operational_current_rows: usize,
263 pub disabled_collection_mutations: usize,
265 pub orphaned_last_access_metadata_rows: usize,
267 pub warnings: Vec<String>,
268}
269
270#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
280#[serde(rename_all = "snake_case", deny_unknown_fields)]
281pub struct VectorRegenerationConfig {
282 pub profile: String,
283 pub table_name: String,
284 pub chunking_policy: String,
285 pub preprocessing_policy: String,
286}
287
288#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
290pub struct VectorRegenerationReport {
291 pub profile: String,
292 pub table_name: String,
293 pub dimension: usize,
294 pub total_chunks: usize,
295 pub regenerated_rows: usize,
296 pub contract_persisted: bool,
297 pub notes: Vec<String>,
298}
299
// Limits and defaults; the code that enforces them is outside this section,
// so the notes below describe intent as suggested by the names.

/// Current format version of the vector contract.
const CURRENT_VECTOR_CONTRACT_FORMAT_VERSION: i64 = 1;
/// Upper bound on a profile name's length.
const MAX_PROFILE_LEN: usize = 128;
/// Upper bound on a policy string's length.
const MAX_POLICY_LEN: usize = 128;
/// Upper bound on the contract JSON size in bytes (32 KiB).
const MAX_CONTRACT_JSON_BYTES: usize = 32 * 1024;
/// Upper bound on audit metadata size in bytes.
const MAX_AUDIT_METADATA_BYTES: usize = 2048;
/// Default row limit for operational reads.
const DEFAULT_OPERATIONAL_READ_LIMIT: usize = 100;
/// Largest row limit for operational reads.
const MAX_OPERATIONAL_READ_LIMIT: usize = 1000;
307
308#[derive(Clone, Debug)]
310pub struct AdminHandle {
311 inner: Arc<AdminService>,
312}
313
314impl AdminHandle {
315 #[must_use]
317 pub fn new(service: AdminService) -> Self {
318 Self {
319 inner: Arc::new(service),
320 }
321 }
322
323 #[must_use]
325 pub fn service(&self) -> Arc<AdminService> {
326 Arc::clone(&self.inner)
327 }
328}
329
330impl AdminService {
331 #[must_use]
333 pub fn new(path: impl AsRef<Path>, schema_manager: Arc<SchemaManager>) -> Self {
334 let database_path = path.as_ref().to_path_buf();
335 let projections = ProjectionService::new(&database_path, Arc::clone(&schema_manager));
336 Self {
337 database_path,
338 schema_manager,
339 projections,
340 rebuild_sender: None,
341 }
342 }
343
344 #[must_use]
346 pub fn new_with_rebuild(
347 path: impl AsRef<Path>,
348 schema_manager: Arc<SchemaManager>,
349 rebuild_sender: SyncSender<RebuildRequest>,
350 ) -> Self {
351 let database_path = path.as_ref().to_path_buf();
352 let projections = ProjectionService::new(&database_path, Arc::clone(&schema_manager));
353 Self {
354 database_path,
355 schema_manager,
356 projections,
357 rebuild_sender: Some(rebuild_sender),
358 }
359 }
360
361 fn connect(&self) -> Result<rusqlite::Connection, EngineError> {
362 #[cfg(feature = "sqlite-vec")]
363 let conn = sqlite::open_connection_with_vec(&self.database_path)?;
364 #[cfg(not(feature = "sqlite-vec"))]
365 let conn = sqlite::open_connection(&self.database_path)?;
366 self.schema_manager.bootstrap(&conn)?;
367 Ok(conn)
368 }
369
370 pub fn check_integrity(&self) -> Result<IntegrityReport, EngineError> {
373 let conn = self.connect()?;
374
375 let physical_result: String =
376 conn.query_row("PRAGMA integrity_check", [], |row| row.get(0))?;
377 let foreign_key_count: i64 =
378 conn.query_row("SELECT count(*) FROM pragma_foreign_key_check", [], |row| {
379 row.get(0)
380 })?;
381 let missing_fts_rows: i64 = conn.query_row(
382 r"
383 SELECT count(*)
384 FROM chunks c
385 JOIN nodes n
386 ON n.logical_id = c.node_logical_id
387 AND n.superseded_at IS NULL
388 WHERE NOT EXISTS (
389 SELECT 1
390 FROM fts_nodes f
391 WHERE f.chunk_id = c.id
392 )
393 ",
394 [],
395 |row| row.get(0),
396 )?;
397 let duplicate_active: i64 = conn.query_row(
398 r"
399 SELECT count(*)
400 FROM (
401 SELECT logical_id
402 FROM nodes
403 WHERE superseded_at IS NULL
404 GROUP BY logical_id
405 HAVING count(*) > 1
406 )
407 ",
408 [],
409 |row| row.get(0),
410 )?;
411 let operational_missing_collections: i64 = conn.query_row(
412 r"
413 SELECT (
414 SELECT count(*)
415 FROM operational_mutations m
416 LEFT JOIN operational_collections c ON c.name = m.collection_name
417 WHERE c.name IS NULL
418 ) + (
419 SELECT count(*)
420 FROM operational_current oc
421 LEFT JOIN operational_collections c ON c.name = oc.collection_name
422 WHERE c.name IS NULL
423 )
424 ",
425 [],
426 |row| row.get(0),
427 )?;
428 let operational_missing_last_mutations: i64 = conn.query_row(
429 r"
430 SELECT count(*)
431 FROM operational_current oc
432 LEFT JOIN operational_mutations m ON m.id = oc.last_mutation_id
433 WHERE m.id IS NULL
434 ",
435 [],
436 |row| row.get(0),
437 )?;
438
439 let missing_property_fts_rows = count_missing_property_fts_rows(&conn)?;
443
444 let mut warnings = Vec::new();
445 if missing_fts_rows > 0 {
446 warnings.push("missing FTS projections detected".to_owned());
447 }
448 if missing_property_fts_rows > 0 {
449 warnings.push("missing property FTS projections detected".to_owned());
450 }
451 if duplicate_active > 0 {
452 warnings.push("duplicate active logical_ids detected".to_owned());
453 }
454 if operational_missing_collections > 0 {
455 warnings.push("operational rows reference missing collections".to_owned());
456 }
457 if operational_missing_last_mutations > 0 {
458 warnings.push("operational current rows reference missing last mutations".to_owned());
459 }
460
461 Ok(IntegrityReport {
466 physical_ok: physical_result == "ok",
467 foreign_keys_ok: foreign_key_count == 0,
468 missing_fts_rows: i64_to_usize(missing_fts_rows),
469 missing_property_fts_rows: i64_to_usize(missing_property_fts_rows),
470 duplicate_active_logical_ids: i64_to_usize(duplicate_active),
471 operational_missing_collections: i64_to_usize(operational_missing_collections),
472 operational_missing_last_mutations: i64_to_usize(operational_missing_last_mutations),
473 warnings,
474 })
475 }
476
477 #[allow(clippy::too_many_lines)]
480 pub fn check_semantics(&self) -> Result<SemanticReport, EngineError> {
481 let conn = self.connect()?;
482
483 let orphaned_chunks: i64 = conn.query_row(
484 r"
485 SELECT count(*)
486 FROM chunks c
487 WHERE NOT EXISTS (
488 SELECT 1 FROM nodes n
489 WHERE n.logical_id = c.node_logical_id
490 )
491 ",
492 [],
493 |row| row.get(0),
494 )?;
495
496 let null_source_ref_nodes: i64 = conn.query_row(
497 "SELECT count(*) FROM nodes WHERE source_ref IS NULL AND superseded_at IS NULL",
498 [],
499 |row| row.get(0),
500 )?;
501
502 let broken_step_fk: i64 = conn.query_row(
503 r"
504 SELECT count(*) FROM steps s
505 WHERE NOT EXISTS (SELECT 1 FROM runs r WHERE r.id = s.run_id)
506 ",
507 [],
508 |row| row.get(0),
509 )?;
510
511 let broken_action_fk: i64 = conn.query_row(
512 r"
513 SELECT count(*) FROM actions a
514 WHERE NOT EXISTS (SELECT 1 FROM steps s WHERE s.id = a.step_id)
515 ",
516 [],
517 |row| row.get(0),
518 )?;
519
520 let stale_fts_rows: i64 = conn.query_row(
521 r"
522 SELECT count(*) FROM fts_nodes f
523 WHERE NOT EXISTS (SELECT 1 FROM chunks c WHERE c.id = f.chunk_id)
524 ",
525 [],
526 |row| row.get(0),
527 )?;
528
529 let fts_rows_for_superseded_nodes: i64 = conn.query_row(
530 r"
531 SELECT count(*) FROM fts_nodes f
532 WHERE NOT EXISTS (
533 SELECT 1 FROM nodes n
534 WHERE n.logical_id = f.node_logical_id AND n.superseded_at IS NULL
535 )
536 ",
537 [],
538 |row| row.get(0),
539 )?;
540
541 let stale_property_fts_rows: i64 = conn.query_row(
542 r"
543 SELECT count(*) FROM fts_node_properties fp
544 WHERE NOT EXISTS (
545 SELECT 1 FROM nodes n
546 WHERE n.logical_id = fp.node_logical_id AND n.superseded_at IS NULL
547 )
548 ",
549 [],
550 |row| row.get(0),
551 )?;
552
553 let orphaned_property_fts_rows: i64 = conn.query_row(
554 r"
555 SELECT count(*) FROM fts_node_properties fp
556 WHERE NOT EXISTS (
557 SELECT 1 FROM fts_property_schemas s WHERE s.kind = fp.kind
558 )
559 ",
560 [],
561 |row| row.get(0),
562 )?;
563
564 let mismatched_kind_property_fts_rows: i64 = conn.query_row(
565 r"
566 SELECT count(*) FROM fts_node_properties fp
567 JOIN nodes n ON n.logical_id = fp.node_logical_id AND n.superseded_at IS NULL
568 WHERE n.kind != fp.kind
569 ",
570 [],
571 |row| row.get(0),
572 )?;
573
574 let duplicate_property_fts_rows: i64 = conn.query_row(
575 r"
576 SELECT count(*) FROM (
577 SELECT node_logical_id FROM fts_node_properties
578 GROUP BY node_logical_id
579 HAVING count(*) > 1
580 )
581 ",
582 [],
583 |row| row.get(0),
584 )?;
585
586 let drifted_property_fts_rows = count_drifted_property_fts_rows(&conn)?;
587
588 let dangling_edges: i64 = conn.query_row(
589 r"
590 SELECT count(*) FROM edges e
591 WHERE e.superseded_at IS NULL AND (
592 NOT EXISTS (SELECT 1 FROM nodes n WHERE n.logical_id = e.source_logical_id AND n.superseded_at IS NULL)
593 OR
594 NOT EXISTS (SELECT 1 FROM nodes n WHERE n.logical_id = e.target_logical_id AND n.superseded_at IS NULL)
595 )
596 ",
597 [],
598 |row| row.get(0),
599 )?;
600
601 let orphaned_supersession_chains: i64 = conn.query_row(
602 r"
603 SELECT count(*) FROM (
604 SELECT logical_id FROM nodes
605 GROUP BY logical_id
606 HAVING count(*) > 0 AND sum(CASE WHEN superseded_at IS NULL THEN 1 ELSE 0 END) = 0
607 )
608 ",
609 [],
610 |row| row.get(0),
611 )?;
612
613 #[cfg(feature = "sqlite-vec")]
615 let stale_vec_rows: i64 = match conn.query_row(
616 r"
617 SELECT count(*) FROM vec_nodes_active v
618 WHERE NOT EXISTS (SELECT 1 FROM chunks c WHERE c.id = v.chunk_id)
619 ",
620 [],
621 |row| row.get(0),
622 ) {
623 Ok(n) => n,
624 Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
625 if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
626 {
627 0
628 }
629 Err(e) => return Err(EngineError::Sqlite(e)),
630 };
631 #[cfg(not(feature = "sqlite-vec"))]
632 let stale_vec_rows: i64 = 0;
633
634 #[cfg(feature = "sqlite-vec")]
635 let vec_rows_for_superseded_nodes: i64 = match conn.query_row(
636 r"
637 SELECT count(*) FROM vec_nodes_active v
638 JOIN chunks c ON c.id = v.chunk_id
639 WHERE NOT EXISTS (
640 SELECT 1 FROM nodes n
641 WHERE n.logical_id = c.node_logical_id
642 )
643 ",
644 [],
645 |row| row.get(0),
646 ) {
647 Ok(n) => n,
648 Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
649 if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
650 {
651 0
652 }
653 Err(e) => return Err(EngineError::Sqlite(e)),
654 };
655 #[cfg(not(feature = "sqlite-vec"))]
656 let vec_rows_for_superseded_nodes: i64 = 0;
657 let missing_operational_current_rows: i64 = conn.query_row(
658 r"
659 SELECT count(*)
660 FROM operational_mutations m
661 JOIN operational_collections c
662 ON c.name = m.collection_name
663 AND c.kind = 'latest_state'
664 WHERE m.op_kind = 'put'
665 AND NOT EXISTS (
666 SELECT 1
667 FROM operational_mutations newer
668 WHERE newer.collection_name = m.collection_name
669 AND newer.record_key = m.record_key
670 AND newer.mutation_order > m.mutation_order
671 )
672 AND NOT EXISTS (
673 SELECT 1
674 FROM operational_current oc
675 WHERE oc.collection_name = m.collection_name
676 AND oc.record_key = m.record_key
677 )
678 ",
679 [],
680 |row| row.get(0),
681 )?;
682 let stale_operational_current_rows: i64 = conn.query_row(
683 r"
684 SELECT count(*)
685 FROM operational_current oc
686 JOIN operational_collections c
687 ON c.name = oc.collection_name
688 AND c.kind = 'latest_state'
689 LEFT JOIN operational_mutations m ON m.id = oc.last_mutation_id
690 WHERE m.id IS NULL
691 OR m.collection_name != oc.collection_name
692 OR m.record_key != oc.record_key
693 OR m.op_kind != 'put'
694 OR m.payload_json != oc.payload_json
695 OR EXISTS (
696 SELECT 1
697 FROM operational_mutations newer
698 WHERE newer.collection_name = oc.collection_name
699 AND newer.record_key = oc.record_key
700 AND newer.mutation_order > m.mutation_order
701 )
702 ",
703 [],
704 |row| row.get(0),
705 )?;
706 let disabled_collection_mutations: i64 = conn.query_row(
707 r"
708 SELECT count(*)
709 FROM operational_mutations m
710 JOIN operational_collections c ON c.name = m.collection_name
711 WHERE c.disabled_at IS NOT NULL AND m.created_at > c.disabled_at
712 ",
713 [],
714 |row| row.get(0),
715 )?;
716 let orphaned_last_access_metadata_rows: i64 = conn.query_row(
717 r"
718 SELECT count(*)
719 FROM node_access_metadata am
720 WHERE NOT EXISTS (
721 SELECT 1 FROM nodes n WHERE n.logical_id = am.logical_id
722 )
723 ",
724 [],
725 |row| row.get(0),
726 )?;
727
728 let mut warnings = Vec::new();
729 if orphaned_chunks > 0 {
730 warnings.push(format!(
731 "{orphaned_chunks} orphaned chunk(s) with no surviving node history"
732 ));
733 }
734 if null_source_ref_nodes > 0 {
735 warnings.push(format!(
736 "{null_source_ref_nodes} active node(s) with null source_ref"
737 ));
738 }
739 if broken_step_fk > 0 {
740 warnings.push(format!(
741 "{broken_step_fk} step(s) referencing non-existent run"
742 ));
743 }
744 if broken_action_fk > 0 {
745 warnings.push(format!(
746 "{broken_action_fk} action(s) referencing non-existent step"
747 ));
748 }
749 if stale_fts_rows > 0 {
750 warnings.push(format!(
751 "{stale_fts_rows} stale FTS row(s) referencing missing chunk"
752 ));
753 }
754 if fts_rows_for_superseded_nodes > 0 {
755 warnings.push(format!(
756 "{fts_rows_for_superseded_nodes} FTS row(s) for superseded node(s)"
757 ));
758 }
759 if stale_property_fts_rows > 0 {
760 warnings.push(format!(
761 "{stale_property_fts_rows} stale property FTS row(s) for superseded/missing node(s)"
762 ));
763 }
764 if orphaned_property_fts_rows > 0 {
765 warnings.push(format!(
766 "{orphaned_property_fts_rows} orphaned property FTS row(s) for unregistered kind(s)"
767 ));
768 }
769 if mismatched_kind_property_fts_rows > 0 {
770 warnings.push(format!(
771 "{mismatched_kind_property_fts_rows} property FTS row(s) whose kind does not match the active node"
772 ));
773 }
774 if duplicate_property_fts_rows > 0 {
775 warnings.push(format!(
776 "{duplicate_property_fts_rows} active logical ID(s) with duplicate property FTS rows"
777 ));
778 }
779 if drifted_property_fts_rows > 0 {
780 warnings.push(format!(
781 "{drifted_property_fts_rows} property FTS row(s) with stale text_content"
782 ));
783 }
784 if dangling_edges > 0 {
785 warnings.push(format!(
786 "{dangling_edges} active edge(s) with missing endpoint node"
787 ));
788 }
789 if orphaned_supersession_chains > 0 {
790 warnings.push(format!(
791 "{orphaned_supersession_chains} logical_id(s) with all versions superseded"
792 ));
793 }
794 if stale_vec_rows > 0 {
795 warnings.push(format!(
796 "{stale_vec_rows} stale vec row(s) referencing missing chunk"
797 ));
798 }
799 if vec_rows_for_superseded_nodes > 0 {
800 warnings.push(format!(
801 "{vec_rows_for_superseded_nodes} vec row(s) whose node history is missing"
802 ));
803 }
804 if missing_operational_current_rows > 0 {
805 warnings.push(format!(
806 "{missing_operational_current_rows} latest-state key(s) missing operational_current rows"
807 ));
808 }
809 if stale_operational_current_rows > 0 {
810 warnings.push(format!(
811 "{stale_operational_current_rows} stale operational_current row(s)"
812 ));
813 }
814 if disabled_collection_mutations > 0 {
815 warnings.push(format!(
816 "{disabled_collection_mutations} mutation(s) were written after collection disable"
817 ));
818 }
819 if orphaned_last_access_metadata_rows > 0 {
820 warnings.push(format!(
821 "{orphaned_last_access_metadata_rows} last_access metadata row(s) reference missing node history"
822 ));
823 }
824
825 Ok(SemanticReport {
826 orphaned_chunks: i64_to_usize(orphaned_chunks),
827 null_source_ref_nodes: i64_to_usize(null_source_ref_nodes),
828 broken_step_fk: i64_to_usize(broken_step_fk),
829 broken_action_fk: i64_to_usize(broken_action_fk),
830 stale_fts_rows: i64_to_usize(stale_fts_rows),
831 fts_rows_for_superseded_nodes: i64_to_usize(fts_rows_for_superseded_nodes),
832 stale_property_fts_rows: i64_to_usize(stale_property_fts_rows),
833 orphaned_property_fts_rows: i64_to_usize(orphaned_property_fts_rows),
834 mismatched_kind_property_fts_rows: i64_to_usize(mismatched_kind_property_fts_rows),
835 duplicate_property_fts_rows: i64_to_usize(duplicate_property_fts_rows),
836 drifted_property_fts_rows: i64_to_usize(drifted_property_fts_rows),
837 dangling_edges: i64_to_usize(dangling_edges),
838 orphaned_supersession_chains: i64_to_usize(orphaned_supersession_chains),
839 stale_vec_rows: i64_to_usize(stale_vec_rows),
840 vec_rows_for_superseded_nodes: i64_to_usize(vec_rows_for_superseded_nodes),
841 missing_operational_current_rows: i64_to_usize(missing_operational_current_rows),
842 stale_operational_current_rows: i64_to_usize(stale_operational_current_rows),
843 disabled_collection_mutations: i64_to_usize(disabled_collection_mutations),
844 orphaned_last_access_metadata_rows: i64_to_usize(orphaned_last_access_metadata_rows),
845 warnings,
846 })
847 }
848
849 pub fn register_operational_collection(
852 &self,
853 request: &OperationalRegisterRequest,
854 ) -> Result<OperationalCollectionRecord, EngineError> {
855 if request.name.trim().is_empty() {
856 return Err(EngineError::InvalidWrite(
857 "operational collection name must not be empty".to_owned(),
858 ));
859 }
860 if request.schema_json.is_empty() {
861 return Err(EngineError::InvalidWrite(
862 "operational collection schema_json must not be empty".to_owned(),
863 ));
864 }
865 if request.retention_json.is_empty() {
866 return Err(EngineError::InvalidWrite(
867 "operational collection retention_json must not be empty".to_owned(),
868 ));
869 }
870 if request.filter_fields_json.is_empty() {
871 return Err(EngineError::InvalidWrite(
872 "operational collection filter_fields_json must not be empty".to_owned(),
873 ));
874 }
875 parse_operational_validation_contract(&request.validation_json)
876 .map_err(EngineError::InvalidWrite)?;
877 parse_operational_secondary_indexes_json(&request.secondary_indexes_json, request.kind)
878 .map_err(EngineError::InvalidWrite)?;
879 if request.format_version <= 0 {
880 return Err(EngineError::InvalidWrite(
881 "operational collection format_version must be positive".to_owned(),
882 ));
883 }
884 parse_operational_filter_fields(&request.filter_fields_json)
885 .map_err(EngineError::InvalidWrite)?;
886
887 let mut conn = self.connect()?;
888 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
889 tx.execute(
890 "INSERT INTO operational_collections \
891 (name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at) \
892 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, unixepoch())",
893 rusqlite::params![
894 request.name.as_str(),
895 request.kind.as_str(),
896 request.schema_json.as_str(),
897 request.retention_json.as_str(),
898 request.filter_fields_json.as_str(),
899 request.validation_json.as_str(),
900 request.secondary_indexes_json.as_str(),
901 request.format_version,
902 ],
903 )?;
904 persist_simple_provenance_event(
905 &tx,
906 "operational_collection_registered",
907 request.name.as_str(),
908 Some(serde_json::json!({
909 "kind": request.kind.as_str(),
910 "format_version": request.format_version,
911 })),
912 )?;
913 tx.commit()?;
914
915 self.describe_operational_collection(&request.name)?
916 .ok_or_else(|| {
917 EngineError::Bridge("registered collection missing after commit".to_owned())
918 })
919 }
920
921 pub fn describe_operational_collection(
924 &self,
925 name: &str,
926 ) -> Result<Option<OperationalCollectionRecord>, EngineError> {
927 let conn = self.connect()?;
928 load_operational_collection_record(&conn, name)
929 }
930
931 pub fn update_operational_collection_filters(
935 &self,
936 name: &str,
937 filter_fields_json: &str,
938 ) -> Result<OperationalCollectionRecord, EngineError> {
939 if filter_fields_json.is_empty() {
940 return Err(EngineError::InvalidWrite(
941 "operational collection filter_fields_json must not be empty".to_owned(),
942 ));
943 }
944 let declared_fields = parse_operational_filter_fields(filter_fields_json)
945 .map_err(EngineError::InvalidWrite)?;
946
947 let mut conn = self.connect()?;
948 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
949 load_operational_collection_record(&tx, name)?.ok_or_else(|| {
950 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
951 })?;
952 tx.execute(
953 "UPDATE operational_collections SET filter_fields_json = ?2 WHERE name = ?1",
954 rusqlite::params![name, filter_fields_json],
955 )?;
956 tx.execute(
957 "DELETE FROM operational_filter_values WHERE collection_name = ?1",
958 [name],
959 )?;
960
961 let mut mutation_stmt = tx.prepare(
962 "SELECT id, payload_json FROM operational_mutations \
963 WHERE collection_name = ?1 ORDER BY mutation_order",
964 )?;
965 let mutations = mutation_stmt
966 .query_map([name], |row| {
967 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
968 })?
969 .collect::<Result<Vec<_>, _>>()?;
970 drop(mutation_stmt);
971
972 let mut insert_filter_value = tx.prepare_cached(
973 "INSERT INTO operational_filter_values \
974 (mutation_id, collection_name, field_name, string_value, integer_value) \
975 VALUES (?1, ?2, ?3, ?4, ?5)",
976 )?;
977 let mut inserted_values = 0usize;
978 for (mutation_id, payload_json) in &mutations {
979 for filter_value in
980 extract_operational_filter_values(&declared_fields, payload_json.as_str())
981 {
982 insert_filter_value.execute(rusqlite::params![
983 mutation_id,
984 name,
985 filter_value.field_name,
986 filter_value.string_value,
987 filter_value.integer_value,
988 ])?;
989 inserted_values += 1;
990 }
991 }
992 drop(insert_filter_value);
993
994 persist_simple_provenance_event(
995 &tx,
996 "operational_collection_filter_fields_updated",
997 name,
998 Some(serde_json::json!({
999 "field_count": declared_fields.len(),
1000 "mutations_backfilled": mutations.len(),
1001 "inserted_filter_values": inserted_values,
1002 })),
1003 )?;
1004 let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1005 EngineError::Bridge("operational collection missing after filter update".to_owned())
1006 })?;
1007 tx.commit()?;
1008 Ok(updated)
1009 }
1010
1011 pub fn update_operational_collection_validation(
1014 &self,
1015 name: &str,
1016 validation_json: &str,
1017 ) -> Result<OperationalCollectionRecord, EngineError> {
1018 parse_operational_validation_contract(validation_json)
1019 .map_err(EngineError::InvalidWrite)?;
1020
1021 let mut conn = self.connect()?;
1022 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1023 load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1024 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1025 })?;
1026 tx.execute(
1027 "UPDATE operational_collections SET validation_json = ?2 WHERE name = ?1",
1028 rusqlite::params![name, validation_json],
1029 )?;
1030 persist_simple_provenance_event(
1031 &tx,
1032 "operational_collection_validation_updated",
1033 name,
1034 Some(serde_json::json!({
1035 "has_validation": !validation_json.is_empty(),
1036 })),
1037 )?;
1038 let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1039 EngineError::Bridge("operational collection missing after validation update".to_owned())
1040 })?;
1041 tx.commit()?;
1042 Ok(updated)
1043 }
1044
1045 pub fn update_operational_collection_secondary_indexes(
1049 &self,
1050 name: &str,
1051 secondary_indexes_json: &str,
1052 ) -> Result<OperationalCollectionRecord, EngineError> {
1053 let mut conn = self.connect()?;
1054 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1055 let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1056 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1057 })?;
1058 let indexes = parse_operational_secondary_indexes_json(secondary_indexes_json, record.kind)
1059 .map_err(EngineError::InvalidWrite)?;
1060 tx.execute(
1061 "UPDATE operational_collections SET secondary_indexes_json = ?2 WHERE name = ?1",
1062 rusqlite::params![name, secondary_indexes_json],
1063 )?;
1064 let (mutation_entries_rebuilt, current_entries_rebuilt) =
1065 rebuild_operational_secondary_index_entries(&tx, &record.name, record.kind, &indexes)?;
1066 persist_simple_provenance_event(
1067 &tx,
1068 "operational_collection_secondary_indexes_updated",
1069 name,
1070 Some(serde_json::json!({
1071 "index_count": indexes.len(),
1072 "mutation_entries_rebuilt": mutation_entries_rebuilt,
1073 "current_entries_rebuilt": current_entries_rebuilt,
1074 })),
1075 )?;
1076 let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1077 EngineError::Bridge(
1078 "operational collection missing after secondary index update".to_owned(),
1079 )
1080 })?;
1081 tx.commit()?;
1082 Ok(updated)
1083 }
1084
1085 pub fn rebuild_operational_secondary_indexes(
1088 &self,
1089 name: &str,
1090 ) -> Result<OperationalSecondaryIndexRebuildReport, EngineError> {
1091 let mut conn = self.connect()?;
1092 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1093 let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1094 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1095 })?;
1096 let indexes =
1097 parse_operational_secondary_indexes_json(&record.secondary_indexes_json, record.kind)
1098 .map_err(EngineError::InvalidWrite)?;
1099 let (mutation_entries_rebuilt, current_entries_rebuilt) =
1100 rebuild_operational_secondary_index_entries(&tx, &record.name, record.kind, &indexes)?;
1101 persist_simple_provenance_event(
1102 &tx,
1103 "operational_secondary_indexes_rebuilt",
1104 name,
1105 Some(serde_json::json!({
1106 "index_count": indexes.len(),
1107 "mutation_entries_rebuilt": mutation_entries_rebuilt,
1108 "current_entries_rebuilt": current_entries_rebuilt,
1109 })),
1110 )?;
1111 tx.commit()?;
1112 Ok(OperationalSecondaryIndexRebuildReport {
1113 collection_name: name.to_owned(),
1114 mutation_entries_rebuilt,
1115 current_entries_rebuilt,
1116 })
1117 }
1118
1119 pub fn validate_operational_collection_history(
1122 &self,
1123 name: &str,
1124 ) -> Result<OperationalHistoryValidationReport, EngineError> {
1125 let conn = self.connect()?;
1126 let record = load_operational_collection_record(&conn, name)?.ok_or_else(|| {
1127 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1128 })?;
1129 let Some(contract) = parse_operational_validation_contract(&record.validation_json)
1130 .map_err(EngineError::InvalidWrite)?
1131 else {
1132 return Err(EngineError::InvalidWrite(format!(
1133 "operational collection '{name}' has no validation_json configured"
1134 )));
1135 };
1136
1137 let mut stmt = conn.prepare(
1138 "SELECT id, record_key, op_kind, payload_json FROM operational_mutations \
1139 WHERE collection_name = ?1 ORDER BY mutation_order",
1140 )?;
1141 let rows = stmt
1142 .query_map([name], |row| {
1143 Ok((
1144 row.get::<_, String>(0)?,
1145 row.get::<_, String>(1)?,
1146 row.get::<_, String>(2)?,
1147 row.get::<_, String>(3)?,
1148 ))
1149 })?
1150 .collect::<Result<Vec<_>, _>>()?;
1151 drop(stmt);
1152
1153 let mut checked_rows = 0usize;
1154 let mut issues = Vec::new();
1155 for (mutation_id, record_key, op_kind, payload_json) in rows {
1156 if op_kind == "delete" {
1157 continue;
1158 }
1159 checked_rows += 1;
1160 if let Err(message) =
1161 validate_operational_payload_against_contract(&contract, payload_json.as_str())
1162 {
1163 issues.push(OperationalHistoryValidationIssue {
1164 mutation_id,
1165 record_key,
1166 op_kind,
1167 message,
1168 });
1169 }
1170 }
1171
1172 Ok(OperationalHistoryValidationReport {
1173 collection_name: name.to_owned(),
1174 checked_rows,
1175 invalid_row_count: issues.len(),
1176 issues,
1177 })
1178 }
1179
1180 pub fn disable_operational_collection(
1183 &self,
1184 name: &str,
1185 ) -> Result<OperationalCollectionRecord, EngineError> {
1186 let mut conn = self.connect()?;
1187 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1188 let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1189 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1190 })?;
1191 let changed = if record.disabled_at.is_none() {
1192 tx.execute(
1193 "UPDATE operational_collections SET disabled_at = unixepoch() WHERE name = ?1",
1194 [name],
1195 )?;
1196 true
1197 } else {
1198 false
1199 };
1200 let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1201 EngineError::Bridge("operational collection missing after disable".to_owned())
1202 })?;
1203 persist_simple_provenance_event(
1204 &tx,
1205 "operational_collection_disabled",
1206 name,
1207 Some(serde_json::json!({
1208 "disabled_at": record.disabled_at,
1209 "changed": changed,
1210 })),
1211 )?;
1212 tx.commit()?;
1213 Ok(record)
1214 }
1215
1216 pub fn compact_operational_collection(
1219 &self,
1220 name: &str,
1221 dry_run: bool,
1222 ) -> Result<OperationalCompactionReport, EngineError> {
1223 let mut conn = self.connect()?;
1224 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1225 let collection = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1226 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1227 })?;
1228 validate_append_only_operational_collection(&collection, "compact")?;
1229 let (mutation_ids, before_timestamp) =
1230 operational_compaction_candidates(&tx, &collection.retention_json, name)?;
1231 if dry_run {
1232 drop(tx);
1233 return Ok(OperationalCompactionReport {
1234 collection_name: name.to_owned(),
1235 deleted_mutations: mutation_ids.len(),
1236 dry_run: true,
1237 before_timestamp,
1238 });
1239 }
1240 let mut delete_stmt =
1241 tx.prepare_cached("DELETE FROM operational_mutations WHERE id = ?1")?;
1242 for mutation_id in &mutation_ids {
1243 delete_stmt.execute([mutation_id.as_str()])?;
1244 }
1245 drop(delete_stmt);
1246 persist_simple_provenance_event(
1247 &tx,
1248 "operational_collection_compacted",
1249 name,
1250 Some(serde_json::json!({
1251 "deleted_mutations": mutation_ids.len(),
1252 "before_timestamp": before_timestamp,
1253 })),
1254 )?;
1255 tx.commit()?;
1256 Ok(OperationalCompactionReport {
1257 collection_name: name.to_owned(),
1258 deleted_mutations: mutation_ids.len(),
1259 dry_run: false,
1260 before_timestamp,
1261 })
1262 }
1263
1264 pub fn purge_operational_collection(
1267 &self,
1268 name: &str,
1269 before_timestamp: i64,
1270 ) -> Result<OperationalPurgeReport, EngineError> {
1271 let mut conn = self.connect()?;
1272 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1273 let collection = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1274 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1275 })?;
1276 validate_append_only_operational_collection(&collection, "purge")?;
1277 let deleted_mutations = tx.execute(
1278 "DELETE FROM operational_mutations WHERE collection_name = ?1 AND created_at < ?2",
1279 rusqlite::params![name, before_timestamp],
1280 )?;
1281 persist_simple_provenance_event(
1282 &tx,
1283 "operational_collection_purged",
1284 name,
1285 Some(serde_json::json!({
1286 "deleted_mutations": deleted_mutations,
1287 "before_timestamp": before_timestamp,
1288 })),
1289 )?;
1290 tx.commit()?;
1291 Ok(OperationalPurgeReport {
1292 collection_name: name.to_owned(),
1293 deleted_mutations,
1294 before_timestamp,
1295 })
1296 }
1297
1298 pub fn plan_operational_retention(
1301 &self,
1302 now_timestamp: i64,
1303 collection_names: Option<&[String]>,
1304 max_collections: Option<usize>,
1305 ) -> Result<OperationalRetentionPlanReport, EngineError> {
1306 let conn = self.connect()?;
1307 let records = load_operational_retention_records(&conn, collection_names, max_collections)?;
1308 let mut items = Vec::with_capacity(records.len());
1309 for record in records {
1310 items.push(plan_operational_retention_item(
1311 &conn,
1312 &record,
1313 now_timestamp,
1314 )?);
1315 }
1316 Ok(OperationalRetentionPlanReport {
1317 planned_at: now_timestamp,
1318 collections_examined: items.len(),
1319 items,
1320 })
1321 }
1322
1323 pub fn run_operational_retention(
1326 &self,
1327 now_timestamp: i64,
1328 collection_names: Option<&[String]>,
1329 max_collections: Option<usize>,
1330 dry_run: bool,
1331 ) -> Result<OperationalRetentionRunReport, EngineError> {
1332 let mut conn = self.connect()?;
1333 let records = load_operational_retention_records(&conn, collection_names, max_collections)?;
1334 let mut items = Vec::with_capacity(records.len());
1335 let mut collections_acted_on = 0usize;
1336
1337 for record in records {
1338 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1339 let item = run_operational_retention_item(&tx, &record, now_timestamp, dry_run)?;
1340 if item.deleted_mutations > 0 {
1341 collections_acted_on += 1;
1342 }
1343 if dry_run || item.action_kind == OperationalRetentionActionKind::Noop {
1344 drop(tx);
1345 } else {
1346 tx.commit()?;
1347 }
1348 items.push(item);
1349 }
1350
1351 Ok(OperationalRetentionRunReport {
1352 executed_at: now_timestamp,
1353 collections_examined: items.len(),
1354 collections_acted_on,
1355 dry_run,
1356 items,
1357 })
1358 }
1359
1360 pub fn trace_operational_collection(
1363 &self,
1364 collection_name: &str,
1365 record_key: Option<&str>,
1366 ) -> Result<OperationalTraceReport, EngineError> {
1367 let conn = self.connect()?;
1368 ensure_operational_collection_registered(&conn, collection_name)?;
1369 let mutations = if let Some(record_key) = record_key {
1370 let mut stmt = conn.prepare(
1371 "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
1372 FROM operational_mutations \
1373 WHERE collection_name = ?1 AND record_key = ?2 \
1374 ORDER BY mutation_order",
1375 )?;
1376 stmt.query_map([collection_name, record_key], map_operational_mutation_row)?
1377 .collect::<Result<Vec<_>, _>>()?
1378 } else {
1379 let mut stmt = conn.prepare(
1380 "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
1381 FROM operational_mutations \
1382 WHERE collection_name = ?1 \
1383 ORDER BY mutation_order",
1384 )?;
1385 stmt.query_map([collection_name], map_operational_mutation_row)?
1386 .collect::<Result<Vec<_>, _>>()?
1387 };
1388 let current_rows = if let Some(record_key) = record_key {
1389 let mut stmt = conn.prepare(
1390 "SELECT collection_name, record_key, payload_json, updated_at, last_mutation_id \
1391 FROM operational_current \
1392 WHERE collection_name = ?1 AND record_key = ?2 \
1393 ORDER BY updated_at, record_key",
1394 )?;
1395 stmt.query_map([collection_name, record_key], map_operational_current_row)?
1396 .collect::<Result<Vec<_>, _>>()?
1397 } else {
1398 let mut stmt = conn.prepare(
1399 "SELECT collection_name, record_key, payload_json, updated_at, last_mutation_id \
1400 FROM operational_current \
1401 WHERE collection_name = ?1 \
1402 ORDER BY updated_at, record_key",
1403 )?;
1404 stmt.query_map([collection_name], map_operational_current_row)?
1405 .collect::<Result<Vec<_>, _>>()?
1406 };
1407
1408 Ok(OperationalTraceReport {
1409 collection_name: collection_name.to_owned(),
1410 record_key: record_key.map(str::to_owned),
1411 mutation_count: mutations.len(),
1412 current_count: current_rows.len(),
1413 mutations,
1414 current_rows,
1415 })
1416 }
1417
1418 pub fn read_operational_collection(
1421 &self,
1422 request: &OperationalReadRequest,
1423 ) -> Result<OperationalReadReport, EngineError> {
1424 if request.collection_name.trim().is_empty() {
1425 return Err(EngineError::InvalidWrite(
1426 "operational read collection_name must not be empty".to_owned(),
1427 ));
1428 }
1429 if request.filters.is_empty() {
1430 return Err(EngineError::InvalidWrite(
1431 "operational read requires at least one filter clause".to_owned(),
1432 ));
1433 }
1434
1435 let conn = self.connect()?;
1436 let record = load_operational_collection_record(&conn, &request.collection_name)?
1437 .ok_or_else(|| {
1438 EngineError::InvalidWrite(format!(
1439 "operational collection '{}' is not registered",
1440 request.collection_name
1441 ))
1442 })?;
1443 validate_append_only_operational_collection(&record, "read")?;
1444 let declared_fields = parse_operational_filter_fields(&record.filter_fields_json)
1445 .map_err(EngineError::InvalidWrite)?;
1446 let secondary_indexes =
1447 parse_operational_secondary_indexes_json(&record.secondary_indexes_json, record.kind)
1448 .map_err(EngineError::InvalidWrite)?;
1449 let applied_limit = operational_read_limit(request.limit)?;
1450 let filters = compile_operational_read_filters(&request.filters, &declared_fields)?;
1451 if let Some(report) = execute_operational_secondary_index_read(
1452 &conn,
1453 &request.collection_name,
1454 &filters,
1455 &secondary_indexes,
1456 applied_limit,
1457 )? {
1458 return Ok(report);
1459 }
1460 execute_operational_filtered_read(&conn, &request.collection_name, &filters, applied_limit)
1461 }
1462
1463 pub fn rebuild_operational_current(
1466 &self,
1467 collection_name: Option<&str>,
1468 ) -> Result<OperationalRepairReport, EngineError> {
1469 let mut conn = self.connect()?;
1470 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1471 let collections = if let Some(name) = collection_name {
1472 let maybe_kind: Option<String> = tx
1473 .query_row(
1474 "SELECT kind FROM operational_collections WHERE name = ?1",
1475 [name],
1476 |row| row.get(0),
1477 )
1478 .optional()?;
1479 let Some(kind) = maybe_kind else {
1480 return Err(EngineError::InvalidWrite(format!(
1481 "operational collection '{name}' is not registered"
1482 )));
1483 };
1484 if kind != OperationalCollectionKind::LatestState.as_str() {
1485 return Err(EngineError::InvalidWrite(format!(
1486 "operational collection '{name}' is not latest_state"
1487 )));
1488 }
1489 vec![name.to_owned()]
1490 } else {
1491 let mut stmt = tx.prepare(
1492 "SELECT name FROM operational_collections WHERE kind = 'latest_state' ORDER BY name",
1493 )?;
1494 stmt.query_map([], |row| row.get::<_, String>(0))?
1495 .collect::<Result<Vec<_>, _>>()?
1496 };
1497
1498 let rebuilt_rows = rebuild_operational_current_rows(&tx, &collections)?;
1499 for collection in &collections {
1500 let record = load_operational_collection_record(&tx, collection)?.ok_or_else(|| {
1501 EngineError::Bridge(format!(
1502 "operational collection '{collection}' missing during current rebuild"
1503 ))
1504 })?;
1505 let indexes = parse_operational_secondary_indexes_json(
1506 &record.secondary_indexes_json,
1507 record.kind,
1508 )
1509 .map_err(EngineError::InvalidWrite)?;
1510 if !indexes.is_empty() {
1511 rebuild_operational_secondary_index_entries(
1512 &tx,
1513 &record.name,
1514 record.kind,
1515 &indexes,
1516 )?;
1517 }
1518 }
1519
1520 persist_simple_provenance_event(
1521 &tx,
1522 "operational_current_rebuilt",
1523 collection_name.unwrap_or("*"),
1524 Some(serde_json::json!({
1525 "collections_rebuilt": collections.len(),
1526 "current_rows_rebuilt": rebuilt_rows,
1527 })),
1528 )?;
1529 tx.commit()?;
1530
1531 Ok(OperationalRepairReport {
1532 collections_rebuilt: collections.len(),
1533 current_rows_rebuilt: rebuilt_rows,
1534 })
1535 }
1536
1537 pub fn rebuild_projections(
1540 &self,
1541 target: ProjectionTarget,
1542 ) -> Result<ProjectionRepairReport, EngineError> {
1543 self.projections.rebuild_projections(target)
1544 }
1545
1546 pub fn rebuild_missing_projections(&self) -> Result<ProjectionRepairReport, EngineError> {
1549 self.projections.rebuild_missing_projections()
1550 }
1551
1552 pub fn register_fts_property_schema(
1561 &self,
1562 kind: &str,
1563 property_paths: &[String],
1564 separator: Option<&str>,
1565 ) -> Result<FtsPropertySchemaRecord, EngineError> {
1566 let specs: Vec<FtsPropertyPathSpec> = property_paths
1567 .iter()
1568 .map(|p| FtsPropertyPathSpec::scalar(p.clone()))
1569 .collect();
1570 self.register_fts_property_schema_with_entries(
1571 kind,
1572 &specs,
1573 separator,
1574 &[],
1575 RebuildMode::Eager,
1576 )
1577 }
1578
1579 pub fn register_fts_property_schema_with_entries(
1595 &self,
1596 kind: &str,
1597 entries: &[FtsPropertyPathSpec],
1598 separator: Option<&str>,
1599 exclude_paths: &[String],
1600 mode: RebuildMode,
1601 ) -> Result<FtsPropertySchemaRecord, EngineError> {
1602 let paths: Vec<String> = entries.iter().map(|e| e.path.clone()).collect();
1603 validate_fts_property_paths(&paths)?;
1604 for p in exclude_paths {
1605 if !p.starts_with("$.") {
1606 return Err(EngineError::InvalidWrite(format!(
1607 "exclude_paths entries must start with '$.' but got: {p}"
1608 )));
1609 }
1610 }
1611 let separator = separator.unwrap_or(" ");
1612 let paths_json = serialize_property_paths_json(entries, exclude_paths)?;
1613
1614 match mode {
1615 RebuildMode::Eager => self.register_fts_property_schema_eager(
1616 kind,
1617 entries,
1618 separator,
1619 exclude_paths,
1620 &paths,
1621 &paths_json,
1622 ),
1623 RebuildMode::Async => {
1624 self.register_fts_property_schema_async(kind, separator, &paths, &paths_json)
1625 }
1626 }
1627 }
1628
1629 fn register_fts_property_schema_eager(
1631 &self,
1632 kind: &str,
1633 entries: &[FtsPropertyPathSpec],
1634 separator: &str,
1635 exclude_paths: &[String],
1636 paths: &[String],
1637 paths_json: &str,
1638 ) -> Result<FtsPropertySchemaRecord, EngineError> {
1639 let mut conn = self.connect()?;
1640 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1641
1642 let previous_row: Option<(String, String)> = tx
1648 .query_row(
1649 "SELECT property_paths_json, separator FROM fts_property_schemas WHERE kind = ?1",
1650 [kind],
1651 |row| {
1652 let json: String = row.get(0)?;
1653 let sep: String = row.get(1)?;
1654 Ok((json, sep))
1655 },
1656 )
1657 .optional()?;
1658 let had_previous_schema = previous_row.is_some();
1659 let previous_recursive_paths: Vec<String> = previous_row
1660 .map(|(json, sep)| crate::writer::parse_property_schema_json(&json, &sep))
1661 .map_or(Vec::new(), |schema| {
1662 schema
1663 .paths
1664 .into_iter()
1665 .filter(|p| p.mode == crate::writer::PropertyPathMode::Recursive)
1666 .map(|p| p.path)
1667 .collect()
1668 });
1669 let new_recursive_paths: Vec<&str> = entries
1670 .iter()
1671 .filter(|e| e.mode == FtsPropertyPathMode::Recursive)
1672 .map(|e| e.path.as_str())
1673 .collect();
1674 let introduces_new_recursive = new_recursive_paths
1675 .iter()
1676 .any(|p| !previous_recursive_paths.iter().any(|prev| prev == p));
1677
1678 tx.execute(
1679 "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
1680 VALUES (?1, ?2, ?3) \
1681 ON CONFLICT(kind) DO UPDATE SET property_paths_json = ?2, separator = ?3",
1682 rusqlite::params![kind, paths_json, separator],
1683 )?;
1684
1685 let needs_rebuild = introduces_new_recursive || had_previous_schema;
1693 if needs_rebuild {
1694 tx.execute("DELETE FROM fts_node_properties WHERE kind = ?1", [kind])?;
1695 tx.execute(
1696 "DELETE FROM fts_node_property_positions WHERE kind = ?1",
1697 [kind],
1698 )?;
1699 crate::projection::insert_property_fts_rows_for_kind(&tx, kind)?;
1704 }
1705
1706 persist_simple_provenance_event(
1707 &tx,
1708 "fts_property_schema_registered",
1709 kind,
1710 Some(serde_json::json!({
1711 "property_paths": paths,
1712 "separator": separator,
1713 "exclude_paths": exclude_paths,
1714 "eager_rebuild": needs_rebuild,
1715 })),
1716 )?;
1717 tx.commit()?;
1718
1719 self.describe_fts_property_schema(kind)?.ok_or_else(|| {
1720 EngineError::Bridge("registered FTS property schema missing after commit".to_owned())
1721 })
1722 }
1723
1724 fn register_fts_property_schema_async(
1726 &self,
1727 kind: &str,
1728 separator: &str,
1729 paths: &[String],
1730 paths_json: &str,
1731 ) -> Result<FtsPropertySchemaRecord, EngineError> {
1732 let mut conn = self.connect()?;
1733 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1734
1735 let had_previous_schema: bool = tx
1737 .query_row(
1738 "SELECT count(*) FROM fts_property_schemas WHERE kind = ?1",
1739 rusqlite::params![kind],
1740 |r| r.get::<_, i64>(0),
1741 )
1742 .unwrap_or(0)
1743 > 0;
1744
1745 tx.execute(
1747 "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
1748 VALUES (?1, ?2, ?3) \
1749 ON CONFLICT(kind) DO UPDATE SET property_paths_json = ?2, separator = ?3",
1750 rusqlite::params![kind, paths_json, separator],
1751 )?;
1752
1753 let schema_id: i64 = tx.query_row(
1755 "SELECT rowid FROM fts_property_schemas WHERE kind = ?1",
1756 rusqlite::params![kind],
1757 |r| r.get(0),
1758 )?;
1759
1760 let now_ms = crate::rebuild_actor::now_unix_ms_pub();
1761 let is_first = i64::from(!had_previous_schema);
1762
1763 tx.execute(
1765 "INSERT INTO fts_property_rebuild_state \
1766 (kind, schema_id, state, rows_done, started_at, is_first_registration) \
1767 VALUES (?1, ?2, 'PENDING', 0, ?3, ?4) \
1768 ON CONFLICT(kind) DO UPDATE SET \
1769 schema_id = excluded.schema_id, \
1770 state = 'PENDING', \
1771 rows_total = NULL, \
1772 rows_done = 0, \
1773 started_at = excluded.started_at, \
1774 last_progress_at = NULL, \
1775 error_message = NULL, \
1776 is_first_registration = excluded.is_first_registration",
1777 rusqlite::params![kind, schema_id, now_ms, is_first],
1778 )?;
1779
1780 persist_simple_provenance_event(
1781 &tx,
1782 "fts_property_schema_registered",
1783 kind,
1784 Some(serde_json::json!({
1785 "property_paths": paths,
1786 "separator": separator,
1787 "mode": "async",
1788 })),
1789 )?;
1790 tx.commit()?;
1791
1792 if let Some(sender) = &self.rebuild_sender
1798 && sender
1799 .try_send(RebuildRequest {
1800 kind: kind.to_owned(),
1801 schema_id,
1802 })
1803 .is_err()
1804 {
1805 trace_warn!(
1806 kind = %kind,
1807 "rebuild channel full; rebuild request dropped — state remains PENDING"
1808 );
1809 }
1810
1811 self.describe_fts_property_schema(kind)?.ok_or_else(|| {
1812 EngineError::Bridge("registered FTS property schema missing after commit".to_owned())
1813 })
1814 }
1815
1816 pub fn get_property_fts_rebuild_state(
1821 &self,
1822 kind: &str,
1823 ) -> Result<Option<RebuildStateRow>, EngineError> {
1824 let conn = self.connect()?;
1825 let row = conn
1826 .query_row(
1827 "SELECT kind, schema_id, state, rows_total, rows_done, \
1828 started_at, is_first_registration, error_message \
1829 FROM fts_property_rebuild_state WHERE kind = ?1",
1830 rusqlite::params![kind],
1831 |r| {
1832 Ok(RebuildStateRow {
1833 kind: r.get(0)?,
1834 schema_id: r.get(1)?,
1835 state: r.get(2)?,
1836 rows_total: r.get(3)?,
1837 rows_done: r.get(4)?,
1838 started_at: r.get(5)?,
1839 is_first_registration: r.get::<_, i64>(6)? != 0,
1840 error_message: r.get(7)?,
1841 })
1842 },
1843 )
1844 .optional()?;
1845 Ok(row)
1846 }
1847
1848 pub fn count_staging_rows(&self, kind: &str) -> Result<i64, EngineError> {
1854 let conn = self.connect()?;
1855 let count: i64 = conn.query_row(
1856 "SELECT count(*) FROM fts_property_rebuild_staging WHERE kind = ?1",
1857 rusqlite::params![kind],
1858 |r| r.get(0),
1859 )?;
1860 Ok(count)
1861 }
1862
1863 pub fn staging_row_exists(
1869 &self,
1870 kind: &str,
1871 node_logical_id: &str,
1872 ) -> Result<bool, EngineError> {
1873 let conn = self.connect()?;
1874 let count: i64 = conn.query_row(
1875 "SELECT count(*) FROM fts_property_rebuild_staging WHERE kind = ?1 AND node_logical_id = ?2",
1876 rusqlite::params![kind, node_logical_id],
1877 |r| r.get(0),
1878 )?;
1879 Ok(count > 0)
1880 }
1881
1882 pub fn describe_fts_property_schema(
1887 &self,
1888 kind: &str,
1889 ) -> Result<Option<FtsPropertySchemaRecord>, EngineError> {
1890 let conn = self.connect()?;
1891 load_fts_property_schema_record(&conn, kind)
1892 }
1893
1894 pub fn list_fts_property_schemas(&self) -> Result<Vec<FtsPropertySchemaRecord>, EngineError> {
1899 let conn = self.connect()?;
1900 let mut stmt = conn.prepare(
1901 "SELECT kind, property_paths_json, separator, format_version \
1902 FROM fts_property_schemas ORDER BY kind",
1903 )?;
1904 let records = stmt
1905 .query_map([], |row| {
1906 let kind: String = row.get(0)?;
1907 let paths_json: String = row.get(1)?;
1908 let separator: String = row.get(2)?;
1909 let format_version: i64 = row.get(3)?;
1910 Ok(build_fts_property_schema_record(
1911 kind,
1912 &paths_json,
1913 separator,
1914 format_version,
1915 ))
1916 })?
1917 .collect::<Result<Vec<_>, _>>()?;
1918 Ok(records)
1919 }
1920
1921 pub fn remove_fts_property_schema(&self, kind: &str) -> Result<(), EngineError> {
1929 let mut conn = self.connect()?;
1930 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1931 let deleted = tx.execute("DELETE FROM fts_property_schemas WHERE kind = ?1", [kind])?;
1932 if deleted == 0 {
1933 return Err(EngineError::InvalidWrite(format!(
1934 "FTS property schema for kind '{kind}' is not registered"
1935 )));
1936 }
1937 persist_simple_provenance_event(&tx, "fts_property_schema_removed", kind, None)?;
1938 tx.commit()?;
1939 Ok(())
1940 }
1941
1942 pub fn restore_vector_profiles(&self) -> Result<ProjectionRepairReport, EngineError> {
1948 let conn = self.connect()?;
1949 let profiles: Vec<(String, String, i64)> = {
1950 let mut stmt = conn.prepare(
1951 "SELECT profile, table_name, dimension \
1952 FROM vector_profiles WHERE enabled = 1 ORDER BY profile",
1953 )?;
1954 stmt.query_map([], |row| {
1955 Ok((
1956 row.get::<_, String>(0)?,
1957 row.get::<_, String>(1)?,
1958 row.get::<_, i64>(2)?,
1959 ))
1960 })?
1961 .collect::<Result<Vec<_>, _>>()?
1962 };
1963
1964 for (profile, table_name, dimension) in &profiles {
1965 let dimension = usize::try_from(*dimension).map_err(|_| {
1966 EngineError::Bridge(format!("invalid vector profile dimension: {dimension}"))
1967 })?;
1968 self.schema_manager
1969 .ensure_vector_profile(&conn, profile, table_name, dimension)?;
1970 }
1971
1972 Ok(ProjectionRepairReport {
1973 targets: vec![ProjectionTarget::Vec],
1974 rebuilt_rows: profiles.len(),
1975 notes: vec![],
1976 })
1977 }
1978
1979 #[allow(clippy::too_many_lines)]
1995 pub fn regenerate_vector_embeddings(
1996 &self,
1997 embedder: &dyn QueryEmbedder,
1998 config: &VectorRegenerationConfig,
1999 ) -> Result<VectorRegenerationReport, EngineError> {
2000 let conn = self.connect()?;
2001 let identity = embedder.identity();
2002 let config = validate_vector_regeneration_config(&conn, config, &identity)
2003 .map_err(|failure| failure.to_engine_error())?;
2004 let chunks = collect_regeneration_chunks(&conn)?;
2005 let payload = build_regeneration_input(&config, &identity, chunks.clone());
2006 let snapshot_hash = compute_snapshot_hash(&payload)?;
2007 let audit_metadata = VectorRegenerationAuditMetadata {
2008 profile: config.profile.clone(),
2009 model_identity: identity.model_identity.clone(),
2010 model_version: identity.model_version.clone(),
2011 chunk_count: chunks.len(),
2012 snapshot_hash: snapshot_hash.clone(),
2013 failure_class: None,
2014 };
2015 persist_vector_regeneration_event(
2016 &conn,
2017 "vector_regeneration_requested",
2018 &config.profile,
2019 &audit_metadata,
2020 )?;
2021 let notes = vec!["vector embeddings regenerated via configured embedder".to_owned()];
2022
2023 let mut embedding_map: std::collections::HashMap<String, Vec<u8>> =
2024 std::collections::HashMap::with_capacity(chunks.len());
2025 for chunk in &chunks {
2026 let vector = match embedder.embed_query(&chunk.text_content) {
2027 Ok(vector) => vector,
2028 Err(error) => {
2029 let failure = VectorRegenerationFailure::new(
2030 VectorRegenerationFailureClass::EmbedderFailure,
2031 format!("embedder failed for chunk '{}': {error}", chunk.chunk_id),
2032 );
2033 self.persist_vector_regeneration_failure_best_effort(
2034 &config.profile,
2035 &audit_metadata,
2036 &failure,
2037 );
2038 return Err(failure.to_engine_error());
2039 }
2040 };
2041 if vector.len() != identity.dimension {
2042 let failure = VectorRegenerationFailure::new(
2043 VectorRegenerationFailureClass::InvalidEmbedderOutput,
2044 format!(
2045 "embedder produced {} values for chunk '{}', expected {}",
2046 vector.len(),
2047 chunk.chunk_id,
2048 identity.dimension
2049 ),
2050 );
2051 self.persist_vector_regeneration_failure_best_effort(
2052 &config.profile,
2053 &audit_metadata,
2054 &failure,
2055 );
2056 return Err(failure.to_engine_error());
2057 }
2058 if vector.iter().any(|value| !value.is_finite()) {
2059 let failure = VectorRegenerationFailure::new(
2060 VectorRegenerationFailureClass::InvalidEmbedderOutput,
2061 format!(
2062 "embedder returned non-finite values for chunk '{}'",
2063 chunk.chunk_id
2064 ),
2065 );
2066 self.persist_vector_regeneration_failure_best_effort(
2067 &config.profile,
2068 &audit_metadata,
2069 &failure,
2070 );
2071 return Err(failure.to_engine_error());
2072 }
2073 let bytes: Vec<u8> = vector
2074 .iter()
2075 .flat_map(|value| value.to_le_bytes())
2076 .collect();
2077 embedding_map.insert(chunk.chunk_id.clone(), bytes);
2078 }
2079
2080 let mut conn = conn;
2081 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2082 match self.schema_manager.ensure_vector_profile(
2083 &tx,
2084 &config.profile,
2085 &config.table_name,
2086 identity.dimension,
2087 ) {
2088 Ok(()) => {}
2089 Err(SchemaError::MissingCapability(message)) => {
2090 let failure = VectorRegenerationFailure::new(
2091 VectorRegenerationFailureClass::UnsupportedVecCapability,
2092 message,
2093 );
2094 drop(tx);
2095 self.persist_vector_regeneration_failure_best_effort(
2096 &config.profile,
2097 &audit_metadata,
2098 &failure,
2099 );
2100 return Err(failure.to_engine_error());
2101 }
2102 Err(error) => return Err(EngineError::Schema(error)),
2103 }
2104 let apply_chunks = collect_regeneration_chunks(&tx)?;
2105 let apply_payload = build_regeneration_input(&config, &identity, apply_chunks.clone());
2106 let apply_hash = compute_snapshot_hash(&apply_payload)?;
2107 if apply_hash != snapshot_hash {
2108 let failure = VectorRegenerationFailure::new(
2109 VectorRegenerationFailureClass::SnapshotDrift,
2110 "chunk snapshot changed during generation; retry".to_owned(),
2111 );
2112 drop(tx);
2113 self.persist_vector_regeneration_failure_best_effort(
2114 &config.profile,
2115 &audit_metadata,
2116 &failure,
2117 );
2118 return Err(failure.to_engine_error());
2119 }
2120 persist_vector_contract(&tx, &config, &identity, &snapshot_hash)?;
2121 tx.execute("DELETE FROM vec_nodes_active", [])?;
2122 let mut stmt = tx
2123 .prepare_cached("INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES (?1, ?2)")?;
2124 let mut regenerated_rows = 0usize;
2125 for chunk in &apply_chunks {
2126 let Some(embedding) = embedding_map.remove(&chunk.chunk_id) else {
2127 drop(stmt);
2128 drop(tx);
2129 let failure = VectorRegenerationFailure::new(
2130 VectorRegenerationFailureClass::InvalidEmbedderOutput,
2131 format!(
2132 "embedder did not produce a vector for chunk '{}'",
2133 chunk.chunk_id
2134 ),
2135 );
2136 self.persist_vector_regeneration_failure_best_effort(
2137 &config.profile,
2138 &audit_metadata,
2139 &failure,
2140 );
2141 return Err(failure.to_engine_error());
2142 };
2143 stmt.execute(rusqlite::params![chunk.chunk_id.as_str(), embedding])?;
2144 regenerated_rows += 1;
2145 }
2146 drop(stmt);
2147 persist_vector_regeneration_event(
2148 &tx,
2149 "vector_regeneration_apply",
2150 &config.profile,
2151 &audit_metadata,
2152 )?;
2153 tx.commit()?;
2154
2155 Ok(VectorRegenerationReport {
2156 profile: config.profile.clone(),
2157 table_name: config.table_name.clone(),
2158 dimension: identity.dimension,
2159 total_chunks: chunks.len(),
2160 regenerated_rows,
2161 contract_persisted: true,
2162 notes,
2163 })
2164 }
2165
2166 fn persist_vector_regeneration_failure_best_effort(
2167 &self,
2168 profile: &str,
2169 metadata: &VectorRegenerationAuditMetadata,
2170 failure: &VectorRegenerationFailure,
2171 ) {
2172 let Ok(conn) = self.connect() else {
2173 return;
2174 };
2175 let failure_metadata = VectorRegenerationAuditMetadata {
2176 profile: metadata.profile.clone(),
2177 model_identity: metadata.model_identity.clone(),
2178 model_version: metadata.model_version.clone(),
2179 chunk_count: metadata.chunk_count,
2180 snapshot_hash: metadata.snapshot_hash.clone(),
2181 failure_class: Some(failure.failure_class_label().to_owned()),
2182 };
2183 let _ = persist_vector_regeneration_event(
2184 &conn,
2185 "vector_regeneration_failed",
2186 profile,
2187 &failure_metadata,
2188 );
2189 }
2190
2191 pub fn trace_source(&self, source_ref: &str) -> Result<TraceReport, EngineError> {
2194 let conn = self.connect()?;
2195
2196 let node_logical_ids = collect_strings(
2197 &conn,
2198 "SELECT logical_id FROM nodes WHERE source_ref = ?1 ORDER BY created_at",
2199 source_ref,
2200 )?;
2201 let action_ids = collect_strings(
2202 &conn,
2203 "SELECT id FROM actions WHERE source_ref = ?1 ORDER BY created_at",
2204 source_ref,
2205 )?;
2206 let operational_mutation_ids = collect_strings(
2207 &conn,
2208 "SELECT id FROM operational_mutations WHERE source_ref = ?1 ORDER BY mutation_order",
2209 source_ref,
2210 )?;
2211
2212 Ok(TraceReport {
2213 source_ref: source_ref.to_owned(),
2214 node_rows: count_source_ref(&conn, "nodes", source_ref)?,
2215 edge_rows: count_source_ref(&conn, "edges", source_ref)?,
2216 action_rows: count_source_ref(&conn, "actions", source_ref)?,
2217 operational_mutation_rows: count_source_ref(
2218 &conn,
2219 "operational_mutations",
2220 source_ref,
2221 )?,
2222 node_logical_ids,
2223 action_ids,
2224 operational_mutation_ids,
2225 })
2226 }
2227
/// Re-activates the most recently superseded row of `logical_id` and rebuilds
/// its derived search rows.
///
/// All work happens inside one `IMMEDIATE` transaction:
///
/// 1. If an active row (`superseded_at IS NULL`) already exists, a no-op
///    report is returned and nothing is written.
/// 2. Otherwise the superseded row ordered last by
///    `(superseded_at, created_at, rowid)` has its `superseded_at` cleared.
/// 3. When a `node_retire` provenance event exists for the id, edges are
///    restored through `restore_validated_edges`; without one, no edges are
///    restored.
/// 4. `fts_nodes` rows for the id are deleted and re-inserted from `chunks`,
///    and the property FTS row is rebuilt.
/// 5. A `restore_logical_id` provenance event carrying the counters is
///    written and the transaction is committed.
///
/// # Errors
///
/// Returns [`EngineError::InvalidWrite`] when no superseded row exists for
/// `logical_id`, and propagates connection / SQL failures.
#[allow(clippy::too_many_lines)]
pub fn restore_logical_id(
    &self,
    logical_id: &str,
) -> Result<LogicalRestoreReport, EngineError> {
    let mut conn = self.connect()?;
    let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;

    // Nothing to restore when an active version is already present.
    let active_count: i64 = tx.query_row(
        "SELECT count(*) FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
        [logical_id],
        |row| row.get(0),
    )?;
    if active_count > 0 {
        // Early return drops `tx` without committing; no writes happened yet.
        return Ok(LogicalRestoreReport {
            logical_id: logical_id.to_owned(),
            was_noop: true,
            restored_node_rows: 0,
            restored_edge_rows: 0,
            restored_chunk_rows: 0,
            restored_fts_rows: 0,
            restored_property_fts_rows: 0,
            restored_vec_rows: 0,
            skipped_edges: Vec::new(),
            notes: vec!["logical_id already active".to_owned()],
        });
    }

    // Pick the latest superseded version; ties fall back to created_at, then rowid.
    let restored_node: Option<(String, String)> = tx
        .query_row(
            "SELECT row_id, kind FROM nodes \
             WHERE logical_id = ?1 AND superseded_at IS NOT NULL \
             ORDER BY superseded_at DESC, created_at DESC, rowid DESC LIMIT 1",
            [logical_id],
            |row| Ok((row.get(0)?, row.get(1)?)),
        )
        .optional()?;
    let (restored_node_row_id, restored_kind) = restored_node.ok_or_else(|| {
        EngineError::InvalidWrite(format!("logical_id '{logical_id}' is not retired"))
    })?;

    tx.execute(
        "UPDATE nodes SET superseded_at = NULL WHERE row_id = ?1",
        [restored_node_row_id.as_str()],
    )?;

    // The most recent `node_retire` event scopes which edges are candidates
    // for restoration.
    let retire_scope: Option<(i64, Option<String>, i64)> = tx
        .query_row(
            "SELECT rowid, source_ref, created_at FROM provenance_events \
             WHERE event_type = 'node_retire' AND subject = ?1 \
             ORDER BY created_at DESC, rowid DESC LIMIT 1",
            [logical_id],
            |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
        )
        .optional()?;
    let (restored_edge_rows, skipped_edges) = if let Some((
        retire_event_rowid,
        retire_source_ref,
        retire_created_at,
    )) = retire_scope
    {
        restore_validated_edges(
            &tx,
            logical_id,
            retire_source_ref.as_deref(),
            retire_created_at,
            retire_event_rowid,
        )?
    } else {
        // No retire event recorded: no edges are restored.
        (0, Vec::new())
    };

    // Chunks are only counted here; this function does not modify them.
    let restored_chunk_rows: usize = tx
        .query_row(
            "SELECT count(*) FROM chunks WHERE node_logical_id = ?1",
            [logical_id],
            |row| row.get::<_, i64>(0),
        )
        .map(i64_to_usize)?;
    // Delete-then-insert so the FTS rows for this id are never duplicated.
    tx.execute(
        "DELETE FROM fts_nodes WHERE node_logical_id = ?1",
        [logical_id],
    )?;
    let restored_fts_rows = tx.execute(
        "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
         SELECT id, node_logical_id, ?2, text_content \
         FROM chunks WHERE node_logical_id = ?1",
        rusqlite::params![logical_id, restored_kind],
    )?;
    let restored_vec_rows = count_vec_rows_for_logical_id(&tx, logical_id)?;

    // Same delete-then-rebuild approach for the property FTS projection.
    tx.execute(
        "DELETE FROM fts_node_properties WHERE node_logical_id = ?1",
        [logical_id],
    )?;
    let restored_property_fts_rows =
        rebuild_single_node_property_fts(&tx, logical_id, &restored_kind)?;

    // Audit trail: record what was restored before committing.
    persist_simple_provenance_event(
        &tx,
        "restore_logical_id",
        logical_id,
        Some(serde_json::json!({
            "restored_node_rows": 1,
            "restored_edge_rows": restored_edge_rows,
            "restored_chunk_rows": restored_chunk_rows,
            "restored_fts_rows": restored_fts_rows,
            "restored_property_fts_rows": restored_property_fts_rows,
            "restored_vec_rows": restored_vec_rows,
        })),
    )?;
    tx.commit()?;

    Ok(LogicalRestoreReport {
        logical_id: logical_id.to_owned(),
        was_noop: false,
        restored_node_rows: 1,
        restored_edge_rows,
        restored_chunk_rows,
        restored_fts_rows,
        restored_property_fts_rows,
        restored_vec_rows,
        skipped_edges,
        notes: Vec::new(),
    })
}
2358
2359 pub fn purge_logical_id(&self, logical_id: &str) -> Result<LogicalPurgeReport, EngineError> {
2363 let mut conn = self.connect()?;
2364 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2365
2366 let active_count: i64 = tx.query_row(
2367 "SELECT count(*) FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
2368 [logical_id],
2369 |row| row.get(0),
2370 )?;
2371 if active_count > 0 {
2372 return Ok(LogicalPurgeReport {
2373 logical_id: logical_id.to_owned(),
2374 was_noop: true,
2375 deleted_node_rows: 0,
2376 deleted_edge_rows: 0,
2377 deleted_chunk_rows: 0,
2378 deleted_fts_rows: 0,
2379 deleted_vec_rows: 0,
2380 notes: vec!["logical_id is active; purge skipped".to_owned()],
2381 });
2382 }
2383
2384 let node_rows: i64 = tx.query_row(
2385 "SELECT count(*) FROM nodes WHERE logical_id = ?1",
2386 [logical_id],
2387 |row| row.get(0),
2388 )?;
2389 if node_rows == 0 {
2390 return Err(EngineError::InvalidWrite(format!(
2391 "logical_id '{logical_id}' does not exist"
2392 )));
2393 }
2394
2395 let deleted_vec_rows = delete_vec_rows_for_logical_id(&tx, logical_id)?;
2396 let deleted_fts_rows = tx.execute(
2397 "DELETE FROM fts_nodes WHERE node_logical_id = ?1",
2398 [logical_id],
2399 )?;
2400 let deleted_edge_rows = tx.execute(
2401 "DELETE FROM edges WHERE source_logical_id = ?1 OR target_logical_id = ?1",
2402 [logical_id],
2403 )?;
2404 let deleted_chunk_rows = tx.execute(
2405 "DELETE FROM chunks WHERE node_logical_id = ?1",
2406 [logical_id],
2407 )?;
2408 let deleted_node_rows =
2409 tx.execute("DELETE FROM nodes WHERE logical_id = ?1", [logical_id])?;
2410 tx.execute(
2411 "DELETE FROM node_access_metadata WHERE logical_id = ?1",
2412 [logical_id],
2413 )?;
2414
2415 persist_simple_provenance_event(
2416 &tx,
2417 "purge_logical_id",
2418 logical_id,
2419 Some(serde_json::json!({
2420 "deleted_node_rows": deleted_node_rows,
2421 "deleted_edge_rows": deleted_edge_rows,
2422 "deleted_chunk_rows": deleted_chunk_rows,
2423 "deleted_fts_rows": deleted_fts_rows,
2424 "deleted_vec_rows": deleted_vec_rows,
2425 })),
2426 )?;
2427 tx.commit()?;
2428
2429 Ok(LogicalPurgeReport {
2430 logical_id: logical_id.to_owned(),
2431 was_noop: false,
2432 deleted_node_rows,
2433 deleted_edge_rows,
2434 deleted_chunk_rows,
2435 deleted_fts_rows,
2436 deleted_vec_rows,
2437 notes: Vec::new(),
2438 })
2439 }
2440
2441 pub fn purge_provenance_events(
2451 &self,
2452 before_timestamp: i64,
2453 options: &ProvenancePurgeOptions,
2454 ) -> Result<ProvenancePurgeReport, EngineError> {
2455 let mut conn = self.connect()?;
2456 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2457
2458 let preserved_types: Vec<&str> = if options.preserve_event_types.is_empty() {
2459 vec!["excise", "purge_logical_id"]
2460 } else {
2461 options
2462 .preserve_event_types
2463 .iter()
2464 .map(String::as_str)
2465 .collect()
2466 };
2467
2468 let placeholders: String = (0..preserved_types.len())
2470 .map(|i| format!("?{}", i + 2))
2471 .collect::<Vec<_>>()
2472 .join(", ");
2473 let count_query = format!(
2474 "SELECT count(*) FROM provenance_events \
2475 WHERE created_at < ?1 AND event_type NOT IN ({placeholders})"
2476 );
2477 let delete_query = format!(
2478 "DELETE FROM provenance_events WHERE rowid IN (\
2479 SELECT rowid FROM provenance_events \
2480 WHERE created_at < ?1 AND event_type NOT IN ({placeholders}) \
2481 LIMIT 10000)"
2482 );
2483
2484 let bind_params = |stmt: &mut rusqlite::Statement<'_>| -> Result<(), rusqlite::Error> {
2485 stmt.raw_bind_parameter(1, before_timestamp)?;
2486 for (i, event_type) in preserved_types.iter().enumerate() {
2487 stmt.raw_bind_parameter(i + 2, *event_type)?;
2488 }
2489 Ok(())
2490 };
2491
2492 let events_deleted = if options.dry_run {
2493 let mut stmt = tx.prepare(&count_query)?;
2494 bind_params(&mut stmt)?;
2495 stmt.raw_query()
2496 .next()?
2497 .map_or(0, |row| row.get::<_, u64>(0).unwrap_or(0))
2498 } else {
2499 let mut total_deleted: u64 = 0;
2500 loop {
2501 let mut stmt = tx.prepare(&delete_query)?;
2502 bind_params(&mut stmt)?;
2503 let deleted = stmt.raw_execute()?;
2504 if deleted == 0 {
2505 break;
2506 }
2507 total_deleted += deleted as u64;
2508 }
2509 total_deleted
2510 };
2511
2512 let total_after: u64 =
2513 tx.query_row("SELECT count(*) FROM provenance_events", [], |row| {
2514 row.get(0)
2515 })?;
2516
2517 let oldest_remaining: Option<i64> = tx
2518 .query_row("SELECT MIN(created_at) FROM provenance_events", [], |row| {
2519 row.get(0)
2520 })
2521 .optional()?
2522 .flatten();
2523
2524 if !options.dry_run {
2525 tx.commit()?;
2526 }
2527
2528 let events_preserved = if options.dry_run {
2531 total_after - events_deleted
2532 } else {
2533 total_after
2534 };
2535
2536 Ok(ProvenancePurgeReport {
2537 events_deleted,
2538 events_preserved,
2539 oldest_remaining,
2540 })
2541 }
2542
/// Removes the effects of everything attributed to `source_ref` and returns a
/// fresh trace of that source.
///
/// Inside one `IMMEDIATE` transaction this:
///
/// * supersedes active nodes, edges and actions carrying `source_ref`;
/// * deletes the source's operational mutations and rebuilds current rows for
///   the affected `latest_state` collections;
/// * deletes vector rows and chunks of the affected logical ids;
/// * re-activates, per affected logical id, the most recently created other
///   node row (if any);
/// * drops access metadata for ids left without an active node;
/// * rebuilds `fts_nodes` and the property FTS projection from scratch;
/// * records an `excise_source` provenance event.
///
/// # Errors
///
/// Propagates connection / SQL failures; on error the transaction is dropped
/// without being committed.
#[allow(clippy::too_many_lines)]
pub fn excise_source(&self, source_ref: &str) -> Result<TraceReport, EngineError> {
    let mut conn = self.connect()?;

    let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
    // Collected up front: the mutations that identify these collections are
    // deleted further down.
    let affected_operational_collections = collect_strings_tx(
        &tx,
        "SELECT DISTINCT m.collection_name \
         FROM operational_mutations m \
         JOIN operational_collections c ON c.name = m.collection_name \
         WHERE m.source_ref = ?1 AND c.kind = 'latest_state' \
         ORDER BY m.collection_name",
        source_ref,
    )?;

    // (row_id, logical_id) of every node that is active *before* the excise.
    let pairs: Vec<(String, String)> = {
        let mut stmt = tx.prepare(
            "SELECT row_id, logical_id FROM nodes \
             WHERE source_ref = ?1 AND superseded_at IS NULL",
        )?;
        stmt.query_map([source_ref], |row| {
            Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
        })?
        .collect::<Result<Vec<_>, _>>()?
    };
    let affected_logical_ids: Vec<String> = pairs
        .iter()
        .map(|(_, logical_id)| logical_id.clone())
        .collect();

    // Supersede (not delete) the active rows that came from this source.
    tx.execute(
        "UPDATE nodes SET superseded_at = unixepoch() \
         WHERE source_ref = ?1 AND superseded_at IS NULL",
        [source_ref],
    )?;
    tx.execute(
        "UPDATE edges SET superseded_at = unixepoch() \
         WHERE source_ref = ?1 AND superseded_at IS NULL",
        [source_ref],
    )?;
    tx.execute(
        "UPDATE actions SET superseded_at = unixepoch() \
         WHERE source_ref = ?1 AND superseded_at IS NULL",
        [source_ref],
    )?;
    // Operational state: clear current rows, drop this source's mutations;
    // the current rows are rebuilt from the remaining mutations below.
    clear_operational_current_rows(&tx, &affected_operational_collections)?;
    tx.execute(
        "DELETE FROM operational_mutations WHERE source_ref = ?1",
        [source_ref],
    )?;
    for logical_id in &affected_logical_ids {
        delete_vec_rows_for_logical_id(&tx, logical_id)?;
        tx.execute(
            "DELETE FROM chunks WHERE node_logical_id = ?1",
            [logical_id.as_str()],
        )?;
    }

    // Re-activate the newest other version of each excised logical id.
    // NOTE(review): the lookup does not filter on source_ref, so an older
    // version that shares the excised source_ref could be selected — confirm
    // this is intended.
    for (excised_row_id, logical_id) in &pairs {
        let prior: Option<String> = tx
            .query_row(
                "SELECT row_id FROM nodes \
                 WHERE logical_id = ?1 AND row_id != ?2 \
                 ORDER BY created_at DESC LIMIT 1",
                [logical_id.as_str(), excised_row_id.as_str()],
                |row| row.get(0),
            )
            .optional()?;
        if let Some(prior_id) = prior {
            tx.execute(
                "UPDATE nodes SET superseded_at = NULL WHERE row_id = ?1",
                [prior_id.as_str()],
            )?;
        }
    }

    // Access metadata is only kept for ids that still have an active node.
    for logical_id in &affected_logical_ids {
        let has_active_node = tx
            .query_row(
                "SELECT 1 FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL LIMIT 1",
                [logical_id.as_str()],
                |row| row.get::<_, i64>(0),
            )
            .optional()?
            .is_some();
        if !has_active_node {
            tx.execute(
                "DELETE FROM node_access_metadata WHERE logical_id = ?1",
                [logical_id.as_str()],
            )?;
        }
    }

    rebuild_operational_current_rows(&tx, &affected_operational_collections)?;

    // Full rebuild of the chunk FTS projection from chunks of active nodes.
    tx.execute("DELETE FROM fts_nodes", [])?;
    tx.execute(
        r"
        INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content)
        SELECT c.id, n.logical_id, n.kind, c.text_content
        FROM chunks c
        JOIN nodes n
          ON n.logical_id = c.node_logical_id
         AND n.superseded_at IS NULL
        ",
        [],
    )?;

    rebuild_property_fts_in_tx(&tx)?;

    // `source_ref` is stored both as the event subject and as its source_ref.
    tx.execute(
        "INSERT INTO provenance_events (id, event_type, subject, source_ref) \
         VALUES (?1, 'excise_source', ?2, ?2)",
        rusqlite::params![new_id(), source_ref],
    )?;

    tx.commit()?;

    // Trace is taken on a new connection after the commit.
    self.trace_source(source_ref)
}
2675
/// Exports the database to `destination_path` and writes a JSON manifest
/// next to it.
///
/// Steps, in order:
///
/// 1. Optionally run `PRAGMA wal_checkpoint(FULL)`; a non-zero `busy` result
///    aborts the export.
/// 2. Read the highest applied schema version.
/// 3. Create the destination's parent directory and copy the main database
///    with `Connection::backup`.
/// 4. Re-open the exported file read-only to read its page count, then hash
///    the file with SHA-256.
/// 5. Write `<file name>.export-manifest.json` through a temporary file that
///    is renamed into place.
///
/// # Errors
///
/// Returns [`EngineError::Bridge`] when the checkpoint is blocked, the system
/// clock is before the Unix epoch, the destination has no file name, or the
/// manifest cannot be serialized; propagates SQL and I/O failures.
pub fn safe_export(
    &self,
    destination_path: impl AsRef<Path>,
    options: SafeExportOptions,
) -> Result<SafeExportManifest, EngineError> {
    let destination_path = destination_path.as_ref();

    let conn = self.connect()?;

    if options.force_checkpoint {
        trace_info!("safe_export: wal checkpoint started");
        // The pragma returns one row: (busy, log frames, checkpointed frames).
        let (busy, log, checkpointed): (i64, i64, i64) =
            conn.query_row("PRAGMA wal_checkpoint(FULL)", [], |row| {
                Ok((row.get(0)?, row.get(1)?, row.get(2)?))
            })?;
        if busy != 0 {
            trace_warn!(
                busy,
                log_frames = log,
                checkpointed_frames = checkpointed,
                "safe_export: wal checkpoint blocked by active readers"
            );
            return Err(EngineError::Bridge(format!(
                "WAL checkpoint blocked: {busy} active reader(s) prevented a full checkpoint; \
                 log frames={log}, checkpointed={checkpointed}; \
                 retry export when no readers are active"
            )));
        }
        trace_info!(
            log_frames = log,
            checkpointed_frames = checkpointed,
            "safe_export: wal checkpoint completed"
        );
    }

    // Any failure of this query is reported as schema version 0 in the manifest.
    let schema_version: u32 = conn
        .query_row(
            "SELECT COALESCE(MAX(version), 0) FROM fathom_schema_migrations",
            [],
            |row| row.get(0),
        )
        .unwrap_or(0);

    if let Some(parent) = destination_path.parent() {
        fs::create_dir_all(parent)?;
    }
    conn.backup(DatabaseName::Main, destination_path, None)?;

    // Release the source connection before inspecting the exported file.
    drop(conn);

    let page_count: u64 = {
        // Read-only handle on the *exported* file, scoped to this block.
        let export_conn = rusqlite::Connection::open_with_flags(
            destination_path,
            rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY
                | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX,
        )?;
        export_conn.query_row("PRAGMA page_count", [], |row| row.get(0))?
    };

    let sha256 = {
        // Stream the file into the hasher instead of loading it into memory.
        let mut file = fs::File::open(destination_path)?;
        let mut hasher = Sha256::new();
        io::copy(&mut file, &mut hasher)?;
        format!("{:x}", hasher.finalize())
    };

    // Seconds since the Unix epoch.
    let exported_at = SystemTime::now()
        .duration_since(SystemTime::UNIX_EPOCH)
        .map_err(|e| EngineError::Bridge(format!("system clock error: {e}")))?
        .as_secs();

    let manifest = SafeExportManifest {
        exported_at,
        sha256,
        schema_version,
        protocol_version: EXPORT_PROTOCOL_VERSION,
        page_count,
    };

    let manifest_path = {
        // Sibling of the export: "<file name>.export-manifest.json".
        let mut p = destination_path.to_path_buf();
        let stem = p
            .file_name()
            .map(|n| format!("{}.export-manifest.json", n.to_string_lossy()))
            .ok_or_else(|| {
                EngineError::Bridge("destination path has no filename".to_owned())
            })?;
        p.set_file_name(stem);
        p
    };
    let manifest_json =
        serde_json::to_string(&manifest).map_err(|e| EngineError::Bridge(e.to_string()))?;

    // Write to "<manifest>.tmp" first and rename, so the final manifest path
    // is only ever populated by the rename.
    let manifest_tmp = manifest_path.with_extension("json.tmp");
    if let Err(e) = fs::write(&manifest_tmp, &manifest_json)
        .and_then(|()| fs::rename(&manifest_tmp, &manifest_path))
    {
        // Best-effort cleanup; the original error is what gets reported.
        let _ = fs::remove_file(&manifest_tmp);
        return Err(e.into());
    }

    Ok(manifest)
}
2796}
2797
/// In-memory shape of a stored vector embedding contract.
///
/// The field names match the columns that `persist_vector_contract` writes to
/// `vector_embedding_contracts` (all of them except `updated_at`).
// NOTE(review): marked `dead_code`; no construction site is visible in this
// part of the file — confirm whether the type is still needed.
#[allow(dead_code)]
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
struct VectorEmbeddingContractRecord {
    /// Vector profile name the contract applies to.
    profile: String,
    /// Name of the vector table the contract governs.
    table_name: String,
    /// Identity string of the embedding model.
    model_identity: String,
    /// Version string of the embedding model.
    model_version: String,
    /// Embedding dimension.
    dimension: usize,
    /// Normalization policy label.
    normalization_policy: String,
    /// Chunking policy label.
    chunking_policy: String,
    /// Preprocessing policy label.
    preprocessing_policy: String,
    /// JSON text describing the generator command.
    generator_command_json: String,
    /// Time the contract was applied (presumably Unix seconds — confirm).
    applied_at: i64,
    /// Snapshot hash recorded with the contract.
    snapshot_hash: String,
    /// Version of the contract record format.
    contract_format_version: i64,
}
2814
/// One chunk of text fed into vector regeneration.
///
/// Rows are produced by `collect_regeneration_chunks` and serialized as part
/// of [`VectorRegenerationInput`]; the field order is therefore part of the
/// serialized form.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
struct VectorRegenerationInputChunk {
    /// `chunks.id` of the chunk.
    chunk_id: String,
    /// Logical id of the node owning the chunk.
    node_logical_id: String,
    /// Kind of the owning node.
    kind: String,
    /// Chunk text.
    text_content: String,
    /// Optional start offset of the chunk (`chunks.byte_start`).
    byte_start: Option<i64>,
    /// Optional end offset of the chunk (`chunks.byte_end`).
    byte_end: Option<i64>,
    /// `source_ref` of the owning node, if any.
    source_ref: Option<String>,
    /// `chunks.created_at` of the chunk.
    created_at: i64,
}
2826
/// Complete input of a vector regeneration run.
///
/// `compute_snapshot_hash` serializes this value to JSON and hashes the bytes,
/// so adding, removing or reordering fields changes every snapshot hash.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
struct VectorRegenerationInput {
    /// Vector profile name (from the regeneration config).
    profile: String,
    /// Target vector table name (from the regeneration config).
    table_name: String,
    /// Model identity reported by the embedder.
    model_identity: String,
    /// Model version reported by the embedder.
    model_version: String,
    /// Embedding dimension reported by the embedder.
    dimension: usize,
    /// Normalization policy reported by the embedder.
    normalization_policy: String,
    /// Chunking policy (from the regeneration config).
    chunking_policy: String,
    /// Preprocessing policy (from the regeneration config).
    preprocessing_policy: String,
    /// Chunks to embed.
    chunks: Vec<VectorRegenerationInputChunk>,
}
2839
/// Coarse classification of why a vector regeneration attempt failed.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum VectorRegenerationFailureClass {
    InvalidContract,
    EmbedderFailure,
    InvalidEmbedderOutput,
    SnapshotDrift,
    UnsupportedVecCapability,
}

impl VectorRegenerationFailureClass {
    /// Human-readable label used in error messages and audit metadata.
    fn label(self) -> &'static str {
        use VectorRegenerationFailureClass as Class;

        match self {
            Class::InvalidContract => "invalid contract",
            Class::EmbedderFailure => "embedder failure",
            Class::InvalidEmbedderOutput => "invalid embedder output",
            Class::SnapshotDrift => "snapshot drift",
            Class::UnsupportedVecCapability => "unsupported vec capability",
        }
    }

    /// Whether the failure is flagged as retryable; only snapshot drift is.
    fn retryable(self) -> bool {
        self == Self::SnapshotDrift
    }
}
2864
2865#[derive(Clone, Debug, PartialEq, Eq)]
2866pub(crate) struct VectorRegenerationFailure {
2867 class: VectorRegenerationFailureClass,
2868 detail: String,
2869}
2870
2871impl VectorRegenerationFailure {
2872 pub(crate) fn new(class: VectorRegenerationFailureClass, detail: impl Into<String>) -> Self {
2873 Self {
2874 class,
2875 detail: detail.into(),
2876 }
2877 }
2878
2879 fn to_engine_error(&self) -> EngineError {
2880 let retry_suffix = if self.class.retryable() {
2881 " [retryable]"
2882 } else {
2883 ""
2884 };
2885 EngineError::Bridge(format!(
2886 "vector regeneration {}: {}{}",
2887 self.class.label(),
2888 self.detail,
2889 retry_suffix
2890 ))
2891 }
2892
2893 fn failure_class_label(&self) -> &'static str {
2894 self.class.label()
2895 }
2896}
2897
/// Metadata attached to vector regeneration provenance events.
///
/// `persist_vector_regeneration_event` serializes this value and stores it in
/// `provenance_events.metadata_json`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
struct VectorRegenerationAuditMetadata {
    /// Vector profile name.
    profile: String,
    /// Embedding model identity.
    model_identity: String,
    /// Embedding model version.
    model_version: String,
    /// Number of chunks involved in the run.
    chunk_count: usize,
    /// Snapshot hash associated with the run.
    snapshot_hash: String,
    /// Failure class label; left out of the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    failure_class: Option<String>,
}
2908
/// Retention policy of an operational collection, as deserialized from JSON.
///
/// The variant is selected by a `"mode"` field whose value is the snake_case
/// variant name: `keep_all`, `purge_before_seconds` or `keep_last`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize)]
#[serde(tag = "mode", rename_all = "snake_case")]
enum OperationalRetentionPolicy {
    /// No retention limit.
    KeepAll,
    /// Age-based policy; carries the age threshold in seconds.
    PurgeBeforeSeconds { max_age_seconds: i64 },
    /// Count-based policy; carries the maximum number of rows.
    KeepLast { max_rows: usize },
}
2916
2917pub fn load_vector_regeneration_config(
2920 path: impl AsRef<Path>,
2921) -> Result<VectorRegenerationConfig, EngineError> {
2922 let path = path.as_ref();
2923 let raw = fs::read_to_string(path)?;
2924 match path.extension().and_then(|ext| ext.to_str()) {
2925 Some("toml") => {
2926 toml::from_str(&raw).map_err(|error| EngineError::Bridge(error.to_string()))
2927 }
2928 Some("json") | None => {
2929 serde_json::from_str(&raw).map_err(|error| EngineError::Bridge(error.to_string()))
2930 }
2931 Some(other) => Err(EngineError::Bridge(format!(
2932 "unsupported vector regeneration config extension: {other}"
2933 ))),
2934 }
2935}
2936
2937fn validate_vector_regeneration_config(
2938 conn: &rusqlite::Connection,
2939 config: &VectorRegenerationConfig,
2940 identity: &QueryEmbedderIdentity,
2941) -> Result<VectorRegenerationConfig, VectorRegenerationFailure> {
2942 let profile = validate_bounded_text("profile", &config.profile, MAX_PROFILE_LEN)?;
2943 let table_name = validate_bounded_text("table_name", &config.table_name, MAX_PROFILE_LEN)?;
2944 if table_name != "vec_nodes_active" {
2945 return Err(VectorRegenerationFailure::new(
2946 VectorRegenerationFailureClass::InvalidContract,
2947 format!("table_name must be vec_nodes_active, got '{table_name}'"),
2948 ));
2949 }
2950 if identity.dimension == 0 {
2951 return Err(VectorRegenerationFailure::new(
2952 VectorRegenerationFailureClass::InvalidContract,
2953 "embedder reports dimension 0".to_owned(),
2954 ));
2955 }
2956 let chunking_policy =
2957 validate_bounded_text("chunking_policy", &config.chunking_policy, MAX_POLICY_LEN)?;
2958 let preprocessing_policy = validate_bounded_text(
2959 "preprocessing_policy",
2960 &config.preprocessing_policy,
2961 MAX_POLICY_LEN,
2962 )?;
2963
2964 if let Some(existing_dimension) = current_vector_profile_dimension(conn, &profile)?
2965 && existing_dimension != identity.dimension
2966 {
2967 return Err(VectorRegenerationFailure::new(
2968 VectorRegenerationFailureClass::InvalidContract,
2969 format!(
2970 "embedder dimension {} does not match existing vector profile dimension {}",
2971 identity.dimension, existing_dimension
2972 ),
2973 ));
2974 }
2975
2976 validate_existing_contract_version(conn, &profile)?;
2977
2978 let normalized = VectorRegenerationConfig {
2979 profile,
2980 table_name,
2981 chunking_policy,
2982 preprocessing_policy,
2983 };
2984 let serialized = serde_json::to_vec(&normalized).map_err(|error| {
2985 VectorRegenerationFailure::new(
2986 VectorRegenerationFailureClass::InvalidContract,
2987 error.to_string(),
2988 )
2989 })?;
2990 if serialized.len() > MAX_CONTRACT_JSON_BYTES {
2991 return Err(VectorRegenerationFailure::new(
2992 VectorRegenerationFailureClass::InvalidContract,
2993 format!("serialized contract exceeds {MAX_CONTRACT_JSON_BYTES} bytes"),
2994 ));
2995 }
2996
2997 Ok(normalized)
2998}
2999
3000#[allow(clippy::cast_possible_wrap)]
3001fn persist_vector_contract(
3002 conn: &rusqlite::Connection,
3003 config: &VectorRegenerationConfig,
3004 identity: &QueryEmbedderIdentity,
3005 snapshot_hash: &str,
3006) -> Result<(), EngineError> {
3007 conn.execute(
3008 r"
3009 INSERT OR REPLACE INTO vector_embedding_contracts (
3010 profile,
3011 table_name,
3012 model_identity,
3013 model_version,
3014 dimension,
3015 normalization_policy,
3016 chunking_policy,
3017 preprocessing_policy,
3018 generator_command_json,
3019 applied_at,
3020 snapshot_hash,
3021 contract_format_version,
3022 updated_at
3023 ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, unixepoch(), ?10, ?11, unixepoch())
3024 ",
3025 rusqlite::params![
3026 config.profile.as_str(),
3027 config.table_name.as_str(),
3028 identity.model_identity.as_str(),
3029 identity.model_version.as_str(),
3030 identity.dimension as i64,
3031 identity.normalization_policy.as_str(),
3032 config.chunking_policy.as_str(),
3033 config.preprocessing_policy.as_str(),
3034 "[]",
3035 snapshot_hash,
3036 CURRENT_VECTOR_CONTRACT_FORMAT_VERSION,
3037 ],
3038 )?;
3039 Ok(())
3040}
3041
3042fn persist_vector_regeneration_event(
3043 conn: &rusqlite::Connection,
3044 event_type: &str,
3045 subject: &str,
3046 metadata: &VectorRegenerationAuditMetadata,
3047) -> Result<(), EngineError> {
3048 let metadata_json = serialize_audit_metadata(metadata)?;
3049 conn.execute(
3050 "INSERT INTO provenance_events (id, event_type, subject, metadata_json) VALUES (?1, ?2, ?3, ?4)",
3051 rusqlite::params![new_id(), event_type, subject, metadata_json],
3052 )?;
3053 Ok(())
3054}
3055
3056fn persist_simple_provenance_event(
3057 conn: &rusqlite::Connection,
3058 event_type: &str,
3059 subject: &str,
3060 metadata: Option<serde_json::Value>,
3061) -> Result<(), EngineError> {
3062 let metadata_json = metadata.map(|value| value.to_string()).unwrap_or_default();
3063 conn.execute(
3064 "INSERT INTO provenance_events (id, event_type, subject, metadata_json) VALUES (?1, ?2, ?3, ?4)",
3065 rusqlite::params![new_id(), event_type, subject, metadata_json],
3066 )?;
3067 Ok(())
3068}
3069
3070fn count_missing_property_fts_rows(conn: &rusqlite::Connection) -> Result<i64, EngineError> {
3074 let schemas = crate::writer::load_fts_property_schemas(conn)?;
3075 if schemas.is_empty() {
3076 return Ok(0);
3077 }
3078
3079 let mut missing = 0i64;
3080 for (kind, schema) in &schemas {
3081 let mut stmt = conn.prepare(
3082 "SELECT n.logical_id, n.properties FROM nodes n \
3083 WHERE n.kind = ?1 AND n.superseded_at IS NULL \
3084 AND NOT EXISTS (SELECT 1 FROM fts_node_properties fp WHERE fp.node_logical_id = n.logical_id)",
3085 )?;
3086 let rows = stmt.query_map([kind.as_str()], |row| {
3087 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
3088 })?;
3089 for row in rows {
3090 let (_logical_id, properties_str) = row?;
3091 let props: serde_json::Value =
3092 serde_json::from_str(&properties_str).unwrap_or_default();
3093 if crate::writer::extract_property_fts(&props, schema)
3094 .0
3095 .is_some()
3096 {
3097 missing += 1;
3098 }
3099 }
3100 }
3101 Ok(missing)
3102}
3103
3104fn count_drifted_property_fts_rows(conn: &rusqlite::Connection) -> Result<i64, EngineError> {
3109 let schemas = crate::writer::load_fts_property_schemas(conn)?;
3110 if schemas.is_empty() {
3111 return Ok(0);
3112 }
3113
3114 let mut drifted = 0i64;
3115 for (kind, schema) in &schemas {
3116 let mut stmt = conn.prepare(
3117 "SELECT fp.node_logical_id, fp.text_content, n.properties \
3118 FROM fts_node_properties fp \
3119 JOIN nodes n ON n.logical_id = fp.node_logical_id AND n.superseded_at IS NULL \
3120 WHERE fp.kind = ?1 AND n.kind = ?1",
3121 )?;
3122 let rows = stmt.query_map([kind.as_str()], |row| {
3123 Ok((
3124 row.get::<_, String>(0)?,
3125 row.get::<_, String>(1)?,
3126 row.get::<_, String>(2)?,
3127 ))
3128 })?;
3129 for row in rows {
3130 let (_logical_id, stored_text, properties_str) = row?;
3131 let props: serde_json::Value =
3132 serde_json::from_str(&properties_str).unwrap_or_default();
3133 let (expected, _positions, _stats) =
3134 crate::writer::extract_property_fts(&props, schema);
3135 match expected {
3136 Some(text) if text == stored_text => {}
3137 _ => drifted += 1,
3138 }
3139 }
3140 }
3141 Ok(drifted)
3142}
3143
3144fn rebuild_property_fts_in_tx(conn: &rusqlite::Connection) -> Result<usize, EngineError> {
3146 conn.execute("DELETE FROM fts_node_properties", [])?;
3147 conn.execute("DELETE FROM fts_node_property_positions", [])?;
3148 let inserted = crate::projection::insert_property_fts_rows(
3149 conn,
3150 "SELECT logical_id, properties FROM nodes WHERE kind = ?1 AND superseded_at IS NULL",
3151 )?;
3152 Ok(inserted)
3153}
3154
3155fn rebuild_single_node_property_fts(
3158 conn: &rusqlite::Connection,
3159 logical_id: &str,
3160 kind: &str,
3161) -> Result<usize, EngineError> {
3162 let schema: Option<(String, String)> = conn
3163 .query_row(
3164 "SELECT property_paths_json, separator FROM fts_property_schemas WHERE kind = ?1",
3165 [kind],
3166 |row| {
3167 let paths_json: String = row.get(0)?;
3168 let separator: String = row.get(1)?;
3169 Ok((paths_json, separator))
3170 },
3171 )
3172 .optional()?;
3173 let Some((paths_json, separator)) = schema else {
3174 return Ok(0);
3175 };
3176 let parsed = crate::writer::parse_property_schema_json(&paths_json, &separator);
3177 let properties_str: Option<String> = conn
3178 .query_row(
3179 "SELECT properties FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
3180 [logical_id],
3181 |row| row.get(0),
3182 )
3183 .optional()?;
3184 let Some(properties_str) = properties_str else {
3185 return Ok(0);
3186 };
3187 let props: serde_json::Value = serde_json::from_str(&properties_str).unwrap_or_default();
3188 let (text, positions, _stats) = crate::writer::extract_property_fts(&props, &parsed);
3189 let Some(text) = text else {
3190 return Ok(0);
3191 };
3192 conn.execute(
3193 "DELETE FROM fts_node_property_positions WHERE node_logical_id = ?1",
3194 rusqlite::params![logical_id],
3195 )?;
3196 conn.execute(
3197 "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) VALUES (?1, ?2, ?3)",
3198 rusqlite::params![logical_id, kind, text],
3199 )?;
3200 for pos in &positions {
3201 conn.execute(
3202 "INSERT INTO fts_node_property_positions \
3203 (node_logical_id, kind, start_offset, end_offset, leaf_path) \
3204 VALUES (?1, ?2, ?3, ?4, ?5)",
3205 rusqlite::params![
3206 logical_id,
3207 kind,
3208 i64::try_from(pos.start_offset).unwrap_or(i64::MAX),
3209 i64::try_from(pos.end_offset).unwrap_or(i64::MAX),
3210 pos.leaf_path,
3211 ],
3212 )?;
3213 }
3214 Ok(1)
3215}
3216
3217fn serialize_property_paths_json(
3218 entries: &[FtsPropertyPathSpec],
3219 exclude_paths: &[String],
3220) -> Result<String, EngineError> {
3221 let all_scalar = entries
3225 .iter()
3226 .all(|e| e.mode == FtsPropertyPathMode::Scalar);
3227 if all_scalar && exclude_paths.is_empty() {
3228 let paths: Vec<&str> = entries.iter().map(|e| e.path.as_str()).collect();
3229 return serde_json::to_string(&paths).map_err(|e| {
3230 EngineError::InvalidWrite(format!("failed to serialize property paths: {e}"))
3231 });
3232 }
3233
3234 let mut obj = serde_json::Map::new();
3235 let paths_json: Vec<serde_json::Value> = entries
3236 .iter()
3237 .map(|e| {
3238 let mode_str = match e.mode {
3239 FtsPropertyPathMode::Scalar => "scalar",
3240 FtsPropertyPathMode::Recursive => "recursive",
3241 };
3242 serde_json::json!({ "path": e.path, "mode": mode_str })
3243 })
3244 .collect();
3245 obj.insert("paths".to_owned(), serde_json::Value::Array(paths_json));
3246 if !exclude_paths.is_empty() {
3247 obj.insert("exclude_paths".to_owned(), serde_json::json!(exclude_paths));
3248 }
3249 serde_json::to_string(&serde_json::Value::Object(obj))
3250 .map_err(|e| EngineError::InvalidWrite(format!("failed to serialize property paths: {e}")))
3251}
3252
3253fn validate_fts_property_paths(paths: &[String]) -> Result<(), EngineError> {
3254 if paths.is_empty() {
3255 return Err(EngineError::InvalidWrite(
3256 "FTS property paths must not be empty".to_owned(),
3257 ));
3258 }
3259 let mut seen = std::collections::HashSet::new();
3260 for path in paths {
3261 if !path.starts_with("$.") {
3262 return Err(EngineError::InvalidWrite(format!(
3263 "FTS property path must start with '$.' but got: {path}"
3264 )));
3265 }
3266 let after_prefix = &path[2..]; let segments: Vec<&str> = after_prefix.split('.').collect();
3268 if segments.is_empty() || segments.iter().any(|s| s.is_empty()) {
3269 return Err(EngineError::InvalidWrite(format!(
3270 "FTS property path has empty segment(s): {path}"
3271 )));
3272 }
3273 for seg in &segments {
3274 if !seg.chars().all(|c| c.is_alphanumeric() || c == '_') {
3275 return Err(EngineError::InvalidWrite(format!(
3276 "FTS property path segment contains invalid characters: {path}"
3277 )));
3278 }
3279 }
3280 if !seen.insert(path) {
3281 return Err(EngineError::InvalidWrite(format!(
3282 "duplicate FTS property path: {path}"
3283 )));
3284 }
3285 }
3286 Ok(())
3287}
3288
3289fn load_fts_property_schema_record(
3290 conn: &rusqlite::Connection,
3291 kind: &str,
3292) -> Result<Option<FtsPropertySchemaRecord>, EngineError> {
3293 let row = conn
3294 .query_row(
3295 "SELECT kind, property_paths_json, separator, format_version \
3296 FROM fts_property_schemas WHERE kind = ?1",
3297 [kind],
3298 |row| {
3299 let kind: String = row.get(0)?;
3300 let paths_json: String = row.get(1)?;
3301 let separator: String = row.get(2)?;
3302 let format_version: i64 = row.get(3)?;
3303 Ok(build_fts_property_schema_record(
3304 kind,
3305 &paths_json,
3306 separator,
3307 format_version,
3308 ))
3309 },
3310 )
3311 .optional()?;
3312 Ok(row)
3313}
3314
3315fn build_fts_property_schema_record(
3321 kind: String,
3322 paths_json: &str,
3323 separator: String,
3324 format_version: i64,
3325) -> FtsPropertySchemaRecord {
3326 let schema = crate::writer::parse_property_schema_json(paths_json, &separator);
3327 let entries: Vec<FtsPropertyPathSpec> = schema
3328 .paths
3329 .into_iter()
3330 .map(|entry| FtsPropertyPathSpec {
3331 path: entry.path,
3332 mode: match entry.mode {
3333 crate::writer::PropertyPathMode::Scalar => FtsPropertyPathMode::Scalar,
3334 crate::writer::PropertyPathMode::Recursive => FtsPropertyPathMode::Recursive,
3335 },
3336 })
3337 .collect();
3338 let property_paths: Vec<String> = entries.iter().map(|e| e.path.clone()).collect();
3339 FtsPropertySchemaRecord {
3340 kind,
3341 property_paths,
3342 entries,
3343 exclude_paths: schema.exclude_paths,
3344 separator,
3345 format_version,
3346 }
3347}
3348
3349fn build_regeneration_input(
3350 config: &VectorRegenerationConfig,
3351 identity: &QueryEmbedderIdentity,
3352 chunks: Vec<VectorRegenerationInputChunk>,
3353) -> VectorRegenerationInput {
3354 VectorRegenerationInput {
3355 profile: config.profile.clone(),
3356 table_name: config.table_name.clone(),
3357 model_identity: identity.model_identity.clone(),
3358 model_version: identity.model_version.clone(),
3359 dimension: identity.dimension,
3360 normalization_policy: identity.normalization_policy.clone(),
3361 chunking_policy: config.chunking_policy.clone(),
3362 preprocessing_policy: config.preprocessing_policy.clone(),
3363 chunks,
3364 }
3365}
3366
3367fn compute_snapshot_hash(payload: &VectorRegenerationInput) -> Result<String, EngineError> {
3368 let bytes =
3369 serde_json::to_vec(payload).map_err(|error| EngineError::Bridge(error.to_string()))?;
3370 let mut hasher = Sha256::new();
3371 hasher.update(bytes);
3372 Ok(format!("{:x}", hasher.finalize()))
3373}
3374
3375fn collect_regeneration_chunks(
3376 conn: &rusqlite::Connection,
3377) -> Result<Vec<VectorRegenerationInputChunk>, EngineError> {
3378 let mut stmt = conn.prepare(
3379 r"
3380 SELECT c.id, c.node_logical_id, n.kind, c.text_content, c.byte_start, c.byte_end, n.source_ref, c.created_at
3381 FROM chunks c
3382 JOIN nodes n
3383 ON n.logical_id = c.node_logical_id
3384 AND n.superseded_at IS NULL
3385 ORDER BY c.created_at, c.id
3386 ",
3387 )?;
3388 let chunks = stmt
3389 .query_map([], |row| {
3390 Ok(VectorRegenerationInputChunk {
3391 chunk_id: row.get(0)?,
3392 node_logical_id: row.get(1)?,
3393 kind: row.get(2)?,
3394 text_content: row.get(3)?,
3395 byte_start: row.get(4)?,
3396 byte_end: row.get(5)?,
3397 source_ref: row.get(6)?,
3398 created_at: row.get(7)?,
3399 })
3400 })?
3401 .collect::<Result<Vec<_>, _>>()?;
3402 Ok(chunks)
3403}
3404
3405fn validate_bounded_text(
3406 field: &str,
3407 value: &str,
3408 max_len: usize,
3409) -> Result<String, VectorRegenerationFailure> {
3410 let trimmed = value.trim();
3411 if trimmed.is_empty() {
3412 return Err(VectorRegenerationFailure::new(
3413 VectorRegenerationFailureClass::InvalidContract,
3414 format!("{field} must not be empty"),
3415 ));
3416 }
3417 if trimmed.len() > max_len {
3418 return Err(VectorRegenerationFailure::new(
3419 VectorRegenerationFailureClass::InvalidContract,
3420 format!("{field} exceeds max length {max_len}"),
3421 ));
3422 }
3423 Ok(trimmed.to_owned())
3424}
3425
3426fn current_vector_profile_dimension(
3427 conn: &rusqlite::Connection,
3428 profile: &str,
3429) -> Result<Option<usize>, VectorRegenerationFailure> {
3430 let dimension: Option<i64> = conn
3431 .query_row(
3432 "SELECT dimension FROM vector_profiles WHERE profile = ?1 AND enabled = 1",
3433 [profile],
3434 |row| row.get(0),
3435 )
3436 .optional()
3437 .map_err(|error| {
3438 VectorRegenerationFailure::new(
3439 VectorRegenerationFailureClass::InvalidContract,
3440 error.to_string(),
3441 )
3442 })?;
3443 dimension
3444 .map(|value| {
3445 usize::try_from(value).map_err(|_| {
3446 VectorRegenerationFailure::new(
3447 VectorRegenerationFailureClass::InvalidContract,
3448 format!("stored vector profile dimension is invalid: {value}"),
3449 )
3450 })
3451 })
3452 .transpose()
3453}
3454
3455fn validate_existing_contract_version(
3456 conn: &rusqlite::Connection,
3457 profile: &str,
3458) -> Result<(), VectorRegenerationFailure> {
3459 let version: Option<i64> = conn
3460 .query_row(
3461 "SELECT contract_format_version FROM vector_embedding_contracts WHERE profile = ?1",
3462 [profile],
3463 |row| row.get(0),
3464 )
3465 .optional()
3466 .map_err(|error| {
3467 VectorRegenerationFailure::new(
3468 VectorRegenerationFailureClass::InvalidContract,
3469 error.to_string(),
3470 )
3471 })?;
3472 if let Some(version) = version
3473 && version > CURRENT_VECTOR_CONTRACT_FORMAT_VERSION
3474 {
3475 return Err(VectorRegenerationFailure::new(
3476 VectorRegenerationFailureClass::InvalidContract,
3477 format!(
3478 "persisted contract format version {version} is unsupported; supported version is {CURRENT_VECTOR_CONTRACT_FORMAT_VERSION}"
3479 ),
3480 ));
3481 }
3482 Ok(())
3483}
3484
3485fn serialize_audit_metadata(
3486 metadata: &VectorRegenerationAuditMetadata,
3487) -> Result<String, EngineError> {
3488 let json =
3489 serde_json::to_string(metadata).map_err(|error| EngineError::Bridge(error.to_string()))?;
3490 if json.len() > MAX_AUDIT_METADATA_BYTES {
3491 return Err(VectorRegenerationFailure::new(
3492 VectorRegenerationFailureClass::InvalidContract,
3493 format!("audit metadata exceeds {MAX_AUDIT_METADATA_BYTES} bytes"),
3494 )
3495 .to_engine_error());
3496 }
3497 Ok(json)
3498}
3499
3500fn count_source_ref(
3501 conn: &rusqlite::Connection,
3502 table: &str,
3503 source_ref: &str,
3504) -> Result<usize, EngineError> {
3505 let sql = match table {
3506 "nodes" => "SELECT count(*) FROM nodes WHERE source_ref = ?1",
3507 "edges" => "SELECT count(*) FROM edges WHERE source_ref = ?1",
3508 "actions" => "SELECT count(*) FROM actions WHERE source_ref = ?1",
3509 "operational_mutations" => {
3510 "SELECT count(*) FROM operational_mutations WHERE source_ref = ?1"
3511 }
3512 other => return Err(EngineError::Bridge(format!("unknown table: {other}"))),
3513 };
3514 let count: i64 = conn.query_row(sql, [source_ref], |row| row.get(0))?;
3515 usize::try_from(count)
3518 .map_err(|_| EngineError::Bridge(format!("count overflow for table {table}: {count}")))
3519}
3520
3521fn rebuild_operational_current_rows(
3522 tx: &rusqlite::Transaction<'_>,
3523 collections: &[String],
3524) -> Result<usize, EngineError> {
3525 let mut rebuilt_rows = 0usize;
3526 clear_operational_current_rows(tx, collections)?;
3527 let mut ins_current = tx.prepare_cached(
3528 "INSERT INTO operational_current \
3529 (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
3530 VALUES (?1, ?2, ?3, ?4, ?5)",
3531 )?;
3532
3533 for collection in collections {
3534 let mut stmt = tx.prepare(
3535 "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
3536 FROM operational_mutations \
3537 WHERE collection_name = ?1 \
3538 ORDER BY record_key, mutation_order",
3539 )?;
3540 let mut latest_by_key: std::collections::HashMap<String, Option<(String, i64, String)>> =
3541 std::collections::HashMap::new();
3542 let rows = stmt.query_map([collection], map_operational_mutation_row)?;
3543 for row in rows {
3544 let mutation = row?;
3545 match mutation.op_kind.as_str() {
3546 "put" => {
3547 latest_by_key.insert(
3548 mutation.record_key,
3549 Some((mutation.payload_json, mutation.created_at, mutation.id)),
3550 );
3551 }
3552 "delete" => {
3553 latest_by_key.insert(mutation.record_key, None);
3554 }
3555 _ => {}
3556 }
3557 }
3558
3559 for (record_key, state) in latest_by_key {
3560 if let Some((payload_json, updated_at, last_mutation_id)) = state {
3561 ins_current.execute(rusqlite::params![
3562 collection,
3563 record_key,
3564 payload_json,
3565 updated_at,
3566 last_mutation_id,
3567 ])?;
3568 rebuilt_rows += 1;
3569 }
3570 }
3571 }
3572
3573 drop(ins_current);
3574 Ok(rebuilt_rows)
3575}
3576
3577fn clear_operational_current_rows(
3578 tx: &rusqlite::Transaction<'_>,
3579 collections: &[String],
3580) -> Result<(), EngineError> {
3581 let mut delete_current =
3582 tx.prepare_cached("DELETE FROM operational_current WHERE collection_name = ?1")?;
3583 let mut delete_secondary_current = tx.prepare_cached(
3584 "DELETE FROM operational_secondary_index_entries \
3585 WHERE collection_name = ?1 AND subject_kind = 'current'",
3586 )?;
3587 for collection in collections {
3588 delete_secondary_current.execute([collection])?;
3589 delete_current.execute([collection])?;
3590 }
3591 drop(delete_secondary_current);
3592 drop(delete_current);
3593 Ok(())
3594}
3595
3596fn clear_operational_secondary_index_entries(
3597 tx: &rusqlite::Transaction<'_>,
3598 collection_name: &str,
3599) -> Result<(), EngineError> {
3600 tx.execute(
3601 "DELETE FROM operational_secondary_index_entries WHERE collection_name = ?1",
3602 [collection_name],
3603 )?;
3604 Ok(())
3605}
3606
3607fn insert_operational_secondary_index_entry(
3608 tx: &rusqlite::Transaction<'_>,
3609 collection_name: &str,
3610 subject_kind: &str,
3611 mutation_id: &str,
3612 record_key: &str,
3613 entry: &crate::operational::OperationalSecondaryIndexEntry,
3614) -> Result<(), EngineError> {
3615 tx.execute(
3616 "INSERT INTO operational_secondary_index_entries \
3617 (collection_name, index_name, subject_kind, mutation_id, record_key, sort_timestamp, \
3618 slot1_text, slot1_integer, slot2_text, slot2_integer, slot3_text, slot3_integer) \
3619 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
3620 rusqlite::params![
3621 collection_name,
3622 entry.index_name,
3623 subject_kind,
3624 mutation_id,
3625 record_key,
3626 entry.sort_timestamp,
3627 entry.slot1_text,
3628 entry.slot1_integer,
3629 entry.slot2_text,
3630 entry.slot2_integer,
3631 entry.slot3_text,
3632 entry.slot3_integer,
3633 ],
3634 )?;
3635 Ok(())
3636}
3637
3638fn rebuild_operational_secondary_index_entries(
3639 tx: &rusqlite::Transaction<'_>,
3640 collection_name: &str,
3641 collection_kind: OperationalCollectionKind,
3642 indexes: &[OperationalSecondaryIndexDefinition],
3643) -> Result<(usize, usize), EngineError> {
3644 clear_operational_secondary_index_entries(tx, collection_name)?;
3645
3646 let mut mutation_entries_rebuilt = 0usize;
3647 if collection_kind == OperationalCollectionKind::AppendOnlyLog {
3648 let mut stmt = tx.prepare(
3649 "SELECT id, record_key, payload_json FROM operational_mutations \
3650 WHERE collection_name = ?1 ORDER BY mutation_order",
3651 )?;
3652 let rows = stmt
3653 .query_map([collection_name], |row| {
3654 Ok((
3655 row.get::<_, String>(0)?,
3656 row.get::<_, String>(1)?,
3657 row.get::<_, String>(2)?,
3658 ))
3659 })?
3660 .collect::<Result<Vec<_>, _>>()?;
3661 drop(stmt);
3662 for (mutation_id, record_key, payload_json) in rows {
3663 for entry in extract_secondary_index_entries_for_mutation(indexes, &payload_json) {
3664 insert_operational_secondary_index_entry(
3665 tx,
3666 collection_name,
3667 "mutation",
3668 &mutation_id,
3669 &record_key,
3670 &entry,
3671 )?;
3672 mutation_entries_rebuilt += 1;
3673 }
3674 }
3675 }
3676
3677 let mut current_entries_rebuilt = 0usize;
3678 if collection_kind == OperationalCollectionKind::LatestState {
3679 let mut stmt = tx.prepare(
3680 "SELECT record_key, payload_json, updated_at, last_mutation_id FROM operational_current \
3681 WHERE collection_name = ?1 ORDER BY updated_at DESC, record_key",
3682 )?;
3683 let rows = stmt
3684 .query_map([collection_name], |row| {
3685 Ok((
3686 row.get::<_, String>(0)?,
3687 row.get::<_, String>(1)?,
3688 row.get::<_, i64>(2)?,
3689 row.get::<_, String>(3)?,
3690 ))
3691 })?
3692 .collect::<Result<Vec<_>, _>>()?;
3693 drop(stmt);
3694 for (record_key, payload_json, updated_at, last_mutation_id) in rows {
3695 for entry in
3696 extract_secondary_index_entries_for_current(indexes, &payload_json, updated_at)
3697 {
3698 insert_operational_secondary_index_entry(
3699 tx,
3700 collection_name,
3701 "current",
3702 &last_mutation_id,
3703 &record_key,
3704 &entry,
3705 )?;
3706 current_entries_rebuilt += 1;
3707 }
3708 }
3709 }
3710
3711 Ok((mutation_entries_rebuilt, current_entries_rebuilt))
3712}
3713
3714fn collect_strings_tx(
3715 tx: &rusqlite::Transaction<'_>,
3716 sql: &str,
3717 value: &str,
3718) -> Result<Vec<String>, EngineError> {
3719 let mut stmt = tx.prepare(sql)?;
3720 let rows = stmt.query_map([value], |row| row.get::<_, String>(0))?;
3721 rows.collect::<Result<Vec<_>, _>>()
3722 .map_err(EngineError::from)
3723}
3724
/// Converts an `i64` into `usize`.
///
/// # Panics
///
/// Panics with "count(*) must be non-negative" when `val` cannot be
/// represented as `usize` (for example, when it is negative).
// `expect` is allowed here: the panic message states the invariant the
// function relies on.
#[allow(clippy::expect_used)]
fn i64_to_usize(val: i64) -> usize {
    let converted = usize::try_from(val);
    converted.expect("count(*) must be non-negative")
}
3731
3732fn collect_strings(
3739 conn: &rusqlite::Connection,
3740 sql: &str,
3741 param: &str,
3742) -> Result<Vec<String>, EngineError> {
3743 let mut stmt = conn.prepare(sql)?;
3744 let values = stmt
3745 .query_map([param], |row| row.get::<_, String>(0))?
3746 .collect::<Result<Vec<_>, _>>()?;
3747 Ok(values)
3748}
3749
3750fn collect_edge_logical_ids_for_restore(
3751 tx: &rusqlite::Transaction<'_>,
3752 logical_id: &str,
3753 retire_source_ref: Option<&str>,
3754 retire_created_at: i64,
3755 retire_event_rowid: i64,
3756) -> Result<Vec<String>, EngineError> {
3757 let mut stmt = tx.prepare(
3758 "SELECT DISTINCT e.logical_id \
3759 FROM edges e \
3760 JOIN provenance_events p \
3761 ON p.subject = e.logical_id \
3762 AND p.event_type = 'edge_retire' \
3763 AND ( \
3764 p.created_at > ?3 \
3765 OR (p.created_at = ?3 AND p.rowid >= ?4) \
3766 ) \
3767 AND ((?2 IS NULL AND p.source_ref IS NULL) OR p.source_ref = ?2) \
3768 WHERE e.superseded_at IS NOT NULL \
3769 AND (e.source_logical_id = ?1 OR e.target_logical_id = ?1) \
3770 AND NOT EXISTS ( \
3771 SELECT 1 FROM edges active \
3772 WHERE active.logical_id = e.logical_id \
3773 AND active.superseded_at IS NULL \
3774 ) \
3775 ORDER BY e.logical_id",
3776 )?;
3777 let edge_ids = stmt
3778 .query_map(
3779 rusqlite::params![
3780 logical_id,
3781 retire_source_ref,
3782 retire_created_at,
3783 retire_event_rowid
3784 ],
3785 |row| row.get::<_, String>(0),
3786 )?
3787 .collect::<Result<Vec<_>, _>>()?;
3788 Ok(edge_ids)
3789}
3790
3791fn restore_validated_edges(
3794 tx: &rusqlite::Transaction<'_>,
3795 logical_id: &str,
3796 retire_source_ref: Option<&str>,
3797 retire_created_at: i64,
3798 retire_event_rowid: i64,
3799) -> Result<(usize, Vec<SkippedEdge>), EngineError> {
3800 let edge_logical_ids = collect_edge_logical_ids_for_restore(
3801 tx,
3802 logical_id,
3803 retire_source_ref,
3804 retire_created_at,
3805 retire_event_rowid,
3806 )?;
3807 let mut restored = 0usize;
3808 let mut skipped = Vec::new();
3809 for edge_logical_id in &edge_logical_ids {
3810 let edge_detail: Option<(String, String, String)> = tx
3811 .query_row(
3812 "SELECT row_id, source_logical_id, target_logical_id FROM edges \
3813 WHERE logical_id = ?1 AND superseded_at IS NOT NULL \
3814 ORDER BY superseded_at DESC, created_at DESC, rowid DESC LIMIT 1",
3815 [edge_logical_id.as_str()],
3816 |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
3817 )
3818 .optional()?;
3819 let Some((edge_row_id, source_lid, target_lid)) = edge_detail else {
3820 continue;
3821 };
3822 let other_endpoint = if source_lid == logical_id {
3823 &target_lid
3824 } else {
3825 &source_lid
3826 };
3827 let endpoint_active: bool = tx
3828 .query_row(
3829 "SELECT 1 FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL LIMIT 1",
3830 [other_endpoint.as_str()],
3831 |_| Ok(true),
3832 )
3833 .optional()?
3834 .unwrap_or(false);
3835 if !endpoint_active {
3836 skipped.push(SkippedEdge {
3837 edge_logical_id: edge_logical_id.clone(),
3838 missing_endpoint: other_endpoint.clone(),
3839 });
3840 continue;
3841 }
3842 restored += tx.execute(
3843 "UPDATE edges SET superseded_at = NULL WHERE row_id = ?1",
3844 [edge_row_id.as_str()],
3845 )?;
3846 }
3847 Ok((restored, skipped))
3848}
3849
/// Counts the `vec_nodes_active` rows whose chunk belongs to `logical_id`.
///
/// A SQLite failure whose message mentions `vec_nodes_active` or
/// `no such module: vec0` is treated as "no vector rows" and yields `Ok(0)`.
///
/// # Errors
///
/// Any other SQLite error is returned as [`EngineError::Sqlite`].
#[cfg(feature = "sqlite-vec")]
fn count_vec_rows_for_logical_id(
    tx: &rusqlite::Transaction<'_>,
    logical_id: &str,
) -> Result<usize, EngineError> {
    let outcome = tx.query_row(
        "SELECT count(*) FROM vec_nodes_active v \
         JOIN chunks c ON c.id = v.chunk_id \
         WHERE c.node_logical_id = ?1",
        [logical_id],
        |row| row.get::<_, i64>(0),
    );
    match outcome {
        Ok(count) => Ok(i64_to_usize(count)),
        Err(error) => {
            let vec_unavailable = matches!(
                &error,
                rusqlite::Error::SqliteFailure(_, Some(msg))
                    if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0")
            );
            if vec_unavailable {
                Ok(0)
            } else {
                Err(EngineError::Sqlite(error))
            }
        }
    }
}
3871
/// Variant compiled when the `sqlite-vec` feature is disabled: both
/// arguments are ignored and the count is always `Ok(0)`.
///
/// The `Result` return type keeps the signature identical to the
/// `sqlite-vec` build, hence the `unnecessary_wraps` allowance.
#[cfg(not(feature = "sqlite-vec"))]
#[allow(clippy::unnecessary_wraps)]
fn count_vec_rows_for_logical_id(
    _tx: &rusqlite::Transaction<'_>,
    _logical_id: &str,
) -> Result<usize, EngineError> {
    Ok(0)
}
3880
/// Deletes the `vec_nodes_active` rows whose chunk belongs to `logical_id`
/// and returns how many rows were removed.
///
/// A SQLite failure whose message mentions `vec_nodes_active` or
/// `no such module: vec0` is treated as "nothing to delete" and yields
/// `Ok(0)`.
///
/// # Errors
///
/// Any other SQLite error is returned as [`EngineError::Sqlite`].
#[cfg(feature = "sqlite-vec")]
fn delete_vec_rows_for_logical_id(
    tx: &rusqlite::Transaction<'_>,
    logical_id: &str,
) -> Result<usize, EngineError> {
    let outcome = tx.execute(
        "DELETE FROM vec_nodes_active \
         WHERE chunk_id IN (SELECT id FROM chunks WHERE node_logical_id = ?1)",
        [logical_id],
    );
    match outcome {
        Ok(deleted) => Ok(deleted),
        Err(error) => {
            let vec_unavailable = matches!(
                &error,
                rusqlite::Error::SqliteFailure(_, Some(msg))
                    if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0")
            );
            if vec_unavailable {
                Ok(0)
            } else {
                Err(EngineError::Sqlite(error))
            }
        }
    }
}
3900
/// Variant compiled when the `sqlite-vec` feature is disabled: both
/// arguments are ignored and the deleted-row count is always `Ok(0)`.
///
/// The `Result` return type keeps the signature identical to the
/// `sqlite-vec` build, hence the `unnecessary_wraps` allowance.
#[cfg(not(feature = "sqlite-vec"))]
#[allow(clippy::unnecessary_wraps)]
fn delete_vec_rows_for_logical_id(
    _tx: &rusqlite::Transaction<'_>,
    _logical_id: &str,
) -> Result<usize, EngineError> {
    Ok(0)
}
3909
3910fn ensure_operational_collection_registered(
3911 conn: &rusqlite::Connection,
3912 collection_name: &str,
3913) -> Result<(), EngineError> {
3914 if load_operational_collection_record(conn, collection_name)?.is_none() {
3915 return Err(EngineError::InvalidWrite(format!(
3916 "operational collection '{collection_name}' is not registered"
3917 )));
3918 }
3919 Ok(())
3920}
3921
3922fn load_operational_collection_record(
3923 conn: &rusqlite::Connection,
3924 name: &str,
3925) -> Result<Option<OperationalCollectionRecord>, EngineError> {
3926 conn.query_row(
3927 "SELECT name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at, disabled_at \
3928 FROM operational_collections WHERE name = ?1",
3929 [name],
3930 map_operational_collection_row,
3931 )
3932 .optional()
3933 .map_err(EngineError::Sqlite)
3934}
3935
3936fn validate_append_only_operational_collection(
3937 record: &OperationalCollectionRecord,
3938 operation: &str,
3939) -> Result<(), EngineError> {
3940 if record.kind != OperationalCollectionKind::AppendOnlyLog {
3941 return Err(EngineError::InvalidWrite(format!(
3942 "operational collection '{}' must be append_only_log to {operation}",
3943 record.name
3944 )));
3945 }
3946 Ok(())
3947}
3948
/// A read filter after validation against a collection's declared filter
/// fields, as produced by `compile_operational_read_filters`.
#[derive(Clone, Debug, PartialEq, Eq)]
struct CompiledOperationalReadFilter {
    /// Name of the declared filter field this filter applies to.
    field: String,
    /// The type-checked predicate to apply to that field.
    condition: OperationalReadCondition,
}
3954
/// The result of matching a set of compiled filters against an
/// `AppendOnlyFieldTime` secondary index (see
/// `match_append_only_secondary_index_read`). All members borrow from the
/// filter and index slices that were matched.
#[derive(Clone, Debug)]
struct MatchedAppendOnlySecondaryIndexRead<'a> {
    /// Name of the secondary index that can serve the read.
    index_name: &'a str,
    /// The filter on the index's value field (exact or prefix).
    value_filter: &'a CompiledOperationalReadFilter,
    /// Optional range filter on the index's time field.
    time_range: Option<&'a CompiledOperationalReadFilter>,
}
3961
/// The predicate carried by a compiled operational read filter.
#[derive(Clone, Debug, PartialEq, Eq)]
enum OperationalReadCondition {
    /// The field's string value must equal the given string.
    ExactString(String),
    /// The field's integer value must equal the given integer.
    ExactInteger(i64),
    /// The field's string value must start with the given prefix
    /// (executed as a SQL `GLOB` with the prefix escaped).
    Prefix(String),
    /// The field's integer value must fall within the given bounds; each
    /// bound is inclusive (`>=` / `<=` in the generated SQL) and optional.
    Range {
        lower: Option<i64>,
        upper: Option<i64>,
    },
}
3972
3973fn operational_read_limit(limit: Option<usize>) -> Result<usize, EngineError> {
3974 let applied_limit = limit.unwrap_or(DEFAULT_OPERATIONAL_READ_LIMIT);
3975 if applied_limit == 0 {
3976 return Err(EngineError::InvalidWrite(
3977 "operational read limit must be greater than zero".to_owned(),
3978 ));
3979 }
3980 Ok(applied_limit.min(MAX_OPERATIONAL_READ_LIMIT))
3981}
3982
3983fn parse_operational_filter_fields(
3984 filter_fields_json: &str,
3985) -> Result<Vec<OperationalFilterField>, String> {
3986 let fields: Vec<OperationalFilterField> = serde_json::from_str(filter_fields_json)
3987 .map_err(|error| format!("invalid filter_fields_json: {error}"))?;
3988 let mut seen = std::collections::HashSet::new();
3989 for field in &fields {
3990 if field.name.trim().is_empty() {
3991 return Err("filter_fields_json field names must not be empty".to_owned());
3992 }
3993 if !seen.insert(field.name.as_str()) {
3994 return Err(format!(
3995 "filter_fields_json contains duplicate field '{}'",
3996 field.name
3997 ));
3998 }
3999 if field.modes.is_empty() {
4000 return Err(format!(
4001 "filter_fields_json field '{}' must declare at least one mode",
4002 field.name
4003 ));
4004 }
4005 if field.modes.contains(&OperationalFilterMode::Prefix)
4006 && field.field_type != OperationalFilterFieldType::String
4007 {
4008 return Err(format!(
4009 "filter field '{}' only supports prefix for string types",
4010 field.name
4011 ));
4012 }
4013 }
4014 Ok(fields)
4015}
4016
4017fn compile_operational_read_filters(
4018 filters: &[OperationalFilterClause],
4019 declared_fields: &[OperationalFilterField],
4020) -> Result<Vec<CompiledOperationalReadFilter>, EngineError> {
4021 let field_map = declared_fields
4022 .iter()
4023 .map(|field| (field.name.as_str(), field))
4024 .collect::<std::collections::HashMap<_, _>>();
4025 filters
4026 .iter()
4027 .map(|filter| match filter {
4028 OperationalFilterClause::Exact { field, value } => {
4029 let declared = field_map.get(field.as_str()).ok_or_else(|| {
4030 EngineError::InvalidWrite(format!(
4031 "operational read filter uses undeclared field '{field}'"
4032 ))
4033 })?;
4034 if !declared.modes.contains(&OperationalFilterMode::Exact) {
4035 return Err(EngineError::InvalidWrite(format!(
4036 "operational read field '{field}' does not allow exact filters"
4037 )));
4038 }
4039 let condition = match (declared.field_type, value) {
4040 (OperationalFilterFieldType::String, OperationalFilterValue::String(value)) => {
4041 OperationalReadCondition::ExactString(value.clone())
4042 }
4043 (
4044 OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp,
4045 OperationalFilterValue::Integer(value),
4046 ) => OperationalReadCondition::ExactInteger(*value),
4047 _ => {
4048 return Err(EngineError::InvalidWrite(format!(
4049 "operational read field '{field}' received a value with the wrong type"
4050 )));
4051 }
4052 };
4053 Ok(CompiledOperationalReadFilter {
4054 field: field.clone(),
4055 condition,
4056 })
4057 }
4058 OperationalFilterClause::Prefix { field, value } => {
4059 let declared = field_map.get(field.as_str()).ok_or_else(|| {
4060 EngineError::InvalidWrite(format!(
4061 "operational read filter uses undeclared field '{field}'"
4062 ))
4063 })?;
4064 if !declared.modes.contains(&OperationalFilterMode::Prefix) {
4065 return Err(EngineError::InvalidWrite(format!(
4066 "operational read field '{field}' does not allow prefix filters"
4067 )));
4068 }
4069 if declared.field_type != OperationalFilterFieldType::String {
4070 return Err(EngineError::InvalidWrite(format!(
4071 "operational read field '{field}' only supports prefix filters for strings"
4072 )));
4073 }
4074 Ok(CompiledOperationalReadFilter {
4075 field: field.clone(),
4076 condition: OperationalReadCondition::Prefix(value.clone()),
4077 })
4078 }
4079 OperationalFilterClause::Range {
4080 field,
4081 lower,
4082 upper,
4083 } => {
4084 let declared = field_map.get(field.as_str()).ok_or_else(|| {
4085 EngineError::InvalidWrite(format!(
4086 "operational read filter uses undeclared field '{field}'"
4087 ))
4088 })?;
4089 if !declared.modes.contains(&OperationalFilterMode::Range) {
4090 return Err(EngineError::InvalidWrite(format!(
4091 "operational read field '{field}' does not allow range filters"
4092 )));
4093 }
4094 if !matches!(
4095 declared.field_type,
4096 OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp
4097 ) {
4098 return Err(EngineError::InvalidWrite(format!(
4099 "operational read field '{field}' only supports range filters for integer/timestamp fields"
4100 )));
4101 }
4102 if lower.is_none() && upper.is_none() {
4103 return Err(EngineError::InvalidWrite(format!(
4104 "operational read range filter for '{field}' must specify a lower or upper bound"
4105 )));
4106 }
4107 Ok(CompiledOperationalReadFilter {
4108 field: field.clone(),
4109 condition: OperationalReadCondition::Range {
4110 lower: *lower,
4111 upper: *upper,
4112 },
4113 })
4114 }
4115 })
4116 .collect()
4117}
4118
4119fn match_append_only_secondary_index_read<'a>(
4120 filters: &'a [CompiledOperationalReadFilter],
4121 indexes: &'a [OperationalSecondaryIndexDefinition],
4122) -> Option<MatchedAppendOnlySecondaryIndexRead<'a>> {
4123 indexes.iter().find_map(|index| {
4124 let OperationalSecondaryIndexDefinition::AppendOnlyFieldTime {
4125 name,
4126 field,
4127 value_type,
4128 time_field,
4129 } = index
4130 else {
4131 return None;
4132 };
4133 if !(1..=2).contains(&filters.len()) {
4134 return None;
4135 }
4136
4137 let mut value_filter = None;
4138 let mut time_range = None;
4139 for filter in filters {
4140 if filter.field == *field {
4141 let supported = matches!(
4142 (&filter.condition, value_type),
4143 (
4144 OperationalReadCondition::ExactString(_)
4145 | OperationalReadCondition::Prefix(_),
4146 crate::operational::OperationalSecondaryIndexValueType::String
4147 ) | (
4148 OperationalReadCondition::ExactInteger(_),
4149 crate::operational::OperationalSecondaryIndexValueType::Integer
4150 | crate::operational::OperationalSecondaryIndexValueType::Timestamp
4151 )
4152 );
4153 if !supported || value_filter.is_some() {
4154 return None;
4155 }
4156 value_filter = Some(filter);
4157 continue;
4158 }
4159 if filter.field == *time_field {
4160 if !matches!(filter.condition, OperationalReadCondition::Range { .. })
4161 || time_range.is_some()
4162 {
4163 return None;
4164 }
4165 time_range = Some(filter);
4166 continue;
4167 }
4168 return None;
4169 }
4170
4171 value_filter.map(|value_filter| MatchedAppendOnlySecondaryIndexRead {
4172 index_name: name.as_str(),
4173 value_filter,
4174 time_range,
4175 })
4176 })
4177}
4178
4179fn execute_operational_secondary_index_read(
4180 conn: &rusqlite::Connection,
4181 collection_name: &str,
4182 filters: &[CompiledOperationalReadFilter],
4183 indexes: &[OperationalSecondaryIndexDefinition],
4184 applied_limit: usize,
4185) -> Result<Option<OperationalReadReport>, EngineError> {
4186 use rusqlite::types::Value;
4187
4188 let Some(matched) = match_append_only_secondary_index_read(filters, indexes) else {
4189 return Ok(None);
4190 };
4191
4192 let mut sql = String::from(
4193 "SELECT m.id, m.collection_name, m.record_key, m.op_kind, m.payload_json, m.source_ref, m.created_at \
4194 FROM operational_secondary_index_entries s \
4195 JOIN operational_mutations m ON m.id = s.mutation_id \
4196 WHERE s.collection_name = ?1 AND s.index_name = ?2 AND s.subject_kind = 'mutation' ",
4197 );
4198 let mut params = vec![
4199 Value::from(collection_name.to_owned()),
4200 Value::from(matched.index_name.to_owned()),
4201 ];
4202
4203 match &matched.value_filter.condition {
4204 OperationalReadCondition::ExactString(value) => {
4205 let _ = write!(sql, "AND s.slot1_text = ?{} ", params.len() + 1);
4206 params.push(Value::from(value.clone()));
4207 }
4208 OperationalReadCondition::Prefix(value) => {
4209 let _ = write!(sql, "AND s.slot1_text GLOB ?{} ", params.len() + 1);
4210 params.push(Value::from(glob_prefix_pattern(value)));
4211 }
4212 OperationalReadCondition::ExactInteger(value) => {
4213 let _ = write!(sql, "AND s.slot1_integer = ?{} ", params.len() + 1);
4214 params.push(Value::from(*value));
4215 }
4216 OperationalReadCondition::Range { .. } => return Ok(None),
4217 }
4218
4219 if let Some(time_range) = matched.time_range
4220 && let OperationalReadCondition::Range { lower, upper } = &time_range.condition
4221 {
4222 if let Some(lower) = lower {
4223 let _ = write!(sql, "AND s.sort_timestamp >= ?{} ", params.len() + 1);
4224 params.push(Value::from(*lower));
4225 }
4226 if let Some(upper) = upper {
4227 let _ = write!(sql, "AND s.sort_timestamp <= ?{} ", params.len() + 1);
4228 params.push(Value::from(*upper));
4229 }
4230 }
4231
4232 let _ = write!(
4233 sql,
4234 "ORDER BY s.sort_timestamp DESC, m.mutation_order DESC LIMIT ?{}",
4235 params.len() + 1
4236 );
4237 params.push(Value::from(i64::try_from(applied_limit + 1).map_err(
4238 |_| EngineError::Bridge("operational read limit overflow".to_owned()),
4239 )?));
4240
4241 let mut stmt = conn.prepare(&sql)?;
4242 let mut rows = stmt
4243 .query_map(
4244 rusqlite::params_from_iter(params),
4245 map_operational_mutation_row,
4246 )?
4247 .collect::<Result<Vec<_>, _>>()?;
4248 let was_limited = rows.len() > applied_limit;
4249 if was_limited {
4250 rows.truncate(applied_limit);
4251 }
4252
4253 Ok(Some(OperationalReadReport {
4254 collection_name: collection_name.to_owned(),
4255 row_count: rows.len(),
4256 applied_limit,
4257 was_limited,
4258 rows,
4259 }))
4260}
4261
4262fn execute_operational_filtered_read(
4263 conn: &rusqlite::Connection,
4264 collection_name: &str,
4265 filters: &[CompiledOperationalReadFilter],
4266 applied_limit: usize,
4267) -> Result<OperationalReadReport, EngineError> {
4268 use rusqlite::types::Value;
4269
4270 let mut sql = String::from(
4271 "SELECT m.id, m.collection_name, m.record_key, m.op_kind, m.payload_json, m.source_ref, m.created_at \
4272 FROM operational_mutations m ",
4273 );
4274 let mut params = vec![Value::from(collection_name.to_owned())];
4275 for (index, filter) in filters.iter().enumerate() {
4276 let _ = write!(
4277 sql,
4278 "JOIN operational_filter_values f{index} \
4279 ON f{index}.mutation_id = m.id \
4280 AND f{index}.collection_name = m.collection_name "
4281 );
4282 match &filter.condition {
4283 OperationalReadCondition::ExactString(value) => {
4284 let _ = write!(
4285 sql,
4286 "AND f{index}.field_name = ?{} AND f{index}.string_value = ?{} ",
4287 params.len() + 1,
4288 params.len() + 2
4289 );
4290 params.push(Value::from(filter.field.clone()));
4291 params.push(Value::from(value.clone()));
4292 }
4293 OperationalReadCondition::ExactInteger(value) => {
4294 let _ = write!(
4295 sql,
4296 "AND f{index}.field_name = ?{} AND f{index}.integer_value = ?{} ",
4297 params.len() + 1,
4298 params.len() + 2
4299 );
4300 params.push(Value::from(filter.field.clone()));
4301 params.push(Value::from(*value));
4302 }
4303 OperationalReadCondition::Prefix(value) => {
4304 let _ = write!(
4305 sql,
4306 "AND f{index}.field_name = ?{} AND f{index}.string_value GLOB ?{} ",
4307 params.len() + 1,
4308 params.len() + 2
4309 );
4310 params.push(Value::from(filter.field.clone()));
4311 params.push(Value::from(glob_prefix_pattern(value)));
4312 }
4313 OperationalReadCondition::Range { lower, upper } => {
4314 let _ = write!(sql, "AND f{index}.field_name = ?{} ", params.len() + 1);
4315 params.push(Value::from(filter.field.clone()));
4316 if let Some(lower) = lower {
4317 let _ = write!(sql, "AND f{index}.integer_value >= ?{} ", params.len() + 1);
4318 params.push(Value::from(*lower));
4319 }
4320 if let Some(upper) = upper {
4321 let _ = write!(sql, "AND f{index}.integer_value <= ?{} ", params.len() + 1);
4322 params.push(Value::from(*upper));
4323 }
4324 }
4325 }
4326 }
4327 let _ = write!(
4328 sql,
4329 "WHERE m.collection_name = ?1 ORDER BY m.mutation_order DESC LIMIT ?{}",
4330 params.len() + 1
4331 );
4332 params.push(Value::from(i64::try_from(applied_limit + 1).map_err(
4333 |_| EngineError::Bridge("operational read limit overflow".to_owned()),
4334 )?));
4335
4336 let mut stmt = conn.prepare(&sql)?;
4337 let mut rows = stmt
4338 .query_map(
4339 rusqlite::params_from_iter(params),
4340 map_operational_mutation_row,
4341 )?
4342 .collect::<Result<Vec<_>, _>>()?;
4343 let was_limited = rows.len() > applied_limit;
4344 if was_limited {
4345 rows.truncate(applied_limit);
4346 }
4347 Ok(OperationalReadReport {
4348 collection_name: collection_name.to_owned(),
4349 row_count: rows.len(),
4350 applied_limit,
4351 was_limited,
4352 rows,
4353 })
4354}
4355
/// Builds a SQL `GLOB` pattern that matches any string starting with
/// `value`.
///
/// The characters `*`, `?` and `[` in `value` are each wrapped in a
/// single-character bracket class (`[*]`, `[?]`, `[[]`) so they are matched
/// literally; a trailing `*` is then appended.
fn glob_prefix_pattern(value: &str) -> String {
    let mut escaped = value.chars().fold(
        String::with_capacity(value.len() + 1),
        |mut acc, ch| {
            if matches!(ch, '*' | '?' | '[') {
                acc.push('[');
                acc.push(ch);
                acc.push(']');
            } else {
                acc.push(ch);
            }
            acc
        },
    );
    escaped.push('*');
    escaped
}
4369
/// One filter-field value pulled out of a mutation payload by
/// `extract_operational_filter_values`. That function populates exactly one
/// of `string_value` / `integer_value`, depending on the declared field
/// type.
#[derive(Clone, Debug, PartialEq, Eq)]
struct ExtractedOperationalFilterValue {
    /// Name of the declared filter field the value belongs to.
    field_name: String,
    /// Set for `String` fields.
    string_value: Option<String>,
    /// Set for `Integer` and `Timestamp` fields.
    integer_value: Option<i64>,
}
4376
4377fn extract_operational_filter_values(
4378 filter_fields: &[OperationalFilterField],
4379 payload_json: &str,
4380) -> Vec<ExtractedOperationalFilterValue> {
4381 let Ok(parsed) = serde_json::from_str::<serde_json::Value>(payload_json) else {
4382 return Vec::new();
4383 };
4384 let Some(object) = parsed.as_object() else {
4385 return Vec::new();
4386 };
4387
4388 filter_fields
4389 .iter()
4390 .filter_map(|field| {
4391 let value = object.get(&field.name)?;
4392 match field.field_type {
4393 OperationalFilterFieldType::String => {
4394 value
4395 .as_str()
4396 .map(|string_value| ExtractedOperationalFilterValue {
4397 field_name: field.name.clone(),
4398 string_value: Some(string_value.to_owned()),
4399 integer_value: None,
4400 })
4401 }
4402 OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp => {
4403 value
4404 .as_i64()
4405 .map(|integer_value| ExtractedOperationalFilterValue {
4406 field_name: field.name.clone(),
4407 string_value: None,
4408 integer_value: Some(integer_value),
4409 })
4410 }
4411 }
4412 })
4413 .collect()
4414}
4415
4416fn operational_compaction_candidates(
4417 conn: &rusqlite::Connection,
4418 retention_json: &str,
4419 collection_name: &str,
4420) -> Result<(Vec<String>, Option<i64>), EngineError> {
4421 operational_compaction_candidates_at(
4422 conn,
4423 retention_json,
4424 collection_name,
4425 current_unix_timestamp()?,
4426 )
4427}
4428
4429fn operational_compaction_candidates_at(
4430 conn: &rusqlite::Connection,
4431 retention_json: &str,
4432 collection_name: &str,
4433 now_timestamp: i64,
4434) -> Result<(Vec<String>, Option<i64>), EngineError> {
4435 let policy = parse_operational_retention_policy(retention_json)?;
4436 match policy {
4437 OperationalRetentionPolicy::KeepAll => Ok((Vec::new(), None)),
4438 OperationalRetentionPolicy::PurgeBeforeSeconds { max_age_seconds } => {
4439 let before_timestamp = now_timestamp - max_age_seconds;
4440 let mut stmt = conn.prepare(
4441 "SELECT id FROM operational_mutations \
4442 WHERE collection_name = ?1 AND created_at < ?2 \
4443 ORDER BY mutation_order",
4444 )?;
4445 let mutation_ids = stmt
4446 .query_map(
4447 rusqlite::params![collection_name, before_timestamp],
4448 |row| row.get::<_, String>(0),
4449 )?
4450 .collect::<Result<Vec<_>, _>>()?;
4451 Ok((mutation_ids, Some(before_timestamp)))
4452 }
4453 OperationalRetentionPolicy::KeepLast { max_rows } => {
4454 let mut stmt = conn.prepare(
4455 "SELECT id FROM operational_mutations \
4456 WHERE collection_name = ?1 \
4457 ORDER BY mutation_order DESC",
4458 )?;
4459 let ordered_ids = stmt
4460 .query_map([collection_name], |row| row.get::<_, String>(0))?
4461 .collect::<Result<Vec<_>, _>>()?;
4462 Ok((ordered_ids.into_iter().skip(max_rows).collect(), None))
4463 }
4464 }
4465}
4466
4467fn parse_operational_retention_policy(
4468 retention_json: &str,
4469) -> Result<OperationalRetentionPolicy, EngineError> {
4470 let policy: OperationalRetentionPolicy = serde_json::from_str(retention_json)
4471 .map_err(|error| EngineError::InvalidWrite(format!("invalid retention_json: {error}")))?;
4472 match policy {
4473 OperationalRetentionPolicy::KeepAll => Ok(policy),
4474 OperationalRetentionPolicy::PurgeBeforeSeconds { max_age_seconds } => {
4475 if max_age_seconds <= 0 {
4476 return Err(EngineError::InvalidWrite(
4477 "retention_json max_age_seconds must be greater than zero".to_owned(),
4478 ));
4479 }
4480 Ok(policy)
4481 }
4482 OperationalRetentionPolicy::KeepLast { max_rows } => {
4483 if max_rows == 0 {
4484 return Err(EngineError::InvalidWrite(
4485 "retention_json max_rows must be greater than zero".to_owned(),
4486 ));
4487 }
4488 Ok(policy)
4489 }
4490 }
4491}
4492
4493fn load_operational_retention_records(
4494 conn: &rusqlite::Connection,
4495 collection_names: Option<&[String]>,
4496 max_collections: Option<usize>,
4497) -> Result<Vec<OperationalCollectionRecord>, EngineError> {
4498 let limit = max_collections.unwrap_or(usize::MAX);
4499 if limit == 0 {
4500 return Err(EngineError::InvalidWrite(
4501 "max_collections must be greater than zero".to_owned(),
4502 ));
4503 }
4504
4505 let mut records = Vec::new();
4506 if let Some(collection_names) = collection_names {
4507 for name in collection_names.iter().take(limit) {
4508 let record = load_operational_collection_record(conn, name)?.ok_or_else(|| {
4509 EngineError::InvalidWrite(format!(
4510 "operational collection '{name}' is not registered"
4511 ))
4512 })?;
4513 records.push(record);
4514 }
4515 return Ok(records);
4516 }
4517
4518 let mut stmt = conn.prepare(
4519 "SELECT name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at, disabled_at \
4520 FROM operational_collections ORDER BY name",
4521 )?;
4522 let rows = stmt
4523 .query_map([], map_operational_collection_row)?
4524 .take(limit)
4525 .collect::<Result<Vec<_>, _>>()?;
4526 Ok(rows)
4527}
4528
4529fn last_operational_retention_run_at(
4530 conn: &rusqlite::Connection,
4531 collection_name: &str,
4532) -> Result<Option<i64>, EngineError> {
4533 conn.query_row(
4534 "SELECT MAX(executed_at) FROM operational_retention_runs WHERE collection_name = ?1",
4535 [collection_name],
4536 |row| row.get(0),
4537 )
4538 .optional()
4539 .map_err(EngineError::Sqlite)
4540 .map(Option::flatten)
4541}
4542
4543fn count_operational_mutations_for_collection(
4544 conn: &rusqlite::Connection,
4545 collection_name: &str,
4546) -> Result<usize, EngineError> {
4547 let count: i64 = conn.query_row(
4548 "SELECT count(*) FROM operational_mutations WHERE collection_name = ?1",
4549 [collection_name],
4550 |row| row.get(0),
4551 )?;
4552 usize::try_from(count).map_err(|_| {
4553 EngineError::Bridge(format!("count overflow for collection {collection_name}"))
4554 })
4555}
4556
4557fn retention_action_kind_and_limit(
4558 policy: &OperationalRetentionPolicy,
4559) -> (OperationalRetentionActionKind, Option<usize>) {
4560 match policy {
4561 OperationalRetentionPolicy::KeepAll => (OperationalRetentionActionKind::Noop, None),
4562 OperationalRetentionPolicy::PurgeBeforeSeconds { .. } => {
4563 (OperationalRetentionActionKind::PurgeBeforeSeconds, None)
4564 }
4565 OperationalRetentionPolicy::KeepLast { max_rows } => {
4566 (OperationalRetentionActionKind::KeepLast, Some(*max_rows))
4567 }
4568 }
4569}
4570
4571fn plan_operational_retention_item(
4572 conn: &rusqlite::Connection,
4573 record: &OperationalCollectionRecord,
4574 now_timestamp: i64,
4575) -> Result<OperationalRetentionPlanItem, EngineError> {
4576 let last_run_at = last_operational_retention_run_at(conn, &record.name)?;
4577 if record.kind != OperationalCollectionKind::AppendOnlyLog {
4578 return Ok(OperationalRetentionPlanItem {
4579 collection_name: record.name.clone(),
4580 action_kind: OperationalRetentionActionKind::Noop,
4581 candidate_deletions: 0,
4582 before_timestamp: None,
4583 max_rows: None,
4584 last_run_at,
4585 });
4586 }
4587 let policy = parse_operational_retention_policy(&record.retention_json)?;
4588 let (action_kind, max_rows) = retention_action_kind_and_limit(&policy);
4589 let (candidate_ids, before_timestamp) = operational_compaction_candidates_at(
4590 conn,
4591 &record.retention_json,
4592 &record.name,
4593 now_timestamp,
4594 )?;
4595 Ok(OperationalRetentionPlanItem {
4596 collection_name: record.name.clone(),
4597 action_kind,
4598 candidate_deletions: candidate_ids.len(),
4599 before_timestamp,
4600 max_rows,
4601 last_run_at,
4602 })
4603}
4604
4605fn run_operational_retention_item(
4606 tx: &rusqlite::Transaction<'_>,
4607 record: &OperationalCollectionRecord,
4608 now_timestamp: i64,
4609 dry_run: bool,
4610) -> Result<OperationalRetentionRunItem, EngineError> {
4611 let plan = plan_operational_retention_item(tx, record, now_timestamp)?;
4612 let mut deleted_mutations = 0usize;
4613 if record.kind == OperationalCollectionKind::AppendOnlyLog
4614 && plan.action_kind != OperationalRetentionActionKind::Noop
4615 && plan.candidate_deletions > 0
4616 && !dry_run
4617 {
4618 let (candidate_ids, _) = operational_compaction_candidates_at(
4619 tx,
4620 &record.retention_json,
4621 &record.name,
4622 now_timestamp,
4623 )?;
4624 let mut delete_stmt =
4625 tx.prepare_cached("DELETE FROM operational_mutations WHERE id = ?1")?;
4626 for mutation_id in &candidate_ids {
4627 delete_stmt.execute([mutation_id.as_str()])?;
4628 deleted_mutations += 1;
4629 }
4630 drop(delete_stmt);
4631
4632 persist_simple_provenance_event(
4633 tx,
4634 "operational_retention_run",
4635 &record.name,
4636 Some(serde_json::json!({
4637 "action_kind": plan.action_kind,
4638 "deleted_mutations": deleted_mutations,
4639 "before_timestamp": plan.before_timestamp,
4640 "max_rows": plan.max_rows,
4641 "executed_at": now_timestamp,
4642 })),
4643 )?;
4644 }
4645
4646 let live_rows_remaining = count_operational_mutations_for_collection(tx, &record.name)?;
4647 let effective_deleted_mutations = if dry_run {
4648 plan.candidate_deletions
4649 } else {
4650 deleted_mutations
4651 };
4652 let rows_remaining = if dry_run {
4653 live_rows_remaining.saturating_sub(effective_deleted_mutations)
4654 } else {
4655 live_rows_remaining
4656 };
4657 if !dry_run && plan.action_kind != OperationalRetentionActionKind::Noop {
4658 tx.execute(
4659 "INSERT INTO operational_retention_runs \
4660 (id, collection_name, executed_at, action_kind, dry_run, deleted_mutations, rows_remaining, metadata_json) \
4661 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
4662 rusqlite::params![
4663 new_id(),
4664 record.name,
4665 now_timestamp,
4666 serde_json::to_string(&plan.action_kind)
4667 .unwrap_or_else(|_| "\"noop\"".to_owned())
4668 .trim_matches('"')
4669 .to_owned(),
4670 i32::from(dry_run),
4671 deleted_mutations,
4672 rows_remaining,
4673 serde_json::json!({
4674 "before_timestamp": plan.before_timestamp,
4675 "max_rows": plan.max_rows,
4676 })
4677 .to_string(),
4678 ],
4679 )?;
4680 }
4681
4682 Ok(OperationalRetentionRunItem {
4683 collection_name: plan.collection_name,
4684 action_kind: plan.action_kind,
4685 deleted_mutations: effective_deleted_mutations,
4686 before_timestamp: plan.before_timestamp,
4687 max_rows: plan.max_rows,
4688 rows_remaining,
4689 })
4690}
4691
4692fn current_unix_timestamp() -> Result<i64, EngineError> {
4693 let now = SystemTime::now()
4694 .duration_since(SystemTime::UNIX_EPOCH)
4695 .map_err(|error| EngineError::Bridge(format!("system clock error: {error}")))?;
4696 i64::try_from(now.as_secs())
4697 .map_err(|_| EngineError::Bridge("unix timestamp overflow".to_owned()))
4698}
4699
4700fn map_operational_collection_row(
4701 row: &rusqlite::Row<'_>,
4702) -> Result<OperationalCollectionRecord, rusqlite::Error> {
4703 let kind_text: String = row.get(1)?;
4704 let kind = OperationalCollectionKind::try_from(kind_text.as_str()).map_err(|message| {
4705 rusqlite::Error::FromSqlConversionFailure(
4706 1,
4707 rusqlite::types::Type::Text,
4708 Box::new(io::Error::new(io::ErrorKind::InvalidData, message)),
4709 )
4710 })?;
4711 Ok(OperationalCollectionRecord {
4712 name: row.get(0)?,
4713 kind,
4714 schema_json: row.get(2)?,
4715 retention_json: row.get(3)?,
4716 filter_fields_json: row.get(4)?,
4717 validation_json: row.get(5)?,
4718 secondary_indexes_json: row.get(6)?,
4719 format_version: row.get(7)?,
4720 created_at: row.get(8)?,
4721 disabled_at: row.get(9)?,
4722 })
4723}
4724
4725fn map_operational_mutation_row(
4726 row: &rusqlite::Row<'_>,
4727) -> Result<OperationalMutationRow, rusqlite::Error> {
4728 Ok(OperationalMutationRow {
4729 id: row.get(0)?,
4730 collection_name: row.get(1)?,
4731 record_key: row.get(2)?,
4732 op_kind: row.get(3)?,
4733 payload_json: row.get(4)?,
4734 source_ref: row.get(5)?,
4735 created_at: row.get(6)?,
4736 })
4737}
4738
4739fn map_operational_current_row(
4740 row: &rusqlite::Row<'_>,
4741) -> Result<OperationalCurrentRow, rusqlite::Error> {
4742 Ok(OperationalCurrentRow {
4743 collection_name: row.get(0)?,
4744 record_key: row.get(1)?,
4745 payload_json: row.get(2)?,
4746 updated_at: row.get(3)?,
4747 last_mutation_id: row.get(4)?,
4748 })
4749}
4750
4751#[cfg(test)]
4752#[allow(clippy::expect_used)]
4753mod tests {
4754 use std::fs;
4755 use std::sync::Arc;
4756
4757 use fathomdb_schema::SchemaManager;
4758 use tempfile::NamedTempFile;
4759
4760 use super::{
4761 AdminService, FtsPropertyPathMode, FtsPropertyPathSpec, SafeExportOptions,
4762 VectorRegenerationConfig,
4763 };
4764 use crate::embedder::{EmbedderError, QueryEmbedder, QueryEmbedderIdentity};
4765 use crate::projection::ProjectionTarget;
4766 use crate::sqlite;
4767 use crate::{
4768 EngineError, ExecutionCoordinator, OperationalCollectionKind, OperationalRegisterRequest,
4769 TelemetryCounters,
4770 };
4771
4772 use fathomdb_query::QueryBuilder;
4773
4774 #[cfg(feature = "sqlite-vec")]
4775 use super::load_vector_regeneration_config;
4776
    /// Test double for [`QueryEmbedder`]: it reports a fixed identity and
    /// answers every query with the same stored vector.
    #[derive(Debug)]
    #[allow(dead_code)]
    struct TestEmbedder {
        /// Identity returned from `identity()`.
        identity: QueryEmbedderIdentity,
        /// Vector returned (cloned) from every `embed_query` call.
        vector: Vec<f32>,
    }
4786
4787 #[allow(dead_code)]
4788 impl TestEmbedder {
4789 fn new(model: &str, dimension: usize) -> Self {
4790 Self {
4791 identity: QueryEmbedderIdentity {
4792 model_identity: model.to_owned(),
4793 model_version: "1.0.0".to_owned(),
4794 dimension,
4795 normalization_policy: "l2".to_owned(),
4796 },
4797 vector: vec![1.0; dimension],
4798 }
4799 }
4800 }
4801
    impl QueryEmbedder for TestEmbedder {
        /// Ignores the query text and returns a copy of the stored vector.
        fn embed_query(&self, _text: &str) -> Result<Vec<f32>, EmbedderError> {
            Ok(self.vector.clone())
        }
        /// Returns a copy of the identity this embedder was built with.
        fn identity(&self) -> QueryEmbedderIdentity {
            self.identity.clone()
        }
    }
4810
    /// Test double for [`QueryEmbedder`] whose `embed_query` always fails,
    /// for exercising embedder error paths.
    #[derive(Debug)]
    #[allow(dead_code)]
    struct FailingEmbedder {
        /// Identity returned from `identity()`.
        identity: QueryEmbedderIdentity,
    }
4818
    impl QueryEmbedder for FailingEmbedder {
        /// Always returns `EmbedderError::Failed("test failure")`.
        fn embed_query(&self, _text: &str) -> Result<Vec<f32>, EmbedderError> {
            Err(EmbedderError::Failed("test failure".to_owned()))
        }
        /// Returns a copy of the identity this embedder was built with.
        fn identity(&self) -> QueryEmbedderIdentity {
            self.identity.clone()
        }
    }
4827
4828 #[allow(dead_code)]
4829 #[cfg(unix)]
4830 fn set_file_mode(path: &std::path::Path, mode: u32) {
4831 use std::os::unix::fs::PermissionsExt;
4832
4833 let mut permissions = fs::metadata(path).expect("script metadata").permissions();
4834 permissions.set_mode(mode);
4835 fs::set_permissions(path, permissions).expect("chmod");
4836 }
4837
    /// Stand-in for `set_file_mode` on non-Unix targets: it does nothing and
    /// ignores both arguments.
    #[allow(dead_code)]
    #[cfg(not(unix))]
    fn set_file_mode(_path: &std::path::Path, _mode: u32) {}
4841
4842 fn setup() -> (NamedTempFile, AdminService) {
4843 let db = NamedTempFile::new().expect("temp file");
4844 let schema = Arc::new(SchemaManager::new());
4845 {
4846 let conn = sqlite::open_connection(db.path()).expect("connection");
4847 schema.bootstrap(&conn).expect("bootstrap");
4848 }
4849 let service = AdminService::new(db.path(), Arc::clone(&schema));
4850 (db, service)
4851 }
4852
4853 #[test]
4854 fn check_integrity_includes_active_uniqueness_count() {
4855 let (_db, service) = setup();
4856 let report = service.check_integrity().expect("integrity check");
4857 assert_eq!(report.duplicate_active_logical_ids, 0);
4858 assert_eq!(report.operational_missing_collections, 0);
4859 assert_eq!(report.operational_missing_last_mutations, 0);
4860 }
4861
4862 #[test]
4863 fn trace_source_returns_node_logical_ids() {
4864 let (db, service) = setup();
4865 {
4866 let conn = sqlite::open_connection(db.path()).expect("conn");
4867 conn.execute(
4868 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
4869 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 'source-1')",
4870 [],
4871 )
4872 .expect("insert node");
4873 }
4874 let report = service.trace_source("source-1").expect("trace");
4875 assert_eq!(report.node_rows, 1);
4876 assert_eq!(report.node_logical_ids, vec!["lg1"]);
4877 }
4878
4879 #[test]
4880 fn trace_source_includes_operational_mutations() {
4881 let (db, service) = setup();
4882 {
4883 let conn = sqlite::open_connection(db.path()).expect("conn");
4884 conn.execute(
4885 "INSERT INTO operational_collections \
4886 (name, kind, schema_json, retention_json, format_version, created_at) \
4887 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
4888 [],
4889 )
4890 .expect("insert collection");
4891 conn.execute(
4892 "INSERT INTO operational_mutations \
4893 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
4894 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"ok\"}', 'source-1', 100, 1)",
4895 [],
4896 )
4897 .expect("insert mutation");
4898 }
4899
4900 let report = service.trace_source("source-1").expect("trace");
4901 assert_eq!(report.operational_mutation_rows, 1);
4902 assert_eq!(report.operational_mutation_ids, vec!["m1"]);
4903 }
4904
    /// Excising the source of the active node revision makes the previously
    /// superseded revision of the same logical id active again.
    #[test]
    fn excise_source_restores_prior_active_node() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            // Revision 1 (from source-1) was superseded at t=200 ...
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
                [],
            )
            .expect("insert v1 superseded");
            // ... by revision 2 (from source-2), which is currently active.
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
                [],
            )
            .expect("insert v2 active");
        }
        service.excise_source("source-2").expect("excise");
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            // After the excise, the only active row for lg1 must be revision 1.
            let active_row_id: String = conn
                .query_row(
                    "SELECT row_id FROM nodes WHERE logical_id = 'lg1' AND superseded_at IS NULL",
                    [],
                    |row| row.get(0),
                )
                .expect("active row exists after excise");
            assert_eq!(active_row_id, "r1");
        }
    }
4936
    /// Excising a source removes its operational mutations and rebuilds the
    /// `operational_current` row from the mutation that remains.
    #[test]
    fn excise_source_deletes_operational_mutations_and_repairs_latest_state_current() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO operational_collections \
                 (name, kind, schema_json, retention_json, format_version, created_at) \
                 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
                [],
            )
            .expect("insert collection");
            // Older mutation from source-1: this is what should survive.
            conn.execute(
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"old\"}', 'source-1', 100, 1)",
                [],
            )
            .expect("insert prior mutation");
            // Newer mutation from source-2: this is the one being excised.
            conn.execute(
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES ('m2', 'connector_health', 'gmail', 'put', '{\"status\":\"new\"}', 'source-2', 200, 2)",
                [],
            )
            .expect("insert excised mutation");
            // The current row initially reflects the newer mutation m2.
            conn.execute(
                "INSERT INTO operational_current \
                 (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
                 VALUES ('connector_health', 'gmail', '{\"status\":\"new\"}', 200, 'm2')",
                [],
            )
            .expect("insert current row");
        }

        // Before the excise, the trace sees m2 under source-2.
        let traced = service
            .trace_source("source-2")
            .expect("trace before excise");
        assert_eq!(traced.operational_mutation_rows, 1);
        assert_eq!(traced.operational_mutation_ids, vec!["m2"]);

        // The report returned by the excise no longer lists any mutation for source-2.
        let excised = service.excise_source("source-2").expect("excise");
        assert_eq!(excised.operational_mutation_rows, 0);
        assert!(excised.operational_mutation_ids.is_empty());

        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            let remaining: i64 = conn
                .query_row(
                    "SELECT count(*) FROM operational_mutations WHERE source_ref = 'source-2'",
                    [],
                    |row| row.get(0),
                )
                .expect("remaining count");
            assert_eq!(remaining, 0);

            // The current row must have been rebuilt from the surviving mutation m1.
            let current: (String, String) = conn
                .query_row(
                    "SELECT payload_json, last_mutation_id FROM operational_current \
                     WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
                    [],
                    |row| Ok((row.get(0)?, row.get(1)?)),
                )
                .expect("rebuilt current row");
            assert_eq!(current.0, "{\"status\":\"old\"}");
            assert_eq!(current.1, "m1");
        }
    }
5005
    /// Restoring a retired logical id reactivates its node row and the edge
    /// retired with it, and rebuilds the FTS row for its preserved chunk.
    #[test]
    fn restore_logical_id_reestablishes_last_pre_retire_content_and_attached_edges() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            // Seed: document node, the topic node it points at, one chunk, one edge.
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
                [],
            )
            .expect("insert node");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('node-row-topic', 'topic-1', 'Topic', '{}', 100, 'seed')",
                [],
            )
            .expect("insert target node");
            conn.execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
                [],
            )
            .expect("insert chunk");
            conn.execute(
                "INSERT INTO edges \
                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 'seed')",
                [],
            )
            .expect("insert edge");
            // Retire events for the node and the edge, both at t=200.
            conn.execute(
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
                [],
            )
            .expect("insert node retire event");
            conn.execute(
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
                [],
            )
            .expect("insert edge retire event");
            // Mark both rows as superseded at the retire time.
            conn.execute(
                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
                [],
            )
            .expect("retire node");
            conn.execute(
                "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
                [],
            )
            .expect("retire edge");
            // Start from an empty FTS table so the restore has to recreate the row.
            conn.execute("DELETE FROM fts_nodes", [])
                .expect("clear fts");
        }

        let report = service.restore_logical_id("doc-1").expect("restore");
        assert_eq!(report.logical_id, "doc-1");
        assert!(!report.was_noop);
        assert_eq!(report.restored_node_rows, 1);
        assert_eq!(report.restored_edge_rows, 1);
        assert_eq!(report.restored_chunk_rows, 1);
        assert_eq!(report.restored_fts_rows, 1);

        // Verify the database state matches what the report claims.
        let conn = sqlite::open_connection(db.path()).expect("conn");
        let active_node_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM nodes WHERE logical_id = 'doc-1' AND superseded_at IS NULL",
                [],
                |row| row.get(0),
            )
            .expect("active node count");
        assert_eq!(active_node_count, 1);
        let active_edge_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
                [],
                |row| row.get(0),
            )
            .expect("active edge count");
        assert_eq!(active_edge_count, 1);
        let fts_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM fts_nodes WHERE chunk_id = 'chunk-1'",
                [],
                |row| row.get(0),
            )
            .expect("fts count");
        assert_eq!(fts_count, 1);
    }
5096
    /// An edge whose retire event and `superseded_at` (t=201) are later than
    /// the node's retire (t=200) is still restored together with the node.
    #[test]
    fn restore_logical_id_restores_edges_retired_after_the_node_retire_event() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            // Seed: document node, topic node, and the edge between them.
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
                [],
            )
            .expect("insert node");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('node-row-topic', 'topic-1', 'Topic', '{}', 100, 'seed')",
                [],
            )
            .expect("insert target node");
            conn.execute(
                "INSERT INTO edges \
                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 'seed')",
                [],
            )
            .expect("insert edge");
            // The node retires at t=200 ...
            conn.execute(
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
                [],
            )
            .expect("insert node retire event");
            // ... and the edge one tick later, at t=201.
            conn.execute(
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 201, '')",
                [],
            )
            .expect("insert edge retire event");
            conn.execute(
                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
                [],
            )
            .expect("retire node");
            conn.execute(
                "UPDATE edges SET superseded_at = 201 WHERE logical_id = 'edge-1'",
                [],
            )
            .expect("retire edge");
        }

        let report = service.restore_logical_id("doc-1").expect("restore");
        assert_eq!(report.restored_edge_rows, 1);

        let conn = sqlite::open_connection(db.path()).expect("conn");
        let active_edge_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
                [],
                |row| row.get(0),
            )
            .expect("active edge count");
        assert_eq!(active_edge_count, 1);
    }
5158
    /// When two retired revisions share the same `created_at` and
    /// `superseded_at`, the restore must pick the row inserted later
    /// (`node-row-newer`).
    #[test]
    fn restore_logical_id_prefers_latest_retired_revision_when_timestamps_tie() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            // Two retired rows for doc-1 with identical timestamps (100 / 200);
            // only insertion order and content tell them apart.
            conn.execute(
                "INSERT INTO nodes \
                 (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
                 VALUES ('node-row-older', 'doc-1', 'Document', '{\"title\":\"older\"}', 100, 200, 'forget-1')",
                [],
            )
            .expect("insert older retired node");
            conn.execute(
                "INSERT INTO nodes \
                 (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
                 VALUES ('node-row-newer', 'doc-1', 'Document', '{\"title\":\"newer\"}', 100, 200, 'forget-1')",
                [],
            )
            .expect("insert newer retired node");
            // Two retire events, also with identical timestamps.
            conn.execute(
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-retire-older', 'node_retire', 'doc-1', 'forget-1', 200, '')",
                [],
            )
            .expect("insert older retire event");
            conn.execute(
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-retire-newer', 'node_retire', 'doc-1', 'forget-1', 200, '')",
                [],
            )
            .expect("insert newer retire event");
        }

        let report = service.restore_logical_id("doc-1").expect("restore");

        assert!(!report.was_noop);
        let conn = sqlite::open_connection(db.path()).expect("conn");
        // Exactly one active row is expected, and it must be the later-inserted one.
        let active_row: (String, String) = conn
            .query_row(
                "SELECT row_id, properties FROM nodes \
                 WHERE logical_id = 'doc-1' AND superseded_at IS NULL",
                [],
                |row| Ok((row.get(0)?, row.get(1)?)),
            )
            .expect("restored active row");
        assert_eq!(active_row.0, "node-row-newer");
        assert_eq!(active_row.1, "{\"title\":\"newer\"}");
    }
5207
    /// Purging a retired logical id deletes its node, edge, chunk and FTS rows
    /// and records a single `purge_logical_id` provenance event.
    #[test]
    fn purge_logical_id_removes_retired_content_and_records_tombstone() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            // Seed already-retired content: the node and edge carry superseded_at = 200.
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
                [],
            )
            .expect("insert retired node");
            conn.execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
                [],
            )
            .expect("insert chunk");
            conn.execute(
                "INSERT INTO edges \
                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, superseded_at, source_ref) \
                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 200, 'seed')",
                [],
            )
            .expect("insert retired edge");
            conn.execute(
                "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
                 VALUES ('chunk-1', 'doc-1', 'Document', 'budget narrative')",
                [],
            )
            .expect("insert fts");
        }

        let report = service.purge_logical_id("doc-1").expect("purge");
        assert_eq!(report.logical_id, "doc-1");
        assert!(!report.was_noop);
        assert_eq!(report.deleted_node_rows, 1);
        assert_eq!(report.deleted_edge_rows, 1);
        assert_eq!(report.deleted_chunk_rows, 1);
        assert_eq!(report.deleted_fts_rows, 1);

        // Verify the rows are gone from the database itself.
        let conn = sqlite::open_connection(db.path()).expect("conn");
        let remaining_nodes: i64 = conn
            .query_row(
                "SELECT count(*) FROM nodes WHERE logical_id = 'doc-1'",
                [],
                |row| row.get(0),
            )
            .expect("remaining nodes");
        assert_eq!(remaining_nodes, 0);
        let remaining_edges: i64 = conn
            .query_row(
                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1'",
                [],
                |row| row.get(0),
            )
            .expect("remaining edges");
        assert_eq!(remaining_edges, 0);
        let remaining_chunks: i64 = conn
            .query_row(
                "SELECT count(*) FROM chunks WHERE id = 'chunk-1'",
                [],
                |row| row.get(0),
            )
            .expect("remaining chunks");
        assert_eq!(remaining_chunks, 0);
        // The purge itself must leave exactly one provenance event behind.
        let purge_events: i64 = conn
            .query_row(
                "SELECT count(*) FROM provenance_events WHERE event_type = 'purge_logical_id' AND subject = 'doc-1'",
                [],
                |row| row.get(0),
            )
            .expect("purge events");
        assert_eq!(purge_events, 1);
    }
5282
5283 #[test]
5284 fn check_semantics_accepts_preserved_retired_chunks() {
5285 let (db, service) = setup();
5286 {
5287 let conn = sqlite::open_connection(db.path()).expect("conn");
5288 conn.execute(
5289 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5290 VALUES ('node-row-1', 'doc-1', 'Document', '{}', 100, 200, 'seed')",
5291 [],
5292 )
5293 .expect("insert retired node");
5294 conn.execute(
5295 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5296 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5297 [],
5298 )
5299 .expect("insert chunk");
5300 }
5301
5302 let report = service.check_semantics().expect("semantics");
5303 assert_eq!(report.orphaned_chunks, 0);
5304 }
5305
5306 #[test]
5307 fn check_semantics_detects_missing_retired_node_history_for_preserved_chunks() {
5308 let (db, service) = setup();
5309 {
5310 let conn = sqlite::open_connection(db.path()).expect("conn");
5311 conn.execute(
5312 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5313 VALUES ('chunk-1', 'ghost-doc', 'budget narrative', 100)",
5314 [],
5315 )
5316 .expect("insert orphaned chunk");
5317 }
5318
5319 let report = service.check_semantics().expect("semantics");
5320 assert_eq!(report.orphaned_chunks, 1);
5321 }
5322
    /// With the `sqlite-vec` feature: a vector row attached to a chunk whose
    /// logical id has no node rows is reported both as an orphaned chunk and
    /// under `vec_rows_for_superseded_nodes`.
    #[cfg(feature = "sqlite-vec")]
    #[test]
    fn check_semantics_detects_missing_retired_node_history_for_preserved_vec_rows() {
        let (db, service) = setup();
        {
            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
            // Make sure the vector table exists before inserting into it.
            service
                .schema_manager
                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
                .expect("ensure vec profile");
            // The chunk points at 'ghost-doc', for which no node row is ever inserted.
            conn.execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'ghost-doc', 'budget narrative', 100)",
                [],
            )
            .expect("insert orphaned chunk");
            // 16 zero bytes — presumably four f32 components, matching the 4 passed above.
            conn.execute(
                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
                [],
            )
            .expect("insert vec row");
        }

        let report = service.check_semantics().expect("semantics");
        assert_eq!(report.orphaned_chunks, 1);
        assert_eq!(report.vec_rows_for_superseded_nodes, 1);
    }
5350
    /// With the `sqlite-vec` feature: after restoring a retired logical id, its
    /// preserved vector row is returned by a vector search again, without the
    /// content being re-ingested.
    #[cfg(feature = "sqlite-vec")]
    #[test]
    fn restore_logical_id_reestablishes_vector_search_without_reingest() {
        let (db, service) = setup();
        {
            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
            // Make sure the vector table exists before inserting into it.
            service
                .schema_manager
                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
                .expect("ensure vec profile");
            // Seed a retired node (superseded_at = 200) with a chunk and a vector row.
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
                [],
            )
            .expect("insert retired node");
            conn.execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
                [],
            )
            .expect("insert chunk");
            conn.execute(
                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
                [],
            )
            .expect("insert vec row");
            conn.execute(
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
                [],
            )
            .expect("insert retire event");
        }

        let report = service.restore_logical_id("doc-1").expect("restore");
        assert_eq!(report.restored_vec_rows, 1);

        // Read through a separately opened coordinator to check the restored
        // row is visible to queries.
        // NOTE(review): `Some(4)` presumably is the vector dimension and `1` a
        // pool size — confirm against `ExecutionCoordinator::open`.
        let coordinator = ExecutionCoordinator::open(
            db.path(),
            Arc::new(SchemaManager::new()),
            Some(4),
            1,
            Arc::new(TelemetryCounters::default()),
            None,
        )
        .expect("coordinator");
        let compiled = QueryBuilder::nodes("Document")
            .vector_search("[0.0, 0.0, 0.0, 0.0]", 5)
            .compile()
            .expect("compile");
        let rows = coordinator
            .execute_compiled_read(&compiled)
            .expect("vector read");
        assert!(
            rows.nodes.iter().any(|row| row.logical_id == "doc-1"),
            "restore should make the preserved vec row visible again without re-ingest"
        );
    }
5410
5411 #[cfg(feature = "sqlite-vec")]
5412 #[test]
5413 fn purge_logical_id_deletes_vec_rows_for_retired_content() {
5414 let (db, service) = setup();
5415 {
5416 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5417 service
5418 .schema_manager
5419 .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
5420 .expect("ensure vec profile");
5421 conn.execute(
5422 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5423 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
5424 [],
5425 )
5426 .expect("insert retired node");
5427 conn.execute(
5428 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5429 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5430 [],
5431 )
5432 .expect("insert chunk");
5433 conn.execute(
5434 "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
5435 [],
5436 )
5437 .expect("insert vec row");
5438 }
5439
5440 let report = service.purge_logical_id("doc-1").expect("purge");
5441 assert_eq!(report.deleted_vec_rows, 1);
5442
5443 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5444 let vec_count: i64 = conn
5445 .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
5446 row.get(0)
5447 })
5448 .expect("vec count");
5449 assert_eq!(vec_count, 0);
5450 }
5451
5452 #[cfg(feature = "sqlite-vec")]
5453 #[test]
5454 fn restore_logical_id_restores_visibility_of_regenerated_vectors() {
5455 let (db, service) = setup();
5456
5457 {
5458 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5459 service
5460 .schema_manager
5461 .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
5462 .expect("ensure vec profile");
5463 conn.execute(
5464 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
5465 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
5466 [],
5467 )
5468 .expect("insert node");
5469 conn.execute(
5470 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5471 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5472 [],
5473 )
5474 .expect("insert chunk");
5475 }
5476
5477 let embedder = TestEmbedder::new("test-model", 4);
5478 service
5479 .regenerate_vector_embeddings(
5480 &embedder,
5481 &VectorRegenerationConfig {
5482 profile: "default".to_owned(),
5483 table_name: "vec_nodes_active".to_owned(),
5484 chunking_policy: "per_chunk".to_owned(),
5485 preprocessing_policy: "trim".to_owned(),
5486 },
5487 )
5488 .expect("regenerate");
5489
5490 {
5491 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5492 conn.execute(
5493 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5494 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
5495 [],
5496 )
5497 .expect("insert retire event");
5498 conn.execute(
5499 "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
5500 [],
5501 )
5502 .expect("retire node");
5503 }
5504
5505 let report = service.restore_logical_id("doc-1").expect("restore");
5506 assert_eq!(report.restored_vec_rows, 1);
5507
5508 let coordinator = ExecutionCoordinator::open(
5509 db.path(),
5510 Arc::new(SchemaManager::new()),
5511 Some(4),
5512 1,
5513 Arc::new(TelemetryCounters::default()),
5514 None,
5515 )
5516 .expect("coordinator");
5517 let compiled = QueryBuilder::nodes("Document")
5518 .vector_search("[0.0, 0.0, 0.0, 0.0]", 5)
5519 .compile()
5520 .expect("compile");
5521 let rows = coordinator
5522 .execute_compiled_read(&compiled)
5523 .expect("vector read");
5524 assert!(
5525 rows.nodes.iter().any(|row| row.logical_id == "doc-1"),
5526 "restored logical_id should become visible through regenerated vectors"
5527 );
5528 }
5529
5530 #[test]
5531 fn check_semantics_clean_db_returns_zeros() {
5532 let (_db, service) = setup();
5533 let report = service.check_semantics().expect("semantics check");
5534 assert_eq!(report.orphaned_chunks, 0);
5535 assert_eq!(report.null_source_ref_nodes, 0);
5536 assert_eq!(report.broken_step_fk, 0);
5537 assert_eq!(report.broken_action_fk, 0);
5538 assert_eq!(report.stale_fts_rows, 0);
5539 assert_eq!(report.fts_rows_for_superseded_nodes, 0);
5540 assert_eq!(report.dangling_edges, 0);
5541 assert_eq!(report.orphaned_supersession_chains, 0);
5542 assert_eq!(report.stale_vec_rows, 0);
5543 assert_eq!(report.vec_rows_for_superseded_nodes, 0);
5544 assert_eq!(report.missing_operational_current_rows, 0);
5545 assert_eq!(report.stale_operational_current_rows, 0);
5546 assert_eq!(report.disabled_collection_mutations, 0);
5547 assert_eq!(report.mismatched_kind_property_fts_rows, 0);
5548 assert_eq!(report.duplicate_property_fts_rows, 0);
5549 assert_eq!(report.drifted_property_fts_rows, 0);
5550 assert!(report.warnings.is_empty());
5551 }
5552
5553 #[test]
5554 fn register_operational_collection_persists_and_emits_provenance() {
5555 let (db, service) = setup();
5556 let record = service
5557 .register_operational_collection(&OperationalRegisterRequest {
5558 name: "connector_health".to_owned(),
5559 kind: OperationalCollectionKind::LatestState,
5560 schema_json: "{}".to_owned(),
5561 retention_json: "{}".to_owned(),
5562 filter_fields_json: "[]".to_owned(),
5563 validation_json: String::new(),
5564 secondary_indexes_json: "[]".to_owned(),
5565 format_version: 1,
5566 })
5567 .expect("register collection");
5568
5569 assert_eq!(record.name, "connector_health");
5570 assert_eq!(record.kind, OperationalCollectionKind::LatestState);
5571 assert_eq!(record.schema_json, "{}");
5572 assert_eq!(record.retention_json, "{}");
5573 assert_eq!(record.filter_fields_json, "[]");
5574 assert!(record.created_at > 0);
5575 assert_eq!(record.disabled_at, None);
5576
5577 let described = service
5578 .describe_operational_collection("connector_health")
5579 .expect("describe collection")
5580 .expect("collection exists");
5581 assert_eq!(described, record);
5582
5583 let conn = sqlite::open_connection(db.path()).expect("conn");
5584 let provenance_count: i64 = conn
5585 .query_row(
5586 "SELECT count(*) FROM provenance_events \
5587 WHERE event_type = 'operational_collection_registered' AND subject = 'connector_health'",
5588 [],
5589 |row| row.get(0),
5590 )
5591 .expect("provenance count");
5592 assert_eq!(provenance_count, 1);
5593 }
5594
5595 #[test]
5596 fn register_and_update_operational_collection_validation_round_trip() {
5597 let (db, service) = setup();
5598 let record = service
5599 .register_operational_collection(&OperationalRegisterRequest {
5600 name: "connector_health".to_owned(),
5601 kind: OperationalCollectionKind::LatestState,
5602 schema_json: "{}".to_owned(),
5603 retention_json: "{}".to_owned(),
5604 filter_fields_json: "[]".to_owned(),
5605 validation_json: String::new(),
5606 secondary_indexes_json: "[]".to_owned(),
5607 format_version: 1,
5608 })
5609 .expect("register collection");
5610 assert_eq!(record.validation_json, "");
5611
5612 let validation_json = r#"{"format_version":1,"mode":"enforce","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#;
5613 let updated = service
5614 .update_operational_collection_validation("connector_health", validation_json)
5615 .expect("update validation");
5616 assert_eq!(updated.validation_json, validation_json);
5617
5618 let described = service
5619 .describe_operational_collection("connector_health")
5620 .expect("describe collection")
5621 .expect("collection exists");
5622 assert_eq!(described.validation_json, validation_json);
5623
5624 let conn = sqlite::open_connection(db.path()).expect("conn");
5625 let provenance_count: i64 = conn
5626 .query_row(
5627 "SELECT count(*) FROM provenance_events \
5628 WHERE event_type = 'operational_collection_validation_updated' \
5629 AND subject = 'connector_health'",
5630 [],
5631 |row| row.get(0),
5632 )
5633 .expect("provenance count");
5634 assert_eq!(provenance_count, 1);
5635 }
5636
5637 #[test]
5638 fn register_update_and_rebuild_operational_secondary_indexes_round_trip() {
5639 let (db, service) = setup();
5640 let record = service
5641 .register_operational_collection(&OperationalRegisterRequest {
5642 name: "audit_log".to_owned(),
5643 kind: OperationalCollectionKind::AppendOnlyLog,
5644 schema_json: "{}".to_owned(),
5645 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
5646 filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
5647 validation_json: String::new(),
5648 secondary_indexes_json: "[]".to_owned(),
5649 format_version: 1,
5650 })
5651 .expect("register collection");
5652 assert_eq!(record.secondary_indexes_json, "[]");
5653
5654 {
5655 let writer = crate::WriterActor::start(
5656 db.path(),
5657 Arc::new(SchemaManager::new()),
5658 crate::ProvenanceMode::Warn,
5659 Arc::new(crate::TelemetryCounters::default()),
5660 )
5661 .expect("writer");
5662 writer
5663 .submit(crate::WriteRequest {
5664 label: "secondary-index-seed".to_owned(),
5665 nodes: vec![],
5666 node_retires: vec![],
5667 edges: vec![],
5668 edge_retires: vec![],
5669 chunks: vec![],
5670 runs: vec![],
5671 steps: vec![],
5672 actions: vec![],
5673 optional_backfills: vec![],
5674 vec_inserts: vec![],
5675 operational_writes: vec![
5676 crate::OperationalWrite::Append {
5677 collection: "audit_log".to_owned(),
5678 record_key: "evt-1".to_owned(),
5679 payload_json: r#"{"actor":"alice","ts":100}"#.to_owned(),
5680 source_ref: Some("src-1".to_owned()),
5681 },
5682 crate::OperationalWrite::Append {
5683 collection: "audit_log".to_owned(),
5684 record_key: "evt-2".to_owned(),
5685 payload_json: r#"{"actor":"bob","ts":200}"#.to_owned(),
5686 source_ref: Some("src-2".to_owned()),
5687 },
5688 ],
5689 })
5690 .expect("seed writes");
5691 }
5692
5693 let secondary_indexes_json = r#"[{"name":"actor_ts","kind":"append_only_field_time","field":"actor","value_type":"string","time_field":"ts"}]"#;
5694 let updated = service
5695 .update_operational_collection_secondary_indexes("audit_log", secondary_indexes_json)
5696 .expect("update secondary indexes");
5697 assert_eq!(updated.secondary_indexes_json, secondary_indexes_json);
5698
5699 let conn = sqlite::open_connection(db.path()).expect("conn");
5700 let entry_count: i64 = conn
5701 .query_row(
5702 "SELECT count(*) FROM operational_secondary_index_entries \
5703 WHERE collection_name = 'audit_log' AND index_name = 'actor_ts'",
5704 [],
5705 |row| row.get(0),
5706 )
5707 .expect("secondary index count");
5708 assert_eq!(entry_count, 2);
5709 conn.execute(
5710 "DELETE FROM operational_secondary_index_entries WHERE collection_name = 'audit_log'",
5711 [],
5712 )
5713 .expect("clear index entries");
5714 drop(conn);
5715
5716 let rebuild = service
5717 .rebuild_operational_secondary_indexes("audit_log")
5718 .expect("rebuild secondary indexes");
5719 assert_eq!(rebuild.collection_name, "audit_log");
5720 assert_eq!(rebuild.mutation_entries_rebuilt, 2);
5721 assert_eq!(rebuild.current_entries_rebuilt, 0);
5722 }
5723
5724 #[test]
5725 fn register_operational_collection_rejects_invalid_validation_contract() {
5726 let (_db, service) = setup();
5727
5728 let error = service
5729 .register_operational_collection(&OperationalRegisterRequest {
5730 name: "connector_health".to_owned(),
5731 kind: OperationalCollectionKind::LatestState,
5732 schema_json: "{}".to_owned(),
5733 retention_json: "{}".to_owned(),
5734 filter_fields_json: "[]".to_owned(),
5735 validation_json: r#"{"format_version":1,"mode":"enforce","fields":[{"name":"status","type":"string","minimum":0}]}"#
5736 .to_owned(),
5737 secondary_indexes_json: "[]".to_owned(),
5738 format_version: 1,
5739 })
5740 .expect_err("invalid validation contract should reject");
5741
5742 assert!(matches!(error, EngineError::InvalidWrite(_)));
5743 assert!(error.to_string().contains("minimum/maximum"));
5744 }
5745
5746 #[test]
5747 fn validate_operational_collection_history_reports_invalid_rows_without_mutation() {
5748 let (db, service) = setup();
5749 service
5750 .register_operational_collection(&OperationalRegisterRequest {
5751 name: "audit_log".to_owned(),
5752 kind: OperationalCollectionKind::AppendOnlyLog,
5753 schema_json: "{}".to_owned(),
5754 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
5755 filter_fields_json: "[]".to_owned(),
5756 validation_json: r#"{"format_version":1,"mode":"disabled","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#
5757 .to_owned(),
5758 secondary_indexes_json: "[]".to_owned(),
5759 format_version: 1,
5760 })
5761 .expect("register collection");
5762 {
5763 let writer = crate::WriterActor::start(
5764 db.path(),
5765 Arc::new(SchemaManager::new()),
5766 crate::ProvenanceMode::Warn,
5767 Arc::new(crate::TelemetryCounters::default()),
5768 )
5769 .expect("writer");
5770 writer
5771 .submit(crate::WriteRequest {
5772 label: "history-validation".to_owned(),
5773 nodes: vec![],
5774 node_retires: vec![],
5775 edges: vec![],
5776 edge_retires: vec![],
5777 chunks: vec![],
5778 runs: vec![],
5779 steps: vec![],
5780 actions: vec![],
5781 optional_backfills: vec![],
5782 vec_inserts: vec![],
5783 operational_writes: vec![
5784 crate::OperationalWrite::Append {
5785 collection: "audit_log".to_owned(),
5786 record_key: "evt-1".to_owned(),
5787 payload_json: r#"{"status":"ok"}"#.to_owned(),
5788 source_ref: Some("src-1".to_owned()),
5789 },
5790 crate::OperationalWrite::Append {
5791 collection: "audit_log".to_owned(),
5792 record_key: "evt-2".to_owned(),
5793 payload_json: r#"{"status":"bogus"}"#.to_owned(),
5794 source_ref: Some("src-2".to_owned()),
5795 },
5796 ],
5797 })
5798 .expect("write");
5799 }
5800
5801 let report = service
5802 .validate_operational_collection_history("audit_log")
5803 .expect("validate history");
5804 assert_eq!(report.collection_name, "audit_log");
5805 assert_eq!(report.checked_rows, 2);
5806 assert_eq!(report.invalid_row_count, 1);
5807 assert_eq!(report.issues.len(), 1);
5808 assert_eq!(report.issues[0].record_key, "evt-2");
5809 assert!(report.issues[0].message.contains("must be one of"));
5810
5811 let trace = service
5812 .trace_operational_collection("audit_log", None)
5813 .expect("trace");
5814 assert_eq!(trace.mutation_count, 2);
5815
5816 let conn = sqlite::open_connection(db.path()).expect("conn");
5817 let provenance_count: i64 = conn
5818 .query_row(
5819 "SELECT count(*) FROM provenance_events \
5820 WHERE event_type = 'operational_collection_history_validated' \
5821 AND subject = 'audit_log'",
5822 [],
5823 |row| row.get(0),
5824 )
5825 .expect("provenance count");
5826 assert_eq!(provenance_count, 0);
5827 }
5828
5829 #[test]
5830 fn trace_operational_collection_returns_mutations_and_current_rows() {
5831 let (db, service) = setup();
5832 service
5833 .register_operational_collection(&OperationalRegisterRequest {
5834 name: "connector_health".to_owned(),
5835 kind: OperationalCollectionKind::LatestState,
5836 schema_json: "{}".to_owned(),
5837 retention_json: "{}".to_owned(),
5838 filter_fields_json: "[]".to_owned(),
5839 validation_json: String::new(),
5840 secondary_indexes_json: "[]".to_owned(),
5841 format_version: 1,
5842 })
5843 .expect("register collection");
5844 {
5845 let writer = crate::WriterActor::start(
5846 db.path(),
5847 Arc::new(SchemaManager::new()),
5848 crate::ProvenanceMode::Warn,
5849 Arc::new(crate::TelemetryCounters::default()),
5850 )
5851 .expect("writer");
5852 writer
5853 .submit(crate::WriteRequest {
5854 label: "operational".to_owned(),
5855 nodes: vec![],
5856 node_retires: vec![],
5857 edges: vec![],
5858 edge_retires: vec![],
5859 chunks: vec![],
5860 runs: vec![],
5861 steps: vec![],
5862 actions: vec![],
5863 optional_backfills: vec![],
5864 vec_inserts: vec![],
5865 operational_writes: vec![crate::OperationalWrite::Put {
5866 collection: "connector_health".to_owned(),
5867 record_key: "gmail".to_owned(),
5868 payload_json: r#"{"status":"ok"}"#.to_owned(),
5869 source_ref: Some("src-1".to_owned()),
5870 }],
5871 })
5872 .expect("write");
5873 }
5874
5875 let report = service
5876 .trace_operational_collection("connector_health", Some("gmail"))
5877 .expect("trace");
5878 assert_eq!(report.collection_name, "connector_health");
5879 assert_eq!(report.record_key.as_deref(), Some("gmail"));
5880 assert_eq!(report.mutation_count, 1);
5881 assert_eq!(report.current_count, 1);
5882 assert_eq!(report.mutations[0].op_kind, "put");
5883 assert_eq!(report.current_rows[0].payload_json, r#"{"status":"ok"}"#);
5884 }
5885
5886 #[test]
5887 fn trace_operational_collection_rejects_unknown_collection() {
5888 let (_db, service) = setup();
5889
5890 let error = service
5891 .trace_operational_collection("missing_collection", None)
5892 .expect_err("unknown collection should fail");
5893
5894 assert!(matches!(error, EngineError::InvalidWrite(_)));
5895 assert!(error.to_string().contains("is not registered"));
5896 }
5897
5898 #[test]
5899 fn rebuild_operational_current_repairs_missing_latest_state_rows() {
5900 let (db, service) = setup();
5901 service
5902 .register_operational_collection(&OperationalRegisterRequest {
5903 name: "connector_health".to_owned(),
5904 kind: OperationalCollectionKind::LatestState,
5905 schema_json: "{}".to_owned(),
5906 retention_json: "{}".to_owned(),
5907 filter_fields_json: "[]".to_owned(),
5908 validation_json: String::new(),
5909 secondary_indexes_json: "[]".to_owned(),
5910 format_version: 1,
5911 })
5912 .expect("register collection");
5913 {
5914 let writer = crate::WriterActor::start(
5915 db.path(),
5916 Arc::new(SchemaManager::new()),
5917 crate::ProvenanceMode::Warn,
5918 Arc::new(crate::TelemetryCounters::default()),
5919 )
5920 .expect("writer");
5921 writer
5922 .submit(crate::WriteRequest {
5923 label: "operational".to_owned(),
5924 nodes: vec![],
5925 node_retires: vec![],
5926 edges: vec![],
5927 edge_retires: vec![],
5928 chunks: vec![],
5929 runs: vec![],
5930 steps: vec![],
5931 actions: vec![],
5932 optional_backfills: vec![],
5933 vec_inserts: vec![],
5934 operational_writes: vec![crate::OperationalWrite::Put {
5935 collection: "connector_health".to_owned(),
5936 record_key: "gmail".to_owned(),
5937 payload_json: r#"{"status":"ok"}"#.to_owned(),
5938 source_ref: Some("src-1".to_owned()),
5939 }],
5940 })
5941 .expect("write");
5942 }
5943 {
5944 let conn = sqlite::open_connection(db.path()).expect("conn");
5945 conn.execute(
5946 "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
5947 [],
5948 )
5949 .expect("delete current row");
5950 }
5951
5952 let before = service.check_semantics().expect("semantics before rebuild");
5953 assert_eq!(before.missing_operational_current_rows, 1);
5954
5955 let repair = service
5956 .rebuild_operational_current(Some("connector_health"))
5957 .expect("rebuild current");
5958 assert_eq!(repair.collections_rebuilt, 1);
5959 assert_eq!(repair.current_rows_rebuilt, 1);
5960
5961 let after = service.check_semantics().expect("semantics after rebuild");
5962 assert_eq!(after.missing_operational_current_rows, 0);
5963
5964 let conn = sqlite::open_connection(db.path()).expect("conn");
5965 let payload: String = conn
5966 .query_row(
5967 "SELECT payload_json FROM operational_current \
5968 WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
5969 [],
5970 |row| row.get(0),
5971 )
5972 .expect("restored payload");
5973 assert_eq!(payload, r#"{"status":"ok"}"#);
5974 }
5975
5976 #[test]
5977 fn rebuild_operational_current_restores_latest_state_secondary_index_entries() {
5978 let (db, service) = setup();
5979 service
5980 .register_operational_collection(&OperationalRegisterRequest {
5981 name: "connector_health".to_owned(),
5982 kind: OperationalCollectionKind::LatestState,
5983 schema_json: "{}".to_owned(),
5984 retention_json: "{}".to_owned(),
5985 filter_fields_json: "[]".to_owned(),
5986 validation_json: String::new(),
5987 secondary_indexes_json: r#"[{"name":"status_current","kind":"latest_state_field","field":"status","value_type":"string"}]"#.to_owned(),
5988 format_version: 1,
5989 })
5990 .expect("register collection");
5991 {
5992 let writer = crate::WriterActor::start(
5993 db.path(),
5994 Arc::new(SchemaManager::new()),
5995 crate::ProvenanceMode::Warn,
5996 Arc::new(crate::TelemetryCounters::default()),
5997 )
5998 .expect("writer");
5999 writer
6000 .submit(crate::WriteRequest {
6001 label: "operational".to_owned(),
6002 nodes: vec![],
6003 node_retires: vec![],
6004 edges: vec![],
6005 edge_retires: vec![],
6006 chunks: vec![],
6007 runs: vec![],
6008 steps: vec![],
6009 actions: vec![],
6010 optional_backfills: vec![],
6011 vec_inserts: vec![],
6012 operational_writes: vec![crate::OperationalWrite::Put {
6013 collection: "connector_health".to_owned(),
6014 record_key: "gmail".to_owned(),
6015 payload_json: r#"{"status":"ok"}"#.to_owned(),
6016 source_ref: Some("src-1".to_owned()),
6017 }],
6018 })
6019 .expect("write");
6020 }
6021 {
6022 let conn = sqlite::open_connection(db.path()).expect("conn");
6023 let entry_count: i64 = conn
6024 .query_row(
6025 "SELECT count(*) FROM operational_secondary_index_entries \
6026 WHERE collection_name = 'connector_health' AND subject_kind = 'current'",
6027 [],
6028 |row| row.get(0),
6029 )
6030 .expect("secondary index count before repair");
6031 assert_eq!(entry_count, 1);
6032 conn.execute(
6033 "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
6034 [],
6035 )
6036 .expect("delete current row");
6037 }
6038
6039 service
6040 .rebuild_operational_current(Some("connector_health"))
6041 .expect("rebuild current");
6042
6043 let conn = sqlite::open_connection(db.path()).expect("conn");
6044 let entry_count: i64 = conn
6045 .query_row(
6046 "SELECT count(*) FROM operational_secondary_index_entries \
6047 WHERE collection_name = 'connector_health' AND subject_kind = 'current'",
6048 [],
6049 |row| row.get(0),
6050 )
6051 .expect("secondary index count after repair");
6052 assert_eq!(entry_count, 1);
6053 }
6054
6055 #[test]
6056 fn operational_current_semantics_and_rebuild_follow_mutation_order() {
6057 let (db, service) = setup();
6058 {
6059 let conn = sqlite::open_connection(db.path()).expect("conn");
6060 conn.execute(
6061 "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6062 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
6063 [],
6064 )
6065 .expect("seed collection");
6066 conn.execute(
6067 "INSERT INTO operational_mutations \
6068 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6069 VALUES ('m3', 'connector_health', 'gmail', 'put', '{\"status\":\"old\"}', 'src-1', 100, 1)",
6070 [],
6071 )
6072 .expect("seed first put");
6073 conn.execute(
6074 "INSERT INTO operational_mutations \
6075 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6076 VALUES ('m2', 'connector_health', 'gmail', 'delete', '', 'src-2', 100, 2)",
6077 [],
6078 )
6079 .expect("seed delete");
6080 conn.execute(
6081 "INSERT INTO operational_mutations \
6082 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6083 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"new\"}', 'src-3', 100, 3)",
6084 [],
6085 )
6086 .expect("seed final put");
6087 conn.execute(
6088 "INSERT INTO operational_current \
6089 (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
6090 VALUES ('connector_health', 'gmail', '{\"status\":\"new\"}', 100, 'm1')",
6091 [],
6092 )
6093 .expect("seed current");
6094 }
6095
6096 let before = service.check_semantics().expect("semantics before rebuild");
6097 assert_eq!(before.missing_operational_current_rows, 0);
6098 assert_eq!(before.stale_operational_current_rows, 0);
6099
6100 {
6101 let conn = sqlite::open_connection(db.path()).expect("conn");
6102 conn.execute(
6103 "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
6104 [],
6105 )
6106 .expect("delete current row");
6107 }
6108
6109 let missing = service.check_semantics().expect("semantics after delete");
6110 assert_eq!(missing.missing_operational_current_rows, 1);
6111 assert_eq!(missing.stale_operational_current_rows, 0);
6112
6113 service
6114 .rebuild_operational_current(Some("connector_health"))
6115 .expect("rebuild current");
6116
6117 let after = service.check_semantics().expect("semantics after rebuild");
6118 assert_eq!(after.missing_operational_current_rows, 0);
6119 assert_eq!(after.stale_operational_current_rows, 0);
6120
6121 let conn = sqlite::open_connection(db.path()).expect("conn");
6122 let payload: String = conn
6123 .query_row(
6124 "SELECT payload_json FROM operational_current \
6125 WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
6126 [],
6127 |row| row.get(0),
6128 )
6129 .expect("restored payload");
6130 assert_eq!(payload, r#"{"status":"new"}"#);
6131 }
6132
6133 #[test]
6134 fn disable_operational_collection_sets_disabled_at_and_emits_provenance() {
6135 let (db, service) = setup();
6136 service
6137 .register_operational_collection(&OperationalRegisterRequest {
6138 name: "audit_log".to_owned(),
6139 kind: OperationalCollectionKind::AppendOnlyLog,
6140 schema_json: "{}".to_owned(),
6141 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6142 filter_fields_json: "[]".to_owned(),
6143 validation_json: String::new(),
6144 secondary_indexes_json: "[]".to_owned(),
6145 format_version: 1,
6146 })
6147 .expect("register collection");
6148
6149 let record = service
6150 .disable_operational_collection("audit_log")
6151 .expect("disable collection");
6152 assert_eq!(record.name, "audit_log");
6153 assert!(record.disabled_at.is_some());
6154
6155 let disabled_at = record.disabled_at.expect("disabled_at");
6156 let described = service
6157 .describe_operational_collection("audit_log")
6158 .expect("describe collection")
6159 .expect("collection exists");
6160 assert_eq!(described.disabled_at, Some(disabled_at));
6161
6162 let writer = crate::WriterActor::start(
6163 db.path(),
6164 Arc::new(SchemaManager::new()),
6165 crate::ProvenanceMode::Warn,
6166 Arc::new(crate::TelemetryCounters::default()),
6167 )
6168 .expect("writer");
6169 let error = writer
6170 .submit(crate::WriteRequest {
6171 label: "disabled-operational".to_owned(),
6172 nodes: vec![],
6173 node_retires: vec![],
6174 edges: vec![],
6175 edge_retires: vec![],
6176 chunks: vec![],
6177 runs: vec![],
6178 steps: vec![],
6179 actions: vec![],
6180 optional_backfills: vec![],
6181 vec_inserts: vec![],
6182 operational_writes: vec![crate::OperationalWrite::Append {
6183 collection: "audit_log".to_owned(),
6184 record_key: "evt-1".to_owned(),
6185 payload_json: r#"{"type":"sync"}"#.to_owned(),
6186 source_ref: Some("src-1".to_owned()),
6187 }],
6188 })
6189 .expect_err("disabled collection should reject writes");
6190 assert!(matches!(error, EngineError::InvalidWrite(_)));
6191 assert!(error.to_string().contains("is disabled"));
6192
6193 let conn = sqlite::open_connection(db.path()).expect("conn");
6194 let provenance_count: i64 = conn
6195 .query_row(
6196 "SELECT count(*) FROM provenance_events \
6197 WHERE event_type = 'operational_collection_disabled' AND subject = 'audit_log'",
6198 [],
6199 |row| row.get(0),
6200 )
6201 .expect("provenance count");
6202 assert_eq!(provenance_count, 1);
6203 }
6204
6205 #[test]
6206 fn purge_operational_collection_deletes_append_only_rows_before_cutoff() {
6207 let (db, service) = setup();
6208 {
6209 let conn = sqlite::open_connection(db.path()).expect("conn");
6210 conn.execute(
6211 "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6212 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_all\"}', 1, 100)",
6213 [],
6214 )
6215 .expect("seed collection");
6216 conn.execute(
6217 "INSERT INTO operational_mutations \
6218 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6219 VALUES ('evt-1', 'audit_log', 'evt-1', 'append', '{\"seq\":1}', 'src-1', 100, 1)",
6220 [],
6221 )
6222 .expect("seed event 1");
6223 conn.execute(
6224 "INSERT INTO operational_mutations \
6225 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6226 VALUES ('evt-2', 'audit_log', 'evt-2', 'append', '{\"seq\":2}', 'src-2', 200, 2)",
6227 [],
6228 )
6229 .expect("seed event 2");
6230 conn.execute(
6231 "INSERT INTO operational_mutations \
6232 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6233 VALUES ('evt-3', 'audit_log', 'evt-3', 'append', '{\"seq\":3}', 'src-3', 300, 3)",
6234 [],
6235 )
6236 .expect("seed event 3");
6237 }
6238
6239 let report = service
6240 .purge_operational_collection("audit_log", 250)
6241 .expect("purge collection");
6242 assert_eq!(report.collection_name, "audit_log");
6243 assert_eq!(report.deleted_mutations, 2);
6244 assert_eq!(report.before_timestamp, 250);
6245
6246 let conn = sqlite::open_connection(db.path()).expect("conn");
6247 let remaining: Vec<String> = {
6248 let mut stmt = conn
6249 .prepare(
6250 "SELECT id FROM operational_mutations \
6251 WHERE collection_name = 'audit_log' ORDER BY mutation_order",
6252 )
6253 .expect("stmt");
6254 stmt.query_map([], |row| row.get(0))
6255 .expect("rows")
6256 .collect::<Result<_, _>>()
6257 .expect("collect")
6258 };
6259 assert_eq!(remaining, vec!["evt-3".to_owned()]);
6260 let provenance_count: i64 = conn
6261 .query_row(
6262 "SELECT count(*) FROM provenance_events \
6263 WHERE event_type = 'operational_collection_purged' AND subject = 'audit_log'",
6264 [],
6265 |row| row.get(0),
6266 )
6267 .expect("provenance count");
6268 assert_eq!(provenance_count, 1);
6269 }
6270
/// Dry-run compaction of a `keep_last` collection reports the would-be
/// deletion count but leaves mutation rows and provenance events untouched.
#[test]
fn compact_operational_collection_dry_run_reports_without_mutation() {
    let (db, service) = setup();

    // Seed a collection capped at two rows, then store three events.
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    seed_conn
        .execute(
            "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
             VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
            [],
        )
        .expect("seed collection");
    for index in 1_i64..=3 {
        // Events are created at 100, 200 and 300.
        let created_at: i64 = index * 100;
        seed_conn
            .execute(
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
                rusqlite::params![
                    format!("evt-{index}"),
                    format!("{{\"seq\":{index}}}"),
                    created_at,
                    index,
                ],
            )
            .expect("seed event");
    }
    drop(seed_conn);

    let outcome = service
        .compact_operational_collection("audit_log", true)
        .expect("compact collection");
    assert_eq!(outcome.collection_name, "audit_log");
    assert_eq!(outcome.deleted_mutations, 1);
    assert!(outcome.dry_run);
    assert_eq!(outcome.before_timestamp, None);

    // Nothing may have been deleted or audited by the dry run.
    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let count_of = |sql: &str, label: &str| -> i64 {
        verify_conn
            .query_row(sql, [], |row| row.get(0))
            .expect(label)
    };
    assert_eq!(
        count_of(
            "SELECT count(*) FROM operational_mutations WHERE collection_name = 'audit_log'",
            "remaining count",
        ),
        3
    );
    assert_eq!(
        count_of(
            "SELECT count(*) FROM provenance_events \
             WHERE event_type = 'operational_collection_compacted' AND subject = 'audit_log'",
            "provenance count",
        ),
        0
    );
}
6325
/// A real (non-dry-run) compaction of a `keep_last` collection removes the
/// oldest row and records exactly one compaction provenance event.
#[test]
fn compact_operational_collection_keep_last_deletes_oldest_rows() {
    let (db, service) = setup();

    // Seed: retention keeps two rows, three events are stored.
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    seed_conn
        .execute(
            "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
             VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
            [],
        )
        .expect("seed collection");
    let seeded_events: [(i64, i64); 3] = [(1, 100), (2, 200), (3, 300)];
    for (index, created_at) in seeded_events {
        let event_id = format!("evt-{index}");
        let payload = format!("{{\"seq\":{index}}}");
        seed_conn
            .execute(
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
                rusqlite::params![event_id, payload, created_at, index],
            )
            .expect("seed event");
    }
    drop(seed_conn);

    let outcome = service
        .compact_operational_collection("audit_log", false)
        .expect("compact collection");
    assert_eq!(outcome.deleted_mutations, 1);
    assert!(!outcome.dry_run);

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let surviving_ids = {
        let mut select_ids = verify_conn
            .prepare(
                "SELECT id FROM operational_mutations \
                 WHERE collection_name = 'audit_log' ORDER BY mutation_order",
            )
            .expect("stmt");
        let id_rows = select_ids
            .query_map([], |row| row.get::<_, String>(0))
            .expect("rows");
        id_rows.collect::<Result<Vec<_>, _>>().expect("collect")
    };
    assert_eq!(
        surviving_ids,
        vec![String::from("evt-2"), String::from("evt-3")]
    );

    let compaction_events = verify_conn
        .query_row(
            "SELECT count(*) FROM provenance_events \
             WHERE event_type = 'operational_collection_compacted' AND subject = 'audit_log'",
            [],
            |row| row.get::<_, i64>(0),
        )
        .expect("provenance count");
    assert_eq!(compaction_events, 1);
}
6383
/// Walks the retention workflow for a `keep_last` collection in three phases:
/// planning, a dry run (no rows deleted, no run recorded), and a real run
/// (oldest row deleted, run recorded with the supplied timestamp).
#[test]
fn plan_and_run_operational_retention_keep_last() {
    let (db, service) = setup();

    // Seed: retention keeps two rows, three events are stored.
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    seed_conn
        .execute(
            "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
             VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
            [],
        )
        .expect("seed collection");
    for index in 1_i64..=3 {
        let created_at: i64 = index * 100;
        seed_conn
            .execute(
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
                rusqlite::params![
                    format!("evt-{index}"),
                    format!("{{\"seq\":{index}}}"),
                    created_at,
                    index,
                ],
            )
            .expect("seed event");
    }
    drop(seed_conn);

    // Phase 1: the plan identifies one deletion candidate.
    let plan = service
        .plan_operational_retention(1_000, None, Some(10))
        .expect("plan retention");
    assert_eq!(plan.collections_examined, 1);
    let planned = &plan.items[0];
    assert_eq!(planned.collection_name, "audit_log");
    assert_eq!(
        planned.action_kind,
        crate::operational::OperationalRetentionActionKind::KeepLast
    );
    assert_eq!(planned.candidate_deletions, 1);
    assert_eq!(planned.max_rows, Some(2));
    assert_eq!(planned.last_run_at, None);

    // Phase 2: the dry run reports the same outcome without touching storage.
    let rehearsal = service
        .run_operational_retention(1_000, None, Some(10), true)
        .expect("dry-run retention");
    assert!(rehearsal.dry_run);
    assert_eq!(rehearsal.collections_acted_on, 1);
    assert_eq!(rehearsal.items[0].deleted_mutations, 1);
    assert_eq!(rehearsal.items[0].rows_remaining, 2);
    {
        let inspect_conn = sqlite::open_connection(db.path()).expect("conn");
        let mutation_rows = inspect_conn
            .query_row(
                "SELECT count(*) FROM operational_mutations WHERE collection_name = 'audit_log'",
                [],
                |row| row.get::<_, i64>(0),
            )
            .expect("remaining count after dry run");
        assert_eq!(mutation_rows, 3);
        let recorded_runs = inspect_conn
            .query_row(
                "SELECT count(*) FROM operational_retention_runs WHERE collection_name = 'audit_log'",
                [],
                |row| row.get::<_, i64>(0),
            )
            .expect("retention run count");
        assert_eq!(recorded_runs, 0);
    }

    // Phase 3: the real run deletes the oldest event and records the run.
    let executed = service
        .run_operational_retention(1_000, None, Some(10), false)
        .expect("execute retention");
    assert_eq!(executed.collections_acted_on, 1);
    assert_eq!(executed.items[0].deleted_mutations, 1);
    assert_eq!(executed.items[0].rows_remaining, 2);

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let surviving_ids = {
        let mut select_ids = verify_conn
            .prepare(
                "SELECT id FROM operational_mutations \
                 WHERE collection_name = 'audit_log' ORDER BY mutation_order",
            )
            .expect("stmt");
        let id_rows = select_ids
            .query_map([], |row| row.get::<_, String>(0))
            .expect("rows");
        id_rows.collect::<Result<Vec<_>, _>>().expect("collect")
    };
    assert_eq!(
        surviving_ids,
        vec![String::from("evt-2"), String::from("evt-3")]
    );
    let recorded_run_at = verify_conn
        .query_row(
            "SELECT executed_at FROM operational_retention_runs \
             WHERE collection_name = 'audit_log' ORDER BY executed_at DESC LIMIT 1",
            [],
            |row| row.get::<_, i64>(0),
        )
        .expect("last run at");
    assert_eq!(recorded_run_at, 1_000);
}
6482
/// When a `keep_last` collection is already within its row cap, a dry run
/// reports zero deletions and does not count the collection as acted on.
#[test]
fn dry_run_operational_retention_does_not_mark_noop_collection_as_acted_on() {
    let (db, service) = setup();

    // Seed exactly as many events (two) as the retention policy allows.
    {
        let seed_conn = sqlite::open_connection(db.path()).expect("conn");
        seed_conn
            .execute(
                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
                [],
            )
            .expect("seed collection");
        for index in 1_i64..=2 {
            let created_at: i64 = index * 100;
            seed_conn
                .execute(
                    "INSERT INTO operational_mutations \
                     (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                     VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
                    rusqlite::params![
                        format!("evt-{index}"),
                        format!("{{\"seq\":{index}}}"),
                        created_at,
                        index,
                    ],
                )
                .expect("seed event");
        }
    }

    let rehearsal = service
        .run_operational_retention(1_000, None, Some(10), true)
        .expect("dry-run retention");
    assert!(rehearsal.dry_run);
    assert_eq!(rehearsal.collections_acted_on, 0);
    let first_item = &rehearsal.items[0];
    assert_eq!(first_item.deleted_mutations, 0);
    assert_eq!(first_item.rows_remaining, 2);
}
6517
/// Compaction is refused for `latest_state` collections: the call fails with
/// `EngineError::InvalidWrite` and the message mentions `append_only_log`.
#[test]
fn compact_operational_collection_rejects_latest_state() {
    let (_db, service) = setup();

    let registration = OperationalRegisterRequest {
        name: "connector_health".to_owned(),
        kind: OperationalCollectionKind::LatestState,
        schema_json: "{}".to_owned(),
        retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
        filter_fields_json: "[]".to_owned(),
        validation_json: String::new(),
        secondary_indexes_json: "[]".to_owned(),
        format_version: 1,
    };
    service
        .register_operational_collection(&registration)
        .expect("register collection");

    let rejection = service
        .compact_operational_collection("connector_health", false)
        .expect_err("latest_state compaction should be rejected");
    assert!(matches!(rejection, EngineError::InvalidWrite(_)));
    let message = rejection.to_string();
    assert!(message.contains("append_only_log"));
}
6540
/// Registering a collection returns a record whose `filter_fields_json`
/// equals the filter contract that was submitted.
#[test]
fn register_operational_collection_persists_filter_fields_json() {
    let (_db, service) = setup();

    let filter_contract = r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#;
    let registration = OperationalRegisterRequest {
        name: "audit_log".to_owned(),
        kind: OperationalCollectionKind::AppendOnlyLog,
        schema_json: "{}".to_owned(),
        retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
        filter_fields_json: filter_contract.to_owned(),
        validation_json: String::new(),
        secondary_indexes_json: "[]".to_owned(),
        format_version: 1,
    };

    let stored = service
        .register_operational_collection(&registration)
        .expect("register collection");

    assert_eq!(stored.filter_fields_json, filter_contract);
}
6563
/// A filtered read combining a prefix clause on `actor` with a range clause on
/// `ts` returns only the single event that satisfies both.
#[test]
fn read_operational_collection_filters_append_only_rows_by_declared_fields() {
    use crate::operational::{OperationalFilterClause, OperationalReadRequest};

    let (db, service) = setup();
    let registration = OperationalRegisterRequest {
        name: "audit_log".to_owned(),
        kind: OperationalCollectionKind::AppendOnlyLog,
        schema_json: "{}".to_owned(),
        retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
        filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"seq","type":"integer","modes":["exact","range"]},{"name":"ts","type":"timestamp","modes":["exact","range"]}]"#.to_owned(),
        validation_json: String::new(),
        secondary_indexes_json: "[]".to_owned(),
        format_version: 1,
    };
    service
        .register_operational_collection(&registration)
        .expect("register collection");

    // Append three events through the writer; the writer is scoped so it is
    // dropped before the read below.
    {
        let mut appends = Vec::new();
        for (record_key, payload, source) in [
            ("evt-1", r#"{"actor":"alice","seq":1,"ts":100}"#, "src-1"),
            ("evt-2", r#"{"actor":"alice-admin","seq":2,"ts":200}"#, "src-2"),
            ("evt-3", r#"{"actor":"bob","seq":3,"ts":300}"#, "src-3"),
        ] {
            appends.push(crate::OperationalWrite::Append {
                collection: "audit_log".to_owned(),
                record_key: record_key.to_owned(),
                payload_json: payload.to_owned(),
                source_ref: Some(source.to_owned()),
            });
        }

        let writer = crate::WriterActor::start(
            db.path(),
            Arc::new(SchemaManager::new()),
            crate::ProvenanceMode::Warn,
            Arc::new(crate::TelemetryCounters::default()),
        )
        .expect("writer");
        writer
            .submit(crate::WriteRequest {
                label: "operational".to_owned(),
                nodes: vec![],
                node_retires: vec![],
                edges: vec![],
                edge_retires: vec![],
                chunks: vec![],
                runs: vec![],
                steps: vec![],
                actions: vec![],
                optional_backfills: vec![],
                vec_inserts: vec![],
                operational_writes: appends,
            })
            .expect("write");
    }

    let request = OperationalReadRequest {
        collection_name: "audit_log".to_owned(),
        filters: vec![
            OperationalFilterClause::Prefix {
                field: "actor".to_owned(),
                value: "alice".to_owned(),
            },
            OperationalFilterClause::Range {
                field: "ts".to_owned(),
                lower: Some(150),
                upper: Some(250),
            },
        ],
        limit: Some(10),
    };
    let report = service
        .read_operational_collection(&request)
        .expect("filtered read");

    assert_eq!(report.collection_name, "audit_log");
    assert_eq!(report.row_count, 1);
    assert!(!report.was_limited);
    assert_eq!(report.rows.len(), 1);
    let only_row = &report.rows[0];
    assert_eq!(only_row.record_key, "evt-2");
    assert_eq!(
        only_row.payload_json,
        r#"{"actor":"alice-admin","seq":2,"ts":200}"#
    );
}
6652
/// After every `operational_filter_values` row for the collection is deleted,
/// a filtered read on a collection that declares an `actor_ts` secondary
/// index still returns the matching event.
#[test]
fn read_operational_collection_uses_secondary_index_when_filter_values_are_missing() {
    use crate::operational::{OperationalFilterClause, OperationalReadRequest};

    let (db, service) = setup();
    let registration = OperationalRegisterRequest {
        name: "audit_log".to_owned(),
        kind: OperationalCollectionKind::AppendOnlyLog,
        schema_json: "{}".to_owned(),
        retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
        filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
        validation_json: String::new(),
        secondary_indexes_json: r#"[{"name":"actor_ts","kind":"append_only_field_time","field":"actor","value_type":"string","time_field":"ts"}]"#.to_owned(),
        format_version: 1,
    };
    service
        .register_operational_collection(&registration)
        .expect("register collection");

    // Append two events through the writer; the writer is scoped so it is
    // dropped before the table is edited directly.
    {
        let mut appends = Vec::new();
        for (record_key, payload, source) in [
            ("evt-1", r#"{"actor":"alice","ts":100}"#, "src-1"),
            ("evt-2", r#"{"actor":"alice-admin","ts":200}"#, "src-2"),
        ] {
            appends.push(crate::OperationalWrite::Append {
                collection: "audit_log".to_owned(),
                record_key: record_key.to_owned(),
                payload_json: payload.to_owned(),
                source_ref: Some(source.to_owned()),
            });
        }

        let writer = crate::WriterActor::start(
            db.path(),
            Arc::new(SchemaManager::new()),
            crate::ProvenanceMode::Warn,
            Arc::new(crate::TelemetryCounters::default()),
        )
        .expect("writer");
        writer
            .submit(crate::WriteRequest {
                label: "operational".to_owned(),
                nodes: vec![],
                node_retires: vec![],
                edges: vec![],
                edge_retires: vec![],
                chunks: vec![],
                runs: vec![],
                steps: vec![],
                actions: vec![],
                optional_backfills: vec![],
                vec_inserts: vec![],
                operational_writes: appends,
            })
            .expect("write");
    }

    // Remove the extracted filter values for the collection.
    {
        let wipe_conn = sqlite::open_connection(db.path()).expect("conn");
        wipe_conn
            .execute(
                "DELETE FROM operational_filter_values WHERE collection_name = 'audit_log'",
                [],
            )
            .expect("clear filter values");
    }

    let request = OperationalReadRequest {
        collection_name: "audit_log".to_owned(),
        filters: vec![
            OperationalFilterClause::Prefix {
                field: "actor".to_owned(),
                value: "alice".to_owned(),
            },
            OperationalFilterClause::Range {
                field: "ts".to_owned(),
                lower: Some(150),
                upper: Some(250),
            },
        ],
        limit: Some(10),
    };
    let report = service
        .read_operational_collection(&request)
        .expect("secondary-index read");

    assert_eq!(report.row_count, 1);
    assert_eq!(report.rows[0].record_key, "evt-2");
}
6735
/// Filtered reads fail in two situations: against a `latest_state` collection
/// (error mentions `append_only_log`) and when filtering on a field the
/// collection never declared (error mentions `undeclared`).
#[test]
fn read_operational_collection_rejects_undeclared_fields_and_latest_state_collections() {
    let (_db, service) = setup();

    // Case 1: a latest_state collection with a declared `status` field.
    let latest_state_registration = OperationalRegisterRequest {
        name: "connector_health".to_owned(),
        kind: OperationalCollectionKind::LatestState,
        schema_json: "{}".to_owned(),
        retention_json: "{}".to_owned(),
        filter_fields_json: r#"[{"name":"status","type":"string","modes":["exact"]}]"#.to_owned(),
        validation_json: String::new(),
        secondary_indexes_json: "[]".to_owned(),
        format_version: 1,
    };
    service
        .register_operational_collection(&latest_state_registration)
        .expect("register collection");

    let status_read = crate::operational::OperationalReadRequest {
        collection_name: "connector_health".to_owned(),
        filters: vec![crate::operational::OperationalFilterClause::Exact {
            field: "status".to_owned(),
            value: crate::operational::OperationalFilterValue::String("ok".to_owned()),
        }],
        limit: Some(10),
    };
    let latest_state_error = service
        .read_operational_collection(&status_read)
        .expect_err("latest_state filtered reads should be rejected");
    assert!(latest_state_error.to_string().contains("append_only_log"));

    // Case 2: an append-only collection queried on a field it never declared.
    let append_only_registration = OperationalRegisterRequest {
        name: "audit_log".to_owned(),
        kind: OperationalCollectionKind::AppendOnlyLog,
        schema_json: "{}".to_owned(),
        retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
        filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact"]}]"#.to_owned(),
        validation_json: String::new(),
        secondary_indexes_json: "[]".to_owned(),
        format_version: 1,
    };
    service
        .register_operational_collection(&append_only_registration)
        .expect("register append-only collection");

    let missing_field_read = crate::operational::OperationalReadRequest {
        collection_name: "audit_log".to_owned(),
        filters: vec![crate::operational::OperationalFilterClause::Exact {
            field: "missing".to_owned(),
            value: crate::operational::OperationalFilterValue::String("x".to_owned()),
        }],
        limit: Some(10),
    };
    let undeclared_error = service
        .read_operational_collection(&missing_field_read)
        .expect_err("undeclared field should be rejected");
    assert!(undeclared_error.to_string().contains("undeclared"));
}
6791
/// With two matching events and `limit: Some(1)`, the read returns one row
/// (`evt-2`), echoes the applied limit, and flags the result as limited.
#[test]
fn read_operational_collection_applies_limit_and_reports_truncation() {
    use crate::operational::{OperationalFilterClause, OperationalReadRequest};

    let (db, service) = setup();
    let registration = OperationalRegisterRequest {
        name: "audit_log".to_owned(),
        kind: OperationalCollectionKind::AppendOnlyLog,
        schema_json: "{}".to_owned(),
        retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
        filter_fields_json: r#"[{"name":"actor","type":"string","modes":["prefix"]}]"#.to_owned(),
        validation_json: String::new(),
        secondary_indexes_json: "[]".to_owned(),
        format_version: 1,
    };
    service
        .register_operational_collection(&registration)
        .expect("register collection");

    // Append two events that both match the `alice` prefix.
    {
        let mut appends = Vec::new();
        for (record_key, payload, source) in [
            ("evt-1", r#"{"actor":"alice-1"}"#, "src-1"),
            ("evt-2", r#"{"actor":"alice-2"}"#, "src-2"),
        ] {
            appends.push(crate::OperationalWrite::Append {
                collection: "audit_log".to_owned(),
                record_key: record_key.to_owned(),
                payload_json: payload.to_owned(),
                source_ref: Some(source.to_owned()),
            });
        }

        let writer = crate::WriterActor::start(
            db.path(),
            Arc::new(SchemaManager::new()),
            crate::ProvenanceMode::Warn,
            Arc::new(crate::TelemetryCounters::default()),
        )
        .expect("writer");
        writer
            .submit(crate::WriteRequest {
                label: "operational".to_owned(),
                nodes: vec![],
                node_retires: vec![],
                edges: vec![],
                edge_retires: vec![],
                chunks: vec![],
                runs: vec![],
                steps: vec![],
                actions: vec![],
                optional_backfills: vec![],
                vec_inserts: vec![],
                operational_writes: appends,
            })
            .expect("write");
    }

    let request = OperationalReadRequest {
        collection_name: "audit_log".to_owned(),
        filters: vec![OperationalFilterClause::Prefix {
            field: "actor".to_owned(),
            value: "alice".to_owned(),
        }],
        limit: Some(1),
    };
    let report = service
        .read_operational_collection(&request)
        .expect("limited read");

    assert_eq!(report.row_count, 1);
    assert_eq!(report.applied_limit, 1);
    assert!(report.was_limited);
    assert_eq!(report.rows[0].record_key, "evt-2");
}
6863
/// Starts from hand-built operational tables that have no filter columns,
/// checks that a filtered read is rejected as `undeclared`, then installs a
/// filter contract via `update_operational_collection_filters` and verifies
/// the pre-existing row becomes readable through a range filter.
#[test]
fn preexisting_operational_collection_can_gain_filter_contract_after_upgrade() {
    let db = NamedTempFile::new().expect("temp db");

    // Build the legacy tables by hand and seed one collection with one event.
    {
        let legacy_conn = sqlite::open_connection(db.path()).expect("conn");
        legacy_conn
            .execute_batch(
                r#"
                CREATE TABLE operational_collections (
                    name TEXT PRIMARY KEY,
                    kind TEXT NOT NULL,
                    schema_json TEXT NOT NULL,
                    retention_json TEXT NOT NULL,
                    format_version INTEGER NOT NULL DEFAULT 1,
                    created_at INTEGER NOT NULL DEFAULT 100,
                    disabled_at INTEGER
                );
                CREATE TABLE operational_mutations (
                    id TEXT PRIMARY KEY,
                    collection_name TEXT NOT NULL,
                    record_key TEXT NOT NULL,
                    op_kind TEXT NOT NULL,
                    payload_json TEXT NOT NULL,
                    source_ref TEXT,
                    created_at INTEGER NOT NULL DEFAULT 100,
                    mutation_order INTEGER NOT NULL DEFAULT 1
                );
                INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at)
                VALUES ('audit_log', 'append_only_log', '{}', '{"mode":"keep_all"}', 1, 100);
                INSERT INTO operational_mutations
                    (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order)
                VALUES
                    ('evt-1', 'audit_log', 'evt-1', 'append', '{"actor":"alice","ts":0}', 'src-1', 100, 1);
                "#,
            )
            .expect("seed pre-v10 schema");
    }

    let service = AdminService::new(db.path(), Arc::new(SchemaManager::new()));

    // Before the contract exists, filtering on `actor` must be rejected.
    let actor_read = crate::operational::OperationalReadRequest {
        collection_name: "audit_log".to_owned(),
        filters: vec![crate::operational::OperationalFilterClause::Exact {
            field: "actor".to_owned(),
            value: crate::operational::OperationalFilterValue::String("alice".to_owned()),
        }],
        limit: Some(10),
    };
    let pre_update = service
        .read_operational_collection(&actor_read)
        .expect_err("read should reject undeclared fields before migration update");
    assert!(pre_update.to_string().contains("undeclared"));

    // Install the filter contract explicitly.
    let new_contract = r#"[{"name":"actor","type":"string","modes":["exact"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#;
    let updated = service
        .update_operational_collection_filters("audit_log", new_contract)
        .expect("update filter contract");
    assert!(updated.filter_fields_json.contains(r#""actor""#));

    // The seeded row (ts = 0) is now reachable through a range filter.
    let ts_read = crate::operational::OperationalReadRequest {
        collection_name: "audit_log".to_owned(),
        filters: vec![crate::operational::OperationalFilterClause::Range {
            field: "ts".to_owned(),
            lower: Some(0),
            upper: Some(0),
        }],
        limit: Some(10),
    };
    let report = service
        .read_operational_collection(&ts_read)
        .expect("read after explicit filter update");
    assert_eq!(report.row_count, 1);
    assert_eq!(report.rows[0].record_key, "evt-1");
}
6935
/// A vec row whose `chunk_id` was never inserted into `chunks` is reported by
/// `check_semantics` as one stale vec row, with a matching warning.
#[cfg(feature = "sqlite-vec")]
#[test]
fn check_semantics_detects_stale_vec_rows() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());

    {
        let vec_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
        schema.bootstrap(&vec_conn).expect("bootstrap");
        schema
            .ensure_vector_profile(&vec_conn, "default", "vec_nodes_active", 3)
            .expect("vec profile");

        // Encode a 3-dimensional embedding as little-endian f32 bytes.
        let mut embedding_bytes: Vec<u8> = Vec::new();
        for component in [0.1f32, 0.2f32, 0.3f32] {
            embedding_bytes.extend_from_slice(&component.to_le_bytes());
        }
        vec_conn
            .execute(
                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('ghost-chunk', ?1)",
                rusqlite::params![embedding_bytes],
            )
            .expect("insert stale vec row");
    }

    let service = AdminService::new(db.path(), Arc::clone(&schema));
    let report = service.check_semantics().expect("semantics check");

    assert_eq!(report.stale_vec_rows, 1);
    let mentions_stale_vec = report
        .warnings
        .iter()
        .any(|warning| warning.contains("stale vec"));
    assert!(mentions_stale_vec, "warning must mention stale vec");
}
6968
/// With only a `vector_profiles` metadata row inserted, calling
/// `restore_vector_profiles` reports a `Vec` target with one rebuilt row and
/// leaves a `vec_nodes_active` entry in the SQLite schema.
#[cfg(feature = "sqlite-vec")]
#[test]
fn restore_vector_profiles_recreates_vec_table_from_metadata() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());

    // Bootstrap the schema and record the profile metadata row directly.
    {
        let seed_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
        schema.bootstrap(&seed_conn).expect("bootstrap");
        seed_conn
            .execute(
                "INSERT INTO vector_profiles (profile, table_name, dimension, enabled) \
                 VALUES ('default', 'vec_nodes_active', 3, 1)",
                [],
            )
            .expect("insert vector profile");
    }

    let service = AdminService::new(db.path(), Arc::clone(&schema));
    let report = service
        .restore_vector_profiles()
        .expect("restore vector profiles");
    let expected_targets = vec![crate::projection::ProjectionTarget::Vec];
    assert_eq!(report.targets, expected_targets);
    assert_eq!(report.rebuilt_rows, 1);

    let verify_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    let schema_entries = verify_conn
        .query_row(
            "SELECT count(*) FROM sqlite_schema WHERE name = 'vec_nodes_active'",
            [],
            |row| row.get::<_, i64>(0),
        )
        .expect("vec schema count");
    assert_eq!(schema_entries, 1, "vec table should exist after restore");
}
7005
/// The same `VectorRegenerationConfig`, written once as JSON and once as TOML,
/// is loaded back from both files as a value equal to the original.
#[cfg(feature = "sqlite-vec")]
#[test]
fn load_vector_regeneration_config_supports_json_and_toml() {
    let dir = tempfile::tempdir().expect("temp dir");
    let json_path = dir.path().join("regen.json");
    let toml_path = dir.path().join("regen.toml");

    let expected = VectorRegenerationConfig {
        profile: "default".to_owned(),
        table_name: "vec_nodes_active".to_owned(),
        chunking_policy: "per_chunk".to_owned(),
        preprocessing_policy: "trim".to_owned(),
    };

    // Serialise and write each format in turn.
    let json_text = serde_json::to_string(&expected).expect("json");
    fs::write(&json_path, json_text).expect("write json");
    let toml_text = toml::to_string(&expected).expect("toml");
    fs::write(&toml_path, toml_text).expect("write toml");

    let from_json = load_vector_regeneration_config(&json_path).expect("json parse");
    let from_toml = load_vector_regeneration_config(&toml_path).expect("toml parse");

    assert_eq!(from_json, expected);
    assert_eq!(from_toml, expected);
}
7029
/// A JSON document that still carries the old identity fields
/// (`model_identity`, `model_version`, `dimension`, `normalization_policy`,
/// `generator_command`) must fail to deserialize as
/// `VectorRegenerationConfig`.
#[test]
fn regenerate_vector_embeddings_config_rejects_old_identity_fields() {
    let legacy_json = r#"{
        "profile": "default",
        "table_name": "vec_nodes_active",
        "model_identity": "old-model",
        "model_version": "1.0",
        "dimension": 4,
        "normalization_policy": "l2",
        "chunking_policy": "per_chunk",
        "preprocessing_policy": "trim",
        "generator_command": ["/bin/echo"]
    }"#;

    let outcome = serde_json::from_str::<VectorRegenerationConfig>(legacy_json);

    assert!(
        outcome.is_err(),
        "legacy identity fields must be rejected at deserialization"
    );
}
7053
/// In a build without the `sqlite-vec` feature, regeneration fails with an
/// "unsupported vec capability" error while still recording one request
/// event and one failure event (with a matching `failure_class`).
#[cfg(all(not(feature = "sqlite-vec"), unix))]
#[test]
fn regenerate_vector_embeddings_unsupported_vec_capability_writes_request_and_failed_audit() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());

    // Seed one node with one chunk.
    {
        let seed_conn = sqlite::open_connection(db.path()).expect("connection");
        schema.bootstrap(&seed_conn).expect("bootstrap");
        seed_conn
            .execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
                [],
            )
            .expect("insert node");
        seed_conn
            .execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
                [],
            )
            .expect("insert chunk");
    }

    let service = AdminService::new(db.path(), Arc::clone(&schema));
    let embedder = TestEmbedder::new("test-model", 4);
    let regeneration_config = VectorRegenerationConfig {
        profile: "default".to_owned(),
        table_name: "vec_nodes_active".to_owned(),
        chunking_policy: "per_chunk".to_owned(),
        preprocessing_policy: "trim".to_owned(),
    };
    let failure = service
        .regenerate_vector_embeddings(&embedder, &regeneration_config)
        .expect_err("sqlite-vec capability should be required");

    assert!(failure.to_string().contains("unsupported vec capability"));

    // Both the request and the failure must appear in the provenance log.
    let audit_conn = sqlite::open_connection(db.path()).expect("connection");
    let count_of = |sql: &str, label: &str| -> i64 {
        audit_conn
            .query_row(sql, [], |row| row.get(0))
            .expect(label)
    };
    assert_eq!(
        count_of(
            "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_requested' AND subject = 'default'",
            "request count",
        ),
        1
    );
    assert_eq!(
        count_of(
            "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
            "failed count",
        ),
        1
    );
    let failure_metadata = audit_conn
        .query_row(
            "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
            [],
            |row| row.get::<_, String>(0),
        )
        .expect("failed metadata");
    assert!(failure_metadata.contains(r#""failure_class":"unsupported vec capability""#));
}
7119
/// Happy-path regeneration with the test embedder: both seeded chunks get a
/// vec row, the embedding contract row carries the embedder's identity, and
/// one request event plus one apply event are written to provenance.
#[cfg(feature = "sqlite-vec")]
#[test]
#[allow(clippy::too_many_lines)]
fn regenerate_vector_embeddings_rebuilds_embeddings_via_embedder() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());

    // Seed one node with two chunks.
    {
        let seed_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
        schema.bootstrap(&seed_conn).expect("bootstrap");
        seed_conn
            .execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
                [],
            )
            .expect("insert node");
        seed_conn
            .execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
                [],
            )
            .expect("insert chunk 1");
        seed_conn
            .execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-2', 'doc-1', 'travel plan', 101)",
                [],
            )
            .expect("insert chunk 2");
    }

    let service = AdminService::new(db.path(), Arc::clone(&schema));
    let embedder = TestEmbedder::new("test-model", 4);
    let regeneration_config = VectorRegenerationConfig {
        profile: "default".to_owned(),
        table_name: "vec_nodes_active".to_owned(),
        chunking_policy: "per_chunk".to_owned(),
        preprocessing_policy: "trim".to_owned(),
    };
    let report = service
        .regenerate_vector_embeddings(&embedder, &regeneration_config)
        .expect("regenerate vectors");

    assert_eq!(report.profile, "default");
    assert_eq!(report.table_name, "vec_nodes_active");
    assert_eq!(report.dimension, 4);
    assert_eq!(report.total_chunks, 2);
    assert_eq!(report.regenerated_rows, 2);
    assert!(report.contract_persisted);

    let verify_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    // Shared reader for single-column integer results.
    let integer_of = |sql: &str, label: &str| -> i64 {
        verify_conn
            .query_row(sql, [], |row| row.get(0))
            .expect(label)
    };

    assert_eq!(
        integer_of("SELECT count(*) FROM vec_nodes_active", "vec count"),
        2
    );

    // The persisted contract must match what the assertions below expect of
    // the test embedder.
    let (stored_identity, stored_version, stored_dimension, stored_normalization) = verify_conn
        .query_row(
            "SELECT model_identity, model_version, dimension, normalization_policy \
             FROM vector_embedding_contracts WHERE profile = 'default'",
            [],
            |row| {
                let identity: String = row.get(0)?;
                let version: String = row.get(1)?;
                let dimension: i64 = row.get(2)?;
                let normalization: String = row.get(3)?;
                Ok((identity, version, dimension, normalization))
            },
        )
        .expect("contract row");
    assert_eq!(stored_identity, "test-model");
    assert_eq!(stored_version, "1.0.0");
    assert_eq!(stored_dimension, 4);
    assert_eq!(stored_normalization, "l2");

    assert_eq!(
        integer_of(
            "SELECT contract_format_version FROM vector_embedding_contracts WHERE profile = 'default'",
            "contract_format_version",
        ),
        1
    );
    assert_eq!(
        integer_of(
            "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_requested' AND subject = 'default'",
            "request audit count",
        ),
        1
    );
    assert_eq!(
        integer_of(
            "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_apply' AND subject = 'default'",
            "apply audit count",
        ),
        1
    );

    let apply_metadata = verify_conn
        .query_row(
            "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_apply' AND subject = 'default'",
            [],
            |row| row.get::<_, String>(0),
        )
        .expect("apply metadata");
    assert!(apply_metadata.contains(r#""profile":"default""#));
    assert!(apply_metadata.contains(r#""snapshot_hash":"#));
    assert!(apply_metadata.contains(r#""model_identity":"test-model""#));
}
7235
7236 #[cfg(feature = "sqlite-vec")]
7237 #[test]
7238 #[allow(clippy::too_many_lines)]
7239 fn regenerate_vector_embeddings_embedder_failure_leaves_contract_and_vec_rows_unchanged() {
7240 let db = NamedTempFile::new().expect("temp file");
7241 let schema = Arc::new(SchemaManager::new());
7242
7243 {
7244 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7245 schema.bootstrap(&conn).expect("bootstrap");
7246 conn.execute(
7247 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7248 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7249 [],
7250 )
7251 .expect("insert node");
7252 conn.execute(
7253 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7254 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7255 [],
7256 )
7257 .expect("insert chunk");
7258 schema
7259 .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
7260 .expect("ensure vec profile");
7261 conn.execute(
7262 r"
7263 INSERT INTO vector_embedding_contracts (
7264 profile,
7265 table_name,
7266 model_identity,
7267 model_version,
7268 dimension,
7269 normalization_policy,
7270 chunking_policy,
7271 preprocessing_policy,
7272 generator_command_json,
7273 applied_at,
7274 snapshot_hash
7275 ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)
7276 ",
7277 rusqlite::params![
7278 "default",
7279 "vec_nodes_active",
7280 "old-model",
7281 "0.9.0",
7282 4,
7283 "l2",
7284 "per_chunk",
7285 "trim",
7286 "[]",
7287 111,
7288 "old-snapshot"
7289 ],
7290 )
7291 .expect("seed contract");
7292 conn.execute(
7293 "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
7294 [],
7295 )
7296 .expect("seed vec row");
7297 }
7298
7299 let service = AdminService::new(db.path(), Arc::clone(&schema));
7300 let failing = FailingEmbedder {
7301 identity: QueryEmbedderIdentity {
7302 model_identity: "new-model".to_owned(),
7303 model_version: "1.0.0".to_owned(),
7304 dimension: 4,
7305 normalization_policy: "l2".to_owned(),
7306 },
7307 };
7308 let error = service
7309 .regenerate_vector_embeddings(
7310 &failing,
7311 &VectorRegenerationConfig {
7312 profile: "default".to_owned(),
7313 table_name: "vec_nodes_active".to_owned(),
7314 chunking_policy: "per_chunk".to_owned(),
7315 preprocessing_policy: "trim".to_owned(),
7316 },
7317 )
7318 .expect_err("embedder should fail");
7319
7320 assert!(error.to_string().contains("embedder failure"));
7321
7322 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7323 let model_identity: String = conn
7324 .query_row(
7325 "SELECT model_identity FROM vector_embedding_contracts WHERE profile = 'default'",
7326 [],
7327 |row| row.get(0),
7328 )
7329 .expect("model identity");
7330 assert_eq!(model_identity, "old-model");
7331 let snapshot_hash: String = conn
7332 .query_row(
7333 "SELECT snapshot_hash FROM vector_embedding_contracts WHERE profile = 'default'",
7334 [],
7335 |row| row.get(0),
7336 )
7337 .expect("snapshot hash");
7338 assert_eq!(snapshot_hash, "old-snapshot");
7339 let vec_count: i64 = conn
7340 .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
7341 row.get(0)
7342 })
7343 .expect("vec count");
7344 assert_eq!(vec_count, 1);
7345 let failure_count: i64 = conn
7346 .query_row(
7347 "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
7348 [],
7349 |row| row.get(0),
7350 )
7351 .expect("failure count");
7352 assert_eq!(failure_count, 1);
7353 let failure_metadata: String = conn
7354 .query_row(
7355 "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
7356 [],
7357 |row| row.get(0),
7358 )
7359 .expect("failure metadata");
7360 assert!(failure_metadata.contains("\"failure_class\":\"embedder failure\""));
7361 }
7362
7363 #[cfg(feature = "sqlite-vec")]
7374 #[test]
7375 fn regenerate_vector_embeddings_rejects_whitespace_only_profile_before_mutation() {
7376 let db = NamedTempFile::new().expect("temp file");
7377 let schema = Arc::new(SchemaManager::new());
7378 {
7379 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7380 schema.bootstrap(&conn).expect("bootstrap");
7381 conn.execute(
7382 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7383 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7384 [],
7385 )
7386 .expect("insert node");
7387 conn.execute(
7388 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7389 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7390 [],
7391 )
7392 .expect("insert chunk");
7393 }
7394
7395 let service = AdminService::new(db.path(), Arc::clone(&schema));
7396 let embedder = TestEmbedder::new("test-model", 4);
7397 let error = service
7398 .regenerate_vector_embeddings(
7399 &embedder,
7400 &VectorRegenerationConfig {
7401 profile: " ".to_owned(),
7402 table_name: "vec_nodes_active".to_owned(),
7403 chunking_policy: "per_chunk".to_owned(),
7404 preprocessing_policy: "trim".to_owned(),
7405 },
7406 )
7407 .expect_err("whitespace profile should be rejected");
7408
7409 assert!(error.to_string().contains("invalid contract"));
7410 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7411 let contract_count: i64 = conn
7412 .query_row(
7413 "SELECT count(*) FROM vector_embedding_contracts",
7414 [],
7415 |row| row.get(0),
7416 )
7417 .expect("contract count");
7418 assert_eq!(contract_count, 0);
7419 let provenance_count: i64 = conn
7420 .query_row("SELECT count(*) FROM provenance_events", [], |row| {
7421 row.get(0)
7422 })
7423 .expect("provenance count");
7424 assert_eq!(provenance_count, 0);
7425 }
7426
7427 #[cfg(feature = "sqlite-vec")]
7428 #[test]
7429 fn regenerate_vector_embeddings_rejects_future_contract_format_version() {
7430 let db = NamedTempFile::new().expect("temp file");
7431 let schema = Arc::new(SchemaManager::new());
7432 {
7433 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7434 schema.bootstrap(&conn).expect("bootstrap");
7435 conn.execute(
7436 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7437 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7438 [],
7439 )
7440 .expect("insert node");
7441 conn.execute(
7442 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7443 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7444 [],
7445 )
7446 .expect("insert chunk");
7447 conn.execute(
7448 r"
7449 INSERT INTO vector_embedding_contracts (
7450 profile,
7451 table_name,
7452 model_identity,
7453 model_version,
7454 dimension,
7455 normalization_policy,
7456 chunking_policy,
7457 preprocessing_policy,
7458 generator_command_json,
7459 applied_at,
7460 snapshot_hash,
7461 contract_format_version,
7462 updated_at
7463 ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)
7464 ",
7465 rusqlite::params![
7466 "default",
7467 "vec_nodes_active",
7468 "old-model",
7469 "0.9.0",
7470 4,
7471 "l2",
7472 "per_chunk",
7473 "trim",
7474 "[]",
7475 111,
7476 "old-snapshot",
7477 99,
7478 111,
7479 ],
7480 )
7481 .expect("seed future contract");
7482 }
7483
7484 let service = AdminService::new(db.path(), Arc::clone(&schema));
7485 let embedder = TestEmbedder::new("test-model", 4);
7486 let error = service
7487 .regenerate_vector_embeddings(
7488 &embedder,
7489 &VectorRegenerationConfig {
7490 profile: "default".to_owned(),
7491 table_name: "vec_nodes_active".to_owned(),
7492 chunking_policy: "per_chunk".to_owned(),
7493 preprocessing_policy: "trim".to_owned(),
7494 },
7495 )
7496 .expect_err("future contract version should be rejected");
7497
7498 assert!(error.to_string().contains("unsupported"));
7499 assert!(error.to_string().contains("format version"));
7500 }
7501
7502 #[test]
7503 fn check_semantics_detects_orphaned_chunk() {
7504 let (db, service) = setup();
7505 {
7506 let conn = sqlite::open_connection(db.path()).expect("conn");
7508 conn.execute(
7509 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7510 VALUES ('c1', 'ghost-node', 'text', 100)",
7511 [],
7512 )
7513 .expect("insert orphaned chunk");
7514 }
7515 let report = service.check_semantics().expect("semantics check");
7516 assert_eq!(report.orphaned_chunks, 1);
7517 }
7518
7519 #[test]
7520 fn check_semantics_detects_null_source_ref() {
7521 let (db, service) = setup();
7522 {
7523 let conn = sqlite::open_connection(db.path()).expect("conn");
7524 conn.execute(
7525 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at) \
7526 VALUES ('r1', 'lg1', 'Meeting', '{}', 100)",
7527 [],
7528 )
7529 .expect("insert node with null source_ref");
7530 }
7531 let report = service.check_semantics().expect("semantics check");
7532 assert_eq!(report.null_source_ref_nodes, 1);
7533 }
7534
7535 #[test]
7536 fn check_semantics_detects_broken_step_fk() {
7537 let (db, service) = setup();
7538 {
7539 let conn = sqlite::open_connection(db.path()).expect("conn");
7542 conn.execute_batch("PRAGMA foreign_keys = OFF;")
7543 .expect("disable FK");
7544 conn.execute(
7545 "INSERT INTO steps (id, run_id, kind, status, properties, created_at) \
7546 VALUES ('s1', 'ghost-run', 'llm', 'completed', '{}', 100)",
7547 [],
7548 )
7549 .expect("insert step with ghost run_id");
7550 }
7551 let report = service.check_semantics().expect("semantics check");
7552 assert_eq!(report.broken_step_fk, 1);
7553 }
7554
7555 #[test]
7556 fn check_semantics_detects_broken_action_fk() {
7557 let (db, service) = setup();
7558 {
7559 let conn = sqlite::open_connection(db.path()).expect("conn");
7560 conn.execute_batch("PRAGMA foreign_keys = OFF;")
7561 .expect("disable FK");
7562 conn.execute(
7563 "INSERT INTO actions (id, step_id, kind, status, properties, created_at) \
7564 VALUES ('a1', 'ghost-step', 'emit', 'completed', '{}', 100)",
7565 [],
7566 )
7567 .expect("insert action with ghost step_id");
7568 }
7569 let report = service.check_semantics().expect("semantics check");
7570 assert_eq!(report.broken_action_fk, 1);
7571 }
7572
7573 #[test]
7574 fn check_semantics_detects_stale_fts_rows() {
7575 let (db, service) = setup();
7576 {
7577 let conn = sqlite::open_connection(db.path()).expect("conn");
7578 conn.execute(
7581 "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
7582 VALUES ('ghost-chunk', 'any-node', 'Meeting', 'stale content')",
7583 [],
7584 )
7585 .expect("insert stale FTS row");
7586 }
7587 let report = service.check_semantics().expect("semantics check");
7588 assert_eq!(report.stale_fts_rows, 1);
7589 }
7590
7591 #[test]
7592 fn check_semantics_detects_fts_rows_for_superseded_nodes() {
7593 let (db, service) = setup();
7594 {
7595 let conn = sqlite::open_connection(db.path()).expect("conn");
7596 conn.execute(
7598 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
7599 VALUES ('r1', 'lg-sup', 'Meeting', '{}', 100, 200, 'src-1')",
7600 [],
7601 )
7602 .expect("insert superseded node");
7603 conn.execute(
7605 "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
7606 VALUES ('ck-x', 'lg-sup', 'Meeting', 'superseded content')",
7607 [],
7608 )
7609 .expect("insert FTS row for superseded node");
7610 }
7611 let report = service.check_semantics().expect("semantics check");
7612 assert_eq!(report.fts_rows_for_superseded_nodes, 1);
7613 }
7614
7615 #[test]
7616 fn check_semantics_detects_dangling_edges() {
7617 let (db, service) = setup();
7618 {
7619 let conn = sqlite::open_connection(db.path()).expect("conn");
7620 conn.execute_batch("PRAGMA foreign_keys = OFF;")
7621 .expect("disable FK");
7622 conn.execute(
7624 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7625 VALUES ('r1', 'lg-src', 'Meeting', '{}', 100, 'src-1')",
7626 [],
7627 )
7628 .expect("insert source node");
7629 conn.execute(
7630 "INSERT INTO edges \
7631 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
7632 VALUES ('e1', 'edge-1', 'lg-src', 'ghost-target', 'LINKS', '{}', 100, 'src-1')",
7633 [],
7634 )
7635 .expect("insert dangling edge");
7636 }
7637 let report = service.check_semantics().expect("semantics check");
7638 assert_eq!(report.dangling_edges, 1);
7639 }
7640
7641 #[test]
7642 fn check_semantics_detects_orphaned_supersession_chains() {
7643 let (db, service) = setup();
7644 {
7645 let conn = sqlite::open_connection(db.path()).expect("conn");
7646 conn.execute(
7648 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
7649 VALUES ('r1', 'lg-orphaned', 'Meeting', '{}', 100, 200, 'src-1')",
7650 [],
7651 )
7652 .expect("insert fully superseded node");
7653 }
7654 let report = service.check_semantics().expect("semantics check");
7655 assert_eq!(report.orphaned_supersession_chains, 1);
7656 }
7657
7658 #[test]
7659 fn check_semantics_detects_mismatched_kind_property_fts_rows() {
7660 let (db, service) = setup();
7661 {
7662 let conn = sqlite::open_connection(db.path()).expect("conn");
7663 conn.execute(
7665 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7666 VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'src-1')",
7667 [],
7668 )
7669 .expect("insert node");
7670 conn.execute(
7672 "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
7673 VALUES ('goal-1', 'WrongKind', 'Ship v2')",
7674 [],
7675 )
7676 .expect("insert mismatched property FTS row");
7677 }
7678 let report = service.check_semantics().expect("semantics check");
7679 assert_eq!(report.mismatched_kind_property_fts_rows, 1);
7680 }
7681
7682 #[test]
7683 fn check_semantics_detects_duplicate_property_fts_rows() {
7684 let (db, service) = setup();
7685 {
7686 let conn = sqlite::open_connection(db.path()).expect("conn");
7687 conn.execute(
7688 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7689 VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'src-1')",
7690 [],
7691 )
7692 .expect("insert node");
7693 conn.execute(
7695 "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
7696 VALUES ('goal-1', 'Goal', 'Ship v2')",
7697 [],
7698 )
7699 .expect("insert first property FTS row");
7700 conn.execute(
7701 "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
7702 VALUES ('goal-1', 'Goal', 'Ship v2 duplicate')",
7703 [],
7704 )
7705 .expect("insert duplicate property FTS row");
7706 }
7707 let report = service.check_semantics().expect("semantics check");
7708 assert_eq!(report.duplicate_property_fts_rows, 1);
7709 }
7710
7711 #[test]
7712 fn check_semantics_detects_drifted_property_fts_text() {
7713 let (db, service) = setup();
7714 {
7715 let conn = sqlite::open_connection(db.path()).expect("conn");
7716 conn.execute(
7717 "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
7718 VALUES ('Goal', '[\"$.name\"]', ' ')",
7719 [],
7720 )
7721 .expect("register schema");
7722 conn.execute(
7723 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7724 VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Current name\"}', 100, 'src-1')",
7725 [],
7726 )
7727 .expect("insert node");
7728 conn.execute(
7730 "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
7731 VALUES ('goal-1', 'Goal', 'Old stale name')",
7732 [],
7733 )
7734 .expect("insert stale property FTS row");
7735 }
7736 let report = service.check_semantics().expect("semantics check");
7737 assert_eq!(report.drifted_property_fts_rows, 1);
7738 }
7739
7740 #[test]
7741 fn check_semantics_detects_property_fts_row_that_should_not_exist() {
7742 let (db, service) = setup();
7743 {
7744 let conn = sqlite::open_connection(db.path()).expect("conn");
7745 conn.execute(
7746 "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
7747 VALUES ('Goal', '[\"$.searchable\"]', ' ')",
7748 [],
7749 )
7750 .expect("register schema");
7751 conn.execute(
7753 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7754 VALUES ('r1', 'goal-1', 'Goal', '{\"other\":\"field\"}', 100, 'src-1')",
7755 [],
7756 )
7757 .expect("insert node");
7758 conn.execute(
7760 "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
7761 VALUES ('goal-1', 'Goal', 'phantom text')",
7762 [],
7763 )
7764 .expect("insert phantom property FTS row");
7765 }
7766 let report = service.check_semantics().expect("semantics check");
7767 assert_eq!(
7768 report.drifted_property_fts_rows, 1,
7769 "row that should not exist must be counted as drifted"
7770 );
7771 }
7772
7773 #[test]
7774 fn safe_export_writes_manifest_with_sha256() {
7775 let (_db, service) = setup();
7776 let export_dir = tempfile::TempDir::new().expect("temp dir");
7777 let export_path = export_dir.path().join("backup.db");
7778
7779 let manifest = service
7780 .safe_export(
7781 &export_path,
7782 SafeExportOptions {
7783 force_checkpoint: false,
7784 },
7785 )
7786 .expect("export");
7787
7788 assert!(export_path.exists(), "exported db should exist");
7789 let manifest_path = export_dir.path().join("backup.db.export-manifest.json");
7790 assert!(
7791 manifest_path.exists(),
7792 "manifest file should exist at {}",
7793 manifest_path.display()
7794 );
7795 assert_eq!(manifest.sha256.len(), 64, "sha256 should be 64 hex chars");
7796 assert!(
7797 manifest.exported_at > 0,
7798 "exported_at should be a unix timestamp"
7799 );
7800 assert_eq!(
7801 manifest.schema_version,
7802 SchemaManager::new().current_version().0,
7803 "schema_version should match the live schema version"
7804 );
7805 assert_eq!(manifest.protocol_version, 1, "protocol_version should be 1");
7806 assert!(manifest.page_count > 0, "page_count should be positive");
7807 }
7808
7809 #[test]
7810 fn safe_export_preserves_operational_validation_contracts() {
7811 let (_db, service) = setup();
7812 let validation_json = r#"{"format_version":1,"mode":"enforce","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#;
7813 service
7814 .register_operational_collection(&OperationalRegisterRequest {
7815 name: "connector_health".to_owned(),
7816 kind: OperationalCollectionKind::LatestState,
7817 schema_json: "{}".to_owned(),
7818 retention_json: "{}".to_owned(),
7819 filter_fields_json: "[]".to_owned(),
7820 validation_json: validation_json.to_owned(),
7821 secondary_indexes_json: "[]".to_owned(),
7822 format_version: 1,
7823 })
7824 .expect("register collection");
7825
7826 let export_dir = tempfile::TempDir::new().expect("temp dir");
7827 let export_path = export_dir.path().join("backup.db");
7828 service
7829 .safe_export(
7830 &export_path,
7831 SafeExportOptions {
7832 force_checkpoint: false,
7833 },
7834 )
7835 .expect("export");
7836
7837 let exported = sqlite::open_connection(&export_path).expect("exported conn");
7838 let exported_validation_json: String = exported
7839 .query_row(
7840 "SELECT validation_json FROM operational_collections WHERE name = 'connector_health'",
7841 [],
7842 |row| row.get(0),
7843 )
7844 .expect("validation_json");
7845 assert_eq!(exported_validation_json, validation_json);
7846 }
7847
7848 #[test]
7849 fn safe_export_force_checkpoint_false_skips_wal_pragma() {
7850 let (_db, service) = setup();
7851 let export_dir = tempfile::TempDir::new().expect("temp dir");
7852 let export_path = export_dir.path().join("no-wal.db");
7853
7854 let manifest = service
7856 .safe_export(
7857 &export_path,
7858 SafeExportOptions {
7859 force_checkpoint: false,
7860 },
7861 )
7862 .expect("export with no checkpoint");
7863
7864 assert!(
7865 manifest.page_count > 0,
7866 "page_count must be populated regardless of checkpoint mode"
7867 );
7868 assert_eq!(
7869 manifest.schema_version,
7870 SchemaManager::new().current_version().0
7871 );
7872 assert_eq!(manifest.protocol_version, 1);
7873 }
7874
7875 #[test]
7876 fn safe_export_force_checkpoint_false_still_captures_wal_backed_changes() {
7877 let (db, service) = setup();
7878 let conn = sqlite::open_connection(db.path()).expect("conn");
7879 let journal_mode: String = conn
7880 .query_row("PRAGMA journal_mode=WAL", [], |row| row.get(0))
7881 .expect("enable wal");
7882 assert_eq!(journal_mode.to_lowercase(), "wal");
7883 let auto_checkpoint_pages: i64 = conn
7884 .query_row("PRAGMA wal_autocheckpoint=0", [], |row| row.get(0))
7885 .expect("disable auto checkpoint");
7886 assert_eq!(auto_checkpoint_pages, 0);
7887 conn.execute(
7888 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7889 VALUES ('r-wal', 'lg-wal', 'Meeting', '{}', 100, 'src-wal')",
7890 [],
7891 )
7892 .expect("insert wal-backed node");
7893
7894 let export_dir = tempfile::TempDir::new().expect("temp dir");
7895 let export_path = export_dir.path().join("wal-backed.db");
7896 service
7897 .safe_export(
7898 &export_path,
7899 SafeExportOptions {
7900 force_checkpoint: false,
7901 },
7902 )
7903 .expect("export wal-backed db");
7904
7905 let exported = sqlite::open_connection(&export_path).expect("open exported db");
7906 let exported_count: i64 = exported
7907 .query_row(
7908 "SELECT count(*) FROM nodes WHERE logical_id = 'lg-wal'",
7909 [],
7910 |row| row.get(0),
7911 )
7912 .expect("count exported nodes");
7913 assert_eq!(
7914 exported_count, 1,
7915 "safe_export must include committed rows that are still resident in the WAL"
7916 );
7917 }
7918
7919 #[test]
7920 fn excise_source_removes_searchable_content_after_excision() {
7921 let (db, service) = setup();
7922 {
7923 let conn = sqlite::open_connection(db.path()).expect("conn");
7924 conn.execute(
7925 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
7926 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
7927 [],
7928 )
7929 .expect("insert v1");
7930 conn.execute(
7931 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7932 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
7933 [],
7934 )
7935 .expect("insert v2");
7936 conn.execute(
7937 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7938 VALUES ('ck1', 'lg1', 'hello world', 100)",
7939 [],
7940 )
7941 .expect("insert chunk");
7942 }
7943 service.excise_source("source-2").expect("excise");
7944 {
7945 let conn = sqlite::open_connection(db.path()).expect("conn");
7946 let fts_count: i64 = conn
7947 .query_row(
7948 "SELECT count(*) FROM fts_nodes WHERE chunk_id = 'ck1'",
7949 [],
7950 |row| row.get(0),
7951 )
7952 .expect("fts count");
7953 assert_eq!(
7954 fts_count, 0,
7955 "excised content should not remain searchable after excise"
7956 );
7957 }
7958 }
7959
7960 #[cfg(feature = "sqlite-vec")]
7961 #[test]
7962 fn excise_source_cleans_chunks_and_vec_rows_for_excised_version() {
7963 let (db, service) = setup();
7964 {
7965 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7966 service
7967 .schema_manager
7968 .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
7969 .expect("ensure vec profile");
7970 conn.execute(
7971 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
7972 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
7973 [],
7974 )
7975 .expect("insert v1");
7976 conn.execute(
7977 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7978 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
7979 [],
7980 )
7981 .expect("insert v2");
7982 conn.execute(
7983 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7984 VALUES ('ck1', 'lg1', 'new content', 200)",
7985 [],
7986 )
7987 .expect("insert chunk");
7988 conn.execute(
7989 "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('ck1', zeroblob(16))",
7990 [],
7991 )
7992 .expect("insert vec row");
7993 }
7994
7995 service.excise_source("source-2").expect("excise");
7996
7997 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7998 let active_row: String = conn
7999 .query_row(
8000 "SELECT row_id FROM nodes WHERE logical_id = 'lg1' AND superseded_at IS NULL",
8001 [],
8002 |row| row.get(0),
8003 )
8004 .expect("restored active row");
8005 assert_eq!(active_row, "r1");
8006 let chunk_count: i64 = conn
8007 .query_row(
8008 "SELECT count(*) FROM chunks WHERE node_logical_id = 'lg1'",
8009 [],
8010 |row| row.get(0),
8011 )
8012 .expect("chunk count");
8013 assert_eq!(
8014 chunk_count, 0,
8015 "excised source content must not survive as chunks"
8016 );
8017 let vec_count: i64 = conn
8018 .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
8019 row.get(0)
8020 })
8021 .expect("vec count");
8022 assert_eq!(vec_count, 0, "excised source vec rows must be removed");
8023 let fts_count: i64 = conn
8024 .query_row(
8025 "SELECT count(*) FROM fts_nodes WHERE node_logical_id = 'lg1'",
8026 [],
8027 |row| row.get(0),
8028 )
8029 .expect("fts count");
8030 assert_eq!(
8031 fts_count, 0,
8032 "excised source content must not remain searchable"
8033 );
8034 }
8035
8036 #[test]
8037 fn export_page_count_matches_exported_file() {
8038 let (_db, service) = setup();
8039 let export_dir = tempfile::TempDir::new().expect("temp dir");
8040 let export_path = export_dir.path().join("page-count.db");
8041
8042 let manifest = service
8043 .safe_export(
8044 &export_path,
8045 SafeExportOptions {
8046 force_checkpoint: false,
8047 },
8048 )
8049 .expect("export");
8050
8051 let exported = sqlite::open_connection(&export_path).expect("open exported db");
8052 let actual_page_count: u64 = exported
8053 .query_row("PRAGMA page_count", [], |row| row.get(0))
8054 .expect("page_count from exported file");
8055
8056 assert_eq!(
8057 manifest.page_count, actual_page_count,
8058 "manifest page_count must match the exported file's PRAGMA page_count"
8059 );
8060 }
8061
8062 #[test]
8063 fn no_temp_file_after_successful_export() {
8064 let (_db, service) = setup();
8065 let export_dir = tempfile::TempDir::new().expect("temp dir");
8066 let export_path = export_dir.path().join("no-tmp.db");
8067
8068 service
8069 .safe_export(
8070 &export_path,
8071 SafeExportOptions {
8072 force_checkpoint: false,
8073 },
8074 )
8075 .expect("export");
8076
8077 let tmp_files: Vec<_> = fs::read_dir(export_dir.path())
8078 .expect("read export dir")
8079 .filter_map(Result::ok)
8080 .filter(|e| e.path().extension().is_some_and(|ext| ext == "tmp"))
8081 .collect();
8082
8083 assert!(
8084 tmp_files.is_empty(),
8085 "no .tmp files should remain after a successful export, found: {tmp_files:?}"
8086 );
8087 }
8088
8089 #[test]
8090 fn export_manifest_is_valid_json() {
8091 let (_db, service) = setup();
8092 let export_dir = tempfile::TempDir::new().expect("temp dir");
8093 let export_path = export_dir.path().join("valid-json.db");
8094
8095 service
8096 .safe_export(
8097 &export_path,
8098 SafeExportOptions {
8099 force_checkpoint: false,
8100 },
8101 )
8102 .expect("export");
8103
8104 let manifest_path = export_dir.path().join("valid-json.db.export-manifest.json");
8105 let manifest_contents = fs::read_to_string(&manifest_path).expect("read manifest");
8106 let parsed: serde_json::Value =
8107 serde_json::from_str(&manifest_contents).expect("manifest must be valid JSON");
8108
8109 assert!(
8110 parsed.get("exported_at").is_some(),
8111 "manifest must contain exported_at"
8112 );
8113 assert!(
8114 parsed.get("sha256").is_some(),
8115 "manifest must contain sha256"
8116 );
8117 assert!(
8118 parsed.get("schema_version").is_some(),
8119 "manifest must contain schema_version"
8120 );
8121 assert!(
8122 parsed.get("protocol_version").is_some(),
8123 "manifest must contain protocol_version"
8124 );
8125 assert!(
8126 parsed.get("page_count").is_some(),
8127 "manifest must contain page_count"
8128 );
8129 }
8130
8131 #[test]
8132 fn provenance_purge_dry_run_reports_counts() {
8133 let (db, service) = setup();
8134 {
8135 let conn = sqlite::open_connection(db.path()).expect("conn");
8136 conn.execute(
8137 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8138 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
8139 [],
8140 )
8141 .expect("insert p1");
8142 conn.execute(
8143 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8144 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 200)",
8145 [],
8146 )
8147 .expect("insert p2");
8148 conn.execute(
8149 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8150 VALUES ('p3', 'excise', 'lg3', 'src-1', 300)",
8151 [],
8152 )
8153 .expect("insert p3");
8154 }
8155
8156 let options = super::ProvenancePurgeOptions {
8157 dry_run: true,
8158 preserve_event_types: Vec::new(),
8159 };
8160 let report = service
8161 .purge_provenance_events(250, &options)
8162 .expect("dry run purge");
8163
8164 assert_eq!(report.events_deleted, 2);
8165 assert_eq!(report.events_preserved, 1);
8166 assert!(report.oldest_remaining.is_some());
8167
8168 let conn = sqlite::open_connection(db.path()).expect("conn");
8169 let total: i64 = conn
8170 .query_row("SELECT count(*) FROM provenance_events", [], |row| {
8171 row.get(0)
8172 })
8173 .expect("count");
8174 assert_eq!(total, 3, "dry_run must not delete any events");
8175 }
8176
/// A real (non-dry-run) purge with cutoff 150 is expected to delete the event
/// stamped 100, keep the event stamped 200, and report 200 as the oldest
/// remaining timestamp.
#[test]
fn provenance_purge_deletes_old_events() {
    let (db, service) = setup();

    // Seed one event on each side of the cutoff, then release the connection
    // before the service is exercised.
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    let seed_statements = [
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
             VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
            "insert p1",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
             VALUES ('p2', 'node_insert', 'lg2', 'src-1', 200)",
            "insert p2",
        ),
    ];
    for &(sql, failure_label) in &seed_statements {
        seed_conn.execute(sql, []).expect(failure_label);
    }
    drop(seed_conn);

    let purge_options = super::ProvenancePurgeOptions {
        dry_run: false,
        preserve_event_types: Vec::new(),
    };
    let outcome = service
        .purge_provenance_events(150, &purge_options)
        .expect("purge");

    assert_eq!(outcome.events_deleted, 1);
    assert_eq!(outcome.events_preserved, 1);
    assert_eq!(outcome.oldest_remaining, Some(200));

    // The table itself must reflect the deletion, not just the report.
    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let remaining = verify_conn
        .query_row("SELECT count(*) FROM provenance_events", [], |row| {
            row.get::<_, i64>(0)
        })
        .expect("count");
    assert_eq!(remaining, 1);
}
8216
/// Three events share timestamp 100 (one `excise`, two `node_insert`); a purge
/// with cutoff 500 is expected to delete both `node_insert` events and leave
/// only the `excise` event behind.
///
/// NOTE(review): `preserve_event_types` is empty here, yet the test expects
/// `excise` to survive — presumably the service falls back to a default
/// preserve list when none is supplied; confirm against
/// `purge_provenance_events`.
#[test]
fn provenance_purge_preserves_specified_types() {
    let (db, service) = setup();

    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    let seed_statements = [
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
             VALUES ('p1', 'excise', 'lg1', 'src-1', 100)",
            "insert p1",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
             VALUES ('p2', 'node_insert', 'lg2', 'src-1', 100)",
            "insert p2",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
             VALUES ('p3', 'node_insert', 'lg3', 'src-1', 100)",
            "insert p3",
        ),
    ];
    for &(sql, failure_label) in &seed_statements {
        seed_conn.execute(sql, []).expect(failure_label);
    }
    drop(seed_conn);

    let purge_options = super::ProvenancePurgeOptions {
        dry_run: false,
        preserve_event_types: Vec::new(),
    };
    let outcome = service
        .purge_provenance_events(500, &purge_options)
        .expect("purge");

    assert_eq!(outcome.events_deleted, 2);
    assert_eq!(outcome.events_preserved, 1);

    // Exactly one row should be left, so an unfiltered single-row query
    // identifies the surviving event type.
    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let remaining_type = verify_conn
        .query_row("SELECT event_type FROM provenance_events", [], |row| {
            row.get::<_, String>(0)
        })
        .expect("remaining event type");
    assert_eq!(remaining_type, "excise");
}
8261
/// A purge with cutoff 0 is expected to delete nothing: the single event
/// stamped 100 is reported as preserved and as the oldest remaining event.
#[test]
fn provenance_purge_noop_with_zero_timestamp() {
    let (db, service) = setup();

    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    let insert_p1 =
        "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
         VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)";
    seed_conn.execute(insert_p1, []).expect("insert p1");
    drop(seed_conn);

    let purge_options = super::ProvenancePurgeOptions {
        dry_run: false,
        preserve_event_types: Vec::new(),
    };
    let outcome = service
        .purge_provenance_events(0, &purge_options)
        .expect("purge");

    assert_eq!(outcome.events_deleted, 0);
    assert_eq!(outcome.events_preserved, 1);
    assert_eq!(outcome.oldest_remaining, Some(100));
}
8285
/// Restoring `doc-1` after its edge counterpart `doc-2` has been deleted
/// outright is expected to restore the node but skip `edge-1`, reporting
/// `doc-2` as the missing endpoint and leaving the edge retired.
#[test]
fn restore_skips_edge_when_counterpart_purged() {
    let (db, service) = setup();

    // Seed two nodes joined by one edge, retire all three, then delete the
    // rows for node B so the edge has no counterpart to reattach to.
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
            "insert node A",
        ),
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
            "insert node B",
        ),
        (
            "INSERT INTO edges \
             (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
             VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
            "insert edge",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
            "insert retire event A",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
            "insert edge retire event",
        ),
        (
            "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
            "retire node A",
        ),
        (
            "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-2'",
            "retire node B",
        ),
        (
            "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
            "retire edge",
        ),
        (
            "DELETE FROM nodes WHERE logical_id = 'doc-2'",
            "purge node B rows",
        ),
    ];
    for &(sql, failure_label) in &seed_statements {
        seed_conn.execute(sql, []).expect(failure_label);
    }
    drop(seed_conn);

    let outcome = service.restore_logical_id("doc-1").expect("restore A");
    assert!(!outcome.was_noop);
    assert_eq!(outcome.restored_node_rows, 1);
    assert_eq!(outcome.restored_edge_rows, 0, "edge should not be restored");
    assert_eq!(outcome.skipped_edges.len(), 1);
    assert_eq!(outcome.skipped_edges[0].edge_logical_id, "edge-1");
    assert_eq!(outcome.skipped_edges[0].missing_endpoint, "doc-2");

    // Confirm directly in the table that no active version of the edge exists.
    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let active_edge_sql =
        "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL";
    let active_edge_count = verify_conn
        .query_row(active_edge_sql, [], |row| row.get::<_, i64>(0))
        .expect("active edge count");
    assert_eq!(active_edge_count, 0, "edge must remain retired");
}
8366
/// When only `doc-1` and `edge-1` were retired and `doc-2` stayed active,
/// restoring `doc-1` is expected to bring the edge back with nothing skipped.
#[test]
fn restore_restores_edges_to_active_nodes() {
    let (db, service) = setup();

    // Node B is never superseded here, so the edge's other endpoint is live.
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
            "insert node A",
        ),
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
            "insert node B",
        ),
        (
            "INSERT INTO edges \
             (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
             VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
            "insert edge",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
            "insert retire event A",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
            "insert edge retire event",
        ),
        (
            "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
            "retire node A",
        ),
        (
            "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
            "retire edge",
        ),
    ];
    for &(sql, failure_label) in &seed_statements {
        seed_conn.execute(sql, []).expect(failure_label);
    }
    drop(seed_conn);

    let outcome = service.restore_logical_id("doc-1").expect("restore A");
    assert!(!outcome.was_noop);
    assert_eq!(outcome.restored_node_rows, 1);
    assert!(outcome.restored_edge_rows > 0, "edge should be restored");
    assert!(
        outcome.skipped_edges.is_empty(),
        "no edges should be skipped"
    );

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let active_edge_sql =
        "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL";
    let active_edge_count = verify_conn
        .query_row(active_edge_sql, [], |row| row.get::<_, i64>(0))
        .expect("active edge count");
    assert_eq!(active_edge_count, 1, "edge must be active");
}
8438
/// With both endpoints and the edge retired, restoring `doc-2` first and then
/// `doc-1` is expected to leave `edge-1` active, with the second restore
/// reporting restored edge rows and no skipped edges.
#[test]
fn restore_restores_edges_when_both_restored() {
    let (db, service) = setup();

    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
            "insert node A",
        ),
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
            "insert node B",
        ),
        (
            "INSERT INTO edges \
             (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
             VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
            "insert edge",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
            "insert retire event A",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-retire-b', 'node_retire', 'doc-2', 'forget-1', 200, '')",
            "insert retire event B",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
            "insert edge retire event",
        ),
        (
            "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
            "retire node A",
        ),
        (
            "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-2'",
            "retire node B",
        ),
        (
            "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
            "retire edge",
        ),
    ];
    for &(sql, failure_label) in &seed_statements {
        seed_conn.execute(sql, []).expect(failure_label);
    }
    drop(seed_conn);

    // Restore the counterpart first so both endpoints are active by the time
    // the second restore runs.
    let outcome_b = service.restore_logical_id("doc-2").expect("restore B");
    assert!(!outcome_b.was_noop);

    let outcome_a = service.restore_logical_id("doc-1").expect("restore A");
    assert!(!outcome_a.was_noop);
    assert_eq!(outcome_a.restored_node_rows, 1);
    assert!(
        outcome_a.restored_edge_rows > 0,
        "edge should be restored when both endpoints active"
    );
    assert!(
        outcome_a.skipped_edges.is_empty(),
        "no edges should be skipped"
    );

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let active_edge_sql =
        "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL";
    let active_edge_count = verify_conn
        .query_row(active_edge_sql, [], |row| row.get::<_, i64>(0))
        .expect("active edge count");
    assert_eq!(
        active_edge_count, 1,
        "edge must be active after both endpoints restored"
    );
}
8531
/// Walks one FTS property schema through register, describe, list, update
/// (re-register) and remove, checking the returned record at each step and
/// that removing an already-removed schema is an error.
#[test]
fn fts_property_schema_crud_round_trip() {
    let (_db, service) = setup();

    // Register without a separator; the record is expected to report " ".
    let initial_paths = ["$.title".to_owned(), "$.summary".to_owned()];
    let created = service
        .register_fts_property_schema("Meeting", &initial_paths, None)
        .expect("register");
    assert_eq!(created.kind, "Meeting");
    assert_eq!(created.property_paths, vec!["$.title", "$.summary"]);
    assert_eq!(created.separator, " ");
    assert_eq!(created.format_version, 1);

    // Describe returns the same record for a known kind and `None` otherwise.
    let fetched = service
        .describe_fts_property_schema("Meeting")
        .expect("describe")
        .expect("should exist");
    assert_eq!(fetched, created);

    let unknown = service
        .describe_fts_property_schema("NoSuchKind")
        .expect("describe missing");
    assert!(unknown.is_none());

    let all_schemas = service.list_fts_property_schemas().expect("list");
    assert_eq!(all_schemas.len(), 1);
    assert_eq!(all_schemas[0].kind, "Meeting");

    // Registering the same kind again replaces paths and separator.
    let replacement_paths = ["$.title".to_owned(), "$.notes".to_owned()];
    let replaced = service
        .register_fts_property_schema("Meeting", &replacement_paths, Some("\n"))
        .expect("update");
    assert_eq!(replaced.property_paths, vec!["$.title", "$.notes"]);
    assert_eq!(replaced.separator, "\n");

    service
        .remove_fts_property_schema("Meeting")
        .expect("remove");
    let after_remove = service
        .describe_fts_property_schema("Meeting")
        .expect("describe after remove");
    assert!(after_remove.is_none());

    // A second removal has nothing to remove and must fail.
    let second_removal = service.remove_fts_property_schema("Meeting");
    assert!(second_removal.is_err());
}
8593
/// Registers a schema mixing a scalar and a recursive entry plus an exclude
/// path, then checks that both the registration result and a later describe
/// report the same entries, exclude paths, and flattened property paths.
#[test]
fn describe_fts_property_schema_round_trips_recursive_entries() {
    let (_db, service) = setup();

    let entries = vec![
        FtsPropertyPathSpec::scalar("$.title"),
        FtsPropertyPathSpec::recursive("$.payload"),
    ];
    let exclude = vec!["$.payload.private".to_owned()];
    let eager = crate::rebuild_actor::RebuildMode::Eager;

    let from_register = service
        .register_fts_property_schema_with_entries(
            "KnowledgeItem",
            &entries,
            Some(" "),
            &exclude,
            eager,
        )
        .expect("register recursive");

    assert_eq!(from_register.entries, entries);
    assert_eq!(from_register.exclude_paths, exclude);
    assert_eq!(from_register.property_paths, vec!["$.title", "$.payload"]);

    let from_describe = service
        .describe_fts_property_schema("KnowledgeItem")
        .expect("describe")
        .expect("should exist");
    assert_eq!(from_describe.kind, "KnowledgeItem");
    assert_eq!(from_describe.entries, entries);
    assert_eq!(from_describe.exclude_paths, exclude);
    assert_eq!(from_describe.property_paths, vec!["$.title", "$.payload"]);
    assert_eq!(from_describe.separator, " ");
    assert_eq!(from_describe.format_version, 1);
}
8630
/// Registers a schema with a scalar and a recursive entry and checks that the
/// single record returned by the list call carries the same entries, exclude
/// paths, and flattened property paths.
#[test]
fn list_fts_property_schemas_round_trips_recursive_entries() {
    let (_db, service) = setup();

    let entries = vec![
        FtsPropertyPathSpec::scalar("$.title"),
        FtsPropertyPathSpec::recursive("$.payload"),
    ];
    let exclude = vec!["$.payload.secret".to_owned()];
    let eager = crate::rebuild_actor::RebuildMode::Eager;

    service
        .register_fts_property_schema_with_entries(
            "KnowledgeItem",
            &entries,
            Some(" "),
            &exclude,
            eager,
        )
        .expect("register recursive");

    let listed = service.list_fts_property_schemas().expect("list");
    assert_eq!(listed.len(), 1);

    let only_record = &listed[0];
    assert_eq!(only_record.kind, "KnowledgeItem");
    assert_eq!(only_record.entries, entries);
    assert_eq!(only_record.exclude_paths, exclude);
    assert_eq!(only_record.property_paths, vec!["$.title", "$.payload"]);
}
8658
/// A schema registered through the plain path-list API is expected to be
/// described with one `Scalar` entry per path and no exclude paths.
#[test]
fn describe_fts_property_schema_round_trips_scalar_only_entries() {
    let (_db, service) = setup();

    let scalar_paths = ["$.title".to_owned(), "$.summary".to_owned()];
    service
        .register_fts_property_schema("Meeting", &scalar_paths, None)
        .expect("register scalar");

    let described = service
        .describe_fts_property_schema("Meeting")
        .expect("describe")
        .expect("should exist");
    assert_eq!(described.property_paths, vec!["$.title", "$.summary"]);
    assert_eq!(described.entries.len(), 2);
    described.entries.iter().for_each(|entry| {
        assert_eq!(
            entry.mode,
            FtsPropertyPathMode::Scalar,
            "scalar-only schema should deserialize every entry as Scalar"
        );
    });
    assert!(described.exclude_paths.is_empty());
}
8686
/// After a retired `Document` node has had its FTS rows wiped, restoring it
/// is expected to report one restored property-FTS row and to recreate that
/// row with text `Budget Q3 forecast`.
#[test]
fn restore_reestablishes_property_fts_visibility() {
    let (db, service) = setup();

    // Seed schema, node, chunk and property-FTS row, retire the node, then
    // clear both FTS tables so restore has to rebuild them.
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    let seed_statements = [
        (
            "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
             VALUES ('Document', '[\"$.title\", \"$.body\"]', ' ')",
            "register schema",
        ),
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('row-1', 'doc-1', 'Document', '{\"title\":\"Budget\",\"body\":\"Q3 forecast\"}', 100, 'seed')",
            "insert node",
        ),
        (
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget text', 100)",
            "insert chunk",
        ),
        (
            "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
             VALUES ('doc-1', 'Document', 'Budget Q3 forecast')",
            "insert property fts",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
            "retire event",
        ),
        (
            "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
            "supersede",
        ),
        ("DELETE FROM fts_nodes", "clear chunk fts"),
        ("DELETE FROM fts_node_properties", "clear property fts"),
    ];
    for &(sql, failure_label) in &seed_statements {
        seed_conn.execute(sql, []).expect(failure_label);
    }
    drop(seed_conn);

    let outcome = service.restore_logical_id("doc-1").expect("restore");
    assert_eq!(outcome.restored_property_fts_rows, 1);

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let row_count_sql =
        "SELECT count(*) FROM fts_node_properties WHERE node_logical_id = 'doc-1'";
    let prop_fts_count = verify_conn
        .query_row(row_count_sql, [], |row| row.get::<_, i64>(0))
        .expect("prop fts count");
    assert_eq!(prop_fts_count, 1, "property FTS must be restored");

    let text_sql =
        "SELECT text_content FROM fts_node_properties WHERE node_logical_id = 'doc-1'";
    let restored_text = verify_conn
        .query_row(text_sql, [], |row| row.get::<_, String>(0))
        .expect("prop fts text");
    assert_eq!(restored_text, "Budget Q3 forecast");
}
8761
/// Registers a `Goal` schema, exports the database to a temporary file, and
/// checks that the exported file still contains the schema row with the same
/// JSON-encoded property paths.
#[test]
fn safe_export_preserves_fts_property_schemas() {
    let (_db, service) = setup();

    let goal_paths = ["$.name".to_owned(), "$.rationale".to_owned()];
    service
        .register_fts_property_schema("Goal", &goal_paths, None)
        .expect("register schema");

    let export_dir = tempfile::TempDir::new().expect("temp dir");
    let export_path = export_dir.path().join("backup.db");
    let export_options = SafeExportOptions {
        force_checkpoint: false,
    };
    service
        .safe_export(&export_path, export_options)
        .expect("export");

    // Inspect the exported file directly through a raw rusqlite connection.
    let exported_conn = rusqlite::Connection::open(&export_path).expect("open exported db");
    let exported_kind = exported_conn
        .query_row(
            "SELECT kind FROM fts_property_schemas WHERE kind = 'Goal'",
            [],
            |row| row.get::<_, String>(0),
        )
        .expect("schema must exist in export");
    assert_eq!(exported_kind, "Goal");

    let exported_paths_json = exported_conn
        .query_row(
            "SELECT property_paths_json FROM fts_property_schemas WHERE kind = 'Goal'",
            [],
            |row| row.get::<_, String>(0),
        )
        .expect("paths must exist");
    let exported_paths: Vec<String> =
        serde_json::from_str(&exported_paths_json).expect("valid json");
    assert_eq!(exported_paths, vec!["$.name", "$.rationale"]);
}
8804
/// Exports a database with two `Goal` nodes, corrupts the export's property
/// FTS table (stale text for `goal-1`, missing row for `goal-2`), rebuilds
/// the FTS projection on the export, and then checks that text search finds
/// both goals by their real names, that the stale text is no longer
/// searchable, and that the integrity and semantics checks report no
/// property-FTS problems.
#[test]
#[allow(clippy::too_many_lines)]
fn export_recovery_rebuilds_property_fts_from_canonical_state() {
    let (db, service) = setup();
    service
        .register_fts_property_schema("Goal", &["$.name".to_owned()], None)
        .expect("register");

    // Seed two nodes together with their matching property-FTS rows.
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
            "insert node 1",
        ),
        (
            "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
             VALUES ('goal-1', 'Goal', 'Ship v2')",
            "insert property FTS row 1",
        ),
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('row-2', 'goal-2', 'Goal', '{\"name\":\"Launch redesign\"}', 100, 'seed')",
            "insert node 2",
        ),
        (
            "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
             VALUES ('goal-2', 'Goal', 'Launch redesign')",
            "insert property FTS row 2",
        ),
    ];
    for &(sql, failure_label) in &seed_statements {
        seed_conn.execute(sql, []).expect(failure_label);
    }
    drop(seed_conn);

    let export_dir = tempfile::TempDir::new().expect("temp dir");
    let export_path = export_dir.path().join("backup.db");
    let export_options = SafeExportOptions {
        force_checkpoint: false,
    };
    service
        .safe_export(&export_path, export_options)
        .expect("export");

    // Damage the export: replace goal-1's row with wrong text and drop
    // goal-2's row entirely.
    let corrupt_conn = rusqlite::Connection::open(&export_path).expect("open export");
    let corruption_statements = [
        (
            "DELETE FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
            "delete old row",
        ),
        (
            "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
             VALUES ('goal-1', 'Goal', 'completely wrong stale text')",
            "insert corrupted row",
        ),
        (
            "DELETE FROM fts_node_properties WHERE node_logical_id = 'goal-2'",
            "delete goal-2 row",
        ),
    ];
    for &(sql, failure_label) in &corruption_statements {
        corrupt_conn.execute(sql, []).expect(failure_label);
    }
    drop(corrupt_conn);

    // Rebuild the FTS projection on the exported file.
    let schema = Arc::new(SchemaManager::new());
    let exported_service = AdminService::new(&export_path, Arc::clone(&schema));
    exported_service
        .rebuild_projections(ProjectionTarget::Fts)
        .expect("rebuild");

    let coordinator = ExecutionCoordinator::open(
        &export_path,
        Arc::clone(&schema),
        None,
        1,
        Arc::new(TelemetryCounters::default()),
        None,
    )
    .expect("coordinator");

    // goal-1 must be found again by its canonical name.
    let ship_query = QueryBuilder::nodes("Goal")
        .text_search("Ship", 10)
        .limit(10)
        .compile()
        .expect("compile");
    let ship_hits = coordinator
        .execute_compiled_read(&ship_query)
        .expect("execute read");
    assert_eq!(ship_hits.nodes.len(), 1);
    assert_eq!(ship_hits.nodes[0].logical_id, "goal-1");

    // goal-2's deleted row must have been recreated.
    let redesign_query = QueryBuilder::nodes("Goal")
        .text_search("redesign", 10)
        .limit(10)
        .compile()
        .expect("compile");
    let redesign_hits = coordinator
        .execute_compiled_read(&redesign_query)
        .expect("execute read");
    assert_eq!(redesign_hits.nodes.len(), 1);
    assert_eq!(redesign_hits.nodes[0].logical_id, "goal-2");

    // The injected text must no longer match anything.
    let stale_query = QueryBuilder::nodes("Goal")
        .text_search("stale", 10)
        .limit(10)
        .compile()
        .expect("compile");
    let stale_hits = coordinator
        .execute_compiled_read(&stale_query)
        .expect("execute read");
    assert_eq!(
        stale_hits.nodes.len(),
        0,
        "corrupted text must not appear in search after rebuild"
    );

    let integrity = exported_service.check_integrity().expect("integrity");
    assert_eq!(integrity.missing_property_fts_rows, 0);
    let semantics = exported_service.check_semantics().expect("semantics");
    assert_eq!(semantics.drifted_property_fts_rows, 0);
    assert_eq!(semantics.orphaned_property_fts_rows, 0);
    assert_eq!(semantics.duplicate_property_fts_rows, 0);
}
8941
/// A `Ticket` node whose properties lack the schema's only path
/// (`$.searchable`) has no property-FTS row; the integrity check is expected
/// not to count it as missing.
#[test]
fn check_integrity_no_false_positives_for_empty_extraction() {
    let (db, service) = setup();

    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    let seed_statements = [
        (
            "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
             VALUES ('Ticket', '[\"$.searchable\"]', ' ')",
            "register schema",
        ),
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('row-1', 'ticket-1', 'Ticket', '{\"status\":\"open\"}', 100, 'seed')",
            "insert node",
        ),
    ];
    for &(sql, failure_label) in &seed_statements {
        seed_conn.execute(sql, []).expect(failure_label);
    }
    drop(seed_conn);

    let integrity = service.check_integrity().expect("integrity");
    assert_eq!(
        integrity.missing_property_fts_rows, 0,
        "node with no extractable values must not be counted as missing"
    );
}
8970
/// A `Ticket` node that does carry the schema's `$.title` property but has no
/// property-FTS row is expected to be counted once as missing by the
/// integrity check.
#[test]
fn check_integrity_detects_genuinely_missing_property_fts_rows() {
    let (db, service) = setup();

    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    let seed_statements = [
        (
            "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
             VALUES ('Ticket', '[\"$.title\"]', ' ')",
            "register schema",
        ),
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('row-1', 'ticket-1', 'Ticket', '{\"title\":\"fix login bug\"}', 100, 'seed')",
            "insert node",
        ),
    ];
    for &(sql, failure_label) in &seed_statements {
        seed_conn.execute(sql, []).expect(failure_label);
    }
    drop(seed_conn);

    let integrity = service.check_integrity().expect("integrity");
    assert_eq!(
        integrity.missing_property_fts_rows, 1,
        "node with extractable values but no property FTS row must be detected"
    );
}
8997
/// With a `Goal` schema and node seeded but no property-FTS row, a full FTS
/// rebuild is expected to report at least one rebuilt row and to create the
/// row for `goal-1` with text `Ship v2`.
#[test]
fn rebuild_projections_fts_restores_missing_property_fts_rows() {
    let (db, service) = setup();

    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    let seed_statements = [
        (
            "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
             VALUES ('Goal', '[\"$.name\"]', ' ')",
            "register schema",
        ),
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
            "insert node",
        ),
    ];
    for &(sql, failure_label) in &seed_statements {
        seed_conn.execute(sql, []).expect(failure_label);
    }
    drop(seed_conn);

    let rebuild = service
        .rebuild_projections(ProjectionTarget::Fts)
        .expect("rebuild");
    assert!(
        rebuild.rebuilt_rows >= 1,
        "rebuild must insert at least one property FTS row"
    );

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let text_sql =
        "SELECT text_content FROM fts_node_properties WHERE node_logical_id = 'goal-1'";
    let rebuilt_text = verify_conn
        .query_row(text_sql, [], |row| row.get::<_, String>(0))
        .expect("property FTS row must exist after rebuild");
    assert_eq!(rebuilt_text, "Ship v2");
}
9036
/// After a property-FTS row for `goal-1` is inserted and then deleted, the
/// missing-projections rebuild is expected to report at least one rebuilt row
/// and to leave exactly one property-FTS row for that node.
#[test]
fn rebuild_missing_projections_fills_gap_for_deleted_property_fts_row() {
    let (db, service) = setup();

    // The final DELETE creates the gap the rebuild has to fill.
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    let seed_statements = [
        (
            "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
             VALUES ('Goal', '[\"$.name\"]', ' ')",
            "register schema",
        ),
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
            "insert node",
        ),
        (
            "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
             VALUES ('goal-1', 'Goal', 'Ship v2')",
            "insert property fts",
        ),
        (
            "DELETE FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
            "delete property fts",
        ),
    ];
    for &(sql, failure_label) in &seed_statements {
        seed_conn.execute(sql, []).expect(failure_label);
    }
    drop(seed_conn);

    let rebuild = service
        .rebuild_missing_projections()
        .expect("rebuild missing");
    assert!(
        rebuild.rebuilt_rows >= 1,
        "missing rebuild must insert the gap-fill row"
    );

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let row_count_sql =
        "SELECT count(*) FROM fts_node_properties WHERE node_logical_id = 'goal-1'";
    let row_count = verify_conn
        .query_row(row_count_sql, [], |row| row.get::<_, i64>(0))
        .expect("count");
    assert_eq!(
        row_count, 1,
        "gap-fill must restore exactly one property FTS row"
    );
}
9089
/// Once the `Goal` schema is removed, the existing property-FTS row for
/// `goal-1` is expected to be reported as orphaned by the semantics check and
/// to be deleted by a subsequent FTS rebuild.
#[test]
fn remove_schema_then_rebuild_cleans_stale_property_fts_rows() {
    let (db, service) = setup();
    service
        .register_fts_property_schema("Goal", &["$.name".to_owned()], None)
        .expect("register");

    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
            "insert node",
        ),
        (
            "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
             VALUES ('goal-1', 'Goal', 'Ship v2')",
            "insert property fts",
        ),
    ];
    for &(sql, failure_label) in &seed_statements {
        seed_conn.execute(sql, []).expect(failure_label);
    }
    drop(seed_conn);

    service.remove_fts_property_schema("Goal").expect("remove");

    // Before the rebuild the leftover row should show up as an orphan.
    let semantics = service.check_semantics().expect("semantics");
    assert_eq!(
        semantics.orphaned_property_fts_rows, 1,
        "stale property FTS rows must be detected after schema removal"
    );

    service
        .rebuild_projections(ProjectionTarget::Fts)
        .expect("rebuild");

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let row_count_sql =
        "SELECT count(*) FROM fts_node_properties WHERE node_logical_id = 'goal-1'";
    let row_count = verify_conn
        .query_row(row_count_sql, [], |row| row.get::<_, i64>(0))
        .expect("count");
    assert_eq!(
        row_count, 0,
        "rebuild after schema removal must delete stale property FTS rows"
    );
}
9141
9142 mod validate_fts_property_paths_tests {
9143 use super::super::validate_fts_property_paths;
9144
9145 #[test]
9146 fn valid_simple_path() {
9147 assert!(validate_fts_property_paths(&["$.name".to_owned()]).is_ok());
9148 }
9149
9150 #[test]
9151 fn valid_nested_path() {
9152 assert!(validate_fts_property_paths(&["$.address.city".to_owned()]).is_ok());
9153 }
9154
9155 #[test]
9156 fn valid_underscore_segment() {
9157 assert!(validate_fts_property_paths(&["$.a_b".to_owned()]).is_ok());
9158 }
9159
9160 #[test]
9161 fn rejects_bare_prefix() {
9162 let result = validate_fts_property_paths(&["$.".to_owned()]);
9163 assert!(result.is_err(), "path '$.' must be rejected");
9164 }
9165
9166 #[test]
9167 fn rejects_double_dot() {
9168 let result = validate_fts_property_paths(&["$..x".to_owned()]);
9169 assert!(result.is_err(), "path '$..x' must be rejected");
9170 }
9171
9172 #[test]
9173 fn rejects_trailing_dot() {
9174 let result = validate_fts_property_paths(&["$.foo.".to_owned()]);
9175 assert!(result.is_err(), "path '$.foo.' must be rejected");
9176 }
9177
9178 #[test]
9179 fn rejects_space_in_segment() {
9180 let result = validate_fts_property_paths(&["$.foo bar".to_owned()]);
9181 assert!(result.is_err(), "path '$.foo bar' must be rejected");
9182 }
9183
9184 #[test]
9185 fn rejects_bracket_syntax() {
9186 let result = validate_fts_property_paths(&["$.foo[0]".to_owned()]);
9187 assert!(result.is_err(), "path '$.foo[0]' must be rejected");
9188 }
9189
9190 #[test]
9191 fn rejects_duplicates() {
9192 let result = validate_fts_property_paths(&["$.name".to_owned(), "$.name".to_owned()]);
9193 assert!(result.is_err(), "duplicate paths must be rejected");
9194 }
9195
9196 #[test]
9197 fn rejects_empty_list() {
9198 let result = validate_fts_property_paths(&[]);
9199 assert!(result.is_err(), "empty path list must be rejected");
9200 }
9201 }
9202}