1use std::fmt::Write as _;
2use std::fs;
3use std::io;
4use std::path::{Path, PathBuf};
5use std::sync::Arc;
6use std::time::SystemTime;
7
8use fathomdb_schema::{SchemaError, SchemaManager};
9use rusqlite::{DatabaseName, OptionalExtension, TransactionBehavior};
10use serde::{Deserialize, Serialize};
11use sha2::{Digest, Sha256};
12
13use crate::{
14 EngineError, ProjectionRepairReport, ProjectionService,
15 embedder::{QueryEmbedder, QueryEmbedderIdentity},
16 ids::new_id,
17 operational::{
18 OperationalCollectionKind, OperationalCollectionRecord, OperationalCompactionReport,
19 OperationalCurrentRow, OperationalFilterClause, OperationalFilterField,
20 OperationalFilterFieldType, OperationalFilterMode, OperationalFilterValue,
21 OperationalHistoryValidationIssue, OperationalHistoryValidationReport,
22 OperationalMutationRow, OperationalPurgeReport, OperationalReadReport,
23 OperationalReadRequest, OperationalRegisterRequest, OperationalRepairReport,
24 OperationalRetentionActionKind, OperationalRetentionPlanItem,
25 OperationalRetentionPlanReport, OperationalRetentionRunItem, OperationalRetentionRunReport,
26 OperationalSecondaryIndexDefinition, OperationalSecondaryIndexRebuildReport,
27 OperationalTraceReport, extract_secondary_index_entries_for_current,
28 extract_secondary_index_entries_for_mutation, parse_operational_secondary_indexes_json,
29 parse_operational_validation_contract, validate_operational_payload_against_contract,
30 },
31 projection::ProjectionTarget,
32 sqlite,
33};
34
35#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
37pub struct IntegrityReport {
38 pub physical_ok: bool,
39 pub foreign_keys_ok: bool,
40 pub missing_fts_rows: usize,
41 pub missing_property_fts_rows: usize,
42 pub duplicate_active_logical_ids: usize,
43 pub operational_missing_collections: usize,
44 pub operational_missing_last_mutations: usize,
45 pub warnings: Vec<String>,
46}
47
48#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
50pub struct FtsPropertySchemaRecord {
51 pub kind: String,
53 pub property_paths: Vec<String>,
58 pub entries: Vec<FtsPropertyPathSpec>,
63 pub exclude_paths: Vec<String>,
66 pub separator: String,
68 pub format_version: i64,
70}
71
72#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize)]
74#[serde(rename_all = "snake_case")]
75pub enum FtsPropertyPathMode {
76 #[default]
79 Scalar,
80 Recursive,
83}
84
85#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
87pub struct FtsPropertyPathSpec {
88 pub path: String,
90 pub mode: FtsPropertyPathMode,
92}
93
94impl FtsPropertyPathSpec {
95 #[must_use]
96 pub fn scalar(path: impl Into<String>) -> Self {
97 Self {
98 path: path.into(),
99 mode: FtsPropertyPathMode::Scalar,
100 }
101 }
102
103 #[must_use]
104 pub fn recursive(path: impl Into<String>) -> Self {
105 Self {
106 path: path.into(),
107 mode: FtsPropertyPathMode::Recursive,
108 }
109 }
110}
111
/// Options for a safe export.
///
/// NOTE(review): the export routine itself is outside the visible part of the
/// file; only the default value is established here.
#[derive(Debug, Clone, Copy)]
pub struct SafeExportOptions {
    /// Checkpoint flag; `true` in [`SafeExportOptions::default`].
    pub force_checkpoint: bool,
}

impl Default for SafeExportOptions {
    /// Returns options with `force_checkpoint` switched on.
    fn default() -> Self {
        let force_checkpoint = true;
        Self { force_checkpoint }
    }
}
128
/// Version number of the export protocol (same type as
/// `SafeExportManifest::protocol_version`).
const EXPORT_PROTOCOL_VERSION: u32 = 1;
131
132#[derive(Clone, Debug, Serialize)]
134pub struct SafeExportManifest {
135 pub exported_at: u64,
137 pub sha256: String,
139 pub schema_version: u32,
141 pub protocol_version: u32,
143 pub page_count: u64,
145}
146
147#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
149pub struct TraceReport {
150 pub source_ref: String,
151 pub node_rows: usize,
152 pub edge_rows: usize,
153 pub action_rows: usize,
154 pub operational_mutation_rows: usize,
155 pub node_logical_ids: Vec<String>,
156 pub action_ids: Vec<String>,
157 pub operational_mutation_ids: Vec<String>,
158}
159
160#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
162pub struct SkippedEdge {
163 pub edge_logical_id: String,
164 pub missing_endpoint: String,
165}
166
167#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
169pub struct LogicalRestoreReport {
170 pub logical_id: String,
171 pub was_noop: bool,
172 pub restored_node_rows: usize,
173 pub restored_edge_rows: usize,
174 pub restored_chunk_rows: usize,
175 pub restored_fts_rows: usize,
176 pub restored_property_fts_rows: usize,
177 pub restored_vec_rows: usize,
178 pub skipped_edges: Vec<SkippedEdge>,
179 pub notes: Vec<String>,
180}
181
182#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
184pub struct LogicalPurgeReport {
185 pub logical_id: String,
186 pub was_noop: bool,
187 pub deleted_node_rows: usize,
188 pub deleted_edge_rows: usize,
189 pub deleted_chunk_rows: usize,
190 pub deleted_fts_rows: usize,
191 pub deleted_vec_rows: usize,
192 pub notes: Vec<String>,
193}
194
195#[derive(Clone, Debug, Serialize, Deserialize)]
197pub struct ProvenancePurgeOptions {
198 pub dry_run: bool,
199 #[serde(default)]
200 pub preserve_event_types: Vec<String>,
201}
202
203#[derive(Clone, Debug, Serialize)]
205pub struct ProvenancePurgeReport {
206 pub events_deleted: u64,
207 pub events_preserved: u64,
208 pub oldest_remaining: Option<i64>,
209}
210
211#[derive(Debug)]
213pub struct AdminService {
214 database_path: PathBuf,
215 schema_manager: Arc<SchemaManager>,
216 projections: ProjectionService,
217}
218
219#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
221pub struct SemanticReport {
222 pub orphaned_chunks: usize,
224 pub null_source_ref_nodes: usize,
226 pub broken_step_fk: usize,
228 pub broken_action_fk: usize,
230 pub stale_fts_rows: usize,
232 pub fts_rows_for_superseded_nodes: usize,
234 pub stale_property_fts_rows: usize,
236 pub orphaned_property_fts_rows: usize,
238 pub mismatched_kind_property_fts_rows: usize,
240 pub duplicate_property_fts_rows: usize,
242 pub drifted_property_fts_rows: usize,
244 pub dangling_edges: usize,
246 pub orphaned_supersession_chains: usize,
248 pub stale_vec_rows: usize,
250 pub vec_rows_for_superseded_nodes: usize,
252 pub missing_operational_current_rows: usize,
254 pub stale_operational_current_rows: usize,
256 pub disabled_collection_mutations: usize,
258 pub orphaned_last_access_metadata_rows: usize,
260 pub warnings: Vec<String>,
261}
262
263#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
273#[serde(rename_all = "snake_case", deny_unknown_fields)]
274pub struct VectorRegenerationConfig {
275 pub profile: String,
276 pub table_name: String,
277 pub chunking_policy: String,
278 pub preprocessing_policy: String,
279}
280
281#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
283pub struct VectorRegenerationReport {
284 pub profile: String,
285 pub table_name: String,
286 pub dimension: usize,
287 pub total_chunks: usize,
288 pub regenerated_rows: usize,
289 pub contract_persisted: bool,
290 pub notes: Vec<String>,
291}
292
// Limits and version numbers used by the admin routines. Their consumers sit
// outside this part of the file, so the notes below only restate the names.

/// Current format version of the vector contract.
const CURRENT_VECTOR_CONTRACT_FORMAT_VERSION: i64 = 1;
/// Upper bound on a profile's length.
const MAX_PROFILE_LEN: usize = 128;
/// Upper bound on a policy's length.
const MAX_POLICY_LEN: usize = 128;
/// Upper bound on contract JSON size, in bytes (32 KiB).
const MAX_CONTRACT_JSON_BYTES: usize = 32 * 1024;
/// Upper bound on audit metadata size, in bytes.
const MAX_AUDIT_METADATA_BYTES: usize = 2048;
/// Default limit for operational reads.
const DEFAULT_OPERATIONAL_READ_LIMIT: usize = 100;
/// Upper bound on the limit for operational reads.
const MAX_OPERATIONAL_READ_LIMIT: usize = 1_000;
300
301#[derive(Clone, Debug)]
303pub struct AdminHandle {
304 inner: Arc<AdminService>,
305}
306
307impl AdminHandle {
308 #[must_use]
310 pub fn new(service: AdminService) -> Self {
311 Self {
312 inner: Arc::new(service),
313 }
314 }
315
316 #[must_use]
318 pub fn service(&self) -> Arc<AdminService> {
319 Arc::clone(&self.inner)
320 }
321}
322
323impl AdminService {
324 #[must_use]
326 pub fn new(path: impl AsRef<Path>, schema_manager: Arc<SchemaManager>) -> Self {
327 let database_path = path.as_ref().to_path_buf();
328 let projections = ProjectionService::new(&database_path, Arc::clone(&schema_manager));
329 Self {
330 database_path,
331 schema_manager,
332 projections,
333 }
334 }
335
336 fn connect(&self) -> Result<rusqlite::Connection, EngineError> {
337 #[cfg(feature = "sqlite-vec")]
338 let conn = sqlite::open_connection_with_vec(&self.database_path)?;
339 #[cfg(not(feature = "sqlite-vec"))]
340 let conn = sqlite::open_connection(&self.database_path)?;
341 self.schema_manager.bootstrap(&conn)?;
342 Ok(conn)
343 }
344
345 pub fn check_integrity(&self) -> Result<IntegrityReport, EngineError> {
348 let conn = self.connect()?;
349
350 let physical_result: String =
351 conn.query_row("PRAGMA integrity_check", [], |row| row.get(0))?;
352 let foreign_key_count: i64 =
353 conn.query_row("SELECT count(*) FROM pragma_foreign_key_check", [], |row| {
354 row.get(0)
355 })?;
356 let missing_fts_rows: i64 = conn.query_row(
357 r"
358 SELECT count(*)
359 FROM chunks c
360 JOIN nodes n
361 ON n.logical_id = c.node_logical_id
362 AND n.superseded_at IS NULL
363 WHERE NOT EXISTS (
364 SELECT 1
365 FROM fts_nodes f
366 WHERE f.chunk_id = c.id
367 )
368 ",
369 [],
370 |row| row.get(0),
371 )?;
372 let duplicate_active: i64 = conn.query_row(
373 r"
374 SELECT count(*)
375 FROM (
376 SELECT logical_id
377 FROM nodes
378 WHERE superseded_at IS NULL
379 GROUP BY logical_id
380 HAVING count(*) > 1
381 )
382 ",
383 [],
384 |row| row.get(0),
385 )?;
386 let operational_missing_collections: i64 = conn.query_row(
387 r"
388 SELECT (
389 SELECT count(*)
390 FROM operational_mutations m
391 LEFT JOIN operational_collections c ON c.name = m.collection_name
392 WHERE c.name IS NULL
393 ) + (
394 SELECT count(*)
395 FROM operational_current oc
396 LEFT JOIN operational_collections c ON c.name = oc.collection_name
397 WHERE c.name IS NULL
398 )
399 ",
400 [],
401 |row| row.get(0),
402 )?;
403 let operational_missing_last_mutations: i64 = conn.query_row(
404 r"
405 SELECT count(*)
406 FROM operational_current oc
407 LEFT JOIN operational_mutations m ON m.id = oc.last_mutation_id
408 WHERE m.id IS NULL
409 ",
410 [],
411 |row| row.get(0),
412 )?;
413
414 let missing_property_fts_rows = count_missing_property_fts_rows(&conn)?;
418
419 let mut warnings = Vec::new();
420 if missing_fts_rows > 0 {
421 warnings.push("missing FTS projections detected".to_owned());
422 }
423 if missing_property_fts_rows > 0 {
424 warnings.push("missing property FTS projections detected".to_owned());
425 }
426 if duplicate_active > 0 {
427 warnings.push("duplicate active logical_ids detected".to_owned());
428 }
429 if operational_missing_collections > 0 {
430 warnings.push("operational rows reference missing collections".to_owned());
431 }
432 if operational_missing_last_mutations > 0 {
433 warnings.push("operational current rows reference missing last mutations".to_owned());
434 }
435
436 Ok(IntegrityReport {
441 physical_ok: physical_result == "ok",
442 foreign_keys_ok: foreign_key_count == 0,
443 missing_fts_rows: i64_to_usize(missing_fts_rows),
444 missing_property_fts_rows: i64_to_usize(missing_property_fts_rows),
445 duplicate_active_logical_ids: i64_to_usize(duplicate_active),
446 operational_missing_collections: i64_to_usize(operational_missing_collections),
447 operational_missing_last_mutations: i64_to_usize(operational_missing_last_mutations),
448 warnings,
449 })
450 }
451
452 #[allow(clippy::too_many_lines)]
455 pub fn check_semantics(&self) -> Result<SemanticReport, EngineError> {
456 let conn = self.connect()?;
457
458 let orphaned_chunks: i64 = conn.query_row(
459 r"
460 SELECT count(*)
461 FROM chunks c
462 WHERE NOT EXISTS (
463 SELECT 1 FROM nodes n
464 WHERE n.logical_id = c.node_logical_id
465 )
466 ",
467 [],
468 |row| row.get(0),
469 )?;
470
471 let null_source_ref_nodes: i64 = conn.query_row(
472 "SELECT count(*) FROM nodes WHERE source_ref IS NULL AND superseded_at IS NULL",
473 [],
474 |row| row.get(0),
475 )?;
476
477 let broken_step_fk: i64 = conn.query_row(
478 r"
479 SELECT count(*) FROM steps s
480 WHERE NOT EXISTS (SELECT 1 FROM runs r WHERE r.id = s.run_id)
481 ",
482 [],
483 |row| row.get(0),
484 )?;
485
486 let broken_action_fk: i64 = conn.query_row(
487 r"
488 SELECT count(*) FROM actions a
489 WHERE NOT EXISTS (SELECT 1 FROM steps s WHERE s.id = a.step_id)
490 ",
491 [],
492 |row| row.get(0),
493 )?;
494
495 let stale_fts_rows: i64 = conn.query_row(
496 r"
497 SELECT count(*) FROM fts_nodes f
498 WHERE NOT EXISTS (SELECT 1 FROM chunks c WHERE c.id = f.chunk_id)
499 ",
500 [],
501 |row| row.get(0),
502 )?;
503
504 let fts_rows_for_superseded_nodes: i64 = conn.query_row(
505 r"
506 SELECT count(*) FROM fts_nodes f
507 WHERE NOT EXISTS (
508 SELECT 1 FROM nodes n
509 WHERE n.logical_id = f.node_logical_id AND n.superseded_at IS NULL
510 )
511 ",
512 [],
513 |row| row.get(0),
514 )?;
515
516 let stale_property_fts_rows: i64 = conn.query_row(
517 r"
518 SELECT count(*) FROM fts_node_properties fp
519 WHERE NOT EXISTS (
520 SELECT 1 FROM nodes n
521 WHERE n.logical_id = fp.node_logical_id AND n.superseded_at IS NULL
522 )
523 ",
524 [],
525 |row| row.get(0),
526 )?;
527
528 let orphaned_property_fts_rows: i64 = conn.query_row(
529 r"
530 SELECT count(*) FROM fts_node_properties fp
531 WHERE NOT EXISTS (
532 SELECT 1 FROM fts_property_schemas s WHERE s.kind = fp.kind
533 )
534 ",
535 [],
536 |row| row.get(0),
537 )?;
538
539 let mismatched_kind_property_fts_rows: i64 = conn.query_row(
540 r"
541 SELECT count(*) FROM fts_node_properties fp
542 JOIN nodes n ON n.logical_id = fp.node_logical_id AND n.superseded_at IS NULL
543 WHERE n.kind != fp.kind
544 ",
545 [],
546 |row| row.get(0),
547 )?;
548
549 let duplicate_property_fts_rows: i64 = conn.query_row(
550 r"
551 SELECT count(*) FROM (
552 SELECT node_logical_id FROM fts_node_properties
553 GROUP BY node_logical_id
554 HAVING count(*) > 1
555 )
556 ",
557 [],
558 |row| row.get(0),
559 )?;
560
561 let drifted_property_fts_rows = count_drifted_property_fts_rows(&conn)?;
562
563 let dangling_edges: i64 = conn.query_row(
564 r"
565 SELECT count(*) FROM edges e
566 WHERE e.superseded_at IS NULL AND (
567 NOT EXISTS (SELECT 1 FROM nodes n WHERE n.logical_id = e.source_logical_id AND n.superseded_at IS NULL)
568 OR
569 NOT EXISTS (SELECT 1 FROM nodes n WHERE n.logical_id = e.target_logical_id AND n.superseded_at IS NULL)
570 )
571 ",
572 [],
573 |row| row.get(0),
574 )?;
575
576 let orphaned_supersession_chains: i64 = conn.query_row(
577 r"
578 SELECT count(*) FROM (
579 SELECT logical_id FROM nodes
580 GROUP BY logical_id
581 HAVING count(*) > 0 AND sum(CASE WHEN superseded_at IS NULL THEN 1 ELSE 0 END) = 0
582 )
583 ",
584 [],
585 |row| row.get(0),
586 )?;
587
588 #[cfg(feature = "sqlite-vec")]
590 let stale_vec_rows: i64 = match conn.query_row(
591 r"
592 SELECT count(*) FROM vec_nodes_active v
593 WHERE NOT EXISTS (SELECT 1 FROM chunks c WHERE c.id = v.chunk_id)
594 ",
595 [],
596 |row| row.get(0),
597 ) {
598 Ok(n) => n,
599 Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
600 if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
601 {
602 0
603 }
604 Err(e) => return Err(EngineError::Sqlite(e)),
605 };
606 #[cfg(not(feature = "sqlite-vec"))]
607 let stale_vec_rows: i64 = 0;
608
609 #[cfg(feature = "sqlite-vec")]
610 let vec_rows_for_superseded_nodes: i64 = match conn.query_row(
611 r"
612 SELECT count(*) FROM vec_nodes_active v
613 JOIN chunks c ON c.id = v.chunk_id
614 WHERE NOT EXISTS (
615 SELECT 1 FROM nodes n
616 WHERE n.logical_id = c.node_logical_id
617 )
618 ",
619 [],
620 |row| row.get(0),
621 ) {
622 Ok(n) => n,
623 Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
624 if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
625 {
626 0
627 }
628 Err(e) => return Err(EngineError::Sqlite(e)),
629 };
630 #[cfg(not(feature = "sqlite-vec"))]
631 let vec_rows_for_superseded_nodes: i64 = 0;
632 let missing_operational_current_rows: i64 = conn.query_row(
633 r"
634 SELECT count(*)
635 FROM operational_mutations m
636 JOIN operational_collections c
637 ON c.name = m.collection_name
638 AND c.kind = 'latest_state'
639 WHERE m.op_kind = 'put'
640 AND NOT EXISTS (
641 SELECT 1
642 FROM operational_mutations newer
643 WHERE newer.collection_name = m.collection_name
644 AND newer.record_key = m.record_key
645 AND newer.mutation_order > m.mutation_order
646 )
647 AND NOT EXISTS (
648 SELECT 1
649 FROM operational_current oc
650 WHERE oc.collection_name = m.collection_name
651 AND oc.record_key = m.record_key
652 )
653 ",
654 [],
655 |row| row.get(0),
656 )?;
657 let stale_operational_current_rows: i64 = conn.query_row(
658 r"
659 SELECT count(*)
660 FROM operational_current oc
661 JOIN operational_collections c
662 ON c.name = oc.collection_name
663 AND c.kind = 'latest_state'
664 LEFT JOIN operational_mutations m ON m.id = oc.last_mutation_id
665 WHERE m.id IS NULL
666 OR m.collection_name != oc.collection_name
667 OR m.record_key != oc.record_key
668 OR m.op_kind != 'put'
669 OR m.payload_json != oc.payload_json
670 OR EXISTS (
671 SELECT 1
672 FROM operational_mutations newer
673 WHERE newer.collection_name = oc.collection_name
674 AND newer.record_key = oc.record_key
675 AND newer.mutation_order > m.mutation_order
676 )
677 ",
678 [],
679 |row| row.get(0),
680 )?;
681 let disabled_collection_mutations: i64 = conn.query_row(
682 r"
683 SELECT count(*)
684 FROM operational_mutations m
685 JOIN operational_collections c ON c.name = m.collection_name
686 WHERE c.disabled_at IS NOT NULL AND m.created_at > c.disabled_at
687 ",
688 [],
689 |row| row.get(0),
690 )?;
691 let orphaned_last_access_metadata_rows: i64 = conn.query_row(
692 r"
693 SELECT count(*)
694 FROM node_access_metadata am
695 WHERE NOT EXISTS (
696 SELECT 1 FROM nodes n WHERE n.logical_id = am.logical_id
697 )
698 ",
699 [],
700 |row| row.get(0),
701 )?;
702
703 let mut warnings = Vec::new();
704 if orphaned_chunks > 0 {
705 warnings.push(format!(
706 "{orphaned_chunks} orphaned chunk(s) with no surviving node history"
707 ));
708 }
709 if null_source_ref_nodes > 0 {
710 warnings.push(format!(
711 "{null_source_ref_nodes} active node(s) with null source_ref"
712 ));
713 }
714 if broken_step_fk > 0 {
715 warnings.push(format!(
716 "{broken_step_fk} step(s) referencing non-existent run"
717 ));
718 }
719 if broken_action_fk > 0 {
720 warnings.push(format!(
721 "{broken_action_fk} action(s) referencing non-existent step"
722 ));
723 }
724 if stale_fts_rows > 0 {
725 warnings.push(format!(
726 "{stale_fts_rows} stale FTS row(s) referencing missing chunk"
727 ));
728 }
729 if fts_rows_for_superseded_nodes > 0 {
730 warnings.push(format!(
731 "{fts_rows_for_superseded_nodes} FTS row(s) for superseded node(s)"
732 ));
733 }
734 if stale_property_fts_rows > 0 {
735 warnings.push(format!(
736 "{stale_property_fts_rows} stale property FTS row(s) for superseded/missing node(s)"
737 ));
738 }
739 if orphaned_property_fts_rows > 0 {
740 warnings.push(format!(
741 "{orphaned_property_fts_rows} orphaned property FTS row(s) for unregistered kind(s)"
742 ));
743 }
744 if mismatched_kind_property_fts_rows > 0 {
745 warnings.push(format!(
746 "{mismatched_kind_property_fts_rows} property FTS row(s) whose kind does not match the active node"
747 ));
748 }
749 if duplicate_property_fts_rows > 0 {
750 warnings.push(format!(
751 "{duplicate_property_fts_rows} active logical ID(s) with duplicate property FTS rows"
752 ));
753 }
754 if drifted_property_fts_rows > 0 {
755 warnings.push(format!(
756 "{drifted_property_fts_rows} property FTS row(s) with stale text_content"
757 ));
758 }
759 if dangling_edges > 0 {
760 warnings.push(format!(
761 "{dangling_edges} active edge(s) with missing endpoint node"
762 ));
763 }
764 if orphaned_supersession_chains > 0 {
765 warnings.push(format!(
766 "{orphaned_supersession_chains} logical_id(s) with all versions superseded"
767 ));
768 }
769 if stale_vec_rows > 0 {
770 warnings.push(format!(
771 "{stale_vec_rows} stale vec row(s) referencing missing chunk"
772 ));
773 }
774 if vec_rows_for_superseded_nodes > 0 {
775 warnings.push(format!(
776 "{vec_rows_for_superseded_nodes} vec row(s) whose node history is missing"
777 ));
778 }
779 if missing_operational_current_rows > 0 {
780 warnings.push(format!(
781 "{missing_operational_current_rows} latest-state key(s) missing operational_current rows"
782 ));
783 }
784 if stale_operational_current_rows > 0 {
785 warnings.push(format!(
786 "{stale_operational_current_rows} stale operational_current row(s)"
787 ));
788 }
789 if disabled_collection_mutations > 0 {
790 warnings.push(format!(
791 "{disabled_collection_mutations} mutation(s) were written after collection disable"
792 ));
793 }
794 if orphaned_last_access_metadata_rows > 0 {
795 warnings.push(format!(
796 "{orphaned_last_access_metadata_rows} last_access metadata row(s) reference missing node history"
797 ));
798 }
799
800 Ok(SemanticReport {
801 orphaned_chunks: i64_to_usize(orphaned_chunks),
802 null_source_ref_nodes: i64_to_usize(null_source_ref_nodes),
803 broken_step_fk: i64_to_usize(broken_step_fk),
804 broken_action_fk: i64_to_usize(broken_action_fk),
805 stale_fts_rows: i64_to_usize(stale_fts_rows),
806 fts_rows_for_superseded_nodes: i64_to_usize(fts_rows_for_superseded_nodes),
807 stale_property_fts_rows: i64_to_usize(stale_property_fts_rows),
808 orphaned_property_fts_rows: i64_to_usize(orphaned_property_fts_rows),
809 mismatched_kind_property_fts_rows: i64_to_usize(mismatched_kind_property_fts_rows),
810 duplicate_property_fts_rows: i64_to_usize(duplicate_property_fts_rows),
811 drifted_property_fts_rows: i64_to_usize(drifted_property_fts_rows),
812 dangling_edges: i64_to_usize(dangling_edges),
813 orphaned_supersession_chains: i64_to_usize(orphaned_supersession_chains),
814 stale_vec_rows: i64_to_usize(stale_vec_rows),
815 vec_rows_for_superseded_nodes: i64_to_usize(vec_rows_for_superseded_nodes),
816 missing_operational_current_rows: i64_to_usize(missing_operational_current_rows),
817 stale_operational_current_rows: i64_to_usize(stale_operational_current_rows),
818 disabled_collection_mutations: i64_to_usize(disabled_collection_mutations),
819 orphaned_last_access_metadata_rows: i64_to_usize(orphaned_last_access_metadata_rows),
820 warnings,
821 })
822 }
823
824 pub fn register_operational_collection(
827 &self,
828 request: &OperationalRegisterRequest,
829 ) -> Result<OperationalCollectionRecord, EngineError> {
830 if request.name.trim().is_empty() {
831 return Err(EngineError::InvalidWrite(
832 "operational collection name must not be empty".to_owned(),
833 ));
834 }
835 if request.schema_json.is_empty() {
836 return Err(EngineError::InvalidWrite(
837 "operational collection schema_json must not be empty".to_owned(),
838 ));
839 }
840 if request.retention_json.is_empty() {
841 return Err(EngineError::InvalidWrite(
842 "operational collection retention_json must not be empty".to_owned(),
843 ));
844 }
845 if request.filter_fields_json.is_empty() {
846 return Err(EngineError::InvalidWrite(
847 "operational collection filter_fields_json must not be empty".to_owned(),
848 ));
849 }
850 parse_operational_validation_contract(&request.validation_json)
851 .map_err(EngineError::InvalidWrite)?;
852 parse_operational_secondary_indexes_json(&request.secondary_indexes_json, request.kind)
853 .map_err(EngineError::InvalidWrite)?;
854 if request.format_version <= 0 {
855 return Err(EngineError::InvalidWrite(
856 "operational collection format_version must be positive".to_owned(),
857 ));
858 }
859 parse_operational_filter_fields(&request.filter_fields_json)
860 .map_err(EngineError::InvalidWrite)?;
861
862 let mut conn = self.connect()?;
863 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
864 tx.execute(
865 "INSERT INTO operational_collections \
866 (name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at) \
867 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, unixepoch())",
868 rusqlite::params![
869 request.name.as_str(),
870 request.kind.as_str(),
871 request.schema_json.as_str(),
872 request.retention_json.as_str(),
873 request.filter_fields_json.as_str(),
874 request.validation_json.as_str(),
875 request.secondary_indexes_json.as_str(),
876 request.format_version,
877 ],
878 )?;
879 persist_simple_provenance_event(
880 &tx,
881 "operational_collection_registered",
882 request.name.as_str(),
883 Some(serde_json::json!({
884 "kind": request.kind.as_str(),
885 "format_version": request.format_version,
886 })),
887 )?;
888 tx.commit()?;
889
890 self.describe_operational_collection(&request.name)?
891 .ok_or_else(|| {
892 EngineError::Bridge("registered collection missing after commit".to_owned())
893 })
894 }
895
896 pub fn describe_operational_collection(
899 &self,
900 name: &str,
901 ) -> Result<Option<OperationalCollectionRecord>, EngineError> {
902 let conn = self.connect()?;
903 load_operational_collection_record(&conn, name)
904 }
905
906 pub fn update_operational_collection_filters(
910 &self,
911 name: &str,
912 filter_fields_json: &str,
913 ) -> Result<OperationalCollectionRecord, EngineError> {
914 if filter_fields_json.is_empty() {
915 return Err(EngineError::InvalidWrite(
916 "operational collection filter_fields_json must not be empty".to_owned(),
917 ));
918 }
919 let declared_fields = parse_operational_filter_fields(filter_fields_json)
920 .map_err(EngineError::InvalidWrite)?;
921
922 let mut conn = self.connect()?;
923 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
924 load_operational_collection_record(&tx, name)?.ok_or_else(|| {
925 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
926 })?;
927 tx.execute(
928 "UPDATE operational_collections SET filter_fields_json = ?2 WHERE name = ?1",
929 rusqlite::params![name, filter_fields_json],
930 )?;
931 tx.execute(
932 "DELETE FROM operational_filter_values WHERE collection_name = ?1",
933 [name],
934 )?;
935
936 let mut mutation_stmt = tx.prepare(
937 "SELECT id, payload_json FROM operational_mutations \
938 WHERE collection_name = ?1 ORDER BY mutation_order",
939 )?;
940 let mutations = mutation_stmt
941 .query_map([name], |row| {
942 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
943 })?
944 .collect::<Result<Vec<_>, _>>()?;
945 drop(mutation_stmt);
946
947 let mut insert_filter_value = tx.prepare_cached(
948 "INSERT INTO operational_filter_values \
949 (mutation_id, collection_name, field_name, string_value, integer_value) \
950 VALUES (?1, ?2, ?3, ?4, ?5)",
951 )?;
952 let mut inserted_values = 0usize;
953 for (mutation_id, payload_json) in &mutations {
954 for filter_value in
955 extract_operational_filter_values(&declared_fields, payload_json.as_str())
956 {
957 insert_filter_value.execute(rusqlite::params![
958 mutation_id,
959 name,
960 filter_value.field_name,
961 filter_value.string_value,
962 filter_value.integer_value,
963 ])?;
964 inserted_values += 1;
965 }
966 }
967 drop(insert_filter_value);
968
969 persist_simple_provenance_event(
970 &tx,
971 "operational_collection_filter_fields_updated",
972 name,
973 Some(serde_json::json!({
974 "field_count": declared_fields.len(),
975 "mutations_backfilled": mutations.len(),
976 "inserted_filter_values": inserted_values,
977 })),
978 )?;
979 let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
980 EngineError::Bridge("operational collection missing after filter update".to_owned())
981 })?;
982 tx.commit()?;
983 Ok(updated)
984 }
985
986 pub fn update_operational_collection_validation(
989 &self,
990 name: &str,
991 validation_json: &str,
992 ) -> Result<OperationalCollectionRecord, EngineError> {
993 parse_operational_validation_contract(validation_json)
994 .map_err(EngineError::InvalidWrite)?;
995
996 let mut conn = self.connect()?;
997 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
998 load_operational_collection_record(&tx, name)?.ok_or_else(|| {
999 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1000 })?;
1001 tx.execute(
1002 "UPDATE operational_collections SET validation_json = ?2 WHERE name = ?1",
1003 rusqlite::params![name, validation_json],
1004 )?;
1005 persist_simple_provenance_event(
1006 &tx,
1007 "operational_collection_validation_updated",
1008 name,
1009 Some(serde_json::json!({
1010 "has_validation": !validation_json.is_empty(),
1011 })),
1012 )?;
1013 let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1014 EngineError::Bridge("operational collection missing after validation update".to_owned())
1015 })?;
1016 tx.commit()?;
1017 Ok(updated)
1018 }
1019
1020 pub fn update_operational_collection_secondary_indexes(
1024 &self,
1025 name: &str,
1026 secondary_indexes_json: &str,
1027 ) -> Result<OperationalCollectionRecord, EngineError> {
1028 let mut conn = self.connect()?;
1029 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1030 let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1031 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1032 })?;
1033 let indexes = parse_operational_secondary_indexes_json(secondary_indexes_json, record.kind)
1034 .map_err(EngineError::InvalidWrite)?;
1035 tx.execute(
1036 "UPDATE operational_collections SET secondary_indexes_json = ?2 WHERE name = ?1",
1037 rusqlite::params![name, secondary_indexes_json],
1038 )?;
1039 let (mutation_entries_rebuilt, current_entries_rebuilt) =
1040 rebuild_operational_secondary_index_entries(&tx, &record.name, record.kind, &indexes)?;
1041 persist_simple_provenance_event(
1042 &tx,
1043 "operational_collection_secondary_indexes_updated",
1044 name,
1045 Some(serde_json::json!({
1046 "index_count": indexes.len(),
1047 "mutation_entries_rebuilt": mutation_entries_rebuilt,
1048 "current_entries_rebuilt": current_entries_rebuilt,
1049 })),
1050 )?;
1051 let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1052 EngineError::Bridge(
1053 "operational collection missing after secondary index update".to_owned(),
1054 )
1055 })?;
1056 tx.commit()?;
1057 Ok(updated)
1058 }
1059
1060 pub fn rebuild_operational_secondary_indexes(
1063 &self,
1064 name: &str,
1065 ) -> Result<OperationalSecondaryIndexRebuildReport, EngineError> {
1066 let mut conn = self.connect()?;
1067 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1068 let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1069 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1070 })?;
1071 let indexes =
1072 parse_operational_secondary_indexes_json(&record.secondary_indexes_json, record.kind)
1073 .map_err(EngineError::InvalidWrite)?;
1074 let (mutation_entries_rebuilt, current_entries_rebuilt) =
1075 rebuild_operational_secondary_index_entries(&tx, &record.name, record.kind, &indexes)?;
1076 persist_simple_provenance_event(
1077 &tx,
1078 "operational_secondary_indexes_rebuilt",
1079 name,
1080 Some(serde_json::json!({
1081 "index_count": indexes.len(),
1082 "mutation_entries_rebuilt": mutation_entries_rebuilt,
1083 "current_entries_rebuilt": current_entries_rebuilt,
1084 })),
1085 )?;
1086 tx.commit()?;
1087 Ok(OperationalSecondaryIndexRebuildReport {
1088 collection_name: name.to_owned(),
1089 mutation_entries_rebuilt,
1090 current_entries_rebuilt,
1091 })
1092 }
1093
1094 pub fn validate_operational_collection_history(
1097 &self,
1098 name: &str,
1099 ) -> Result<OperationalHistoryValidationReport, EngineError> {
1100 let conn = self.connect()?;
1101 let record = load_operational_collection_record(&conn, name)?.ok_or_else(|| {
1102 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1103 })?;
1104 let Some(contract) = parse_operational_validation_contract(&record.validation_json)
1105 .map_err(EngineError::InvalidWrite)?
1106 else {
1107 return Err(EngineError::InvalidWrite(format!(
1108 "operational collection '{name}' has no validation_json configured"
1109 )));
1110 };
1111
1112 let mut stmt = conn.prepare(
1113 "SELECT id, record_key, op_kind, payload_json FROM operational_mutations \
1114 WHERE collection_name = ?1 ORDER BY mutation_order",
1115 )?;
1116 let rows = stmt
1117 .query_map([name], |row| {
1118 Ok((
1119 row.get::<_, String>(0)?,
1120 row.get::<_, String>(1)?,
1121 row.get::<_, String>(2)?,
1122 row.get::<_, String>(3)?,
1123 ))
1124 })?
1125 .collect::<Result<Vec<_>, _>>()?;
1126 drop(stmt);
1127
1128 let mut checked_rows = 0usize;
1129 let mut issues = Vec::new();
1130 for (mutation_id, record_key, op_kind, payload_json) in rows {
1131 if op_kind == "delete" {
1132 continue;
1133 }
1134 checked_rows += 1;
1135 if let Err(message) =
1136 validate_operational_payload_against_contract(&contract, payload_json.as_str())
1137 {
1138 issues.push(OperationalHistoryValidationIssue {
1139 mutation_id,
1140 record_key,
1141 op_kind,
1142 message,
1143 });
1144 }
1145 }
1146
1147 Ok(OperationalHistoryValidationReport {
1148 collection_name: name.to_owned(),
1149 checked_rows,
1150 invalid_row_count: issues.len(),
1151 issues,
1152 })
1153 }
1154
1155 pub fn disable_operational_collection(
1158 &self,
1159 name: &str,
1160 ) -> Result<OperationalCollectionRecord, EngineError> {
1161 let mut conn = self.connect()?;
1162 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1163 let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1164 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1165 })?;
1166 let changed = if record.disabled_at.is_none() {
1167 tx.execute(
1168 "UPDATE operational_collections SET disabled_at = unixepoch() WHERE name = ?1",
1169 [name],
1170 )?;
1171 true
1172 } else {
1173 false
1174 };
1175 let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1176 EngineError::Bridge("operational collection missing after disable".to_owned())
1177 })?;
1178 persist_simple_provenance_event(
1179 &tx,
1180 "operational_collection_disabled",
1181 name,
1182 Some(serde_json::json!({
1183 "disabled_at": record.disabled_at,
1184 "changed": changed,
1185 })),
1186 )?;
1187 tx.commit()?;
1188 Ok(record)
1189 }
1190
1191 pub fn compact_operational_collection(
1194 &self,
1195 name: &str,
1196 dry_run: bool,
1197 ) -> Result<OperationalCompactionReport, EngineError> {
1198 let mut conn = self.connect()?;
1199 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1200 let collection = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1201 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1202 })?;
1203 validate_append_only_operational_collection(&collection, "compact")?;
1204 let (mutation_ids, before_timestamp) =
1205 operational_compaction_candidates(&tx, &collection.retention_json, name)?;
1206 if dry_run {
1207 drop(tx);
1208 return Ok(OperationalCompactionReport {
1209 collection_name: name.to_owned(),
1210 deleted_mutations: mutation_ids.len(),
1211 dry_run: true,
1212 before_timestamp,
1213 });
1214 }
1215 let mut delete_stmt =
1216 tx.prepare_cached("DELETE FROM operational_mutations WHERE id = ?1")?;
1217 for mutation_id in &mutation_ids {
1218 delete_stmt.execute([mutation_id.as_str()])?;
1219 }
1220 drop(delete_stmt);
1221 persist_simple_provenance_event(
1222 &tx,
1223 "operational_collection_compacted",
1224 name,
1225 Some(serde_json::json!({
1226 "deleted_mutations": mutation_ids.len(),
1227 "before_timestamp": before_timestamp,
1228 })),
1229 )?;
1230 tx.commit()?;
1231 Ok(OperationalCompactionReport {
1232 collection_name: name.to_owned(),
1233 deleted_mutations: mutation_ids.len(),
1234 dry_run: false,
1235 before_timestamp,
1236 })
1237 }
1238
1239 pub fn purge_operational_collection(
1242 &self,
1243 name: &str,
1244 before_timestamp: i64,
1245 ) -> Result<OperationalPurgeReport, EngineError> {
1246 let mut conn = self.connect()?;
1247 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1248 let collection = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1249 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1250 })?;
1251 validate_append_only_operational_collection(&collection, "purge")?;
1252 let deleted_mutations = tx.execute(
1253 "DELETE FROM operational_mutations WHERE collection_name = ?1 AND created_at < ?2",
1254 rusqlite::params![name, before_timestamp],
1255 )?;
1256 persist_simple_provenance_event(
1257 &tx,
1258 "operational_collection_purged",
1259 name,
1260 Some(serde_json::json!({
1261 "deleted_mutations": deleted_mutations,
1262 "before_timestamp": before_timestamp,
1263 })),
1264 )?;
1265 tx.commit()?;
1266 Ok(OperationalPurgeReport {
1267 collection_name: name.to_owned(),
1268 deleted_mutations,
1269 before_timestamp,
1270 })
1271 }
1272
1273 pub fn plan_operational_retention(
1276 &self,
1277 now_timestamp: i64,
1278 collection_names: Option<&[String]>,
1279 max_collections: Option<usize>,
1280 ) -> Result<OperationalRetentionPlanReport, EngineError> {
1281 let conn = self.connect()?;
1282 let records = load_operational_retention_records(&conn, collection_names, max_collections)?;
1283 let mut items = Vec::with_capacity(records.len());
1284 for record in records {
1285 items.push(plan_operational_retention_item(
1286 &conn,
1287 &record,
1288 now_timestamp,
1289 )?);
1290 }
1291 Ok(OperationalRetentionPlanReport {
1292 planned_at: now_timestamp,
1293 collections_examined: items.len(),
1294 items,
1295 })
1296 }
1297
1298 pub fn run_operational_retention(
1301 &self,
1302 now_timestamp: i64,
1303 collection_names: Option<&[String]>,
1304 max_collections: Option<usize>,
1305 dry_run: bool,
1306 ) -> Result<OperationalRetentionRunReport, EngineError> {
1307 let mut conn = self.connect()?;
1308 let records = load_operational_retention_records(&conn, collection_names, max_collections)?;
1309 let mut items = Vec::with_capacity(records.len());
1310 let mut collections_acted_on = 0usize;
1311
1312 for record in records {
1313 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1314 let item = run_operational_retention_item(&tx, &record, now_timestamp, dry_run)?;
1315 if item.deleted_mutations > 0 {
1316 collections_acted_on += 1;
1317 }
1318 if dry_run || item.action_kind == OperationalRetentionActionKind::Noop {
1319 drop(tx);
1320 } else {
1321 tx.commit()?;
1322 }
1323 items.push(item);
1324 }
1325
1326 Ok(OperationalRetentionRunReport {
1327 executed_at: now_timestamp,
1328 collections_examined: items.len(),
1329 collections_acted_on,
1330 dry_run,
1331 items,
1332 })
1333 }
1334
1335 pub fn trace_operational_collection(
1338 &self,
1339 collection_name: &str,
1340 record_key: Option<&str>,
1341 ) -> Result<OperationalTraceReport, EngineError> {
1342 let conn = self.connect()?;
1343 ensure_operational_collection_registered(&conn, collection_name)?;
1344 let mutations = if let Some(record_key) = record_key {
1345 let mut stmt = conn.prepare(
1346 "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
1347 FROM operational_mutations \
1348 WHERE collection_name = ?1 AND record_key = ?2 \
1349 ORDER BY mutation_order",
1350 )?;
1351 stmt.query_map([collection_name, record_key], map_operational_mutation_row)?
1352 .collect::<Result<Vec<_>, _>>()?
1353 } else {
1354 let mut stmt = conn.prepare(
1355 "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
1356 FROM operational_mutations \
1357 WHERE collection_name = ?1 \
1358 ORDER BY mutation_order",
1359 )?;
1360 stmt.query_map([collection_name], map_operational_mutation_row)?
1361 .collect::<Result<Vec<_>, _>>()?
1362 };
1363 let current_rows = if let Some(record_key) = record_key {
1364 let mut stmt = conn.prepare(
1365 "SELECT collection_name, record_key, payload_json, updated_at, last_mutation_id \
1366 FROM operational_current \
1367 WHERE collection_name = ?1 AND record_key = ?2 \
1368 ORDER BY updated_at, record_key",
1369 )?;
1370 stmt.query_map([collection_name, record_key], map_operational_current_row)?
1371 .collect::<Result<Vec<_>, _>>()?
1372 } else {
1373 let mut stmt = conn.prepare(
1374 "SELECT collection_name, record_key, payload_json, updated_at, last_mutation_id \
1375 FROM operational_current \
1376 WHERE collection_name = ?1 \
1377 ORDER BY updated_at, record_key",
1378 )?;
1379 stmt.query_map([collection_name], map_operational_current_row)?
1380 .collect::<Result<Vec<_>, _>>()?
1381 };
1382
1383 Ok(OperationalTraceReport {
1384 collection_name: collection_name.to_owned(),
1385 record_key: record_key.map(str::to_owned),
1386 mutation_count: mutations.len(),
1387 current_count: current_rows.len(),
1388 mutations,
1389 current_rows,
1390 })
1391 }
1392
1393 pub fn read_operational_collection(
1396 &self,
1397 request: &OperationalReadRequest,
1398 ) -> Result<OperationalReadReport, EngineError> {
1399 if request.collection_name.trim().is_empty() {
1400 return Err(EngineError::InvalidWrite(
1401 "operational read collection_name must not be empty".to_owned(),
1402 ));
1403 }
1404 if request.filters.is_empty() {
1405 return Err(EngineError::InvalidWrite(
1406 "operational read requires at least one filter clause".to_owned(),
1407 ));
1408 }
1409
1410 let conn = self.connect()?;
1411 let record = load_operational_collection_record(&conn, &request.collection_name)?
1412 .ok_or_else(|| {
1413 EngineError::InvalidWrite(format!(
1414 "operational collection '{}' is not registered",
1415 request.collection_name
1416 ))
1417 })?;
1418 validate_append_only_operational_collection(&record, "read")?;
1419 let declared_fields = parse_operational_filter_fields(&record.filter_fields_json)
1420 .map_err(EngineError::InvalidWrite)?;
1421 let secondary_indexes =
1422 parse_operational_secondary_indexes_json(&record.secondary_indexes_json, record.kind)
1423 .map_err(EngineError::InvalidWrite)?;
1424 let applied_limit = operational_read_limit(request.limit)?;
1425 let filters = compile_operational_read_filters(&request.filters, &declared_fields)?;
1426 if let Some(report) = execute_operational_secondary_index_read(
1427 &conn,
1428 &request.collection_name,
1429 &filters,
1430 &secondary_indexes,
1431 applied_limit,
1432 )? {
1433 return Ok(report);
1434 }
1435 execute_operational_filtered_read(&conn, &request.collection_name, &filters, applied_limit)
1436 }
1437
1438 pub fn rebuild_operational_current(
1441 &self,
1442 collection_name: Option<&str>,
1443 ) -> Result<OperationalRepairReport, EngineError> {
1444 let mut conn = self.connect()?;
1445 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1446 let collections = if let Some(name) = collection_name {
1447 let maybe_kind: Option<String> = tx
1448 .query_row(
1449 "SELECT kind FROM operational_collections WHERE name = ?1",
1450 [name],
1451 |row| row.get(0),
1452 )
1453 .optional()?;
1454 let Some(kind) = maybe_kind else {
1455 return Err(EngineError::InvalidWrite(format!(
1456 "operational collection '{name}' is not registered"
1457 )));
1458 };
1459 if kind != OperationalCollectionKind::LatestState.as_str() {
1460 return Err(EngineError::InvalidWrite(format!(
1461 "operational collection '{name}' is not latest_state"
1462 )));
1463 }
1464 vec![name.to_owned()]
1465 } else {
1466 let mut stmt = tx.prepare(
1467 "SELECT name FROM operational_collections WHERE kind = 'latest_state' ORDER BY name",
1468 )?;
1469 stmt.query_map([], |row| row.get::<_, String>(0))?
1470 .collect::<Result<Vec<_>, _>>()?
1471 };
1472
1473 let rebuilt_rows = rebuild_operational_current_rows(&tx, &collections)?;
1474 for collection in &collections {
1475 let record = load_operational_collection_record(&tx, collection)?.ok_or_else(|| {
1476 EngineError::Bridge(format!(
1477 "operational collection '{collection}' missing during current rebuild"
1478 ))
1479 })?;
1480 let indexes = parse_operational_secondary_indexes_json(
1481 &record.secondary_indexes_json,
1482 record.kind,
1483 )
1484 .map_err(EngineError::InvalidWrite)?;
1485 if !indexes.is_empty() {
1486 rebuild_operational_secondary_index_entries(
1487 &tx,
1488 &record.name,
1489 record.kind,
1490 &indexes,
1491 )?;
1492 }
1493 }
1494
1495 persist_simple_provenance_event(
1496 &tx,
1497 "operational_current_rebuilt",
1498 collection_name.unwrap_or("*"),
1499 Some(serde_json::json!({
1500 "collections_rebuilt": collections.len(),
1501 "current_rows_rebuilt": rebuilt_rows,
1502 })),
1503 )?;
1504 tx.commit()?;
1505
1506 Ok(OperationalRepairReport {
1507 collections_rebuilt: collections.len(),
1508 current_rows_rebuilt: rebuilt_rows,
1509 })
1510 }
1511
1512 pub fn rebuild_projections(
1515 &self,
1516 target: ProjectionTarget,
1517 ) -> Result<ProjectionRepairReport, EngineError> {
1518 self.projections.rebuild_projections(target)
1519 }
1520
1521 pub fn rebuild_missing_projections(&self) -> Result<ProjectionRepairReport, EngineError> {
1524 self.projections.rebuild_missing_projections()
1525 }
1526
1527 pub fn register_fts_property_schema(
1536 &self,
1537 kind: &str,
1538 property_paths: &[String],
1539 separator: Option<&str>,
1540 ) -> Result<FtsPropertySchemaRecord, EngineError> {
1541 let specs: Vec<FtsPropertyPathSpec> = property_paths
1542 .iter()
1543 .map(|p| FtsPropertyPathSpec::scalar(p.clone()))
1544 .collect();
1545 self.register_fts_property_schema_with_entries(kind, &specs, separator, &[])
1546 }
1547
1548 pub fn register_fts_property_schema_with_entries(
1559 &self,
1560 kind: &str,
1561 entries: &[FtsPropertyPathSpec],
1562 separator: Option<&str>,
1563 exclude_paths: &[String],
1564 ) -> Result<FtsPropertySchemaRecord, EngineError> {
1565 let paths: Vec<String> = entries.iter().map(|e| e.path.clone()).collect();
1566 validate_fts_property_paths(&paths)?;
1567 for p in exclude_paths {
1568 if !p.starts_with("$.") {
1569 return Err(EngineError::InvalidWrite(format!(
1570 "exclude_paths entries must start with '$.' but got: {p}"
1571 )));
1572 }
1573 }
1574 let separator = separator.unwrap_or(" ");
1575 let paths_json = serialize_property_paths_json(entries, exclude_paths)?;
1576
1577 let mut conn = self.connect()?;
1578 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1579
1580 let previous_row: Option<(String, String)> = tx
1586 .query_row(
1587 "SELECT property_paths_json, separator FROM fts_property_schemas WHERE kind = ?1",
1588 [kind],
1589 |row| {
1590 let json: String = row.get(0)?;
1591 let sep: String = row.get(1)?;
1592 Ok((json, sep))
1593 },
1594 )
1595 .optional()?;
1596 let had_previous_schema = previous_row.is_some();
1597 let previous_recursive_paths: Vec<String> = previous_row
1598 .map(|(json, sep)| crate::writer::parse_property_schema_json(&json, &sep))
1599 .map_or(Vec::new(), |schema| {
1600 schema
1601 .paths
1602 .into_iter()
1603 .filter(|p| p.mode == crate::writer::PropertyPathMode::Recursive)
1604 .map(|p| p.path)
1605 .collect()
1606 });
1607 let new_recursive_paths: Vec<&str> = entries
1608 .iter()
1609 .filter(|e| e.mode == FtsPropertyPathMode::Recursive)
1610 .map(|e| e.path.as_str())
1611 .collect();
1612 let introduces_new_recursive = new_recursive_paths
1613 .iter()
1614 .any(|p| !previous_recursive_paths.iter().any(|prev| prev == p));
1615
1616 tx.execute(
1617 "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
1618 VALUES (?1, ?2, ?3) \
1619 ON CONFLICT(kind) DO UPDATE SET property_paths_json = ?2, separator = ?3",
1620 rusqlite::params![kind, paths_json, separator],
1621 )?;
1622
1623 let needs_rebuild = introduces_new_recursive || had_previous_schema;
1631 if needs_rebuild {
1632 tx.execute("DELETE FROM fts_node_properties WHERE kind = ?1", [kind])?;
1633 tx.execute(
1634 "DELETE FROM fts_node_property_positions WHERE kind = ?1",
1635 [kind],
1636 )?;
1637 crate::projection::insert_property_fts_rows_for_kind(&tx, kind)?;
1642 }
1643
1644 persist_simple_provenance_event(
1645 &tx,
1646 "fts_property_schema_registered",
1647 kind,
1648 Some(serde_json::json!({
1649 "property_paths": paths,
1650 "separator": separator,
1651 "exclude_paths": exclude_paths,
1652 "eager_rebuild": needs_rebuild,
1653 })),
1654 )?;
1655 tx.commit()?;
1656
1657 self.describe_fts_property_schema(kind)?.ok_or_else(|| {
1658 EngineError::Bridge("registered FTS property schema missing after commit".to_owned())
1659 })
1660 }
1661
1662 pub fn describe_fts_property_schema(
1667 &self,
1668 kind: &str,
1669 ) -> Result<Option<FtsPropertySchemaRecord>, EngineError> {
1670 let conn = self.connect()?;
1671 load_fts_property_schema_record(&conn, kind)
1672 }
1673
1674 pub fn list_fts_property_schemas(&self) -> Result<Vec<FtsPropertySchemaRecord>, EngineError> {
1679 let conn = self.connect()?;
1680 let mut stmt = conn.prepare(
1681 "SELECT kind, property_paths_json, separator, format_version \
1682 FROM fts_property_schemas ORDER BY kind",
1683 )?;
1684 let records = stmt
1685 .query_map([], |row| {
1686 let kind: String = row.get(0)?;
1687 let paths_json: String = row.get(1)?;
1688 let separator: String = row.get(2)?;
1689 let format_version: i64 = row.get(3)?;
1690 Ok(build_fts_property_schema_record(
1691 kind,
1692 &paths_json,
1693 separator,
1694 format_version,
1695 ))
1696 })?
1697 .collect::<Result<Vec<_>, _>>()?;
1698 Ok(records)
1699 }
1700
1701 pub fn remove_fts_property_schema(&self, kind: &str) -> Result<(), EngineError> {
1709 let mut conn = self.connect()?;
1710 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1711 let deleted = tx.execute("DELETE FROM fts_property_schemas WHERE kind = ?1", [kind])?;
1712 if deleted == 0 {
1713 return Err(EngineError::InvalidWrite(format!(
1714 "FTS property schema for kind '{kind}' is not registered"
1715 )));
1716 }
1717 persist_simple_provenance_event(&tx, "fts_property_schema_removed", kind, None)?;
1718 tx.commit()?;
1719 Ok(())
1720 }
1721
1722 pub fn restore_vector_profiles(&self) -> Result<ProjectionRepairReport, EngineError> {
1728 let conn = self.connect()?;
1729 let profiles: Vec<(String, String, i64)> = {
1730 let mut stmt = conn.prepare(
1731 "SELECT profile, table_name, dimension \
1732 FROM vector_profiles WHERE enabled = 1 ORDER BY profile",
1733 )?;
1734 stmt.query_map([], |row| {
1735 Ok((
1736 row.get::<_, String>(0)?,
1737 row.get::<_, String>(1)?,
1738 row.get::<_, i64>(2)?,
1739 ))
1740 })?
1741 .collect::<Result<Vec<_>, _>>()?
1742 };
1743
1744 for (profile, table_name, dimension) in &profiles {
1745 let dimension = usize::try_from(*dimension).map_err(|_| {
1746 EngineError::Bridge(format!("invalid vector profile dimension: {dimension}"))
1747 })?;
1748 self.schema_manager
1749 .ensure_vector_profile(&conn, profile, table_name, dimension)?;
1750 }
1751
1752 Ok(ProjectionRepairReport {
1753 targets: vec![ProjectionTarget::Vec],
1754 rebuilt_rows: profiles.len(),
1755 notes: vec![],
1756 })
1757 }
1758
    // Silences clippy's function-length lint for this method.
    #[allow(clippy::too_many_lines)]
    /// Recomputes an embedding for every chunk returned by `collect_regeneration_chunks`
    /// and replaces the contents of `vec_nodes_active` with the result.
    ///
    /// The work happens in two phases:
    ///
    /// 1. Outside any transaction: the config is validated, the chunks are collected, a
    ///    snapshot hash is computed, a `vector_regeneration_requested` event is persisted
    ///    and every chunk is embedded with `embedder`.
    /// 2. Inside an immediate transaction: the vector profile is ensured, the chunks are
    ///    collected again and re-hashed, and only when the hash still matches are the
    ///    contract persisted, `vec_nodes_active` emptied and the new embeddings inserted.
    ///
    /// On the failure paths that build a `VectorRegenerationFailure`, a
    /// `vector_regeneration_failed` event is written on a best-effort basis before the
    /// error is returned.
    ///
    /// # Errors
    /// Returns the engine error derived from a `VectorRegenerationFailure` when the config
    /// is invalid, the embedder fails, the embedder output has the wrong length or contains
    /// non-finite values, the vector capability is missing, the chunk snapshot changed
    /// during generation, or a chunk has no embedding at apply time. Other schema errors
    /// are returned as [`EngineError::Schema`]; database failures are propagated.
    pub fn regenerate_vector_embeddings(
        &self,
        embedder: &dyn QueryEmbedder,
        config: &VectorRegenerationConfig,
    ) -> Result<VectorRegenerationReport, EngineError> {
        let conn = self.connect()?;
        let identity = embedder.identity();
        // From here on `config` is the validated value returned by the validator.
        let config = validate_vector_regeneration_config(&conn, config, &identity)
            .map_err(|failure| failure.to_engine_error())?;
        let chunks = collect_regeneration_chunks(&conn)?;
        let payload = build_regeneration_input(&config, &identity, chunks.clone());
        // Hash of the pre-embedding input; compared against a fresh hash at apply time.
        let snapshot_hash = compute_snapshot_hash(&payload)?;
        let audit_metadata = VectorRegenerationAuditMetadata {
            profile: config.profile.clone(),
            model_identity: identity.model_identity.clone(),
            model_version: identity.model_version.clone(),
            chunk_count: chunks.len(),
            snapshot_hash: snapshot_hash.clone(),
            failure_class: None,
        };
        // Persisted directly on the connection, before the apply transaction is opened.
        persist_vector_regeneration_event(
            &conn,
            "vector_regeneration_requested",
            &config.profile,
            &audit_metadata,
        )?;
        let notes = vec!["vector embeddings regenerated via configured embedder".to_owned()];

        // chunk_id -> embedding serialized as little-endian bytes.
        let mut embedding_map: std::collections::HashMap<String, Vec<u8>> =
            std::collections::HashMap::with_capacity(chunks.len());
        for chunk in &chunks {
            let vector = match embedder.embed_query(&chunk.text_content) {
                Ok(vector) => vector,
                Err(error) => {
                    let failure = VectorRegenerationFailure::new(
                        VectorRegenerationFailureClass::EmbedderFailure,
                        format!("embedder failed for chunk '{}': {error}", chunk.chunk_id),
                    );
                    self.persist_vector_regeneration_failure_best_effort(
                        &config.profile,
                        &audit_metadata,
                        &failure,
                    );
                    return Err(failure.to_engine_error());
                }
            };
            // The embedder must return exactly the dimension it advertises.
            if vector.len() != identity.dimension {
                let failure = VectorRegenerationFailure::new(
                    VectorRegenerationFailureClass::InvalidEmbedderOutput,
                    format!(
                        "embedder produced {} values for chunk '{}', expected {}",
                        vector.len(),
                        chunk.chunk_id,
                        identity.dimension
                    ),
                );
                self.persist_vector_regeneration_failure_best_effort(
                    &config.profile,
                    &audit_metadata,
                    &failure,
                );
                return Err(failure.to_engine_error());
            }
            // Reject vectors containing non-finite values.
            if vector.iter().any(|value| !value.is_finite()) {
                let failure = VectorRegenerationFailure::new(
                    VectorRegenerationFailureClass::InvalidEmbedderOutput,
                    format!(
                        "embedder returned non-finite values for chunk '{}'",
                        chunk.chunk_id
                    ),
                );
                self.persist_vector_regeneration_failure_best_effort(
                    &config.profile,
                    &audit_metadata,
                    &failure,
                );
                return Err(failure.to_engine_error());
            }
            let bytes: Vec<u8> = vector
                .iter()
                .flat_map(|value| value.to_le_bytes())
                .collect();
            embedding_map.insert(chunk.chunk_id.clone(), bytes);
        }

        // Apply phase: everything below runs inside one immediate transaction.
        let mut conn = conn;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
        match self.schema_manager.ensure_vector_profile(
            &tx,
            &config.profile,
            &config.table_name,
            identity.dimension,
        ) {
            Ok(()) => {}
            Err(SchemaError::MissingCapability(message)) => {
                let failure = VectorRegenerationFailure::new(
                    VectorRegenerationFailureClass::UnsupportedVecCapability,
                    message,
                );
                // Drop the transaction before the failure event is written on a new connection.
                drop(tx);
                self.persist_vector_regeneration_failure_best_effort(
                    &config.profile,
                    &audit_metadata,
                    &failure,
                );
                return Err(failure.to_engine_error());
            }
            Err(error) => return Err(EngineError::Schema(error)),
        }
        // Re-collect inside the transaction and compare hashes to detect concurrent changes.
        let apply_chunks = collect_regeneration_chunks(&tx)?;
        let apply_payload = build_regeneration_input(&config, &identity, apply_chunks.clone());
        let apply_hash = compute_snapshot_hash(&apply_payload)?;
        if apply_hash != snapshot_hash {
            let failure = VectorRegenerationFailure::new(
                VectorRegenerationFailureClass::SnapshotDrift,
                "chunk snapshot changed during generation; retry".to_owned(),
            );
            drop(tx);
            self.persist_vector_regeneration_failure_best_effort(
                &config.profile,
                &audit_metadata,
                &failure,
            );
            return Err(failure.to_engine_error());
        }
        persist_vector_contract(&tx, &config, &identity, &snapshot_hash)?;
        // Full replacement: every existing row is removed before the new ones are inserted.
        tx.execute("DELETE FROM vec_nodes_active", [])?;
        let mut stmt = tx
            .prepare_cached("INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES (?1, ?2)")?;
        let mut regenerated_rows = 0usize;
        for chunk in &apply_chunks {
            let Some(embedding) = embedding_map.remove(&chunk.chunk_id) else {
                // The statement borrows the transaction, so it has to be dropped first.
                drop(stmt);
                drop(tx);
                let failure = VectorRegenerationFailure::new(
                    VectorRegenerationFailureClass::InvalidEmbedderOutput,
                    format!(
                        "embedder did not produce a vector for chunk '{}'",
                        chunk.chunk_id
                    ),
                );
                self.persist_vector_regeneration_failure_best_effort(
                    &config.profile,
                    &audit_metadata,
                    &failure,
                );
                return Err(failure.to_engine_error());
            };
            stmt.execute(rusqlite::params![chunk.chunk_id.as_str(), embedding])?;
            regenerated_rows += 1;
        }
        drop(stmt);
        // The apply event is part of the same transaction as the data change.
        persist_vector_regeneration_event(
            &tx,
            "vector_regeneration_apply",
            &config.profile,
            &audit_metadata,
        )?;
        tx.commit()?;

        Ok(VectorRegenerationReport {
            profile: config.profile.clone(),
            table_name: config.table_name.clone(),
            dimension: identity.dimension,
            // Count taken from the chunks collected before embedding.
            total_chunks: chunks.len(),
            regenerated_rows,
            contract_persisted: true,
            notes,
        })
    }
1945
1946 fn persist_vector_regeneration_failure_best_effort(
1947 &self,
1948 profile: &str,
1949 metadata: &VectorRegenerationAuditMetadata,
1950 failure: &VectorRegenerationFailure,
1951 ) {
1952 let Ok(conn) = self.connect() else {
1953 return;
1954 };
1955 let failure_metadata = VectorRegenerationAuditMetadata {
1956 profile: metadata.profile.clone(),
1957 model_identity: metadata.model_identity.clone(),
1958 model_version: metadata.model_version.clone(),
1959 chunk_count: metadata.chunk_count,
1960 snapshot_hash: metadata.snapshot_hash.clone(),
1961 failure_class: Some(failure.failure_class_label().to_owned()),
1962 };
1963 let _ = persist_vector_regeneration_event(
1964 &conn,
1965 "vector_regeneration_failed",
1966 profile,
1967 &failure_metadata,
1968 );
1969 }
1970
1971 pub fn trace_source(&self, source_ref: &str) -> Result<TraceReport, EngineError> {
1974 let conn = self.connect()?;
1975
1976 let node_logical_ids = collect_strings(
1977 &conn,
1978 "SELECT logical_id FROM nodes WHERE source_ref = ?1 ORDER BY created_at",
1979 source_ref,
1980 )?;
1981 let action_ids = collect_strings(
1982 &conn,
1983 "SELECT id FROM actions WHERE source_ref = ?1 ORDER BY created_at",
1984 source_ref,
1985 )?;
1986 let operational_mutation_ids = collect_strings(
1987 &conn,
1988 "SELECT id FROM operational_mutations WHERE source_ref = ?1 ORDER BY mutation_order",
1989 source_ref,
1990 )?;
1991
1992 Ok(TraceReport {
1993 source_ref: source_ref.to_owned(),
1994 node_rows: count_source_ref(&conn, "nodes", source_ref)?,
1995 edge_rows: count_source_ref(&conn, "edges", source_ref)?,
1996 action_rows: count_source_ref(&conn, "actions", source_ref)?,
1997 operational_mutation_rows: count_source_ref(
1998 &conn,
1999 "operational_mutations",
2000 source_ref,
2001 )?,
2002 node_logical_ids,
2003 action_ids,
2004 operational_mutation_ids,
2005 })
2006 }
2007
    // Silences clippy's function-length lint for this method.
    #[allow(clippy::too_many_lines)]
    /// Reactivates a retired logical id by clearing `superseded_at` on its most recently
    /// superseded node row, then refreshes the derived rows that belong to it.
    ///
    /// If the logical id already has an active row, nothing is changed and a report with
    /// `was_noop: true` is returned. Otherwise, inside one immediate transaction:
    ///
    /// - the latest superseded node row is made active again;
    /// - edges are restored through `restore_validated_edges`, scoped by the latest
    ///   `node_retire` provenance event for the logical id (no edges are restored when no
    ///   such event exists);
    /// - `fts_nodes` rows for the logical id are deleted and re-inserted from `chunks`;
    /// - `fts_node_properties` rows are deleted and rebuilt for the single node;
    /// - a `restore_logical_id` provenance event is recorded.
    ///
    /// `restored_chunk_rows` and `restored_vec_rows` are counts of existing rows; this
    /// method does not write to `chunks`.
    ///
    /// # Errors
    /// Returns [`EngineError::InvalidWrite`] when the logical id has no superseded row.
    /// Database failures are propagated.
    pub fn restore_logical_id(
        &self,
        logical_id: &str,
    ) -> Result<LogicalRestoreReport, EngineError> {
        let mut conn = self.connect()?;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;

        let active_count: i64 = tx.query_row(
            "SELECT count(*) FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
            [logical_id],
            |row| row.get(0),
        )?;
        // An active row already exists: report a no-op; the transaction is dropped uncommitted.
        if active_count > 0 {
            return Ok(LogicalRestoreReport {
                logical_id: logical_id.to_owned(),
                was_noop: true,
                restored_node_rows: 0,
                restored_edge_rows: 0,
                restored_chunk_rows: 0,
                restored_fts_rows: 0,
                restored_property_fts_rows: 0,
                restored_vec_rows: 0,
                skipped_edges: Vec::new(),
                notes: vec!["logical_id already active".to_owned()],
            });
        }

        // Pick the most recently superseded row; ties are broken by created_at, then rowid.
        let restored_node: Option<(String, String)> = tx
            .query_row(
                "SELECT row_id, kind FROM nodes \
                 WHERE logical_id = ?1 AND superseded_at IS NOT NULL \
                 ORDER BY superseded_at DESC, created_at DESC, rowid DESC LIMIT 1",
                [logical_id],
                |row| Ok((row.get(0)?, row.get(1)?)),
            )
            .optional()?;
        let (restored_node_row_id, restored_kind) = restored_node.ok_or_else(|| {
            EngineError::InvalidWrite(format!("logical_id '{logical_id}' is not retired"))
        })?;

        tx.execute(
            "UPDATE nodes SET superseded_at = NULL WHERE row_id = ?1",
            [restored_node_row_id.as_str()],
        )?;

        // The latest `node_retire` event for this logical id scopes the edge restoration.
        let retire_scope: Option<(i64, Option<String>, i64)> = tx
            .query_row(
                "SELECT rowid, source_ref, created_at FROM provenance_events \
                 WHERE event_type = 'node_retire' AND subject = ?1 \
                 ORDER BY created_at DESC, rowid DESC LIMIT 1",
                [logical_id],
                |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
            )
            .optional()?;
        let (restored_edge_rows, skipped_edges) = if let Some((
            retire_event_rowid,
            retire_source_ref,
            retire_created_at,
        )) = retire_scope
        {
            restore_validated_edges(
                &tx,
                logical_id,
                retire_source_ref.as_deref(),
                retire_created_at,
                retire_event_rowid,
            )?
        } else {
            // No retire event recorded: no edges are restored.
            (0, Vec::new())
        };

        // Chunks are only counted here, not modified.
        let restored_chunk_rows: usize = tx
            .query_row(
                "SELECT count(*) FROM chunks WHERE node_logical_id = ?1",
                [logical_id],
                |row| row.get::<_, i64>(0),
            )
            .map(i64_to_usize)?;
        // Rebuild the chunk FTS rows from scratch so stale entries cannot survive.
        tx.execute(
            "DELETE FROM fts_nodes WHERE node_logical_id = ?1",
            [logical_id],
        )?;
        let restored_fts_rows = tx.execute(
            "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
             SELECT id, node_logical_id, ?2, text_content \
             FROM chunks WHERE node_logical_id = ?1",
            rusqlite::params![logical_id, restored_kind],
        )?;
        let restored_vec_rows = count_vec_rows_for_logical_id(&tx, logical_id)?;

        // Same delete-then-rebuild approach for the property FTS rows of this node.
        tx.execute(
            "DELETE FROM fts_node_properties WHERE node_logical_id = ?1",
            [logical_id],
        )?;
        let restored_property_fts_rows =
            rebuild_single_node_property_fts(&tx, logical_id, &restored_kind)?;

        persist_simple_provenance_event(
            &tx,
            "restore_logical_id",
            logical_id,
            Some(serde_json::json!({
                "restored_node_rows": 1,
                "restored_edge_rows": restored_edge_rows,
                "restored_chunk_rows": restored_chunk_rows,
                "restored_fts_rows": restored_fts_rows,
                "restored_property_fts_rows": restored_property_fts_rows,
                "restored_vec_rows": restored_vec_rows,
            })),
        )?;
        tx.commit()?;

        Ok(LogicalRestoreReport {
            logical_id: logical_id.to_owned(),
            was_noop: false,
            restored_node_rows: 1,
            restored_edge_rows,
            restored_chunk_rows,
            restored_fts_rows,
            restored_property_fts_rows,
            restored_vec_rows,
            skipped_edges,
            notes: Vec::new(),
        })
    }
2138
2139 pub fn purge_logical_id(&self, logical_id: &str) -> Result<LogicalPurgeReport, EngineError> {
2143 let mut conn = self.connect()?;
2144 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2145
2146 let active_count: i64 = tx.query_row(
2147 "SELECT count(*) FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
2148 [logical_id],
2149 |row| row.get(0),
2150 )?;
2151 if active_count > 0 {
2152 return Ok(LogicalPurgeReport {
2153 logical_id: logical_id.to_owned(),
2154 was_noop: true,
2155 deleted_node_rows: 0,
2156 deleted_edge_rows: 0,
2157 deleted_chunk_rows: 0,
2158 deleted_fts_rows: 0,
2159 deleted_vec_rows: 0,
2160 notes: vec!["logical_id is active; purge skipped".to_owned()],
2161 });
2162 }
2163
2164 let node_rows: i64 = tx.query_row(
2165 "SELECT count(*) FROM nodes WHERE logical_id = ?1",
2166 [logical_id],
2167 |row| row.get(0),
2168 )?;
2169 if node_rows == 0 {
2170 return Err(EngineError::InvalidWrite(format!(
2171 "logical_id '{logical_id}' does not exist"
2172 )));
2173 }
2174
2175 let deleted_vec_rows = delete_vec_rows_for_logical_id(&tx, logical_id)?;
2176 let deleted_fts_rows = tx.execute(
2177 "DELETE FROM fts_nodes WHERE node_logical_id = ?1",
2178 [logical_id],
2179 )?;
2180 let deleted_edge_rows = tx.execute(
2181 "DELETE FROM edges WHERE source_logical_id = ?1 OR target_logical_id = ?1",
2182 [logical_id],
2183 )?;
2184 let deleted_chunk_rows = tx.execute(
2185 "DELETE FROM chunks WHERE node_logical_id = ?1",
2186 [logical_id],
2187 )?;
2188 let deleted_node_rows =
2189 tx.execute("DELETE FROM nodes WHERE logical_id = ?1", [logical_id])?;
2190 tx.execute(
2191 "DELETE FROM node_access_metadata WHERE logical_id = ?1",
2192 [logical_id],
2193 )?;
2194
2195 persist_simple_provenance_event(
2196 &tx,
2197 "purge_logical_id",
2198 logical_id,
2199 Some(serde_json::json!({
2200 "deleted_node_rows": deleted_node_rows,
2201 "deleted_edge_rows": deleted_edge_rows,
2202 "deleted_chunk_rows": deleted_chunk_rows,
2203 "deleted_fts_rows": deleted_fts_rows,
2204 "deleted_vec_rows": deleted_vec_rows,
2205 })),
2206 )?;
2207 tx.commit()?;
2208
2209 Ok(LogicalPurgeReport {
2210 logical_id: logical_id.to_owned(),
2211 was_noop: false,
2212 deleted_node_rows,
2213 deleted_edge_rows,
2214 deleted_chunk_rows,
2215 deleted_fts_rows,
2216 deleted_vec_rows,
2217 notes: Vec::new(),
2218 })
2219 }
2220
2221 pub fn purge_provenance_events(
2231 &self,
2232 before_timestamp: i64,
2233 options: &ProvenancePurgeOptions,
2234 ) -> Result<ProvenancePurgeReport, EngineError> {
2235 let mut conn = self.connect()?;
2236 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2237
2238 let preserved_types: Vec<&str> = if options.preserve_event_types.is_empty() {
2239 vec!["excise", "purge_logical_id"]
2240 } else {
2241 options
2242 .preserve_event_types
2243 .iter()
2244 .map(String::as_str)
2245 .collect()
2246 };
2247
2248 let placeholders: String = (0..preserved_types.len())
2250 .map(|i| format!("?{}", i + 2))
2251 .collect::<Vec<_>>()
2252 .join(", ");
2253 let count_query = format!(
2254 "SELECT count(*) FROM provenance_events \
2255 WHERE created_at < ?1 AND event_type NOT IN ({placeholders})"
2256 );
2257 let delete_query = format!(
2258 "DELETE FROM provenance_events WHERE rowid IN (\
2259 SELECT rowid FROM provenance_events \
2260 WHERE created_at < ?1 AND event_type NOT IN ({placeholders}) \
2261 LIMIT 10000)"
2262 );
2263
2264 let bind_params = |stmt: &mut rusqlite::Statement<'_>| -> Result<(), rusqlite::Error> {
2265 stmt.raw_bind_parameter(1, before_timestamp)?;
2266 for (i, event_type) in preserved_types.iter().enumerate() {
2267 stmt.raw_bind_parameter(i + 2, *event_type)?;
2268 }
2269 Ok(())
2270 };
2271
2272 let events_deleted = if options.dry_run {
2273 let mut stmt = tx.prepare(&count_query)?;
2274 bind_params(&mut stmt)?;
2275 stmt.raw_query()
2276 .next()?
2277 .map_or(0, |row| row.get::<_, u64>(0).unwrap_or(0))
2278 } else {
2279 let mut total_deleted: u64 = 0;
2280 loop {
2281 let mut stmt = tx.prepare(&delete_query)?;
2282 bind_params(&mut stmt)?;
2283 let deleted = stmt.raw_execute()?;
2284 if deleted == 0 {
2285 break;
2286 }
2287 total_deleted += deleted as u64;
2288 }
2289 total_deleted
2290 };
2291
2292 let total_after: u64 =
2293 tx.query_row("SELECT count(*) FROM provenance_events", [], |row| {
2294 row.get(0)
2295 })?;
2296
2297 let oldest_remaining: Option<i64> = tx
2298 .query_row("SELECT MIN(created_at) FROM provenance_events", [], |row| {
2299 row.get(0)
2300 })
2301 .optional()?
2302 .flatten();
2303
2304 if !options.dry_run {
2305 tx.commit()?;
2306 }
2307
2308 let events_preserved = if options.dry_run {
2311 total_after - events_deleted
2312 } else {
2313 total_after
2314 };
2315
2316 Ok(ProvenancePurgeReport {
2317 events_deleted,
2318 events_preserved,
2319 oldest_remaining,
2320 })
2321 }
2322
2323 #[allow(clippy::too_many_lines)]
2327 pub fn excise_source(&self, source_ref: &str) -> Result<TraceReport, EngineError> {
2328 let mut conn = self.connect()?;
2329
2330 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2331 let affected_operational_collections = collect_strings_tx(
2332 &tx,
2333 "SELECT DISTINCT m.collection_name \
2334 FROM operational_mutations m \
2335 JOIN operational_collections c ON c.name = m.collection_name \
2336 WHERE m.source_ref = ?1 AND c.kind = 'latest_state' \
2337 ORDER BY m.collection_name",
2338 source_ref,
2339 )?;
2340
2341 let pairs: Vec<(String, String)> = {
2343 let mut stmt = tx.prepare(
2344 "SELECT row_id, logical_id FROM nodes \
2345 WHERE source_ref = ?1 AND superseded_at IS NULL",
2346 )?;
2347 stmt.query_map([source_ref], |row| {
2348 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
2349 })?
2350 .collect::<Result<Vec<_>, _>>()?
2351 };
2352 let affected_logical_ids: Vec<String> = pairs
2353 .iter()
2354 .map(|(_, logical_id)| logical_id.clone())
2355 .collect();
2356
2357 tx.execute(
2359 "UPDATE nodes SET superseded_at = unixepoch() \
2360 WHERE source_ref = ?1 AND superseded_at IS NULL",
2361 [source_ref],
2362 )?;
2363 tx.execute(
2364 "UPDATE edges SET superseded_at = unixepoch() \
2365 WHERE source_ref = ?1 AND superseded_at IS NULL",
2366 [source_ref],
2367 )?;
2368 tx.execute(
2369 "UPDATE actions SET superseded_at = unixepoch() \
2370 WHERE source_ref = ?1 AND superseded_at IS NULL",
2371 [source_ref],
2372 )?;
2373 clear_operational_current_rows(&tx, &affected_operational_collections)?;
2374 tx.execute(
2375 "DELETE FROM operational_mutations WHERE source_ref = ?1",
2376 [source_ref],
2377 )?;
2378 for logical_id in &affected_logical_ids {
2379 delete_vec_rows_for_logical_id(&tx, logical_id)?;
2380 tx.execute(
2381 "DELETE FROM chunks WHERE node_logical_id = ?1",
2382 [logical_id.as_str()],
2383 )?;
2384 }
2385
2386 for (excised_row_id, logical_id) in &pairs {
2388 let prior: Option<String> = tx
2389 .query_row(
2390 "SELECT row_id FROM nodes \
2391 WHERE logical_id = ?1 AND row_id != ?2 \
2392 ORDER BY created_at DESC LIMIT 1",
2393 [logical_id.as_str(), excised_row_id.as_str()],
2394 |row| row.get(0),
2395 )
2396 .optional()?;
2397 if let Some(prior_id) = prior {
2398 tx.execute(
2399 "UPDATE nodes SET superseded_at = NULL WHERE row_id = ?1",
2400 [prior_id.as_str()],
2401 )?;
2402 }
2403 }
2404
2405 for logical_id in &affected_logical_ids {
2406 let has_active_node = tx
2407 .query_row(
2408 "SELECT 1 FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL LIMIT 1",
2409 [logical_id.as_str()],
2410 |row| row.get::<_, i64>(0),
2411 )
2412 .optional()?
2413 .is_some();
2414 if !has_active_node {
2415 tx.execute(
2416 "DELETE FROM node_access_metadata WHERE logical_id = ?1",
2417 [logical_id.as_str()],
2418 )?;
2419 }
2420 }
2421
2422 rebuild_operational_current_rows(&tx, &affected_operational_collections)?;
2423
2424 tx.execute("DELETE FROM fts_nodes", [])?;
2427 tx.execute(
2428 r"
2429 INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content)
2430 SELECT c.id, n.logical_id, n.kind, c.text_content
2431 FROM chunks c
2432 JOIN nodes n
2433 ON n.logical_id = c.node_logical_id
2434 AND n.superseded_at IS NULL
2435 ",
2436 [],
2437 )?;
2438
2439 rebuild_property_fts_in_tx(&tx)?;
2441
2442 tx.execute(
2446 "INSERT INTO provenance_events (id, event_type, subject, source_ref) \
2447 VALUES (?1, 'excise_source', ?2, ?2)",
2448 rusqlite::params![new_id(), source_ref],
2449 )?;
2450
2451 tx.commit()?;
2452
2453 self.trace_source(source_ref)
2454 }
2455
2456 pub fn safe_export(
2460 &self,
2461 destination_path: impl AsRef<Path>,
2462 options: SafeExportOptions,
2463 ) -> Result<SafeExportManifest, EngineError> {
2464 let destination_path = destination_path.as_ref();
2465
2466 let conn = self.connect()?;
2470
2471 if options.force_checkpoint {
2472 trace_info!("safe_export: wal checkpoint started");
2473 let (busy, log, checkpointed): (i64, i64, i64) =
2474 conn.query_row("PRAGMA wal_checkpoint(FULL)", [], |row| {
2475 Ok((row.get(0)?, row.get(1)?, row.get(2)?))
2476 })?;
2477 if busy != 0 {
2478 trace_warn!(
2479 busy,
2480 log_frames = log,
2481 checkpointed_frames = checkpointed,
2482 "safe_export: wal checkpoint blocked by active readers"
2483 );
2484 return Err(EngineError::Bridge(format!(
2485 "WAL checkpoint blocked: {busy} active reader(s) prevented a full checkpoint; \
2486 log frames={log}, checkpointed={checkpointed}; \
2487 retry export when no readers are active"
2488 )));
2489 }
2490 trace_info!(
2491 log_frames = log,
2492 checkpointed_frames = checkpointed,
2493 "safe_export: wal checkpoint completed"
2494 );
2495 }
2496
2497 let schema_version: u32 = conn
2498 .query_row(
2499 "SELECT COALESCE(MAX(version), 0) FROM fathom_schema_migrations",
2500 [],
2501 |row| row.get(0),
2502 )
2503 .unwrap_or(0);
2504
2505 if let Some(parent) = destination_path.parent() {
2508 fs::create_dir_all(parent)?;
2509 }
2510 conn.backup(DatabaseName::Main, destination_path, None)?;
2511
2512 drop(conn);
2513
2514 let page_count: u64 = {
2518 let export_conn = rusqlite::Connection::open_with_flags(
2519 destination_path,
2520 rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY
2521 | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX,
2522 )?;
2523 export_conn.query_row("PRAGMA page_count", [], |row| row.get(0))?
2524 };
2525
2526 let sha256 = {
2529 let mut file = fs::File::open(destination_path)?;
2530 let mut hasher = Sha256::new();
2531 io::copy(&mut file, &mut hasher)?;
2532 format!("{:x}", hasher.finalize())
2533 };
2534
2535 let exported_at = SystemTime::now()
2537 .duration_since(SystemTime::UNIX_EPOCH)
2538 .map_err(|e| EngineError::Bridge(format!("system clock error: {e}")))?
2539 .as_secs();
2540
2541 let manifest = SafeExportManifest {
2542 exported_at,
2543 sha256,
2544 schema_version,
2545 protocol_version: EXPORT_PROTOCOL_VERSION,
2546 page_count,
2547 };
2548
2549 let manifest_path = {
2551 let mut p = destination_path.to_path_buf();
2552 let stem = p
2553 .file_name()
2554 .map(|n| format!("{}.export-manifest.json", n.to_string_lossy()))
2555 .ok_or_else(|| {
2556 EngineError::Bridge("destination path has no filename".to_owned())
2557 })?;
2558 p.set_file_name(stem);
2559 p
2560 };
2561 let manifest_json =
2562 serde_json::to_string(&manifest).map_err(|e| EngineError::Bridge(e.to_string()))?;
2563
2564 let manifest_tmp = manifest_path.with_extension("json.tmp");
2567 if let Err(e) = fs::write(&manifest_tmp, &manifest_json)
2568 .and_then(|()| fs::rename(&manifest_tmp, &manifest_path))
2569 {
2570 let _ = fs::remove_file(&manifest_tmp);
2571 return Err(e.into());
2572 }
2573
2574 Ok(manifest)
2575 }
2576}
2577
/// Row-shaped record of a persisted vector embedding contract.
///
/// The field names mirror the columns that `persist_vector_contract` writes to the
/// `vector_embedding_contracts` table (that table additionally has an `updated_at`
/// column which has no field here).
// NOTE(review): the struct is marked `dead_code`; no reader or constructor is visible
// in this part of the file — confirm whether it is still needed.
#[allow(dead_code)]
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
struct VectorEmbeddingContractRecord {
    /// Vector profile name; `validate_existing_contract_version` looks contracts up by it.
    profile: String,
    /// Name of the vec table the contract applies to.
    table_name: String,
    /// Embedder model identity, taken from the embedder when the contract is persisted.
    model_identity: String,
    /// Embedder model version, taken from the embedder when the contract is persisted.
    model_version: String,
    /// Embedding dimension.
    dimension: usize,
    /// Normalization policy reported by the embedder.
    normalization_policy: String,
    /// Chunking policy from the regeneration config.
    chunking_policy: String,
    /// Preprocessing policy from the regeneration config.
    preprocessing_policy: String,
    /// JSON text; `persist_vector_contract` currently always stores `"[]"`.
    generator_command_json: String,
    /// Set to `unixepoch()` by `persist_vector_contract`.
    applied_at: i64,
    /// Hex snapshot hash supplied by the caller of `persist_vector_contract`.
    snapshot_hash: String,
    /// Contract format version; compared against `CURRENT_VECTOR_CONTRACT_FORMAT_VERSION`.
    contract_format_version: i64,
}
2594
/// One chunk of text fed into vector regeneration.
///
/// Instances are produced by `collect_regeneration_chunks`, which joins `chunks`
/// with the active (`superseded_at IS NULL`) node that owns each chunk.
// Serialized as part of `VectorRegenerationInput`, so field order and names feed the
// snapshot hash — do not reorder or rename casually.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
struct VectorRegenerationInputChunk {
    /// `chunks.id`.
    chunk_id: String,
    /// `chunks.node_logical_id`.
    node_logical_id: String,
    /// Kind of the owning node (`nodes.kind`).
    kind: String,
    /// `chunks.text_content`.
    text_content: String,
    /// `chunks.byte_start`, when recorded.
    byte_start: Option<i64>,
    /// `chunks.byte_end`, when recorded.
    byte_end: Option<i64>,
    /// Source reference of the owning node (`nodes.source_ref`), when recorded.
    source_ref: Option<String>,
    /// `chunks.created_at`.
    created_at: i64,
}
2606
/// Complete, serializable input of a vector regeneration run.
///
/// Built by `build_regeneration_input` from the regeneration config, the embedder
/// identity and the collected chunks. `compute_snapshot_hash` serializes this struct
/// to JSON and hashes the bytes.
// The derived `Serialize` emits fields in declaration order, so reordering or renaming
// fields changes the JSON bytes and therefore every snapshot hash.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
struct VectorRegenerationInput {
    /// Copied from the regeneration config.
    profile: String,
    /// Copied from the regeneration config.
    table_name: String,
    /// Copied from the embedder identity.
    model_identity: String,
    /// Copied from the embedder identity.
    model_version: String,
    /// Copied from the embedder identity.
    dimension: usize,
    /// Copied from the embedder identity.
    normalization_policy: String,
    /// Copied from the regeneration config.
    chunking_policy: String,
    /// Copied from the regeneration config.
    preprocessing_policy: String,
    /// Chunks included in this run.
    chunks: Vec<VectorRegenerationInputChunk>,
}
2619
/// Coarse classification of why a vector regeneration attempt failed.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum VectorRegenerationFailureClass {
    InvalidContract,
    EmbedderFailure,
    InvalidEmbedderOutput,
    SnapshotDrift,
    UnsupportedVecCapability,
}

impl VectorRegenerationFailureClass {
    /// Human-readable label used in error messages and audit metadata.
    fn label(self) -> &'static str {
        match self {
            Self::InvalidContract => "invalid contract",
            Self::EmbedderFailure => "embedder failure",
            Self::InvalidEmbedderOutput => "invalid embedder output",
            Self::SnapshotDrift => "snapshot drift",
            Self::UnsupportedVecCapability => "unsupported vec capability",
        }
    }

    /// Whether this class of failure is flagged as retryable; only snapshot drift is.
    fn retryable(self) -> bool {
        // Spelled out per variant so that adding a variant forces an explicit decision.
        match self {
            Self::SnapshotDrift => true,
            Self::InvalidContract
            | Self::EmbedderFailure
            | Self::InvalidEmbedderOutput
            | Self::UnsupportedVecCapability => false,
        }
    }
}
2644
/// A classified vector regeneration failure: a failure class plus free-form detail.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct VectorRegenerationFailure {
    /// Classification; determines the label and the retryable flag in error messages.
    class: VectorRegenerationFailureClass,
    /// Human-readable description of what went wrong.
    detail: String,
}
2650
2651impl VectorRegenerationFailure {
2652 pub(crate) fn new(class: VectorRegenerationFailureClass, detail: impl Into<String>) -> Self {
2653 Self {
2654 class,
2655 detail: detail.into(),
2656 }
2657 }
2658
2659 fn to_engine_error(&self) -> EngineError {
2660 let retry_suffix = if self.class.retryable() {
2661 " [retryable]"
2662 } else {
2663 ""
2664 };
2665 EngineError::Bridge(format!(
2666 "vector regeneration {}: {}{}",
2667 self.class.label(),
2668 self.detail,
2669 retry_suffix
2670 ))
2671 }
2672
2673 fn failure_class_label(&self) -> &'static str {
2674 self.class.label()
2675 }
2676}
2677
/// Metadata attached to vector regeneration provenance events.
///
/// `serialize_audit_metadata` turns this into the JSON stored in the
/// `metadata_json` column by `persist_vector_regeneration_event`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
struct VectorRegenerationAuditMetadata {
    /// Vector profile name.
    profile: String,
    /// Embedder model identity.
    model_identity: String,
    /// Embedder model version.
    model_version: String,
    /// Number of chunks involved in the run.
    chunk_count: usize,
    /// Snapshot hash of the regeneration input.
    snapshot_hash: String,
    /// Failure class label; the key is omitted from the JSON entirely when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    failure_class: Option<String>,
}
2688
/// Retention policy of an operational collection, as deserialized from JSON.
///
/// The JSON is internally tagged by a `mode` key with snake_case variant names:
/// `{"mode": "keep_all"}`,
/// `{"mode": "purge_before_seconds", "max_age_seconds": N}` or
/// `{"mode": "keep_last", "max_rows": N}`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize)]
#[serde(tag = "mode", rename_all = "snake_case")]
enum OperationalRetentionPolicy {
    /// No retention limit is configured.
    KeepAll,
    /// Age-based retention with a maximum age in seconds.
    PurgeBeforeSeconds { max_age_seconds: i64 },
    /// Count-based retention with a maximum number of rows.
    KeepLast { max_rows: usize },
}
2696
2697pub fn load_vector_regeneration_config(
2700 path: impl AsRef<Path>,
2701) -> Result<VectorRegenerationConfig, EngineError> {
2702 let path = path.as_ref();
2703 let raw = fs::read_to_string(path)?;
2704 match path.extension().and_then(|ext| ext.to_str()) {
2705 Some("toml") => {
2706 toml::from_str(&raw).map_err(|error| EngineError::Bridge(error.to_string()))
2707 }
2708 Some("json") | None => {
2709 serde_json::from_str(&raw).map_err(|error| EngineError::Bridge(error.to_string()))
2710 }
2711 Some(other) => Err(EngineError::Bridge(format!(
2712 "unsupported vector regeneration config extension: {other}"
2713 ))),
2714 }
2715}
2716
2717fn validate_vector_regeneration_config(
2718 conn: &rusqlite::Connection,
2719 config: &VectorRegenerationConfig,
2720 identity: &QueryEmbedderIdentity,
2721) -> Result<VectorRegenerationConfig, VectorRegenerationFailure> {
2722 let profile = validate_bounded_text("profile", &config.profile, MAX_PROFILE_LEN)?;
2723 let table_name = validate_bounded_text("table_name", &config.table_name, MAX_PROFILE_LEN)?;
2724 if table_name != "vec_nodes_active" {
2725 return Err(VectorRegenerationFailure::new(
2726 VectorRegenerationFailureClass::InvalidContract,
2727 format!("table_name must be vec_nodes_active, got '{table_name}'"),
2728 ));
2729 }
2730 if identity.dimension == 0 {
2731 return Err(VectorRegenerationFailure::new(
2732 VectorRegenerationFailureClass::InvalidContract,
2733 "embedder reports dimension 0".to_owned(),
2734 ));
2735 }
2736 let chunking_policy =
2737 validate_bounded_text("chunking_policy", &config.chunking_policy, MAX_POLICY_LEN)?;
2738 let preprocessing_policy = validate_bounded_text(
2739 "preprocessing_policy",
2740 &config.preprocessing_policy,
2741 MAX_POLICY_LEN,
2742 )?;
2743
2744 if let Some(existing_dimension) = current_vector_profile_dimension(conn, &profile)?
2745 && existing_dimension != identity.dimension
2746 {
2747 return Err(VectorRegenerationFailure::new(
2748 VectorRegenerationFailureClass::InvalidContract,
2749 format!(
2750 "embedder dimension {} does not match existing vector profile dimension {}",
2751 identity.dimension, existing_dimension
2752 ),
2753 ));
2754 }
2755
2756 validate_existing_contract_version(conn, &profile)?;
2757
2758 let normalized = VectorRegenerationConfig {
2759 profile,
2760 table_name,
2761 chunking_policy,
2762 preprocessing_policy,
2763 };
2764 let serialized = serde_json::to_vec(&normalized).map_err(|error| {
2765 VectorRegenerationFailure::new(
2766 VectorRegenerationFailureClass::InvalidContract,
2767 error.to_string(),
2768 )
2769 })?;
2770 if serialized.len() > MAX_CONTRACT_JSON_BYTES {
2771 return Err(VectorRegenerationFailure::new(
2772 VectorRegenerationFailureClass::InvalidContract,
2773 format!("serialized contract exceeds {MAX_CONTRACT_JSON_BYTES} bytes"),
2774 ));
2775 }
2776
2777 Ok(normalized)
2778}
2779
2780#[allow(clippy::cast_possible_wrap)]
2781fn persist_vector_contract(
2782 conn: &rusqlite::Connection,
2783 config: &VectorRegenerationConfig,
2784 identity: &QueryEmbedderIdentity,
2785 snapshot_hash: &str,
2786) -> Result<(), EngineError> {
2787 conn.execute(
2788 r"
2789 INSERT OR REPLACE INTO vector_embedding_contracts (
2790 profile,
2791 table_name,
2792 model_identity,
2793 model_version,
2794 dimension,
2795 normalization_policy,
2796 chunking_policy,
2797 preprocessing_policy,
2798 generator_command_json,
2799 applied_at,
2800 snapshot_hash,
2801 contract_format_version,
2802 updated_at
2803 ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, unixepoch(), ?10, ?11, unixepoch())
2804 ",
2805 rusqlite::params![
2806 config.profile.as_str(),
2807 config.table_name.as_str(),
2808 identity.model_identity.as_str(),
2809 identity.model_version.as_str(),
2810 identity.dimension as i64,
2811 identity.normalization_policy.as_str(),
2812 config.chunking_policy.as_str(),
2813 config.preprocessing_policy.as_str(),
2814 "[]",
2815 snapshot_hash,
2816 CURRENT_VECTOR_CONTRACT_FORMAT_VERSION,
2817 ],
2818 )?;
2819 Ok(())
2820}
2821
2822fn persist_vector_regeneration_event(
2823 conn: &rusqlite::Connection,
2824 event_type: &str,
2825 subject: &str,
2826 metadata: &VectorRegenerationAuditMetadata,
2827) -> Result<(), EngineError> {
2828 let metadata_json = serialize_audit_metadata(metadata)?;
2829 conn.execute(
2830 "INSERT INTO provenance_events (id, event_type, subject, metadata_json) VALUES (?1, ?2, ?3, ?4)",
2831 rusqlite::params![new_id(), event_type, subject, metadata_json],
2832 )?;
2833 Ok(())
2834}
2835
2836fn persist_simple_provenance_event(
2837 conn: &rusqlite::Connection,
2838 event_type: &str,
2839 subject: &str,
2840 metadata: Option<serde_json::Value>,
2841) -> Result<(), EngineError> {
2842 let metadata_json = metadata.map(|value| value.to_string()).unwrap_or_default();
2843 conn.execute(
2844 "INSERT INTO provenance_events (id, event_type, subject, metadata_json) VALUES (?1, ?2, ?3, ?4)",
2845 rusqlite::params![new_id(), event_type, subject, metadata_json],
2846 )?;
2847 Ok(())
2848}
2849
2850fn count_missing_property_fts_rows(conn: &rusqlite::Connection) -> Result<i64, EngineError> {
2854 let schemas = crate::writer::load_fts_property_schemas(conn)?;
2855 if schemas.is_empty() {
2856 return Ok(0);
2857 }
2858
2859 let mut missing = 0i64;
2860 for (kind, schema) in &schemas {
2861 let mut stmt = conn.prepare(
2862 "SELECT n.logical_id, n.properties FROM nodes n \
2863 WHERE n.kind = ?1 AND n.superseded_at IS NULL \
2864 AND NOT EXISTS (SELECT 1 FROM fts_node_properties fp WHERE fp.node_logical_id = n.logical_id)",
2865 )?;
2866 let rows = stmt.query_map([kind.as_str()], |row| {
2867 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
2868 })?;
2869 for row in rows {
2870 let (_logical_id, properties_str) = row?;
2871 let props: serde_json::Value =
2872 serde_json::from_str(&properties_str).unwrap_or_default();
2873 if crate::writer::extract_property_fts(&props, schema)
2874 .0
2875 .is_some()
2876 {
2877 missing += 1;
2878 }
2879 }
2880 }
2881 Ok(missing)
2882}
2883
2884fn count_drifted_property_fts_rows(conn: &rusqlite::Connection) -> Result<i64, EngineError> {
2889 let schemas = crate::writer::load_fts_property_schemas(conn)?;
2890 if schemas.is_empty() {
2891 return Ok(0);
2892 }
2893
2894 let mut drifted = 0i64;
2895 for (kind, schema) in &schemas {
2896 let mut stmt = conn.prepare(
2897 "SELECT fp.node_logical_id, fp.text_content, n.properties \
2898 FROM fts_node_properties fp \
2899 JOIN nodes n ON n.logical_id = fp.node_logical_id AND n.superseded_at IS NULL \
2900 WHERE fp.kind = ?1 AND n.kind = ?1",
2901 )?;
2902 let rows = stmt.query_map([kind.as_str()], |row| {
2903 Ok((
2904 row.get::<_, String>(0)?,
2905 row.get::<_, String>(1)?,
2906 row.get::<_, String>(2)?,
2907 ))
2908 })?;
2909 for row in rows {
2910 let (_logical_id, stored_text, properties_str) = row?;
2911 let props: serde_json::Value =
2912 serde_json::from_str(&properties_str).unwrap_or_default();
2913 let (expected, _positions, _stats) =
2914 crate::writer::extract_property_fts(&props, schema);
2915 match expected {
2916 Some(text) if text == stored_text => {}
2917 _ => drifted += 1,
2918 }
2919 }
2920 }
2921 Ok(drifted)
2922}
2923
2924fn rebuild_property_fts_in_tx(conn: &rusqlite::Connection) -> Result<usize, EngineError> {
2926 conn.execute("DELETE FROM fts_node_properties", [])?;
2927 conn.execute("DELETE FROM fts_node_property_positions", [])?;
2928 let inserted = crate::projection::insert_property_fts_rows(
2929 conn,
2930 "SELECT logical_id, properties FROM nodes WHERE kind = ?1 AND superseded_at IS NULL",
2931 )?;
2932 Ok(inserted)
2933}
2934
2935fn rebuild_single_node_property_fts(
2938 conn: &rusqlite::Connection,
2939 logical_id: &str,
2940 kind: &str,
2941) -> Result<usize, EngineError> {
2942 let schema: Option<(String, String)> = conn
2943 .query_row(
2944 "SELECT property_paths_json, separator FROM fts_property_schemas WHERE kind = ?1",
2945 [kind],
2946 |row| {
2947 let paths_json: String = row.get(0)?;
2948 let separator: String = row.get(1)?;
2949 Ok((paths_json, separator))
2950 },
2951 )
2952 .optional()?;
2953 let Some((paths_json, separator)) = schema else {
2954 return Ok(0);
2955 };
2956 let parsed = crate::writer::parse_property_schema_json(&paths_json, &separator);
2957 let properties_str: Option<String> = conn
2958 .query_row(
2959 "SELECT properties FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
2960 [logical_id],
2961 |row| row.get(0),
2962 )
2963 .optional()?;
2964 let Some(properties_str) = properties_str else {
2965 return Ok(0);
2966 };
2967 let props: serde_json::Value = serde_json::from_str(&properties_str).unwrap_or_default();
2968 let (text, positions, _stats) = crate::writer::extract_property_fts(&props, &parsed);
2969 let Some(text) = text else {
2970 return Ok(0);
2971 };
2972 conn.execute(
2973 "DELETE FROM fts_node_property_positions WHERE node_logical_id = ?1",
2974 rusqlite::params![logical_id],
2975 )?;
2976 conn.execute(
2977 "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) VALUES (?1, ?2, ?3)",
2978 rusqlite::params![logical_id, kind, text],
2979 )?;
2980 for pos in &positions {
2981 conn.execute(
2982 "INSERT INTO fts_node_property_positions \
2983 (node_logical_id, kind, start_offset, end_offset, leaf_path) \
2984 VALUES (?1, ?2, ?3, ?4, ?5)",
2985 rusqlite::params![
2986 logical_id,
2987 kind,
2988 i64::try_from(pos.start_offset).unwrap_or(i64::MAX),
2989 i64::try_from(pos.end_offset).unwrap_or(i64::MAX),
2990 pos.leaf_path,
2991 ],
2992 )?;
2993 }
2994 Ok(1)
2995}
2996
2997fn serialize_property_paths_json(
2998 entries: &[FtsPropertyPathSpec],
2999 exclude_paths: &[String],
3000) -> Result<String, EngineError> {
3001 let all_scalar = entries
3005 .iter()
3006 .all(|e| e.mode == FtsPropertyPathMode::Scalar);
3007 if all_scalar && exclude_paths.is_empty() {
3008 let paths: Vec<&str> = entries.iter().map(|e| e.path.as_str()).collect();
3009 return serde_json::to_string(&paths).map_err(|e| {
3010 EngineError::InvalidWrite(format!("failed to serialize property paths: {e}"))
3011 });
3012 }
3013
3014 let mut obj = serde_json::Map::new();
3015 let paths_json: Vec<serde_json::Value> = entries
3016 .iter()
3017 .map(|e| {
3018 let mode_str = match e.mode {
3019 FtsPropertyPathMode::Scalar => "scalar",
3020 FtsPropertyPathMode::Recursive => "recursive",
3021 };
3022 serde_json::json!({ "path": e.path, "mode": mode_str })
3023 })
3024 .collect();
3025 obj.insert("paths".to_owned(), serde_json::Value::Array(paths_json));
3026 if !exclude_paths.is_empty() {
3027 obj.insert("exclude_paths".to_owned(), serde_json::json!(exclude_paths));
3028 }
3029 serde_json::to_string(&serde_json::Value::Object(obj))
3030 .map_err(|e| EngineError::InvalidWrite(format!("failed to serialize property paths: {e}")))
3031}
3032
3033fn validate_fts_property_paths(paths: &[String]) -> Result<(), EngineError> {
3034 if paths.is_empty() {
3035 return Err(EngineError::InvalidWrite(
3036 "FTS property paths must not be empty".to_owned(),
3037 ));
3038 }
3039 let mut seen = std::collections::HashSet::new();
3040 for path in paths {
3041 if !path.starts_with("$.") {
3042 return Err(EngineError::InvalidWrite(format!(
3043 "FTS property path must start with '$.' but got: {path}"
3044 )));
3045 }
3046 let after_prefix = &path[2..]; let segments: Vec<&str> = after_prefix.split('.').collect();
3048 if segments.is_empty() || segments.iter().any(|s| s.is_empty()) {
3049 return Err(EngineError::InvalidWrite(format!(
3050 "FTS property path has empty segment(s): {path}"
3051 )));
3052 }
3053 for seg in &segments {
3054 if !seg.chars().all(|c| c.is_alphanumeric() || c == '_') {
3055 return Err(EngineError::InvalidWrite(format!(
3056 "FTS property path segment contains invalid characters: {path}"
3057 )));
3058 }
3059 }
3060 if !seen.insert(path) {
3061 return Err(EngineError::InvalidWrite(format!(
3062 "duplicate FTS property path: {path}"
3063 )));
3064 }
3065 }
3066 Ok(())
3067}
3068
3069fn load_fts_property_schema_record(
3070 conn: &rusqlite::Connection,
3071 kind: &str,
3072) -> Result<Option<FtsPropertySchemaRecord>, EngineError> {
3073 let row = conn
3074 .query_row(
3075 "SELECT kind, property_paths_json, separator, format_version \
3076 FROM fts_property_schemas WHERE kind = ?1",
3077 [kind],
3078 |row| {
3079 let kind: String = row.get(0)?;
3080 let paths_json: String = row.get(1)?;
3081 let separator: String = row.get(2)?;
3082 let format_version: i64 = row.get(3)?;
3083 Ok(build_fts_property_schema_record(
3084 kind,
3085 &paths_json,
3086 separator,
3087 format_version,
3088 ))
3089 },
3090 )
3091 .optional()?;
3092 Ok(row)
3093}
3094
3095fn build_fts_property_schema_record(
3101 kind: String,
3102 paths_json: &str,
3103 separator: String,
3104 format_version: i64,
3105) -> FtsPropertySchemaRecord {
3106 let schema = crate::writer::parse_property_schema_json(paths_json, &separator);
3107 let entries: Vec<FtsPropertyPathSpec> = schema
3108 .paths
3109 .into_iter()
3110 .map(|entry| FtsPropertyPathSpec {
3111 path: entry.path,
3112 mode: match entry.mode {
3113 crate::writer::PropertyPathMode::Scalar => FtsPropertyPathMode::Scalar,
3114 crate::writer::PropertyPathMode::Recursive => FtsPropertyPathMode::Recursive,
3115 },
3116 })
3117 .collect();
3118 let property_paths: Vec<String> = entries.iter().map(|e| e.path.clone()).collect();
3119 FtsPropertySchemaRecord {
3120 kind,
3121 property_paths,
3122 entries,
3123 exclude_paths: schema.exclude_paths,
3124 separator,
3125 format_version,
3126 }
3127}
3128
3129fn build_regeneration_input(
3130 config: &VectorRegenerationConfig,
3131 identity: &QueryEmbedderIdentity,
3132 chunks: Vec<VectorRegenerationInputChunk>,
3133) -> VectorRegenerationInput {
3134 VectorRegenerationInput {
3135 profile: config.profile.clone(),
3136 table_name: config.table_name.clone(),
3137 model_identity: identity.model_identity.clone(),
3138 model_version: identity.model_version.clone(),
3139 dimension: identity.dimension,
3140 normalization_policy: identity.normalization_policy.clone(),
3141 chunking_policy: config.chunking_policy.clone(),
3142 preprocessing_policy: config.preprocessing_policy.clone(),
3143 chunks,
3144 }
3145}
3146
3147fn compute_snapshot_hash(payload: &VectorRegenerationInput) -> Result<String, EngineError> {
3148 let bytes =
3149 serde_json::to_vec(payload).map_err(|error| EngineError::Bridge(error.to_string()))?;
3150 let mut hasher = Sha256::new();
3151 hasher.update(bytes);
3152 Ok(format!("{:x}", hasher.finalize()))
3153}
3154
3155fn collect_regeneration_chunks(
3156 conn: &rusqlite::Connection,
3157) -> Result<Vec<VectorRegenerationInputChunk>, EngineError> {
3158 let mut stmt = conn.prepare(
3159 r"
3160 SELECT c.id, c.node_logical_id, n.kind, c.text_content, c.byte_start, c.byte_end, n.source_ref, c.created_at
3161 FROM chunks c
3162 JOIN nodes n
3163 ON n.logical_id = c.node_logical_id
3164 AND n.superseded_at IS NULL
3165 ORDER BY c.created_at, c.id
3166 ",
3167 )?;
3168 let chunks = stmt
3169 .query_map([], |row| {
3170 Ok(VectorRegenerationInputChunk {
3171 chunk_id: row.get(0)?,
3172 node_logical_id: row.get(1)?,
3173 kind: row.get(2)?,
3174 text_content: row.get(3)?,
3175 byte_start: row.get(4)?,
3176 byte_end: row.get(5)?,
3177 source_ref: row.get(6)?,
3178 created_at: row.get(7)?,
3179 })
3180 })?
3181 .collect::<Result<Vec<_>, _>>()?;
3182 Ok(chunks)
3183}
3184
3185fn validate_bounded_text(
3186 field: &str,
3187 value: &str,
3188 max_len: usize,
3189) -> Result<String, VectorRegenerationFailure> {
3190 let trimmed = value.trim();
3191 if trimmed.is_empty() {
3192 return Err(VectorRegenerationFailure::new(
3193 VectorRegenerationFailureClass::InvalidContract,
3194 format!("{field} must not be empty"),
3195 ));
3196 }
3197 if trimmed.len() > max_len {
3198 return Err(VectorRegenerationFailure::new(
3199 VectorRegenerationFailureClass::InvalidContract,
3200 format!("{field} exceeds max length {max_len}"),
3201 ));
3202 }
3203 Ok(trimmed.to_owned())
3204}
3205
3206fn current_vector_profile_dimension(
3207 conn: &rusqlite::Connection,
3208 profile: &str,
3209) -> Result<Option<usize>, VectorRegenerationFailure> {
3210 let dimension: Option<i64> = conn
3211 .query_row(
3212 "SELECT dimension FROM vector_profiles WHERE profile = ?1 AND enabled = 1",
3213 [profile],
3214 |row| row.get(0),
3215 )
3216 .optional()
3217 .map_err(|error| {
3218 VectorRegenerationFailure::new(
3219 VectorRegenerationFailureClass::InvalidContract,
3220 error.to_string(),
3221 )
3222 })?;
3223 dimension
3224 .map(|value| {
3225 usize::try_from(value).map_err(|_| {
3226 VectorRegenerationFailure::new(
3227 VectorRegenerationFailureClass::InvalidContract,
3228 format!("stored vector profile dimension is invalid: {value}"),
3229 )
3230 })
3231 })
3232 .transpose()
3233}
3234
3235fn validate_existing_contract_version(
3236 conn: &rusqlite::Connection,
3237 profile: &str,
3238) -> Result<(), VectorRegenerationFailure> {
3239 let version: Option<i64> = conn
3240 .query_row(
3241 "SELECT contract_format_version FROM vector_embedding_contracts WHERE profile = ?1",
3242 [profile],
3243 |row| row.get(0),
3244 )
3245 .optional()
3246 .map_err(|error| {
3247 VectorRegenerationFailure::new(
3248 VectorRegenerationFailureClass::InvalidContract,
3249 error.to_string(),
3250 )
3251 })?;
3252 if let Some(version) = version
3253 && version > CURRENT_VECTOR_CONTRACT_FORMAT_VERSION
3254 {
3255 return Err(VectorRegenerationFailure::new(
3256 VectorRegenerationFailureClass::InvalidContract,
3257 format!(
3258 "persisted contract format version {version} is unsupported; supported version is {CURRENT_VECTOR_CONTRACT_FORMAT_VERSION}"
3259 ),
3260 ));
3261 }
3262 Ok(())
3263}
3264
3265fn serialize_audit_metadata(
3266 metadata: &VectorRegenerationAuditMetadata,
3267) -> Result<String, EngineError> {
3268 let json =
3269 serde_json::to_string(metadata).map_err(|error| EngineError::Bridge(error.to_string()))?;
3270 if json.len() > MAX_AUDIT_METADATA_BYTES {
3271 return Err(VectorRegenerationFailure::new(
3272 VectorRegenerationFailureClass::InvalidContract,
3273 format!("audit metadata exceeds {MAX_AUDIT_METADATA_BYTES} bytes"),
3274 )
3275 .to_engine_error());
3276 }
3277 Ok(json)
3278}
3279
3280fn count_source_ref(
3281 conn: &rusqlite::Connection,
3282 table: &str,
3283 source_ref: &str,
3284) -> Result<usize, EngineError> {
3285 let sql = match table {
3286 "nodes" => "SELECT count(*) FROM nodes WHERE source_ref = ?1",
3287 "edges" => "SELECT count(*) FROM edges WHERE source_ref = ?1",
3288 "actions" => "SELECT count(*) FROM actions WHERE source_ref = ?1",
3289 "operational_mutations" => {
3290 "SELECT count(*) FROM operational_mutations WHERE source_ref = ?1"
3291 }
3292 other => return Err(EngineError::Bridge(format!("unknown table: {other}"))),
3293 };
3294 let count: i64 = conn.query_row(sql, [source_ref], |row| row.get(0))?;
3295 usize::try_from(count)
3298 .map_err(|_| EngineError::Bridge(format!("count overflow for table {table}: {count}")))
3299}
3300
3301fn rebuild_operational_current_rows(
3302 tx: &rusqlite::Transaction<'_>,
3303 collections: &[String],
3304) -> Result<usize, EngineError> {
3305 let mut rebuilt_rows = 0usize;
3306 clear_operational_current_rows(tx, collections)?;
3307 let mut ins_current = tx.prepare_cached(
3308 "INSERT INTO operational_current \
3309 (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
3310 VALUES (?1, ?2, ?3, ?4, ?5)",
3311 )?;
3312
3313 for collection in collections {
3314 let mut stmt = tx.prepare(
3315 "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
3316 FROM operational_mutations \
3317 WHERE collection_name = ?1 \
3318 ORDER BY record_key, mutation_order",
3319 )?;
3320 let mut latest_by_key: std::collections::HashMap<String, Option<(String, i64, String)>> =
3321 std::collections::HashMap::new();
3322 let rows = stmt.query_map([collection], map_operational_mutation_row)?;
3323 for row in rows {
3324 let mutation = row?;
3325 match mutation.op_kind.as_str() {
3326 "put" => {
3327 latest_by_key.insert(
3328 mutation.record_key,
3329 Some((mutation.payload_json, mutation.created_at, mutation.id)),
3330 );
3331 }
3332 "delete" => {
3333 latest_by_key.insert(mutation.record_key, None);
3334 }
3335 _ => {}
3336 }
3337 }
3338
3339 for (record_key, state) in latest_by_key {
3340 if let Some((payload_json, updated_at, last_mutation_id)) = state {
3341 ins_current.execute(rusqlite::params![
3342 collection,
3343 record_key,
3344 payload_json,
3345 updated_at,
3346 last_mutation_id,
3347 ])?;
3348 rebuilt_rows += 1;
3349 }
3350 }
3351 }
3352
3353 drop(ins_current);
3354 Ok(rebuilt_rows)
3355}
3356
3357fn clear_operational_current_rows(
3358 tx: &rusqlite::Transaction<'_>,
3359 collections: &[String],
3360) -> Result<(), EngineError> {
3361 let mut delete_current =
3362 tx.prepare_cached("DELETE FROM operational_current WHERE collection_name = ?1")?;
3363 let mut delete_secondary_current = tx.prepare_cached(
3364 "DELETE FROM operational_secondary_index_entries \
3365 WHERE collection_name = ?1 AND subject_kind = 'current'",
3366 )?;
3367 for collection in collections {
3368 delete_secondary_current.execute([collection])?;
3369 delete_current.execute([collection])?;
3370 }
3371 drop(delete_secondary_current);
3372 drop(delete_current);
3373 Ok(())
3374}
3375
3376fn clear_operational_secondary_index_entries(
3377 tx: &rusqlite::Transaction<'_>,
3378 collection_name: &str,
3379) -> Result<(), EngineError> {
3380 tx.execute(
3381 "DELETE FROM operational_secondary_index_entries WHERE collection_name = ?1",
3382 [collection_name],
3383 )?;
3384 Ok(())
3385}
3386
3387fn insert_operational_secondary_index_entry(
3388 tx: &rusqlite::Transaction<'_>,
3389 collection_name: &str,
3390 subject_kind: &str,
3391 mutation_id: &str,
3392 record_key: &str,
3393 entry: &crate::operational::OperationalSecondaryIndexEntry,
3394) -> Result<(), EngineError> {
3395 tx.execute(
3396 "INSERT INTO operational_secondary_index_entries \
3397 (collection_name, index_name, subject_kind, mutation_id, record_key, sort_timestamp, \
3398 slot1_text, slot1_integer, slot2_text, slot2_integer, slot3_text, slot3_integer) \
3399 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
3400 rusqlite::params![
3401 collection_name,
3402 entry.index_name,
3403 subject_kind,
3404 mutation_id,
3405 record_key,
3406 entry.sort_timestamp,
3407 entry.slot1_text,
3408 entry.slot1_integer,
3409 entry.slot2_text,
3410 entry.slot2_integer,
3411 entry.slot3_text,
3412 entry.slot3_integer,
3413 ],
3414 )?;
3415 Ok(())
3416}
3417
3418fn rebuild_operational_secondary_index_entries(
3419 tx: &rusqlite::Transaction<'_>,
3420 collection_name: &str,
3421 collection_kind: OperationalCollectionKind,
3422 indexes: &[OperationalSecondaryIndexDefinition],
3423) -> Result<(usize, usize), EngineError> {
3424 clear_operational_secondary_index_entries(tx, collection_name)?;
3425
3426 let mut mutation_entries_rebuilt = 0usize;
3427 if collection_kind == OperationalCollectionKind::AppendOnlyLog {
3428 let mut stmt = tx.prepare(
3429 "SELECT id, record_key, payload_json FROM operational_mutations \
3430 WHERE collection_name = ?1 ORDER BY mutation_order",
3431 )?;
3432 let rows = stmt
3433 .query_map([collection_name], |row| {
3434 Ok((
3435 row.get::<_, String>(0)?,
3436 row.get::<_, String>(1)?,
3437 row.get::<_, String>(2)?,
3438 ))
3439 })?
3440 .collect::<Result<Vec<_>, _>>()?;
3441 drop(stmt);
3442 for (mutation_id, record_key, payload_json) in rows {
3443 for entry in extract_secondary_index_entries_for_mutation(indexes, &payload_json) {
3444 insert_operational_secondary_index_entry(
3445 tx,
3446 collection_name,
3447 "mutation",
3448 &mutation_id,
3449 &record_key,
3450 &entry,
3451 )?;
3452 mutation_entries_rebuilt += 1;
3453 }
3454 }
3455 }
3456
3457 let mut current_entries_rebuilt = 0usize;
3458 if collection_kind == OperationalCollectionKind::LatestState {
3459 let mut stmt = tx.prepare(
3460 "SELECT record_key, payload_json, updated_at, last_mutation_id FROM operational_current \
3461 WHERE collection_name = ?1 ORDER BY updated_at DESC, record_key",
3462 )?;
3463 let rows = stmt
3464 .query_map([collection_name], |row| {
3465 Ok((
3466 row.get::<_, String>(0)?,
3467 row.get::<_, String>(1)?,
3468 row.get::<_, i64>(2)?,
3469 row.get::<_, String>(3)?,
3470 ))
3471 })?
3472 .collect::<Result<Vec<_>, _>>()?;
3473 drop(stmt);
3474 for (record_key, payload_json, updated_at, last_mutation_id) in rows {
3475 for entry in
3476 extract_secondary_index_entries_for_current(indexes, &payload_json, updated_at)
3477 {
3478 insert_operational_secondary_index_entry(
3479 tx,
3480 collection_name,
3481 "current",
3482 &last_mutation_id,
3483 &record_key,
3484 &entry,
3485 )?;
3486 current_entries_rebuilt += 1;
3487 }
3488 }
3489 }
3490
3491 Ok((mutation_entries_rebuilt, current_entries_rebuilt))
3492}
3493
3494fn collect_strings_tx(
3495 tx: &rusqlite::Transaction<'_>,
3496 sql: &str,
3497 value: &str,
3498) -> Result<Vec<String>, EngineError> {
3499 let mut stmt = tx.prepare(sql)?;
3500 let rows = stmt.query_map([value], |row| row.get::<_, String>(0))?;
3501 rows.collect::<Result<Vec<_>, _>>()
3502 .map_err(EngineError::from)
3503}
3504
/// Converts an `i64` to `usize`.
///
/// # Panics
/// Panics when `val` cannot be represented as a `usize` (for example when it is negative).
// `expect` is allowed here: the panic message states the invariant the value must satisfy.
#[allow(clippy::expect_used)]
fn i64_to_usize(val: i64) -> usize {
    let converted = usize::try_from(val);
    converted.expect("count(*) must be non-negative")
}
3511
3512fn collect_strings(
3519 conn: &rusqlite::Connection,
3520 sql: &str,
3521 param: &str,
3522) -> Result<Vec<String>, EngineError> {
3523 let mut stmt = conn.prepare(sql)?;
3524 let values = stmt
3525 .query_map([param], |row| row.get::<_, String>(0))?
3526 .collect::<Result<Vec<_>, _>>()?;
3527 Ok(values)
3528}
3529
3530fn collect_edge_logical_ids_for_restore(
3531 tx: &rusqlite::Transaction<'_>,
3532 logical_id: &str,
3533 retire_source_ref: Option<&str>,
3534 retire_created_at: i64,
3535 retire_event_rowid: i64,
3536) -> Result<Vec<String>, EngineError> {
3537 let mut stmt = tx.prepare(
3538 "SELECT DISTINCT e.logical_id \
3539 FROM edges e \
3540 JOIN provenance_events p \
3541 ON p.subject = e.logical_id \
3542 AND p.event_type = 'edge_retire' \
3543 AND ( \
3544 p.created_at > ?3 \
3545 OR (p.created_at = ?3 AND p.rowid >= ?4) \
3546 ) \
3547 AND ((?2 IS NULL AND p.source_ref IS NULL) OR p.source_ref = ?2) \
3548 WHERE e.superseded_at IS NOT NULL \
3549 AND (e.source_logical_id = ?1 OR e.target_logical_id = ?1) \
3550 AND NOT EXISTS ( \
3551 SELECT 1 FROM edges active \
3552 WHERE active.logical_id = e.logical_id \
3553 AND active.superseded_at IS NULL \
3554 ) \
3555 ORDER BY e.logical_id",
3556 )?;
3557 let edge_ids = stmt
3558 .query_map(
3559 rusqlite::params![
3560 logical_id,
3561 retire_source_ref,
3562 retire_created_at,
3563 retire_event_rowid
3564 ],
3565 |row| row.get::<_, String>(0),
3566 )?
3567 .collect::<Result<Vec<_>, _>>()?;
3568 Ok(edge_ids)
3569}
3570
3571fn restore_validated_edges(
3574 tx: &rusqlite::Transaction<'_>,
3575 logical_id: &str,
3576 retire_source_ref: Option<&str>,
3577 retire_created_at: i64,
3578 retire_event_rowid: i64,
3579) -> Result<(usize, Vec<SkippedEdge>), EngineError> {
3580 let edge_logical_ids = collect_edge_logical_ids_for_restore(
3581 tx,
3582 logical_id,
3583 retire_source_ref,
3584 retire_created_at,
3585 retire_event_rowid,
3586 )?;
3587 let mut restored = 0usize;
3588 let mut skipped = Vec::new();
3589 for edge_logical_id in &edge_logical_ids {
3590 let edge_detail: Option<(String, String, String)> = tx
3591 .query_row(
3592 "SELECT row_id, source_logical_id, target_logical_id FROM edges \
3593 WHERE logical_id = ?1 AND superseded_at IS NOT NULL \
3594 ORDER BY superseded_at DESC, created_at DESC, rowid DESC LIMIT 1",
3595 [edge_logical_id.as_str()],
3596 |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
3597 )
3598 .optional()?;
3599 let Some((edge_row_id, source_lid, target_lid)) = edge_detail else {
3600 continue;
3601 };
3602 let other_endpoint = if source_lid == logical_id {
3603 &target_lid
3604 } else {
3605 &source_lid
3606 };
3607 let endpoint_active: bool = tx
3608 .query_row(
3609 "SELECT 1 FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL LIMIT 1",
3610 [other_endpoint.as_str()],
3611 |_| Ok(true),
3612 )
3613 .optional()?
3614 .unwrap_or(false);
3615 if !endpoint_active {
3616 skipped.push(SkippedEdge {
3617 edge_logical_id: edge_logical_id.clone(),
3618 missing_endpoint: other_endpoint.clone(),
3619 });
3620 continue;
3621 }
3622 restored += tx.execute(
3623 "UPDATE edges SET superseded_at = NULL WHERE row_id = ?1",
3624 [edge_row_id.as_str()],
3625 )?;
3626 }
3627 Ok((restored, skipped))
3628}
3629
/// Counts the rows of `vec_nodes_active` that belong to chunks of the node `logical_id`.
///
/// A SQLite failure whose message mentions `vec_nodes_active` or
/// `no such module: vec0` is reported as zero rows instead of an error.
///
/// # Errors
/// Returns [`EngineError::Sqlite`] for any other SQLite error.
#[cfg(feature = "sqlite-vec")]
fn count_vec_rows_for_logical_id(
    tx: &rusqlite::Transaction<'_>,
    logical_id: &str,
) -> Result<usize, EngineError> {
    tx.query_row(
        "SELECT count(*) FROM vec_nodes_active v \
         JOIN chunks c ON c.id = v.chunk_id \
         WHERE c.node_logical_id = ?1",
        [logical_id],
        |row| row.get::<_, i64>(0),
    )
    .map(i64_to_usize)
    .or_else(|error| match error {
        rusqlite::Error::SqliteFailure(_, Some(ref message))
            if message.contains("vec_nodes_active")
                || message.contains("no such module: vec0") =>
        {
            Ok(0)
        }
        other => Err(EngineError::Sqlite(other)),
    })
}
3651
/// Variant compiled when the `sqlite-vec` feature is disabled: performs no query and
/// always reports zero rows.
#[cfg(not(feature = "sqlite-vec"))]
// The `Result` wrapper keeps the signature identical to the `sqlite-vec` variant.
#[allow(clippy::unnecessary_wraps)]
fn count_vec_rows_for_logical_id(
    _tx: &rusqlite::Transaction<'_>,
    _logical_id: &str,
) -> Result<usize, EngineError> {
    Ok(0)
}
3660
/// Deletes the rows of `vec_nodes_active` that belong to chunks of the node `logical_id`
/// and returns how many rows were removed.
///
/// A SQLite failure whose message mentions `vec_nodes_active` or
/// `no such module: vec0` is reported as zero deleted rows instead of an error.
///
/// # Errors
/// Returns [`EngineError::Sqlite`] for any other SQLite error.
#[cfg(feature = "sqlite-vec")]
fn delete_vec_rows_for_logical_id(
    tx: &rusqlite::Transaction<'_>,
    logical_id: &str,
) -> Result<usize, EngineError> {
    let outcome = tx.execute(
        "DELETE FROM vec_nodes_active \
         WHERE chunk_id IN (SELECT id FROM chunks WHERE node_logical_id = ?1)",
        [logical_id],
    );
    outcome.or_else(|error| match error {
        rusqlite::Error::SqliteFailure(_, Some(ref message))
            if message.contains("vec_nodes_active")
                || message.contains("no such module: vec0") =>
        {
            Ok(0)
        }
        other => Err(EngineError::Sqlite(other)),
    })
}
3680
/// Variant compiled when the `sqlite-vec` feature is disabled: deletes nothing and always
/// reports zero removed rows.
#[cfg(not(feature = "sqlite-vec"))]
// The `Result` wrapper keeps the signature identical to the `sqlite-vec` variant.
#[allow(clippy::unnecessary_wraps)]
fn delete_vec_rows_for_logical_id(
    _tx: &rusqlite::Transaction<'_>,
    _logical_id: &str,
) -> Result<usize, EngineError> {
    Ok(0)
}
3689
3690fn ensure_operational_collection_registered(
3691 conn: &rusqlite::Connection,
3692 collection_name: &str,
3693) -> Result<(), EngineError> {
3694 if load_operational_collection_record(conn, collection_name)?.is_none() {
3695 return Err(EngineError::InvalidWrite(format!(
3696 "operational collection '{collection_name}' is not registered"
3697 )));
3698 }
3699 Ok(())
3700}
3701
3702fn load_operational_collection_record(
3703 conn: &rusqlite::Connection,
3704 name: &str,
3705) -> Result<Option<OperationalCollectionRecord>, EngineError> {
3706 conn.query_row(
3707 "SELECT name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at, disabled_at \
3708 FROM operational_collections WHERE name = ?1",
3709 [name],
3710 map_operational_collection_row,
3711 )
3712 .optional()
3713 .map_err(EngineError::Sqlite)
3714}
3715
3716fn validate_append_only_operational_collection(
3717 record: &OperationalCollectionRecord,
3718 operation: &str,
3719) -> Result<(), EngineError> {
3720 if record.kind != OperationalCollectionKind::AppendOnlyLog {
3721 return Err(EngineError::InvalidWrite(format!(
3722 "operational collection '{}' must be append_only_log to {operation}",
3723 record.name
3724 )));
3725 }
3726 Ok(())
3727}
3728
/// A read filter after validation against a collection's declared filter fields, as
/// produced by `compile_operational_read_filters`.
#[derive(Clone, Debug, PartialEq, Eq)]
struct CompiledOperationalReadFilter {
    /// Name of the declared field the filter applies to.
    field: String,
    /// The comparison to apply to that field's value.
    condition: OperationalReadCondition,
}
3734
/// The result of matching a set of compiled filters against an append-only field/time
/// secondary index, as produced by `match_append_only_secondary_index_read`.
#[derive(Clone, Debug)]
struct MatchedAppendOnlySecondaryIndexRead<'a> {
    /// Name of the secondary index that can serve the read.
    index_name: &'a str,
    /// The filter on the index's value field.
    value_filter: &'a CompiledOperationalReadFilter,
    /// The optional range filter on the index's time field.
    time_range: Option<&'a CompiledOperationalReadFilter>,
}
3741
/// The comparison carried by a compiled operational read filter.
#[derive(Clone, Debug, PartialEq, Eq)]
enum OperationalReadCondition {
    /// The field must equal this string.
    ExactString(String),
    /// The field must equal this integer.
    ExactInteger(i64),
    /// The field must start with this string.
    Prefix(String),
    /// The field must lie within the given bounds; the read queries in this file apply
    /// each present bound inclusively (`>=` / `<=`).
    Range {
        /// Lower bound, if any.
        lower: Option<i64>,
        /// Upper bound, if any.
        upper: Option<i64>,
    },
}
3752
3753fn operational_read_limit(limit: Option<usize>) -> Result<usize, EngineError> {
3754 let applied_limit = limit.unwrap_or(DEFAULT_OPERATIONAL_READ_LIMIT);
3755 if applied_limit == 0 {
3756 return Err(EngineError::InvalidWrite(
3757 "operational read limit must be greater than zero".to_owned(),
3758 ));
3759 }
3760 Ok(applied_limit.min(MAX_OPERATIONAL_READ_LIMIT))
3761}
3762
3763fn parse_operational_filter_fields(
3764 filter_fields_json: &str,
3765) -> Result<Vec<OperationalFilterField>, String> {
3766 let fields: Vec<OperationalFilterField> = serde_json::from_str(filter_fields_json)
3767 .map_err(|error| format!("invalid filter_fields_json: {error}"))?;
3768 let mut seen = std::collections::HashSet::new();
3769 for field in &fields {
3770 if field.name.trim().is_empty() {
3771 return Err("filter_fields_json field names must not be empty".to_owned());
3772 }
3773 if !seen.insert(field.name.as_str()) {
3774 return Err(format!(
3775 "filter_fields_json contains duplicate field '{}'",
3776 field.name
3777 ));
3778 }
3779 if field.modes.is_empty() {
3780 return Err(format!(
3781 "filter_fields_json field '{}' must declare at least one mode",
3782 field.name
3783 ));
3784 }
3785 if field.modes.contains(&OperationalFilterMode::Prefix)
3786 && field.field_type != OperationalFilterFieldType::String
3787 {
3788 return Err(format!(
3789 "filter field '{}' only supports prefix for string types",
3790 field.name
3791 ));
3792 }
3793 }
3794 Ok(fields)
3795}
3796
3797fn compile_operational_read_filters(
3798 filters: &[OperationalFilterClause],
3799 declared_fields: &[OperationalFilterField],
3800) -> Result<Vec<CompiledOperationalReadFilter>, EngineError> {
3801 let field_map = declared_fields
3802 .iter()
3803 .map(|field| (field.name.as_str(), field))
3804 .collect::<std::collections::HashMap<_, _>>();
3805 filters
3806 .iter()
3807 .map(|filter| match filter {
3808 OperationalFilterClause::Exact { field, value } => {
3809 let declared = field_map.get(field.as_str()).ok_or_else(|| {
3810 EngineError::InvalidWrite(format!(
3811 "operational read filter uses undeclared field '{field}'"
3812 ))
3813 })?;
3814 if !declared.modes.contains(&OperationalFilterMode::Exact) {
3815 return Err(EngineError::InvalidWrite(format!(
3816 "operational read field '{field}' does not allow exact filters"
3817 )));
3818 }
3819 let condition = match (declared.field_type, value) {
3820 (OperationalFilterFieldType::String, OperationalFilterValue::String(value)) => {
3821 OperationalReadCondition::ExactString(value.clone())
3822 }
3823 (
3824 OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp,
3825 OperationalFilterValue::Integer(value),
3826 ) => OperationalReadCondition::ExactInteger(*value),
3827 _ => {
3828 return Err(EngineError::InvalidWrite(format!(
3829 "operational read field '{field}' received a value with the wrong type"
3830 )));
3831 }
3832 };
3833 Ok(CompiledOperationalReadFilter {
3834 field: field.clone(),
3835 condition,
3836 })
3837 }
3838 OperationalFilterClause::Prefix { field, value } => {
3839 let declared = field_map.get(field.as_str()).ok_or_else(|| {
3840 EngineError::InvalidWrite(format!(
3841 "operational read filter uses undeclared field '{field}'"
3842 ))
3843 })?;
3844 if !declared.modes.contains(&OperationalFilterMode::Prefix) {
3845 return Err(EngineError::InvalidWrite(format!(
3846 "operational read field '{field}' does not allow prefix filters"
3847 )));
3848 }
3849 if declared.field_type != OperationalFilterFieldType::String {
3850 return Err(EngineError::InvalidWrite(format!(
3851 "operational read field '{field}' only supports prefix filters for strings"
3852 )));
3853 }
3854 Ok(CompiledOperationalReadFilter {
3855 field: field.clone(),
3856 condition: OperationalReadCondition::Prefix(value.clone()),
3857 })
3858 }
3859 OperationalFilterClause::Range {
3860 field,
3861 lower,
3862 upper,
3863 } => {
3864 let declared = field_map.get(field.as_str()).ok_or_else(|| {
3865 EngineError::InvalidWrite(format!(
3866 "operational read filter uses undeclared field '{field}'"
3867 ))
3868 })?;
3869 if !declared.modes.contains(&OperationalFilterMode::Range) {
3870 return Err(EngineError::InvalidWrite(format!(
3871 "operational read field '{field}' does not allow range filters"
3872 )));
3873 }
3874 if !matches!(
3875 declared.field_type,
3876 OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp
3877 ) {
3878 return Err(EngineError::InvalidWrite(format!(
3879 "operational read field '{field}' only supports range filters for integer/timestamp fields"
3880 )));
3881 }
3882 if lower.is_none() && upper.is_none() {
3883 return Err(EngineError::InvalidWrite(format!(
3884 "operational read range filter for '{field}' must specify a lower or upper bound"
3885 )));
3886 }
3887 Ok(CompiledOperationalReadFilter {
3888 field: field.clone(),
3889 condition: OperationalReadCondition::Range {
3890 lower: *lower,
3891 upper: *upper,
3892 },
3893 })
3894 }
3895 })
3896 .collect()
3897}
3898
3899fn match_append_only_secondary_index_read<'a>(
3900 filters: &'a [CompiledOperationalReadFilter],
3901 indexes: &'a [OperationalSecondaryIndexDefinition],
3902) -> Option<MatchedAppendOnlySecondaryIndexRead<'a>> {
3903 indexes.iter().find_map(|index| {
3904 let OperationalSecondaryIndexDefinition::AppendOnlyFieldTime {
3905 name,
3906 field,
3907 value_type,
3908 time_field,
3909 } = index
3910 else {
3911 return None;
3912 };
3913 if !(1..=2).contains(&filters.len()) {
3914 return None;
3915 }
3916
3917 let mut value_filter = None;
3918 let mut time_range = None;
3919 for filter in filters {
3920 if filter.field == *field {
3921 let supported = matches!(
3922 (&filter.condition, value_type),
3923 (
3924 OperationalReadCondition::ExactString(_)
3925 | OperationalReadCondition::Prefix(_),
3926 crate::operational::OperationalSecondaryIndexValueType::String
3927 ) | (
3928 OperationalReadCondition::ExactInteger(_),
3929 crate::operational::OperationalSecondaryIndexValueType::Integer
3930 | crate::operational::OperationalSecondaryIndexValueType::Timestamp
3931 )
3932 );
3933 if !supported || value_filter.is_some() {
3934 return None;
3935 }
3936 value_filter = Some(filter);
3937 continue;
3938 }
3939 if filter.field == *time_field {
3940 if !matches!(filter.condition, OperationalReadCondition::Range { .. })
3941 || time_range.is_some()
3942 {
3943 return None;
3944 }
3945 time_range = Some(filter);
3946 continue;
3947 }
3948 return None;
3949 }
3950
3951 value_filter.map(|value_filter| MatchedAppendOnlySecondaryIndexRead {
3952 index_name: name.as_str(),
3953 value_filter,
3954 time_range,
3955 })
3956 })
3957}
3958
3959fn execute_operational_secondary_index_read(
3960 conn: &rusqlite::Connection,
3961 collection_name: &str,
3962 filters: &[CompiledOperationalReadFilter],
3963 indexes: &[OperationalSecondaryIndexDefinition],
3964 applied_limit: usize,
3965) -> Result<Option<OperationalReadReport>, EngineError> {
3966 use rusqlite::types::Value;
3967
3968 let Some(matched) = match_append_only_secondary_index_read(filters, indexes) else {
3969 return Ok(None);
3970 };
3971
3972 let mut sql = String::from(
3973 "SELECT m.id, m.collection_name, m.record_key, m.op_kind, m.payload_json, m.source_ref, m.created_at \
3974 FROM operational_secondary_index_entries s \
3975 JOIN operational_mutations m ON m.id = s.mutation_id \
3976 WHERE s.collection_name = ?1 AND s.index_name = ?2 AND s.subject_kind = 'mutation' ",
3977 );
3978 let mut params = vec![
3979 Value::from(collection_name.to_owned()),
3980 Value::from(matched.index_name.to_owned()),
3981 ];
3982
3983 match &matched.value_filter.condition {
3984 OperationalReadCondition::ExactString(value) => {
3985 let _ = write!(sql, "AND s.slot1_text = ?{} ", params.len() + 1);
3986 params.push(Value::from(value.clone()));
3987 }
3988 OperationalReadCondition::Prefix(value) => {
3989 let _ = write!(sql, "AND s.slot1_text GLOB ?{} ", params.len() + 1);
3990 params.push(Value::from(glob_prefix_pattern(value)));
3991 }
3992 OperationalReadCondition::ExactInteger(value) => {
3993 let _ = write!(sql, "AND s.slot1_integer = ?{} ", params.len() + 1);
3994 params.push(Value::from(*value));
3995 }
3996 OperationalReadCondition::Range { .. } => return Ok(None),
3997 }
3998
3999 if let Some(time_range) = matched.time_range
4000 && let OperationalReadCondition::Range { lower, upper } = &time_range.condition
4001 {
4002 if let Some(lower) = lower {
4003 let _ = write!(sql, "AND s.sort_timestamp >= ?{} ", params.len() + 1);
4004 params.push(Value::from(*lower));
4005 }
4006 if let Some(upper) = upper {
4007 let _ = write!(sql, "AND s.sort_timestamp <= ?{} ", params.len() + 1);
4008 params.push(Value::from(*upper));
4009 }
4010 }
4011
4012 let _ = write!(
4013 sql,
4014 "ORDER BY s.sort_timestamp DESC, m.mutation_order DESC LIMIT ?{}",
4015 params.len() + 1
4016 );
4017 params.push(Value::from(i64::try_from(applied_limit + 1).map_err(
4018 |_| EngineError::Bridge("operational read limit overflow".to_owned()),
4019 )?));
4020
4021 let mut stmt = conn.prepare(&sql)?;
4022 let mut rows = stmt
4023 .query_map(
4024 rusqlite::params_from_iter(params),
4025 map_operational_mutation_row,
4026 )?
4027 .collect::<Result<Vec<_>, _>>()?;
4028 let was_limited = rows.len() > applied_limit;
4029 if was_limited {
4030 rows.truncate(applied_limit);
4031 }
4032
4033 Ok(Some(OperationalReadReport {
4034 collection_name: collection_name.to_owned(),
4035 row_count: rows.len(),
4036 applied_limit,
4037 was_limited,
4038 rows,
4039 }))
4040}
4041
4042fn execute_operational_filtered_read(
4043 conn: &rusqlite::Connection,
4044 collection_name: &str,
4045 filters: &[CompiledOperationalReadFilter],
4046 applied_limit: usize,
4047) -> Result<OperationalReadReport, EngineError> {
4048 use rusqlite::types::Value;
4049
4050 let mut sql = String::from(
4051 "SELECT m.id, m.collection_name, m.record_key, m.op_kind, m.payload_json, m.source_ref, m.created_at \
4052 FROM operational_mutations m ",
4053 );
4054 let mut params = vec![Value::from(collection_name.to_owned())];
4055 for (index, filter) in filters.iter().enumerate() {
4056 let _ = write!(
4057 sql,
4058 "JOIN operational_filter_values f{index} \
4059 ON f{index}.mutation_id = m.id \
4060 AND f{index}.collection_name = m.collection_name "
4061 );
4062 match &filter.condition {
4063 OperationalReadCondition::ExactString(value) => {
4064 let _ = write!(
4065 sql,
4066 "AND f{index}.field_name = ?{} AND f{index}.string_value = ?{} ",
4067 params.len() + 1,
4068 params.len() + 2
4069 );
4070 params.push(Value::from(filter.field.clone()));
4071 params.push(Value::from(value.clone()));
4072 }
4073 OperationalReadCondition::ExactInteger(value) => {
4074 let _ = write!(
4075 sql,
4076 "AND f{index}.field_name = ?{} AND f{index}.integer_value = ?{} ",
4077 params.len() + 1,
4078 params.len() + 2
4079 );
4080 params.push(Value::from(filter.field.clone()));
4081 params.push(Value::from(*value));
4082 }
4083 OperationalReadCondition::Prefix(value) => {
4084 let _ = write!(
4085 sql,
4086 "AND f{index}.field_name = ?{} AND f{index}.string_value GLOB ?{} ",
4087 params.len() + 1,
4088 params.len() + 2
4089 );
4090 params.push(Value::from(filter.field.clone()));
4091 params.push(Value::from(glob_prefix_pattern(value)));
4092 }
4093 OperationalReadCondition::Range { lower, upper } => {
4094 let _ = write!(sql, "AND f{index}.field_name = ?{} ", params.len() + 1);
4095 params.push(Value::from(filter.field.clone()));
4096 if let Some(lower) = lower {
4097 let _ = write!(sql, "AND f{index}.integer_value >= ?{} ", params.len() + 1);
4098 params.push(Value::from(*lower));
4099 }
4100 if let Some(upper) = upper {
4101 let _ = write!(sql, "AND f{index}.integer_value <= ?{} ", params.len() + 1);
4102 params.push(Value::from(*upper));
4103 }
4104 }
4105 }
4106 }
4107 let _ = write!(
4108 sql,
4109 "WHERE m.collection_name = ?1 ORDER BY m.mutation_order DESC LIMIT ?{}",
4110 params.len() + 1
4111 );
4112 params.push(Value::from(i64::try_from(applied_limit + 1).map_err(
4113 |_| EngineError::Bridge("operational read limit overflow".to_owned()),
4114 )?));
4115
4116 let mut stmt = conn.prepare(&sql)?;
4117 let mut rows = stmt
4118 .query_map(
4119 rusqlite::params_from_iter(params),
4120 map_operational_mutation_row,
4121 )?
4122 .collect::<Result<Vec<_>, _>>()?;
4123 let was_limited = rows.len() > applied_limit;
4124 if was_limited {
4125 rows.truncate(applied_limit);
4126 }
4127 Ok(OperationalReadReport {
4128 collection_name: collection_name.to_owned(),
4129 row_count: rows.len(),
4130 applied_limit,
4131 was_limited,
4132 rows,
4133 })
4134}
4135
/// Builds a GLOB pattern that matches any string starting with `value`.
///
/// The characters `*`, `?` and `[` are each wrapped in a bracket expression so they match
/// literally, and a trailing `*` is appended.
fn glob_prefix_pattern(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len() + 1);
    for symbol in value.chars() {
        if matches!(symbol, '*' | '?' | '[') {
            // `[x]` is a one-character class, i.e. the literal character `x`.
            escaped.push('[');
            escaped.push(symbol);
            escaped.push(']');
        } else {
            escaped.push(symbol);
        }
    }
    escaped.push('*');
    escaped
}
4149
/// One filterable value pulled out of a mutation payload by
/// `extract_operational_filter_values`.
#[derive(Clone, Debug, PartialEq, Eq)]
struct ExtractedOperationalFilterValue {
    /// Name of the declared filter field the value belongs to.
    field_name: String,
    /// The value when the field is declared as a string; `None` otherwise.
    string_value: Option<String>,
    /// The value when the field is declared as an integer or timestamp; `None` otherwise.
    integer_value: Option<i64>,
}
4156
4157fn extract_operational_filter_values(
4158 filter_fields: &[OperationalFilterField],
4159 payload_json: &str,
4160) -> Vec<ExtractedOperationalFilterValue> {
4161 let Ok(parsed) = serde_json::from_str::<serde_json::Value>(payload_json) else {
4162 return Vec::new();
4163 };
4164 let Some(object) = parsed.as_object() else {
4165 return Vec::new();
4166 };
4167
4168 filter_fields
4169 .iter()
4170 .filter_map(|field| {
4171 let value = object.get(&field.name)?;
4172 match field.field_type {
4173 OperationalFilterFieldType::String => {
4174 value
4175 .as_str()
4176 .map(|string_value| ExtractedOperationalFilterValue {
4177 field_name: field.name.clone(),
4178 string_value: Some(string_value.to_owned()),
4179 integer_value: None,
4180 })
4181 }
4182 OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp => {
4183 value
4184 .as_i64()
4185 .map(|integer_value| ExtractedOperationalFilterValue {
4186 field_name: field.name.clone(),
4187 string_value: None,
4188 integer_value: Some(integer_value),
4189 })
4190 }
4191 }
4192 })
4193 .collect()
4194}
4195
4196fn operational_compaction_candidates(
4197 conn: &rusqlite::Connection,
4198 retention_json: &str,
4199 collection_name: &str,
4200) -> Result<(Vec<String>, Option<i64>), EngineError> {
4201 operational_compaction_candidates_at(
4202 conn,
4203 retention_json,
4204 collection_name,
4205 current_unix_timestamp()?,
4206 )
4207}
4208
4209fn operational_compaction_candidates_at(
4210 conn: &rusqlite::Connection,
4211 retention_json: &str,
4212 collection_name: &str,
4213 now_timestamp: i64,
4214) -> Result<(Vec<String>, Option<i64>), EngineError> {
4215 let policy = parse_operational_retention_policy(retention_json)?;
4216 match policy {
4217 OperationalRetentionPolicy::KeepAll => Ok((Vec::new(), None)),
4218 OperationalRetentionPolicy::PurgeBeforeSeconds { max_age_seconds } => {
4219 let before_timestamp = now_timestamp - max_age_seconds;
4220 let mut stmt = conn.prepare(
4221 "SELECT id FROM operational_mutations \
4222 WHERE collection_name = ?1 AND created_at < ?2 \
4223 ORDER BY mutation_order",
4224 )?;
4225 let mutation_ids = stmt
4226 .query_map(
4227 rusqlite::params![collection_name, before_timestamp],
4228 |row| row.get::<_, String>(0),
4229 )?
4230 .collect::<Result<Vec<_>, _>>()?;
4231 Ok((mutation_ids, Some(before_timestamp)))
4232 }
4233 OperationalRetentionPolicy::KeepLast { max_rows } => {
4234 let mut stmt = conn.prepare(
4235 "SELECT id FROM operational_mutations \
4236 WHERE collection_name = ?1 \
4237 ORDER BY mutation_order DESC",
4238 )?;
4239 let ordered_ids = stmt
4240 .query_map([collection_name], |row| row.get::<_, String>(0))?
4241 .collect::<Result<Vec<_>, _>>()?;
4242 Ok((ordered_ids.into_iter().skip(max_rows).collect(), None))
4243 }
4244 }
4245}
4246
4247fn parse_operational_retention_policy(
4248 retention_json: &str,
4249) -> Result<OperationalRetentionPolicy, EngineError> {
4250 let policy: OperationalRetentionPolicy = serde_json::from_str(retention_json)
4251 .map_err(|error| EngineError::InvalidWrite(format!("invalid retention_json: {error}")))?;
4252 match policy {
4253 OperationalRetentionPolicy::KeepAll => Ok(policy),
4254 OperationalRetentionPolicy::PurgeBeforeSeconds { max_age_seconds } => {
4255 if max_age_seconds <= 0 {
4256 return Err(EngineError::InvalidWrite(
4257 "retention_json max_age_seconds must be greater than zero".to_owned(),
4258 ));
4259 }
4260 Ok(policy)
4261 }
4262 OperationalRetentionPolicy::KeepLast { max_rows } => {
4263 if max_rows == 0 {
4264 return Err(EngineError::InvalidWrite(
4265 "retention_json max_rows must be greater than zero".to_owned(),
4266 ));
4267 }
4268 Ok(policy)
4269 }
4270 }
4271}
4272
4273fn load_operational_retention_records(
4274 conn: &rusqlite::Connection,
4275 collection_names: Option<&[String]>,
4276 max_collections: Option<usize>,
4277) -> Result<Vec<OperationalCollectionRecord>, EngineError> {
4278 let limit = max_collections.unwrap_or(usize::MAX);
4279 if limit == 0 {
4280 return Err(EngineError::InvalidWrite(
4281 "max_collections must be greater than zero".to_owned(),
4282 ));
4283 }
4284
4285 let mut records = Vec::new();
4286 if let Some(collection_names) = collection_names {
4287 for name in collection_names.iter().take(limit) {
4288 let record = load_operational_collection_record(conn, name)?.ok_or_else(|| {
4289 EngineError::InvalidWrite(format!(
4290 "operational collection '{name}' is not registered"
4291 ))
4292 })?;
4293 records.push(record);
4294 }
4295 return Ok(records);
4296 }
4297
4298 let mut stmt = conn.prepare(
4299 "SELECT name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at, disabled_at \
4300 FROM operational_collections ORDER BY name",
4301 )?;
4302 let rows = stmt
4303 .query_map([], map_operational_collection_row)?
4304 .take(limit)
4305 .collect::<Result<Vec<_>, _>>()?;
4306 Ok(rows)
4307}
4308
4309fn last_operational_retention_run_at(
4310 conn: &rusqlite::Connection,
4311 collection_name: &str,
4312) -> Result<Option<i64>, EngineError> {
4313 conn.query_row(
4314 "SELECT MAX(executed_at) FROM operational_retention_runs WHERE collection_name = ?1",
4315 [collection_name],
4316 |row| row.get(0),
4317 )
4318 .optional()
4319 .map_err(EngineError::Sqlite)
4320 .map(Option::flatten)
4321}
4322
4323fn count_operational_mutations_for_collection(
4324 conn: &rusqlite::Connection,
4325 collection_name: &str,
4326) -> Result<usize, EngineError> {
4327 let count: i64 = conn.query_row(
4328 "SELECT count(*) FROM operational_mutations WHERE collection_name = ?1",
4329 [collection_name],
4330 |row| row.get(0),
4331 )?;
4332 usize::try_from(count).map_err(|_| {
4333 EngineError::Bridge(format!("count overflow for collection {collection_name}"))
4334 })
4335}
4336
4337fn retention_action_kind_and_limit(
4338 policy: &OperationalRetentionPolicy,
4339) -> (OperationalRetentionActionKind, Option<usize>) {
4340 match policy {
4341 OperationalRetentionPolicy::KeepAll => (OperationalRetentionActionKind::Noop, None),
4342 OperationalRetentionPolicy::PurgeBeforeSeconds { .. } => {
4343 (OperationalRetentionActionKind::PurgeBeforeSeconds, None)
4344 }
4345 OperationalRetentionPolicy::KeepLast { max_rows } => {
4346 (OperationalRetentionActionKind::KeepLast, Some(*max_rows))
4347 }
4348 }
4349}
4350
4351fn plan_operational_retention_item(
4352 conn: &rusqlite::Connection,
4353 record: &OperationalCollectionRecord,
4354 now_timestamp: i64,
4355) -> Result<OperationalRetentionPlanItem, EngineError> {
4356 let last_run_at = last_operational_retention_run_at(conn, &record.name)?;
4357 if record.kind != OperationalCollectionKind::AppendOnlyLog {
4358 return Ok(OperationalRetentionPlanItem {
4359 collection_name: record.name.clone(),
4360 action_kind: OperationalRetentionActionKind::Noop,
4361 candidate_deletions: 0,
4362 before_timestamp: None,
4363 max_rows: None,
4364 last_run_at,
4365 });
4366 }
4367 let policy = parse_operational_retention_policy(&record.retention_json)?;
4368 let (action_kind, max_rows) = retention_action_kind_and_limit(&policy);
4369 let (candidate_ids, before_timestamp) = operational_compaction_candidates_at(
4370 conn,
4371 &record.retention_json,
4372 &record.name,
4373 now_timestamp,
4374 )?;
4375 Ok(OperationalRetentionPlanItem {
4376 collection_name: record.name.clone(),
4377 action_kind,
4378 candidate_deletions: candidate_ids.len(),
4379 before_timestamp,
4380 max_rows,
4381 last_run_at,
4382 })
4383}
4384
4385fn run_operational_retention_item(
4386 tx: &rusqlite::Transaction<'_>,
4387 record: &OperationalCollectionRecord,
4388 now_timestamp: i64,
4389 dry_run: bool,
4390) -> Result<OperationalRetentionRunItem, EngineError> {
4391 let plan = plan_operational_retention_item(tx, record, now_timestamp)?;
4392 let mut deleted_mutations = 0usize;
4393 if record.kind == OperationalCollectionKind::AppendOnlyLog
4394 && plan.action_kind != OperationalRetentionActionKind::Noop
4395 && plan.candidate_deletions > 0
4396 && !dry_run
4397 {
4398 let (candidate_ids, _) = operational_compaction_candidates_at(
4399 tx,
4400 &record.retention_json,
4401 &record.name,
4402 now_timestamp,
4403 )?;
4404 let mut delete_stmt =
4405 tx.prepare_cached("DELETE FROM operational_mutations WHERE id = ?1")?;
4406 for mutation_id in &candidate_ids {
4407 delete_stmt.execute([mutation_id.as_str()])?;
4408 deleted_mutations += 1;
4409 }
4410 drop(delete_stmt);
4411
4412 persist_simple_provenance_event(
4413 tx,
4414 "operational_retention_run",
4415 &record.name,
4416 Some(serde_json::json!({
4417 "action_kind": plan.action_kind,
4418 "deleted_mutations": deleted_mutations,
4419 "before_timestamp": plan.before_timestamp,
4420 "max_rows": plan.max_rows,
4421 "executed_at": now_timestamp,
4422 })),
4423 )?;
4424 }
4425
4426 let live_rows_remaining = count_operational_mutations_for_collection(tx, &record.name)?;
4427 let effective_deleted_mutations = if dry_run {
4428 plan.candidate_deletions
4429 } else {
4430 deleted_mutations
4431 };
4432 let rows_remaining = if dry_run {
4433 live_rows_remaining.saturating_sub(effective_deleted_mutations)
4434 } else {
4435 live_rows_remaining
4436 };
4437 if !dry_run && plan.action_kind != OperationalRetentionActionKind::Noop {
4438 tx.execute(
4439 "INSERT INTO operational_retention_runs \
4440 (id, collection_name, executed_at, action_kind, dry_run, deleted_mutations, rows_remaining, metadata_json) \
4441 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
4442 rusqlite::params![
4443 new_id(),
4444 record.name,
4445 now_timestamp,
4446 serde_json::to_string(&plan.action_kind)
4447 .unwrap_or_else(|_| "\"noop\"".to_owned())
4448 .trim_matches('"')
4449 .to_owned(),
4450 i32::from(dry_run),
4451 deleted_mutations,
4452 rows_remaining,
4453 serde_json::json!({
4454 "before_timestamp": plan.before_timestamp,
4455 "max_rows": plan.max_rows,
4456 })
4457 .to_string(),
4458 ],
4459 )?;
4460 }
4461
4462 Ok(OperationalRetentionRunItem {
4463 collection_name: plan.collection_name,
4464 action_kind: plan.action_kind,
4465 deleted_mutations: effective_deleted_mutations,
4466 before_timestamp: plan.before_timestamp,
4467 max_rows: plan.max_rows,
4468 rows_remaining,
4469 })
4470}
4471
4472fn current_unix_timestamp() -> Result<i64, EngineError> {
4473 let now = SystemTime::now()
4474 .duration_since(SystemTime::UNIX_EPOCH)
4475 .map_err(|error| EngineError::Bridge(format!("system clock error: {error}")))?;
4476 i64::try_from(now.as_secs())
4477 .map_err(|_| EngineError::Bridge("unix timestamp overflow".to_owned()))
4478}
4479
4480fn map_operational_collection_row(
4481 row: &rusqlite::Row<'_>,
4482) -> Result<OperationalCollectionRecord, rusqlite::Error> {
4483 let kind_text: String = row.get(1)?;
4484 let kind = OperationalCollectionKind::try_from(kind_text.as_str()).map_err(|message| {
4485 rusqlite::Error::FromSqlConversionFailure(
4486 1,
4487 rusqlite::types::Type::Text,
4488 Box::new(io::Error::new(io::ErrorKind::InvalidData, message)),
4489 )
4490 })?;
4491 Ok(OperationalCollectionRecord {
4492 name: row.get(0)?,
4493 kind,
4494 schema_json: row.get(2)?,
4495 retention_json: row.get(3)?,
4496 filter_fields_json: row.get(4)?,
4497 validation_json: row.get(5)?,
4498 secondary_indexes_json: row.get(6)?,
4499 format_version: row.get(7)?,
4500 created_at: row.get(8)?,
4501 disabled_at: row.get(9)?,
4502 })
4503}
4504
4505fn map_operational_mutation_row(
4506 row: &rusqlite::Row<'_>,
4507) -> Result<OperationalMutationRow, rusqlite::Error> {
4508 Ok(OperationalMutationRow {
4509 id: row.get(0)?,
4510 collection_name: row.get(1)?,
4511 record_key: row.get(2)?,
4512 op_kind: row.get(3)?,
4513 payload_json: row.get(4)?,
4514 source_ref: row.get(5)?,
4515 created_at: row.get(6)?,
4516 })
4517}
4518
4519fn map_operational_current_row(
4520 row: &rusqlite::Row<'_>,
4521) -> Result<OperationalCurrentRow, rusqlite::Error> {
4522 Ok(OperationalCurrentRow {
4523 collection_name: row.get(0)?,
4524 record_key: row.get(1)?,
4525 payload_json: row.get(2)?,
4526 updated_at: row.get(3)?,
4527 last_mutation_id: row.get(4)?,
4528 })
4529}
4530
4531#[cfg(test)]
4532#[allow(clippy::expect_used)]
4533mod tests {
4534 use std::fs;
4535 use std::sync::Arc;
4536
4537 use fathomdb_schema::SchemaManager;
4538 use tempfile::NamedTempFile;
4539
4540 use super::{
4541 AdminService, FtsPropertyPathMode, FtsPropertyPathSpec, SafeExportOptions,
4542 VectorRegenerationConfig,
4543 };
4544 use crate::embedder::{EmbedderError, QueryEmbedder, QueryEmbedderIdentity};
4545 use crate::projection::ProjectionTarget;
4546 use crate::sqlite;
4547 use crate::{
4548 EngineError, ExecutionCoordinator, OperationalCollectionKind, OperationalRegisterRequest,
4549 TelemetryCounters,
4550 };
4551
4552 use fathomdb_query::QueryBuilder;
4553
4554 #[cfg(feature = "sqlite-vec")]
4555 use super::load_vector_regeneration_config;
4556
    /// Deterministic in-process embedder for tests: every query maps to the
    /// same fixed vector, and the reported identity is configurable.
    #[derive(Debug)]
    #[allow(dead_code)]
    struct TestEmbedder {
        // Identity returned verbatim from `identity()`.
        identity: QueryEmbedderIdentity,
        // Vector returned (cloned) for every `embed_query` call.
        vector: Vec<f32>,
    }

    #[allow(dead_code)]
    impl TestEmbedder {
        /// Builds an embedder reporting `model` at version "1.0.0" with "l2"
        /// normalization, whose output is `dimension` components of `1.0`.
        fn new(model: &str, dimension: usize) -> Self {
            Self {
                identity: QueryEmbedderIdentity {
                    model_identity: model.to_owned(),
                    model_version: "1.0.0".to_owned(),
                    dimension,
                    normalization_policy: "l2".to_owned(),
                },
                vector: vec![1.0; dimension],
            }
        }
    }

    impl QueryEmbedder for TestEmbedder {
        /// Ignores the input text and returns a copy of the fixed vector.
        fn embed_query(&self, _text: &str) -> Result<Vec<f32>, EmbedderError> {
            Ok(self.vector.clone())
        }
        /// Returns a copy of the configured identity.
        fn identity(&self) -> QueryEmbedderIdentity {
            self.identity.clone()
        }
    }
4590
    /// Embedder test double whose `embed_query` always fails, for exercising
    /// error paths.
    #[derive(Debug)]
    #[allow(dead_code)]
    struct FailingEmbedder {
        // Identity returned verbatim from `identity()`.
        identity: QueryEmbedderIdentity,
    }

    impl QueryEmbedder for FailingEmbedder {
        /// Always returns `EmbedderError::Failed("test failure")`.
        fn embed_query(&self, _text: &str) -> Result<Vec<f32>, EmbedderError> {
            Err(EmbedderError::Failed("test failure".to_owned()))
        }
        /// Returns a copy of the configured identity.
        fn identity(&self) -> QueryEmbedderIdentity {
            self.identity.clone()
        }
    }
4607
    /// Sets the Unix permission bits of `path` to `mode`.
    ///
    /// Panics (via `expect`) if the metadata cannot be read or the
    /// permissions cannot be applied.
    #[allow(dead_code)]
    #[cfg(unix)]
    fn set_file_mode(path: &std::path::Path, mode: u32) {
        use std::os::unix::fs::PermissionsExt;

        let mut permissions = fs::metadata(path).expect("script metadata").permissions();
        permissions.set_mode(mode);
        fs::set_permissions(path, permissions).expect("chmod");
    }

    /// Non-Unix counterpart of `set_file_mode`: intentionally does nothing.
    #[allow(dead_code)]
    #[cfg(not(unix))]
    fn set_file_mode(_path: &std::path::Path, _mode: u32) {}
4621
    /// Creates a temp-file database, bootstraps the schema on it, and returns
    /// the temp file handle together with an `AdminService` pointed at it.
    ///
    /// The temp file handle is returned so the caller controls its lifetime.
    fn setup() -> (NamedTempFile, AdminService) {
        let db = NamedTempFile::new().expect("temp file");
        let schema = Arc::new(SchemaManager::new());
        {
            // Bootstrap on a short-lived connection that is closed before the
            // service is constructed.
            let conn = sqlite::open_connection(db.path()).expect("connection");
            schema.bootstrap(&conn).expect("bootstrap");
        }
        let service = AdminService::new(db.path(), Arc::clone(&schema));
        (db, service)
    }
4632
    /// A freshly bootstrapped database reports zero duplicate active logical
    /// ids and zero operational integrity issues.
    #[test]
    fn check_integrity_includes_active_uniqueness_count() {
        let (_db, service) = setup();
        let report = service.check_integrity().expect("integrity check");
        assert_eq!(report.duplicate_active_logical_ids, 0);
        assert_eq!(report.operational_missing_collections, 0);
        assert_eq!(report.operational_missing_last_mutations, 0);
    }
4641
    /// Tracing a source ref reports the node row written under it, including
    /// that node's logical id.
    #[test]
    fn trace_source_returns_node_logical_ids() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 'source-1')",
                [],
            )
            .expect("insert node");
        }
        let report = service.trace_source("source-1").expect("trace");
        assert_eq!(report.node_rows, 1);
        assert_eq!(report.node_logical_ids, vec!["lg1"]);
    }
4658
    /// Tracing a source ref also reports operational mutations written under
    /// it, by count and by id.
    #[test]
    fn trace_source_includes_operational_mutations() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO operational_collections \
                 (name, kind, schema_json, retention_json, format_version, created_at) \
                 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
                [],
            )
            .expect("insert collection");
            conn.execute(
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"ok\"}', 'source-1', 100, 1)",
                [],
            )
            .expect("insert mutation");
        }

        let report = service.trace_source("source-1").expect("trace");
        assert_eq!(report.operational_mutation_rows, 1);
        assert_eq!(report.operational_mutation_ids, vec!["m1"]);
    }
4684
    /// Excising the source that produced the active revision makes the
    /// previously superseded revision of the same logical id active again.
    #[test]
    fn excise_source_restores_prior_active_node() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            // v1: written by source-1, superseded at t=200.
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
                [],
            )
            .expect("insert v1 superseded");
            // v2: written by source-2, currently active.
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
                [],
            )
            .expect("insert v2 active");
        }
        service.excise_source("source-2").expect("excise");
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            let active_row_id: String = conn
                .query_row(
                    "SELECT row_id FROM nodes WHERE logical_id = 'lg1' AND superseded_at IS NULL",
                    [],
                    |row| row.get(0),
                )
                .expect("active row exists after excise");
            assert_eq!(active_row_id, "r1");
        }
    }
4716
    /// Excising a source removes its operational mutations and rebuilds the
    /// `operational_current` row from the surviving mutation history.
    #[test]
    fn excise_source_deletes_operational_mutations_and_repairs_latest_state_current() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO operational_collections \
                 (name, kind, schema_json, retention_json, format_version, created_at) \
                 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
                [],
            )
            .expect("insert collection");
            // m1 (source-1) is the older value that should survive.
            conn.execute(
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"old\"}', 'source-1', 100, 1)",
                [],
            )
            .expect("insert prior mutation");
            // m2 (source-2) is the newer value that will be excised.
            conn.execute(
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES ('m2', 'connector_health', 'gmail', 'put', '{\"status\":\"new\"}', 'source-2', 200, 2)",
                [],
            )
            .expect("insert excised mutation");
            // The current row initially points at m2.
            conn.execute(
                "INSERT INTO operational_current \
                 (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
                 VALUES ('connector_health', 'gmail', '{\"status\":\"new\"}', 200, 'm2')",
                [],
            )
            .expect("insert current row");
        }

        let traced = service
            .trace_source("source-2")
            .expect("trace before excise");
        assert_eq!(traced.operational_mutation_rows, 1);
        assert_eq!(traced.operational_mutation_ids, vec!["m2"]);

        // The report returned by excise shows no mutations left for the source.
        let excised = service.excise_source("source-2").expect("excise");
        assert_eq!(excised.operational_mutation_rows, 0);
        assert!(excised.operational_mutation_ids.is_empty());

        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            let remaining: i64 = conn
                .query_row(
                    "SELECT count(*) FROM operational_mutations WHERE source_ref = 'source-2'",
                    [],
                    |row| row.get(0),
                )
                .expect("remaining count");
            assert_eq!(remaining, 0);

            // The current row must now reflect m1's payload.
            let current: (String, String) = conn
                .query_row(
                    "SELECT payload_json, last_mutation_id FROM operational_current \
                     WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
                    [],
                    |row| Ok((row.get(0)?, row.get(1)?)),
                )
                .expect("rebuilt current row");
            assert_eq!(current.0, "{\"status\":\"old\"}");
            assert_eq!(current.1, "m1");
        }
    }
4785
    /// Restoring a retired logical id reactivates its node row, the edge
    /// retired with it, and the FTS row for its preserved chunk.
    #[test]
    fn restore_logical_id_reestablishes_last_pre_retire_content_and_attached_edges() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
                [],
            )
            .expect("insert node");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('node-row-topic', 'topic-1', 'Topic', '{}', 100, 'seed')",
                [],
            )
            .expect("insert target node");
            conn.execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
                [],
            )
            .expect("insert chunk");
            conn.execute(
                "INSERT INTO edges \
                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 'seed')",
                [],
            )
            .expect("insert edge");
            // Simulate a retire at t=200: provenance events plus superseded
            // markers on the node and the edge.
            conn.execute(
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
                [],
            )
            .expect("insert node retire event");
            conn.execute(
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
                [],
            )
            .expect("insert edge retire event");
            conn.execute(
                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
                [],
            )
            .expect("retire node");
            conn.execute(
                "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
                [],
            )
            .expect("retire edge");
            // Empty the FTS table so the restore has to repopulate it.
            conn.execute("DELETE FROM fts_nodes", [])
                .expect("clear fts");
        }

        let report = service.restore_logical_id("doc-1").expect("restore");
        assert_eq!(report.logical_id, "doc-1");
        assert!(!report.was_noop);
        assert_eq!(report.restored_node_rows, 1);
        assert_eq!(report.restored_edge_rows, 1);
        assert_eq!(report.restored_chunk_rows, 1);
        assert_eq!(report.restored_fts_rows, 1);

        // Verify the database state directly, not only the report.
        let conn = sqlite::open_connection(db.path()).expect("conn");
        let active_node_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM nodes WHERE logical_id = 'doc-1' AND superseded_at IS NULL",
                [],
                |row| row.get(0),
            )
            .expect("active node count");
        assert_eq!(active_node_count, 1);
        let active_edge_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
                [],
                |row| row.get(0),
            )
            .expect("active edge count");
        assert_eq!(active_edge_count, 1);
        let fts_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM fts_nodes WHERE chunk_id = 'chunk-1'",
                [],
                |row| row.get(0),
            )
            .expect("fts count");
        assert_eq!(fts_count, 1);
    }
4876
    /// An edge retired slightly later (t=201) than its node (t=200) is still
    /// restored together with the node.
    #[test]
    fn restore_logical_id_restores_edges_retired_after_the_node_retire_event() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
                [],
            )
            .expect("insert node");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('node-row-topic', 'topic-1', 'Topic', '{}', 100, 'seed')",
                [],
            )
            .expect("insert target node");
            conn.execute(
                "INSERT INTO edges \
                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 'seed')",
                [],
            )
            .expect("insert edge");
            conn.execute(
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
                [],
            )
            .expect("insert node retire event");
            // The edge retire event and marker are one tick after the node's.
            conn.execute(
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 201, '')",
                [],
            )
            .expect("insert edge retire event");
            conn.execute(
                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
                [],
            )
            .expect("retire node");
            conn.execute(
                "UPDATE edges SET superseded_at = 201 WHERE logical_id = 'edge-1'",
                [],
            )
            .expect("retire edge");
        }

        let report = service.restore_logical_id("doc-1").expect("restore");
        assert_eq!(report.restored_edge_rows, 1);

        let conn = sqlite::open_connection(db.path()).expect("conn");
        let active_edge_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
                [],
                |row| row.get(0),
            )
            .expect("active edge count");
        assert_eq!(active_edge_count, 1);
    }
4938
    /// When two retired revisions share identical `created_at` and
    /// `superseded_at` values, restore picks the row inserted later.
    #[test]
    fn restore_logical_id_prefers_latest_retired_revision_when_timestamps_tie() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            // Both revisions carry the same timestamps; only insertion order
            // and content distinguish them.
            conn.execute(
                "INSERT INTO nodes \
                 (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
                 VALUES ('node-row-older', 'doc-1', 'Document', '{\"title\":\"older\"}', 100, 200, 'forget-1')",
                [],
            )
            .expect("insert older retired node");
            conn.execute(
                "INSERT INTO nodes \
                 (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
                 VALUES ('node-row-newer', 'doc-1', 'Document', '{\"title\":\"newer\"}', 100, 200, 'forget-1')",
                [],
            )
            .expect("insert newer retired node");
            conn.execute(
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-retire-older', 'node_retire', 'doc-1', 'forget-1', 200, '')",
                [],
            )
            .expect("insert older retire event");
            conn.execute(
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-retire-newer', 'node_retire', 'doc-1', 'forget-1', 200, '')",
                [],
            )
            .expect("insert newer retire event");
        }

        let report = service.restore_logical_id("doc-1").expect("restore");

        assert!(!report.was_noop);
        let conn = sqlite::open_connection(db.path()).expect("conn");
        let active_row: (String, String) = conn
            .query_row(
                "SELECT row_id, properties FROM nodes \
                 WHERE logical_id = 'doc-1' AND superseded_at IS NULL",
                [],
                |row| Ok((row.get(0)?, row.get(1)?)),
            )
            .expect("restored active row");
        assert_eq!(active_row.0, "node-row-newer");
        assert_eq!(active_row.1, "{\"title\":\"newer\"}");
    }
4987
    /// Purging a retired logical id deletes its node, edge, chunk and FTS
    /// rows and records exactly one `purge_logical_id` provenance event.
    #[test]
    fn purge_logical_id_removes_retired_content_and_records_tombstone() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
                [],
            )
            .expect("insert retired node");
            conn.execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
                [],
            )
            .expect("insert chunk");
            conn.execute(
                "INSERT INTO edges \
                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, superseded_at, source_ref) \
                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 200, 'seed')",
                [],
            )
            .expect("insert retired edge");
            conn.execute(
                "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
                 VALUES ('chunk-1', 'doc-1', 'Document', 'budget narrative')",
                [],
            )
            .expect("insert fts");
        }

        let report = service.purge_logical_id("doc-1").expect("purge");
        assert_eq!(report.logical_id, "doc-1");
        assert!(!report.was_noop);
        assert_eq!(report.deleted_node_rows, 1);
        assert_eq!(report.deleted_edge_rows, 1);
        assert_eq!(report.deleted_chunk_rows, 1);
        assert_eq!(report.deleted_fts_rows, 1);

        // Confirm the rows are gone from the database itself.
        let conn = sqlite::open_connection(db.path()).expect("conn");
        let remaining_nodes: i64 = conn
            .query_row(
                "SELECT count(*) FROM nodes WHERE logical_id = 'doc-1'",
                [],
                |row| row.get(0),
            )
            .expect("remaining nodes");
        assert_eq!(remaining_nodes, 0);
        let remaining_edges: i64 = conn
            .query_row(
                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1'",
                [],
                |row| row.get(0),
            )
            .expect("remaining edges");
        assert_eq!(remaining_edges, 0);
        let remaining_chunks: i64 = conn
            .query_row(
                "SELECT count(*) FROM chunks WHERE id = 'chunk-1'",
                [],
                |row| row.get(0),
            )
            .expect("remaining chunks");
        assert_eq!(remaining_chunks, 0);
        // The purge itself must leave a provenance trail.
        let purge_events: i64 = conn
            .query_row(
                "SELECT count(*) FROM provenance_events WHERE event_type = 'purge_logical_id' AND subject = 'doc-1'",
                [],
                |row| row.get(0),
            )
            .expect("purge events");
        assert_eq!(purge_events, 1);
    }
5062
    /// A chunk whose node exists only as a retired (superseded) row is not
    /// counted as orphaned.
    #[test]
    fn check_semantics_accepts_preserved_retired_chunks() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
                 VALUES ('node-row-1', 'doc-1', 'Document', '{}', 100, 200, 'seed')",
                [],
            )
            .expect("insert retired node");
            conn.execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
                [],
            )
            .expect("insert chunk");
        }

        let report = service.check_semantics().expect("semantics");
        assert_eq!(report.orphaned_chunks, 0);
    }
5085
    /// A chunk referencing a logical id with no node rows at all is reported
    /// as orphaned.
    #[test]
    fn check_semantics_detects_missing_retired_node_history_for_preserved_chunks() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            // No node with logical id 'ghost-doc' is ever inserted.
            conn.execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'ghost-doc', 'budget narrative', 100)",
                [],
            )
            .expect("insert orphaned chunk");
        }

        let report = service.check_semantics().expect("semantics");
        assert_eq!(report.orphaned_chunks, 1);
    }
5102
    /// With vector support enabled, a vec row attached to a chunk whose
    /// logical id has no node rows is reported both as an orphaned chunk and
    /// under `vec_rows_for_superseded_nodes`.
    #[cfg(feature = "sqlite-vec")]
    #[test]
    fn check_semantics_detects_missing_retired_node_history_for_preserved_vec_rows() {
        let (db, service) = setup();
        {
            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
            service
                .schema_manager
                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
                .expect("ensure vec profile");
            conn.execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'ghost-doc', 'budget narrative', 100)",
                [],
            )
            .expect("insert orphaned chunk");
            // zeroblob(16): 16 zero bytes for the dimension-4 profile above
            // (presumably four f32 components; verify against the vec schema).
            conn.execute(
                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
                [],
            )
            .expect("insert vec row");
        }

        let report = service.check_semantics().expect("semantics");
        assert_eq!(report.orphaned_chunks, 1);
        assert_eq!(report.vec_rows_for_superseded_nodes, 1);
    }
5130
    /// After restoring a retired node, its preserved vec row is found by a
    /// vector search without any re-ingest step.
    #[cfg(feature = "sqlite-vec")]
    #[test]
    fn restore_logical_id_reestablishes_vector_search_without_reingest() {
        let (db, service) = setup();
        {
            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
            service
                .schema_manager
                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
                .expect("ensure vec profile");
            // The node is inserted already retired (superseded_at = 200).
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
                [],
            )
            .expect("insert retired node");
            conn.execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
                [],
            )
            .expect("insert chunk");
            conn.execute(
                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
                [],
            )
            .expect("insert vec row");
            conn.execute(
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
                [],
            )
            .expect("insert retire event");
        }

        let report = service.restore_logical_id("doc-1").expect("restore");
        assert_eq!(report.restored_vec_rows, 1);

        // Read back through the query path to prove end-to-end visibility.
        let coordinator = ExecutionCoordinator::open(
            db.path(),
            Arc::new(SchemaManager::new()),
            Some(4),
            1,
            Arc::new(TelemetryCounters::default()),
            None,
        )
        .expect("coordinator");
        let compiled = QueryBuilder::nodes("Document")
            .vector_search("[0.0, 0.0, 0.0, 0.0]", 5)
            .compile()
            .expect("compile");
        let rows = coordinator
            .execute_compiled_read(&compiled)
            .expect("vector read");
        assert!(
            rows.nodes.iter().any(|row| row.logical_id == "doc-1"),
            "restore should make the preserved vec row visible again without re-ingest"
        );
    }
5190
    /// Purging a retired logical id also deletes the vec rows attached to its
    /// chunks.
    #[cfg(feature = "sqlite-vec")]
    #[test]
    fn purge_logical_id_deletes_vec_rows_for_retired_content() {
        let (db, service) = setup();
        {
            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
            service
                .schema_manager
                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
                .expect("ensure vec profile");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
                [],
            )
            .expect("insert retired node");
            conn.execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
                [],
            )
            .expect("insert chunk");
            conn.execute(
                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
                [],
            )
            .expect("insert vec row");
        }

        let report = service.purge_logical_id("doc-1").expect("purge");
        assert_eq!(report.deleted_vec_rows, 1);

        // The vec table must be empty afterwards.
        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
        let vec_count: i64 = conn
            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
                row.get(0)
            })
            .expect("vec count");
        assert_eq!(vec_count, 0);
    }
5231
    /// Vectors produced by `regenerate_vector_embeddings` while the node was
    /// active become searchable again once the node is retired and restored.
    #[cfg(feature = "sqlite-vec")]
    #[test]
    fn restore_logical_id_restores_visibility_of_regenerated_vectors() {
        let (db, service) = setup();

        {
            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
            service
                .schema_manager
                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
                .expect("ensure vec profile");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
                [],
            )
            .expect("insert node");
            conn.execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
                [],
            )
            .expect("insert chunk");
        }

        // Generate embeddings through the service instead of inserting vec
        // rows by hand.
        let embedder = TestEmbedder::new("test-model", 4);
        service
            .regenerate_vector_embeddings(
                &embedder,
                &VectorRegenerationConfig {
                    profile: "default".to_owned(),
                    table_name: "vec_nodes_active".to_owned(),
                    chunking_policy: "per_chunk".to_owned(),
                    preprocessing_policy: "trim".to_owned(),
                },
            )
            .expect("regenerate");

        {
            // Retire the node after its vectors exist.
            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
            conn.execute(
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
                [],
            )
            .expect("insert retire event");
            conn.execute(
                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
                [],
            )
            .expect("retire node");
        }

        let report = service.restore_logical_id("doc-1").expect("restore");
        assert_eq!(report.restored_vec_rows, 1);

        let coordinator = ExecutionCoordinator::open(
            db.path(),
            Arc::new(SchemaManager::new()),
            Some(4),
            1,
            Arc::new(TelemetryCounters::default()),
            None,
        )
        .expect("coordinator");
        let compiled = QueryBuilder::nodes("Document")
            .vector_search("[0.0, 0.0, 0.0, 0.0]", 5)
            .compile()
            .expect("compile");
        let rows = coordinator
            .execute_compiled_read(&compiled)
            .expect("vector read");
        assert!(
            rows.nodes.iter().any(|row| row.logical_id == "doc-1"),
            "restored logical_id should become visible through regenerated vectors"
        );
    }
5309
5310 #[test]
5311 fn check_semantics_clean_db_returns_zeros() {
5312 let (_db, service) = setup();
5313 let report = service.check_semantics().expect("semantics check");
5314 assert_eq!(report.orphaned_chunks, 0);
5315 assert_eq!(report.null_source_ref_nodes, 0);
5316 assert_eq!(report.broken_step_fk, 0);
5317 assert_eq!(report.broken_action_fk, 0);
5318 assert_eq!(report.stale_fts_rows, 0);
5319 assert_eq!(report.fts_rows_for_superseded_nodes, 0);
5320 assert_eq!(report.dangling_edges, 0);
5321 assert_eq!(report.orphaned_supersession_chains, 0);
5322 assert_eq!(report.stale_vec_rows, 0);
5323 assert_eq!(report.vec_rows_for_superseded_nodes, 0);
5324 assert_eq!(report.missing_operational_current_rows, 0);
5325 assert_eq!(report.stale_operational_current_rows, 0);
5326 assert_eq!(report.disabled_collection_mutations, 0);
5327 assert_eq!(report.mismatched_kind_property_fts_rows, 0);
5328 assert_eq!(report.duplicate_property_fts_rows, 0);
5329 assert_eq!(report.drifted_property_fts_rows, 0);
5330 assert!(report.warnings.is_empty());
5331 }
5332
5333 #[test]
5334 fn register_operational_collection_persists_and_emits_provenance() {
5335 let (db, service) = setup();
5336 let record = service
5337 .register_operational_collection(&OperationalRegisterRequest {
5338 name: "connector_health".to_owned(),
5339 kind: OperationalCollectionKind::LatestState,
5340 schema_json: "{}".to_owned(),
5341 retention_json: "{}".to_owned(),
5342 filter_fields_json: "[]".to_owned(),
5343 validation_json: String::new(),
5344 secondary_indexes_json: "[]".to_owned(),
5345 format_version: 1,
5346 })
5347 .expect("register collection");
5348
5349 assert_eq!(record.name, "connector_health");
5350 assert_eq!(record.kind, OperationalCollectionKind::LatestState);
5351 assert_eq!(record.schema_json, "{}");
5352 assert_eq!(record.retention_json, "{}");
5353 assert_eq!(record.filter_fields_json, "[]");
5354 assert!(record.created_at > 0);
5355 assert_eq!(record.disabled_at, None);
5356
5357 let described = service
5358 .describe_operational_collection("connector_health")
5359 .expect("describe collection")
5360 .expect("collection exists");
5361 assert_eq!(described, record);
5362
5363 let conn = sqlite::open_connection(db.path()).expect("conn");
5364 let provenance_count: i64 = conn
5365 .query_row(
5366 "SELECT count(*) FROM provenance_events \
5367 WHERE event_type = 'operational_collection_registered' AND subject = 'connector_health'",
5368 [],
5369 |row| row.get(0),
5370 )
5371 .expect("provenance count");
5372 assert_eq!(provenance_count, 1);
5373 }
5374
5375 #[test]
5376 fn register_and_update_operational_collection_validation_round_trip() {
5377 let (db, service) = setup();
5378 let record = service
5379 .register_operational_collection(&OperationalRegisterRequest {
5380 name: "connector_health".to_owned(),
5381 kind: OperationalCollectionKind::LatestState,
5382 schema_json: "{}".to_owned(),
5383 retention_json: "{}".to_owned(),
5384 filter_fields_json: "[]".to_owned(),
5385 validation_json: String::new(),
5386 secondary_indexes_json: "[]".to_owned(),
5387 format_version: 1,
5388 })
5389 .expect("register collection");
5390 assert_eq!(record.validation_json, "");
5391
5392 let validation_json = r#"{"format_version":1,"mode":"enforce","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#;
5393 let updated = service
5394 .update_operational_collection_validation("connector_health", validation_json)
5395 .expect("update validation");
5396 assert_eq!(updated.validation_json, validation_json);
5397
5398 let described = service
5399 .describe_operational_collection("connector_health")
5400 .expect("describe collection")
5401 .expect("collection exists");
5402 assert_eq!(described.validation_json, validation_json);
5403
5404 let conn = sqlite::open_connection(db.path()).expect("conn");
5405 let provenance_count: i64 = conn
5406 .query_row(
5407 "SELECT count(*) FROM provenance_events \
5408 WHERE event_type = 'operational_collection_validation_updated' \
5409 AND subject = 'connector_health'",
5410 [],
5411 |row| row.get(0),
5412 )
5413 .expect("provenance count");
5414 assert_eq!(provenance_count, 1);
5415 }
5416
5417 #[test]
5418 fn register_update_and_rebuild_operational_secondary_indexes_round_trip() {
5419 let (db, service) = setup();
5420 let record = service
5421 .register_operational_collection(&OperationalRegisterRequest {
5422 name: "audit_log".to_owned(),
5423 kind: OperationalCollectionKind::AppendOnlyLog,
5424 schema_json: "{}".to_owned(),
5425 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
5426 filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
5427 validation_json: String::new(),
5428 secondary_indexes_json: "[]".to_owned(),
5429 format_version: 1,
5430 })
5431 .expect("register collection");
5432 assert_eq!(record.secondary_indexes_json, "[]");
5433
5434 {
5435 let writer = crate::WriterActor::start(
5436 db.path(),
5437 Arc::new(SchemaManager::new()),
5438 crate::ProvenanceMode::Warn,
5439 Arc::new(crate::TelemetryCounters::default()),
5440 )
5441 .expect("writer");
5442 writer
5443 .submit(crate::WriteRequest {
5444 label: "secondary-index-seed".to_owned(),
5445 nodes: vec![],
5446 node_retires: vec![],
5447 edges: vec![],
5448 edge_retires: vec![],
5449 chunks: vec![],
5450 runs: vec![],
5451 steps: vec![],
5452 actions: vec![],
5453 optional_backfills: vec![],
5454 vec_inserts: vec![],
5455 operational_writes: vec![
5456 crate::OperationalWrite::Append {
5457 collection: "audit_log".to_owned(),
5458 record_key: "evt-1".to_owned(),
5459 payload_json: r#"{"actor":"alice","ts":100}"#.to_owned(),
5460 source_ref: Some("src-1".to_owned()),
5461 },
5462 crate::OperationalWrite::Append {
5463 collection: "audit_log".to_owned(),
5464 record_key: "evt-2".to_owned(),
5465 payload_json: r#"{"actor":"bob","ts":200}"#.to_owned(),
5466 source_ref: Some("src-2".to_owned()),
5467 },
5468 ],
5469 })
5470 .expect("seed writes");
5471 }
5472
5473 let secondary_indexes_json = r#"[{"name":"actor_ts","kind":"append_only_field_time","field":"actor","value_type":"string","time_field":"ts"}]"#;
5474 let updated = service
5475 .update_operational_collection_secondary_indexes("audit_log", secondary_indexes_json)
5476 .expect("update secondary indexes");
5477 assert_eq!(updated.secondary_indexes_json, secondary_indexes_json);
5478
5479 let conn = sqlite::open_connection(db.path()).expect("conn");
5480 let entry_count: i64 = conn
5481 .query_row(
5482 "SELECT count(*) FROM operational_secondary_index_entries \
5483 WHERE collection_name = 'audit_log' AND index_name = 'actor_ts'",
5484 [],
5485 |row| row.get(0),
5486 )
5487 .expect("secondary index count");
5488 assert_eq!(entry_count, 2);
5489 conn.execute(
5490 "DELETE FROM operational_secondary_index_entries WHERE collection_name = 'audit_log'",
5491 [],
5492 )
5493 .expect("clear index entries");
5494 drop(conn);
5495
5496 let rebuild = service
5497 .rebuild_operational_secondary_indexes("audit_log")
5498 .expect("rebuild secondary indexes");
5499 assert_eq!(rebuild.collection_name, "audit_log");
5500 assert_eq!(rebuild.mutation_entries_rebuilt, 2);
5501 assert_eq!(rebuild.current_entries_rebuilt, 0);
5502 }
5503
5504 #[test]
5505 fn register_operational_collection_rejects_invalid_validation_contract() {
5506 let (_db, service) = setup();
5507
5508 let error = service
5509 .register_operational_collection(&OperationalRegisterRequest {
5510 name: "connector_health".to_owned(),
5511 kind: OperationalCollectionKind::LatestState,
5512 schema_json: "{}".to_owned(),
5513 retention_json: "{}".to_owned(),
5514 filter_fields_json: "[]".to_owned(),
5515 validation_json: r#"{"format_version":1,"mode":"enforce","fields":[{"name":"status","type":"string","minimum":0}]}"#
5516 .to_owned(),
5517 secondary_indexes_json: "[]".to_owned(),
5518 format_version: 1,
5519 })
5520 .expect_err("invalid validation contract should reject");
5521
5522 assert!(matches!(error, EngineError::InvalidWrite(_)));
5523 assert!(error.to_string().contains("minimum/maximum"));
5524 }
5525
5526 #[test]
5527 fn validate_operational_collection_history_reports_invalid_rows_without_mutation() {
5528 let (db, service) = setup();
5529 service
5530 .register_operational_collection(&OperationalRegisterRequest {
5531 name: "audit_log".to_owned(),
5532 kind: OperationalCollectionKind::AppendOnlyLog,
5533 schema_json: "{}".to_owned(),
5534 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
5535 filter_fields_json: "[]".to_owned(),
5536 validation_json: r#"{"format_version":1,"mode":"disabled","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#
5537 .to_owned(),
5538 secondary_indexes_json: "[]".to_owned(),
5539 format_version: 1,
5540 })
5541 .expect("register collection");
5542 {
5543 let writer = crate::WriterActor::start(
5544 db.path(),
5545 Arc::new(SchemaManager::new()),
5546 crate::ProvenanceMode::Warn,
5547 Arc::new(crate::TelemetryCounters::default()),
5548 )
5549 .expect("writer");
5550 writer
5551 .submit(crate::WriteRequest {
5552 label: "history-validation".to_owned(),
5553 nodes: vec![],
5554 node_retires: vec![],
5555 edges: vec![],
5556 edge_retires: vec![],
5557 chunks: vec![],
5558 runs: vec![],
5559 steps: vec![],
5560 actions: vec![],
5561 optional_backfills: vec![],
5562 vec_inserts: vec![],
5563 operational_writes: vec![
5564 crate::OperationalWrite::Append {
5565 collection: "audit_log".to_owned(),
5566 record_key: "evt-1".to_owned(),
5567 payload_json: r#"{"status":"ok"}"#.to_owned(),
5568 source_ref: Some("src-1".to_owned()),
5569 },
5570 crate::OperationalWrite::Append {
5571 collection: "audit_log".to_owned(),
5572 record_key: "evt-2".to_owned(),
5573 payload_json: r#"{"status":"bogus"}"#.to_owned(),
5574 source_ref: Some("src-2".to_owned()),
5575 },
5576 ],
5577 })
5578 .expect("write");
5579 }
5580
5581 let report = service
5582 .validate_operational_collection_history("audit_log")
5583 .expect("validate history");
5584 assert_eq!(report.collection_name, "audit_log");
5585 assert_eq!(report.checked_rows, 2);
5586 assert_eq!(report.invalid_row_count, 1);
5587 assert_eq!(report.issues.len(), 1);
5588 assert_eq!(report.issues[0].record_key, "evt-2");
5589 assert!(report.issues[0].message.contains("must be one of"));
5590
5591 let trace = service
5592 .trace_operational_collection("audit_log", None)
5593 .expect("trace");
5594 assert_eq!(trace.mutation_count, 2);
5595
5596 let conn = sqlite::open_connection(db.path()).expect("conn");
5597 let provenance_count: i64 = conn
5598 .query_row(
5599 "SELECT count(*) FROM provenance_events \
5600 WHERE event_type = 'operational_collection_history_validated' \
5601 AND subject = 'audit_log'",
5602 [],
5603 |row| row.get(0),
5604 )
5605 .expect("provenance count");
5606 assert_eq!(provenance_count, 0);
5607 }
5608
5609 #[test]
5610 fn trace_operational_collection_returns_mutations_and_current_rows() {
5611 let (db, service) = setup();
5612 service
5613 .register_operational_collection(&OperationalRegisterRequest {
5614 name: "connector_health".to_owned(),
5615 kind: OperationalCollectionKind::LatestState,
5616 schema_json: "{}".to_owned(),
5617 retention_json: "{}".to_owned(),
5618 filter_fields_json: "[]".to_owned(),
5619 validation_json: String::new(),
5620 secondary_indexes_json: "[]".to_owned(),
5621 format_version: 1,
5622 })
5623 .expect("register collection");
5624 {
5625 let writer = crate::WriterActor::start(
5626 db.path(),
5627 Arc::new(SchemaManager::new()),
5628 crate::ProvenanceMode::Warn,
5629 Arc::new(crate::TelemetryCounters::default()),
5630 )
5631 .expect("writer");
5632 writer
5633 .submit(crate::WriteRequest {
5634 label: "operational".to_owned(),
5635 nodes: vec![],
5636 node_retires: vec![],
5637 edges: vec![],
5638 edge_retires: vec![],
5639 chunks: vec![],
5640 runs: vec![],
5641 steps: vec![],
5642 actions: vec![],
5643 optional_backfills: vec![],
5644 vec_inserts: vec![],
5645 operational_writes: vec![crate::OperationalWrite::Put {
5646 collection: "connector_health".to_owned(),
5647 record_key: "gmail".to_owned(),
5648 payload_json: r#"{"status":"ok"}"#.to_owned(),
5649 source_ref: Some("src-1".to_owned()),
5650 }],
5651 })
5652 .expect("write");
5653 }
5654
5655 let report = service
5656 .trace_operational_collection("connector_health", Some("gmail"))
5657 .expect("trace");
5658 assert_eq!(report.collection_name, "connector_health");
5659 assert_eq!(report.record_key.as_deref(), Some("gmail"));
5660 assert_eq!(report.mutation_count, 1);
5661 assert_eq!(report.current_count, 1);
5662 assert_eq!(report.mutations[0].op_kind, "put");
5663 assert_eq!(report.current_rows[0].payload_json, r#"{"status":"ok"}"#);
5664 }
5665
5666 #[test]
5667 fn trace_operational_collection_rejects_unknown_collection() {
5668 let (_db, service) = setup();
5669
5670 let error = service
5671 .trace_operational_collection("missing_collection", None)
5672 .expect_err("unknown collection should fail");
5673
5674 assert!(matches!(error, EngineError::InvalidWrite(_)));
5675 assert!(error.to_string().contains("is not registered"));
5676 }
5677
5678 #[test]
5679 fn rebuild_operational_current_repairs_missing_latest_state_rows() {
5680 let (db, service) = setup();
5681 service
5682 .register_operational_collection(&OperationalRegisterRequest {
5683 name: "connector_health".to_owned(),
5684 kind: OperationalCollectionKind::LatestState,
5685 schema_json: "{}".to_owned(),
5686 retention_json: "{}".to_owned(),
5687 filter_fields_json: "[]".to_owned(),
5688 validation_json: String::new(),
5689 secondary_indexes_json: "[]".to_owned(),
5690 format_version: 1,
5691 })
5692 .expect("register collection");
5693 {
5694 let writer = crate::WriterActor::start(
5695 db.path(),
5696 Arc::new(SchemaManager::new()),
5697 crate::ProvenanceMode::Warn,
5698 Arc::new(crate::TelemetryCounters::default()),
5699 )
5700 .expect("writer");
5701 writer
5702 .submit(crate::WriteRequest {
5703 label: "operational".to_owned(),
5704 nodes: vec![],
5705 node_retires: vec![],
5706 edges: vec![],
5707 edge_retires: vec![],
5708 chunks: vec![],
5709 runs: vec![],
5710 steps: vec![],
5711 actions: vec![],
5712 optional_backfills: vec![],
5713 vec_inserts: vec![],
5714 operational_writes: vec![crate::OperationalWrite::Put {
5715 collection: "connector_health".to_owned(),
5716 record_key: "gmail".to_owned(),
5717 payload_json: r#"{"status":"ok"}"#.to_owned(),
5718 source_ref: Some("src-1".to_owned()),
5719 }],
5720 })
5721 .expect("write");
5722 }
5723 {
5724 let conn = sqlite::open_connection(db.path()).expect("conn");
5725 conn.execute(
5726 "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
5727 [],
5728 )
5729 .expect("delete current row");
5730 }
5731
5732 let before = service.check_semantics().expect("semantics before rebuild");
5733 assert_eq!(before.missing_operational_current_rows, 1);
5734
5735 let repair = service
5736 .rebuild_operational_current(Some("connector_health"))
5737 .expect("rebuild current");
5738 assert_eq!(repair.collections_rebuilt, 1);
5739 assert_eq!(repair.current_rows_rebuilt, 1);
5740
5741 let after = service.check_semantics().expect("semantics after rebuild");
5742 assert_eq!(after.missing_operational_current_rows, 0);
5743
5744 let conn = sqlite::open_connection(db.path()).expect("conn");
5745 let payload: String = conn
5746 .query_row(
5747 "SELECT payload_json FROM operational_current \
5748 WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
5749 [],
5750 |row| row.get(0),
5751 )
5752 .expect("restored payload");
5753 assert_eq!(payload, r#"{"status":"ok"}"#);
5754 }
5755
5756 #[test]
5757 fn rebuild_operational_current_restores_latest_state_secondary_index_entries() {
5758 let (db, service) = setup();
5759 service
5760 .register_operational_collection(&OperationalRegisterRequest {
5761 name: "connector_health".to_owned(),
5762 kind: OperationalCollectionKind::LatestState,
5763 schema_json: "{}".to_owned(),
5764 retention_json: "{}".to_owned(),
5765 filter_fields_json: "[]".to_owned(),
5766 validation_json: String::new(),
5767 secondary_indexes_json: r#"[{"name":"status_current","kind":"latest_state_field","field":"status","value_type":"string"}]"#.to_owned(),
5768 format_version: 1,
5769 })
5770 .expect("register collection");
5771 {
5772 let writer = crate::WriterActor::start(
5773 db.path(),
5774 Arc::new(SchemaManager::new()),
5775 crate::ProvenanceMode::Warn,
5776 Arc::new(crate::TelemetryCounters::default()),
5777 )
5778 .expect("writer");
5779 writer
5780 .submit(crate::WriteRequest {
5781 label: "operational".to_owned(),
5782 nodes: vec![],
5783 node_retires: vec![],
5784 edges: vec![],
5785 edge_retires: vec![],
5786 chunks: vec![],
5787 runs: vec![],
5788 steps: vec![],
5789 actions: vec![],
5790 optional_backfills: vec![],
5791 vec_inserts: vec![],
5792 operational_writes: vec![crate::OperationalWrite::Put {
5793 collection: "connector_health".to_owned(),
5794 record_key: "gmail".to_owned(),
5795 payload_json: r#"{"status":"ok"}"#.to_owned(),
5796 source_ref: Some("src-1".to_owned()),
5797 }],
5798 })
5799 .expect("write");
5800 }
5801 {
5802 let conn = sqlite::open_connection(db.path()).expect("conn");
5803 let entry_count: i64 = conn
5804 .query_row(
5805 "SELECT count(*) FROM operational_secondary_index_entries \
5806 WHERE collection_name = 'connector_health' AND subject_kind = 'current'",
5807 [],
5808 |row| row.get(0),
5809 )
5810 .expect("secondary index count before repair");
5811 assert_eq!(entry_count, 1);
5812 conn.execute(
5813 "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
5814 [],
5815 )
5816 .expect("delete current row");
5817 }
5818
5819 service
5820 .rebuild_operational_current(Some("connector_health"))
5821 .expect("rebuild current");
5822
5823 let conn = sqlite::open_connection(db.path()).expect("conn");
5824 let entry_count: i64 = conn
5825 .query_row(
5826 "SELECT count(*) FROM operational_secondary_index_entries \
5827 WHERE collection_name = 'connector_health' AND subject_kind = 'current'",
5828 [],
5829 |row| row.get(0),
5830 )
5831 .expect("secondary index count after repair");
5832 assert_eq!(entry_count, 1);
5833 }
5834
5835 #[test]
5836 fn operational_current_semantics_and_rebuild_follow_mutation_order() {
5837 let (db, service) = setup();
5838 {
5839 let conn = sqlite::open_connection(db.path()).expect("conn");
5840 conn.execute(
5841 "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
5842 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
5843 [],
5844 )
5845 .expect("seed collection");
5846 conn.execute(
5847 "INSERT INTO operational_mutations \
5848 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
5849 VALUES ('m3', 'connector_health', 'gmail', 'put', '{\"status\":\"old\"}', 'src-1', 100, 1)",
5850 [],
5851 )
5852 .expect("seed first put");
5853 conn.execute(
5854 "INSERT INTO operational_mutations \
5855 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
5856 VALUES ('m2', 'connector_health', 'gmail', 'delete', '', 'src-2', 100, 2)",
5857 [],
5858 )
5859 .expect("seed delete");
5860 conn.execute(
5861 "INSERT INTO operational_mutations \
5862 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
5863 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"new\"}', 'src-3', 100, 3)",
5864 [],
5865 )
5866 .expect("seed final put");
5867 conn.execute(
5868 "INSERT INTO operational_current \
5869 (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
5870 VALUES ('connector_health', 'gmail', '{\"status\":\"new\"}', 100, 'm1')",
5871 [],
5872 )
5873 .expect("seed current");
5874 }
5875
5876 let before = service.check_semantics().expect("semantics before rebuild");
5877 assert_eq!(before.missing_operational_current_rows, 0);
5878 assert_eq!(before.stale_operational_current_rows, 0);
5879
5880 {
5881 let conn = sqlite::open_connection(db.path()).expect("conn");
5882 conn.execute(
5883 "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
5884 [],
5885 )
5886 .expect("delete current row");
5887 }
5888
5889 let missing = service.check_semantics().expect("semantics after delete");
5890 assert_eq!(missing.missing_operational_current_rows, 1);
5891 assert_eq!(missing.stale_operational_current_rows, 0);
5892
5893 service
5894 .rebuild_operational_current(Some("connector_health"))
5895 .expect("rebuild current");
5896
5897 let after = service.check_semantics().expect("semantics after rebuild");
5898 assert_eq!(after.missing_operational_current_rows, 0);
5899 assert_eq!(after.stale_operational_current_rows, 0);
5900
5901 let conn = sqlite::open_connection(db.path()).expect("conn");
5902 let payload: String = conn
5903 .query_row(
5904 "SELECT payload_json FROM operational_current \
5905 WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
5906 [],
5907 |row| row.get(0),
5908 )
5909 .expect("restored payload");
5910 assert_eq!(payload, r#"{"status":"new"}"#);
5911 }
5912
5913 #[test]
5914 fn disable_operational_collection_sets_disabled_at_and_emits_provenance() {
5915 let (db, service) = setup();
5916 service
5917 .register_operational_collection(&OperationalRegisterRequest {
5918 name: "audit_log".to_owned(),
5919 kind: OperationalCollectionKind::AppendOnlyLog,
5920 schema_json: "{}".to_owned(),
5921 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
5922 filter_fields_json: "[]".to_owned(),
5923 validation_json: String::new(),
5924 secondary_indexes_json: "[]".to_owned(),
5925 format_version: 1,
5926 })
5927 .expect("register collection");
5928
5929 let record = service
5930 .disable_operational_collection("audit_log")
5931 .expect("disable collection");
5932 assert_eq!(record.name, "audit_log");
5933 assert!(record.disabled_at.is_some());
5934
5935 let disabled_at = record.disabled_at.expect("disabled_at");
5936 let described = service
5937 .describe_operational_collection("audit_log")
5938 .expect("describe collection")
5939 .expect("collection exists");
5940 assert_eq!(described.disabled_at, Some(disabled_at));
5941
5942 let writer = crate::WriterActor::start(
5943 db.path(),
5944 Arc::new(SchemaManager::new()),
5945 crate::ProvenanceMode::Warn,
5946 Arc::new(crate::TelemetryCounters::default()),
5947 )
5948 .expect("writer");
5949 let error = writer
5950 .submit(crate::WriteRequest {
5951 label: "disabled-operational".to_owned(),
5952 nodes: vec![],
5953 node_retires: vec![],
5954 edges: vec![],
5955 edge_retires: vec![],
5956 chunks: vec![],
5957 runs: vec![],
5958 steps: vec![],
5959 actions: vec![],
5960 optional_backfills: vec![],
5961 vec_inserts: vec![],
5962 operational_writes: vec![crate::OperationalWrite::Append {
5963 collection: "audit_log".to_owned(),
5964 record_key: "evt-1".to_owned(),
5965 payload_json: r#"{"type":"sync"}"#.to_owned(),
5966 source_ref: Some("src-1".to_owned()),
5967 }],
5968 })
5969 .expect_err("disabled collection should reject writes");
5970 assert!(matches!(error, EngineError::InvalidWrite(_)));
5971 assert!(error.to_string().contains("is disabled"));
5972
5973 let conn = sqlite::open_connection(db.path()).expect("conn");
5974 let provenance_count: i64 = conn
5975 .query_row(
5976 "SELECT count(*) FROM provenance_events \
5977 WHERE event_type = 'operational_collection_disabled' AND subject = 'audit_log'",
5978 [],
5979 |row| row.get(0),
5980 )
5981 .expect("provenance count");
5982 assert_eq!(provenance_count, 1);
5983 }
5984
5985 #[test]
5986 fn purge_operational_collection_deletes_append_only_rows_before_cutoff() {
5987 let (db, service) = setup();
5988 {
5989 let conn = sqlite::open_connection(db.path()).expect("conn");
5990 conn.execute(
5991 "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
5992 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_all\"}', 1, 100)",
5993 [],
5994 )
5995 .expect("seed collection");
5996 conn.execute(
5997 "INSERT INTO operational_mutations \
5998 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
5999 VALUES ('evt-1', 'audit_log', 'evt-1', 'append', '{\"seq\":1}', 'src-1', 100, 1)",
6000 [],
6001 )
6002 .expect("seed event 1");
6003 conn.execute(
6004 "INSERT INTO operational_mutations \
6005 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6006 VALUES ('evt-2', 'audit_log', 'evt-2', 'append', '{\"seq\":2}', 'src-2', 200, 2)",
6007 [],
6008 )
6009 .expect("seed event 2");
6010 conn.execute(
6011 "INSERT INTO operational_mutations \
6012 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6013 VALUES ('evt-3', 'audit_log', 'evt-3', 'append', '{\"seq\":3}', 'src-3', 300, 3)",
6014 [],
6015 )
6016 .expect("seed event 3");
6017 }
6018
6019 let report = service
6020 .purge_operational_collection("audit_log", 250)
6021 .expect("purge collection");
6022 assert_eq!(report.collection_name, "audit_log");
6023 assert_eq!(report.deleted_mutations, 2);
6024 assert_eq!(report.before_timestamp, 250);
6025
6026 let conn = sqlite::open_connection(db.path()).expect("conn");
6027 let remaining: Vec<String> = {
6028 let mut stmt = conn
6029 .prepare(
6030 "SELECT id FROM operational_mutations \
6031 WHERE collection_name = 'audit_log' ORDER BY mutation_order",
6032 )
6033 .expect("stmt");
6034 stmt.query_map([], |row| row.get(0))
6035 .expect("rows")
6036 .collect::<Result<_, _>>()
6037 .expect("collect")
6038 };
6039 assert_eq!(remaining, vec!["evt-3".to_owned()]);
6040 let provenance_count: i64 = conn
6041 .query_row(
6042 "SELECT count(*) FROM provenance_events \
6043 WHERE event_type = 'operational_collection_purged' AND subject = 'audit_log'",
6044 [],
6045 |row| row.get(0),
6046 )
6047 .expect("provenance count");
6048 assert_eq!(provenance_count, 1);
6049 }
6050
6051 #[test]
6052 fn compact_operational_collection_dry_run_reports_without_mutation() {
6053 let (db, service) = setup();
6054 {
6055 let conn = sqlite::open_connection(db.path()).expect("conn");
6056 conn.execute(
6057 "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6058 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
6059 [],
6060 )
6061 .expect("seed collection");
6062 for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
6063 conn.execute(
6064 "INSERT INTO operational_mutations \
6065 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6066 VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
6067 rusqlite::params![
6068 format!("evt-{index}"),
6069 format!("{{\"seq\":{index}}}"),
6070 created_at,
6071 index,
6072 ],
6073 )
6074 .expect("seed event");
6075 }
6076 }
6077
6078 let report = service
6079 .compact_operational_collection("audit_log", true)
6080 .expect("compact collection");
6081 assert_eq!(report.collection_name, "audit_log");
6082 assert_eq!(report.deleted_mutations, 1);
6083 assert!(report.dry_run);
6084 assert_eq!(report.before_timestamp, None);
6085
6086 let conn = sqlite::open_connection(db.path()).expect("conn");
6087 let remaining_count: i64 = conn
6088 .query_row(
6089 "SELECT count(*) FROM operational_mutations WHERE collection_name = 'audit_log'",
6090 [],
6091 |row| row.get(0),
6092 )
6093 .expect("remaining count");
6094 assert_eq!(remaining_count, 3);
6095 let provenance_count: i64 = conn
6096 .query_row(
6097 "SELECT count(*) FROM provenance_events \
6098 WHERE event_type = 'operational_collection_compacted' AND subject = 'audit_log'",
6099 [],
6100 |row| row.get(0),
6101 )
6102 .expect("provenance count");
6103 assert_eq!(provenance_count, 0);
6104 }
6105
6106 #[test]
6107 fn compact_operational_collection_keep_last_deletes_oldest_rows() {
6108 let (db, service) = setup();
6109 {
6110 let conn = sqlite::open_connection(db.path()).expect("conn");
6111 conn.execute(
6112 "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6113 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
6114 [],
6115 )
6116 .expect("seed collection");
6117 for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
6118 conn.execute(
6119 "INSERT INTO operational_mutations \
6120 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6121 VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
6122 rusqlite::params![
6123 format!("evt-{index}"),
6124 format!("{{\"seq\":{index}}}"),
6125 created_at,
6126 index,
6127 ],
6128 )
6129 .expect("seed event");
6130 }
6131 }
6132
6133 let report = service
6134 .compact_operational_collection("audit_log", false)
6135 .expect("compact collection");
6136 assert_eq!(report.deleted_mutations, 1);
6137 assert!(!report.dry_run);
6138
6139 let conn = sqlite::open_connection(db.path()).expect("conn");
6140 let remaining: Vec<String> = {
6141 let mut stmt = conn
6142 .prepare(
6143 "SELECT id FROM operational_mutations \
6144 WHERE collection_name = 'audit_log' ORDER BY mutation_order",
6145 )
6146 .expect("stmt");
6147 stmt.query_map([], |row| row.get(0))
6148 .expect("rows")
6149 .collect::<Result<_, _>>()
6150 .expect("collect")
6151 };
6152 assert_eq!(remaining, vec!["evt-2".to_owned(), "evt-3".to_owned()]);
6153 let provenance_count: i64 = conn
6154 .query_row(
6155 "SELECT count(*) FROM provenance_events \
6156 WHERE event_type = 'operational_collection_compacted' AND subject = 'audit_log'",
6157 [],
6158 |row| row.get(0),
6159 )
6160 .expect("provenance count");
6161 assert_eq!(provenance_count, 1);
6162 }
6163
6164 #[test]
6165 fn plan_and_run_operational_retention_keep_last() {
6166 let (db, service) = setup();
6167 {
6168 let conn = sqlite::open_connection(db.path()).expect("conn");
6169 conn.execute(
6170 "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6171 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
6172 [],
6173 )
6174 .expect("seed collection");
6175 for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
6176 conn.execute(
6177 "INSERT INTO operational_mutations \
6178 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6179 VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
6180 rusqlite::params![
6181 format!("evt-{index}"),
6182 format!("{{\"seq\":{index}}}"),
6183 created_at,
6184 index,
6185 ],
6186 )
6187 .expect("seed event");
6188 }
6189 }
6190
6191 let plan = service
6192 .plan_operational_retention(1_000, None, Some(10))
6193 .expect("plan retention");
6194 assert_eq!(plan.collections_examined, 1);
6195 assert_eq!(plan.items[0].collection_name, "audit_log");
6196 assert_eq!(
6197 plan.items[0].action_kind,
6198 crate::operational::OperationalRetentionActionKind::KeepLast
6199 );
6200 assert_eq!(plan.items[0].candidate_deletions, 1);
6201 assert_eq!(plan.items[0].max_rows, Some(2));
6202 assert_eq!(plan.items[0].last_run_at, None);
6203
6204 let dry_run = service
6205 .run_operational_retention(1_000, None, Some(10), true)
6206 .expect("dry-run retention");
6207 assert!(dry_run.dry_run);
6208 assert_eq!(dry_run.collections_acted_on, 1);
6209 assert_eq!(dry_run.items[0].deleted_mutations, 1);
6210 assert_eq!(dry_run.items[0].rows_remaining, 2);
6211
6212 let conn = sqlite::open_connection(db.path()).expect("conn");
6213 let remaining_count: i64 = conn
6214 .query_row(
6215 "SELECT count(*) FROM operational_mutations WHERE collection_name = 'audit_log'",
6216 [],
6217 |row| row.get(0),
6218 )
6219 .expect("remaining count after dry run");
6220 assert_eq!(remaining_count, 3);
6221 let retention_run_count: i64 = conn
6222 .query_row(
6223 "SELECT count(*) FROM operational_retention_runs WHERE collection_name = 'audit_log'",
6224 [],
6225 |row| row.get(0),
6226 )
6227 .expect("retention run count");
6228 assert_eq!(retention_run_count, 0);
6229 drop(conn);
6230
6231 let executed = service
6232 .run_operational_retention(1_000, None, Some(10), false)
6233 .expect("execute retention");
6234 assert_eq!(executed.collections_acted_on, 1);
6235 assert_eq!(executed.items[0].deleted_mutations, 1);
6236 assert_eq!(executed.items[0].rows_remaining, 2);
6237
6238 let conn = sqlite::open_connection(db.path()).expect("conn");
6239 let remaining: Vec<String> = {
6240 let mut stmt = conn
6241 .prepare(
6242 "SELECT id FROM operational_mutations \
6243 WHERE collection_name = 'audit_log' ORDER BY mutation_order",
6244 )
6245 .expect("stmt");
6246 stmt.query_map([], |row| row.get(0))
6247 .expect("rows")
6248 .collect::<Result<_, _>>()
6249 .expect("collect")
6250 };
6251 assert_eq!(remaining, vec!["evt-2".to_owned(), "evt-3".to_owned()]);
6252 let last_run_at: i64 = conn
6253 .query_row(
6254 "SELECT executed_at FROM operational_retention_runs \
6255 WHERE collection_name = 'audit_log' ORDER BY executed_at DESC LIMIT 1",
6256 [],
6257 |row| row.get(0),
6258 )
6259 .expect("last run at");
6260 assert_eq!(last_run_at, 1_000);
6261 }
6262
6263 #[test]
6264 fn dry_run_operational_retention_does_not_mark_noop_collection_as_acted_on() {
6265 let (db, service) = setup();
6266 let conn = sqlite::open_connection(db.path()).expect("conn");
6267 conn.execute(
6268 "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6269 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
6270 [],
6271 )
6272 .expect("seed collection");
6273 for (index, created_at) in [(1_i64, 100_i64), (2, 200)] {
6274 conn.execute(
6275 "INSERT INTO operational_mutations \
6276 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6277 VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
6278 rusqlite::params![
6279 format!("evt-{index}"),
6280 format!("{{\"seq\":{index}}}"),
6281 created_at,
6282 index,
6283 ],
6284 )
6285 .expect("seed event");
6286 }
6287 drop(conn);
6288
6289 let dry_run = service
6290 .run_operational_retention(1_000, None, Some(10), true)
6291 .expect("dry-run retention");
6292 assert!(dry_run.dry_run);
6293 assert_eq!(dry_run.collections_acted_on, 0);
6294 assert_eq!(dry_run.items[0].deleted_mutations, 0);
6295 assert_eq!(dry_run.items[0].rows_remaining, 2);
6296 }
6297
6298 #[test]
6299 fn compact_operational_collection_rejects_latest_state() {
6300 let (_db, service) = setup();
6301 service
6302 .register_operational_collection(&OperationalRegisterRequest {
6303 name: "connector_health".to_owned(),
6304 kind: OperationalCollectionKind::LatestState,
6305 schema_json: "{}".to_owned(),
6306 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6307 filter_fields_json: "[]".to_owned(),
6308 validation_json: String::new(),
6309 secondary_indexes_json: "[]".to_owned(),
6310 format_version: 1,
6311 })
6312 .expect("register collection");
6313
6314 let error = service
6315 .compact_operational_collection("connector_health", false)
6316 .expect_err("latest_state compaction should be rejected");
6317 assert!(matches!(error, EngineError::InvalidWrite(_)));
6318 assert!(error.to_string().contains("append_only_log"));
6319 }
6320
6321 #[test]
6322 fn register_operational_collection_persists_filter_fields_json() {
6323 let (_db, service) = setup();
6324
6325 let record = service
6326 .register_operational_collection(&OperationalRegisterRequest {
6327 name: "audit_log".to_owned(),
6328 kind: OperationalCollectionKind::AppendOnlyLog,
6329 schema_json: "{}".to_owned(),
6330 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6331 filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
6332 validation_json: String::new(),
6333 secondary_indexes_json: "[]".to_owned(),
6334 format_version: 1,
6335 })
6336 .expect("register collection");
6337
6338 assert_eq!(
6339 record.filter_fields_json,
6340 r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#
6341 );
6342 }
6343
6344 #[test]
6345 fn read_operational_collection_filters_append_only_rows_by_declared_fields() {
6346 let (db, service) = setup();
6347 service
6348 .register_operational_collection(&OperationalRegisterRequest {
6349 name: "audit_log".to_owned(),
6350 kind: OperationalCollectionKind::AppendOnlyLog,
6351 schema_json: "{}".to_owned(),
6352 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6353 filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"seq","type":"integer","modes":["exact","range"]},{"name":"ts","type":"timestamp","modes":["exact","range"]}]"#.to_owned(),
6354 validation_json: String::new(),
6355 secondary_indexes_json: "[]".to_owned(),
6356 format_version: 1,
6357 })
6358 .expect("register collection");
6359 {
6360 let writer = crate::WriterActor::start(
6361 db.path(),
6362 Arc::new(SchemaManager::new()),
6363 crate::ProvenanceMode::Warn,
6364 Arc::new(crate::TelemetryCounters::default()),
6365 )
6366 .expect("writer");
6367 writer
6368 .submit(crate::WriteRequest {
6369 label: "operational".to_owned(),
6370 nodes: vec![],
6371 node_retires: vec![],
6372 edges: vec![],
6373 edge_retires: vec![],
6374 chunks: vec![],
6375 runs: vec![],
6376 steps: vec![],
6377 actions: vec![],
6378 optional_backfills: vec![],
6379 vec_inserts: vec![],
6380 operational_writes: vec![
6381 crate::OperationalWrite::Append {
6382 collection: "audit_log".to_owned(),
6383 record_key: "evt-1".to_owned(),
6384 payload_json: r#"{"actor":"alice","seq":1,"ts":100}"#.to_owned(),
6385 source_ref: Some("src-1".to_owned()),
6386 },
6387 crate::OperationalWrite::Append {
6388 collection: "audit_log".to_owned(),
6389 record_key: "evt-2".to_owned(),
6390 payload_json: r#"{"actor":"alice-admin","seq":2,"ts":200}"#.to_owned(),
6391 source_ref: Some("src-2".to_owned()),
6392 },
6393 crate::OperationalWrite::Append {
6394 collection: "audit_log".to_owned(),
6395 record_key: "evt-3".to_owned(),
6396 payload_json: r#"{"actor":"bob","seq":3,"ts":300}"#.to_owned(),
6397 source_ref: Some("src-3".to_owned()),
6398 },
6399 ],
6400 })
6401 .expect("write");
6402 }
6403
6404 let report = service
6405 .read_operational_collection(&crate::operational::OperationalReadRequest {
6406 collection_name: "audit_log".to_owned(),
6407 filters: vec![
6408 crate::operational::OperationalFilterClause::Prefix {
6409 field: "actor".to_owned(),
6410 value: "alice".to_owned(),
6411 },
6412 crate::operational::OperationalFilterClause::Range {
6413 field: "ts".to_owned(),
6414 lower: Some(150),
6415 upper: Some(250),
6416 },
6417 ],
6418 limit: Some(10),
6419 })
6420 .expect("filtered read");
6421
6422 assert_eq!(report.collection_name, "audit_log");
6423 assert_eq!(report.row_count, 1);
6424 assert!(!report.was_limited);
6425 assert_eq!(report.rows.len(), 1);
6426 assert_eq!(report.rows[0].record_key, "evt-2");
6427 assert_eq!(
6428 report.rows[0].payload_json,
6429 r#"{"actor":"alice-admin","seq":2,"ts":200}"#
6430 );
6431 }
6432
6433 #[test]
6434 fn read_operational_collection_uses_secondary_index_when_filter_values_are_missing() {
6435 let (db, service) = setup();
6436 service
6437 .register_operational_collection(&OperationalRegisterRequest {
6438 name: "audit_log".to_owned(),
6439 kind: OperationalCollectionKind::AppendOnlyLog,
6440 schema_json: "{}".to_owned(),
6441 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6442 filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
6443 validation_json: String::new(),
6444 secondary_indexes_json: r#"[{"name":"actor_ts","kind":"append_only_field_time","field":"actor","value_type":"string","time_field":"ts"}]"#.to_owned(),
6445 format_version: 1,
6446 })
6447 .expect("register collection");
6448 {
6449 let writer = crate::WriterActor::start(
6450 db.path(),
6451 Arc::new(SchemaManager::new()),
6452 crate::ProvenanceMode::Warn,
6453 Arc::new(crate::TelemetryCounters::default()),
6454 )
6455 .expect("writer");
6456 writer
6457 .submit(crate::WriteRequest {
6458 label: "operational".to_owned(),
6459 nodes: vec![],
6460 node_retires: vec![],
6461 edges: vec![],
6462 edge_retires: vec![],
6463 chunks: vec![],
6464 runs: vec![],
6465 steps: vec![],
6466 actions: vec![],
6467 optional_backfills: vec![],
6468 vec_inserts: vec![],
6469 operational_writes: vec![
6470 crate::OperationalWrite::Append {
6471 collection: "audit_log".to_owned(),
6472 record_key: "evt-1".to_owned(),
6473 payload_json: r#"{"actor":"alice","ts":100}"#.to_owned(),
6474 source_ref: Some("src-1".to_owned()),
6475 },
6476 crate::OperationalWrite::Append {
6477 collection: "audit_log".to_owned(),
6478 record_key: "evt-2".to_owned(),
6479 payload_json: r#"{"actor":"alice-admin","ts":200}"#.to_owned(),
6480 source_ref: Some("src-2".to_owned()),
6481 },
6482 ],
6483 })
6484 .expect("write");
6485 }
6486 let conn = sqlite::open_connection(db.path()).expect("conn");
6487 conn.execute(
6488 "DELETE FROM operational_filter_values WHERE collection_name = 'audit_log'",
6489 [],
6490 )
6491 .expect("clear filter values");
6492 drop(conn);
6493
6494 let report = service
6495 .read_operational_collection(&crate::operational::OperationalReadRequest {
6496 collection_name: "audit_log".to_owned(),
6497 filters: vec![
6498 crate::operational::OperationalFilterClause::Prefix {
6499 field: "actor".to_owned(),
6500 value: "alice".to_owned(),
6501 },
6502 crate::operational::OperationalFilterClause::Range {
6503 field: "ts".to_owned(),
6504 lower: Some(150),
6505 upper: Some(250),
6506 },
6507 ],
6508 limit: Some(10),
6509 })
6510 .expect("secondary-index read");
6511
6512 assert_eq!(report.row_count, 1);
6513 assert_eq!(report.rows[0].record_key, "evt-2");
6514 }
6515
6516 #[test]
6517 fn read_operational_collection_rejects_undeclared_fields_and_latest_state_collections() {
6518 let (_db, service) = setup();
6519 service
6520 .register_operational_collection(&OperationalRegisterRequest {
6521 name: "connector_health".to_owned(),
6522 kind: OperationalCollectionKind::LatestState,
6523 schema_json: "{}".to_owned(),
6524 retention_json: "{}".to_owned(),
6525 filter_fields_json: r#"[{"name":"status","type":"string","modes":["exact"]}]"#
6526 .to_owned(),
6527 validation_json: String::new(),
6528 secondary_indexes_json: "[]".to_owned(),
6529 format_version: 1,
6530 })
6531 .expect("register collection");
6532
6533 let latest_state_error = service
6534 .read_operational_collection(&crate::operational::OperationalReadRequest {
6535 collection_name: "connector_health".to_owned(),
6536 filters: vec![crate::operational::OperationalFilterClause::Exact {
6537 field: "status".to_owned(),
6538 value: crate::operational::OperationalFilterValue::String("ok".to_owned()),
6539 }],
6540 limit: Some(10),
6541 })
6542 .expect_err("latest_state filtered reads should be rejected");
6543 assert!(latest_state_error.to_string().contains("append_only_log"));
6544
6545 service
6546 .register_operational_collection(&OperationalRegisterRequest {
6547 name: "audit_log".to_owned(),
6548 kind: OperationalCollectionKind::AppendOnlyLog,
6549 schema_json: "{}".to_owned(),
6550 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6551 filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact"]}]"#
6552 .to_owned(),
6553 validation_json: String::new(),
6554 secondary_indexes_json: "[]".to_owned(),
6555 format_version: 1,
6556 })
6557 .expect("register append-only collection");
6558
6559 let undeclared_error = service
6560 .read_operational_collection(&crate::operational::OperationalReadRequest {
6561 collection_name: "audit_log".to_owned(),
6562 filters: vec![crate::operational::OperationalFilterClause::Exact {
6563 field: "missing".to_owned(),
6564 value: crate::operational::OperationalFilterValue::String("x".to_owned()),
6565 }],
6566 limit: Some(10),
6567 })
6568 .expect_err("undeclared field should be rejected");
6569 assert!(undeclared_error.to_string().contains("undeclared"));
6570 }
6571
6572 #[test]
6573 fn read_operational_collection_applies_limit_and_reports_truncation() {
6574 let (db, service) = setup();
6575 service
6576 .register_operational_collection(&OperationalRegisterRequest {
6577 name: "audit_log".to_owned(),
6578 kind: OperationalCollectionKind::AppendOnlyLog,
6579 schema_json: "{}".to_owned(),
6580 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6581 filter_fields_json: r#"[{"name":"actor","type":"string","modes":["prefix"]}]"#
6582 .to_owned(),
6583 validation_json: String::new(),
6584 secondary_indexes_json: "[]".to_owned(),
6585 format_version: 1,
6586 })
6587 .expect("register collection");
6588 {
6589 let writer = crate::WriterActor::start(
6590 db.path(),
6591 Arc::new(SchemaManager::new()),
6592 crate::ProvenanceMode::Warn,
6593 Arc::new(crate::TelemetryCounters::default()),
6594 )
6595 .expect("writer");
6596 writer
6597 .submit(crate::WriteRequest {
6598 label: "operational".to_owned(),
6599 nodes: vec![],
6600 node_retires: vec![],
6601 edges: vec![],
6602 edge_retires: vec![],
6603 chunks: vec![],
6604 runs: vec![],
6605 steps: vec![],
6606 actions: vec![],
6607 optional_backfills: vec![],
6608 vec_inserts: vec![],
6609 operational_writes: vec![
6610 crate::OperationalWrite::Append {
6611 collection: "audit_log".to_owned(),
6612 record_key: "evt-1".to_owned(),
6613 payload_json: r#"{"actor":"alice-1"}"#.to_owned(),
6614 source_ref: Some("src-1".to_owned()),
6615 },
6616 crate::OperationalWrite::Append {
6617 collection: "audit_log".to_owned(),
6618 record_key: "evt-2".to_owned(),
6619 payload_json: r#"{"actor":"alice-2"}"#.to_owned(),
6620 source_ref: Some("src-2".to_owned()),
6621 },
6622 ],
6623 })
6624 .expect("write");
6625 }
6626
6627 let report = service
6628 .read_operational_collection(&crate::operational::OperationalReadRequest {
6629 collection_name: "audit_log".to_owned(),
6630 filters: vec![crate::operational::OperationalFilterClause::Prefix {
6631 field: "actor".to_owned(),
6632 value: "alice".to_owned(),
6633 }],
6634 limit: Some(1),
6635 })
6636 .expect("limited read");
6637
6638 assert_eq!(report.row_count, 1);
6639 assert_eq!(report.applied_limit, 1);
6640 assert!(report.was_limited);
6641 assert_eq!(report.rows[0].record_key, "evt-2");
6642 }
6643
6644 #[test]
6645 fn preexisting_operational_collection_can_gain_filter_contract_after_upgrade() {
6646 let db = NamedTempFile::new().expect("temp db");
6647 let conn = sqlite::open_connection(db.path()).expect("conn");
6648 conn.execute_batch(
6649 r#"
6650 CREATE TABLE operational_collections (
6651 name TEXT PRIMARY KEY,
6652 kind TEXT NOT NULL,
6653 schema_json TEXT NOT NULL,
6654 retention_json TEXT NOT NULL,
6655 format_version INTEGER NOT NULL DEFAULT 1,
6656 created_at INTEGER NOT NULL DEFAULT 100,
6657 disabled_at INTEGER
6658 );
6659 CREATE TABLE operational_mutations (
6660 id TEXT PRIMARY KEY,
6661 collection_name TEXT NOT NULL,
6662 record_key TEXT NOT NULL,
6663 op_kind TEXT NOT NULL,
6664 payload_json TEXT NOT NULL,
6665 source_ref TEXT,
6666 created_at INTEGER NOT NULL DEFAULT 100,
6667 mutation_order INTEGER NOT NULL DEFAULT 1
6668 );
6669 INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at)
6670 VALUES ('audit_log', 'append_only_log', '{}', '{"mode":"keep_all"}', 1, 100);
6671 INSERT INTO operational_mutations
6672 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order)
6673 VALUES
6674 ('evt-1', 'audit_log', 'evt-1', 'append', '{"actor":"alice","ts":0}', 'src-1', 100, 1);
6675 "#,
6676 )
6677 .expect("seed pre-v10 schema");
6678 drop(conn);
6679
6680 let service = AdminService::new(db.path(), Arc::new(SchemaManager::new()));
6681 let pre_update = service
6682 .read_operational_collection(&crate::operational::OperationalReadRequest {
6683 collection_name: "audit_log".to_owned(),
6684 filters: vec![crate::operational::OperationalFilterClause::Exact {
6685 field: "actor".to_owned(),
6686 value: crate::operational::OperationalFilterValue::String("alice".to_owned()),
6687 }],
6688 limit: Some(10),
6689 })
6690 .expect_err("read should reject undeclared fields before migration update");
6691 assert!(pre_update.to_string().contains("undeclared"));
6692
6693 let updated = service
6694 .update_operational_collection_filters(
6695 "audit_log",
6696 r#"[{"name":"actor","type":"string","modes":["exact"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#,
6697 )
6698 .expect("update filter contract");
6699 assert!(updated.filter_fields_json.contains("\"actor\""));
6700
6701 let report = service
6702 .read_operational_collection(&crate::operational::OperationalReadRequest {
6703 collection_name: "audit_log".to_owned(),
6704 filters: vec![crate::operational::OperationalFilterClause::Range {
6705 field: "ts".to_owned(),
6706 lower: Some(0),
6707 upper: Some(0),
6708 }],
6709 limit: Some(10),
6710 })
6711 .expect("read after explicit filter update");
6712 assert_eq!(report.row_count, 1);
6713 assert_eq!(report.rows[0].record_key, "evt-1");
6714 }
6715
6716 #[cfg(feature = "sqlite-vec")]
6717 #[test]
6718 fn check_semantics_detects_stale_vec_rows() {
6719 use crate::sqlite::open_connection_with_vec;
6720
6721 let db = NamedTempFile::new().expect("temp file");
6722 let schema = Arc::new(SchemaManager::new());
6723 {
6724 let conn = open_connection_with_vec(db.path()).expect("vec conn");
6725 schema.bootstrap(&conn).expect("bootstrap");
6726 schema
6727 .ensure_vector_profile(&conn, "default", "vec_nodes_active", 3)
6728 .expect("vec profile");
6729 let bytes: Vec<u8> = [0.1f32, 0.2f32, 0.3f32]
6731 .iter()
6732 .flat_map(|f| f.to_le_bytes())
6733 .collect();
6734 conn.execute(
6735 "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('ghost-chunk', ?1)",
6736 rusqlite::params![bytes],
6737 )
6738 .expect("insert stale vec row");
6739 }
6740 let service = AdminService::new(db.path(), Arc::clone(&schema));
6741 let report = service.check_semantics().expect("semantics check");
6742 assert_eq!(report.stale_vec_rows, 1);
6743 assert!(
6744 report.warnings.iter().any(|w| w.contains("stale vec")),
6745 "warning must mention stale vec"
6746 );
6747 }
6748
6749 #[cfg(feature = "sqlite-vec")]
6750 #[test]
6751 fn restore_vector_profiles_recreates_vec_table_from_metadata() {
6752 let db = NamedTempFile::new().expect("temp file");
6753 let schema = Arc::new(SchemaManager::new());
6754 {
6755 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
6756 schema.bootstrap(&conn).expect("bootstrap");
6757 conn.execute(
6758 "INSERT INTO vector_profiles (profile, table_name, dimension, enabled) \
6759 VALUES ('default', 'vec_nodes_active', 3, 1)",
6760 [],
6761 )
6762 .expect("insert vector profile");
6763 }
6764
6765 let service = AdminService::new(db.path(), Arc::clone(&schema));
6766 let report = service
6767 .restore_vector_profiles()
6768 .expect("restore vector profiles");
6769 assert_eq!(
6770 report.targets,
6771 vec![crate::projection::ProjectionTarget::Vec]
6772 );
6773 assert_eq!(report.rebuilt_rows, 1);
6774
6775 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
6776 let count: i64 = conn
6777 .query_row(
6778 "SELECT count(*) FROM sqlite_schema WHERE name = 'vec_nodes_active'",
6779 [],
6780 |row| row.get(0),
6781 )
6782 .expect("vec schema count");
6783 assert_eq!(count, 1, "vec table should exist after restore");
6784 }
6785
6786 #[cfg(feature = "sqlite-vec")]
6787 #[test]
6788 fn load_vector_regeneration_config_supports_json_and_toml() {
6789 let dir = tempfile::tempdir().expect("temp dir");
6790 let json_path = dir.path().join("regen.json");
6791 let toml_path = dir.path().join("regen.toml");
6792
6793 let config = VectorRegenerationConfig {
6794 profile: "default".to_owned(),
6795 table_name: "vec_nodes_active".to_owned(),
6796 chunking_policy: "per_chunk".to_owned(),
6797 preprocessing_policy: "trim".to_owned(),
6798 };
6799
6800 fs::write(&json_path, serde_json::to_string(&config).expect("json")).expect("write json");
6801 fs::write(&toml_path, toml::to_string(&config).expect("toml")).expect("write toml");
6802
6803 let parsed_json = load_vector_regeneration_config(&json_path).expect("json parse");
6804 let parsed_toml = load_vector_regeneration_config(&toml_path).expect("toml parse");
6805
6806 assert_eq!(parsed_json, config);
6807 assert_eq!(parsed_toml, config);
6808 }
6809
6810 #[test]
6815 fn regenerate_vector_embeddings_config_rejects_old_identity_fields() {
6816 let legacy_json = r#"{
6817 "profile": "default",
6818 "table_name": "vec_nodes_active",
6819 "model_identity": "old-model",
6820 "model_version": "1.0",
6821 "dimension": 4,
6822 "normalization_policy": "l2",
6823 "chunking_policy": "per_chunk",
6824 "preprocessing_policy": "trim",
6825 "generator_command": ["/bin/echo"]
6826 }"#;
6827 let result: Result<VectorRegenerationConfig, _> = serde_json::from_str(legacy_json);
6828 assert!(
6829 result.is_err(),
6830 "legacy identity fields must be rejected at deserialization"
6831 );
6832 }
6833
6834 #[cfg(all(not(feature = "sqlite-vec"), unix))]
6835 #[test]
6836 fn regenerate_vector_embeddings_unsupported_vec_capability_writes_request_and_failed_audit() {
6837 let db = NamedTempFile::new().expect("temp file");
6838 let schema = Arc::new(SchemaManager::new());
6839
6840 {
6841 let conn = sqlite::open_connection(db.path()).expect("connection");
6842 schema.bootstrap(&conn).expect("bootstrap");
6843 conn.execute(
6844 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
6845 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
6846 [],
6847 )
6848 .expect("insert node");
6849 conn.execute(
6850 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
6851 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
6852 [],
6853 )
6854 .expect("insert chunk");
6855 }
6856
6857 let service = AdminService::new(db.path(), Arc::clone(&schema));
6858 let embedder = TestEmbedder::new("test-model", 4);
6859 let error = service
6860 .regenerate_vector_embeddings(
6861 &embedder,
6862 &VectorRegenerationConfig {
6863 profile: "default".to_owned(),
6864 table_name: "vec_nodes_active".to_owned(),
6865 chunking_policy: "per_chunk".to_owned(),
6866 preprocessing_policy: "trim".to_owned(),
6867 },
6868 )
6869 .expect_err("sqlite-vec capability should be required");
6870
6871 assert!(error.to_string().contains("unsupported vec capability"));
6872
6873 let conn = sqlite::open_connection(db.path()).expect("connection");
6874 let request_count: i64 = conn
6875 .query_row(
6876 "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_requested' AND subject = 'default'",
6877 [],
6878 |row| row.get(0),
6879 )
6880 .expect("request count");
6881 assert_eq!(request_count, 1);
6882 let failed_count: i64 = conn
6883 .query_row(
6884 "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
6885 [],
6886 |row| row.get(0),
6887 )
6888 .expect("failed count");
6889 assert_eq!(failed_count, 1);
6890 let metadata_json: String = conn
6891 .query_row(
6892 "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
6893 [],
6894 |row| row.get(0),
6895 )
6896 .expect("failed metadata");
6897 assert!(metadata_json.contains("\"failure_class\":\"unsupported vec capability\""));
6898 }
6899
6900 #[cfg(feature = "sqlite-vec")]
6901 #[test]
6902 #[allow(clippy::too_many_lines)]
6903 fn regenerate_vector_embeddings_rebuilds_embeddings_via_embedder() {
6904 let db = NamedTempFile::new().expect("temp file");
6905 let schema = Arc::new(SchemaManager::new());
6906
6907 {
6908 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
6909 schema.bootstrap(&conn).expect("bootstrap");
6910 conn.execute(
6911 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
6912 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
6913 [],
6914 )
6915 .expect("insert node");
6916 conn.execute(
6917 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
6918 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
6919 [],
6920 )
6921 .expect("insert chunk 1");
6922 conn.execute(
6923 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
6924 VALUES ('chunk-2', 'doc-1', 'travel plan', 101)",
6925 [],
6926 )
6927 .expect("insert chunk 2");
6928 }
6929
6930 let service = AdminService::new(db.path(), Arc::clone(&schema));
6931 let embedder = TestEmbedder::new("test-model", 4);
6932 let report = service
6933 .regenerate_vector_embeddings(
6934 &embedder,
6935 &VectorRegenerationConfig {
6936 profile: "default".to_owned(),
6937 table_name: "vec_nodes_active".to_owned(),
6938 chunking_policy: "per_chunk".to_owned(),
6939 preprocessing_policy: "trim".to_owned(),
6940 },
6941 )
6942 .expect("regenerate vectors");
6943
6944 assert_eq!(report.profile, "default");
6945 assert_eq!(report.table_name, "vec_nodes_active");
6946 assert_eq!(report.dimension, 4);
6947 assert_eq!(report.total_chunks, 2);
6948 assert_eq!(report.regenerated_rows, 2);
6949 assert!(report.contract_persisted);
6950
6951 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
6952 let vec_count: i64 = conn
6953 .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
6954 row.get(0)
6955 })
6956 .expect("vec count");
6957 assert_eq!(vec_count, 2);
6958
6959 let (model_identity, model_version, dimension, normalization_policy): (
6963 String,
6964 String,
6965 i64,
6966 String,
6967 ) = conn
6968 .query_row(
6969 "SELECT model_identity, model_version, dimension, normalization_policy \
6970 FROM vector_embedding_contracts WHERE profile = 'default'",
6971 [],
6972 |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)),
6973 )
6974 .expect("contract row");
6975 assert_eq!(model_identity, "test-model");
6976 assert_eq!(model_version, "1.0.0");
6977 assert_eq!(dimension, 4);
6978 assert_eq!(normalization_policy, "l2");
6979
6980 let contract_format_version: i64 = conn
6981 .query_row(
6982 "SELECT contract_format_version FROM vector_embedding_contracts WHERE profile = 'default'",
6983 [],
6984 |row| row.get(0),
6985 )
6986 .expect("contract_format_version");
6987 assert_eq!(contract_format_version, 1);
6988 let request_count: i64 = conn
6989 .query_row(
6990 "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_requested' AND subject = 'default'",
6991 [],
6992 |row| row.get(0),
6993 )
6994 .expect("request audit count");
6995 assert_eq!(request_count, 1);
6996 let apply_count: i64 = conn
6997 .query_row(
6998 "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_apply' AND subject = 'default'",
6999 [],
7000 |row| row.get(0),
7001 )
7002 .expect("apply audit count");
7003 assert_eq!(apply_count, 1);
7004 let apply_metadata: String = conn
7005 .query_row(
7006 "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_apply' AND subject = 'default'",
7007 [],
7008 |row| row.get(0),
7009 )
7010 .expect("apply metadata");
7011 assert!(apply_metadata.contains("\"profile\":\"default\""));
7012 assert!(apply_metadata.contains("\"snapshot_hash\":"));
7013 assert!(apply_metadata.contains("\"model_identity\":\"test-model\""));
7014 }
7015
7016 #[cfg(feature = "sqlite-vec")]
7017 #[test]
7018 #[allow(clippy::too_many_lines)]
7019 fn regenerate_vector_embeddings_embedder_failure_leaves_contract_and_vec_rows_unchanged() {
7020 let db = NamedTempFile::new().expect("temp file");
7021 let schema = Arc::new(SchemaManager::new());
7022
7023 {
7024 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7025 schema.bootstrap(&conn).expect("bootstrap");
7026 conn.execute(
7027 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7028 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7029 [],
7030 )
7031 .expect("insert node");
7032 conn.execute(
7033 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7034 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7035 [],
7036 )
7037 .expect("insert chunk");
7038 schema
7039 .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
7040 .expect("ensure vec profile");
7041 conn.execute(
7042 r"
7043 INSERT INTO vector_embedding_contracts (
7044 profile,
7045 table_name,
7046 model_identity,
7047 model_version,
7048 dimension,
7049 normalization_policy,
7050 chunking_policy,
7051 preprocessing_policy,
7052 generator_command_json,
7053 applied_at,
7054 snapshot_hash
7055 ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)
7056 ",
7057 rusqlite::params![
7058 "default",
7059 "vec_nodes_active",
7060 "old-model",
7061 "0.9.0",
7062 4,
7063 "l2",
7064 "per_chunk",
7065 "trim",
7066 "[]",
7067 111,
7068 "old-snapshot"
7069 ],
7070 )
7071 .expect("seed contract");
7072 conn.execute(
7073 "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
7074 [],
7075 )
7076 .expect("seed vec row");
7077 }
7078
7079 let service = AdminService::new(db.path(), Arc::clone(&schema));
7080 let failing = FailingEmbedder {
7081 identity: QueryEmbedderIdentity {
7082 model_identity: "new-model".to_owned(),
7083 model_version: "1.0.0".to_owned(),
7084 dimension: 4,
7085 normalization_policy: "l2".to_owned(),
7086 },
7087 };
7088 let error = service
7089 .regenerate_vector_embeddings(
7090 &failing,
7091 &VectorRegenerationConfig {
7092 profile: "default".to_owned(),
7093 table_name: "vec_nodes_active".to_owned(),
7094 chunking_policy: "per_chunk".to_owned(),
7095 preprocessing_policy: "trim".to_owned(),
7096 },
7097 )
7098 .expect_err("embedder should fail");
7099
7100 assert!(error.to_string().contains("embedder failure"));
7101
7102 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7103 let model_identity: String = conn
7104 .query_row(
7105 "SELECT model_identity FROM vector_embedding_contracts WHERE profile = 'default'",
7106 [],
7107 |row| row.get(0),
7108 )
7109 .expect("model identity");
7110 assert_eq!(model_identity, "old-model");
7111 let snapshot_hash: String = conn
7112 .query_row(
7113 "SELECT snapshot_hash FROM vector_embedding_contracts WHERE profile = 'default'",
7114 [],
7115 |row| row.get(0),
7116 )
7117 .expect("snapshot hash");
7118 assert_eq!(snapshot_hash, "old-snapshot");
7119 let vec_count: i64 = conn
7120 .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
7121 row.get(0)
7122 })
7123 .expect("vec count");
7124 assert_eq!(vec_count, 1);
7125 let failure_count: i64 = conn
7126 .query_row(
7127 "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
7128 [],
7129 |row| row.get(0),
7130 )
7131 .expect("failure count");
7132 assert_eq!(failure_count, 1);
7133 let failure_metadata: String = conn
7134 .query_row(
7135 "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
7136 [],
7137 |row| row.get(0),
7138 )
7139 .expect("failure metadata");
7140 assert!(failure_metadata.contains("\"failure_class\":\"embedder failure\""));
7141 }
7142
7143 #[cfg(feature = "sqlite-vec")]
7154 #[test]
7155 fn regenerate_vector_embeddings_rejects_whitespace_only_profile_before_mutation() {
7156 let db = NamedTempFile::new().expect("temp file");
7157 let schema = Arc::new(SchemaManager::new());
7158 {
7159 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7160 schema.bootstrap(&conn).expect("bootstrap");
7161 conn.execute(
7162 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7163 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7164 [],
7165 )
7166 .expect("insert node");
7167 conn.execute(
7168 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7169 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7170 [],
7171 )
7172 .expect("insert chunk");
7173 }
7174
7175 let service = AdminService::new(db.path(), Arc::clone(&schema));
7176 let embedder = TestEmbedder::new("test-model", 4);
7177 let error = service
7178 .regenerate_vector_embeddings(
7179 &embedder,
7180 &VectorRegenerationConfig {
7181 profile: " ".to_owned(),
7182 table_name: "vec_nodes_active".to_owned(),
7183 chunking_policy: "per_chunk".to_owned(),
7184 preprocessing_policy: "trim".to_owned(),
7185 },
7186 )
7187 .expect_err("whitespace profile should be rejected");
7188
7189 assert!(error.to_string().contains("invalid contract"));
7190 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7191 let contract_count: i64 = conn
7192 .query_row(
7193 "SELECT count(*) FROM vector_embedding_contracts",
7194 [],
7195 |row| row.get(0),
7196 )
7197 .expect("contract count");
7198 assert_eq!(contract_count, 0);
7199 let provenance_count: i64 = conn
7200 .query_row("SELECT count(*) FROM provenance_events", [], |row| {
7201 row.get(0)
7202 })
7203 .expect("provenance count");
7204 assert_eq!(provenance_count, 0);
7205 }
7206
7207 #[cfg(feature = "sqlite-vec")]
7208 #[test]
7209 fn regenerate_vector_embeddings_rejects_future_contract_format_version() {
7210 let db = NamedTempFile::new().expect("temp file");
7211 let schema = Arc::new(SchemaManager::new());
7212 {
7213 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7214 schema.bootstrap(&conn).expect("bootstrap");
7215 conn.execute(
7216 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7217 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7218 [],
7219 )
7220 .expect("insert node");
7221 conn.execute(
7222 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7223 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7224 [],
7225 )
7226 .expect("insert chunk");
7227 conn.execute(
7228 r"
7229 INSERT INTO vector_embedding_contracts (
7230 profile,
7231 table_name,
7232 model_identity,
7233 model_version,
7234 dimension,
7235 normalization_policy,
7236 chunking_policy,
7237 preprocessing_policy,
7238 generator_command_json,
7239 applied_at,
7240 snapshot_hash,
7241 contract_format_version,
7242 updated_at
7243 ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)
7244 ",
7245 rusqlite::params![
7246 "default",
7247 "vec_nodes_active",
7248 "old-model",
7249 "0.9.0",
7250 4,
7251 "l2",
7252 "per_chunk",
7253 "trim",
7254 "[]",
7255 111,
7256 "old-snapshot",
7257 99,
7258 111,
7259 ],
7260 )
7261 .expect("seed future contract");
7262 }
7263
7264 let service = AdminService::new(db.path(), Arc::clone(&schema));
7265 let embedder = TestEmbedder::new("test-model", 4);
7266 let error = service
7267 .regenerate_vector_embeddings(
7268 &embedder,
7269 &VectorRegenerationConfig {
7270 profile: "default".to_owned(),
7271 table_name: "vec_nodes_active".to_owned(),
7272 chunking_policy: "per_chunk".to_owned(),
7273 preprocessing_policy: "trim".to_owned(),
7274 },
7275 )
7276 .expect_err("future contract version should be rejected");
7277
7278 assert!(error.to_string().contains("unsupported"));
7279 assert!(error.to_string().contains("format version"));
7280 }
7281
7282 #[test]
7283 fn check_semantics_detects_orphaned_chunk() {
7284 let (db, service) = setup();
7285 {
7286 let conn = sqlite::open_connection(db.path()).expect("conn");
7288 conn.execute(
7289 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7290 VALUES ('c1', 'ghost-node', 'text', 100)",
7291 [],
7292 )
7293 .expect("insert orphaned chunk");
7294 }
7295 let report = service.check_semantics().expect("semantics check");
7296 assert_eq!(report.orphaned_chunks, 1);
7297 }
7298
7299 #[test]
7300 fn check_semantics_detects_null_source_ref() {
7301 let (db, service) = setup();
7302 {
7303 let conn = sqlite::open_connection(db.path()).expect("conn");
7304 conn.execute(
7305 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at) \
7306 VALUES ('r1', 'lg1', 'Meeting', '{}', 100)",
7307 [],
7308 )
7309 .expect("insert node with null source_ref");
7310 }
7311 let report = service.check_semantics().expect("semantics check");
7312 assert_eq!(report.null_source_ref_nodes, 1);
7313 }
7314
7315 #[test]
7316 fn check_semantics_detects_broken_step_fk() {
7317 let (db, service) = setup();
7318 {
7319 let conn = sqlite::open_connection(db.path()).expect("conn");
7322 conn.execute_batch("PRAGMA foreign_keys = OFF;")
7323 .expect("disable FK");
7324 conn.execute(
7325 "INSERT INTO steps (id, run_id, kind, status, properties, created_at) \
7326 VALUES ('s1', 'ghost-run', 'llm', 'completed', '{}', 100)",
7327 [],
7328 )
7329 .expect("insert step with ghost run_id");
7330 }
7331 let report = service.check_semantics().expect("semantics check");
7332 assert_eq!(report.broken_step_fk, 1);
7333 }
7334
7335 #[test]
7336 fn check_semantics_detects_broken_action_fk() {
7337 let (db, service) = setup();
7338 {
7339 let conn = sqlite::open_connection(db.path()).expect("conn");
7340 conn.execute_batch("PRAGMA foreign_keys = OFF;")
7341 .expect("disable FK");
7342 conn.execute(
7343 "INSERT INTO actions (id, step_id, kind, status, properties, created_at) \
7344 VALUES ('a1', 'ghost-step', 'emit', 'completed', '{}', 100)",
7345 [],
7346 )
7347 .expect("insert action with ghost step_id");
7348 }
7349 let report = service.check_semantics().expect("semantics check");
7350 assert_eq!(report.broken_action_fk, 1);
7351 }
7352
7353 #[test]
7354 fn check_semantics_detects_stale_fts_rows() {
7355 let (db, service) = setup();
7356 {
7357 let conn = sqlite::open_connection(db.path()).expect("conn");
7358 conn.execute(
7361 "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
7362 VALUES ('ghost-chunk', 'any-node', 'Meeting', 'stale content')",
7363 [],
7364 )
7365 .expect("insert stale FTS row");
7366 }
7367 let report = service.check_semantics().expect("semantics check");
7368 assert_eq!(report.stale_fts_rows, 1);
7369 }
7370
7371 #[test]
7372 fn check_semantics_detects_fts_rows_for_superseded_nodes() {
7373 let (db, service) = setup();
7374 {
7375 let conn = sqlite::open_connection(db.path()).expect("conn");
7376 conn.execute(
7378 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
7379 VALUES ('r1', 'lg-sup', 'Meeting', '{}', 100, 200, 'src-1')",
7380 [],
7381 )
7382 .expect("insert superseded node");
7383 conn.execute(
7385 "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
7386 VALUES ('ck-x', 'lg-sup', 'Meeting', 'superseded content')",
7387 [],
7388 )
7389 .expect("insert FTS row for superseded node");
7390 }
7391 let report = service.check_semantics().expect("semantics check");
7392 assert_eq!(report.fts_rows_for_superseded_nodes, 1);
7393 }
7394
7395 #[test]
7396 fn check_semantics_detects_dangling_edges() {
7397 let (db, service) = setup();
7398 {
7399 let conn = sqlite::open_connection(db.path()).expect("conn");
7400 conn.execute_batch("PRAGMA foreign_keys = OFF;")
7401 .expect("disable FK");
7402 conn.execute(
7404 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7405 VALUES ('r1', 'lg-src', 'Meeting', '{}', 100, 'src-1')",
7406 [],
7407 )
7408 .expect("insert source node");
7409 conn.execute(
7410 "INSERT INTO edges \
7411 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
7412 VALUES ('e1', 'edge-1', 'lg-src', 'ghost-target', 'LINKS', '{}', 100, 'src-1')",
7413 [],
7414 )
7415 .expect("insert dangling edge");
7416 }
7417 let report = service.check_semantics().expect("semantics check");
7418 assert_eq!(report.dangling_edges, 1);
7419 }
7420
7421 #[test]
7422 fn check_semantics_detects_orphaned_supersession_chains() {
7423 let (db, service) = setup();
7424 {
7425 let conn = sqlite::open_connection(db.path()).expect("conn");
7426 conn.execute(
7428 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
7429 VALUES ('r1', 'lg-orphaned', 'Meeting', '{}', 100, 200, 'src-1')",
7430 [],
7431 )
7432 .expect("insert fully superseded node");
7433 }
7434 let report = service.check_semantics().expect("semantics check");
7435 assert_eq!(report.orphaned_supersession_chains, 1);
7436 }
7437
7438 #[test]
7439 fn check_semantics_detects_mismatched_kind_property_fts_rows() {
7440 let (db, service) = setup();
7441 {
7442 let conn = sqlite::open_connection(db.path()).expect("conn");
7443 conn.execute(
7445 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7446 VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'src-1')",
7447 [],
7448 )
7449 .expect("insert node");
7450 conn.execute(
7452 "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
7453 VALUES ('goal-1', 'WrongKind', 'Ship v2')",
7454 [],
7455 )
7456 .expect("insert mismatched property FTS row");
7457 }
7458 let report = service.check_semantics().expect("semantics check");
7459 assert_eq!(report.mismatched_kind_property_fts_rows, 1);
7460 }
7461
7462 #[test]
7463 fn check_semantics_detects_duplicate_property_fts_rows() {
7464 let (db, service) = setup();
7465 {
7466 let conn = sqlite::open_connection(db.path()).expect("conn");
7467 conn.execute(
7468 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7469 VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'src-1')",
7470 [],
7471 )
7472 .expect("insert node");
7473 conn.execute(
7475 "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
7476 VALUES ('goal-1', 'Goal', 'Ship v2')",
7477 [],
7478 )
7479 .expect("insert first property FTS row");
7480 conn.execute(
7481 "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
7482 VALUES ('goal-1', 'Goal', 'Ship v2 duplicate')",
7483 [],
7484 )
7485 .expect("insert duplicate property FTS row");
7486 }
7487 let report = service.check_semantics().expect("semantics check");
7488 assert_eq!(report.duplicate_property_fts_rows, 1);
7489 }
7490
7491 #[test]
7492 fn check_semantics_detects_drifted_property_fts_text() {
7493 let (db, service) = setup();
7494 {
7495 let conn = sqlite::open_connection(db.path()).expect("conn");
7496 conn.execute(
7497 "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
7498 VALUES ('Goal', '[\"$.name\"]', ' ')",
7499 [],
7500 )
7501 .expect("register schema");
7502 conn.execute(
7503 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7504 VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Current name\"}', 100, 'src-1')",
7505 [],
7506 )
7507 .expect("insert node");
7508 conn.execute(
7510 "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
7511 VALUES ('goal-1', 'Goal', 'Old stale name')",
7512 [],
7513 )
7514 .expect("insert stale property FTS row");
7515 }
7516 let report = service.check_semantics().expect("semantics check");
7517 assert_eq!(report.drifted_property_fts_rows, 1);
7518 }
7519
7520 #[test]
7521 fn check_semantics_detects_property_fts_row_that_should_not_exist() {
7522 let (db, service) = setup();
7523 {
7524 let conn = sqlite::open_connection(db.path()).expect("conn");
7525 conn.execute(
7526 "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
7527 VALUES ('Goal', '[\"$.searchable\"]', ' ')",
7528 [],
7529 )
7530 .expect("register schema");
7531 conn.execute(
7533 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7534 VALUES ('r1', 'goal-1', 'Goal', '{\"other\":\"field\"}', 100, 'src-1')",
7535 [],
7536 )
7537 .expect("insert node");
7538 conn.execute(
7540 "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
7541 VALUES ('goal-1', 'Goal', 'phantom text')",
7542 [],
7543 )
7544 .expect("insert phantom property FTS row");
7545 }
7546 let report = service.check_semantics().expect("semantics check");
7547 assert_eq!(
7548 report.drifted_property_fts_rows, 1,
7549 "row that should not exist must be counted as drifted"
7550 );
7551 }
7552
7553 #[test]
7554 fn safe_export_writes_manifest_with_sha256() {
7555 let (_db, service) = setup();
7556 let export_dir = tempfile::TempDir::new().expect("temp dir");
7557 let export_path = export_dir.path().join("backup.db");
7558
7559 let manifest = service
7560 .safe_export(
7561 &export_path,
7562 SafeExportOptions {
7563 force_checkpoint: false,
7564 },
7565 )
7566 .expect("export");
7567
7568 assert!(export_path.exists(), "exported db should exist");
7569 let manifest_path = export_dir.path().join("backup.db.export-manifest.json");
7570 assert!(
7571 manifest_path.exists(),
7572 "manifest file should exist at {}",
7573 manifest_path.display()
7574 );
7575 assert_eq!(manifest.sha256.len(), 64, "sha256 should be 64 hex chars");
7576 assert!(
7577 manifest.exported_at > 0,
7578 "exported_at should be a unix timestamp"
7579 );
7580 assert_eq!(
7581 manifest.schema_version,
7582 SchemaManager::new().current_version().0,
7583 "schema_version should match the live schema version"
7584 );
7585 assert_eq!(manifest.protocol_version, 1, "protocol_version should be 1");
7586 assert!(manifest.page_count > 0, "page_count should be positive");
7587 }
7588
7589 #[test]
7590 fn safe_export_preserves_operational_validation_contracts() {
7591 let (_db, service) = setup();
7592 let validation_json = r#"{"format_version":1,"mode":"enforce","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#;
7593 service
7594 .register_operational_collection(&OperationalRegisterRequest {
7595 name: "connector_health".to_owned(),
7596 kind: OperationalCollectionKind::LatestState,
7597 schema_json: "{}".to_owned(),
7598 retention_json: "{}".to_owned(),
7599 filter_fields_json: "[]".to_owned(),
7600 validation_json: validation_json.to_owned(),
7601 secondary_indexes_json: "[]".to_owned(),
7602 format_version: 1,
7603 })
7604 .expect("register collection");
7605
7606 let export_dir = tempfile::TempDir::new().expect("temp dir");
7607 let export_path = export_dir.path().join("backup.db");
7608 service
7609 .safe_export(
7610 &export_path,
7611 SafeExportOptions {
7612 force_checkpoint: false,
7613 },
7614 )
7615 .expect("export");
7616
7617 let exported = sqlite::open_connection(&export_path).expect("exported conn");
7618 let exported_validation_json: String = exported
7619 .query_row(
7620 "SELECT validation_json FROM operational_collections WHERE name = 'connector_health'",
7621 [],
7622 |row| row.get(0),
7623 )
7624 .expect("validation_json");
7625 assert_eq!(exported_validation_json, validation_json);
7626 }
7627
7628 #[test]
7629 fn safe_export_force_checkpoint_false_skips_wal_pragma() {
7630 let (_db, service) = setup();
7631 let export_dir = tempfile::TempDir::new().expect("temp dir");
7632 let export_path = export_dir.path().join("no-wal.db");
7633
7634 let manifest = service
7636 .safe_export(
7637 &export_path,
7638 SafeExportOptions {
7639 force_checkpoint: false,
7640 },
7641 )
7642 .expect("export with no checkpoint");
7643
7644 assert!(
7645 manifest.page_count > 0,
7646 "page_count must be populated regardless of checkpoint mode"
7647 );
7648 assert_eq!(
7649 manifest.schema_version,
7650 SchemaManager::new().current_version().0
7651 );
7652 assert_eq!(manifest.protocol_version, 1);
7653 }
7654
7655 #[test]
7656 fn safe_export_force_checkpoint_false_still_captures_wal_backed_changes() {
7657 let (db, service) = setup();
7658 let conn = sqlite::open_connection(db.path()).expect("conn");
7659 let journal_mode: String = conn
7660 .query_row("PRAGMA journal_mode=WAL", [], |row| row.get(0))
7661 .expect("enable wal");
7662 assert_eq!(journal_mode.to_lowercase(), "wal");
7663 let auto_checkpoint_pages: i64 = conn
7664 .query_row("PRAGMA wal_autocheckpoint=0", [], |row| row.get(0))
7665 .expect("disable auto checkpoint");
7666 assert_eq!(auto_checkpoint_pages, 0);
7667 conn.execute(
7668 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7669 VALUES ('r-wal', 'lg-wal', 'Meeting', '{}', 100, 'src-wal')",
7670 [],
7671 )
7672 .expect("insert wal-backed node");
7673
7674 let export_dir = tempfile::TempDir::new().expect("temp dir");
7675 let export_path = export_dir.path().join("wal-backed.db");
7676 service
7677 .safe_export(
7678 &export_path,
7679 SafeExportOptions {
7680 force_checkpoint: false,
7681 },
7682 )
7683 .expect("export wal-backed db");
7684
7685 let exported = sqlite::open_connection(&export_path).expect("open exported db");
7686 let exported_count: i64 = exported
7687 .query_row(
7688 "SELECT count(*) FROM nodes WHERE logical_id = 'lg-wal'",
7689 [],
7690 |row| row.get(0),
7691 )
7692 .expect("count exported nodes");
7693 assert_eq!(
7694 exported_count, 1,
7695 "safe_export must include committed rows that are still resident in the WAL"
7696 );
7697 }
7698
7699 #[test]
7700 fn excise_source_removes_searchable_content_after_excision() {
7701 let (db, service) = setup();
7702 {
7703 let conn = sqlite::open_connection(db.path()).expect("conn");
7704 conn.execute(
7705 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
7706 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
7707 [],
7708 )
7709 .expect("insert v1");
7710 conn.execute(
7711 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7712 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
7713 [],
7714 )
7715 .expect("insert v2");
7716 conn.execute(
7717 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7718 VALUES ('ck1', 'lg1', 'hello world', 100)",
7719 [],
7720 )
7721 .expect("insert chunk");
7722 }
7723 service.excise_source("source-2").expect("excise");
7724 {
7725 let conn = sqlite::open_connection(db.path()).expect("conn");
7726 let fts_count: i64 = conn
7727 .query_row(
7728 "SELECT count(*) FROM fts_nodes WHERE chunk_id = 'ck1'",
7729 [],
7730 |row| row.get(0),
7731 )
7732 .expect("fts count");
7733 assert_eq!(
7734 fts_count, 0,
7735 "excised content should not remain searchable after excise"
7736 );
7737 }
7738 }
7739
7740 #[cfg(feature = "sqlite-vec")]
7741 #[test]
7742 fn excise_source_cleans_chunks_and_vec_rows_for_excised_version() {
7743 let (db, service) = setup();
7744 {
7745 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7746 service
7747 .schema_manager
7748 .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
7749 .expect("ensure vec profile");
7750 conn.execute(
7751 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
7752 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
7753 [],
7754 )
7755 .expect("insert v1");
7756 conn.execute(
7757 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7758 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
7759 [],
7760 )
7761 .expect("insert v2");
7762 conn.execute(
7763 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7764 VALUES ('ck1', 'lg1', 'new content', 200)",
7765 [],
7766 )
7767 .expect("insert chunk");
7768 conn.execute(
7769 "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('ck1', zeroblob(16))",
7770 [],
7771 )
7772 .expect("insert vec row");
7773 }
7774
7775 service.excise_source("source-2").expect("excise");
7776
7777 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7778 let active_row: String = conn
7779 .query_row(
7780 "SELECT row_id FROM nodes WHERE logical_id = 'lg1' AND superseded_at IS NULL",
7781 [],
7782 |row| row.get(0),
7783 )
7784 .expect("restored active row");
7785 assert_eq!(active_row, "r1");
7786 let chunk_count: i64 = conn
7787 .query_row(
7788 "SELECT count(*) FROM chunks WHERE node_logical_id = 'lg1'",
7789 [],
7790 |row| row.get(0),
7791 )
7792 .expect("chunk count");
7793 assert_eq!(
7794 chunk_count, 0,
7795 "excised source content must not survive as chunks"
7796 );
7797 let vec_count: i64 = conn
7798 .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
7799 row.get(0)
7800 })
7801 .expect("vec count");
7802 assert_eq!(vec_count, 0, "excised source vec rows must be removed");
7803 let fts_count: i64 = conn
7804 .query_row(
7805 "SELECT count(*) FROM fts_nodes WHERE node_logical_id = 'lg1'",
7806 [],
7807 |row| row.get(0),
7808 )
7809 .expect("fts count");
7810 assert_eq!(
7811 fts_count, 0,
7812 "excised source content must not remain searchable"
7813 );
7814 }
7815
7816 #[test]
7817 fn export_page_count_matches_exported_file() {
7818 let (_db, service) = setup();
7819 let export_dir = tempfile::TempDir::new().expect("temp dir");
7820 let export_path = export_dir.path().join("page-count.db");
7821
7822 let manifest = service
7823 .safe_export(
7824 &export_path,
7825 SafeExportOptions {
7826 force_checkpoint: false,
7827 },
7828 )
7829 .expect("export");
7830
7831 let exported = sqlite::open_connection(&export_path).expect("open exported db");
7832 let actual_page_count: u64 = exported
7833 .query_row("PRAGMA page_count", [], |row| row.get(0))
7834 .expect("page_count from exported file");
7835
7836 assert_eq!(
7837 manifest.page_count, actual_page_count,
7838 "manifest page_count must match the exported file's PRAGMA page_count"
7839 );
7840 }
7841
7842 #[test]
7843 fn no_temp_file_after_successful_export() {
7844 let (_db, service) = setup();
7845 let export_dir = tempfile::TempDir::new().expect("temp dir");
7846 let export_path = export_dir.path().join("no-tmp.db");
7847
7848 service
7849 .safe_export(
7850 &export_path,
7851 SafeExportOptions {
7852 force_checkpoint: false,
7853 },
7854 )
7855 .expect("export");
7856
7857 let tmp_files: Vec<_> = fs::read_dir(export_dir.path())
7858 .expect("read export dir")
7859 .filter_map(Result::ok)
7860 .filter(|e| e.path().extension().is_some_and(|ext| ext == "tmp"))
7861 .collect();
7862
7863 assert!(
7864 tmp_files.is_empty(),
7865 "no .tmp files should remain after a successful export, found: {tmp_files:?}"
7866 );
7867 }
7868
7869 #[test]
7870 fn export_manifest_is_valid_json() {
7871 let (_db, service) = setup();
7872 let export_dir = tempfile::TempDir::new().expect("temp dir");
7873 let export_path = export_dir.path().join("valid-json.db");
7874
7875 service
7876 .safe_export(
7877 &export_path,
7878 SafeExportOptions {
7879 force_checkpoint: false,
7880 },
7881 )
7882 .expect("export");
7883
7884 let manifest_path = export_dir.path().join("valid-json.db.export-manifest.json");
7885 let manifest_contents = fs::read_to_string(&manifest_path).expect("read manifest");
7886 let parsed: serde_json::Value =
7887 serde_json::from_str(&manifest_contents).expect("manifest must be valid JSON");
7888
7889 assert!(
7890 parsed.get("exported_at").is_some(),
7891 "manifest must contain exported_at"
7892 );
7893 assert!(
7894 parsed.get("sha256").is_some(),
7895 "manifest must contain sha256"
7896 );
7897 assert!(
7898 parsed.get("schema_version").is_some(),
7899 "manifest must contain schema_version"
7900 );
7901 assert!(
7902 parsed.get("protocol_version").is_some(),
7903 "manifest must contain protocol_version"
7904 );
7905 assert!(
7906 parsed.get("page_count").is_some(),
7907 "manifest must contain page_count"
7908 );
7909 }
7910
7911 #[test]
7912 fn provenance_purge_dry_run_reports_counts() {
7913 let (db, service) = setup();
7914 {
7915 let conn = sqlite::open_connection(db.path()).expect("conn");
7916 conn.execute(
7917 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
7918 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
7919 [],
7920 )
7921 .expect("insert p1");
7922 conn.execute(
7923 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
7924 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 200)",
7925 [],
7926 )
7927 .expect("insert p2");
7928 conn.execute(
7929 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
7930 VALUES ('p3', 'excise', 'lg3', 'src-1', 300)",
7931 [],
7932 )
7933 .expect("insert p3");
7934 }
7935
7936 let options = super::ProvenancePurgeOptions {
7937 dry_run: true,
7938 preserve_event_types: Vec::new(),
7939 };
7940 let report = service
7941 .purge_provenance_events(250, &options)
7942 .expect("dry run purge");
7943
7944 assert_eq!(report.events_deleted, 2);
7945 assert_eq!(report.events_preserved, 1);
7946 assert!(report.oldest_remaining.is_some());
7947
7948 let conn = sqlite::open_connection(db.path()).expect("conn");
7949 let total: i64 = conn
7950 .query_row("SELECT count(*) FROM provenance_events", [], |row| {
7951 row.get(0)
7952 })
7953 .expect("count");
7954 assert_eq!(total, 3, "dry_run must not delete any events");
7955 }
7956
7957 #[test]
7958 fn provenance_purge_deletes_old_events() {
7959 let (db, service) = setup();
7960 {
7961 let conn = sqlite::open_connection(db.path()).expect("conn");
7962 conn.execute(
7963 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
7964 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
7965 [],
7966 )
7967 .expect("insert p1");
7968 conn.execute(
7969 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
7970 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 200)",
7971 [],
7972 )
7973 .expect("insert p2");
7974 }
7975
7976 let options = super::ProvenancePurgeOptions {
7977 dry_run: false,
7978 preserve_event_types: Vec::new(),
7979 };
7980 let report = service
7981 .purge_provenance_events(150, &options)
7982 .expect("purge");
7983
7984 assert_eq!(report.events_deleted, 1);
7985 assert_eq!(report.events_preserved, 1);
7986 assert_eq!(report.oldest_remaining, Some(200));
7987
7988 let conn = sqlite::open_connection(db.path()).expect("conn");
7989 let remaining: i64 = conn
7990 .query_row("SELECT count(*) FROM provenance_events", [], |row| {
7991 row.get(0)
7992 })
7993 .expect("count");
7994 assert_eq!(remaining, 1);
7995 }
7996
7997 #[test]
7998 fn provenance_purge_preserves_specified_types() {
7999 let (db, service) = setup();
8000 {
8001 let conn = sqlite::open_connection(db.path()).expect("conn");
8002 conn.execute(
8003 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8004 VALUES ('p1', 'excise', 'lg1', 'src-1', 100)",
8005 [],
8006 )
8007 .expect("insert p1");
8008 conn.execute(
8009 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8010 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 100)",
8011 [],
8012 )
8013 .expect("insert p2");
8014 conn.execute(
8015 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8016 VALUES ('p3', 'node_insert', 'lg3', 'src-1', 100)",
8017 [],
8018 )
8019 .expect("insert p3");
8020 }
8021
8022 let options = super::ProvenancePurgeOptions {
8023 dry_run: false,
8024 preserve_event_types: Vec::new(),
8025 };
8026 let report = service
8027 .purge_provenance_events(500, &options)
8028 .expect("purge");
8029
8030 assert_eq!(report.events_deleted, 2);
8031 assert_eq!(report.events_preserved, 1);
8032
8033 let conn = sqlite::open_connection(db.path()).expect("conn");
8034 let remaining_type: String = conn
8035 .query_row("SELECT event_type FROM provenance_events", [], |row| {
8036 row.get(0)
8037 })
8038 .expect("remaining event type");
8039 assert_eq!(remaining_type, "excise");
8040 }
8041
8042 #[test]
8043 fn provenance_purge_noop_with_zero_timestamp() {
8044 let (db, service) = setup();
8045 {
8046 let conn = sqlite::open_connection(db.path()).expect("conn");
8047 conn.execute(
8048 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8049 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
8050 [],
8051 )
8052 .expect("insert p1");
8053 }
8054
8055 let options = super::ProvenancePurgeOptions {
8056 dry_run: false,
8057 preserve_event_types: Vec::new(),
8058 };
8059 let report = service.purge_provenance_events(0, &options).expect("purge");
8060
8061 assert_eq!(report.events_deleted, 0);
8062 assert_eq!(report.events_preserved, 1);
8063 assert_eq!(report.oldest_remaining, Some(100));
8064 }
8065
8066 #[test]
8067 fn restore_skips_edge_when_counterpart_purged() {
8068 let (db, service) = setup();
8069 {
8070 let conn = sqlite::open_connection(db.path()).expect("conn");
8071 conn.execute(
8073 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8074 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
8075 [],
8076 )
8077 .expect("insert node A");
8078 conn.execute(
8079 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8080 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
8081 [],
8082 )
8083 .expect("insert node B");
8084 conn.execute(
8086 "INSERT INTO edges \
8087 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
8088 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
8089 [],
8090 )
8091 .expect("insert edge");
8092 conn.execute(
8094 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8095 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
8096 [],
8097 )
8098 .expect("insert retire event A");
8099 conn.execute(
8100 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8101 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
8102 [],
8103 )
8104 .expect("insert edge retire event");
8105 conn.execute(
8106 "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
8107 [],
8108 )
8109 .expect("retire node A");
8110 conn.execute(
8111 "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-2'",
8112 [],
8113 )
8114 .expect("retire node B");
8115 conn.execute(
8116 "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
8117 [],
8118 )
8119 .expect("retire edge");
8120 conn.execute("DELETE FROM nodes WHERE logical_id = 'doc-2'", [])
8123 .expect("purge node B rows");
8124 }
8125
8126 let report = service.restore_logical_id("doc-1").expect("restore A");
8128 assert!(!report.was_noop);
8129 assert_eq!(report.restored_node_rows, 1);
8130 assert_eq!(report.restored_edge_rows, 0, "edge should not be restored");
8131 assert_eq!(report.skipped_edges.len(), 1);
8132 assert_eq!(report.skipped_edges[0].edge_logical_id, "edge-1");
8133 assert_eq!(report.skipped_edges[0].missing_endpoint, "doc-2");
8134
8135 let conn = sqlite::open_connection(db.path()).expect("conn");
8137 let active_edge_count: i64 = conn
8138 .query_row(
8139 "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
8140 [],
8141 |row| row.get(0),
8142 )
8143 .expect("active edge count");
8144 assert_eq!(active_edge_count, 0, "edge must remain retired");
8145 }
8146
/// Restoring a retired node must also reactivate a retired edge when the
/// edge's other endpoint (`doc-2`) was never retired.
#[test]
fn restore_restores_edges_to_active_nodes() {
    let (db, service) = setup();

    // Seed through a scoped connection so it is dropped before the restore runs.
    {
        let seed_conn = sqlite::open_connection(db.path()).expect("conn");
        // Each entry is (statement, panic context); they run strictly in order.
        let seed_steps = [
            (
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
                "insert node A",
            ),
            (
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
                "insert node B",
            ),
            (
                "INSERT INTO edges \
                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
                "insert edge",
            ),
            (
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
                "insert retire event A",
            ),
            (
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
                "insert edge retire event",
            ),
            (
                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
                "retire node A",
            ),
            (
                "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
                "retire edge",
            ),
        ];
        for (sql, context) in seed_steps {
            seed_conn.execute(sql, []).expect(context);
        }
    }

    let outcome = service.restore_logical_id("doc-1").expect("restore A");

    assert!(!outcome.was_noop);
    assert_eq!(outcome.restored_node_rows, 1);
    assert!(outcome.restored_edge_rows > 0, "edge should be restored");
    assert!(
        outcome.skipped_edges.is_empty(),
        "no edges should be skipped"
    );

    // Confirm directly in the table that exactly one active edge row exists.
    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let active_edges = verify_conn
        .query_row(
            "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
            [],
            |row| row.get::<_, i64>(0),
        )
        .expect("active edge count");
    assert_eq!(active_edges, 1, "edge must be active");
}
8218
/// When both endpoints were retired, restoring `doc-2` first and then `doc-1`
/// must leave the connecting edge active, with no skipped edges reported for
/// the second restore.
#[test]
fn restore_restores_edges_when_both_restored() {
    let (db, service) = setup();

    // Seed through a scoped connection so it is dropped before the restores run.
    {
        let seed_conn = sqlite::open_connection(db.path()).expect("conn");
        // Each entry is (statement, panic context); they run strictly in order.
        let seed_steps = [
            (
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
                "insert node A",
            ),
            (
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
                "insert node B",
            ),
            (
                "INSERT INTO edges \
                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
                "insert edge",
            ),
            (
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
                "insert retire event A",
            ),
            (
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-retire-b', 'node_retire', 'doc-2', 'forget-1', 200, '')",
                "insert retire event B",
            ),
            (
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
                "insert edge retire event",
            ),
            (
                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
                "retire node A",
            ),
            (
                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-2'",
                "retire node B",
            ),
            (
                "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
                "retire edge",
            ),
        ];
        for (sql, context) in seed_steps {
            seed_conn.execute(sql, []).expect(context);
        }
    }

    // Restore the far endpoint first, then the near one.
    let second_endpoint = service.restore_logical_id("doc-2").expect("restore B");
    assert!(!second_endpoint.was_noop);

    let first_endpoint = service.restore_logical_id("doc-1").expect("restore A");
    assert!(!first_endpoint.was_noop);
    assert_eq!(first_endpoint.restored_node_rows, 1);
    assert!(
        first_endpoint.restored_edge_rows > 0,
        "edge should be restored when both endpoints active"
    );
    assert!(
        first_endpoint.skipped_edges.is_empty(),
        "no edges should be skipped"
    );

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let active_edges = verify_conn
        .query_row(
            "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
            [],
            |row| row.get::<_, i64>(0),
        )
        .expect("active edge count");
    assert_eq!(
        active_edges, 1,
        "edge must be active after both endpoints restored"
    );
}
8311
/// Walks a single `Meeting` schema through register, describe, list,
/// re-register and remove, then checks that removing it a second time fails.
#[test]
fn fts_property_schema_crud_round_trip() {
    let (_db, service) = setup();

    // Create: no separator supplied, so the record is expected to carry " ".
    let initial_paths = ["$.title".to_owned(), "$.summary".to_owned()];
    let created = service
        .register_fts_property_schema("Meeting", &initial_paths, None)
        .expect("register");
    assert_eq!(created.kind, "Meeting");
    assert_eq!(created.property_paths, vec!["$.title", "$.summary"]);
    assert_eq!(created.separator, " ");
    assert_eq!(created.format_version, 1);

    // Read back: describe must return exactly what register returned.
    let fetched = service
        .describe_fts_property_schema("Meeting")
        .expect("describe")
        .expect("should exist");
    assert_eq!(fetched, created);

    // Describing an unregistered kind is `Ok(None)`, not an error.
    let absent = service
        .describe_fts_property_schema("NoSuchKind")
        .expect("describe missing");
    assert!(absent.is_none());

    let all_schemas = service.list_fts_property_schemas().expect("list");
    assert_eq!(all_schemas.len(), 1);
    assert_eq!(all_schemas[0].kind, "Meeting");

    // Update: registering the same kind again replaces paths and separator.
    let replacement_paths = ["$.title".to_owned(), "$.notes".to_owned()];
    let replaced = service
        .register_fts_property_schema("Meeting", &replacement_paths, Some("\n"))
        .expect("update");
    assert_eq!(replaced.property_paths, vec!["$.title", "$.notes"]);
    assert_eq!(replaced.separator, "\n");

    // Delete: the schema disappears from describe afterwards.
    service
        .remove_fts_property_schema("Meeting")
        .expect("remove");
    let after_removal = service
        .describe_fts_property_schema("Meeting")
        .expect("describe after remove");
    assert!(after_removal.is_none());

    // A second removal of the same kind must report an error.
    assert!(service.remove_fts_property_schema("Meeting").is_err());
}
8373
/// A schema registered with one scalar and one recursive entry plus an
/// exclude path must come back unchanged from both the register call and a
/// later describe.
#[test]
fn describe_fts_property_schema_round_trips_recursive_entries() {
    let (_db, service) = setup();

    let path_specs = vec![
        FtsPropertyPathSpec::scalar("$.title"),
        FtsPropertyPathSpec::recursive("$.payload"),
    ];
    let excluded = vec!["$.payload.private".to_owned()];

    let from_register = service
        .register_fts_property_schema_with_entries(
            "KnowledgeItem",
            &path_specs,
            Some(" "),
            &excluded,
        )
        .expect("register recursive");

    // The record returned by register already reflects the full entry list.
    assert_eq!(from_register.entries, path_specs);
    assert_eq!(from_register.exclude_paths, excluded);
    assert_eq!(from_register.property_paths, vec!["$.title", "$.payload"]);

    let from_describe = service
        .describe_fts_property_schema("KnowledgeItem")
        .expect("describe")
        .expect("should exist");

    assert_eq!(from_describe.kind, "KnowledgeItem");
    assert_eq!(from_describe.entries, path_specs);
    assert_eq!(from_describe.exclude_paths, excluded);
    assert_eq!(from_describe.property_paths, vec!["$.title", "$.payload"]);
    assert_eq!(from_describe.separator, " ");
    assert_eq!(from_describe.format_version, 1);
}
8409
/// Listing schemas must return the recursive entries and exclude paths of a
/// schema exactly as they were registered.
#[test]
fn list_fts_property_schemas_round_trips_recursive_entries() {
    let (_db, service) = setup();

    let path_specs = vec![
        FtsPropertyPathSpec::scalar("$.title"),
        FtsPropertyPathSpec::recursive("$.payload"),
    ];
    let excluded = vec!["$.payload.secret".to_owned()];

    service
        .register_fts_property_schema_with_entries(
            "KnowledgeItem",
            &path_specs,
            Some(" "),
            &excluded,
        )
        .expect("register recursive");

    let all_schemas = service.list_fts_property_schemas().expect("list");
    assert_eq!(all_schemas.len(), 1);

    // Only one schema was registered, so it must be the first list element.
    let only_schema = &all_schemas[0];
    assert_eq!(only_schema.kind, "KnowledgeItem");
    assert_eq!(only_schema.entries, path_specs);
    assert_eq!(only_schema.exclude_paths, excluded);
    assert_eq!(only_schema.property_paths, vec!["$.title", "$.payload"]);
}
8436
/// A schema registered through the plain path-list API must describe back
/// with one `Scalar` entry per path and no exclude paths.
#[test]
fn describe_fts_property_schema_round_trips_scalar_only_entries() {
    let (_db, service) = setup();

    let scalar_paths = ["$.title".to_owned(), "$.summary".to_owned()];
    service
        .register_fts_property_schema("Meeting", &scalar_paths, None)
        .expect("register scalar");

    let schema = service
        .describe_fts_property_schema("Meeting")
        .expect("describe")
        .expect("should exist");

    assert_eq!(schema.property_paths, vec!["$.title", "$.summary"]);
    assert_eq!(schema.entries.len(), 2);
    for path_spec in &schema.entries {
        assert_eq!(
            path_spec.mode,
            FtsPropertyPathMode::Scalar,
            "scalar-only schema should deserialize every entry as Scalar"
        );
    }
    assert!(schema.exclude_paths.is_empty());
}
8464
/// After a node is retired and both FTS tables are emptied, restoring the
/// node must recreate its property FTS row with the text extracted from the
/// node's properties.
#[test]
fn restore_reestablishes_property_fts_visibility() {
    let (db, service) = setup();

    // Seed through a scoped connection so it is dropped before the restore runs.
    {
        let seed_conn = sqlite::open_connection(db.path()).expect("conn");
        // Each entry is (statement, panic context); they run strictly in order.
        let seed_steps = [
            (
                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
                 VALUES ('Document', '[\"$.title\", \"$.body\"]', ' ')",
                "register schema",
            ),
            (
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('row-1', 'doc-1', 'Document', '{\"title\":\"Budget\",\"body\":\"Q3 forecast\"}', 100, 'seed')",
                "insert node",
            ),
            (
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'doc-1', 'budget text', 100)",
                "insert chunk",
            ),
            (
                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
                 VALUES ('doc-1', 'Document', 'Budget Q3 forecast')",
                "insert property fts",
            ),
            (
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
                "retire event",
            ),
            (
                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
                "supersede",
            ),
            // Empty both FTS tables so any row found later was written by restore.
            ("DELETE FROM fts_nodes", "clear chunk fts"),
            ("DELETE FROM fts_node_properties", "clear property fts"),
        ];
        for (sql, context) in seed_steps {
            seed_conn.execute(sql, []).expect(context);
        }
    }

    let outcome = service.restore_logical_id("doc-1").expect("restore");
    assert_eq!(outcome.restored_property_fts_rows, 1);

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");

    let property_rows = verify_conn
        .query_row(
            "SELECT count(*) FROM fts_node_properties WHERE node_logical_id = 'doc-1'",
            [],
            |row| row.get::<_, i64>(0),
        )
        .expect("prop fts count");
    assert_eq!(property_rows, 1, "property FTS must be restored");

    let indexed_text = verify_conn
        .query_row(
            "SELECT text_content FROM fts_node_properties WHERE node_logical_id = 'doc-1'",
            [],
            |row| row.get::<_, String>(0),
        )
        .expect("prop fts text");
    assert_eq!(indexed_text, "Budget Q3 forecast");
}
8539
/// A registered FTS property schema must be present, with its path list
/// intact, in the database file written by `safe_export`.
#[test]
fn safe_export_preserves_fts_property_schemas() {
    let (_db, service) = setup();

    let goal_paths = ["$.name".to_owned(), "$.rationale".to_owned()];
    service
        .register_fts_property_schema("Goal", &goal_paths, None)
        .expect("register schema");

    let export_dir = tempfile::TempDir::new().expect("temp dir");
    let export_path = export_dir.path().join("backup.db");
    let export_options = SafeExportOptions {
        force_checkpoint: false,
    };
    service
        .safe_export(&export_path, export_options)
        .expect("export");

    // Inspect the exported file with a plain rusqlite connection.
    let exported = rusqlite::Connection::open(&export_path).expect("open exported db");

    let exported_kind = exported
        .query_row(
            "SELECT kind FROM fts_property_schemas WHERE kind = 'Goal'",
            [],
            |row| row.get::<_, String>(0),
        )
        .expect("schema must exist in export");
    assert_eq!(exported_kind, "Goal");

    let exported_paths_json = exported
        .query_row(
            "SELECT property_paths_json FROM fts_property_schemas WHERE kind = 'Goal'",
            [],
            |row| row.get::<_, String>(0),
        )
        .expect("paths must exist");
    let exported_paths =
        serde_json::from_str::<Vec<String>>(&exported_paths_json).expect("valid json");
    assert_eq!(exported_paths, vec!["$.name", "$.rationale"]);
}
8582
/// End-to-end recovery scenario: export a database, damage the property FTS
/// table in the export (one row replaced with wrong text, one row deleted),
/// rebuild FTS projections on the export, and check that search results and
/// the integrity/semantics reports are clean afterwards.
#[test]
// Kept as one long scenario: every step depends on the state left by the previous one.
#[allow(clippy::too_many_lines)]
fn export_recovery_rebuilds_property_fts_from_canonical_state() {
    let (db, service) = setup();
    service
        .register_fts_property_schema("Goal", &["$.name".to_owned()], None)
        .expect("register");

    // Seed two Goal nodes together with matching property FTS rows.
    {
        let source_conn = sqlite::open_connection(db.path()).expect("conn");
        let seed_steps = [
            (
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
                "insert node 1",
            ),
            (
                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
                 VALUES ('goal-1', 'Goal', 'Ship v2')",
                "insert property FTS row 1",
            ),
            (
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('row-2', 'goal-2', 'Goal', '{\"name\":\"Launch redesign\"}', 100, 'seed')",
                "insert node 2",
            ),
            (
                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
                 VALUES ('goal-2', 'Goal', 'Launch redesign')",
                "insert property FTS row 2",
            ),
        ];
        for (sql, context) in seed_steps {
            source_conn.execute(sql, []).expect(context);
        }
    }

    let export_dir = tempfile::TempDir::new().expect("temp dir");
    let export_path = export_dir.path().join("backup.db");
    let export_options = SafeExportOptions {
        force_checkpoint: false,
    };
    service
        .safe_export(&export_path, export_options)
        .expect("export");

    // Damage the export: goal-1 gets wrong text, goal-2 loses its row entirely.
    {
        let export_conn = rusqlite::Connection::open(&export_path).expect("open export");
        let damage_steps = [
            (
                "DELETE FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
                "delete old row",
            ),
            (
                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
                 VALUES ('goal-1', 'Goal', 'completely wrong stale text')",
                "insert corrupted row",
            ),
            (
                "DELETE FROM fts_node_properties WHERE node_logical_id = 'goal-2'",
                "delete goal-2 row",
            ),
        ];
        for (sql, context) in damage_steps {
            export_conn.execute(sql, []).expect(context);
        }
    }

    // Rebuild the FTS projections on the damaged export.
    let schema = Arc::new(SchemaManager::new());
    let exported_service = AdminService::new(&export_path, Arc::clone(&schema));
    exported_service
        .rebuild_projections(ProjectionTarget::Fts)
        .expect("rebuild");

    let coordinator = ExecutionCoordinator::open(
        &export_path,
        Arc::clone(&schema),
        None,
        1,
        Arc::new(TelemetryCounters::default()),
        None,
    )
    .expect("coordinator");

    // Runs one text search over Goal nodes against the rebuilt export.
    let search_goals = |term: &'static str| {
        let compiled = QueryBuilder::nodes("Goal")
            .text_search(term, 10)
            .limit(10)
            .compile()
            .expect("compile");
        coordinator
            .execute_compiled_read(&compiled)
            .expect("execute read")
    };

    // goal-1 is findable again by its canonical text.
    let ship_hits = search_goals("Ship");
    assert_eq!(ship_hits.nodes.len(), 1);
    assert_eq!(ship_hits.nodes[0].logical_id, "goal-1");

    // goal-2's deleted row has been recreated.
    let redesign_hits = search_goals("redesign");
    assert_eq!(redesign_hits.nodes.len(), 1);
    assert_eq!(redesign_hits.nodes[0].logical_id, "goal-2");

    // The injected wrong text is gone.
    let stale_hits = search_goals("stale");
    assert_eq!(
        stale_hits.nodes.len(),
        0,
        "corrupted text must not appear in search after rebuild"
    );

    let integrity = exported_service.check_integrity().expect("integrity");
    assert_eq!(integrity.missing_property_fts_rows, 0);

    let semantics = exported_service.check_semantics().expect("semantics");
    assert_eq!(semantics.drifted_property_fts_rows, 0);
    assert_eq!(semantics.orphaned_property_fts_rows, 0);
    assert_eq!(semantics.duplicate_property_fts_rows, 0);
}
8719
/// A node whose properties contain none of the schema's paths (here the
/// schema indexes `$.searchable` but the node only has `status`) must not be
/// reported as a missing property FTS row.
#[test]
fn check_integrity_no_false_positives_for_empty_extraction() {
    const REGISTER_SCHEMA_SQL: &str =
        "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
         VALUES ('Ticket', '[\"$.searchable\"]', ' ')";
    const INSERT_NODE_SQL: &str =
        "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
         VALUES ('row-1', 'ticket-1', 'Ticket', '{\"status\":\"open\"}', 100, 'seed')";

    let (db, service) = setup();
    {
        let seed_conn = sqlite::open_connection(db.path()).expect("conn");
        seed_conn
            .execute(REGISTER_SCHEMA_SQL, [])
            .expect("register schema");
        seed_conn.execute(INSERT_NODE_SQL, []).expect("insert node");
    }

    let integrity = service.check_integrity().expect("integrity");
    assert_eq!(
        integrity.missing_property_fts_rows, 0,
        "node with no extractable values must not be counted as missing"
    );
}
8748
/// A node that does carry an indexed property (`$.title`) but has no
/// property FTS row must be counted once by the integrity check.
#[test]
fn check_integrity_detects_genuinely_missing_property_fts_rows() {
    const REGISTER_SCHEMA_SQL: &str =
        "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
         VALUES ('Ticket', '[\"$.title\"]', ' ')";
    const INSERT_NODE_SQL: &str =
        "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
         VALUES ('row-1', 'ticket-1', 'Ticket', '{\"title\":\"fix login bug\"}', 100, 'seed')";

    let (db, service) = setup();
    {
        let seed_conn = sqlite::open_connection(db.path()).expect("conn");
        seed_conn
            .execute(REGISTER_SCHEMA_SQL, [])
            .expect("register schema");
        seed_conn.execute(INSERT_NODE_SQL, []).expect("insert node");
    }

    let integrity = service.check_integrity().expect("integrity");
    assert_eq!(
        integrity.missing_property_fts_rows, 1,
        "node with extractable values but no property FTS row must be detected"
    );
}
8775
/// With a schema and a node present but no property FTS row ever written,
/// a full FTS rebuild must create the row from the node's properties.
#[test]
fn rebuild_projections_fts_restores_missing_property_fts_rows() {
    const REGISTER_SCHEMA_SQL: &str =
        "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
         VALUES ('Goal', '[\"$.name\"]', ' ')";
    const INSERT_NODE_SQL: &str =
        "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
         VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')";

    let (db, service) = setup();
    {
        let seed_conn = sqlite::open_connection(db.path()).expect("conn");
        seed_conn
            .execute(REGISTER_SCHEMA_SQL, [])
            .expect("register schema");
        seed_conn.execute(INSERT_NODE_SQL, []).expect("insert node");
    }

    let rebuild = service
        .rebuild_projections(ProjectionTarget::Fts)
        .expect("rebuild");
    assert!(
        rebuild.rebuilt_rows >= 1,
        "rebuild must insert at least one property FTS row"
    );

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let indexed_text = verify_conn
        .query_row(
            "SELECT text_content FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
            [],
            |row| row.get::<_, String>(0),
        )
        .expect("property FTS row must exist after rebuild");
    assert_eq!(indexed_text, "Ship v2");
}
8814
/// When a property FTS row is inserted and then deleted, the missing-only
/// rebuild must put exactly one row back for that node.
#[test]
fn rebuild_missing_projections_fills_gap_for_deleted_property_fts_row() {
    let (db, service) = setup();

    // Seed through a scoped connection so it is dropped before the rebuild runs.
    {
        let seed_conn = sqlite::open_connection(db.path()).expect("conn");
        // Each entry is (statement, panic context); they run strictly in order.
        let seed_steps = [
            (
                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
                 VALUES ('Goal', '[\"$.name\"]', ' ')",
                "register schema",
            ),
            (
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
                "insert node",
            ),
            (
                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
                 VALUES ('goal-1', 'Goal', 'Ship v2')",
                "insert property fts",
            ),
            // Remove the row again to create the gap the rebuild has to fill.
            (
                "DELETE FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
                "delete property fts",
            ),
        ];
        for (sql, context) in seed_steps {
            seed_conn.execute(sql, []).expect(context);
        }
    }

    let rebuild = service
        .rebuild_missing_projections()
        .expect("rebuild missing");
    assert!(
        rebuild.rebuilt_rows >= 1,
        "missing rebuild must insert the gap-fill row"
    );

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let property_rows = verify_conn
        .query_row(
            "SELECT count(*) FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
            [],
            |row| row.get::<_, i64>(0),
        )
        .expect("count");
    assert_eq!(
        property_rows, 1,
        "gap-fill must restore exactly one property FTS row"
    );
}
8867
/// Removing a schema leaves its property FTS rows behind: the semantics
/// check must report them as orphaned, and a full FTS rebuild must then
/// delete them.
#[test]
fn remove_schema_then_rebuild_cleans_stale_property_fts_rows() {
    let (db, service) = setup();
    service
        .register_fts_property_schema("Goal", &["$.name".to_owned()], None)
        .expect("register");

    // Seed one Goal node together with its property FTS row.
    {
        let seed_conn = sqlite::open_connection(db.path()).expect("conn");
        let seed_steps = [
            (
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
                "insert node",
            ),
            (
                "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
                 VALUES ('goal-1', 'Goal', 'Ship v2')",
                "insert property fts",
            ),
        ];
        for (sql, context) in seed_steps {
            seed_conn.execute(sql, []).expect(context);
        }
    }

    service.remove_fts_property_schema("Goal").expect("remove");

    // Before any rebuild, the leftover row must show up as orphaned.
    let semantics = service.check_semantics().expect("semantics");
    assert_eq!(
        semantics.orphaned_property_fts_rows, 1,
        "stale property FTS rows must be detected after schema removal"
    );

    service
        .rebuild_projections(ProjectionTarget::Fts)
        .expect("rebuild");

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let remaining_rows = verify_conn
        .query_row(
            "SELECT count(*) FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
            [],
            |row| row.get::<_, i64>(0),
        )
        .expect("count");
    assert_eq!(
        remaining_rows, 0,
        "rebuild after schema removal must delete stale property FTS rows"
    );
}
8919
8920 mod validate_fts_property_paths_tests {
8921 use super::super::validate_fts_property_paths;
8922
8923 #[test]
8924 fn valid_simple_path() {
8925 assert!(validate_fts_property_paths(&["$.name".to_owned()]).is_ok());
8926 }
8927
8928 #[test]
8929 fn valid_nested_path() {
8930 assert!(validate_fts_property_paths(&["$.address.city".to_owned()]).is_ok());
8931 }
8932
8933 #[test]
8934 fn valid_underscore_segment() {
8935 assert!(validate_fts_property_paths(&["$.a_b".to_owned()]).is_ok());
8936 }
8937
8938 #[test]
8939 fn rejects_bare_prefix() {
8940 let result = validate_fts_property_paths(&["$.".to_owned()]);
8941 assert!(result.is_err(), "path '$.' must be rejected");
8942 }
8943
8944 #[test]
8945 fn rejects_double_dot() {
8946 let result = validate_fts_property_paths(&["$..x".to_owned()]);
8947 assert!(result.is_err(), "path '$..x' must be rejected");
8948 }
8949
8950 #[test]
8951 fn rejects_trailing_dot() {
8952 let result = validate_fts_property_paths(&["$.foo.".to_owned()]);
8953 assert!(result.is_err(), "path '$.foo.' must be rejected");
8954 }
8955
8956 #[test]
8957 fn rejects_space_in_segment() {
8958 let result = validate_fts_property_paths(&["$.foo bar".to_owned()]);
8959 assert!(result.is_err(), "path '$.foo bar' must be rejected");
8960 }
8961
8962 #[test]
8963 fn rejects_bracket_syntax() {
8964 let result = validate_fts_property_paths(&["$.foo[0]".to_owned()]);
8965 assert!(result.is_err(), "path '$.foo[0]' must be rejected");
8966 }
8967
8968 #[test]
8969 fn rejects_duplicates() {
8970 let result = validate_fts_property_paths(&["$.name".to_owned(), "$.name".to_owned()]);
8971 assert!(result.is_err(), "duplicate paths must be rejected");
8972 }
8973
8974 #[test]
8975 fn rejects_empty_list() {
8976 let result = validate_fts_property_paths(&[]);
8977 assert!(result.is_err(), "empty path list must be rejected");
8978 }
8979 }
8980}